Mercurial > geeqie
comparison doc/wiki2docbook.py @ 1773:2ae81598b254
scripts for converting wiki documentation to docbook
author | nadvornik |
---|---|
date | Sun, 22 Nov 2009 09:12:22 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1772:9f3b7a089caf | 1773:2ae81598b254 |
---|---|
1 #!/usr/bin/python | |
2 # | |
3 # This script converts trac wiki to docbook | |
4 # wiki pages must be in wiki/ directory and their names must start with "Guide" | |
5 # the first page is named GuideIndex | |
6 # output is written to docbook/ directory | |
7 # | |
8 # based on the following scripts: | |
9 # | |
10 # http://trac-hacks.org/wiki/Page2DocbookPlugin | |
11 # http://trac.edgewall.org/attachment/wiki/TracWiki/trac_wiki2html.py | |
12 # | |
13 # see the links above for a list of requirements | |
14 | |
15 | |
16 import sys | |
17 import os | |
18 from trac.test import EnvironmentStub, Mock, MockPerm | |
19 from trac.mimeview import Context | |
20 from trac.wiki.formatter import HtmlFormatter | |
21 from trac.wiki.model import WikiPage | |
22 from trac.web.href import Href | |
23 | |
24 import urllib | |
25 from tidy import parseString | |
26 import libxml2 | |
27 import libxslt | |
28 import re | |
29 | |
# Directory holding the bundled XSLT helpers.  It is resolved against the
# current working directory, so the script has to be started from the
# directory that contains "wiki2docbook/".
datadir = os.getcwd() + "/wiki2docbook"


# Thin wrapper stylesheet around html2db.xsl (XHTML -> DocBook).  The
# "document-root" parameter is set to the placeholder element name
# '__top_element__'; process_pages() later rewrites the resulting
# <__top_element__> tags into the real top-level element for each page.
xhtml2dbXsl = u"""<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/html2db/html2db.xsl') + """\" />
<xsl:output method="xml" indent="no" encoding="utf-8"/>
<xsl:param name="document-root" select="'__top_element__'"/>
</xsl:stylesheet>
"""

# Thin wrapper stylesheet around headingsNormalizer.xsl, applied to the XHTML
# before the DocBook conversion.
# NOTE(review): the "defaultTopHeading" parameter still carries the literal
# placeholder select="FIXME" (unquoted, so XPath reads it as a node test, not
# a string) -- confirm what value was intended.
normalizedHeadingsXsl = u"""<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/headingsNormalizer/headingsNormalizer.xsl') + """\" />
<xsl:output method="xml" indent="no" encoding="utf-8"/>
<xsl:param name="defaultTopHeading" select="FIXME"/>
</xsl:stylesheet>
"""

# Both stylesheets are parsed once at import time and reused by
# html2docbook() for every page.
normalizedHeadingsXsl_xmldoc = libxml2.parseDoc(normalizedHeadingsXsl)
normalizedHeadingsXsl_xsldoc = libxslt.parseStylesheetDoc(normalizedHeadingsXsl_xmldoc)

xhtml2dbXsl_xmldoc = libxml2.parseDoc(xhtml2dbXsl)
xhtml2dbXsl_xsldoc = libxslt.parseStylesheetDoc(xhtml2dbXsl_xmldoc)
58 | |
def html2docbook(html):
    """Convert an HTML fragment to DocBook XML.

    The HTML is first cleaned up into XHTML by tidy, then run through the
    headings-normalizer stylesheet and finally through the html2db
    stylesheet.

    html -- unicode string with the HTML to convert.

    Returns the serialized DocBook document as a unicode string.  Its root
    element is the placeholder configured in the html2db wrapper stylesheet.
    """
    options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0,
                   input_encoding='utf8', output_encoding='utf8',
                   doctype='auto', wrap=0, char_encoding='utf8')
    xhtml = parseString(html.encode("utf-8"), **options)

    xhtml_xmldoc = libxml2.parseDoc(str(xhtml))
    xhtml2_xmldoc = None
    docbook_xmldoc = None
    try:
        xhtml2_xmldoc = normalizedHeadingsXsl_xsldoc.applyStylesheet(xhtml_xmldoc, None)
        docbook_xmldoc = xhtml2dbXsl_xsldoc.applyStylesheet(xhtml2_xmldoc, None)
        dbstring = xhtml2dbXsl_xsldoc.saveResultToString(docbook_xmldoc)
    finally:
        # libxml2 documents are not garbage collected by Python; release
        # whatever was created, even when one of the transform steps failed.
        for doc in (xhtml_xmldoc, xhtml2_xmldoc, docbook_xmldoc):
            if doc is not None:
                doc.freeDoc()

    return dbstring.decode('utf-8')
78 | |
79 | |
# Per-page tables, all keyed by wiki page name.
text = dict()    # raw wiki text
depth = dict()   # document depth: 0 for the index, leaf documents have depth 1 or 2
parent = dict()  # name of the parent document (only for depth > 0)
inner = dict()   # present for documents that are parents of other documents

# DocBook top-level element to use for a page, indexed by the page's depth:
# the index becomes a book, its direct children chapters, everything below
# that a (nested) section.
top_element = ['book', 'chapter'] + ['section'] * 8
87 | |
# Stub Trac environment plus a mocked anonymous request; together they provide
# the rendering context that the wiki formatter is given in process_pages().
env = EnvironmentStub()
req = Mock(
    href=Href('/'),
    abs_href=Href('http://www.example.com/'),
    authname='anonymous',
    perm=MockPerm(),
    args={},
)
context = Context.from_request(req, 'wiki')
92 | |
93 | |
def read_file(name):
    """Load wiki page *name* from the wiki/ directory and register it.

    The UTF-8 decoded file content is stored in the module-level ``text``
    table.  A page with the same name and the dummy content '--' is also
    saved into the stub Trac environment.
    """
    # presumably the dummy page exists so that the formatter treats links to
    # this page as links to an existing page -- TODO confirm
    source = open("wiki/" + name)
    try:
        # Close the file explicitly instead of relying on garbage collection.
        text[name] = source.read().decode('utf-8')
    finally:
        source.close()

    page = WikiPage(env)
    page.name = name
    page.text = '--'
    page.save('', '', '::1', 0)
100 | |
101 | |
def read_index():
    """Read GuideIndex and every page it links to, recording the page tree.

    Each index line of the form ``<indent>* [wiki:GuideXxx ...`` describes one
    page; two spaces of indentation correspond to one level of depth.  For
    every linked page with depth > 0 this fills in ``depth``, ``parent`` and
    ``inner`` (for the parent) and loads the page with read_file().
    Links at depth 0 are ignored.
    """
    index_name = "GuideIndex"
    read_file(index_name)
    index_text = text[index_name]
    depth[index_name] = 0
    inner[index_name] = 1

    # stack[d] is the most recently seen page at depth d; stack[0] is the index.
    stack = [index_name, '', '', '']

    link = re.compile(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)')

    for line in index_text.splitlines():
        match = link.match(line)
        if not match:
            continue

        name = match.group(2)
        # Floor division: the depth is used as a list index below.
        d = len(match.group(1)) // 2
        if d > 0:
            # Grow the stack on demand so that deeply nested entries do not
            # raise IndexError (top_element allows depths beyond 3).
            while len(stack) <= d:
                stack.append('')

            depth[name] = d
            parent[name] = stack[d - 1]
            inner[stack[d - 1]] = 1
            stack[d] = name
            read_file(name)
122 | |
# exclude links with depth != 1 from wiki text, deeper ones will be included indirectly
def filter_out_indirect(text):
    """Return *text* with only the depth-1 Guide links left in.

    Lines that are not Guide list links (``<indent>* [wiki:GuideXxx``) are
    always kept.  Link lines are kept only when their indentation corresponds
    to depth 1 (two or three leading spaces); all other link lines are
    dropped.  Every kept line is terminated with a newline.
    """
    kept = []
    for line in text.splitlines():
        match = re.match(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line)
        # Floor division keeps the depth an integer regardless of the Python
        # version, so an odd indentation such as 3 spaces still counts as 1.
        if match is None or len(match.group(1)) // 2 == 1:
            kept.append(line + "\n")
    return "".join(kept)
134 | |
def process_pages():
    """Convert every page in ``text`` to DocBook and write docbook/<name>.xml.

    For each page the wiki text is rendered to HTML with the Trac formatter,
    links to other Guide pages are turned into in-document anchors, and the
    HTML is converted with html2docbook().  For pages that have children
    (listed in ``inner``) the list of child links is replaced by XInclude
    elements pointing at the children's XML files.  Finally the placeholder
    root element is replaced by the element matching the page's depth.
    """
    # Patterns are the same for every page, so compile them once.
    list_open = re.compile(r'<itemizedlist[^>]*>')
    list_close = re.compile(r'</itemizedlist>')
    link_item = re.compile(r'<listitem>\s*<para>\s*<link\s*linkend="(Guide[a-zA-Z0-9]*)">[^<]*</link>\s*</para>\s*</listitem>')
    xinclude = r'<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="\1.xml"/>\n'

    for name in text:
        txt = text[name]

        if name in inner:
            txt = filter_out_indirect(txt)

        html = HtmlFormatter(env, context, txt).generate()

        html = html.replace("/wiki/Guide", "#Guide")

        top = top_element[depth[name]]
        db = html2docbook(html)

        if name in inner:
            # replace list items with XIncludes, FIXME: this is ugly
            db = list_open.sub(r'', db)
            db = list_close.sub(r'', db)
            db = link_item.sub(xinclude, db)

        db = db.replace("<__top_element__>", "<" + top + " id=\"" + name + "\">")
        db = db.replace("</__top_element__>", "</" + top + ">")

        out = open("docbook/" + name + ".xml", "w")
        try:
            # Close explicitly so the output is flushed to disk deterministically.
            out.write(db.encode('utf-8'))
        finally:
            out.close()
165 | |
166 | |
# Script entry point: read GuideIndex and all pages it links to, then convert
# every page that was read and write it to docbook/<name>.xml.
read_index()
process_pages()
169 | |
170 | |
171 |