comparison doc/wiki2docbook.py @ 1773:2ae81598b254

scripts for converting wiki documentation to docbook
author nadvornik
date Sun, 22 Nov 2009 09:12:22 +0000
parents
children
comparison
equal deleted inserted replaced
1772:9f3b7a089caf 1773:2ae81598b254
1 #!/usr/bin/python
2 #
3 # This script converts trac wiki to docbook
4 # wiki pages must be in wiki/ directory and their names must start with "Guide"
5 # the first page is named GuideIndex
6 # output is written to docbook/ directory
7 #
8 # based on the following scripts:
9 #
10 # http://trac-hacks.org/wiki/Page2DocbookPlugin
11 # http://trac.edgewall.org/attachment/wiki/TracWiki/trac_wiki2html.py
12 #
13 # see the links above for a list of requirements
14
15
16 import sys
17 import os
18 from trac.test import EnvironmentStub, Mock, MockPerm
19 from trac.mimeview import Context
20 from trac.wiki.formatter import HtmlFormatter
21 from trac.wiki.model import WikiPage
22 from trac.web.href import Href
23
24 import urllib
25 from tidy import parseString
26 import libxml2
27 import libxslt
28 import re
29
# Directory holding the bundled XSLT resources (html2db/, headingsNormalizer/).
# Assumes the script is run from the directory containing wiki2docbook/.
datadir = os.getcwd() + "/wiki2docbook"


# Wrapper stylesheet: imports html2db.xsl and sets the document root to the
# placeholder name __top_element__, which process_pages() later replaces with
# book/chapter/section depending on page depth.
xhtml2dbXsl = u"""<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/html2db/html2db.xsl') + """\" />
<xsl:output method="xml" indent="no" encoding="utf-8"/>
<xsl:param name="document-root" select="'__top_element__'"/>
</xsl:stylesheet>
"""

# Wrapper stylesheet: imports headingsNormalizer.xsl so heading levels are
# normalized before the DocBook conversion.
# NOTE(review): select="FIXME" passes an XPath selecting a <FIXME> element as
# the default top heading — looks like an unresolved placeholder; confirm.
normalizedHeadingsXsl = u"""<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/headingsNormalizer/headingsNormalizer.xsl') + """\" />
<xsl:output method="xml" indent="no" encoding="utf-8"/>
<xsl:param name="defaultTopHeading" select="FIXME"/>
</xsl:stylesheet>
"""

# Parse both wrapper stylesheets once at import time; the compiled stylesheet
# objects are reused for every page in html2docbook().
normalizedHeadingsXsl_xmldoc = libxml2.parseDoc(normalizedHeadingsXsl)
normalizedHeadingsXsl_xsldoc = libxslt.parseStylesheetDoc(normalizedHeadingsXsl_xmldoc)

xhtml2dbXsl_xmldoc = libxml2.parseDoc(xhtml2dbXsl)
xhtml2dbXsl_xsldoc = libxslt.parseStylesheetDoc(xhtml2dbXsl_xmldoc)
58
def html2docbook(html):
    """Convert an HTML fragment (unicode) to a DocBook fragment (unicode).

    Pipeline: tidy (HTML -> well-formed XHTML) -> headingsNormalizer XSLT
    -> html2db XSLT.  Uses the module-level precompiled stylesheets.
    """
    options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0,
                   input_encoding='utf8', output_encoding='utf8',
                   doctype='auto', wrap=0, char_encoding='utf8')
    xhtml = parseString(html.encode("utf-8"), **options)

    xhtml_xmldoc = libxml2.parseDoc(str(xhtml))

    # Normalize heading levels before the DocBook conversion.
    xhtml2_xmldoc = normalizedHeadingsXsl_xsldoc.applyStylesheet(xhtml_xmldoc, None)

    # (The original also serialized xhtml2_xmldoc to a string here and never
    # used it — that dead work has been removed.)
    docbook_xmldoc = xhtml2dbXsl_xsldoc.applyStylesheet(xhtml2_xmldoc, None)
    dbstring = xhtml2dbXsl_xsldoc.saveResultToString(docbook_xmldoc)

    # libxml2 documents are not garbage collected; free them explicitly.
    xhtml_xmldoc.freeDoc()
    xhtml2_xmldoc.freeDoc()
    docbook_xmldoc.freeDoc()
    return dbstring.decode('utf-8')
78
79
# Per-page state, all keyed by wiki page name (e.g. "GuideIndex"):
text = {} #wiki text
depth = {} #document depth, 0 for index, leaf documents have depth 1 or 2
parent = {}#parent document (if depth > 0)
inner = {} #defined for documents that are parents

#top element indexed by depth
top_element = [ 'book', 'chapter', 'section', 'section', 'section', 'section', 'section', 'section', 'section', 'section' ]

# Minimal in-memory trac environment and mock request so HtmlFormatter can
# render wiki text without a real trac installation.
env = EnvironmentStub()
req = Mock(href=Href('/'), abs_href=Href('http://www.example.com/'),
           authname='anonymous', perm=MockPerm(), args={})
context = Context.from_request(req, 'wiki')
92
93
def read_file(name):
    """Read wiki/<name> into text[name] (as unicode) and register the page.

    A stub WikiPage is saved into the stub environment — presumably so that
    [wiki:...] links to this page render as resolved internal links rather
    than missing-page links (TODO confirm against HtmlFormatter behavior).
    """
    # Close the handle explicitly; the original used file(...) and leaked it.
    f = open("wiki/" + name)
    try:
        text[name] = f.read().decode('utf-8')
    finally:
        f.close()
    page = WikiPage(env)
    page.name = name
    page.text = '--'
    page.save('', '', '::1', 0)
100
101
def read_index():
    """Load GuideIndex plus every Guide* page it links, recording hierarchy.

    Fills the module-level depth/parent/inner dicts from the indentation of
    the bullet list in GuideIndex (two spaces per level).
    """
    root = "GuideIndex"
    read_file(root)
    depth[root] = 0
    inner[root] = 1

    # stack[i] holds the most recently seen page at depth i.
    stack = [root, '', '', '']

    link_pattern = re.compile(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)')
    for row in text[root].splitlines():
        hit = link_pattern.match(row)
        if not hit:
            continue
        page = hit.group(2)
        level = len(hit.group(1)) / 2
        if level <= 0:
            continue
        depth[page] = level
        parent[page] = stack[level - 1]
        inner[stack[level - 1]] = 1
        stack[level] = page
        read_file(page)
122
def filter_out_indirect(text):
    """Return wiki text with list links deeper than depth 1 removed.

    Lines of the form '    * [wiki:GuideFoo ...]' indented more than one
    level (two spaces per level) are excluded; those pages get included
    indirectly via XInclude from their parent page.  All other lines,
    including plain text, are kept unchanged.
    """
    # Compile once instead of per line.
    link_re = re.compile(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)')
    kept = []
    for line in text.splitlines():
        match = link_re.match(line)
        # Non-link lines count as depth 1 so they are always kept.
        d = len(match.group(1)) / 2 if match else 1
        if d == 1:
            kept.append(line + "\n")
    # Join once; the original's repeated string += was quadratic.
    return "".join(kept)
134
def process_pages():
    """Render every loaded wiki page to docbook/<name>.xml.

    For parent ("inner") pages, child links are dropped from the wiki text
    and the rendered list items are rewritten to XInclude elements, so each
    child document is pulled in by reference.
    """
    # Compile the rewrite patterns once, not once per page.
    itemized_open_re = re.compile(r'<itemizedlist[^>]*>')
    itemized_close_re = re.compile(r'</itemizedlist>')
    listitem_link_re = re.compile(
        r'<listitem>\s*<para>\s*<link\s*linkend="(Guide[a-zA-Z0-9]*)">[^<]*</link>\s*</para>\s*</listitem>')

    for name in text.keys():
        txt = text[name]

        # Parent pages embed children via XInclude; drop their deep links.
        if name in inner:
            txt = filter_out_indirect(txt)

        html = HtmlFormatter(env, context, txt).generate()

        # Turn inter-page wiki links into intra-document anchors.
        html = html.replace("/wiki/Guide", "#Guide")

        top = top_element[depth[name]]
        db = html2docbook(html)

        if name in inner:
            # replace list items with XIncludes, FIXME: this is ugly
            db = itemized_open_re.sub('', db)
            db = itemized_close_re.sub('', db)
            db = listitem_link_re.sub(
                r'<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="\1.xml"/>\n', db)

        # Substitute the placeholder root element set by the html2db XSL.
        db = db.replace("<__top_element__>", "<" + top + " id=\"" + name + "\">")
        db = db.replace("</__top_element__>", "</" + top + ">")

        # Close the output file explicitly; the original leaked the handle.
        out = open("docbook/" + name + ".xml", "w")
        try:
            out.write(db.encode('utf-8'))
        finally:
            out.close()
165
166
# Entry point: build the page tree from GuideIndex, then emit DocBook files.
read_index()
process_pages()
169
170
171