#!/usr/bin/python
#
# This script converts a trac wiki to docbook.
# Wiki pages must be in the wiki/ directory and their names must start with "Guide";
# the first page is named GuideIndex.
# Output is written to the docbook/ directory.
#
# Based on the following scripts:
#
# http://trac-hacks.org/wiki/Page2DocbookPlugin
# http://trac.edgewall.org/attachment/wiki/TracWiki/trac_wiki2html.py
#
# See the links above for a list of requirements.

16 import sys
|
|
17 import os
|
|
18 from trac.test import EnvironmentStub, Mock, MockPerm
|
|
19 from trac.mimeview import Context
|
|
20 from trac.wiki.formatter import HtmlFormatter
|
|
21 from trac.wiki.model import WikiPage
|
|
22 from trac.web.href import Href
|
|
23
|
|
24 import urllib
|
|
25 from tidy import parseString
|
|
26 import libxml2
|
|
27 import libxslt
|
|
28 import re
|
|
29
|
|
30 datadir = os.getcwd() + "/wiki2docbook"
|
|
31
|
|
32
|
|
33 xhtml2dbXsl = u"""<?xml version="1.0"?>
|
|
34 <xsl:stylesheet version="1.0"
|
|
35 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
36
|
|
37 <xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/html2db/html2db.xsl') + """\" />
|
|
38 <xsl:output method="xml" indent="no" encoding="utf-8"/>
|
|
39 <xsl:param name="document-root" select="'__top_element__'"/>
|
|
40 </xsl:stylesheet>
|
|
41 """
|
|
42
|
|
43 normalizedHeadingsXsl = u"""<?xml version="1.0"?>
|
|
44 <xsl:stylesheet version="1.0"
|
|
45 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
46
|
|
47 <xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/headingsNormalizer/headingsNormalizer.xsl') + """\" />
|
|
48 <xsl:output method="xml" indent="no" encoding="utf-8"/>
|
|
49 <xsl:param name="defaultTopHeading" select="FIXME"/>
|
|
50 </xsl:stylesheet>
|
|
51 """
|
|
52
|
|
53 normalizedHeadingsXsl_xmldoc = libxml2.parseDoc(normalizedHeadingsXsl)
|
|
54 normalizedHeadingsXsl_xsldoc = libxslt.parseStylesheetDoc(normalizedHeadingsXsl_xmldoc)
|
|
55
|
|
56 xhtml2dbXsl_xmldoc = libxml2.parseDoc(xhtml2dbXsl)
|
|
57 xhtml2dbXsl_xsldoc = libxslt.parseStylesheetDoc(xhtml2dbXsl_xmldoc)
|
|
58
|
|
59 def html2docbook(html):
|
|
60
|
|
61 options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0, input_encoding='utf8', output_encoding='utf8', doctype='auto', wrap=0, char_encoding='utf8')
|
|
62 xhtml = parseString(html.encode("utf-8"), **options)
|
|
63
|
|
64 xhtml_xmldoc = libxml2.parseDoc(str(xhtml))
|
|
65
|
|
66 xhtml2_xmldoc = normalizedHeadingsXsl_xsldoc.applyStylesheet(xhtml_xmldoc, None)
|
|
67
|
|
68 nhstring = normalizedHeadingsXsl_xsldoc.saveResultToString(xhtml2_xmldoc)
|
|
69
|
|
70 docbook_xmldoc = xhtml2dbXsl_xsldoc.applyStylesheet(xhtml2_xmldoc, None)
|
|
71
|
|
72 dbstring = xhtml2dbXsl_xsldoc.saveResultToString(docbook_xmldoc)
|
|
73
|
|
74 xhtml_xmldoc.freeDoc()
|
|
75 xhtml2_xmldoc.freeDoc()
|
|
76 docbook_xmldoc.freeDoc()
|
|
77 return dbstring.decode('utf-8')
|
|
78
|
|
79
|
|
80 text = {} #wiki text
|
|
81 depth = {} #document depth, 0 for index, leaf documents have depth 1 or 2
|
|
82 parent = {}#parent document (if depth > 0)
|
|
83 inner = {} #defined for documents that are parents
|
|
84
|
|
85 #top element indexed by depth
|
|
86 top_element = [ 'book', 'chapter', 'section', 'section', 'section', 'section', 'section', 'section', 'section', 'section' ]
|
|
87
|
|
88 env = EnvironmentStub()
|
|
89 req = Mock(href=Href('/'), abs_href=Href('http://www.example.com/'),
|
|
90 authname='anonymous', perm=MockPerm(), args={})
|
|
91 context = Context.from_request(req, 'wiki')
|
|
92
|
|
93
|
|
94 def read_file(name):
|
|
95 text[name] = file("wiki/" + name).read().decode('utf-8')
|
|
96 page = WikiPage(env)
|
|
97 page.name = name
|
|
98 page.text = '--'
|
|
99 page.save('', '', '::1', 0)
|
|
100
|
|
101
|
|
102 def read_index():
|
|
103 index_name = "GuideIndex"
|
|
104 read_file(index_name)
|
|
105 index_text = text[index_name]
|
|
106 depth[index_name] = 0
|
|
107 inner[index_name] = 1
|
|
108
|
|
109 stack = [ index_name , '', '', '' ]
|
|
110
|
|
111 for line in index_text.splitlines() :
|
|
112 match = re.match('^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line)
|
|
113 if match:
|
|
114 name = match.group(2)
|
|
115 d = len(match.group(1)) / 2
|
|
116 if (d > 0):
|
|
117 depth[name] = d
|
|
118 parent[name] = stack[d - 1]
|
|
119 inner[stack[d - 1]] = 1
|
|
120 stack[d] = name
|
|
121 read_file(name)
|
|
122
|
|
123 # exclude links with depth > 1 from wiki text, they will be included indirectly
|
|
124 def filter_out_indirect(text):
|
|
125 out = ""
|
|
126 for line in text.splitlines() :
|
|
127 match = re.match('^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line)
|
|
128 d = 1
|
|
129 if match:
|
|
130 d = len(match.group(1)) / 2
|
|
131 if (d == 1):
|
|
132 out = out + line + "\n"
|
|
133 return out
|
|
134
|
|
135 def process_pages():
|
|
136 for name in text.keys():
|
|
137 txt = text[name]
|
|
138
|
|
139 if name in inner:
|
|
140 txt = filter_out_indirect(txt)
|
|
141
|
|
142 html = HtmlFormatter(env, context, txt).generate()
|
|
143
|
|
144 html = html.replace("/wiki/Guide", "#Guide")
|
|
145
|
|
146 top = top_element[depth[name]]
|
|
147 db = html2docbook(html)
|
|
148
|
|
149 if name in inner:
|
|
150 # replace list items with XIncludes, FIXME: this is ugly
|
|
151 r = re.compile('<itemizedlist[^>]*>')
|
|
152 db = r.sub(r'', db);
|
|
153
|
|
154 r = re.compile('</itemizedlist>')
|
|
155 db = r.sub(r'', db);
|
|
156
|
|
157 r = re.compile('<listitem>\s*<para>\s*<link\s*linkend="(Guide[a-zA-Z0-9]*)">[^<]*</link>\s*</para>\s*</listitem>')
|
|
158 db = r.sub(r'<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="\1.xml"/>\n', db);
|
|
159
|
|
160
|
|
161 db = db.replace("<__top_element__>", "<" + top + " id=\"" + name + "\">")
|
|
162 db = db.replace("</__top_element__>", "</" + top + ">")
|
|
163
|
|
164 open("docbook/" + name + ".xml", "w").write(db.encode('utf-8'))
|
|
165
|
|
166
|
|
167 read_index()
|
|
168 process_pages()
|
|
169
|
|
170
|
|
171
|