annotate doc/wiki2docbook.py @ 1773:2ae81598b254

scripts for converting wiki documentation to docbook
author nadvornik
date Sun, 22 Nov 2009 09:12:22 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1773
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
1 #!/usr/bin/python
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
2 #
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
3 # This script converts trac wiki to docbook
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
4 # wiki pages must be in wiki/ directory and their names must start with "Guide"
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
5 # the first page is named GuideIndex
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
6 # output is written to docbook/ directory
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
7 #
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
8 # based on the following scripts:
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
9 #
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
10 # http://trac-hacks.org/wiki/Page2DocbookPlugin
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
11 # http://trac.edgewall.org/attachment/wiki/TracWiki/trac_wiki2html.py
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
12 #
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
13 # see the links above for a list of requirements
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
14
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
15
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
16 import sys
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
17 import os
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
18 from trac.test import EnvironmentStub, Mock, MockPerm
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
19 from trac.mimeview import Context
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
20 from trac.wiki.formatter import HtmlFormatter
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
21 from trac.wiki.model import WikiPage
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
22 from trac.web.href import Href
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
23
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
24 import urllib
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
25 from tidy import parseString
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
26 import libxml2
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
27 import libxslt
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
28 import re
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
29
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
# Directory holding the helper XSL stylesheets, relative to the current
# working directory.
datadir = os.getcwd() + "/wiki2docbook"


def _wrapper_stylesheet(xsl_path, param_line):
    # Build the text of a small wrapper stylesheet that imports the XSL file
    # found at datadir + xsl_path and declares one extra <xsl:param>.
    import_url = urllib.pathname2url(datadir + xsl_path)
    return u"\n".join([
        u'<?xml version="1.0"?>',
        u'<xsl:stylesheet version="1.0"',
        u'xmlns:xsl="http://www.w3.org/1999/XSL/Transform">',
        u'',
        u'<xsl:import href="file:///' + import_url + u'" />',
        u'<xsl:output method="xml" indent="no" encoding="utf-8"/>',
        param_line,
        u'</xsl:stylesheet>',
        u'',
    ])


# Wrapper around html2db.xsl; the output root element is the placeholder
# name __top_element__.
xhtml2dbXsl = _wrapper_stylesheet(
    '/html2db/html2db.xsl',
    u'<xsl:param name="document-root" select="\'__top_element__\'"/>')

# Wrapper around headingsNormalizer.xsl.
normalizedHeadingsXsl = _wrapper_stylesheet(
    '/headingsNormalizer/headingsNormalizer.xsl',
    u'<xsl:param name="defaultTopHeading" select="FIXME"/>')

# Parse both wrappers once at import time; html2docbook() reuses the
# compiled stylesheets for every page.
normalizedHeadingsXsl_xmldoc = libxml2.parseDoc(normalizedHeadingsXsl)
normalizedHeadingsXsl_xsldoc = libxslt.parseStylesheetDoc(normalizedHeadingsXsl_xmldoc)

xhtml2dbXsl_xmldoc = libxml2.parseDoc(xhtml2dbXsl)
xhtml2dbXsl_xsldoc = libxslt.parseStylesheetDoc(xhtml2dbXsl_xmldoc)
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
58
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
def html2docbook(html):
    """Convert an HTML fragment to DocBook XML.

    The HTML is cleaned up to XHTML with tidy, run through the
    headings-normalizer stylesheet and then through the html2db stylesheet.

    html -- unicode string holding the HTML to convert.

    Returns the serialized DocBook document as a unicode string.  Its root
    element is the placeholder name configured in the html2db wrapper
    stylesheet; the caller substitutes the real element name.
    """
    options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0,
                   input_encoding='utf8', output_encoding='utf8',
                   doctype='auto', wrap=0, char_encoding='utf8')
    xhtml = parseString(html.encode("utf-8"), **options)

    # Every libxml2 document created below, in creation order.  libxml2
    # documents are not garbage collected, so they are released in the
    # finally clause even when one of the transforms raises.
    docs = []
    try:
        xhtml_xmldoc = libxml2.parseDoc(str(xhtml))
        docs.append(xhtml_xmldoc)

        xhtml2_xmldoc = normalizedHeadingsXsl_xsldoc.applyStylesheet(xhtml_xmldoc, None)
        docs.append(xhtml2_xmldoc)

        docbook_xmldoc = xhtml2dbXsl_xsldoc.applyStylesheet(xhtml2_xmldoc, None)
        docs.append(docbook_xmldoc)

        dbstring = xhtml2dbXsl_xsldoc.saveResultToString(docbook_xmldoc)
    finally:
        for doc in docs:
            # NOTE(review): applyStylesheet presumably returns None when the
            # transform fails -- confirm against the libxslt bindings.
            if doc is not None:
                doc.freeDoc()

    return dbstring.decode('utf-8')
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
78
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
79
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
# Per-page bookkeeping, each dictionary keyed by wiki page name.
text = dict()    # wiki text
depth = dict()   # document depth, 0 for index, leaf documents have depth 1 or 2
parent = dict()  # parent document (if depth > 0)
inner = dict()   # defined for documents that are parents

# top element indexed by depth
top_element = ['book', 'chapter'] + ['section'] * 8

# Stub Trac environment plus a mock anonymous request; the rendering context
# derived from them is what the wiki formatter is given in process_pages().
env = EnvironmentStub()
req = Mock(
    href=Href('/'),
    abs_href=Href('http://www.example.com/'),
    authname='anonymous',
    perm=MockPerm(),
    args={}
)
context = Context.from_request(req, 'wiki')
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
92
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
93
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
def read_file(name):
    """Load wiki page *name* and register it in the stub environment.

    The file wiki/<name> is read, decoded as UTF-8 and stored in the
    module-level ``text`` dictionary under *name*.  A WikiPage with the same
    name and the placeholder text '--' is then saved into ``env``.

    name -- wiki page name; also the file name inside the wiki/ directory.
    """
    # Close the file deterministically instead of leaving the handle to the
    # garbage collector.
    with open("wiki/" + name) as wiki_file:
        text[name] = wiki_file.read().decode('utf-8')

    # NOTE(review): the placeholder page presumably exists so that links to
    # this page are rendered as links to an existing page -- confirm.
    page = WikiPage(env)
    page.name = name
    page.text = '--'
    page.save('', '', '::1', 0)
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
100
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
101
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
def read_index():
    """Read the GuideIndex page and every Guide page it links to.

    Each index line of the form ``<spaces>* [wiki:GuideXxx`` names a page.
    Its depth is the number of leading spaces divided by two (rounded down);
    lines with depth 0 are ignored.  For every accepted page this records
    ``depth`` and ``parent``, marks the parent in ``inner`` and loads the
    page with read_file().  The index itself gets depth 0 and is marked as
    an inner document.
    """
    index_name = "GuideIndex"
    read_file(index_name)
    index_text = text[index_name]
    depth[index_name] = 0
    inner[index_name] = 1

    # stack[d] is the most recently seen page at depth d; '' means that no
    # page has been seen at that depth yet.
    stack = [index_name, '', '', '']
    link_re = re.compile(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)')

    for line in index_text.splitlines():
        match = link_re.match(line)
        if not match:
            continue

        name = match.group(2)
        # Floor division keeps the depth an integer on every Python version.
        d = len(match.group(1)) // 2
        if d <= 0:
            continue

        # Grow the stack on demand so that deeply nested indexes do not
        # raise IndexError.
        if d >= len(stack):
            stack.extend([''] * (d + 1 - len(stack)))

        depth[name] = d
        parent[name] = stack[d - 1]
        inner[stack[d - 1]] = 1
        stack[d] = name
        read_file(name)
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
122
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
123 # exclude links with depth > 1 from wiki text, they will be included indirectly
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
def filter_out_indirect(text):
    """Return *text* without the Guide links that are not at depth 1.

    A line of the form ``<spaces>* [wiki:GuideXxx`` is a Guide link whose
    depth is the number of leading spaces divided by two (rounded down).
    Such a line is kept only when its depth is exactly 1, so both deeper
    links and depth-0 links are dropped.  Every other line is kept.

    text -- wiki text of one page.

    Returns the kept lines, each terminated by a newline.
    """
    link_re = re.compile(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)')
    kept = []
    for line in text.splitlines():
        match = link_re.match(line)
        # Floor division keeps odd indents (e.g. three spaces) at an integer
        # depth on every Python version.
        if match is None or len(match.group(1)) // 2 == 1:
            kept.append(line + "\n")
    return "".join(kept)
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
134
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
def process_pages():
    """Convert every loaded wiki page to DocBook and write it to docbook/.

    For each page in ``text`` the wiki markup is rendered to HTML, links to
    /wiki/Guide* are turned into #Guide* anchors and the HTML is converted
    with html2docbook().  For pages marked in ``inner`` the Guide links that
    are not at depth 1 are removed before rendering, and afterwards the
    remaining link list items are replaced by XInclude elements that refer
    to the linked page's .xml file.  The placeholder root element is
    replaced by the element chosen from ``top_element`` for the page's
    depth, with the page name as its id.  The result is written UTF-8
    encoded to docbook/<name>.xml.
    """
    # Compiled once here instead of once per page.
    list_open_re = re.compile(r'<itemizedlist[^>]*>')
    list_close_re = re.compile(r'</itemizedlist>')
    link_item_re = re.compile(r'<listitem>\s*<para>\s*<link\s*linkend="(Guide[a-zA-Z0-9]*)">[^<]*</link>\s*</para>\s*</listitem>')
    xinclude = r'<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="\1.xml"/>\n'

    for name in text:
        txt = text[name]
        is_inner = name in inner

        if is_inner:
            txt = filter_out_indirect(txt)

        html = HtmlFormatter(env, context, txt).generate()

        html = html.replace("/wiki/Guide", "#Guide")

        top = top_element[depth[name]]
        db = html2docbook(html)

        if is_inner:
            # replace list items with XIncludes, FIXME: this is ugly
            db = list_open_re.sub(r'', db)
            db = list_close_re.sub(r'', db)
            db = link_item_re.sub(xinclude, db)

        db = db.replace("<__top_element__>", "<" + top + " id=\"" + name + "\">")
        db = db.replace("</__top_element__>", "</" + top + ">")

        # Close the output file deterministically so the data is flushed
        # before the next page is processed.
        with open("docbook/" + name + ".xml", "w") as out_file:
            out_file.write(db.encode('utf-8'))
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
165
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
166
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
# Run the conversion: load the index page and every page it links to, then
# write one DocBook file per loaded page.
read_index()
process_pages()
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
169
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
170
2ae81598b254 scripts for converting wiki documentation to docbook
nadvornik
parents:
diff changeset
171