* subversion/libsvn_fs_fs/structure
[svn.git] / www / toctool.py
blobfc029b99a944a86e992b72dd418722b87d4d74cf
1 #!/usr/bin/env python
3 """\
4 This tool regenerates and replaces the ToC in an HTML file from the actual
5 structure of <div>s and <h[2345]>s present in the body of the document.
6 The section to be overwritten is identified as the XML subtree
7 rooted at <ol id="toc">.
9 Usage: ./toctool.py filename...
10 """
12 import sys
13 import os
14 import xml.parsers.expat
class Index:
    """Nested table of contents built up while walking a document.

    ``tree`` is a list of nodes.  Each node is itself a list whose first
    item is an ``(id, title)`` tuple and whose remaining items are child
    nodes of the same shape.  ``_ptr_stack`` holds the chain of lists from
    the root down to the node currently being filled in, so its last item
    is always the list that the next addLevel() call appends to.
    """

    def __init__(self):
        self.title = None
        self.tree = []
        self._ptr_stack = [self.tree]

    def addLevel(self, id, title):
        """Append a new node under the current one and descend into it."""
        newlevel = [(id, title)]
        self._ptr_stack[-1].append(newlevel)
        self._ptr_stack.append(newlevel)

    def upLevel(self):
        """Step back out of the current node to its parent."""
        self._ptr_stack.pop()

    def prettyString(self):
        """Return the tree as text, one ``(id, title)`` tuple per line.

        Each line is indented according to the nesting depth of its node.
        """
        out = []

        def step(ilevel, node):
            # Lists are containers (the root or a node); anything else is
            # the (id, title) tuple that heads a node.
            if isinstance(node, list):
                for subnode in node:
                    step(ilevel + 1, subnode)
            else:
                out.append("%s%s" % (" " * ilevel, node))

        # The root list and the node list each add one level before a
        # top-level tuple is reached, so start at -2 to print those at 0.
        step(-2, self.tree)
        return "\n".join(out)

    def renderXML(self):
        """Return the tree rendered as an ``<ol id="toc">`` XML fragment.

        Ids and titles are inserted verbatim; no escaping is applied here.
        """
        out = []

        def step(ilevel, node):
            indent = ' ' * ilevel
            node_id, node_title = node[0][0], node[0][1]
            if len(node) == 1:
                # Leaf: no children, so the <li> opens and closes on one line.
                out.append('%s<li><a href="#%s">%s</a></li>'
                           % (indent, node_id, node_title))
            else:
                out.append('%s<li><a href="#%s">%s</a>'
                           % (indent, node_id, node_title))
                out.append('%s<ol>' % indent)
                for subnode in node[1:]:
                    step(ilevel + 1, subnode)
                out.append('%s</ol>' % indent)
                # The trailing comment names the section this </li> closes.
                out.append('%s</li> <!-- %s -->' % (indent, node_id))

        out.append('<ol id="toc">')
        for node in self.tree:
            step(1, node)
        out.append('</ol>')
        return "\n".join(out)
class ExpatParseJob:
    """Base class that drives an expat parser with this object's handlers.

    Every attribute of the instance whose name ends in ``Handler`` is
    installed on the parser under the same name.  Subclasses must set
    ``self._ordered_attributes`` before parse() is called.
    """

    def parse(self, file):
        """Parse *file* with a fresh expat parser wired to our handlers.

        file: an object passed straight to the parser's ParseFile().
        """
        p = xml.parsers.expat.ParserCreate()
        p.ordered_attributes = self._ordered_attributes
        # returns_unicode only exists on Python 2 parsers; assigning it on
        # Python 3 raises AttributeError, so set it only where supported.
        if hasattr(p, 'returns_unicode'):
            p.returns_unicode = False
        p.specified_attributes = True
        for name in dir(self):
            if name.endswith('Handler'):
                setattr(p, name, getattr(self, name))
        p.ParseFile(file)
class IndexBuildParse(ExpatParseJob):
    """First pass: walk the document and build an Index of its sections.

    A <div> whose class attribute is one of ``keys`` opens a section.  The
    next element whose name equals that class (e.g. <h2> for class "h2")
    supplies the section title, and the div's id attribute supplies the
    link target.  The text of <title> is stored as the index title.

    NOTE(review): a recognised div calls index.upLevel() when it closes
    even if no matching heading was seen inside it (i.e. addLevel() was
    never called for it) -- confirm that input documents always pair them.
    """

    # Recognised div classes / heading element names; used only as a set.
    keys = {'h2':None, 'h3':None, 'h4':None, 'h5':None}

    def __init__(self):
        self.index = Index()
        self.keyptr = 0
        # False, or the name of the element whose text is being gathered.
        self.collecting_text = False
        self.text = ''
        # Heading element name expected inside the most recent section div.
        self.waiting_for_elt = None
        self.saved_id = None
        # Stack of (element name, is-section-div) for every open element.
        self.elt_stack = []
        # This pass wants attributes as a dict (see attrs.get below).
        self._ordered_attributes = False

    def StartElementHandler(self, name, attrs):
        """Track open elements; note section divs, titles and headings."""
        if name == 'div':
            cl = attrs.get('class')
            if cl in self.keys:
                self.waiting_for_elt = cl
                self.saved_id = attrs.get('id')
                self.elt_stack.append((name, True))
                return
        elif name == 'title':
            self.collecting_text = name
            self.text = ''
        elif name == self.waiting_for_elt:
            self.waiting_for_elt = None
            self.collecting_text = name
            self.text = ''
        self.elt_stack.append((name, False))

    def EndElementHandler(self, name):
        """Finish a title/heading, and close a section when its div ends.

        Raises RuntimeError if some other element ends while title or
        heading text is being collected (nested markup is not supported).
        """
        if self.collecting_text:
            if name != self.collecting_text:
                raise RuntimeError(
                    'unexpected </%s> inside <%s>: nested markup is not '
                    'supported in titles and headings'
                    % (name, self.collecting_text))
            if name == 'title':
                self.index.title = self.text
            else:
                self.index.addLevel(self.saved_id, self.text)
                self.saved_id = None
            self.collecting_text = False
        eltinfo = self.elt_stack.pop()
        assert eltinfo[0] == name
        if eltinfo[1]:
            self.index.upLevel()

    def DefaultHandler(self, data):
        """Accumulate raw document text while a title/heading is open."""
        if self.collecting_text:
            self.text += data
def attrlist_to_dict(l):
    """Convert a flat ``[name, value, name, value, ...]`` list to a dict.

    Later occurrences of a name override earlier ones.  Raises IndexError
    if *l* has an odd number of items.
    """
    # range() rather than xrange(): the latter does not exist on Python 3.
    return {l[i]: l[i + 1] for i in range(0, len(l), 2)}
def escape_entities(s):
    """Return *s* with XML-special characters replaced by entity references.

    Escapes ``&``, ``<``, ``>`` and ``"``.  The double quote is included
    because the result is embedded in double-quoted attribute values, where
    an unescaped quote would end the value early.
    """
    # '&' must go first so the ampersands introduced by the other
    # replacements are not escaped a second time.
    return (s.replace('&', '&amp;')
             .replace('<', '&lt;')
             .replace('>', '&gt;')
             .replace('"', '&quot;'))
class IndexInsertParse(ExpatParseJob):
    """Second pass: copy the document to *outfp* with a regenerated ToC.

    Everything is echoed as it is parsed, except the subtree rooted at
    ``<ol id="toc">``, which is replaced by ``index.renderXML()``.  Output
    is assembled one line at a time so that over-long runs of tags can be
    wrapped (see done_line).
    """

    def __init__(self, index, outfp):
        # This pass wants attributes as a flat [name, value, ...] list.
        self._ordered_attributes = True
        self.index = index
        self.outfp = outfp
        # Stack of (element name, is-the-toc-root) for every open element.
        self.elt_stack = []
        self.skipping_toc = False

        # (token, tag name or None) pairs queued for the current line.
        self._line_in_progress = []
        # Name of a start tag whose closing '>' has not been emitted yet.
        self._element_open = None
        self.linepos = 0
        self.indentpos = 0

        # Element-name tables; only the keys matter (used as sets).
        self.do_not_minimize = {'script':None}
        self.do_not_indent = {'div':None, 'a':None, 'strong':None, 'em':None}
        self.do_not_wrap = {'div':None, 'strong':None, 'em':None, 'li':None}

        if self.index.title == 'Subversion Design':
            self.do_not_wrap['a'] = None

    def parse(self, file):
        """Run the pass, then flush a last line that had no newline.

        Without the final flush, tokens queued after the last newline of
        the input would never reach *outfp*.
        """
        ExpatParseJob.parse(self, file)
        if self._line_in_progress:
            self.done_line()

    def put_token(self, token, tag_name):
        """Queue *token* for the current line.

        tag_name is the element the token belongs to, or None for text.
        """
        self._line_in_progress.append((token, tag_name))

    def _put_text(self, text):
        """Queue non-markup *text*, ending an output line at each newline."""
        pieces = text.split('\n')
        for piece in pieces[:-1]:
            if piece:
                self.put_token(piece, None)
            self.done_line()
        if pieces[-1]:
            self.put_token(pieces[-1], None)

    def done_line(self):
        """Write the queued tokens plus a newline, wrapping long tag runs.

        A line break is inserted before a wrappable tag token once the
        line has passed column 79, but only directly after another
        wrappable tag token, so text is never split.
        """
        linepos = 0
        last_was_tag = False
        outq = []
        for token, tag_name in self._line_in_progress:
            is_tag = tag_name is not None and tag_name not in self.do_not_wrap
            no_indent_if_wrap = tag_name in self.do_not_indent
            linepos += len(token)
            if linepos > 79 and is_tag and last_was_tag:
                token = token.lstrip(' ')
                if no_indent_if_wrap:
                    linepos = len(token)
                    outq.append('\n')
                else:
                    # NOTE(review): 2 columns are counted here but the
                    # indent string below is 1 space -- confirm which one
                    # is intended.
                    linepos = len(token) + 2
                    outq.append('\n ')
            outq.append(token)
            last_was_tag = is_tag
        outq.append('\n')
        self.outfp.write(''.join(outq))
        del self._line_in_progress[:]

    def _finish_pending(self, minimized_form):
        """Close the start tag left open by StartElementHandler, if any.

        Returns True only when the tag was closed in minimized form
        (' />'), in which case the caller must not emit an end tag.
        """
        if self._element_open is None:
            return False
        name = self._element_open
        self._element_open = None
        if minimized_form:
            self.put_token(' />', name)
            return True
        self.put_token('>', name)
        return False

    def StartElementHandler(self, name, attrs):
        """Echo a start tag, or swap in the new ToC at ``<ol id="toc">``."""
        self._finish_pending(False)
        if name == 'ol' and attrlist_to_dict(attrs).get('id') == 'toc':
            # Queue the ToC instead of writing it directly, so it cannot
            # jump ahead of tokens already queued on the current line.
            self._put_text(self.index.renderXML())
            self.skipping_toc = True
            self.elt_stack.append((name, True))
            return
        if not self.skipping_toc:
            self.put_token("<%s" % name, name)
            for i in range(0, len(attrs), 2):
                aval = escape_entities(attrs[i + 1])
                self.put_token(' %s="%s"' % (attrs[i], aval), name)
            # Leave the tag open: whether it ends with '>' or ' />'
            # depends on what comes next.
            self._element_open = name
        self.elt_stack.append((name, False))

    def EndElementHandler(self, name):
        """Echo an end tag (or minimize an empty element); stop skipping
        once the old ToC's root element closes."""
        if not self.skipping_toc:
            if not self._finish_pending(name not in self.do_not_minimize):
                self.put_token("</%s>" % name, name)
        eltinfo = self.elt_stack.pop()
        assert eltinfo[0] == name
        if eltinfo[1]:
            self.skipping_toc = False

    def DefaultHandler(self, data):
        """Echo everything that is not a start or end tag."""
        if self.skipping_toc:
            return
        self._finish_pending(False)
        # Split on newlines here rather than relying on expat to deliver
        # each '\n' as a separate call.
        self._put_text(data)
def process(fn):
    """Regenerate the ToC of the file *fn* in place.

    The file is parsed twice: once to build the index and once to write a
    copy with the new ToC to ``fn + '.new'``.  On success the original is
    renamed to ``fn + '.toctool-backup~'`` and the new file takes its
    place.  If either parse raises, the original file is left untouched
    and both file handles are closed.
    """
    new_fn = fn + '.new'
    # Binary mode: expat's ParseFile wants raw bytes and does its own
    # decoding and line-ending handling.
    with open(fn, 'rb') as infp:
        builder = IndexBuildParse()
        builder.parse(infp)

        # Second pass re-reads the same file from the start.
        infp.seek(0)
        with open(new_fn, 'w') as outfp:
            inserter = IndexInsertParse(builder.index, outfp)
            inserter.parse(infp)

    # Both files are closed by now; swap the new file into place.
    os.rename(fn, fn + '.toctool-backup~')
    os.rename(new_fn, fn)
def main():
    """Regenerate the ToC of every file named on the command line."""
    filenames = sys.argv[1:]
    for filename in filenames:
        process(filename)


if __name__ == '__main__':
    main()