This tool regenerates and replaces the ToC in an HTML file from the actual
structure of <div>s and <h[2345]>s present in the body of the document.
The section to be overwritten is identified as the XML subtree
rooted at <ol id="toc">.

Usage: ./toctool.py filename...
14 import xml
.parsers
.expat
21 self
._ptr
_stack
= [self
.tree
]
def addLevel(self, id, title):
    """Open a new nested level in the index and make it the current one.

    A level is a list whose first element is the ``(id, title)`` tuple
    describing the level itself; any later elements are its sub-levels.

    The new level is appended to the level currently on top of
    ``self._ptr_stack`` and then pushed onto that stack, so subsequent
    calls nest beneath it until the stack is popped again.

    ``id`` shadows the builtin of the same name; it is kept because it is
    part of the method's calling interface.
    """
    newlevel = [(id, title)]
    # Attach to the innermost open level first, then descend into it.
    self._ptr_stack[-1].append(newlevel)
    self._ptr_stack.append(newlevel)
29 self
._ptr
_stack
.pop(-1)
31 def prettyString(self
):
33 def step(ilevel
, node
):
34 if type(node
) == list:
36 step(ilevel
+1, subnode
)
38 out
.append("%s%s" % (" "*ilevel
, node
))
44 def step(ilevel
, node
):
46 out
.append('%s<li><a href="#%s">%s</a></li>'
47 % (' '*ilevel
, node
[0][0], node
[0][1]))
49 out
.append('%s<li><a href="#%s">%s</a>'
50 % (' '*ilevel
, node
[0][0], node
[0][1]))
51 out
.append('%s<ol>' % (' '*ilevel
))
52 for subnode
in node
[1:]:
53 step(ilevel
+1, subnode
)
54 out
.append('%s</ol>' % (' '*ilevel
))
55 out
.append('%s</li> <!-- %s -->' % (' '*ilevel
, node
[0][0]))
56 out
.append('<ol id="toc">')
57 for node
in self
.tree
:
64 def parse(self
, file):
65 p
= xml
.parsers
.expat
.ParserCreate()
66 p
.ordered_attributes
= self
._ordered
_attributes
67 p
.returns_unicode
= False
68 p
.specified_attributes
= True
69 for name
in dir(self
):
70 if name
.endswith('Handler'):
71 setattr(p
, name
, getattr(self
, name
))
75 class IndexBuildParse(ExpatParseJob
):
76 keys
= {'h2':None, 'h3':None, 'h4':None, 'h5':None}
81 self
.collecting_text
= False
83 self
.waiting_for_elt
= None
86 self
._ordered
_attributes
= False
88 def StartElementHandler(self
, name
, attrs
):
90 cl
= attrs
.get('class')
92 self
.waiting_for_elt
= cl
93 self
.saved_id
= attrs
.get('id')
94 self
.elt_stack
.append((name
, True))
97 self
.collecting_text
= name
99 elif name
== self
.waiting_for_elt
:
100 self
.waiting_for_elt
= None
101 self
.collecting_text
= name
103 self
.elt_stack
.append((name
, False))
105 def EndElementHandler(self
, name
):
106 if self
.collecting_text
:
107 if name
== self
.collecting_text
:
109 self
.index
.title
= self
.text
111 self
.index
.addLevel(self
.saved_id
, self
.text
)
113 self
.collecting_text
= False
115 raise RuntimeError('foo')
116 eltinfo
= self
.elt_stack
.pop(-1)
117 assert eltinfo
[0] == name
121 def DefaultHandler(self
, data
) :
122 if self
.collecting_text
:
126 def attrlist_to_dict(l
):
128 for i
in xrange(0, len(l
), 2):
def escape_entities(s):
    """Return ``s`` with ``&``, ``<`` and ``>`` replaced by XML entities.

    The ampersand is handled first; otherwise the ``&`` introduced by the
    ``&lt;`` and ``&gt;`` replacements would itself be escaped again.

    NOTE(review): double quotes are not escaped here, although the visible
    caller embeds the result inside a double-quoted attribute value --
    confirm whether ``&quot;`` should be handled as well.
    """
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
137 class IndexInsertParse(ExpatParseJob
):
138 def __init__(self
, index
, outfp
):
139 self
._ordered
_attributes
= True
143 self
.skipping_toc
= False
145 self
._line
_in
_progress
= []
146 self
._element
_open
= None
150 self
.do_not_minimize
= {'script':None}
151 self
.do_not_indent
= {'div':None, 'a':None, 'strong':None, 'em':None}
152 self
.do_not_wrap
= {'div':None, 'strong':None, 'em':None, 'li':None}
154 if self
.index
.title
== 'Subversion Design':
155 self
.do_not_wrap
['a'] = None
def put_token(self, token, tag_name):
    """Queue one piece of output text on the line currently being built.

    ``token`` is the text to emit.  ``tag_name`` is stored alongside it;
    the visible callers pass the element name for pieces of markup and
    ``None`` for other data.  The pair is appended to
    ``self._line_in_progress`` and nothing is written here.
    """
    self._line_in_progress.append((token, tag_name))
164 for token
, tag_name
in self
._line
_in
_progress
:
165 is_tag
= tag_name
is not None and tag_name
not in self
.do_not_wrap
166 no_indent_if_wrap
= tag_name
in self
.do_not_indent
167 linepos
+= len(token
)
168 if linepos
> 79 and is_tag
and last_was_tag
:
169 token
= token
.lstrip(' ')
170 if no_indent_if_wrap
:
174 linepos
= len(token
) + 2
177 last_was_tag
= is_tag
181 del self
._line
_in
_progress
[:]
183 def _finish_pending(self
, minimized_form
):
184 if self
._element
_open
is not None:
185 name
= self
._element
_open
186 self
._element
_open
= None
188 self
.put_token(' />', name
)
191 self
.put_token('>', name
)
194 def StartElementHandler(self
, name
, attrs
):
195 self
._finish
_pending
(False)
196 if name
== 'ol' and attrlist_to_dict(attrs
).get('id') == 'toc':
197 self
.outfp
.write(self
.index
.renderXML())
198 self
.skipping_toc
= True
199 self
.elt_stack
.append((name
, True))
201 if not self
.skipping_toc
:
202 self
.put_token("<%s" % name
, name
)
205 aval
= escape_entities(attrs
.pop(0))
206 self
.put_token(' %s="%s"' % (aname
, aval
), name
)
207 self
._element
_open
= name
208 self
.elt_stack
.append((name
, False))
210 def EndElementHandler(self
, name
):
211 if not self
.skipping_toc
:
212 if not self
._finish
_pending
(name
not in self
.do_not_minimize
):
213 self
.put_token("</%s>" % name
, name
)
214 eltinfo
= self
.elt_stack
.pop(-1)
215 assert eltinfo
[0] == name
217 self
.skipping_toc
= False
219 def DefaultHandler(self
, data
):
220 if self
.skipping_toc
:
222 self
._finish
_pending
(False)
223 # This makes an unsafe assumption that expat will pass '\n' as individual
224 # characters to this function. Seems to work at the moment.
225 # Will almost certainly break later.
229 self
.put_token(data
, None)
234 builder
= IndexBuildParse()
238 outfp
= open(fn
+ '.new', 'w')
239 inserter
= IndexInsertParse(builder
.index
, outfp
)
244 os
.rename(fn
, fn
+ '.toctool-backup~')
245 os
.rename(fn
+ '.new', fn
)
249 for fn
in sys
.argv
[1:]:
253 if __name__
== '__main__':