3 """Perform massive transformations on a document tree created from the LaTeX
4 of the Python documentation, and dump the ESIS data for the transformed tree.
6 __version__
= '$Revision$'
16 from xml
.dom
.core
import \
22 class ConversionError(Exception):
26 ewrite
= sys
.stderr
.write
28 # We can only do this trick on Unix (if tput is on $PATH)!
29 if sys
.platform
!= "posix" or not sys
.stderr
.isatty():
36 def bwrite(s
, BOLDON
=commands
.getoutput("tput bold"),
37 BOLDOFF
=commands
.getoutput("tput sgr0")):
38 ewrite("%s%s%s" % (BOLDON
, s
, BOLDOFF
))
47 ewrite("*** %s\n" % s
)
53 # Workaround to deal with invalid documents (multiple root elements). This
54 # does not indicate a bug in the DOM implementation.
56 def get_documentElement(doc
):
58 for n
in doc
.childNodes
:
59 if n
.nodeType
== ELEMENT
:
63 xml
.dom
.core
.Document
.get_documentElement
= get_documentElement
66 # Replace get_childNodes for the Document class; without this, children
67 # accessed from the Document object via .childNodes (no matter how many
68 # levels of access are used) will be given an ownerDocument of None.
def get_childNodes(doc):
    """Return the children of the Document *doc* as a NodeList.

    The list is built from the ``children`` of the document's underlying
    ``_node``, and that same node is passed as the second NodeList
    argument.  Going through this accessor is what keeps nodes reached via
    ``.childNodes`` from ending up with an ownerDocument of None.
    """
    NodeList = xml.dom.core.NodeList
    underlying = doc._node
    return NodeList(underlying.children, underlying)


# Install the replacement accessor on the Document class itself.
xml.dom.core.Document.get_childNodes = get_childNodes
76 def get_first_element(doc
, gi
):
77 for n
in doc
.childNodes
:
78 if n
.get_nodeName() == gi
:
81 def extract_first_element(doc
, gi
):
82 node
= get_first_element(doc
, gi
)
88 def find_all_elements(doc
, gi
):
90 if doc
.get_nodeName() == gi
:
92 for child
in doc
.childNodes
:
93 if child
.nodeType
== ELEMENT
:
94 if child
.get_tagName() == gi
:
96 for node
in child
.getElementsByTagName(gi
):
100 def find_all_child_elements(doc
, gi
):
102 for child
in doc
.childNodes
:
103 if child
.get_nodeName() == gi
:
def find_all_elements_from_set(doc, gi_set):
    """Collect the nodes under *doc* whose node name is in *gi_set*.

    Public entry point for the recursive helper
    __find_all_elements_from_set(); it supplies a fresh, empty list as the
    accumulator and returns whatever the helper returns.
    """
    accumulator = []
    return __find_all_elements_from_set(doc, gi_set, accumulator)
110 def __find_all_elements_from_set(doc
, gi_set
, nodes
):
111 if doc
.get_nodeName() in gi_set
:
113 for child
in doc
.childNodes
:
114 if child
.get_nodeType() == ELEMENT
:
115 __find_all_elements_from_set(child
, gi_set
, nodes
)
119 def simplify(doc
, fragment
):
120 # Try to rationalize the document a bit, since these things are simply
121 # not valid SGML/XML documents as they stand, and need a little work.
122 documentclass
= "document"
124 node
= extract_first_element(fragment
, "documentclass")
126 documentclass
= node
.getAttribute("classname")
127 node
= extract_first_element(fragment
, "title")
130 # update the name of the root element
131 node
= get_first_element(fragment
, "document")
133 node
._node
.name
= documentclass
135 node
= extract_first_element(fragment
, "input")
140 docelem
= get_documentElement(fragment
)
143 text
= doc
.createTextNode("\n")
144 docelem
.insertBefore(text
, docelem
.firstChild
)
145 docelem
.insertBefore(node
, text
)
146 docelem
.insertBefore(doc
.createTextNode("\n"), docelem
.firstChild
)
147 while fragment
.firstChild
and fragment
.firstChild
.get_nodeType() == TEXT
:
148 fragment
.removeChild(fragment
.firstChild
)
151 def cleanup_root_text(doc
):
154 for n
in doc
.childNodes
:
157 if n
.get_nodeType() == TEXT
and not prevskip
:
159 elif n
.get_nodeName() == "COMMENT":
161 for node
in discards
:
162 doc
.removeChild(node
)
165 DESCRIPTOR_ELEMENTS
= (
166 "cfuncdesc", "cvardesc", "ctypedesc",
167 "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni",
168 "excdesc", "funcdesc", "funcdescni", "opcodedesc",
169 "datadesc", "datadescni",
def fixup_descriptors(doc, fragment):
    """Fix the descriptor elements inside every <section> of *fragment*.

    Each node returned by find_all_elements(fragment, "section") is handed,
    together with *doc*, to find_and_fix_descriptors().
    """
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor elements that are direct children of *container*.

    Non-element children are ignored.  An element whose tag name is listed
    in DESCRIPTOR_ELEMENTS is passed to rewrite_descriptor(); a
    <subsection> is searched recursively; every other element is skipped.
    """
    for node in container.childNodes:
        if node.get_nodeType() != ELEMENT:
            continue
        gi = node.get_tagName()
        if gi in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, node)
        elif gi == "subsection":
            # Descriptors may also live one sectioning level further down.
            find_and_fix_descriptors(doc, node)
189 def rewrite_descriptor(doc
, descriptor
):
192 # 1. Add an "index='no'" attribute to the element if the tagName
193 # ends in 'ni', removing the 'ni' from the name.
194 # 2. Create a <signature> from the name attribute
195 # 2a.Create an <args> if it appears to be available.
196 # 3. Create additional <signature>s from <*line{,ni}> elements,
198 # 4. If a <versionadded> is found, move it to an attribute on the
200 # 5. Move remaining child nodes to a <description> element.
201 # 6. Put it back together.
204 descname
= descriptor
.get_tagName()
206 if descname
[-2:] == "ni":
207 descname
= descname
[:-2]
208 descriptor
.setAttribute("index", "no")
209 descriptor
._node
.name
= descname
211 desctype
= descname
[:-4] # remove 'desc'
212 linename
= desctype
+ "line"
214 linename
= linename
+ "ni"
216 signature
= doc
.createElement("signature")
217 name
= doc
.createElement("name")
218 signature
.appendChild(doc
.createTextNode("\n "))
219 signature
.appendChild(name
)
220 name
.appendChild(doc
.createTextNode(descriptor
.getAttribute("name")))
221 descriptor
.removeAttribute("name")
223 if descriptor
.attributes
.has_key("var"):
224 if descname
!= "opcodedesc":
225 raise RuntimeError, \
226 "got 'var' attribute on descriptor other than opcodedesc"
227 variable
= descriptor
.getAttribute("var")
229 args
= doc
.createElement("args")
230 args
.appendChild(doc
.createTextNode(variable
))
231 signature
.appendChild(doc
.createTextNode("\n "))
232 signature
.appendChild(args
)
233 descriptor
.removeAttribute("var")
234 newchildren
= [signature
]
235 children
= descriptor
.childNodes
236 pos
= skip_leading_nodes(children
)
237 if pos
< len(children
):
238 child
= children
[pos
]
239 if child
.nodeName
== "args":
240 # move <args> to <signature>, or remove if empty:
241 child
.parentNode
.removeChild(child
)
242 if len(child
.childNodes
):
243 signature
.appendChild(doc
.createTextNode("\n "))
244 signature
.appendChild(child
)
245 signature
.appendChild(doc
.createTextNode("\n "))
247 pos
= skip_leading_nodes(children
, pos
)
248 while pos
< len(children
) \
249 and children
[pos
].get_nodeName() in (linename
, "versionadded"):
250 if children
[pos
].get_tagName() == linename
:
251 # this is really a supplemental signature, create <signature>
252 sig
= methodline_to_signature(doc
, children
[pos
])
253 newchildren
.append(sig
)
255 # <versionadded added=...>
256 descriptor
.setAttribute(
257 "added", children
[pos
].getAttribute("version"))
258 pos
= skip_leading_nodes(children
, pos
+ 1)
260 description
= doc
.createElement("description")
261 description
.appendChild(doc
.createTextNode("\n"))
262 newchildren
.append(description
)
263 move_children(descriptor
, description
, pos
)
264 last
= description
.childNodes
[-1]
265 if last
.nodeType
== TEXT
:
266 last
.data
= string
.rstrip(last
.data
) + "\n "
268 # should have nothing but whitespace and signature lines in <descriptor>;
270 while descriptor
.childNodes
:
271 descriptor
.removeChild(descriptor
.childNodes
[0])
272 for node
in newchildren
:
273 descriptor
.appendChild(doc
.createTextNode("\n "))
274 descriptor
.appendChild(node
)
275 descriptor
.appendChild(doc
.createTextNode("\n"))
278 def methodline_to_signature(doc
, methodline
):
279 signature
= doc
.createElement("signature")
280 signature
.appendChild(doc
.createTextNode("\n "))
281 name
= doc
.createElement("name")
282 name
.appendChild(doc
.createTextNode(methodline
.getAttribute("name")))
283 methodline
.removeAttribute("name")
284 signature
.appendChild(name
)
285 if len(methodline
.childNodes
):
286 args
= doc
.createElement("args")
287 signature
.appendChild(doc
.createTextNode("\n "))
288 signature
.appendChild(args
)
289 move_children(methodline
, args
)
290 signature
.appendChild(doc
.createTextNode("\n "))
def move_children(origin, dest, start=0):
    """Move the children of *origin* from index *start* onward to *dest*.

    Nodes are detached from *origin* one at a time and appended to *dest*,
    so their relative order is preserved.  Children before *start* stay
    where they are.
    """
    remaining = origin.childNodes
    # The loop relies on `remaining` reflecting each removal: the node at
    # position `start` is taken repeatedly until none is left there.
    while len(remaining) > start:
        moving = remaining[start]
        origin.removeChild(moving)
        dest.appendChild(moving)
302 def handle_appendix(doc
, fragment
):
303 # must be called after simplify() if document is multi-rooted to begin with
304 docelem
= get_documentElement(fragment
)
305 toplevel
= docelem
.get_tagName() == "manual" and "chapter" or "section"
308 for node
in docelem
.childNodes
:
311 elif node
.nodeType
== ELEMENT
:
312 appnodes
= node
.getElementsByTagName("appendix")
315 parent
= appnodes
[0].parentNode
316 parent
.removeChild(appnodes
[0])
319 map(docelem
.removeChild
, nodes
)
320 docelem
.appendChild(doc
.createTextNode("\n\n\n"))
321 back
= doc
.createElement("back-matter")
322 docelem
.appendChild(back
)
323 back
.appendChild(doc
.createTextNode("\n"))
324 while nodes
and nodes
[0].nodeType
== TEXT \
325 and not string
.strip(nodes
[0].data
):
327 map(back
.appendChild
, nodes
)
328 docelem
.appendChild(doc
.createTextNode("\n"))
331 def handle_labels(doc
, fragment
):
332 for label
in find_all_elements(fragment
, "label"):
333 id = label
.getAttribute("id")
336 parent
= label
.parentNode
337 parentTagName
= parent
.get_tagName()
338 if parentTagName
== "title":
339 parent
.parentNode
.setAttribute("id", id)
341 parent
.setAttribute("id", id)
342 # now, remove <label id="..."/> from parent:
343 parent
.removeChild(label
)
344 if parentTagName
== "title":
346 children
= parent
.childNodes
347 if children
[-1].nodeType
== TEXT
:
348 children
[-1].data
= string
.rstrip(children
[-1].data
)
351 def fixup_trailing_whitespace(doc
, wsmap
):
356 if wsmap
.has_key(node
.get_nodeName()):
357 ws
= wsmap
[node
.get_tagName()]
358 children
= node
.childNodes
360 if children
[0].nodeType
== TEXT
:
361 data
= string
.rstrip(children
[0].data
) + ws
362 children
[0].data
= data
364 # hack to get the title in place:
365 if node
.get_tagName() == "title" \
366 and node
.parentNode
.firstChild
.get_nodeType() == ELEMENT
:
367 node
.parentNode
.insertBefore(doc
.createText("\n "),
368 node
.parentNode
.firstChild
)
369 for child
in node
.childNodes
:
370 if child
.nodeType
== ELEMENT
:
375 for node
in doc
.childNodes
:
376 if node
.nodeType
== ELEMENT
:
380 def cleanup_trailing_parens(doc
, element_names
):
382 for gi
in element_names
:
384 rewrite_element
= d
.has_key
386 for node
in doc
.childNodes
:
387 if node
.nodeType
== ELEMENT
:
392 if rewrite_element(node
.get_tagName()):
393 children
= node
.childNodes
394 if len(children
) == 1 \
395 and children
[0].nodeType
== TEXT
:
396 data
= children
[0].data
397 if data
[-2:] == "()":
398 children
[0].data
= data
[:-2]
400 for child
in node
.childNodes
:
401 if child
.nodeType
== ELEMENT
:
405 def contents_match(left
, right
):
406 left_children
= left
.childNodes
407 right_children
= right
.childNodes
408 if len(left_children
) != len(right_children
):
410 for l
, r
in map(None, left_children
, right_children
):
411 nodeType
= l
.nodeType
412 if nodeType
!= r
.nodeType
:
414 if nodeType
== ELEMENT
:
415 if l
.get_tagName() != r
.get_tagName():
417 # should check attributes, but that's not a problem here
418 if not contents_match(l
, r
):
420 elif nodeType
== TEXT
:
424 # not quite right, but good enough
429 def create_module_info(doc
, section
):
431 node
= extract_first_element(section
, "modulesynopsis")
434 node
._node
.name
= "synopsis"
435 lastchild
= node
.childNodes
[-1]
436 if lastchild
.nodeType
== TEXT \
437 and lastchild
.data
[-1:] == ".":
438 lastchild
.data
= lastchild
.data
[:-1]
439 modauthor
= extract_first_element(section
, "moduleauthor")
441 modauthor
._node
.name
= "author"
442 modauthor
.appendChild(doc
.createTextNode(
443 modauthor
.getAttribute("name")))
444 modauthor
.removeAttribute("name")
445 platform
= extract_first_element(section
, "platform")
446 if section
.get_tagName() == "section":
448 modinfo
= doc
.createElement("moduleinfo")
449 moddecl
= extract_first_element(section
, "declaremodule")
452 modinfo
.appendChild(doc
.createTextNode("\n "))
453 name
= moddecl
.attributes
["name"].value
454 namenode
= doc
.createElement("name")
455 namenode
.appendChild(doc
.createTextNode(name
))
456 modinfo
.appendChild(namenode
)
457 type = moddecl
.attributes
.get("type")
460 modinfo
.appendChild(doc
.createTextNode("\n "))
461 typenode
= doc
.createElement("type")
462 typenode
.appendChild(doc
.createTextNode(type))
463 modinfo
.appendChild(typenode
)
464 versionadded
= extract_first_element(section
, "versionadded")
466 modinfo
.setAttribute("added", versionadded
.getAttribute("version"))
467 title
= get_first_element(section
, "title")
469 children
= title
.childNodes
470 if len(children
) >= 2 \
471 and children
[0].get_nodeName() == "module" \
472 and children
[0].childNodes
[0].data
== name
:
473 # this is it; morph the <title> into <short-synopsis>
474 first_data
= children
[1]
475 if first_data
.data
[:4] == " ---":
476 first_data
.data
= string
.lstrip(first_data
.data
[4:])
477 title
._node
.name
= "short-synopsis"
478 if children
[-1].nodeType
== TEXT \
479 and children
[-1].data
[-1:] == ".":
480 children
[-1].data
= children
[-1].data
[:-1]
481 section
.removeChild(title
)
482 section
.removeChild(section
.childNodes
[0])
483 title
.removeChild(children
[0])
486 ewrite("module name in title doesn't match"
487 " <declaremodule/>; no <short-synopsis/>\n")
489 ewrite("Unexpected condition: <section/> without <title/>\n")
490 modinfo
.appendChild(doc
.createTextNode("\n "))
491 modinfo
.appendChild(node
)
492 if title
and not contents_match(title
, node
):
493 # The short synopsis is actually different,
494 # and needs to be stored:
495 modinfo
.appendChild(doc
.createTextNode("\n "))
496 modinfo
.appendChild(title
)
498 modinfo
.appendChild(doc
.createTextNode("\n "))
499 modinfo
.appendChild(modauthor
)
501 modinfo
.appendChild(doc
.createTextNode("\n "))
502 modinfo
.appendChild(platform
)
503 modinfo
.appendChild(doc
.createTextNode("\n "))
504 section
.insertBefore(modinfo
, section
.childNodes
[modinfo_pos
])
505 section
.insertBefore(doc
.createTextNode("\n "), modinfo
)
507 # The rest of this removes extra newlines from where we cut out
508 # a lot of elements. A lot of code for minimal value, but keeps
509 # the generated *ML from being too funny looking.
512 children
= section
.childNodes
513 for i
in range(len(children
)):
515 if node
.get_nodeName() == "moduleinfo":
516 nextnode
= children
[i
+1]
517 if nextnode
.nodeType
== TEXT
:
519 if len(string
.lstrip(data
)) < (len(data
) - 4):
520 nextnode
.data
= "\n\n\n" + string
.lstrip(data
)
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in *fragment*.

    The sections are those returned by
    find_all_elements(fragment, "section"); each is passed along with *doc*.
    """
    sections = find_all_elements(fragment, "section")
    for section in sections:
        create_module_info(doc, section)
def fixup_table_structures(doc, fragment):
    """Run fixup_table() on every <table> found in *fragment*.

    The tables are those returned by find_all_elements(fragment, "table");
    each is passed along with *doc*.
    """
    tables = find_all_elements(fragment, "table")
    for entry in tables:
        fixup_table(doc, entry)
533 def fixup_table(doc
, table
):
534 # create the table head
535 thead
= doc
.createElement("thead")
536 row
= doc
.createElement("row")
537 move_elements_by_name(doc
, table
, row
, "entry")
538 thead
.appendChild(doc
.createTextNode("\n "))
539 thead
.appendChild(row
)
540 thead
.appendChild(doc
.createTextNode("\n "))
541 # create the table body
542 tbody
= doc
.createElement("tbody")
545 children
= table
.childNodes
546 for child
in children
:
547 if child
.nodeType
== ELEMENT
:
548 tagName
= child
.get_tagName()
549 if tagName
== "hline" and prev_row
is not None:
550 prev_row
.setAttribute("rowsep", "1")
551 elif tagName
== "row":
554 tbody
.appendChild(doc
.createTextNode("\n "))
555 move_elements_by_name(doc
, table
, tbody
, "row", sep
="\n ")
559 nodeType
= child
.nodeType
561 if string
.strip(child
.data
):
562 raise ConversionError("unexpected free data in table")
563 table
.removeChild(child
)
565 if nodeType
== ELEMENT
:
566 if child
.get_tagName() != "hline":
567 raise ConversionError(
568 "unexpected <%s> in table" % child
.get_tagName())
569 table
.removeChild(child
)
571 raise ConversionError(
572 "unexpected %s node in table" % child
.__class
__.__name
__)
573 # nothing left in the <table>; add the <thead> and <tbody>
574 tgroup
= doc
.createElement("tgroup")
575 tgroup
.appendChild(doc
.createTextNode("\n "))
576 tgroup
.appendChild(thead
)
577 tgroup
.appendChild(doc
.createTextNode("\n "))
578 tgroup
.appendChild(tbody
)
579 tgroup
.appendChild(doc
.createTextNode("\n "))
580 table
.appendChild(tgroup
)
581 # now make the <entry>s look nice:
582 for row
in table
.getElementsByTagName("row"):
586 def fixup_row(doc
, row
):
588 map(entries
.append
, row
.childNodes
[1:])
589 for entry
in entries
:
590 row
.insertBefore(doc
.createTextNode("\n "), entry
)
591 # row.appendChild(doc.createTextNode("\n "))
594 def move_elements_by_name(doc
, source
, dest
, name
, sep
=None):
596 for child
in source
.childNodes
:
597 if child
.get_nodeName() == name
:
600 source
.removeChild(node
)
601 dest
.appendChild(node
)
603 dest
.appendChild(doc
.createTextNode(sep
))
606 RECURSE_INTO_PARA_CONTAINERS
= (
607 "chapter", "abstract", "enumerate",
608 "section", "subsection", "subsubsection",
609 "paragraph", "subparagraph", "back-matter",
611 "item", "itemize", "fulllineitems", "enumeration", "descriptionlist",
612 "definitionlist", "definition",
615 PARA_LEVEL_ELEMENTS
= (
616 "moduleinfo", "title", "verbatim", "enumerate", "item",
617 "interpreter-session", "back-matter", "interactive-session",
618 "opcodedesc", "classdesc", "datadesc",
619 "funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni",
620 "funcdescni", "methoddescni", "excdescni",
621 "tableii", "tableiii", "tableiv", "localmoduletable",
622 "sectionauthor", "seealso", "itemize",
623 # include <para>, so we can just do it again to get subsequent paras:
627 PARA_LEVEL_PRECEEDERS
= (
629 "stindex", "obindex", "COMMENT", "label", "input", "title",
630 "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
631 "moduleauthor", "indexterm", "leader",
def fixup_paras(doc, fragment):
    """Apply paragraph fix-up to the containers in *fragment*.

    Two passes are made, both delegating to fixup_paras_helper():
    first over the direct children of *fragment* whose node name is listed
    in RECURSE_INTO_PARA_CONTAINERS, then over every <description> element
    returned by find_all_elements().
    """
    for node in fragment.childNodes:
        gi = node.get_nodeName()
        if gi in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, node)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
644 def fixup_paras_helper(doc
, container
, depth
=0):
645 # document is already normalized
646 children
= container
.childNodes
647 start
= skip_leading_nodes(children
)
648 while len(children
) > start
:
649 if children
[start
].get_nodeName() in RECURSE_INTO_PARA_CONTAINERS
:
650 # Something to recurse into:
651 fixup_paras_helper(doc
, children
[start
])
653 # Paragraph material:
654 build_para(doc
, container
, start
, len(children
))
655 if DEBUG_PARA_FIXER
and depth
== 10:
657 start
= skip_leading_nodes(children
, start
+ 1)
660 def build_para(doc
, parent
, start
, i
):
661 children
= parent
.childNodes
664 BREAK_ELEMENTS
= PARA_LEVEL_ELEMENTS
+ RECURSE_INTO_PARA_CONTAINERS
665 # Collect all children until \n\n+ is found in a text node or a
666 # member of BREAK_ELEMENTS is found.
667 for j
in range(start
, i
):
670 nodeType
= child
.nodeType
671 if nodeType
== ELEMENT
:
672 if child
.get_tagName() in BREAK_ELEMENTS
:
675 elif nodeType
== TEXT
:
676 pos
= string
.find(child
.data
, "\n\n")
685 if (start
+ 1) > after
:
686 raise ConversionError(
687 "build_para() could not identify content to turn into a paragraph")
688 if children
[after
- 1].nodeType
== TEXT
:
689 # we may need to split off trailing white space:
690 child
= children
[after
- 1]
692 if string
.rstrip(data
) != data
:
694 child
.splitText(len(string
.rstrip(data
)))
695 para
= doc
.createElement(PARA_ELEMENT
)
697 indexes
= range(start
, after
)
700 node
= parent
.childNodes
[j
]
701 parent
.removeChild(node
)
702 para
.insertBefore(node
, prev
)
705 parent
.appendChild(para
)
706 parent
.appendChild(doc
.createTextNode("\n\n"))
707 return len(parent
.childNodes
)
709 nextnode
= parent
.childNodes
[start
]
710 if nextnode
.nodeType
== TEXT
:
711 if nextnode
.data
and nextnode
.data
[0] != "\n":
712 nextnode
.data
= "\n" + nextnode
.data
714 newnode
= doc
.createTextNode("\n")
715 parent
.insertBefore(newnode
, nextnode
)
718 parent
.insertBefore(para
, nextnode
)
722 def skip_leading_nodes(children
, start
=0):
723 """Return index into children of a node at which paragraph building should
724 begin or a recursive call to fixup_paras_helper() should be made (for
727 When the return value >= len(children), we've built all the paras we can
728 from this list of children.
732 # skip over leading comments and whitespace:
733 child
= children
[start
]
734 nodeType
= child
.nodeType
737 shortened
= string
.lstrip(data
)
739 if data
!= shortened
:
740 # break into two nodes: whitespace and non-whitespace
741 child
.splitText(len(data
) - len(shortened
))
744 # all whitespace, just skip
745 elif nodeType
== ELEMENT
:
746 tagName
= child
.get_tagName()
747 if tagName
in RECURSE_INTO_PARA_CONTAINERS
:
749 if tagName
not in PARA_LEVEL_ELEMENTS
+ PARA_LEVEL_PRECEEDERS
:
def fixup_rfc_references(doc, fragment):
    """Append the text "RFC <num>" to every <rfc> element in *fragment*.

    The number is read from the element's "num" attribute; the text node is
    created through *doc*.
    """
    rfcnodes = find_all_elements(fragment, "rfc")
    for rfcnode in rfcnodes:
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
761 def fixup_signatures(doc
, fragment
):
762 for child
in fragment
.childNodes
:
763 if child
.nodeType
== ELEMENT
:
764 args
= child
.getElementsByTagName("args")
768 args
= child
.getElementsByTagName("constructor-args")
774 def fixup_args(doc
, arglist
):
775 for child
in arglist
.childNodes
:
776 if child
.get_nodeName() == "optional":
777 # found it; fix and return
778 arglist
.insertBefore(doc
.createTextNode("["), child
)
779 optkids
= child
.childNodes
783 arglist
.insertBefore(k
, child
)
784 arglist
.insertBefore(doc
.createTextNode("]"), child
)
785 arglist
.removeChild(child
)
786 return fixup_args(doc
, arglist
)
def fixup_sectionauthors(doc, fragment):
    """Convert each <sectionauthor name="..."/> into an <author> element.

    For every <sectionauthor> returned by find_all_elements(): the element
    is detached from its parent, its underlying node is renamed to
    "author", the value of its "name" attribute becomes its text content
    (and the attribute is removed), and it is re-inserted into the parent
    preceded by a newline-plus-indent text node.  The insertion point is
    the parent's child at index 2 when the child at index 1 is a <title>,
    otherwise the child at index 0.
    """
    for author in find_all_elements(fragment, "sectionauthor"):
        owner = author.parentNode
        owner.removeChild(author)
        author._node.name = "author"
        who = author.getAttribute("name")
        author.appendChild(doc.createTextNode(who))
        author.removeAttribute("name")
        # NOTE: both lookups are unconditional, so this assumes the parent
        # still has at least three children after the removal above.
        following = owner.childNodes[2]
        candidate = owner.childNodes[1]
        if candidate.get_nodeName() == "title":
            anchor = following
        else:
            anchor = owner.childNodes[0]
        owner.insertBefore(doc.createTextNode("\n "), anchor)
        owner.insertBefore(author, anchor)
def fixup_verbatims(doc):
    """Rename <verbatim> elements that contain interpreter sessions.

    A <verbatim> whose first child is a text node starting (once leading
    whitespace is ignored) with ">>>" has its underlying node renamed to
    "interactive-session".  All other <verbatim> elements, including ones
    with no children at all, are left unchanged.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        children = verbatim.childNodes
        if not len(children):
            # An empty <verbatim/> has no first child to inspect; indexing
            # it would raise IndexError and abort the whole conversion.
            continue
        child = children[0]
        if child.nodeType == TEXT \
           and string.lstrip(child.data)[:3] == ">>>":
            verbatim._node.name = "interactive-session"
813 def add_node_ids(fragment
, counter
=0):
814 fragment
._node
.node_id
= counter
815 for node
in fragment
.childNodes
:
816 counter
= counter
+ 1
817 if node
.nodeType
== ELEMENT
:
818 counter
= add_node_ids(node
, counter
)
820 node
._node
.node_id
= counter
# Tag names of the ref*modindex elements that fixup_refmodindexes() and
# fixup_refmodindexes_chunk() search for.
REFMODINDEX_ELEMENTS = (
    'refmodindex',
    'refbimodindex',
    'refexmodindex',
    'refstmodindex',
)
827 def fixup_refmodindexes(fragment
):
828 # Locate <ref*modindex>...</> co-located with <module>...</>, and
829 # remove the <ref*modindex>, replacing it with index=index on the
831 nodes
= find_all_elements_from_set(fragment
, REFMODINDEX_ELEMENTS
)
834 parent
= node
.parentNode
835 d
[parent
._node
.node_id
] = parent
837 map(fixup_refmodindexes_chunk
, d
.values())
840 def fixup_refmodindexes_chunk(container
):
841 # node is probably a <para>; let's see how often it isn't:
842 if container
.get_tagName() != PARA_ELEMENT
:
843 bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container
)
844 module_entries
= find_all_elements(container
, "module")
845 if not module_entries
:
847 index_entries
= find_all_elements_from_set(container
, REFMODINDEX_ELEMENTS
)
849 for entry
in index_entries
:
850 children
= entry
.childNodes
851 if len(children
) != 0:
852 bwrite("--- unexpected number of children for %s node:\n"
853 % entry
.get_tagName())
854 ewrite(entry
.toxml() + "\n")
857 module_name
= entry
.getAttribute("module")
858 for node
in module_entries
:
859 if len(node
.childNodes
) != 1:
861 this_name
= node
.childNodes
[0].data
862 if this_name
== module_name
:
864 node
.setAttribute("index", "yes")
866 removes
.append(entry
)
868 container
.removeChild(node
)
871 def fixup_bifuncindexes(fragment
):
872 nodes
= find_all_elements(fragment
, 'bifuncindex')
874 # make sure that each parent is only processed once:
876 parent
= node
.parentNode
877 d
[parent
._node
.node_id
] = parent
879 map(fixup_bifuncindexes_chunk
, d
.values())
882 def fixup_bifuncindexes_chunk(container
):
884 entries
= find_all_child_elements(container
, "bifuncindex")
885 function_entries
= find_all_child_elements(container
, "function")
886 for entry
in entries
:
887 function_name
= entry
.getAttribute("name")
889 for func_entry
in function_entries
:
890 t2
= func_entry
.childNodes
[0].data
894 if t2
== function_name
:
895 func_entry
.setAttribute("index", "yes")
896 func_entry
.setAttribute("module", "__builtin__")
899 removes
.append(entry
)
900 for entry
in removes
:
901 container
.removeChild(entry
)
904 def join_adjacent_elements(container
, gi
):
909 children
= parent
.get_childNodes()
910 nchildren
= len(children
)
911 while i
< (nchildren
- 1):
913 if child
.nodeName
== gi
:
914 if children
[i
+1].nodeName
== gi
:
915 ewrite("--- merging two <%s/> elements\n" % gi
)
917 nextchild
= children
[i
+1]
918 nextchildren
= nextchild
.get_childNodes()
919 while len(nextchildren
):
920 node
= nextchildren
[0]
921 nextchild
.removeChild(node
)
922 child
.appendChild(node
)
923 parent
.removeChild(nextchild
)
925 if child
.nodeType
== ELEMENT
:
# Pattern for attribute values that look like a single token: an ASCII
# letter followed by any number of letters, digits, "." or "-", with the
# "$" anchor closing the match.  write_esis() applies it with .match().
_token_rx = re.compile(
    r"[a-zA-Z][a-zA-Z0-9.-]*$"
)
932 def write_esis(doc
, ofp
, knownempty
):
933 for node
in doc
.childNodes
:
934 nodeType
= node
.nodeType
935 if nodeType
== ELEMENT
:
936 gi
= node
.get_tagName()
938 if node
.hasChildNodes():
940 "declared-empty node <%s> has children" % gi
942 for k
, v
in node
.attributes
.items():
944 if _token_rx
.match(value
):
948 ofp
.write("A%s %s %s\n" % (k
, dtype
, esistools
.encode(value
)))
949 ofp
.write("(%s\n" % gi
)
950 write_esis(node
, ofp
, knownempty
)
951 ofp
.write(")%s\n" % gi
)
952 elif nodeType
== TEXT
:
953 ofp
.write("-%s\n" % esistools
.encode(node
.data
))
954 elif nodeType
== ENTITY_REFERENCE
:
955 ofp
.write("&%s\n" % node
.get_nodeName())
957 raise RuntimeError, "unsupported node type: %s" % nodeType
960 def convert(ifp
, ofp
):
961 p
= esistools
.ExtendedEsisBuilder()
964 fragment
= p
.fragment
966 simplify(doc
, fragment
)
967 handle_labels(doc
, fragment
)
968 handle_appendix(doc
, fragment
)
969 fixup_trailing_whitespace(doc
, {
974 "subsection": "\n\n",
975 "subsubsection": "\n\n",
977 "subparagraph": "\n\n",
979 cleanup_root_text(doc
)
980 cleanup_trailing_parens(fragment
, ["function", "method", "cfunction"])
981 cleanup_synopses(doc
, fragment
)
982 fixup_descriptors(doc
, fragment
)
983 fixup_verbatims(fragment
)
985 fixup_paras(doc
, fragment
)
986 fixup_sectionauthors(doc
, fragment
)
987 fixup_table_structures(doc
, fragment
)
988 fixup_rfc_references(doc
, fragment
)
989 fixup_signatures(doc
, fragment
)
990 add_node_ids(fragment
)
991 fixup_refmodindexes(fragment
)
992 fixup_bifuncindexes(fragment
)
993 # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
994 # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
995 join_adjacent_elements(fragment
, "option")
998 for gi
in p
.get_empties():
1000 if d
.has_key("rfc"):
1002 knownempty
= d
.has_key
1005 write_esis(fragment
, ofp
, knownempty
)
1006 except IOError, (err
, msg
):
1007 # Ignore EPIPE; it just means that whoever we're writing to stopped
1008 # reading. The rest of the output would be ignored. All other errors
1009 # should still be reported,
1010 if err
!= errno
.EPIPE
:
1015 if len(sys
.argv
) == 1:
1018 elif len(sys
.argv
) == 2:
1019 ifp
= open(sys
.argv
[1])
1021 elif len(sys
.argv
) == 3:
1022 ifp
= open(sys
.argv
[1])
1023 ofp
= open(sys
.argv
[2], "w")
1030 if __name__
== "__main__":