Bump version to 0.9.1.
[python/dscho.git] / Doc / tools / toc2bkm.py
blobc2edea3527060981b8ae0ba455fae1dc6ead0725
1 #! /usr/bin/env python
3 """Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
5 The output file has an extension of '.bkm' instead of '.out', since hyperref
6 already uses that extension.
7 """
9 import getopt
10 import os
11 import re
12 import string
13 import sys
# Each entry is a tuple of:
#
#   Section type, Section #, Title String, Page #, List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.
# Pattern for a single "\contentsline" record of a LaTeX .toc file.  It is
# compiled in verbose mode, so the "#" remarks inside the pieces below are
# part of the pattern text and are skipped by the regex engine.
cline_re = "\n".join((
    r"^",
    r"\\contentsline\ \{([a-z]*)} # type of section in $1",
    r"\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number",
    r"(.*)} # title string",
    r"\{(\d+)}$",  # page number
))

# Groups: 1 = section type, 2 = section number (None when the entry is
# unnumbered), 3 = raw title, 4 = page number as a string of digits.
cline_rx = re.compile(cline_re, flags=re.VERBOSE)
# Marker "direction" for a transition that descends one nesting level.
# Any positive direction is the number of levels to climb back out.
OUTER_TO_INNER = -1

# Maps (previous section type, new section type) to the nesting change that
# parse_toc() applies before it appends the new entry.  Pairs that are not
# listed here (for example skipping straight from a chapter to a subsection)
# are not supported and make the lookup in parse_toc() fail with KeyError.
_transition_map = {
    ('chapter', 'section'): OUTER_TO_INNER,
    ('section', 'subsection'): OUTER_TO_INNER,
    ('subsection', 'subsubsection'): OUTER_TO_INNER,
    ('subsubsection', 'subsection'): 1,
    ('subsection', 'section'): 1,
    ('section', 'chapter'): 1,
    ('subsection', 'chapter'): 2,
    ('subsubsection', 'section'): 2,
    ('subsubsection', 'chapter'): 3,
    }

# Section types that may appear in the outline; parse_toc() ignores entries
# of any other type (paragraphs, subparagraphs, ...).
INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")
def parse_toc(fp, bigpart=None):
    """Parse a LaTeX .toc file into a nested table of contents.

    fp is an object with a readline() method that yields the lines of the
    .toc file.  bigpart is the section type of the outermost entries; it
    defaults to 'chapter'.

    Returns a list of entries.  Each entry is a tuple
    (section type, section number or None, cleaned title, page number as
    an int, list of sub-entries), where the sub-entries have the same shape.

    Lines that do not look like a \\contentsline record are echoed to
    stderr, prefixed with their line number, and otherwise ignored.

    Raises KeyError if the file moves between two section types for which
    _transition_map has no rule.
    """
    toc = top = []
    # stack[0] is always the list currently being filled; the lists of the
    # enclosing levels follow it, ending with the top-level list.
    stack = [toc]
    level = bigpart or 'chapter'
    lineno = 0
    while True:
        line = fp.readline()
        if not line:
            break
        lineno += 1
        m = cline_rx.match(line)
        if m is None:
            # Report the offending line together with its line number.
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype == level:
            # Same depth as the previous entry: a sibling.
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        direction = _transition_map[(level, stype)]
        if direction == OUTER_TO_INNER:
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            # Climb back out by `direction` levels.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top
# Patterns used by clean_title() to strip LaTeX markup from a title.
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
title_rx = re.compile(r"\\([a-zA-Z])+\s+")
# Identity translation table (the value string.maketrans("", "") used to
# produce).  clean_title() no longer needs it; the name is kept only so
# that anything referring to it keeps working.
title_trans = "".join(map(chr, range(256)))


def clean_title(title):
    """Return the title string with LaTeX markup removed.

    The following is applied, in order:

    * each "\\raisebox{...}" (the macro and its first brace group) is
      deleted;
    * each "\\hackscore{...}" is replaced by "\\_";
    * every control word followed by whitespace (e.g. "\\module ") is
      deleted together with that whitespace, except those starting with
      "\\textunderscore", which are kept;
    * all remaining "{" and "}" characters are deleted.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)

    def strip_macro(match):
        # Keep \textunderscore, drop any other control word.
        text = match.group(0)
        if text.startswith("\\textunderscore"):
            return text
        return ""

    # A single left-to-right substitution pass also handles a macro that
    # directly follows a deleted one, which a scan resuming one character
    # past the deletion point would miss.
    title = title_rx.sub(strip_macro, title)
    return title.replace("{", "").replace("}", "")
def write_toc(toc, fp):
    """Write every entry of the parsed table of contents toc to fp.

    Each top-level entry is handed to write_toc_entry() with layer 0.
    """
    top_layer = 0
    for top_entry in toc:
        write_toc_entry(top_entry, fp, top_layer)
def write_toc_entry(entry, fp, layer):
    """Write the \\pdfoutline line for entry, then those of its sub-entries.

    entry is a (section type, section number, title, page number,
    sub-entries) tuple.  fp is an object with a write() method.  layer is
    the nesting depth of entry; it is only passed on, incremented, to the
    recursive calls.
    """
    _kind, number, heading, page, children = entry
    line = "\\pdfoutline goto name{page%03d}" % page
    if children:
        # Negative count of the direct sub-entries.
        line = "%s count -%d" % (line, len(children))
    label = "%s %s" % (number, heading) if number else heading
    fp.write("%s {%s}\n" % (line, label))
    deeper = layer + 1
    for child in children:
        write_toc_entry(child, fp, deeper)
def process(ifn, ofn, bigpart=None):
    """Convert the .toc file named ifn into the bookmark file named ofn.

    bigpart is passed through to parse_toc() as the section type of the
    outermost entries.  The input is parsed completely before the output
    file is opened, and both files are closed before returning, so the
    output is flushed to disk even if the caller keeps running.
    """
    with open(ifn) as ifp:
        toc = parse_toc(ifp, bigpart)
    with open(ofn, "w") as ofp:
        write_toc(toc, ofp)
def main():
    """Command-line entry point.

    Usage: toc2bkm.py [-c bigpart] file ...

    The -c option gives the section type of the outermost entries, which
    is passed to parse_toc(); without it parse_toc() uses 'chapter'.  Each
    file argument without an extension gets ".toc" appended, and the
    output is written next to it with the extension ".bkm".

    Exits with status 2 after printing a usage message to stderr when the
    options are invalid or no file is given.
    """
    usage_text = "usage: %s [-c bigpart] file[.toc] ...\n" % (
        os.path.basename(sys.argv[0]) or "toc2bkm.py")
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.error as err:
        sys.stderr.write("%s\n%s" % (err, usage_text))
        sys.exit(2)
    if opts:
        # Only "-c" is accepted; when repeated, the first occurrence wins.
        bigpart = opts[0][1]
    if not args:
        sys.stderr.write(usage_text)
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)


if __name__ == "__main__":
    main()