Doc/tools/toc2bkm.py

   1 #! /usr/bin/env python
   2
   3 """Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
   4
   5 The output file has an extension of '.bkm' instead of '.out', since hyperref
   6 already uses that extension.
   7 """
   8
   9 import getopt
  10 import os
  11 import re
  12 import string
  13 import sys
  14
  15
# Each item in an entry is a tuple of:
#
#   Section type,  Section #,  Title String,  Page #,  List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.
  21
  22 cline_re = r"""^
  23 \\contentsline\ \{([a-z]*)}             # type of section in $1
  24 \{(?:\\numberline\ \{([0-9.A-Z]+)})?     # section number
  25 (.*)}                                   # title string
  26 \{(\d+)}$"""                            # page number
  27
  28 cline_rx = re.compile(cline_re, re.VERBOSE)
  29
  30 OUTER_TO_INNER = -1
  31
  32 _transition_map = {
  33     ('chapter', 'section'): OUTER_TO_INNER,
  34     ('section', 'subsection'): OUTER_TO_INNER,
  35     ('subsection', 'subsubsection'): OUTER_TO_INNER,
  36     ('subsubsection', 'subsection'): 1,
  37     ('subsection', 'section'): 1,
  38     ('section', 'chapter'): 1,
  39     ('subsection', 'chapter'): 2,
  40     ('subsubsection', 'section'): 2,
  41     ('subsubsection', 'chapter'): 3,
  42     }
  43
  44 INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")
  45
  46
  47 def parse_toc(fp, bigpart=None):
  48     toc = top = []
  49     stack = [toc]
  50     level = bigpart or 'chapter'
  51     lineno = 0
  52     while 1:
  53         line = fp.readline()
  54         if not line:
  55             break
  56         lineno = lineno + 1
  57         m = cline_rx.match(line)
  58         if m:
  59             stype, snum, title, pageno = m.group(1, 2, 3, 4)
  60             title = clean_title(title)
  61             entry = (stype, snum, title, string.atoi(pageno), [])
  62             if stype == level:
  63                 toc.append(entry)
  64             else:
  65                 if stype not in INCLUDED_LEVELS:
  66                     # we don't want paragraphs & subparagraphs
  67                     continue
  68                 direction = _transition_map[(level, stype)]
  69                 if direction == OUTER_TO_INNER:
  70                     toc = toc[-1][-1]
  71                     stack.insert(0, toc)
  72                     toc.append(entry)
  73                 else:
  74                     for i in range(direction):
  75                         del stack[0]
  76                         toc = stack[0]
  77                     toc.append(entry)
  78                 level = stype
  79         else:
  80             sys.stderr.write("l.%s: " + line)
  81     return top
  82
  83
  84 hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
  85 raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
  86 title_rx = re.compile(r"\\([a-zA-Z])+\s+")
  87 title_trans = string.maketrans("", "")
  88
  89 def clean_title(title):
  90     title = raisebox_rx.sub("", title)
  91     title = hackscore_rx.sub(r"\\_", title)
  92     pos = 0
  93     while 1:
  94         m = title_rx.search(title, pos)
  95         if m:
  96             start = m.start()
  97             if title[start:start+15] != "\\textunderscore":
  98                 title = title[:start] + title[m.end():]
  99             pos = start + 1
 100         else:
 101             break
 102     title = string.translate(title, title_trans, "{}")
 103     return title
 104
 105
 106 def write_toc(toc, fp):
 107     for entry in toc:
 108         write_toc_entry(entry, fp, 0)
 109
 110 def write_toc_entry(entry, fp, layer):
 111     stype, snum, title, pageno, toc = entry
 112     s = "\\pdfoutline goto name{page%03d}" % pageno
 113     if toc:
 114         s = "%s count -%d" % (s, len(toc))
 115     if snum:
 116         title = "%s %s" % (snum, title)
 117     s = "%s {%s}\n" % (s, title)
 118     fp.write(s)
 119     for entry in toc:
 120         write_toc_entry(entry, fp, layer + 1)
 121
 122
 123 def process(ifn, ofn, bigpart=None):
 124     toc = parse_toc(open(ifn), bigpart)
 125     write_toc(toc, open(ofn, "w"))
 126
 127
 128 def main():
 129     bigpart = None
 130     opts, args = getopt.getopt(sys.argv[1:], "c:")
 131     if opts:
 132         bigpart = opts[0][1]
 133     if not args:
 134         usage()
 135         sys.exit(2)
 136     for filename in args:
 137         base, ext = os.path.splitext(filename)
 138         ext = ext or ".toc"
 139         process(base + ext, base + ".bkm", bigpart)
 140
 141
 142 if __name__ == "__main__":
 143     main()