This commit was manufactured by cvs2svn to create tag 'r211c1'.
[python/dscho.git] / Doc / tools / mkhowto
blob6481e93e2bb558b5b68fc5abdf17f426fedd6c41
1 #! /usr/bin/env python
2 # -*- Python -*-
3 """usage: %(program)s [options...] file ...
5 Options specifying formats to build:
6 --html HyperText Markup Language (default)
7 --pdf Portable Document Format
8 --ps PostScript
9 --dvi 'DeVice Independent' format from TeX
10 --text ASCII text (requires lynx)
12 More than one output format may be specified, or --all.
14 HTML options:
15 --address, -a Specify an address for page footers.
16 --link Specify the number of levels to include on each page.
17 --split, -s Specify a section level for page splitting, default: %(max_split_depth)s.
18 --iconserver, -i Specify location of icons (default: ../).
19 --image-type Specify the image type to use in HTML output;
20 values: gif (default), png.
21 --numeric Don't rename the HTML files; just keep node#.html for
22 the filenames.
23 --style Specify the CSS file to use for the output (filename,
24 not a URL).
25 --up-link URL to a parent document.
26 --up-title Title of a parent document.
28 Other options:
29 --a4 Format for A4 paper.
30 --letter Format for US letter paper (the default).
31 --help, -H Show this text.
32 --logging, -l Log stdout and stderr to a file (*.how).
33 --debugging, -D Echo commands as they are executed.
34 --keep, -k Keep temporary files around.
35 --quiet, -q Do not print command output to stdout.
36 (stderr is also lost, sorry; see *.how for errors)
37 """
39 import getopt
40 import glob
41 import os
42 import re
43 import shutil
44 import string
45 import sys
46 import tempfile
# Directory named by sys.path[0], made absolute, and its parent directory.
# The support files below are located relative to these two.
MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

# Support files: the makeindex style file, the script run over the generated
# HTML files, and the base LaTeX2HTML initialization file.
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs; they are interpolated into shell command
# lines, so they are resolved through the shell's normal lookup.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
def usage(options):
    """Write the usage message to the current sys.stdout.

    The module docstring is the template; *options* is the mapping operand
    of ``%``, so it must answer ``options[name]`` for every ``%(name)s``
    field the docstring contains.
    """
    text = __doc__ % options
    sys.stdout.write(text + "\n")
def error(options, message, err=2):
    """Report a fatal command-line problem and terminate the process.

    sys.stdout is rebound to sys.stderr (and stays rebound), then *message*,
    a blank line and the usage text are written there.

    options -- mapping used to fill in the usage text (see usage()).
    message -- text or exception describing the problem.
    err     -- exit status handed to sys.exit(); defaults to 2.
    """
    sys.stdout = sys.stderr
    sys.stdout.write("%s\n\n" % (message,))
    usage(options)
    # Honour the caller's status instead of always exiting with 2.
    sys.exit(err)
class Options:
    """Settings gathered from the command line.

    The class attributes are the defaults; parse() overrides them on the
    instance.  An instance can also be indexed by attribute name, which lets
    it fill in the ``%(name)s`` fields of the usage message.
    """

    program = os.path.basename(sys.argv[0])

    address = ''                # --address / -a
    builddir = None             # --dir
    debugging = 0               # incremented by each --debugging / -D
    discard_temps = 1           # cleared by --keep / -k
    have_temps = 0
    icon_server = None          # --iconserver / -i
    image_type = "gif"          # --image-type
    logging = 0                 # incremented by each --logging / -l
    max_link_depth = 3          # --link
    max_split_depth = 6         # --split / -s
    paper = "letter"            # --a4, --letter, --paper
    quiet = 0                   # --quiet / -q
    runs = 0                    # --runs / -r
    numeric = 0                 # --numeric
    global_module_index = None  # --global-module-index
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None              # --up-link
    up_title = None             # --up-title

    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        """Start with no formats selected and no extra init files."""
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Return the attribute named *key*; raise KeyError if there is none."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line options in *args* to this object.

        Calls initialize() once all options are handled and returns the
        remaining non-option arguments.  getopt.error propagates to the
        caller for unknown or malformed options; --help prints the usage
        text and exits.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # -i is accepted by getopt and documented in the usage text,
                # so it has to be handled together with the long spelling.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                self.builddir = arg
            elif opt == "--paper":
                self.paper = arg
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy into a
        # list so add_format() keeps working afterwards:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories; splitting on an
        # explicit separator always yields at least [''], so an unset or
        # empty TEXINPUTS contributes a single empty entry:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        self.base_texinputs = [
            os.path.join(TOPDIR, "paper-" + self.paper),
            os.path.join(TOPDIR, "texinputs"),
            ] + texinputs
class Job:
    """Build the requested output formats for one LaTeX document.

    A Job is created from an Options object and the path of the document;
    build() then runs the external tools (LaTeX, makeindex, BibTeX, dvips,
    LaTeX2HTML, lynx) through the shell, appending their output to the
    transcript file "<doc>.how".
    """

    # Number of LaTeX passes performed so far for this document.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare to build the document at *path* using *options*.

        Removes a transcript left over from an earlier run and writes the
        auxiliary LaTeX2HTML init file.
        """
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.log_filename = self.doc + ".how"
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # Do not overwrite an existing <doc>.l2h; write the generated init
        # file under a temporary name instead.
        if os.path.exists(self.doc + ".l2h"):
            self.l2h_aux_init_file = tempfile.mktemp()
        else:
            self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def _html_dir(self):
        """Return the directory build() writes the HTML output to."""
        return self.options.builddir or self.doc

    def build(self):
        """Produce every format listed in options.formats, then clean up
        temporary files unless options.discard_temps is false."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            html_dir = self._html_dir()
            self.build_html(html_dir)
            if self.options.icon_server == ".":
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                # The icons belong next to the generated pages, which is
                # the --dir directory when one was given.
                for fn in imgs:
                    new_fn = os.path.join(html_dir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            if "html" in formats:
                # reuse the pages that were just generated
                tempdir = self._html_dir()
                need_html = 0
            else:
                tempdir = self.doc
                need_html = 1
            if self.options.max_split_depth != 1:
                # The text dump needs a single-page rendering, so override
                # the split depth and build into a scratch directory.
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Set TEXINPUTS to the document's directory followed by the base
        directories computed by Options.initialize()."""
        texinputs = [self.filedir] + list(self.options.base_texinputs)
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Run the first LaTeX pass with placeholder index files in place.

        binary -- LaTeX program to run; LATEX_BINARY when None.
        """
        if binary is None:
            binary = LATEX_BINARY
        new_index("%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Build the DVI file with LATEX_BINARY."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build the PDF file with PDFLATEX_BINARY."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run *binary* as often as needed to resolve indexes, the
        bibliography and (for pdflatex) the bookmark file."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            #
            # let the doctype-specific handler do some intermediate work:
            #
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Drop a duplicated final line from each <doc>*.syn file."""
        for path in glob.glob(self.doc + "*.syn"):
            uniqify_module_table(path)

    def build_ps(self):
        """Convert the DVI file to PostScript with dvips."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir=None, max_split_depth=None):
        """Run LaTeX2HTML into *builddir* and post-process the result.

        builddir        -- output directory; the document name when None.
        max_split_depth -- split depth the pages are generated with; taken
                           from the options when None.  Only used here to
                           decide whether the split-page post-processing
                           applies.

        Exits with status 1 if <doc>.tex is not found on TEXINPUTS.
        """
        if builddir is None:
            builddir = self.doc
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))        # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            self._copy_about_page(builddir)
            if not self.options.numeric:
                # NOTE(review): the transcript name is relative, so output
                # logged while inside builddir lands in a file there --
                # confirm whether that is intended.
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)

    def _copy_about_page(self, builddir):
        """Copy the page labels.pl associates with 'about' to about.html."""
        target = " = q/about/;\n"
        fp = open(os.path.join(builddir, "labels.pl"))
        try:
            while 1:
                line = fp.readline()
                if not line:
                    break
                if line[-len(target):] == target:
                    # the node file is named on the line after the label
                    m = re.search(r"\|(node\d+\.[a-z]+)\|", fp.readline())
                    if m is None:
                        self.warning(
                            "Could not find the node file for the 'about'"
                            " label in labels.pl.")
                    else:
                        shutil.copyfile(
                            os.path.join(builddir, m.group(1)),
                            os.path.join(builddir, "about.html"))
                    break
        finally:
            fp.close()

    def build_text(self, tempdir=None):
        """Dump index.html from *tempdir* (default: the document name) to
        <doc>.txt using lynx."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the first LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def _read_file(self, path):
        """Return the contents of *path*, closing the file promptly."""
        fp = open(path)
        try:
            return fp.read()
        finally:
            fp.close()

    def write_l2h_aux_init_file(self):
        """Write the LaTeX2HTML init file: the base init file, any files
        given with --l2h-init, and the settings taken from the options."""
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            fp.write(self._read_file(L2H_INIT_FILE))
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                fp.write(self._read_file(filename))
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove intermediate files left behind by the tools."""
        # nothing in this class reads this attribute
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        # Only look in the --dir directory when HTML was really built there.
        if "html" in self.options.formats:
            html_dir = self._html_dir()
        else:
            html_dir = self.doc
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            for path in glob.glob(os.path.join(html_dir, spec)):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run *command* through the shell, appending its output to the
        transcript.  On failure, show the transcript of this command on
        stderr and exit with a non-zero status."""
        self.message(command)
        rc = os.system("(%s) </dev/null >>%s 2>&1"
                       % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(self._exit_status(rc))

    def _exit_status(self, rc):
        """Turn a non-zero os.system() result into a non-zero exit status."""
        if os.name == "posix":
            # rc is a wait status: the exit code sits in the high byte, so
            # passing it on unchanged would be truncated to 0 ("success").
            return (rc >> 8) or 1
        return rc

    def message(self, msg):
        """Log a progress line, echoing it to stdout unless --quiet."""
        msg = "+++ " + msg
        if not self.options.quiet:
            sys.stdout.write(msg + "\n")
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a warning to stderr and to the transcript."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append *msg* to the transcript file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()
def get_run_transcript(filename):
    """Return lines from the transcript file for the most recent run() call.

    The result starts at the last line beginning with "+++ " and runs to the
    end of the file; if no line has that prefix, every line is returned.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    # Walk back from the end until the marker line is found.
    start = len(lines)
    while start > 0:
        start = start - 1
        if lines[start].startswith("+++ "):
            break
    return lines[start:]
def safe_unlink(path):
    """Remove the file at *path*, ignoring any OS-level failure to do so."""
    try:
        os.unlink(path)
    except OSError:
        # best effort: a file that is missing or cannot be removed is left
        return
def split_pathname(path):
    """Return (directory, document name) for *path*.

    *path* is resolved against the current directory and normalized; a
    trailing ".tex" is removed from the final component.
    """
    full = os.path.normpath(os.path.join(os.getcwd(), path))
    dirname, basename = os.path.split(full)
    if basename.endswith(".tex"):
        basename = basename[:-len(".tex")]
    return dirname, basename
# Matches a \documentclass line (optional [...] arguments allowed) and
# captures the class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")

def get_doctype(path):
    """Return the class named by the first line of the file at *path* that
    starts with a \\documentclass declaration, or None if there is none."""
    fp = open(path)
    try:
        for line in fp.readlines():
            found = _doctype_rx.match(line)
            if found:
                return found.group(1)
    finally:
        fp.close()
    return None
def main():
    """Parse sys.argv and build every document named on the command line.

    With no document arguments, the single *.tex file in the current
    directory is used; zero or several candidates are reported through
    error(), which exits.
    """
    options = Options()
    try:
        paths = options.parse(sys.argv[1:])
    except getopt.error:
        # fetch the exception object without version-specific syntax
        error(options, sys.exc_info()[1])
    if not paths:
        # attempt to locate single .tex file in current directory:
        paths = glob.glob("*.tex")
        if not paths:
            error(options, "No file to process.")
        if len(paths) > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for doc_path in paths:
        job = Job(options, doc_path)
        job.build()
def l2hoption(fp, option, value):
    """Write the Perl assignment ``$option = "value";`` to *fp*.

    Nothing is written when *value* is false (None, '', 0).  The value is
    converted with str() and escaped with string_to_perl().
    """
    if not value:
        return
    escaped = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, escaped))
# Characters that need a backslash in front of them in the Perl strings
# this script generates; every other character is copied unchanged.
_to_perl = {
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
    }

def string_to_perl(s):
    """Return *s* with '@', '$' and '"' backslash-escaped.

    Characters without an entry in the table (including NUL and anything
    beyond the 8-bit range) pass through unchanged instead of breaking the
    join.
    """
    return "".join([_to_perl.get(c, c) for c in s])
def check_for_bibtex(filename):
    """Return true if the file at *filename* contains a \\bibdata{ entry."""
    fp = open(filename)
    try:
        contents = fp.read()
    finally:
        fp.close()
    return contents.find(r"\bibdata{") >= 0
def uniqify_module_table(filename):
    """Rewrite *filename*, dropping its last line if it repeats the one
    before it.

    Only the final pair of lines is compared; earlier duplicates are kept.
    Both file objects are closed explicitly so the rewritten contents are
    flushed without relying on garbage collection.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
    fp = open(filename, "w")
    try:
        fp.writelines(lines)
    finally:
        fp.close()
def new_index(filename, label="genindex"):
    """Create *filename* holding an empty theindex environment that carries
    the LaTeX label *label*."""
    # In a raw string the backslash-newline after the opening quotes is kept
    # literally, so the file starts with a line holding a single backslash.
    stub = r"""\
\begin{theindex}
\label{%s}
\end{theindex}
""" % label
    fp = open(filename, "w")
    fp.write(stub)
    fp.close()
# Run the command-line driver only when executed as a script, not when the
# file is imported as a module.
if __name__ == "__main__":
    main()