This commit was manufactured by cvs2svn to create tag 'r234'.
[python/dscho.git] / Doc / tools / mkhowto
blobcee6ca524bb41659752373f15100013e74ea526e
1 #! /usr/bin/env python
2 # -*- Python -*-
3 """usage: %(program)s [options...] file ...
5 Options specifying formats to build:
6 --html HyperText Markup Language (default)
7 --pdf Portable Document Format
8 --ps PostScript
9 --dvi 'DeVice Independent' format from TeX
10 --text ASCII text (requires lynx)
12 More than one output format may be specified, or --all.
14 HTML options:
15 --address, -a Specify an address for page footers.
16 --dir Specify the directory for HTML output.
17 --link Specify the number of levels to include on each page.
18 --split, -s Specify a section level for page splitting, default: %(max_split_depth)s.
19 --iconserver, -i Specify location of icons (default: ./).
20 --image-type Specify the image type to use in HTML output;
21 values: gif (default), png.
22 --numeric Don't rename the HTML files; just keep node#.html for
23 the filenames.
24 --style Specify the CSS file to use for the output (filename,
25 not a URL).
26 --up-link URL to a parent document.
27 --up-title Title of a parent document.
28 --favicon Icon to display in the browser's location bar.
30 Other options:
31 --a4 Format for A4 paper.
32 --letter Format for US letter paper (the default).
33 --help, -H Show this text.
34 --logging, -l Log stdout and stderr to a file (*.how).
35 --debugging, -D Echo commands as they are executed.
36 --keep, -k Keep temporary files around.
37 --quiet, -q Do not print command output to stdout.
38 (stderr is also lost, sorry; see *.how for errors)
39 """
41 import getopt
42 import glob
43 import os
44 import re
45 import shutil
46 import sys
# Compatibility shim: install a fallback os.path.abspath() on interpreters
# that do not provide one (Python 1.5.1 or earlier).
try:
    os.path.abspath
except AttributeError:
    def abspath(path):
        """Return a normalized, absolute version of `path`."""
        if os.path.isabs(path):
            return os.path.normpath(path)
        # Relative paths are resolved against the current directory.
        return os.path.normpath(os.path.join(os.getcwd(), path))

    os.path.abspath = abspath
# Directory holding this script (taken from sys.path[0]) and its parent;
# the support files below are located relative to these.
MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

# Support files shipped alongside the tools.
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs that get run; each is looked up by the
# shell when the command line is executed.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
def usage(options):
    """Write the module docstring, filled in from `options`, to sys.stdout.

    `options` is used as the mapping operand of the ``%`` operator, so it
    must support lookup of the ``%(name)s`` fields used in the docstring.
    """
    text = __doc__ % options
    sys.stdout.write(text + "\n")
def error(options, message, err=2):
    """Report `message` and the usage text on stderr, then exit.

    options -- mapping-like object passed on to usage().
    message -- text (or exception instance) describing the problem.
    err     -- process exit status; defaults to 2.

    Raises SystemExit; this function never returns.
    """
    # usage() writes to sys.stdout, so redirect it for the rest of the
    # (short) life of the process.
    sys.stdout = sys.stderr
    sys.stdout.write("%s\n\n" % (message,))
    usage(options)
    # Honor the caller-supplied status instead of a hard-coded 2.
    sys.exit(err)
class Options:
    """Settings for one mkhowto invocation.

    The class attributes are the defaults; parse() stores command-line
    overrides on the instance.  Instances can also be indexed by attribute
    name (see __getitem__) so they can fill in the ``%(name)s`` fields of
    the usage message.
    """

    program = os.path.basename(sys.argv[0])

    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = "."
    image_type = "gif"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None
    favicon = None

    # 'dvips_safe' is a weird option.  It is used mostly to make
    # LaTeX2HTML not try to be too smart about protecting the user
    # from a bad version of dvips -- some versions would core dump if
    # the path to the source DVI contained a dot, and it's apparently
    # difficult to determine if the version available has that bug.
    # This option gets set when PostScript output is requested
    # (because we're going to run dvips regardless, and we'll either
    # know it succeeds before LaTeX2HTML is run, or we'll have
    # detected the failure and bailed), or the user asserts that it's
    # safe from the command line.
    #
    # So, why does LaTeX2HTML think it appropriate to protect the user
    # from a dvips that's only potentially going to core dump?  Only
    # because they want to avoid doing a lot of work just to have to
    # bail later with no useful intermediates.  Unfortunately, they
    # bail *before* they know whether dvips will be needed at all.
    # I've gone around the bush a few times with the LaTeX2HTML
    # developers over whether this is appropriate behavior, and they
    # don't seem interested in changing their position.
    #
    dvips_safe = 0

    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        # Per-instance lists, so instances never share mutable state.
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Return the attribute named `key`; raise KeyError if missing."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line arguments `args` to this object.

        Returns the list of non-option arguments.  getopt.error propagates
        for unknown or malformed options; int() raises ValueError for a
        non-numeric --link, --split or --runs value.  initialize() is
        called once all options have been applied.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index=", "dvips-safe",
                                    "favicon="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
                self.dvips_safe = "ps" in self.formats
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is accepted by the getopt spec above and advertised
                # in the usage text, so it must be handled here as well.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--favicon":
                self.favicon = arg
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                if os.sep == "\\":
                    # use the native separator when it is a backslash
                    arg = re.sub("/", "\\\\", arg)
                self.builddir = os.path.expanduser(arg)
            elif opt == "--paper":
                self.paper = arg
            elif opt == "--dvips-safe":
                self.dvips_safe = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            if format == "ps":
                # assume this is safe since we're going to run it anyway
                self.dvips_safe = 1
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; use a list
        # copy so that add_format() keeps working afterwards:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories (split() always
        # yields at least one element, '' when TEXINPUTS is unset or empty):
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        mydirs = [os.path.join(TOPDIR, "paper-" + self.paper),
                  os.path.join(TOPDIR, "texinputs"),
                  ]
        if '' in texinputs:
            # insert our directories just before the first empty entry
            i = texinputs.index('')
            texinputs[i:i] = mydirs
        else:
            texinputs += mydirs
        self.base_texinputs = texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)
class Job:
    """Build the requested output formats for a single LaTeX document.

    A Job is created from an Options instance and the path of the
    document source; build() then runs the external tools for every
    format listed in options.formats.
    """

    # Number of LaTeX passes run so far for this job.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare the build directory, log file and LaTeX2HTML init file.

        options -- an initialized Options instance.
        path    -- path of the document source file.
        """
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if ("html" in options.formats or "text" in options.formats):
            if not os.path.exists(self.builddir):
                os.mkdir(self.builddir)
            self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        else:
            self.log_filename = os.path.abspath(self.doc + ".how")
        # start with a fresh log:
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # keep one backup of an existing .l2h file before overwriting it:
        l2hconf = self.doc + ".l2h"
        if os.path.exists(l2hconf):
            if os.path.exists(l2hconf + "~"):
                os.unlink(l2hconf + "~")
            os.rename(l2hconf, l2hconf + "~")
        self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every format in options.formats, then clean up."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                # icons are referenced locally, so copy them next to the HTML
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    new_fn = os.path.join(self.builddir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # the text dump is made from single-page HTML, so override
                # the split depth and build into a scratch directory
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Set $TEXINPUTS to the document directory plus the base set."""
        texinputs = [self.filedir] + self.options.base_texinputs
        texinputs = os.pathsep.join(texinputs)
        os.environ["TEXINPUTS"] = texinputs
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Run the first LaTeX pass, starting from stub index files."""
        if binary is None:
            binary = LATEX_BINARY
        new_index(   "%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Build the DVI file using LATEX_BINARY."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build the PDF file using PDFLATEX_BINARY."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run `binary` as often as needed, updating indexes in between."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Remove a duplicated last line from each <doc>*.syn file."""
        synopsis_files = glob.glob(self.doc + "*.syn")
        for path in synopsis_files:
            uniqify_module_table(path)

    def build_ps(self):
        """Convert the DVI file to PostScript with DVIPS_BINARY."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir, max_split_depth=None):
        """Run LaTeX2HTML into `builddir` and post-process the result.

        max_split_depth defaults to options.max_split_depth.  The process
        exits with status 1 if the .tex file is not found on $TEXINPUTS.
        """
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            # get rid of HTML left over from an earlier run
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))     # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            about_node = self._find_about_node(
                os.path.join(builddir, "labels.pl"))
            if about_node:
                shutil.copyfile(os.path.join(builddir, about_node),
                                os.path.join(builddir, "about.html"))
            if not self.options.numeric:
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)
        # These files need to be cleaned up here since builddir there
        # can be more than one, so we clean each of them.
        if self.options.discard_temps:
            for fn in ("images.tex", "images.log", "images.aux"):
                safe_unlink(os.path.join(builddir, fn))

    def _find_about_node(self, label_file):
        """Return the node file named after the 'about' label, or None.

        Looks for the first line of `label_file` ending in
        " = q/about/;" and extracts the "|nodeN.ext|" entry from the line
        that follows it.  A warning is issued if that entry is missing.
        """
        target = " = q/about/;\n"
        fp = open(label_file)
        try:
            while 1:
                line = fp.readline()
                if not line:
                    return None
                if line[-len(target):] == target:
                    m = re.search(r"\|(node\d+\.[a-z]+)\|", fp.readline())
                    if m:
                        return m.group(1)
                    # Previously this case died with an AttributeError.
                    self.warning("Could not determine the 'about' node "
                                 "from %s." % label_file)
                    return None
        finally:
            fp.close()

    def build_text(self, tempdir=None):
        """Dump <tempdir>/index.html to <doc>.txt using LYNX_BINARY."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the first LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def _read_file(self, filename):
        """Return the whole contents of `filename`, closing the file."""
        fp = open(filename)
        try:
            return fp.read()
        finally:
            fp.close()

    def write_l2h_aux_init_file(self):
        """Write the LaTeX2HTML init file for this job.

        The file is made of a short Perl preamble, the contents of
        L2H_INIT_FILE, any files given with --l2h-init, and one setting
        for each option that has a true value.
        """
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            fp.write(self._read_file(L2H_INIT_FILE))
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                fp.write(self._read_file(filename))
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "FAVORITES_ICON", options.favicon)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            l2hoption(fp, "DVIPS_SAFE", options.dvips_safe)
            # a Perl file that is require'd must end with a true value
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove the intermediate files left behind by the build."""
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        # Explicit loops: map() would build a throw-away list, and is lazy
        # (i.e. would delete nothing) on Python 3.
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            # NOTE(review): this globs in self.doc rather than
            # self.builddir; confirm that is intended when --dir is used.
            pattern = os.path.join(self.doc, spec)
            for path in glob.glob(pattern):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run the shell command `command`; exit the process if it fails.

        Except on Windows, the command's stdin is /dev/null and its output
        is appended to the log file.  On failure the tail of the log is
        copied to stderr and SystemExit is raised with the command's exit
        status (or 1 if that is not available).
        """
        self.message(command)
        if sys.platform.startswith("win"):
            rc = os.system(command)
        else:
            rc = os.system("(%s) </dev/null >>%s 2>&1"
                           % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            result = 1
            if hasattr(os, "WIFEXITED"):
                if os.WIFEXITED(rc):
                    result = os.WEXITSTATUS(rc)
                    self.warning("Exited with status %s." % result)
                else:
                    # WTERMSIG gives the signal that terminated the
                    # process; WSTOPSIG is only meaningful for a stopped
                    # process and reported the wrong number here.
                    self.warning("Killed by signal %s." % os.WTERMSIG(rc))
            else:
                self.warning("Return code: %s" % rc)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(result)

    def message(self, msg):
        """Log a progress message; also print it unless --quiet is set."""
        # The "+++ " prefix is what get_run_transcript() looks for.
        msg = "+++ " + msg
        if not self.options.quiet:
            sys.stdout.write(msg + "\n")
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a warning to stderr and to the log file."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append `msg` to the log file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()
def get_run_transcript(filename):
    """Return lines from the transcript file for the most recent run() call.

    The result starts at the last line beginning with "+++ " and runs to
    the end of the file; if no line has that prefix, all lines are
    returned.
    """
    fp = open(filename)
    lines = fp.readlines()
    fp.close()
    # Walk backwards to the last "+++ " marker (or to the first line).
    start = len(lines)
    while start > 0:
        start -= 1
        if lines[start][:4] == "+++ ":
            break
    return lines[start:]
def safe_unlink(path):
    """Remove the file `path`, ignoring any OS-level error that results."""
    try:
        os.remove(path)
    except OSError:
        # Best effort only: a missing file (or any other OS error) is
        # deliberately not reported.
        return
def split_pathname(path):
    """Return (directory, document name) for the absolute form of `path`.

    A trailing ".tex" is removed from the document name.
    """
    dirname, basename = os.path.split(os.path.abspath(path))
    if basename.endswith(".tex"):
        basename = basename[:-len(".tex")]
    return dirname, basename
# Matches a \documentclass declaration (with an optional [...] argument)
# at the start of a line; group 1 is the class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")

def get_doctype(path):
    """Return the document class named in the file `path`, or None.

    Only the first line that starts with a \\documentclass declaration
    is considered.
    """
    found = None
    fp = open(path)
    for line in fp:
        match = _doctype_rx.match(line)
        if match is not None:
            found = match.group(1)
            break
    fp.close()
    return found
def main():
    """Parse the command line and build each document named on it.

    When no file is given, the current directory must contain exactly one
    *.tex file, which is then used; otherwise error() reports the problem
    and exits.
    """
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error:
        # hand the exception instance itself to error(), as the message
        error(options, sys.exc_info()[1])
    if not args:
        # attempt to locate single .tex file in current directory:
        args = glob.glob("*.tex")
        if len(args) == 0:
            error(options, "No file to process.")
        if len(args) > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for path in args:
        job = Job(options, path)
        job.build()
def l2hoption(fp, option, value):
    """Write a Perl assignment ``$option = "value";`` to the file `fp`.

    Nothing is written when `value` is false (None, 0, empty string).
    The value is converted with str() and escaped by string_to_perl().
    """
    if not value:
        return
    escaped = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, escaped))
# Characters that need a backslash in front of them when they are written
# into a Perl string by this script; every other character is copied
# unchanged.
_to_perl = {
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
    }

def string_to_perl(s):
    """Return `s` with '@', '$' and '"' escaped by a backslash.

    All other characters (including NUL and characters outside the
    1..255 range, which used to raise TypeError) are passed through
    unchanged.
    """
    return "".join([_to_perl.get(ch, ch) for ch in s])
def check_for_bibtex(filename):
    """Return true if the file `filename` contains the text "\\bibdata{"."""
    fp = open(filename)
    contents = fp.read()
    fp.close()
    return r"\bibdata{" in contents
def uniqify_module_table(filename):
    """Drop the last line of `filename` if it repeats the line before it.

    Only a single trailing duplicate is removed.  The file is rewritten
    only when a line was actually dropped, and both file handles are
    closed explicitly.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
        fp = open(filename, "w")
        try:
            fp.writelines(lines)
        finally:
            fp.close()
def new_index(filename, label="genindex"):
    """Create `filename` as an empty LaTeX index carrying \\label{label}."""
    # The output starts with a line holding a lone backslash; this is
    # reproduced exactly as before.
    # NOTE(review): that first line may have been meant as a line
    # continuation -- confirm before changing it.
    template = ("\\\n"
                "\\begin{theindex}\n"
                "\\label{%s}\n"
                "\\end{theindex}\n")
    fp = open(filename, "w")
    fp.write(template % label)
    fp.close()
671 if __name__ == "__main__":
672 main()