py-cvs-rel2_1 (Rev 1.2) merge
[python/dscho.git] / Doc / tools / mkhowto
blob5b69fe3c712a2206814b1fd3c0f8b49bb2012bdb
1 #! /usr/bin/env python
2 # -*- Python -*-
3 """usage: %(program)s [options...] file ...
5 Options specifying formats to build:
6 --html HyperText Markup Language (default)
7 --pdf Portable Document Format
8 --ps PostScript
9 --dvi 'DeVice Independent' format from TeX
10 --text ASCII text (requires lynx)
12 More than one output format may be specified, or --all.
14 HTML options:
15 --address, -a Specify an address for page footers.
16 --link Specify the number of levels to include on each page.
17 --split, -s Specify a section level for page splitting, default: %(max_split_depth)s.
18 --iconserver, -i Specify location of icons (default: ../).
19 --image-type Specify the image type to use in HTML output;
20 values: gif (default), png.
21 --numeric Don't rename the HTML files; just keep node#.html for
22 the filenames.
23 --style Specify the CSS file to use for the output (filename,
24 not a URL).
25 --up-link URL to a parent document.
26 --up-title Title of a parent document.
28 Other options:
29 --a4 Format for A4 paper.
30 --letter Format for US letter paper (the default).
31 --help, -H Show this text.
32 --logging, -l Log stdout and stderr to a file (*.how).
33 --debugging, -D Echo commands as they are executed.
34 --keep, -k Keep temporary files around.
35 --quiet, -q Do not print command output to stdout.
36 (stderr is also lost, sorry; see *.how for errors)
37 """
39 import getopt
40 import glob
41 import os
42 import re
43 import shutil
44 import string
45 import sys
46 import tempfile
if not hasattr(os.path, "abspath"):
    # Python 1.5.1 or earlier: os.path has no abspath(), so install a
    # minimal replacement that the rest of this script can rely on.
    def abspath(path):
        """Return a normalized, absolute version of *path*."""
        if os.path.isabs(path):
            absolute = path
        else:
            # Relative paths are resolved against the current directory.
            absolute = os.path.join(os.getcwd(), path)
        return os.path.normpath(absolute)

    os.path.abspath = abspath
# Absolute form of sys.path[0] (normally the directory holding the running
# script -- TODO confirm for unusual invocations) and its parent directory.
MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

# Support files located relative to the directories above:
#   ISTFILE           -- style file handed to makeindex via "-s"
#   NODE2LABEL_SCRIPT -- Perl script run over the generated *.html files
#   L2H_INIT_FILE     -- copied into the generated latex2html init file
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs; they are interpolated into shell command
# lines, so they are resolved by the shell at run time.
# (PYTHON_BINARY is not referenced in the visible part of this file.)
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
def usage(options):
    """Print the usage message.

    The module docstring doubles as a %-format template; *options* is
    indexed by name (e.g. ``program``) to fill in the placeholders.
    """
    text = __doc__ % options
    print(text)
def error(options, message, err=2):
    """Report a fatal command-line problem and terminate the program.

    *message* is printed, followed by a blank line and the usage text,
    all on stderr.  The process then exits with status *err* (default 2).
    Previously *err* was accepted but ignored and the status was always 2.
    """
    # Rebind stdout so that usage(), which simply prints, also lands on
    # stderr.  This is safe because we never return from this function.
    sys.stdout = sys.stderr
    print(message)
    print("")
    usage(options)
    sys.exit(err)
class Options:
    """Settings for one mkhowto run, normally filled in by parse().

    Class attributes hold the defaults; parse() overrides them on the
    instance.  Instances can be indexed by attribute name so that they
    can be used as the mapping when %-formatting the usage message.
    """

    program = os.path.basename(sys.argv[0])

    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = None
    image_type = "gif"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None

    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Return the attribute named *key*; raise KeyError if missing."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Parse the command-line *args* and return the non-option rest.

        getopt.error propagates for unknown options; int() raises
        ValueError for non-numeric --link/--split/--runs values.
        --help prints the usage text and exits.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is accepted by getopt and documented in the usage
                # text, so it must be handled here as well; it used to be
                # silently dropped.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                self.builddir = arg
            elif opt == "--paper":
                self.paper = arg
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy it so
        # that self.formats stays a list (add_format() appends to it) and
        # the class-level tuple is never shared:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        if not texinputs:
            texinputs = ['']
        self.base_texinputs = [
            os.path.join(TOPDIR, "paper-" + self.paper),
            os.path.join(TOPDIR, "texinputs"),
            ] + texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)
class Job:
    """Build one document in every requested output format.

    A Job is created for a single .tex file together with the shared
    options object; build() then drives LaTeX, latex2html, dvips and lynx
    through the shell.  Every command and its output is appended to the
    "<doc>.how" transcript in the build directory.
    """

    # Number of LaTeX passes made so far for this document.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare the build directory, transcript and latex2html init file."""
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if not os.path.exists(self.builddir):
            os.mkdir(self.builddir)
        self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # Never overwrite an existing <doc>.l2h file; use a scratch name
        # for the generated init file in that case.
        if os.path.exists(self.doc + ".l2h"):
            self.l2h_aux_init_file = tempfile.mktemp()
        else:
            self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce each format listed in options.formats, then clean up."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    # Copy next to the generated HTML.  This used to target
                    # self.doc, which is wrong when --dir is given.
                    new_fn = os.path.join(self.builddir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            # Reuse the HTML built above when it is already a single page;
            # it lives in the build directory (not necessarily self.doc).
            tempdir = self.builddir
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Export TEXINPUTS with the document's own directory first."""
        texinputs = [self.filedir] + list(self.options.base_texinputs)
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Make the first LaTeX pass, writing placeholder index files first."""
        if binary is None:
            binary = LATEX_BINARY
        new_index(   "%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run *binary* (latex or pdflatex) with the index/bibtex passes."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            #
            # let the doctype-specific handler do some intermediate work:
            #
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Drop the duplicated trailing line from each <doc>*.syn file."""
        synopsis_files = glob.glob(self.doc + "*.syn")
        for path in synopsis_files:
            uniqify_module_table(path)

    def build_ps(self):
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir=None, max_split_depth=None):
        """Run latex2html into *builddir* and post-process the result.

        *builddir* defaults to the job's build directory and
        *max_split_depth* to the option value.  Exits with status 1 if
        the .tex source cannot be found on TEXINPUTS.
        """
        if builddir is None:
            builddir = self.builddir
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))  # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            label_file = os.path.join(builddir, "labels.pl")
            about_node = None
            found_about = 0
            target = " = q/about/;\n"
            x = len(target)
            fp = open(label_file)
            try:
                while 1:
                    line = fp.readline()
                    if not line:
                        break
                    if line[-x:] == target:
                        # The node file name is on the line after the label.
                        found_about = 1
                        line = fp.readline()
                        m = re.search(r"\|(node\d+\.[a-z]+)\|", line)
                        if m:
                            about_node = m.group(1)
                        break
            finally:
                fp.close()
            if about_node:
                shutil.copyfile(os.path.join(builddir, about_node),
                                os.path.join(builddir, "about.html"))
            elif found_about:
                # Used to die with AttributeError on the failed match.
                self.warning("Could not determine the 'about' node from %s."
                             % label_file)
            if not self.options.numeric:
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)

    def build_text(self, tempdir=None):
        """Dump <tempdir>/index.html to <doc>.txt using lynx."""
        if tempdir is None:
            tempdir = self.builddir
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the first LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def _copy_file_into(self, fp, filename):
        """Append the contents of *filename* to the open file *fp*."""
        src = open(filename)
        try:
            fp.write(src.read())
        finally:
            src.close()

    def write_l2h_aux_init_file(self):
        """Generate the latex2html init file for this job.

        The file is the stock init file, followed by any --l2h-init
        files, followed by the settings derived from the options.
        """
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            self._copy_file_into(fp, L2H_INIT_FILE)
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                self._copy_file_into(fp, filename)
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            # A Perl file that is require'd must end with a true value.
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove intermediate files left behind by the tools."""
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        # Explicit loops: map() must not be relied on for side effects.
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            pattern = os.path.join(self.doc, spec)
            for path in glob.glob(pattern):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def _exit_code(self, status):
        """Turn an os.system() wait status into a non-zero exit code.

        The command's exit code sits in the high byte of the status.
        Passing the raw status to sys.exit() would let the OS truncate,
        e.g., 256 to 0 and report success.  Any failure that leaves the
        high byte empty (such as death by signal) is reported as 1.
        """
        return ((status >> 8) & 0xFF) or 1

    def run(self, command):
        """Run *command* through the shell, appending output to the log.

        On failure the tail of the transcript is echoed to stderr and the
        program exits with the command's exit code.
        """
        self.message(command)
        rc = os.system("(%s) </dev/null >>%s 2>&1"
                       % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(self._exit_code(rc))

    def message(self, msg):
        """Log a progress line (prefixed "+++ "); echo it unless quiet."""
        msg = "+++ " + msg
        if not self.options.quiet:
            print(msg)
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a warning (prefixed "*** ") to stderr and to the log."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append *msg* verbatim to the transcript file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()
def get_run_transcript(filename):
    """Return the transcript lines belonging to the most recent run() call.

    The result starts at the last line beginning with "+++ " (the marker
    written before each command) and runs to the end of the file.  If no
    marker is present, every line is returned.
    """
    fp = open(filename)
    transcript = fp.readlines()
    fp.close()
    # Scan backwards for the newest marker line.
    start = 0
    index = len(transcript)
    while index > 0:
        index = index - 1
        if transcript[index][:4] == "+++ ":
            start = index
            break
    return transcript[start:]
def safe_unlink(path):
    """Remove the file *path*, ignoring OS errors such as a missing file."""
    try:
        os.unlink(path)
    except os.error:
        # Best effort only: the file is an optional temporary.
        return
def split_pathname(path):
    """Return (directory, document name) for the source file *path*.

    The path is made absolute first; a trailing ".tex" is removed from
    the file name to give the document name.
    """
    dirname, basename = os.path.split(os.path.abspath(path))
    suffix = ".tex"
    if basename.endswith(suffix):
        basename = basename[:-len(suffix)]
    return dirname, basename
# Matches a \documentclass line (with optional [...] arguments) and
# captures the class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")

def get_doctype(path):
    """Return the document class named in the file *path*, or None.

    Only lines that begin with \\documentclass are considered; the first
    such line decides the result.
    """
    doctype = None
    fp = open(path)
    line = fp.readline()
    while line:
        found = _doctype_rx.match(line)
        if found:
            doctype = found.group(1)
            break
        line = fp.readline()
    fp.close()
    return doctype
def main():
    """Command-line entry point: parse the options and build each document.

    With no file arguments, the single *.tex file in the current
    directory is used; zero or several candidates is a usage error.
    """
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except (getopt.error, ValueError):
        # ValueError comes from int() on a non-numeric --link/--split/--runs
        # value; report it like any other bad option instead of dumping a
        # traceback.  sys.exc_info() avoids version-specific except syntax.
        error(options, sys.exc_info()[1])
    if not args:
        # attempt to locate single .tex file in current directory:
        args = glob.glob("*.tex")
        if not args:
            error(options, "No file to process.")
        if len(args) > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for path in args:
        Job(options, path).build()
def l2hoption(fp, option, value):
    """Write the Perl assignment ``$option = "value";`` to *fp*.

    Nothing is written for unset values (None or the empty string), so the
    defaults from the latex2html init file stay in effect.  A numeric zero
    is a real setting (e.g. --link 0) and is written; the plain truth test
    used previously dropped it.
    """
    if value or value == 0:
        fp.write('$%s = "%s";\n' % (option, string_to_perl(str(value))))
# Characters that need a backslash inside a Perl double-quoted string.
# The backslash itself is included so that values such as Windows paths
# are not read as escape sequences ("\t", "\a", ...).
_to_perl = {
    "\\": "\\\\",
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
    }

def string_to_perl(s):
    """Return *s* escaped for use inside a Perl double-quoted string.

    Every character without an entry in the table is passed through
    unchanged.  The previous table only covered chr(1)..chr(255), so any
    other character produced None and broke the join.
    """
    return "".join([_to_perl.get(c, c) for c in s])
def check_for_bibtex(filename):
    """Return true if the file *filename* contains a \\bibdata{ entry."""
    fp = open(filename)
    contents = fp.read()
    fp.close()
    return contents.find(r"\bibdata{") >= 0
def uniqify_module_table(filename):
    """Rewrite *filename* without its last line if it repeats the one before.

    Only the final two lines are compared; duplicates elsewhere in the
    file are left alone.  Both file objects are closed explicitly so the
    rewritten data is flushed without relying on garbage collection.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
    fp = open(filename, "w")
    try:
        fp.writelines(lines)
    finally:
        fp.close()
def new_index(filename, label="genindex"):
    """Create *filename* as an empty LaTeX index carrying *label*.

    The file holds a ``theindex`` environment whose only content is a
    \\label{...}.  Because the template is a raw string, its leading
    backslash-newline is written to the file literally.
    """
    contents = r"""\
\begin{theindex}
\label{%s}
\end{theindex}
""" % label
    out = open(filename, "w")
    out.write(contents)
    out.close()
# Run the command-line driver only when executed as a script, not on import.
if __name__ == "__main__":
    main()