_make_boundary(): Fix for SF bug #745478, broken boundary calculation
[python/dscho.git] / Doc / tools / mkhowto
blobeed1a58754e04b724ff5a588590c5633dcbf925d
1 #! /usr/bin/env python
2 # -*- Python -*-
3 """usage: %(program)s [options...] file ...
5 Options specifying formats to build:
6 --html HyperText Markup Language (default)
7 --pdf Portable Document Format
8 --ps PostScript
9 --dvi 'DeVice Independent' format from TeX
10 --text ASCII text (requires lynx)
12 More than one output format may be specified, or --all.
14 HTML options:
15 --address, -a Specify an address for page footers.
16 --dir Specify the directory for HTML output.
17 --link Specify the number of levels to include on each page.
18 --split, -s Specify a section level for page splitting, default: %(max_split_depth)s.
19 --iconserver, -i Specify location of icons (default: ./).
20 --image-type Specify the image type to use in HTML output;
21 values: gif (default), png.
22 --numeric Don't rename the HTML files; just keep node#.html for
23 the filenames.
24 --style Specify the CSS file to use for the output (filename,
25 not a URL).
26 --up-link URL to a parent document.
27 --up-title Title of a parent document.
28 --favicon Icon to display in the browser's location bar.
30 Other options:
31 --a4 Format for A4 paper.
32 --letter Format for US letter paper (the default).
33 --help, -H Show this text.
34 --logging, -l Log stdout and stderr to a file (*.how).
35 --debugging, -D Echo commands as they are executed.
36 --keep, -k Keep temporary files around.
37 --quiet, -q Do not print command output to stdout.
38 (stderr is also lost, sorry; see *.how for errors)
39 """
41 import getopt
42 import glob
43 import os
44 import re
45 import shutil
46 import string
47 import sys
if not hasattr(os.path, "abspath"):
    # Compatibility shim: Python 1.5.1 and earlier have no os.path.abspath(),
    # so install an equivalent on the os.path module.
    def abspath(path):
        """Return a normalized, absolute version of path."""
        if os.path.isabs(path):
            return os.path.normpath(path)
        return os.path.normpath(os.path.join(os.getcwd(), path))

    os.path.abspath = abspath
# Names of the external programs invoked while building; each is run through
# the shell, so these are plain command names.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"

# MYDIR is the absolute form of sys.path[0]; TOPDIR is its parent directory.
MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

# Support files located relative to the two directories above.
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")
def usage(options):
    """Print the module docstring, formatted with values looked up in options.

    The docstring contains %(name)s placeholders, so options must support
    lookup by key.
    """
    help_text = __doc__ % options
    print(help_text)
def error(options, message, err=2):
    """Report a fatal problem followed by the usage text, then exit.

    options -- passed through to usage() to format the help text
    message -- text (or exception) describing the problem
    err     -- process exit status; defaults to 2

    Everything is written to stderr: sys.stdout is rebound to sys.stderr so
    that the usage text printed by usage() lands there as well.  This
    function does not return; it raises SystemExit via sys.exit().
    """
    sys.stdout = sys.stderr
    print(message)
    print("")
    usage(options)
    # Honor the caller-supplied status; it was previously ignored in favor
    # of a hard-coded 2.
    sys.exit(err)
class Options:
    """Command-line settings, with their defaults held as class attributes.

    Instances support ``options[name]`` lookup (see __getitem__) so that an
    instance can be used as the mapping when formatting the usage message.
    """

    program = os.path.basename(sys.argv[0])

    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = "."
    image_type = "gif"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None
    favicon = None

    # 'dvips_safe' is a weird option.  It is used mostly to make
    # LaTeX2HTML not try to be too smart about protecting the user
    # from a bad version of dvips -- some versions would core dump if
    # the path to the source DVI contained a dot, and it's apparently
    # difficult to determine if the version available has that bug.
    # This option gets set when PostScript output is requested
    # (because we're going to run dvips regardless, and we'll either
    # know it succeeds before LaTeX2HTML is run, or we'll have
    # detected the failure and bailed), or the user asserts that it's
    # safe from the command line.
    #
    # So, why does LaTeX2HTML think it appropriate to protect the user
    # from a dvips that's only potentially going to core dump?  Only
    # because they want to avoid doing a lot of work just to have to
    # bail later with no useful intermediates.  Unfortunately, they
    # bail *before* they know whether dvips will be needed at all.
    # I've gone around the bush a few times with the LaTeX2HTML
    # developers over whether this is appropriate behavior, and they
    # don't seem interested in changing their position.
    #
    dvips_safe = 0

    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        """Create per-instance lists for the formats and extra init files."""
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Return the attribute named key; raise KeyError if there is none."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line options in args to this instance.

        args is a list of argument strings (without the program name).
        Returns the arguments left over once the options are consumed.
        Errors raised by getopt.getopt() propagate to the caller, as does
        the ValueError from int() for a non-numeric --link/--split/--runs.
        -H/--help prints the usage text and exits.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index=", "dvips-safe",
                                    "favicon="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
                self.dvips_safe = "ps" in self.formats
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is accepted by the getopt spec above and advertised
                # in the usage text, so it has to be handled here too;
                # otherwise it is silently dropped.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--favicon":
                self.favicon = arg
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                if os.sep == "\\":
                    # Plain string replacement: a lone backslash is not a
                    # valid replacement template for re.sub().
                    arg = arg.replace("/", "\\")
                self.builddir = os.path.expanduser(arg)
            elif opt == "--paper":
                self.paper = arg
            elif opt == "--dvips-safe":
                self.dvips_safe = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            if format == "ps":
                # assume this is safe since we're going to run it anyway
                self.dvips_safe = 1
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy into a
        # list so that add_format() can still append to it afterwards:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        if not texinputs:
            texinputs = ['']
        self.base_texinputs = [
            os.path.join(TOPDIR, "paper-" + self.paper),
            os.path.join(TOPDIR, "texinputs"),
            ] + texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)
class Job:
    """Build the requested output formats for one LaTeX document.

    A Job is created from an options object and the path of the document
    source; build() then runs the external tools needed for each format in
    options.formats and, unless temporaries are kept, cleans up afterwards.
    """

    # Counts the LaTeX passes made so far; zero means the auxiliary files
    # have not been generated yet (see require_temps()).
    latex_runs = 0

    def __init__(self, options, path):
        """Set up the build directory, transcript file and l2h init file.

        options -- settings object (formats, builddir, ... are read here)
        path    -- path to the document's LaTeX source file
        """
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if "html" in options.formats or "text" in options.formats:
            if not os.path.exists(self.builddir):
                os.mkdir(self.builddir)
            self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        else:
            self.log_filename = os.path.abspath(self.doc + ".how")
        # start with a fresh transcript:
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # keep one backup of any existing init file before overwriting it:
        l2hconf = self.doc + ".l2h"
        if os.path.exists(l2hconf):
            backup = l2hconf + "~"
            if os.path.exists(backup):
                os.unlink(backup)
            os.rename(l2hconf, backup)
        self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every format listed in self.options.formats."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                # icons are served from the output directory itself, so
                # copy the support images there:
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    new_fn = os.path.join(self.builddir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # The text dump is made from a single-page HTML build, so
                # override the split depth and build into a scratch dir.
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Export TEXINPUTS with the document's directory listed first."""
        texinputs = [self.filedir] + list(self.options.base_texinputs)
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Make the first LaTeX pass, which generates the auxiliary files.

        binary -- LaTeX program to run; LATEX_BINARY when None
        """
        if binary is None:
            binary = LATEX_BINARY
        # start from placeholder index files:
        new_index("%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Build DVI output using LATEX_BINARY."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build PDF output using PDFLATEX_BINARY."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run the index/bibliography/LaTeX cycle with the given program."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                # indfix was imported above whenever use_indfix is set
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Run uniqify_module_table() on each of the document's .syn files."""
        synopsis_files = glob.glob(self.doc + "*.syn")
        for path in synopsis_files:
            uniqify_module_table(path)

    def build_ps(self):
        """Convert the DVI file to PostScript with DVIPS_BINARY."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir, max_split_depth=None):
        """Run LaTeX2HTML into builddir and post-process its output.

        builddir        -- output directory; created if missing, otherwise
                           its existing *.html files are removed first
        max_split_depth -- split depth in effect; defaults to
                           self.options.max_split_depth

        Exits with status 1 if the .tex source cannot be found on TEXINPUTS.
        """
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))     # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            self._copy_about_node(builddir)
            if not self.options.numeric:
                # node2label.pl is run from inside the output directory;
                # always return to the original working directory.
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)
        # These files need to be cleaned up here since builddir there
        # can be more than one, so we clean each of them.
        if self.options.discard_temps:
            for fn in ("images.tex", "images.log", "images.aux"):
                safe_unlink(os.path.join(builddir, fn))

    def _copy_about_node(self, builddir):
        """Copy the node file labelled "about" in labels.pl to about.html.

        labels.pl is scanned for the first line ending in the "about" label
        assignment; the node file name is taken from the line after it.
        Nothing is copied if no such line exists.
        """
        target = " = q/about/;\n"
        x = len(target)
        fp = open(os.path.join(builddir, "labels.pl"))
        try:
            while 1:
                line = fp.readline()
                if not line:
                    break
                if line[-x:] == target:
                    line = fp.readline()
                    m = re.search(r"\|(node\d+\.[a-z]+)\|", line)
                    if m is None:
                        # Report the problem instead of failing with an
                        # AttributeError on a missing match.
                        self.warning(
                            "Could not determine the 'about' node from %s."
                            % os.path.join(builddir, "labels.pl"))
                    else:
                        about_node = m.group(1)
                        shutil.copyfile(os.path.join(builddir, about_node),
                                        os.path.join(builddir, "about.html"))
                    break
        finally:
            fp.close()

    def build_text(self, tempdir=None):
        """Dump index.html from tempdir to <doc>.txt using LYNX_BINARY.

        tempdir -- directory holding index.html; defaults to self.doc
        """
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Generate the auxiliary files unless a LaTeX pass already ran."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def _copy_contents(self, filename, fp):
        """Write the whole content of the file named filename to fp."""
        src = open(filename)
        try:
            fp.write(src.read())
        finally:
            src.close()

    def write_l2h_aux_init_file(self):
        """Write the LaTeX2HTML init file used for this document.

        The file is made of a Perl preamble, the content of L2H_INIT_FILE,
        the content of each file in options.l2h_init_files, and finally one
        assignment per option that has a true value (see l2hoption()).
        """
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            self._copy_contents(L2H_INIT_FILE, fp)
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                self._copy_contents(filename, fp)
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "FAVORITES_ICON", options.favicon)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            l2hoption(fp, "DVIPS_SAFE", options.dvips_safe)
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove the temporary files left behind by the build tools."""
        # NOTE(review): this name is mangled to _Job__have_temps and is not
        # read anywhere in this class; kept as in the original.
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            pattern = os.path.join(self.doc, spec)
            for path in glob.glob(pattern):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run a shell command; on failure, report it and exit.

        Except on Windows, the command's output is appended to the
        transcript file and its stdin is /dev/null.  A non-zero result
        prints diagnostics plus the relevant transcript lines to stderr and
        ends the process via sys.exit().
        """
        self.message(command)
        if sys.platform.startswith("win"):
            rc = os.system(command)
        else:
            rc = os.system("(%s) </dev/null >>%s 2>&1"
                           % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            result = 1
            if hasattr(os, "WIFEXITED"):
                if os.WIFEXITED(rc):
                    result = os.WEXITSTATUS(rc)
                    self.warning("Exited with status %s." % result)
                else:
                    # A process that did not exit normally was terminated
                    # by a signal; WTERMSIG (not WSTOPSIG) extracts it.
                    self.warning("Killed by signal %s." % os.WTERMSIG(rc))
            else:
                self.warning("Return code: %s" % rc)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(result)

    def message(self, msg):
        """Log a progress message; also print it unless in quiet mode."""
        msg = "+++ " + msg
        if not self.options.quiet:
            print(msg)
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a warning to stderr and to the transcript file."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append msg to the transcript file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()
def get_run_transcript(filename):
    """Return the transcript lines belonging to the most recent run() call.

    That is the last line starting with "+++ " and everything after it; the
    whole file is returned when no line carries that marker.
    """
    fp = open(filename)
    transcript = fp.readlines()
    fp.close()
    start = 0
    # walk backwards to the last command marker
    for index in range(len(transcript) - 1, -1, -1):
        if transcript[index][:4] == "+++ ":
            start = index
            break
    return transcript[start:]
def safe_unlink(path):
    """Remove the file at path, silently ignoring any os.error raised."""
    try:
        os.remove(path)
    except os.error:
        # best effort only: a file that cannot be removed is left alone
        return
def split_pathname(path):
    """Return (directory, name) for path, made absolute first.

    A trailing ".tex" is removed from the name.
    """
    dirname, basename = os.path.split(os.path.abspath(path))
    if basename.endswith(".tex"):
        basename = basename[:-len(".tex")]
    return dirname, basename
# Matches a \documentclass declaration (with optional [...] options) at the
# very start of a line; group 1 is the class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")

def get_doctype(path):
    """Return the class name from the first \\documentclass line in path.

    Only declarations that begin a line are recognized.  Returns None when
    the file contains none.
    """
    fp = open(path)
    found = None
    line = fp.readline()
    while line:
        match = _doctype_rx.match(line)
        if match is not None:
            found = match.group(1)
            break
        line = fp.readline()
    fp.close()
    return found
def main():
    """Command-line driver: parse sys.argv and build each named document.

    With no file arguments, the single *.tex file in the current directory
    is used; it is an error if there is none or more than one.
    """
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error:
        # error() does not return; it is handed the exception being handled
        error(options, sys.exc_info()[1])
    if not args:
        # attempt to locate single .tex file in current directory:
        candidates = glob.glob("*.tex")
        if not candidates:
            error(options, "No file to process.")
        elif len(candidates) > 1:
            error(options, "Could not deduce which files should be processed.")
        args = candidates
    #
    # parameters are processed, let's go!
    #
    for path in args:
        job = Job(options, path)
        job.build()
def l2hoption(fp, option, value):
    """Write a Perl assignment of value to $option on fp.

    Nothing is written when value is false (None, 0, empty string, ...).
    The value is converted with str() and escaped by string_to_perl().
    """
    if not value:
        return
    escaped = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, escaped))
# Characters that need a backslash escape when placed in a Perl string;
# every other character is passed through unchanged by string_to_perl().
_to_perl = {}
_to_perl["@"] = "\\@"
_to_perl["$"] = "\\$"
_to_perl['"'] = '\\"'

def string_to_perl(s):
    """Return s with '@', '$' and '"' preceded by a backslash.

    All other characters are copied as they are, including NUL and
    characters outside the 1..255 range, which previously made the join
    fail because the lookup table had no entry for them.
    """
    return ''.join([_to_perl.get(c, c) for c in s])
def check_for_bibtex(filename):
    """Return true if the file contains a \\bibdata{ entry."""
    fp = open(filename)
    contents = fp.read()
    fp.close()
    return contents.find(r"\bibdata{") >= 0
def uniqify_module_table(filename):
    """Drop the last line of the file if it repeats the line before it.

    The file is rewritten in place (even when nothing was dropped).  Both
    file objects are closed explicitly so that the rewritten content is
    flushed before this function returns.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
    fp = open(filename, "w")
    try:
        fp.writelines(lines)
    finally:
        fp.close()
def new_index(filename, label="genindex"):
    """Create filename as an empty 'theindex' environment with the label.

    The first line of the file is a lone backslash, as in the template
    this was derived from.
    """
    template = ("\\\n"
                "\\begin{theindex}\n"
                "\\label{%s}\n"
                "\\end{theindex}\n")
    fp = open(filename, "w")
    fp.write(template % label)
    fp.close()
if __name__ == "__main__":
    # Run the command-line driver only when executed as a script.
    main()