- Got rid of newmodule.c
[python/dscho.git] / Doc / tools / mkhowto
blobaf46fffbbcd50f4e4b67fca268b186d83ab504a1
1 #! /usr/bin/env python
2 # -*- Python -*-
3 """usage: %(program)s [options...] file ...
5 Options specifying formats to build:
6 --html HyperText Markup Language (default)
7 --pdf Portable Document Format
8 --ps PostScript
9 --dvi 'DeVice Independent' format from TeX
10 --text ASCII text (requires lynx)
12 More than one output format may be specified, or --all.
14 HTML options:
15 --address, -a Specify an address for page footers.
16 --link Specify the number of levels to include on each page.
17 --split, -s Specify a section level for page splitting, default: %(max_split_depth)s.
18 --iconserver, -i Specify location of icons (default: ../).
19 --image-type Specify the image type to use in HTML output;
20 values: gif (default), png.
21 --numeric Don't rename the HTML files; just keep node#.html for
22 the filenames.
23 --style Specify the CSS file to use for the output (filename,
24 not a URL).
25 --up-link URL to a parent document.
26 --up-title Title of a parent document.
28 Other options:
29 --a4 Format for A4 paper.
30 --letter Format for US letter paper (the default).
31 --help, -H Show this text.
32 --logging, -l Log stdout and stderr to a file (*.how).
33 --debugging, -D Echo commands as they are executed.
34 --keep, -k Keep temporary files around.
35 --quiet, -q Do not print command output to stdout.
36 (stderr is also lost, sorry; see *.how for errors)
37 """
39 import getopt
40 import glob
41 import os
42 import re
43 import shutil
44 import string
45 import sys
46 import tempfile
# Compatibility shim: Python 1.5.1 and earlier have no os.path.abspath(),
# so install an equivalent implementation when the attribute is missing.
try:
    os.path.abspath
except AttributeError:
    def abspath(path):
        """Return a normalized, absolute version of *path*."""
        if os.path.isabs(path):
            return os.path.normpath(path)
        # Relative paths are resolved against the current directory.
        return os.path.normpath(os.path.join(os.getcwd(), path))

    os.path.abspath = abspath
# Absolute form of sys.path[0], and its parent directory; the support
# files below are located relative to these two directories.
MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

# Support files: the makeindex style file, the Perl script passed to
# PERL_BINARY after an HTML build, and the base LaTeX2HTML init file.
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs used to build the documents; they are
# interpolated into the command lines passed to Job.run().
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
def usage(options):
    """Print the module docstring with its %(name)s fields filled in.

    *options* is used as the mapping for the %-substitution, so it must
    support item lookup by field name (Options.__getitem__ does).
    """
    text = __doc__ % options
    print(text)
def error(options, message, err=2):
    """Report a fatal command-line error and terminate the program.

    Prints *message*, a blank line and the usage text to stderr, then
    exits with status *err* (default 2).  Previously the *err* argument
    was accepted but ignored and the exit status was always 2.
    """
    # Redirect stdout so that usage(), which prints to stdout, also ends
    # up on stderr.  The process exits below, so it is never restored.
    sys.stdout = sys.stderr
    print(message)
    print("")
    usage(options)
    sys.exit(err)
class Options:
    """Build configuration gathered from the command line.

    The class attributes below are the defaults; parse() overrides them
    on the instance from the command-line arguments.  Instances also act
    as a read-only mapping of attribute names (see __getitem__) so they
    can fill in the %(name)s fields of the usage message.
    """

    program = os.path.basename(sys.argv[0])

    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = None
    image_type = "gif"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None

    # 'dvips_safe' is a weird option.  It is used mostly to make
    # LaTeX2HTML not try to be too smart about protecting the user
    # from a bad version of dvips -- some versions would core dump if
    # the path to the source DVI contained a dot, and it's apparently
    # difficult to determine if the version available has that bug.
    # This option gets set when PostScript output is requested
    # (because we're going to run dvips regardless, and we'll either
    # know it succeeds before LaTeX2HTML is run, or we'll have
    # detected the failure and bailed), or the user asserts that it's
    # safe from the command line.
    #
    # So, why does LaTeX2HTML think it appropriate to protect the user
    # from a dvips that's only potentially going to core dump?  Only
    # because they want to avoid doing a lot of work just to have to
    # bail later with no useful intermediates.  Unfortunately, they
    # bail *before* they know whether dvips will be needed at all.
    # I've gone around the bush a few times with the LaTeX2HTML
    # developers over whether this is appropriate behavior, and they
    # don't seem interested in changing their position.
    #
    dvips_safe = 0

    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        """Create per-instance lists so they are not shared via the class."""
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Return the attribute named *key*; raise KeyError if missing.

        This is used when formatting the usage message.
        """
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line options in *args* to this instance.

        Returns the remaining (non-option) arguments.  getopt.error
        propagates for unknown or malformed options, and ValueError for
        a non-integer argument to --link, --split or --runs.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index=", "dvips-safe"]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
                self.dvips_safe = "ps" in self.formats
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is declared in the getopt spec and documented in
                # the usage text; it used to be silently ignored here.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                if os.sep == "\\":
                    # Plain string replacement: a lone backslash is not
                    # a valid re.sub() replacement template.
                    arg = arg.replace("/", "\\")
                self.builddir = arg
            elif opt == "--paper":
                self.paper = arg
            elif opt == "--dvips-safe":
                self.dvips_safe = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            if format == "ps":
                # assume this is safe since we're going to run it anyway
                self.dvips_safe = 1
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy it
        # into a list so that add_format() can still append afterwards:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories; splitting an
        # empty TEXINPUTS yields [''], so the list is never empty:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        self.base_texinputs = [
            os.path.join(TOPDIR, "paper-" + self.paper),
            os.path.join(TOPDIR, "texinputs"),
            ] + texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)
class Job:
    """Build one document in every format requested by the options.

    A Job is created for a single .tex source file; build() runs the
    external tools needed for each requested format and, unless
    temporary files are being kept, cleans up afterwards.
    """

    # Number of LaTeX/pdfLaTeX passes performed so far for this job.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare the build directory, log file and LaTeX2HTML init file.

        *options* is the Options instance; *path* names the .tex source.
        """
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if ("html" in options.formats or "text" in options.formats):
            if not os.path.exists(self.builddir):
                os.mkdir(self.builddir)
            self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        else:
            self.log_filename = os.path.abspath(self.doc + ".how")
        # Start every job with a fresh log.
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # Don't clobber an existing <doc>.l2h; use a temporary name then.
        if os.path.exists(self.doc + ".l2h"):
            self.l2h_aux_init_file = tempfile.mktemp()
        else:
            self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every requested output format, then clean up."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    # Copy next to the generated HTML; that is builddir,
                    # which differs from self.doc when --dir is given.
                    new_fn = os.path.join(self.builddir,
                                          os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            # Reuse the HTML already generated in builddir when it is a
            # single page; otherwise build a one-page copy elsewhere.
            tempdir = self.builddir
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Set TEXINPUTS to the source directory plus the base directories."""
        texinputs = [self.filedir] + list(self.options.base_texinputs)
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Run the first LaTeX pass, using stub index files.

        *binary* is the LaTeX program to run (default LATEX_BINARY).
        """
        if binary is None:
            binary = LATEX_BINARY
        new_index(   "%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Build the DVI output with LaTeX."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build the PDF output with pdfLaTeX."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run *binary* as often as needed, generating indexes between runs."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Remove the duplicated last line from each <doc>*.syn file."""
        for path in glob.glob(self.doc + "*.syn"):
            uniqify_module_table(path)

    def build_ps(self):
        """Convert the DVI file to PostScript with dvips."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir, max_split_depth=None):
        """Run LaTeX2HTML into *builddir* and post-process the result.

        *max_split_depth* defaults to the value from the options.  The
        program exits with status 1 if the .tex file cannot be found on
        the TEXINPUTS path.
        """
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))     # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            label_file = os.path.join(builddir, "labels.pl")
            about_node = self._find_about_node(label_file)
            if about_node:
                shutil.copyfile(os.path.join(builddir, about_node),
                                os.path.join(builddir, "about.html"))
            # NOTE(review): nesting of this step under the split-depth
            # test was reconstructed from the upstream layout -- confirm.
            if not self.options.numeric:
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)

    def _find_about_node(self, label_file):
        """Return the node file name recorded for the 'about' label.

        Scans *label_file* for the line ending in ' = q/about/;' and
        extracts the |nodeN.ext| name from the line that follows it.
        Returns None when the label or the node name is not found.
        """
        target = " = q/about/;\n"
        fp = open(label_file)
        try:
            while 1:
                line = fp.readline()
                if not line:
                    return None
                if line[-len(target):] == target:
                    m = re.search(r"\|(node\d+\.[a-z]+)\|", fp.readline())
                    if m is None:
                        return None
                    return m.group(1)
        finally:
            fp.close()

    def build_text(self, tempdir=None):
        """Dump <tempdir>/index.html to <doc>.txt using lynx."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the first LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def _read_file(self, filename):
        """Return the contents of *filename*, closing the file afterwards."""
        fp = open(filename)
        try:
            return fp.read()
        finally:
            fp.close()

    def write_l2h_aux_init_file(self):
        """Write the LaTeX2HTML init file for this job.

        The file holds the base init file, any --l2h-init files and the
        option settings derived from the command line.
        """
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            fp.write(self._read_file(L2H_INIT_FILE))
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                fp.write(self._read_file(filename))
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            l2hoption(fp, "DVIPS_SAFE", options.dvips_safe)
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove the intermediate files left behind by the tools."""
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        # Explicit loops: map() is lazy on newer Pythons and would not
        # delete anything there.
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            # These are written next to the HTML, i.e. in builddir.
            for path in glob.glob(os.path.join(self.builddir, spec)):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            safe_unlink(self.log_filename)
        if not self.options.debugging:
            safe_unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run *command* through the shell; exit the program if it fails.

        Except on Windows, the command's output is appended to the log
        file.  On failure the relevant part of the transcript is copied
        to stderr and the process exits with a non-zero status.
        """
        self.message(command)
        if sys.platform.startswith("win"):
            rc = os.system(command)
        else:
            rc = os.system("(%s) </dev/null >>%s 2>&1"
                           % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            exit_code = rc
            if hasattr(os, "WIFEXITED"):
                # rc is a wait()-style status here.  Decode it: passing
                # it straight to sys.exit() would truncate e.g. 256
                # (exit status 1) to a "successful" exit code of 0.
                if os.WIFEXITED(rc):
                    exit_code = os.WEXITSTATUS(rc)
                    self.warning("Exited with status %s." % exit_code)
                else:
                    # WTERMSIG, not WSTOPSIG: the child was terminated.
                    self.warning("Killed by signal %s." % os.WTERMSIG(rc))
                    exit_code = 1
            else:
                self.warning("Return code: %s" % rc)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(exit_code or 1)

    def message(self, msg):
        """Log a progress message, echoing it to stdout unless quiet."""
        msg = "+++ " + msg
        if not self.options.quiet:
            print(msg)
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a warning to stderr and to the log file."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append *msg* to the log file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()
def get_run_transcript(filename):
    """Return lines from the transcript file for the most recent run() call.

    The result starts at the last line beginning with "+++ " and runs to
    the end of the file; without such a line the whole file is returned.
    """
    fp = open(filename)
    lines = fp.readlines()
    fp.close()
    start = 0
    # Scan backwards for the marker that message() writes before a command.
    for index in range(len(lines) - 1, -1, -1):
        if lines[index][:4] == "+++ ":
            start = index
            break
    return lines[start:]
def safe_unlink(path):
    """Remove the file *path*, ignoring any os.error raised by the attempt."""
    try:
        os.unlink(path)
    except os.error:
        # Best effort only: a missing file is not worth reporting.
        return None
def split_pathname(path):
    """Return (directory, document name) for the source file *path*.

    The path is made absolute first; a trailing ".tex" is stripped from
    the file name to form the document name.
    """
    dirname, basename = os.path.split(os.path.abspath(path))
    if basename.endswith(".tex"):
        return dirname, basename[:-4]
    return dirname, basename
# Matches a \documentclass line, with or without [options], and captures
# the class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")

def get_doctype(path):
    """Return the document class named in the file *path*.

    Only lines that start with \\documentclass are considered and the
    first one wins; None is returned when there is no such line.
    """
    fp = open(path)
    found = None
    line = fp.readline()
    while line:
        m = _doctype_rx.match(line)
        if m is not None:
            found = m.group(1)
            break
        line = fp.readline()
    fp.close()
    return found
def main():
    """Parse the command line and build every requested document.

    With no file arguments, the single .tex file in the current
    directory is used; zero or several candidates are reported as a
    usage error.
    """
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error:
        # error() prints the message plus the usage text and exits.
        error(options, sys.exc_info()[1])
    if not args:
        # attempt to locate single .tex file in current directory:
        args = glob.glob("*.tex")
        found = len(args)
        if found == 0:
            error(options, "No file to process.")
        elif found > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for path in args:
        job = Job(options, path)
        job.build()
def l2hoption(fp, option, value):
    """Write a Perl assignment '$option = "value";' to the file *fp*.

    Nothing is written when *value* is false (None, 0 or an empty
    string).  The value is converted with str() and escaped for Perl.
    """
    if not value:
        return
    escaped = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, escaped))
# Characters that need a backslash inside a Perl string literal; every
# other character is passed through unchanged.
_to_perl = {
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
    }

def string_to_perl(s):
    """Return *s* with '@', '$' and '"' backslash-escaped for Perl.

    All other characters are copied as they are.  The previous table
    covered only chr(1)..chr(255), so a NUL or any character outside
    that range mapped to None and made the join raise TypeError.
    """
    return "".join([_to_perl.get(c, c) for c in s])
def check_for_bibtex(filename):
    """Return true if the file *filename* contains a \\bibdata{ command."""
    fp = open(filename)
    contents = fp.read()
    fp.close()
    return contents.find(r"\bibdata{") >= 0
def uniqify_module_table(filename):
    """Drop the last line of *filename* if it repeats the line before it.

    The file is rewritten only when a line is actually removed, and both
    file handles are closed explicitly rather than left to the garbage
    collector.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
        fp = open(filename, "w")
        try:
            fp.writelines(lines)
        finally:
            fp.close()
def new_index(filename, label="genindex"):
    """Write a stub index to *filename*: an empty theindex environment
    carrying the LaTeX label *label*.
    """
    # Raw string: the backslash before the first newline is kept in the
    # output; it is not a line continuation.
    template = r"""\
\begin{theindex}
\label{%s}
\end{theindex}
"""
    stub = template % label
    fp = open(filename, "w")
    fp.write(stub)
    fp.close()
# Run the command-line driver only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()