Fix bug #3926: Various limits give UND where they should give IND
[maxima.git] / doc / info / build-html-index.lisp
blob6f1b14b42f55679d6835eb5b0bccb22f23739a15
1 (in-package #:maxima)
(defvar *html-index* (make-hash-table :test 'equal)
  "Lookup table used to find where a topic is documented in the HTML
manual.  Keys are topic strings, compared with EQUAL.  Each value is
a cons (FILE . ID) holding the html file that documents the topic and
the id within that file to link to.")
;; Decoding every item might be rather slow.  Perhaps an alternative
;; solution is to leave the items alone and have $hdescribe encode any
;; special characters before looking them up.  Since ? is only used
;; occasionally, we would not incur the cost here but move it to ?
;; where the impact is lower.
;;
;; However, a test run where this function was removed made virtually
;; no difference in runtime (with cmucl).  (31.97 sec with and 31.62
;; sec without; well within timing noise probably.)  Note, however,
;; that this file is not normally compiled before running, but earlier
;; tests showed that compiling didn't make much difference either.
(defun handle-special-chars (item)
  "Decode the special characters that texinfo encoded in ITEM.
Texinfo encodes special characters as an underscore followed by the
four-digit hexadecimal character code (for example _0025 for the
percent sign).  This needs to be undone so we know what the actual
character is when looking up the documentation.

Only the characters listed below are decoded, and only when the
hexadecimal digits are lowercase.  The characters are processed one
after the other, each over the whole string.  Returns the decoded
string; ITEM is returned unchanged when it contains none of the
codes."
  (flet ((replace-all-literal (code replacement string)
           ;; Return STRING with every non-overlapping occurrence of
           ;; the literal substring CODE replaced by REPLACEMENT,
           ;; scanning left to right.  STRING itself is returned when
           ;; CODE does not occur.
           ;;
           ;; Plain substring replacement is used instead of a regexp
           ;; replacement because a regexp replacement string is a
           ;; template in which backslash is an escape character; the
           ;; backslash in the list below must be inserted literally.
           (let ((first-hit (search code string)))
             (if (null first-hit)
                 string
                 (with-output-to-string (out)
                   (loop with pos = 0
                         with code-length = (length code)
                         for hit = first-hit then (search code string :start2 pos)
                         while hit
                         do (write-string string out :start pos :end hit)
                            (write-string replacement out)
                            (setf pos (+ hit code-length))
                         finally (write-string string out :start pos)))))))
    (dolist (spec-char '(#\% #\$ #\? #\. #\< #\> #\#
                         #\= #\: #\* #\- #\\ #\^ #\+ #\/ #\'
                         #\( #\)))
      ;; CODE is the encoded form of SPEC-CHAR, e.g. "_0025" for #\%.
      (let ((code (string-downcase
                   (format nil "_~4,'0x" (char-code spec-char)))))
        (setf item (replace-all-literal code (string spec-char) item))))
    item))
(defun process-one-html-file (file entry-regexp section-regexp fnindex-regexp)
  "Process one html file to find all the documentation entries and add
them to *HTML-INDEX*.

FILE is read line by line and each line is tried against the three
compiled regexps in order; only the first one that matches is used:

ENTRY-REGEXP finds function and variable items.  Group 1 is the id;
the topic is the id with every \"005f\" removed.

SECTION-REGEXP finds sections to include.  Group 1 is the id and
group 2 is the heading text, which is used as the topic.

FNINDEX-REGEXP finds ids associated with @fnindex.  Group 1 is the
id; the topic is the id with \"-\" replaced by spaces, except that a
trailing run of digits keeps its \"-\".

In every case the topic is passed through HANDLE-SPECIAL-CHARS and
stored with the value (BASE-NAME . ID), where BASE-NAME is a pathname
holding only the name and type of FILE."
  (format *debug-io* "Processing: ~S~%" file)
  (let ((base-name (make-pathname :name (pathname-name file)
                                  :type (pathname-type file))))
    (flet ((add-entry (item item-id file line)
             ;; Add entry to the hash table.
             ;;
             ;; Replace any special chars that texinfo has encoded.
             (setf item (handle-special-chars item))
             ;; Check if the entry already exists and print a message.
             ;; Presumably, this shouldn't happen, so warn if it does.
             ;; The older entry is then overwritten.
             (when (gethash item *html-index*)
               (format t "Already added entry ~S ~S: ~S~%"
                       item (gethash item *html-index*)
                       line))
             (setf (gethash item *html-index*)
                   (cons file item-id))))
      (with-open-file (s file :direction :input)
        (loop for line = (read-line s nil)
              while line
              do
                 (let (match)
                   (cond
                     ((setf match (pregexp:pregexp-match-positions entry-regexp line))
                      (let ((item-id (subseq line
                                             (car (elt match 1))
                                             (cdr (elt match 1))))
                            item)
                        ;; Remove "005f" which texinfo adds before every "_".
                        #+nil
                        (format t "item-id = ~A~%" item-id)
                        (setf item
                              (pregexp:pregexp-replace* "005f" item-id ""))
                        #+nil
                        (format t "match = ~S ~A~%" match item)
                        (add-entry item item-id base-name line)))
                     ((setf match (pregexp:pregexp-match-positions section-regexp line))
                      (let ((item-id (subseq line
                                             (car (elt match 1))
                                             (cdr (elt match 1))))
                            (item (subseq line
                                          (car (elt match 2))
                                          (cdr (elt match 2)))))
                        #+nil
                        (format t "section item = ~A~%" item)
                        (add-entry item item-id base-name line)))
                     ((setf match (pregexp:pregexp-match-positions fnindex-regexp line))
                      (let* ((item-id (subseq line
                                              (car (elt match 1))
                                              (cdr (elt match 1))))
                             (item (pregexp:pregexp-replace* "-" item-id " ")))
                        ;; However if the item ends in digits, we
                        ;; replaced too many "-" with spaces.  So if
                        ;; it ends with a space followed by digits, we
                        ;; need to replace the space with "-" again.
                        (setf item (pregexp:pregexp-replace* " \(\\d+\)$" item "-\\1"))
                        ;; ADD-ENTRY applies the same decoding once more.
                        (setf item (handle-special-chars item))
                        (add-entry item item-id base-name line))))))))))
;; Run this to build a hash table from the topic to the HTML file
;; containing the documentation.  The single argument DIR is handed
;; to DIRECTORY, so it should be a wildcard pathname matching the html
;; files to be searched for the topics.  For example it can be
;; "<maxima-dir>/doc/info/*.html"
(defun build-html-index (dir)
  "Clear *HTML-INDEX* and refill it from the html files matched by DIR.
DIR is passed to DIRECTORY.  Of the files found, maxima_singlepage is
dropped, as is every file whose name does not start with \"maxima\"
(compared case-insensitively).  The remaining files are processed
with PROCESS-ONE-HTML-FILE in numerical order."
  (clrhash *html-index*)
  ;; entry-regexp searches for entries for functions and variables.
  ;; We're looking for something like
  ;;
  ;;   <dt id="index-<foo>"
  ;;
  ;; and extracting "foo".
  ;;
  ;; section-regexp searches for section headings so we can get to
  ;; things like "Functions and Variables for...".  We're looking for
  ;;
  ;;   <span id="<id>">...<h3 class="section">12.2 <heading><
  ;;
  ;; where <heading> is the heading we want, and <id> is the id we can
  ;; use to link to this item.
  ;;
  ;; fnindex-regexp searches for id's that are associated with
  ;; @fnindex.  These look like
  ;;
  ;;   <span id="index-<id>"></span>
  ;;
  ;; all on one line.  The <id> is the id we can use to link to
  ;; this item.
  (let ((entry-regexp (pregexp:pregexp "<dt id=\"index-([^\"]+)\""))
        (section-regexp
          (pregexp:pregexp "<span id=\"\([^\"]+\)\">.*<h3 class=\"section\">[0-9.,]+ *\(.*\)<"))
        (fnindex-regexp
          (pregexp:pregexp "<span id=\"index-\([^\"]+\)\"></span>$")))
    ;; Get a list of the files in the directory.  Remove all the ones
    ;; that don't start with "maxima".  Then sort them all in
    ;; numerical order.
    (let ((files (directory dir)))
      ;; First, remove "maxima_singlepage.html"
      (setf files (remove-if #'(lambda (name)
                                 (string-equal name "maxima_singlepage"))
                             files
                             :key #'pathname-name))
      ;; Now remove any that don't start with "maxima"
      (setf files (remove-if-not #'(lambda (name)
                                     (string-equal "maxima" name :end2 (min (length name) 6)))
                                 files
                                 :key #'pathname-name))
      ;; Now sort them in numerical order.
      (setf files
            (sort files #'<
                  :key #'(lambda (p)
                           (let ((name (pathname-name p)))
                             (cond ((string-equal name "maxima_toc")
                                    ;; maxima_toc.html is first
                                    0)
                                   ((string-equal name "maxima")
                                    ;; maxima.html is second.
                                    1)
                                   (t
                                    ;; Everything else is the number
                                    ;; in the file name, which starts
                                    ;; with 1.  The number is whatever
                                    ;; follows the first 7 characters
                                    ;; of the name.
                                    (if (> (length name) 7)
                                        (parse-integer (subseq name 7))
                                        0)))))))
      ;; maxima_singlepage.html was already removed from FILES above,
      ;; so every remaining file is processed.
      (dolist (file files)
        (process-one-html-file file entry-regexp section-regexp fnindex-regexp)))))
(defun build-and-dump-html-index (dir)
  "Build the html index for the files matched by DIR and write it to
the file maxima-index-html.lisp, superseding any existing file of
that name.

DIR is passed on to BUILD-HTML-INDEX.  Each entry of *HTML-INDEX* is
written as a list (TOPIC FILE-NAMESTRING ID).  The output file
contains an IN-PACKAGE form for CL-INFO followed by a form that calls
CL-INFO::LOAD-HTML-INDEX on the quoted list of entries."
  (build-html-index dir)
  (let (entries)
    ;; Flatten the table into a list, turning each file pathname into
    ;; a namestring so that it prints as a plain string.
    (maphash #'(lambda (k v)
                 (push (list k (namestring (car v)) (cdr v)) entries))
             *html-index*)
    (with-open-file (s "maxima-index-html.lisp"
                       :direction :output
                       :if-exists :supersede)
      (with-standard-io-syntax
        ;; Set up printer settings to print the output the way we want.
        ;;
        ;; *package* set to :cl-info so that the symbols aren't
        ;; preceded by the cl-info package marker.
        ;;
        ;; *print-length* is NIL because the list of entries is very
        ;; long.
        ;;
        ;; *print-case* is :downcase just to make it look more
        ;; natural; not really needed.
        ;;
        ;; *print-readably* is nil so base-strings and strings can be
        ;; printed without any kind of special syntax for base-strings
        ;; for lisps that distinguish between strings and
        ;; base-strings.
        (let ((*package* (find-package :cl-info))
              (*print-length* nil)
              (*print-case* :downcase)
              (*print-readably* nil))
          (format s ";;; Do not edit; automatically generated via build-html-index.lisp~2%")
          (pprint '(in-package :cl-info)
                  s)

          (pprint `(let ((cl-info::html-index ',entries))
                     (cl-info::load-html-index cl-info::html-index))
                  s))))))
;; Loading this file builds the index right away from the html files
;; matched by "./*.html" and writes maxima-index-html.lisp.
(build-and-dump-html-index "./*.html")