Rename specvar integer-info to *integer-info*
[maxima.git] / doc / info / build-html-index.lisp
blobc65d2b208fdd71911e490ce293f9bac467d4e222
1 ;;; -*- Mode: Lisp; Package: Maxima; Syntax: Common-Lisp; Base: 10 -*- ;;;;
2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3 ;;;
4 ;;; Build an index of all the relevant links in the HTML version of
5 ;;; the manual so that it can be used via help instead of the text
6 ;;; version.
7 ;;;
8 ;;; Copyright (C) 2023 Raymond Toy
9 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11 (in-package :maxima)
13 (defvar *texinfo-version-string* nil
14 "The texinfo version string used to generate the HTML files.")
16 (defvar *texinfo-version* nil
17 "The texinfo version arranged as an integer. Version 7.0.3 is
18 represented as 70003.")
20 (defvar *html-index*
21 (make-hash-table :test #'equal)
22 "Hash table for looking up which html file contains the
23 documentation. The key is the topic we're looking for and the value
24 is the html file containing the documentation for the topic.")
26 (defvar *log-file* nil
27 "Log file containing info for each entry that is added to the index
28 table.")
30 (defun texinfo-version-number (major minor &optional (patch 0))
31 "Convert the major, minor, and patch to an integer."
32 (+ (* 10000 major)
33 (* 100 minor)
34 patch))
36 (let ((maxima_nnn-pattern (pregexp:pregexp "^maxima_[0-9][0-9]*$")))
37 (defun maxima_nnn-p (f)
38 "Determine if F is a pathname-name that looks like
39 \"maxima_<digits>\". Return non-NIL if so."
40 (pregexp:pregexp-match maxima_nnn-pattern f)))
42 (defun grep-l (content-pattern f-path)
43 "Similar to grep -l: returns F-PATH if file contains CONTENT-PATTERN, otherwise NIL."
44 (with-open-file (s f-path)
45 (loop for line = (read-line s nil)
46 while line
47 do (when (pregexp:pregexp-match content-pattern line)
48 (return f-path)))))
50 (defun add-entry (item item-id file line &key replace-dash-p prefix)
51 "Add entry to the html hash table. ITEM is the key, FILE is the html
52 file where this item is found, ITEM-ID is the html id of this item.
53 LINE is the line where this was found and is used for information if
54 a duplicate key was found."
56 ;; Replace any special chars that texinfo has encoded.
57 (setf item (handle-special-chars item))
59 (when (find #\- item)
60 (when replace-dash-p
61 ;; Replace "-" with a space (if requested). The HTML output
62 ;; puts "-" where spaces were originally in the texi name.
63 ;; But this isn't perfect. See HANDLE-SPECIAL-CASES where we
64 ;; shouldn't have done this.
65 (setf item (pregexp:pregexp-replace* "-([^[:digit:]])" item " \\1")))
67 ;; Now replace things like "ztics-1" with "ztics <1>". The
68 ;; former is the HTML index item for the info item with the
69 ;; latter name.
70 (setf item (pregexp:pregexp-replace "-([[:digit:]])" item " <\\1>")))
72 ;; Check if the entry already exists and print a message. This
73 ;; shouldn't happen, so print a message if it does.
74 (when (gethash item *html-index*)
75 (format t "Already added entry ~S ~S: ~S~%"
76 item (gethash item *html-index*)
77 line))
78 (format *log-file* "~A: ~S -> ~S ~S~%"
79 prefix item file item-id)
81 (setf (gethash item *html-index*)
82 (cons file item-id)))
84 (defun process-line (line matcher &key replace-dash-p (prefix "Add:"))
85 "Process the LINE using the function MATCHER to determine if this line
86 contains something interesting to add to the index. REPLACE-DASH-P
87 and PREFIX are passed to ADD-ENTRY."
88 (multiple-value-bind (item item-id file line)
89 (funcall matcher line)
90 (when item
91 (add-entry item item-id file line
92 :replace-dash-p replace-dash-p
93 :prefix prefix))))
95 (defun process-one-html-file (file matcher replace-dash-p prefix)
96 "Process one html file named FILE using MATCHER to determine matches.
97 REPLACE-DASH-P and PREFIX are passed to PROCESS-LINE which will
98 handle these."
99 (format *debug-io* "build-html-index: processing: ~S~%" file)
100 (with-open-file (s file :direction :input)
101 (loop for line = (read-line s nil)
102 while line
104 (process-line line matcher
105 :replace-dash-p replace-dash-p
106 :prefix prefix))))
108 (defun handle-special-cases ()
109 "These HTML topics need special handling because we didn't quite
110 process them correctly previously because we accidentally changed
111 #\- to #\space."
112 (flet ((update-entry (old new)
113 ;; Set the new index entry with the value of the old index
114 ;; entry and remove the old entry from the index.
115 (when (gethash old *html-index*)
116 (setf (gethash new *html-index*)
117 (gethash old *html-index*))
118 (remhash old *html-index*))))
119 ;; List of special cases. The car of each item is the index entry
120 ;; that should not have had the hyphen replaced. The cdr is what
121 ;; it should be.
122 (dolist (items '(("Quote quote operator" "Quote-quote operator")
123 ("Assignment operator (evaluates left hand side)"
124 "Assignment operator (evaluates left-hand side)")
125 ("Euler Mascheroni constant"
126 "Euler-Mascheroni constant")
127 ("maxima init.lisp" "maxima-init.lisp")
128 ("maxima init.mac" "maxima-init.mac")))
129 (destructuring-bind (old new)
130 items
131 (update-entry old new)))
133 ;; This is messy. Texinfo 6.8 uses plain apostrophes in the info
134 ;; file. But with texinfo 7.0.[23], some entries in HTML use an
135 ;; apostrophe (U+27) character, but the info file uses
136 ;; Right_Single_Quotation_Mark (U+2019). And apparently, the
137 ;; Texinfo 7.1 will not.
139 ;; Convert these only for the cases we know this is a problem.
141 ;; This current implementation will very likely not work with gcl
142 ;; only supports 8-bit characters.
143 (when *texinfo-version*
144 (cond ((>= *texinfo-version* (texinfo-version-number 7 1))
145 ;; Don't need anything special for texinfo 7.1 and greater.
147 ((>= *texinfo-version* (texinfo-version-number 7 0 2))
148 (dolist (item '("Catalan's Constant"
149 "Euler's number"
150 "Introduction to Maxima's Database"))
151 (update-entry item
152 (pregexp::pregexp-replace* "'" item (string (code-char #x2019))))))))))
154 ;; Find entries from the function and variable index. An example of
155 ;; what we're looking for:
157 ;; <a href="Elementary-Functions.html#index-asinh">
159 ;; We extract the file name, "Elementary-Functions.html", and the
160 ;; stuff starting with "#index-". This gives the item-id,
161 ;; "index-asinh" that we want to use for the link (without the "#"),
162 ;; and the stuff after "#index-", "asinh" is the id topic we need.
163 (let ((href (pregexp:pregexp "<a href=\"([[:alnum:]_-]+\.html)#(index-([^\"]*))\">")))
164 (defun match-entries (line)
165 (let ((match (pregexp:pregexp-match href line)))
166 (when match
167 (destructuring-bind (whole file item-id id)
168 match
169 (declare (ignore whole))
170 (values id item-id file line))))))
172 ;; Find entries from the TOC. An example of what we're looking for:
174 ;; <a id="toc-Bessel-Functions-1" href="Special-Functions.html#Bessel-Functions">15.2 Bessel Functions</a></li>
176 ;; We extract the file name, "Special-Functions.html" and the id,
177 ;; "Bessel-Functions" and then the title of the subsection, without
178 ;; the section numbers, "Bessel Functions".
179 ;; Further subsections are ignored.
180 (let ((regexp (pregexp:pregexp "<a id=\"toc-.*\" href=\"([^#\"]+)(#([^\"]+))\">[[:digit:]]+\.[[:digit:]]+ ([^\"]+?)</a>")))
181 (defun match-toc (line)
182 (let ((match (pregexp:pregexp-match regexp line)))
183 (when match
184 (destructuring-bind (whole file item# id item)
185 match
186 (declare (ignore whole item#))
187 (when (find #\& item :test #'char=)
188 ;; Replace HTML entities with the corresponding char.
189 ;; See, for example,
190 ;; https://lilith.fisica.ufmg.br/~wag/TRANSF/codehtml.html
192 ;; Perhaps we should use the language to reduce the number
193 ;; of calls to pregexp?
194 (dolist (replacement '(("&rsquo;" #x27) ; Right single-quote -> apostrophe
195 ;; From the de manual
196 ("&uuml;" 252)
197 ("&Uuml;" 220)
198 ("&auml;" 228)
199 ;; From the pt manual
200 ("&ccedil;" 231)
201 ("&atilde;" 227)
202 ("&oacute;" 243)
203 ;; From the pt_BR manual
204 ("&aacute;" 225)
205 ("&ouml;" 246)
207 (destructuring-bind (html-entity char-code)
208 replacement
209 (setf item (pregexp:pregexp-replace* html-entity
210 item
211 (string (code-char char-code)))))))
213 (format *log-file* "TOC: ~S -> ~S~%" item file)
215 (values item id file line))))))
217 (defparameter *index-file-name*
218 (make-hash-table :test 'equal)
219 "Hash table whose key is the lang and whose value is a list of the
220 file name of function and variable index and the title of the
221 index.")
223 ;; Setup the hashtable. The default (English) is the empty string.
224 (dolist (entry
225 '(("" "Function-and-Variable-Index.html" "Function and Variable Index")
226 ("de" "Index-der-Variablen-und-Funktionen.html" "Index der Variablen und Funktionen")
227 ("pt" "Indice-de-Funcoes-e-Variaveis.html" "Índice de Funções e Variáveis")
228 ("es" "Indice-de-Funciones-y-Variables.html" "Índice de Funciones y Variables")
229 ("pt_BR" "Indice-de-Funcoes-e-Variaveis.html" "Índice de Funções e Variáveis")))
230 (destructuring-bind (key &rest value)
231 entry
232 (setf (gethash key *index-file-name*) value)))
234 (defun get-index-file-name (lang)
235 (first (gethash lang *index-file-name*)))
237 (defun get-index-title (lang)
238 (second (gethash lang *index-file-name*)))
240 (defun find-index-file (dir lang)
241 "Find the name of HTML file containing the function and variable
242 index."
243 (unless (gethash lang *index-file-name*)
244 (merror "Unknown or unsupported language for HTML help: ~A" lang))
245 (let ((f-a-v-i (merge-pathnames (get-index-file-name lang)
246 dir)))
247 (when (probe-file f-a-v-i)
248 (return-from find-index-file f-a-v-i)))
249 (let ((files (directory dir)))
250 ;; Keep just the files of the form "maxima_nnn", where "nnn" are
251 ;; digits. These are the only files that can contain the function
252 ;; and variable index that we want.
253 (setf files (remove-if-not
254 #'(lambda (path-name)
255 (maxima_nnn-p path-name))
256 files
257 :key #'pathname-name))
259 ;; Now sort them in numerical order.
260 (setf files
261 (sort files #'<
262 :key #'(lambda (p)
263 (let ((name (pathname-name p)))
264 ;; Everything else is the number in the
265 ;; file name, which starts with 1.
266 (if (> (length name) 7)
267 (parse-integer (subseq name 7))
268 0)))))
270 ;; Check if the last 2 files to see if one of them contains the
271 ;; function and variable index we want. Return the first one that
272 ;; matches.
273 (let* ((title (get-index-title lang))
274 (search-item (format nil "<title>.*~A" title)))
275 (format t "Looking for function and variable index: ~A~%" title)
276 (dolist (file (last files 2))
277 (when (grep-l search-item file)
278 (format t "Function index: ~S.~%"
279 (namestring file))
280 (return-from find-index-file file))))))
282 ;; Parse the texinfo version string. It should look something like
283 ;; "M.m.p", where M and m are a sequence of (base 10) digits and ".p"
284 ;; is the optional patch version.
285 (defun parse-texinfo-version (string)
286 (when string
287 (let ((posn 0)
288 (len (length string))
289 version)
290 (dotimes (k 3)
291 (when (<= posn len)
292 (multiple-value-bind (digits end)
293 (parse-integer string
294 :start posn
295 :junk-allowed t)
296 (push digits version)
297 (setf posn (1+ end)))))
298 (apply #'texinfo-version-number (nreverse version)))))
300 (defun get-texinfo-version (file)
301 "Get the texinfo version from FILE"
302 (let ((version-line
303 (with-open-file (f file :direction :input)
304 ;; Texinfo writes a comment line containing the version number of
305 ;; the texinfo used to build the html file. It's at the
306 ;; beginnning so search just the first 5 lines or so.
307 (loop for count from 1 to 5
308 for line = (read-line f nil) then (read-line f nil)
309 when (and line (search "Created by GNU Texinfo" line))
310 return line))))
311 (unless version-line
312 (warn "Could not find GNU Texinfo version in ~S~%" file)
313 (return-from get-texinfo-version))
314 (setf *texinfo-version-string*
315 (second (pregexp:pregexp-match "GNU Texinfo \(.*?\)," version-line)))
316 (setf *texinfo-version*
317 (parse-texinfo-version *texinfo-version-string*))))
319 (defun find-toc-file (dir)
320 ;; List of possible filenames that contain the table of contents.
321 ;; The first is used by texinfo 6.8 and later. The second is used
322 ;; by texinfo 5.1. Return the first one found.
323 (dolist (toc '("index.html" "maxima_0.html"))
324 (let ((toc-path (merge-pathnames toc dir)))
325 (when (probe-file toc-path)
326 (return-from find-toc-file toc-path)))))
328 (defun build-html-index (dir lang)
329 (clrhash *html-index*)
330 (let ((index-file (find-index-file dir lang)))
331 (unless index-file
332 (error "Could not find HTML file containing the function and variable index."))
334 (with-open-file (*log-file* "build-html-index.log"
335 :direction :output :if-exists :supersede)
336 (let ((toc-path (find-toc-file dir)))
337 (get-texinfo-version toc-path)
338 (format t "Texinfo Version ~A: ~D~%" *texinfo-version-string* *texinfo-version*)
339 (process-one-html-file index-file #'match-entries t "Add")
340 (process-one-html-file toc-path #'match-toc nil "TOC")
341 (handle-special-cases)))))
343 ;; Run this to build a hash table from the topic to the HTML file
344 ;; containing the documentation. The single argument DIR should be a
345 ;; directory that contains the html files to be searched for the
346 ;; topics. For example it can be "<maxima-dir>/doc/info/*.html". The
347 ;; LANG arg specifies the language to use. For English, either leave
348 ;; the argument out, or use "".
349 (defmfun $build_and_dump_html_index (dir &optional (lang ""))
350 (build-html-index dir lang)
351 (let (entries)
352 (maphash #'(lambda (k v)
353 (push (list k (namestring (car v)) (cdr v)) entries))
354 *html-index*)
355 (with-open-file (s "maxima-index-html.lisp"
356 :direction :output
357 :if-exists :supersede)
358 (with-standard-io-syntax
359 ;; Set up printer settings to print the output the way we want.
361 ;; *package* set to :cl-info so that the symbols aren't
362 ;; preceded by the cl-info package marker.
364 ;; *print-length* is NIL because the list of entries is very
365 ;; long.
367 ;; *print-case* is :downcase just to make it look more
368 ;; natural; not really needed.
370 ;; *print-readably* is nil so base-strings and strings can be
371 ;; printed without any kind of special syntax for base-strings
372 ;; for lisps that distinguish between strings and
373 ;; base-strings.
374 (let ((*package* (find-package :cl-info))
375 (*print-length* nil)
376 (*print-case* :downcase)
377 (*print-readably* nil))
378 (format s ";;; Do not edit; automatically generated via build-html-index.lisp~2%")
379 (pprint '(in-package :cl-info)
382 (pprint `(let ((cl-info::html-index ',entries))
383 (cl-info::load-html-index cl-info::html-index))
384 s))))
385 (format t "HTML index has ~D entries~%" (hash-table-count *html-index*))
386 ;; Hash table can't be empty unless we screwed up somewhere.
387 (assert (plusp (hash-table-count *html-index*)))
388 (plusp (hash-table-count *html-index*))))
390 ;;(build-and-dump-html-index "./*.html")