Fix some issues with make dist-gzip
[maxima.git] / doc / info / build-html-index.lisp
blob89b3e14746316b943346c7717c2f1c386a071e07
1 ;;; -*- Mode: Lisp; Package: Maxima; Syntax: Common-Lisp; Base: 10 -*- ;;;;
2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3 ;;;
4 ;;; Build an index of all the relevant links in the HTML version of
5 ;;; the manual so that it can be used via help instead of the text
6 ;;; version.
7 ;;;
8 ;;; Copyright (C) 2023 Raymond Toy
9 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11 (in-package :maxima)
13 (defvar *texinfo-version-string* nil
14 "The texinfo version string used to generate the HTML files.")
16 (defvar *texinfo-version* nil
17 "The texinfo version arranged as an integer. Version 7.0.3 is
18 represented as 70003.")
20 (defvar *html-index*
21 (make-hash-table :test #'equal)
22 "Hash table for looking up which html file contains the
23 documentation. The key is the topic we're looking for and the value
24 is the html file containing the documentation for the topic.")
26 (defvar *log-file* nil
27 "Log file containing info for each entry that is added to the index
28 table.")
30 (defun texinfo-version-number (major minor &optional (patch 0))
31 "Convert the major, minor, and patch to an integer."
32 (+ (* 10000 major)
33 (* 100 minor)
34 patch))
36 (let ((maxima_nnn-pattern (pregexp:pregexp "^maxima_[0-9][0-9]*$")))
37 (defun maxima_nnn-p (f)
38 "Determine if F is a pathname-name that looks like
39 \"maxima_<digits>\". Return non-NIL if so."
40 (pregexp:pregexp-match maxima_nnn-pattern f)))
42 (defun grep-l (content-pattern f-path)
43 "Similar to grep -l: returns F-PATH if file contains CONTENT-PATTERN, otherwise NIL."
44 (with-open-file (s f-path)
45 (loop for line = (read-line s nil)
46 while line
47 do (when (pregexp:pregexp-match content-pattern line)
48 (return f-path)))))
50 (defun add-entry (item item-id file line &key replace-dash-p prefix)
51 "Add entry to the html hash table. ITEM is the key, FILE is the html
52 file where this item is found, ITEM-ID is the html id of this item.
53 LINE is the line where this was found and is used for information if
54 a duplicate key was found."
56 ;; Replace any special chars that texinfo has encoded.
57 (setf item (handle-special-chars item))
59 (when (find #\- item)
60 (when replace-dash-p
61 ;; Replace "-" with a space (if requested). The HTML output
62 ;; puts "-" where spaces were originally in the texi name.
63 ;; But this isn't perfect. See HANDLE-SPECIAL-CASES where we
64 ;; shouldn't have done this.
65 (setf item (pregexp:pregexp-replace* "-([^[:digit:]])" item " \\1")))
67 ;; Now replace things like "ztics-1" with "ztics <1>". The
68 ;; former is the HTML index item for the info item with the
69 ;; latter name.
70 (setf item (pregexp:pregexp-replace "-([[:digit:]])" item " <\\1>")))
72 ;; Check if the entry already exists and print a message. This
73 ;; shouldn't happen, so print a message if it does.
74 (when (gethash item *html-index*)
75 (format t "Already added entry ~S ~S: ~S~%"
76 item (gethash item *html-index*)
77 line))
78 (format *log-file* "~A: ~S -> ~S ~S~%"
79 prefix item file item-id)
81 (setf (gethash item *html-index*)
82 (cons file item-id)))
84 (defun process-line (line matcher path &key replace-dash-p (prefix "Add:") truenamep)
85 "Process the LINE using the function MATCHER to determine if this line
86 contains something interesting to add to the index. REPLACE-DASH-P
87 and PREFIX are passed to ADD-ENTRY. If TRUENAMEP is non-NIL, the
88 entry is the full path to the file specified in the line based on
89 the value of PATH."
90 (multiple-value-bind (item item-id file line)
91 (funcall matcher line)
92 (when item
93 #+nil
94 (format t "process-line: file, path = ~A ~A~%" file path)
95 (when truenamep
96 (setf file (truename (merge-pathnames file path))))
97 (add-entry item item-id
98 file
99 line
100 :replace-dash-p replace-dash-p
101 :prefix prefix))))
103 (defun process-one-html-file (file matcher replace-dash-p prefix truenamep)
104 "Process one html file named FILE using MATCHER to determine matches.
105 REPLACE-DASH-P and PREFIX are passed to PROCESS-LINE which will
106 handle these."
107 (format *debug-io* "build-html-index: processing: ~S~%" file)
108 (with-open-file (s file :direction :input)
109 (loop for line = (read-line s nil)
110 while line
112 (process-line line matcher
113 file
114 :replace-dash-p replace-dash-p
115 :prefix prefix
116 :truenamep truenamep))))
118 (defun handle-special-cases ()
119 "These HTML topics need special handling because we didn't quite
120 process them correctly previously because we accidentally changed
121 #\- to #\space."
122 (flet ((update-entry (old new)
123 ;; Set the new index entry with the value of the old index
124 ;; entry and remove the old entry from the index.
125 (when (gethash old *html-index*)
126 (setf (gethash new *html-index*)
127 (gethash old *html-index*))
128 (remhash old *html-index*))))
129 ;; List of special cases. The car of each item is the index entry
130 ;; that should not have had the hyphen replaced. The cdr is what
131 ;; it should be.
132 (dolist (items '(("Quote quote operator" "Quote-quote operator")
133 ("Assignment operator (evaluates left hand side)"
134 "Assignment operator (evaluates left-hand side)")
135 ("Euler Mascheroni constant"
136 "Euler-Mascheroni constant")
137 ("maxima init.lisp" "maxima-init.lisp")
138 ("maxima init.mac" "maxima-init.mac")))
139 (destructuring-bind (old new)
140 items
141 (update-entry old new)))
143 ;; This is messy. Texinfo 6.8 uses plain apostrophes in the info
144 ;; file. But with texinfo 7.0.[23], some entries in HTML use an
145 ;; apostrophe (U+27) character, but the info file uses
146 ;; Right_Single_Quotation_Mark (U+2019). And apparently, the
147 ;; Texinfo 7.1 will not.
149 ;; Convert these only for the cases we know this is a problem.
151 ;; This current implementation will very likely not work with gcl
152 ;; only supports 8-bit characters.
153 (when *texinfo-version*
154 (cond ((>= *texinfo-version* (texinfo-version-number 7 1))
155 ;; Don't need anything special for texinfo 7.1 and greater.
157 ((>= *texinfo-version* (texinfo-version-number 7 0 2))
158 (dolist (item '("Catalan's Constant"
159 "Euler's number"
160 "Introduction to Maxima's Database"))
161 (update-entry item
162 (pregexp::pregexp-replace* "'" item (string (code-char #x2019))))))))))
164 ;; Find entries from the function and variable index. An example of
165 ;; what we're looking for:
167 ;; <a href="Elementary-Functions.html#index-asinh">
169 ;; We extract the file name, "Elementary-Functions.html", and the
170 ;; stuff starting with "#index-". This gives the item-id,
171 ;; "index-asinh" that we want to use for the link (without the "#"),
172 ;; and the stuff after "#index-", "asinh" is the id topic we need.
173 (let ((href (pregexp:pregexp "<a href=\"([[:alnum:]_-]+\.html)#(index-([^\"]*))\">")))
174 (defun match-entries (line)
175 (let ((match (pregexp:pregexp-match href line)))
176 (when match
177 (destructuring-bind (whole file item-id id)
178 match
179 (declare (ignore whole))
180 (values id item-id file line))))))
182 ;; Find entries from the TOC. An example of what we're looking for:
184 ;; <a id="toc-Bessel-Functions-1" href="Special-Functions.html#Bessel-Functions">15.2 Bessel Functions</a></li>
186 ;; We extract the file name, "Special-Functions.html" and the id,
187 ;; "Bessel-Functions" and then the title of the subsection, without
188 ;; the section numbers, "Bessel Functions".
189 ;; Further subsections are ignored.
190 (let ((regexp (pregexp:pregexp "<a id=\"toc-.*\" href=\"([^#\"]+)(#([^\"]+))\">[[:digit:]]+\.[[:digit:]]+ ([^\"]+?)</a>")))
191 (defun match-toc (line)
192 (let ((match (pregexp:pregexp-match regexp line)))
193 (when match
194 (destructuring-bind (whole file item# id item)
195 match
196 (declare (ignore whole item#))
197 (when (find #\& item :test #'char=)
198 ;; Replace HTML entities with the corresponding char.
199 ;; See, for example,
200 ;; https://lilith.fisica.ufmg.br/~wag/TRANSF/codehtml.html
202 ;; Perhaps we should use the language to reduce the number
203 ;; of calls to pregexp?
204 (dolist (replacement '(("&rsquo;" #x27) ; Right single-quote -> apostrophe
205 ;; From the de manual
206 ("&uuml;" 252)
207 ("&Uuml;" 220)
208 ("&auml;" 228)
209 ;; From the pt manual
210 ("&ccedil;" 231)
211 ("&atilde;" 227)
212 ("&oacute;" 243)
213 ;; From the pt_BR manual
214 ("&aacute;" 225)
215 ("&ouml;" 246)
217 (destructuring-bind (html-entity char-code)
218 replacement
219 (setf item (pregexp:pregexp-replace* html-entity
220 item
221 (string (code-char char-code)))))))
223 (format *log-file* "TOC: ~S -> ~S~%" item file)
225 (values item id file line))))))
227 (defparameter *index-file-name*
228 (make-hash-table :test 'equal)
229 "Hash table whose key is the lang and whose value is a list of the
230 file name of function and variable index and the title of the
231 index.")
233 ;; Setup the hashtable. The default (English) is the empty string.
234 (dolist (entry
235 '(("" "Function-and-Variable-Index.html" "Function and Variable Index")
236 ("de" "Index-der-Variablen-und-Funktionen.html" "Index der Variablen und Funktionen")
237 ("pt" "Indice-de-Funcoes-e-Variaveis.html" "Índice de Funções e Variáveis")
238 ("es" "Indice-de-Funciones-y-Variables.html" "Índice de Funciones y Variables")
239 ("pt_BR" "Indice-de-Funcoes-e-Variaveis.html" "Índice de Funções e Variáveis")
240 ("ja" "Function-and-Variable-Index.html" "Function and Variable Index")))
241 (destructuring-bind (key &rest value)
242 entry
243 (setf (gethash key *index-file-name*) value)))
245 (defun get-index-file-name (lang)
246 (first (gethash lang *index-file-name*)))
248 (defun get-index-title (lang)
249 (second (gethash lang *index-file-name*)))
251 (defun find-index-file (dir lang)
252 "Find the name of HTML file containing the function and variable
253 index."
254 (unless (gethash lang *index-file-name*)
255 (merror "Unknown or unsupported language for HTML help: ~A" lang))
256 (let ((f-a-v-i (merge-pathnames (get-index-file-name lang)
257 dir)))
258 (when (probe-file f-a-v-i)
259 (return-from find-index-file f-a-v-i)))
260 (let ((files (directory dir)))
261 ;; Keep just the files of the form "maxima_nnn", where "nnn" are
262 ;; digits. These are the only files that can contain the function
263 ;; and variable index that we want.
264 (setf files (remove-if-not
265 #'(lambda (path-name)
266 (maxima_nnn-p path-name))
267 files
268 :key #'pathname-name))
270 ;; Now sort them in numerical order.
271 (setf files
272 (sort files #'<
273 :key #'(lambda (p)
274 (let ((name (pathname-name p)))
275 ;; Everything else is the number in the
276 ;; file name, which starts with 1.
277 (if (> (length name) 7)
278 (parse-integer (subseq name 7))
279 0)))))
281 ;; Check if the last 2 files to see if one of them contains the
282 ;; function and variable index we want. Return the first one that
283 ;; matches.
284 (let* ((title (get-index-title lang))
285 (search-item (format nil "<title>.*~A" title)))
286 (format t "Looking for function and variable index: ~A~%" title)
287 (dolist (file (last files 2))
288 (when (grep-l search-item file)
289 (format t "Function index: ~S.~%"
290 (namestring file))
291 (return-from find-index-file file))))))
293 ;; Parse the texinfo version string. It should look something like
294 ;; "M.m.p", where M and m are a sequence of (base 10) digits and ".p"
295 ;; is the optional patch version.
296 (defun parse-texinfo-version (string)
297 (when string
298 (let ((posn 0)
299 (len (length string))
300 version)
301 (dotimes (k 3)
302 (when (<= posn len)
303 (multiple-value-bind (digits end)
304 (parse-integer string
305 :start posn
306 :junk-allowed t)
307 (push digits version)
308 (setf posn (1+ end)))))
309 (apply #'texinfo-version-number (nreverse version)))))
311 (defun get-texinfo-version (file)
312 "Get the texinfo version from FILE"
313 (let ((version-line
314 (with-open-file (f file :direction :input)
315 ;; Texinfo writes a comment line containing the version number of
316 ;; the texinfo used to build the html file. It's at the
317 ;; beginnning so search just the first 5 lines or so.
318 (loop for count from 1 to 5
319 for line = (read-line f nil) then (read-line f nil)
320 when (and line (search "Created by GNU Texinfo" line))
321 return line))))
322 (unless version-line
323 (warn "Could not find GNU Texinfo version in ~S~%" file)
324 (return-from get-texinfo-version))
325 (setf *texinfo-version-string*
326 (second (pregexp:pregexp-match "GNU Texinfo \(.*?\)," version-line)))
327 (setf *texinfo-version*
328 (parse-texinfo-version *texinfo-version-string*))))
330 (defun find-toc-file (dir)
331 ;; List of possible filenames that contain the table of contents.
332 ;; The first is used by texinfo 6.8 and later. The second is used
333 ;; by texinfo 5.1. Return the first one found.
334 (dolist (toc '("index.html" "maxima_0.html"))
335 (let ((toc-path (merge-pathnames toc dir)))
336 (when (probe-file toc-path)
337 (return-from find-toc-file toc-path)))))
339 (defun build-html-index (dir lang truenamep)
340 (clrhash *html-index*)
341 (let ((index-file (find-index-file dir lang)))
342 (unless index-file
343 (error "Could not find HTML file containing the function and variable index."))
345 (with-open-file (*log-file* "build-html-index.log"
346 :direction :output :if-exists :supersede)
347 (let ((toc-path (find-toc-file dir)))
348 (get-texinfo-version toc-path)
349 (format t "Texinfo Version ~A: ~D~%" *texinfo-version-string* *texinfo-version*)
350 (process-one-html-file index-file #'match-entries t "Add" truenamep)
351 (process-one-html-file toc-path #'match-toc nil "TOC" truenamep)
352 (handle-special-cases)))))
354 ;; Run this to build a hash table from the topic to the HTML file
355 ;; containing the documentation. It is written to the file given by
356 ;; OUTPUT_FILE. The output can then subsequently be read back in to
357 ;; update Maxima's database of available HTML documentation. However,
358 ;; fot this to work Maxima must have also have the updated index to
359 ;; the info files for the documentation.
360 (defmfun $build_and_dump_html_index (dir &key
361 (output_file "maxima-index-html.lisp")
362 (lang "")
363 (truenamep nil))
364 "Creates a file that contains data that maxima can use to produce
365 HTML documentation. The parameters are:
368 Pathname to where the html files are. This is usually a wildcard
369 pathname of the form \"<path>/*.html\".
370 :OUTPUT-FILE
371 Specifies the name of the file where the data is written.
372 Defaults to \"maxima-index-html.lisp\".
373 :LANG
374 Specifies the language to use. Defaults to \"\" for English.
375 This is used primarly when building Maxima's user manual to
376 determine the name of file containing the function and variable
377 index.
378 :TRUENAMEP
379 If non-NIL, the data will use the full pathname to the html
380 files. Defaults to NIL. Otherwise, the data will be a relative
381 path. This MUST be set to NIL when building Maxima's user
382 manual.
384 (build-html-index dir lang truenamep)
385 (let (entries)
386 (maphash #'(lambda (k v)
387 (push (list k (namestring (car v)) (cdr v)) entries))
388 *html-index*)
389 (with-open-file (s output_file
390 :direction :output
391 :if-exists :supersede)
392 (with-standard-io-syntax
393 ;; Set up printer settings to print the output the way we want.
395 ;; *package* set to :cl-info so that the symbols aren't
396 ;; preceded by the cl-info package marker.
398 ;; *print-length* is NIL because the list of entries is very
399 ;; long.
401 ;; *print-case* is :downcase just to make it look more
402 ;; natural; not really needed.
404 ;; *print-readably* is nil so base-strings and strings can be
405 ;; printed without any kind of special syntax for base-strings
406 ;; for lisps that distinguish between strings and
407 ;; base-strings.
408 (let ((*package* (find-package :cl-info))
409 (*print-length* nil)
410 (*print-case* :downcase)
411 (*print-readably* nil))
412 (format s ";;; Do not edit; automatically generated via build-html-index.lisp~2%")
413 (pprint '(in-package :cl-info)
416 (pprint `(let ((cl-info::html-index ',entries))
417 (cl-info::load-html-index cl-info::html-index))
418 s))))
419 (format t "HTML index has ~D entries~%" (hash-table-count *html-index*))
420 ;; Hash table can't be empty unless we screwed up somewhere.
421 (assert (plusp (hash-table-count *html-index*)))
422 (plusp (hash-table-count *html-index*))))
424 ;;(build-and-dump-html-index "./*.html")