1 ;;; -*- Mode: Lisp; Package: Maxima; Syntax: Common-Lisp; Base: 10 -*- ;;;;
2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4 ;;; Build an index of all the relevant links in the HTML version of
5 ;;; the manual so that it can be used via help instead of the text
8 ;;; Copyright (C) 2023 Raymond Toy
9 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13 (defvar *texinfo-version-string
* nil
14 "The texinfo version string used to generate the HTML files.")
16 (defvar *texinfo-version
* nil
17 "The texinfo version arranged as an integer. Version 7.0.3 is
18 represented as 70003.")
21 (make-hash-table :test
#'equal
)
22 "Hash table for looking up which html file contains the
23 documentation. The key is the topic we're looking for and the value
24 is the html file containing the documentation for the topic.")
26 (defvar *log-file
* nil
27 "Log file containing info for each entry that is added to the index
30 (defun texinfo-version-number (major minor
&optional
(patch 0))
31 "Convert the major, minor, and patch to an integer."
36 (let ((maxima_nnn-pattern (pregexp:pregexp
"^maxima_[0-9][0-9]*$")))
37 (defun maxima_nnn-p (f)
38 "Determine if F is a pathname-name that looks like
39 \"maxima_<digits>\". Return non-NIL if so."
40 (pregexp:pregexp-match maxima_nnn-pattern f
)))
42 (defun grep-l (content-pattern f-path
)
43 "Similar to grep -l: returns F-PATH if file contains CONTENT-PATTERN, otherwise NIL."
44 (with-open-file (s f-path
)
45 (loop for line
= (read-line s nil
)
47 do
(when (pregexp:pregexp-match content-pattern line
)
50 (defun add-entry (item item-id file line
&key replace-dash-p prefix
)
51 "Add entry to the html hash table. ITEM is the key, FILE is the html
52 file where this item is found, ITEM-ID is the html id of this item.
53 LINE is the line where this was found and is used for information if
54 a duplicate key was found."
56 ;; Replace any special chars that texinfo has encoded.
57 (setf item
(handle-special-chars item
))
61 ;; Replace "-" with a space (if requested). The HTML output
62 ;; puts "-" where spaces were originally in the texi name.
63 ;; But this isn't perfect. See HANDLE-SPECIAL-CASES where we
64 ;; shouldn't have done this.
65 (setf item
(pregexp:pregexp-replace
* "-([^[:digit:]])" item
" \\1")))
67 ;; Now replace things like "ztics-1" with "ztics <1>". The
68 ;; former is the HTML index item for the info item with the
70 (setf item
(pregexp:pregexp-replace
"-([[:digit:]])" item
" <\\1>")))
72 ;; Check if the entry already exists and print a message. This
73 ;; shouldn't happen, so print a message if it does.
74 (when (gethash item
*html-index
*)
75 (format t
"Already added entry ~S ~S: ~S~%"
76 item
(gethash item
*html-index
*)
78 (format *log-file
* "~A: ~S -> ~S ~S~%"
79 prefix item file item-id
)
81 (setf (gethash item
*html-index
*)
84 (defun process-line (line matcher
&key replace-dash-p
(prefix "Add:"))
85 "Process the LINE using the function MATCHER to determine if this line
86 contains something interesting to add to the index. REPLACE-DASH-P
87 and PREFIX are passed to ADD-ENTRY."
88 (multiple-value-bind (item item-id file line
)
89 (funcall matcher line
)
91 (add-entry item item-id file line
92 :replace-dash-p replace-dash-p
95 (defun process-one-html-file (file matcher replace-dash-p prefix
)
96 "Process one html file named FILE using MATCHER to determine matches.
97 REPLACE-DASH-P and PREFIX are passed to PROCESS-LINE which will
99 (format *debug-io
* "build-html-index: processing: ~S~%" file
)
100 (with-open-file (s file
:direction
:input
)
101 (loop for line
= (read-line s nil
)
104 (process-line line matcher
105 :replace-dash-p replace-dash-p
108 (defun handle-special-cases ()
109 "These HTML topics need special handling because we didn't quite
110 process them correctly previously because we accidentally changed
112 (flet ((update-entry (old new
)
113 ;; Set the new index entry with the value of the old index
114 ;; entry and remove the old entry from the index.
115 (when (gethash old
*html-index
*)
116 (setf (gethash new
*html-index
*)
117 (gethash old
*html-index
*))
118 (remhash old
*html-index
*))))
119 ;; List of special cases. The car of each item is the index entry
120 ;; that should not have had the hyphen replaced. The cdr is what
122 (dolist (items '(("Quote quote operator" "Quote-quote operator")
123 ("Assignment operator (evaluates left hand side)"
124 "Assignment operator (evaluates left-hand side)")
125 ("Euler Mascheroni constant"
126 "Euler-Mascheroni constant")
127 ("maxima init.lisp" "maxima-init.lisp")
128 ("maxima init.mac" "maxima-init.mac")))
129 (destructuring-bind (old new
)
131 (update-entry old new
)))
133 ;; This is messy. Texinfo 6.8 uses plain apostrophes in the info
134 ;; file. But with texinfo 7.0.[23], some entries in HTML use an
135 ;; apostrophe (U+27) character, but the info file uses
136 ;; Right_Single_Quotation_Mark (U+2019). And apparently, the
137 ;; Texinfo 7.1 will not.
139 ;; Convert these only for the cases we know this is a problem.
141 ;; This current implementation will very likely not work with gcl
142 ;; only supports 8-bit characters.
143 (when *texinfo-version
*
144 (cond ((>= *texinfo-version
* (texinfo-version-number 7 1))
145 ;; Don't need anything special for texinfo 7.1 and greater.
147 ((>= *texinfo-version
* (texinfo-version-number 7 0 2))
148 (dolist (item '("Catalan's Constant"
150 "Introduction to Maxima's Database"))
152 (pregexp::pregexp-replace
* "'" item
(string (code-char #x2019
))))))))))
154 ;; Find entries from the function and variable index. An example of
155 ;; what we're looking for:
157 ;; <a href="Elementary-Functions.html#index-asinh">
159 ;; We extract the file name, "Elementary-Functions.html", and the
160 ;; stuff starting with "#index-". This gives the item-id,
161 ;; "index-asinh" that we want to use for the link (without the "#"),
162 ;; and the stuff after "#index-", "asinh" is the id topic we need.
163 (let ((href (pregexp:pregexp
"<a href=\"([[:alnum:]_-]+\.html)#(index-([^\"]*))\">")))
164 (defun match-entries (line)
165 (let ((match (pregexp:pregexp-match href line
)))
167 (destructuring-bind (whole file item-id id
)
169 (declare (ignore whole
))
170 (values id item-id file line
))))))
172 ;; Find entries from the TOC. An example of what we're looking for:
174 ;; <a id="toc-Bessel-Functions-1" href="Special-Functions.html#Bessel-Functions">15.2 Bessel Functions</a></li>
176 ;; We extract the file name, "Special-Functions.html" and the id,
177 ;; "Bessel-Functions" and then the title of the subsection, without
178 ;; the section numbers, "Bessel Functions".
179 ;; Further subsections are ignored.
180 (let ((regexp (pregexp:pregexp
"<a id=\"toc-.*\" href=\"([^#\"]+)(#([^\"]+))\">[[:digit:]]+\.[[:digit:]]+ ([^\"]+?)</a>")))
181 (defun match-toc (line)
182 (let ((match (pregexp:pregexp-match regexp line
)))
184 (destructuring-bind (whole file item
# id item
)
186 (declare (ignore whole item
#))
187 (when (find #\
& item
:test
#'char
=)
188 ;; Replace HTML entities with the corresponding char.
190 ;; https://lilith.fisica.ufmg.br/~wag/TRANSF/codehtml.html
192 ;; Perhaps we should use the language to reduce the number
193 ;; of calls to pregexp?
194 (dolist (replacement '(("’" #x27
) ; Right single-quote -> apostrophe
195 ;; From the de manual
199 ;; From the pt manual
203 ;; From the pt_BR manual
207 (destructuring-bind (html-entity char-code
)
209 (setf item
(pregexp:pregexp-replace
* html-entity
211 (string (code-char char-code
)))))))
213 (format *log-file
* "TOC: ~S -> ~S~%" item file
)
215 (values item id file line
))))))
217 (defparameter *index-file-name
*
218 (make-hash-table :test
'equal
)
219 "Hash table whose key is the lang and whose value is a list of the
220 file name of function and variable index and the title of the
223 ;; Setup the hashtable. The default (English) is the empty string.
225 '(("" "Function-and-Variable-Index.html" "Function and Variable Index")
226 ("de" "Index-der-Variablen-und-Funktionen.html" "Index der Variablen und Funktionen")
227 ("pt" "Indice-de-Funcoes-e-Variaveis.html" "Índice de Funções e Variáveis")
228 ("es" "Indice-de-Funciones-y-Variables.html" "Índice de Funciones y Variables")
229 ("pt_BR" "Indice-de-Funcoes-e-Variaveis.html" "Índice de Funções e Variáveis")))
230 (destructuring-bind (key &rest value
)
232 (setf (gethash key
*index-file-name
*) value
)))
234 (defun get-index-file-name (lang)
235 (first (gethash lang
*index-file-name
*)))
237 (defun get-index-title (lang)
238 (second (gethash lang
*index-file-name
*)))
240 (defun find-index-file (dir lang
)
241 "Find the name of HTML file containing the function and variable
243 (unless (gethash lang
*index-file-name
*)
244 (merror "Unknown or unsupported language for HTML help: ~A" lang
))
245 (let ((f-a-v-i (merge-pathnames (get-index-file-name lang
)
247 (when (probe-file f-a-v-i
)
248 (return-from find-index-file f-a-v-i
)))
249 (let ((files (directory dir
)))
250 ;; Keep just the files of the form "maxima_nnn", where "nnn" are
251 ;; digits. These are the only files that can contain the function
252 ;; and variable index that we want.
253 (setf files
(remove-if-not
254 #'(lambda (path-name)
255 (maxima_nnn-p path-name
))
257 :key
#'pathname-name
))
259 ;; Now sort them in numerical order.
263 (let ((name (pathname-name p
)))
264 ;; Everything else is the number in the
265 ;; file name, which starts with 1.
266 (if (> (length name
) 7)
267 (parse-integer (subseq name
7))
270 ;; Check if the last 2 files to see if one of them contains the
271 ;; function and variable index we want. Return the first one that
273 (let* ((title (get-index-title lang
))
274 (search-item (format nil
"<title>.*~A" title
)))
275 (format t
"Looking for function and variable index: ~A~%" title
)
276 (dolist (file (last files
2))
277 (when (grep-l search-item file
)
278 (format t
"Function index: ~S.~%"
280 (return-from find-index-file file
))))))
282 ;; Parse the texinfo version string. It should look something like
283 ;; "M.m.p", where M and m are a sequence of (base 10) digits and ".p"
284 ;; is the optional patch version.
285 (defun parse-texinfo-version (string)
288 (len (length string
))
292 (multiple-value-bind (digits end
)
293 (parse-integer string
296 (push digits version
)
297 (setf posn
(1+ end
)))))
298 (apply #'texinfo-version-number
(nreverse version
)))))
300 (defun get-texinfo-version (file)
301 "Get the texinfo version from FILE"
303 (with-open-file (f file
:direction
:input
)
304 ;; Texinfo writes a comment line containing the version number of
305 ;; the texinfo used to build the html file. It's at the
306 ;; beginnning so search just the first 5 lines or so.
307 (loop for count from
1 to
5
308 for line
= (read-line f nil
) then
(read-line f nil
)
309 when
(and line
(search "Created by GNU Texinfo" line
))
312 (warn "Could not find GNU Texinfo version in ~S~%" file
)
313 (return-from get-texinfo-version
))
314 (setf *texinfo-version-string
*
315 (second (pregexp:pregexp-match
"GNU Texinfo \(.*?\)," version-line
)))
316 (setf *texinfo-version
*
317 (parse-texinfo-version *texinfo-version-string
*))))
319 (defun find-toc-file (dir)
320 ;; List of possible filenames that contain the table of contents.
321 ;; The first is used by texinfo 6.8 and later. The second is used
322 ;; by texinfo 5.1. Return the first one found.
323 (dolist (toc '("index.html" "maxima_0.html"))
324 (let ((toc-path (merge-pathnames toc dir
)))
325 (when (probe-file toc-path
)
326 (return-from find-toc-file toc-path
)))))
328 (defun build-html-index (dir lang
)
329 (clrhash *html-index
*)
330 (let ((index-file (find-index-file dir lang
)))
332 (error "Could not find HTML file containing the function and variable index."))
334 (with-open-file (*log-file
* "build-html-index.log"
335 :direction
:output
:if-exists
:supersede
)
336 (let ((toc-path (find-toc-file dir
)))
337 (get-texinfo-version toc-path
)
338 (format t
"Texinfo Version ~A: ~D~%" *texinfo-version-string
* *texinfo-version
*)
339 (process-one-html-file index-file
#'match-entries t
"Add")
340 (process-one-html-file toc-path
#'match-toc nil
"TOC")
341 (handle-special-cases)))))
343 ;; Run this to build a hash table from the topic to the HTML file
344 ;; containing the documentation. The single argument DIR should be a
345 ;; directory that contains the html files to be searched for the
346 ;; topics. For example it can be "<maxima-dir>/doc/info/*.html". The
347 ;; LANG arg specifies the language to use. For English, either leave
348 ;; the argument out, or use "".
349 (defmfun $build_and_dump_html_index
(dir &optional
(lang ""))
350 (build-html-index dir lang
)
352 (maphash #'(lambda (k v
)
353 (push (list k
(namestring (car v
)) (cdr v
)) entries
))
355 (with-open-file (s "maxima-index-html.lisp"
357 :if-exists
:supersede
)
358 (with-standard-io-syntax
359 ;; Set up printer settings to print the output the way we want.
361 ;; *package* set to :cl-info so that the symbols aren't
362 ;; preceded by the cl-info package marker.
364 ;; *print-length* is NIL because the list of entries is very
367 ;; *print-case* is :downcase just to make it look more
368 ;; natural; not really needed.
370 ;; *print-readably* is nil so base-strings and strings can be
371 ;; printed without any kind of special syntax for base-strings
372 ;; for lisps that distinguish between strings and
374 (let ((*package
* (find-package :cl-info
))
376 (*print-case
* :downcase
)
377 (*print-readably
* nil
))
378 (format s
";;; Do not edit; automatically generated via build-html-index.lisp~2%")
379 (pprint '(in-package :cl-info
)
382 (pprint `(let ((cl-info::html-index
',entries
))
383 (cl-info::load-html-index cl-info
::html-index
))
385 (format t
"HTML index has ~D entries~%" (hash-table-count *html-index
*))
386 ;; Hash table can't be empty unless we screwed up somewhere.
387 (assert (plusp (hash-table-count *html-index
*)))
388 (plusp (hash-table-count *html-index
*))))
390 ;;(build-and-dump-html-index "./*.html")