Add mathjax for dlange
[maxima.git] / src / verify-html-index.lisp
blobca54df6c05bebb45acc4f0bd8f4f76bc12767efb
1 ;;; -*- Mode: Lisp; Package: Maxima; Syntax: Common-Lisp; Base: 10 -*- ;;;;
2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3 ;;;
4 ;;; Verification of HTML index entries.
5 ;;;
6 ;;; Copyright (C) 2023 Raymond Toy
7 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9 (in-package :maxima)
12 ;;;; Verification of the HTML index. Use command-line option
13 ;;;; --verify-html-index to perform this verification.
15 (defvar *text-topics*
16 (make-hash-table :test #'equal)
17 "All available text topics")
19 (defvar *html-topics* nil
20 "All available html topics")
22 (defvar *extra-html-entries* nil
23 "Full list of all the HTML entries that are not in the text entries.
24 Ideally should be empty.")
26 (defvar *missing-html-entries* nil
27 "Full list of all the text entries that are not the the HTML entries.
28 Ideally should be empty.")
30 ;; NOTE: This function should only be called by build-html-index.lisp
31 ;; to build the html index. build-html-index.lisp isn't compiled and
32 ;; this function might be slow without being compiled so leave it here
33 ;; so that it's compiled.
35 ;; This might be rather slow. Perhaps an alternative solution is to
36 ;; leave these alone and have $hdescribe encode any special characters
37 ;; before looking them up. Since "?" is only used occasionally, we
38 ;; don't incur the cost here and move it to "?" where the impact is
39 ;; lower.
41 ;; However, a test run where this function was removed made virtually
42 ;; no difference in runtime (with cmucl). (31.97 sec with and 31.62
43 ;; sec without; well within timing noise probably.) Note, however,
44 ;; that this file is not normally compiled before running, but earlier
45 ;; tests showed that compiling didn't make much difference either. I
46 ;; think this is because most of the cost is in pregexp, which is
47 ;; compiled.
49 ;; spec-chars-string are the special characters that texinfo converted
50 ;; to lower-case hex digits representing the char-code of the
51 ;; character.
52 (let* ((spec-chars-string "_.%$?,<>#=:;*-^+/'()[]!@|`~\\")
53 (regexp-quoted (map 'list
54 #'(lambda (c)
55 ;; Bug in pregexp? If we pregexp-quote
56 ;; "$" (to "\\$"), when we try to
57 ;; replace the match with "\\$", we end
58 ;; up with an empty string. So don't
59 ;; quote this character.
60 (if (char= c #\$)
61 (string c)
62 (pregexp:pregexp-quote (string c))))
63 spec-chars-string))
64 (codes (map 'list #'(lambda (spec-char)
65 (pregexp:pregexp-quote
66 (string-downcase
67 (format nil "_~4,'0x" (char-code spec-char)))))
68 spec-chars-string)))
69 (defun handle-special-chars (item)
70 "Handle special encoded characters in HTML file. Texinfo encodes
71 special characters to hexadecimal form and this needs to be undone
72 so we know what the actual character is when looking up the
73 documentation."
74 (loop for code in codes
75 and replacement in regexp-quoted
76 ;; Exit early if there are not "_" characters left in the topic
77 while (find #\_ item :test #'char=)
78 do (setf item
79 (pregexp:pregexp-replace* code item
80 replacement)))
81 item))
83 (defun get-html-topics ()
84 ;; Find all the HTML entries and place in a list.
85 #+nil
86 (format t "Get html topics: table size ~D~%" (hash-table-count cl-info::*html-index*))
88 (setf *html-topics*
89 (loop for topic being the hash-keys of cl-info::*html-index*
90 collect topic))
91 #+nil
92 (format t "html topic length ~D~%" (length *html-topics*)))
94 (defun get-text-topics ()
95 ;; Find all the text entries and place in a list.
96 (clrhash *text-topics*)
97 (maphash #'(lambda (k v)
98 (declare (ignore k))
99 (dolist (table v)
100 (loop for topic being the hash-keys of table
101 ;; We don't care what the value is.
102 do (setf (gethash topic *text-topics*) t))))
103 cl-info::*info-tables*))
105 (defun verify-html-index ()
106 ;; Make sure the hash table has the correct test! This is important.
107 (unless (eql (hash-table-test cl-info::*html-index*) #-clisp 'equal
108 #+clisp 'ext:fasthash-equal)
109 (mwarning (format nil
110 (intl:gettext "HTML index hash table test must be `equal not ~S~%")
111 (hash-table-test cl-info::*html-index*))))
112 (get-html-topics)
113 (get-text-topics)
115 ;; The text entries are the source of truth about documentation.
116 ;; Print out differences between the html entries and the text
117 ;; entries.
118 (unless (= (hash-table-count cl-info::*html-index*)
119 (hash-table-count *text-topics*))
120 (mwarning
121 (format nil
122 (intl:gettext "Number of HTML entries (~A) does not match text entries (~A)~%")
123 (hash-table-count cl-info::*html-index*)
124 (hash-table-count *text-topics*))))
126 ;; If the set of topics differs between HTML and text, print out
127 ;; the differences.
128 (setf *extra-html-entries*
129 (loop for key being the hash-keys of cl-info::*html-index*
130 unless (gethash key *text-topics*)
131 collect key))
133 (setf *missing-html-entries*
134 (loop for key being the hash-keys of *text-topics*
135 unless (gethash key cl-info::*html-index*)
136 collect key))
138 (flet
139 ((maybe-print-warning (prefix-msg diffs)
140 (let ((max-display-length 20))
141 (when diffs
142 (let* ((diff-length (length diffs))
143 (displayed-length (min diff-length max-display-length))
144 (message
145 (with-output-to-string (s)
146 (format s
147 "~D ~A:~% ~{~S~^ ~}"
148 (length diffs)
149 prefix-msg
150 (subseq diffs 0 displayed-length))
151 (when (> diff-length max-display-length)
152 (format s "... plus ~D more" (- diff-length max-display-length)))
153 (format s "~%"))))
154 (mwarning message))))))
155 (maybe-print-warning (intl:gettext "HTML entries not in text entries")
156 *extra-html-entries*)
157 (maybe-print-warning (intl:gettext "Text entries not in HTML entries")
158 *missing-html-entries*))
159 ;; Return true if there a no extra or missing entries.
160 (not (or *extra-html-entries* *missing-html-entries*)))
162 ;; Undocumented Maxima function to verify the HTML index so it can be
163 ;; easily run from the REPL. Mostly for debugging. The optional arg
164 ;; TIMEP will call TIME to show how long it took to run the
165 ;; verification. This is also for debugging in case the time seems to
166 ;; have gotten too long on startup.
167 (defmfun $verify_html_index (&optional timep)
168 (if timep
169 (time (verify-html-index))
170 (verify-html-index)))