Added classes and code for annotations
[parse-docstrings.git] / syntax-sbcl.lisp
blobdc7b7ba8625ed9ad03fcdba452c16fa06700d1ad
1 ;;; -*- lisp -*-
3 ;;;; A docstring parser implementing syntax used for the sbcl manual.
5 ;;;; This software was originally part of the SBCL software system.
6 ;;;; SBCL is in the public domain and is provided with absolutely no warranty.
7 ;;;; See the COPYING file for more information.
8 ;;;;
9 ;;;; Written by Rudi Schlatte <rudi@constantly.at>, mangled
10 ;;;; by Nikodemus Siivola, Luis Oliveira, David Lichteblau, and others.
12 ;;;; TODO
13 ;;;; * Verbatim text
14 ;;;; * Quotations
15 ;;;; * This is getting complicated enough that tests would be good
16 ;;;; * Nesting (currently only nested itemizations work)
18 ;;;; Formatting heuristics (tweaked to format SAVE-LISP-AND-DIE sanely):
19 ;;;;
20 ;;;; Formats SYMBOL as @code{symbol}, or @var{symbol} if symbol is in
21 ;;;; the argument list of the defun / defmacro.
22 ;;;;
23 ;;;; Lines starting with * or - that are followed by intented lines
24 ;;;; are marked up with @itemize.
25 ;;;;
26 ;;;; Lines containing only a SYMBOL that are followed by indented
27 ;;;; lines are marked up as @table @code, with the SYMBOL as the item.
29 (in-package #:parse-docstrings.sbcl)
32 ;;;; utilities
34 (defun string-lines (string)
35 "Lines in STRING as a vector."
36 (coerce (with-input-from-string (s string)
37 (loop for line = (read-line s nil nil)
38 while line collect line))
39 'vector))
41 (defun whitespacep (char)
42 (find char #(#\tab #\space #\page)))
44 (defun indentation (line)
45 "Position of first non-SPACE character in LINE."
46 (position-if-not (lambda (c) (char= c #\Space)) line))
48 (defun flatten (list)
49 (cond ((null list)
50 nil)
51 ((consp (car list))
52 (nconc (flatten (car list)) (flatten (cdr list))))
53 ((null (cdr list))
54 (cons (car list) nil))
56 (cons (car list) (flatten (cdr list))))))
58 (defun flatten-to-string (list)
59 (format nil "~{~A~^~%~}" (flatten list)))
62 ;;; line markups
64 (defparameter *itemize-start-characters* '(#\* #\-)
65 "Characters that might start an itemization in docstrings when
66 at the start of a line.")
68 (defparameter *symbol-characters* "ABCDEFGHIJKLMNOPQRSTUVWXYZ*:-+&"
69 "List of characters that make up symbols in a docstring.")
71 (defparameter *symbol-delimiters* " ,.!?;
74 (defun locate-symbols (line)
75 "Return a list of index pairs of symbol-like parts of LINE."
76 ;; This would be a good application for a regex ...
77 (do ((result nil)
78 (begin nil)
79 (maybe-begin t)
80 (i 0 (1+ i)))
81 ((= i (length line))
82 ;; symbol at end of line
83 (when (and begin (or (> i (1+ begin))
84 (not (member (char line begin) '(#\A #\I)))))
85 (push (list begin i) result))
86 (nreverse result))
87 (cond
88 ((and begin (find (char line i) *symbol-delimiters*))
89 ;; symbol end; remember it if it's not "A" or "I"
90 (when (or (> i (1+ begin)) (not (member (char line begin) '(#\A #\I))))
91 (push (list begin i) result))
92 (setf begin nil
93 maybe-begin t))
94 ((and begin (not (find (char line i) *symbol-characters*)))
95 ;; Not a symbol: abort
96 (setf begin nil))
97 ((and maybe-begin (not begin) (find (char line i) *symbol-characters*))
98 ;; potential symbol begin at this position
99 (setf begin i
100 maybe-begin nil))
101 ((find (char line i) *symbol-delimiters*)
102 ;; potential symbol begin after this position
103 (setf maybe-begin t))
105 ;; Not reading a symbol, not at potential start of symbol
106 (setf maybe-begin nil)))))
108 ;;; lisp sections
110 (defun lisp-section-p (line line-number lines)
111 "Returns T if the given LINE looks like start of lisp code --
112 ie. if it starts with whitespace followed by a paren or
113 semicolon, and the previous line is empty"
114 (let ((offset (indentation line)))
115 (and offset
116 (plusp offset)
117 (find (find-if-not #'whitespacep line) "(;")
118 (empty-p (1- line-number) lines))))
120 (defun collect-lisp-section (lines line-number)
121 (let ((lisp (loop for index = line-number then (1+ index)
122 for line = (and (< index (length lines))
123 (svref lines index))
124 while (indentation line)
125 collect line)))
126 (values (length lisp) `("@lisp" ,@lisp "@end lisp"))))
128 ;;; itemized sections
130 (defun maybe-itemize-offset (line)
131 "Return NIL or the indentation offset if LINE looks like it starts
132 an item in an itemization."
133 (let* ((offset (indentation line))
134 (char (when offset (char line offset))))
135 (and offset
136 (member char *itemize-start-characters* :test #'char=)
137 (char= #\Space (find-if-not (lambda (c) (char= c char))
138 line :start offset))
139 offset)))
141 (defun collect-maybe-itemized-section (lines starting-line)
142 ;; Return index of next line to be processed outside
143 (let ((this-offset (maybe-itemize-offset (svref lines starting-line)))
144 (result nil)
145 (lines-consumed 0))
146 (loop for line-number from starting-line below (length lines)
147 for line = (svref lines line-number)
148 for indentation = (indentation line)
149 for offset = (maybe-itemize-offset line)
150 do (cond
151 ((not indentation)
152 ;; empty line -- inserts paragraph.
153 (push "" result)
154 (incf lines-consumed))
155 ((and offset (> indentation this-offset))
156 ;; nested itemization -- handle recursively
157 ;; FIXME: tabulations in itemizations go wrong
158 (multiple-value-bind (sub-lines-consumed sub-itemization)
159 (collect-maybe-itemized-section lines line-number)
160 (when sub-lines-consumed
161 (incf line-number (1- sub-lines-consumed)) ; +1 on next loop
162 (incf lines-consumed sub-lines-consumed)
163 (setf result (nconc (nreverse sub-itemization) result)))))
164 ((and offset (= indentation this-offset))
165 ;; start of new item
166 (push (format nil "@item ~A"
167 (texinfo-line (subseq line (1+ offset))))
168 result)
169 (incf lines-consumed))
170 ((and (not offset) (> indentation this-offset))
171 ;; continued item from previous line
172 (push (texinfo-line line) result)
173 (incf lines-consumed))
175 ;; end of itemization
176 (loop-finish))))
177 ;; a single-line itemization isn't.
178 (if (> (count-if (lambda (line) (> (length line) 0)) result) 1)
179 (values lines-consumed `("@itemize" ,@(reverse result) "@end itemize"))
180 nil)))
182 ;;; tabulation sections
184 (defun tabulation-body-p (offset line-number lines)
185 (when (< -1 line-number (length lines))
186 (let ((offset2 (indentation (svref lines line-number))))
187 (and offset2 (< offset offset2)))))
189 (defun tabulation-p (offset line-number lines direction)
190 (let ((step (ecase direction
191 (:backwards (1- line-number))
192 (:forwards (1+ line-number)))))
193 (when (and (plusp line-number) (< line-number (length lines)))
194 (and (eql offset (indentation (svref lines line-number)))
195 (or (when (eq direction :backwards)
196 (empty-p step lines))
197 (tabulation-p offset step lines direction)
198 (tabulation-body-p offset step lines))))))
200 (defun empty-p (line-number lines)
201 (or (eql -1 line-number)
202 (and (< line-number (length lines))
203 (not (indentation (svref lines line-number))))))
205 (defun maybe-tabulation-offset (line-number lines)
206 "Return NIL or the indentation offset if LINE looks like it starts
207 an item in a tabulation. Ie, if it is (1) indented, (2) preceded by an
208 empty line, another tabulation label, or a tabulation body, (3) and
209 followed another tabulation label or a tabulation body."
210 (let* ((line (svref lines line-number))
211 (offset (indentation line))
212 (prev (1- line-number))
213 (next (1+ line-number)))
214 (when (and offset (plusp offset))
215 (and (or (empty-p prev lines)
216 (tabulation-body-p offset prev lines)
217 (tabulation-p offset prev lines :backwards))
218 (or (tabulation-body-p offset next lines)
219 (tabulation-p offset next lines :forwards))
220 offset))))
222 ;;; section markup
224 (defmacro with-maybe-section (index &rest forms)
225 `(multiple-value-bind (count collected) (progn ,@forms)
226 (when count
227 (dolist (line collected)
228 (write-line line *document-output*))
229 (incf ,index (1- count)))))
231 (defmacro maybe-section (index &body forms)
232 `(multiple-value-bind (count section) (progn ,@forms)
233 (when count
234 (push section parsed)
235 (incf ,index (1- count)))))
237 (defun parse-docstring (docstring)
238 (let ((lines (string-lines docstring))
239 (parsed nil)
240 (current nil))
241 (labels ((end-paragraph ()
242 (when current
243 (push (make-instance 'parse-docstrings:paragraph
244 :string (flatten-to-string (reverse current)))
245 parsed)
246 (setf current nil)))
247 (add-line (line)
248 (let ((trimmed (string-trim '(#\space #\tab) line)))
249 (if (plusp (length trimmed))
250 (push trimmed current)
251 (end-paragraph)))))
252 (loop for line-number from 0 below (length lines)
253 for line = (svref lines line-number)
254 do (cond
255 ((maybe-section line-number
256 (and (lisp-section-p line line-number lines)
257 (parse-lisp-block lines line-number))))
258 ((maybe-section line-number
259 (and (maybe-itemize-offset line)
260 (parse-maybe-itemization lines line-number))))
261 ((maybe-section line-number
262 (and (maybe-tabulation-offset line-number lines)
263 (parse-maybe-tabulation lines line-number))))
265 (add-line line))))
266 (end-paragraph)
267 (if parsed
268 (section (reverse parsed))
269 (error "empty parse? ~S" docstring)))))
271 (defun parse-lisp-block (lines line-number)
272 (let ((lisp (loop for index = line-number then (1+ index)
273 for line = (and (< index (length lines)) (svref lines index))
274 while (indentation line)
275 collect line)))
276 (values (length lisp) (make-instance 'parse-docstrings:lisp-block
277 :string (flatten-to-string lisp)))))
279 (defun section (paragraphs)
280 (if (cdr paragraphs)
281 (make-instance 'parse-docstrings:section :blocks paragraphs)
282 (car paragraphs)))
284 (defun parse-maybe-itemization (lines starting-line)
285 ;; Return index of next line to be processed outside
286 (let ((this-offset (maybe-itemize-offset (svref lines starting-line)))
287 (items nil)
288 (paragraphs nil)
289 (current nil)
290 (lines-consumed 0))
291 (labels ((end-paragraph ()
292 (when current
293 (push (make-instance 'parse-docstrings:paragraph
294 :string (flatten-to-string (reverse current)))
295 paragraphs)
296 (setf current nil)))
297 (end-item ()
298 (end-paragraph)
299 (when paragraphs
300 (push (section (reverse paragraphs)) items)
301 (setf paragraphs nil)))
302 (add-line (line)
303 (push (string-trim '(#\space #\tab) line) current)))
304 (loop for line-number from starting-line below (length lines)
305 for line = (svref lines line-number)
306 for indentation = (indentation line)
307 for offset = (maybe-itemize-offset line)
308 do (cond
309 ((not indentation)
310 ;; empty line, a paragraph break
311 (end-paragraph)
312 (incf lines-consumed))
313 ((and offset (> indentation this-offset))
314 (end-item)
315 ;; nested itemization -- handle recursively FIXME:
316 ;; tabulations in itemizations go wrong
317 (multiple-value-bind (sub-lines-consumed sublist)
318 (parse-maybe-itemization lines line-number)
319 (when sub-lines-consumed
320 (incf line-number (1- sub-lines-consumed)) ; +1 on next loop
321 (incf lines-consumed sub-lines-consumed)
322 (push sublist items))))
323 ((and offset (= indentation this-offset))
324 ;; start of new item
325 (end-item)
326 (add-line (subseq line (1+ offset)))
327 (incf lines-consumed))
328 ((and (not offset) (> indentation this-offset))
329 ;; continued paragraph from previous line
330 (add-line line)
331 (incf lines-consumed))
333 ;; end of itemization
334 (loop-finish))))
335 (end-item))
336 ;; a single-item itemization isn't.
337 (if (> (length items) 1)
338 (values lines-consumed (make-instance 'parse-docstrings:itemization
339 :items (nreverse items)))
340 (values nil nil))))
343 (defun parse-maybe-tabulation (lines starting-line)
344 ;; Return index of next line to be processed outside
345 (let ((this-offset (maybe-tabulation-offset starting-line lines))
346 (items nil)
347 (paragraphs nil)
348 (title nil)
349 (current nil)
350 (lines-consumed 0))
351 (labels ((end-paragraph ()
352 (when current
353 (push (make-instance 'parse-docstrings:paragraph
354 :string (flatten-to-string (reverse current)))
355 paragraphs)
356 (setf current nil)))
357 (end-item ()
358 (end-paragraph)
359 (when paragraphs
360 (push (make-instance 'parse-docstrings:tabulation-item
361 :title title
362 :body (section (reverse paragraphs)))
363 items)
364 (setf paragraphs nil
365 title nil)))
366 (set-title (line)
367 (setf title (string-trim '(#\space #\tab) line)))
368 (add-line (line)
369 (push (string-trim '(#\space #\tab) line) current)))
370 (assert this-offset)
371 (loop for line-number from starting-line below (length lines)
372 for line = (svref lines line-number)
373 for indentation = (indentation line)
374 for offset = (maybe-tabulation-offset line-number lines)
375 do (cond
376 ((not indentation)
377 (end-paragraph)
378 (incf lines-consumed))
379 ((and offset (= indentation this-offset))
380 ;; start of new item, or continuation of previous item
381 (cond ((and title (not (maybe-tabulation-offset line-number lines)))
382 (add-line line))
384 (end-item)
385 (set-title line)))
386 (incf lines-consumed))
387 ((> indentation this-offset)
388 ;; continued item from previous line
389 (add-line line)
390 (incf lines-consumed))
392 ;; end of itemization
393 (loop-finish))))
394 (end-item))
395 (values lines-consumed
396 (make-instance 'parse-docstrings:tabulation
397 :items (reverse items)))))