various docstrings; release
[cxml/s11.git] / klacks / klacks-impl.lisp
blob50b136b3d4cf957e17fda516563adbb6478e4fd3
1 ;;; -*- Mode: Lisp; readtable: runes; -*-
2 ;;; (c) copyright 2007 David Lichteblau
4 ;;; This library is free software; you can redistribute it and/or
5 ;;; modify it under the terms of the GNU Library General Public
6 ;;; License as published by the Free Software Foundation; either
7 ;;; version 2 of the License, or (at your option) any later version.
8 ;;;
9 ;;; This library is distributed in the hope that it will be useful,
10 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ;;; Library General Public License for more details.
13 ;;;
14 ;;; You should have received a copy of the GNU Library General Public
15 ;;; License along with this library; if not, write to the
16 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ;;; Boston, MA 02111-1307 USA.
19 (in-package :cxml)
21 (defclass cxml-source (klacks:source)
22 (;; args to make-source
23 (context :initarg :context)
24 (validate :initarg :validate)
25 (root :initarg :root)
26 (dtd :initarg :dtd)
27 (error-culprit :initarg :error-culprit)
28 ;; current state
29 (continuation)
30 (current-key :initform nil)
31 (current-values)
32 (current-attributes)
33 (cdata-section-p :reader klacks:current-cdata-section-p)
34 ;; extra WITH-SOURCE magic
35 (data-behaviour :initform :DTD)
36 (namespace-stack :initform (list *initial-namespace-bindings*))
37 (current-namespace-declarations)
38 (temporary-streams :initform nil)
39 (scratch-pad :initarg :scratch-pad)
40 (scratch-pad-2 :initarg :scratch-pad-2)
41 (scratch-pad-3 :initarg :scratch-pad-3)
42 (scratch-pad-4 :initarg :scratch-pad-4)))
44 (defmethod klacks:close-source ((source cxml-source))
45 (dolist (xstream (slot-value source 'temporary-streams))
46 ;; fixme: error handling?
47 (close-xstream xstream)))
49 (defmacro with-source ((source &rest slots) &body body)
50 (let ((s (gensym)))
51 `(let* ((,s ,source)
52 (*ctx* (slot-value ,s 'context))
53 (*validate* (slot-value ,s 'validate))
54 (*data-behaviour* (slot-value ,s 'data-behaviour))
55 (*namespace-bindings* (car (slot-value ,s 'namespace-stack)))
56 (*scratch-pad* (slot-value ,s 'scratch-pad))
57 (*scratch-pad-2* (slot-value ,s 'scratch-pad-2))
58 (*scratch-pad-3* (slot-value ,s 'scratch-pad-3))
59 (*scratch-pad-4* (slot-value ,s 'scratch-pad-4)))
60 (handler-case
61 (with-slots (,@slots) ,s
62 ,@body)
63 (runes-encoding:encoding-error (c)
64 (wf-error (slot-value ,s 'error-culprit) "~A" c))))))
66 (defun fill-source (source)
67 (with-slots (current-key current-values continuation) source
68 (unless current-key
69 (setf current-key :bogus)
70 (setf continuation (funcall continuation))
71 (assert (not (eq current-key :bogus))))))
73 (defmethod klacks:peek ((source cxml-source))
74 (with-source (source current-key current-values)
75 (fill-source source)
76 (apply #'values current-key current-values)))
78 (defmethod klacks:peek-value ((source cxml-source))
79 (with-source (source current-key current-values)
80 (fill-source source)
81 (apply #'values current-values)))
83 (defmethod klacks:peek-next ((source cxml-source))
84 (with-source (source current-key current-values)
85 (setf current-key nil)
86 (fill-source source)
87 (apply #'values current-key current-values)))
89 (defmethod klacks:consume ((source cxml-source))
90 (with-source (source current-key current-values)
91 (fill-source source)
92 (multiple-value-prog1
93 (apply #'values current-key current-values)
94 (setf current-key nil))))
96 (defmethod klacks:map-attributes (fn (source cxml-source))
97 (dolist (a (slot-value source 'current-attributes))
98 (funcall fn
99 (sax:attribute-namespace-uri a)
100 (sax:attribute-local-name a)
101 (sax:attribute-qname a)
102 (sax:attribute-value a)
103 (sax:attribute-specified-p a))))
105 (defmethod klacks:get-attribute
106 ((source cxml-source) lname &optional uri)
107 (dolist (a (slot-value source 'current-attributes))
108 (when (and (equal (sax:attribute-local-name a) lname)
109 (equal (sax:attribute-namespace-uri a) uri))
110 (return (sax:attribute-value a)))))
112 (defmethod klacks:list-attributes ((source cxml-source))
113 (slot-value source 'current-attributes))
115 (defun make-source
116 (input &rest args
117 &key validate dtd root entity-resolver disallow-internal-subset
118 (buffering t) pathname)
119 (declare (ignore validate dtd root entity-resolver disallow-internal-subset))
120 (etypecase input
121 (xstream
122 (when (and (not buffering) (< 1 (runes::xstream-speed input)))
123 (warn "make-source called with !buffering, but xstream is buffering"))
124 (let ((*ctx* nil))
125 (let ((zstream (make-zstream :input-stack (list input))))
126 (peek-rune input)
127 (with-scratch-pads ()
128 (apply #'%make-source
129 zstream
130 (loop
131 for (name value) on args by #'cddr
132 unless (member name '(:pathname :buffering))
133 append (list name value)))))))
134 (stream
135 (let ((xstream (make-xstream input :speed (if buffering 8192 1))))
136 (setf (xstream-name xstream)
137 (make-stream-name
138 :entity-name "main document"
139 :entity-kind :main
140 :uri (safe-stream-sysid input)))
141 (apply #'make-source xstream args)))
142 (pathname
143 (let* ((xstream
144 (make-xstream (open input :element-type '(unsigned-byte 8))
145 :speed (if buffering 8192 1))))
146 (setf (xstream-name xstream)
147 (make-stream-name
148 :entity-name "main document"
149 :entity-kind :main
150 :uri (pathname-to-uri (merge-pathnames input))))
151 (let ((source (apply #'make-source
152 xstream
153 :pathname input
154 args)))
155 (push xstream (slot-value source 'temporary-streams))
156 source)))
157 (rod
158 (let ((xstream (string->xstream input)))
159 (setf (xstream-name xstream)
160 (make-stream-name
161 :entity-name "main document"
162 :entity-kind :main
163 :uri nil))
164 (apply #'make-source xstream args)))
165 (array
166 (make-source (cxml::make-octet-input-stream input)))))
168 (defun %make-source
169 (input &key validate dtd root entity-resolver disallow-internal-subset
170 error-culprit)
171 ;; check types of user-supplied arguments for better error messages:
172 (check-type validate boolean)
173 (check-type dtd (or null extid))
174 (check-type root (or null rod))
175 (check-type entity-resolver (or null function symbol))
176 (check-type disallow-internal-subset boolean)
177 (let* ((xstream (car (zstream-input-stack input)))
178 (name (xstream-name xstream))
179 (base (when name (stream-name-uri name)))
180 (context
181 (make-context :main-zstream input
182 :entity-resolver entity-resolver
183 :base-stack (list (or base ""))
184 :disallow-internal-subset disallow-internal-subset))
185 (source
186 (make-instance 'cxml-source
187 :context context
188 :validate validate
189 :dtd dtd
190 :root root
191 :error-culprit error-culprit
192 :scratch-pad *scratch-pad*
193 :scratch-pad-2 *scratch-pad-2*
194 :scratch-pad-3 *scratch-pad-3*
195 :scratch-pad-4 *scratch-pad-4*)))
196 (setf (handler context) (make-instance 'klacks-dtd-handler :source source))
197 (setf (slot-value source 'continuation)
198 (lambda () (klacks/xmldecl source input)))
199 source))
201 (defun klacks/xmldecl (source input)
202 (with-source (source current-key current-values)
203 (let ((hd (p/xmldecl input)))
204 (setf current-key :start-document)
205 (setf current-values
206 (when hd
207 (list (xml-header-version hd)
208 (xml-header-encoding hd)
209 (xml-header-standalone-p hd))))
210 (lambda ()
211 (klacks/misc*-2 source input
212 (lambda ()
213 (klacks/doctype source input)))))))
215 (defun klacks/misc*-2 (source input successor)
216 (with-source (source current-key current-values)
217 (multiple-value-bind (cat sem) (peek-token input)
218 (case cat
219 (:COMMENT
220 (setf current-key :comment)
221 (setf current-values (list sem))
222 (consume-token input)
223 (lambda () (klacks/misc*-2 source input successor)))
224 (:PI
225 (setf current-key :processing-instruction)
226 (setf current-values (list (car sem) (cdr sem)))
227 (consume-token input)
228 (lambda () (klacks/misc*-2 source input successor)))
230 (consume-token input)
231 (klacks/misc*-2 source input successor))
233 (funcall successor))))))
235 (defun klacks/doctype (source input)
236 (with-source (source current-key current-values validate dtd)
237 (let ((cont (lambda () (klacks/finish-doctype source input)))
239 (prog1
240 (cond
241 ((eq (peek-token input) :<!DOCTYPE)
242 (setf l (cdr (p/doctype-decl input dtd)))
243 (lambda () (klacks/misc*-2 source input cont)))
244 (dtd
245 (setf l (cdr (synthesize-doctype dtd input)))
246 cont)
247 ((and validate (not dtd))
248 (validity-error "invalid document: no doctype"))
250 (return-from klacks/doctype
251 (funcall cont))))
252 (destructuring-bind (&optional name extid) l
253 (setf current-key :dtd)
254 (setf current-values
255 (list name
256 (and extid (extid-public extid))
257 (and extid (extid-system extid)))))))))
259 (defun klacks/finish-doctype (source input)
260 (with-source (source current-key current-values root data-behaviour)
261 (ensure-dtd)
262 (when root
263 (setf (model-stack *ctx*) (list (make-root-model root))))
264 (setf data-behaviour :DOC)
265 (setf *data-behaviour* :DOC)
266 (fix-seen-< input)
267 (let* ((final
268 (lambda ()
269 (klacks/eof source input)))
270 (next
271 (lambda ()
272 (setf data-behaviour :DTD)
273 (setf *data-behaviour* :DTD)
274 (klacks/misc*-2 source input final))))
275 (klacks/element source input next))))
277 (defun klacks/eof (source input)
278 (with-source (source current-key current-values)
279 (p/eof input)
280 (klacks:close-source source)
281 (setf current-key :end-document)
282 (setf current-values nil)
283 (lambda () (klacks/nil source))))
285 (defun klacks/nil (source)
286 (with-source (source current-key current-values)
287 (setf current-key nil)
288 (setf current-values nil)
289 (labels ((klacks/done ()
290 (setf current-key nil)
291 (setf current-values nil)
292 #'klacks/done))
293 #'klacks/done)))
295 (defun klacks/element (source input cont)
296 (with-source (source current-key current-values current-attributes
297 current-namespace-declarations)
298 (multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input)
299 (setf current-key :start-element)
300 (setf current-values (list uri lname qname))
301 (setf current-attributes attrs)
302 (setf current-namespace-declarations new-b)
303 (if (eq cat :stag)
304 (lambda ()
305 (klacks/element-2 source input n-b cont))
306 (lambda ()
307 (klacks/ztag source cont))))))
309 (defun klacks/ztag (source cont)
310 (with-source (source current-key current-values current-attributes)
311 (setf current-key :end-element)
312 (setf current-attributes nil)
313 (validate-end-element *ctx* (third current-values))
314 cont))
316 (defun klacks/element-2 (source input n-b cont)
317 (with-source (source
318 current-key current-values current-attributes namespace-stack
319 current-namespace-declarations)
320 (let ((values* current-values)
321 (new-b current-namespace-declarations))
322 (setf current-attributes nil)
323 (push n-b namespace-stack)
324 (let ((finish
325 (lambda ()
326 (setf current-namespace-declarations new-b)
327 (klacks/element-3 source input values* cont))))
328 (klacks/content source input finish)))))
330 (defun klacks/element-3 (source input tag-values cont)
331 (with-source (source current-key current-values current-attributes)
332 (setf current-key :end-element)
333 (setf current-values tag-values)
334 (let ((qname (third tag-values)))
335 (p/etag input qname)
336 (validate-end-element *ctx* qname))
337 cont))
339 (defun klacks/content (source input cont)
340 (with-source (source current-key current-values cdata-section-p)
341 (let ((recurse (lambda () (klacks/content source input cont))))
342 (multiple-value-bind (cat sem) (peek-token input)
343 (case cat
344 ((:stag :ztag)
345 (klacks/element source input recurse))
346 ((:CDATA)
347 (process-characters input sem)
348 (setf current-key :characters)
349 (setf current-values (list sem))
350 (setf cdata-section-p nil)
351 recurse)
352 ((:ENTITY-REF)
353 (let ((name sem))
354 (consume-token input)
355 (klacks/entity-reference source input name recurse)))
356 ((:<!\[)
357 (setf current-key :characters)
358 (setf current-values (list (process-cdata-section input)))
359 (setf cdata-section-p t)
360 recurse)
361 ((:PI)
362 (setf current-key :processing-instruction)
363 (setf current-values (list (car sem) (cdr sem)))
364 (consume-token input)
365 recurse)
366 ((:COMMENT)
367 (setf current-key :comment)
368 (setf current-values (list sem))
369 (consume-token input)
370 recurse)
371 (otherwise
372 (funcall cont)))))))
374 (defun klacks/entity-reference (source zstream name cont)
375 (assert (not (zstream-token-category zstream)))
376 (with-source (source temporary-streams context)
377 (let ((new-xstream (entity->xstream zstream name :general nil)))
378 (push new-xstream temporary-streams)
379 (push :stop (zstream-input-stack zstream))
380 (zstream-push new-xstream zstream)
381 (push (stream-name-uri (xstream-name new-xstream)) (base-stack context))
382 (let ((next
383 (lambda ()
384 (klacks/entity-reference-2 source zstream new-xstream cont))))
385 (etypecase (checked-get-entdef name :general)
386 (internal-entdef
387 (klacks/content source zstream next))
388 (external-entdef
389 (klacks/ext-parsed-ent source zstream next)))))))
391 (defun klacks/entity-reference-2 (source zstream new-xstream cont)
392 (with-source (source temporary-streams context)
393 (unless (eq (peek-token zstream) :eof)
394 (wf-error zstream "Trailing garbage. - ~S" (peek-token zstream)))
395 (assert (eq (peek-token zstream) :eof))
396 (assert (eq (pop (zstream-input-stack zstream)) new-xstream))
397 (assert (eq (pop (zstream-input-stack zstream)) :stop))
398 (pop (base-stack context))
399 (setf (zstream-token-category zstream) nil)
400 (setf temporary-streams (remove new-xstream temporary-streams))
401 (close-xstream new-xstream)
402 (funcall cont)))
404 (defun klacks/ext-parsed-ent (source input cont)
405 (with-source (source)
406 (when (eq (peek-token input) :xml-decl)
407 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
408 (setup-encoding input hd))
409 (consume-token input))
410 (set-full-speed input)
411 (klacks/content source input cont)))
414 ;;;; terrible kludges
416 (defclass klacks-dtd-handler (sax:default-handler)
417 ((handler-source :initarg :source :reader handler-source)
418 (internal-subset-p :initform nil :accessor handler-internal-subset-p)))
420 (defmethod sax:start-internal-subset ((handler klacks-dtd-handler))
421 (setf (slot-value (handler-source handler) 'internal-declarations) '())
422 (setf (handler-internal-subset-p handler) t))
424 (defmethod sax:end-internal-subset ((handler klacks-dtd-handler))
425 (setf (handler-internal-subset-p handler) nil))
427 (defmethod sax:entity-resolver ((handler klacks-dtd-handler) fn)
428 (setf (slot-value (handler-source handler) 'dom-impl-entity-resolver) fn))
430 (defmethod sax::dtd ((handler klacks-dtd-handler) dtd)
431 (setf (slot-value (handler-source handler) 'dom-impl-dtd) dtd))
433 (defmethod sax:end-dtd ((handler klacks-dtd-handler))
434 (let ((source (handler-source handler)))
435 (when (slot-boundp source 'internal-declarations)
436 (setf (slot-value source 'internal-declarations)
437 (reverse (slot-value source 'internal-declarations)))
438 (setf (slot-value source 'external-declarations)
439 (reverse (slot-value source 'external-declarations))))))
441 (macrolet
442 ((defhandler (name &rest args)
443 `(defmethod ,name ((handler klacks-dtd-handler) ,@args)
444 (let ((source (handler-source handler))
445 (spec (list ',name ,@args)))
446 (if (handler-internal-subset-p handler)
447 (push spec (slot-value source 'internal-declarations))
448 (push spec (slot-value source 'external-declarations)))))))
449 (defhandler sax:unparsed-entity-declaration
450 name public-id system-id notation-name)
451 (defhandler sax:external-entity-declaration
452 kind name public-id system-id)
453 (defhandler sax:internal-entity-declaration
454 kind name value)
455 (defhandler sax:notation-declaration
456 name public-id system-id)
457 (defhandler sax:element-declaration
458 name model)
459 (defhandler sax:attribute-declaration
460 element-name attribute-name type default))
463 ;;;; locator
465 (defun source-xstream (source)
466 (car (zstream-input-stack (main-zstream (slot-value source 'context)))))
468 (defun source-stream-name (source)
469 (let ((xstream (source-xstream source)))
470 (if xstream
471 (xstream-name xstream)
472 nil)))
474 (defmethod klacks:current-line-number ((source cxml-source))
475 (let ((x (source-xstream source)))
476 (if x
477 (xstream-line-number x)
478 nil)))
480 (defmethod klacks:current-column-number ((source cxml-source))
481 (let ((x (source-xstream source)))
482 (if x
483 (xstream-column-number x)
484 nil)))
486 (defmethod klacks:current-system-id ((source cxml-source))
487 (let ((name (source-stream-name source)))
488 (if name
489 (stream-name-uri name)
490 nil)))
492 (defmethod klacks:current-xml-base ((source cxml-source))
493 (let ((x (car (base-stack (slot-value source 'context)))))
494 (if (stringp x)
496 (puri:render-uri x nil))))
498 (defmethod klacks:map-current-namespace-declarations (fn (source cxml-source))
499 (loop
500 for (prefix . uri) in (slot-value source 'current-namespace-declarations)
502 (funcall fn prefix uri)))
504 (defmethod klacks:find-namespace-binding (prefix (source cxml-source))
505 (with-source (source)
506 (find-namespace-binding prefix)))
508 (defmethod klacks:decode-qname (qname (source cxml-source))
509 (with-source (source)
510 (multiple-value-bind (prefix local-name) (split-qname qname)
511 (values (and prefix (find-namespace-binding prefix))
512 local-name
513 prefix))))
516 ;;;; debugging
518 #+(or)
519 (trace CXML::KLACKS/DOCTYPE
520 CXML::KLACKS/EXT-PARSED-ENT
521 CXML::KLACKS/MISC*-2
522 CXML::KLACKS/ENTITY-REFERENCE
523 CXML::KLACKS/ENTITY-REFERENCE-2
524 CXML::KLACKS/ELEMENT
525 CXML::KLACKS/ZTAG
526 CXML::KLACKS/XMLDECL
527 CXML::KLACKS/FINISH-DOCTYPE
528 CXML::KLACKS/ELEMENT-3
529 CXML::KLACKS/EOF
530 CXML::KLACKS/ELEMENT-2
531 CXML::KLACKS/CONTENT )