1 ;;; -*- Mode: Lisp; readtable: runes; -*-
2 ;;; (c) copyright 2007 David Lichteblau
4 ;;; This library is free software; you can redistribute it and/or
5 ;;; modify it under the terms of the GNU Library General Public
6 ;;; License as published by the Free Software Foundation; either
7 ;;; version 2 of the License, or (at your option) any later version.
9 ;;; This library is distributed in the hope that it will be useful,
10 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ;;; Library General Public License for more details.
14 ;;; You should have received a copy of the GNU Library General Public
15 ;;; License along with this library; if not, write to the
16 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ;;; Boston, MA 02111-1307 USA.
21 (defclass cxml-source
(klacks:source
)
22 (;; args to make-source
23 (context :initarg
:context
)
24 (validate :initarg
:validate
)
27 (error-culprit :initarg
:error-culprit
)
30 (current-key :initform nil
)
33 (cdata-section-p :reader klacks
:current-cdata-section-p
)
34 ;; extra WITH-SOURCE magic
35 (data-behaviour :initform
:DTD
)
36 (namespace-stack :initform
(list *initial-namespace-bindings
*))
37 (current-namespace-declarations)
38 (temporary-streams :initform nil
)
39 (scratch-pad :initarg
:scratch-pad
)
40 (scratch-pad-2 :initarg
:scratch-pad-2
)
41 (scratch-pad-3 :initarg
:scratch-pad-3
)
42 (scratch-pad-4 :initarg
:scratch-pad-4
)))
44 (defmethod klacks:close-source
((source cxml-source
))
45 (dolist (xstream (slot-value source
'temporary-streams
))
46 ;; fixme: error handling?
47 (close-xstream xstream
)))
49 (defmacro with-source
((source &rest slots
) &body body
)
52 (*ctx
* (slot-value ,s
'context
))
53 (*validate
* (slot-value ,s
'validate
))
54 (*data-behaviour
* (slot-value ,s
'data-behaviour
))
55 (*namespace-bindings
* (car (slot-value ,s
'namespace-stack
)))
56 (*scratch-pad
* (slot-value ,s
'scratch-pad
))
57 (*scratch-pad-2
* (slot-value ,s
'scratch-pad-2
))
58 (*scratch-pad-3
* (slot-value ,s
'scratch-pad-3
))
59 (*scratch-pad-4
* (slot-value ,s
'scratch-pad-4
)))
61 (with-slots (,@slots
) ,s
63 (runes-encoding:encoding-error
(c)
64 (wf-error (slot-value ,s
'error-culprit
) "~A" c
))))))
66 (defun fill-source (source)
67 (with-slots (current-key current-values continuation
) source
69 (setf current-key
:bogus
)
70 (setf continuation
(funcall continuation
))
71 (assert (not (eq current-key
:bogus
))))))
73 (defmethod klacks:peek
((source cxml-source
))
74 (with-source (source current-key current-values
)
76 (apply #'values current-key current-values
)))
78 (defmethod klacks:peek-value
((source cxml-source
))
79 (with-source (source current-key current-values
)
81 (apply #'values current-values
)))
83 (defmethod klacks:peek-next
((source cxml-source
))
84 (with-source (source current-key current-values
)
85 (setf current-key nil
)
87 (apply #'values current-key current-values
)))
89 (defmethod klacks:consume
((source cxml-source
))
90 (with-source (source current-key current-values
)
93 (apply #'values current-key current-values
)
94 (setf current-key nil
))))
96 (defmethod klacks:map-attributes
(fn (source cxml-source
))
97 (dolist (a (slot-value source
'current-attributes
))
99 (sax:attribute-namespace-uri a
)
100 (sax:attribute-local-name a
)
101 (sax:attribute-qname a
)
102 (sax:attribute-value a
)
103 (sax:attribute-specified-p a
))))
105 (defmethod klacks:get-attribute
106 ((source cxml-source
) lname
&optional uri
)
107 (dolist (a (slot-value source
'current-attributes
))
108 (when (and (equal (sax:attribute-local-name a
) lname
)
109 (equal (sax:attribute-namespace-uri a
) uri
))
110 (return (sax:attribute-value a
)))))
112 (defmethod klacks:list-attributes
((source cxml-source
))
113 (slot-value source
'current-attributes
))
117 &key validate dtd root entity-resolver disallow-internal-subset
118 (buffering t
) pathname
)
119 (declare (ignore validate dtd root entity-resolver disallow-internal-subset
))
122 (when (and (not buffering
) (< 1 (runes::xstream-speed input
)))
123 (warn "make-source called with !buffering, but xstream is buffering"))
125 (let ((zstream (make-zstream :input-stack
(list input
))))
127 (with-scratch-pads ()
128 (apply #'%make-source
131 for
(name value
) on args by
#'cddr
132 unless
(member name
'(:pathname
:buffering
))
133 append
(list name value
)))))))
135 (let ((xstream (make-xstream input
:speed
(if buffering
8192 1))))
136 (setf (xstream-name xstream
)
138 :entity-name
"main document"
140 :uri
(safe-stream-sysid input
)))
141 (apply #'make-source xstream args
)))
144 (make-xstream (open input
:element-type
'(unsigned-byte 8))
145 :speed
(if buffering
8192 1))))
146 (setf (xstream-name xstream
)
148 :entity-name
"main document"
150 :uri
(pathname-to-uri (merge-pathnames input
))))
151 (let ((source (apply #'make-source
155 (push xstream
(slot-value source
'temporary-streams
))
158 (let ((xstream (string->xstream input
)))
159 (setf (xstream-name xstream
)
161 :entity-name
"main document"
164 (apply #'make-source xstream args
)))
166 (make-source (cxml::make-octet-input-stream input
)))))
169 (input &key validate dtd root entity-resolver disallow-internal-subset
171 ;; check types of user-supplied arguments for better error messages:
172 (check-type validate boolean
)
173 (check-type dtd
(or null extid
))
174 (check-type root
(or null rod
))
175 (check-type entity-resolver
(or null function symbol
))
176 (check-type disallow-internal-subset boolean
)
177 (let* ((xstream (car (zstream-input-stack input
)))
178 (name (xstream-name xstream
))
179 (base (when name
(stream-name-uri name
)))
181 (make-context :main-zstream input
182 :entity-resolver entity-resolver
183 :base-stack
(list (or base
""))
184 :disallow-internal-subset disallow-internal-subset
))
186 (make-instance 'cxml-source
191 :error-culprit error-culprit
192 :scratch-pad
*scratch-pad
*
193 :scratch-pad-2
*scratch-pad-2
*
194 :scratch-pad-3
*scratch-pad-3
*
195 :scratch-pad-4
*scratch-pad-4
*)))
196 (setf (handler context
) (make-instance 'klacks-dtd-handler
:source source
))
197 (setf (slot-value source
'continuation
)
198 (lambda () (klacks/xmldecl source input
)))
201 (defun klacks/xmldecl
(source input
)
202 (with-source (source current-key current-values
)
203 (let ((hd (p/xmldecl input
)))
204 (setf current-key
:start-document
)
207 (list (xml-header-version hd
)
208 (xml-header-encoding hd
)
209 (xml-header-standalone-p hd
))))
211 (klacks/misc
*-
2 source input
213 (klacks/doctype source input
)))))))
215 (defun klacks/misc
*-
2 (source input successor
)
216 (with-source (source current-key current-values
)
217 (multiple-value-bind (cat sem
) (peek-token input
)
220 (setf current-key
:comment
)
221 (setf current-values
(list sem
))
222 (consume-token input
)
223 (lambda () (klacks/misc
*-
2 source input successor
)))
225 (setf current-key
:processing-instruction
)
226 (setf current-values
(list (car sem
) (cdr sem
)))
227 (consume-token input
)
228 (lambda () (klacks/misc
*-
2 source input successor
)))
230 (consume-token input
)
231 (klacks/misc
*-
2 source input successor
))
233 (funcall successor
))))))
235 (defun klacks/doctype
(source input
)
236 (with-source (source current-key current-values validate dtd
)
237 (let ((cont (lambda () (klacks/finish-doctype source input
)))
241 ((eq (peek-token input
) :<!DOCTYPE
)
242 (setf l
(cdr (p/doctype-decl input dtd
)))
243 (lambda () (klacks/misc
*-
2 source input cont
)))
245 (setf l
(cdr (synthesize-doctype dtd input
)))
247 ((and validate
(not dtd
))
248 (validity-error "invalid document: no doctype"))
250 (return-from klacks
/doctype
252 (destructuring-bind (&optional name extid
) l
253 (setf current-key
:dtd
)
256 (and extid
(extid-public extid
))
257 (and extid
(extid-system extid
)))))))))
259 (defun klacks/finish-doctype
(source input
)
260 (with-source (source current-key current-values root data-behaviour
)
263 (setf (model-stack *ctx
*) (list (make-root-model root
))))
264 (setf data-behaviour
:DOC
)
265 (setf *data-behaviour
* :DOC
)
269 (klacks/eof source input
)))
272 (setf data-behaviour
:DTD
)
273 (setf *data-behaviour
* :DTD
)
274 (klacks/misc
*-
2 source input final
))))
275 (klacks/element source input next
))))
277 (defun klacks/eof
(source input
)
278 (with-source (source current-key current-values
)
280 (klacks:close-source source
)
281 (setf current-key
:end-document
)
282 (setf current-values nil
)
283 (lambda () (klacks/nil source
))))
285 (defun klacks/nil
(source)
286 (with-source (source current-key current-values
)
287 (setf current-key nil
)
288 (setf current-values nil
)
289 (labels ((klacks/done
()
290 (setf current-key nil
)
291 (setf current-values nil
)
295 (defun klacks/element
(source input cont
)
296 (with-source (source current-key current-values current-attributes
297 current-namespace-declarations
)
298 (multiple-value-bind (cat n-b new-b uri lname qname attrs
) (p/sztag input
)
299 (setf current-key
:start-element
)
300 (setf current-values
(list uri lname qname
))
301 (setf current-attributes attrs
)
302 (setf current-namespace-declarations new-b
)
305 (klacks/element-2 source input n-b cont
))
307 (klacks/ztag source cont
))))))
309 (defun klacks/ztag
(source cont
)
310 (with-source (source current-key current-values current-attributes
)
311 (setf current-key
:end-element
)
312 (setf current-attributes nil
)
313 (validate-end-element *ctx
* (third current-values
))
316 (defun klacks/element-2
(source input n-b cont
)
318 current-key current-values current-attributes namespace-stack
319 current-namespace-declarations
)
320 (let ((values* current-values
)
321 (new-b current-namespace-declarations
))
322 (setf current-attributes nil
)
323 (push n-b namespace-stack
)
326 (setf current-namespace-declarations new-b
)
327 (klacks/element-3 source input values
* cont
))))
328 (klacks/content source input finish
)))))
330 (defun klacks/element-3
(source input tag-values cont
)
331 (with-source (source current-key current-values current-attributes
)
332 (setf current-key
:end-element
)
333 (setf current-values tag-values
)
334 (let ((qname (third tag-values
)))
336 (validate-end-element *ctx
* qname
))
339 (defun klacks/content
(source input cont
)
340 (with-source (source current-key current-values cdata-section-p
)
341 (let ((recurse (lambda () (klacks/content source input cont
))))
342 (multiple-value-bind (cat sem
) (peek-token input
)
345 (klacks/element source input recurse
))
347 (process-characters input sem
)
348 (setf current-key
:characters
)
349 (setf current-values
(list sem
))
350 (setf cdata-section-p nil
)
354 (consume-token input
)
355 (klacks/entity-reference source input name recurse
)))
357 (setf current-key
:characters
)
358 (setf current-values
(list (process-cdata-section input
)))
359 (setf cdata-section-p t
)
362 (setf current-key
:processing-instruction
)
363 (setf current-values
(list (car sem
) (cdr sem
)))
364 (consume-token input
)
367 (setf current-key
:comment
)
368 (setf current-values
(list sem
))
369 (consume-token input
)
374 (defun klacks/entity-reference
(source zstream name cont
)
375 (assert (not (zstream-token-category zstream
)))
376 (with-source (source temporary-streams context
)
377 (let ((new-xstream (entity->xstream zstream name
:general nil
)))
378 (push new-xstream temporary-streams
)
379 (push :stop
(zstream-input-stack zstream
))
380 (zstream-push new-xstream zstream
)
381 (push (stream-name-uri (xstream-name new-xstream
)) (base-stack context
))
384 (klacks/entity-reference-2 source zstream new-xstream cont
))))
385 (etypecase (checked-get-entdef name
:general
)
387 (klacks/content source zstream next
))
389 (klacks/ext-parsed-ent source zstream next
)))))))
391 (defun klacks/entity-reference-2
(source zstream new-xstream cont
)
392 (with-source (source temporary-streams context
)
393 (unless (eq (peek-token zstream
) :eof
)
394 (wf-error zstream
"Trailing garbage. - ~S" (peek-token zstream
)))
395 (assert (eq (peek-token zstream
) :eof
))
396 (assert (eq (pop (zstream-input-stack zstream
)) new-xstream
))
397 (assert (eq (pop (zstream-input-stack zstream
)) :stop
))
398 (pop (base-stack context
))
399 (setf (zstream-token-category zstream
) nil
)
400 (setf temporary-streams
(remove new-xstream temporary-streams
))
401 (close-xstream new-xstream
)
404 (defun klacks/ext-parsed-ent
(source input cont
)
405 (with-source (source)
406 (when (eq (peek-token input
) :xml-decl
)
407 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input
))))))
408 (setup-encoding input hd
))
409 (consume-token input
))
410 (set-full-speed input
)
411 (klacks/content source input cont
)))
414 ;;;; terrible kludges
416 (defclass klacks-dtd-handler
(sax:default-handler
)
417 ((handler-source :initarg
:source
:reader handler-source
)
418 (internal-subset-p :initform nil
:accessor handler-internal-subset-p
)))
420 (defmethod sax:start-internal-subset
((handler klacks-dtd-handler
))
421 (setf (slot-value (handler-source handler
) 'internal-declarations
) '())
422 (setf (handler-internal-subset-p handler
) t
))
424 (defmethod sax:end-internal-subset
((handler klacks-dtd-handler
))
425 (setf (handler-internal-subset-p handler
) nil
))
427 (defmethod sax:entity-resolver
((handler klacks-dtd-handler
) fn
)
428 (setf (slot-value (handler-source handler
) 'dom-impl-entity-resolver
) fn
))
430 (defmethod sax::dtd
((handler klacks-dtd-handler
) dtd
)
431 (setf (slot-value (handler-source handler
) 'dom-impl-dtd
) dtd
))
433 (defmethod sax:end-dtd
((handler klacks-dtd-handler
))
434 (let ((source (handler-source handler
)))
435 (when (slot-boundp source
'internal-declarations
)
436 (setf (slot-value source
'internal-declarations
)
437 (reverse (slot-value source
'internal-declarations
)))
438 (setf (slot-value source
'external-declarations
)
439 (reverse (slot-value source
'external-declarations
))))))
442 ((defhandler (name &rest args
)
443 `(defmethod ,name
((handler klacks-dtd-handler
) ,@args
)
444 (let ((source (handler-source handler
))
445 (spec (list ',name
,@args
)))
446 (if (handler-internal-subset-p handler
)
447 (push spec
(slot-value source
'internal-declarations
))
448 (push spec
(slot-value source
'external-declarations
)))))))
449 (defhandler sax
:unparsed-entity-declaration
450 name public-id system-id notation-name
)
451 (defhandler sax
:external-entity-declaration
452 kind name public-id system-id
)
453 (defhandler sax
:internal-entity-declaration
455 (defhandler sax
:notation-declaration
456 name public-id system-id
)
457 (defhandler sax
:element-declaration
459 (defhandler sax
:attribute-declaration
460 element-name attribute-name type default
))
465 (defun source-xstream (source)
466 (car (zstream-input-stack (main-zstream (slot-value source
'context
)))))
468 (defun source-stream-name (source)
469 (let ((xstream (source-xstream source
)))
471 (xstream-name xstream
)
474 (defmethod klacks:current-line-number
((source cxml-source
))
475 (let ((x (source-xstream source
)))
477 (xstream-line-number x
)
480 (defmethod klacks:current-column-number
((source cxml-source
))
481 (let ((x (source-xstream source
)))
483 (xstream-column-number x
)
486 (defmethod klacks:current-system-id
((source cxml-source
))
487 (let ((name (source-stream-name source
)))
489 (stream-name-uri name
)
492 (defmethod klacks:current-xml-base
((source cxml-source
))
493 (let ((x (car (base-stack (slot-value source
'context
)))))
496 (puri:render-uri x nil
))))
498 (defmethod klacks:map-current-namespace-declarations
(fn (source cxml-source
))
500 for
(prefix . uri
) in
(slot-value source
'current-namespace-declarations
)
502 (funcall fn prefix uri
)))
504 (defmethod klacks:find-namespace-binding
(prefix (source cxml-source
))
505 (with-source (source)
506 (find-namespace-binding prefix
)))
508 (defmethod klacks:decode-qname
(qname (source cxml-source
))
509 (with-source (source)
510 (multiple-value-bind (prefix local-name
) (split-qname qname
)
511 (values (and prefix
(find-namespace-binding prefix
))
519 (trace CXML
::KLACKS
/DOCTYPE
520 CXML
::KLACKS
/EXT-PARSED-ENT
522 CXML
::KLACKS
/ENTITY-REFERENCE
523 CXML
::KLACKS
/ENTITY-REFERENCE-2
527 CXML
::KLACKS
/FINISH-DOCTYPE
528 CXML
::KLACKS
/ELEMENT-3
530 CXML
::KLACKS
/ELEMENT-2
531 CXML
::KLACKS
/CONTENT
)