4.21
[cxml-rng.git] / parse.lisp
blob93b64499e705edc0e1e7bf5d4fa88b0e66718110
1 (in-package :cxml-rng)
3 #+sbcl
4 (declaim (optimize (debug 2)))
7 ;;;; Errors
9 (define-condition rng-error (simple-error) ())
11 (defun rng-error (source fmt &rest args)
12 (let ((s (make-string-output-stream)))
13 (apply #'format s fmt args)
14 (when source
15 (format s "~& [ Error at line ~D, column ~D in ~S ]"
16 (klacks:current-line-number source)
17 (klacks:current-column-number source)
18 (klacks:current-system-id source)))
19 (error 'rng-error
20 :format-control "~A"
21 :format-arguments (list (get-output-stream-string s)))))
24 ;;;; Parser
26 (defvar *datatype-library*)
27 (defvar *namespace-uri*)
28 (defvar *entity-resolver*)
29 (defvar *external-href-stack*)
30 (defvar *include-uri-stack*)
31 (defvar *include-body-p* nil)
32 (defvar *grammar*)
34 (defvar *debug* nil)
36 (defun invoke-with-klacks-handler (fn source)
37 (if *debug*
38 (funcall fn)
39 (handler-case
40 (funcall fn)
41 (cxml:xml-parse-error (c)
42 (rng-error source "Cannot parse schema: ~A" c)))))
44 (defun parse-relax-ng (input &key entity-resolver)
45 (klacks:with-open-source (source (cxml:make-source input))
46 (invoke-with-klacks-handler
47 (lambda ()
48 (klacks:find-event source :start-element)
49 (let* ((*datatype-library* "")
50 (*namespace-uri* "")
51 (*entity-resolver* entity-resolver)
52 (*external-href-stack* '())
53 (*include-uri-stack* '())
54 (*grammar* (make-grammar nil))
55 (result (p/pattern source)))
56 (unless result
57 (rng-error nil "empty grammar"))
58 (setf (grammar-start *grammar*)
59 (make-definition :name :start :child result))
60 (check-pattern-definitions source *grammar*)
61 (check-recursion result 0)
62 (setf result (fold-not-allowed result))
63 (setf result (fold-empty result))
64 result))
65 source)))
68 ;;;; pattern structures
70 (defstruct pattern)
72 (defstruct (%parent (:include pattern) (:conc-name "PATTERN-"))
73 child)
75 (defstruct (%named-pattern (:include %parent) (:conc-name "PATTERN-"))
76 name)
77 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-")))
78 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-")))
80 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
81 a b)
82 (defstruct (group
83 (:include %combination)
84 (:constructor make-group (a b))))
85 (defstruct (interleave
86 (:include %combination)
87 (:constructor make-interleave (a b))))
88 (defstruct (choice
89 (:include %combination)
90 (:constructor make-choice (a b))))
92 (defstruct (one-or-more
93 (:include %parent)
94 (:constructor make-one-or-more (child))))
95 (defstruct (list-pattern
96 (:include %parent)
97 (:constructor make-list-pattern (child))))
99 (defstruct (ref
100 (:include pattern)
101 (:conc-name "PATTERN-")
102 (:constructor make-ref (target)))
103 crdepth
104 target)
106 (defstruct (%leaf (:include pattern)))
108 (defstruct (empty (:include %leaf) (:conc-name "PATTERN-")))
109 (defstruct (text (:include %leaf) (:conc-name "PATTERN-")))
111 (defstruct (%typed-pattern (:include %leaf) (:conc-name "PATTERN-"))
112 datatype-library
113 type)
115 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
117 string)
119 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
120 params
121 except)
123 (defstruct (not-allowed (:include %leaf) (:conc-name "PATTERN-")))
126 ;;;; non-pattern
128 (defstruct (grammar (:constructor make-grammar (parent)))
129 (start nil)
130 parent
131 (definitions (make-hash-table :test 'equal)))
133 (defstruct param
134 name
135 string)
137 ;; Clark calls this structure "RefPattern"
138 (defstruct (definition (:conc-name "DEFN-"))
139 name
140 combine-method
141 head-p
142 redefinition
143 child)
146 ;;;; parser
148 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
150 (defun skip-foreign* (source)
151 (loop
152 (case (klacks:peek-next source)
153 (:start-element (skip-foreign source))
154 (:end-element (return)))))
156 (defun skip-to-native (source)
157 (loop
158 (case (klacks:peek source)
159 (:start-element
160 (when (equal (klacks:current-uri source) *rng-namespace*)
161 (return))
162 (klacks:serialize-element source nil))
163 (:end-element (return)))
164 (klacks:consume source)))
166 (defun consume-and-skip-to-native (source)
167 (klacks:consume source)
168 (skip-to-native source))
170 (defun skip-foreign (source)
171 (when (equal (klacks:current-uri source) *rng-namespace*)
172 (rng-error source
173 "invalid schema: ~A not allowed here"
174 (klacks:current-lname source)))
175 (klacks:serialize-element source nil))
177 (defun attribute (lname attrs)
178 (let ((a (sax:find-attribute-ns "" lname attrs)))
179 (if a
180 (sax:attribute-value a)
181 nil)))
183 (defparameter *whitespace*
184 (format nil "~C~C~C~C"
185 (code-char 9)
186 (code-char 32)
187 (code-char 13)
188 (code-char 10)))
190 (defun ntc (lname source-or-attrs)
191 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
192 (let* ((attrs
193 (if (listp source-or-attrs)
194 source-or-attrs
195 (klacks:list-attributes source-or-attrs)))
196 (a (sax:find-attribute-ns "" lname attrs)))
197 (if a
198 (string-trim *whitespace* (sax:attribute-value a))
199 nil)))
201 (defmacro with-library-and-ns (attrs &body body)
202 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
204 (defun invoke-with-library-and-ns (fn attrs)
205 (let* ((dl (attribute "datatypeLibrary" attrs))
206 (ns (attribute "ns" attrs))
207 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
208 (*namespace-uri* (or ns *namespace-uri*)))
209 (funcall fn)))
211 (defun p/pattern (source)
212 (let* ((lname (klacks:current-lname source))
213 (attrs (klacks:list-attributes source)))
214 (with-library-and-ns attrs
215 (case (find-symbol lname :keyword)
216 (:|element| (p/element source (ntc "name" attrs)))
217 (:|attribute| (p/attribute source (ntc "name" attrs)))
218 (:|group| (p/combination #'groupify source))
219 (:|interleave| (p/combination #'interleave-ify source))
220 (:|choice| (p/combination #'choice-ify source))
221 (:|optional| (p/optional source))
222 (:|zeroOrMore| (p/zero-or-more source))
223 (:|oneOrMore| (p/one-or-more source))
224 (:|list| (p/list source))
225 (:|mixed| (p/mixed source))
226 (:|ref| (p/ref source))
227 (:|parentRef| (p/parent-ref source))
228 (:|empty| (p/empty source))
229 (:|text| (p/text source))
230 (:|value| (p/value source))
231 (:|data| (p/data source))
232 (:|notAllowed| (p/not-allowed source))
233 (:|externalRef| (p/external-ref source))
234 (:|grammar| (p/grammar source))
235 (t (skip-foreign source))))))
237 (defun p/pattern+ (source)
238 (let ((children nil))
239 (loop
240 (case (klacks:peek source)
241 (:start-element
242 (let ((p (p/pattern source))) (when p (push p children))))
243 (:end-element
244 (return))
246 (klacks:consume source))))
247 (unless children
248 (rng-error source "empty element"))
249 (nreverse children)))
251 (defun p/pattern? (source)
252 (let ((result nil))
253 (loop
254 (skip-to-native source)
255 (case (klacks:peek source)
256 (:start-element
257 (when result
258 (rng-error source "at most one pattern expected here"))
259 (setf result (p/pattern source)))
260 (:end-element
261 (return))
263 (klacks:consume source))))
264 result))
266 (defun p/element (source name)
267 (klacks:expecting-element (source "element")
268 (let ((result (make-element)))
269 (consume-and-skip-to-native source)
270 (if name
271 (setf (pattern-name result) (destructure-name source name))
272 (setf (pattern-name result) (p/name-class source)))
273 (skip-to-native source)
274 (setf (pattern-child result) (groupify (p/pattern+ source)))
275 result)))
277 (defvar *attribute-namespace-p* nil)
279 (defun p/attribute (source name)
280 (klacks:expecting-element (source "attribute")
281 (let ((result (make-attribute)))
282 (consume-and-skip-to-native source)
283 (if name
284 (setf (pattern-name result)
285 (let ((*namespace-uri* ""))
286 (destructure-name source name)))
287 (setf (pattern-name result)
288 (let ((*attribute-namespace-p* t))
289 (p/name-class source))))
290 (skip-to-native source)
291 (setf (pattern-child result)
292 (or (p/pattern? source) (make-text)))
293 result)))
295 (defun p/combination (zipper source)
296 (klacks:expecting-element (source)
297 (consume-and-skip-to-native source)
298 (funcall zipper (p/pattern+ source))))
300 (defun p/one-or-more (source)
301 (klacks:expecting-element (source "oneOrMore")
302 (consume-and-skip-to-native source)
303 (let ((children (p/pattern+ source)))
304 (make-one-or-more (groupify children)))))
306 (defun p/zero-or-more (source)
307 (klacks:expecting-element (source "zeroOrMore")
308 (consume-and-skip-to-native source)
309 (let ((children (p/pattern+ source)))
310 (make-choice (make-one-or-more (groupify children))
311 (make-empty)))))
313 (defun p/optional (source)
314 (klacks:expecting-element (source "optional")
315 (consume-and-skip-to-native source)
316 (let ((children (p/pattern+ source)))
317 (make-choice (groupify children) (make-empty)))))
319 (defun p/list (source)
320 (klacks:expecting-element (source "list")
321 (consume-and-skip-to-native source)
322 (let ((children (p/pattern+ source)))
323 (make-list-pattern (groupify children)))))
325 (defun p/mixed (source)
326 (klacks:expecting-element (source "mixed")
327 (consume-and-skip-to-native source)
328 (let ((children (p/pattern+ source)))
329 (make-interleave (groupify children) (make-text)))))
331 (defun p/ref (source)
332 (klacks:expecting-element (source "ref")
333 (prog1
334 (let* ((name (ntc "name" source))
335 (pdefinition
336 (or (find-definition name)
337 (setf (find-definition name)
338 (make-definition :name name :child nil)))))
339 (make-ref pdefinition))
340 (skip-foreign* source))))
342 (defun p/parent-ref (source)
343 (klacks:expecting-element (source "parentRef")
344 (prog1
345 (let* ((name (ntc "name" source))
346 (grammar (grammar-parent *grammar*))
347 (pdefinition
348 (or (find-definition name grammar)
349 (setf (find-definition name grammar)
350 (make-definition :name name :child nil)))))
351 (make-ref pdefinition))
352 (skip-foreign* source))))
354 (defun p/empty (source)
355 (klacks:expecting-element (source "empty")
356 (skip-foreign* source)
357 (make-empty)))
359 (defun p/text (source)
360 (klacks:expecting-element (source "text")
361 (skip-foreign* source)
362 (make-text)))
364 (defun consume-and-parse-characters (source)
365 ;; fixme
366 (let ((tmp ""))
367 (loop
368 (multiple-value-bind (key data) (klacks:peek-next source)
369 (case key
370 (:characters
371 (setf tmp (concatenate 'string tmp data)))
372 (:end-element (return)))))
373 tmp))
375 (defun p/value (source)
376 (klacks:expecting-element (source "value")
377 (let* ((type (ntc "type" source))
378 (string (consume-and-parse-characters source))
379 (ns *namespace-uri*)
380 (dl *datatype-library*))
381 (unless type
382 (setf type "token")
383 (setf dl ""))
384 (make-value :string string :type type :ns ns :datatype-library dl))))
386 (defun p/data (source)
387 (klacks:expecting-element (source "data")
388 (let* ((type (ntc "type" source))
389 (result (make-data :type type
390 :datatype-library *datatype-library*
392 (params '()))
393 (loop
394 (multiple-value-bind (key uri lname)
395 (klacks:peek-next source)
397 (case key
398 (:start-element
399 (case (find-symbol lname :keyword)
400 (:|param| (push (p/param source) params))
401 (:|except|
402 (setf (pattern-except result) (p/except-pattern source))
403 (skip-to-native source)
404 (return))
405 (t (skip-foreign source))))
406 (:end-element
407 (return)))))
408 (setf (pattern-params result) (nreverse params))
409 result)))
411 (defun p/param (source)
412 (klacks:expecting-element (source "param")
413 (let ((name (ntc "name" source))
414 (string (consume-and-parse-characters source)))
415 (make-param :name name :string string))))
417 (defun p/except-pattern (source)
418 (klacks:expecting-element (source "except")
419 (with-library-and-ns (klacks:list-attributes source)
420 (klacks:consume source)
421 (choice-ify (p/pattern+ source)))))
423 (defun p/not-allowed (source)
424 (klacks:expecting-element (source "notAllowed")
425 (consume-and-skip-to-native source)
426 (make-not-allowed)))
428 (defun safe-parse-uri (source str &optional base)
429 (when (zerop (length str))
430 (rng-error source "missing URI"))
431 (handler-case
432 (if base
433 (puri:merge-uris str base)
434 (puri:parse-uri str))
435 (puri:uri-parse-error ()
436 (rng-error source "invalid URI: ~A" str))))
438 (defun p/external-ref (source)
439 (klacks:expecting-element (source "externalRef")
440 (let* ((href
441 (escape-uri (attribute "href" (klacks:list-attributes source))))
442 (base (klacks:current-xml-base source))
443 (uri (safe-parse-uri source href base)))
444 (when (find uri *include-uri-stack* :test #'puri:uri=)
445 (rng-error source "looping include"))
446 (prog1
447 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
448 (xstream
449 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
450 (klacks:with-open-source (source (cxml:make-source xstream))
451 (invoke-with-klacks-handler
452 (lambda ()
453 (klacks:find-event source :start-element)
454 (let ((*datatype-library* ""))
455 (p/pattern source)))
456 source)))
457 (skip-foreign* source)))))
459 (defun p/grammar (source &optional grammar)
460 (klacks:expecting-element (source "grammar")
461 (consume-and-skip-to-native source)
462 (let ((*grammar* (or grammar (make-grammar *grammar*))))
463 (process-grammar-content* source)
464 (unless (grammar-start *grammar*)
465 (rng-error source "no <start> in grammar"))
466 (check-pattern-definitions source *grammar*)
467 (defn-child (grammar-start *grammar*)))))
469 (defvar *include-start*)
470 (defvar *include-definitions*)
472 (defun process-grammar-content* (source &key disallow-include)
473 (loop
474 (multiple-value-bind (key uri lname) (klacks:peek source)
476 (case key
477 (:start-element
478 (with-library-and-ns (klacks:list-attributes source)
479 (case (find-symbol lname :keyword)
480 (:|start| (process-start source))
481 (:|define| (process-define source))
482 (:|div| (process-div source))
483 (:|include|
484 (when disallow-include
485 (rng-error source "nested include not permitted"))
486 (process-include source))
488 (skip-foreign source)))))
489 (:end-element
490 (return))))
491 (klacks:consume source)))
493 (defun process-start (source)
494 (klacks:expecting-element (source "start")
495 (let* ((combine0 (ntc "combine" source))
496 (combine
497 (when combine0
498 (find-symbol (string-upcase combine0) :keyword)))
499 (child
500 (progn
501 (consume-and-skip-to-native source)
502 (p/pattern source)))
503 (pdefinition (grammar-start *grammar*)))
504 (skip-foreign* source)
505 ;; fixme: shared code with process-define
506 (unless pdefinition
507 (setf pdefinition (make-definition :name :start :child nil))
508 (setf (grammar-start *grammar*) pdefinition))
509 (when *include-body-p*
510 (setf *include-start* pdefinition))
511 (cond
512 ((defn-child pdefinition)
513 (ecase (defn-redefinition pdefinition)
514 (:not-being-redefined
515 (when (and combine
516 (defn-combine-method pdefinition)
517 (not (eq combine
518 (defn-combine-method pdefinition))))
519 (rng-error source "conflicting combine values for <start>"))
520 (unless combine
521 (when (defn-head-p pdefinition)
522 (rng-error source "multiple definitions for <start>"))
523 (setf (defn-head-p pdefinition) t))
524 (unless (defn-combine-method pdefinition)
525 (setf (defn-combine-method pdefinition) combine))
526 (setf (defn-child pdefinition)
527 (case (defn-combine-method pdefinition)
528 (:choice
529 (make-choice (defn-child pdefinition) child))
530 (:interleave
531 (make-interleave (defn-child pdefinition) child)))))
532 (:being-redefined-and-no-original
533 (setf (defn-redefinition pdefinition)
534 :being-redefined-and-original))
535 (:being-redefined-and-original)))
537 (setf (defn-child pdefinition) child)
538 (setf (defn-combine-method pdefinition) combine)
539 (setf (defn-head-p pdefinition) (null combine))
540 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
542 (defun zip (constructor children)
543 (cond
544 ((null children)
545 (rng-error nil "empty choice?"))
546 ((null (cdr children))
547 (car children))
549 (destructuring-bind (a b &rest rest)
550 children
551 (zip constructor (cons (funcall constructor a b) rest))))))
553 (defun choice-ify (children) (zip #'make-choice children))
554 (defun groupify (children) (zip #'make-group children))
555 (defun interleave-ify (children) (zip #'make-interleave children))
557 (defun find-definition (name &optional (grammar *grammar*))
558 (gethash name (grammar-definitions grammar)))
560 (defun (setf find-definition) (newval name &optional (grammar *grammar*))
561 (setf (gethash name (grammar-definitions grammar)) newval))
563 (defun process-define (source)
564 (klacks:expecting-element (source "define")
565 (let* ((name (ntc "name" source))
566 (combine0 (ntc "combine" source))
567 (combine (when combine0
568 (find-symbol (string-upcase combine0) :keyword)))
569 (child (groupify
570 (progn
571 (consume-and-skip-to-native source)
572 (p/pattern+ source))))
573 (pdefinition (find-definition name)))
574 (unless pdefinition
575 (setf pdefinition (make-definition :name name :child nil))
576 (setf (find-definition name) pdefinition))
577 (when *include-body-p*
578 (push pdefinition *include-definitions*))
579 (cond
580 ((defn-child pdefinition)
581 (case (defn-redefinition pdefinition)
582 (:not-being-redefined
583 (when (and combine
584 (defn-combine-method pdefinition)
585 (not (eq combine
586 (defn-combine-method pdefinition))))
587 (rng-error source "conflicting combine values for ~A" name))
588 (unless combine
589 (when (defn-head-p pdefinition)
590 (rng-error source "multiple definitions for ~A" name))
591 (setf (defn-head-p pdefinition) t))
592 (unless (defn-combine-method pdefinition)
593 (setf (defn-combine-method pdefinition) combine))
594 (setf (defn-child pdefinition)
595 (case (defn-combine-method pdefinition)
596 (:choice
597 (make-choice (defn-child pdefinition) child))
598 (:interleave
599 (make-interleave (defn-child pdefinition) child)))))
600 (:being-redefined-and-no-original
601 (setf (defn-redefinition pdefinition)
602 :being-redefined-and-original))
603 (:being-redefined-and-original)))
605 (setf (defn-child pdefinition) child)
606 (setf (defn-combine-method pdefinition) combine)
607 (setf (defn-head-p pdefinition) (null combine))
608 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
610 (defun process-div (source)
611 (klacks:expecting-element (source "div")
612 (consume-and-skip-to-native source)
613 (process-grammar-content* source)))
615 (defun reset-definition-for-include (defn)
616 (setf (defn-combine-method defn) nil)
617 (setf (defn-redefinition defn) :being-redefined-and-no-original)
618 (setf (defn-head-p defn) nil))
620 (defun restore-definition (defn original)
621 (setf (defn-combine-method defn) (defn-combine-method original))
622 (setf (defn-redefinition defn) (defn-redefinition original))
623 (setf (defn-head-p defn) (defn-head-p original)))
625 (defun process-include (source)
626 (klacks:expecting-element (source "include")
627 (let* ((href
628 (escape-uri (attribute "href" (klacks:list-attributes source))))
629 (base (klacks:current-xml-base source))
630 (uri (safe-parse-uri source href base))
631 (*include-start* nil)
632 (*include-definitions* '()))
633 (consume-and-skip-to-native source)
634 (let ((*include-body-p* t))
635 (process-grammar-content* source :disallow-include t))
636 (let ((tmp-start
637 (when *include-start*
638 (prog1
639 (copy-structure *include-start*)
640 (reset-definition-for-include *include-start*))))
641 (tmp-defns
642 (loop
643 for defn in *include-definitions*
644 collect
645 (prog1
646 (copy-structure defn)
647 (reset-definition-for-include defn)))))
648 (when (find uri *include-uri-stack* :test #'puri:uri=)
649 (rng-error source "looping include"))
650 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
651 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri)))
652 (klacks:with-open-source (source (cxml:make-source xstream))
653 (invoke-with-klacks-handler
654 (lambda ()
655 (klacks:find-event source :start-element)
656 (let ((*datatype-library* ""))
657 (p/grammar source *grammar*)))
658 source))
659 (check-pattern-definitions source *grammar*)
660 (when tmp-start
661 (restore-definition *include-start* tmp-start))
662 (dolist (copy tmp-defns)
663 (let ((defn (gethash (defn-name copy)
664 (grammar-definitions *grammar*))))
665 (restore-definition defn copy)))
666 (defn-child (grammar-start *grammar*)))))))
668 (defun check-pattern-definitions (source grammar)
669 (when (eq (defn-redefinition (grammar-start grammar))
670 :being-redefined-and-no-original)
671 (rng-error source "start not found in redefinition of grammar"))
672 (loop for defn being each hash-value in (grammar-definitions grammar) do
673 (when (eq (defn-redefinition defn) :being-redefined-and-no-original)
674 (rng-error source "redefinition not found in grammar"))
675 (unless (defn-child defn)
676 (rng-error source "unresolved reference to ~A" (defn-name defn)))))
678 (defvar *any-name-allowed-p* t)
679 (defvar *ns-name-allowed-p* t)
681 (defun destructure-name (source qname)
682 (multiple-value-bind (uri lname)
683 (klacks:decode-qname qname source)
684 (setf uri (or uri *namespace-uri*))
685 (when (and *attribute-namespace-p*
686 (or (and (equal lname "xmlns") (equal uri ""))
687 (equal uri "http://www.w3.org/2000/xmlns")))
688 (rng-error source "namespace attribute not permitted"))
689 (list :name lname uri)))
691 (defun p/name-class (source)
692 (klacks:expecting-element (source)
693 (with-library-and-ns (klacks:list-attributes source)
694 (case (find-symbol (klacks:current-lname source) :keyword)
695 (:|name|
696 (let ((qname (string-trim *whitespace*
697 (consume-and-parse-characters source))))
698 (destructure-name source qname)))
699 (:|anyName|
700 (unless *any-name-allowed-p*
701 (rng-error source "anyname now permitted in except"))
702 (klacks:consume source)
703 (prog1
704 (let ((*any-name-allowed-p* nil))
705 (cons :any (p/except-name-class? source)))
706 (skip-to-native source)))
707 (:|nsName|
708 (unless *ns-name-allowed-p*
709 (rng-error source "nsname now permitted in except"))
710 (let ((uri *namespace-uri*)
711 (*any-name-allowed-p* nil)
712 (*ns-name-allowed-p* nil))
713 (when (and *attribute-namespace-p*
714 (equal uri "http://www.w3.org/2000/xmlns"))
715 (rng-error source "namespace attribute not permitted"))
716 (klacks:consume source)
717 (prog1
718 (list :nsname uri (p/except-name-class? source))
719 (skip-to-native source))))
720 (:|choice|
721 (klacks:consume source)
722 (cons :choice (p/name-class* source)))
724 (rng-error source "invalid child in except"))))))
726 (defun p/name-class* (source)
727 (let ((results nil))
728 (loop
729 (skip-to-native source)
730 (case (klacks:peek source)
731 (:start-element (push (p/name-class source) results))
732 (:end-element (return)))
733 (klacks:consume source))
734 (nreverse results)))
736 (defun p/except-name-class? (source)
737 (skip-to-native source)
738 (multiple-value-bind (key uri lname)
739 (klacks:peek source)
741 (if (and (eq key :start-element)
742 (string= (find-symbol lname :keyword) "except"))
743 (p/except-name-class source)
744 nil)))
746 (defun p/except-name-class (source)
747 (klacks:expecting-element (source "except")
748 (with-library-and-ns (klacks:list-attributes source)
749 (klacks:consume source)
750 (cons :except (p/name-class* source)))))
752 (defun escape-uri (string)
753 (with-output-to-string (out)
754 (loop for c across (cxml::rod-to-utf8-string string) do
755 (let ((code (char-code c)))
756 ;; http://www.w3.org/TR/xlink/#link-locators
757 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
758 (format out "%~2,'0X" code)
759 (write-char c out))))))
762 ;;;; unparsing
764 (defvar *definitions-to-names*)
765 (defvar *seen-names*)
767 (defun serialization-name (defn)
768 (or (gethash defn *definitions-to-names*)
769 (setf (gethash defn *definitions-to-names*)
770 (let ((name (if (gethash (defn-name defn) *seen-names*)
771 (format nil "~A-~D"
772 (defn-name defn)
773 (hash-table-count *seen-names*))
774 (defn-name defn))))
775 (setf (gethash name *seen-names*) defn)
776 name))))
778 (defun serialize-grammar (grammar sink)
779 (cxml:with-xml-output sink
780 (let ((*definitions-to-names* (make-hash-table))
781 (*seen-names* (make-hash-table :test 'equal)))
782 (cxml:with-element "grammar"
783 (cxml:with-element "start"
784 (serialize-pattern grammar))
785 (loop for defn being each hash-key in *definitions-to-names* do
786 (serialize-definition defn))))))
788 (defun serialize-pattern (pattern)
789 (etypecase pattern
790 (element
791 (cxml:with-element "element"
792 (serialize-name (pattern-name pattern))
793 (serialize-pattern (pattern-child pattern))))
794 (attribute
795 (cxml:with-element "attribute"
796 (serialize-name (pattern-name pattern))
797 (serialize-pattern (pattern-child pattern))))
798 (%combination
799 (cxml:with-element
800 (etypecase pattern
801 (group "group")
802 (interleave "interleave")
803 (choice "choice"))
804 (serialize-pattern (pattern-a pattern))
805 (serialize-pattern (pattern-b pattern))))
806 (one-or-more
807 (cxml:with-element "oneOrmore"
808 (serialize-pattern (pattern-child pattern))))
809 (list-pattern
810 (cxml:with-element "list"
811 (serialize-pattern (pattern-child pattern))))
812 (ref
813 (cxml:with-element "ref"
814 (cxml:attribute "name" (serialization-name (pattern-target pattern)))))
815 (empty
816 (cxml:with-element "empty"))
817 (not-allowed
818 (cxml:with-element "notAllowed"))
819 (text
820 (cxml:with-element "text"))
821 (value
822 (cxml:with-element "value"
823 (cxml:attribute "datatype-library"
824 (pattern-datatype-library pattern))
825 (cxml:attribute "type" (pattern-type pattern))
826 (cxml:attribute "ns" (pattern-ns pattern))
827 (cxml:text (pattern-string pattern))))
828 (data
829 (cxml:with-element "value"
830 (cxml:attribute "datatype-library"
831 (pattern-datatype-library pattern))
832 (cxml:attribute "type" (pattern-type pattern))
833 (dolist (param (pattern-params pattern))
834 (cxml:with-element "param"
835 (cxml:attribute "name" (param-name param))
836 (cxml:text (param-string param))))
837 (when (pattern-except pattern)
838 (cxml:with-element "except"
839 (serialize-pattern (pattern-except pattern))))))))
841 (defun serialize-definition (defn)
842 (cxml:with-element "define"
843 (cxml:attribute "name" (serialization-name defn))
844 (serialize-pattern (defn-child defn))))
846 (defun serialize-name (name)
847 (ecase (car name)
848 (:name
849 (cxml:with-element "name"
850 (destructuring-bind (lname uri)
851 (cdr name)
852 (cxml:attribute "ns" uri)
853 (cxml:text lname))))
854 (:any
855 (cxml:with-element "anyName"
856 (when (cdr name)
857 (serialize-except-name name))))
858 (:nsname
859 (cxml:with-element "anyName"
860 (destructuring-bind (uri except)
861 (cdr name)
862 (cxml:attribute "ns" uri)
863 (when except
864 (serialize-except-name name)))))
865 (:choice
866 (cxml:with-element "choice"
867 (mapc #'serialize-name (cdr name))))))
869 (defun serialize-except-name (spec)
870 (cxml:with-element "except"
871 (mapc #'serialize-name (cdr spec))))
874 ;;;; simplification
876 ;;; 4.1 Annotations
877 ;;; Foreign attributes and elements are removed implicitly while parsing.
879 ;;; 4.2 Whitespace
880 ;;; All character data is discarded while parsing (which can only be
881 ;;; whitespace after validation).
883 ;;; Whitespace in name, type, and combine attributes is stripped while
884 ;;; parsing. Ditto for <name/>.
886 ;;; 4.3. datatypeLibrary attribute
887 ;;; Escaping is done by p/pattern.
888 ;;; Attribute value defaulting is done using *datatype-library*; only
889 ;;; p/data and p/value record the computed value.
891 ;;; 4.4. type attribute of value element
892 ;;; Done by p/value.
894 ;;; 4.5. href attribute
895 ;;; Escaping is done by process-include and p/external-ref.
897 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
898 ;;; but that requires xstream hacking.
900 ;;; 4.6. externalRef element
901 ;;; Done by p/external-ref.
903 ;;; 4.7. include element
904 ;;; Done by process-include.
906 ;;; 4.8. name attribute of element and attribute elements
907 ;;; `name' is stored as a slot, not a child. Done by p/element and
908 ;;; p/attribute.
910 ;;; 4.9. ns attribute
911 ;;; done by p/name-class, p/value, p/element, p/attribute
913 ;;; 4.10. QNames
914 ;;; done by p/name-class
916 ;;; 4.11. div element
917 ;;; Legen wir gar nicht erst an.
919 ;;; 4.12. 4.13 4.14 4.15
920 ;;; beim anlegen
922 ;;; 4.16
923 ;;; p/name-class
924 ;;; -- ausser der sache mit den datentypen
926 ;;; 4.17, 4.18, 4.19
927 ;;; Ueber die Grammar-und Definition Objekte, wie von James Clark
928 ;;; beschrieben.
930 ;;; Dabei werden keine Umbenennungen vorgenommen, weil Referenzierung
931 ;;; durch Aufbei der Graphenstruktur zwischen ref und Definition
932 ;;; erfolgt und Namen dann bereits aufgeloest sind. Wir benennen
933 ;;; dafuer beim Serialisieren um.
935 (defmethod check-recursion ((pattern element) depth)
936 (check-recursion (pattern-child pattern) (1+ depth)))
938 (defmethod check-recursion ((pattern ref) depth)
939 (when (eql (pattern-crdepth pattern) depth)
940 (rng-error nil "infinite recursion in ~A"
941 (defn-name (pattern-target pattern))))
942 (when (null (pattern-crdepth pattern))
943 (setf (pattern-crdepth pattern) depth)
944 (check-recursion (defn-child (pattern-target pattern)) depth)
945 (setf (pattern-crdepth pattern) t)))
947 (defmethod check-recursion ((pattern %parent) depth)
948 (check-recursion (pattern-child pattern) depth))
950 (defmethod check-recursion ((pattern %combination) depth)
951 (check-recursion (pattern-a pattern) depth)
952 (check-recursion (pattern-b pattern) depth))
954 (defmethod check-recursion ((pattern %leaf) depth)
955 (declare (ignore depth)))
957 (defmethod check-recursion ((pattern data) depth)
958 (when (pattern-except pattern)
959 (check-recursion (pattern-except pattern) depth)))
962 ;;;; 4.20
964 ;;; %PARENT
966 (defmethod fold-not-allowed ((pattern element))
967 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
968 pattern)
970 (defmethod fold-not-allowed ((pattern %parent))
971 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
972 (if (typep (pattern-child pattern) 'not-allowed)
973 (pattern-child pattern)
974 pattern))
976 ;;; %COMBINATION
978 (defmethod fold-not-allowed ((pattern %combination))
979 (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern)))
980 (setf (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
981 pattern)
983 (defmethod fold-not-allowed ((pattern group))
984 (call-next-method)
985 (cond
986 ;; remove if any child is not allowed
987 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
988 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
989 (t pattern)))
991 (defmethod fold-not-allowed ((pattern interleave))
992 (call-next-method)
993 (cond
994 ;; remove if any child is not allowed
995 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
996 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
997 (t pattern)))
999 (defmethod fold-not-allowed ((pattern choice))
1000 (call-next-method)
1001 (cond
1002 ;; if any child is not allowed, choose the other
1003 ((typep (pattern-a pattern) 'not-allowed) (pattern-b pattern))
1004 ((typep (pattern-b pattern) 'not-allowed) (pattern-a pattern))
1005 (t pattern)))
1007 ;;; LEAF
1009 (defmethod fold-not-allowed ((pattern %leaf))
1010 pattern)
1012 (defmethod fold-not-allowed ((pattern data))
1013 (when (pattern-except pattern)
1014 (setf (pattern-except pattern) (fold-not-allowed (pattern-except pattern)))
1015 (when (typep (pattern-except pattern) 'not-allowed)
1016 (setf (pattern-except pattern) nil)))
1017 pattern)
1019 ;;; REF
1021 (defmethod fold-not-allowed ((pattern ref))
1022 pattern)
1025 ;;;; 4.21
1027 ;;; %PARENT
1029 (defmethod fold-empty ((pattern one-or-more))
1030 (call-next-method)
1031 (if (typep (pattern-child pattern) 'empty)
1032 (pattern-child pattern)
1033 pattern))
1035 (defmethod fold-empty ((pattern %parent))
1036 (setf (pattern-child pattern) (fold-empty (pattern-child pattern)))
1037 pattern)
1039 ;;; %COMBINATION
1041 (defmethod fold-empty ((pattern %combination))
1042 (setf (pattern-a pattern) (fold-empty (pattern-a pattern)))
1043 (setf (pattern-b pattern) (fold-empty (pattern-b pattern)))
1044 pattern)
1046 (defmethod fold-empty ((pattern group))
1047 (call-next-method)
1048 (cond
1049 ;; if any child is empty, choose the other
1050 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1051 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1052 (t pattern)))
1054 (defmethod fold-empty ((pattern interleave))
1055 (call-next-method)
1056 (cond
1057 ;; if any child is empty, choose the other
1058 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1059 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1060 (t pattern)))
1062 (defmethod fold-empty ((pattern choice))
1063 (call-next-method)
1064 (if (typep (pattern-b pattern) 'empty)
1065 (cond
1066 ((typep (pattern-a pattern) 'empty)
1067 (pattern-a pattern))
1069 (rotatef (pattern-a pattern) (pattern-b pattern))
1070 pattern))
1071 pattern))
1073 ;;; LEAF
1075 (defmethod fold-empty ((pattern %leaf))
1076 pattern)
1078 (defmethod fold-empty ((pattern data))
1079 (when (pattern-except pattern)
1080 (setf (pattern-except pattern) (fold-empty (pattern-except pattern))))
1081 pattern)
1083 ;;; REF
1085 (defmethod fold-empty ((pattern ref))
1086 pattern)
1089 ;;;; tests
1091 (defun run-tests (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
1092 (dribble "/home/david/src/lisp/cxml-rng/TEST" :if-exists :rename-and-delete)
1093 (let ((pass 0)
1094 (total 0)
1095 (*package* (find-package :cxml-rng)))
1096 (dolist (d (directory p))
1097 (let ((name (car (last (pathname-directory d)))))
1098 (when (parse-integer name :junk-allowed t)
1099 (incf total)
1100 (when (test1 d)
1101 (incf pass)))))
1102 (format t "Passed ~D/~D tests.~%" pass total))
1103 (dribble))
1105 (defun run-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
1106 (test1 (merge-pathnames (format nil "~3,'0D/" n) p)))
1108 (defun parse-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
1109 (let* ((*debug* t)
1110 (d (merge-pathnames (format nil "~3,'0D/" n) p))
1111 (i (merge-pathnames "i.rng" d))
1112 (c (merge-pathnames "c.rng" d))
1113 (rng (if (probe-file c) c i)))
1114 (format t "~A: " (car (last (pathname-directory d))))
1115 (print rng)
1116 (parse-relax-ng rng)))
1118 (defun test1 (d)
1119 (let* ((i (merge-pathnames "i.rng" d))
1120 (c (merge-pathnames "c.rng" d)))
1121 (format t "~A: " (car (last (pathname-directory d))))
1122 (if (probe-file c)
1123 (handler-case
1124 (progn
1125 (parse-relax-ng c)
1126 (format t " PASS~%")
1128 (error (c)
1129 (format t " FAIL: ~A~%" c)
1130 nil))
1131 (handler-case
1132 (progn
1133 (parse-relax-ng i)
1134 (format t " FAIL: didn't detect invalid schema~%")
1135 nil)
1136 (rng-error (c)
1137 (format t " PASS: ~S~%" (type-of c))
1139 (error (c)
1140 (format t " FAIL: incorrect condition type: ~A~%" c)
1141 nil)))))