noch dokumentation
[cxml-rng.git] / parse.lisp
1 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
27 (in-package :cxml-rng)
29 #+sbcl
30 (declaim (optimize (debug 2)))
33 ;;;; Errors
;;; Condition signalled for every problem found in a Relax NG schema.
;;; Besides the SIMPLE-ERROR format control/arguments it carries the
;;; position (line, column, system id) reported by the parser, each of
;;; which is NIL when unknown.
(define-condition rng-error (simple-error)
  ((line-number :accessor rng-error-line-number
                :initarg :line-number)
   (column-number :accessor rng-error-column-number
                  :initarg :column-number)
   (system-id :accessor rng-error-system-id
              :initarg :system-id))
  (:documentation
   "The class of all validation errors.
    @see-slot{rng-error-line-number}
    @see-slot{rng-error-column-number}
    @see-slot{rng-error-system-id}"))

(setf (documentation 'rng-error-line-number 'function)
      "@arg[instance]{an instance of @class{rng-error}}
       @return{an integer, or nil}
       Return the line number reported by the parser when the Relax NG error
       was detected, or NIL if not available.")

(setf (documentation 'rng-error-column-number 'function)
      "@arg[instance]{an instance of @class{rng-error}}
       @return{an integer, or nil}
       Return the column number reported by the parser when the Relax NG error
       was detected, or NIL if not available.")

(setf (documentation 'rng-error-system-id 'function)
      "@arg[instance]{an instance of @class{rng-error}}
       @return{a puri:uri, or nil}
       Return the System ID of the document being parsed when the Relax NG
       error was detected, or NIL if not available.")
(defun rng-error (source fmt &rest args)
  "@unexport{}"
  ;; Signal an RNG-ERROR whose message is FMT applied to ARGS.
  ;; SOURCE is NIL, a klacks source, or a SAX parser; when it yields
  ;; any position information, that information is appended to the
  ;; message and stored in the condition's slots.
  (let ((message (apply #'format nil fmt args)))
    (multiple-value-bind (line column sysid)
        (etypecase source
          (null)
          (klacks:source
           (values (klacks:current-line-number source)
                   (klacks:current-column-number source)
                   (klacks:current-system-id source)))
          (sax:sax-parser-mixin
           (values (sax:line-number source)
                   (sax:column-number source)
                   (sax:system-id source))))
      (when (or line column sysid)
        ;; ~& puts the location note on a line of its own.
        (setf message
              (format nil "~A~& [ Error at line ~D, column ~D in ~S ]"
                      message
                      line
                      column
                      sysid)))
      (error 'rng-error
             :format-control "~A"
             :format-arguments (list message)
             :line-number line
             :column-number column
             :system-id sysid))))
91 ;;;; Parser
;;; Dynamic state of the schema parser.  Most of these are bound by
;;; PARSE-SCHEMA for the duration of one parse.

;; datatypeLibrary URI in effect; rebound per element by
;; INVOKE-WITH-LIBRARY-AND-NS.
(defvar *datatype-library*)
;; Namespace URI in effect (inherited "ns" attribute).
(defvar *namespace-uri*)
;; Value of the "ns" attribute on the current element itself, or NIL.
(defvar *ns*)
;; Entity resolver passed to PARSE-SCHEMA, used when opening external
;; schema documents.
(defvar *entity-resolver*)
;; Bound to the empty list by PARSE-SCHEMA; its users are not in this
;; part of the file.
(defvar *external-href-stack*)
;; URIs of the externalRefs currently being expanded; used to detect
;; include loops.
(defvar *include-uri-stack*)
;; When true, PROCESS-START records the start definition in
;; *INCLUDE-START*.
(defvar *include-body-p* nil)
;; The GRAMMAR structure currently being filled in.
(defvar *grammar*)

;; When true, XML parse errors are not translated into RNG-ERRORs and
;; patterns print with the default structure printer.
(defvar *debug* nil)
(defstruct (schema
            (:constructor make-schema (start definitions)))
  "An instance of this class represents a Relax NG grammar that has
   been parsed and simplified.
   @see-slot{schema-start}
   @see-constructor{parse-schema}
   @see{make-validator}
   @see{serialize-schema}"
  ;; START and DEFINITIONS are the two positional constructor
  ;; arguments; (missing) signals an error if a slot would otherwise
  ;; be left uninitialized.
  (start (missing) :type pattern)
  (definitions (missing) :type list)
  ;; The remaining slots start out as NIL; they are filled in by code
  ;; outside this part of the file.
  (interned-start nil :type (or null pattern))
  (registratur nil :type (or null hash-table)))

(setf (documentation 'schema-start 'function)
      "@arg[instance]{an instance of @class{schema}}
       @return{the start pattern, an instance of @class{pattern}}
       Reader function for the grammar's start pattern, from which all
       of the grammar's patterns are reachable.")
;;; Print a SCHEMA unreadably, showing only its type and identity and
;;; none of its slots.
(defmethod print-object ((schema schema) stream)
  (print-unreadable-object (schema stream :identity t :type t)))
;;; Call FN with no arguments and return its values.  Unless *DEBUG*
;;; is true, a CXML:XML-PARSE-ERROR signalled during the call is
;;; reported as an RNG-ERROR located at SOURCE.
(defun invoke-with-klacks-handler (fn source)
  (when *debug*
    ;; Debugging: let the original parser condition propagate.
    (return-from invoke-with-klacks-handler (funcall fn)))
  (handler-case (funcall fn)
    (cxml:xml-parse-error (c)
      (rng-error source "Cannot parse schema: ~A" c))))
;; When true, schema documents are themselves validated while they are
;; being read (see MAKE-VALIDATING-SOURCE).
(defvar *validate-grammar* t)
;; Parsed schema used for that validation; NIL until PARSE-SCHEMA
;; loads it on first use.
(defparameter *relax-ng-grammar* nil)
;;; Return a klacks source reading INPUT.  When *VALIDATE-GRAMMAR* is
;;; true the source is wrapped in a tapping source that feeds a
;;; validator built from *RELAX-NG-GRAMMAR*.
(defun make-validating-source (input)
  (let ((plain (cxml:make-source input)))
    (cond
      (*validate-grammar*
       (klacks:make-tapping-source plain
                                   (make-validator *relax-ng-grammar*)))
      (t
       plain))))
(defun parse-schema (input &key entity-resolver)
  "@arg[input]{a string, pathname, stream, or xstream}
   @arg[entity-resolver]{a function of two arguments, or NIL}
   @return{a parsed @class{schema}}
   @short{This function parses a Relax NG schema file in XML syntax}
   and returns a parsed representation of that schema.

   @code{input} can be any stream designator as understood by
   @code{cxml:make-source}.

   Note that namestrings are not valid arguments,
   because they would be interpreted as XML source code.  Use pathnames
   instead.

   @code{entity-resolver} can be passed as a function of two arguments.
   It is invoked for every entity referenced by the
   document with the entity's Public ID (a rod) and System ID (an
   URI object) as arguments.  The function may return
   nil, in which case CXML will try to resolve the entity as usual.
   Alternatively it may return a Common Lisp stream specialized on
   @code{(unsigned-byte 8)} which will be used instead.

   @see{parse-compact}
   @see{make-validator}"
  ;; Lazily load the schema for Relax NG itself, which is used to
  ;; validate every schema document we read.  Validation is switched
  ;; off for that one recursive call, since nothing exists yet to
  ;; validate it against.
  (when *validate-grammar*
    (unless *relax-ng-grammar*
      (setf *relax-ng-grammar*
            (let ((*validate-grammar* nil)
                  ;; Locate rng.rng next to the system definition via
                  ;; ASDF's public accessor rather than an internal slot.
                  (d (asdf:component-pathname
                      (asdf:find-system :cxml-rng))))
              (parse-schema (merge-pathnames "rng.rng" d))))))
  (klacks:with-open-source (source (make-validating-source input))
    (invoke-with-klacks-handler
     (lambda ()
       (klacks:find-event source :start-element)
       (let* ((*datatype-library* "")
              (*namespace-uri* "")
              (*entity-resolver* entity-resolver)
              (*external-href-stack* '())
              (*include-uri-stack* '())
              (*grammar* (make-grammar nil))
              (start (p/pattern source)))
         (unless start
           (rng-error nil "empty grammar"))
         (setf (grammar-start *grammar*)
               (make-definition :name :start :child start))
         (check-pattern-definitions source *grammar*)
         (check-recursion start 0)
         ;; First pass: collect the definitions, then fold notAllowed
         ;; and empty patterns in the start pattern and in every
         ;; definition.
         (multiple-value-bind (new-start defns)
             (finalize-definitions start)
           (setf start (fold-not-allowed new-start))
           (dolist (defn defns)
             (setf (defn-child defn) (fold-not-allowed (defn-child defn))))
           (setf start (fold-empty start))
           (dolist (defn defns)
             (setf (defn-child defn) (fold-empty (defn-child defn)))))
         ;; Second pass: collect definitions again on the folded
         ;; pattern, check the restrictions, and build the result.
         (multiple-value-bind (new-start defns)
             (finalize-definitions start)
           (check-start-restrictions new-start)
           (dolist (defn defns)
             (check-restrictions (defn-child defn)))
           (make-schema new-start defns))))
     source)))
211 ;;;; pattern structures
(defstruct pattern
  "@short{The superclass of all patterns.}
   Instances of this class represent elements of the simplified syntax
   for Relax NG.

   Patterns are documented for introspective purposes and are not meant to
   be modified by user code.

   The start pattern of a schema is available through @fun{schema-start}.

   @see{schema}"
  ;; Starts out as the marker :UNINITIALIZED; the code that replaces it
  ;; is not in this part of the file.
  (nullable :uninitialized))
;;; Patterns normally print unreadably as type plus identity.  With
;;; *DEBUG* set, the default printer is used instead, with
;;; *PRINT-CIRCLE* bound to T.
(defmethod print-object :around ((pattern pattern) stream)
  (cond
    (*debug*
     (let ((*print-circle* t))
       (call-next-method)))
    (t
     (print-unreadable-object (pattern stream :identity t :type t)))))
;;; Internal base structure for patterns with exactly one sub-pattern,
;;; reachable through PATTERN-CHILD.
(defstruct (%parent (:include pattern)
                    (:conc-name "PATTERN-"))
  child)
;;; Internal base structure for patterns that have a name class in
;;; addition to a child (ELEMENT and ATTRIBUTE).
(defstruct (%named-pattern (:include %parent)
                           (:conc-name "PATTERN-"))
  name)

(setf (documentation 'pattern-name 'function)
      "@arg[instance]{an instance of @class{pattern}}
       @return{a @class{name-class}}
       @short{Returns the @code{pattern}'s name class.}

       This slot describes the name allowed for the current element or
       attribute.

       @see{element}
       @see{attribute}")

(setf (documentation 'pattern-child 'function)
      "@arg[instance]{an instance of @class{pattern}}
       @return{an instance of @class{pattern}}
       @short{Returns the pattern's sub-pattern.}

       (Elements in the full Relax NG syntax allow more than one child
       pattern, but simplification normalizes the representation so that
       any such element has exactly one child.)

       @see{element}
       @see{attribute}
       @see{one-or-more}
       @see{list-pattern}
       @see{choice}")
(defstruct (element
            (:include %named-pattern))
  "@short{This pattern specifies that an element of a certain name class
   is required.}

   Its child pattern describes the attributes and child nodes
   of this element.
   @see-slot{pattern-name}
   @see-slot{pattern-child}")
(defstruct (attribute
            (:include %named-pattern))
  "@short{This pattern specifies that an attribute of a certain name class
   is required.}

   Its child pattern describes the type of the attribute's
   contents.
   @see-slot{pattern-name}
   @see-slot{pattern-child}")
;;; Internal base structure for patterns that combine exactly two
;;; sub-patterns, reachable through PATTERN-A and PATTERN-B.
(defstruct (%combination (:include pattern)
                         (:conc-name "PATTERN-"))
  a
  b)

(setf (documentation 'pattern-a 'function)
      "@arg[instance]{an instance of @class{pattern}}
       @return{an instance of @class{pattern}}
       @short{Returns the first of two sub-patterns the pattern instance has.}

       (Elements in the full Relax NG syntax allow more than two child
       patterns, but simplification normalizes the representation so that
       any such element has exactly two children.)

       @see{pattern-b}
       @see{group}
       @see{interleave}
       @see{choice}")

(setf (documentation 'pattern-b 'function)
      "@arg[instance]{an instance of @class{pattern}}
       @return{an instance of @class{pattern}}
       @short{Returns the second of two sub-patterns the pattern instance has.}

       (Elements in the full Relax NG syntax allow more than two child
       patterns, but simplification normalizes the representation so that
       any such element has exactly two children.)

       @see{pattern-a}
       @see{group}
       @see{interleave}
       @see{choice}")
;; Constructed positionally: (make-group a b).
(defstruct (group (:include %combination)
                  (:constructor make-group (a b)))
  "@short{This pattern specifies that two subpatterns are
   required at the current position in a specific order.}

   @see-slot{pattern-a}
   @see-slot{pattern-b}")
;; Constructed positionally: (make-interleave a b).
(defstruct (interleave (:include %combination)
                       (:constructor make-interleave (a b)))
  "@short{This pattern specifies that two possible subpatterns are
   allowed to occur in any order at the current position.}

   @see-slot{pattern-a}
   @see-slot{pattern-b}")
;; Constructed positionally: (make-choice a b).
(defstruct (choice (:include %combination)
                   (:constructor make-choice (a b)))
  "@short{This pattern specifies that one of two possible subpatterns are
   allowed at the current position, given as its children.}

   @see-slot{pattern-a}
   @see-slot{pattern-b}")
;; Undocumented two-child pattern, constructed positionally with
;; (make-after a b).  It is not created anywhere in this part of the
;; file.
(defstruct (after (:include %combination)
                  (:constructor make-after (a b))))
;; Constructed positionally: (make-one-or-more child).
(defstruct (one-or-more (:include %parent)
                        (:constructor make-one-or-more (child)))
  "@short{This pattern specifies that its subpattern is
   allowed to occur at the current position one or more times.}

   @see-slot{pattern-child}")
;; Constructed positionally: (make-list-pattern child).
(defstruct (list-pattern
            (:include %parent)
            (:constructor make-list-pattern (child)))
  "@short{This pattern specifies that a subpattern is allowed multiple
   times at the current position, with whitespace as a separator.}

   @see-slot{pattern-child}")
;; Constructed positionally: (make-ref target).  TARGET is a DEFINITION
;; structure (see PATTERN-ELEMENT); CRDEPTH is not an argument of the
;; constructor and is not used in this part of the file.
(defstruct (ref (:include pattern)
                (:conc-name "PATTERN-")
                (:constructor make-ref (target)))
  "@short{This pattern references another part of the pattern graph.}

   @code{ref} is the only pattern to introduce shared structure and
   circularity into the pattern graph, by referring to elements defined
   elsewhere.

   (@code{ref} pattern in the full Relax NG syntax can be used to refer
   to any pattern definition in the grammar.  Simplification normalizes
   the schema so that ref patterns only refer to definitions which have
   an @code{element} as their child.)

   @see-slot{pattern-element}"
  crdepth
  target)
(defun pattern-element (ref)
  "@arg[ref]{an instance of @class{ref}}
   @return{an instance of @class{element}}
   @short{Returns the ref pattern's target.}

   @code{ref} is the only pattern to introduce shared structure and
   circularity into the pattern graph, by referring to elements defined
   elsewhere.

   (@code{ref} pattern in the full Relax NG syntax can be used to refer
   to any pattern definition in the grammar.  Simplification normalizes
   the schema so that ref patterns only refer to definitions which have
   an @code{element} as their child.)"
  ;; A REF points at a DEFINITION; the pattern is that definition's
  ;; child.
  (let ((definition (pattern-target ref)))
    (defn-child definition)))
;;; Internal base structure for patterns without sub-patterns.
(defstruct (%leaf
            (:include pattern)))
(defstruct (empty
            (:include %leaf))
  "@short{This pattern specifies that nothing more is expected at the current
   position.}")
(defstruct (text
            (:include %leaf))
  "@short{This pattern specifies that text is expected here.}")
;;; Internal base structure for leaf patterns that carry a data type
;;; (VALUE and DATA).
(defstruct (%typed-pattern (:include %leaf)
                           (:conc-name "PATTERN-"))
  type)

(setf (documentation 'pattern-type 'function)
      "@arg[instance]{an instance of @class{pattern}}
       @return{a @class{cxml-types:data-type}}
       @short{Returns the data type expected at this position.}

       This type has already been parsed into an object.  Its name and
       the URI of its library can be queried from that object.

       @see{data}
       @see{value}
       @see{cxml-types:type-name}
       @see{cxml-types:type-library}")
(defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
  "@short{This pattern specifies that a specific value is expected as text
   here.}

   The value expected is @code{pattern-value}, parsed from
   @code{pattern-string} using @code{pattern-type}.

   @see-slot{pattern-type}
   @see-slot{pattern-value}
   @see-slot{pattern-string}"
  ;; P/VALUE passes :NS (the namespace URI in effect at the <value>
  ;; element) to MAKE-VALUE, so the structure needs a slot for it;
  ;; without the slot that constructor call is invalid.
  ns
  string
  value)

(setf (documentation 'pattern-string 'function)
      "@arg[instance]{an instance of @class{value}}
       @return{a string}
       @short{Returns the string expected at this position.}

       This string is the lexical representation expected, not parsed into
       a value object yet.  The parsed object is available as
       @fun{pattern-value}.

       @see{pattern-type}")

(setf (documentation 'pattern-value 'function)
      "@arg[instance]{an instance of @class{value}}
       @return{an object as returned by @fun{cxml-types:parse}}
       @short{Returns the value expected at this position.}

       This object is the result of parsing @fun{pattern-string} using
       @fun{pattern-type}.")
(defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
  "@short{This pattern specifies that text of a specific data type is
   expected.}

   The data type instance stored in the @code{pattern-type} slot takes into
   account additional parameters, which can be retrieved using
   @code{pattern-params} in their original form.

   @see-slot{pattern-type}
   @see-slot{pattern-params}
   @see-slot{pattern-except}"
  ;; PARAMS: the list of parameters given to the data type.
  params
  ;; EXCEPT: pattern for values that are excluded, or NIL.
  except)

(setf (documentation 'pattern-except 'function)
      "@arg[instance]{an instance of @class{data}}
       @return{a @class{pattern}, or @code{nil}}
       @short{Returns the @code{data} instance's @code{except} pattern.}

       In addition to a data type, @code{data} can specify that certain
       values are @em{not} permitted.  They are described using a pattern.

       If this slot is @code{nil}, no exception is defined.")

(setf (documentation 'pattern-params 'function)
      "@arg[instance]{an instance of @class{data}}
       @return{a list of @fun{cxml-types:param}}
       @short{The data type parameters for this data pattern.}

       (With the XSD type library, these are known as restricting facets.)")
(defstruct (not-allowed
            (:include %leaf))
  "@short{This pattern specifies that the part of the schema reached at
   this point is not valid.}")
484 ;;;; non-pattern
;;; Parse-time representation of one <grammar> element.  Constructed
;;; positionally with (make-grammar parent), where PARENT is the
;;; enclosing grammar or NIL.
(defstruct (grammar
            (:constructor make-grammar (parent)))
  ;; DEFINITION for <start>, or NIL while none has been seen.
  (start nil)
  parent
  ;; Table of definitions with an EQUAL test; it is accessed by code
  ;; outside this part of the file.
  (definitions (make-hash-table :test 'equal)))
;;; A named pattern definition; accessors are prefixed DEFN-.
;;; (Clark calls this structure "RefPattern".)
(defstruct (definition
            (:conc-name "DEFN-"))
  ;; A string for <define>, :START for <start>, or a gensym for the
  ;; anonymous definitions created by P/ELEMENT.
  name
  ;; :CHOICE or :INTERLEAVE once a combine attribute has been seen.
  combine-method
  ;; True once a definition without a combine attribute has been seen.
  head-p
  ;; Redefinition state keyword, dispatched on by PROCESS-START.
  redefinition
  ;; The pattern defined, or NIL while only referenced.
  child)
500 ;;; name-class
;;; Used as the default initform of required structure slots: signals
;;; an error if the slot was not supplied.
(defun missing ()
  (error "missing arg"))
;; Has no slots of its own; the concrete name classes include it.
(defstruct name-class
  "@short{The abstract superclass of all name-related classes.}

   Name classes represent sets of permissible names for an element or
   attribute.

   Names are pairs of namespace URI and local-name.

   @see{attribute}
   @see{element}")
;; Constructed positionally: (make-any-name except).
(defstruct (any-name
            (:include name-class)
            (:constructor make-any-name (except)))
  "@short{This name class allows any name.}

   Exceptions are given as @code{any-name-except}.

   @see-slot{any-name-except}"
  (except (missing) :type (or null name-class)))

(setf (documentation 'any-name-except 'function)
      "@arg[instance]{an instance of @class{any-name}}
       @return{a @class{name-class} or @code{nil}}

       Return the name class @em{not} allowed by this @code{any-name},
       or @code{nil} if there is no such exception.")
;; Constructed positionally: (make-name uri lname).
(defstruct (name
            (:include name-class)
            (:constructor make-name (uri lname)))
  "@short{This name class allows only a specific name.}

   A specific namespace URI and local name are expected.

   @see-slot{name-uri}
   @see-slot{name-lname}"
  (uri (missing) :type string)
  (lname (missing) :type string))

(setf (documentation 'name-uri 'function)
      "@arg[instance]{an instance of @class{name}}
       @return{a string}
       Return the expected namespace URI.")

(setf (documentation 'name-lname 'function)
      "@arg[instance]{an instance of @class{name}}
       @return{a string}
       Return the expected local name.")
;; Constructed positionally: (make-ns-name uri except).
(defstruct (ns-name
            (:include name-class)
            (:constructor make-ns-name (uri except)))
  "@short{This name class allows all names in a specific namespace}, with
   possible exceptions.

   A specific namespace URI is expected.

   Exceptions are given as @code{ns-name-except}.

   @see-slot{ns-name-uri}
   @see-slot{ns-name-except}"
  (uri (missing) :type string)
  (except (missing) :type (or null name-class)))

(setf (documentation 'ns-name-uri 'function)
      "@arg[instance]{an instance of @class{ns-name}}
       @return{a string}
       Return the expected namespace URI.")

(setf (documentation 'ns-name-except 'function)
      "@arg[instance]{an instance of @class{ns-name}}
       @return{a @class{name-class} or @code{nil}}

       Return the name class @em{not} allowed by this @code{ns-name},
       or @code{nil} if there is no such exception.")
;; Constructed positionally: (make-name-class-choice a b).
(defstruct (name-class-choice
            (:include name-class)
            (:constructor make-name-class-choice (a b)))
  "@short{This name class represents the union of two other name classes.}

   @see-slot{name-class-choice-a}
   @see-slot{name-class-choice-b}"
  (a (missing) :type name-class)
  (b (missing) :type name-class))

(setf (documentation 'name-class-choice-a 'function)
      "@arg[instance]{an instance of @class{name-class-choice}}
       @return{a @class{name-class}}
       Returns the 'first' of two name classes that are allowed.
       @see{name-class-choice-b}")

(setf (documentation 'name-class-choice-b 'function)
      "@arg[instance]{an instance of @class{name-class-choice}}
       @return{a @class{name-class}}
       Returns the 'second' of two name classes that are allowed.
       @see{name-class-choice-a}")
;;; Combine the name classes in VALUES using ZIP with
;;; MAKE-NAME-CLASS-CHOICE as the combining constructor.
;;; NOTE(review): ZIP is defined elsewhere; presumably it folds the
;;; list into nested binary choices -- confirm.
(defun simplify-nc-choice (values)
  (zip (function make-name-class-choice) values))
604 ;;;; parser
;; Namespace URI of RELAX NG's own elements.  SKIP-TO-NATIVE and
;; SKIP-FOREIGN compare element namespaces against it to tell schema
;; elements apart from foreign annotations, so it must be the real
;; RELAX NG structure namespace and not the empty string.
(defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
;;; Advance SOURCE event by event until an :END-ELEMENT event is
;;; reached.  Each :START-ELEMENT on the way is handed to SKIP-FOREIGN,
;;; which rejects native elements; other events are passed over.
(defun skip-foreign* (source)
  (loop
    (let ((event (klacks:peek-next source)))
      (cond
        ((eq event :start-element)
         (skip-foreign source))
        ((eq event :end-element)
         (return))))))
;;; Consume events from SOURCE until it is positioned at a start tag in
;;; the RELAX NG namespace or at an end tag.  Start tags in any other
;;; namespace are passed to KLACKS:SERIALIZE-ELEMENT with a NIL
;;; handler on the way.
(defun skip-to-native (source)
  (loop
    (let ((event (klacks:peek source)))
      (cond
        ((eq event :end-element)
         (return))
        ((eq event :start-element)
         (when (equal (klacks:current-uri source) *rng-namespace*)
           (return))
         (klacks:serialize-element source nil))))
    (klacks:consume source)))
;;; Consume the current event of SOURCE, then skip ahead with
;;; SKIP-TO-NATIVE.
(defun consume-and-skip-to-native (source)
  (progn
    (klacks:consume source)
    (skip-to-native source)))
;;; Skip the element SOURCE is positioned at, which must be foreign.
;;; An element in the RELAX NG namespace is not allowed here and is
;;; reported as an error.
(defun skip-foreign (source)
  (let ((nativep (equal (klacks:current-uri source) *rng-namespace*)))
    (when nativep
      (rng-error source
                 "invalid schema: ~A not allowed here"
                 (klacks:current-lname source))))
  (klacks:serialize-element source nil))
(defun attribute (lname attrs)
  "@unexport{}"
  ;; Value of the attribute with local name LNAME and empty namespace
  ;; URI in the list ATTRS, or NIL if there is no such attribute.
  (let ((found (sax:find-attribute-ns "" lname attrs)))
    (and found
         (sax:attribute-value found))))
;; The four characters trimmed by NTC: tab (9), space (32), carriage
;; return (13) and line feed (10), in that order.
(defparameter *whitespace*
  (map 'string #'code-char '(9 32 13 10)))
;;; Used for the (n)ame, (t)ype and (c)ombine attributes: like
;;; ATTRIBUTE, but also strips the characters in *WHITESPACE* from
;;; both ends of the value.  SOURCE-OR-ATTRS is either an attribute
;;; list or a klacks source whose current attributes are used.
;;; Returns NIL if the attribute is absent.
(defun ntc (lname source-or-attrs)
  (let* ((attrs (cond
                  ((listp source-or-attrs) source-or-attrs)
                  (t (klacks:list-attributes source-or-attrs))))
         (found (sax:find-attribute-ns "" lname attrs)))
    (and found
         (string-trim *whitespace* (sax:attribute-value found)))))
;;; Evaluate BODY inside INVOKE-WITH-LIBRARY-AND-NS, i.e. with the
;;; datatypeLibrary and ns attributes found in ATTRS in effect.
(defmacro with-library-and-ns (attrs &body body)
  `(invoke-with-library-and-ns
    (lambda () ,@body)
    ,attrs))
;;; Call FN with *DATATYPE-LIBRARY*, *NAMESPACE-URI* and *NS* rebound
;;; according to the "datatypeLibrary" and "ns" attributes in ATTRS.
;;; Attributes that are absent leave the inherited library/namespace in
;;; place; *NS* is always the element's own "ns" value (or NIL).
;;; Signals an RNG-ERROR for a malformed datatypeLibrary URI.
(defun invoke-with-library-and-ns (fn attrs)
  (let* ((library-attr (attribute "datatypeLibrary" attrs))
         (ns-attr (attribute "ns" attrs))
         (*datatype-library*
          (if library-attr (escape-uri library-attr) *datatype-library*))
         (*namespace-uri* (or ns-attr *namespace-uri*))
         (*ns* ns-attr))
    ;; FIXME: This is a nasty hack -- just enough to make us pass the
    ;; Relax NG test suite.
    (when (and library-attr
               (plusp (length *datatype-library*))
               (or
                ;; Check the scheme; something has to follow it.
                (not (cl-ppcre:all-matches
                      "^[a-zA-Z][a-zA-Z0-9+.-]*:.+"
                      *datatype-library*))
                ;; No broken % escapes and no # allowed.
                (cl-ppcre:all-matches
                 "(%$|%.$|%[^0-9A-Fa-f][^0-9A-Fa-f]|#)"
                 *datatype-library*)))
      (rng-error nil "malformed datatypeLibrary: ~A" *datatype-library*))
    (funcall fn)))
;;; Parse the pattern element SOURCE is positioned at, dispatching on
;;; its local name to the matching P/... function, with the element's
;;; datatypeLibrary/ns attributes in effect.  An element with any
;;; other name is passed to SKIP-FOREIGN.
(defun p/pattern (source)
  (let* ((lname (klacks:current-lname source))
         (attrs (klacks:list-attributes source)))
    (with-library-and-ns attrs
      ;; FIND-SYMBOL returns NIL for names that are not among the
      ;; keywords below, which selects the default clause.
      (let ((tag (find-symbol lname :keyword)))
        (case tag
          (:|element|     (p/element source (ntc "name" attrs)))
          (:|attribute|   (p/attribute source (ntc "name" attrs)))
          (:|group|       (p/combination #'groupify source))
          (:|interleave|  (p/combination #'interleave-ify source))
          (:|choice|      (p/combination #'choice-ify source))
          (:|optional|    (p/optional source))
          (:|zeroOrMore|  (p/zero-or-more source))
          (:|oneOrMore|   (p/one-or-more source))
          (:|list|        (p/list source))
          (:|mixed|       (p/mixed source))
          (:|ref|         (p/ref source))
          (:|parentRef|   (p/parent-ref source))
          (:|empty|       (p/empty source))
          (:|text|        (p/text source))
          (:|value|       (p/value source))
          (:|data|        (p/data source))
          (:|notAllowed|  (p/not-allowed source))
          (:|externalRef| (p/external-ref source))
          (:|grammar|     (p/grammar source))
          (t              (skip-foreign source)))))))
;;; Parse one or more patterns up to the enclosing end tag and return
;;; them as a list in document order.  Signals an RNG-ERROR if no
;;; pattern was found.
(defun p/pattern+ (source)
  (let ((children nil))
    (loop
      (case (klacks:peek source)
        (:start-element
         ;; P/PATTERN may return NIL (e.g. for a skipped foreign
         ;; element); only real patterns are collected.
         (let ((p (p/pattern source)))
           (when p
             (push p children))))
        (:end-element
         (return))
        ;; Default clause: any other event is consumed.  Without the
        ;; explicit T head the form is unbalanced and the consume call
        ;; would be read as a CASE key.
        (t
         (klacks:consume source))))
    (unless children
      (rng-error source "empty element"))
    (nreverse children)))
;;; Parse at most one pattern up to the enclosing end tag.  Returns the
;;; pattern, or NIL if there was none; signals an RNG-ERROR when a
;;; second pattern follows one that has already been parsed.
(defun p/pattern? (source)
  (let ((result nil))
    (loop
      (skip-to-native source)
      (case (klacks:peek source)
        (:start-element
         (when result
           (rng-error source "at most one pattern expected here"))
         (setf result (p/pattern source)))
        (:end-element
         (return))
        ;; Default clause: any other event is consumed.  Without the
        ;; explicit T head the form is unbalanced and the consume call
        ;; would be read as a CASE key.
        (t
         (klacks:consume source))))
    result))
;;; Parse an <element> pattern.  NAME is the value of its "name"
;;; attribute, or NIL when the name is given as a name-class child.
;;; Returns a REF to a fresh anonymous DEFINITION whose child is the
;;; new ELEMENT pattern.
(defun p/element (source name)
  (klacks:expecting-element (source "element")
    (let ((elt (make-element)))
      (consume-and-skip-to-native source)
      (setf (pattern-name elt)
            (if name
                (destructure-name source name)
                (p/name-class source)))
      (skip-to-native source)
      ;; Several child patterns are combined into one group.
      (setf (pattern-child elt) (groupify (p/pattern+ source)))
      (let ((definition (make-definition :name (gensym "ANONYMOUS")
                                         :child elt)))
        (make-ref definition)))))
;; Bound to T by P/ATTRIBUTE while an attribute's name is being parsed;
;; the code that reads it is outside this part of the file.
(defvar *attribute-namespace-p* nil)
;;; Parse an <attribute> pattern.  NAME is the value of its "name"
;;; attribute, or NIL when the name is given as a name-class child.
;;; The content pattern is optional and defaults to a TEXT pattern.
(defun p/attribute (source name)
  (klacks:expecting-element (source "attribute")
    (let ((result (make-attribute)))
      (consume-and-skip-to-native source)
      (setf (pattern-name result)
            (let ((*attribute-namespace-p* t))
              (if name
                  ;; A "name" attribute is resolved against the
                  ;; element's own ns attribute only (*NS*), not the
                  ;; inherited namespace.
                  (let ((*namespace-uri* (or *ns* "")))
                    (destructure-name source name))
                  (p/name-class source))))
      (skip-to-native source)
      (let ((content (p/pattern? source)))
        (setf (pattern-child result)
              (or content (make-text))))
      result)))
;;; Parse the current element's child patterns (at least one) and
;;; return the result of calling ZIPPER on the list of them.
(defun p/combination (zipper source)
  (klacks:expecting-element (source)
    (consume-and-skip-to-native source)
    (let ((children (p/pattern+ source)))
      (funcall zipper children))))
;;; Parse <oneOrMore>: a ONE-OR-MORE around the group of its children.
(defun p/one-or-more (source)
  (klacks:expecting-element (source "oneOrMore")
    (consume-and-skip-to-native source)
    (make-one-or-more (groupify (p/pattern+ source)))))
;;; Parse <zeroOrMore>, which is represented as a choice between
;;; one-or-more of the grouped children and EMPTY.
(defun p/zero-or-more (source)
  (klacks:expecting-element (source "zeroOrMore")
    (consume-and-skip-to-native source)
    (make-choice (make-one-or-more (groupify (p/pattern+ source)))
                 (make-empty))))
;;; Parse <optional>, which is represented as a choice between the
;;; grouped children and EMPTY.
(defun p/optional (source)
  (klacks:expecting-element (source "optional")
    (consume-and-skip-to-native source)
    (make-choice (groupify (p/pattern+ source))
                 (make-empty))))
;;; Parse <list>: a LIST-PATTERN around the group of its children.
(defun p/list (source)
  (klacks:expecting-element (source "list")
    (consume-and-skip-to-native source)
    (make-list-pattern (groupify (p/pattern+ source)))))
;;; Parse <mixed>, which is represented as the grouped children
;;; interleaved with TEXT.
(defun p/mixed (source)
  (klacks:expecting-element (source "mixed")
    (consume-and-skip-to-native source)
    (make-interleave (groupify (p/pattern+ source))
                     (make-text))))
;;; Parse <ref name="...">.  Returns a REF to the definition of that
;;; name; if the name is not known yet, a placeholder definition
;;; without a child is registered first.
(defun p/ref (source)
  (klacks:expecting-element (source "ref")
    (let* ((name (ntc "name" source))
           (target (or (find-definition name)
                       (setf (find-definition name)
                             (make-definition :name name :child nil))))
           (ref (make-ref target)))
      ;; Only foreign content may follow inside <ref>.
      (skip-foreign* source)
      ref)))
;;; Parse <parentRef name="...">.  Like P/REF, but the definition is
;;; looked up (and, if necessary, registered) in the parent of the
;;; current grammar.
(defun p/parent-ref (source)
  (klacks:expecting-element (source "parentRef")
    (let* ((name (ntc "name" source))
           (grammar (grammar-parent *grammar*))
           (target (or (find-definition name grammar)
                       (setf (find-definition name grammar)
                             (make-definition :name name :child nil))))
           (ref (make-ref target)))
      ;; Only foreign content may follow inside <parentRef>.
      (skip-foreign* source)
      ref)))
;;; Parse <empty>: skip its (foreign-only) content and return an EMPTY
;;; pattern.
(defun p/empty (source)
  (klacks:expecting-element (source "empty")
    (progn
      (skip-foreign* source)
      (make-empty))))
;;; Parse <text>: skip its (foreign-only) content and return a TEXT
;;; pattern.
(defun p/text (source)
  (klacks:expecting-element (source "text")
    (progn
      (skip-foreign* source)
      (make-text))))
;;; Advance SOURCE up to the next :END-ELEMENT event and return the
;;; concatenation of the data of all :CHARACTERS events on the way
;;; (the empty string if there were none).  Other events are passed
;;; over.
(defun consume-and-parse-characters (source)
  ;; fixme: events other than character data are silently ignored.
  ;; Collecting into a string stream avoids re-copying the accumulated
  ;; text for every chunk of character data.
  (with-output-to-string (out)
    (loop
      (multiple-value-bind (key data) (klacks:peek-next source)
        (case key
          (:characters
           (write-string data out))
          (:end-element
           (return)))))))
;;; Parse <value>.  The element's text is parsed with the data type
;;; named by the "type" attribute in the current datatype library;
;;; without a "type" attribute the type "token" of the library "" is
;;; used.  Signals an RNG-ERROR if the type cannot be found.
(defun p/value (source)
  (klacks:expecting-element (source "value")
    (let* ((type-attr (ntc "type" source))
           (lexical (consume-and-parse-characters source))
           (ns *namespace-uri*)
           (type-name (or type-attr "token"))
           (library (if type-attr *datatype-library* ""))
           ;; Libraries are identified by a keyword named like the URI.
           (data-type (cxml-types:find-type
                       (and library (find-symbol library :keyword))
                       type-name
                       nil))
           (context (cxml-types:make-klacks-validation-context source)))
      (unless data-type
        (rng-error source "type not found: ~A/~A" type-name library))
      (make-value :string lexical
                  :value (cxml-types:parse data-type lexical context)
                  :type data-type
                  :ns ns))))
;;; Parse <data>: collect its <param> children and the optional
;;; trailing <except>, then look up the data type named by the "type"
;;; attribute in the current datatype library, passing the parameters
;;; along.  Signals an RNG-ERROR if the type is unknown or rejects the
;;; parameters.
(defun p/data (source)
  (klacks:expecting-element (source "data")
    (let* ((type (ntc "type" source))
           (params '())
           (except nil))
      (loop
        (multiple-value-bind (key uri lname)
            (klacks:peek-next source)
          ;; Only the event key and the local name are needed.
          (declare (ignore uri))
          (case key
            (:start-element
             (case (find-symbol lname :keyword)
               (:|param| (push (p/param source) params))
               (:|except|
                ;; <except> ends the content: nothing native may
                ;; follow it.
                (setf except (p/except-pattern source))
                (skip-to-native source)
                (return))
               (t (skip-foreign source))))
            (:end-element
             (return)))))
      ;; PARAMS was collected in reverse document order.
      (setf params (nreverse params))
      (let* ((dl *datatype-library*)
             ;; Libraries are identified by a keyword named like the URI.
             (data-type (cxml-types:find-type
                         (and dl (find-symbol dl :keyword))
                         type
                         params)))
        (unless data-type
          (rng-error source "type not found: ~A/~A" type dl))
        ;; The :ERROR marker is reported as invalid parameters.
        (when (eq data-type :error)
          (rng-error source "params not valid for type: ~A/~A/~A"
                     type dl params))
        (make-data
         :type data-type
         :params params
         :except except)))))
;;; Parse <param name="...">text</param> into a cxml-types parameter
;;; object.
(defun p/param (source)
  (klacks:expecting-element (source "param")
    ;; The name attribute has to be read before the text is consumed.
    (let* ((param-name (ntc "name" source))
           (param-text (consume-and-parse-characters source)))
      (cxml-types:make-param param-name param-text))))
;;; Parse an <except> child of <data>: its patterns (at least one) are
;;; combined with CHOICE-IFY, with the element's own
;;; datatypeLibrary/ns attributes in effect.
(defun p/except-pattern (source)
  (klacks:expecting-element (source "except")
    (let ((attrs (klacks:list-attributes source)))
      (with-library-and-ns attrs
        (klacks:consume source)
        (choice-ify (p/pattern+ source))))))
;;; Parse <notAllowed> and return a NOT-ALLOWED pattern.
(defun p/not-allowed (source)
  (klacks:expecting-element (source "notAllowed")
    (progn
      (consume-and-skip-to-native source)
      (make-not-allowed))))
;;; Parse STR as a URI, merged against BASE when BASE is given.
;;; Returns two values: the URI object, and the result of RNC-URI-P
;;; for STR (such URIs are first translated with FOLLOW-RNC-URI).
;;; Signals an RNG-ERROR for an empty or unparsable URI and for a
;;; file: URI that has a fragment.  SOURCE is only used to locate
;;; errors.
(defun safe-parse-uri (source str &optional base)
  (unless (plusp (length str))
    (rng-error source "missing URI"))
  (let* ((compactp (rnc-uri-p str))
         (target (if compactp (follow-rnc-uri str) str))
         (uri (handler-case
                  (cond
                    (base (puri:merge-uris target base))
                    (t (puri:parse-uri target)))
                (puri:uri-parse-error ()
                  (rng-error source "invalid URI: ~A" target)))))
    (when (and (eq (puri:uri-scheme uri) :file)
               (puri:uri-fragment uri))
      (rng-error source "Forbidden fragment in URI: ~A" target))
    (values uri compactp)))
;;; Return an xstream over the string STR that is named as the main
;;; document with URI as its location.
(defun named-string-xstream (str uri)
  (let ((xstream (cxml::string->xstream str))
        (stream-name (cxml::make-stream-name
                      :entity-name "main document"
                      :entity-kind :main
                      :uri uri)))
    (setf (cxml::xstream-name xstream) stream-name)
    xstream))
;;; Open the schema document at URI and return an xstream for it.
;;; With COMPACTP true, the file is read as characters and passed
;;; through UNCOMPACT-FILE; the resulting string is wrapped in an
;;; xstream named after URI.  Otherwise the document is opened through
;;; *ENTITY-RESOLVER*.
(defun xstream-open-schema (uri compactp)
  (if compactp
      (named-string-xstream
       ;; fixme: it would be nice to be able to use *entity-resolver*
       ;; here, but it delivers binary streams.
       ;;
       ;; UNCOMPACT-FILE's result is handed on as a string, so the
       ;; file is no longer needed once it returns; WITH-OPEN-FILE
       ;; makes sure the stream is closed, also on a non-local exit.
       (with-open-file (in (cxml::uri-to-pathname uri)
                           :element-type 'character
                           :direction :input)
         (uncompact-file in))
       uri)
      (cxml::xstream-open-extid* *entity-resolver* nil uri)))
;;; Parse <externalRef href="...">: resolve the href against the
;;; current xml:base, open that document and return the pattern parsed
;;; from its root element.  A URI that is already being included
;;; further up is reported as a looping include.
(defun p/external-ref (source)
  (klacks:expecting-element (source "externalRef")
    (let* ((attrs (klacks:list-attributes source))
           (href (escape-uri (attribute "href" attrs)))
           (base (klacks:current-xml-base source)))
      (multiple-value-bind (uri compactp)
          (safe-parse-uri source href base)
        (when (find uri *include-uri-stack* :test #'puri:uri=)
          (rng-error source "looping include"))
        (let ((pattern
               (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
                      (xstream (xstream-open-schema uri compactp)))
                 (klacks:with-open-source
                     (included (make-validating-source xstream))
                   (invoke-with-klacks-handler
                    (lambda ()
                      (klacks:find-event included :start-element)
                      ;; The referenced document starts out with the
                      ;; default (empty) datatype library.
                      (let ((*datatype-library* ""))
                        (p/pattern included)))
                    included)))))
          ;; Only foreign content may follow inside <externalRef>.
          (skip-foreign* source)
          pattern)))))
;;; Parse a <grammar> element.  Without GRAMMAR, a new grammar nested
;;; in the current one is created; its <start> is required and its
;;; pattern is returned after the definitions have been checked.  With
;;; GRAMMAR supplied, the content is processed into that grammar and
;;; NIL is returned.
(defun p/grammar (source &optional grammar)
  (klacks:expecting-element (source "grammar")
    (consume-and-skip-to-native source)
    (let ((*grammar* (or grammar (make-grammar *grammar*)))
          (includep grammar))
      (process-grammar-content* source)
      (cond
        (includep
         nil)
        (t
         (unless (grammar-start *grammar*)
           (rng-error source "no <start> in grammar"))
         (check-pattern-definitions source *grammar*)
         (defn-child (grammar-start *grammar*)))))))
;; Set by PROCESS-START to the start definition when *INCLUDE-BODY-P*
;; is true.
(defvar *include-start*)
;; NOTE(review): presumably the counterpart for <define>s seen in an
;; include body; its users are not in this part of the file.
(defvar *include-definitions*)
;;; Process the children of a grammar-like element up to its end tag:
;;; <start>, <define>, <div> and <include> are dispatched to their
;;; PROCESS-... functions, each with the element's
;;; datatypeLibrary/ns attributes in effect; any other element is
;;; passed to SKIP-FOREIGN.  With DISALLOW-INCLUDE true, an <include>
;;; is reported as an error.
(defun process-grammar-content* (source &key disallow-include)
  (loop
    (multiple-value-bind (key uri lname) (klacks:peek source)
      ;; Only the event key and the local name are needed.
      (declare (ignore uri))
      (ecase key
        ;; Text, comments and processing instructions between the
        ;; components carry no meaning here and are consumed; without
        ;; the last two keys a comment event would make ECASE fail.
        ((:characters :comment :processing-instruction)
         (klacks:consume source))
        (:start-element
         (with-library-and-ns (klacks:list-attributes source)
           (case (find-symbol lname :keyword)
             (:|start|
              (process-start source))
             (:|define| (process-define source))
             (:|div| (process-div source))
             (:|include|
              (when disallow-include
                (rng-error source "nested include not permitted"))
              (process-include source))
             ;; Default clause.  Without the explicit T head the form
             ;; is unbalanced and SKIP-FOREIGN would be read as a CASE
             ;; key.
             (t
              (skip-foreign source)))))
        (:end-element
         (return))))))
(defun process-start (source)
  "Parse a <start> element on SOURCE and record its pattern in the start
definition of *GRAMMAR*, creating that definition if necessary.  A second
<start> is merged with the existing child according to the combine
attribute; conflicting combine values or two definitions without combine
signal an RNG-ERROR."
  (klacks:expecting-element (source "start")
    (let* ((combine0 (ntc "combine" source))
           (combine
            (when combine0
              (find-symbol (string-upcase combine0) :keyword)))
           (child
            (progn
              (consume-and-skip-to-native source)
              (p/pattern source)))
           (pdefinition (grammar-start *grammar*)))
      (skip-foreign* source)
      ;; fixme: shared code with process-define
      (unless pdefinition
        (setf pdefinition (make-definition :name :start :child nil))
        (setf (grammar-start *grammar*) pdefinition))
      (when *include-body-p*
        (setf *include-start* pdefinition))
      (cond
        ;; A child is already present: combine with it, or note that the
        ;; original of a redefinition has now been seen.
        ((defn-child pdefinition)
         (ecase (defn-redefinition pdefinition)
           (:not-being-redefined
            (when (and combine
                       (defn-combine-method pdefinition)
                       (not (eq combine
                                (defn-combine-method pdefinition))))
              (rng-error source "conflicting combine values for <start>"))
            (unless combine
              (when (defn-head-p pdefinition)
                (rng-error source "multiple definitions for <start>"))
              (setf (defn-head-p pdefinition) t))
            (unless (defn-combine-method pdefinition)
              (setf (defn-combine-method pdefinition) combine))
            (setf (defn-child pdefinition)
                  (case (defn-combine-method pdefinition)
                    (:choice
                     (make-choice (defn-child pdefinition) child))
                    (:interleave
                     (make-interleave (defn-child pdefinition) child)))))
           (:being-redefined-and-no-original
            (setf (defn-redefinition pdefinition)
                  :being-redefined-and-original))
           (:being-redefined-and-original)))
        ;; First definition of <start>: just record it.
        (t
         (setf (defn-child pdefinition) child)
         (setf (defn-combine-method pdefinition) combine)
         (setf (defn-head-p pdefinition) (null combine))
         (setf (defn-redefinition pdefinition) :not-being-redefined))))))
(defun zip (constructor children)
  "Combine the list CHILDREN into a single object by applying the binary
function CONSTRUCTOR from the left: (a b c) becomes
(constructor (constructor a b) c).  A one-element list yields that element
unchanged; an empty list signals an RNG-ERROR."
  (when (null children)
    (rng-error nil "empty choice?"))
  ;; REDUCE is left-associative and returns a sole element without
  ;; calling CONSTRUCTOR, which is exactly the intended folding order.
  (reduce constructor children))
(defun choice-ify (children)
  "Fold the list CHILDREN into nested patterns built by MAKE-CHOICE."
  (zip #'make-choice children))

(defun groupify (children)
  "Fold the list CHILDREN into nested patterns built by MAKE-GROUP."
  (zip #'make-group children))

(defun interleave-ify (children)
  "Fold the list CHILDREN into nested patterns built by MAKE-INTERLEAVE."
  (zip #'make-interleave children))
(defun find-definition (name &optional (grammar *grammar*))
  "Look up NAME in the definitions table of GRAMMAR.  Returns the values
of GETHASH: the stored definition (or NIL) and a found-p flag."
  (let ((table (grammar-definitions grammar)))
    (gethash name table)))

(defun (setf find-definition) (newval name &optional (grammar *grammar*))
  "Store NEWVAL under NAME in the definitions table of GRAMMAR."
  (let ((table (grammar-definitions grammar)))
    (setf (gethash name table) newval)))
(defun process-define (source)
  "Parse a <define> element on SOURCE and record its pattern under its
name in *GRAMMAR*, creating the definition if necessary.  A further
definition of the same name is merged with the existing child according
to the combine attribute; conflicting combine values or two definitions
without combine signal an RNG-ERROR."
  (klacks:expecting-element (source "define")
    (let* ((name (ntc "name" source))
           (combine0 (ntc "combine" source))
           (combine (when combine0
                      (find-symbol (string-upcase combine0) :keyword)))
           (child (groupify
                   (progn
                     (consume-and-skip-to-native source)
                     (p/pattern+ source))))
           (pdefinition (find-definition name)))
      (unless pdefinition
        (setf pdefinition (make-definition :name name :child nil))
        (setf (find-definition name) pdefinition))
      (when *include-body-p*
        (push pdefinition *include-definitions*))
      (cond
        ;; A child is already present: combine with it, or note that the
        ;; original of a redefinition has now been seen.
        ((defn-child pdefinition)
         (case (defn-redefinition pdefinition)
           (:not-being-redefined
            (when (and combine
                       (defn-combine-method pdefinition)
                       (not (eq combine
                                (defn-combine-method pdefinition))))
              (rng-error source "conflicting combine values for ~A" name))
            (unless combine
              (when (defn-head-p pdefinition)
                (rng-error source "multiple definitions for ~A" name))
              (setf (defn-head-p pdefinition) t))
            (unless (defn-combine-method pdefinition)
              (setf (defn-combine-method pdefinition) combine))
            (setf (defn-child pdefinition)
                  (case (defn-combine-method pdefinition)
                    (:choice
                     (make-choice (defn-child pdefinition) child))
                    (:interleave
                     (make-interleave (defn-child pdefinition) child)))))
           (:being-redefined-and-no-original
            (setf (defn-redefinition pdefinition)
                  :being-redefined-and-original))
           (:being-redefined-and-original)))
        ;; First definition under this name: just record it.
        (t
         (setf (defn-child pdefinition) child)
         (setf (defn-combine-method pdefinition) combine)
         (setf (defn-head-p pdefinition) (null combine))
         (setf (defn-redefinition pdefinition) :not-being-redefined))))))
(defun process-div (source)
  "Parse a <div> element on SOURCE by processing its children as grammar
content of the enclosing grammar."
  (klacks:expecting-element (source "div")
    (progn
      (consume-and-skip-to-native source)
      (process-grammar-content* source))))
(defun reset-definition-for-include (defn)
  "Mark DEFN as being redefined with no original seen yet, and clear its
combine method and head flag."
  (setf (defn-combine-method defn) nil
        (defn-redefinition defn) :being-redefined-and-no-original
        (defn-head-p defn) nil))

(defun restore-definition (defn original)
  "Copy the combine method, redefinition state and head flag of ORIGINAL
back into DEFN."
  (setf (defn-combine-method defn) (defn-combine-method original)
        (defn-redefinition defn) (defn-redefinition original)
        (defn-head-p defn) (defn-head-p original)))
(defun process-include (source)
  "Parse an <include> element on SOURCE.  The element's own content is
processed first as overriding definitions, then the referenced grammar is
parsed into *GRAMMAR*.  Signals an RNG-ERROR for a looping include or when
an overriding start/define has no counterpart in the included grammar.
Returns NIL."
  (klacks:expecting-element (source "include")
    (let* ((href
            (escape-uri (attribute "href" (klacks:list-attributes source))))
           (base (klacks:current-xml-base source))
           (*include-start* nil)
           (*include-definitions* '()))
      (multiple-value-bind (uri compactp)
          (safe-parse-uri source href base)
        (consume-and-skip-to-native source)
        ;; Collect the overriding definitions from the include body.
        (let ((*include-body-p* t))
          (process-grammar-content* source :disallow-include t))
        (flet ((snapshot-and-reset (defn)
                 ;; Keep a copy of the definition's bookkeeping slots and
                 ;; put the live object into "being redefined" state.
                 (let ((saved (copy-structure defn)))
                   (reset-definition-for-include defn)
                   saved))
               (original-missing-p (defn)
                 (eq (defn-redefinition defn)
                     :being-redefined-and-no-original)))
          (let ((saved-start
                 (and *include-start*
                      (snapshot-and-reset *include-start*)))
                (saved-defns
                 (mapcar #'snapshot-and-reset *include-definitions*)))
            (when (find uri *include-uri-stack* :test #'puri:uri=)
              (rng-error source "looping include"))
            (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
                   (xstream (xstream-open-schema uri compactp)))
              (klacks:with-open-source
                  (included (make-validating-source xstream))
                (invoke-with-klacks-handler
                 (lambda ()
                   (klacks:find-event included :start-element)
                   (let ((*datatype-library* ""))
                     (p/grammar included *grammar*)))
                 included))
              ;; Every override must have met its original by now.
              (when saved-start
                (when (original-missing-p *include-start*)
                  (rng-error source
                             "start not found in redefinition of grammar"))
                (restore-definition *include-start* saved-start))
              (dolist (saved saved-defns)
                (let ((defn (gethash (defn-name saved)
                                     (grammar-definitions *grammar*))))
                  (when (original-missing-p defn)
                    (rng-error source "redefinition not found in grammar"))
                  (restore-definition defn saved)))
              nil)))))))
(defun check-pattern-definitions (source grammar)
  "Signal an RNG-ERROR if the start definition or any named definition of
GRAMMAR is still waiting for the original of a redefinition, or if a named
definition has no child.  Returns NIL."
  (flet ((original-missing-p (defn)
           (eq (defn-redefinition defn) :being-redefined-and-no-original)))
    (let ((start (grammar-start grammar)))
      (when (and start (original-missing-p start))
        (rng-error source "start not found in redefinition of grammar")))
    (maphash (lambda (name defn)
               (declare (ignore name))
               (when (original-missing-p defn)
                 (rng-error source "redefinition not found in grammar"))
               (unless (defn-child defn)
                 (rng-error source "unresolved reference to ~A"
                            (defn-name defn))))
             (grammar-definitions grammar))))
;; P/NAME-CLASS binds these to NIL while parsing the <except> child of
;; anyName / nsName, where those name classes are not permitted.
(defvar *any-name-allowed-p* t)
(defvar *ns-name-allowed-p* t)

(defun destructure-name (source qname)
  "Decode QNAME in the namespace context of SOURCE and return the name
object built by MAKE-NAME.  An unprefixed name falls back to
*NAMESPACE-URI*.  While *ATTRIBUTE-NAMESPACE-P* is true, the name xmlns in
no namespace and any name in the xmlns namespace signal an RNG-ERROR."
  (multiple-value-bind (uri lname)
      (klacks:decode-qname qname source)
    (setf uri (or uri *namespace-uri*))
    (when (and *attribute-namespace-p*
               ;; RELAX NG 4.16: only these two cases are forbidden.
               ;; Testing the namespace against "" here would reject
               ;; every attribute name that has no namespace.
               (or (and (equal lname "xmlns") (equal uri ""))
                   (equal uri "http://www.w3.org/2000/xmlns")))
      (rng-error source "namespace attribute not permitted"))
    (make-name uri lname)))
(defun p/name-class (source)
  "Parse one name class element (name, anyName, nsName or choice) on
SOURCE and return the corresponding name class object.  Any other element
signals an RNG-ERROR."
  (klacks:expecting-element (source)
    (with-library-and-ns (klacks:list-attributes source)
      (case (find-symbol (klacks:current-lname source) :keyword)
        (:|name|
         (let ((qname (string-trim *whitespace*
                                   (consume-and-parse-characters source))))
           (destructure-name source qname)))
        (:|anyName|
         (unless *any-name-allowed-p*
           (rng-error source "anyname not permitted in except"))
         (klacks:consume source)
         (prog1
             ;; anyName may not occur inside its own <except>.
             (let ((*any-name-allowed-p* nil))
               (make-any-name (p/except-name-class? source)))
           (skip-to-native source)))
        (:|nsName|
         (unless *ns-name-allowed-p*
           (rng-error source "nsname not permitted in except"))
         (let ((uri *namespace-uri*)
               (*any-name-allowed-p* nil)
               (*ns-name-allowed-p* nil))
           ;; RELAX NG 4.16: an attribute's nsName must not select the
           ;; xmlns namespace.  The empty namespace is legal here.
           (when (and *attribute-namespace-p*
                      (equal uri "http://www.w3.org/2000/xmlns"))
             (rng-error source "namespace attribute not permitted"))
           (klacks:consume source)
           (prog1
               (make-ns-name uri (p/except-name-class? source))
             (skip-to-native source))))
        (:|choice|
         (klacks:consume source)
         (simplify-nc-choice (p/name-class* source)))
        (t
         (rng-error source "invalid child in except"))))))
(defun p/name-class* (source)
  "Parse name class elements from SOURCE up to the next end-element event
and return them as a list in document order.  Character data in between
is consumed and discarded."
  (loop
    for key = (progn
                (skip-to-native source)
                (klacks:peek source))
    until (eq key :end-element)
    when (eq key :characters)
      do (klacks:consume source)
    when (eq key :start-element)
      collect (p/name-class source)))
(defun p/except-name-class? (source)
  "If the next native event on SOURCE starts an <except> element, parse it
with P/EXCEPT-NAME-CLASS and return the result; otherwise return NIL
without consuming the event."
  (skip-to-native source)
  (multiple-value-bind (key uri lname)
      (klacks:peek source)
    (declare (ignore uri))
    ;; Compare the local name directly.  Going through FIND-SYMBOL in the
    ;; keyword package only works if a keyword of that name happens to
    ;; have been interned already.
    (when (and (eq key :start-element)
               (equal lname "except"))
      (p/except-name-class source))))
(defun p/except-name-class (source)
  "Parse an <except> element containing name classes.  Several children
are merged with SIMPLIFY-NC-CHOICE; a single child is returned as is."
  (klacks:expecting-element (source "except")
    (with-library-and-ns (klacks:list-attributes source)
      (klacks:consume source)
      (let ((name-classes (p/name-class* source)))
        (cond
          ((rest name-classes) (simplify-nc-choice name-classes))
          (t (first name-classes)))))))
(defun escape-uri (string)
  "Return a copy of STRING, converted with CXML::ROD-TO-UTF8-STRING, in
which every character with code <= 32 or >= 127 and each of the
characters < > \" { } | \\ ^ ` is replaced by a %XX hexadecimal escape."
  (let ((octets (cxml::rod-to-utf8-string string)))
    (with-output-to-string (out)
      (map nil
           (lambda (c)
             (let ((code (char-code c)))
               (if (and (< 32 code 127)
                        (not (find c "<>\"{}|\\^`")))
                   (write-char c out)
                   (format out "%~2,'0X" code))))
           octets))))
1303 ;;;; unparsing
;; Both are bound by SERIALIZE-SCHEMA: a table from definition objects to
;; the names they are written under, and a table from names already used
;; to the definition using them.
(defvar *definitions-to-names*)
(defvar *seen-names*)

(defun serialization-name (defn)
  "Return the name DEFN is serialized under, choosing and recording one on
first use.  If the definition's own name is already taken, a suffix made
from the current size of *SEEN-NAMES* is appended."
  (let ((known (gethash defn *definitions-to-names*)))
    (cond
      (known known)
      (t
       (let* ((own-name (defn-name defn))
              (name (if (gethash own-name *seen-names*)
                        (format nil "~A-~D"
                                own-name
                                (hash-table-count *seen-names*))
                        own-name)))
         (setf (gethash name *seen-names*) defn)
         (setf (gethash defn *definitions-to-names*) name))))))
(defun serialize-schema (schema sink)
  "@arg[schema]{a Relax NG @class{schema}}
   @arg[sink]{a SAX handler}
   @return{the result of @code{sax:end-document}}
   @short{This function serializes a parsed Relax NG back into XML syntax.}

   Note that the schema represented in memory has gone through simplification
   and is textually different from the original XML document.

   @see{parse-schema}"
  (cxml:with-xml-output sink
    (let ((*definitions-to-names* (make-hash-table))
          (*seen-names* (make-hash-table :test 'equal)))
      (cxml:with-element "grammar"
        (cxml:with-element "start"
          (serialize-pattern (schema-start schema)))
        ;; Serializing a definition can register further definitions in
        ;; *definitions-to-names* (through SERIALIZATION-NAME).  Adding
        ;; entries to a hash table while iterating over it is undefined,
        ;; so take a snapshot of the not-yet-written definitions, write
        ;; them, and repeat until a pass finds nothing new.
        (let ((written (make-hash-table)))
          (loop
            (let ((pending
                   (loop
                     for defn being each hash-key in *definitions-to-names*
                     unless (gethash defn written)
                       collect defn)))
              (when (null pending)
                (return))
              (dolist (defn pending)
                (setf (gethash defn written) t)
                (serialize-definition defn)))))))))
(defun serialize-pattern (pattern)
  "Write PATTERN and, recursively, its children as Relax NG XML elements
to the current cxml output.  References are written as <ref> elements
named by SERIALIZATION-NAME."
  (flet ((datatype-attributes ()
           ;; Relax NG spells this attribute datatypeLibrary.
           (let ((type (pattern-type pattern)))
             (cxml:attribute "datatypeLibrary"
                             (symbol-name (cxml-types:type-library type)))
             (cxml:attribute "type" (cxml-types:type-name type)))))
    (etypecase pattern
      (element
       (cxml:with-element "element"
         (serialize-name (pattern-name pattern))
         (serialize-pattern (pattern-child pattern))))
      (attribute
       (cxml:with-element "attribute"
         (serialize-name (pattern-name pattern))
         (serialize-pattern (pattern-child pattern))))
      (%combination
       (cxml:with-element
           (etypecase pattern
             (group "group")
             (interleave "interleave")
             (choice "choice"))
         (serialize-pattern (pattern-a pattern))
         (serialize-pattern (pattern-b pattern))))
      (one-or-more
       (cxml:with-element "oneOrMore"
         (serialize-pattern (pattern-child pattern))))
      (list-pattern
       (cxml:with-element "list"
         (serialize-pattern (pattern-child pattern))))
      (ref
       (cxml:with-element "ref"
         (cxml:attribute "name"
                         (serialization-name (pattern-target pattern)))))
      (empty
       (cxml:with-element "empty"))
      (not-allowed
       (cxml:with-element "notAllowed"))
      (text
       (cxml:with-element "text"))
      (value
       (cxml:with-element "value"
         (datatype-attributes)
         (cxml:attribute "ns" (pattern-ns pattern))
         (cxml:text (pattern-string pattern))))
      (data
       ;; A data pattern is a <data> element; <value> takes neither
       ;; <param> nor <except> children.
       (cxml:with-element "data"
         (datatype-attributes)
         (dolist (param (pattern-params pattern))
           (cxml:with-element "param"
             (cxml:attribute "name" (cxml-types:param-name param))
             (cxml:text (cxml-types:param-value param))))
         (when (pattern-except pattern)
           (cxml:with-element "except"
             (serialize-pattern (pattern-except pattern)))))))))
(defun serialize-definition (defn)
  "Write DEFN as a <define> element: its serialization name as the name
attribute, followed by its child pattern."
  (let ((name (serialization-name defn)))
    (cxml:with-element "define"
      (cxml:attribute "name" name)
      (serialize-pattern (defn-child defn)))))
(defun serialize-name (name)
  "Write the name class NAME as a <name>, <anyName>, <nsName> or <choice>
element to the current cxml output, including an <except> child where the
name class has one."
  (etypecase name
    (name
     (cxml:with-element "name"
       (cxml:attribute "ns" (name-uri name))
       (cxml:text (name-lname name))))
    (any-name
     (cxml:with-element "anyName"
       (when (any-name-except name)
         (serialize-except-name (any-name-except name)))))
    (ns-name
     ;; A namespace-restricted wildcard is <nsName>; writing it as
     ;; <anyName> would make it match names in every namespace.
     (cxml:with-element "nsName"
       (cxml:attribute "ns" (ns-name-uri name))
       (when (ns-name-except name)
         (serialize-except-name (ns-name-except name)))))
    (name-class-choice
     (cxml:with-element "choice"
       (serialize-name (name-class-choice-a name))
       (serialize-name (name-class-choice-b name))))))
(defun serialize-except-name (spec)
  "Write the name class SPEC wrapped in an <except> element."
  (cxml:with-element "except"
    (progn
      (serialize-name spec))))
1423 ;;;; simplification
1425 ;;; 4.1 Annotations
1426 ;;; Foreign attributes and elements are removed implicitly while parsing.
1428 ;;; 4.2 Whitespace
1429 ;;; All character data is discarded while parsing (which can only be
1430 ;;; whitespace after validation).
1432 ;;; Whitespace in name, type, and combine attributes is stripped while
1433 ;;; parsing. Ditto for <name/>.
1435 ;;; 4.3. datatypeLibrary attribute
1436 ;;; Escaping is done by p/pattern.
1437 ;;; Attribute value defaulting is done using *datatype-library*; only
1438 ;;; p/data and p/value record the computed value.
1440 ;;; 4.4. type attribute of value element
1441 ;;; Done by p/value.
1443 ;;; 4.5. href attribute
1444 ;;; Escaping is done by process-include and p/external-ref.
1446 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
1447 ;;; but that requires xstream hacking.
1449 ;;; 4.6. externalRef element
1450 ;;; Done by p/external-ref.
1452 ;;; 4.7. include element
1453 ;;; Done by process-include.
1455 ;;; 4.8. name attribute of element and attribute elements
1456 ;;; `name' is stored as a slot, not a child. Done by p/element and
1457 ;;; p/attribute.
1459 ;;; 4.9. ns attribute
1460 ;;; done by p/name-class, p/value, p/element, p/attribute
1462 ;;; 4.10. QNames
1463 ;;; done by p/name-class
;;; 4.11. div element
;;;   We never create these in the first place.
;;; 4.12., 4.13., 4.14., 4.15.
;;;   Done when the objects are created.
;;; 4.16.
;;;   p/name-class
;;;   -- except for the part about datatypes
;;; 4.17., 4.18., 4.19.
;;;   Handled through the grammar and definition objects, as described by
;;;   James Clark.
;;;
;;;   No renaming is done at this stage, because references are resolved
;;;   by building the graph structure between ref and definition, at which
;;;   point names have already been resolved.  Instead, we rename when
;;;   serializing.
;; An element starts a new nesting level for the recursion check.
(defmethod check-recursion ((pattern element) depth)
  (let ((inner (1+ depth)))
    (check-recursion (pattern-child pattern) inner)))

;; A ref records the depth at which it is being visited.  Meeting it again
;; at that same depth signals infinite recursion; once its target has been
;; checked the mark becomes T and the ref is not descended into again.
(defmethod check-recursion ((pattern ref) depth)
  (let ((mark (pattern-crdepth pattern)))
    (cond
      ((eql mark depth)
       (rng-error nil "infinite recursion in ~A"
                  (defn-name (pattern-target pattern))))
      ((null mark)
       (setf (pattern-crdepth pattern) depth)
       (check-recursion (defn-child (pattern-target pattern)) depth)
       (setf (pattern-crdepth pattern) t)))))

(defmethod check-recursion ((pattern %parent) depth)
  (let ((child (pattern-child pattern)))
    (check-recursion child depth)))

(defmethod check-recursion ((pattern %combination) depth)
  (check-recursion (pattern-a pattern) depth)
  (check-recursion (pattern-b pattern) depth))

;; Leaves have nothing to descend into.
(defmethod check-recursion ((pattern %leaf) depth)
  (declare (ignore depth))
  nil)

(defmethod check-recursion ((pattern data) depth)
  (let ((except (pattern-except pattern)))
    (when except
      (check-recursion except depth))))
1511 ;;;; 4.20
1513 ;;; %PARENT
;; An element keeps its place even if its content folds to notAllowed.
(defmethod fold-not-allowed ((pattern element))
  (let ((folded (fold-not-allowed (pattern-child pattern))))
    (setf (pattern-child pattern) folded)
    pattern))

;; Any other parent is replaced by its child when that child folds to
;; notAllowed.
(defmethod fold-not-allowed ((pattern %parent))
  (let ((folded (fold-not-allowed (pattern-child pattern))))
    (setf (pattern-child pattern) folded)
    (if (typep folded 'not-allowed)
        folded
        pattern)))

;; Shared first step for group, interleave and choice: fold both operands.
(defmethod fold-not-allowed ((pattern %combination))
  (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern))
        (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
  pattern)

;; A group with a notAllowed operand is itself notAllowed.
(defmethod fold-not-allowed ((pattern group))
  (call-next-method)
  (let ((a (pattern-a pattern))
        (b (pattern-b pattern)))
    (cond
      ((typep a 'not-allowed) a)
      ((typep b 'not-allowed) b)
      (t pattern))))

;; An interleave with a notAllowed operand is itself notAllowed.
(defmethod fold-not-allowed ((pattern interleave))
  (call-next-method)
  (let ((a (pattern-a pattern))
        (b (pattern-b pattern)))
    (cond
      ((typep a 'not-allowed) a)
      ((typep b 'not-allowed) b)
      (t pattern))))

;; A choice with a notAllowed operand reduces to the other operand.
(defmethod fold-not-allowed ((pattern choice))
  (call-next-method)
  (let ((a (pattern-a pattern))
        (b (pattern-b pattern)))
    (cond
      ((typep a 'not-allowed) b)
      ((typep b 'not-allowed) a)
      (t pattern))))

;;; LEAF

(defmethod fold-not-allowed ((pattern %leaf))
  pattern)

;; A data pattern drops an except that folds to notAllowed.
(defmethod fold-not-allowed ((pattern data))
  (when (pattern-except pattern)
    (let ((folded (fold-not-allowed (pattern-except pattern))))
      (setf (pattern-except pattern)
            (if (typep folded 'not-allowed)
                nil
                folded))))
  pattern)

;;; REF

(defmethod fold-not-allowed ((pattern ref))
  pattern)
1574 ;;;; 4.21
1576 ;;; %PARENT
;; oneOrMore of empty is just empty.
(defmethod fold-empty ((pattern one-or-more))
  (call-next-method)
  (let ((child (pattern-child pattern)))
    (if (typep child 'empty)
        child
        pattern)))

(defmethod fold-empty ((pattern %parent))
  (let ((folded (fold-empty (pattern-child pattern))))
    (setf (pattern-child pattern) folded)
    pattern))

;; Shared first step for group, interleave and choice: fold both operands.
(defmethod fold-empty ((pattern %combination))
  (setf (pattern-a pattern) (fold-empty (pattern-a pattern))
        (pattern-b pattern) (fold-empty (pattern-b pattern)))
  pattern)

;; A group with an empty operand reduces to the other operand.
(defmethod fold-empty ((pattern group))
  (call-next-method)
  (let ((a (pattern-a pattern))
        (b (pattern-b pattern)))
    (cond
      ((typep a 'empty) b)
      ((typep b 'empty) a)
      (t pattern))))

;; An interleave with an empty operand reduces to the other operand.
(defmethod fold-empty ((pattern interleave))
  (call-next-method)
  (let ((a (pattern-a pattern))
        (b (pattern-b pattern)))
    (cond
      ((typep a 'empty) b)
      ((typep b 'empty) a)
      (t pattern))))
;; A choice keeps an empty operand, but normalizes its position: when the
;; second operand is empty, the two operands are swapped so that empty
;; comes first.  A choice of two empties reduces to empty.
(defmethod fold-empty ((pattern choice))
  (call-next-method)
  (if (typep (pattern-b pattern) 'empty)
      (cond
        ((typep (pattern-a pattern) 'empty)
         (pattern-a pattern))
        (t
         (rotatef (pattern-a pattern) (pattern-b pattern))
         pattern))
      pattern))
1622 ;;; LEAF
;; Leaves are returned unchanged.
(defmethod fold-empty ((pattern %leaf))
  pattern)

;; Only the except pattern of a data pattern, if any, is folded.
(defmethod fold-empty ((pattern data))
  (let ((except (pattern-except pattern)))
    (when except
      (setf (pattern-except pattern) (fold-empty except))))
  pattern)

;;; REF

(defmethod fold-empty ((pattern ref))
  pattern)
1638 ;;;; name class overlap
1640 ;;; fixme: memorize this stuff?
;; Namespace URI and local name used by REPRESENTATIVES as stand-ins for
;; "some name" matched by a wildcard name class.
(defparameter !uri (string (code-char 1)))
(defparameter !lname "")

(defun classes-overlap-p (nc1 nc2)
  "Return true if some representative name of NC1 or of NC2 is contained
in both name classes."
  (flet ((shared-p (representative)
           (destructuring-bind (uri . lname) representative
             (and (contains nc1 uri lname)
                  (contains nc2 uri lname)))))
    (or (some #'shared-p (representatives nc1))
        (some #'shared-p (representatives nc2)))))
;; Each method returns a fresh list of (uri . lname) conses.

(defmethod representatives ((nc any-name))
  (let ((except (any-name-except nc)))
    (cons (cons !uri !lname)
          (when except
            (representatives except)))))

(defmethod representatives ((nc ns-name))
  (let ((except (ns-name-except nc)))
    (cons (cons (ns-name-uri nc) !lname)
          (when except
            (representatives except)))))

(defmethod representatives ((nc name))
  (let ((pair (cons (name-uri nc) (name-lname nc))))
    (list pair)))

(defmethod representatives ((nc name-class-choice))
  (let ((left (representatives (name-class-choice-a nc)))
        (right (representatives (name-class-choice-b nc))))
    (nconc left right)))
1672 ;;;; 7.1
(defun finalize-definitions (pattern)
  "Walk PATTERN destructively and return two values: the resulting pattern
and a list of the definitions reached through refs.  A ref is kept only
when the child of its target definition is an element; otherwise the ref
is replaced by a copy of that child."
  (let ((defns (make-hash-table)))
    (labels ((recurse (p)
               (cond
                 ((typep p 'ref)
                  (let ((target (pattern-target p)))
                    ;; Process each definition only once; marking it
                    ;; before descending also stops cycles.
                    (unless (gethash target defns)
                      (setf (gethash target defns) t)
                      (setf (defn-child target)
                            (recurse (defn-child target))))
                    (if (typep (defn-child target) 'element)
                        p
                        (copy-pattern-tree (defn-child target)))))
                 (t
                  (etypecase p
                    (data
                     (when (pattern-except p)
                       (setf (pattern-except p)
                             (recurse (pattern-except p)))))
                    (%parent
                     (setf (pattern-child p) (recurse (pattern-child p))))
                    (%combination
                     (setf (pattern-a p) (recurse (pattern-a p)))
                     (setf (pattern-b p) (recurse (pattern-b p))))
                    (%leaf))
                  p))))
      (values
       (recurse pattern)
       (loop
         for defn being each hash-key in defns
         collect defn)))))
(defun copy-pattern-tree (pattern)
  "Return a copy of PATTERN in which the pattern itself and all patterns
below it are fresh structures made with COPY-STRUCTURE.  Refs and leaves
are copied but not descended into."
  (let ((copy (copy-structure pattern)))
    (etypecase pattern
      (data
       (let ((except (pattern-except pattern)))
         (when except
           (setf (pattern-except copy) (copy-pattern-tree except)))))
      (%parent
       (setf (pattern-child copy)
             (copy-pattern-tree (pattern-child pattern))))
      (%combination
       (setf (pattern-a copy) (copy-pattern-tree (pattern-a pattern)))
       (setf (pattern-b copy) (copy-pattern-tree (pattern-b pattern))))
      ((or %leaf ref)))
    copy))
;; Context flags consulted by the CHECK-RESTRICTIONS methods.  Each is
;; bound to true while the contents of the corresponding construct are
;; being checked.
(defparameter *in-attribute-p* nil)
(defparameter *in-one-or-more-p* nil)
(defparameter *in-one-or-more//group-or-interleave-p* nil)
(defparameter *in-list-p* nil)
(defparameter *in-data-except-p* nil)
(defparameter *in-start-p* nil)

(defun check-start-restrictions (pattern)
  "Run CHECK-RESTRICTIONS on PATTERN with *IN-START-P* bound to true and
return its values."
  (let ((*in-start-p* t))
    (progn
      (check-restrictions pattern))))
(defun content-type-max (a b)
  "Return the greater of the content types A and B, ordered
:EMPTY < :COMPLEX < :SIMPLE, or NIL if either is NIL."
  (cond
    ((not (and a b)) nil)
    ((eq a :empty) b)
    ((eq b :empty) a)
    ((eq a :complex) b)
    (t :simple)))

(defun groupable-max (a b)
  "Return (CONTENT-TYPE-MAX A B) if the content types A and B may be
grouped, i.e. one of them is :EMPTY or both are :COMPLEX; otherwise NIL."
  (let ((groupable-p (or (eq a :empty)
                         (eq b :empty)
                         (and (eq a :complex)
                              (eq b :complex)))))
    (when groupable-p
      (content-type-max a b))))
(defun assert-name-class-finite (nc)
  "Signal an RNG-ERROR if the name class NC contains an anyName or nsName
anywhere, i.e. if it matches infinitely many names."
  (flet ((reject ()
           (rng-error
            nil
            "infinite attribute name class outside of one-or-more")))
    (etypecase nc
      (any-name (reject))
      (ns-name (reject))
      (name)
      (name-class-choice
       (progn
         (assert-name-class-finite (name-class-choice-a nc))
         (assert-name-class-finite (name-class-choice-b nc)))))))
;; Returns three values: :EMPTY (or NIL if the child's check returned NIL),
;; a one-element list with the attribute's name class, and NIL.
(defmethod check-restrictions ((pattern attribute))
  (flet ((forbid (flag message)
           (when flag
             (rng-error nil message))))
    (forbid *in-attribute-p* "nested attribute not allowed")
    (forbid *in-one-or-more//group-or-interleave-p*
            "attribute not allowed in oneOrMore//group, oneOrMore//interleave")
    (forbid *in-list-p* "attribute in list not allowed")
    (forbid *in-data-except-p* "attribute in data/except not allowed")
    (forbid *in-start-p* "attribute in start not allowed"))
  (let ((*in-attribute-p* t)
        (name-class (pattern-name pattern)))
    ;; Wildcard attribute names are only permitted under oneOrMore.
    (unless *in-one-or-more-p*
      (assert-name-class-finite name-class))
    (values (and (check-restrictions (pattern-child pattern))
                 :empty)
            (list (pattern-name pattern))
            nil)))
;; Returns four values in the order used by all CHECK-RESTRICTIONS
;; methods: content type, attribute name classes, element name classes,
;; text-p.  The ref is not descended into; it contributes only the name
;; class of its target's child, in the element slot.
(defmethod check-restrictions ((pattern ref))
  (when *in-attribute-p*
    (rng-error nil "ref in attribute not allowed"))
  (when *in-list-p*
    (rng-error nil "ref in list not allowed"))
  (when *in-data-except-p*
    (rng-error nil "ref in data/except not allowed"))
  (values :complex
          ;; No attribute name classes.  Without this value the element
          ;; name below would land in the attribute slot and take part in
          ;; the attribute overlap checks of group and interleave.
          nil
          (list (pattern-name (defn-child (pattern-target pattern))))
          nil))
;; Passes the child's name classes and text flag through; the content
;; type is the child's if it is groupable with itself, else NIL.
(defmethod check-restrictions ((pattern one-or-more))
  (flet ((forbid (flag message)
           (when flag
             (rng-error nil message))))
    (forbid *in-data-except-p* "oneOrMore in data/except not allowed")
    (forbid *in-start-p* "one-or-more in start not allowed"))
  (let ((*in-one-or-more-p* t))
    (multiple-value-bind (content-type attributes elements textp)
        (check-restrictions (pattern-child pattern))
      (values (groupable-max content-type content-type)
              attributes
              elements
              textp))))
;; Checks both operands, rejects overlapping attribute name classes
;; between them, and returns the merged results.
(defmethod check-restrictions ((pattern group))
  (flet ((forbid (flag message)
           (when flag
             (rng-error nil message))))
    (forbid *in-data-except-p* "group in data/except not allowed")
    (forbid *in-start-p* "group in start not allowed"))
  (let ((*in-one-or-more//group-or-interleave-p*
         *in-one-or-more-p*))
    (multiple-value-bind (type-a attrs-a elems-a text-a)
        (check-restrictions (pattern-a pattern))
      (multiple-value-bind (type-b attrs-b elems-b text-b)
          (check-restrictions (pattern-b pattern))
        (loop for nc1 in attrs-a
              do (loop for nc2 in attrs-b
                       when (classes-overlap-p nc1 nc2)
                         do (rng-error
                             nil
                             "attribute name overlap in group: ~A ~A"
                             nc1 nc2)))
        (values (groupable-max type-a type-b)
                (append attrs-a attrs-b)
                (append elems-a elems-b)
                (or text-a text-b))))))
;; Checks both operands, rejects overlapping attribute or element name
;; classes between them as well as text on both sides, and returns the
;; merged results.
(defmethod check-restrictions ((pattern interleave))
  (flet ((forbid (flag message)
           (when flag
             (rng-error nil message))))
    (forbid *in-list-p* "interleave in list not allowed")
    (forbid *in-data-except-p* "interleave in data/except not allowed")
    (forbid *in-start-p* "interleave in start not allowed"))
  (let ((*in-one-or-more//group-or-interleave-p*
         *in-one-or-more-p*))
    (multiple-value-bind (type-a attrs-a elems-a text-a)
        (check-restrictions (pattern-a pattern))
      (multiple-value-bind (type-b attrs-b elems-b text-b)
          (check-restrictions (pattern-b pattern))
        (loop for nc1 in attrs-a
              do (loop for nc2 in attrs-b
                       when (classes-overlap-p nc1 nc2)
                         do (rng-error
                             nil
                             "attribute name overlap in interleave: ~A ~A"
                             nc1 nc2)))
        (loop for nc1 in elems-a
              do (loop for nc2 in elems-b
                       when (classes-overlap-p nc1 nc2)
                         do (rng-error
                             nil
                             "element name overlap in interleave: ~A ~A"
                             nc1 nc2)))
        (when (and text-a text-b)
          (rng-error nil "multiple text permitted by interleave"))
        (values (groupable-max type-a type-b)
                (append attrs-a attrs-b)
                (append elems-a elems-b)
                (or text-a text-b))))))
;; A choice imposes no restrictions of its own; it merges the results of
;; its two operands.
(defmethod check-restrictions ((pattern choice))
  (multiple-value-bind (type-a attrs-a elems-a text-a)
      (check-restrictions (pattern-a pattern))
    (multiple-value-bind (type-b attrs-b elems-b text-b)
        (check-restrictions (pattern-b pattern))
      (let ((content-type (content-type-max type-a type-b))
            (attributes (append attrs-a attrs-b))
            (elements (append elems-a elems-b)))
        (values content-type attributes elements (or text-a text-b))))))
;; Checks the child with *in-list-p* bound to true and returns :SIMPLE.
(defmethod check-restrictions ((pattern list-pattern))
  (flet ((forbid (flag message)
           (when flag
             (rng-error nil message))))
    (forbid *in-list-p* "nested list not allowed")
    (forbid *in-data-except-p* "list in data/except not allowed")
    (let ((*in-list-p* t))
      (check-restrictions (pattern-child pattern)))
    ;; The start check comes after the child has been checked.
    (forbid *in-start-p* "list in start not allowed"))
  :simple)
;; text: content type :COMPLEX, no name classes, text flag set.
(defmethod check-restrictions ((pattern text))
  (flet ((forbid (flag message)
           (when flag
             (rng-error nil message))))
    (forbid *in-list-p* "text in list not allowed")
    (forbid *in-data-except-p* "text in data/except not allowed")
    (forbid *in-start-p* "text in start not allowed"))
  (values :complex nil nil t))

;; data: checks its except pattern, if any, with *in-data-except-p* bound
;; to true; content type :SIMPLE.
(defmethod check-restrictions ((pattern data))
  (when *in-start-p*
    (rng-error nil "data in start not allowed"))
  (let ((except (pattern-except pattern)))
    (when except
      (let ((*in-data-except-p* t))
        (check-restrictions except))))
  :simple)

;; value: content type :SIMPLE.
(defmethod check-restrictions ((pattern value))
  (if *in-start-p*
      (rng-error nil "value in start not allowed")
      nil)
  :simple)

;; empty: content type :EMPTY.
(defmethod check-restrictions ((pattern empty))
  (flet ((forbid (flag message)
           (when flag
             (rng-error nil message))))
    (forbid *in-data-except-p* "empty in data/except not allowed")
    (forbid *in-start-p* "empty in start not allowed"))
  :empty)
;; An element's content must have a content type; a NIL result from the
;; child signals an error.  Returns NIL otherwise.
(defmethod check-restrictions ((pattern element))
  (let ((content-type (check-restrictions (pattern-child pattern))))
    (when (null content-type)
      (rng-error nil "restrictions on string sequences violated"))))

;; notAllowed has no content type.
(defmethod check-restrictions ((pattern not-allowed))
  (progn
    nil))