1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; An implementation of James Clark's algorithm for RELAX NG validation.
4 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 (in-package :cxml-rng
)
33 (defvar *empty
* (make-empty))
34 (defvar *not-allowed
* (make-not-allowed))
37 (defun make-validator (grammar)
38 (let* ((table (ensure-registratur grammar
))
39 (start (parsed-grammar-interned-start grammar
))
41 (make-instance 'validator
43 :current-pattern start
)))
44 (make-instance 'text-normalizer
:chained-handler validator
)))
49 (defgeneric contains
(nc uri lname
))
51 (defmethod contains ((nc any-name
) uri lname
)
52 (let ((except (any-name-except nc
)))
54 (not (contains except uri lname
))
57 (defmethod contains ((nc ns-name
) uri lname
)
58 (and (equal (ns-name-uri nc
) uri
)
59 (let ((except (ns-name-except nc
)))
61 (not (contains except uri lname
))
64 (defmethod contains ((nc name
) uri lname
)
65 (and (equal (name-uri nc
) uri
)
66 (equal (name-lname nc
) lname
)))
68 (defmethod contains ((nc name-class-choice
) uri lname
)
69 (or (contains (name-class-choice-a nc
) uri lname
)
70 (contains (name-class-choice-b nc
) uri lname
)))
75 (defgeneric nullable
(pattern))
77 (defmethod nullable ((pattern group
))
78 (and (nullable (pattern-a pattern
))
79 (nullable (pattern-b pattern
))))
81 (defmethod nullable ((pattern interleave
))
82 (and (nullable (pattern-a pattern
))
83 (nullable (pattern-b pattern
))))
85 (defmethod nullable ((pattern choice
))
86 (or (nullable (pattern-a pattern
))
87 (nullable (pattern-b pattern
))))
89 (defmethod nullable ((pattern one-or-more
))
90 (nullable (pattern-child pattern
)))
92 (defmethod nullable ((pattern element
)) nil
)
93 (defmethod nullable ((pattern attribute
)) nil
)
94 (defmethod nullable ((pattern list-pattern
)) nil
)
95 (defmethod nullable ((pattern value
)) nil
)
96 (defmethod nullable ((pattern data
)) nil
)
97 (defmethod nullable ((pattern not-allowed
)) nil
)
98 (defmethod nullable ((pattern after
)) nil
)
100 (defmethod nullable ((pattern empty
)) t
)
101 (defmethod nullable ((pattern text
)) t
)
106 (defclass validator
(sax:sax-parser-mixin
)
107 ((current-pattern :initarg
:current-pattern
:accessor current-pattern
)
108 (after-start-tag-p :accessor after-start-tag-p
)
109 (pending-text-node :initform nil
:accessor pending-text-node
)
110 (registratur :initarg
:registratur
:accessor registratur
)))
112 (defun advance (hsx pattern message
)
113 (when (typep pattern
'not-allowed
)
114 (rng-error hsx
"~A, was expecting a ~A"
116 (replace-scary-characters (current-pattern hsx
))))
118 (write-line (replace-scary-characters (current-pattern hsx
))))
119 (setf (current-pattern hsx
) pattern
))
121 ;; make sure slime doesn't die
122 (defun replace-scary-characters (pattern)
123 (let ((str (write-to-string pattern
130 when
(>= (char-code c
) 128)
131 do
(setf (elt str i
) #\?))
134 (defmethod sax:characters
((hsx validator
) data
)
135 (assert (null (pending-text-node hsx
))) ;parser must be normalize
136 (if (after-start-tag-p hsx
)
137 (setf (pending-text-node hsx
) data
)
138 (unless (whitespacep data
)
139 ;; we already saw an element sibling, so discard whitespace
141 (text\' hsx
(current-pattern hsx
) data
)
142 "text node not valid")))
143 (setf (after-start-tag-p hsx
) nil
))
145 (defmethod sax:start-element
((hsx validator
) uri lname qname attributes
)
146 (declare (ignore qname
))
147 (when (pending-text-node hsx
)
148 ;; text node was the previous child, and we're in element content.
149 ;; process non-whitespace now; discard whitespace completely
150 (let ((data (pending-text-node hsx
)))
151 (unless (whitespacep data
)
153 (text\' hsx
(current-pattern hsx
) data
)
155 (setf (pending-text-node hsx
) nil
))
157 (remove-if (cxml::compose
#'cxml
::xmlns-attr-p
#'sax
:attribute-qname
)
159 (let* ((p0 (current-pattern hsx
))
160 (p1 (open-start-tag\' hsx p0 uri lname
))
162 (advance hsx p1
"element not valid")
163 (attributes\' hsx p1 attributes
)))
165 (advance hsx p2
"attributes not valid")
166 (close-start-tag\' hsx p2
))))
167 (advance hsx p3
"attributes not valid")
168 (setf (after-start-tag-p hsx
) t
)))
170 (defmethod sax:end-element
((hsx validator
) uri lname qname
)
171 (declare (ignore uri lname qname
))
172 (when (after-start-tag-p hsx
)
173 ;; nothing at all? pretend we saw whitespace.
174 (sax:characters hsx
""))
175 (when (pending-text-node hsx
)
176 ;; text node was the only child?
177 ;; process it and handle whitespace specially
178 (let* ((current (current-pattern hsx
))
179 (data (pending-text-node hsx
))
180 (next (text\' hsx current data
)))
182 (if (whitespacep data
)
183 (intern-choice hsx current next
)
185 "text node not valid"))
186 (setf (pending-text-node hsx
) nil
))
188 (end-tag\' hsx
(current-pattern hsx
))
189 "end of element not valid"))
194 (defgeneric text
\' (handler pattern data
))
196 (defmethod text\' (hsx (pattern choice
) data
)
198 (text\' hsx
(pattern-a pattern
) data
)
199 (text\' hsx
(pattern-b pattern
) data
)))
201 (defmethod text\' (hsx (pattern interleave
) data
)
202 (let ((a (pattern-a pattern
))
203 (b (pattern-b pattern
)))
205 (intern-interleave hsx
(text\' hsx a data
) b
)
206 (intern-interleave hsx a
(text\' hsx b data
)))))
208 (defmethod text\' (hsx (pattern group
) data
)
209 (let* ((a (pattern-a pattern
))
210 (b (pattern-b pattern
))
211 (p (intern-group hsx
(text\' hsx a data
) b
)))
213 (intern-choice hsx p
(text\' hsx b data
))
216 (defmethod text\' (hsx (pattern after
) data
)
218 (text\' hsx
(pattern-a pattern
) data
)
219 (pattern-b pattern
)))
221 (defmethod text\' (hsx (pattern one-or-more
) data
)
222 (let ((child (pattern-child pattern
)))
224 (text\' hsx child data
)
225 (intern-zero-or-more hsx child
))))
227 (defmethod text\' (hsx (pattern text
) data
)
228 (declare (ignore data
))
232 (if ok
*empty
* *not-allowed
*))
234 (defmethod text\' (hsx (pattern value
) data
)
235 (eat (equal* (pattern-datatype-library pattern
)
236 (pattern-type pattern
)
237 (pattern-string pattern
)
240 (defmethod text\' (hsx (pattern data
) data
)
241 (eat (and (typep* (pattern-datatype-library pattern
)
242 (pattern-type pattern
)
244 (let ((except (pattern-except pattern
)))
245 (not (and except
(nullable (text\' hsx except data
))))))))
247 (defmethod text\' (hsx (pattern list-pattern
) data
)
248 (eat (nullable (list\' hsx
(pattern-child pattern
) (words data
)))))
250 (defmethod text\' (hsx pattern data
)
251 (declare (ignore pattern data
))
254 (defun list\' (hsx pattern words
)
256 (setf pattern
(text\' hsx pattern word
)))
260 (cl-ppcre:split
#.
(format nil
"[~A]+" *whitespace
*)
261 (string-trim *whitespace
* str
)))
266 (defmacro ensuref
(key table value
)
267 `(ensure-hash ,key
,table
(lambda () ,value
)))
269 (defun ensure-hash (key table fn
)
270 (or (gethash key table
)
271 (setf (gethash key table
) (funcall fn
))))
273 (defgeneric intern-choice
(handler a b
))
274 (defmethod intern-choice (hsx a
(b not-allowed
)) a
)
275 (defmethod intern-choice (hsx (a not-allowed
) b
) b
)
276 (defmethod intern-choice (hsx a b
)
277 (ensuref (list 'choice a b
) (registratur hsx
) (make-choice a b
)))
279 (defgeneric intern-group
(handler a b
))
280 (defmethod intern-group (hsx (a pattern
) (b not-allowed
)) b
)
281 (defmethod intern-group (hsx (a not-allowed
) (b pattern
)) a
)
282 (defmethod intern-group (hsx a
(b empty
)) a
)
283 (defmethod intern-group (hsx (a empty
) b
) b
)
284 (defmethod intern-group (hsx a b
)
285 (ensuref (list 'group a b
) (registratur hsx
) (make-group a b
)))
287 (defgeneric intern-interleave
(handler a b
))
288 (defmethod intern-interleave (hsx (a pattern
) (b not-allowed
)) b
)
289 (defmethod intern-interleave (hsx (a not-allowed
) (b pattern
)) a
)
290 (defmethod intern-interleave (hsx a
(b empty
)) a
)
291 (defmethod intern-interleave (hsx (a empty
) b
) b
)
292 (defmethod intern-interleave (hsx a b
)
293 (ensuref (list 'interleave a b
) (registratur hsx
) (make-interleave a b
)))
295 (defgeneric intern-after
(handler a b
))
296 (defmethod intern-after (hsx (a pattern
) (b not-allowed
)) b
)
297 (defmethod intern-after (hsx (a not-allowed
) (b pattern
)) a
)
298 (defmethod intern-after (hsx a b
)
299 (ensuref (list 'after a b
) (registratur hsx
) (make-after a b
)))
301 (defgeneric intern-one-or-more
(handler c
))
302 (defmethod intern-one-or-more (hsx (c not-allowed
)) c
)
303 (defmethod intern-one-or-more (hsx c
)
304 (ensuref (list 'one-or-more c
) (registratur hsx
) (make-one-or-more c
)))
307 ;;;; ENSURE-REGISTRATUR
309 (defvar *seen-elements
*)
311 (defun ensure-registratur (grammar)
312 (or (parsed-grammar-registratur grammar
)
313 (setf (parsed-grammar-registratur grammar
)
314 (let ((table (make-hash-table :test
'equal
))
315 (*seen-elements
* '())
317 (setf (parsed-grammar-interned-start grammar
)
318 (intern-pattern (parsed-grammar-pattern grammar
) table
))
320 for elements
= *seen-elements
*
322 (setf *seen-elements
* nil
)
323 (dolist (pattern elements
)
324 (unless (find pattern done-elements
)
325 (push pattern done-elements
)
326 (setf (pattern-child pattern
)
327 (intern-pattern (pattern-child pattern
) table
)))))
330 ;;; FIXME: misnamed. we don't really intern the originals pattern yet.
332 (defgeneric intern-pattern
(pattern table
))
334 (defmethod intern-pattern ((pattern element
) table
)
335 (pushnew pattern
*seen-elements
*)
338 (defmethod intern-pattern ((pattern %parent
) table
)
339 (let ((c (intern-pattern (pattern-child pattern
) table
)))
340 (if (eq c
(pattern-child pattern
))
342 (let ((copy (copy-structure pattern
)))
343 (setf (pattern-child copy
) c
)
346 (defmethod intern-pattern ((pattern %combination
) table
)
347 (let ((a (intern-pattern (pattern-a pattern
) table
))
348 (b (intern-pattern (pattern-b pattern
) table
)))
349 (if (and (eq a
(pattern-a pattern
)) (eq b
(pattern-b pattern
)))
351 (let ((copy (copy-structure pattern
)))
352 (setf (pattern-a copy
) a
)
353 (setf (pattern-b copy
) b
)
356 (defmethod intern-pattern ((pattern data
) table
)
357 (let ((e (when (pattern-except pattern
)
358 (intern-pattern (pattern-except pattern
) table
))))
359 (if (eq e
(pattern-except pattern
))
361 (let ((copy (copy-structure pattern
)))
362 (setf (pattern-except copy
) e
)
365 (defmethod intern-pattern ((pattern ref
) table
)
366 (intern-pattern (defn-child (pattern-target pattern
)) table
))
368 (defmethod intern-pattern ((pattern empty
) table
)
371 (defmethod intern-pattern ((pattern not-allowed
) table
)
374 (defmethod intern-pattern ((pattern %leaf
) table
)
378 ;;;; built-in data type library
382 (defun equal* (dl type a b
)
383 (unless (equal dl
"")
384 (error "data type library not found: ~A" dl
))
385 (ecase (find-symbol type
:keyword
)
386 (:|string|
(equal a b
))
387 (:|token|
(equal (normalize-whitespace a
) (normalize-whitespace b
)))))
389 (defun typep* (dl type str
)
390 (declare (ignore str
))
391 (unless (equal dl
"")
392 (error "data type library not found: ~A" dl
))
393 (ecase (find-symbol type
:keyword
)
394 ((:|string|
:|token|
) t
)))
396 (defun normalize-whitespace (str)
397 (cl-ppcre:regex-replace-all
#.
(format nil
"[~A]+" *whitespace
*)
398 (string-trim *whitespace
* str
)
404 (defgeneric apply-after
(handler fn pattern
))
406 (defmethod apply-after (hsx fn
(pattern after
))
409 (funcall fn
(pattern-b pattern
))))
411 (defmethod apply-after (hsx fn
(pattern choice
))
413 (apply-after hsx fn
(pattern-a pattern
))
414 (apply-after hsx fn
(pattern-b pattern
))))
416 (defmethod apply-after (hsx fn
(pattern not-allowed
))
417 (declare (ignore hsx fn
))
423 (defgeneric open-start-tag
\' (handler pattern uri lname
))
425 (defmethod open-start-tag\' (hsx (pattern choice
) uri lname
)
427 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)
428 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
)))
430 (defmethod open-start-tag\' (hsx (pattern element
) uri lname
)
431 (if (contains (pattern-name pattern
) (or uri
"") lname
)
432 (intern-after hsx
(pattern-child pattern
) *empty
*)
435 (defmethod open-start-tag\' (hsx (pattern interleave
) uri lname
)
439 (lambda (p) (intern-interleave hsx p
(pattern-b pattern
)))
440 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))
443 (lambda (p) (intern-interleave hsx
(pattern-a pattern
) p
))
444 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))))
446 (defun intern-zero-or-more (hsx c
)
447 (intern-choice hsx
(intern-one-or-more hsx c
) *empty
*))
449 (defmethod open-start-tag\' (hsx (pattern one-or-more
) uri lname
)
450 (let ((c (intern-zero-or-more hsx
(pattern-child pattern
))))
452 (lambda (p) (intern-group hsx p c
))
453 (open-start-tag\' hsx
(pattern-child pattern
) uri lname
))))
455 (defmethod open-start-tag\' (hsx (pattern group
) uri lname
)
456 (let ((x (apply-after hsx
458 (intern-group hsx p
(pattern-b pattern
)))
459 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))))
460 (if (nullable (pattern-a pattern
))
463 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))
466 (defmethod open-start-tag\' (hsx (pattern after
) uri lname
)
469 (intern-after hsx p
(pattern-b pattern
)))
470 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)))
472 (defmethod open-start-tag\' (hsx pattern uri lname
)
473 (declare (ignore hsx pattern uri lname
))
479 (defun attributes\' (handler pattern attributes
)
480 (dolist (a attributes
)
481 (setf pattern
(attribute\' handler pattern a
)))
484 (defgeneric attribute
\' (handler pattern attribute
))
486 (defmethod attribute\' (hsx (pattern after
) a
)
488 (attribute\' hsx
(pattern-a pattern
) a
)
489 (pattern-b pattern
)))
491 (defmethod attribute\' (hsx (pattern choice
) a
)
493 (attribute\' hsx
(pattern-a pattern
) a
)
494 (attribute\' hsx
(pattern-b pattern
) a
)))
496 (defmethod attribute\' (hsx (pattern group
) a
)
499 (attribute\' hsx
(pattern-a pattern
) a
)
503 (attribute\' hsx
(pattern-b pattern
) a
))))
505 (defmethod attribute\' (hsx (pattern interleave
) a
)
507 (intern-interleave hsx
508 (attribute\' hsx
(pattern-a pattern
) a
)
510 (intern-interleave hsx
512 (attribute\' hsx
(pattern-b pattern
) a
))))
514 (defmethod attribute\' (hsx (pattern one-or-more
) a
)
516 (attribute\' hsx
(pattern-child pattern
) a
)
517 (intern-zero-or-more hsx
(pattern-child pattern
))))
519 (defmethod attribute\' (hsx (pattern attribute
) a
)
520 (eat (and (contains (pattern-name pattern
)
521 (or (sax:attribute-namespace-uri a
) "")
522 (sax:attribute-local-name a
))
524 (pattern-child pattern
)
525 (sax:attribute-value a
)))))
527 (defun value-matches-p (hsx pattern value
)
528 (or (and (nullable pattern
) (whitespacep value
))
529 (nullable (text\' hsx pattern value
))))
531 (defun whitespacep (str)
532 (zerop (length (string-trim *whitespace
* str
))))
534 (defmethod attribute\' (hsx pattern a
)
535 (declare (ignore hsx pattern a
))
539 ;;;; CLOSE-START-TAG'
541 (defgeneric close-start-tag
\' (handler pattern
))
543 (defmethod close-start-tag\' (hsx (pattern after
))
545 (close-start-tag\' hsx
(pattern-a pattern
))
546 (pattern-b pattern
)))
548 (defmethod close-start-tag\' (hsx (pattern choice
))
550 (close-start-tag\' hsx
(pattern-a pattern
))
551 (close-start-tag\' hsx
(pattern-b pattern
))))
553 (defmethod close-start-tag\' (hsx (pattern group
))
555 (close-start-tag\' hsx
(pattern-a pattern
))
556 (close-start-tag\' hsx
(pattern-b pattern
))))
558 (defmethod close-start-tag\' (hsx (pattern interleave
))
559 (intern-interleave hsx
560 (close-start-tag\' hsx
(pattern-a pattern
))
561 (close-start-tag\' hsx
(pattern-b pattern
))))
563 (defmethod close-start-tag\' (hsx (pattern one-or-more
))
564 (intern-one-or-more hsx
(close-start-tag\' hsx
(pattern-child pattern
))))
566 (defmethod close-start-tag\' (hsx (pattern attribute
))
567 (declare (ignore hsx
))
570 (defmethod close-start-tag\' (hsx pattern
)
571 (declare (ignore hsx
))
577 (defgeneric end-tag
\' (handler pattern
))
579 (defmethod end-tag\' (hsx (pattern choice
))
581 (end-tag\' hsx
(pattern-a pattern
))
582 (end-tag\' hsx
(pattern-b pattern
))))
584 (defmethod end-tag\' (hsx (pattern after
))
585 (if (nullable (pattern-a pattern
))
589 (defmethod end-tag\' (hsx pattern
)
590 (declare (ignore hsx pattern
))
596 ;;; FIXME: cxml should do that
598 ;;; FIXME: since we ignore PI, CDATA, and comment events, we should probably
599 ;;; discard them properly.
601 (defclass text-normalizer
(cxml:sax-proxy
)
602 ((pending-text-node :initform
(make-string-output-stream)
603 :accessor pending-text-node
)))
605 (defmethod sax:characters
((handler text-normalizer
) data
)
606 (write-string data
(pending-text-node handler
)))
608 (defun flush-pending (handler)
609 (let ((str (get-output-stream-string (pending-text-node handler
))))
610 (unless (zerop (length str
))
611 (sax:characters
(cxml:proxy-chained-handler handler
) str
))))
613 (defmethod sax:start-element
:before
614 ((handler text-normalizer
) uri lname qname attributes
)
615 (declare (ignore uri lname qname attributes
))
616 (flush-pending handler
))
618 (defmethod sax:end-element
:before
619 ((handler text-normalizer
) uri lname qname
)
620 (declare (ignore uri lname qname
))
621 (flush-pending handler
))