1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; An implementation of James Clark's algorithm for RELAX NG validation.
4 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 (in-package :cxml-rng
)
;; Shared pattern objects, created once when the file is loaded.
(defvar *empty*
  (make-empty)
  "Pattern object produced by MAKE-EMPTY.")

(defvar *not-allowed*
  (make-not-allowed)
  "Pattern object produced by MAKE-NOT-ALLOWED.")
;; Build the SAX handler chain used to validate against GRAMMAR: fetch the
;; grammar's registratur (ENSURE-REGISTRATUR) and its interned start pattern,
;; create a VALIDATOR whose current pattern is that start pattern, and return
;; a TEXT-NORMALIZER whose chained handler is the validator.
;; NOTE(review): the embedded numbering skips 40 and 42; the binding that
;; introduces VALIDATOR and the place where TABLE is used are not visible in
;; this view -- confirm against the upstream file.
37 (defun make-validator (grammar)
38 (let* ((table (ensure-registratur grammar
))
39 (start (parsed-grammar-interned-start grammar
))
41 (make-instance 'validator
43 :current-pattern start
)))
44 (make-instance 'text-normalizer
:chained-handler validator
)))
(defgeneric contains (nc uri lname)
  (:documentation
   "True if the name class NC contains the name given by the namespace URI
and the local name LNAME."))
;; ANY-NAME: the visible result is the negation of CONTAINS applied to the
;; name class's except clause (ANY-NAME-EXCEPT).
;; NOTE(review): the embedded numbering skips 53 and 55, so how a missing
;; (NIL) except clause is handled is not visible here -- confirm upstream.
51 (defmethod contains ((nc any-name
) uri lname
)
52 (let ((except (any-name-except nc
)))
54 (not (contains except uri lname
))
;; NS-NAME: URI must be EQUAL to the name class's namespace (NS-NAME-URI);
;; in addition the name must not be contained in the except clause
;; (NS-NAME-EXCEPT).
;; NOTE(review): the embedded numbering skips 60 and 62, so how a missing
;; (NIL) except clause is handled is not visible here -- confirm upstream.
57 (defmethod contains ((nc ns-name
) uri lname
)
58 (and (equal (ns-name-uri nc
) uri
)
59 (let ((except (ns-name-except nc
)))
61 (not (contains except uri lname
))
;; NAME: contains exactly one name.  Both the namespace URI and the local
;; name have to be EQUAL to the ones stored in NC.
(defmethod contains ((nc name) uri lname)
  (when (equal (name-uri nc) uri)
    (equal (name-lname nc) lname)))
;; NAME-CLASS-CHOICE: contained if either alternative contains the name.
;; The second alternative is only consulted when the first one says no.
(defmethod contains ((nc name-class-choice) uri lname)
  (cond ((contains (name-class-choice-a nc) uri lname))
        (t (contains (name-class-choice-b nc) uri lname))))
;; NULLABLE answers true or false for each pattern class:
;;   - GROUP and INTERLEAVE need both operands to be nullable,
;;   - CHOICE needs at least one nullable operand,
;;   - ONE-OR-MORE follows its child,
;;   - EMPTY and TEXT are always nullable,
;;   - every other pattern class listed below never is.
(defgeneric nullable (pattern))

(defmethod nullable ((pattern group))
  (when (nullable (pattern-a pattern))
    (nullable (pattern-b pattern))))

(defmethod nullable ((pattern interleave))
  (when (nullable (pattern-a pattern))
    (nullable (pattern-b pattern))))

(defmethod nullable ((pattern choice))
  (cond ((nullable (pattern-a pattern)))
        (t (nullable (pattern-b pattern)))))

(defmethod nullable ((pattern one-or-more))
  (let ((child (pattern-child pattern)))
    (nullable child)))

;; never nullable
(defmethod nullable ((pattern element))
  nil)

(defmethod nullable ((pattern attribute))
  nil)

(defmethod nullable ((pattern list-pattern))
  nil)

(defmethod nullable ((pattern value))
  nil)

(defmethod nullable ((pattern data))
  nil)

(defmethod nullable ((pattern not-allowed))
  nil)

(defmethod nullable ((pattern after))
  nil)

;; always nullable
(defmethod nullable ((pattern empty))
  t)

(defmethod nullable ((pattern text))
  t)
;; SAX handler that keeps the validation state.
;;
;; Slots:
;;   CURRENT-PATTERN   - the pattern the handler is currently at; replaced
;;                       by ADVANCE after every event.
;;   AFTER-START-TAG-P - true between a start tag and the next event; set by
;;                       SAX:START-ELEMENT and cleared by SAX:CHARACTERS.
;;                       It now defaults to NIL so that a SAX:CHARACTERS
;;                       event arriving before any SAX:START-ELEMENT reads a
;;                       defined value instead of signalling UNBOUND-SLOT.
;;   PENDING-TEXT-NODE - text seen directly after a start tag that has not
;;                       been matched yet, or NIL.
;;   REGISTRATUR       - table used by the INTERN-* functions to share
;;                       structurally identical patterns.
(defclass validator (sax:sax-parser-mixin)
  ((current-pattern :initarg :current-pattern
                    :accessor current-pattern)
   (after-start-tag-p :initform nil
                      :accessor after-start-tag-p)
   (pending-text-node :initform nil
                      :accessor pending-text-node)
   (registratur :initarg :registratur
                :accessor registratur)))
;; Make PATTERN the handler's current pattern.  When PATTERN is of type
;; NOT-ALLOWED, RNG-ERROR is called first; its arguments include a printed
;; form of the previous current pattern (via REPLACE-SCARY-CHARACTERS).
;; NOTE(review): the format string has two ~A directives but only one
;; argument is visible, and MESSAGE is never referenced in the visible lines.
;; The embedded numbering skips 115 and 117, so lines appear to be missing
;; here (possibly also something that disables the WRITE-LINE debug output)
;; -- confirm against the upstream file.
112 (defun advance (hsx pattern message
)
113 (when (typep pattern
'not-allowed
)
114 (rng-error hsx
"~A, was expecting a ~A"
116 (replace-scary-characters (current-pattern hsx
))))
118 (write-line (replace-scary-characters (current-pattern hsx
))))
119 (setf (current-pattern hsx
) pattern
))
121 ;; make sure slime doesn't die
;; Print PATTERN to a string with WRITE-TO-STRING and overwrite every
;; character whose CHAR-CODE is 128 or above with #\?.
;; NOTE(review): the embedded numbering skips 124-129 and 132; the remaining
;; WRITE-TO-STRING arguments, the loop header and the return value are not
;; visible in this view -- confirm against the upstream file.
122 (defun replace-scary-characters (pattern)
123 (let ((str (write-to-string pattern
130 when
(>= (char-code c
) 128)
131 do
(setf (elt str i
) #\?))
;; Text event for the validator.
;;   - Directly after a start tag the text is only stored in
;;     PENDING-TEXT-NODE; it is dealt with by the next element event.
;;   - Otherwise whitespace-only text is dropped and any other text is
;;     matched against the current pattern with TEXT'.
;; AFTER-START-TAG-P is cleared before returning.
;; NOTE(review): the embedded numbering skips 140; the call that receives
;; the TEXT' result together with "text node not valid" is not visible here
;; (presumably ADVANCE) -- confirm against the upstream file.
134 (defmethod sax:characters
((hsx validator
) data
)
135 (assert (null (pending-text-node hsx
))) ; adjacent text events must already have been merged by the caller
136 (if (after-start-tag-p hsx
)
137 (setf (pending-text-node hsx
) data
)
138 (unless (whitespacep data
)
139 ;; we already saw an element sibling, so discard whitespace
141 (text\' hsx
(current-pattern hsx
) data
)
142 "text node not valid")))
143 (setf (after-start-tag-p hsx
) nil
))
;; Start-tag event for the validator.  Visible steps:
;;   1. A pending text node (text that preceded this element) is matched
;;      with TEXT' unless it is whitespace-only, then cleared.
;;   2. Attributes for which CXML::XMLNS-ATTR-P holds on their qname are
;;      filtered out with REMOVE-IF.
;;   3. The current pattern P0 is taken through OPEN-START-TAG' (P1),
;;      ATTRIBUTES' (P2) and CLOSE-START-TAG' (P3), calling ADVANCE after
;;      each step.
;;   4. AFTER-START-TAG-P is set to T.
;; NOTE(review): the embedded numbering skips 152, 154, 156, 158, 161 and
;; 164, so several wrapper forms and the bindings of P2 and P3 are not
;; visible in this view -- confirm against the upstream file.
;; NOTE(review): the message passed along with P3 repeats
;; "attributes not valid"; check whether a distinct message was intended.
145 (defmethod sax:start-element
((hsx validator
) uri lname qname attributes
)
146 (declare (ignore qname
))
147 (when (pending-text-node hsx
)
148 ;; text node was the previous child, and we're in element content.
149 ;; process non-whitespace now; discard whitespace completely
150 (let ((data (pending-text-node hsx
)))
151 (unless (whitespacep data
)
153 (text\' hsx
(current-pattern hsx
) data
)
155 (setf (pending-text-node hsx
) nil
))
157 (remove-if (cxml::compose
#'cxml
::xmlns-attr-p
#'sax
:attribute-qname
)
159 (let* ((p0 (current-pattern hsx
))
160 (p1 (open-start-tag\' hsx p0 uri lname
))
162 (advance hsx p1
"element not valid")
163 (attributes\' hsx p1 attributes
)))
165 (advance hsx p2
"attributes not valid")
166 (close-start-tag\' hsx p2
))))
167 (advance hsx p3
"attributes not valid")
168 (setf (after-start-tag-p hsx
) t
)))
;; End-tag event for the validator.  Visible steps:
;;   1. If nothing followed the start tag, an empty-string SAX:CHARACTERS
;;      event is sent to this handler, which stores "" as the pending text.
;;   2. A pending text node (the element's only child) is matched with
;;      TEXT'; for whitespace-only text the result is combined with the
;;      unchanged current pattern through INTERN-CHOICE.  The pending text
;;      is then cleared.
;;   3. END-TAG' is applied to the current pattern.
;; NOTE(review): the embedded numbering skips 181, 184 and 187; the calls
;; that receive these results together with the "... not valid" messages
;; (presumably ADVANCE) and the non-whitespace branch are not visible in
;; this view -- confirm against the upstream file.
170 (defmethod sax:end-element
((hsx validator
) uri lname qname
)
171 (declare (ignore uri lname qname
))
172 (when (after-start-tag-p hsx
)
173 ;; nothing at all? pretend we saw whitespace.
174 (sax:characters hsx
""))
175 (when (pending-text-node hsx
)
176 ;; text node was the only child?
177 ;; process it and handle whitespace specially
178 (let* ((current (current-pattern hsx
))
179 (data (pending-text-node hsx
))
180 (next (text\' hsx current data
)))
182 (if (whitespacep data
)
183 (intern-choice hsx current next
)
185 "text node not valid"))
186 (setf (pending-text-node hsx
) nil
))
188 (end-tag\' hsx
(current-pattern hsx
))
189 "end of element not valid"))
(defgeneric text\' (handler pattern data)
  (:documentation
   "Match the text DATA against PATTERN on behalf of HANDLER and return the
resulting pattern."))
;; CHOICE: TEXT' is applied to both alternatives (PATTERN-A and PATTERN-B)
;; with the same DATA.
;; NOTE(review): the embedded numbering skips 197; the form that combines
;; the two results is not visible here (presumably INTERN-CHOICE) -- confirm
;; against the upstream file.
196 (defmethod text\' (hsx (pattern choice
) data
)
198 (text\' hsx
(pattern-a pattern
) data
)
199 (text\' hsx
(pattern-b pattern
) data
)))
;; INTERLEAVE: builds two candidate patterns, one where the text is matched
;; by operand A (interleaved with the untouched B) and one where it is
;; matched by operand B (interleaved with the untouched A).
;; NOTE(review): the embedded numbering skips 204; the form that combines
;; the two candidates is not visible here (presumably INTERN-CHOICE) --
;; confirm against the upstream file.
201 (defmethod text\' (hsx (pattern interleave
) data
)
202 (let ((a (pattern-a pattern
))
203 (b (pattern-b pattern
)))
205 (intern-interleave hsx
(text\' hsx a data
) b
)
206 (intern-interleave hsx a
(text\' hsx b data
)))))
;; GROUP: P is the group of (TEXT' of operand A) followed by operand B.  The
;; visible INTERN-CHOICE form offers P alongside TEXT' of operand B.
;; NOTE(review): the embedded numbering skips 212 and 214; the condition
;; selecting between that choice and plain P is not visible here (presumably
;; a test on (NULLABLE A)) -- confirm against the upstream file.
208 (defmethod text\' (hsx (pattern group
) data
)
209 (let* ((a (pattern-a pattern
))
210 (b (pattern-b pattern
))
211 (p (intern-group hsx
(text\' hsx a data
) b
)))
213 (intern-choice hsx p
(text\' hsx b data
))
;; AFTER: TEXT' is applied to operand A; operand B is passed on unchanged.
;; NOTE(review): the embedded numbering skips 217; the form receiving both
;; values is not visible here (presumably INTERN-AFTER) -- confirm against
;; the upstream file.
216 (defmethod text\' (hsx (pattern after
) data
)
218 (text\' hsx
(pattern-a pattern
) data
)
219 (pattern-b pattern
)))
;; ONE-OR-MORE: TEXT' is applied to the child, and a zero-or-more version of
;; the child is built with INTERN-ZERO-OR-MORE.
;; NOTE(review): the embedded numbering skips 223; the form receiving both
;; values is not visible here (presumably INTERN-GROUP) -- confirm against
;; the upstream file.
221 (defmethod text\' (hsx (pattern one-or-more
) data
)
222 (let ((child (pattern-child pattern
)))
224 (text\' hsx child data
)
225 (intern-zero-or-more hsx child
))))
;; TEXT: DATA is declared ignored.
;; NOTE(review): the embedded numbering skips 229-231.  The body of this
;; method and the header of the following definition are not visible here.
;; The trailing fragment maps a boolean OK to *EMPTY* (true) or
;; *NOT-ALLOWED* (false); it presumably belongs to the one-argument helper
;; EAT called elsewhere in this file -- confirm against the upstream file.
227 (defmethod text\' (hsx (pattern text
) data
)
228 (declare (ignore data
))
232 (if ok
*empty
* *not-allowed
*))
;; VALUE: the outcome of a CXML-TYPES:EQUAL* comparison, whose visible
;; arguments are the pattern's type and the pattern's string, is passed to
;; EAT.
;; NOTE(review): the embedded numbering skips 237; the remaining arguments
;; of the comparison are not visible in this view -- confirm against the
;; upstream file.
234 (defmethod text\' (hsx (pattern value
) data
)
235 (eat (cxml-types:equal
* (pattern-type pattern
)
236 (pattern-string pattern
)
;; DATA: the text is accepted when it is a valid instance of the pattern's
;; datatype (CXML-TYPES:TYPEP*) and, if the pattern has an except clause,
;; matching the text against that clause does not give a nullable pattern.
;; The boolean outcome is turned into a pattern by EAT.
(defmethod text\' (hsx (pattern data) data)
  (let ((type (pattern-type pattern)))
    (eat (and (cxml-types:typep* type data)
              (let ((except (pattern-except pattern)))
                (or (null except)
                    (not (nullable (text\' hsx except data)))))))))
;; LIST-PATTERN: DATA is split with WORDS, the resulting tokens are run
;; through the child pattern with LIST', and the text is accepted when the
;; pattern left over is nullable.  EAT turns that boolean into a pattern.
(defmethod text\' (hsx (pattern list-pattern) data)
  (let* ((child (pattern-child pattern))
         (tokens (words data))
         (remainder (list\' hsx child tokens)))
    (eat (nullable remainder))))
;; Fallback for all pattern classes without a more specific method; PATTERN
;; and DATA are declared ignored.
;; NOTE(review): the embedded numbering skips 249; the value returned by
;; this method is not visible here -- confirm against the upstream file.
247 (defmethod text\' (hsx pattern data
)
248 (declare (ignore pattern data
))
;; LIST': PATTERN is repeatedly replaced by the result of TEXT' on the
;; current PATTERN and WORD.
;; NOTE(review): the embedded numbering skips 252 and 254-256.  The loop
;; that binds WORD, the return value of LIST', and the header of the
;; following definition are not visible here.  The trailing fragment trims
;; *WHITESPACE* from STR and splits the rest on runs of whitespace
;; characters with CL-PPCRE:SPLIT; it presumably belongs to the helper WORDS
;; called elsewhere in this file -- confirm against the upstream file.
251 (defun list\' (hsx pattern words
)
253 (setf pattern
(text\' hsx pattern word
)))
257 (cl-ppcre:split
#.
(format nil
"[~A]+" *whitespace
*)
258 (string-trim *whitespace
* str
)))
;; Look KEY up in TABLE through ENSURE-HASH.  VALUE is wrapped in a
;; zero-argument function, so it is only evaluated if ENSURE-HASH decides to
;; call that function.
(defmacro ensuref (key table value)
  (let ((thunk `(lambda () ,value)))
    `(ensure-hash ,key ,table ,thunk)))
;; Return the value stored under KEY in TABLE.  If TABLE has no entry for
;; KEY, call FN with no arguments, store its result under KEY and return it.
;;
;; Presence is decided by the second return value of GETHASH rather than by
;; the stored value itself, so an entry whose value is NIL counts as present
;; and FN is not called again for that key.
(defun ensure-hash (key table fn)
  (multiple-value-bind (value presentp) (gethash key table)
    (if presentp
        value
        (setf (gethash key table) (funcall fn)))))
;; INTERN-CHOICE returns a pattern for the choice between A and B.
;; A NOT-ALLOWED operand is dropped in favour of the other operand.  Any
;; other pair is looked up in the handler's registratur under the key
;; (CHOICE A B) and created with MAKE-CHOICE if it is not there yet.
(defgeneric intern-choice (handler a b))

(defmethod intern-choice (hsx a (b not-allowed))
  a)

(defmethod intern-choice (hsx (a not-allowed) b)
  b)

(defmethod intern-choice (hsx a b)
  (let ((key (list 'choice a b))
        (table (registratur hsx)))
    (ensuref key table (make-choice a b))))
;; INTERN-GROUP returns a pattern for A followed by B.
;; If either operand is NOT-ALLOWED, that operand is the result.  An EMPTY
;; operand is dropped in favour of the other operand.  Any other pair is
;; looked up in the handler's registratur under the key (GROUP A B) and
;; created with MAKE-GROUP if it is not there yet.
(defgeneric intern-group (handler a b))

(defmethod intern-group (hsx (a pattern) (b not-allowed))
  b)

(defmethod intern-group (hsx (a not-allowed) (b pattern))
  a)

(defmethod intern-group (hsx a (b empty))
  a)

(defmethod intern-group (hsx (a empty) b)
  b)

(defmethod intern-group (hsx a b)
  (let ((key (list 'group a b))
        (table (registratur hsx)))
    (ensuref key table (make-group a b))))
;; INTERN-INTERLEAVE returns a pattern for the interleaving of A and B.
;; If either operand is NOT-ALLOWED, that operand is the result.  An EMPTY
;; operand is dropped in favour of the other operand.  Any other pair is
;; looked up in the handler's registratur under the key (INTERLEAVE A B) and
;; created with MAKE-INTERLEAVE if it is not there yet.
(defgeneric intern-interleave (handler a b))

(defmethod intern-interleave (hsx (a pattern) (b not-allowed))
  b)

(defmethod intern-interleave (hsx (a not-allowed) (b pattern))
  a)

(defmethod intern-interleave (hsx a (b empty))
  a)

(defmethod intern-interleave (hsx (a empty) b)
  b)

(defmethod intern-interleave (hsx a b)
  (let ((key (list 'interleave a b))
        (table (registratur hsx)))
    (ensuref key table (make-interleave a b))))
;; INTERN-AFTER returns an AFTER pattern built from A and B.
;; If either operand is NOT-ALLOWED, that operand is the result.  Any other
;; pair is looked up in the handler's registratur under the key (AFTER A B)
;; and created with MAKE-AFTER if it is not there yet.
(defgeneric intern-after (handler a b))

(defmethod intern-after (hsx (a pattern) (b not-allowed))
  b)

(defmethod intern-after (hsx (a not-allowed) (b pattern))
  a)

(defmethod intern-after (hsx a b)
  (let ((key (list 'after a b))
        (table (registratur hsx)))
    (ensuref key table (make-after a b))))
;; INTERN-ONE-OR-MORE returns a ONE-OR-MORE pattern around C.
;; A NOT-ALLOWED child is returned as is.  Any other child is looked up in
;; the handler's registratur under the key (ONE-OR-MORE C) and wrapped with
;; MAKE-ONE-OR-MORE if it is not there yet.
(defgeneric intern-one-or-more (handler c))

(defmethod intern-one-or-more (hsx (c not-allowed))
  c)

(defmethod intern-one-or-more (hsx c)
  (let ((key (list 'one-or-more c))
        (table (registratur hsx)))
    (ensuref key table (make-one-or-more c))))
304 ;;;; ENSURE-REGISTRATUR
;; List of ELEMENT patterns collected by INTERN-PATTERN.  No global value is
;; given here; ENSURE-REGISTRATUR binds the variable while it is running.
(defvar *seen-elements*)
;; Return the registratur stored in GRAMMAR, creating and storing one first
;; if there is none.  Visible steps of the creation:
;;   - a fresh EQUAL hash table is made and *SEEN-ELEMENTS* is bound to the
;;     empty list,
;;   - the grammar's pattern is run through INTERN-PATTERN and stored as the
;;     grammar's interned start pattern,
;;   - the elements collected in *SEEN-ELEMENTS* are taken in batches: the
;;     variable is reset to NIL, and every element not yet in DONE-ELEMENTS
;;     is recorded there and has its child replaced by the INTERN-PATTERN of
;;     that child.
;; NOTE(review): the embedded numbering skips 313, 316, 318 and 325; the
;; binding of DONE-ELEMENTS, the loop header and termination test, and the
;; value stored as the registratur are not visible in this view -- confirm
;; against the upstream file.
308 (defun ensure-registratur (grammar)
309 (or (parsed-grammar-registratur grammar
)
310 (setf (parsed-grammar-registratur grammar
)
311 (let ((table (make-hash-table :test
'equal
))
312 (*seen-elements
* '())
314 (setf (parsed-grammar-interned-start grammar
)
315 (intern-pattern (parsed-grammar-pattern grammar
) table
))
317 for elements
= *seen-elements
*
319 (setf *seen-elements
* nil
)
320 (dolist (pattern elements
)
321 (unless (find pattern done-elements
)
322 (push pattern done-elements
)
323 (setf (pattern-child pattern
)
324 (intern-pattern (pattern-child pattern
) table
)))))
327 ;;; FIXME: misnamed.  We don't really intern the original patterns yet.
(defgeneric intern-pattern (pattern table)
  (:documentation
   "Return the pattern to use in place of PATTERN during validation.  TABLE
is handed on unchanged to recursive calls."))
;; ELEMENT: the pattern is recorded in *SEEN-ELEMENTS* (PUSHNEW, so it is
;; not added twice); its child is not descended into here.
;; NOTE(review): the embedded numbering skips 333; the value returned by
;; this method is not visible here -- confirm against the upstream file.
331 (defmethod intern-pattern ((pattern element
) table
)
332 (pushnew pattern
*seen-elements
*)
;; %PARENT: the child is interned first.  The IF tests whether the interned
;; child is EQ to the stored one; the other visible branch works on a
;; COPY-STRUCTURE of the pattern with the new child stored in it.
;; NOTE(review): the embedded numbering skips 338 and 341; the value of the
;; "unchanged" branch and the value returned after the copy are not visible
;; here -- confirm against the upstream file.
335 (defmethod intern-pattern ((pattern %parent
) table
)
336 (let ((c (intern-pattern (pattern-child pattern
) table
)))
337 (if (eq c
(pattern-child pattern
))
339 (let ((copy (copy-structure pattern
)))
340 (setf (pattern-child copy
) c
)
;; %COMBINATION: same scheme as above, applied to both operands (PATTERN-A
;; and PATTERN-B).  The copy is made unless both interned operands are EQ to
;; the stored ones.
;; NOTE(review): the embedded numbering skips 347 and 351 -- see the note
;; above; confirm against the upstream file.
343 (defmethod intern-pattern ((pattern %combination
) table
)
344 (let ((a (intern-pattern (pattern-a pattern
) table
))
345 (b (intern-pattern (pattern-b pattern
) table
)))
346 (if (and (eq a
(pattern-a pattern
)) (eq b
(pattern-b pattern
)))
348 (let ((copy (copy-structure pattern
)))
349 (setf (pattern-a copy
) a
)
350 (setf (pattern-b copy
) b
)
;; DATA: same scheme, applied to the optional except pattern.  Without an
;; except pattern E is NIL, which is EQ to the stored NIL.
;; NOTE(review): the embedded numbering skips 357 and 360 -- see the note
;; above; confirm against the upstream file.
353 (defmethod intern-pattern ((pattern data
) table
)
354 (let ((e (when (pattern-except pattern
)
355 (intern-pattern (pattern-except pattern
) table
))))
356 (if (eq e
(pattern-except pattern
))
358 (let ((copy (copy-structure pattern
)))
359 (setf (pattern-except copy
) e
)
;; REF: a reference is replaced by the interned child of the definition it
;; points to.
(defmethod intern-pattern ((pattern ref) table)
  (let* ((definition (pattern-target pattern))
         (body (defn-child definition)))
    (intern-pattern body table)))
;; Methods for EMPTY, NOT-ALLOWED and %LEAF patterns.
;; NOTE(review): only the method headers are visible; the embedded numbering
;; skips 366-367, 369-370 and 372 onwards, so the bodies (and therefore the
;; return values) cannot be documented from this view -- confirm against the
;; upstream file.
365 (defmethod intern-pattern ((pattern empty
) table
)
368 (defmethod intern-pattern ((pattern not-allowed
) table
)
371 (defmethod intern-pattern ((pattern %leaf
) table
)
(defgeneric apply-after (handler fn pattern)
  (:documentation
   "Apply the one-argument function FN inside PATTERN on behalf of HANDLER.
Methods exist for AFTER, CHOICE and NOT-ALLOWED patterns."))
;; AFTER: FN is called on the pattern's second operand (PATTERN-B).
;; NOTE(review): the embedded numbering skips 380 and 381; the form that
;; receives the FUNCALL result (and what happens to PATTERN-A) is not
;; visible here -- confirm against the upstream file.
379 (defmethod apply-after (hsx fn
(pattern after
))
382 (funcall fn
(pattern-b pattern
))))
;; CHOICE: APPLY-AFTER recurses into both alternatives.
;; NOTE(review): the embedded numbering skips 385; the form combining the
;; two results is not visible here (presumably INTERN-CHOICE) -- confirm.
384 (defmethod apply-after (hsx fn
(pattern choice
))
386 (apply-after hsx fn
(pattern-a pattern
))
387 (apply-after hsx fn
(pattern-b pattern
))))
;; NOT-ALLOWED: HSX and FN are declared ignored.
;; NOTE(review): the embedded numbering skips 391; the returned value is not
;; visible here -- confirm against the upstream file.
389 (defmethod apply-after (hsx fn
(pattern not-allowed
))
390 (declare (ignore hsx fn
))
(defgeneric open-start-tag\' (handler pattern uri lname)
  (:documentation
   "Match the start tag of an element named by URI and LNAME against PATTERN
on behalf of HANDLER and return the resulting pattern."))
;; CHOICE: OPEN-START-TAG' is applied to both alternatives.
;; NOTE(review): the embedded numbering skips 399; the form combining the
;; two results is not visible here (presumably INTERN-CHOICE) -- confirm
;; against the upstream file.
398 (defmethod open-start-tag\' (hsx (pattern choice
) uri lname
)
400 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)
401 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
)))
;; ELEMENT: if the element's name class contains the name (a NIL URI is
;; treated as the empty string), the result is an AFTER pattern made of the
;; element's child and *EMPTY*.
;; NOTE(review): the embedded numbering skips 406; the value for a name that
;; is not contained is not visible here -- confirm against the upstream file.
403 (defmethod open-start-tag\' (hsx (pattern element
) uri lname
)
404 (if (contains (pattern-name pattern
) (or uri
"") lname
)
405 (intern-after hsx
(pattern-child pattern
) *empty
*)
;; INTERLEAVE: two cases are visible.  In the first, OPEN-START-TAG' is
;; applied to operand A, together with a function that re-interleaves its
;; argument with the untouched operand B.  The second is the mirror image
;; for operand B.
;; NOTE(review): the embedded numbering skips 409-411, 414 and 415; the
;; forms that receive each lambda/result pair (presumably APPLY-AFTER) and
;; the form combining both cases are not visible here -- confirm against the
;; upstream file.
408 (defmethod open-start-tag\' (hsx (pattern interleave
) uri lname
)
412 (lambda (p) (intern-interleave hsx p
(pattern-b pattern
)))
413 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))
416 (lambda (p) (intern-interleave hsx
(pattern-a pattern
) p
))
417 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))))
;; Zero-or-more of C, expressed as the choice between one-or-more of C and
;; *EMPTY*.
(defun intern-zero-or-more (hsx c)
  (let ((repeated (intern-one-or-more hsx c)))
    (intern-choice hsx repeated *empty*)))
;; ONE-OR-MORE: C is the zero-or-more version of the child.
;; OPEN-START-TAG' is applied to the child, together with a function that
;; groups its argument with C.
;; NOTE(review): the embedded numbering skips 424; the form receiving the
;; lambda and the result is not visible here (presumably APPLY-AFTER) --
;; confirm against the upstream file.
422 (defmethod open-start-tag\' (hsx (pattern one-or-more
) uri lname
)
423 (let ((c (intern-zero-or-more hsx
(pattern-child pattern
))))
425 (lambda (p) (intern-group hsx p c
))
426 (open-start-tag\' hsx
(pattern-child pattern
) uri lname
))))
;; GROUP: X is computed with APPLY-AFTER from OPEN-START-TAG' of operand A;
;; the visible part of the function argument groups P with operand B.  A
;; test on (NULLABLE operand A) follows, and OPEN-START-TAG' of operand B
;; appears in one of its branches.
;; NOTE(review): the embedded numbering skips 430, 434, 435 and 437; the
;; lambda header, the form combining X with the operand-B result and the
;; other branch are not visible here -- confirm against the upstream file.
428 (defmethod open-start-tag\' (hsx (pattern group
) uri lname
)
429 (let ((x (apply-after hsx
431 (intern-group hsx p
(pattern-b pattern
)))
432 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))))
433 (if (nullable (pattern-a pattern
))
436 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))
;; AFTER: OPEN-START-TAG' is applied to operand A; the visible INTERN-AFTER
;; form pairs P with the untouched operand B.
;; NOTE(review): the embedded numbering skips 440 and 441; the enclosing
;; call and the binding of P are not visible here (presumably APPLY-AFTER
;; with a lambda) -- confirm against the upstream file.
439 (defmethod open-start-tag\' (hsx (pattern after
) uri lname
)
442 (intern-after hsx p
(pattern-b pattern
)))
443 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)))
;; Fallback for all pattern classes without a more specific method; all
;; arguments are declared ignored.
;; NOTE(review): the embedded numbering skips 447; the returned value is not
;; visible here -- confirm against the upstream file.
445 (defmethod open-start-tag\' (hsx pattern uri lname
)
446 (declare (ignore hsx pattern uri lname
))
;; Feed every attribute in ATTRIBUTES through ATTRIBUTE', each time
;; replacing PATTERN with the result.
;; NOTE(review): the embedded numbering skips 455; the value returned after
;; the loop is not visible here (presumably PATTERN) -- confirm against the
;; upstream file.
452 (defun attributes\' (handler pattern attributes
)
453 (dolist (a attributes
)
454 (setf pattern
(attribute\' handler pattern a
)))
(defgeneric attribute\' (handler pattern attribute)
  (:documentation
   "Match the single SAX attribute ATTRIBUTE against PATTERN on behalf of
HANDLER and return the resulting pattern."))
;; AFTER: ATTRIBUTE' is applied to operand A; operand B is passed on
;; unchanged.
;; NOTE(review): the embedded numbering skips 460; the form receiving both
;; values is not visible here (presumably INTERN-AFTER) -- confirm against
;; the upstream file.
459 (defmethod attribute\' (hsx (pattern after
) a
)
461 (attribute\' hsx
(pattern-a pattern
) a
)
462 (pattern-b pattern
)))
;; CHOICE: ATTRIBUTE' is applied to both alternatives.
;; NOTE(review): the embedded numbering skips 465; the form combining the
;; two results is not visible here (presumably INTERN-CHOICE) -- confirm
;; against the upstream file.
464 (defmethod attribute\' (hsx (pattern choice
) a
)
466 (attribute\' hsx
(pattern-a pattern
) a
)
467 (attribute\' hsx
(pattern-b pattern
) a
)))
;; GROUP: ATTRIBUTE' is applied to operand A and to operand B.
;; NOTE(review): the embedded numbering skips 470, 471 and 473-475; how the
;; two results are paired with the untouched operands and then combined is
;; not visible here -- confirm against the upstream file.
469 (defmethod attribute\' (hsx (pattern group
) a
)
472 (attribute\' hsx
(pattern-a pattern
) a
)
476 (attribute\' hsx
(pattern-b pattern
) a
))))
;; INTERLEAVE: two INTERN-INTERLEAVE forms are visible, one built from
;; ATTRIBUTE' of operand A and one built from ATTRIBUTE' of operand B.
;; NOTE(review): the embedded numbering skips 479, 482 and 484; the other
;; argument of each INTERN-INTERLEAVE form and the form combining both are
;; not visible here -- confirm against the upstream file.
478 (defmethod attribute\' (hsx (pattern interleave
) a
)
480 (intern-interleave hsx
481 (attribute\' hsx
(pattern-a pattern
) a
)
483 (intern-interleave hsx
485 (attribute\' hsx
(pattern-b pattern
) a
))))
;; ONE-OR-MORE: ATTRIBUTE' is applied to the child, and a zero-or-more
;; version of the child is built with INTERN-ZERO-OR-MORE.
;; NOTE(review): the embedded numbering skips 488; the form receiving both
;; values is not visible here (presumably INTERN-GROUP) -- confirm against
;; the upstream file.
487 (defmethod attribute\' (hsx (pattern one-or-more
) a
)
489 (attribute\' hsx
(pattern-child pattern
) a
)
490 (intern-zero-or-more hsx
(pattern-child pattern
))))
;; ATTRIBUTE: the attribute matches when the pattern's name class contains
;; the attribute's name (a NIL namespace URI is treated as the empty string)
;; and a further test involving the pattern's child and the attribute's
;; value succeeds.  EAT turns the boolean outcome into a pattern.
;; NOTE(review): the embedded numbering skips 496; the operator of the
;; second test is not visible here (presumably VALUE-MATCHES-P, which is
;; defined in this file) -- confirm against the upstream file.
492 (defmethod attribute\' (hsx (pattern attribute
) a
)
493 (eat (and (contains (pattern-name pattern
)
494 (or (sax:attribute-namespace-uri a
) "")
495 (sax:attribute-local-name a
))
497 (pattern-child pattern
)
498 (sax:attribute-value a
)))))
;; True if VALUE is acceptable for PATTERN: either PATTERN is nullable and
;; VALUE is whitespace-only, or matching VALUE against PATTERN with TEXT'
;; gives a nullable pattern.  The TEXT' match is only attempted when the
;; first test fails.
(defun value-matches-p (hsx pattern value)
  (cond ((and (nullable pattern) (whitespacep value)))
        (t (nullable (text\' hsx pattern value)))))
;; True if nothing is left of STR once the characters in *WHITESPACE* have
;; been trimmed from both ends.  The empty string counts as whitespace.
(defun whitespacep (str)
  (let ((trimmed (string-trim *whitespace* str)))
    (= 0 (length trimmed))))
;; Fallback for all pattern classes without a more specific method; all
;; arguments are declared ignored.
;; NOTE(review): the embedded numbering skips 509; the returned value is not
;; visible here -- confirm against the upstream file.
507 (defmethod attribute\' (hsx pattern a
)
508 (declare (ignore hsx pattern a
))
512 ;;;; CLOSE-START-TAG'
(defgeneric close-start-tag\' (handler pattern)
  (:documentation
   "Return the pattern that PATTERN turns into once the start tag is
complete, i.e. after all of its attributes have been processed."))
;; AFTER: CLOSE-START-TAG' is applied to operand A; operand B is passed on
;; unchanged.
;; NOTE(review): the embedded numbering skips 517; the form receiving both
;; values is not visible here (presumably INTERN-AFTER) -- confirm against
;; the upstream file.
516 (defmethod close-start-tag\' (hsx (pattern after
))
518 (close-start-tag\' hsx
(pattern-a pattern
))
519 (pattern-b pattern
)))
;; CHOICE: CLOSE-START-TAG' is applied to both alternatives.
;; NOTE(review): the embedded numbering skips 522; the combining form is not
;; visible here (presumably INTERN-CHOICE) -- confirm.
521 (defmethod close-start-tag\' (hsx (pattern choice
))
523 (close-start-tag\' hsx
(pattern-a pattern
))
524 (close-start-tag\' hsx
(pattern-b pattern
))))
;; GROUP: CLOSE-START-TAG' is applied to both operands.
;; NOTE(review): the embedded numbering skips 527; the combining form is not
;; visible here (presumably INTERN-GROUP) -- confirm.
526 (defmethod close-start-tag\' (hsx (pattern group
))
528 (close-start-tag\' hsx
(pattern-a pattern
))
529 (close-start-tag\' hsx
(pattern-b pattern
))))
;; INTERLEAVE: close the start tag in both operands and interleave the
;; results again.
(defmethod close-start-tag\' (hsx (pattern interleave))
  (let* ((a (close-start-tag\' hsx (pattern-a pattern)))
         (b (close-start-tag\' hsx (pattern-b pattern))))
    (intern-interleave hsx a b)))
;; ONE-OR-MORE: close the start tag in the child and wrap the result in
;; one-or-more again.
(defmethod close-start-tag\' (hsx (pattern one-or-more))
  (let ((child (close-start-tag\' hsx (pattern-child pattern))))
    (intern-one-or-more hsx child)))
;; ATTRIBUTE: HSX is declared ignored.
;; NOTE(review): the embedded numbering skips 541; the returned value is not
;; visible here -- confirm against the upstream file.
539 (defmethod close-start-tag\' (hsx (pattern attribute
))
540 (declare (ignore hsx
))
;; Fallback for all pattern classes without a more specific method; HSX is
;; declared ignored.
;; NOTE(review): the embedded numbering skips 545; the returned value is not
;; visible here (PATTERN is not declared ignored, so it is presumably used)
;; -- confirm against the upstream file.
543 (defmethod close-start-tag\' (hsx pattern
)
544 (declare (ignore hsx
))
(defgeneric end-tag\' (handler pattern)
  (:documentation
   "Match the end tag of the current element against PATTERN on behalf of
HANDLER and return the resulting pattern."))
;; CHOICE: END-TAG' is applied to both alternatives.
;; NOTE(review): the embedded numbering skips 553; the combining form is not
;; visible here (presumably INTERN-CHOICE) -- confirm against the upstream
;; file.
552 (defmethod end-tag\' (hsx (pattern choice
))
554 (end-tag\' hsx
(pattern-a pattern
))
555 (end-tag\' hsx
(pattern-b pattern
))))
;; AFTER: the result depends on whether operand A is nullable.
;; NOTE(review): the embedded numbering skips 559 and 560; both branches of
;; the IF are not visible here -- confirm against the upstream file.
557 (defmethod end-tag\' (hsx (pattern after
))
558 (if (nullable (pattern-a pattern
))
;; Fallback for all pattern classes without a more specific method; both
;; arguments are declared ignored.
;; NOTE(review): the embedded numbering skips 564; the returned value is not
;; visible here -- confirm against the upstream file.
562 (defmethod end-tag\' (hsx pattern
)
563 (declare (ignore hsx pattern
))
569 ;;; FIXME: cxml should do that
571 ;;; FIXME: since we ignore PI, CDATA, and comment events, we should probably
572 ;;; discard them properly.
;; SAX proxy that collects character data so that the chained handler gets
;; it in one piece (see FLUSH-PENDING).
;;
;; PENDING-TEXT-NODE is a string output stream, created per instance, into
;; which incoming character data is written.
(defclass text-normalizer (cxml:sax-proxy)
  ((pending-text-node
    :accessor pending-text-node
    :initform (make-string-output-stream))))
;; Character data is not passed on directly; it is appended to the
;; normalizer's pending-text stream.
(defmethod sax:characters ((handler text-normalizer) data)
  (let ((buffer (pending-text-node handler)))
    (write-string data buffer)))
;; Take everything collected in HANDLER's pending-text stream (which empties
;; the stream) and, unless it is the empty string, send it to the chained
;; handler as a single SAX:CHARACTERS event.
(defun flush-pending (handler)
  (let ((str (get-output-stream-string (pending-text-node handler))))
    (when (plusp (length str))
      (sax:characters (cxml:proxy-chained-handler handler) str))))
;; Before a start tag is processed, hand any collected text to the chained
;; handler.
(defmethod sax:start-element :before
    ((handler text-normalizer) uri lname qname attributes)
  (declare (ignore uri lname qname attributes))
  (flush-pending handler))
;; Before an end tag is processed, hand any collected text to the chained
;; handler.
(defmethod sax:end-element :before
    ((handler text-normalizer) uri lname qname)
  (declare (ignore uri lname qname))
  (flush-pending handler))