include/start-problem geloest
[cxml-rng.git] / validate.lisp
blobd9874bc7903ba6f309296e4b89aa5e173e247b1c
1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
2 ;;;
3 ;;; An implementation of James Clark's algorithm for RELAX NG validation.
4 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
8 ;;; are met:
9 ;;;
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
12 ;;;
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
17 ;;;
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 (in-package :cxml-rng)
33 (defvar *empty* (make-empty))
34 (defvar *not-allowed* (make-not-allowed))
37 (defun make-validator (grammar)
38 (let* ((table (ensure-registratur grammar))
39 (start (parsed-grammar-interned-start grammar))
40 (validator
41 (make-instance 'validator
42 :registratur table
43 :current-pattern start)))
44 (make-instance 'text-normalizer :chained-handler validator)))
47 ;;;; CONTAINS
49 (defgeneric contains (nc uri lname))
51 (defmethod contains ((nc any-name) uri lname)
52 (let ((except (any-name-except nc)))
53 (if except
54 (not (contains except uri lname))
55 t)))
57 (defmethod contains ((nc ns-name) uri lname)
58 (and (equal (ns-name-uri nc) uri)
59 (let ((except (ns-name-except nc)))
60 (if except
61 (not (contains except uri lname))
62 t))))
64 (defmethod contains ((nc name) uri lname)
65 (and (equal (name-uri nc) uri)
66 (equal (name-lname nc) lname)))
68 (defmethod contains ((nc name-class-choice) uri lname)
69 (or (contains (name-class-choice-a nc) uri lname)
70 (contains (name-class-choice-b nc) uri lname)))
73 ;;;; NULLABLE
75 (defgeneric nullable (pattern))
77 (defmethod nullable ((pattern group))
78 (and (nullable (pattern-a pattern))
79 (nullable (pattern-b pattern))))
81 (defmethod nullable ((pattern interleave))
82 (and (nullable (pattern-a pattern))
83 (nullable (pattern-b pattern))))
85 (defmethod nullable ((pattern choice))
86 (or (nullable (pattern-a pattern))
87 (nullable (pattern-b pattern))))
89 (defmethod nullable ((pattern one-or-more))
90 (nullable (pattern-child pattern)))
92 (defmethod nullable ((pattern element)) nil)
93 (defmethod nullable ((pattern attribute)) nil)
94 (defmethod nullable ((pattern list-pattern)) nil)
95 (defmethod nullable ((pattern value)) nil)
96 (defmethod nullable ((pattern data)) nil)
97 (defmethod nullable ((pattern not-allowed)) nil)
98 (defmethod nullable ((pattern after)) nil)
100 (defmethod nullable ((pattern empty)) t)
101 (defmethod nullable ((pattern text)) t)
104 ;;;; VALIDATOR
106 (defclass validator (sax:sax-parser-mixin)
107 ((current-pattern :initarg :current-pattern :accessor current-pattern)
108 (after-start-tag-p :accessor after-start-tag-p)
109 (pending-text-node :initform nil :accessor pending-text-node)
110 (registratur :initarg :registratur :accessor registratur)))
112 (defun advance (hsx pattern message)
113 (when (typep pattern 'not-allowed)
114 (rng-error hsx "~A, was expecting a ~A"
115 message
116 (replace-scary-characters (current-pattern hsx))))
117 (when *debug*
118 (write-line (replace-scary-characters (current-pattern hsx))))
119 (setf (current-pattern hsx) pattern))
121 ;; make sure slime doesn't die
122 (defun replace-scary-characters (pattern)
123 (let ((str (write-to-string pattern
124 :circle t
125 :escape nil
126 :pretty nil)))
127 (loop
128 for c across str
129 for i from 0
130 when (>= (char-code c) 128)
131 do (setf (elt str i) #\?))
132 str))
134 (defmethod sax:characters ((hsx validator) data)
135 (assert (null (pending-text-node hsx))) ;parser must be normalize
136 (if (after-start-tag-p hsx)
137 (setf (pending-text-node hsx) data)
138 (unless (whitespacep data)
139 ;; we already saw an element sibling, so discard whitespace
140 (advance hsx
141 (text\' hsx (current-pattern hsx) data)
142 "text node not valid")))
143 (setf (after-start-tag-p hsx) nil))
145 (defmethod sax:start-element ((hsx validator) uri lname qname attributes)
146 (declare (ignore qname))
147 (when (pending-text-node hsx)
148 ;; text node was the previous child, and we're in element content.
149 ;; process non-whitespace now; discard whitespace completely
150 (let ((data (pending-text-node hsx)))
151 (unless (whitespacep data)
152 (advance hsx
153 (text\' hsx (current-pattern hsx) data)
154 "text node")))
155 (setf (pending-text-node hsx) nil))
156 (setf attributes
157 (remove-if (cxml::compose #'cxml::xmlns-attr-p #'sax:attribute-qname)
158 attributes))
159 (let* ((p0 (current-pattern hsx))
160 (p1 (open-start-tag\' hsx p0 uri lname))
161 (p2 (progn
162 (advance hsx p1 "element not valid")
163 (attributes\' hsx p1 attributes)))
164 (p3 (progn
165 (advance hsx p2 "attributes not valid")
166 (close-start-tag\' hsx p2))))
167 (advance hsx p3 "attributes not valid")
168 (setf (after-start-tag-p hsx) t)))
170 (defmethod sax:end-element ((hsx validator) uri lname qname)
171 (declare (ignore uri lname qname))
172 (when (after-start-tag-p hsx)
173 ;; nothing at all? pretend we saw whitespace.
174 (sax:characters hsx ""))
175 (when (pending-text-node hsx)
176 ;; text node was the only child?
177 ;; process it and handle whitespace specially
178 (let* ((current (current-pattern hsx))
179 (data (pending-text-node hsx))
180 (next (text\' hsx current data)))
181 (advance hsx
182 (if (whitespacep data)
183 (intern-choice hsx current next)
184 next)
185 "text node not valid"))
186 (setf (pending-text-node hsx) nil))
187 (advance hsx
188 (end-tag\' hsx (current-pattern hsx))
189 "end of element not valid"))
192 ;;;; TEXT'
194 (defgeneric text\' (handler pattern data))
196 (defmethod text\' (hsx (pattern choice) data)
197 (intern-choice hsx
198 (text\' hsx (pattern-a pattern) data)
199 (text\' hsx (pattern-b pattern) data)))
201 (defmethod text\' (hsx (pattern interleave) data)
202 (let ((a (pattern-a pattern))
203 (b (pattern-b pattern)))
204 (intern-choice hsx
205 (intern-interleave hsx (text\' hsx a data) b)
206 (intern-interleave hsx a (text\' hsx b data)))))
208 (defmethod text\' (hsx (pattern group) data)
209 (let* ((a (pattern-a pattern))
210 (b (pattern-b pattern))
211 (p (intern-group hsx (text\' hsx a data) b)))
212 (if (nullable a)
213 (intern-choice hsx p (text\' hsx b data))
214 p)))
216 (defmethod text\' (hsx (pattern after) data)
217 (intern-after hsx
218 (text\' hsx (pattern-a pattern) data)
219 (pattern-b pattern)))
221 (defmethod text\' (hsx (pattern one-or-more) data)
222 (let ((child (pattern-child pattern)))
223 (intern-group hsx
224 (text\' hsx child data)
225 (intern-zero-or-more hsx child))))
227 (defmethod text\' (hsx (pattern text) data)
228 (declare (ignore data))
229 pattern)
231 (defun eat (ok)
232 (if ok *empty* *not-allowed*))
234 (defmethod text\' (hsx (pattern value) data)
235 (eat (equal* (pattern-datatype-library pattern)
236 (pattern-type pattern)
237 (pattern-string pattern)
238 data)))
240 (defmethod text\' (hsx (pattern data) data)
241 (eat (and (typep* (pattern-datatype-library pattern)
242 (pattern-type pattern)
243 data)
244 (let ((except (pattern-except pattern)))
245 (not (and except (nullable (text\' hsx except data))))))))
247 (defmethod text\' (hsx (pattern list-pattern) data)
248 (eat (nullable (list\' hsx (pattern-child pattern) (words data)))))
250 (defmethod text\' (hsx pattern data)
251 (declare (ignore pattern data))
252 *not-allowed*)
254 (defun list\' (hsx pattern words)
255 (dolist (word words)
256 (setf pattern (text\' hsx pattern word)))
257 pattern)
259 (defun words (str)
260 (cl-ppcre:split #.(format nil "[~A]+" *whitespace*)
261 (string-trim *whitespace* str)))
264 ;;;; INTERN
266 (defmacro ensuref (key table value)
267 `(ensure-hash ,key ,table (lambda () ,value)))
269 (defun ensure-hash (key table fn)
270 (or (gethash key table)
271 (setf (gethash key table) (funcall fn))))
273 (defgeneric intern-choice (handler a b))
274 (defmethod intern-choice (hsx a (b not-allowed)) a)
275 (defmethod intern-choice (hsx (a not-allowed) b) b)
276 (defmethod intern-choice (hsx a b)
277 (ensuref (list 'choice a b) (registratur hsx) (make-choice a b)))
279 (defgeneric intern-group (handler a b))
280 (defmethod intern-group (hsx (a pattern) (b not-allowed)) b)
281 (defmethod intern-group (hsx (a not-allowed) (b pattern)) a)
282 (defmethod intern-group (hsx a (b empty)) a)
283 (defmethod intern-group (hsx (a empty) b) b)
284 (defmethod intern-group (hsx a b)
285 (ensuref (list 'group a b) (registratur hsx) (make-group a b)))
287 (defgeneric intern-interleave (handler a b))
288 (defmethod intern-interleave (hsx (a pattern) (b not-allowed)) b)
289 (defmethod intern-interleave (hsx (a not-allowed) (b pattern)) a)
290 (defmethod intern-interleave (hsx a (b empty)) a)
291 (defmethod intern-interleave (hsx (a empty) b) b)
292 (defmethod intern-interleave (hsx a b)
293 (ensuref (list 'interleave a b) (registratur hsx) (make-interleave a b)))
295 (defgeneric intern-after (handler a b))
296 (defmethod intern-after (hsx (a pattern) (b not-allowed)) b)
297 (defmethod intern-after (hsx (a not-allowed) (b pattern)) a)
298 (defmethod intern-after (hsx a b)
299 (ensuref (list 'after a b) (registratur hsx) (make-after a b)))
301 (defgeneric intern-one-or-more (handler c))
302 (defmethod intern-one-or-more (hsx (c not-allowed)) c)
303 (defmethod intern-one-or-more (hsx c)
304 (ensuref (list 'one-or-more c) (registratur hsx) (make-one-or-more c)))
307 ;;;; ENSURE-REGISTRATUR
309 (defvar *seen-elements*)
311 (defun ensure-registratur (grammar)
312 (or (parsed-grammar-registratur grammar)
313 (setf (parsed-grammar-registratur grammar)
314 (let ((table (make-hash-table :test 'equal))
315 (*seen-elements* '())
316 (done-elements '()))
317 (setf (parsed-grammar-interned-start grammar)
318 (intern-pattern (parsed-grammar-pattern grammar) table))
319 (loop
320 for elements = *seen-elements*
321 while elements do
322 (setf *seen-elements* nil)
323 (dolist (pattern elements)
324 (unless (find pattern done-elements)
325 (push pattern done-elements)
326 (setf (pattern-child pattern)
327 (intern-pattern (pattern-child pattern) table)))))
328 table))))
330 ;;; FIXME: misnamed. we don't really intern the originals pattern yet.
332 (defgeneric intern-pattern (pattern table))
334 (defmethod intern-pattern ((pattern element) table)
335 (pushnew pattern *seen-elements*)
336 pattern)
338 (defmethod intern-pattern ((pattern %parent) table)
339 (let ((c (intern-pattern (pattern-child pattern) table)))
340 (if (eq c (pattern-child pattern))
341 pattern
342 (let ((copy (copy-structure pattern)))
343 (setf (pattern-child copy) c)
344 copy))))
346 (defmethod intern-pattern ((pattern %combination) table)
347 (let ((a (intern-pattern (pattern-a pattern) table))
348 (b (intern-pattern (pattern-b pattern) table)))
349 (if (and (eq a (pattern-a pattern)) (eq b (pattern-b pattern)))
350 pattern
351 (let ((copy (copy-structure pattern)))
352 (setf (pattern-a copy) a)
353 (setf (pattern-b copy) b)
354 copy))))
356 (defmethod intern-pattern ((pattern data) table)
357 (let ((e (when (pattern-except pattern)
358 (intern-pattern (pattern-except pattern) table))))
359 (if (eq e (pattern-except pattern))
360 pattern
361 (let ((copy (copy-structure pattern)))
362 (setf (pattern-except copy) e)
363 copy))))
365 (defmethod intern-pattern ((pattern ref) table)
366 (intern-pattern (defn-child (pattern-target pattern)) table))
368 (defmethod intern-pattern ((pattern empty) table)
369 *empty*)
371 (defmethod intern-pattern ((pattern not-allowed) table)
372 *not-allowed*)
374 (defmethod intern-pattern ((pattern %leaf) table)
375 pattern)
378 ;;;; built-in data type library
380 ;;; FIXME
382 (defun equal* (dl type a b)
383 (unless (equal dl "")
384 (error "data type library not found: ~A" dl))
385 (ecase (find-symbol type :keyword)
386 (:|string| (equal a b))
387 (:|token| (equal (normalize-whitespace a) (normalize-whitespace b)))))
389 (defun typep* (dl type str)
390 (declare (ignore str))
391 (unless (equal dl "")
392 (error "data type library not found: ~A" dl))
393 (ecase (find-symbol type :keyword)
394 ((:|string| :|token|) t)))
396 (defun normalize-whitespace (str)
397 (cl-ppcre:regex-replace-all #.(format nil "[~A]+" *whitespace*)
398 (string-trim *whitespace* str)
399 " "))
402 ;;;; APPLY-AFTER
404 (defgeneric apply-after (handler fn pattern))
406 (defmethod apply-after (hsx fn (pattern after))
407 (intern-after hsx
408 (pattern-a pattern)
409 (funcall fn (pattern-b pattern))))
411 (defmethod apply-after (hsx fn (pattern choice))
412 (intern-choice hsx
413 (apply-after hsx fn (pattern-a pattern))
414 (apply-after hsx fn (pattern-b pattern))))
416 (defmethod apply-after (hsx fn (pattern not-allowed))
417 (declare (ignore hsx fn))
418 pattern)
421 ;;;; OPEN-START-TAG'
423 (defgeneric open-start-tag\' (handler pattern uri lname))
425 (defmethod open-start-tag\' (hsx (pattern choice) uri lname)
426 (intern-choice hsx
427 (open-start-tag\' hsx (pattern-a pattern) uri lname)
428 (open-start-tag\' hsx (pattern-b pattern) uri lname)))
430 (defmethod open-start-tag\' (hsx (pattern element) uri lname)
431 (if (contains (pattern-name pattern) (or uri "") lname)
432 (intern-after hsx (pattern-child pattern) *empty*)
433 *not-allowed*))
435 (defmethod open-start-tag\' (hsx (pattern interleave) uri lname)
436 (intern-choice hsx
437 (apply-after
439 (lambda (p) (intern-interleave hsx p (pattern-b pattern)))
440 (open-start-tag\' hsx (pattern-a pattern) uri lname))
441 (apply-after
443 (lambda (p) (intern-interleave hsx (pattern-a pattern) p))
444 (open-start-tag\' hsx (pattern-b pattern) uri lname))))
446 (defun intern-zero-or-more (hsx c)
447 (intern-choice hsx (intern-one-or-more hsx c) *empty*))
449 (defmethod open-start-tag\' (hsx (pattern one-or-more) uri lname)
450 (let ((c (intern-zero-or-more hsx (pattern-child pattern))))
451 (apply-after hsx
452 (lambda (p) (intern-group hsx p c))
453 (open-start-tag\' hsx (pattern-child pattern) uri lname))))
455 (defmethod open-start-tag\' (hsx (pattern group) uri lname)
456 (let ((x (apply-after hsx
457 (lambda (p)
458 (intern-group hsx p (pattern-b pattern)))
459 (open-start-tag\' hsx (pattern-a pattern) uri lname))))
460 (if (nullable (pattern-a pattern))
461 (intern-choice hsx
463 (open-start-tag\' hsx (pattern-b pattern) uri lname))
464 x)))
466 (defmethod open-start-tag\' (hsx (pattern after) uri lname)
467 (apply-after hsx
468 (lambda (p)
469 (intern-after hsx p (pattern-b pattern)))
470 (open-start-tag\' hsx (pattern-a pattern) uri lname)))
472 (defmethod open-start-tag\' (hsx pattern uri lname)
473 (declare (ignore hsx pattern uri lname))
474 *not-allowed*)
477 ;;;; ATTRIBUTES'
479 (defun attributes\' (handler pattern attributes)
480 (dolist (a attributes)
481 (setf pattern (attribute\' handler pattern a)))
482 pattern)
484 (defgeneric attribute\' (handler pattern attribute))
486 (defmethod attribute\' (hsx (pattern after) a)
487 (intern-after hsx
488 (attribute\' hsx (pattern-a pattern) a)
489 (pattern-b pattern)))
491 (defmethod attribute\' (hsx (pattern choice) a)
492 (intern-choice hsx
493 (attribute\' hsx (pattern-a pattern) a)
494 (attribute\' hsx (pattern-b pattern) a)))
496 (defmethod attribute\' (hsx (pattern group) a)
497 (intern-choice hsx
498 (intern-group hsx
499 (attribute\' hsx (pattern-a pattern) a)
500 (pattern-b pattern))
501 (intern-group hsx
502 (pattern-a pattern)
503 (attribute\' hsx (pattern-b pattern) a))))
505 (defmethod attribute\' (hsx (pattern interleave) a)
506 (intern-choice hsx
507 (intern-interleave hsx
508 (attribute\' hsx (pattern-a pattern) a)
509 (pattern-b pattern))
510 (intern-interleave hsx
511 (pattern-a pattern)
512 (attribute\' hsx (pattern-b pattern) a))))
514 (defmethod attribute\' (hsx (pattern one-or-more) a)
515 (intern-group hsx
516 (attribute\' hsx (pattern-child pattern) a)
517 (intern-zero-or-more hsx (pattern-child pattern))))
519 (defmethod attribute\' (hsx (pattern attribute) a)
520 (eat (and (contains (pattern-name pattern)
521 (or (sax:attribute-namespace-uri a) "")
522 (sax:attribute-local-name a))
523 (value-matches-p hsx
524 (pattern-child pattern)
525 (sax:attribute-value a)))))
527 (defun value-matches-p (hsx pattern value)
528 (or (and (nullable pattern) (whitespacep value))
529 (nullable (text\' hsx pattern value))))
531 (defun whitespacep (str)
532 (zerop (length (string-trim *whitespace* str))))
534 (defmethod attribute\' (hsx pattern a)
535 (declare (ignore hsx pattern a))
536 *not-allowed*)
539 ;;;; CLOSE-START-TAG'
541 (defgeneric close-start-tag\' (handler pattern))
543 (defmethod close-start-tag\' (hsx (pattern after))
544 (intern-after hsx
545 (close-start-tag\' hsx (pattern-a pattern))
546 (pattern-b pattern)))
548 (defmethod close-start-tag\' (hsx (pattern choice))
549 (intern-choice hsx
550 (close-start-tag\' hsx (pattern-a pattern))
551 (close-start-tag\' hsx (pattern-b pattern))))
553 (defmethod close-start-tag\' (hsx (pattern group))
554 (intern-group hsx
555 (close-start-tag\' hsx (pattern-a pattern))
556 (close-start-tag\' hsx (pattern-b pattern))))
558 (defmethod close-start-tag\' (hsx (pattern interleave))
559 (intern-interleave hsx
560 (close-start-tag\' hsx (pattern-a pattern))
561 (close-start-tag\' hsx (pattern-b pattern))))
563 (defmethod close-start-tag\' (hsx (pattern one-or-more))
564 (intern-one-or-more hsx (close-start-tag\' hsx (pattern-child pattern))))
566 (defmethod close-start-tag\' (hsx (pattern attribute))
567 (declare (ignore hsx))
568 *not-allowed*)
570 (defmethod close-start-tag\' (hsx pattern)
571 (declare (ignore hsx))
572 pattern)
575 ;;;; END-TAG\'
577 (defgeneric end-tag\' (handler pattern))
579 (defmethod end-tag\' (hsx (pattern choice))
580 (intern-choice hsx
581 (end-tag\' hsx (pattern-a pattern))
582 (end-tag\' hsx (pattern-b pattern))))
584 (defmethod end-tag\' (hsx (pattern after))
585 (if (nullable (pattern-a pattern))
586 (pattern-b pattern)
587 *not-allowed*))
589 (defmethod end-tag\' (hsx pattern)
590 (declare (ignore hsx pattern))
591 *not-allowed*)
594 ;;;; TEXT NORMALIZER
596 ;;; FIXME: cxml should do that
598 ;;; FIXME: since we ignore PI, CDATA, and comment events, we should probably
599 ;;; discard them properly.
601 (defclass text-normalizer (cxml:sax-proxy)
602 ((pending-text-node :initform (make-string-output-stream)
603 :accessor pending-text-node)))
605 (defmethod sax:characters ((handler text-normalizer) data)
606 (write-string data (pending-text-node handler)))
608 (defun flush-pending (handler)
609 (let ((str (get-output-stream-string (pending-text-node handler))))
610 (unless (zerop (length str))
611 (sax:characters (cxml:proxy-chained-handler handler) str))))
613 (defmethod sax:start-element :before
614 ((handler text-normalizer) uri lname qname attributes)
615 (declare (ignore uri lname qname attributes))
616 (flush-pending handler))
618 (defmethod sax:end-element :before
619 ((handler text-normalizer) uri lname qname)
620 (declare (ignore uri lname qname))
621 (flush-pending handler))