defekte grammatik
[cxml-rng.git] / compact.lisp
blob02ed2f62bbe342d2a9b2a8d4a9606750571acaef
1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
2 ;;;
3 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
7 ;;; are met:
8 ;;;
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
11 ;;;
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
16 ;;;
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 (in-package :cxml-rng)
32 (defparameter *keywords*
33 '("attribute" "default" "datatypes" "div" "element" "empty" "external"
34 "grammar" "include" "inherit" "list" "mixed" "namespace" "notAllowed"
35 "parent" "start" "string" "text" "token"))
37 (clex:deflexer test
39 ;; NCName
40 (letter+extras
41 (or (range #x0041 #x005A) (range #x0061 #x007A)
42 ;; just allow the rest of unicode, because clex can't deal with the
43 ;; complete definition of name-char:
44 (range #x00c0 #xd7ff)
45 (range #xe000 #xfffd)
46 (range #x10000 #x10ffff)))
47 (digit (range #x0030 #x0039)) ;ditto
48 (nc-name-start-char (or letter+extras #\_))
49 (nc-name-char (or letter+extras digit #\. #\- #\_))
51 ;; some RNC ranges
52 (char
53 (or 9 10 13
54 (range 32 #xd7ff)
55 (range #xe000 #xfffd)
56 (range #x10000 #x10ffff)))
57 (comment-char
58 (or 9
59 (range 32 #xd7ff)
60 (range #xe000 #xfffd)
61 (range #x10000 #x10ffff)))
62 (string-char
63 (or 32
64 ;; #\"
65 (range 35 38)
66 ;; #\'
67 (range 40 #xd7ff)
68 (range #xe000 #xfffd)
69 (range #x10000 #x10ffff)))
70 (space (or 9 10 13 32))
71 (newline (or 10 13)))
73 ((* space))
75 (#\# (clex:begin 'comment))
76 ((clex::in comment newline) (clex:begin 'clex:initial))
77 ((clex::in comment comment-char))
79 ((and "'''" (* (or string-char #\")) "'''")
80 (return
81 (values 'literal-segment (subseq clex:bag 3 (- (length clex:bag) 3)))))
83 ((and #\' (* (or string-char #\")) #\')
84 (when (or (find (code-char 13) clex:bag)
85 (find (code-char 10) clex:bag))
86 (rng-error nil "disallowed newline in string literal"))
87 (return
88 (values 'literal-segment (subseq clex:bag 1 (- (length clex:bag) 1)))))
90 ((and #\" #\" #\" (* (or string-char #\")) #\" #\" #\")
91 (return
92 (values 'literal-segment (subseq clex:bag 3 (- (length clex:bag) 3)))))
94 ((and #\" (* (or string-char #\')) #\")
95 (when (or (find (code-char 13) clex:bag)
96 (find (code-char 10) clex:bag))
97 (rng-error nil "disallowed newline in string literal"))
98 (return
99 (values 'literal-segment (subseq clex:bag 1 (- (length clex:bag) 1)))))
101 ((and nc-name-start-char (* nc-name-char))
102 (return
103 (cond
104 ((find clex:bag *keywords* :test #'equal)
105 (let ((sym (intern (string-upcase clex:bag) :keyword)))
106 (values sym sym)))
108 (unless (cxml-types::nc-name-p clex:bag)
109 (rng-error nil "not an ncname: ~A" clex:bag))
110 (values 'identifier clex:bag)))))
112 ((and #\\ nc-name-start-char (* nc-name-char))
113 (let ((str (subseq clex:bag 1)))
114 (unless (cxml-types::nc-name-p str)
115 (rng-error nil "not an ncname: ~A" clex:bag))
116 (return (values 'identifier str))))
118 (#\= (return '=))
119 (#\{ (return '{))
120 (#\} (return '}))
121 (#\, (return '|,|))
122 (#\& (return '&))
123 (#\| (return '|\||))
124 (#\? (return '?))
125 (#\* (return '*))
126 (#\+ (return '+))
127 (#\( (return '|(|))
128 (#\) (return '|)|))
129 (#\| (return '|\|=|))
130 (#\& (return '&=))
131 (#\: (return '|:|))
132 (#\: (return '|:*|))
133 (#\~ (return '~))
134 (#\- (return '-)))
136 (yacc:define-parser *compact-parser*
137 (:start-symbol top-level)
138 (:terminals (:attribute :default :datatypes :div :element :empty
139 :external :grammar :include :inherit :list
140 :mixed :namespace :notAllowed :parent :start
141 :string :text :token
142 = { } |,| & |\|| ? * + |(| |)| |\|=| &= |:| |:*| ~
143 identifier literal-segment))
145 (top-level (decl* pattern)
146 (decl* grammmar-content*))
148 (decl* ()
149 (decl decl*))
151 (decl (:namespace identifier-or-keyword = namespace-uri-literal
152 #'(lambda (a b c d)
153 (declare (ignorable a b c d))
154 (print (list :saw-namespace b d))))
155 (:default :namespace = namespace-uri-literal)
156 (:default :namespace identifier-or-keyword = namespace-uri-literal)
157 (:datatypes identifier-or-keyword = literal))
159 (pattern (:element name-class { pattern })
160 (:attribute name-class { pattern })
161 (pattern \, pattern)
162 (pattern & pattern)
163 (pattern \| pattern)
164 (pattern ?)
165 (pattern *)
166 (pattern +)
167 (:list { pattern })
168 (:mixed { pattern } )
169 identifier
170 (:parent identifier)
171 :empty
172 :text
173 ([data-type-name] data-type-value [params] [except-pattern])
174 :not-allowed
175 (:external any-uri-literal [inherit])
176 (:grammar { grammar-content* })
177 (\( pattern \)))
179 (param (identifier-or-keyword = literal))
181 (except-pattern (- pattern))
183 (grammar-content* ()
184 (grammar-content grammar-content*))
186 (grammar-content (start)
187 (define)
188 (:div { grammar-content* })
189 (:include any-uri-literal [inherit] [include-content]))
191 (include-content* ()
192 (include-content include-content*))
194 (include-content (start)
195 (define)
196 (:div { grammar-content* }))
198 (start (:start assign-method pattern))
200 (define (identifier assign-method pattern))
202 (assign-method (=) (\|=) (&=))
204 (name-class (name)
205 (ns-name [except-name-class])
206 (any-name [except-name-class])
207 (name-class \| name-class)
208 (\( name-class \)))
210 (name (identifier-or-keyword)
211 (cname))
213 (except-name-class (- name-class))
215 (data-type-name (cname)
216 (:string)
217 (:token))
219 (data-type-value literal)
220 (any-uri-literal literal)
222 (namespace-uri-literal literal
223 :inherit)
225 (inherit (:inherit = identifier-or-keyword))
227 (identifier-or-keyword identifier
228 keyword)
230 ;; identifier ::= (ncname - keyword) | quotedidentifier
231 ;; quotedidentifier ::= "\" ncname
233 (cname (ncname \: ncname))
235 (ns-name (ncname \:*))
237 (any-name (*))
239 (literal literal-segment
240 (literal-segment ~ literal))
242 ;; literalsegment ::= ...
244 (keyword :default :datatypes :div :element :empty :external :grammar :include
245 :inherit :list :mixed :namespace :notAllowed :parent :start :string
246 :text :token)
248 ;; optional stuff
249 ([data-type-name] () data-type-name)
250 ([inherit] () inherit)
251 ([params] () ({ params }))
252 (params () (param params))
253 ([except-pattern] () (except-pattern))
254 ([include-content] () ({ include-content* }))
255 ([except-name-class] () except-name-class))
257 (defun compact (&optional (p #p"/home/david/src/lisp/cxml-rng/rng.rnc"))
258 (flet ((doit (s)
259 (let ((lexer (make-test-lexer s)))
260 (yacc:parse-with-lexer
261 (lambda ()
262 (multiple-value-bind (cat sem) (funcall lexer)
263 (if (eq cat :eof)
265 (values cat sem))))
266 *compact-parser*))))
267 (if (pathnamep p)
268 (with-open-file (s p) (doit s))
269 (with-input-from-string (s p) (doit s)))))
271 #+(or)
272 (compact)