3 # Copyright (c) 2005 Jonas Fonseca
6 test_description
='Test incremental parsing of SGML documents.
8 This test checks if the SGML parser correctly recovers during incremental
# test_incremental_parsing: feed one SGML source to sgml-parser at a series
# of --stdin sizes and compare every run against a single expected dump.
#
# Reads the shell variables desc, src and out. Their assignments are not
# part of the visible listing; presumably they are taken from the three
# positional arguments given at each call site -- TODO confirm.
# NOTE(review): this listing is incomplete (the then/fi/done keywords and
# the closing brace of the function are not visible), so only the steps
# shown below are documented.
14 test_incremental_parsing
() {
# Document URI derived from the description. normalize is a helper defined
# outside this listing.
19 URI
="test:$(normalize "$desc")"
# Build the file named expected: a document header line first, then the
# content of out from its second line onward (the first sed drops line 1),
# with the second sed inserting leading whitespace on every line.
21 echo "#document: $URI" > expected
22 printf "%s\n" "$out" |
sed -n '2,$p' |
sed -e 's/^/ /' >> expected
# Repeat the parse once per size in the list below. The size is handed to
# the --stdin option; presumably it is the read chunk size in bytes, as the
# failure message suggests -- verify against sgml-parser.
24 for size
in 1 2 3 4 5 6 7 8 9 10 15 20 25 50; do
25 printf "%s" "$src" | sgml-parser
--uri "$URI" --stdin "$size" > output
# Compare the parser output with the expected dump through the harness.
27 test_run_
'cmp output expected'
# Treat the run as failed when either the exit status or eval_ret is
# non-zero. eval_ret is presumably set by test_run_ -- TODO confirm.
28 if [ "$?" != 0 -o "$eval_ret" != 0 ]
# Report the failing description together with the size that triggered it.
30 test_failure_
"$desc" "($size bytes)"
38 test_incremental_parsing \
39 "Parse a small document." \
40 '<html><body><p>Hello World!</p></body></html>' \
47 test_incremental_parsing \
49 '<root><child attr="value" /><child2></><child3 >a</></root>' \
53 attribute: attr -> value
58 test_incremental_parsing \
59 'Parse tag soup elements.' \
60 '<parent attr="value" <child:1></><child:2</>a</parent>' \
63 attribute: attr -> value
68 test_incremental_parsing \
69 'Parse an enclosed comment.' \
70 '<root><!-- Hello World! --></root>' \
73 #comment: Hello World! '
75 test_incremental_parsing \
76 'Parse comment combinations. (I)' \
77 '<root><!-- <!-- -- > --><!--foo--><!----></root>' \
84 test_incremental_parsing \
85 'Parse comment combinations. (II).' \
86 '<! -- comment -->s<!-->-->t<!----->u' \
95 test_incremental_parsing \
96 'Parse bad comment. (I)' \
101 test_incremental_parsing \
102 'Parse bad comment. (II)' \
103 '<!--a--!>bad comment' \
108 test_incremental_parsing \
109 'Parse empty notation.' \
114 test_incremental_parsing \
115 'Parse an enclosed CDATA section.' \
116 '<root><![CDATA[...] ]>...]]></root>' \
119 #cdata-section: ...] ]>...'
121 test_incremental_parsing \
122 'Parse non-enclosed CDATA section.' \
127 test_incremental_parsing \
128 'Parse a bad CDATA section.' \
133 test_incremental_parsing \
134 'Parse attributes.' \
135 '<root lang="fr" attr name="value with &foo; <stuff"></root>' \
138 attribute: lang -> fr
140 attribute: name -> value with &foo; <stuff'
142 test_incremental_parsing \
143 'Parse attributes with garbage.' \
144 "<root a=b c='d' e'f' g= h i = j k =></root>" \
153 test_incremental_parsing \
154 'Parse attribute with non-quoted values.' \
155 '<root color=#abc path=/to/%61-&\one";files/>...' \
158 attribute: color -> #abc
159 attribute: path -> /to/%61-&\one";files
162 test_incremental_parsing \
163 'Parse entity references.' \
166 entity-reference: amp
168 entity-reference: #42'
170 # Just how these should be gracefully handled is not clear to me.
171 test_incremental_parsing \
172 'Parse badly formatted entity references.' \
173 '& m33p;-&.:-copy;-&;-&#;-&#xx;' \
177 entity-reference: .:-copy
183 entity-reference: #xx'
185 test_incremental_parsing \
186 'Parse processing instructions.' \
187 '<?xml encoding="UTF8"?>
193 proc-instruction: xml -> encoding="UTF8"
194 attribute: encoding -> UTF8
196 proc-instruction: ecmascript -> var val=2;\n'
198 test_incremental_parsing \
199 'Parse XML processing instructions.' \
200 '<?xml version="1.0" />?><?xml />-' \
202 proc-instruction: xml -> version="1.0" />
203 attribute: version -> 1.0
204 proc-instruction: xml -> />-'
206 test_incremental_parsing \
207 'Parse XML stylesheet processing instructions.' \
208 '<?xml-stylesheet type="text/xsl" href="url"?>' \
210 proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
211 attribute: type -> text/xsl
212 attribute: href -> url'
214 test_incremental_parsing \
215 'Parse exotic processing instructions.' \
216 '<?xml ?+>+?>-?>-<?js?>-<??>-' \
218 proc-instruction: xml -> ?+>+
220 proc-instruction: js ->
225 test_incremental_parsing \
226 'Parse incorrect processing instructions.' \
227 '<?js<?>-<?<??>-<?xml <=";&?>-<?' \
229 proc-instruction: js -> <
231 proc-instruction: -> <?
233 proc-instruction: xml -> <=";&
236 test_incremental_parsing \
237 'Parse incorrect processing instructions (II).' \
240 proc-instruction: -> ><?'
242 test_incremental_parsing \
243 'Skip spaces not inside text.' \
254 attribute: ns:attr -> value
255 proc-instruction: target -> data'