/*
 * Repository-browser residue captured with this file (not part of the source):
 *   openfile(): Go back to opening the files in text mode. This undoes
 *   [python/dscho.git] / Modules / expat / xmlrole.c
 *   blob b9f7f6c414fbef85ffcaafdf665f8c97a4c3d773
 */
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
6 #ifdef COMPILED_FROM_DSP
7 # include "winconfig.h"
8 #else
9 #ifdef HAVE_CONFIG_H
10 # include <config.h>
11 #endif
12 #endif /* ndef COMPILED_FROM_DSP */
14 #include "xmlrole.h"
15 #include "ascii.h"
/* Doesn't check:

 that ,| are not mixed in a model group
 content of literals

*/
24 static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
25 static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
26 static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
27 static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
28 static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
29 static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
30 static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
31 static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
32 static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
33 static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
34 static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
35 static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
36 static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
37 static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
38 static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
39 static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
40 static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
41 static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
42 static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
43 static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
44 static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
45 static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
46 static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
/* Minimum number of bytes one character occupies in encoding `enc`.
   Read from the encoding object unless the build already supplied its own
   definition of this macro. */
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
/* Return the state machine to its top-level handler once a declaration has
   been closed.  With XML_DTD the handler depends on whether the state was
   initialised for the document entity (internalSubset) or for an external
   entity (externalSubset1); without XML_DTD it is always internalSubset.
   The macro's value is the handler that was assigned. */
#ifdef XML_DTD
#define setTopLevel(state) \
  ((state)->handler = ((state)->documentEntity \
                       ? internalSubset \
                       : externalSubset1))
#else /* not XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
61 typedef int PROLOG_HANDLER(PROLOG_STATE *state,
62 int tok,
63 const char *ptr,
64 const char *end,
65 const ENCODING *enc);
67 static PROLOG_HANDLER
68 prolog0, prolog1, prolog2,
69 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
70 internalSubset,
71 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
72 entity7, entity8, entity9,
73 notation0, notation1, notation2, notation3, notation4,
74 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
75 attlist7, attlist8, attlist9,
76 element0, element1, element2, element3, element4, element5, element6,
77 element7,
78 #ifdef XML_DTD
79 externalSubset0, externalSubset1,
80 condSect0, condSect1, condSect2,
81 #endif /* XML_DTD */
82 declClose,
83 error;
85 static
86 int common(PROLOG_STATE *state, int tok);
88 static
89 int prolog0(PROLOG_STATE *state,
90 int tok,
91 const char *ptr,
92 const char *end,
93 const ENCODING *enc)
95 switch (tok) {
96 case XML_TOK_PROLOG_S:
97 state->handler = prolog1;
98 return XML_ROLE_NONE;
99 case XML_TOK_XML_DECL:
100 state->handler = prolog1;
101 return XML_ROLE_XML_DECL;
102 case XML_TOK_PI:
103 state->handler = prolog1;
104 return XML_ROLE_NONE;
105 case XML_TOK_COMMENT:
106 state->handler = prolog1;
107 case XML_TOK_BOM:
108 return XML_ROLE_NONE;
109 case XML_TOK_DECL_OPEN:
110 if (!XmlNameMatchesAscii(enc,
111 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
112 end,
113 KW_DOCTYPE))
114 break;
115 state->handler = doctype0;
116 return XML_ROLE_NONE;
117 case XML_TOK_INSTANCE_START:
118 state->handler = error;
119 return XML_ROLE_INSTANCE_START;
121 return common(state, tok);
124 static
125 int prolog1(PROLOG_STATE *state,
126 int tok,
127 const char *ptr,
128 const char *end,
129 const ENCODING *enc)
131 switch (tok) {
132 case XML_TOK_PROLOG_S:
133 return XML_ROLE_NONE;
134 case XML_TOK_PI:
135 case XML_TOK_COMMENT:
136 case XML_TOK_BOM:
137 return XML_ROLE_NONE;
138 case XML_TOK_DECL_OPEN:
139 if (!XmlNameMatchesAscii(enc,
140 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
141 end,
142 KW_DOCTYPE))
143 break;
144 state->handler = doctype0;
145 return XML_ROLE_NONE;
146 case XML_TOK_INSTANCE_START:
147 state->handler = error;
148 return XML_ROLE_INSTANCE_START;
150 return common(state, tok);
153 static
154 int prolog2(PROLOG_STATE *state,
155 int tok,
156 const char *ptr,
157 const char *end,
158 const ENCODING *enc)
160 switch (tok) {
161 case XML_TOK_PROLOG_S:
162 return XML_ROLE_NONE;
163 case XML_TOK_PI:
164 case XML_TOK_COMMENT:
165 return XML_ROLE_NONE;
166 case XML_TOK_INSTANCE_START:
167 state->handler = error;
168 return XML_ROLE_INSTANCE_START;
170 return common(state, tok);
173 static
174 int doctype0(PROLOG_STATE *state,
175 int tok,
176 const char *ptr,
177 const char *end,
178 const ENCODING *enc)
180 switch (tok) {
181 case XML_TOK_PROLOG_S:
182 return XML_ROLE_NONE;
183 case XML_TOK_NAME:
184 case XML_TOK_PREFIXED_NAME:
185 state->handler = doctype1;
186 return XML_ROLE_DOCTYPE_NAME;
188 return common(state, tok);
191 static
192 int doctype1(PROLOG_STATE *state,
193 int tok,
194 const char *ptr,
195 const char *end,
196 const ENCODING *enc)
198 switch (tok) {
199 case XML_TOK_PROLOG_S:
200 return XML_ROLE_NONE;
201 case XML_TOK_OPEN_BRACKET:
202 state->handler = internalSubset;
203 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
204 case XML_TOK_DECL_CLOSE:
205 state->handler = prolog2;
206 return XML_ROLE_DOCTYPE_CLOSE;
207 case XML_TOK_NAME:
208 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
209 state->handler = doctype3;
210 return XML_ROLE_NONE;
212 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
213 state->handler = doctype2;
214 return XML_ROLE_NONE;
216 break;
218 return common(state, tok);
221 static
222 int doctype2(PROLOG_STATE *state,
223 int tok,
224 const char *ptr,
225 const char *end,
226 const ENCODING *enc)
228 switch (tok) {
229 case XML_TOK_PROLOG_S:
230 return XML_ROLE_NONE;
231 case XML_TOK_LITERAL:
232 state->handler = doctype3;
233 return XML_ROLE_DOCTYPE_PUBLIC_ID;
235 return common(state, tok);
238 static
239 int doctype3(PROLOG_STATE *state,
240 int tok,
241 const char *ptr,
242 const char *end,
243 const ENCODING *enc)
245 switch (tok) {
246 case XML_TOK_PROLOG_S:
247 return XML_ROLE_NONE;
248 case XML_TOK_LITERAL:
249 state->handler = doctype4;
250 return XML_ROLE_DOCTYPE_SYSTEM_ID;
252 return common(state, tok);
255 static
256 int doctype4(PROLOG_STATE *state,
257 int tok,
258 const char *ptr,
259 const char *end,
260 const ENCODING *enc)
262 switch (tok) {
263 case XML_TOK_PROLOG_S:
264 return XML_ROLE_NONE;
265 case XML_TOK_OPEN_BRACKET:
266 state->handler = internalSubset;
267 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
268 case XML_TOK_DECL_CLOSE:
269 state->handler = prolog2;
270 return XML_ROLE_DOCTYPE_CLOSE;
272 return common(state, tok);
275 static
276 int doctype5(PROLOG_STATE *state,
277 int tok,
278 const char *ptr,
279 const char *end,
280 const ENCODING *enc)
282 switch (tok) {
283 case XML_TOK_PROLOG_S:
284 return XML_ROLE_NONE;
285 case XML_TOK_DECL_CLOSE:
286 state->handler = prolog2;
287 return XML_ROLE_DOCTYPE_CLOSE;
289 return common(state, tok);
292 static
293 int internalSubset(PROLOG_STATE *state,
294 int tok,
295 const char *ptr,
296 const char *end,
297 const ENCODING *enc)
299 switch (tok) {
300 case XML_TOK_PROLOG_S:
301 return XML_ROLE_NONE;
302 case XML_TOK_DECL_OPEN:
303 if (XmlNameMatchesAscii(enc,
304 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
305 end,
306 KW_ENTITY)) {
307 state->handler = entity0;
308 return XML_ROLE_NONE;
310 if (XmlNameMatchesAscii(enc,
311 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
312 end,
313 KW_ATTLIST)) {
314 state->handler = attlist0;
315 return XML_ROLE_NONE;
317 if (XmlNameMatchesAscii(enc,
318 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
319 end,
320 KW_ELEMENT)) {
321 state->handler = element0;
322 return XML_ROLE_NONE;
324 if (XmlNameMatchesAscii(enc,
325 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
326 end,
327 KW_NOTATION)) {
328 state->handler = notation0;
329 return XML_ROLE_NONE;
331 break;
332 case XML_TOK_PI:
333 case XML_TOK_COMMENT:
334 return XML_ROLE_NONE;
335 case XML_TOK_PARAM_ENTITY_REF:
336 return XML_ROLE_PARAM_ENTITY_REF;
337 case XML_TOK_CLOSE_BRACKET:
338 state->handler = doctype5;
339 return XML_ROLE_NONE;
341 return common(state, tok);
#ifdef XML_DTD

/* First handler for an external entity (installed by
   XmlPrologStateInitExternalEntity).  Always switches to externalSubset1;
   an XML declaration token in first position is reported as
   XML_ROLE_TEXT_DECL, any other token is handed straight to
   externalSubset1. */
static
int externalSubset0(PROLOG_STATE *state,
                    int tok,
                    const char *ptr,
                    const char *end,
                    const ENCODING *enc)
{
  state->handler = externalSubset1;
  if (tok == XML_TOK_XML_DECL)
    return XML_ROLE_TEXT_DECL;
  return externalSubset1(state, tok, ptr, end, enc);
}

/* Top-level handler for an external entity.  Adds conditional-section
   handling on top of internalSubset:
     conditional section open  -> condSect0
     conditional section close -> decrements includeLevel; rejected via
                                  common() when includeLevel is already 0
     ']'                       -> always rejected via common()
     XML_TOK_NONE              -> accepted only when includeLevel is 0
   Every other token is delegated to internalSubset. */
static
int externalSubset1(PROLOG_STATE *state,
                    int tok,
                    const char *ptr,
                    const char *end,
                    const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_COND_SECT_OPEN:
    state->handler = condSect0;
    return XML_ROLE_NONE;
  case XML_TOK_COND_SECT_CLOSE:
    if (state->includeLevel == 0)
      break;
    state->includeLevel -= 1;
    return XML_ROLE_NONE;
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_CLOSE_BRACKET:
    break;
  case XML_TOK_NONE:
    if (state->includeLevel)
      break;
    return XML_ROLE_NONE;
  default:
    return internalSubset(state, tok, ptr, end, enc);
  }
  return common(state, tok);
}

#endif /* XML_DTD */
391 static
392 int entity0(PROLOG_STATE *state,
393 int tok,
394 const char *ptr,
395 const char *end,
396 const ENCODING *enc)
398 switch (tok) {
399 case XML_TOK_PROLOG_S:
400 return XML_ROLE_NONE;
401 case XML_TOK_PERCENT:
402 state->handler = entity1;
403 return XML_ROLE_NONE;
404 case XML_TOK_NAME:
405 state->handler = entity2;
406 return XML_ROLE_GENERAL_ENTITY_NAME;
408 return common(state, tok);
411 static
412 int entity1(PROLOG_STATE *state,
413 int tok,
414 const char *ptr,
415 const char *end,
416 const ENCODING *enc)
418 switch (tok) {
419 case XML_TOK_PROLOG_S:
420 return XML_ROLE_NONE;
421 case XML_TOK_NAME:
422 state->handler = entity7;
423 return XML_ROLE_PARAM_ENTITY_NAME;
425 return common(state, tok);
428 static
429 int entity2(PROLOG_STATE *state,
430 int tok,
431 const char *ptr,
432 const char *end,
433 const ENCODING *enc)
435 switch (tok) {
436 case XML_TOK_PROLOG_S:
437 return XML_ROLE_NONE;
438 case XML_TOK_NAME:
439 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
440 state->handler = entity4;
441 return XML_ROLE_NONE;
443 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
444 state->handler = entity3;
445 return XML_ROLE_NONE;
447 break;
448 case XML_TOK_LITERAL:
449 state->handler = declClose;
450 return XML_ROLE_ENTITY_VALUE;
452 return common(state, tok);
455 static
456 int entity3(PROLOG_STATE *state,
457 int tok,
458 const char *ptr,
459 const char *end,
460 const ENCODING *enc)
462 switch (tok) {
463 case XML_TOK_PROLOG_S:
464 return XML_ROLE_NONE;
465 case XML_TOK_LITERAL:
466 state->handler = entity4;
467 return XML_ROLE_ENTITY_PUBLIC_ID;
469 return common(state, tok);
473 static
474 int entity4(PROLOG_STATE *state,
475 int tok,
476 const char *ptr,
477 const char *end,
478 const ENCODING *enc)
480 switch (tok) {
481 case XML_TOK_PROLOG_S:
482 return XML_ROLE_NONE;
483 case XML_TOK_LITERAL:
484 state->handler = entity5;
485 return XML_ROLE_ENTITY_SYSTEM_ID;
487 return common(state, tok);
490 static
491 int entity5(PROLOG_STATE *state,
492 int tok,
493 const char *ptr,
494 const char *end,
495 const ENCODING *enc)
497 switch (tok) {
498 case XML_TOK_PROLOG_S:
499 return XML_ROLE_NONE;
500 case XML_TOK_DECL_CLOSE:
501 setTopLevel(state);
502 return XML_ROLE_ENTITY_COMPLETE;
503 case XML_TOK_NAME:
504 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
505 state->handler = entity6;
506 return XML_ROLE_NONE;
508 break;
510 return common(state, tok);
513 static
514 int entity6(PROLOG_STATE *state,
515 int tok,
516 const char *ptr,
517 const char *end,
518 const ENCODING *enc)
520 switch (tok) {
521 case XML_TOK_PROLOG_S:
522 return XML_ROLE_NONE;
523 case XML_TOK_NAME:
524 state->handler = declClose;
525 return XML_ROLE_ENTITY_NOTATION_NAME;
527 return common(state, tok);
530 static
531 int entity7(PROLOG_STATE *state,
532 int tok,
533 const char *ptr,
534 const char *end,
535 const ENCODING *enc)
537 switch (tok) {
538 case XML_TOK_PROLOG_S:
539 return XML_ROLE_NONE;
540 case XML_TOK_NAME:
541 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
542 state->handler = entity9;
543 return XML_ROLE_NONE;
545 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
546 state->handler = entity8;
547 return XML_ROLE_NONE;
549 break;
550 case XML_TOK_LITERAL:
551 state->handler = declClose;
552 return XML_ROLE_ENTITY_VALUE;
554 return common(state, tok);
557 static
558 int entity8(PROLOG_STATE *state,
559 int tok,
560 const char *ptr,
561 const char *end,
562 const ENCODING *enc)
564 switch (tok) {
565 case XML_TOK_PROLOG_S:
566 return XML_ROLE_NONE;
567 case XML_TOK_LITERAL:
568 state->handler = entity9;
569 return XML_ROLE_ENTITY_PUBLIC_ID;
571 return common(state, tok);
574 static
575 int entity9(PROLOG_STATE *state,
576 int tok,
577 const char *ptr,
578 const char *end,
579 const ENCODING *enc)
581 switch (tok) {
582 case XML_TOK_PROLOG_S:
583 return XML_ROLE_NONE;
584 case XML_TOK_LITERAL:
585 state->handler = declClose;
586 return XML_ROLE_ENTITY_SYSTEM_ID;
588 return common(state, tok);
591 static
592 int notation0(PROLOG_STATE *state,
593 int tok,
594 const char *ptr,
595 const char *end,
596 const ENCODING *enc)
598 switch (tok) {
599 case XML_TOK_PROLOG_S:
600 return XML_ROLE_NONE;
601 case XML_TOK_NAME:
602 state->handler = notation1;
603 return XML_ROLE_NOTATION_NAME;
605 return common(state, tok);
608 static
609 int notation1(PROLOG_STATE *state,
610 int tok,
611 const char *ptr,
612 const char *end,
613 const ENCODING *enc)
615 switch (tok) {
616 case XML_TOK_PROLOG_S:
617 return XML_ROLE_NONE;
618 case XML_TOK_NAME:
619 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
620 state->handler = notation3;
621 return XML_ROLE_NONE;
623 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
624 state->handler = notation2;
625 return XML_ROLE_NONE;
627 break;
629 return common(state, tok);
632 static
633 int notation2(PROLOG_STATE *state,
634 int tok,
635 const char *ptr,
636 const char *end,
637 const ENCODING *enc)
639 switch (tok) {
640 case XML_TOK_PROLOG_S:
641 return XML_ROLE_NONE;
642 case XML_TOK_LITERAL:
643 state->handler = notation4;
644 return XML_ROLE_NOTATION_PUBLIC_ID;
646 return common(state, tok);
649 static
650 int notation3(PROLOG_STATE *state,
651 int tok,
652 const char *ptr,
653 const char *end,
654 const ENCODING *enc)
656 switch (tok) {
657 case XML_TOK_PROLOG_S:
658 return XML_ROLE_NONE;
659 case XML_TOK_LITERAL:
660 state->handler = declClose;
661 return XML_ROLE_NOTATION_SYSTEM_ID;
663 return common(state, tok);
666 static
667 int notation4(PROLOG_STATE *state,
668 int tok,
669 const char *ptr,
670 const char *end,
671 const ENCODING *enc)
673 switch (tok) {
674 case XML_TOK_PROLOG_S:
675 return XML_ROLE_NONE;
676 case XML_TOK_LITERAL:
677 state->handler = declClose;
678 return XML_ROLE_NOTATION_SYSTEM_ID;
679 case XML_TOK_DECL_CLOSE:
680 setTopLevel(state);
681 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
683 return common(state, tok);
686 static
687 int attlist0(PROLOG_STATE *state,
688 int tok,
689 const char *ptr,
690 const char *end,
691 const ENCODING *enc)
693 switch (tok) {
694 case XML_TOK_PROLOG_S:
695 return XML_ROLE_NONE;
696 case XML_TOK_NAME:
697 case XML_TOK_PREFIXED_NAME:
698 state->handler = attlist1;
699 return XML_ROLE_ATTLIST_ELEMENT_NAME;
701 return common(state, tok);
704 static
705 int attlist1(PROLOG_STATE *state,
706 int tok,
707 const char *ptr,
708 const char *end,
709 const ENCODING *enc)
711 switch (tok) {
712 case XML_TOK_PROLOG_S:
713 return XML_ROLE_NONE;
714 case XML_TOK_DECL_CLOSE:
715 setTopLevel(state);
716 return XML_ROLE_NONE;
717 case XML_TOK_NAME:
718 case XML_TOK_PREFIXED_NAME:
719 state->handler = attlist2;
720 return XML_ROLE_ATTRIBUTE_NAME;
722 return common(state, tok);
725 static
726 int attlist2(PROLOG_STATE *state,
727 int tok,
728 const char *ptr,
729 const char *end,
730 const ENCODING *enc)
732 switch (tok) {
733 case XML_TOK_PROLOG_S:
734 return XML_ROLE_NONE;
735 case XML_TOK_NAME:
737 static const char *types[] = {
738 KW_CDATA,
739 KW_ID,
740 KW_IDREF,
741 KW_IDREFS,
742 KW_ENTITY,
743 KW_ENTITIES,
744 KW_NMTOKEN,
745 KW_NMTOKENS,
747 int i;
748 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
749 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
750 state->handler = attlist8;
751 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
754 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
755 state->handler = attlist5;
756 return XML_ROLE_NONE;
758 break;
759 case XML_TOK_OPEN_PAREN:
760 state->handler = attlist3;
761 return XML_ROLE_NONE;
763 return common(state, tok);
766 static
767 int attlist3(PROLOG_STATE *state,
768 int tok,
769 const char *ptr,
770 const char *end,
771 const ENCODING *enc)
773 switch (tok) {
774 case XML_TOK_PROLOG_S:
775 return XML_ROLE_NONE;
776 case XML_TOK_NMTOKEN:
777 case XML_TOK_NAME:
778 case XML_TOK_PREFIXED_NAME:
779 state->handler = attlist4;
780 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
782 return common(state, tok);
785 static
786 int attlist4(PROLOG_STATE *state,
787 int tok,
788 const char *ptr,
789 const char *end,
790 const ENCODING *enc)
792 switch (tok) {
793 case XML_TOK_PROLOG_S:
794 return XML_ROLE_NONE;
795 case XML_TOK_CLOSE_PAREN:
796 state->handler = attlist8;
797 return XML_ROLE_NONE;
798 case XML_TOK_OR:
799 state->handler = attlist3;
800 return XML_ROLE_NONE;
802 return common(state, tok);
805 static
806 int attlist5(PROLOG_STATE *state,
807 int tok,
808 const char *ptr,
809 const char *end,
810 const ENCODING *enc)
812 switch (tok) {
813 case XML_TOK_PROLOG_S:
814 return XML_ROLE_NONE;
815 case XML_TOK_OPEN_PAREN:
816 state->handler = attlist6;
817 return XML_ROLE_NONE;
819 return common(state, tok);
823 static
824 int attlist6(PROLOG_STATE *state,
825 int tok,
826 const char *ptr,
827 const char *end,
828 const ENCODING *enc)
830 switch (tok) {
831 case XML_TOK_PROLOG_S:
832 return XML_ROLE_NONE;
833 case XML_TOK_NAME:
834 state->handler = attlist7;
835 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
837 return common(state, tok);
840 static
841 int attlist7(PROLOG_STATE *state,
842 int tok,
843 const char *ptr,
844 const char *end,
845 const ENCODING *enc)
847 switch (tok) {
848 case XML_TOK_PROLOG_S:
849 return XML_ROLE_NONE;
850 case XML_TOK_CLOSE_PAREN:
851 state->handler = attlist8;
852 return XML_ROLE_NONE;
853 case XML_TOK_OR:
854 state->handler = attlist6;
855 return XML_ROLE_NONE;
857 return common(state, tok);
860 /* default value */
861 static
862 int attlist8(PROLOG_STATE *state,
863 int tok,
864 const char *ptr,
865 const char *end,
866 const ENCODING *enc)
868 switch (tok) {
869 case XML_TOK_PROLOG_S:
870 return XML_ROLE_NONE;
871 case XML_TOK_POUND_NAME:
872 if (XmlNameMatchesAscii(enc,
873 ptr + MIN_BYTES_PER_CHAR(enc),
874 end,
875 KW_IMPLIED)) {
876 state->handler = attlist1;
877 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
879 if (XmlNameMatchesAscii(enc,
880 ptr + MIN_BYTES_PER_CHAR(enc),
881 end,
882 KW_REQUIRED)) {
883 state->handler = attlist1;
884 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
886 if (XmlNameMatchesAscii(enc,
887 ptr + MIN_BYTES_PER_CHAR(enc),
888 end,
889 KW_FIXED)) {
890 state->handler = attlist9;
891 return XML_ROLE_NONE;
893 break;
894 case XML_TOK_LITERAL:
895 state->handler = attlist1;
896 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
898 return common(state, tok);
901 static
902 int attlist9(PROLOG_STATE *state,
903 int tok,
904 const char *ptr,
905 const char *end,
906 const ENCODING *enc)
908 switch (tok) {
909 case XML_TOK_PROLOG_S:
910 return XML_ROLE_NONE;
911 case XML_TOK_LITERAL:
912 state->handler = attlist1;
913 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
915 return common(state, tok);
918 static
919 int element0(PROLOG_STATE *state,
920 int tok,
921 const char *ptr,
922 const char *end,
923 const ENCODING *enc)
925 switch (tok) {
926 case XML_TOK_PROLOG_S:
927 return XML_ROLE_NONE;
928 case XML_TOK_NAME:
929 case XML_TOK_PREFIXED_NAME:
930 state->handler = element1;
931 return XML_ROLE_ELEMENT_NAME;
933 return common(state, tok);
936 static
937 int element1(PROLOG_STATE *state,
938 int tok,
939 const char *ptr,
940 const char *end,
941 const ENCODING *enc)
943 switch (tok) {
944 case XML_TOK_PROLOG_S:
945 return XML_ROLE_NONE;
946 case XML_TOK_NAME:
947 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
948 state->handler = declClose;
949 return XML_ROLE_CONTENT_EMPTY;
951 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
952 state->handler = declClose;
953 return XML_ROLE_CONTENT_ANY;
955 break;
956 case XML_TOK_OPEN_PAREN:
957 state->handler = element2;
958 state->level = 1;
959 return XML_ROLE_GROUP_OPEN;
961 return common(state, tok);
964 static
965 int element2(PROLOG_STATE *state,
966 int tok,
967 const char *ptr,
968 const char *end,
969 const ENCODING *enc)
971 switch (tok) {
972 case XML_TOK_PROLOG_S:
973 return XML_ROLE_NONE;
974 case XML_TOK_POUND_NAME:
975 if (XmlNameMatchesAscii(enc,
976 ptr + MIN_BYTES_PER_CHAR(enc),
977 end,
978 KW_PCDATA)) {
979 state->handler = element3;
980 return XML_ROLE_CONTENT_PCDATA;
982 break;
983 case XML_TOK_OPEN_PAREN:
984 state->level = 2;
985 state->handler = element6;
986 return XML_ROLE_GROUP_OPEN;
987 case XML_TOK_NAME:
988 case XML_TOK_PREFIXED_NAME:
989 state->handler = element7;
990 return XML_ROLE_CONTENT_ELEMENT;
991 case XML_TOK_NAME_QUESTION:
992 state->handler = element7;
993 return XML_ROLE_CONTENT_ELEMENT_OPT;
994 case XML_TOK_NAME_ASTERISK:
995 state->handler = element7;
996 return XML_ROLE_CONTENT_ELEMENT_REP;
997 case XML_TOK_NAME_PLUS:
998 state->handler = element7;
999 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1001 return common(state, tok);
1004 static
1005 int element3(PROLOG_STATE *state,
1006 int tok,
1007 const char *ptr,
1008 const char *end,
1009 const ENCODING *enc)
1011 switch (tok) {
1012 case XML_TOK_PROLOG_S:
1013 return XML_ROLE_NONE;
1014 case XML_TOK_CLOSE_PAREN:
1015 state->handler = declClose;
1016 return XML_ROLE_GROUP_CLOSE;
1017 case XML_TOK_CLOSE_PAREN_ASTERISK:
1018 state->handler = declClose;
1019 return XML_ROLE_GROUP_CLOSE_REP;
1020 case XML_TOK_OR:
1021 state->handler = element4;
1022 return XML_ROLE_NONE;
1024 return common(state, tok);
1027 static
1028 int element4(PROLOG_STATE *state,
1029 int tok,
1030 const char *ptr,
1031 const char *end,
1032 const ENCODING *enc)
1034 switch (tok) {
1035 case XML_TOK_PROLOG_S:
1036 return XML_ROLE_NONE;
1037 case XML_TOK_NAME:
1038 case XML_TOK_PREFIXED_NAME:
1039 state->handler = element5;
1040 return XML_ROLE_CONTENT_ELEMENT;
1042 return common(state, tok);
1045 static
1046 int element5(PROLOG_STATE *state,
1047 int tok,
1048 const char *ptr,
1049 const char *end,
1050 const ENCODING *enc)
1052 switch (tok) {
1053 case XML_TOK_PROLOG_S:
1054 return XML_ROLE_NONE;
1055 case XML_TOK_CLOSE_PAREN_ASTERISK:
1056 state->handler = declClose;
1057 return XML_ROLE_GROUP_CLOSE_REP;
1058 case XML_TOK_OR:
1059 state->handler = element4;
1060 return XML_ROLE_NONE;
1062 return common(state, tok);
1065 static
1066 int element6(PROLOG_STATE *state,
1067 int tok,
1068 const char *ptr,
1069 const char *end,
1070 const ENCODING *enc)
1072 switch (tok) {
1073 case XML_TOK_PROLOG_S:
1074 return XML_ROLE_NONE;
1075 case XML_TOK_OPEN_PAREN:
1076 state->level += 1;
1077 return XML_ROLE_GROUP_OPEN;
1078 case XML_TOK_NAME:
1079 case XML_TOK_PREFIXED_NAME:
1080 state->handler = element7;
1081 return XML_ROLE_CONTENT_ELEMENT;
1082 case XML_TOK_NAME_QUESTION:
1083 state->handler = element7;
1084 return XML_ROLE_CONTENT_ELEMENT_OPT;
1085 case XML_TOK_NAME_ASTERISK:
1086 state->handler = element7;
1087 return XML_ROLE_CONTENT_ELEMENT_REP;
1088 case XML_TOK_NAME_PLUS:
1089 state->handler = element7;
1090 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1092 return common(state, tok);
1095 static
1096 int element7(PROLOG_STATE *state,
1097 int tok,
1098 const char *ptr,
1099 const char *end,
1100 const ENCODING *enc)
1102 switch (tok) {
1103 case XML_TOK_PROLOG_S:
1104 return XML_ROLE_NONE;
1105 case XML_TOK_CLOSE_PAREN:
1106 state->level -= 1;
1107 if (state->level == 0)
1108 state->handler = declClose;
1109 return XML_ROLE_GROUP_CLOSE;
1110 case XML_TOK_CLOSE_PAREN_ASTERISK:
1111 state->level -= 1;
1112 if (state->level == 0)
1113 state->handler = declClose;
1114 return XML_ROLE_GROUP_CLOSE_REP;
1115 case XML_TOK_CLOSE_PAREN_QUESTION:
1116 state->level -= 1;
1117 if (state->level == 0)
1118 state->handler = declClose;
1119 return XML_ROLE_GROUP_CLOSE_OPT;
1120 case XML_TOK_CLOSE_PAREN_PLUS:
1121 state->level -= 1;
1122 if (state->level == 0)
1123 state->handler = declClose;
1124 return XML_ROLE_GROUP_CLOSE_PLUS;
1125 case XML_TOK_COMMA:
1126 state->handler = element6;
1127 return XML_ROLE_GROUP_SEQUENCE;
1128 case XML_TOK_OR:
1129 state->handler = element6;
1130 return XML_ROLE_GROUP_CHOICE;
1132 return common(state, tok);
#ifdef XML_DTD

/* After a conditional-section open token (installed by externalSubset1).
   The keyword INCLUDE moves to condSect1 and IGNORE to condSect2; any
   other name goes to common(). */
static
int condSect0(PROLOG_STATE *state,
              int tok,
              const char *ptr,
              const char *end,
              const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
      state->handler = condSect1;
      return XML_ROLE_NONE;
    }
    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
      state->handler = condSect2;
      return XML_ROLE_NONE;
    }
    break;
  }
  return common(state, tok);
}

/* After INCLUDE.  '[' returns to externalSubset1 and increments
   includeLevel, which externalSubset1 decrements again on the matching
   conditional-section close. */
static
int condSect1(PROLOG_STATE *state,
              int tok,
              const char *ptr,
              const char *end,
              const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_OPEN_BRACKET:
    state->handler = externalSubset1;
    state->includeLevel += 1;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* After IGNORE.  '[' returns to externalSubset1 and reports
   XML_ROLE_IGNORE_SECT; includeLevel is left unchanged. */
static
int condSect2(PROLOG_STATE *state,
              int tok,
              const char *ptr,
              const char *end,
              const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_OPEN_BRACKET:
    state->handler = externalSubset1;
    return XML_ROLE_IGNORE_SECT;
  }
  return common(state, tok);
}

#endif /* XML_DTD */
1198 static
1199 int declClose(PROLOG_STATE *state,
1200 int tok,
1201 const char *ptr,
1202 const char *end,
1203 const ENCODING *enc)
1205 switch (tok) {
1206 case XML_TOK_PROLOG_S:
1207 return XML_ROLE_NONE;
1208 case XML_TOK_DECL_CLOSE:
1209 setTopLevel(state);
1210 return XML_ROLE_NONE;
1212 return common(state, tok);
#if 0

/* Compiled out: this handler is not listed in the PROLOG_HANDLER forward
   declarations and nothing in this file installs it.  It skips every token
   up to a declaration close, then returns to internalSubset.  The final
   call to common() cannot be reached because every switch path returns. */
static
int ignore(PROLOG_STATE *state,
           int tok,
           const char *ptr,
           const char *end,
           const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_DECL_CLOSE:
    state->handler = internalSubset;
    return 0;
  default:
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

#endif
1235 static
1236 int error(PROLOG_STATE *state,
1237 int tok,
1238 const char *ptr,
1239 const char *end,
1240 const ENCODING *enc)
1242 return XML_ROLE_NONE;
1245 static
1246 int common(PROLOG_STATE *state, int tok)
1248 #ifdef XML_DTD
1249 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1250 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1251 #endif
1252 state->handler = error;
1253 return XML_ROLE_ERROR;
1256 void XmlPrologStateInit(PROLOG_STATE *state)
1258 state->handler = prolog0;
1259 #ifdef XML_DTD
1260 state->documentEntity = 1;
1261 state->includeLevel = 0;
1262 #endif /* XML_DTD */
#ifdef XML_DTD

/* Initialise `state` for an external entity: processing starts in
   externalSubset0, the state is flagged as not being the document entity
   and the conditional-section include level is reset to 0. */
void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
  state->handler = externalSubset0;
  state->documentEntity = 0;
  state->includeLevel = 0;
}

#endif /* XML_DTD */