Fix for bug#9442, ctest crash if CTEST_SOURCE_DIRECTORY was not set.
[cmake.git] / Utilities / cmexpat / xmltok_impl.c
blob6e0be95515eda65916fb68adbf9d8994f7e0073f
1 /*
2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file COPYING for copying permission.
4 */
/* Fallback: unless IS_INVALID_CHAR is already defined when this file is
   processed, no multi-byte sequence is treated as invalid. */
6 #ifndef IS_INVALID_CHAR
7 #define IS_INVALID_CHAR(enc, ptr, n) (0)
8 #endif
/* Switch case for an n-byte lead byte.  Returns XML_TOK_PARTIAL_CHAR when
   fewer than n bytes remain before `end`; stores ptr in *nextTokPtr and
   returns XML_TOK_INVALID when IS_INVALID_CHAR rejects the sequence;
   otherwise advances ptr by n and leaves the switch.  Uses `enc` and `end`
   from the scope in which the macro is expanded. */
10 #ifndef INVALID_LEAD_CASE
11 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
12 case BT_LEAD ## n: \
13 if (end - ptr < n) \
14 return XML_TOK_PARTIAL_CHAR; \
15 if (IS_INVALID_CHAR(enc, ptr, n)) { \
16 *(nextTokPtr) = (ptr); \
17 return XML_TOK_INVALID; \
18 } \
19 ptr += n; \
20 break;
21 #endif
/* Expands to the 2-, 3- and 4-byte lead cases above plus the BT_NONXML,
   BT_MALFORM and BT_TRAIL cases, which store ptr in *nextTokPtr and return
   XML_TOK_INVALID. */
23 #define INVALID_CASES(ptr, nextTokPtr) \
24 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
25 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
26 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
27 case BT_NONXML: \
28 case BT_MALFORM: \
29 case BT_TRAIL: \
30 *(nextTokPtr) = (ptr); \
31 return XML_TOK_INVALID;
/* Switch case for an n-byte lead byte inside a name.  Returns
   XML_TOK_PARTIAL_CHAR if fewer than n bytes remain, XML_TOK_INVALID (with
   *nextTokPtr = ptr) if IS_NAME_CHAR rejects the character, and otherwise
   advances ptr by n. */
33 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
34 case BT_LEAD ## n: \
35 if (end - ptr < n) \
36 return XML_TOK_PARTIAL_CHAR; \
37 if (!IS_NAME_CHAR(enc, ptr, n)) { \
38 *nextTokPtr = ptr; \
39 return XML_TOK_INVALID; \
40 } \
41 ptr += n; \
42 break;
/* All switch cases that accept one name character.  BT_NONASCII is first
   checked with IS_NAME_CHAR_MINBPC and then deliberately falls through to
   join BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS, which advance ptr
   by MINBPC(enc).  The 2-, 3- and 4-byte lead cases follow. */
44 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
45 case BT_NONASCII: \
46 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
47 *nextTokPtr = ptr; \
48 return XML_TOK_INVALID; \
49 } \
50 case BT_NMSTRT: \
51 case BT_HEX: \
52 case BT_DIGIT: \
53 case BT_NAME: \
54 case BT_MINUS: \
55 ptr += MINBPC(enc); \
56 break; \
57 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
58 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
59 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
/* Switch case for an n-byte lead byte at the start of a name.  Same shape
   as CHECK_NAME_CASE but validates with IS_NMSTRT_CHAR. */
61 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
62 case BT_LEAD ## n: \
63 if (end - ptr < n) \
64 return XML_TOK_PARTIAL_CHAR; \
65 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
66 *nextTokPtr = ptr; \
67 return XML_TOK_INVALID; \
68 } \
69 ptr += n; \
70 break;
/* All switch cases that accept one name-start character.  BT_NONASCII is
   checked with IS_NMSTRT_CHAR_MINBPC and then deliberately falls through to
   BT_NMSTRT / BT_HEX, which advance ptr by MINBPC(enc).  The 2-, 3- and
   4-byte lead cases follow. */
72 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
73 case BT_NONASCII: \
74 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
75 *nextTokPtr = ptr; \
76 return XML_TOK_INVALID; \
77 } \
78 case BT_NMSTRT: \
79 case BT_HEX: \
80 ptr += MINBPC(enc); \
81 break; \
82 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
83 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
84 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
/* Default PREFIX leaves identifiers unchanged.
   NOTE(review): presumably the including file defines PREFIX so that each
   encoding gets its own set of function names - confirm in the includer. */
86 #ifndef PREFIX
87 #define PREFIX(ident) ident
88 #endif
90 /* ptr points to character following "<!-" */
/* Scans the rest of a comment.
   Returns XML_TOK_COMMENT with *nextTokPtr just past the closing '>'.
   Returns XML_TOK_INVALID (with *nextTokPtr at the offending character) if
   the first character is not '-', if "--" inside the comment is not
   followed by '>', or if INVALID_CASES rejects a byte.
   Returns XML_TOK_PARTIAL (or XML_TOK_PARTIAL_CHAR from INVALID_CASES) if
   the input ends before the comment is closed. */
92 static
93 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
94 const char **nextTokPtr)
96 if (ptr != end) {
/* The second '-' of the comment opener must be present. */
97 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
98 *nextTokPtr = ptr;
99 return XML_TOK_INVALID;
101 ptr += MINBPC(enc);
102 while (ptr != end) {
103 switch (BYTE_TYPE(enc, ptr)) {
104 INVALID_CASES(ptr, nextTokPtr)
105 case BT_MINUS:
106 if ((ptr += MINBPC(enc)) == end)
107 return XML_TOK_PARTIAL;
/* A single '-' is ordinary content; "--" must be followed by '>'. */
108 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
109 if ((ptr += MINBPC(enc)) == end)
110 return XML_TOK_PARTIAL;
111 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
112 *nextTokPtr = ptr;
113 return XML_TOK_INVALID;
115 *nextTokPtr = ptr + MINBPC(enc);
116 return XML_TOK_COMMENT;
118 break;
119 default:
120 ptr += MINBPC(enc);
121 break;
125 return XML_TOK_PARTIAL;
128 /* ptr points to character following "<!" */
/* Scans the start of a markup declaration.
   '-' hands over to scanComment; '[' yields XML_TOK_COND_SECT_OPEN with
   *nextTokPtr past the '['; a BT_NMSTRT / BT_HEX character starts a keyword;
   anything else is XML_TOK_INVALID.
   The keyword is then consumed (only BT_NMSTRT and BT_HEX characters are
   accepted) until whitespace or '%' ends it, which yields XML_TOK_DECL_OPEN
   with *nextTokPtr at that terminating character.
   Returns XML_TOK_PARTIAL if the input ends first. */
130 static
131 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
132 const char **nextTokPtr)
134 if (ptr == end)
135 return XML_TOK_PARTIAL;
136 switch (BYTE_TYPE(enc, ptr)) {
137 case BT_MINUS:
138 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
139 case BT_LSQB:
140 *nextTokPtr = ptr + MINBPC(enc);
141 return XML_TOK_COND_SECT_OPEN;
142 case BT_NMSTRT:
143 case BT_HEX:
144 ptr += MINBPC(enc);
145 break;
146 default:
147 *nextTokPtr = ptr;
148 return XML_TOK_INVALID;
150 while (ptr != end) {
151 switch (BYTE_TYPE(enc, ptr)) {
152 case BT_PERCNT:
/* Need one character of lookahead after the '%'. */
153 if (ptr + MINBPC(enc) == end)
154 return XML_TOK_PARTIAL;
155 /* don't allow <!ENTITY% foo "whatever"> */
156 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
157 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
158 *nextTokPtr = ptr;
159 return XML_TOK_INVALID;
/* A '%' followed by anything else ends the keyword like whitespace does. */
161 /* fall through */
162 case BT_S: case BT_CR: case BT_LF:
163 *nextTokPtr = ptr;
164 return XML_TOK_DECL_OPEN;
165 case BT_NMSTRT:
166 case BT_HEX:
167 ptr += MINBPC(enc);
168 break;
169 default:
170 *nextTokPtr = ptr;
171 return XML_TOK_INVALID;
174 return XML_TOK_PARTIAL;
/* Classifies the processing-instruction target occupying [ptr, end).
   *tokPtr is set to XML_TOK_PI unless the target is exactly the three
   lower-case characters "xml", in which case it becomes XML_TOK_XML_DECL.
   Returns 0 only when the target spells "xml" case-insensitively with at
   least one upper-case letter (the callers in this file report that as
   XML_TOK_INVALID); returns 1 in every other case. */
177 static
178 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
180 int upper = 0;
181 cmExpatUnused(enc);
182 *tokPtr = XML_TOK_PI;
/* Any target that is not exactly three characters long is an ordinary PI. */
183 if (end - ptr != MINBPC(enc)*3)
184 return 1;
185 switch (BYTE_TO_ASCII(enc, ptr)) {
186 case ASCII_x:
187 break;
188 case ASCII_X:
189 upper = 1;
190 break;
191 default:
192 return 1;
194 ptr += MINBPC(enc);
195 switch (BYTE_TO_ASCII(enc, ptr)) {
196 case ASCII_m:
197 break;
198 case ASCII_M:
199 upper = 1;
200 break;
201 default:
202 return 1;
204 ptr += MINBPC(enc);
205 switch (BYTE_TO_ASCII(enc, ptr)) {
206 case ASCII_l:
207 break;
208 case ASCII_L:
209 upper = 1;
210 break;
211 default:
212 return 1;
/* "xml" with any upper-case letter is rejected. */
214 if (upper)
215 return 0;
216 *tokPtr = XML_TOK_XML_DECL;
217 return 1;
220 /* ptr points to character following "<?" */
/* Scans a processing instruction (or XML declaration).
   The target name must start with a name-start character and continue with
   name characters.  Once whitespace or '?' ends the target, checkPiTarget
   decides between XML_TOK_PI and XML_TOK_XML_DECL; a rejected target gives
   XML_TOK_INVALID at the character that ended it.
   On success that token is returned with *nextTokPtr just past "?>".
   Returns XML_TOK_INVALID for malformed input and XML_TOK_PARTIAL (or
   XML_TOK_PARTIAL_CHAR from the case macros) if the input ends too early. */
222 static
223 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
224 const char **nextTokPtr)
226 int tok;
/* Remember where the target starts so it can be classified later. */
227 const char *target = ptr;
228 if (ptr == end)
229 return XML_TOK_PARTIAL;
230 switch (BYTE_TYPE(enc, ptr)) {
231 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
232 default:
233 *nextTokPtr = ptr;
234 return XML_TOK_INVALID;
236 while (ptr != end) {
237 switch (BYTE_TYPE(enc, ptr)) {
238 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239 case BT_S: case BT_CR: case BT_LF:
240 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
241 *nextTokPtr = ptr;
242 return XML_TOK_INVALID;
244 ptr += MINBPC(enc);
/* Skip the instruction's data until "?>" is found. */
245 while (ptr != end) {
246 switch (BYTE_TYPE(enc, ptr)) {
247 INVALID_CASES(ptr, nextTokPtr)
248 case BT_QUEST:
249 ptr += MINBPC(enc);
250 if (ptr == end)
251 return XML_TOK_PARTIAL;
252 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253 *nextTokPtr = ptr + MINBPC(enc);
254 return tok;
256 break;
257 default:
258 ptr += MINBPC(enc);
259 break;
262 return XML_TOK_PARTIAL;
/* Target immediately followed by '?': only "?>" is acceptable here. */
263 case BT_QUEST:
264 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
265 *nextTokPtr = ptr;
266 return XML_TOK_INVALID;
268 ptr += MINBPC(enc);
269 if (ptr == end)
270 return XML_TOK_PARTIAL;
271 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272 *nextTokPtr = ptr + MINBPC(enc);
273 return tok;
275 /* fall through */
276 default:
277 *nextTokPtr = ptr;
278 return XML_TOK_INVALID;
281 return XML_TOK_PARTIAL;
/* Matches the "CDATA[" keyword of a CDATA section opener.  The caller in
   this file (scanLt) passes ptr pointing just past "<![".
   Returns XML_TOK_PARTIAL if fewer than six characters are available,
   XML_TOK_INVALID with *nextTokPtr at the first mismatching character, or
   XML_TOK_CDATA_SECT_OPEN with *nextTokPtr just past the '['. */
285 static
286 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
287 const char **nextTokPtr)
289 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
290 int i;
291 cmExpatUnused(enc);
292 /* CDATA[ */
293 if (end - ptr < 6 * MINBPC(enc))
294 return XML_TOK_PARTIAL;
295 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
296 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
297 *nextTokPtr = ptr;
298 return XML_TOK_INVALID;
301 *nextTokPtr = ptr;
302 return XML_TOK_CDATA_SECT_OPEN;
/* Returns the next token inside a CDATA section.
   XML_TOK_NONE            - ptr == end.
   XML_TOK_CDATA_SECT_CLOSE - input starts with "]]>"; *nextTokPtr is past it.
   XML_TOK_DATA_NEWLINE    - input starts with CR, CR LF or LF.
   XML_TOK_DATA_CHARS      - a run of character data; *nextTokPtr is where
                             the run stops.
   XML_TOK_INVALID         - a byte rejected by INVALID_CASES at the start.
   XML_TOK_PARTIAL         - the input ends where more lookahead is needed
                             (after ']', "]]" or CR). */
305 static
306 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
307 const char **nextTokPtr)
309 if (ptr == end)
310 return XML_TOK_NONE;
/* For multi-byte code units, ignore a trailing incomplete unit. */
311 if (MINBPC(enc) > 1) {
312 size_t n = end - ptr;
313 if (n & (MINBPC(enc) - 1)) {
314 n &= ~(MINBPC(enc) - 1);
315 if (n == 0)
316 return XML_TOK_PARTIAL;
317 end = ptr + n;
320 switch (BYTE_TYPE(enc, ptr)) {
321 case BT_RSQB:
322 ptr += MINBPC(enc);
323 if (ptr == end)
324 return XML_TOK_PARTIAL;
/* A lone ']' is data; ptr already points past it. */
325 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
326 break;
327 ptr += MINBPC(enc);
328 if (ptr == end)
329 return XML_TOK_PARTIAL;
/* "]]" without '>': step back onto the second ']' so that only the first
   ']' is reported as data by the loop below. */
330 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
331 ptr -= MINBPC(enc);
332 break;
334 *nextTokPtr = ptr + MINBPC(enc);
335 return XML_TOK_CDATA_SECT_CLOSE;
336 case BT_CR:
337 ptr += MINBPC(enc);
338 if (ptr == end)
339 return XML_TOK_PARTIAL;
/* CR LF counts as a single newline. */
340 if (BYTE_TYPE(enc, ptr) == BT_LF)
341 ptr += MINBPC(enc);
342 *nextTokPtr = ptr;
343 return XML_TOK_DATA_NEWLINE;
344 case BT_LF:
345 *nextTokPtr = ptr + MINBPC(enc);
346 return XML_TOK_DATA_NEWLINE;
347 INVALID_CASES(ptr, nextTokPtr)
348 default:
349 ptr += MINBPC(enc);
350 break;
/* Extend the data run until a character that needs its own token, an
   invalid or incomplete multi-byte sequence, or the end of input. */
352 while (ptr != end) {
353 switch (BYTE_TYPE(enc, ptr)) {
354 #define LEAD_CASE(n) \
355 case BT_LEAD ## n: \
356 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
357 *nextTokPtr = ptr; \
358 return XML_TOK_DATA_CHARS; \
360 ptr += n; \
361 break;
362 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
363 #undef LEAD_CASE
364 case BT_NONXML:
365 case BT_MALFORM:
366 case BT_TRAIL:
367 case BT_CR:
368 case BT_LF:
369 case BT_RSQB:
370 *nextTokPtr = ptr;
371 return XML_TOK_DATA_CHARS;
372 default:
373 ptr += MINBPC(enc);
374 break;
377 *nextTokPtr = ptr;
378 return XML_TOK_DATA_CHARS;
381 /* ptr points to character following "</" */
/* Scans an end tag.  The name must begin with a name-start character and
   continue with name characters (plus ':' when XML_NS is defined); it may
   be followed by whitespace and must be closed by '>'.
   Returns XML_TOK_END_TAG with *nextTokPtr just past the '>',
   XML_TOK_INVALID with *nextTokPtr at the offending character, or
   XML_TOK_PARTIAL (XML_TOK_PARTIAL_CHAR from the case macros) if the input
   ends first. */
383 static
384 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
385 const char **nextTokPtr)
387 if (ptr == end)
388 return XML_TOK_PARTIAL;
389 switch (BYTE_TYPE(enc, ptr)) {
390 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
391 default:
392 *nextTokPtr = ptr;
393 return XML_TOK_INVALID;
395 while (ptr != end) {
396 switch (BYTE_TYPE(enc, ptr)) {
397 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
398 case BT_S: case BT_CR: case BT_LF:
/* After the name only whitespace may precede the closing '>'. */
399 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
400 switch (BYTE_TYPE(enc, ptr)) {
401 case BT_S: case BT_CR: case BT_LF:
402 break;
403 case BT_GT:
404 *nextTokPtr = ptr + MINBPC(enc);
405 return XML_TOK_END_TAG;
406 default:
407 *nextTokPtr = ptr;
408 return XML_TOK_INVALID;
411 return XML_TOK_PARTIAL;
412 #ifdef XML_NS
413 case BT_COLON:
414 /* no need to check qname syntax here, since end-tag must match exactly */
415 ptr += MINBPC(enc);
416 break;
417 #endif
418 case BT_GT:
419 *nextTokPtr = ptr + MINBPC(enc);
420 return XML_TOK_END_TAG;
421 default:
422 *nextTokPtr = ptr;
423 return XML_TOK_INVALID;
426 return XML_TOK_PARTIAL;
429 /* ptr points to character following "&#X" */
/* Scans the digits of a hexadecimal character reference.  (scanCharRef,
   the caller in this file, dispatches here on a lower-case 'x'.)
   At least one BT_DIGIT / BT_HEX character is required, followed by more of
   the same and a terminating ';'.
   Returns XML_TOK_CHAR_REF with *nextTokPtr just past the ';',
   XML_TOK_INVALID with *nextTokPtr at the offending character, or
   XML_TOK_PARTIAL if the input ends first. */
431 static
432 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
433 const char **nextTokPtr)
435 if (ptr != end) {
436 switch (BYTE_TYPE(enc, ptr)) {
437 case BT_DIGIT:
438 case BT_HEX:
439 break;
440 default:
441 *nextTokPtr = ptr;
442 return XML_TOK_INVALID;
444 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
445 switch (BYTE_TYPE(enc, ptr)) {
446 case BT_DIGIT:
447 case BT_HEX:
448 break;
449 case BT_SEMI:
450 *nextTokPtr = ptr + MINBPC(enc);
451 return XML_TOK_CHAR_REF;
452 default:
453 *nextTokPtr = ptr;
454 return XML_TOK_INVALID;
458 return XML_TOK_PARTIAL;
461 /* ptr points to character following "&#" */
/* Scans a numeric character reference.  A leading 'x' delegates to
   scanHexCharRef; otherwise at least one BT_DIGIT character is required,
   followed by more digits and a terminating ';'.
   Returns XML_TOK_CHAR_REF with *nextTokPtr just past the ';',
   XML_TOK_INVALID with *nextTokPtr at the offending character, or
   XML_TOK_PARTIAL if the input ends first. */
463 static
464 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
465 const char **nextTokPtr)
467 if (ptr != end) {
468 if (CHAR_MATCHES(enc, ptr, ASCII_x))
469 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
470 switch (BYTE_TYPE(enc, ptr)) {
471 case BT_DIGIT:
472 break;
473 default:
474 *nextTokPtr = ptr;
475 return XML_TOK_INVALID;
477 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
478 switch (BYTE_TYPE(enc, ptr)) {
479 case BT_DIGIT:
480 break;
481 case BT_SEMI:
482 *nextTokPtr = ptr + MINBPC(enc);
483 return XML_TOK_CHAR_REF;
484 default:
485 *nextTokPtr = ptr;
486 return XML_TOK_INVALID;
490 return XML_TOK_PARTIAL;
493 /* ptr points to character following "&" */
/* Scans a reference.  '#' (BT_NUM) delegates to scanCharRef; otherwise an
   entity name (name-start character followed by name characters) must be
   terminated by ';'.
   Returns XML_TOK_ENTITY_REF with *nextTokPtr just past the ';' (or the
   result of scanCharRef), XML_TOK_INVALID with *nextTokPtr at the offending
   character, or XML_TOK_PARTIAL (XML_TOK_PARTIAL_CHAR from the case macros)
   if the input ends first. */
495 static
496 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
497 const char **nextTokPtr)
499 if (ptr == end)
500 return XML_TOK_PARTIAL;
501 switch (BYTE_TYPE(enc, ptr)) {
502 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
503 case BT_NUM:
504 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505 default:
506 *nextTokPtr = ptr;
507 return XML_TOK_INVALID;
509 while (ptr != end) {
510 switch (BYTE_TYPE(enc, ptr)) {
511 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
512 case BT_SEMI:
513 *nextTokPtr = ptr + MINBPC(enc);
514 return XML_TOK_ENTITY_REF;
515 default:
516 *nextTokPtr = ptr;
517 return XML_TOK_INVALID;
520 return XML_TOK_PARTIAL;
523 /* ptr points to character following first character of attribute name */
/* Scans the attribute list of a start tag or empty-element tag through to
   the end of the tag.  Each attribute is: name, optional whitespace, '=',
   optional whitespace, a value quoted with '"' or '\'', and then
   whitespace, '/' or '>'.
   Returns XML_TOK_START_TAG_WITH_ATTS or XML_TOK_EMPTY_ELEMENT_WITH_ATTS
   with *nextTokPtr just past the '>', XML_TOK_INVALID with *nextTokPtr at
   the offending character, or XML_TOK_PARTIAL (XML_TOK_PARTIAL_CHAR from the
   case macros) if the input ends first.  A non-positive result from scanRef
   inside a value is passed straight back to the caller. */
525 static
526 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
527 const char **nextTokPtr)
529 #ifdef XML_NS
/* Set once the current attribute name has contained a ':'. */
530 int hadColon = 0;
531 #endif
532 while (ptr != end) {
533 switch (BYTE_TYPE(enc, ptr)) {
534 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
535 #ifdef XML_NS
536 case BT_COLON:
/* At most one ':' per name, and it must be followed by a name-start
   character. */
537 if (hadColon) {
538 *nextTokPtr = ptr;
539 return XML_TOK_INVALID;
541 hadColon = 1;
542 ptr += MINBPC(enc);
543 if (ptr == end)
544 return XML_TOK_PARTIAL;
545 switch (BYTE_TYPE(enc, ptr)) {
546 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
547 default:
548 *nextTokPtr = ptr;
549 return XML_TOK_INVALID;
551 break;
552 #endif
553 case BT_S: case BT_CR: case BT_LF:
/* Whitespace after the name: only more whitespace or '=' may follow. */
554 for (;;) {
555 int t;
557 ptr += MINBPC(enc);
558 if (ptr == end)
559 return XML_TOK_PARTIAL;
560 t = BYTE_TYPE(enc, ptr);
561 if (t == BT_EQUALS)
562 break;
563 switch (t) {
564 case BT_S:
565 case BT_LF:
566 case BT_CR:
567 break;
568 default:
569 *nextTokPtr = ptr;
570 return XML_TOK_INVALID;
573 /* fall through */
574 case BT_EQUALS:
/* Byte type of the opening quote; the value ends at the same byte type. */
576 int open;
577 #ifdef XML_NS
578 hadColon = 0;
579 #endif
/* Skip whitespace between '=' and the opening quote. */
580 for (;;) {
582 ptr += MINBPC(enc);
583 if (ptr == end)
584 return XML_TOK_PARTIAL;
585 open = BYTE_TYPE(enc, ptr);
586 if (open == BT_QUOT || open == BT_APOS)
587 break;
588 switch (open) {
589 case BT_S:
590 case BT_LF:
591 case BT_CR:
592 break;
593 default:
594 *nextTokPtr = ptr;
595 return XML_TOK_INVALID;
598 ptr += MINBPC(enc);
599 /* in attribute value */
600 for (;;) {
601 int t;
602 if (ptr == end)
603 return XML_TOK_PARTIAL;
604 t = BYTE_TYPE(enc, ptr);
605 if (t == open)
606 break;
607 switch (t) {
608 INVALID_CASES(ptr, nextTokPtr)
609 case BT_AMP:
/* scanRef writes the position after the reference (or the error position)
   directly into ptr. */
611 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
612 if (tok <= 0) {
613 if (tok == XML_TOK_INVALID)
614 *nextTokPtr = ptr;
615 return tok;
617 break;
/* A literal '<' is not allowed inside an attribute value. */
619 case BT_LT:
620 *nextTokPtr = ptr;
621 return XML_TOK_INVALID;
622 default:
623 ptr += MINBPC(enc);
624 break;
/* The closing quote must be followed by whitespace, '/' or '>'. */
627 ptr += MINBPC(enc);
628 if (ptr == end)
629 return XML_TOK_PARTIAL;
630 switch (BYTE_TYPE(enc, ptr)) {
631 case BT_S:
632 case BT_CR:
633 case BT_LF:
634 break;
635 case BT_SOL:
636 goto sol;
637 case BT_GT:
638 goto gt;
639 default:
640 *nextTokPtr = ptr;
641 return XML_TOK_INVALID;
643 /* ptr points to closing quote */
/* Skip whitespace until the next attribute name, '>' or "/>". */
644 for (;;) {
645 ptr += MINBPC(enc);
646 if (ptr == end)
647 return XML_TOK_PARTIAL;
648 switch (BYTE_TYPE(enc, ptr)) {
649 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
650 case BT_S: case BT_CR: case BT_LF:
651 continue;
652 case BT_GT:
654 *nextTokPtr = ptr + MINBPC(enc);
655 return XML_TOK_START_TAG_WITH_ATTS;
656 case BT_SOL:
657 sol:
658 ptr += MINBPC(enc);
659 if (ptr == end)
660 return XML_TOK_PARTIAL;
661 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
662 *nextTokPtr = ptr;
663 return XML_TOK_INVALID;
665 *nextTokPtr = ptr + MINBPC(enc);
666 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
667 default:
668 *nextTokPtr = ptr;
669 return XML_TOK_INVALID;
671 break;
673 break;
675 default:
676 *nextTokPtr = ptr;
677 return XML_TOK_INVALID;
680 return XML_TOK_PARTIAL;
683 /* ptr points to character following "<" */
/* Scans markup that begins with '<' in content.
   '!' followed by '-' delegates to scanComment and '!' followed by '[' to
   scanCdataSection (any other character after '!' is XML_TOK_INVALID);
   '?' delegates to scanPi; '/' delegates to scanEndTag.
   A name-start character begins a start tag: the element name is consumed,
   then either the tag closes (XML_TOK_START_TAG_NO_ATTS for '>',
   XML_TOK_EMPTY_ELEMENT_NO_ATTS for "/>", *nextTokPtr just past the '>') or
   an attribute name starts after whitespace and scanAtts takes over.
   Returns XML_TOK_INVALID with *nextTokPtr at the offending character, or
   XML_TOK_PARTIAL (XML_TOK_PARTIAL_CHAR from the case macros) if the input
   ends first. */
685 static
686 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
687 const char **nextTokPtr)
689 #ifdef XML_NS
690 int hadColon;
691 #endif
692 if (ptr == end)
693 return XML_TOK_PARTIAL;
694 switch (BYTE_TYPE(enc, ptr)) {
695 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
696 case BT_EXCL:
697 if ((ptr += MINBPC(enc)) == end)
698 return XML_TOK_PARTIAL;
699 switch (BYTE_TYPE(enc, ptr)) {
700 case BT_MINUS:
701 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
702 case BT_LSQB:
703 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
705 *nextTokPtr = ptr;
706 return XML_TOK_INVALID;
707 case BT_QUEST:
708 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
709 case BT_SOL:
710 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
711 default:
712 *nextTokPtr = ptr;
713 return XML_TOK_INVALID;
715 #ifdef XML_NS
716 hadColon = 0;
717 #endif
718 /* we have a start-tag */
719 while (ptr != end) {
720 switch (BYTE_TYPE(enc, ptr)) {
721 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
722 #ifdef XML_NS
723 case BT_COLON:
/* At most one ':' in the element name, followed by a name-start
   character. */
724 if (hadColon) {
725 *nextTokPtr = ptr;
726 return XML_TOK_INVALID;
728 hadColon = 1;
729 ptr += MINBPC(enc);
730 if (ptr == end)
731 return XML_TOK_PARTIAL;
732 switch (BYTE_TYPE(enc, ptr)) {
733 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
734 default:
735 *nextTokPtr = ptr;
736 return XML_TOK_INVALID;
738 break;
739 #endif
740 case BT_S: case BT_CR: case BT_LF:
/* Whitespace after the element name: skip it, then expect '>', '/' or the
   first character of an attribute name. */
742 ptr += MINBPC(enc);
743 while (ptr != end) {
744 switch (BYTE_TYPE(enc, ptr)) {
745 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
746 case BT_GT:
747 goto gt;
748 case BT_SOL:
749 goto sol;
750 case BT_S: case BT_CR: case BT_LF:
751 ptr += MINBPC(enc);
752 continue;
753 default:
754 *nextTokPtr = ptr;
755 return XML_TOK_INVALID;
/* Reached only after CHECK_NMSTRT_CASES consumed the first character of an
   attribute name. */
757 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
759 return XML_TOK_PARTIAL;
761 case BT_GT:
763 *nextTokPtr = ptr + MINBPC(enc);
764 return XML_TOK_START_TAG_NO_ATTS;
765 case BT_SOL:
766 sol:
767 ptr += MINBPC(enc);
768 if (ptr == end)
769 return XML_TOK_PARTIAL;
770 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
771 *nextTokPtr = ptr;
772 return XML_TOK_INVALID;
774 *nextTokPtr = ptr + MINBPC(enc);
775 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
776 default:
777 *nextTokPtr = ptr;
778 return XML_TOK_INVALID;
781 return XML_TOK_PARTIAL;
/* Returns the next token of element content.
   XML_TOK_NONE          - ptr == end.
   '<' delegates to scanLt and '&' to scanRef.
   XML_TOK_DATA_NEWLINE  - CR, CR LF or LF at the start.
   XML_TOK_TRAILING_CR   - a CR that is the last character of the input.
   XML_TOK_TRAILING_RSQB - the input ends right after a leading ']' or "]]".
   XML_TOK_INVALID       - "]]>" in content (*nextTokPtr at the '>'), or a
                           byte rejected by INVALID_CASES at the start.
   XML_TOK_DATA_CHARS    - a run of character data; *nextTokPtr is where
                           the run stops.
   XML_TOK_PARTIAL       - MINBPC(enc) > 1 and less than one whole code unit
                           is available. */
784 static
785 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
786 const char **nextTokPtr)
788 if (ptr == end)
789 return XML_TOK_NONE;
/* For multi-byte code units, ignore a trailing incomplete unit. */
790 if (MINBPC(enc) > 1) {
791 size_t n = end - ptr;
792 if (n & (MINBPC(enc) - 1)) {
793 n &= ~(MINBPC(enc) - 1);
794 if (n == 0)
795 return XML_TOK_PARTIAL;
796 end = ptr + n;
799 switch (BYTE_TYPE(enc, ptr)) {
800 case BT_LT:
801 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
802 case BT_AMP:
803 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
804 case BT_CR:
805 ptr += MINBPC(enc);
806 if (ptr == end)
807 return XML_TOK_TRAILING_CR;
808 if (BYTE_TYPE(enc, ptr) == BT_LF)
809 ptr += MINBPC(enc);
810 *nextTokPtr = ptr;
811 return XML_TOK_DATA_NEWLINE;
812 case BT_LF:
813 *nextTokPtr = ptr + MINBPC(enc);
814 return XML_TOK_DATA_NEWLINE;
815 case BT_RSQB:
816 ptr += MINBPC(enc);
817 if (ptr == end)
818 return XML_TOK_TRAILING_RSQB;
/* A lone ']' is data; ptr already points past it. */
819 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
820 break;
821 ptr += MINBPC(enc);
822 if (ptr == end)
823 return XML_TOK_TRAILING_RSQB;
/* "]]" without '>': step back onto the second ']' and treat the first as
   data. */
824 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
825 ptr -= MINBPC(enc);
826 break;
828 *nextTokPtr = ptr;
829 return XML_TOK_INVALID;
830 INVALID_CASES(ptr, nextTokPtr)
831 default:
832 ptr += MINBPC(enc);
833 break;
/* Extend the data run until a character that needs its own token, an
   invalid or incomplete multi-byte sequence, or the end of input. */
835 while (ptr != end) {
836 switch (BYTE_TYPE(enc, ptr)) {
837 #define LEAD_CASE(n) \
838 case BT_LEAD ## n: \
839 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
840 *nextTokPtr = ptr; \
841 return XML_TOK_DATA_CHARS; \
843 ptr += n; \
844 break;
845 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
846 #undef LEAD_CASE
847 case BT_RSQB:
/* Look ahead for "]]>".  A ']' not followed by ']', or "]]" not followed by
   '>', is ordinary data.  If the input ends before that can be decided, the
   run stops in front of the ']'. */
848 if (ptr + MINBPC(enc) != end) {
849 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
850 ptr += MINBPC(enc);
851 break;
853 if (ptr + 2*MINBPC(enc) != end) {
854 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
855 ptr += MINBPC(enc);
856 break;
858 *nextTokPtr = ptr + 2*MINBPC(enc);
859 return XML_TOK_INVALID;
862 /* fall through */
863 case BT_AMP:
864 case BT_LT:
865 case BT_NONXML:
866 case BT_MALFORM:
867 case BT_TRAIL:
868 case BT_CR:
869 case BT_LF:
870 *nextTokPtr = ptr;
871 return XML_TOK_DATA_CHARS;
872 default:
873 ptr += MINBPC(enc);
874 break;
877 *nextTokPtr = ptr;
878 return XML_TOK_DATA_CHARS;
881 /* ptr points to character following "%" */
/* Scans what follows a '%'.
   If the next character is whitespace or another '%', returns
   XML_TOK_PERCENT with *nextTokPtr at that character.  Otherwise a name
   terminated by ';' is required and XML_TOK_PARAM_ENTITY_REF is returned
   with *nextTokPtr just past the ';'.
   Returns XML_TOK_INVALID with *nextTokPtr at the offending character, or
   XML_TOK_PARTIAL (XML_TOK_PARTIAL_CHAR from the case macros) if the input
   ends first. */
883 static
884 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
885 const char **nextTokPtr)
887 if (ptr == end)
888 return XML_TOK_PARTIAL;
889 switch (BYTE_TYPE(enc, ptr)) {
890 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
891 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
892 *nextTokPtr = ptr;
893 return XML_TOK_PERCENT;
894 default:
895 *nextTokPtr = ptr;
896 return XML_TOK_INVALID;
898 while (ptr != end) {
899 switch (BYTE_TYPE(enc, ptr)) {
900 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
901 case BT_SEMI:
902 *nextTokPtr = ptr + MINBPC(enc);
903 return XML_TOK_PARAM_ENTITY_REF;
904 default:
905 *nextTokPtr = ptr;
906 return XML_TOK_INVALID;
909 return XML_TOK_PARTIAL;
/* Scans the name that follows a '#' (prologTok calls this with ptr just
   past a BT_NUM character).
   The name must start with a name-start character and continue with name
   characters.  It ends at whitespace, ')', '>', '%' or '|', giving
   XML_TOK_POUND_NAME with *nextTokPtr at that delimiter.
   Returns -XML_TOK_POUND_NAME if the input ends while still inside the
   name, XML_TOK_PARTIAL if there is no input after the '#', and
   XML_TOK_INVALID with *nextTokPtr at the offending character otherwise. */
912 static
913 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
914 const char **nextTokPtr)
916 if (ptr == end)
917 return XML_TOK_PARTIAL;
918 switch (BYTE_TYPE(enc, ptr)) {
919 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
920 default:
921 *nextTokPtr = ptr;
922 return XML_TOK_INVALID;
924 while (ptr != end) {
925 switch (BYTE_TYPE(enc, ptr)) {
926 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
927 case BT_CR: case BT_LF: case BT_S:
928 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
929 *nextTokPtr = ptr;
930 return XML_TOK_POUND_NAME;
931 default:
932 *nextTokPtr = ptr;
933 return XML_TOK_INVALID;
936 return -XML_TOK_POUND_NAME;
/* Scans a quoted literal.  `open` is the byte type of the opening quote
   (prologTok passes BT_QUOT or BT_APOS) and ptr points just past it.
   A quote of the other kind inside the literal is skipped as data.
   After the matching closing quote:
     - if the input ends there, -XML_TOK_LITERAL is returned;
     - if the next character is whitespace, '>', '%' or '[',
       XML_TOK_LITERAL is returned;
     - otherwise XML_TOK_INVALID is returned.
   In the last two cases *nextTokPtr points just past the closing quote.
   Returns XML_TOK_INVALID / XML_TOK_PARTIAL_CHAR via INVALID_CASES for bad
   bytes, and XML_TOK_PARTIAL if no closing quote is found. */
939 static
940 int PREFIX(scanLit)(int open, const ENCODING *enc,
941 const char *ptr, const char *end,
942 const char **nextTokPtr)
944 while (ptr != end) {
945 int t = BYTE_TYPE(enc, ptr);
946 switch (t) {
947 INVALID_CASES(ptr, nextTokPtr)
948 case BT_QUOT:
949 case BT_APOS:
950 ptr += MINBPC(enc);
/* The other kind of quote is just part of the literal. */
951 if (t != open)
952 break;
953 if (ptr == end)
954 return -XML_TOK_LITERAL;
955 *nextTokPtr = ptr;
956 switch (BYTE_TYPE(enc, ptr)) {
957 case BT_S: case BT_CR: case BT_LF:
958 case BT_GT: case BT_PERCNT: case BT_LSQB:
959 return XML_TOK_LITERAL;
960 default:
961 return XML_TOK_INVALID;
963 default:
964 ptr += MINBPC(enc);
965 break;
968 return XML_TOK_PARTIAL;
/* Returns the next token of the prolog / DTD starting at ptr.
   XML_TOK_NONE is returned for empty input.  When MINBPC(enc) > 1 the end
   is first rounded down to a whole number of code units (XML_TOK_PARTIAL if
   nothing is left).
   Dispatch on the first character:
     quote        - scanLit
     '<'          - "<!" goes to scanDecl, "<?" to scanPi; '<' followed by a
                    name-start character gives XML_TOK_INSTANCE_START with
                    *nextTokPtr back at the '<'
     whitespace   - XML_TOK_PROLOG_S covering the whole whitespace run
     '%'          - scanPercent
     '#'          - scanPoundName
     , [ ] ( ) | > - the corresponding punctuation tokens
     otherwise    - a name (XML_TOK_NAME) or name token (XML_TOK_NMTOKEN),
                    possibly suffixed by '+', '*' or '?'
   Negative values (-XML_TOK_PROLOG_S, -XML_TOK_CLOSE_BRACKET,
   -XML_TOK_CLOSE_PAREN, -tok) are returned when the input ends immediately
   after a lone CR, a ']', a ')' or inside a name, respectively.
   XML_TOK_INVALID is returned with *nextTokPtr at the offending character. */
971 static
972 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
973 const char **nextTokPtr)
975 int tok;
976 if (ptr == end)
977 return XML_TOK_NONE;
978 if (MINBPC(enc) > 1) {
979 size_t n = end - ptr;
980 if (n & (MINBPC(enc) - 1)) {
981 n &= ~(MINBPC(enc) - 1);
982 if (n == 0)
983 return XML_TOK_PARTIAL;
984 end = ptr + n;
987 switch (BYTE_TYPE(enc, ptr)) {
988 case BT_QUOT:
989 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
990 case BT_APOS:
991 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
992 case BT_LT:
994 ptr += MINBPC(enc);
995 if (ptr == end)
996 return XML_TOK_PARTIAL;
997 switch (BYTE_TYPE(enc, ptr)) {
998 case BT_EXCL:
999 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000 case BT_QUEST:
1001 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
/* A start tag begins here; report it with *nextTokPtr back at the '<'. */
1002 case BT_NMSTRT:
1003 case BT_HEX:
1004 case BT_NONASCII:
1005 case BT_LEAD2:
1006 case BT_LEAD3:
1007 case BT_LEAD4:
1008 *nextTokPtr = ptr - MINBPC(enc);
1009 return XML_TOK_INSTANCE_START;
1011 *nextTokPtr = ptr;
1012 return XML_TOK_INVALID;
1014 case BT_CR:
/* A CR that is the very last character gets the negative token. */
1015 if (ptr + MINBPC(enc) == end)
1016 return -XML_TOK_PROLOG_S;
1017 /* fall through */
1018 case BT_S: case BT_LF:
1019 for (;;) {
1020 ptr += MINBPC(enc);
1021 if (ptr == end)
1022 break;
1023 switch (BYTE_TYPE(enc, ptr)) {
1024 case BT_S: case BT_LF:
1025 break;
1026 case BT_CR:
1027 /* don't split CR/LF pair */
1028 if (ptr + MINBPC(enc) != end)
1029 break;
1030 /* fall through */
1031 default:
1032 *nextTokPtr = ptr;
1033 return XML_TOK_PROLOG_S;
1036 *nextTokPtr = ptr;
1037 return XML_TOK_PROLOG_S;
1038 case BT_PERCNT:
1039 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1040 case BT_COMMA:
1041 *nextTokPtr = ptr + MINBPC(enc);
1042 return XML_TOK_COMMA;
1043 case BT_LSQB:
1044 *nextTokPtr = ptr + MINBPC(enc);
1045 return XML_TOK_OPEN_BRACKET;
1046 case BT_RSQB:
1047 ptr += MINBPC(enc);
1048 if (ptr == end)
1049 return -XML_TOK_CLOSE_BRACKET;
/* "]]>" closes a conditional section; "]]" at the end of the input needs
   more data; anything else is a plain ']'. */
1050 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1051 if (ptr + MINBPC(enc) == end)
1052 return XML_TOK_PARTIAL;
1053 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1054 *nextTokPtr = ptr + 2*MINBPC(enc);
1055 return XML_TOK_COND_SECT_CLOSE;
1058 *nextTokPtr = ptr;
1059 return XML_TOK_CLOSE_BRACKET;
1060 case BT_LPAR:
1061 *nextTokPtr = ptr + MINBPC(enc);
1062 return XML_TOK_OPEN_PAREN;
1063 case BT_RPAR:
1064 ptr += MINBPC(enc);
1065 if (ptr == end)
1066 return -XML_TOK_CLOSE_PAREN;
/* ')' may carry a '*', '?' or '+' suffix, which becomes part of the token. */
1067 switch (BYTE_TYPE(enc, ptr)) {
1068 case BT_AST:
1069 *nextTokPtr = ptr + MINBPC(enc);
1070 return XML_TOK_CLOSE_PAREN_ASTERISK;
1071 case BT_QUEST:
1072 *nextTokPtr = ptr + MINBPC(enc);
1073 return XML_TOK_CLOSE_PAREN_QUESTION;
1074 case BT_PLUS:
1075 *nextTokPtr = ptr + MINBPC(enc);
1076 return XML_TOK_CLOSE_PAREN_PLUS;
1077 case BT_CR: case BT_LF: case BT_S:
1078 case BT_GT: case BT_COMMA: case BT_VERBAR:
1079 case BT_RPAR:
1080 *nextTokPtr = ptr;
1081 return XML_TOK_CLOSE_PAREN;
1083 *nextTokPtr = ptr;
1084 return XML_TOK_INVALID;
1085 case BT_VERBAR:
1086 *nextTokPtr = ptr + MINBPC(enc);
1087 return XML_TOK_OR;
1088 case BT_GT:
1089 *nextTokPtr = ptr + MINBPC(enc);
1090 return XML_TOK_DECL_CLOSE;
1091 case BT_NUM:
1092 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
/* Multi-byte first character: a name-start character begins a NAME, any
   other name character begins an NMTOKEN, anything else is invalid. */
1093 #define LEAD_CASE(n) \
1094 case BT_LEAD ## n: \
1095 if (end - ptr < n) \
1096 return XML_TOK_PARTIAL_CHAR; \
1097 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1098 ptr += n; \
1099 tok = XML_TOK_NAME; \
1100 break; \
1102 if (IS_NAME_CHAR(enc, ptr, n)) { \
1103 ptr += n; \
1104 tok = XML_TOK_NMTOKEN; \
1105 break; \
1107 *nextTokPtr = ptr; \
1108 return XML_TOK_INVALID;
1109 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1110 #undef LEAD_CASE
1111 case BT_NMSTRT:
1112 case BT_HEX:
1113 tok = XML_TOK_NAME;
1114 ptr += MINBPC(enc);
1115 break;
1116 case BT_DIGIT:
1117 case BT_NAME:
1118 case BT_MINUS:
1119 #ifdef XML_NS
1120 case BT_COLON:
1121 #endif
1122 tok = XML_TOK_NMTOKEN;
1123 ptr += MINBPC(enc);
1124 break;
1125 case BT_NONASCII:
1126 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1127 ptr += MINBPC(enc);
1128 tok = XML_TOK_NAME;
1129 break;
1131 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1132 ptr += MINBPC(enc);
1133 tok = XML_TOK_NMTOKEN;
1134 break;
1136 /* fall through */
1137 default:
1138 *nextTokPtr = ptr;
1139 return XML_TOK_INVALID;
/* Consume the rest of the name / name token. */
1141 while (ptr != end) {
1142 switch (BYTE_TYPE(enc, ptr)) {
1143 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1144 case BT_GT: case BT_RPAR: case BT_COMMA:
1145 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1146 case BT_S: case BT_CR: case BT_LF:
1147 *nextTokPtr = ptr;
1148 return tok;
1149 #ifdef XML_NS
1150 case BT_COLON:
/* The first ':' in a NAME makes it a PREFIXED_NAME if a name character
   follows, otherwise an NMTOKEN; a second ':' also makes it an NMTOKEN. */
1151 ptr += MINBPC(enc);
1152 switch (tok) {
1153 case XML_TOK_NAME:
1154 if (ptr == end)
1155 return XML_TOK_PARTIAL;
1156 tok = XML_TOK_PREFIXED_NAME;
1157 switch (BYTE_TYPE(enc, ptr)) {
1158 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1159 default:
1160 tok = XML_TOK_NMTOKEN;
1161 break;
1163 break;
1164 case XML_TOK_PREFIXED_NAME:
1165 tok = XML_TOK_NMTOKEN;
1166 break;
1168 break;
1169 #endif
/* The '+', '*' and '?' suffixes are rejected after an NMTOKEN. */
1170 case BT_PLUS:
1171 if (tok == XML_TOK_NMTOKEN) {
1172 *nextTokPtr = ptr;
1173 return XML_TOK_INVALID;
1175 *nextTokPtr = ptr + MINBPC(enc);
1176 return XML_TOK_NAME_PLUS;
1177 case BT_AST:
1178 if (tok == XML_TOK_NMTOKEN) {
1179 *nextTokPtr = ptr;
1180 return XML_TOK_INVALID;
1182 *nextTokPtr = ptr + MINBPC(enc);
1183 return XML_TOK_NAME_ASTERISK;
1184 case BT_QUEST:
1185 if (tok == XML_TOK_NMTOKEN) {
1186 *nextTokPtr = ptr;
1187 return XML_TOK_INVALID;
1189 *nextTokPtr = ptr + MINBPC(enc);
1190 return XML_TOK_NAME_QUESTION;
1191 default:
1192 *nextTokPtr = ptr;
1193 return XML_TOK_INVALID;
1196 return -tok;
/* Splits an attribute value into tokens.
   XML_TOK_NONE is returned for empty input.  A special character is turned
   into its own token only when it is the first character; otherwise the
   characters before it are returned as XML_TOK_DATA_CHARS with *nextTokPtr
   at the special character.
     '&'      - delegated to scanRef
     LF       - XML_TOK_DATA_NEWLINE
     CR       - XML_TOK_DATA_NEWLINE (CR LF is consumed as one newline), or
                XML_TOK_TRAILING_CR if the CR is the last character
     BT_S     - XML_TOK_ATTRIBUTE_VALUE_S
   A '<' yields XML_TOK_INVALID wherever it appears.
   Multi-byte lead bytes are skipped without a bounds or validity check.
   NOTE(review): presumably the text has already been validated by an
   earlier scan - confirm against the callers. */
1199 static
1200 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1201 const char **nextTokPtr)
1203 const char *start;
1204 if (ptr == end)
1205 return XML_TOK_NONE;
1206 start = ptr;
1207 while (ptr != end) {
1208 switch (BYTE_TYPE(enc, ptr)) {
1209 #define LEAD_CASE(n) \
1210 case BT_LEAD ## n: ptr += n; break;
1211 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1212 #undef LEAD_CASE
1213 case BT_AMP:
1214 if (ptr == start)
1215 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1216 *nextTokPtr = ptr;
1217 return XML_TOK_DATA_CHARS;
1218 case BT_LT:
1219 /* this is for inside entity references */
1220 *nextTokPtr = ptr;
1221 return XML_TOK_INVALID;
1222 case BT_LF:
1223 if (ptr == start) {
1224 *nextTokPtr = ptr + MINBPC(enc);
1225 return XML_TOK_DATA_NEWLINE;
1227 *nextTokPtr = ptr;
1228 return XML_TOK_DATA_CHARS;
1229 case BT_CR:
1230 if (ptr == start) {
1231 ptr += MINBPC(enc);
1232 if (ptr == end)
1233 return XML_TOK_TRAILING_CR;
1234 if (BYTE_TYPE(enc, ptr) == BT_LF)
1235 ptr += MINBPC(enc);
1236 *nextTokPtr = ptr;
1237 return XML_TOK_DATA_NEWLINE;
1239 *nextTokPtr = ptr;
1240 return XML_TOK_DATA_CHARS;
1241 case BT_S:
1242 if (ptr == start) {
1243 *nextTokPtr = ptr + MINBPC(enc);
1244 return XML_TOK_ATTRIBUTE_VALUE_S;
1246 *nextTokPtr = ptr;
1247 return XML_TOK_DATA_CHARS;
1248 default:
1249 ptr += MINBPC(enc);
1250 break;
1253 *nextTokPtr = ptr;
1254 return XML_TOK_DATA_CHARS;
/* Splits an entity value into tokens.
   XML_TOK_NONE is returned for empty input.  A special character is turned
   into its own token only when it is the first character; otherwise the
   characters before it are returned as XML_TOK_DATA_CHARS with *nextTokPtr
   at the special character.
     '&'  - delegated to scanRef
     '%'  - delegated to scanPercent; a bare percent sign (XML_TOK_PERCENT)
            is converted to XML_TOK_INVALID
     LF   - XML_TOK_DATA_NEWLINE
     CR   - XML_TOK_DATA_NEWLINE (CR LF is consumed as one newline), or
            XML_TOK_TRAILING_CR if the CR is the last character
   Multi-byte lead bytes are skipped without a bounds or validity check.
   NOTE(review): presumably the text has already been validated by an
   earlier scan - confirm against the callers. */
1257 static
1258 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1259 const char **nextTokPtr)
1261 const char *start;
1262 if (ptr == end)
1263 return XML_TOK_NONE;
1264 start = ptr;
1265 while (ptr != end) {
1266 switch (BYTE_TYPE(enc, ptr)) {
1267 #define LEAD_CASE(n) \
1268 case BT_LEAD ## n: ptr += n; break;
1269 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1270 #undef LEAD_CASE
1271 case BT_AMP:
1272 if (ptr == start)
1273 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1274 *nextTokPtr = ptr;
1275 return XML_TOK_DATA_CHARS;
1276 case BT_PERCNT:
1277 if (ptr == start) {
1278 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1279 end, nextTokPtr);
1280 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1282 *nextTokPtr = ptr;
1283 return XML_TOK_DATA_CHARS;
1284 case BT_LF:
1285 if (ptr == start) {
1286 *nextTokPtr = ptr + MINBPC(enc);
1287 return XML_TOK_DATA_NEWLINE;
1289 *nextTokPtr = ptr;
1290 return XML_TOK_DATA_CHARS;
1291 case BT_CR:
1292 if (ptr == start) {
1293 ptr += MINBPC(enc);
1294 if (ptr == end)
1295 return XML_TOK_TRAILING_CR;
1296 if (BYTE_TYPE(enc, ptr) == BT_LF)
1297 ptr += MINBPC(enc);
1298 *nextTokPtr = ptr;
1299 return XML_TOK_DATA_NEWLINE;
1301 *nextTokPtr = ptr;
1302 return XML_TOK_DATA_CHARS;
1303 default:
1304 ptr += MINBPC(enc);
1305 break;
1308 *nextTokPtr = ptr;
1309 return XML_TOK_DATA_CHARS;
1312 #ifdef XML_DTD
/* Skips the body of an ignored conditional section (compiled only when
   XML_DTD is defined).  Each "<![" increases the nesting level and each
   "]]>" either decreases it or, at level 0, ends the section.
   Returns XML_TOK_IGNORE_SECT with *nextTokPtr just past the closing "]]>",
   XML_TOK_INVALID / XML_TOK_PARTIAL_CHAR via INVALID_CASES for bad bytes,
   or XML_TOK_PARTIAL if the input ends first.  When MINBPC(enc) > 1 a
   trailing incomplete code unit is ignored. */
1314 static
1315 int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1316 const char **nextTokPtr)
1318 int level = 0;
1319 if (MINBPC(enc) > 1) {
1320 size_t n = end - ptr;
1321 if (n & (MINBPC(enc) - 1)) {
1322 n &= ~(MINBPC(enc) - 1);
1323 end = ptr + n;
1326 while (ptr != end) {
1327 switch (BYTE_TYPE(enc, ptr)) {
1328 INVALID_CASES(ptr, nextTokPtr)
1329 case BT_LT:
1330 if ((ptr += MINBPC(enc)) == end)
1331 return XML_TOK_PARTIAL;
1332 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1333 if ((ptr += MINBPC(enc)) == end)
1334 return XML_TOK_PARTIAL;
/* "<![" opens a nested section. */
1335 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1336 ++level;
1337 ptr += MINBPC(enc);
1340 break;
1341 case BT_RSQB:
1342 if ((ptr += MINBPC(enc)) == end)
1343 return XML_TOK_PARTIAL;
1344 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1345 if ((ptr += MINBPC(enc)) == end)
1346 return XML_TOK_PARTIAL;
/* "]]>" closes the innermost open section. */
1347 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1348 ptr += MINBPC(enc);
1349 if (level == 0) {
1350 *nextTokPtr = ptr;
1351 return XML_TOK_IGNORE_SECT;
1353 --level;
1356 break;
1357 default:
1358 ptr += MINBPC(enc);
1359 break;
1362 return XML_TOK_PARTIAL;
1365 #endif /* XML_DTD */
/* Checks that every character between ptr and end, excluding the first and
   the last code unit, is acceptable in a public identifier.
   NOTE(review): the skipped first and last units are presumably the
   surrounding quotes - confirm against the callers.
   Returns 1 if all characters are acceptable; otherwise returns 0 and
   stores the position of the first unacceptable character in *badPtr. */
1367 static
1368 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1369 const char **badPtr)
1371 ptr += MINBPC(enc);
1372 end -= MINBPC(enc);
1373 for (; ptr != end; ptr += MINBPC(enc)) {
1374 switch (BYTE_TYPE(enc, ptr)) {
1375 case BT_DIGIT:
1376 case BT_HEX:
1377 case BT_MINUS:
1378 case BT_APOS:
1379 case BT_LPAR:
1380 case BT_RPAR:
1381 case BT_PLUS:
1382 case BT_COMMA:
1383 case BT_SOL:
1384 case BT_EQUALS:
1385 case BT_QUEST:
1386 case BT_CR:
1387 case BT_LF:
1388 case BT_SEMI:
1389 case BT_EXCL:
1390 case BT_AST:
1391 case BT_PERCNT:
1392 case BT_NUM:
1393 #ifdef XML_NS
1394 case BT_COLON:
1395 #endif
1396 break;
1397 case BT_S:
/* Of the BT_S characters, a tab is rejected. */
1398 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1399 *badPtr = ptr;
1400 return 0;
1402 break;
1403 case BT_NAME:
1404 case BT_NMSTRT:
/* Name characters are accepted only when BYTE_TO_ASCII yields a value with
   no bits above 0x7f; others deliberately fall through to the default
   case. */
1405 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1406 break;
1407 default:
/* Of everything else, only '$' and '@' are accepted. */
1408 switch (BYTE_TO_ASCII(enc, ptr)) {
1409 case 0x24: /* $ */
1410 case 0x40: /* @ */
1411 break;
1412 default:
1413 *badPtr = ptr;
1414 return 0;
1416 break;
1419 return 1;
1422 /* This must only be called for a well-formed start-tag or empty element tag.
1423 Returns the number of attributes. Pointers to the first attsMax attributes
1424 are stored in atts. */
1426 static
1427 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1428 int attsMax, ATTRIBUTE *atts)
1430 enum { other, inName, inValue } state = inName;
1431 int nAtts = 0;
1432 int open = 0; /* defined when state == inValue;
1433 initialization just to shut up compilers */
1435 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1436 switch (BYTE_TYPE(enc, ptr)) {
1437 #define START_NAME \
1438 if (state == other) { \
1439 if (nAtts < attsMax) { \
1440 atts[nAtts].name = ptr; \
1441 atts[nAtts].normalized = 1; \
1443 state = inName; \
1445 #define LEAD_CASE(n) \
1446 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1447 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1448 #undef LEAD_CASE
1449 case BT_NONASCII:
1450 case BT_NMSTRT:
1451 case BT_HEX:
1452 START_NAME
1453 break;
1454 #undef START_NAME
1455 case BT_QUOT:
1456 if (state != inValue) {
1457 if (nAtts < attsMax)
1458 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1459 state = inValue;
1460 open = BT_QUOT;
1462 else if (open == BT_QUOT) {
1463 state = other;
1464 if (nAtts < attsMax)
1465 atts[nAtts].valueEnd = ptr;
1466 nAtts++;
1468 break;
1469 case BT_APOS:
1470 if (state != inValue) {
1471 if (nAtts < attsMax)
1472 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1473 state = inValue;
1474 open = BT_APOS;
1476 else if (open == BT_APOS) {
1477 state = other;
1478 if (nAtts < attsMax)
1479 atts[nAtts].valueEnd = ptr;
1480 nAtts++;
1482 break;
1483 case BT_AMP:
1484 if (nAtts < attsMax)
1485 atts[nAtts].normalized = 0;
1486 break;
1487 case BT_S:
1488 if (state == inName)
1489 state = other;
1490 else if (state == inValue
1491 && nAtts < attsMax
1492 && atts[nAtts].normalized
1493 && (ptr == atts[nAtts].valuePtr
1494 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1495 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1496 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1497 atts[nAtts].normalized = 0;
1498 break;
1499 case BT_CR: case BT_LF:
1500 /* This case ensures that the first attribute name is counted
1501 Apart from that we could just change state on the quote. */
1502 if (state == inName)
1503 state = other;
1504 else if (state == inValue && nAtts < attsMax)
1505 atts[nAtts].normalized = 0;
1506 break;
1507 case BT_GT:
1508 case BT_SOL:
1509 if (state != inValue)
1510 return nAtts;
1511 break;
1512 default:
1513 break;
1516 /* not reached */
1519 static
1520 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1522 int result = 0;
1523 cmExpatUnused(enc);
1524 /* skip &# */
1525 ptr += 2*MINBPC(enc);
1526 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1527 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1528 int c = BYTE_TO_ASCII(enc, ptr);
1529 switch (c) {
1530 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1531 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1532 result <<= 4;
1533 result |= (c - ASCII_0);
1534 break;
1535 case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
1536 result <<= 4;
1537 result += 10 + (c - ASCII_A);
1538 break;
1539 case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
1540 result <<= 4;
1541 result += 10 + (c - ASCII_a);
1542 break;
1544 if (result >= 0x110000)
1545 return -1;
1548 else {
1549 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1550 int c = BYTE_TO_ASCII(enc, ptr);
1551 result *= 10;
1552 result += (c - ASCII_0);
1553 if (result >= 0x110000)
1554 return -1;
1557 return checkCharRefNumber(result);
1560 static
1561 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
1563 cmExpatUnused(enc);
1564 switch ((end - ptr)/MINBPC(enc)) {
1565 case 2:
1566 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1567 switch (BYTE_TO_ASCII(enc, ptr)) {
1568 case ASCII_l:
1569 return ASCII_LT;
1570 case ASCII_g:
1571 return ASCII_GT;
1574 break;
1575 case 3:
1576 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1577 ptr += MINBPC(enc);
1578 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1579 ptr += MINBPC(enc);
1580 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1581 return ASCII_AMP;
1584 break;
1585 case 4:
1586 switch (BYTE_TO_ASCII(enc, ptr)) {
1587 case ASCII_q:
1588 ptr += MINBPC(enc);
1589 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1590 ptr += MINBPC(enc);
1591 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1592 ptr += MINBPC(enc);
1593 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1594 return ASCII_QUOT;
1597 break;
1598 case ASCII_a:
1599 ptr += MINBPC(enc);
1600 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1601 ptr += MINBPC(enc);
1602 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1603 ptr += MINBPC(enc);
1604 if (CHAR_MATCHES(enc, ptr, ASCII_s))
1605 return ASCII_APOS;
1608 break;
1611 return 0;
1614 static
1615 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1617 for (;;) {
1618 switch (BYTE_TYPE(enc, ptr1)) {
1619 #define LEAD_CASE(n) \
1620 case BT_LEAD ## n: \
1621 if (*ptr1++ != *ptr2++) \
1622 return 0;
1623 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1624 #undef LEAD_CASE
1625 /* fall through */
1626 if (*ptr1++ != *ptr2++)
1627 return 0;
1628 break;
1629 case BT_NONASCII:
1630 case BT_NMSTRT:
1631 #ifdef XML_NS
1632 case BT_COLON:
1633 #endif
1634 case BT_HEX:
1635 case BT_DIGIT:
1636 case BT_NAME:
1637 case BT_MINUS:
1638 if (*ptr2++ != *ptr1++)
1639 return 0;
1640 if (MINBPC(enc) > 1) {
1641 if (*ptr2++ != *ptr1++)
1642 return 0;
1643 if (MINBPC(enc) > 2) {
1644 if (*ptr2++ != *ptr1++)
1645 return 0;
1646 if (MINBPC(enc) > 3) {
1647 if (*ptr2++ != *ptr1++)
1648 return 0;
1652 break;
1653 default:
1654 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1655 return 1;
1656 switch (BYTE_TYPE(enc, ptr2)) {
1657 case BT_LEAD2:
1658 case BT_LEAD3:
1659 case BT_LEAD4:
1660 case BT_NONASCII:
1661 case BT_NMSTRT:
1662 #ifdef XML_NS
1663 case BT_COLON:
1664 #endif
1665 case BT_HEX:
1666 case BT_DIGIT:
1667 case BT_NAME:
1668 case BT_MINUS:
1669 return 0;
1670 default:
1671 return 1;
1675 /* not reached */
1678 static
1679 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1680 const char *end1, const char *ptr2)
1682 cmExpatUnused(enc);
1683 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1684 if (ptr1 == end1)
1685 return 0;
1686 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1687 return 0;
1689 return ptr1 == end1;
1692 static
1693 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1695 const char *start = ptr;
1696 for (;;) {
1697 switch (BYTE_TYPE(enc, ptr)) {
1698 #define LEAD_CASE(n) \
1699 case BT_LEAD ## n: ptr += n; break;
1700 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1701 #undef LEAD_CASE
1702 case BT_NONASCII:
1703 case BT_NMSTRT:
1704 #ifdef XML_NS
1705 case BT_COLON:
1706 #endif
1707 case BT_HEX:
1708 case BT_DIGIT:
1709 case BT_NAME:
1710 case BT_MINUS:
1711 ptr += MINBPC(enc);
1712 break;
1713 default:
1714 return ptr - start;
1719 static
1720 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1722 for (;;) {
1723 switch (BYTE_TYPE(enc, ptr)) {
1724 case BT_LF:
1725 case BT_CR:
1726 case BT_S:
1727 ptr += MINBPC(enc);
1728 break;
1729 default:
1730 return ptr;
1735 static
1736 void PREFIX(updatePosition)(const ENCODING *enc,
1737 const char *ptr,
1738 const char *end,
1739 POSITION *pos)
1741 while (ptr != end) {
1742 switch (BYTE_TYPE(enc, ptr)) {
1743 #define LEAD_CASE(n) \
1744 case BT_LEAD ## n: \
1745 ptr += n; \
1746 break;
1747 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1748 #undef LEAD_CASE
1749 case BT_LF:
1750 pos->columnNumber = (unsigned)-1;
1751 pos->lineNumber++;
1752 ptr += MINBPC(enc);
1753 break;
1754 case BT_CR:
1755 pos->lineNumber++;
1756 ptr += MINBPC(enc);
1757 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1758 ptr += MINBPC(enc);
1759 pos->columnNumber = (unsigned)-1;
1760 break;
1761 default:
1762 ptr += MINBPC(enc);
1763 break;
1765 pos->columnNumber++;
1769 #undef DO_LEAD_CASE
1770 #undef MULTIBYTE_CASES
1771 #undef INVALID_CASES
1772 #undef CHECK_NAME_CASE
1773 #undef CHECK_NAME_CASES
1774 #undef CHECK_NMSTRT_CASE
1775 #undef CHECK_NMSTRT_CASES