1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
5 /* This file is included! */
/* Default used when no IS_INVALID_CHAR is already defined at the point
   where this file is included: the macro is the constant 0, i.e. it
   never reports a character as invalid. */
8 #ifndef IS_INVALID_CHAR
9 #define IS_INVALID_CHAR(enc, ptr, n) (0)
/* INVALID_LEAD_CASE(n, ptr, nextTokPtr) is a fragment for an n-byte
   sequence. In the lines visible in this listing it can return
   XML_TOK_PARTIAL_CHAR, and when IS_INVALID_CHAR(enc, ptr, n) is true it
   stores ptr through nextTokPtr and returns XML_TOK_INVALID. It uses enc
   from the expansion site rather than from a macro parameter.
   INVALID_CASES(ptr, nextTokPtr) instantiates the fragment for n = 2, 3
   and 4 and then stores ptr through nextTokPtr and returns
   XML_TOK_INVALID (the labels guarding that last part are not visible
   here). Both macros are expanded directly inside
   switch (BYTE_TYPE(enc, ptr)) statements in this file. */
12 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
15 return XML_TOK_PARTIAL_CHAR; \
16 if (IS_INVALID_CHAR(enc, ptr, n)) { \
17 *(nextTokPtr) = (ptr); \
18 return XML_TOK_INVALID; \
23 #define INVALID_CASES(ptr, nextTokPtr) \
24 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
25 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
26 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
30 *(nextTokPtr) = (ptr); \
31 return XML_TOK_INVALID;
/* CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr): fragment that, in the
   lines visible here, can return XML_TOK_PARTIAL_CHAR and returns
   XML_TOK_INVALID when IS_NAME_CHAR(enc, ptr, n) is false.
   CHECK_NAME_CASES(enc, ptr, end, nextTokPtr): tests
   IS_NAME_CHAR_MINBPC(enc, ptr), returning XML_TOK_INVALID when it is
   false, and then instantiates CHECK_NAME_CASE for n = 2, 3 and 4.
   It is expanded inside switch (BYTE_TYPE(enc, ptr)) statements. */
33 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
36 return XML_TOK_PARTIAL_CHAR; \
37 if (!IS_NAME_CHAR(enc, ptr, n)) { \
39 return XML_TOK_INVALID; \
44 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
46 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
48 return XML_TOK_INVALID; \
57 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
58 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
59 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
/* CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr): same shape as
   CHECK_NAME_CASE but the predicate is IS_NMSTRT_CHAR(enc, ptr, n);
   visible results are XML_TOK_PARTIAL_CHAR and XML_TOK_INVALID.
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr): tests
   IS_NMSTRT_CHAR_MINBPC(enc, ptr), returning XML_TOK_INVALID when it is
   false, and then instantiates CHECK_NMSTRT_CASE for n = 2, 3 and 4.
   It is expanded inside switch (BYTE_TYPE(enc, ptr)) statements. */
61 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
64 return XML_TOK_PARTIAL_CHAR; \
65 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
67 return XML_TOK_INVALID; \
72 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
74 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
76 return XML_TOK_INVALID; \
82 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
83 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
84 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
/* Identity form of the name-decoration macro: PREFIX(ident) expands to
   ident unchanged. Every function in this file is named through it. */
87 #define PREFIX(ident) ident
/* Scans the rest of a comment in [ptr, end).
   Results visible in this listing:
     XML_TOK_INVALID - the character at ptr is not ASCII_MINUS, or an
                       ASCII_MINUS matched during the scan is not
                       followed by ASCII_GT;
     XML_TOK_PARTIAL - end was reached before the comment was closed;
     XML_TOK_COMMENT - the closing ASCII_GT was found; *nextTokPtr is
                       set to the position just after it. */
90 /* ptr points to character following "<!-" */
93 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
94 const char *end, const char **nextTokPtr)
97 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
99 return XML_TOK_INVALID;
103 switch (BYTE_TYPE(enc, ptr)) {
104 INVALID_CASES(ptr, nextTokPtr)
106 if ((ptr += MINBPC(enc)) == end)
107 return XML_TOK_PARTIAL;
108 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
109 if ((ptr += MINBPC(enc)) == end)
110 return XML_TOK_PARTIAL;
111 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
113 return XML_TOK_INVALID;
115 *nextTokPtr = ptr + MINBPC(enc);
116 return XML_TOK_COMMENT;
125 return XML_TOK_PARTIAL;
/* Scans the opening of a markup declaration in [ptr, end).
   Visible outcomes: delegation to scanComment (starting one character
   further on), XML_TOK_COND_SECT_OPEN with *nextTokPtr one character
   past ptr, XML_TOK_DECL_OPEN when the scan stops on BT_S / BT_CR /
   BT_LF, XML_TOK_INVALID, and XML_TOK_PARTIAL when end is reached. */
128 /* ptr points to character following "<!" */
131 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
132 const char *end, const char **nextTokPtr)
135 return XML_TOK_PARTIAL;
136 switch (BYTE_TYPE(enc, ptr)) {
138 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
140 *nextTokPtr = ptr + MINBPC(enc);
141 return XML_TOK_COND_SECT_OPEN;
148 return XML_TOK_INVALID;
151 switch (BYTE_TYPE(enc, ptr)) {
153 if (ptr + MINBPC(enc) == end)
154 return XML_TOK_PARTIAL;
/* One character of lookahead is needed here, hence the PARTIAL result
   above when that character is not yet available. */
155 /* don't allow <!ENTITY% foo "whatever"> */
156 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
157 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
159 return XML_TOK_INVALID;
162 case BT_S: case BT_CR: case BT_LF:
164 return XML_TOK_DECL_OPEN;
171 return XML_TOK_INVALID;
174 return XML_TOK_PARTIAL;
/* Classifies the processing-instruction target occupying [ptr, end).
   *tokPtr is first set to XML_TOK_PI. The length is compared against
   three minimum-size characters and three successive characters are
   inspected with BYTE_TO_ASCII; at the end of that inspection *tokPtr
   becomes XML_TOK_XML_DECL. The callers in this file treat a zero
   return value as an invalid target. */
178 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
179 const char *end, int *tokPtr)
182 *tokPtr = XML_TOK_PI;
183 if (end - ptr != MINBPC(enc)*3)
185 switch (BYTE_TO_ASCII(enc, ptr)) {
195 switch (BYTE_TO_ASCII(enc, ptr)) {
205 switch (BYTE_TO_ASCII(enc, ptr)) {
216 *tokPtr = XML_TOK_XML_DECL;
/* Scans a processing instruction in [ptr, end).
   The start of the target is remembered in target; the first character
   goes through CHECK_NMSTRT_CASES and later ones through
   CHECK_NAME_CASES. Once the target ends, checkPiTarget(enc, target,
   ptr, &tok) decides the token kind and a zero result from it yields
   XML_TOK_INVALID. Other visible results are XML_TOK_PARTIAL when end
   is reached and XML_TOK_INVALID for malformed input; where ASCII_GT is
   matched, *nextTokPtr is set one character past it. */
220 /* ptr points to character following "<?" */
223 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
224 const char *end, const char **nextTokPtr)
227 const char *target = ptr;
229 return XML_TOK_PARTIAL;
230 switch (BYTE_TYPE(enc, ptr)) {
231 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
234 return XML_TOK_INVALID;
237 switch (BYTE_TYPE(enc, ptr)) {
238 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239 case BT_S: case BT_CR: case BT_LF:
/* Target followed by white space: validate the target name first. */
240 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
242 return XML_TOK_INVALID;
246 switch (BYTE_TYPE(enc, ptr)) {
247 INVALID_CASES(ptr, nextTokPtr)
251 return XML_TOK_PARTIAL;
252 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253 *nextTokPtr = ptr + MINBPC(enc);
262 return XML_TOK_PARTIAL;
/* Second place where the target is validated (no white-space case label
   is visible in front of this one). */
264 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
266 return XML_TOK_INVALID;
270 return XML_TOK_PARTIAL;
271 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272 *nextTokPtr = ptr + MINBPC(enc);
278 return XML_TOK_INVALID;
281 return XML_TOK_PARTIAL;
/* Matches the six characters held in CDATA_LSQB (C, D, A, T, A and the
   left square bracket) starting at ptr.
   Returns XML_TOK_PARTIAL when fewer than six minimum-size characters
   are available, XML_TOK_INVALID on the first mismatch, and
   XML_TOK_CDATA_SECT_OPEN once all six have matched. */
285 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
286 const char *end, const char **nextTokPtr)
288 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
289 ASCII_T, ASCII_A, ASCII_LSQB };
292 if (end - ptr < 6 * MINBPC(enc))
293 return XML_TOK_PARTIAL;
294 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
295 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
297 return XML_TOK_INVALID;
301 return XML_TOK_CDATA_SECT_OPEN;
/* Produces the next token inside a CDATA section from [ptr, end).
   Visible results: XML_TOK_PARTIAL, XML_TOK_CDATA_SECT_CLOSE (after
   ASCII_RSQB and ASCII_GT have been matched, with *nextTokPtr just past
   the ASCII_GT), XML_TOK_DATA_NEWLINE and XML_TOK_DATA_CHARS. */
305 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
306 const char *end, const char **nextTokPtr)
/* For encodings whose minimum character size exceeds one byte, the
   length is rounded down to a multiple of that size.
   NOTE(review): the mask arithmetic is only a rounding operation if
   MINBPC(enc) is a power of two - confirm for all encodings. */
310 if (MINBPC(enc) > 1) {
311 size_t n = end - ptr;
312 if (n & (MINBPC(enc) - 1)) {
313 n &= ~(MINBPC(enc) - 1);
315 return XML_TOK_PARTIAL;
319 switch (BYTE_TYPE(enc, ptr)) {
323 return XML_TOK_PARTIAL;
324 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
328 return XML_TOK_PARTIAL;
329 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
333 *nextTokPtr = ptr + MINBPC(enc);
334 return XML_TOK_CDATA_SECT_CLOSE;
338 return XML_TOK_PARTIAL;
/* A BT_LF at this point is tested separately before the newline token
   is reported. */
339 if (BYTE_TYPE(enc, ptr) == BT_LF)
342 return XML_TOK_DATA_NEWLINE;
344 *nextTokPtr = ptr + MINBPC(enc);
345 return XML_TOK_DATA_NEWLINE;
346 INVALID_CASES(ptr, nextTokPtr)
352 switch (BYTE_TYPE(enc, ptr)) {
353 #define LEAD_CASE(n) \
355 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
357 return XML_TOK_DATA_CHARS; \
361 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
370 return XML_TOK_DATA_CHARS;
377 return XML_TOK_DATA_CHARS;
/* Scans an end tag in [ptr, end).
   The first character goes through CHECK_NMSTRT_CASES and the following
   ones through CHECK_NAME_CASES. After BT_S / BT_CR / BT_LF a loop
   advances over further characters of those types. Visible results:
   XML_TOK_END_TAG with *nextTokPtr one character past the position
   reached, XML_TOK_INVALID, and XML_TOK_PARTIAL when end is reached. */
380 /* ptr points to character following "</" */
383 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
384 const char *end, const char **nextTokPtr)
387 return XML_TOK_PARTIAL;
388 switch (BYTE_TYPE(enc, ptr)) {
389 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
392 return XML_TOK_INVALID;
395 switch (BYTE_TYPE(enc, ptr)) {
396 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
397 case BT_S: case BT_CR: case BT_LF:
398 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
399 switch (BYTE_TYPE(enc, ptr)) {
400 case BT_S: case BT_CR: case BT_LF:
403 *nextTokPtr = ptr + MINBPC(enc);
404 return XML_TOK_END_TAG;
407 return XML_TOK_INVALID;
410 return XML_TOK_PARTIAL;
413 /* no need to check qname syntax here,
414 since end-tag must match exactly */
419 *nextTokPtr = ptr + MINBPC(enc);
420 return XML_TOK_END_TAG;
423 return XML_TOK_INVALID;
426 return XML_TOK_PARTIAL;
/* Scans the digits of a hexadecimal character reference in [ptr, end).
   Visible results: XML_TOK_INVALID when a character is not acceptable,
   XML_TOK_CHAR_REF with *nextTokPtr one character past the terminating
   position, and XML_TOK_PARTIAL when end is reached first.
   The only caller visible in this listing (scanCharRef) gets here after
   matching the lower-case ASCII_x. */
429 /* ptr points to character following "&#X" */
432 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
433 const char *end, const char **nextTokPtr)
436 switch (BYTE_TYPE(enc, ptr)) {
442 return XML_TOK_INVALID;
444 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
445 switch (BYTE_TYPE(enc, ptr)) {
450 *nextTokPtr = ptr + MINBPC(enc);
451 return XML_TOK_CHAR_REF;
454 return XML_TOK_INVALID;
458 return XML_TOK_PARTIAL;
/* Scans a numeric character reference in [ptr, end).
   If the character at ptr is ASCII_x the work is handed to
   scanHexCharRef, starting one character further on. Otherwise the
   characters are examined here. Visible results: XML_TOK_INVALID,
   XML_TOK_CHAR_REF with *nextTokPtr one character past the terminating
   position, and XML_TOK_PARTIAL when end is reached first. */
461 /* ptr points to character following "&#" */
464 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
465 const char *end, const char **nextTokPtr)
468 if (CHAR_MATCHES(enc, ptr, ASCII_x))
469 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
470 switch (BYTE_TYPE(enc, ptr)) {
475 return XML_TOK_INVALID;
477 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
478 switch (BYTE_TYPE(enc, ptr)) {
482 *nextTokPtr = ptr + MINBPC(enc);
483 return XML_TOK_CHAR_REF;
486 return XML_TOK_INVALID;
490 return XML_TOK_PARTIAL;
/* Scans a reference in [ptr, end).
   The first character either passes CHECK_NMSTRT_CASES or causes a
   hand-off to scanCharRef (starting one character further on); anything
   else visible here is XML_TOK_INVALID. Later characters go through
   CHECK_NAME_CASES. Visible results: XML_TOK_ENTITY_REF with
   *nextTokPtr one character past the terminating position,
   XML_TOK_INVALID, and XML_TOK_PARTIAL when end is reached first. */
493 /* ptr points to character following "&" */
496 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
497 const char **nextTokPtr)
500 return XML_TOK_PARTIAL;
501 switch (BYTE_TYPE(enc, ptr)) {
502 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
504 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
507 return XML_TOK_INVALID;
510 switch (BYTE_TYPE(enc, ptr)) {
511 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
513 *nextTokPtr = ptr + MINBPC(enc);
514 return XML_TOK_ENTITY_REF;
517 return XML_TOK_INVALID;
520 return XML_TOK_PARTIAL;
/* Scans the attribute part of a start tag or empty-element tag in
   [ptr, end).
   Visible results: XML_TOK_START_TAG_WITH_ATTS and
   XML_TOK_EMPTY_ELEMENT_WITH_ATTS (both with *nextTokPtr one character
   past the closing position), XML_TOK_INVALID for malformed input and
   XML_TOK_PARTIAL when end is reached first. */
523 /* ptr points to character following first character of attribute name */
526 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
527 const char **nextTokPtr)
533 switch (BYTE_TYPE(enc, ptr)) {
534 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
539 return XML_TOK_INVALID;
544 return XML_TOK_PARTIAL;
545 switch (BYTE_TYPE(enc, ptr)) {
546 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
549 return XML_TOK_INVALID;
553 case BT_S: case BT_CR: case BT_LF:
559 return XML_TOK_PARTIAL;
560 t = BYTE_TYPE(enc, ptr);
570 return XML_TOK_INVALID;
583 return XML_TOK_PARTIAL;
/* open records the byte type of the opening quote; only BT_QUOT and
   BT_APOS are tested for below. */
584 open = BYTE_TYPE(enc, ptr);
585 if (open == BT_QUOT || open == BT_APOS)
594 return XML_TOK_INVALID;
598 /* in attribute value */
602 return XML_TOK_PARTIAL;
603 t = BYTE_TYPE(enc, ptr);
607 INVALID_CASES(ptr, nextTokPtr)
/* A reference inside the value is scanned by scanRef, which writes the
   resume position straight back into ptr. */
610 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
612 if (tok == XML_TOK_INVALID)
620 return XML_TOK_INVALID;
628 return XML_TOK_PARTIAL;
629 switch (BYTE_TYPE(enc, ptr)) {
640 return XML_TOK_INVALID;
642 /* ptr points to closing quote */
646 return XML_TOK_PARTIAL;
647 switch (BYTE_TYPE(enc, ptr)) {
648 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
649 case BT_S: case BT_CR: case BT_LF:
653 *nextTokPtr = ptr + MINBPC(enc);
654 return XML_TOK_START_TAG_WITH_ATTS;
659 return XML_TOK_PARTIAL;
660 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
662 return XML_TOK_INVALID;
664 *nextTokPtr = ptr + MINBPC(enc);
665 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
668 return XML_TOK_INVALID;
676 return XML_TOK_INVALID;
679 return XML_TOK_PARTIAL;
/* Scans markup in [ptr, end) and dispatches on what comes next.
   Visible hand-offs: scanComment, scanCdataSection, scanPi and
   scanEndTag (each starting one character further on) and scanAtts
   (starting at ptr). Results produced here: XML_TOK_START_TAG_NO_ATTS
   and XML_TOK_EMPTY_ELEMENT_NO_ATTS (both with *nextTokPtr one
   character past the closing position), XML_TOK_INVALID and
   XML_TOK_PARTIAL when end is reached first. */
682 /* ptr points to character following "<" */
685 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
686 const char **nextTokPtr)
692 return XML_TOK_PARTIAL;
693 switch (BYTE_TYPE(enc, ptr)) {
694 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
696 if ((ptr += MINBPC(enc)) == end)
697 return XML_TOK_PARTIAL;
698 switch (BYTE_TYPE(enc, ptr)) {
700 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
702 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
706 return XML_TOK_INVALID;
708 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
710 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
713 return XML_TOK_INVALID;
718 /* we have a start-tag */
720 switch (BYTE_TYPE(enc, ptr)) {
721 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
726 return XML_TOK_INVALID;
731 return XML_TOK_PARTIAL;
732 switch (BYTE_TYPE(enc, ptr)) {
733 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
736 return XML_TOK_INVALID;
740 case BT_S: case BT_CR: case BT_LF:
744 switch (BYTE_TYPE(enc, ptr)) {
745 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
750 case BT_S: case BT_CR: case BT_LF:
755 return XML_TOK_INVALID;
/* The rest of the tag is handled by the attribute scanner. */
757 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
759 return XML_TOK_PARTIAL;
763 *nextTokPtr = ptr + MINBPC(enc);
764 return XML_TOK_START_TAG_NO_ATTS;
769 return XML_TOK_PARTIAL;
770 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
772 return XML_TOK_INVALID;
774 *nextTokPtr = ptr + MINBPC(enc);
775 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
778 return XML_TOK_INVALID;
781 return XML_TOK_PARTIAL;
/* Produces the next token of element content from [ptr, end).
   Visible hand-offs: scanLt and scanRef, each starting one character
   past ptr. Results produced here: XML_TOK_PARTIAL,
   XML_TOK_TRAILING_CR, XML_TOK_DATA_NEWLINE, XML_TOK_TRAILING_RSQB,
   XML_TOK_INVALID and XML_TOK_DATA_CHARS. */
785 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
786 const char **nextTokPtr)
/* For encodings whose minimum character size exceeds one byte, the
   length is rounded down to a multiple of that size.
   NOTE(review): the mask arithmetic is only a rounding operation if
   MINBPC(enc) is a power of two - confirm for all encodings. */
790 if (MINBPC(enc) > 1) {
791 size_t n = end - ptr;
792 if (n & (MINBPC(enc) - 1)) {
793 n &= ~(MINBPC(enc) - 1);
795 return XML_TOK_PARTIAL;
799 switch (BYTE_TYPE(enc, ptr)) {
801 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
803 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
807 return XML_TOK_TRAILING_CR;
808 if (BYTE_TYPE(enc, ptr) == BT_LF)
811 return XML_TOK_DATA_NEWLINE;
813 *nextTokPtr = ptr + MINBPC(enc);
814 return XML_TOK_DATA_NEWLINE;
818 return XML_TOK_TRAILING_RSQB;
819 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
823 return XML_TOK_TRAILING_RSQB;
824 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
829 return XML_TOK_INVALID;
830 INVALID_CASES(ptr, nextTokPtr)
836 switch (BYTE_TYPE(enc, ptr)) {
837 #define LEAD_CASE(n) \
839 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
841 return XML_TOK_DATA_CHARS; \
845 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
/* Lookahead of up to two characters for ASCII_RSQB followed by ASCII_GT;
   when both are matched the result is XML_TOK_INVALID with *nextTokPtr
   two characters past ptr. */
848 if (ptr + MINBPC(enc) != end) {
849 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
853 if (ptr + 2*MINBPC(enc) != end) {
854 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
858 *nextTokPtr = ptr + 2*MINBPC(enc);
859 return XML_TOK_INVALID;
871 return XML_TOK_DATA_CHARS;
878 return XML_TOK_DATA_CHARS;
/* Scans what follows a percent sign in [ptr, end).
   Visible results:
     -XML_TOK_PERCENT          - negated token code (the guarding
                                 condition is not visible; presumably
                                 end of input - TODO confirm);
     XML_TOK_PERCENT           - next character is BT_S, BT_LF, BT_CR or
                                 BT_PERCNT;
     XML_TOK_PARAM_ENTITY_REF  - *nextTokPtr is one character past the
                                 terminating position;
     XML_TOK_INVALID, XML_TOK_PARTIAL. */
881 /* ptr points to character following "%" */
884 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
885 const char **nextTokPtr)
888 return -XML_TOK_PERCENT;
889 switch (BYTE_TYPE(enc, ptr)) {
890 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
891 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
893 return XML_TOK_PERCENT;
896 return XML_TOK_INVALID;
899 switch (BYTE_TYPE(enc, ptr)) {
900 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
902 *nextTokPtr = ptr + MINBPC(enc);
903 return XML_TOK_PARAM_ENTITY_REF;
906 return XML_TOK_INVALID;
909 return XML_TOK_PARTIAL;
/* Scans a name in [ptr, end); prologTok calls it with ptr one character
   past the current position.
   The first character goes through CHECK_NMSTRT_CASES and later ones
   through CHECK_NAME_CASES. Visible results: XML_TOK_POUND_NAME when
   the name is ended by BT_CR, BT_LF, BT_S, BT_RPAR, BT_GT, BT_PERCNT or
   BT_VERBAR; XML_TOK_INVALID; XML_TOK_PARTIAL; and the negated code
   -XML_TOK_POUND_NAME as the final return of the function. */
913 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
914 const char **nextTokPtr)
917 return XML_TOK_PARTIAL;
918 switch (BYTE_TYPE(enc, ptr)) {
919 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
922 return XML_TOK_INVALID;
925 switch (BYTE_TYPE(enc, ptr)) {
926 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
927 case BT_CR: case BT_LF: case BT_S:
928 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
930 return XML_TOK_POUND_NAME;
933 return XML_TOK_INVALID;
936 return -XML_TOK_POUND_NAME;
/* Scans a quoted literal in [ptr, end). prologTok passes BT_QUOT or
   BT_APOS as open, with ptr one character past the current position.
   Visible results: -XML_TOK_LITERAL (negated code), XML_TOK_LITERAL
   when the character examined afterwards is BT_S, BT_CR, BT_LF, BT_GT,
   BT_PERCNT or BT_LSQB, XML_TOK_INVALID, and XML_TOK_PARTIAL when end
   is reached first. */
940 PREFIX(scanLit)(int open, const ENCODING *enc,
941 const char *ptr, const char *end,
942 const char **nextTokPtr)
945 int t = BYTE_TYPE(enc, ptr);
947 INVALID_CASES(ptr, nextTokPtr)
954 return -XML_TOK_LITERAL;
956 switch (BYTE_TYPE(enc, ptr)) {
957 case BT_S: case BT_CR: case BT_LF:
958 case BT_GT: case BT_PERCNT: case BT_LSQB:
959 return XML_TOK_LITERAL;
961 return XML_TOK_INVALID;
968 return XML_TOK_PARTIAL;
/* Produces the next prolog token from [ptr, end).
   Visible hand-offs: scanLit (for BT_QUOT and BT_APOS), scanDecl,
   scanPi, scanPercent and scanPoundName, each starting one character
   past ptr. Tokens produced directly in the visible lines include
   XML_TOK_INSTANCE_START, XML_TOK_PROLOG_S, XML_TOK_COMMA,
   XML_TOK_OPEN_BRACKET, XML_TOK_CLOSE_BRACKET, XML_TOK_COND_SECT_CLOSE,
   XML_TOK_OPEN_PAREN, the XML_TOK_CLOSE_PAREN family,
   XML_TOK_DECL_CLOSE, XML_TOK_NAME_PLUS, XML_TOK_NAME_ASTERISK,
   XML_TOK_NAME_QUESTION, XML_TOK_INVALID and XML_TOK_PARTIAL. Several
   codes are also returned negated (XML_TOK_PROLOG_S,
   XML_TOK_CLOSE_BRACKET, XML_TOK_CLOSE_PAREN). */
972 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
973 const char **nextTokPtr)
/* For encodings whose minimum character size exceeds one byte, the
   length is rounded down to a multiple of that size.
   NOTE(review): the mask arithmetic is only a rounding operation if
   MINBPC(enc) is a power of two - confirm for all encodings. */
978 if (MINBPC(enc) > 1) {
979 size_t n = end - ptr;
980 if (n & (MINBPC(enc) - 1)) {
981 n &= ~(MINBPC(enc) - 1);
983 return XML_TOK_PARTIAL;
987 switch (BYTE_TYPE(enc, ptr)) {
989 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
991 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
996 return XML_TOK_PARTIAL;
997 switch (BYTE_TYPE(enc, ptr)) {
999 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1001 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
/* Here *nextTokPtr is moved back by one character before the token is
   reported. */
1008 *nextTokPtr = ptr - MINBPC(enc);
1009 return XML_TOK_INSTANCE_START;
1012 return XML_TOK_INVALID;
1015 if (ptr + MINBPC(enc) == end) {
1017 /* indicate that this might be part of a CR/LF pair */
1018 return -XML_TOK_PROLOG_S;
1021 case BT_S: case BT_LF:
1026 switch (BYTE_TYPE(enc, ptr)) {
1027 case BT_S: case BT_LF:
1030 /* don't split CR/LF pair */
1031 if (ptr + MINBPC(enc) != end)
1036 return XML_TOK_PROLOG_S;
1040 return XML_TOK_PROLOG_S;
1042 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1044 *nextTokPtr = ptr + MINBPC(enc);
1045 return XML_TOK_COMMA;
1047 *nextTokPtr = ptr + MINBPC(enc);
1048 return XML_TOK_OPEN_BRACKET;
1052 return -XML_TOK_CLOSE_BRACKET;
/* Two characters of lookahead: ASCII_RSQB followed by ASCII_GT yields
   XML_TOK_COND_SECT_CLOSE with *nextTokPtr two characters past ptr. */
1053 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1054 if (ptr + MINBPC(enc) == end)
1055 return XML_TOK_PARTIAL;
1056 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1057 *nextTokPtr = ptr + 2*MINBPC(enc);
1058 return XML_TOK_COND_SECT_CLOSE;
1062 return XML_TOK_CLOSE_BRACKET;
1064 *nextTokPtr = ptr + MINBPC(enc);
1065 return XML_TOK_OPEN_PAREN;
1069 return -XML_TOK_CLOSE_PAREN;
1070 switch (BYTE_TYPE(enc, ptr)) {
1072 *nextTokPtr = ptr + MINBPC(enc);
1073 return XML_TOK_CLOSE_PAREN_ASTERISK;
1075 *nextTokPtr = ptr + MINBPC(enc);
1076 return XML_TOK_CLOSE_PAREN_QUESTION;
1078 *nextTokPtr = ptr + MINBPC(enc);
1079 return XML_TOK_CLOSE_PAREN_PLUS;
1080 case BT_CR: case BT_LF: case BT_S:
1081 case BT_GT: case BT_COMMA: case BT_VERBAR:
1084 return XML_TOK_CLOSE_PAREN;
1087 return XML_TOK_INVALID;
1089 *nextTokPtr = ptr + MINBPC(enc);
1092 *nextTokPtr = ptr + MINBPC(enc);
1093 return XML_TOK_DECL_CLOSE;
1095 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1096 #define LEAD_CASE(n) \
1097 case BT_LEAD ## n: \
1098 if (end - ptr < n) \
1099 return XML_TOK_PARTIAL_CHAR; \
1100 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1102 tok = XML_TOK_NAME; \
1105 if (IS_NAME_CHAR(enc, ptr, n)) { \
1107 tok = XML_TOK_NMTOKEN; \
1110 *nextTokPtr = ptr; \
1111 return XML_TOK_INVALID;
1112 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1125 tok = XML_TOK_NMTOKEN;
1129 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1134 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1136 tok = XML_TOK_NMTOKEN;
1142 return XML_TOK_INVALID;
/* Main scan of a name or name token; tok carries the classification
   decided so far. */
1144 while (ptr != end) {
1145 switch (BYTE_TYPE(enc, ptr)) {
1146 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1147 case BT_GT: case BT_RPAR: case BT_COMMA:
1148 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1149 case BT_S: case BT_CR: case BT_LF:
1158 return XML_TOK_PARTIAL;
1159 tok = XML_TOK_PREFIXED_NAME;
1160 switch (BYTE_TYPE(enc, ptr)) {
1161 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1163 tok = XML_TOK_NMTOKEN;
1167 case XML_TOK_PREFIXED_NAME:
1168 tok = XML_TOK_NMTOKEN;
/* The three suffix tokens below are refused with XML_TOK_INVALID when
   the scanned item was classified as XML_TOK_NMTOKEN. */
1174 if (tok == XML_TOK_NMTOKEN) {
1176 return XML_TOK_INVALID;
1178 *nextTokPtr = ptr + MINBPC(enc);
1179 return XML_TOK_NAME_PLUS;
1181 if (tok == XML_TOK_NMTOKEN) {
1183 return XML_TOK_INVALID;
1185 *nextTokPtr = ptr + MINBPC(enc);
1186 return XML_TOK_NAME_ASTERISK;
1188 if (tok == XML_TOK_NMTOKEN) {
1190 return XML_TOK_INVALID;
1192 *nextTokPtr = ptr + MINBPC(enc);
1193 return XML_TOK_NAME_QUESTION;
1196 return XML_TOK_INVALID;
/* Produces the next token of an attribute value from [ptr, end).
   Visible results: XML_TOK_NONE, XML_TOK_DATA_CHARS,
   XML_TOK_DATA_NEWLINE, XML_TOK_TRAILING_CR, XML_TOK_ATTRIBUTE_VALUE_S
   and XML_TOK_INVALID; a reference is handed to scanRef starting one
   character past ptr. Multi-byte lead bytes advance ptr by n without a
   further check in this function. */
1203 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1204 const char *end, const char **nextTokPtr)
1208 return XML_TOK_NONE;
1210 while (ptr != end) {
1211 switch (BYTE_TYPE(enc, ptr)) {
1212 #define LEAD_CASE(n) \
1213 case BT_LEAD ## n: ptr += n; break;
1214 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1218 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1220 return XML_TOK_DATA_CHARS;
1222 /* this is for inside entity references */
1224 return XML_TOK_INVALID;
1227 *nextTokPtr = ptr + MINBPC(enc);
1228 return XML_TOK_DATA_NEWLINE;
1231 return XML_TOK_DATA_CHARS;
1236 return XML_TOK_TRAILING_CR;
1237 if (BYTE_TYPE(enc, ptr) == BT_LF)
1240 return XML_TOK_DATA_NEWLINE;
1243 return XML_TOK_DATA_CHARS;
1246 *nextTokPtr = ptr + MINBPC(enc);
1247 return XML_TOK_ATTRIBUTE_VALUE_S;
1250 return XML_TOK_DATA_CHARS;
1257 return XML_TOK_DATA_CHARS;
/* Produces the next token of an entity value from [ptr, end).
   Visible results: XML_TOK_NONE, XML_TOK_DATA_CHARS,
   XML_TOK_DATA_NEWLINE and XML_TOK_TRAILING_CR; a reference is handed
   to scanRef and a percent construct to scanPercent, each starting one
   character past ptr. Multi-byte lead bytes advance ptr by n without a
   further check in this function. */
1261 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1262 const char *end, const char **nextTokPtr)
1266 return XML_TOK_NONE;
1268 while (ptr != end) {
1269 switch (BYTE_TYPE(enc, ptr)) {
1270 #define LEAD_CASE(n) \
1271 case BT_LEAD ## n: ptr += n; break;
1272 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1276 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1278 return XML_TOK_DATA_CHARS;
/* A result of XML_TOK_PERCENT from scanPercent is turned into
   XML_TOK_INVALID here; every other result is passed through. */
1281 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1283 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1286 return XML_TOK_DATA_CHARS;
1289 *nextTokPtr = ptr + MINBPC(enc);
1290 return XML_TOK_DATA_NEWLINE;
1293 return XML_TOK_DATA_CHARS;
1298 return XML_TOK_TRAILING_CR;
1299 if (BYTE_TYPE(enc, ptr) == BT_LF)
1302 return XML_TOK_DATA_NEWLINE;
1305 return XML_TOK_DATA_CHARS;
1312 return XML_TOK_DATA_CHARS;
/* Scans [ptr, end) for the end of an ignored section.
   The visible lines look for two character sequences: one that
   continues with ASCII_EXCL and ASCII_LSQB, and one that continues with
   ASCII_RSQB and ASCII_GT. Visible results: XML_TOK_IGNORE_SECT once
   the second sequence has been matched, and XML_TOK_PARTIAL whenever
   end is reached first.
   NOTE(review): the directive that follows this function in the listing
   suggests it is compiled only under XML_DTD - confirm against the
   full file. */
1318 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
1319 const char *end, const char **nextTokPtr)
1322 if (MINBPC(enc) > 1) {
1323 size_t n = end - ptr;
1324 if (n & (MINBPC(enc) - 1)) {
1325 n &= ~(MINBPC(enc) - 1);
1329 while (ptr != end) {
1330 switch (BYTE_TYPE(enc, ptr)) {
1331 INVALID_CASES(ptr, nextTokPtr)
1333 if ((ptr += MINBPC(enc)) == end)
1334 return XML_TOK_PARTIAL;
1335 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1336 if ((ptr += MINBPC(enc)) == end)
1337 return XML_TOK_PARTIAL;
1338 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1345 if ((ptr += MINBPC(enc)) == end)
1346 return XML_TOK_PARTIAL;
1347 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1348 if ((ptr += MINBPC(enc)) == end)
1349 return XML_TOK_PARTIAL;
1350 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1354 return XML_TOK_IGNORE_SECT;
1365 return XML_TOK_PARTIAL;
1368 #endif /* XML_DTD */
/* Examines every character of [ptr, end), one minimum-size character at
   a time, classifying each by BYTE_TYPE. The visible lines single out
   ASCII_TAB, test whether BYTE_TO_ASCII yields a value with no bits
   above 0x7f, and switch on the ASCII value.
   NOTE(review): badPtr is presumably set to the offending character on
   failure; the assignment is not visible in this listing. */
1371 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1372 const char **badPtr)
1376 for (; ptr != end; ptr += MINBPC(enc)) {
1377 switch (BYTE_TYPE(enc, ptr)) {
1401 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1408 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1411 switch (BYTE_TO_ASCII(enc, ptr)) {
1425 /* This must only be called for a well-formed start-tag or empty
1426 element tag. Returns the number of attributes. Pointers to the
1427 first attsMax attributes are stored in atts.
1431 PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1432 int attsMax, ATTRIBUTE *atts)
1434 enum { other, inName, inValue } state = inName;
1436 int open = 0; /* defined when state == inValue;
1437 initialization just to shut up compilers */
/* NOTE(review): in this listing the header comment that begins with
   the words This must only be called has no terminator of its own, so
   lexically it extends to the end of the remark on open just above.
   What the visible body does: it walks the tag one minimum-size
   character at a time with a three-state machine (other, inName,
   inValue). While nAtts is below attsMax it records, per attribute,
   name (where the name starts), valuePtr (one character past the
   opening quote), valueEnd (at the matching closing quote) and the
   normalized flag. The quote that opened the value is remembered in
   open so that only the same kind (BT_QUOT or BT_APOS) closes it. */
1439 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1440 switch (BYTE_TYPE(enc, ptr)) {
1441 #define START_NAME \
1442 if (state == other) { \
1443 if (nAtts < attsMax) { \
1444 atts[nAtts].name = ptr; \
1445 atts[nAtts].normalized = 1; \
1449 #define LEAD_CASE(n) \
1450 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1451 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1460 if (state != inValue) {
1461 if (nAtts < attsMax)
1462 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1466 else if (open == BT_QUOT) {
1468 if (nAtts < attsMax)
1469 atts[nAtts].valueEnd = ptr;
1474 if (state != inValue) {
1475 if (nAtts < attsMax)
1476 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1480 else if (open == BT_APOS) {
1482 if (nAtts < attsMax)
1483 atts[nAtts].valueEnd = ptr;
1488 if (nAtts < attsMax)
1489 atts[nAtts].normalized = 0;
1492 if (state == inName)
/* Inside a value the normalized flag is cleared when this character is
   at the very start of the value, is not ASCII_SPACE, is followed by
   another ASCII_SPACE, or is followed by the closing quote. */
1494 else if (state == inValue
1496 && atts[nAtts].normalized
1497 && (ptr == atts[nAtts].valuePtr
1498 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1499 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1500 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1501 atts[nAtts].normalized = 0;
1503 case BT_CR: case BT_LF:
1504 /* This case ensures that the first attribute name is counted
1505 Apart from that we could just change state on the quote. */
1506 if (state == inName)
1508 else if (state == inValue && nAtts < attsMax)
1509 atts[nAtts].normalized = 0;
1513 if (state != inValue)
/* Converts a character reference starting at ptr to its numeric value.
   ptr is first advanced by two minimum-size characters. If ASCII_x
   follows, the characters up to ASCII_SEMI are read as hexadecimal
   digits (0-9, A-F, a-f); otherwise the characters up to ASCII_SEMI are
   read as decimal digits. In both loops the running value is compared
   against 0x110000. The accumulated value is finally passed to
   checkCharRefNumber and its result is returned.
   No end pointer is taken: the loops stop only at ASCII_SEMI, so the
   input must already be known to contain one. */
1523 static int PTRFASTCALL
1524 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1528 ptr += 2*MINBPC(enc);
1529 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1530 for (ptr += MINBPC(enc);
1531 !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1532 ptr += MINBPC(enc)) {
1533 int c = BYTE_TO_ASCII(enc, ptr);
1535 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1536 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1538 result |= (c - ASCII_0);
1540 case ASCII_A: case ASCII_B: case ASCII_C:
1541 case ASCII_D: case ASCII_E: case ASCII_F:
1543 result += 10 + (c - ASCII_A);
1545 case ASCII_a: case ASCII_b: case ASCII_c:
1546 case ASCII_d: case ASCII_e: case ASCII_f:
1548 result += 10 + (c - ASCII_a);
1551 if (result >= 0x110000)
1556 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1557 int c = BYTE_TO_ASCII(enc, ptr);
1559 result += (c - ASCII_0);
1560 if (result >= 0x110000)
1564 return checkCharRefNumber(result);
/* Recognises a predefined entity name in [ptr, end) by switching on its
   length in minimum-size characters and then comparing characters.
   The visible comparisons cover: a name whose second character is
   ASCII_t; the sequence ASCII_a, ASCII_m, ASCII_p; a name continuing
   with ASCII_u, ASCII_o, ASCII_t; and a name continuing with ASCII_p,
   ASCII_o, ASCII_s. The values returned for each match are not visible
   in this listing. */
1568 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1571 switch ((end - ptr)/MINBPC(enc)) {
1573 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1574 switch (BYTE_TO_ASCII(enc, ptr)) {
1583 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1585 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1587 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1593 switch (BYTE_TO_ASCII(enc, ptr)) {
1596 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1598 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1600 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1607 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1609 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1611 if (CHAR_MATCHES(enc, ptr, ASCII_s))
/* Compares the names starting at ptr1 and ptr2 byte by byte.
   The byte type at ptr1 drives the comparison: the LEAD_CASE entries
   are listed as 4, 3, 2 and each compares one byte, and ordinary name
   characters compare as many bytes as MINBPC(enc) requires (up to
   four). When the name at ptr1 is finished, a single-byte encoding with
   equal bytes at both pointers is tested first, and otherwise the byte
   type at ptr2 is examined. Neither pointer has an end bound. */
1622 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1625 switch (BYTE_TYPE(enc, ptr1)) {
1626 #define LEAD_CASE(n) \
1627 case BT_LEAD ## n: \
1628 if (*ptr1++ != *ptr2++) \
1630 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1633 if (*ptr1++ != *ptr2++)
1645 if (*ptr2++ != *ptr1++)
1647 if (MINBPC(enc) > 1) {
1648 if (*ptr2++ != *ptr1++)
1650 if (MINBPC(enc) > 2) {
1651 if (*ptr2++ != *ptr1++)
1653 if (MINBPC(enc) > 3) {
1654 if (*ptr2++ != *ptr1++)
1661 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1663 switch (BYTE_TYPE(enc, ptr2)) {
/* Compares the encoded name in [ptr1, end1) with the NUL-terminated
   string ptr2. ptr1 advances one minimum-size character for every byte
   of ptr2, each pair being compared with CHAR_MATCHES. After ptr2 is
   exhausted the result is whether ptr1 stopped exactly at end1, so a
   longer encoded name does not match. */
1686 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1687 const char *end1, const char *ptr2)
1689 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1692 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1695 return ptr1 == end1;
/* Returns the length in bytes of the name starting at ptr, computed as
   the distance from the saved start pointer and cast to int.
   Multi-byte lead bytes advance ptr by their sequence length n. There
   is no end bound; the scan stops on a byte type handled in lines not
   visible in this listing. */
1698 static int PTRFASTCALL
1699 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1701 const char *start = ptr;
1703 switch (BYTE_TYPE(enc, ptr)) {
1704 #define LEAD_CASE(n) \
1705 case BT_LEAD ## n: ptr += n; break;
1706 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1720 return (int)(ptr - start);
/* Takes a position and returns a const char pointer, deciding what to do
   from BYTE_TYPE(enc, ptr).
   NOTE(review): judging by the name this skips leading white space and
   returns the first other position; the case labels are not visible in
   this listing - confirm. */
1725 static const char * PTRFASTCALL
1726 PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1729 switch (BYTE_TYPE(enc, ptr)) {
/* Walks the input up to end and updates the position record pos.
   In the visible lines pos->columnNumber is set to (XML_Size)-1 in two
   places, one of them right after a test for a following BT_LF that is
   guarded by ptr != end, and it is incremented in a third place.
   NOTE(review): the line-number updates that presumably accompany the
   two resets are not visible in this listing. */
1742 PREFIX(updatePosition)(const ENCODING *enc,
1747 while (ptr != end) {
1748 switch (BYTE_TYPE(enc, ptr)) {
1749 #define LEAD_CASE(n) \
1750 case BT_LEAD ## n: \
1753 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1756 pos->columnNumber = (XML_Size)-1;
1763 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1765 pos->columnNumber = (XML_Size)-1;
1771 pos->columnNumber++;
1776 #undef MULTIBYTE_CASES
1777 #undef INVALID_CASES
1778 #undef CHECK_NAME_CASE
1779 #undef CHECK_NAME_CASES
1780 #undef CHECK_NMSTRT_CASE
1781 #undef CHECK_NMSTRT_CASES
1783 #endif /* XML_TOK_IMPL_C */