2 * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
3 * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
5 * Redistribution and modifications are permitted subject to BSD license.
7 #include <asn_system.h>
8 #include <xer_support.h>
17 ST_TAG_UNQUOTED_STRING
,
18 ST_COMMENT_WAIT_DASH1
, /* "<!--"[1] */
19 ST_COMMENT_WAIT_DASH2
, /* "<!--"[2] */
21 ST_COMMENT_CLO_DASH2
, /* "-->"[0] */
22 ST_COMMENT_CLO_RT
/* "-->"[1] */
25 static pxml_chunk_type_e final_chunk_type
[] = {
36 0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,
37 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
38 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
39 2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0, /* 01234567 89 */
40 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* ABCDEFG HIJKLMNO */
41 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0, /* PQRSTUVW XYZ */
42 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* abcdefg hijklmno */
43 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0 /* pqrstuvw xyz */
/*
 * Character-class predicates backed by the _charclass[] lookup table.
 * The argument is cast to unsigned char before it is used as an index,
 * so a plain char holding a negative value cannot yield a negative index.
 *   WHITESPACE(c): true when the table entry is exactly 1.
 *   ALNUM(c):      true when the table entry is 2 or greater.
 *   ALPHA(c):      true when the table entry is exactly 3.
 * NOTE(review): going by the annotated table rows, class 2 appears to mark
 * the digits 0-9 and class 3 the letters A-Z and a-z; confirm against the
 * complete _charclass[] definition.
 */
45 #define WHITESPACE(c) (_charclass[(unsigned char)(c)] == 1)
46 #define ALNUM(c) (_charclass[(unsigned char)(c)] >= 2)
47 #define ALPHA(c) (_charclass[(unsigned char)(c)] == 3)
49 /*
 * Aliases for characters, ASCII/UTF-8.
 * Each alias is the hexadecimal byte value of one punctuation character;
 * the character itself is shown in the trailing comment.
 * NOTE(review): presumably numeric codes are used instead of character
 * literals so that comparisons do not depend on the compiler's execution
 * character set; confirm the intent with the maintainers.
 */
50 #define EXCLAM 0x21 /* '!' */
51 #define CQUOTE 0x22 /* '"' */
52 #define CDASH 0x2d /* '-' */
53 #define CSLASH 0x2f /* '/' */
54 #define LANGLE 0x3c /* '<' */
55 #define CEQUAL 0x3d /* '=' */
56 #define RANGLE 0x3e /* '>' */
57 #define CQUEST 0x3f /* '?' */
59 /* Invoke token callback */
60 #define TOKEN_CB_CALL(type, _ns, _current_too, _final) do { \
63 ssize_t _sz = (p - chunk_start) + _current_too; \
69 _ret = cb(type, chunk_start, _sz, key); \
71 if(_current_too && _ret == -1) \
75 chunk_start = p + _current_too; \
/*
 * Non-final token report: forwards to TOKEN_CB_CALL with the chunk type
 * passed through unchanged and the _final argument fixed at 0.
 *   _type        - chunk type handed to the callback as-is.
 *   _ns          - call sites pass a parser state (ST_*); presumably the
 *                  state to enter afterwards, confirm against the full
 *                  TOKEN_CB_CALL body.
 *   _current_too - added to the chunk length and to the new chunk start
 *                  by TOKEN_CB_CALL; call sites pass 0 or 1.
 */
79 #define TOKEN_CB(_type, _ns, _current_too) \
80 TOKEN_CB_CALL(_type, _ns, _current_too, 0)
/*
 * Final token report: forwards to TOKEN_CB_CALL with the _final argument
 * fixed at 1. Unlike TOKEN_CB, the chunk type is not passed directly:
 * _type is used as an index into final_chunk_type[] and the table entry
 * is what reaches the callback.
 * _ns and _current_too are handed to TOKEN_CB_CALL unchanged.
 */
82 #define TOKEN_CB_FINAL(_type, _ns, _current_too) \
83 TOKEN_CB_CALL(final_chunk_type[_type], _ns, _current_too, 1)
88 ssize_t
pxml_parse(int *stateContext
, const void *xmlbuf
, size_t size
, pxml_callback_f
*cb
, void *key
) {
89 pstate_e state
= (pstate_e
)*stateContext
;
90 const char *chunk_start
= (const char *)xmlbuf
;
91 const char *p
= chunk_start
;
92 const char *end
= p
+ size
;
95 int C
= *(const unsigned char *)p
;
99 * Initial state: we're in the middle of some text,
100 * or just have started.
103 /* We're now in the tag, probably */
104 TOKEN_CB(PXML_TEXT
, ST_TAG_START
, 0);
107 if (ALPHA(C
) || (C
== CSLASH
))
109 else if (C
== EXCLAM
)
110 state
= ST_COMMENT_WAIT_DASH1
;
113 * Not characters and not whitespace.
114 * Must be something like "3 < 4".
116 TOKEN_CB(PXML_TEXT
, ST_TEXT
, 1);/* Flush as data */
122 TOKEN_CB_FINAL(PXML_TAG
, ST_TEXT
, 1);
126 * The previous tag wasn't completed, but still
127 * recognized as valid. (Mozilla-compatible)
129 TOKEN_CB_FINAL(PXML_TAG
, ST_TAG_START
, 0);
132 state
= ST_TAG_QUOTE_WAIT
;
136 case ST_TAG_QUOTE_WAIT
:
138 * State after the equal sign ("=") in the tag.
142 state
= ST_TAG_QUOTED_STRING
;
146 TOKEN_CB_FINAL(PXML_TAG
, ST_TEXT
, 1);
150 /* Unquoted string value */
151 state
= ST_TAG_UNQUOTED_STRING
;
154 case ST_TAG_QUOTED_STRING
:
156 * Tag attribute's string value in quotes.
159 /* Return back to the tag state */
163 case ST_TAG_UNQUOTED_STRING
:
166 TOKEN_CB_FINAL(PXML_TAG
, ST_TEXT
, 1);
167 } else if(WHITESPACE(C
)) {
168 /* Return back to the tag state */
172 case ST_COMMENT_WAIT_DASH1
:
174 state
= ST_COMMENT_WAIT_DASH2
;
176 /* Some ordinary tag. */
180 case ST_COMMENT_WAIT_DASH2
:
185 /* Some ordinary tag */
191 state
= ST_COMMENT_CLO_DASH2
;
194 case ST_COMMENT_CLO_DASH2
:
196 state
= ST_COMMENT_CLO_RT
;
198 /* This is not an end of a comment */
202 case ST_COMMENT_CLO_RT
:
204 TOKEN_CB_FINAL(PXML_COMMENT
, ST_TEXT
, 1);
205 } else if(C
== CDASH
) {
206 /* Maintain current state, still waiting for '>' */
215 * Flush the partially processed chunk, state permitting.
217 if(p
- chunk_start
) {
220 TOKEN_CB(PXML_COMMENT
, state
, 0);
223 TOKEN_CB(PXML_TEXT
, state
, 0);
225 default: break; /* a no-op */
230 *stateContext
= (int)state
;
231 return chunk_start
- (const char *)xmlbuf
;