[gaim-migrate @ 3063]
[pidgin-git.git] / src / protocols / jabber / xmlparse.c
blob5bcca2387a0ff5791df0d6498922abc3458ae720
1 /*
2 The contents of this file are subject to the Mozilla Public License
3 Version 1.1 (the "License"); you may not use this file except in
4 compliance with the License. You may obtain a copy of the License at
5 http://www.mozilla.org/MPL/
7 Software distributed under the License is distributed on an "AS IS"
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9 License for the specific language governing rights and limitations
10 under the License.
12 The Original Code is expat.
14 The Initial Developer of the Original Code is James Clark.
15 Portions created by James Clark are Copyright (C) 1998, 1999
16 James Clark. All Rights Reserved.
18 Contributor(s):
20 Alternatively, the contents of this file may be used under the terms
21 of the GNU General Public License (the "GPL"), in which case the
22 provisions of the GPL are applicable instead of those above. If you
23 wish to allow use of your version of this file only under the terms of
24 the GPL and not to allow others to use your version of this file under
25 the MPL, indicate your decision by deleting the provisions above and
26 replace them with the notice and other provisions required by the
27 GPL. If you do not delete the provisions above, a recipient may use
28 your version of this file under either the MPL or the GPL.
31 #include "xmldef.h"
32 #include "xmlparse.h"
/* Select the internal character type and the matching tokenizer entry
points.  With XML_UNICODE the UTF-16 variants are used, otherwise the
UTF-8 variants.  MUST_CONVERT(enc, s) is non-zero when input in encoding
enc cannot be used directly and has to go through XmlConvert. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* The argument s is parenthesised so that the cast applies to the whole
expression and not just to its first operand; the low bit tests whether
the address is odd. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif

/* Without XML_NS the namespace-aware entry points are aliases for the
plain ones. */
#ifndef XML_NS

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif

/* XML_T(x) turns a character or string literal into a wide literal when
XML_UNICODE_WCHAR_T is defined and leaves it untouched otherwise. */
#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) L ## x
#else
#define XML_T(x) x
#endif

/* Round up n to be a multiple of sz, where sz is a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
73 #include "xmltok.h"
74 #include "xmlrole.h"
75 #include "hashtable.h"
/* Initial sizes used when the parser first allocates its working storage. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024  /* in XML_Char units, see XML_ParserCreate */
#define INIT_ATTS_SIZE 16  /* initial number of ATTRIBUTE slots */
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024  /* starting point for XML_GetBuffer growth */

#define EXPAND_SPARE 24
85 typedef struct binding {
86 struct prefix *prefix;
87 struct binding *nextTagBinding;
88 struct binding *prevPrefixBinding;
89 const struct attribute_id *attId;
90 XML_Char *uri;
91 int uriLen;
92 int uriAlloc;
93 } BINDING;
95 typedef struct prefix {
96 const XML_Char *name;
97 BINDING *binding;
98 } PREFIX;
100 typedef struct {
101 const XML_Char *str;
102 const XML_Char *localPart;
103 int uriLen;
104 } TAG_NAME;
106 typedef struct tag {
107 struct tag *parent;
108 const char *rawName;
109 int rawNameLength;
110 TAG_NAME name;
111 char *buf;
112 char *bufEnd;
113 BINDING *bindings;
114 } TAG;
116 typedef struct {
117 const XML_Char *name;
118 const XML_Char *textPtr;
119 int textLen;
120 const XML_Char *systemId;
121 const XML_Char *base;
122 const XML_Char *publicId;
123 const XML_Char *notation;
124 char open;
125 } ENTITY;
127 typedef struct block {
128 struct block *next;
129 int size;
130 XML_Char s[1];
131 } BLOCK;
133 typedef struct {
134 BLOCK *blocks;
135 BLOCK *freeBlocks;
136 const XML_Char *end;
137 XML_Char *ptr;
138 XML_Char *start;
139 } STRING_POOL;
141 /* The XML_Char before the name is used to determine whether
142 an attribute has been specified. */
143 typedef struct attribute_id {
144 XML_Char *name;
145 PREFIX *prefix;
146 char maybeTokenized;
147 char xmlns;
148 } ATTRIBUTE_ID;
150 typedef struct {
151 const ATTRIBUTE_ID *id;
152 char isCdata;
153 const XML_Char *value;
154 } DEFAULT_ATTRIBUTE;
156 typedef struct {
157 const XML_Char *name;
158 PREFIX *prefix;
159 int nDefaultAtts;
160 int allocDefaultAtts;
161 DEFAULT_ATTRIBUTE *defaultAtts;
162 } ELEMENT_TYPE;
164 typedef struct {
165 HASH_TABLE generalEntities;
166 HASH_TABLE elementTypes;
167 HASH_TABLE attributeIds;
168 HASH_TABLE prefixes;
169 STRING_POOL pool;
170 int complete;
171 int standalone;
172 const XML_Char *base;
173 PREFIX defaultPrefix;
174 } DTD;
176 typedef struct open_internal_entity {
177 const char *internalEventPtr;
178 const char *internalEventEndPtr;
179 struct open_internal_entity *next;
180 ENTITY *entity;
181 } OPEN_INTERNAL_ENTITY;
183 typedef enum XML_Error Processor(XML_Parser parser,
184 const char *start,
185 const char *end,
186 const char **endPtr);
188 static Processor prologProcessor;
189 static Processor prologInitProcessor;
190 static Processor contentProcessor;
191 static Processor cdataSectionProcessor;
192 static Processor epilogProcessor;
193 static Processor errorProcessor;
194 static Processor externalEntityInitProcessor;
195 static Processor externalEntityInitProcessor2;
196 static Processor externalEntityInitProcessor3;
197 static Processor externalEntityContentProcessor;
199 static enum XML_Error
200 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
201 static enum XML_Error
202 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
203 static enum XML_Error
204 initializeEncoding(XML_Parser parser);
205 static enum XML_Error
206 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
207 const char *start, const char *end, const char **endPtr);
208 static enum XML_Error
209 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
210 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
211 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
212 static
213 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
214 static int
215 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
216 static enum XML_Error
217 storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
218 STRING_POOL *);
219 static enum XML_Error
220 appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
221 STRING_POOL *);
222 static ATTRIBUTE_ID *
223 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
224 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
225 static enum XML_Error
226 storeEntityValue(XML_Parser parser, const char *start, const char *end);
227 static int
228 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
229 static int
230 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
231 static void
232 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
234 static const XML_Char *getContext(XML_Parser parser);
235 static int setContext(XML_Parser parser, const XML_Char *context);
236 static void normalizePublicId(XML_Char *s);
237 static int dtdInit(DTD *);
238 static void dtdDestroy(DTD *);
239 static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
240 static void poolInit(STRING_POOL *);
241 static void poolClear(STRING_POOL *);
242 static void poolDestroy(STRING_POOL *);
243 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
244 const char *ptr, const char *end);
245 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
246 const char *ptr, const char *end);
247 static int poolGrow(STRING_POOL *pool);
248 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
249 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
/* Accessors for the string currently being built in a STRING_POOL.
Every macro takes a pointer to the pool; the argument is fully
parenthesised in each expansion so that expressions such as &x work.

poolStart/poolEnd  first character / one past the last character
poolLength         number of characters accumulated so far
poolChop           drop the last character
poolLastChar       the last character (pool must not be empty)
poolDiscard        throw the current string away
poolFinish         keep the current string and start a new one after it
poolAppendChar     append c, growing the pool via poolGrow() when ptr
                   has reached end; yields 0 if growing fails, else 1 */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
263 typedef struct {
264 /* The first member must be userData so that the XML_GetUserData macro works. */
265 void *m_userData;
266 void *m_handlerArg;
267 char *m_buffer;
268 /* first character to be parsed */
269 const char *m_bufferPtr;
270 /* past last character to be parsed */
271 char *m_bufferEnd;
272 /* allocated end of buffer */
273 const char *m_bufferLim;
274 long m_parseEndByteIndex;
275 const char *m_parseEndPtr;
276 XML_Char *m_dataBuf;
277 XML_Char *m_dataBufEnd;
278 XML_StartElementHandler m_startElementHandler;
279 XML_EndElementHandler m_endElementHandler;
280 XML_CharacterDataHandler m_characterDataHandler;
281 XML_ProcessingInstructionHandler m_processingInstructionHandler;
282 XML_CommentHandler m_commentHandler;
283 XML_StartCdataSectionHandler m_startCdataSectionHandler;
284 XML_EndCdataSectionHandler m_endCdataSectionHandler;
285 XML_DefaultHandler m_defaultHandler;
286 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
287 XML_NotationDeclHandler m_notationDeclHandler;
288 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
289 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
290 XML_NotStandaloneHandler m_notStandaloneHandler;
291 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
292 void *m_externalEntityRefHandlerArg;
293 XML_UnknownEncodingHandler m_unknownEncodingHandler;
294 const ENCODING *m_encoding;
295 INIT_ENCODING m_initEncoding;
296 const XML_Char *m_protocolEncodingName;
297 int m_ns;
298 void *m_unknownEncodingMem;
299 void *m_unknownEncodingData;
300 void *m_unknownEncodingHandlerData;
301 void (*m_unknownEncodingRelease)(void *);
302 PROLOG_STATE m_prologState;
303 Processor *m_processor;
304 enum XML_Error m_errorCode;
305 const char *m_eventPtr;
306 const char *m_eventEndPtr;
307 const char *m_positionPtr;
308 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
309 int m_defaultExpandInternalEntities;
310 int m_tagLevel;
311 ENTITY *m_declEntity;
312 const XML_Char *m_declNotationName;
313 const XML_Char *m_declNotationPublicId;
314 ELEMENT_TYPE *m_declElementType;
315 ATTRIBUTE_ID *m_declAttributeId;
316 char m_declAttributeIsCdata;
317 DTD m_dtd;
318 TAG *m_tagStack;
319 TAG *m_freeTagList;
320 BINDING *m_inheritedBindings;
321 BINDING *m_freeBindingList;
322 int m_attsSize;
323 int m_nSpecifiedAtts;
324 ATTRIBUTE *m_atts;
325 POSITION m_position;
326 STRING_POOL m_tempPool;
327 STRING_POOL m_temp2Pool;
328 char *m_groupConnector;
329 unsigned m_groupSize;
330 int m_hadExternalDoctype;
331 XML_Char m_namespaceSeparator;
332 } Parser;
/* Shorthand for the fields of Parser.  Each macro expands to an lvalue
and relies on a variable named `parser` (an XML_Parser) being in scope
at the point of use. */

/* Application data and callbacks. */
#define userData (((Parser *)parser)->m_userData)
#define handlerArg (((Parser *)parser)->m_handlerArg)
#define startElementHandler (((Parser *)parser)->m_startElementHandler)
#define endElementHandler (((Parser *)parser)->m_endElementHandler)
#define characterDataHandler (((Parser *)parser)->m_characterDataHandler)
#define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler)
#define commentHandler (((Parser *)parser)->m_commentHandler)
#define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler)
#define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler)
#define defaultHandler (((Parser *)parser)->m_defaultHandler)
#define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler)
#define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler)
#define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler)
#define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler)
#define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg)
#define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler)

/* Encoding state. */
#define encoding (((Parser *)parser)->m_encoding)
#define initEncoding (((Parser *)parser)->m_initEncoding)
#define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem)
#define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (((Parser *)parser)->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease)
#define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName)
#define ns (((Parser *)parser)->m_ns)

/* Parsing state, error and position reporting. */
#define prologState (((Parser *)parser)->m_prologState)
#define processor (((Parser *)parser)->m_processor)
#define errorCode (((Parser *)parser)->m_errorCode)
#define eventPtr (((Parser *)parser)->m_eventPtr)
#define eventEndPtr (((Parser *)parser)->m_eventEndPtr)
#define positionPtr (((Parser *)parser)->m_positionPtr)
#define position (((Parser *)parser)->m_position)
#define openInternalEntities (((Parser *)parser)->m_openInternalEntities)
#define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities)
#define tagLevel (((Parser *)parser)->m_tagLevel)

/* Input and data buffers. */
#define buffer (((Parser *)parser)->m_buffer)
#define bufferPtr (((Parser *)parser)->m_bufferPtr)
#define bufferEnd (((Parser *)parser)->m_bufferEnd)
#define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex)
#define parseEndPtr (((Parser *)parser)->m_parseEndPtr)
#define bufferLim (((Parser *)parser)->m_bufferLim)
#define dataBuf (((Parser *)parser)->m_dataBuf)
#define dataBufEnd (((Parser *)parser)->m_dataBufEnd)

/* DTD and the declaration currently being processed. */
#define dtd (((Parser *)parser)->m_dtd)
#define declEntity (((Parser *)parser)->m_declEntity)
#define declNotationName (((Parser *)parser)->m_declNotationName)
#define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId)
#define declElementType (((Parser *)parser)->m_declElementType)
#define declAttributeId (((Parser *)parser)->m_declAttributeId)
#define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata)

/* Element stack, bindings, attributes and scratch pools. */
#define freeTagList (((Parser *)parser)->m_freeTagList)
#define freeBindingList (((Parser *)parser)->m_freeBindingList)
#define inheritedBindings (((Parser *)parser)->m_inheritedBindings)
#define tagStack (((Parser *)parser)->m_tagStack)
#define atts (((Parser *)parser)->m_atts)
#define attsSize (((Parser *)parser)->m_attsSize)
#define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts)
#define tempPool (((Parser *)parser)->m_tempPool)
#define temp2Pool (((Parser *)parser)->m_temp2Pool)
#define groupConnector (((Parser *)parser)->m_groupConnector)
#define groupSize (((Parser *)parser)->m_groupSize)
#define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
#define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
#if defined(_MSC_VER) && defined(_DEBUG)
/* Returns the opaque handle typed as a Parser *.  Only compiled in
MSVC debug builds. */
Parser *asParser(XML_Parser parser)
{
  return parser;
}
#endif
409 XML_Parser XML_ParserCreate(const XML_Char *encodingName)
411 XML_Parser parser = malloc(sizeof(Parser));
412 if (!parser)
413 return parser;
414 processor = prologInitProcessor;
415 XmlPrologStateInit(&prologState);
416 userData = 0;
417 handlerArg = 0;
418 startElementHandler = 0;
419 endElementHandler = 0;
420 characterDataHandler = 0;
421 processingInstructionHandler = 0;
422 commentHandler = 0;
423 startCdataSectionHandler = 0;
424 endCdataSectionHandler = 0;
425 defaultHandler = 0;
426 unparsedEntityDeclHandler = 0;
427 notationDeclHandler = 0;
428 startNamespaceDeclHandler = 0;
429 endNamespaceDeclHandler = 0;
430 notStandaloneHandler = 0;
431 externalEntityRefHandler = 0;
432 externalEntityRefHandlerArg = parser;
433 unknownEncodingHandler = 0;
434 buffer = 0;
435 bufferPtr = 0;
436 bufferEnd = 0;
437 parseEndByteIndex = 0;
438 parseEndPtr = 0;
439 bufferLim = 0;
440 declElementType = 0;
441 declAttributeId = 0;
442 declEntity = 0;
443 declNotationName = 0;
444 declNotationPublicId = 0;
445 memset(&position, 0, sizeof(POSITION));
446 errorCode = XML_ERROR_NONE;
447 eventPtr = 0;
448 eventEndPtr = 0;
449 positionPtr = 0;
450 openInternalEntities = 0;
451 tagLevel = 0;
452 tagStack = 0;
453 freeTagList = 0;
454 freeBindingList = 0;
455 inheritedBindings = 0;
456 attsSize = INIT_ATTS_SIZE;
457 atts = malloc(attsSize * sizeof(ATTRIBUTE));
458 nSpecifiedAtts = 0;
459 dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
460 groupSize = 0;
461 groupConnector = 0;
462 hadExternalDoctype = 0;
463 unknownEncodingMem = 0;
464 unknownEncodingRelease = 0;
465 unknownEncodingData = 0;
466 unknownEncodingHandlerData = 0;
467 namespaceSeparator = '!';
468 ns = 0;
469 poolInit(&tempPool);
470 poolInit(&temp2Pool);
471 protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
472 if (!dtdInit(&dtd) || !atts || !dataBuf
473 || (encodingName && !protocolEncodingName)) {
474 XML_ParserFree(parser);
475 return 0;
477 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
478 XmlInitEncoding(&initEncoding, &encoding, 0);
479 return parser;
482 XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
484 static
485 const XML_Char implicitContext[] = {
486 XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
487 XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
488 XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
489 XML_T('.'), XML_T('w'), XML_T('3'),
490 XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
491 XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
492 XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
493 XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
494 XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
495 XML_T('\0')
498 XML_Parser parser = XML_ParserCreate(encodingName);
499 if (parser) {
500 XmlInitEncodingNS(&initEncoding, &encoding, 0);
501 ns = 1;
502 namespaceSeparator = nsSep;
504 if (!setContext(parser, implicitContext)) {
505 XML_ParserFree(parser);
506 return 0;
508 return parser;
511 int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
513 if (!encodingName)
514 protocolEncodingName = 0;
515 else {
516 protocolEncodingName = poolCopyString(&tempPool, encodingName);
517 if (!protocolEncodingName)
518 return 0;
520 return 1;
523 XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
524 const XML_Char *context,
525 const XML_Char *encodingName)
527 XML_Parser parser = oldParser;
528 DTD *oldDtd = &dtd;
529 XML_StartElementHandler oldStartElementHandler = startElementHandler;
530 XML_EndElementHandler oldEndElementHandler = endElementHandler;
531 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
532 XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
533 XML_CommentHandler oldCommentHandler = commentHandler;
534 XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
535 XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
536 XML_DefaultHandler oldDefaultHandler = defaultHandler;
537 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
538 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
539 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
540 XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
541 XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
542 void *oldUserData = userData;
543 void *oldHandlerArg = handlerArg;
544 int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
545 void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
547 parser = (ns
548 ? XML_ParserCreateNS(encodingName, namespaceSeparator)
549 : XML_ParserCreate(encodingName));
550 if (!parser)
551 return 0;
552 startElementHandler = oldStartElementHandler;
553 endElementHandler = oldEndElementHandler;
554 characterDataHandler = oldCharacterDataHandler;
555 processingInstructionHandler = oldProcessingInstructionHandler;
556 commentHandler = oldCommentHandler;
557 startCdataSectionHandler = oldStartCdataSectionHandler;
558 endCdataSectionHandler = oldEndCdataSectionHandler;
559 defaultHandler = oldDefaultHandler;
560 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
561 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
562 notStandaloneHandler = oldNotStandaloneHandler;
563 externalEntityRefHandler = oldExternalEntityRefHandler;
564 unknownEncodingHandler = oldUnknownEncodingHandler;
565 userData = oldUserData;
566 if (oldUserData == oldHandlerArg)
567 handlerArg = userData;
568 else
569 handlerArg = parser;
570 if (oldExternalEntityRefHandlerArg != oldParser)
571 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
572 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
573 if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
574 XML_ParserFree(parser);
575 return 0;
577 processor = externalEntityInitProcessor;
578 return parser;
581 static
582 void destroyBindings(BINDING *bindings)
584 for (;;) {
585 BINDING *b = bindings;
586 if (!b)
587 break;
588 bindings = b->nextTagBinding;
589 free(b->uri);
590 free(b);
594 void XML_ParserFree(XML_Parser parser)
596 for (;;) {
597 TAG *p;
598 if (tagStack == 0) {
599 if (freeTagList == 0)
600 break;
601 tagStack = freeTagList;
602 freeTagList = 0;
604 p = tagStack;
605 tagStack = tagStack->parent;
606 free(p->buf);
607 destroyBindings(p->bindings);
608 free(p);
610 destroyBindings(freeBindingList);
611 destroyBindings(inheritedBindings);
612 poolDestroy(&tempPool);
613 poolDestroy(&temp2Pool);
614 dtdDestroy(&dtd);
615 free((void *)atts);
616 free(groupConnector);
617 free(buffer);
618 free(dataBuf);
619 free(unknownEncodingMem);
620 if (unknownEncodingRelease)
621 unknownEncodingRelease(unknownEncodingData);
622 free(parser);
625 void XML_UseParserAsHandlerArg(XML_Parser parser)
627 handlerArg = parser;
630 void XML_SetUserData(XML_Parser parser, void *p)
632 if (handlerArg == userData)
633 handlerArg = userData = p;
634 else
635 userData = p;
638 int XML_SetBase(XML_Parser parser, const XML_Char *p)
640 if (p) {
641 p = poolCopyString(&dtd.pool, p);
642 if (!p)
643 return 0;
644 dtd.base = p;
646 else
647 dtd.base = 0;
648 return 1;
651 const XML_Char *XML_GetBase(XML_Parser parser)
653 return dtd.base;
656 int XML_GetSpecifiedAttributeCount(XML_Parser parser)
658 return nSpecifiedAtts;
661 void XML_SetElementHandler(XML_Parser parser,
662 XML_StartElementHandler start,
663 XML_EndElementHandler end)
665 startElementHandler = start;
666 endElementHandler = end;
669 void XML_SetCharacterDataHandler(XML_Parser parser,
670 XML_CharacterDataHandler handler)
672 characterDataHandler = handler;
675 void XML_SetProcessingInstructionHandler(XML_Parser parser,
676 XML_ProcessingInstructionHandler handler)
678 processingInstructionHandler = handler;
681 void XML_SetCommentHandler(XML_Parser parser,
682 XML_CommentHandler handler)
684 commentHandler = handler;
687 void XML_SetCdataSectionHandler(XML_Parser parser,
688 XML_StartCdataSectionHandler start,
689 XML_EndCdataSectionHandler end)
691 startCdataSectionHandler = start;
692 endCdataSectionHandler = end;
695 void XML_SetDefaultHandler(XML_Parser parser,
696 XML_DefaultHandler handler)
698 defaultHandler = handler;
699 defaultExpandInternalEntities = 0;
702 void XML_SetDefaultHandlerExpand(XML_Parser parser,
703 XML_DefaultHandler handler)
705 defaultHandler = handler;
706 defaultExpandInternalEntities = 1;
709 void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
710 XML_UnparsedEntityDeclHandler handler)
712 unparsedEntityDeclHandler = handler;
715 void XML_SetNotationDeclHandler(XML_Parser parser,
716 XML_NotationDeclHandler handler)
718 notationDeclHandler = handler;
721 void XML_SetNamespaceDeclHandler(XML_Parser parser,
722 XML_StartNamespaceDeclHandler start,
723 XML_EndNamespaceDeclHandler end)
725 startNamespaceDeclHandler = start;
726 endNamespaceDeclHandler = end;
729 void XML_SetNotStandaloneHandler(XML_Parser parser,
730 XML_NotStandaloneHandler handler)
732 notStandaloneHandler = handler;
735 void XML_SetExternalEntityRefHandler(XML_Parser parser,
736 XML_ExternalEntityRefHandler handler)
738 externalEntityRefHandler = handler;
741 void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
743 if (arg)
744 externalEntityRefHandlerArg = arg;
745 else
746 externalEntityRefHandlerArg = parser;
749 void XML_SetUnknownEncodingHandler(XML_Parser parser,
750 XML_UnknownEncodingHandler handler,
751 void *data)
753 unknownEncodingHandler = handler;
754 unknownEncodingHandlerData = data;
757 int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
759 if (len == 0) {
760 if (!isFinal)
761 return 1;
762 positionPtr = bufferPtr;
763 errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
764 if (errorCode == XML_ERROR_NONE)
765 return 1;
766 eventEndPtr = eventPtr;
767 return 0;
769 else if (bufferPtr == bufferEnd) {
770 const char *end;
771 int nLeftOver;
772 parseEndByteIndex += len;
773 positionPtr = s;
774 if (isFinal) {
775 errorCode = processor(parser, s, parseEndPtr = s + len, 0);
776 if (errorCode == XML_ERROR_NONE)
777 return 1;
778 eventEndPtr = eventPtr;
779 return 0;
781 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
782 if (errorCode != XML_ERROR_NONE) {
783 eventEndPtr = eventPtr;
784 return 0;
786 XmlUpdatePosition(encoding, positionPtr, end, &position);
787 nLeftOver = s + len - end;
788 if (nLeftOver) {
789 if (buffer == 0 || nLeftOver > bufferLim - buffer) {
790 /* FIXME avoid integer overflow */
791 buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
792 if (!buffer) {
793 errorCode = XML_ERROR_NO_MEMORY;
794 eventPtr = eventEndPtr = 0;
795 return 0;
797 bufferLim = buffer + len * 2;
799 memcpy(buffer, end, nLeftOver);
800 bufferPtr = buffer;
801 bufferEnd = buffer + nLeftOver;
803 return 1;
805 else {
806 memcpy(XML_GetBuffer(parser, len), s, len);
807 return XML_ParseBuffer(parser, len, isFinal);
811 int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
813 const char *start = bufferPtr;
814 positionPtr = start;
815 bufferEnd += len;
816 parseEndByteIndex += len;
817 errorCode = processor(parser, start, parseEndPtr = bufferEnd,
818 isFinal ? (const char **)0 : &bufferPtr);
819 if (errorCode == XML_ERROR_NONE) {
820 if (!isFinal)
821 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
822 return 1;
824 else {
825 eventEndPtr = eventPtr;
826 return 0;
830 void *XML_GetBuffer(XML_Parser parser, int len)
832 if (len > bufferLim - bufferEnd) {
833 /* FIXME avoid integer overflow */
834 int neededSize = len + (bufferEnd - bufferPtr);
835 if (neededSize <= bufferLim - buffer) {
836 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
837 bufferEnd = buffer + (bufferEnd - bufferPtr);
838 bufferPtr = buffer;
840 else {
841 char *newBuf;
842 int bufferSize = bufferLim - bufferPtr;
843 if (bufferSize == 0)
844 bufferSize = INIT_BUFFER_SIZE;
845 do {
846 bufferSize *= 2;
847 } while (bufferSize < neededSize);
848 newBuf = malloc(bufferSize);
849 if (newBuf == 0) {
850 errorCode = XML_ERROR_NO_MEMORY;
851 return 0;
853 bufferLim = newBuf + bufferSize;
854 if (bufferPtr) {
855 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
856 free(buffer);
858 bufferEnd = newBuf + (bufferEnd - bufferPtr);
859 bufferPtr = buffer = newBuf;
862 return bufferEnd;
865 enum XML_Error XML_GetErrorCode(XML_Parser parser)
867 return errorCode;
870 long XML_GetCurrentByteIndex(XML_Parser parser)
872 if (eventPtr)
873 return parseEndByteIndex - (parseEndPtr - eventPtr);
874 return -1;
877 int XML_GetCurrentByteCount(XML_Parser parser)
879 if (eventEndPtr && eventPtr)
880 return eventEndPtr - eventPtr;
881 return 0;
884 int XML_GetCurrentLineNumber(XML_Parser parser)
886 if (eventPtr) {
887 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
888 positionPtr = eventPtr;
890 return position.lineNumber + 1;
893 int XML_GetCurrentColumnNumber(XML_Parser parser)
895 if (eventPtr) {
896 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
897 positionPtr = eventPtr;
899 return position.columnNumber;
902 void XML_DefaultCurrent(XML_Parser parser)
904 if (defaultHandler) {
905 if (openInternalEntities)
906 reportDefault(parser,
907 ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(),
908 openInternalEntities->internalEventPtr,
909 openInternalEntities->internalEventEndPtr);
910 else
911 reportDefault(parser, encoding, eventPtr, eventEndPtr);
915 const XML_LChar *XML_ErrorString(int code)
917 static const XML_LChar *message[] = {
919 XML_T("out of memory"),
920 XML_T("syntax error"),
921 XML_T("no element found"),
922 XML_T("not well-formed"),
923 XML_T("unclosed token"),
924 XML_T("unclosed token"),
925 XML_T("mismatched tag"),
926 XML_T("duplicate attribute"),
927 XML_T("junk after document element"),
928 XML_T("illegal parameter entity reference"),
929 XML_T("undefined entity"),
930 XML_T("recursive entity reference"),
931 XML_T("asynchronous entity"),
932 XML_T("reference to invalid character number"),
933 XML_T("reference to binary entity"),
934 XML_T("reference to external entity in attribute"),
935 XML_T("xml processing instruction not at start of external entity"),
936 XML_T("unknown encoding"),
937 XML_T("encoding specified in XML declaration is incorrect"),
938 XML_T("unclosed CDATA section"),
939 XML_T("error in processing external entity reference"),
940 XML_T("document is not standalone")
942 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
943 return message[code];
944 return 0;
947 static
948 enum XML_Error contentProcessor(XML_Parser parser,
949 const char *start,
950 const char *end,
951 const char **endPtr)
953 return doContent(parser, 0, encoding, start, end, endPtr);
956 static
957 enum XML_Error externalEntityInitProcessor(XML_Parser parser,
958 const char *start,
959 const char *end,
960 const char **endPtr)
962 enum XML_Error result = initializeEncoding(parser);
963 if (result != XML_ERROR_NONE)
964 return result;
965 processor = externalEntityInitProcessor2;
966 return externalEntityInitProcessor2(parser, start, end, endPtr);
969 static
970 enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
971 const char *start,
972 const char *end,
973 const char **endPtr)
975 const char *next;
976 int tok = XmlContentTok(encoding, start, end, &next);
977 switch (tok) {
978 case XML_TOK_BOM:
979 start = next;
980 break;
981 case XML_TOK_PARTIAL:
982 if (endPtr) {
983 *endPtr = start;
984 return XML_ERROR_NONE;
986 eventPtr = start;
987 return XML_ERROR_UNCLOSED_TOKEN;
988 case XML_TOK_PARTIAL_CHAR:
989 if (endPtr) {
990 *endPtr = start;
991 return XML_ERROR_NONE;
993 eventPtr = start;
994 return XML_ERROR_PARTIAL_CHAR;
996 processor = externalEntityInitProcessor3;
997 return externalEntityInitProcessor3(parser, start, end, endPtr);
1000 static
1001 enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1002 const char *start,
1003 const char *end,
1004 const char **endPtr)
1006 const char *next;
1007 int tok = XmlContentTok(encoding, start, end, &next);
1008 switch (tok) {
1009 case XML_TOK_XML_DECL:
1011 enum XML_Error result = processXmlDecl(parser, 1, start, next);
1012 if (result != XML_ERROR_NONE)
1013 return result;
1014 start = next;
1016 break;
1017 case XML_TOK_PARTIAL:
1018 if (endPtr) {
1019 *endPtr = start;
1020 return XML_ERROR_NONE;
1022 eventPtr = start;
1023 return XML_ERROR_UNCLOSED_TOKEN;
1024 case XML_TOK_PARTIAL_CHAR:
1025 if (endPtr) {
1026 *endPtr = start;
1027 return XML_ERROR_NONE;
1029 eventPtr = start;
1030 return XML_ERROR_PARTIAL_CHAR;
1032 processor = externalEntityContentProcessor;
1033 tagLevel = 1;
1034 return doContent(parser, 1, encoding, start, end, endPtr);
1037 static
1038 enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1039 const char *start,
1040 const char *end,
1041 const char **endPtr)
1043 return doContent(parser, 1, encoding, start, end, endPtr);
1046 static enum XML_Error
1047 doContent(XML_Parser parser,
1048 int startTagLevel,
1049 const ENCODING *enc,
1050 const char *s,
1051 const char *end,
1052 const char **nextPtr)
1054 const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
1055 const char **eventPP;
1056 const char **eventEndPP;
1057 if (enc == encoding) {
1058 eventPP = &eventPtr;
1059 eventEndPP = &eventEndPtr;
1061 else {
1062 eventPP = &(openInternalEntities->internalEventPtr);
1063 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1065 *eventPP = s;
1066 for (;;) {
1067 const char *next = s; /* XmlContentTok doesn't always set the last arg */
1068 int tok = XmlContentTok(enc, s, end, &next);
1069 *eventEndPP = next;
1070 switch (tok) {
1071 case XML_TOK_TRAILING_CR:
1072 if (nextPtr) {
1073 *nextPtr = s;
1074 return XML_ERROR_NONE;
1076 *eventEndPP = end;
1077 if (characterDataHandler) {
1078 XML_Char c = 0xA;
1079 characterDataHandler(handlerArg, &c, 1);
1081 else if (defaultHandler)
1082 reportDefault(parser, enc, s, end);
1083 if (startTagLevel == 0)
1084 return XML_ERROR_NO_ELEMENTS;
1085 if (tagLevel != startTagLevel)
1086 return XML_ERROR_ASYNC_ENTITY;
1087 return XML_ERROR_NONE;
1088 case XML_TOK_NONE:
1089 if (nextPtr) {
1090 *nextPtr = s;
1091 return XML_ERROR_NONE;
1093 if (startTagLevel > 0) {
1094 if (tagLevel != startTagLevel)
1095 return XML_ERROR_ASYNC_ENTITY;
1096 return XML_ERROR_NONE;
1098 return XML_ERROR_NO_ELEMENTS;
1099 case XML_TOK_INVALID:
1100 *eventPP = next;
1101 return XML_ERROR_INVALID_TOKEN;
1102 case XML_TOK_PARTIAL:
1103 if (nextPtr) {
1104 *nextPtr = s;
1105 return XML_ERROR_NONE;
1107 return XML_ERROR_UNCLOSED_TOKEN;
1108 case XML_TOK_PARTIAL_CHAR:
1109 if (nextPtr) {
1110 *nextPtr = s;
1111 return XML_ERROR_NONE;
1113 return XML_ERROR_PARTIAL_CHAR;
1114 case XML_TOK_ENTITY_REF:
1116 const XML_Char *name;
1117 ENTITY *entity;
1118 XML_Char ch = XmlPredefinedEntityName(enc,
1119 s + enc->minBytesPerChar,
1120 next - enc->minBytesPerChar);
1121 if (ch) {
1122 if (characterDataHandler)
1123 characterDataHandler(handlerArg, &ch, 1);
1124 else if (defaultHandler)
1125 reportDefault(parser, enc, s, next);
1126 break;
1128 name = poolStoreString(&dtd.pool, enc,
1129 s + enc->minBytesPerChar,
1130 next - enc->minBytesPerChar);
1131 if (!name)
1132 return XML_ERROR_NO_MEMORY;
1133 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1134 poolDiscard(&dtd.pool);
1135 if (!entity) {
1136 if (dtd.complete || dtd.standalone)
1137 return XML_ERROR_UNDEFINED_ENTITY;
1138 if (defaultHandler)
1139 reportDefault(parser, enc, s, next);
1140 break;
1142 if (entity->open)
1143 return XML_ERROR_RECURSIVE_ENTITY_REF;
1144 if (entity->notation)
1145 return XML_ERROR_BINARY_ENTITY_REF;
1146 if (entity) {
1147 if (entity->textPtr) {
1148 enum XML_Error result;
1149 OPEN_INTERNAL_ENTITY openEntity;
1150 if (defaultHandler && !defaultExpandInternalEntities) {
1151 reportDefault(parser, enc, s, next);
1152 break;
1154 entity->open = 1;
1155 openEntity.next = openInternalEntities;
1156 openInternalEntities = &openEntity;
1157 openEntity.entity = entity;
1158 openEntity.internalEventPtr = 0;
1159 openEntity.internalEventEndPtr = 0;
1160 result = doContent(parser,
1161 tagLevel,
1162 internalEnc,
1163 (char *)entity->textPtr,
1164 (char *)(entity->textPtr + entity->textLen),
1166 entity->open = 0;
1167 openInternalEntities = openEntity.next;
1168 if (result)
1169 return result;
1171 else if (externalEntityRefHandler) {
1172 const XML_Char *context;
1173 entity->open = 1;
1174 context = getContext(parser);
1175 entity->open = 0;
1176 if (!context)
1177 return XML_ERROR_NO_MEMORY;
1178 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
1179 context,
1180 dtd.base,
1181 entity->systemId,
1182 entity->publicId))
1183 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1184 poolDiscard(&tempPool);
1186 else if (defaultHandler)
1187 reportDefault(parser, enc, s, next);
1189 break;
1191 case XML_TOK_START_TAG_WITH_ATTS:
1192 if (!startElementHandler) {
1193 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1194 if (result)
1195 return result;
1197 /* fall through */
1198 case XML_TOK_START_TAG_NO_ATTS:
1200 TAG *tag;
1201 if (freeTagList) {
1202 tag = freeTagList;
1203 freeTagList = freeTagList->parent;
1205 else {
1206 tag = malloc(sizeof(TAG));
1207 if (!tag)
1208 return XML_ERROR_NO_MEMORY;
1209 tag->buf = malloc(INIT_TAG_BUF_SIZE);
1210 if (!tag->buf)
1211 return XML_ERROR_NO_MEMORY;
1212 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1214 tag->bindings = 0;
1215 tag->parent = tagStack;
1216 tagStack = tag;
1217 tag->name.localPart = 0;
1218 tag->rawName = s + enc->minBytesPerChar;
1219 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1220 if (nextPtr) {
1221 /* Need to guarantee that:
1222 tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
1223 if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1224 int bufSize = tag->rawNameLength * 4;
1225 bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1226 tag->buf = realloc(tag->buf, bufSize);
1227 if (!tag->buf)
1228 return XML_ERROR_NO_MEMORY;
1229 tag->bufEnd = tag->buf + bufSize;
1231 memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1232 tag->rawName = tag->buf;
1234 ++tagLevel;
1235 if (startElementHandler) {
1236 enum XML_Error result;
1237 XML_Char *toPtr;
1238 for (;;) {
1239 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1240 const char *fromPtr = tag->rawName;
1241 int bufSize;
1242 if (nextPtr)
1243 toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1244 else
1245 toPtr = (XML_Char *)tag->buf;
1246 tag->name.str = toPtr;
1247 XmlConvert(enc,
1248 &fromPtr, rawNameEnd,
1249 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1250 if (fromPtr == rawNameEnd)
1251 break;
1252 bufSize = (tag->bufEnd - tag->buf) << 1;
1253 tag->buf = realloc(tag->buf, bufSize);
1254 if (!tag->buf)
1255 return XML_ERROR_NO_MEMORY;
1256 tag->bufEnd = tag->buf + bufSize;
1257 if (nextPtr)
1258 tag->rawName = tag->buf;
1260 *toPtr = XML_T('\0');
1261 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1262 if (result)
1263 return result;
1264 startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1265 poolClear(&tempPool);
1267 else {
1268 tag->name.str = 0;
1269 if (defaultHandler)
1270 reportDefault(parser, enc, s, next);
1272 break;
1274 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1275 if (!startElementHandler) {
1276 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1277 if (result)
1278 return result;
1280 /* fall through */
1281 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1282 if (startElementHandler || endElementHandler) {
1283 const char *rawName = s + enc->minBytesPerChar;
1284 enum XML_Error result;
1285 BINDING *bindings = 0;
1286 TAG_NAME name;
1287 name.str = poolStoreString(&tempPool, enc, rawName,
1288 rawName + XmlNameLength(enc, rawName));
1289 if (!name.str)
1290 return XML_ERROR_NO_MEMORY;
1291 poolFinish(&tempPool);
1292 result = storeAtts(parser, enc, s, &name, &bindings);
1293 if (result)
1294 return result;
1295 poolFinish(&tempPool);
1296 if (startElementHandler)
1297 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1298 if (endElementHandler) {
1299 if (startElementHandler)
1300 *eventPP = *eventEndPP;
1301 endElementHandler(handlerArg, name.str);
1303 poolClear(&tempPool);
1304 while (bindings) {
1305 BINDING *b = bindings;
1306 if (endNamespaceDeclHandler)
1307 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1308 bindings = bindings->nextTagBinding;
1309 b->nextTagBinding = freeBindingList;
1310 freeBindingList = b;
1311 b->prefix->binding = b->prevPrefixBinding;
1314 else if (defaultHandler)
1315 reportDefault(parser, enc, s, next);
1316 if (tagLevel == 0)
1317 return epilogProcessor(parser, next, end, nextPtr);
1318 break;
1319 case XML_TOK_END_TAG:
1320 if (tagLevel == startTagLevel)
1321 return XML_ERROR_ASYNC_ENTITY;
1322 else {
1323 int len;
1324 const char *rawName;
1325 TAG *tag = tagStack;
1326 tagStack = tag->parent;
1327 tag->parent = freeTagList;
1328 freeTagList = tag;
1329 rawName = s + enc->minBytesPerChar*2;
1330 len = XmlNameLength(enc, rawName);
1331 if (len != tag->rawNameLength
1332 || memcmp(tag->rawName, rawName, len) != 0) {
1333 *eventPP = rawName;
1334 return XML_ERROR_TAG_MISMATCH;
1336 --tagLevel;
1337 if (endElementHandler && tag->name.str) {
1338 if (tag->name.localPart) {
1339 XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
1340 const XML_Char *from = tag->name.localPart;
1341 while ((*to++ = *from++) != 0)
1344 endElementHandler(handlerArg, tag->name.str);
1346 else if (defaultHandler)
1347 reportDefault(parser, enc, s, next);
1348 while (tag->bindings) {
1349 BINDING *b = tag->bindings;
1350 if (endNamespaceDeclHandler)
1351 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1352 tag->bindings = tag->bindings->nextTagBinding;
1353 b->nextTagBinding = freeBindingList;
1354 freeBindingList = b;
1355 b->prefix->binding = b->prevPrefixBinding;
1357 if (tagLevel == 0)
1358 return epilogProcessor(parser, next, end, nextPtr);
1360 break;
1361 case XML_TOK_CHAR_REF:
1363 int n = XmlCharRefNumber(enc, s);
1364 if (n < 0)
1365 return XML_ERROR_BAD_CHAR_REF;
1366 if (characterDataHandler) {
1367 XML_Char buf[XML_ENCODE_MAX];
1368 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1370 else if (defaultHandler)
1371 reportDefault(parser, enc, s, next);
1373 break;
1374 case XML_TOK_XML_DECL:
1375 return XML_ERROR_MISPLACED_XML_PI;
1376 case XML_TOK_DATA_NEWLINE:
1377 if (characterDataHandler) {
1378 XML_Char c = 0xA;
1379 characterDataHandler(handlerArg, &c, 1);
1381 else if (defaultHandler)
1382 reportDefault(parser, enc, s, next);
1383 break;
1384 case XML_TOK_CDATA_SECT_OPEN:
1386 enum XML_Error result;
1387 if (startCdataSectionHandler)
1388 startCdataSectionHandler(handlerArg);
1389 #if 0
1390 /* Suppose you doing a transformation on a document that involves
1391 changing only the character data. You set up a defaultHandler
1392 and a characterDataHandler. The defaultHandler simply copies
1393 characters through. The characterDataHandler does the transformation
1394 and writes the characters out escaping them as necessary. This case
1395 will fail to work if we leave out the following two lines (because &
1396 and < inside CDATA sections will be incorrectly escaped).
1398 However, now we have a start/endCdataSectionHandler, so it seems
1399 easier to let the user deal with this. */
1401 else if (characterDataHandler)
1402 characterDataHandler(handlerArg, dataBuf, 0);
1403 #endif
1404 else if (defaultHandler)
1405 reportDefault(parser, enc, s, next);
1406 result = doCdataSection(parser, enc, &next, end, nextPtr);
1407 if (!next) {
1408 processor = cdataSectionProcessor;
1409 return result;
1412 break;
1413 case XML_TOK_TRAILING_RSQB:
1414 if (nextPtr) {
1415 *nextPtr = s;
1416 return XML_ERROR_NONE;
1418 if (characterDataHandler) {
1419 if (MUST_CONVERT(enc, s)) {
1420 ICHAR *dataPtr = (ICHAR *)dataBuf;
1421 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1422 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1424 else
1425 characterDataHandler(handlerArg,
1426 (XML_Char *)s,
1427 (XML_Char *)end - (XML_Char *)s);
1429 else if (defaultHandler)
1430 reportDefault(parser, enc, s, end);
1431 if (startTagLevel == 0) {
1432 *eventPP = end;
1433 return XML_ERROR_NO_ELEMENTS;
1435 if (tagLevel != startTagLevel) {
1436 *eventPP = end;
1437 return XML_ERROR_ASYNC_ENTITY;
1439 return XML_ERROR_NONE;
1440 case XML_TOK_DATA_CHARS:
1441 if (characterDataHandler) {
1442 if (MUST_CONVERT(enc, s)) {
1443 for (;;) {
1444 ICHAR *dataPtr = (ICHAR *)dataBuf;
1445 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1446 *eventEndPP = s;
1447 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1448 if (s == next)
1449 break;
1450 *eventPP = s;
1453 else
1454 characterDataHandler(handlerArg,
1455 (XML_Char *)s,
1456 (XML_Char *)next - (XML_Char *)s);
1458 else if (defaultHandler)
1459 reportDefault(parser, enc, s, next);
1460 break;
1461 case XML_TOK_PI:
1462 if (!reportProcessingInstruction(parser, enc, s, next))
1463 return XML_ERROR_NO_MEMORY;
1464 break;
1465 case XML_TOK_COMMENT:
1466 if (!reportComment(parser, enc, s, next))
1467 return XML_ERROR_NO_MEMORY;
1468 break;
1469 default:
1470 if (defaultHandler)
1471 reportDefault(parser, enc, s, next);
1472 break;
1474 *eventPP = s = next;
1476 /* not reached */
1479 /* If tagNamePtr is non-null, build a real list of attributes,
1480 otherwise just check the attributes for well-formedness. */
1482 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1483 const char *s, TAG_NAME *tagNamePtr,
1484 BINDING **bindingsPtr)
1486 ELEMENT_TYPE *elementType = 0;
1487 int nDefaultAtts = 0;
1488 const XML_Char **appAtts;
1489 int attIndex = 0;
1490 int i;
1491 int n;
1492 int nPrefixes = 0;
1493 BINDING *binding;
1494 const XML_Char *localPart;
1496 if (tagNamePtr) {
1497 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0);
1498 if (!elementType) {
1499 tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
1500 if (!tagNamePtr->str)
1501 return XML_ERROR_NO_MEMORY;
1502 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
1503 if (!elementType)
1504 return XML_ERROR_NO_MEMORY;
1505 if (ns && !setElementTypePrefix(parser, elementType))
1506 return XML_ERROR_NO_MEMORY;
1508 nDefaultAtts = elementType->nDefaultAtts;
1510 n = XmlGetAttributes(enc, s, attsSize, atts);
1511 if (n + nDefaultAtts > attsSize) {
1512 int oldAttsSize = attsSize;
1513 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1514 atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1515 if (!atts)
1516 return XML_ERROR_NO_MEMORY;
1517 if (n > oldAttsSize)
1518 XmlGetAttributes(enc, s, n, atts);
1520 appAtts = (const XML_Char **)atts;
1521 for (i = 0; i < n; i++) {
1522 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1523 atts[i].name
1524 + XmlNameLength(enc, atts[i].name));
1525 if (!attId)
1526 return XML_ERROR_NO_MEMORY;
1527 if ((attId->name)[-1]) {
1528 if (enc == encoding)
1529 eventPtr = atts[i].name;
1530 return XML_ERROR_DUPLICATE_ATTRIBUTE;
1532 (attId->name)[-1] = 1;
1533 appAtts[attIndex++] = attId->name;
1534 if (!atts[i].normalized) {
1535 enum XML_Error result;
1536 int isCdata = 1;
1538 if (attId->maybeTokenized) {
1539 int j;
1540 for (j = 0; j < nDefaultAtts; j++) {
1541 if (attId == elementType->defaultAtts[j].id) {
1542 isCdata = elementType->defaultAtts[j].isCdata;
1543 break;
1548 result = storeAttributeValue(parser, enc, isCdata,
1549 atts[i].valuePtr, atts[i].valueEnd,
1550 &tempPool);
1551 if (result)
1552 return result;
1553 if (tagNamePtr) {
1554 appAtts[attIndex] = poolStart(&tempPool);
1555 poolFinish(&tempPool);
1557 else
1558 poolDiscard(&tempPool);
1560 else if (tagNamePtr) {
1561 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1562 if (appAtts[attIndex] == 0)
1563 return XML_ERROR_NO_MEMORY;
1564 poolFinish(&tempPool);
1566 if (attId->prefix && tagNamePtr) {
1567 if (attId->xmlns) {
1568 if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
1569 return XML_ERROR_NO_MEMORY;
1570 --attIndex;
1572 else {
1573 attIndex++;
1574 nPrefixes++;
1575 (attId->name)[-1] = 2;
1578 else
1579 attIndex++;
1581 nSpecifiedAtts = attIndex;
1582 if (tagNamePtr) {
1583 int j;
1584 for (j = 0; j < nDefaultAtts; j++) {
1585 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1586 if (!(da->id->name)[-1] && da->value) {
1587 if (da->id->prefix) {
1588 if (da->id->xmlns) {
1589 if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
1590 return XML_ERROR_NO_MEMORY;
1592 else {
1593 (da->id->name)[-1] = 2;
1594 nPrefixes++;
1595 appAtts[attIndex++] = da->id->name;
1596 appAtts[attIndex++] = da->value;
1599 else {
1600 (da->id->name)[-1] = 1;
1601 appAtts[attIndex++] = da->id->name;
1602 appAtts[attIndex++] = da->value;
1606 appAtts[attIndex] = 0;
1608 i = 0;
1609 if (nPrefixes) {
1610 for (; i < attIndex; i += 2) {
1611 if (appAtts[i][-1] == 2) {
1612 ATTRIBUTE_ID *id;
1613 ((XML_Char *)(appAtts[i]))[-1] = 0;
1614 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
1615 if (id->prefix->binding) {
1616 int j;
1617 const BINDING *b = id->prefix->binding;
1618 const XML_Char *s = appAtts[i];
1619 for (j = 0; j < b->uriLen; j++) {
1620 if (!poolAppendChar(&tempPool, b->uri[j]))
1621 return XML_ERROR_NO_MEMORY;
1623 while (*s++ != ':')
1625 do {
1626 if (!poolAppendChar(&tempPool, *s))
1627 return XML_ERROR_NO_MEMORY;
1628 } while (*s++);
1629 appAtts[i] = poolStart(&tempPool);
1630 poolFinish(&tempPool);
1632 if (!--nPrefixes)
1633 break;
1635 else
1636 ((XML_Char *)(appAtts[i]))[-1] = 0;
1639 for (; i < attIndex; i += 2)
1640 ((XML_Char *)(appAtts[i]))[-1] = 0;
1641 if (!tagNamePtr)
1642 return XML_ERROR_NONE;
1643 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
1644 binding->attId->name[-1] = 0;
1645 if (elementType->prefix) {
1646 binding = elementType->prefix->binding;
1647 if (!binding)
1648 return XML_ERROR_NONE;
1649 localPart = tagNamePtr->str;
1650 while (*localPart++ != XML_T(':'))
1653 else if (dtd.defaultPrefix.binding) {
1654 binding = dtd.defaultPrefix.binding;
1655 localPart = tagNamePtr->str;
1657 else
1658 return XML_ERROR_NONE;
1659 tagNamePtr->localPart = localPart;
1660 tagNamePtr->uriLen = binding->uriLen;
1661 i = binding->uriLen;
1662 do {
1663 if (i == binding->uriAlloc) {
1664 binding->uri = realloc(binding->uri, binding->uriAlloc *= 2);
1665 if (!binding->uri)
1666 return XML_ERROR_NO_MEMORY;
1668 binding->uri[i++] = *localPart;
1669 } while (*localPart++);
1670 tagNamePtr->str = binding->uri;
1671 return XML_ERROR_NONE;
1674 static
1675 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
1677 BINDING *b;
1678 int len;
1679 for (len = 0; uri[len]; len++)
1681 if (namespaceSeparator)
1682 len++;
1683 if (freeBindingList) {
1684 b = freeBindingList;
1685 if (len > b->uriAlloc) {
1686 b->uri = realloc(b->uri, len + EXPAND_SPARE);
1687 if (!b->uri)
1688 return 0;
1689 b->uriAlloc = len + EXPAND_SPARE;
1691 freeBindingList = b->nextTagBinding;
1693 else {
1694 b = malloc(sizeof(BINDING));
1695 if (!b)
1696 return 0;
1697 b->uri = malloc(sizeof(XML_Char) * len + EXPAND_SPARE);
1698 if (!b->uri) {
1699 free(b);
1700 return 0;
1702 b->uriAlloc = len;
1704 b->uriLen = len;
1705 memcpy(b->uri, uri, len * sizeof(XML_Char));
1706 if (namespaceSeparator)
1707 b->uri[len - 1] = namespaceSeparator;
1708 b->prefix = prefix;
1709 b->attId = attId;
1710 b->prevPrefixBinding = prefix->binding;
1711 if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
1712 prefix->binding = 0;
1713 else
1714 prefix->binding = b;
1715 b->nextTagBinding = *bindingsPtr;
1716 *bindingsPtr = b;
1717 if (startNamespaceDeclHandler)
1718 startNamespaceDeclHandler(handlerArg, prefix->name,
1719 prefix->binding ? uri : 0);
1720 return 1;
1723 /* The idea here is to avoid using stack for each CDATA section when
1724 the whole file is parsed with one call. */
1726 static
1727 enum XML_Error cdataSectionProcessor(XML_Parser parser,
1728 const char *start,
1729 const char *end,
1730 const char **endPtr)
1732 enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1733 if (start) {
1734 processor = contentProcessor;
1735 return contentProcessor(parser, start, end, endPtr);
1737 return result;
1740 /* startPtr gets set to non-null is the section is closed, and to null if
1741 the section is not yet closed. */
1743 static
1744 enum XML_Error doCdataSection(XML_Parser parser,
1745 const ENCODING *enc,
1746 const char **startPtr,
1747 const char *end,
1748 const char **nextPtr)
1750 const char *s = *startPtr;
1751 const char **eventPP;
1752 const char **eventEndPP;
1753 if (enc == encoding) {
1754 eventPP = &eventPtr;
1755 *eventPP = s;
1756 eventEndPP = &eventEndPtr;
1758 else {
1759 eventPP = &(openInternalEntities->internalEventPtr);
1760 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1762 *eventPP = s;
1763 *startPtr = 0;
1764 for (;;) {
1765 const char *next;
1766 int tok = XmlCdataSectionTok(enc, s, end, &next);
1767 *eventEndPP = next;
1768 switch (tok) {
1769 case XML_TOK_CDATA_SECT_CLOSE:
1770 if (endCdataSectionHandler)
1771 endCdataSectionHandler(handlerArg);
1772 #if 0
1773 /* see comment under XML_TOK_CDATA_SECT_OPEN */
1774 else if (characterDataHandler)
1775 characterDataHandler(handlerArg, dataBuf, 0);
1776 #endif
1777 else if (defaultHandler)
1778 reportDefault(parser, enc, s, next);
1779 *startPtr = next;
1780 return XML_ERROR_NONE;
1781 case XML_TOK_DATA_NEWLINE:
1782 if (characterDataHandler) {
1783 XML_Char c = 0xA;
1784 characterDataHandler(handlerArg, &c, 1);
1786 else if (defaultHandler)
1787 reportDefault(parser, enc, s, next);
1788 break;
1789 case XML_TOK_DATA_CHARS:
1790 if (characterDataHandler) {
1791 if (MUST_CONVERT(enc, s)) {
1792 for (;;) {
1793 ICHAR *dataPtr = (ICHAR *)dataBuf;
1794 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1795 *eventEndPP = next;
1796 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1797 if (s == next)
1798 break;
1799 *eventPP = s;
1802 else
1803 characterDataHandler(handlerArg,
1804 (XML_Char *)s,
1805 (XML_Char *)next - (XML_Char *)s);
1807 else if (defaultHandler)
1808 reportDefault(parser, enc, s, next);
1809 break;
1810 case XML_TOK_INVALID:
1811 *eventPP = next;
1812 return XML_ERROR_INVALID_TOKEN;
1813 case XML_TOK_PARTIAL_CHAR:
1814 if (nextPtr) {
1815 *nextPtr = s;
1816 return XML_ERROR_NONE;
1818 return XML_ERROR_PARTIAL_CHAR;
1819 case XML_TOK_PARTIAL:
1820 case XML_TOK_NONE:
1821 if (nextPtr) {
1822 *nextPtr = s;
1823 return XML_ERROR_NONE;
1825 return XML_ERROR_UNCLOSED_CDATA_SECTION;
1826 default:
1827 abort();
1829 *eventPP = s = next;
1831 /* not reached */
1834 static enum XML_Error
1835 initializeEncoding(XML_Parser parser)
1837 const char *s;
1838 #ifdef XML_UNICODE
1839 char encodingBuf[128];
1840 if (!protocolEncodingName)
1841 s = 0;
1842 else {
1843 int i;
1844 for (i = 0; protocolEncodingName[i]; i++) {
1845 if (i == sizeof(encodingBuf) - 1
1846 || protocolEncodingName[i] >= 0x80
1847 || protocolEncodingName[i] < 0) {
1848 encodingBuf[0] = '\0';
1849 break;
1851 encodingBuf[i] = (char)protocolEncodingName[i];
1853 encodingBuf[i] = '\0';
1854 s = encodingBuf;
1856 #else
1857 s = protocolEncodingName;
1858 #endif
1859 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
1860 return XML_ERROR_NONE;
1861 return handleUnknownEncoding(parser, protocolEncodingName);
1864 static enum XML_Error
1865 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
1866 const char *s, const char *next)
1868 const char *encodingName = 0;
1869 const ENCODING *newEncoding = 0;
1870 const char *version;
1871 int standalone = -1;
1872 if (!(ns
1873 ? XmlParseXmlDeclNS
1874 : XmlParseXmlDecl)(isGeneralTextEntity,
1875 encoding,
1877 next,
1878 &eventPtr,
1879 &version,
1880 &encodingName,
1881 &newEncoding,
1882 &standalone))
1883 return XML_ERROR_SYNTAX;
1884 if (!isGeneralTextEntity && standalone == 1)
1885 dtd.standalone = 1;
1886 if (defaultHandler)
1887 reportDefault(parser, encoding, s, next);
1888 if (!protocolEncodingName) {
1889 if (newEncoding) {
1890 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
1891 eventPtr = encodingName;
1892 return XML_ERROR_INCORRECT_ENCODING;
1894 encoding = newEncoding;
1896 else if (encodingName) {
1897 enum XML_Error result;
1898 const XML_Char *s = poolStoreString(&tempPool,
1899 encoding,
1900 encodingName,
1901 encodingName
1902 + XmlNameLength(encoding, encodingName));
1903 if (!s)
1904 return XML_ERROR_NO_MEMORY;
1905 result = handleUnknownEncoding(parser, s);
1906 poolDiscard(&tempPool);
1907 if (result == XML_ERROR_UNKNOWN_ENCODING)
1908 eventPtr = encodingName;
1909 return result;
1912 return XML_ERROR_NONE;
1915 static enum XML_Error
1916 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
1918 if (unknownEncodingHandler) {
1919 XML_Encoding info;
1920 int i;
1921 for (i = 0; i < 256; i++)
1922 info.map[i] = -1;
1923 info.convert = 0;
1924 info.data = 0;
1925 info.release = 0;
1926 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
1927 ENCODING *enc;
1928 unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
1929 if (!unknownEncodingMem) {
1930 if (info.release)
1931 info.release(info.data);
1932 return XML_ERROR_NO_MEMORY;
1934 enc = (ns
1935 ? XmlInitUnknownEncodingNS
1936 : XmlInitUnknownEncoding)(unknownEncodingMem,
1937 info.map,
1938 info.convert,
1939 info.data);
1940 if (enc) {
1941 unknownEncodingData = info.data;
1942 unknownEncodingRelease = info.release;
1943 encoding = enc;
1944 return XML_ERROR_NONE;
1947 if (info.release)
1948 info.release(info.data);
1950 return XML_ERROR_UNKNOWN_ENCODING;
1953 static enum XML_Error
1954 prologInitProcessor(XML_Parser parser,
1955 const char *s,
1956 const char *end,
1957 const char **nextPtr)
1959 enum XML_Error result = initializeEncoding(parser);
1960 if (result != XML_ERROR_NONE)
1961 return result;
1962 processor = prologProcessor;
1963 return prologProcessor(parser, s, end, nextPtr);
1966 static enum XML_Error
1967 prologProcessor(XML_Parser parser,
1968 const char *s,
1969 const char *end,
1970 const char **nextPtr)
1972 for (;;) {
1973 const char *next;
1974 int tok = XmlPrologTok(encoding, s, end, &next);
1975 if (tok <= 0) {
1976 if (nextPtr != 0 && tok != XML_TOK_INVALID) {
1977 *nextPtr = s;
1978 return XML_ERROR_NONE;
1980 switch (tok) {
1981 case XML_TOK_INVALID:
1982 eventPtr = next;
1983 return XML_ERROR_INVALID_TOKEN;
1984 case XML_TOK_NONE:
1985 return XML_ERROR_NO_ELEMENTS;
1986 case XML_TOK_PARTIAL:
1987 return XML_ERROR_UNCLOSED_TOKEN;
1988 case XML_TOK_PARTIAL_CHAR:
1989 return XML_ERROR_PARTIAL_CHAR;
1990 case XML_TOK_TRAILING_CR:
1991 eventPtr = s + encoding->minBytesPerChar;
1992 return XML_ERROR_NO_ELEMENTS;
1993 default:
1994 abort();
1997 switch (XmlTokenRole(&prologState, tok, s, next, encoding)) {
1998 case XML_ROLE_XML_DECL:
2000 enum XML_Error result = processXmlDecl(parser, 0, s, next);
2001 if (result != XML_ERROR_NONE)
2002 return result;
2004 break;
2005 case XML_ROLE_DOCTYPE_SYSTEM_ID:
2006 if (!dtd.standalone
2007 && notStandaloneHandler
2008 && !notStandaloneHandler(handlerArg))
2009 return XML_ERROR_NOT_STANDALONE;
2010 hadExternalDoctype = 1;
2011 break;
2012 case XML_ROLE_DOCTYPE_PUBLIC_ID:
2013 case XML_ROLE_ENTITY_PUBLIC_ID:
2014 if (!XmlIsPublicId(encoding, s, next, &eventPtr))
2015 return XML_ERROR_SYNTAX;
2016 if (declEntity) {
2017 XML_Char *tem = poolStoreString(&dtd.pool,
2018 encoding,
2019 s + encoding->minBytesPerChar,
2020 next - encoding->minBytesPerChar);
2021 if (!tem)
2022 return XML_ERROR_NO_MEMORY;
2023 normalizePublicId(tem);
2024 declEntity->publicId = tem;
2025 poolFinish(&dtd.pool);
2027 break;
2028 case XML_ROLE_INSTANCE_START:
2029 processor = contentProcessor;
2030 if (hadExternalDoctype)
2031 dtd.complete = 0;
2032 return contentProcessor(parser, s, end, nextPtr);
2033 case XML_ROLE_ATTLIST_ELEMENT_NAME:
2035 const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next);
2036 if (!name)
2037 return XML_ERROR_NO_MEMORY;
2038 declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
2039 if (!declElementType)
2040 return XML_ERROR_NO_MEMORY;
2041 if (declElementType->name != name)
2042 poolDiscard(&dtd.pool);
2043 else {
2044 poolFinish(&dtd.pool);
2045 if (!setElementTypePrefix(parser, declElementType))
2046 return XML_ERROR_NO_MEMORY;
2048 break;
2050 case XML_ROLE_ATTRIBUTE_NAME:
2051 declAttributeId = getAttributeId(parser, encoding, s, next);
2052 if (!declAttributeId)
2053 return XML_ERROR_NO_MEMORY;
2054 declAttributeIsCdata = 0;
2055 break;
2056 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2057 declAttributeIsCdata = 1;
2058 break;
2059 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2060 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2061 if (dtd.complete
2062 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
2063 return XML_ERROR_NO_MEMORY;
2064 break;
2065 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2066 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2068 const XML_Char *attVal;
2069 enum XML_Error result
2070 = storeAttributeValue(parser, encoding, declAttributeIsCdata,
2071 s + encoding->minBytesPerChar,
2072 next - encoding->minBytesPerChar,
2073 &dtd.pool);
2074 if (result)
2075 return result;
2076 attVal = poolStart(&dtd.pool);
2077 poolFinish(&dtd.pool);
2078 if (dtd.complete
2079 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
2080 return XML_ERROR_NO_MEMORY;
2081 break;
2083 case XML_ROLE_ENTITY_VALUE:
2085 enum XML_Error result = storeEntityValue(parser, s, next);
2086 if (result != XML_ERROR_NONE)
2087 return result;
2089 break;
2090 case XML_ROLE_ENTITY_SYSTEM_ID:
2091 if (declEntity) {
2092 declEntity->systemId = poolStoreString(&dtd.pool, encoding,
2093 s + encoding->minBytesPerChar,
2094 next - encoding->minBytesPerChar);
2095 if (!declEntity->systemId)
2096 return XML_ERROR_NO_MEMORY;
2097 declEntity->base = dtd.base;
2098 poolFinish(&dtd.pool);
2100 break;
2101 case XML_ROLE_ENTITY_NOTATION_NAME:
2102 if (declEntity) {
2103 declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next);
2104 if (!declEntity->notation)
2105 return XML_ERROR_NO_MEMORY;
2106 poolFinish(&dtd.pool);
2107 if (unparsedEntityDeclHandler) {
2108 eventPtr = eventEndPtr = s;
2109 unparsedEntityDeclHandler(handlerArg,
2110 declEntity->name,
2111 declEntity->base,
2112 declEntity->systemId,
2113 declEntity->publicId,
2114 declEntity->notation);
2118 break;
2119 case XML_ROLE_GENERAL_ENTITY_NAME:
2121 const XML_Char *name;
2122 if (XmlPredefinedEntityName(encoding, s, next)) {
2123 declEntity = 0;
2124 break;
2126 name = poolStoreString(&dtd.pool, encoding, s, next);
2127 if (!name)
2128 return XML_ERROR_NO_MEMORY;
2129 if (dtd.complete) {
2130 declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2131 if (!declEntity)
2132 return XML_ERROR_NO_MEMORY;
2133 if (declEntity->name != name) {
2134 poolDiscard(&dtd.pool);
2135 declEntity = 0;
2137 else
2138 poolFinish(&dtd.pool);
2140 else {
2141 poolDiscard(&dtd.pool);
2142 declEntity = 0;
2145 break;
2146 case XML_ROLE_PARAM_ENTITY_NAME:
2147 declEntity = 0;
2148 break;
2149 case XML_ROLE_NOTATION_NAME:
2150 declNotationPublicId = 0;
2151 declNotationName = 0;
2152 if (notationDeclHandler) {
2153 declNotationName = poolStoreString(&tempPool, encoding, s, next);
2154 if (!declNotationName)
2155 return XML_ERROR_NO_MEMORY;
2156 poolFinish(&tempPool);
2158 break;
2159 case XML_ROLE_NOTATION_PUBLIC_ID:
2160 if (!XmlIsPublicId(encoding, s, next, &eventPtr))
2161 return XML_ERROR_SYNTAX;
2162 if (declNotationName) {
2163 XML_Char *tem = poolStoreString(&tempPool,
2164 encoding,
2165 s + encoding->minBytesPerChar,
2166 next - encoding->minBytesPerChar);
2167 if (!tem)
2168 return XML_ERROR_NO_MEMORY;
2169 normalizePublicId(tem);
2170 declNotationPublicId = tem;
2171 poolFinish(&tempPool);
2173 break;
2174 case XML_ROLE_NOTATION_SYSTEM_ID:
2175 if (declNotationName && notationDeclHandler) {
2176 const XML_Char *systemId
2177 = poolStoreString(&tempPool, encoding,
2178 s + encoding->minBytesPerChar,
2179 next - encoding->minBytesPerChar);
2180 if (!systemId)
2181 return XML_ERROR_NO_MEMORY;
2182 eventPtr = eventEndPtr = s;
2183 notationDeclHandler(handlerArg,
2184 declNotationName,
2185 dtd.base,
2186 systemId,
2187 declNotationPublicId);
2189 poolClear(&tempPool);
2190 break;
2191 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
2192 if (declNotationPublicId && notationDeclHandler) {
2193 eventPtr = eventEndPtr = s;
2194 notationDeclHandler(handlerArg,
2195 declNotationName,
2196 dtd.base,
2198 declNotationPublicId);
2200 poolClear(&tempPool);
2201 break;
2202 case XML_ROLE_ERROR:
2203 eventPtr = s;
2204 switch (tok) {
2205 case XML_TOK_PARAM_ENTITY_REF:
2206 return XML_ERROR_PARAM_ENTITY_REF;
2207 case XML_TOK_XML_DECL:
2208 return XML_ERROR_MISPLACED_XML_PI;
2209 default:
2210 return XML_ERROR_SYNTAX;
2212 case XML_ROLE_GROUP_OPEN:
2213 if (prologState.level >= groupSize) {
2214 if (groupSize)
2215 groupConnector = realloc(groupConnector, groupSize *= 2);
2216 else
2217 groupConnector = malloc(groupSize = 32);
2218 if (!groupConnector)
2219 return XML_ERROR_NO_MEMORY;
2221 groupConnector[prologState.level] = 0;
2222 break;
2223 case XML_ROLE_GROUP_SEQUENCE:
2224 if (groupConnector[prologState.level] == '|') {
2225 eventPtr = s;
2226 return XML_ERROR_SYNTAX;
2228 groupConnector[prologState.level] = ',';
2229 break;
2230 case XML_ROLE_GROUP_CHOICE:
2231 if (groupConnector[prologState.level] == ',') {
2232 eventPtr = s;
2233 return XML_ERROR_SYNTAX;
2235 groupConnector[prologState.level] = '|';
2236 break;
2237 case XML_ROLE_PARAM_ENTITY_REF:
2238 if (!dtd.standalone
2239 && notStandaloneHandler
2240 && !notStandaloneHandler(handlerArg))
2241 return XML_ERROR_NOT_STANDALONE;
2242 dtd.complete = 0;
2243 break;
2244 case XML_ROLE_NONE:
2245 switch (tok) {
2246 case XML_TOK_PI:
2247 eventPtr = s;
2248 eventEndPtr = next;
2249 if (!reportProcessingInstruction(parser, encoding, s, next))
2250 return XML_ERROR_NO_MEMORY;
2251 break;
2252 case XML_TOK_COMMENT:
2253 eventPtr = s;
2254 eventEndPtr = next;
2255 if (!reportComment(parser, encoding, s, next))
2256 return XML_ERROR_NO_MEMORY;
2257 break;
2259 break;
2261 if (defaultHandler) {
2262 switch (tok) {
2263 case XML_TOK_PI:
2264 case XML_TOK_COMMENT:
2265 case XML_TOK_BOM:
2266 case XML_TOK_XML_DECL:
2267 break;
2268 default:
2269 eventPtr = s;
2270 eventEndPtr = next;
2271 reportDefault(parser, encoding, s, next);
2274 s = next;
2276 /* not reached */
2279 static
2280 enum XML_Error epilogProcessor(XML_Parser parser,
2281 const char *s,
2282 const char *end,
2283 const char **nextPtr)
2285 processor = epilogProcessor;
2286 eventPtr = s;
2287 for (;;) {
2288 const char *next;
2289 int tok = XmlPrologTok(encoding, s, end, &next);
2290 eventEndPtr = next;
2291 switch (tok) {
2292 case XML_TOK_TRAILING_CR:
2293 if (defaultHandler) {
2294 eventEndPtr = end;
2295 reportDefault(parser, encoding, s, end);
2297 /* fall through */
2298 case XML_TOK_NONE:
2299 if (nextPtr)
2300 *nextPtr = end;
2301 return XML_ERROR_NONE;
2302 case XML_TOK_PROLOG_S:
2303 if (defaultHandler)
2304 reportDefault(parser, encoding, s, next);
2305 break;
2306 case XML_TOK_PI:
2307 if (!reportProcessingInstruction(parser, encoding, s, next))
2308 return XML_ERROR_NO_MEMORY;
2309 break;
2310 case XML_TOK_COMMENT:
2311 if (!reportComment(parser, encoding, s, next))
2312 return XML_ERROR_NO_MEMORY;
2313 break;
2314 case XML_TOK_INVALID:
2315 eventPtr = next;
2316 return XML_ERROR_INVALID_TOKEN;
2317 case XML_TOK_PARTIAL:
2318 if (nextPtr) {
2319 *nextPtr = s;
2320 return XML_ERROR_NONE;
2322 return XML_ERROR_UNCLOSED_TOKEN;
2323 case XML_TOK_PARTIAL_CHAR:
2324 if (nextPtr) {
2325 *nextPtr = s;
2326 return XML_ERROR_NONE;
2328 return XML_ERROR_PARTIAL_CHAR;
2329 default:
2330 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
2332 eventPtr = s = next;
2336 static
2337 enum XML_Error errorProcessor(XML_Parser parser,
2338 const char *s,
2339 const char *end,
2340 const char **nextPtr)
2342 return errorCode;
2345 static enum XML_Error
2346 storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2347 const char *ptr, const char *end,
2348 STRING_POOL *pool)
2350 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
2351 if (result)
2352 return result;
2353 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
2354 poolChop(pool);
2355 if (!poolAppendChar(pool, XML_T('\0')))
2356 return XML_ERROR_NO_MEMORY;
2357 return XML_ERROR_NONE;
2360 static enum XML_Error
2361 appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2362 const char *ptr, const char *end,
2363 STRING_POOL *pool)
2365 const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
2366 for (;;) {
2367 const char *next;
2368 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
2369 switch (tok) {
2370 case XML_TOK_NONE:
2371 return XML_ERROR_NONE;
2372 case XML_TOK_INVALID:
2373 if (enc == encoding)
2374 eventPtr = next;
2375 return XML_ERROR_INVALID_TOKEN;
2376 case XML_TOK_PARTIAL:
2377 if (enc == encoding)
2378 eventPtr = ptr;
2379 return XML_ERROR_INVALID_TOKEN;
2380 case XML_TOK_CHAR_REF:
2382 XML_Char buf[XML_ENCODE_MAX];
2383 int i;
2384 int n = XmlCharRefNumber(enc, ptr);
2385 if (n < 0) {
2386 if (enc == encoding)
2387 eventPtr = ptr;
2388 return XML_ERROR_BAD_CHAR_REF;
2390 if (!isCdata
2391 && n == 0x20 /* space */
2392 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
2393 break;
2394 n = XmlEncode(n, (ICHAR *)buf);
2395 if (!n) {
2396 if (enc == encoding)
2397 eventPtr = ptr;
2398 return XML_ERROR_BAD_CHAR_REF;
2400 for (i = 0; i < n; i++) {
2401 if (!poolAppendChar(pool, buf[i]))
2402 return XML_ERROR_NO_MEMORY;
2405 break;
2406 case XML_TOK_DATA_CHARS:
2407 if (!poolAppend(pool, enc, ptr, next))
2408 return XML_ERROR_NO_MEMORY;
2409 break;
2410 break;
2411 case XML_TOK_TRAILING_CR:
2412 next = ptr + enc->minBytesPerChar;
2413 /* fall through */
2414 case XML_TOK_ATTRIBUTE_VALUE_S:
2415 case XML_TOK_DATA_NEWLINE:
2416 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
2417 break;
2418 if (!poolAppendChar(pool, 0x20))
2419 return XML_ERROR_NO_MEMORY;
2420 break;
2421 case XML_TOK_ENTITY_REF:
2423 const XML_Char *name;
2424 ENTITY *entity;
2425 XML_Char ch = XmlPredefinedEntityName(enc,
2426 ptr + enc->minBytesPerChar,
2427 next - enc->minBytesPerChar);
2428 if (ch) {
2429 if (!poolAppendChar(pool, ch))
2430 return XML_ERROR_NO_MEMORY;
2431 break;
2433 name = poolStoreString(&temp2Pool, enc,
2434 ptr + enc->minBytesPerChar,
2435 next - enc->minBytesPerChar);
2436 if (!name)
2437 return XML_ERROR_NO_MEMORY;
2438 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
2439 poolDiscard(&temp2Pool);
2440 if (!entity) {
2441 if (dtd.complete) {
2442 if (enc == encoding)
2443 eventPtr = ptr;
2444 return XML_ERROR_UNDEFINED_ENTITY;
2447 else if (entity->open) {
2448 if (enc == encoding)
2449 eventPtr = ptr;
2450 return XML_ERROR_RECURSIVE_ENTITY_REF;
2452 else if (entity->notation) {
2453 if (enc == encoding)
2454 eventPtr = ptr;
2455 return XML_ERROR_BINARY_ENTITY_REF;
2457 else if (!entity->textPtr) {
2458 if (enc == encoding)
2459 eventPtr = ptr;
2460 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
2462 else {
2463 enum XML_Error result;
2464 const XML_Char *textEnd = entity->textPtr + entity->textLen;
2465 entity->open = 1;
2466 result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
2467 entity->open = 0;
2468 if (result)
2469 return result;
2472 break;
2473 default:
2474 abort();
2476 ptr = next;
2478 /* not reached */
2481 static
2482 enum XML_Error storeEntityValue(XML_Parser parser,
2483 const char *entityTextPtr,
2484 const char *entityTextEnd)
2486 const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
2487 STRING_POOL *pool = &(dtd.pool);
2488 entityTextPtr += encoding->minBytesPerChar;
2489 entityTextEnd -= encoding->minBytesPerChar;
2490 for (;;) {
2491 const char *next;
2492 int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next);
2493 switch (tok) {
2494 case XML_TOK_PARAM_ENTITY_REF:
2495 eventPtr = entityTextPtr;
2496 return XML_ERROR_SYNTAX;
2497 case XML_TOK_NONE:
2498 if (declEntity) {
2499 declEntity->textPtr = pool->start;
2500 declEntity->textLen = pool->ptr - pool->start;
2501 poolFinish(pool);
2503 else
2504 poolDiscard(pool);
2505 return XML_ERROR_NONE;
2506 case XML_TOK_ENTITY_REF:
2507 case XML_TOK_DATA_CHARS:
2508 if (!poolAppend(pool, encoding, entityTextPtr, next))
2509 return XML_ERROR_NO_MEMORY;
2510 break;
2511 case XML_TOK_TRAILING_CR:
2512 next = entityTextPtr + encoding->minBytesPerChar;
2513 /* fall through */
2514 case XML_TOK_DATA_NEWLINE:
2515 if (pool->end == pool->ptr && !poolGrow(pool))
2516 return XML_ERROR_NO_MEMORY;
2517 *(pool->ptr)++ = 0xA;
2518 break;
2519 case XML_TOK_CHAR_REF:
2521 XML_Char buf[XML_ENCODE_MAX];
2522 int i;
2523 int n = XmlCharRefNumber(encoding, entityTextPtr);
2524 if (n < 0) {
2525 eventPtr = entityTextPtr;
2526 return XML_ERROR_BAD_CHAR_REF;
2528 n = XmlEncode(n, (ICHAR *)buf);
2529 if (!n) {
2530 eventPtr = entityTextPtr;
2531 return XML_ERROR_BAD_CHAR_REF;
2533 for (i = 0; i < n; i++) {
2534 if (pool->end == pool->ptr && !poolGrow(pool))
2535 return XML_ERROR_NO_MEMORY;
2536 *(pool->ptr)++ = buf[i];
2539 break;
2540 case XML_TOK_PARTIAL:
2541 eventPtr = entityTextPtr;
2542 return XML_ERROR_INVALID_TOKEN;
2543 case XML_TOK_INVALID:
2544 eventPtr = next;
2545 return XML_ERROR_INVALID_TOKEN;
2546 default:
2547 abort();
2549 entityTextPtr = next;
2551 /* not reached */
2554 static void
2555 normalizeLines(XML_Char *s)
2557 XML_Char *p;
2558 for (;; s++) {
2559 if (*s == XML_T('\0'))
2560 return;
2561 if (*s == 0xD)
2562 break;
2564 p = s;
2565 do {
2566 if (*s == 0xD) {
2567 *p++ = 0xA;
2568 if (*++s == 0xA)
2569 s++;
2571 else
2572 *p++ = *s++;
2573 } while (*s);
2574 *p = XML_T('\0');
2577 static int
2578 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2580 const XML_Char *target;
2581 XML_Char *data;
2582 const char *tem;
2583 if (!processingInstructionHandler) {
2584 if (defaultHandler)
2585 reportDefault(parser, enc, start, end);
2586 return 1;
2588 start += enc->minBytesPerChar * 2;
2589 tem = start + XmlNameLength(enc, start);
2590 target = poolStoreString(&tempPool, enc, start, tem);
2591 if (!target)
2592 return 0;
2593 poolFinish(&tempPool);
2594 data = poolStoreString(&tempPool, enc,
2595 XmlSkipS(enc, tem),
2596 end - enc->minBytesPerChar*2);
2597 if (!data)
2598 return 0;
2599 normalizeLines(data);
2600 processingInstructionHandler(handlerArg, target, data);
2601 poolClear(&tempPool);
2602 return 1;
2605 static int
2606 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2608 XML_Char *data;
2609 if (!commentHandler) {
2610 if (defaultHandler)
2611 reportDefault(parser, enc, start, end);
2612 return 1;
2614 data = poolStoreString(&tempPool,
2615 enc,
2616 start + enc->minBytesPerChar * 4,
2617 end - enc->minBytesPerChar * 3);
2618 if (!data)
2619 return 0;
2620 normalizeLines(data);
2621 commentHandler(handlerArg, data);
2622 poolClear(&tempPool);
2623 return 1;
2626 static void
2627 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
2629 if (MUST_CONVERT(enc, s)) {
2630 const char **eventPP;
2631 const char **eventEndPP;
2632 if (enc == encoding) {
2633 eventPP = &eventPtr;
2634 eventEndPP = &eventEndPtr;
2636 else {
2637 eventPP = &(openInternalEntities->internalEventPtr);
2638 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2640 do {
2641 ICHAR *dataPtr = (ICHAR *)dataBuf;
2642 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2643 *eventEndPP = s;
2644 defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2645 *eventPP = s;
2646 } while (s != end);
2648 else
2649 defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
2653 static int
2654 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
2656 DEFAULT_ATTRIBUTE *att;
2657 if (type->nDefaultAtts == type->allocDefaultAtts) {
2658 if (type->allocDefaultAtts == 0) {
2659 type->allocDefaultAtts = 8;
2660 type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2662 else {
2663 type->allocDefaultAtts *= 2;
2664 type->defaultAtts = realloc(type->defaultAtts,
2665 type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2667 if (!type->defaultAtts)
2668 return 0;
2670 att = type->defaultAtts + type->nDefaultAtts;
2671 att->id = attId;
2672 att->value = value;
2673 att->isCdata = isCdata;
2674 if (!isCdata)
2675 attId->maybeTokenized = 1;
2676 type->nDefaultAtts += 1;
2677 return 1;
2680 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
2682 const XML_Char *name;
2683 for (name = elementType->name; *name; name++) {
2684 if (*name == XML_T(':')) {
2685 PREFIX *prefix;
2686 const XML_Char *s;
2687 for (s = elementType->name; s != name; s++) {
2688 if (!poolAppendChar(&dtd.pool, *s))
2689 return 0;
2691 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2692 return 0;
2693 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
2694 if (!prefix)
2695 return 0;
2696 if (prefix->name == poolStart(&dtd.pool))
2697 poolFinish(&dtd.pool);
2698 else
2699 poolDiscard(&dtd.pool);
2700 elementType->prefix = prefix;
2704 return 1;
2707 static ATTRIBUTE_ID *
2708 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2710 ATTRIBUTE_ID *id;
2711 const XML_Char *name;
2712 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2713 return 0;
2714 name = poolStoreString(&dtd.pool, enc, start, end);
2715 if (!name)
2716 return 0;
2717 ++name;
2718 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
2719 if (!id)
2720 return 0;
2721 if (id->name != name)
2722 poolDiscard(&dtd.pool);
2723 else {
2724 poolFinish(&dtd.pool);
2725 if (!ns)
2727 else if (name[0] == 'x'
2728 && name[1] == 'm'
2729 && name[2] == 'l'
2730 && name[3] == 'n'
2731 && name[4] == 's'
2732 && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
2733 if (name[5] == '\0')
2734 id->prefix = &dtd.defaultPrefix;
2735 else
2736 id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
2737 id->xmlns = 1;
2739 else {
2740 int i;
2741 for (i = 0; name[i]; i++) {
2742 if (name[i] == XML_T(':')) {
2743 int j;
2744 for (j = 0; j < i; j++) {
2745 if (!poolAppendChar(&dtd.pool, name[j]))
2746 return 0;
2748 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2749 return 0;
2750 id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
2751 if (id->prefix->name == poolStart(&dtd.pool))
2752 poolFinish(&dtd.pool);
2753 else
2754 poolDiscard(&dtd.pool);
2755 break;
2760 return id;
2763 #define CONTEXT_SEP XML_T('\f')
2765 static
2766 const XML_Char *getContext(XML_Parser parser)
2768 HASH_TABLE_ITER iter;
2769 int needSep = 0;
2771 if (dtd.defaultPrefix.binding) {
2772 int i;
2773 int len;
2774 if (!poolAppendChar(&tempPool, XML_T('=')))
2775 return 0;
2776 len = dtd.defaultPrefix.binding->uriLen;
2777 if (namespaceSeparator != XML_T('\0'))
2778 len--;
2779 for (i = 0; i < len; i++)
2780 if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
2781 return 0;
2782 needSep = 1;
2785 hashTableIterInit(&iter, &(dtd.prefixes));
2786 for (;;) {
2787 int i;
2788 int len;
2789 const XML_Char *s;
2790 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
2791 if (!prefix)
2792 break;
2793 if (!prefix->binding)
2794 continue;
2795 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
2796 return 0;
2797 for (s = prefix->name; *s; s++)
2798 if (!poolAppendChar(&tempPool, *s))
2799 return 0;
2800 if (!poolAppendChar(&tempPool, XML_T('=')))
2801 return 0;
2802 len = prefix->binding->uriLen;
2803 if (namespaceSeparator != XML_T('\0'))
2804 len--;
2805 for (i = 0; i < len; i++)
2806 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
2807 return 0;
2808 needSep = 1;
2812 hashTableIterInit(&iter, &(dtd.generalEntities));
2813 for (;;) {
2814 const XML_Char *s;
2815 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
2816 if (!e)
2817 break;
2818 if (!e->open)
2819 continue;
2820 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
2821 return 0;
2822 for (s = e->name; *s; s++)
2823 if (!poolAppendChar(&tempPool, *s))
2824 return 0;
2825 needSep = 1;
2828 if (!poolAppendChar(&tempPool, XML_T('\0')))
2829 return 0;
2830 return tempPool.start;
2833 static
2834 int setContext(XML_Parser parser, const XML_Char *context)
2836 const XML_Char *s = context;
2838 while (*context != XML_T('\0')) {
2839 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
2840 ENTITY *e;
2841 if (!poolAppendChar(&tempPool, XML_T('\0')))
2842 return 0;
2843 e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
2844 if (e)
2845 e->open = 1;
2846 if (*s != XML_T('\0'))
2847 s++;
2848 context = s;
2849 poolDiscard(&tempPool);
2851 else if (*s == '=') {
2852 PREFIX *prefix;
2853 if (poolLength(&tempPool) == 0)
2854 prefix = &dtd.defaultPrefix;
2855 else {
2856 if (!poolAppendChar(&tempPool, XML_T('\0')))
2857 return 0;
2858 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
2859 if (!prefix)
2860 return 0;
2861 if (prefix->name == poolStart(&tempPool))
2862 poolFinish(&tempPool);
2863 else
2864 poolDiscard(&tempPool);
2866 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
2867 if (!poolAppendChar(&tempPool, *context))
2868 return 0;
2869 if (!poolAppendChar(&tempPool, XML_T('\0')))
2870 return 0;
2871 if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
2872 return 0;
2873 poolDiscard(&tempPool);
2874 if (*context != XML_T('\0'))
2875 ++context;
2876 s = context;
2878 else {
2879 if (!poolAppendChar(&tempPool, *s))
2880 return 0;
2881 s++;
2884 return 1;
2888 static
2889 void normalizePublicId(XML_Char *publicId)
2891 XML_Char *p = publicId;
2892 XML_Char *s;
2893 for (s = publicId; *s; s++) {
2894 switch (*s) {
2895 case 0x20:
2896 case 0xD:
2897 case 0xA:
2898 if (p != publicId && p[-1] != 0x20)
2899 *p++ = 0x20;
2900 break;
2901 default:
2902 *p++ = *s;
2905 if (p != publicId && p[-1] == 0x20)
2906 --p;
2907 *p = XML_T('\0');
2910 static int dtdInit(DTD *p)
2912 poolInit(&(p->pool));
2913 hashTableInit(&(p->generalEntities));
2914 hashTableInit(&(p->elementTypes));
2915 hashTableInit(&(p->attributeIds));
2916 hashTableInit(&(p->prefixes));
2917 p->complete = 1;
2918 p->standalone = 0;
2919 p->base = 0;
2920 p->defaultPrefix.name = 0;
2921 p->defaultPrefix.binding = 0;
2922 return 1;
2925 static void dtdDestroy(DTD *p)
2927 HASH_TABLE_ITER iter;
2928 hashTableIterInit(&iter, &(p->elementTypes));
2929 for (;;) {
2930 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
2931 if (!e)
2932 break;
2933 if (e->allocDefaultAtts != 0)
2934 free(e->defaultAtts);
2936 hashTableDestroy(&(p->generalEntities));
2937 hashTableDestroy(&(p->elementTypes));
2938 hashTableDestroy(&(p->attributeIds));
2939 hashTableDestroy(&(p->prefixes));
2940 poolDestroy(&(p->pool));
2943 /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
2944 The new DTD has already been initialized. */
2946 static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
2948 HASH_TABLE_ITER iter;
2950 if (oldDtd->base) {
2951 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base);
2952 if (!tem)
2953 return 0;
2954 newDtd->base = tem;
2957 /* Copy the prefix table. */
2959 hashTableIterInit(&iter, &(oldDtd->prefixes));
2960 for (;;) {
2961 const XML_Char *name;
2962 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
2963 if (!oldP)
2964 break;
2965 name = poolCopyString(&(newDtd->pool), oldP->name);
2966 if (!name)
2967 return 0;
2968 if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
2969 return 0;
2972 hashTableIterInit(&iter, &(oldDtd->attributeIds));
2974 /* Copy the attribute id table. */
2976 for (;;) {
2977 ATTRIBUTE_ID *newA;
2978 const XML_Char *name;
2979 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
2981 if (!oldA)
2982 break;
2983 /* Remember to allocate the scratch byte before the name. */
2984 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
2985 return 0;
2986 name = poolCopyString(&(newDtd->pool), oldA->name);
2987 if (!name)
2988 return 0;
2989 ++name;
2990 newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
2991 if (!newA)
2992 return 0;
2993 newA->maybeTokenized = oldA->maybeTokenized;
2994 if (oldA->prefix) {
2995 newA->xmlns = oldA->xmlns;
2996 if (oldA->prefix == &oldDtd->defaultPrefix)
2997 newA->prefix = &newDtd->defaultPrefix;
2998 else
2999 newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
3003 /* Copy the element type table. */
3005 hashTableIterInit(&iter, &(oldDtd->elementTypes));
3007 for (;;) {
3008 int i;
3009 ELEMENT_TYPE *newE;
3010 const XML_Char *name;
3011 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3012 if (!oldE)
3013 break;
3014 name = poolCopyString(&(newDtd->pool), oldE->name);
3015 if (!name)
3016 return 0;
3017 newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
3018 if (!newE)
3019 return 0;
3020 if (oldE->nDefaultAtts) {
3021 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
3022 if (!newE->defaultAtts)
3023 return 0;
3025 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
3026 if (oldE->prefix)
3027 newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
3028 for (i = 0; i < newE->nDefaultAtts; i++) {
3029 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
3030 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
3031 if (oldE->defaultAtts[i].value) {
3032 newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
3033 if (!newE->defaultAtts[i].value)
3034 return 0;
3036 else
3037 newE->defaultAtts[i].value = 0;
3041 /* Copy the entity table. */
3043 hashTableIterInit(&iter, &(oldDtd->generalEntities));
3045 for (;;) {
3046 ENTITY *newE;
3047 const XML_Char *name;
3048 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
3049 if (!oldE)
3050 break;
3051 name = poolCopyString(&(newDtd->pool), oldE->name);
3052 if (!name)
3053 return 0;
3054 newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
3055 if (!newE)
3056 return 0;
3057 if (oldE->systemId) {
3058 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
3059 if (!tem)
3060 return 0;
3061 newE->systemId = tem;
3062 if (oldE->base) {
3063 if (oldE->base == oldDtd->base)
3064 newE->base = newDtd->base;
3065 tem = poolCopyString(&(newDtd->pool), oldE->base);
3066 if (!tem)
3067 return 0;
3068 newE->base = tem;
3071 else {
3072 const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
3073 if (!tem)
3074 return 0;
3075 newE->textPtr = tem;
3076 newE->textLen = oldE->textLen;
3078 if (oldE->notation) {
3079 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
3080 if (!tem)
3081 return 0;
3082 newE->notation = tem;
3086 newDtd->complete = oldDtd->complete;
3087 newDtd->standalone = oldDtd->standalone;
3088 return 1;
3091 static
3092 void poolInit(STRING_POOL *pool)
3094 pool->blocks = 0;
3095 pool->freeBlocks = 0;
3096 pool->start = 0;
3097 pool->ptr = 0;
3098 pool->end = 0;
3101 static
3102 void poolClear(STRING_POOL *pool)
3104 if (!pool->freeBlocks)
3105 pool->freeBlocks = pool->blocks;
3106 else {
3107 BLOCK *p = pool->blocks;
3108 while (p) {
3109 BLOCK *tem = p->next;
3110 p->next = pool->freeBlocks;
3111 pool->freeBlocks = p;
3112 p = tem;
3115 pool->blocks = 0;
3116 pool->start = 0;
3117 pool->ptr = 0;
3118 pool->end = 0;
3121 static
3122 void poolDestroy(STRING_POOL *pool)
3124 BLOCK *p = pool->blocks;
3125 while (p) {
3126 BLOCK *tem = p->next;
3127 free(p);
3128 p = tem;
3130 pool->blocks = 0;
3131 p = pool->freeBlocks;
3132 while (p) {
3133 BLOCK *tem = p->next;
3134 free(p);
3135 p = tem;
3137 pool->freeBlocks = 0;
3138 pool->ptr = 0;
3139 pool->start = 0;
3140 pool->end = 0;
3143 static
3144 XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
3145 const char *ptr, const char *end)
3147 if (!pool->ptr && !poolGrow(pool))
3148 return 0;
3149 for (;;) {
3150 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
3151 if (ptr == end)
3152 break;
3153 if (!poolGrow(pool))
3154 return 0;
3156 return pool->start;
3159 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
3161 do {
3162 if (!poolAppendChar(pool, *s))
3163 return 0;
3164 } while (*s++);
3165 s = pool->start;
3166 poolFinish(pool);
3167 return s;
3170 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
3172 if (!pool->ptr && !poolGrow(pool))
3173 return 0;
3174 for (; n > 0; --n, s++) {
3175 if (!poolAppendChar(pool, *s))
3176 return 0;
3179 s = pool->start;
3180 poolFinish(pool);
3181 return s;
3184 static
3185 XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
3186 const char *ptr, const char *end)
3188 if (!poolAppend(pool, enc, ptr, end))
3189 return 0;
3190 if (pool->ptr == pool->end && !poolGrow(pool))
3191 return 0;
3192 *(pool->ptr)++ = 0;
3193 return pool->start;
3196 static
3197 int poolGrow(STRING_POOL *pool)
3199 if (pool->freeBlocks) {
3200 if (pool->start == 0) {
3201 pool->blocks = pool->freeBlocks;
3202 pool->freeBlocks = pool->freeBlocks->next;
3203 pool->blocks->next = 0;
3204 pool->start = pool->blocks->s;
3205 pool->end = pool->start + pool->blocks->size;
3206 pool->ptr = pool->start;
3207 return 1;
3209 if (pool->end - pool->start < pool->freeBlocks->size) {
3210 BLOCK *tem = pool->freeBlocks->next;
3211 pool->freeBlocks->next = pool->blocks;
3212 pool->blocks = pool->freeBlocks;
3213 pool->freeBlocks = tem;
3214 memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
3215 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3216 pool->start = pool->blocks->s;
3217 pool->end = pool->start + pool->blocks->size;
3218 return 1;
3221 if (pool->blocks && pool->start == pool->blocks->s) {
3222 int blockSize = (pool->end - pool->start)*2;
3223 pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3224 if (!pool->blocks)
3225 return 0;
3226 pool->blocks->size = blockSize;
3227 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3228 pool->start = pool->blocks->s;
3229 pool->end = pool->start + blockSize;
3231 else {
3232 BLOCK *tem;
3233 int blockSize = pool->end - pool->start;
3234 if (blockSize < INIT_BLOCK_SIZE)
3235 blockSize = INIT_BLOCK_SIZE;
3236 else
3237 blockSize *= 2;
3238 tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3239 if (!tem)
3240 return 0;
3241 tem->size = blockSize;
3242 tem->next = pool->blocks;
3243 pool->blocks = tem;
3244 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
3245 pool->ptr = tem->s + (pool->ptr - pool->start);
3246 pool->start = tem->s;
3247 pool->end = tem->s + blockSize;
3249 return 1;