More installation info. Bump alpha version.
[python/dscho.git] / Modules / expat / xmlparse.c
blobc46b6010baa3273238c7442d64c1d1805e2da0f7
1 /*
2 Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
3 See the file COPYING for copying permission.
4 */
6 #ifdef COMPILED_FROM_DSP
7 # include "winconfig.h"
8 # define XMLPARSEAPI(type) __declspec(dllexport) type __cdecl
9 # include "expat.h"
10 # undef XMLPARSEAPI
11 #else
12 #ifdef HAVE_CONFIG_H
13 #include <config.h>
14 #endif
16 #ifdef __declspec
17 # define XMLPARSEAPI(type) __declspec(dllexport) type __cdecl
18 #endif
20 #include "expat.h"
22 #ifdef __declspec
23 # undef XMLPARSEAPI
24 #endif
25 #endif /* ndef COMPILED_FROM_DSP */
27 #include <stddef.h>
28 #include <string.h>
/* Select the internal-encoding flavour of the tokenizer entry points.
   With XML_UNICODE defined the UTF-16 variants are used and ICHAR is an
   unsigned short; otherwise the UTF-8 variants are used and ICHAR is a
   char. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* True when enc is not flagged isUtf16, or when the address s is odd.
   The argument s is parenthesised so that an expression argument is cast
   as a whole rather than only its first operand. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* True when enc is not flagged isUtf8; s is not examined. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
49 #ifndef XML_NS
51 #define XmlInitEncodingNS XmlInitEncoding
52 #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
53 #undef XmlGetInternalEncodingNS
54 #define XmlGetInternalEncodingNS XmlGetInternalEncoding
55 #define XmlParseXmlDeclNS XmlParseXmlDecl
57 #endif
59 #ifdef XML_UNICODE_WCHAR_T
60 #define XML_T(x) L ## x
61 #else
62 #define XML_T(x) x
63 #endif
/* ROUND_UP(n, sz): smallest multiple of sz that is >= n.
   sz must be a power of 2, because the result is obtained by masking off
   the low bits of n + (sz - 1). */
#define ROUND_UP(n, sz) \
  (((n) + ((sz) - 1)) & ~((sz) - 1))
68 #include "xmltok.h"
69 #include "xmlrole.h"
71 typedef const XML_Char *KEY;
73 typedef struct {
74 KEY name;
75 } NAMED;
77 typedef struct {
78 NAMED **v;
79 size_t size;
80 size_t used;
81 size_t usedLim;
82 XML_Memory_Handling_Suite *mem;
83 } HASH_TABLE;
85 typedef struct {
86 NAMED **p;
87 NAMED **end;
88 } HASH_TABLE_ITER;
90 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
91 #define INIT_DATA_BUF_SIZE 1024
92 #define INIT_ATTS_SIZE 16
93 #define INIT_BLOCK_SIZE 1024
94 #define INIT_BUFFER_SIZE 1024
96 #define EXPAND_SPARE 24
98 typedef struct binding {
99 struct prefix *prefix;
100 struct binding *nextTagBinding;
101 struct binding *prevPrefixBinding;
102 const struct attribute_id *attId;
103 XML_Char *uri;
104 int uriLen;
105 int uriAlloc;
106 } BINDING;
108 typedef struct prefix {
109 const XML_Char *name;
110 BINDING *binding;
111 } PREFIX;
113 typedef struct {
114 const XML_Char *str;
115 const XML_Char *localPart;
116 int uriLen;
117 } TAG_NAME;
119 typedef struct tag {
120 struct tag *parent;
121 const char *rawName;
122 int rawNameLength;
123 TAG_NAME name;
124 char *buf;
125 char *bufEnd;
126 BINDING *bindings;
127 } TAG;
129 typedef struct {
130 const XML_Char *name;
131 const XML_Char *textPtr;
132 int textLen;
133 const XML_Char *systemId;
134 const XML_Char *base;
135 const XML_Char *publicId;
136 const XML_Char *notation;
137 char open;
138 char is_param;
139 } ENTITY;
141 typedef struct {
142 enum XML_Content_Type type;
143 enum XML_Content_Quant quant;
144 const XML_Char * name;
145 int firstchild;
146 int lastchild;
147 int childcnt;
148 int nextsib;
149 } CONTENT_SCAFFOLD;
151 typedef struct block {
152 struct block *next;
153 int size;
154 XML_Char s[1];
155 } BLOCK;
157 typedef struct {
158 BLOCK *blocks;
159 BLOCK *freeBlocks;
160 const XML_Char *end;
161 XML_Char *ptr;
162 XML_Char *start;
163 XML_Memory_Handling_Suite *mem;
164 } STRING_POOL;
166 /* The XML_Char before the name is used to determine whether
167 an attribute has been specified. */
168 typedef struct attribute_id {
169 XML_Char *name;
170 PREFIX *prefix;
171 char maybeTokenized;
172 char xmlns;
173 } ATTRIBUTE_ID;
175 typedef struct {
176 const ATTRIBUTE_ID *id;
177 char isCdata;
178 const XML_Char *value;
179 } DEFAULT_ATTRIBUTE;
181 typedef struct {
182 const XML_Char *name;
183 PREFIX *prefix;
184 const ATTRIBUTE_ID *idAtt;
185 int nDefaultAtts;
186 int allocDefaultAtts;
187 DEFAULT_ATTRIBUTE *defaultAtts;
188 } ELEMENT_TYPE;
190 typedef struct {
191 HASH_TABLE generalEntities;
192 HASH_TABLE elementTypes;
193 HASH_TABLE attributeIds;
194 HASH_TABLE prefixes;
195 STRING_POOL pool;
196 int complete;
197 int standalone;
198 #ifdef XML_DTD
199 HASH_TABLE paramEntities;
200 #endif /* XML_DTD */
201 PREFIX defaultPrefix;
202 /* === scaffolding for building content model === */
203 int in_eldecl;
204 CONTENT_SCAFFOLD *scaffold;
205 unsigned contentStringLen;
206 unsigned scaffSize;
207 unsigned scaffCount;
208 int scaffLevel;
209 int *scaffIndex;
210 } DTD;
212 typedef struct open_internal_entity {
213 const char *internalEventPtr;
214 const char *internalEventEndPtr;
215 struct open_internal_entity *next;
216 ENTITY *entity;
217 } OPEN_INTERNAL_ENTITY;
219 typedef enum XML_Error Processor(XML_Parser parser,
220 const char *start,
221 const char *end,
222 const char **endPtr);
224 static Processor prologProcessor;
225 static Processor prologInitProcessor;
226 static Processor contentProcessor;
227 static Processor cdataSectionProcessor;
228 #ifdef XML_DTD
229 static Processor ignoreSectionProcessor;
230 #endif /* XML_DTD */
231 static Processor epilogProcessor;
232 static Processor errorProcessor;
233 static Processor externalEntityInitProcessor;
234 static Processor externalEntityInitProcessor2;
235 static Processor externalEntityInitProcessor3;
236 static Processor externalEntityContentProcessor;
238 static enum XML_Error
239 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
240 static enum XML_Error
241 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
242 static enum XML_Error
243 initializeEncoding(XML_Parser parser);
244 static enum XML_Error
245 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
246 const char *end, int tok, const char *next, const char **nextPtr);
247 static enum XML_Error
248 processInternalParamEntity(XML_Parser parser, ENTITY *entity);
249 static enum XML_Error
250 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
251 const char *start, const char *end, const char **endPtr);
252 static enum XML_Error
253 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
254 #ifdef XML_DTD
255 static enum XML_Error
256 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
257 #endif /* XML_DTD */
258 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
259 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
260 static
261 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
263 static int
264 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *,
265 int isCdata, int isId, const XML_Char *dfltValue,
266 XML_Parser parser);
268 static enum XML_Error
269 storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
270 STRING_POOL *);
271 static enum XML_Error
272 appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
273 STRING_POOL *);
274 static ATTRIBUTE_ID *
275 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
276 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
277 static enum XML_Error
278 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
279 static int
280 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
281 static int
282 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
283 static void
284 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
286 static const XML_Char *getContext(XML_Parser parser);
287 static int setContext(XML_Parser parser, const XML_Char *context);
288 static void normalizePublicId(XML_Char *s);
289 static int dtdInit(DTD *, XML_Parser parser);
291 static void dtdDestroy(DTD *, XML_Parser parser);
293 static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser);
295 static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *,
296 XML_Parser parser);
298 #ifdef XML_DTD
299 static void dtdSwap(DTD *, DTD *);
300 #endif /* XML_DTD */
302 static NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize);
304 static void hashTableInit(HASH_TABLE *, XML_Memory_Handling_Suite *ms);
306 static void hashTableDestroy(HASH_TABLE *);
307 static void hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
308 static NAMED *hashTableIterNext(HASH_TABLE_ITER *);
309 static void poolInit(STRING_POOL *, XML_Memory_Handling_Suite *ms);
310 static void poolClear(STRING_POOL *);
311 static void poolDestroy(STRING_POOL *);
312 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
313 const char *ptr, const char *end);
314 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
315 const char *ptr, const char *end);
317 static int poolGrow(STRING_POOL *pool);
319 static int nextScaffoldPart(XML_Parser parser);
320 static XML_Content *build_model(XML_Parser parser);
322 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
323 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
324 static const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s);
325 static ELEMENT_TYPE * getElementType(XML_Parser Paraser,
326 const ENCODING *enc,
327 const char *ptr,
328 const char *end);
/* Small operations on a STRING_POOL, expressed through its start, ptr and
   end members. Every macro argument is parenthesised so that expression
   arguments such as `pools + 1` expand correctly. */

/* Value of the pool's start member. */
#define poolStart(pool) ((pool)->start)
/* Value of the pool's ptr member. */
#define poolEnd(pool) ((pool)->ptr)
/* Number of XML_Char between start and ptr. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Step ptr back by one character. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The character immediately before ptr. */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Reset ptr back to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Advance start up to ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Store c at ptr and advance ptr. When ptr has reached end, poolGrow() is
   called first; the macro yields 0 if poolGrow() returns 0, and 1 once the
   character has been stored. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
342 typedef struct {
343 /* The first member must be userData so that the XML_GetUserData macro works. */
344 void *m_userData;
345 void *m_handlerArg;
346 char *m_buffer;
347 XML_Memory_Handling_Suite m_mem;
348 /* first character to be parsed */
349 const char *m_bufferPtr;
350 /* past last character to be parsed */
351 char *m_bufferEnd;
352 /* allocated end of buffer */
353 const char *m_bufferLim;
354 long m_parseEndByteIndex;
355 const char *m_parseEndPtr;
356 XML_Char *m_dataBuf;
357 XML_Char *m_dataBufEnd;
358 XML_StartElementHandler m_startElementHandler;
359 XML_EndElementHandler m_endElementHandler;
360 XML_CharacterDataHandler m_characterDataHandler;
361 XML_ProcessingInstructionHandler m_processingInstructionHandler;
362 XML_CommentHandler m_commentHandler;
363 XML_StartCdataSectionHandler m_startCdataSectionHandler;
364 XML_EndCdataSectionHandler m_endCdataSectionHandler;
365 XML_DefaultHandler m_defaultHandler;
366 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
367 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
368 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
369 XML_NotationDeclHandler m_notationDeclHandler;
370 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
371 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
372 XML_NotStandaloneHandler m_notStandaloneHandler;
373 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
374 void *m_externalEntityRefHandlerArg;
375 XML_UnknownEncodingHandler m_unknownEncodingHandler;
376 XML_ElementDeclHandler m_elementDeclHandler;
377 XML_AttlistDeclHandler m_attlistDeclHandler;
378 XML_EntityDeclHandler m_entityDeclHandler;
379 XML_XmlDeclHandler m_xmlDeclHandler;
380 const ENCODING *m_encoding;
381 INIT_ENCODING m_initEncoding;
382 const ENCODING *m_internalEncoding;
383 const XML_Char *m_protocolEncodingName;
384 int m_ns;
385 int m_ns_triplets;
386 void *m_unknownEncodingMem;
387 void *m_unknownEncodingData;
388 void *m_unknownEncodingHandlerData;
389 void (*m_unknownEncodingRelease)(void *);
390 PROLOG_STATE m_prologState;
391 Processor *m_processor;
392 enum XML_Error m_errorCode;
393 const char *m_eventPtr;
394 const char *m_eventEndPtr;
395 const char *m_positionPtr;
396 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
397 int m_defaultExpandInternalEntities;
398 int m_tagLevel;
399 ENTITY *m_declEntity;
400 const XML_Char *m_doctypeName;
401 const XML_Char *m_doctypeSysid;
402 const XML_Char *m_doctypePubid;
403 const XML_Char *m_declAttributeType;
404 const XML_Char *m_declNotationName;
405 const XML_Char *m_declNotationPublicId;
406 ELEMENT_TYPE *m_declElementType;
407 ATTRIBUTE_ID *m_declAttributeId;
408 char m_declAttributeIsCdata;
409 char m_declAttributeIsId;
410 DTD m_dtd;
411 const XML_Char *m_curBase;
412 TAG *m_tagStack;
413 TAG *m_freeTagList;
414 BINDING *m_inheritedBindings;
415 BINDING *m_freeBindingList;
416 int m_attsSize;
417 int m_nSpecifiedAtts;
418 int m_idAttIndex;
419 ATTRIBUTE *m_atts;
420 POSITION m_position;
421 STRING_POOL m_tempPool;
422 STRING_POOL m_temp2Pool;
423 char *m_groupConnector;
424 unsigned m_groupSize;
425 int m_hadExternalDoctype;
426 XML_Char m_namespaceSeparator;
427 #ifdef XML_DTD
428 enum XML_ParamEntityParsing m_paramEntityParsing;
429 XML_Parser m_parentParser;
430 #endif
431 } Parser;
433 #define MALLOC(s) (((Parser *)parser)->m_mem.malloc_fcn((s)))
434 #define REALLOC(p,s) (((Parser *)parser)->m_mem.realloc_fcn((p),(s)))
435 #define FREE(p) (((Parser *)parser)->m_mem.free_fcn((p)))
437 #define userData (((Parser *)parser)->m_userData)
438 #define handlerArg (((Parser *)parser)->m_handlerArg)
439 #define startElementHandler (((Parser *)parser)->m_startElementHandler)
440 #define endElementHandler (((Parser *)parser)->m_endElementHandler)
441 #define characterDataHandler (((Parser *)parser)->m_characterDataHandler)
442 #define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler)
443 #define commentHandler (((Parser *)parser)->m_commentHandler)
444 #define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler)
445 #define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler)
446 #define defaultHandler (((Parser *)parser)->m_defaultHandler)
447 #define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler)
448 #define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler)
449 #define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler)
450 #define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler)
451 #define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler)
452 #define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler)
453 #define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler)
454 #define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler)
455 #define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg)
456 #define internalEntityRefHandler (((Parser *)parser)->m_internalEntityRefHandler)
457 #define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler)
458 #define elementDeclHandler (((Parser *)parser)->m_elementDeclHandler)
459 #define attlistDeclHandler (((Parser *)parser)->m_attlistDeclHandler)
460 #define entityDeclHandler (((Parser *)parser)->m_entityDeclHandler)
461 #define xmlDeclHandler (((Parser *)parser)->m_xmlDeclHandler)
462 #define encoding (((Parser *)parser)->m_encoding)
463 #define initEncoding (((Parser *)parser)->m_initEncoding)
464 #define internalEncoding (((Parser *)parser)->m_internalEncoding)
465 #define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem)
466 #define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData)
467 #define unknownEncodingHandlerData \
468 (((Parser *)parser)->m_unknownEncodingHandlerData)
469 #define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease)
470 #define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName)
471 #define ns (((Parser *)parser)->m_ns)
472 #define ns_triplets (((Parser *)parser)->m_ns_triplets)
473 #define prologState (((Parser *)parser)->m_prologState)
474 #define processor (((Parser *)parser)->m_processor)
475 #define errorCode (((Parser *)parser)->m_errorCode)
476 #define eventPtr (((Parser *)parser)->m_eventPtr)
477 #define eventEndPtr (((Parser *)parser)->m_eventEndPtr)
478 #define positionPtr (((Parser *)parser)->m_positionPtr)
479 #define position (((Parser *)parser)->m_position)
480 #define openInternalEntities (((Parser *)parser)->m_openInternalEntities)
481 #define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities)
482 #define tagLevel (((Parser *)parser)->m_tagLevel)
483 #define buffer (((Parser *)parser)->m_buffer)
484 #define bufferPtr (((Parser *)parser)->m_bufferPtr)
485 #define bufferEnd (((Parser *)parser)->m_bufferEnd)
486 #define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex)
487 #define parseEndPtr (((Parser *)parser)->m_parseEndPtr)
488 #define bufferLim (((Parser *)parser)->m_bufferLim)
489 #define dataBuf (((Parser *)parser)->m_dataBuf)
490 #define dataBufEnd (((Parser *)parser)->m_dataBufEnd)
491 #define dtd (((Parser *)parser)->m_dtd)
492 #define curBase (((Parser *)parser)->m_curBase)
493 #define declEntity (((Parser *)parser)->m_declEntity)
494 #define doctypeName (((Parser *)parser)->m_doctypeName)
495 #define doctypeSysid (((Parser *)parser)->m_doctypeSysid)
496 #define doctypePubid (((Parser *)parser)->m_doctypePubid)
497 #define declAttributeType (((Parser *)parser)->m_declAttributeType)
498 #define declNotationName (((Parser *)parser)->m_declNotationName)
499 #define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId)
500 #define declElementType (((Parser *)parser)->m_declElementType)
501 #define declAttributeId (((Parser *)parser)->m_declAttributeId)
502 #define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata)
503 #define declAttributeIsId (((Parser *)parser)->m_declAttributeIsId)
504 #define freeTagList (((Parser *)parser)->m_freeTagList)
505 #define freeBindingList (((Parser *)parser)->m_freeBindingList)
506 #define inheritedBindings (((Parser *)parser)->m_inheritedBindings)
507 #define tagStack (((Parser *)parser)->m_tagStack)
508 #define atts (((Parser *)parser)->m_atts)
509 #define attsSize (((Parser *)parser)->m_attsSize)
510 #define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts)
511 #define idAttIndex (((Parser *)parser)->m_idAttIndex)
512 #define tempPool (((Parser *)parser)->m_tempPool)
513 #define temp2Pool (((Parser *)parser)->m_temp2Pool)
514 #define groupConnector (((Parser *)parser)->m_groupConnector)
515 #define groupSize (((Parser *)parser)->m_groupSize)
516 #define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
517 #define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
518 #ifdef XML_DTD
519 #define parentParser (((Parser *)parser)->m_parentParser)
520 #define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing)
521 #endif /* XML_DTD */
#ifdef COMPILED_FROM_DSP
/* DLL entry point, compiled only when COMPILED_FROM_DSP is defined.
   All three arguments are ignored and TRUE is always returned. */
BOOL WINAPI DllMain(HINSTANCE h, DWORD r, LPVOID p)
{
  (void)h;
  (void)r;
  (void)p;
  return TRUE;
}
#endif /* def COMPILED_FROM_DSP */
#if defined(_MSC_VER) && defined(_DEBUG)
/* Returns the opaque parser handle typed as a Parser pointer.
   Only compiled when both _MSC_VER and _DEBUG are defined. */
Parser *asParser(XML_Parser parser)
{
  return (Parser *)parser;
}
#endif
538 XML_Parser XML_ParserCreate(const XML_Char *encodingName)
540 return XML_ParserCreate_MM(encodingName, NULL, NULL);
543 XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
545 XML_Char tmp[2];
546 *tmp = nsSep;
547 return XML_ParserCreate_MM(encodingName, NULL, tmp);
550 XML_Parser
551 XML_ParserCreate_MM(const XML_Char *encodingName,
552 const XML_Memory_Handling_Suite *memsuite,
553 const XML_Char *nameSep) {
555 XML_Parser parser;
556 static
557 const XML_Char implicitContext[] = {
558 XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
559 XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
560 XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
561 XML_T('.'), XML_T('w'), XML_T('3'),
562 XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
563 XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
564 XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
565 XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
566 XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
567 XML_T('\0')
571 if (memsuite) {
572 XML_Memory_Handling_Suite *mtemp;
573 parser = memsuite->malloc_fcn(sizeof(Parser));
574 mtemp = &(((Parser *) parser)->m_mem);
575 mtemp->malloc_fcn = memsuite->malloc_fcn;
576 mtemp->realloc_fcn = memsuite->realloc_fcn;
577 mtemp->free_fcn = memsuite->free_fcn;
579 else {
580 XML_Memory_Handling_Suite *mtemp;
581 parser = malloc(sizeof(Parser));
582 mtemp = &(((Parser *) parser)->m_mem);
583 mtemp->malloc_fcn = malloc;
584 mtemp->realloc_fcn = realloc;
585 mtemp->free_fcn = free;
588 if (!parser)
589 return parser;
590 processor = prologInitProcessor;
591 XmlPrologStateInit(&prologState);
592 userData = 0;
593 handlerArg = 0;
594 startElementHandler = 0;
595 endElementHandler = 0;
596 characterDataHandler = 0;
597 processingInstructionHandler = 0;
598 commentHandler = 0;
599 startCdataSectionHandler = 0;
600 endCdataSectionHandler = 0;
601 defaultHandler = 0;
602 startDoctypeDeclHandler = 0;
603 endDoctypeDeclHandler = 0;
604 unparsedEntityDeclHandler = 0;
605 notationDeclHandler = 0;
606 startNamespaceDeclHandler = 0;
607 endNamespaceDeclHandler = 0;
608 notStandaloneHandler = 0;
609 externalEntityRefHandler = 0;
610 externalEntityRefHandlerArg = parser;
611 unknownEncodingHandler = 0;
612 elementDeclHandler = 0;
613 attlistDeclHandler = 0;
614 entityDeclHandler = 0;
615 xmlDeclHandler = 0;
616 buffer = 0;
617 bufferPtr = 0;
618 bufferEnd = 0;
619 parseEndByteIndex = 0;
620 parseEndPtr = 0;
621 bufferLim = 0;
622 declElementType = 0;
623 declAttributeId = 0;
624 declEntity = 0;
625 doctypeName = 0;
626 doctypeSysid = 0;
627 doctypePubid = 0;
628 declAttributeType = 0;
629 declNotationName = 0;
630 declNotationPublicId = 0;
631 memset(&position, 0, sizeof(POSITION));
632 errorCode = XML_ERROR_NONE;
633 eventPtr = 0;
634 eventEndPtr = 0;
635 positionPtr = 0;
636 openInternalEntities = 0;
637 tagLevel = 0;
638 tagStack = 0;
639 freeTagList = 0;
640 freeBindingList = 0;
641 inheritedBindings = 0;
642 attsSize = INIT_ATTS_SIZE;
643 atts = MALLOC(attsSize * sizeof(ATTRIBUTE));
644 nSpecifiedAtts = 0;
645 dataBuf = MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
646 groupSize = 0;
647 groupConnector = 0;
648 hadExternalDoctype = 0;
649 unknownEncodingMem = 0;
650 unknownEncodingRelease = 0;
651 unknownEncodingData = 0;
652 unknownEncodingHandlerData = 0;
653 namespaceSeparator = '!';
654 #ifdef XML_DTD
655 parentParser = 0;
656 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
657 #endif
658 ns = 0;
659 ns_triplets = 0;
660 poolInit(&tempPool, &(((Parser *) parser)->m_mem));
661 poolInit(&temp2Pool, &(((Parser *) parser)->m_mem));
662 protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
663 curBase = 0;
664 if (!dtdInit(&dtd, parser) || !atts || !dataBuf
665 || (encodingName && !protocolEncodingName)) {
666 XML_ParserFree(parser);
667 return 0;
669 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
671 if (nameSep) {
672 XmlInitEncodingNS(&initEncoding, &encoding, 0);
673 ns = 1;
674 internalEncoding = XmlGetInternalEncodingNS();
675 namespaceSeparator = *nameSep;
677 if (! setContext(parser, implicitContext)) {
678 XML_ParserFree(parser);
679 return 0;
682 else {
683 XmlInitEncoding(&initEncoding, &encoding, 0);
684 internalEncoding = XmlGetInternalEncoding();
687 return parser;
688 } /* End XML_ParserCreate_MM */
690 int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
692 if (!encodingName)
693 protocolEncodingName = 0;
694 else {
695 protocolEncodingName = poolCopyString(&tempPool, encodingName);
696 if (!protocolEncodingName)
697 return 0;
699 return 1;
702 XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
703 const XML_Char *context,
704 const XML_Char *encodingName)
706 XML_Parser parser = oldParser;
707 DTD *oldDtd = &dtd;
708 XML_StartElementHandler oldStartElementHandler = startElementHandler;
709 XML_EndElementHandler oldEndElementHandler = endElementHandler;
710 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
711 XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
712 XML_CommentHandler oldCommentHandler = commentHandler;
713 XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
714 XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
715 XML_DefaultHandler oldDefaultHandler = defaultHandler;
716 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler;
717 XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler;
718 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
719 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
720 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
721 XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
722 XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
723 XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler;
724 XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler;
725 XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler;
726 XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler;
727 ELEMENT_TYPE * oldDeclElementType = declElementType;
729 void *oldUserData = userData;
730 void *oldHandlerArg = handlerArg;
731 int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
732 void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
733 #ifdef XML_DTD
734 int oldParamEntityParsing = paramEntityParsing;
735 #endif
736 int oldns_triplets = ns_triplets;
738 if (ns) {
739 XML_Char tmp[2];
741 *tmp = namespaceSeparator;
742 parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem,
743 tmp);
745 else {
746 parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem,
747 NULL);
750 if (!parser)
751 return 0;
753 startElementHandler = oldStartElementHandler;
754 endElementHandler = oldEndElementHandler;
755 characterDataHandler = oldCharacterDataHandler;
756 processingInstructionHandler = oldProcessingInstructionHandler;
757 commentHandler = oldCommentHandler;
758 startCdataSectionHandler = oldStartCdataSectionHandler;
759 endCdataSectionHandler = oldEndCdataSectionHandler;
760 defaultHandler = oldDefaultHandler;
761 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
762 notationDeclHandler = oldNotationDeclHandler;
763 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
764 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
765 notStandaloneHandler = oldNotStandaloneHandler;
766 externalEntityRefHandler = oldExternalEntityRefHandler;
767 unknownEncodingHandler = oldUnknownEncodingHandler;
768 elementDeclHandler = oldElementDeclHandler;
769 attlistDeclHandler = oldAttlistDeclHandler;
770 entityDeclHandler = oldEntityDeclHandler;
771 xmlDeclHandler = oldXmlDeclHandler;
772 declElementType = oldDeclElementType;
773 userData = oldUserData;
774 if (oldUserData == oldHandlerArg)
775 handlerArg = userData;
776 else
777 handlerArg = parser;
778 if (oldExternalEntityRefHandlerArg != oldParser)
779 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
780 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
781 ns_triplets = oldns_triplets;
782 #ifdef XML_DTD
783 paramEntityParsing = oldParamEntityParsing;
784 if (context) {
785 #endif /* XML_DTD */
786 if (!dtdCopy(&dtd, oldDtd, parser) || !setContext(parser, context)) {
787 XML_ParserFree(parser);
788 return 0;
790 processor = externalEntityInitProcessor;
791 #ifdef XML_DTD
793 else {
794 dtdSwap(&dtd, oldDtd);
795 parentParser = oldParser;
796 XmlPrologStateInitExternalEntity(&prologState);
797 dtd.complete = 1;
798 hadExternalDoctype = 1;
800 #endif /* XML_DTD */
801 return parser;
804 static
805 void destroyBindings(BINDING *bindings, XML_Parser parser)
807 for (;;) {
808 BINDING *b = bindings;
809 if (!b)
810 break;
811 bindings = b->nextTagBinding;
812 FREE(b->uri);
813 FREE(b);
817 void XML_ParserFree(XML_Parser parser)
819 for (;;) {
820 TAG *p;
821 if (tagStack == 0) {
822 if (freeTagList == 0)
823 break;
824 tagStack = freeTagList;
825 freeTagList = 0;
827 p = tagStack;
828 tagStack = tagStack->parent;
829 FREE(p->buf);
830 destroyBindings(p->bindings, parser);
831 FREE(p);
833 destroyBindings(freeBindingList, parser);
834 destroyBindings(inheritedBindings, parser);
835 poolDestroy(&tempPool);
836 poolDestroy(&temp2Pool);
837 #ifdef XML_DTD
838 if (parentParser) {
839 if (hadExternalDoctype)
840 dtd.complete = 0;
841 dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd);
843 #endif /* XML_DTD */
844 dtdDestroy(&dtd, parser);
845 FREE((void *)atts);
846 if (groupConnector)
847 FREE(groupConnector);
848 if (buffer)
849 FREE(buffer);
850 FREE(dataBuf);
851 if (unknownEncodingMem)
852 FREE(unknownEncodingMem);
853 if (unknownEncodingRelease)
854 unknownEncodingRelease(unknownEncodingData);
855 FREE(parser);
858 void XML_UseParserAsHandlerArg(XML_Parser parser)
860 handlerArg = parser;
863 void
864 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
865 ns_triplets = do_nst;
868 void XML_SetUserData(XML_Parser parser, void *p)
870 if (handlerArg == userData)
871 handlerArg = userData = p;
872 else
873 userData = p;
876 int XML_SetBase(XML_Parser parser, const XML_Char *p)
878 if (p) {
879 p = poolCopyString(&dtd.pool, p);
880 if (!p)
881 return 0;
882 curBase = p;
884 else
885 curBase = 0;
886 return 1;
889 const XML_Char *XML_GetBase(XML_Parser parser)
891 return curBase;
894 int XML_GetSpecifiedAttributeCount(XML_Parser parser)
896 return nSpecifiedAtts;
899 int XML_GetIdAttributeIndex(XML_Parser parser)
901 return idAttIndex;
904 void XML_SetElementHandler(XML_Parser parser,
905 XML_StartElementHandler start,
906 XML_EndElementHandler end)
908 startElementHandler = start;
909 endElementHandler = end;
912 void XML_SetStartElementHandler(XML_Parser parser,
913 XML_StartElementHandler start) {
914 startElementHandler = start;
917 void XML_SetEndElementHandler(XML_Parser parser,
918 XML_EndElementHandler end) {
919 endElementHandler = end;
922 void XML_SetCharacterDataHandler(XML_Parser parser,
923 XML_CharacterDataHandler handler)
925 characterDataHandler = handler;
928 void XML_SetProcessingInstructionHandler(XML_Parser parser,
929 XML_ProcessingInstructionHandler handler)
931 processingInstructionHandler = handler;
934 void XML_SetCommentHandler(XML_Parser parser,
935 XML_CommentHandler handler)
937 commentHandler = handler;
940 void XML_SetCdataSectionHandler(XML_Parser parser,
941 XML_StartCdataSectionHandler start,
942 XML_EndCdataSectionHandler end)
944 startCdataSectionHandler = start;
945 endCdataSectionHandler = end;
948 void XML_SetStartCdataSectionHandler(XML_Parser parser,
949 XML_StartCdataSectionHandler start) {
950 startCdataSectionHandler = start;
953 void XML_SetEndCdataSectionHandler(XML_Parser parser,
954 XML_EndCdataSectionHandler end) {
955 endCdataSectionHandler = end;
958 void XML_SetDefaultHandler(XML_Parser parser,
959 XML_DefaultHandler handler)
961 defaultHandler = handler;
962 defaultExpandInternalEntities = 0;
965 void XML_SetDefaultHandlerExpand(XML_Parser parser,
966 XML_DefaultHandler handler)
968 defaultHandler = handler;
969 defaultExpandInternalEntities = 1;
972 void XML_SetDoctypeDeclHandler(XML_Parser parser,
973 XML_StartDoctypeDeclHandler start,
974 XML_EndDoctypeDeclHandler end)
976 startDoctypeDeclHandler = start;
977 endDoctypeDeclHandler = end;
980 void XML_SetStartDoctypeDeclHandler(XML_Parser parser,
981 XML_StartDoctypeDeclHandler start) {
982 startDoctypeDeclHandler = start;
985 void XML_SetEndDoctypeDeclHandler(XML_Parser parser,
986 XML_EndDoctypeDeclHandler end) {
987 endDoctypeDeclHandler = end;
990 void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
991 XML_UnparsedEntityDeclHandler handler)
993 unparsedEntityDeclHandler = handler;
996 void XML_SetNotationDeclHandler(XML_Parser parser,
997 XML_NotationDeclHandler handler)
999 notationDeclHandler = handler;
1002 void XML_SetNamespaceDeclHandler(XML_Parser parser,
1003 XML_StartNamespaceDeclHandler start,
1004 XML_EndNamespaceDeclHandler end)
1006 startNamespaceDeclHandler = start;
1007 endNamespaceDeclHandler = end;
1010 void XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1011 XML_StartNamespaceDeclHandler start) {
1012 startNamespaceDeclHandler = start;
1015 void XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1016 XML_EndNamespaceDeclHandler end) {
1017 endNamespaceDeclHandler = end;
1021 void XML_SetNotStandaloneHandler(XML_Parser parser,
1022 XML_NotStandaloneHandler handler)
1024 notStandaloneHandler = handler;
1027 void XML_SetExternalEntityRefHandler(XML_Parser parser,
1028 XML_ExternalEntityRefHandler handler)
1030 externalEntityRefHandler = handler;
1033 void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
1035 if (arg)
1036 externalEntityRefHandlerArg = arg;
1037 else
1038 externalEntityRefHandlerArg = parser;
1041 void XML_SetUnknownEncodingHandler(XML_Parser parser,
1042 XML_UnknownEncodingHandler handler,
1043 void *data)
1045 unknownEncodingHandler = handler;
1046 unknownEncodingHandlerData = data;
1049 void XML_SetElementDeclHandler(XML_Parser parser,
1050 XML_ElementDeclHandler eldecl)
1052 elementDeclHandler = eldecl;
1055 void XML_SetAttlistDeclHandler(XML_Parser parser,
1056 XML_AttlistDeclHandler attdecl)
1058 attlistDeclHandler = attdecl;
1061 void XML_SetEntityDeclHandler(XML_Parser parser,
1062 XML_EntityDeclHandler handler)
1064 entityDeclHandler = handler;
1067 void XML_SetXmlDeclHandler(XML_Parser parser,
1068 XML_XmlDeclHandler handler) {
1069 xmlDeclHandler = handler;
1072 int XML_SetParamEntityParsing(XML_Parser parser,
1073 enum XML_ParamEntityParsing parsing)
1075 #ifdef XML_DTD
1076 paramEntityParsing = parsing;
1077 return 1;
1078 #else
1079 return parsing == XML_PARAM_ENTITY_PARSING_NEVER;
1080 #endif
1083 int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
1085 if (len == 0) {
1086 if (!isFinal)
1087 return 1;
1088 positionPtr = bufferPtr;
1089 errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
1090 if (errorCode == XML_ERROR_NONE)
1091 return 1;
1092 eventEndPtr = eventPtr;
1093 processor = errorProcessor;
1094 return 0;
1096 #ifndef XML_CONTEXT_BYTES
1097 else if (bufferPtr == bufferEnd) {
1098 const char *end;
1099 int nLeftOver;
1100 parseEndByteIndex += len;
1101 positionPtr = s;
1102 if (isFinal) {
1103 errorCode = processor(parser, s, parseEndPtr = s + len, 0);
1104 if (errorCode == XML_ERROR_NONE)
1105 return 1;
1106 eventEndPtr = eventPtr;
1107 processor = errorProcessor;
1108 return 0;
1110 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
1111 if (errorCode != XML_ERROR_NONE) {
1112 eventEndPtr = eventPtr;
1113 processor = errorProcessor;
1114 return 0;
1116 XmlUpdatePosition(encoding, positionPtr, end, &position);
1117 nLeftOver = s + len - end;
1118 if (nLeftOver) {
1119 if (buffer == 0 || nLeftOver > bufferLim - buffer) {
1120 /* FIXME avoid integer overflow */
1121 buffer = buffer == 0 ? MALLOC(len * 2) : REALLOC(buffer, len * 2);
1122 /* FIXME storage leak if realloc fails */
1123 if (!buffer) {
1124 errorCode = XML_ERROR_NO_MEMORY;
1125 eventPtr = eventEndPtr = 0;
1126 processor = errorProcessor;
1127 return 0;
1129 bufferLim = buffer + len * 2;
1131 memcpy(buffer, end, nLeftOver);
1132 bufferPtr = buffer;
1133 bufferEnd = buffer + nLeftOver;
1135 return 1;
1137 #endif /* not defined XML_CONTEXT_BYTES */
1138 else {
1139 memcpy(XML_GetBuffer(parser, len), s, len);
1140 return XML_ParseBuffer(parser, len, isFinal);
1144 int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
1146 const char *start = bufferPtr;
1147 positionPtr = start;
1148 bufferEnd += len;
1149 parseEndByteIndex += len;
1150 errorCode = processor(parser, start, parseEndPtr = bufferEnd,
1151 isFinal ? (const char **)0 : &bufferPtr);
1152 if (errorCode == XML_ERROR_NONE) {
1153 if (!isFinal)
1154 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1155 return 1;
1157 else {
1158 eventEndPtr = eventPtr;
1159 processor = errorProcessor;
1160 return 0;
1164 void *XML_GetBuffer(XML_Parser parser, int len)
1166 if (len > bufferLim - bufferEnd) {
1167 /* FIXME avoid integer overflow */
1168 int neededSize = len + (bufferEnd - bufferPtr);
1169 #ifdef XML_CONTEXT_BYTES
1170 int keep = bufferPtr - buffer;
1172 if (keep > XML_CONTEXT_BYTES)
1173 keep = XML_CONTEXT_BYTES;
1174 neededSize += keep;
1175 #endif /* defined XML_CONTEXT_BYTES */
1176 if (neededSize <= bufferLim - buffer) {
1177 #ifdef XML_CONTEXT_BYTES
1178 if (keep < bufferPtr - buffer) {
1179 int offset = (bufferPtr - buffer) - keep;
1180 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
1181 bufferEnd -= offset;
1182 bufferPtr -= offset;
1184 #else
1185 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
1186 bufferEnd = buffer + (bufferEnd - bufferPtr);
1187 bufferPtr = buffer;
1188 #endif /* not defined XML_CONTEXT_BYTES */
1190 else {
1191 char *newBuf;
1192 int bufferSize = bufferLim - bufferPtr;
1193 if (bufferSize == 0)
1194 bufferSize = INIT_BUFFER_SIZE;
1195 do {
1196 bufferSize *= 2;
1197 } while (bufferSize < neededSize);
1198 newBuf = MALLOC(bufferSize);
1199 if (newBuf == 0) {
1200 errorCode = XML_ERROR_NO_MEMORY;
1201 return 0;
1203 bufferLim = newBuf + bufferSize;
1204 #ifdef XML_CONTEXT_BYTES
1205 if (bufferPtr) {
1206 int keep = bufferPtr - buffer;
1207 if (keep > XML_CONTEXT_BYTES)
1208 keep = XML_CONTEXT_BYTES;
1209 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
1210 FREE(buffer);
1211 buffer = newBuf;
1212 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
1213 bufferPtr = buffer + keep;
1215 else {
1216 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1217 bufferPtr = buffer = newBuf;
1219 #else
1220 if (bufferPtr) {
1221 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
1222 FREE(buffer);
1224 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1225 bufferPtr = buffer = newBuf;
1226 #endif /* not defined XML_CONTEXT_BYTES */
1229 return bufferEnd;
1232 enum XML_Error XML_GetErrorCode(XML_Parser parser)
1234 return errorCode;
1237 long XML_GetCurrentByteIndex(XML_Parser parser)
1239 if (eventPtr)
1240 return parseEndByteIndex - (parseEndPtr - eventPtr);
1241 return -1;
1244 int XML_GetCurrentByteCount(XML_Parser parser)
1246 if (eventEndPtr && eventPtr)
1247 return eventEndPtr - eventPtr;
1248 return 0;
1251 const char * XML_GetInputContext(XML_Parser parser, int *offset, int *size)
1253 #ifdef XML_CONTEXT_BYTES
1254 if (eventPtr && buffer) {
1255 *offset = eventPtr - buffer;
1256 *size = bufferEnd - buffer;
1257 return buffer;
1259 #endif /* defined XML_CONTEXT_BYTES */
1260 return (char *) 0;
1263 int XML_GetCurrentLineNumber(XML_Parser parser)
1265 if (eventPtr) {
1266 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1267 positionPtr = eventPtr;
1269 return position.lineNumber + 1;
1272 int XML_GetCurrentColumnNumber(XML_Parser parser)
1274 if (eventPtr) {
1275 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1276 positionPtr = eventPtr;
1278 return position.columnNumber;
1281 void XML_DefaultCurrent(XML_Parser parser)
1283 if (defaultHandler) {
1284 if (openInternalEntities)
1285 reportDefault(parser,
1286 internalEncoding,
1287 openInternalEntities->internalEventPtr,
1288 openInternalEntities->internalEventEndPtr);
1289 else
1290 reportDefault(parser, encoding, eventPtr, eventEndPtr);
1294 const XML_LChar *XML_ErrorString(int code)
1296 static const XML_LChar *message[] = {
1298 XML_T("out of memory"),
1299 XML_T("syntax error"),
1300 XML_T("no element found"),
1301 XML_T("not well-formed (invalid token)"),
1302 XML_T("unclosed token"),
1303 XML_T("unclosed token"),
1304 XML_T("mismatched tag"),
1305 XML_T("duplicate attribute"),
1306 XML_T("junk after document element"),
1307 XML_T("illegal parameter entity reference"),
1308 XML_T("undefined entity"),
1309 XML_T("recursive entity reference"),
1310 XML_T("asynchronous entity"),
1311 XML_T("reference to invalid character number"),
1312 XML_T("reference to binary entity"),
1313 XML_T("reference to external entity in attribute"),
1314 XML_T("xml processing instruction not at start of external entity"),
1315 XML_T("unknown encoding"),
1316 XML_T("encoding specified in XML declaration is incorrect"),
1317 XML_T("unclosed CDATA section"),
1318 XML_T("error in processing external entity reference"),
1319 XML_T("document is not standalone"),
1320 XML_T("unexpected parser state - please send a bug report")
1322 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1323 return message[code];
1324 return 0;
1327 const XML_LChar *
1328 XML_ExpatVersion(void) {
1329 return VERSION;
1332 XML_Expat_Version
1333 XML_ExpatVersionInfo(void) {
1334 XML_Expat_Version version;
1336 version.major = XML_MAJOR_VERSION;
1337 version.minor = XML_MINOR_VERSION;
1338 version.micro = XML_MICRO_VERSION;
1340 return version;
1343 static
1344 enum XML_Error contentProcessor(XML_Parser parser,
1345 const char *start,
1346 const char *end,
1347 const char **endPtr)
1349 return doContent(parser, 0, encoding, start, end, endPtr);
1352 static
1353 enum XML_Error externalEntityInitProcessor(XML_Parser parser,
1354 const char *start,
1355 const char *end,
1356 const char **endPtr)
1358 enum XML_Error result = initializeEncoding(parser);
1359 if (result != XML_ERROR_NONE)
1360 return result;
1361 processor = externalEntityInitProcessor2;
1362 return externalEntityInitProcessor2(parser, start, end, endPtr);
1365 static
1366 enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
1367 const char *start,
1368 const char *end,
1369 const char **endPtr)
1371 const char *next;
1372 int tok = XmlContentTok(encoding, start, end, &next);
1373 switch (tok) {
1374 case XML_TOK_BOM:
1375 start = next;
1376 break;
1377 case XML_TOK_PARTIAL:
1378 if (endPtr) {
1379 *endPtr = start;
1380 return XML_ERROR_NONE;
1382 eventPtr = start;
1383 return XML_ERROR_UNCLOSED_TOKEN;
1384 case XML_TOK_PARTIAL_CHAR:
1385 if (endPtr) {
1386 *endPtr = start;
1387 return XML_ERROR_NONE;
1389 eventPtr = start;
1390 return XML_ERROR_PARTIAL_CHAR;
1392 processor = externalEntityInitProcessor3;
1393 return externalEntityInitProcessor3(parser, start, end, endPtr);
1396 static
1397 enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1398 const char *start,
1399 const char *end,
1400 const char **endPtr)
1402 const char *next;
1403 int tok = XmlContentTok(encoding, start, end, &next);
1404 switch (tok) {
1405 case XML_TOK_XML_DECL:
1407 enum XML_Error result = processXmlDecl(parser, 1, start, next);
1408 if (result != XML_ERROR_NONE)
1409 return result;
1410 start = next;
1412 break;
1413 case XML_TOK_PARTIAL:
1414 if (endPtr) {
1415 *endPtr = start;
1416 return XML_ERROR_NONE;
1418 eventPtr = start;
1419 return XML_ERROR_UNCLOSED_TOKEN;
1420 case XML_TOK_PARTIAL_CHAR:
1421 if (endPtr) {
1422 *endPtr = start;
1423 return XML_ERROR_NONE;
1425 eventPtr = start;
1426 return XML_ERROR_PARTIAL_CHAR;
1428 processor = externalEntityContentProcessor;
1429 tagLevel = 1;
1430 return doContent(parser, 1, encoding, start, end, endPtr);
1433 static
1434 enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1435 const char *start,
1436 const char *end,
1437 const char **endPtr)
1439 return doContent(parser, 1, encoding, start, end, endPtr);
1442 static enum XML_Error
1443 doContent(XML_Parser parser,
1444 int startTagLevel,
1445 const ENCODING *enc,
1446 const char *s,
1447 const char *end,
1448 const char **nextPtr)
1450 const char **eventPP;
1451 const char **eventEndPP;
1452 if (enc == encoding) {
1453 eventPP = &eventPtr;
1454 eventEndPP = &eventEndPtr;
1456 else {
1457 eventPP = &(openInternalEntities->internalEventPtr);
1458 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1460 *eventPP = s;
1461 for (;;) {
1462 const char *next = s; /* XmlContentTok doesn't always set the last arg */
1463 int tok = XmlContentTok(enc, s, end, &next);
1464 *eventEndPP = next;
1465 switch (tok) {
1466 case XML_TOK_TRAILING_CR:
1467 if (nextPtr) {
1468 *nextPtr = s;
1469 return XML_ERROR_NONE;
1471 *eventEndPP = end;
1472 if (characterDataHandler) {
1473 XML_Char c = 0xA;
1474 characterDataHandler(handlerArg, &c, 1);
1476 else if (defaultHandler)
1477 reportDefault(parser, enc, s, end);
1478 if (startTagLevel == 0)
1479 return XML_ERROR_NO_ELEMENTS;
1480 if (tagLevel != startTagLevel)
1481 return XML_ERROR_ASYNC_ENTITY;
1482 return XML_ERROR_NONE;
1483 case XML_TOK_NONE:
1484 if (nextPtr) {
1485 *nextPtr = s;
1486 return XML_ERROR_NONE;
1488 if (startTagLevel > 0) {
1489 if (tagLevel != startTagLevel)
1490 return XML_ERROR_ASYNC_ENTITY;
1491 return XML_ERROR_NONE;
1493 return XML_ERROR_NO_ELEMENTS;
1494 case XML_TOK_INVALID:
1495 *eventPP = next;
1496 return XML_ERROR_INVALID_TOKEN;
1497 case XML_TOK_PARTIAL:
1498 if (nextPtr) {
1499 *nextPtr = s;
1500 return XML_ERROR_NONE;
1502 return XML_ERROR_UNCLOSED_TOKEN;
1503 case XML_TOK_PARTIAL_CHAR:
1504 if (nextPtr) {
1505 *nextPtr = s;
1506 return XML_ERROR_NONE;
1508 return XML_ERROR_PARTIAL_CHAR;
1509 case XML_TOK_ENTITY_REF:
1511 const XML_Char *name;
1512 ENTITY *entity;
1513 XML_Char ch = XmlPredefinedEntityName(enc,
1514 s + enc->minBytesPerChar,
1515 next - enc->minBytesPerChar);
1516 if (ch) {
1517 if (characterDataHandler)
1518 characterDataHandler(handlerArg, &ch, 1);
1519 else if (defaultHandler)
1520 reportDefault(parser, enc, s, next);
1521 break;
1523 name = poolStoreString(&dtd.pool, enc,
1524 s + enc->minBytesPerChar,
1525 next - enc->minBytesPerChar);
1526 if (!name)
1527 return XML_ERROR_NO_MEMORY;
1528 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1529 poolDiscard(&dtd.pool);
1530 if (!entity) {
1531 if (dtd.complete || dtd.standalone)
1532 return XML_ERROR_UNDEFINED_ENTITY;
1533 if (defaultHandler)
1534 reportDefault(parser, enc, s, next);
1535 break;
1537 if (entity->open)
1538 return XML_ERROR_RECURSIVE_ENTITY_REF;
1539 if (entity->notation)
1540 return XML_ERROR_BINARY_ENTITY_REF;
1541 if (entity) {
1542 if (entity->textPtr) {
1543 enum XML_Error result;
1544 OPEN_INTERNAL_ENTITY openEntity;
1545 if (defaultHandler && !defaultExpandInternalEntities) {
1546 reportDefault(parser, enc, s, next);
1547 break;
1549 entity->open = 1;
1550 openEntity.next = openInternalEntities;
1551 openInternalEntities = &openEntity;
1552 openEntity.entity = entity;
1553 openEntity.internalEventPtr = 0;
1554 openEntity.internalEventEndPtr = 0;
1555 result = doContent(parser,
1556 tagLevel,
1557 internalEncoding,
1558 (char *)entity->textPtr,
1559 (char *)(entity->textPtr + entity->textLen),
1561 entity->open = 0;
1562 openInternalEntities = openEntity.next;
1563 if (result)
1564 return result;
1566 else if (externalEntityRefHandler) {
1567 const XML_Char *context;
1568 entity->open = 1;
1569 context = getContext(parser);
1570 entity->open = 0;
1571 if (!context)
1572 return XML_ERROR_NO_MEMORY;
1573 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
1574 context,
1575 entity->base,
1576 entity->systemId,
1577 entity->publicId))
1578 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1579 poolDiscard(&tempPool);
1581 else if (defaultHandler)
1582 reportDefault(parser, enc, s, next);
1584 break;
1586 case XML_TOK_START_TAG_WITH_ATTS:
1587 if (!startElementHandler) {
1588 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1589 if (result)
1590 return result;
1592 /* fall through */
1593 case XML_TOK_START_TAG_NO_ATTS:
1595 TAG *tag;
1596 if (freeTagList) {
1597 tag = freeTagList;
1598 freeTagList = freeTagList->parent;
1600 else {
1601 tag = MALLOC(sizeof(TAG));
1602 if (!tag)
1603 return XML_ERROR_NO_MEMORY;
1604 tag->buf = MALLOC(INIT_TAG_BUF_SIZE);
1605 if (!tag->buf)
1606 return XML_ERROR_NO_MEMORY;
1607 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1609 tag->bindings = 0;
1610 tag->parent = tagStack;
1611 tagStack = tag;
1612 tag->name.localPart = 0;
1613 tag->rawName = s + enc->minBytesPerChar;
1614 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1615 if (nextPtr) {
1616 /* Need to guarantee that:
1617 tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
1618 if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1619 int bufSize = tag->rawNameLength * 4;
1620 bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1621 tag->buf = REALLOC(tag->buf, bufSize);
1622 if (!tag->buf)
1623 return XML_ERROR_NO_MEMORY;
1624 tag->bufEnd = tag->buf + bufSize;
1626 memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1627 tag->rawName = tag->buf;
1629 ++tagLevel;
1630 if (startElementHandler) {
1631 enum XML_Error result;
1632 XML_Char *toPtr;
1633 for (;;) {
1634 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1635 const char *fromPtr = tag->rawName;
1636 int bufSize;
1637 if (nextPtr)
1638 toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1639 else
1640 toPtr = (XML_Char *)tag->buf;
1641 tag->name.str = toPtr;
1642 XmlConvert(enc,
1643 &fromPtr, rawNameEnd,
1644 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1645 if (fromPtr == rawNameEnd)
1646 break;
1647 bufSize = (tag->bufEnd - tag->buf) << 1;
1648 tag->buf = REALLOC(tag->buf, bufSize);
1649 if (!tag->buf)
1650 return XML_ERROR_NO_MEMORY;
1651 tag->bufEnd = tag->buf + bufSize;
1652 if (nextPtr)
1653 tag->rawName = tag->buf;
1655 *toPtr = XML_T('\0');
1656 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1657 if (result)
1658 return result;
1659 startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1660 poolClear(&tempPool);
1662 else {
1663 tag->name.str = 0;
1664 if (defaultHandler)
1665 reportDefault(parser, enc, s, next);
1667 break;
1669 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1670 if (!startElementHandler) {
1671 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1672 if (result)
1673 return result;
1675 /* fall through */
1676 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1677 if (startElementHandler || endElementHandler) {
1678 const char *rawName = s + enc->minBytesPerChar;
1679 enum XML_Error result;
1680 BINDING *bindings = 0;
1681 TAG_NAME name;
1682 name.str = poolStoreString(&tempPool, enc, rawName,
1683 rawName + XmlNameLength(enc, rawName));
1684 if (!name.str)
1685 return XML_ERROR_NO_MEMORY;
1686 poolFinish(&tempPool);
1687 result = storeAtts(parser, enc, s, &name, &bindings);
1688 if (result)
1689 return result;
1690 poolFinish(&tempPool);
1691 if (startElementHandler)
1692 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1693 if (endElementHandler) {
1694 if (startElementHandler)
1695 *eventPP = *eventEndPP;
1696 endElementHandler(handlerArg, name.str);
1698 poolClear(&tempPool);
1699 while (bindings) {
1700 BINDING *b = bindings;
1701 if (endNamespaceDeclHandler)
1702 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1703 bindings = bindings->nextTagBinding;
1704 b->nextTagBinding = freeBindingList;
1705 freeBindingList = b;
1706 b->prefix->binding = b->prevPrefixBinding;
1709 else if (defaultHandler)
1710 reportDefault(parser, enc, s, next);
1711 if (tagLevel == 0)
1712 return epilogProcessor(parser, next, end, nextPtr);
1713 break;
1714 case XML_TOK_END_TAG:
1715 if (tagLevel == startTagLevel)
1716 return XML_ERROR_ASYNC_ENTITY;
1717 else {
1718 int len;
1719 const char *rawName;
1720 TAG *tag = tagStack;
1721 tagStack = tag->parent;
1722 tag->parent = freeTagList;
1723 freeTagList = tag;
1724 rawName = s + enc->minBytesPerChar*2;
1725 len = XmlNameLength(enc, rawName);
1726 if (len != tag->rawNameLength
1727 || memcmp(tag->rawName, rawName, len) != 0) {
1728 *eventPP = rawName;
1729 return XML_ERROR_TAG_MISMATCH;
1731 --tagLevel;
1732 if (endElementHandler && tag->name.str) {
1733 if (tag->name.localPart) {
1734 XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
1735 const XML_Char *from = tag->name.localPart;
1736 while ((*to++ = *from++) != 0)
1739 endElementHandler(handlerArg, tag->name.str);
1741 else if (defaultHandler)
1742 reportDefault(parser, enc, s, next);
1743 while (tag->bindings) {
1744 BINDING *b = tag->bindings;
1745 if (endNamespaceDeclHandler)
1746 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1747 tag->bindings = tag->bindings->nextTagBinding;
1748 b->nextTagBinding = freeBindingList;
1749 freeBindingList = b;
1750 b->prefix->binding = b->prevPrefixBinding;
1752 if (tagLevel == 0)
1753 return epilogProcessor(parser, next, end, nextPtr);
1755 break;
1756 case XML_TOK_CHAR_REF:
1758 int n = XmlCharRefNumber(enc, s);
1759 if (n < 0)
1760 return XML_ERROR_BAD_CHAR_REF;
1761 if (characterDataHandler) {
1762 XML_Char buf[XML_ENCODE_MAX];
1763 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1765 else if (defaultHandler)
1766 reportDefault(parser, enc, s, next);
1768 break;
1769 case XML_TOK_XML_DECL:
1770 return XML_ERROR_MISPLACED_XML_PI;
1771 case XML_TOK_DATA_NEWLINE:
1772 if (characterDataHandler) {
1773 XML_Char c = 0xA;
1774 characterDataHandler(handlerArg, &c, 1);
1776 else if (defaultHandler)
1777 reportDefault(parser, enc, s, next);
1778 break;
1779 case XML_TOK_CDATA_SECT_OPEN:
1781 enum XML_Error result;
1782 if (startCdataSectionHandler)
1783 startCdataSectionHandler(handlerArg);
1784 #if 0
1785 /* Suppose you doing a transformation on a document that involves
1786 changing only the character data. You set up a defaultHandler
1787 and a characterDataHandler. The defaultHandler simply copies
1788 characters through. The characterDataHandler does the transformation
1789 and writes the characters out escaping them as necessary. This case
1790 will fail to work if we leave out the following two lines (because &
1791 and < inside CDATA sections will be incorrectly escaped).
1793 However, now we have a start/endCdataSectionHandler, so it seems
1794 easier to let the user deal with this. */
1796 else if (characterDataHandler)
1797 characterDataHandler(handlerArg, dataBuf, 0);
1798 #endif
1799 else if (defaultHandler)
1800 reportDefault(parser, enc, s, next);
1801 result = doCdataSection(parser, enc, &next, end, nextPtr);
1802 if (!next) {
1803 processor = cdataSectionProcessor;
1804 return result;
1807 break;
1808 case XML_TOK_TRAILING_RSQB:
1809 if (nextPtr) {
1810 *nextPtr = s;
1811 return XML_ERROR_NONE;
1813 if (characterDataHandler) {
1814 if (MUST_CONVERT(enc, s)) {
1815 ICHAR *dataPtr = (ICHAR *)dataBuf;
1816 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1817 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1819 else
1820 characterDataHandler(handlerArg,
1821 (XML_Char *)s,
1822 (XML_Char *)end - (XML_Char *)s);
1824 else if (defaultHandler)
1825 reportDefault(parser, enc, s, end);
1826 if (startTagLevel == 0) {
1827 *eventPP = end;
1828 return XML_ERROR_NO_ELEMENTS;
1830 if (tagLevel != startTagLevel) {
1831 *eventPP = end;
1832 return XML_ERROR_ASYNC_ENTITY;
1834 return XML_ERROR_NONE;
1835 case XML_TOK_DATA_CHARS:
1836 if (characterDataHandler) {
1837 if (MUST_CONVERT(enc, s)) {
1838 for (;;) {
1839 ICHAR *dataPtr = (ICHAR *)dataBuf;
1840 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1841 *eventEndPP = s;
1842 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1843 if (s == next)
1844 break;
1845 *eventPP = s;
1848 else
1849 characterDataHandler(handlerArg,
1850 (XML_Char *)s,
1851 (XML_Char *)next - (XML_Char *)s);
1853 else if (defaultHandler)
1854 reportDefault(parser, enc, s, next);
1855 break;
1856 case XML_TOK_PI:
1857 if (!reportProcessingInstruction(parser, enc, s, next))
1858 return XML_ERROR_NO_MEMORY;
1859 break;
1860 case XML_TOK_COMMENT:
1861 if (!reportComment(parser, enc, s, next))
1862 return XML_ERROR_NO_MEMORY;
1863 break;
1864 default:
1865 if (defaultHandler)
1866 reportDefault(parser, enc, s, next);
1867 break;
1869 *eventPP = s = next;
1871 /* not reached */
1874 /* If tagNamePtr is non-null, build a real list of attributes,
1875 otherwise just check the attributes for well-formedness. */
1877 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1878 const char *attStr, TAG_NAME *tagNamePtr,
1879 BINDING **bindingsPtr)
1881 ELEMENT_TYPE *elementType = 0;
1882 int nDefaultAtts = 0;
1883 const XML_Char **appAtts; /* the attribute list to pass to the application */
1884 int attIndex = 0;
1885 int i;
1886 int n;
1887 int nPrefixes = 0;
1888 BINDING *binding;
1889 const XML_Char *localPart;
1891 /* lookup the element type name */
1892 if (tagNamePtr) {
1893 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str,0);
1894 if (!elementType) {
1895 tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
1896 if (!tagNamePtr->str)
1897 return XML_ERROR_NO_MEMORY;
1898 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
1899 if (!elementType)
1900 return XML_ERROR_NO_MEMORY;
1901 if (ns && !setElementTypePrefix(parser, elementType))
1902 return XML_ERROR_NO_MEMORY;
1904 nDefaultAtts = elementType->nDefaultAtts;
1906 /* get the attributes from the tokenizer */
1907 n = XmlGetAttributes(enc, attStr, attsSize, atts);
1908 if (n + nDefaultAtts > attsSize) {
1909 int oldAttsSize = attsSize;
1910 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1911 atts = REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
1912 if (!atts)
1913 return XML_ERROR_NO_MEMORY;
1914 if (n > oldAttsSize)
1915 XmlGetAttributes(enc, attStr, n, atts);
1917 appAtts = (const XML_Char **)atts;
1918 for (i = 0; i < n; i++) {
1919 /* add the name and value to the attribute list */
1920 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1921 atts[i].name
1922 + XmlNameLength(enc, atts[i].name));
1923 if (!attId)
1924 return XML_ERROR_NO_MEMORY;
1925 /* detect duplicate attributes */
1926 if ((attId->name)[-1]) {
1927 if (enc == encoding)
1928 eventPtr = atts[i].name;
1929 return XML_ERROR_DUPLICATE_ATTRIBUTE;
1931 (attId->name)[-1] = 1;
1932 appAtts[attIndex++] = attId->name;
1933 if (!atts[i].normalized) {
1934 enum XML_Error result;
1935 int isCdata = 1;
1937 /* figure out whether declared as other than CDATA */
1938 if (attId->maybeTokenized) {
1939 int j;
1940 for (j = 0; j < nDefaultAtts; j++) {
1941 if (attId == elementType->defaultAtts[j].id) {
1942 isCdata = elementType->defaultAtts[j].isCdata;
1943 break;
1948 /* normalize the attribute value */
1949 result = storeAttributeValue(parser, enc, isCdata,
1950 atts[i].valuePtr, atts[i].valueEnd,
1951 &tempPool);
1952 if (result)
1953 return result;
1954 if (tagNamePtr) {
1955 appAtts[attIndex] = poolStart(&tempPool);
1956 poolFinish(&tempPool);
1958 else
1959 poolDiscard(&tempPool);
1961 else if (tagNamePtr) {
1962 /* the value did not need normalizing */
1963 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1964 if (appAtts[attIndex] == 0)
1965 return XML_ERROR_NO_MEMORY;
1966 poolFinish(&tempPool);
1968 /* handle prefixed attribute names */
1969 if (attId->prefix && tagNamePtr) {
1970 if (attId->xmlns) {
1971 /* deal with namespace declarations here */
1972 if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
1973 return XML_ERROR_NO_MEMORY;
1974 --attIndex;
1976 else {
1977 /* deal with other prefixed names later */
1978 attIndex++;
1979 nPrefixes++;
1980 (attId->name)[-1] = 2;
1983 else
1984 attIndex++;
1986 if (tagNamePtr) {
1987 int j;
1988 nSpecifiedAtts = attIndex;
1989 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
1990 for (i = 0; i < attIndex; i += 2)
1991 if (appAtts[i] == elementType->idAtt->name) {
1992 idAttIndex = i;
1993 break;
1996 else
1997 idAttIndex = -1;
1998 /* do attribute defaulting */
1999 for (j = 0; j < nDefaultAtts; j++) {
2000 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
2001 if (!(da->id->name)[-1] && da->value) {
2002 if (da->id->prefix) {
2003 if (da->id->xmlns) {
2004 if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
2005 return XML_ERROR_NO_MEMORY;
2007 else {
2008 (da->id->name)[-1] = 2;
2009 nPrefixes++;
2010 appAtts[attIndex++] = da->id->name;
2011 appAtts[attIndex++] = da->value;
2014 else {
2015 (da->id->name)[-1] = 1;
2016 appAtts[attIndex++] = da->id->name;
2017 appAtts[attIndex++] = da->value;
2021 appAtts[attIndex] = 0;
2023 i = 0;
2024 if (nPrefixes) {
2025 /* expand prefixed attribute names */
2026 for (; i < attIndex; i += 2) {
2027 if (appAtts[i][-1] == 2) {
2028 ATTRIBUTE_ID *id;
2029 ((XML_Char *)(appAtts[i]))[-1] = 0;
2030 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
2031 if (id->prefix->binding) {
2032 int j;
2033 const BINDING *b = id->prefix->binding;
2034 const XML_Char *s = appAtts[i];
2035 for (j = 0; j < b->uriLen; j++) {
2036 if (!poolAppendChar(&tempPool, b->uri[j]))
2037 return XML_ERROR_NO_MEMORY;
2039 while (*s++ != ':')
2041 do {
2042 if (!poolAppendChar(&tempPool, *s))
2043 return XML_ERROR_NO_MEMORY;
2044 } while (*s++);
2045 if (ns_triplets) {
2046 tempPool.ptr[-1] = namespaceSeparator;
2047 s = b->prefix->name;
2048 do {
2049 if (!poolAppendChar(&tempPool, *s))
2050 return XML_ERROR_NO_MEMORY;
2051 } while (*s++);
2054 appAtts[i] = poolStart(&tempPool);
2055 poolFinish(&tempPool);
2057 if (!--nPrefixes)
2058 break;
2060 else
2061 ((XML_Char *)(appAtts[i]))[-1] = 0;
2064 /* clear the flags that say whether attributes were specified */
2065 for (; i < attIndex; i += 2)
2066 ((XML_Char *)(appAtts[i]))[-1] = 0;
2067 if (!tagNamePtr)
2068 return XML_ERROR_NONE;
2069 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
2070 binding->attId->name[-1] = 0;
2071 /* expand the element type name */
2072 if (elementType->prefix) {
2073 binding = elementType->prefix->binding;
2074 if (!binding)
2075 return XML_ERROR_NONE;
2076 localPart = tagNamePtr->str;
2077 while (*localPart++ != XML_T(':'))
2080 else if (dtd.defaultPrefix.binding) {
2081 binding = dtd.defaultPrefix.binding;
2082 localPart = tagNamePtr->str;
2084 else
2085 return XML_ERROR_NONE;
2086 tagNamePtr->localPart = localPart;
2087 tagNamePtr->uriLen = binding->uriLen;
2088 for (i = 0; localPart[i++];)
2090 n = i + binding->uriLen;
2091 if (n > binding->uriAlloc) {
2092 TAG *p;
2093 XML_Char *uri = MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
2094 if (!uri)
2095 return XML_ERROR_NO_MEMORY;
2096 binding->uriAlloc = n + EXPAND_SPARE;
2097 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
2098 for (p = tagStack; p; p = p->parent)
2099 if (p->name.str == binding->uri)
2100 p->name.str = uri;
2101 FREE(binding->uri);
2102 binding->uri = uri;
2104 memcpy(binding->uri + binding->uriLen, localPart, i * sizeof(XML_Char));
2105 tagNamePtr->str = binding->uri;
2106 return XML_ERROR_NONE;
2109 static
2110 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
2112 BINDING *b;
2113 int len;
2114 for (len = 0; uri[len]; len++)
2116 if (namespaceSeparator)
2117 len++;
2118 if (freeBindingList) {
2119 b = freeBindingList;
2120 if (len > b->uriAlloc) {
2121 b->uri = REALLOC(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
2122 if (!b->uri)
2123 return 0;
2124 b->uriAlloc = len + EXPAND_SPARE;
2126 freeBindingList = b->nextTagBinding;
2128 else {
2129 b = MALLOC(sizeof(BINDING));
2130 if (!b)
2131 return 0;
2132 b->uri = MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
2133 if (!b->uri) {
2134 FREE(b);
2135 return 0;
2137 b->uriAlloc = len + EXPAND_SPARE;
2139 b->uriLen = len;
2140 memcpy(b->uri, uri, len * sizeof(XML_Char));
2141 if (namespaceSeparator)
2142 b->uri[len - 1] = namespaceSeparator;
2143 b->prefix = prefix;
2144 b->attId = attId;
2145 b->prevPrefixBinding = prefix->binding;
2146 if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
2147 prefix->binding = 0;
2148 else
2149 prefix->binding = b;
2150 b->nextTagBinding = *bindingsPtr;
2151 *bindingsPtr = b;
2152 if (startNamespaceDeclHandler)
2153 startNamespaceDeclHandler(handlerArg, prefix->name,
2154 prefix->binding ? uri : 0);
2155 return 1;
2158 /* The idea here is to avoid using stack for each CDATA section when
2159 the whole file is parsed with one call. */
2161 static
2162 enum XML_Error cdataSectionProcessor(XML_Parser parser,
2163 const char *start,
2164 const char *end,
2165 const char **endPtr)
2167 enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
2168 if (start) {
2169 processor = contentProcessor;
2170 return contentProcessor(parser, start, end, endPtr);
2172 return result;
/* startPtr gets set to non-null if the section is closed, and to null if
   the section is not yet closed. */
2178 static
2179 enum XML_Error doCdataSection(XML_Parser parser,
2180 const ENCODING *enc,
2181 const char **startPtr,
2182 const char *end,
2183 const char **nextPtr)
2185 const char *s = *startPtr;
2186 const char **eventPP;
2187 const char **eventEndPP;
2188 if (enc == encoding) {
2189 eventPP = &eventPtr;
2190 *eventPP = s;
2191 eventEndPP = &eventEndPtr;
2193 else {
2194 eventPP = &(openInternalEntities->internalEventPtr);
2195 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2197 *eventPP = s;
2198 *startPtr = 0;
2199 for (;;) {
2200 const char *next;
2201 int tok = XmlCdataSectionTok(enc, s, end, &next);
2202 *eventEndPP = next;
2203 switch (tok) {
2204 case XML_TOK_CDATA_SECT_CLOSE:
2205 if (endCdataSectionHandler)
2206 endCdataSectionHandler(handlerArg);
2207 #if 0
2208 /* see comment under XML_TOK_CDATA_SECT_OPEN */
2209 else if (characterDataHandler)
2210 characterDataHandler(handlerArg, dataBuf, 0);
2211 #endif
2212 else if (defaultHandler)
2213 reportDefault(parser, enc, s, next);
2214 *startPtr = next;
2215 return XML_ERROR_NONE;
2216 case XML_TOK_DATA_NEWLINE:
2217 if (characterDataHandler) {
2218 XML_Char c = 0xA;
2219 characterDataHandler(handlerArg, &c, 1);
2221 else if (defaultHandler)
2222 reportDefault(parser, enc, s, next);
2223 break;
2224 case XML_TOK_DATA_CHARS:
2225 if (characterDataHandler) {
2226 if (MUST_CONVERT(enc, s)) {
2227 for (;;) {
2228 ICHAR *dataPtr = (ICHAR *)dataBuf;
2229 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
2230 *eventEndPP = next;
2231 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2232 if (s == next)
2233 break;
2234 *eventPP = s;
2237 else
2238 characterDataHandler(handlerArg,
2239 (XML_Char *)s,
2240 (XML_Char *)next - (XML_Char *)s);
2242 else if (defaultHandler)
2243 reportDefault(parser, enc, s, next);
2244 break;
2245 case XML_TOK_INVALID:
2246 *eventPP = next;
2247 return XML_ERROR_INVALID_TOKEN;
2248 case XML_TOK_PARTIAL_CHAR:
2249 if (nextPtr) {
2250 *nextPtr = s;
2251 return XML_ERROR_NONE;
2253 return XML_ERROR_PARTIAL_CHAR;
2254 case XML_TOK_PARTIAL:
2255 case XML_TOK_NONE:
2256 if (nextPtr) {
2257 *nextPtr = s;
2258 return XML_ERROR_NONE;
2260 return XML_ERROR_UNCLOSED_CDATA_SECTION;
2261 default:
2262 *eventPP = next;
2263 return XML_ERROR_UNEXPECTED_STATE;
2265 *eventPP = s = next;
2267 /* not reached */
2270 #ifdef XML_DTD
2272 /* The idea here is to avoid using stack for each IGNORE section when
2273 the whole file is parsed with one call. */
2275 static
2276 enum XML_Error ignoreSectionProcessor(XML_Parser parser,
2277 const char *start,
2278 const char *end,
2279 const char **endPtr)
2281 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr);
2282 if (start) {
2283 processor = prologProcessor;
2284 return prologProcessor(parser, start, end, endPtr);
2286 return result;
/* startPtr gets set to non-null if the section is closed, and to null if
   the section is not yet closed. */
2292 static
2293 enum XML_Error doIgnoreSection(XML_Parser parser,
2294 const ENCODING *enc,
2295 const char **startPtr,
2296 const char *end,
2297 const char **nextPtr)
2299 const char *next;
2300 int tok;
2301 const char *s = *startPtr;
2302 const char **eventPP;
2303 const char **eventEndPP;
2304 if (enc == encoding) {
2305 eventPP = &eventPtr;
2306 *eventPP = s;
2307 eventEndPP = &eventEndPtr;
2309 else {
2310 eventPP = &(openInternalEntities->internalEventPtr);
2311 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2313 *eventPP = s;
2314 *startPtr = 0;
2315 tok = XmlIgnoreSectionTok(enc, s, end, &next);
2316 *eventEndPP = next;
2317 switch (tok) {
2318 case XML_TOK_IGNORE_SECT:
2319 if (defaultHandler)
2320 reportDefault(parser, enc, s, next);
2321 *startPtr = next;
2322 return XML_ERROR_NONE;
2323 case XML_TOK_INVALID:
2324 *eventPP = next;
2325 return XML_ERROR_INVALID_TOKEN;
2326 case XML_TOK_PARTIAL_CHAR:
2327 if (nextPtr) {
2328 *nextPtr = s;
2329 return XML_ERROR_NONE;
2331 return XML_ERROR_PARTIAL_CHAR;
2332 case XML_TOK_PARTIAL:
2333 case XML_TOK_NONE:
2334 if (nextPtr) {
2335 *nextPtr = s;
2336 return XML_ERROR_NONE;
2338 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
2339 default:
2340 *eventPP = next;
2341 return XML_ERROR_UNEXPECTED_STATE;
2343 /* not reached */
2346 #endif /* XML_DTD */
2348 static enum XML_Error
2349 initializeEncoding(XML_Parser parser)
2351 const char *s;
2352 #ifdef XML_UNICODE
2353 char encodingBuf[128];
2354 if (!protocolEncodingName)
2355 s = 0;
2356 else {
2357 int i;
2358 for (i = 0; protocolEncodingName[i]; i++) {
2359 if (i == sizeof(encodingBuf) - 1
2360 || (protocolEncodingName[i] & ~0x7f) != 0) {
2361 encodingBuf[0] = '\0';
2362 break;
2364 encodingBuf[i] = (char)protocolEncodingName[i];
2366 encodingBuf[i] = '\0';
2367 s = encodingBuf;
2369 #else
2370 s = protocolEncodingName;
2371 #endif
2372 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
2373 return XML_ERROR_NONE;
2374 return handleUnknownEncoding(parser, protocolEncodingName);
2377 static enum XML_Error
2378 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
2379 const char *s, const char *next)
2381 const char *encodingName = 0;
2382 const char *storedEncName = 0;
2383 const ENCODING *newEncoding = 0;
2384 const char *version = 0;
2385 const char *versionend;
2386 const char *storedversion = 0;
2387 int standalone = -1;
2388 if (!(ns
2389 ? XmlParseXmlDeclNS
2390 : XmlParseXmlDecl)(isGeneralTextEntity,
2391 encoding,
2393 next,
2394 &eventPtr,
2395 &version,
2396 &versionend,
2397 &encodingName,
2398 &newEncoding,
2399 &standalone))
2400 return XML_ERROR_SYNTAX;
2401 if (!isGeneralTextEntity && standalone == 1) {
2402 dtd.standalone = 1;
2403 #ifdef XML_DTD
2404 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
2405 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
2406 #endif /* XML_DTD */
2408 if (xmlDeclHandler) {
2409 if (encodingName) {
2410 storedEncName = poolStoreString(&temp2Pool,
2411 encoding,
2412 encodingName,
2413 encodingName
2414 + XmlNameLength(encoding, encodingName));
2415 if (! storedEncName)
2416 return XML_ERROR_NO_MEMORY;
2417 poolFinish(&temp2Pool);
2419 if (version) {
2420 storedversion = poolStoreString(&temp2Pool,
2421 encoding,
2422 version,
2423 versionend - encoding->minBytesPerChar);
2424 if (! storedversion)
2425 return XML_ERROR_NO_MEMORY;
2427 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
2429 else if (defaultHandler)
2430 reportDefault(parser, encoding, s, next);
2431 if (!protocolEncodingName) {
2432 if (newEncoding) {
2433 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
2434 eventPtr = encodingName;
2435 return XML_ERROR_INCORRECT_ENCODING;
2437 encoding = newEncoding;
2439 else if (encodingName) {
2440 enum XML_Error result;
2441 if (! storedEncName) {
2442 storedEncName = poolStoreString(&temp2Pool,
2443 encoding,
2444 encodingName,
2445 encodingName
2446 + XmlNameLength(encoding, encodingName));
2447 if (! storedEncName)
2448 return XML_ERROR_NO_MEMORY;
2450 result = handleUnknownEncoding(parser, storedEncName);
2451 poolClear(&tempPool);
2452 if (result == XML_ERROR_UNKNOWN_ENCODING)
2453 eventPtr = encodingName;
2454 return result;
2458 if (storedEncName || storedversion)
2459 poolClear(&temp2Pool);
2461 return XML_ERROR_NONE;
2464 static enum XML_Error
2465 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
2467 if (unknownEncodingHandler) {
2468 XML_Encoding info;
2469 int i;
2470 for (i = 0; i < 256; i++)
2471 info.map[i] = -1;
2472 info.convert = 0;
2473 info.data = 0;
2474 info.release = 0;
2475 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
2476 ENCODING *enc;
2477 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
2478 if (!unknownEncodingMem) {
2479 if (info.release)
2480 info.release(info.data);
2481 return XML_ERROR_NO_MEMORY;
2483 enc = (ns
2484 ? XmlInitUnknownEncodingNS
2485 : XmlInitUnknownEncoding)(unknownEncodingMem,
2486 info.map,
2487 info.convert,
2488 info.data);
2489 if (enc) {
2490 unknownEncodingData = info.data;
2491 unknownEncodingRelease = info.release;
2492 encoding = enc;
2493 return XML_ERROR_NONE;
2496 if (info.release)
2497 info.release(info.data);
2499 return XML_ERROR_UNKNOWN_ENCODING;
2502 static enum XML_Error
2503 prologInitProcessor(XML_Parser parser,
2504 const char *s,
2505 const char *end,
2506 const char **nextPtr)
2508 enum XML_Error result = initializeEncoding(parser);
2509 if (result != XML_ERROR_NONE)
2510 return result;
2511 processor = prologProcessor;
2512 return prologProcessor(parser, s, end, nextPtr);
2515 static enum XML_Error
2516 prologProcessor(XML_Parser parser,
2517 const char *s,
2518 const char *end,
2519 const char **nextPtr)
2521 const char *next;
2522 int tok = XmlPrologTok(encoding, s, end, &next);
2523 return doProlog(parser, encoding, s, end, tok, next, nextPtr);
2526 static enum XML_Error
2527 doProlog(XML_Parser parser,
2528 const ENCODING *enc,
2529 const char *s,
2530 const char *end,
2531 int tok,
2532 const char *next,
2533 const char **nextPtr)
2535 #ifdef XML_DTD
2536 static const XML_Char externalSubsetName[] = { '#' , '\0' };
2537 #endif /* XML_DTD */
2539 const char **eventPP;
2540 const char **eventEndPP;
2541 enum XML_Content_Quant quant;
2543 if (enc == encoding) {
2544 eventPP = &eventPtr;
2545 eventEndPP = &eventEndPtr;
2547 else {
2548 eventPP = &(openInternalEntities->internalEventPtr);
2549 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2551 for (;;) {
2552 int role;
2553 *eventPP = s;
2554 *eventEndPP = next;
2555 if (tok <= 0) {
2556 if (nextPtr != 0 && tok != XML_TOK_INVALID) {
2557 *nextPtr = s;
2558 return XML_ERROR_NONE;
2560 switch (tok) {
2561 case XML_TOK_INVALID:
2562 *eventPP = next;
2563 return XML_ERROR_INVALID_TOKEN;
2564 case XML_TOK_PARTIAL:
2565 return XML_ERROR_UNCLOSED_TOKEN;
2566 case XML_TOK_PARTIAL_CHAR:
2567 return XML_ERROR_PARTIAL_CHAR;
2568 case XML_TOK_NONE:
2569 #ifdef XML_DTD
2570 if (enc != encoding)
2571 return XML_ERROR_NONE;
2572 if (parentParser) {
2573 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
2574 == XML_ROLE_ERROR)
2575 return XML_ERROR_SYNTAX;
2576 hadExternalDoctype = 0;
2577 return XML_ERROR_NONE;
2579 #endif /* XML_DTD */
2580 return XML_ERROR_NO_ELEMENTS;
2581 default:
2582 tok = -tok;
2583 next = end;
2584 break;
2587 role = XmlTokenRole(&prologState, tok, s, next, enc);
2588 switch (role) {
2589 case XML_ROLE_XML_DECL:
2591 enum XML_Error result = processXmlDecl(parser, 0, s, next);
2592 if (result != XML_ERROR_NONE)
2593 return result;
2594 enc = encoding;
2596 break;
2597 case XML_ROLE_DOCTYPE_NAME:
2598 if (startDoctypeDeclHandler) {
2599 doctypeName = poolStoreString(&tempPool, enc, s, next);
2600 if (! doctypeName)
2601 return XML_ERROR_NO_MEMORY;
2602 poolFinish(&tempPool);
2603 doctypeSysid = 0;
2604 doctypePubid = 0;
2606 break;
2607 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
2608 if (startDoctypeDeclHandler) {
2609 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
2610 doctypePubid, 1);
2611 doctypeName = 0;
2612 poolClear(&tempPool);
2614 break;
2615 #ifdef XML_DTD
2616 case XML_ROLE_TEXT_DECL:
2618 enum XML_Error result = processXmlDecl(parser, 1, s, next);
2619 if (result != XML_ERROR_NONE)
2620 return result;
2621 enc = encoding;
2623 break;
2624 #endif /* XML_DTD */
2625 case XML_ROLE_DOCTYPE_PUBLIC_ID:
2626 if (startDoctypeDeclHandler) {
2627 doctypePubid = poolStoreString(&tempPool, enc, s + 1, next - 1);
2628 if (! doctypePubid)
2629 return XML_ERROR_NO_MEMORY;
2630 poolFinish(&tempPool);
2632 #ifdef XML_DTD
2633 declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2634 externalSubsetName,
2635 sizeof(ENTITY));
2636 if (!declEntity)
2637 return XML_ERROR_NO_MEMORY;
2638 #endif /* XML_DTD */
2639 /* fall through */
2640 case XML_ROLE_ENTITY_PUBLIC_ID:
2641 if (!XmlIsPublicId(enc, s, next, eventPP))
2642 return XML_ERROR_SYNTAX;
2643 if (declEntity) {
2644 XML_Char *tem = poolStoreString(&dtd.pool,
2645 enc,
2646 s + enc->minBytesPerChar,
2647 next - enc->minBytesPerChar);
2648 if (!tem)
2649 return XML_ERROR_NO_MEMORY;
2650 normalizePublicId(tem);
2651 declEntity->publicId = tem;
2652 poolFinish(&dtd.pool);
2654 break;
2655 case XML_ROLE_DOCTYPE_CLOSE:
2656 if (doctypeName) {
2657 startDoctypeDeclHandler(handlerArg, doctypeName,
2658 doctypeSysid, doctypePubid, 0);
2659 poolClear(&tempPool);
2661 if (dtd.complete && hadExternalDoctype) {
2662 dtd.complete = 0;
2663 #ifdef XML_DTD
2664 if (paramEntityParsing && externalEntityRefHandler) {
2665 ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
2666 externalSubsetName,
2668 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2670 entity->base,
2671 entity->systemId,
2672 entity->publicId))
2673 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2675 #endif /* XML_DTD */
2676 if (!dtd.complete
2677 && !dtd.standalone
2678 && notStandaloneHandler
2679 && !notStandaloneHandler(handlerArg))
2680 return XML_ERROR_NOT_STANDALONE;
2682 if (endDoctypeDeclHandler)
2683 endDoctypeDeclHandler(handlerArg);
2684 break;
2685 case XML_ROLE_INSTANCE_START:
2686 processor = contentProcessor;
2687 return contentProcessor(parser, s, end, nextPtr);
2688 case XML_ROLE_ATTLIST_ELEMENT_NAME:
2689 declElementType = getElementType(parser, enc, s, next);
2690 if (!declElementType)
2691 return XML_ERROR_NO_MEMORY;
2692 break;
2693 case XML_ROLE_ATTRIBUTE_NAME:
2694 declAttributeId = getAttributeId(parser, enc, s, next);
2695 if (!declAttributeId)
2696 return XML_ERROR_NO_MEMORY;
2697 declAttributeIsCdata = 0;
2698 declAttributeType = 0;
2699 declAttributeIsId = 0;
2700 break;
2701 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2702 declAttributeIsCdata = 1;
2703 declAttributeType = "CDATA";
2704 break;
2705 case XML_ROLE_ATTRIBUTE_TYPE_ID:
2706 declAttributeIsId = 1;
2707 declAttributeType = "ID";
2708 break;
2709 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
2710 declAttributeType = "IDREF";
2711 break;
2712 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
2713 declAttributeType = "IDREFS";
2714 break;
2715 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
2716 declAttributeType = "ENTITY";
2717 break;
2718 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
2719 declAttributeType = "ENTITIES";
2720 break;
2721 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
2722 declAttributeType = "NMTOKEN";
2723 break;
2724 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
2725 declAttributeType = "NMTOKENS";
2726 break;
2728 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
2729 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
2730 if (attlistDeclHandler)
2732 char *prefix;
2733 if (declAttributeType) {
2734 prefix = "|";
2736 else {
2737 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
2738 ? "NOTATION("
2739 : "(");
2741 if (! poolAppendString(&tempPool, prefix))
2742 return XML_ERROR_NO_MEMORY;
2743 if (! poolAppend(&tempPool, enc, s, next))
2744 return XML_ERROR_NO_MEMORY;
2745 declAttributeType = tempPool.start;
2747 break;
2748 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2749 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2750 if (dtd.complete
2751 && !defineAttribute(declElementType, declAttributeId,
2752 declAttributeIsCdata, declAttributeIsId, 0,
2753 parser))
2754 return XML_ERROR_NO_MEMORY;
2755 if (attlistDeclHandler && declAttributeType) {
2756 if (*declAttributeType == '('
2757 || (*declAttributeType == 'N' && declAttributeType[1] == 'O')) {
2758 /* Enumerated or Notation type */
2759 if (! poolAppendChar(&tempPool, ')')
2760 || ! poolAppendChar(&tempPool, '\0'))
2761 return XML_ERROR_NO_MEMORY;
2762 declAttributeType = tempPool.start;
2763 poolFinish(&tempPool);
2765 *eventEndPP = s;
2766 attlistDeclHandler(handlerArg, declElementType->name,
2767 declAttributeId->name, declAttributeType,
2768 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
2769 poolClear(&tempPool);
2771 break;
2772 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2773 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2775 const XML_Char *attVal;
2776 enum XML_Error result
2777 = storeAttributeValue(parser, enc, declAttributeIsCdata,
2778 s + enc->minBytesPerChar,
2779 next - enc->minBytesPerChar,
2780 &dtd.pool);
2781 if (result)
2782 return result;
2783 attVal = poolStart(&dtd.pool);
2784 poolFinish(&dtd.pool);
2785 if (dtd.complete
2786 /* ID attributes aren't allowed to have a default */
2787 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0, attVal, parser))
2788 return XML_ERROR_NO_MEMORY;
2789 if (attlistDeclHandler && declAttributeType) {
2790 if (*declAttributeType == '('
2791 || (*declAttributeType == 'N' && declAttributeType[1] == 'O')) {
2792 /* Enumerated or Notation type */
2793 if (! poolAppendChar(&tempPool, ')')
2794 || ! poolAppendChar(&tempPool, '\0'))
2795 return XML_ERROR_NO_MEMORY;
2796 declAttributeType = tempPool.start;
2797 poolFinish(&tempPool);
2799 *eventEndPP = s;
2800 attlistDeclHandler(handlerArg, declElementType->name,
2801 declAttributeId->name, declAttributeType,
2802 attVal,
2803 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
2804 poolClear(&tempPool);
2806 break;
2808 case XML_ROLE_ENTITY_VALUE:
2810 enum XML_Error result = storeEntityValue(parser, enc,
2811 s + enc->minBytesPerChar,
2812 next - enc->minBytesPerChar);
2813 if (declEntity) {
2814 declEntity->textPtr = poolStart(&dtd.pool);
2815 declEntity->textLen = poolLength(&dtd.pool);
2816 poolFinish(&dtd.pool);
2817 if (entityDeclHandler) {
2818 *eventEndPP = s;
2819 entityDeclHandler(handlerArg,
2820 declEntity->name,
2821 declEntity->is_param,
2822 declEntity->textPtr,
2823 declEntity->textLen,
2824 curBase, 0, 0, 0);
2827 else
2828 poolDiscard(&dtd.pool);
2829 if (result != XML_ERROR_NONE)
2830 return result;
2832 break;
2833 case XML_ROLE_DOCTYPE_SYSTEM_ID:
2834 if (startDoctypeDeclHandler) {
2835 doctypeSysid = poolStoreString(&tempPool, enc, s + 1, next - 1);
2836 if (! doctypeSysid)
2837 return XML_ERROR_NO_MEMORY;
2838 poolFinish(&tempPool);
2840 if (!dtd.standalone
2841 #ifdef XML_DTD
2842 && !paramEntityParsing
2843 #endif /* XML_DTD */
2844 && notStandaloneHandler
2845 && !notStandaloneHandler(handlerArg))
2846 return XML_ERROR_NOT_STANDALONE;
2847 hadExternalDoctype = 1;
2848 #ifndef XML_DTD
2849 break;
2850 #else /* XML_DTD */
2851 if (!declEntity) {
2852 declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2853 externalSubsetName,
2854 sizeof(ENTITY));
2855 declEntity->publicId = 0;
2856 if (!declEntity)
2857 return XML_ERROR_NO_MEMORY;
2859 /* fall through */
2860 #endif /* XML_DTD */
2861 case XML_ROLE_ENTITY_SYSTEM_ID:
2862 if (declEntity) {
2863 declEntity->systemId = poolStoreString(&dtd.pool, enc,
2864 s + enc->minBytesPerChar,
2865 next - enc->minBytesPerChar);
2866 if (!declEntity->systemId)
2867 return XML_ERROR_NO_MEMORY;
2868 declEntity->base = curBase;
2869 poolFinish(&dtd.pool);
2871 break;
2872 case XML_ROLE_ENTITY_COMPLETE:
2873 if (declEntity && entityDeclHandler) {
2874 *eventEndPP = s;
2875 entityDeclHandler(handlerArg,
2876 declEntity->name,
2877 0,0,0,
2878 declEntity->base,
2879 declEntity->systemId,
2880 declEntity->publicId,
2883 break;
2884 case XML_ROLE_ENTITY_NOTATION_NAME:
2885 if (declEntity) {
2886 declEntity->notation = poolStoreString(&dtd.pool, enc, s, next);
2887 if (!declEntity->notation)
2888 return XML_ERROR_NO_MEMORY;
2889 poolFinish(&dtd.pool);
2890 if (unparsedEntityDeclHandler) {
2891 *eventEndPP = s;
2892 unparsedEntityDeclHandler(handlerArg,
2893 declEntity->name,
2894 declEntity->base,
2895 declEntity->systemId,
2896 declEntity->publicId,
2897 declEntity->notation);
2899 else if (entityDeclHandler) {
2900 *eventEndPP = s;
2901 entityDeclHandler(handlerArg,
2902 declEntity->name,
2903 0,0,0,
2904 declEntity->base,
2905 declEntity->systemId,
2906 declEntity->publicId,
2907 declEntity->notation);
2910 break;
2911 case XML_ROLE_GENERAL_ENTITY_NAME:
2913 const XML_Char *name;
2914 if (XmlPredefinedEntityName(enc, s, next)) {
2915 declEntity = 0;
2916 break;
2918 name = poolStoreString(&dtd.pool, enc, s, next);
2919 if (!name)
2920 return XML_ERROR_NO_MEMORY;
2921 if (dtd.complete) {
2922 declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2923 if (!declEntity)
2924 return XML_ERROR_NO_MEMORY;
2925 if (declEntity->name != name) {
2926 poolDiscard(&dtd.pool);
2927 declEntity = 0;
2929 else {
2930 poolFinish(&dtd.pool);
2931 declEntity->publicId = 0;
2932 declEntity->is_param = 0;
2935 else {
2936 poolDiscard(&dtd.pool);
2937 declEntity = 0;
2940 break;
2941 case XML_ROLE_PARAM_ENTITY_NAME:
2942 #ifdef XML_DTD
2943 if (dtd.complete) {
2944 const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
2945 if (!name)
2946 return XML_ERROR_NO_MEMORY;
2947 declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2948 name, sizeof(ENTITY));
2949 if (!declEntity)
2950 return XML_ERROR_NO_MEMORY;
2951 if (declEntity->name != name) {
2952 poolDiscard(&dtd.pool);
2953 declEntity = 0;
2955 else {
2956 poolFinish(&dtd.pool);
2957 declEntity->publicId = 0;
2958 declEntity->is_param = 1;
2961 #else /* not XML_DTD */
2962 declEntity = 0;
2963 #endif /* not XML_DTD */
2964 break;
2965 case XML_ROLE_NOTATION_NAME:
2966 declNotationPublicId = 0;
2967 declNotationName = 0;
2968 if (notationDeclHandler) {
2969 declNotationName = poolStoreString(&tempPool, enc, s, next);
2970 if (!declNotationName)
2971 return XML_ERROR_NO_MEMORY;
2972 poolFinish(&tempPool);
2974 break;
2975 case XML_ROLE_NOTATION_PUBLIC_ID:
2976 if (!XmlIsPublicId(enc, s, next, eventPP))
2977 return XML_ERROR_SYNTAX;
2978 if (declNotationName) {
2979 XML_Char *tem = poolStoreString(&tempPool,
2980 enc,
2981 s + enc->minBytesPerChar,
2982 next - enc->minBytesPerChar);
2983 if (!tem)
2984 return XML_ERROR_NO_MEMORY;
2985 normalizePublicId(tem);
2986 declNotationPublicId = tem;
2987 poolFinish(&tempPool);
2989 break;
2990 case XML_ROLE_NOTATION_SYSTEM_ID:
2991 if (declNotationName && notationDeclHandler) {
2992 const XML_Char *systemId
2993 = poolStoreString(&tempPool, enc,
2994 s + enc->minBytesPerChar,
2995 next - enc->minBytesPerChar);
2996 if (!systemId)
2997 return XML_ERROR_NO_MEMORY;
2998 *eventEndPP = s;
2999 notationDeclHandler(handlerArg,
3000 declNotationName,
3001 curBase,
3002 systemId,
3003 declNotationPublicId);
3005 poolClear(&tempPool);
3006 break;
3007 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
3008 if (declNotationPublicId && notationDeclHandler) {
3009 *eventEndPP = s;
3010 notationDeclHandler(handlerArg,
3011 declNotationName,
3012 curBase,
3014 declNotationPublicId);
3016 poolClear(&tempPool);
3017 break;
3018 case XML_ROLE_ERROR:
3019 switch (tok) {
3020 case XML_TOK_PARAM_ENTITY_REF:
3021 return XML_ERROR_PARAM_ENTITY_REF;
3022 case XML_TOK_XML_DECL:
3023 return XML_ERROR_MISPLACED_XML_PI;
3024 default:
3025 return XML_ERROR_SYNTAX;
3027 #ifdef XML_DTD
3028 case XML_ROLE_IGNORE_SECT:
3030 enum XML_Error result;
3031 if (defaultHandler)
3032 reportDefault(parser, enc, s, next);
3033 result = doIgnoreSection(parser, enc, &next, end, nextPtr);
3034 if (!next) {
3035 processor = ignoreSectionProcessor;
3036 return result;
3039 break;
3040 #endif /* XML_DTD */
3041 case XML_ROLE_GROUP_OPEN:
3042 if (prologState.level >= groupSize) {
3043 if (groupSize) {
3044 groupConnector = REALLOC(groupConnector, groupSize *= 2);
3045 if (dtd.scaffIndex)
3046 dtd.scaffIndex = REALLOC(dtd.scaffIndex, groupSize * sizeof(int));
3048 else
3049 groupConnector = MALLOC(groupSize = 32);
3050 if (!groupConnector)
3051 return XML_ERROR_NO_MEMORY;
3053 groupConnector[prologState.level] = 0;
3054 if (dtd.in_eldecl) {
3055 int myindex = nextScaffoldPart(parser);
3056 if (myindex < 0)
3057 return XML_ERROR_NO_MEMORY;
3058 dtd.scaffIndex[dtd.scaffLevel] = myindex;
3059 dtd.scaffLevel++;
3060 dtd.scaffold[myindex].type = XML_CTYPE_SEQ;
3062 break;
3063 case XML_ROLE_GROUP_SEQUENCE:
3064 if (groupConnector[prologState.level] == '|')
3065 return XML_ERROR_SYNTAX;
3066 groupConnector[prologState.level] = ',';
3067 break;
3068 case XML_ROLE_GROUP_CHOICE:
3069 if (groupConnector[prologState.level] == ',')
3070 return XML_ERROR_SYNTAX;
3071 if (dtd.in_eldecl
3072 && ! groupConnector[prologState.level]
3073 && dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type != XML_CTYPE_MIXED
3075 dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_CHOICE;
3077 groupConnector[prologState.level] = '|';
3078 break;
3079 case XML_ROLE_PARAM_ENTITY_REF:
3080 #ifdef XML_DTD
3081 case XML_ROLE_INNER_PARAM_ENTITY_REF:
3082 if (paramEntityParsing
3083 && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) {
3084 const XML_Char *name;
3085 ENTITY *entity;
3086 name = poolStoreString(&dtd.pool, enc,
3087 s + enc->minBytesPerChar,
3088 next - enc->minBytesPerChar);
3089 if (!name)
3090 return XML_ERROR_NO_MEMORY;
3091 entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
3092 poolDiscard(&dtd.pool);
3093 if (!entity) {
3094 /* FIXME what to do if !dtd.complete? */
3095 return XML_ERROR_UNDEFINED_ENTITY;
3097 if (entity->open)
3098 return XML_ERROR_RECURSIVE_ENTITY_REF;
3099 if (entity->textPtr) {
3100 enum XML_Error result;
3101 result = processInternalParamEntity(parser, entity);
3102 if (result != XML_ERROR_NONE)
3103 return result;
3104 break;
3106 if (role == XML_ROLE_INNER_PARAM_ENTITY_REF)
3107 return XML_ERROR_PARAM_ENTITY_REF;
3108 if (externalEntityRefHandler) {
3109 dtd.complete = 0;
3110 entity->open = 1;
3111 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
3113 entity->base,
3114 entity->systemId,
3115 entity->publicId)) {
3116 entity->open = 0;
3117 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3119 entity->open = 0;
3120 if (dtd.complete)
3121 break;
3124 #endif /* XML_DTD */
3125 if (!dtd.standalone
3126 && notStandaloneHandler
3127 && !notStandaloneHandler(handlerArg))
3128 return XML_ERROR_NOT_STANDALONE;
3129 dtd.complete = 0;
3130 if (defaultHandler)
3131 reportDefault(parser, enc, s, next);
3132 break;
3134 /* Element declaration stuff */
3136 case XML_ROLE_ELEMENT_NAME:
3137 if (elementDeclHandler) {
3138 declElementType = getElementType(parser, enc, s, next);
3139 if (! declElementType)
3140 return XML_ERROR_NO_MEMORY;
3141 dtd.scaffLevel = 0;
3142 dtd.scaffCount = 0;
3143 dtd.in_eldecl = 1;
3145 break;
3147 case XML_ROLE_CONTENT_ANY:
3148 case XML_ROLE_CONTENT_EMPTY:
3149 if (dtd.in_eldecl) {
3150 if (elementDeclHandler) {
3151 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
3152 if (! content)
3153 return XML_ERROR_NO_MEMORY;
3154 content->quant = XML_CQUANT_NONE;
3155 content->name = 0;
3156 content->numchildren = 0;
3157 content->children = 0;
3158 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
3159 XML_CTYPE_ANY :
3160 XML_CTYPE_EMPTY);
3161 *eventEndPP = s;
3162 elementDeclHandler(handlerArg, declElementType->name, content);
3164 dtd.in_eldecl = 0;
3166 break;
3168 case XML_ROLE_CONTENT_PCDATA:
3169 if (dtd.in_eldecl) {
3170 dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_MIXED;
3172 break;
3174 case XML_ROLE_CONTENT_ELEMENT:
3175 quant = XML_CQUANT_NONE;
3176 goto elementContent;
3177 case XML_ROLE_CONTENT_ELEMENT_OPT:
3178 quant = XML_CQUANT_OPT;
3179 goto elementContent;
3180 case XML_ROLE_CONTENT_ELEMENT_REP:
3181 quant = XML_CQUANT_REP;
3182 goto elementContent;
3183 case XML_ROLE_CONTENT_ELEMENT_PLUS:
3184 quant = XML_CQUANT_PLUS;
3185 elementContent:
3186 if (dtd.in_eldecl)
3188 ELEMENT_TYPE *el;
3189 const char *nxt = quant == XML_CQUANT_NONE ? next : next - 1;
3190 int myindex = nextScaffoldPart(parser);
3191 if (myindex < 0)
3192 return XML_ERROR_NO_MEMORY;
3193 dtd.scaffold[myindex].type = XML_CTYPE_NAME;
3194 dtd.scaffold[myindex].quant = quant;
3195 el = getElementType(parser, enc, s, nxt);
3196 if (! el)
3197 return XML_ERROR_NO_MEMORY;
3198 dtd.scaffold[myindex].name = el->name;
3199 dtd.contentStringLen += nxt - s + 1;
3201 break;
3203 case XML_ROLE_GROUP_CLOSE:
3204 quant = XML_CQUANT_NONE;
3205 goto closeGroup;
3206 case XML_ROLE_GROUP_CLOSE_OPT:
3207 quant = XML_CQUANT_OPT;
3208 goto closeGroup;
3209 case XML_ROLE_GROUP_CLOSE_REP:
3210 quant = XML_CQUANT_REP;
3211 goto closeGroup;
3212 case XML_ROLE_GROUP_CLOSE_PLUS:
3213 quant = XML_CQUANT_PLUS;
3214 closeGroup:
3215 if (dtd.in_eldecl) {
3216 dtd.scaffLevel--;
3217 dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel]].quant = quant;
3218 if (dtd.scaffLevel == 0) {
3219 if (elementDeclHandler) {
3220 XML_Content *model = build_model(parser);
3221 if (! model)
3222 return XML_ERROR_NO_MEMORY;
3223 *eventEndPP = s;
3224 elementDeclHandler(handlerArg, declElementType->name, model);
3226 dtd.in_eldecl = 0;
3227 dtd.contentStringLen = 0;
3230 break;
3231 /* End element declaration stuff */
3233 case XML_ROLE_NONE:
3234 switch (tok) {
3235 case XML_TOK_PI:
3236 if (!reportProcessingInstruction(parser, enc, s, next))
3237 return XML_ERROR_NO_MEMORY;
3238 break;
3239 case XML_TOK_COMMENT:
3240 if (!reportComment(parser, enc, s, next))
3241 return XML_ERROR_NO_MEMORY;
3242 break;
3244 break;
3246 if (defaultHandler) {
3247 switch (tok) {
3248 case XML_TOK_PI:
3249 case XML_TOK_COMMENT:
3250 case XML_TOK_BOM:
3251 case XML_TOK_XML_DECL:
3252 #ifdef XML_DTD
3253 case XML_TOK_IGNORE_SECT:
3254 #endif /* XML_DTD */
3255 case XML_TOK_PARAM_ENTITY_REF:
3256 break;
3257 default:
3258 #ifdef XML_DTD
3259 if (role != XML_ROLE_IGNORE_SECT)
3260 #endif /* XML_DTD */
3261 reportDefault(parser, enc, s, next);
3264 s = next;
3265 tok = XmlPrologTok(enc, s, end, &next);
3267 /* not reached */
3270 static
3271 enum XML_Error epilogProcessor(XML_Parser parser,
3272 const char *s,
3273 const char *end,
3274 const char **nextPtr)
3276 processor = epilogProcessor;
3277 eventPtr = s;
3278 for (;;) {
3279 const char *next;
3280 int tok = XmlPrologTok(encoding, s, end, &next);
3281 eventEndPtr = next;
3282 switch (tok) {
3283 case -XML_TOK_PROLOG_S:
3284 if (defaultHandler) {
3285 eventEndPtr = end;
3286 reportDefault(parser, encoding, s, end);
3288 /* fall through */
3289 case XML_TOK_NONE:
3290 if (nextPtr)
3291 *nextPtr = end;
3292 return XML_ERROR_NONE;
3293 case XML_TOK_PROLOG_S:
3294 if (defaultHandler)
3295 reportDefault(parser, encoding, s, next);
3296 break;
3297 case XML_TOK_PI:
3298 if (!reportProcessingInstruction(parser, encoding, s, next))
3299 return XML_ERROR_NO_MEMORY;
3300 break;
3301 case XML_TOK_COMMENT:
3302 if (!reportComment(parser, encoding, s, next))
3303 return XML_ERROR_NO_MEMORY;
3304 break;
3305 case XML_TOK_INVALID:
3306 eventPtr = next;
3307 return XML_ERROR_INVALID_TOKEN;
3308 case XML_TOK_PARTIAL:
3309 if (nextPtr) {
3310 *nextPtr = s;
3311 return XML_ERROR_NONE;
3313 return XML_ERROR_UNCLOSED_TOKEN;
3314 case XML_TOK_PARTIAL_CHAR:
3315 if (nextPtr) {
3316 *nextPtr = s;
3317 return XML_ERROR_NONE;
3319 return XML_ERROR_PARTIAL_CHAR;
3320 default:
3321 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
3323 eventPtr = s = next;
#ifdef XML_DTD

/* Run the prolog parser over the replacement text of an internal
   parameter entity.  The entity is flagged open and an
   OPEN_INTERNAL_ENTITY record is pushed on openInternalEntities for the
   duration of the doProlog call; both are undone before returning
   doProlog's result. */
static enum XML_Error
processInternalParamEntity(XML_Parser parser, ENTITY *entity)
{
  OPEN_INTERNAL_ENTITY openEntity;
  const char *text = (char *)entity->textPtr;
  const char *textEnd = (char *)(entity->textPtr + entity->textLen);
  const char *next;
  enum XML_Error status;
  int tok;

  /* Push a record for this entity on the open-entity stack. */
  openEntity.entity = entity;
  openEntity.internalEventPtr = 0;
  openEntity.internalEventEndPtr = 0;
  openEntity.next = openInternalEntities;
  openInternalEntities = &openEntity;
  entity->open = 1;

  tok = XmlPrologTok(internalEncoding, text, textEnd, &next);
  status = doProlog(parser, internalEncoding, text, textEnd, tok, next, 0);

  /* Pop the record again, whatever the outcome was. */
  entity->open = 0;
  openInternalEntities = openEntity.next;
  return status;
}

#endif /* XML_DTD */
3353 static
3354 enum XML_Error errorProcessor(XML_Parser parser,
3355 const char *s,
3356 const char *end,
3357 const char **nextPtr)
3359 return errorCode;
3362 static enum XML_Error
3363 storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
3364 const char *ptr, const char *end,
3365 STRING_POOL *pool)
3367 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
3368 if (result)
3369 return result;
3370 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
3371 poolChop(pool);
3372 if (!poolAppendChar(pool, XML_T('\0')))
3373 return XML_ERROR_NO_MEMORY;
3374 return XML_ERROR_NONE;
3377 static enum XML_Error
3378 appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
3379 const char *ptr, const char *end,
3380 STRING_POOL *pool)
3382 for (;;) {
3383 const char *next;
3384 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
3385 switch (tok) {
3386 case XML_TOK_NONE:
3387 return XML_ERROR_NONE;
3388 case XML_TOK_INVALID:
3389 if (enc == encoding)
3390 eventPtr = next;
3391 return XML_ERROR_INVALID_TOKEN;
3392 case XML_TOK_PARTIAL:
3393 if (enc == encoding)
3394 eventPtr = ptr;
3395 return XML_ERROR_INVALID_TOKEN;
3396 case XML_TOK_CHAR_REF:
3398 XML_Char buf[XML_ENCODE_MAX];
3399 int i;
3400 int n = XmlCharRefNumber(enc, ptr);
3401 if (n < 0) {
3402 if (enc == encoding)
3403 eventPtr = ptr;
3404 return XML_ERROR_BAD_CHAR_REF;
3406 if (!isCdata
3407 && n == 0x20 /* space */
3408 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
3409 break;
3410 n = XmlEncode(n, (ICHAR *)buf);
3411 if (!n) {
3412 if (enc == encoding)
3413 eventPtr = ptr;
3414 return XML_ERROR_BAD_CHAR_REF;
3416 for (i = 0; i < n; i++) {
3417 if (!poolAppendChar(pool, buf[i]))
3418 return XML_ERROR_NO_MEMORY;
3421 break;
3422 case XML_TOK_DATA_CHARS:
3423 if (!poolAppend(pool, enc, ptr, next))
3424 return XML_ERROR_NO_MEMORY;
3425 break;
3426 break;
3427 case XML_TOK_TRAILING_CR:
3428 next = ptr + enc->minBytesPerChar;
3429 /* fall through */
3430 case XML_TOK_ATTRIBUTE_VALUE_S:
3431 case XML_TOK_DATA_NEWLINE:
3432 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
3433 break;
3434 if (!poolAppendChar(pool, 0x20))
3435 return XML_ERROR_NO_MEMORY;
3436 break;
3437 case XML_TOK_ENTITY_REF:
3439 const XML_Char *name;
3440 ENTITY *entity;
3441 XML_Char ch = XmlPredefinedEntityName(enc,
3442 ptr + enc->minBytesPerChar,
3443 next - enc->minBytesPerChar);
3444 if (ch) {
3445 if (!poolAppendChar(pool, ch))
3446 return XML_ERROR_NO_MEMORY;
3447 break;
3449 name = poolStoreString(&temp2Pool, enc,
3450 ptr + enc->minBytesPerChar,
3451 next - enc->minBytesPerChar);
3452 if (!name)
3453 return XML_ERROR_NO_MEMORY;
3454 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
3455 poolDiscard(&temp2Pool);
3456 if (!entity) {
3457 if (dtd.complete) {
3458 if (enc == encoding)
3459 eventPtr = ptr;
3460 return XML_ERROR_UNDEFINED_ENTITY;
3463 else if (entity->open) {
3464 if (enc == encoding)
3465 eventPtr = ptr;
3466 return XML_ERROR_RECURSIVE_ENTITY_REF;
3468 else if (entity->notation) {
3469 if (enc == encoding)
3470 eventPtr = ptr;
3471 return XML_ERROR_BINARY_ENTITY_REF;
3473 else if (!entity->textPtr) {
3474 if (enc == encoding)
3475 eventPtr = ptr;
3476 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
3478 else {
3479 enum XML_Error result;
3480 const XML_Char *textEnd = entity->textPtr + entity->textLen;
3481 entity->open = 1;
3482 result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
3483 entity->open = 0;
3484 if (result)
3485 return result;
3488 break;
3489 default:
3490 if (enc == encoding)
3491 eventPtr = ptr;
3492 return XML_ERROR_UNEXPECTED_STATE;
3494 ptr = next;
3496 /* not reached */
3499 static
3500 enum XML_Error storeEntityValue(XML_Parser parser,
3501 const ENCODING *enc,
3502 const char *entityTextPtr,
3503 const char *entityTextEnd)
3505 STRING_POOL *pool = &(dtd.pool);
3506 for (;;) {
3507 const char *next;
3508 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
3509 switch (tok) {
3510 case XML_TOK_PARAM_ENTITY_REF:
3511 #ifdef XML_DTD
3512 if (parentParser || enc != encoding) {
3513 enum XML_Error result;
3514 const XML_Char *name;
3515 ENTITY *entity;
3516 name = poolStoreString(&tempPool, enc,
3517 entityTextPtr + enc->minBytesPerChar,
3518 next - enc->minBytesPerChar);
3519 if (!name)
3520 return XML_ERROR_NO_MEMORY;
3521 entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
3522 poolDiscard(&tempPool);
3523 if (!entity) {
3524 if (enc == encoding)
3525 eventPtr = entityTextPtr;
3526 return XML_ERROR_UNDEFINED_ENTITY;
3528 if (entity->open) {
3529 if (enc == encoding)
3530 eventPtr = entityTextPtr;
3531 return XML_ERROR_RECURSIVE_ENTITY_REF;
3533 if (entity->systemId) {
3534 if (enc == encoding)
3535 eventPtr = entityTextPtr;
3536 return XML_ERROR_PARAM_ENTITY_REF;
3538 entity->open = 1;
3539 result = storeEntityValue(parser,
3540 internalEncoding,
3541 (char *)entity->textPtr,
3542 (char *)(entity->textPtr + entity->textLen));
3543 entity->open = 0;
3544 if (result)
3545 return result;
3546 break;
3548 #endif /* XML_DTD */
3549 eventPtr = entityTextPtr;
3550 return XML_ERROR_SYNTAX;
3551 case XML_TOK_NONE:
3552 return XML_ERROR_NONE;
3553 case XML_TOK_ENTITY_REF:
3554 case XML_TOK_DATA_CHARS:
3555 if (!poolAppend(pool, enc, entityTextPtr, next))
3556 return XML_ERROR_NO_MEMORY;
3557 break;
3558 case XML_TOK_TRAILING_CR:
3559 next = entityTextPtr + enc->minBytesPerChar;
3560 /* fall through */
3561 case XML_TOK_DATA_NEWLINE:
3562 if (pool->end == pool->ptr && !poolGrow(pool))
3563 return XML_ERROR_NO_MEMORY;
3564 *(pool->ptr)++ = 0xA;
3565 break;
3566 case XML_TOK_CHAR_REF:
3568 XML_Char buf[XML_ENCODE_MAX];
3569 int i;
3570 int n = XmlCharRefNumber(enc, entityTextPtr);
3571 if (n < 0) {
3572 if (enc == encoding)
3573 eventPtr = entityTextPtr;
3574 return XML_ERROR_BAD_CHAR_REF;
3576 n = XmlEncode(n, (ICHAR *)buf);
3577 if (!n) {
3578 if (enc == encoding)
3579 eventPtr = entityTextPtr;
3580 return XML_ERROR_BAD_CHAR_REF;
3582 for (i = 0; i < n; i++) {
3583 if (pool->end == pool->ptr && !poolGrow(pool))
3584 return XML_ERROR_NO_MEMORY;
3585 *(pool->ptr)++ = buf[i];
3588 break;
3589 case XML_TOK_PARTIAL:
3590 if (enc == encoding)
3591 eventPtr = entityTextPtr;
3592 return XML_ERROR_INVALID_TOKEN;
3593 case XML_TOK_INVALID:
3594 if (enc == encoding)
3595 eventPtr = next;
3596 return XML_ERROR_INVALID_TOKEN;
3597 default:
3598 if (enc == encoding)
3599 eventPtr = entityTextPtr;
3600 return XML_ERROR_UNEXPECTED_STATE;
3602 entityTextPtr = next;
3604 /* not reached */
3607 static void
3608 normalizeLines(XML_Char *s)
3610 XML_Char *p;
3611 for (;; s++) {
3612 if (*s == XML_T('\0'))
3613 return;
3614 if (*s == 0xD)
3615 break;
3617 p = s;
3618 do {
3619 if (*s == 0xD) {
3620 *p++ = 0xA;
3621 if (*++s == 0xA)
3622 s++;
3624 else
3625 *p++ = *s++;
3626 } while (*s);
3627 *p = XML_T('\0');
3630 static int
3631 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3633 const XML_Char *target;
3634 XML_Char *data;
3635 const char *tem;
3636 if (!processingInstructionHandler) {
3637 if (defaultHandler)
3638 reportDefault(parser, enc, start, end);
3639 return 1;
3641 start += enc->minBytesPerChar * 2;
3642 tem = start + XmlNameLength(enc, start);
3643 target = poolStoreString(&tempPool, enc, start, tem);
3644 if (!target)
3645 return 0;
3646 poolFinish(&tempPool);
3647 data = poolStoreString(&tempPool, enc,
3648 XmlSkipS(enc, tem),
3649 end - enc->minBytesPerChar*2);
3650 if (!data)
3651 return 0;
3652 normalizeLines(data);
3653 processingInstructionHandler(handlerArg, target, data);
3654 poolClear(&tempPool);
3655 return 1;
3658 static int
3659 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3661 XML_Char *data;
3662 if (!commentHandler) {
3663 if (defaultHandler)
3664 reportDefault(parser, enc, start, end);
3665 return 1;
3667 data = poolStoreString(&tempPool,
3668 enc,
3669 start + enc->minBytesPerChar * 4,
3670 end - enc->minBytesPerChar * 3);
3671 if (!data)
3672 return 0;
3673 normalizeLines(data);
3674 commentHandler(handlerArg, data);
3675 poolClear(&tempPool);
3676 return 1;
3679 static void
3680 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
3682 if (MUST_CONVERT(enc, s)) {
3683 const char **eventPP;
3684 const char **eventEndPP;
3685 if (enc == encoding) {
3686 eventPP = &eventPtr;
3687 eventEndPP = &eventEndPtr;
3689 else {
3690 eventPP = &(openInternalEntities->internalEventPtr);
3691 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3693 do {
3694 ICHAR *dataPtr = (ICHAR *)dataBuf;
3695 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
3696 *eventEndPP = s;
3697 defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
3698 *eventPP = s;
3699 } while (s != end);
3701 else
3702 defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
3706 static int
3707 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata,
3708 int isId, const XML_Char *value, XML_Parser parser)
3710 DEFAULT_ATTRIBUTE *att;
3711 if (value || isId) {
3712 /* The handling of default attributes gets messed up if we have
3713 a default which duplicates a non-default. */
3714 int i;
3715 for (i = 0; i < type->nDefaultAtts; i++)
3716 if (attId == type->defaultAtts[i].id)
3717 return 1;
3718 if (isId && !type->idAtt && !attId->xmlns)
3719 type->idAtt = attId;
3721 if (type->nDefaultAtts == type->allocDefaultAtts) {
3722 if (type->allocDefaultAtts == 0) {
3723 type->allocDefaultAtts = 8;
3724 type->defaultAtts = MALLOC(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3726 else {
3727 type->allocDefaultAtts *= 2;
3728 type->defaultAtts = REALLOC(type->defaultAtts,
3729 type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3731 if (!type->defaultAtts)
3732 return 0;
3734 att = type->defaultAtts + type->nDefaultAtts;
3735 att->id = attId;
3736 att->value = value;
3737 att->isCdata = isCdata;
3738 if (!isCdata)
3739 attId->maybeTokenized = 1;
3740 type->nDefaultAtts += 1;
3741 return 1;
3744 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
3746 const XML_Char *name;
3747 for (name = elementType->name; *name; name++) {
3748 if (*name == XML_T(':')) {
3749 PREFIX *prefix;
3750 const XML_Char *s;
3751 for (s = elementType->name; s != name; s++) {
3752 if (!poolAppendChar(&dtd.pool, *s))
3753 return 0;
3755 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3756 return 0;
3757 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3758 if (!prefix)
3759 return 0;
3760 if (prefix->name == poolStart(&dtd.pool))
3761 poolFinish(&dtd.pool);
3762 else
3763 poolDiscard(&dtd.pool);
3764 elementType->prefix = prefix;
3768 return 1;
3771 static ATTRIBUTE_ID *
3772 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3774 ATTRIBUTE_ID *id;
3775 const XML_Char *name;
3776 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3777 return 0;
3778 name = poolStoreString(&dtd.pool, enc, start, end);
3779 if (!name)
3780 return 0;
3781 ++name;
3782 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
3783 if (!id)
3784 return 0;
3785 if (id->name != name)
3786 poolDiscard(&dtd.pool);
3787 else {
3788 poolFinish(&dtd.pool);
3789 if (!ns)
3791 else if (name[0] == 'x'
3792 && name[1] == 'm'
3793 && name[2] == 'l'
3794 && name[3] == 'n'
3795 && name[4] == 's'
3796 && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
3797 if (name[5] == '\0')
3798 id->prefix = &dtd.defaultPrefix;
3799 else
3800 id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
3801 id->xmlns = 1;
3803 else {
3804 int i;
3805 for (i = 0; name[i]; i++) {
3806 if (name[i] == XML_T(':')) {
3807 int j;
3808 for (j = 0; j < i; j++) {
3809 if (!poolAppendChar(&dtd.pool, name[j]))
3810 return 0;
3812 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3813 return 0;
3814 id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3815 if (id->prefix->name == poolStart(&dtd.pool))
3816 poolFinish(&dtd.pool);
3817 else
3818 poolDiscard(&dtd.pool);
3819 break;
3824 return id;
3827 #define CONTEXT_SEP XML_T('\f')
3829 static
3830 const XML_Char *getContext(XML_Parser parser)
3832 HASH_TABLE_ITER iter;
3833 int needSep = 0;
3835 if (dtd.defaultPrefix.binding) {
3836 int i;
3837 int len;
3838 if (!poolAppendChar(&tempPool, XML_T('=')))
3839 return 0;
3840 len = dtd.defaultPrefix.binding->uriLen;
3841 if (namespaceSeparator != XML_T('\0'))
3842 len--;
3843 for (i = 0; i < len; i++)
3844 if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
3845 return 0;
3846 needSep = 1;
3849 hashTableIterInit(&iter, &(dtd.prefixes));
3850 for (;;) {
3851 int i;
3852 int len;
3853 const XML_Char *s;
3854 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
3855 if (!prefix)
3856 break;
3857 if (!prefix->binding)
3858 continue;
3859 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
3860 return 0;
3861 for (s = prefix->name; *s; s++)
3862 if (!poolAppendChar(&tempPool, *s))
3863 return 0;
3864 if (!poolAppendChar(&tempPool, XML_T('=')))
3865 return 0;
3866 len = prefix->binding->uriLen;
3867 if (namespaceSeparator != XML_T('\0'))
3868 len--;
3869 for (i = 0; i < len; i++)
3870 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
3871 return 0;
3872 needSep = 1;
3876 hashTableIterInit(&iter, &(dtd.generalEntities));
3877 for (;;) {
3878 const XML_Char *s;
3879 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
3880 if (!e)
3881 break;
3882 if (!e->open)
3883 continue;
3884 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
3885 return 0;
3886 for (s = e->name; *s; s++)
3887 if (!poolAppendChar(&tempPool, *s))
3888 return 0;
3889 needSep = 1;
3892 if (!poolAppendChar(&tempPool, XML_T('\0')))
3893 return 0;
3894 return tempPool.start;
3897 static
3898 int setContext(XML_Parser parser, const XML_Char *context)
3900 const XML_Char *s = context;
3902 while (*context != XML_T('\0')) {
3903 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
3904 ENTITY *e;
3905 if (!poolAppendChar(&tempPool, XML_T('\0')))
3906 return 0;
3907 e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
3908 if (e)
3909 e->open = 1;
3910 if (*s != XML_T('\0'))
3911 s++;
3912 context = s;
3913 poolDiscard(&tempPool);
3915 else if (*s == '=') {
3916 PREFIX *prefix;
3917 if (poolLength(&tempPool) == 0)
3918 prefix = &dtd.defaultPrefix;
3919 else {
3920 if (!poolAppendChar(&tempPool, XML_T('\0')))
3921 return 0;
3922 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
3923 if (!prefix)
3924 return 0;
3925 if (prefix->name == poolStart(&tempPool)) {
3926 prefix->name = poolCopyString(&dtd.pool, prefix->name);
3927 if (!prefix->name)
3928 return 0;
3930 poolDiscard(&tempPool);
3932 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
3933 if (!poolAppendChar(&tempPool, *context))
3934 return 0;
3935 if (!poolAppendChar(&tempPool, XML_T('\0')))
3936 return 0;
3937 if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
3938 return 0;
3939 poolDiscard(&tempPool);
3940 if (*context != XML_T('\0'))
3941 ++context;
3942 s = context;
3944 else {
3945 if (!poolAppendChar(&tempPool, *s))
3946 return 0;
3947 s++;
3950 return 1;
3954 static
3955 void normalizePublicId(XML_Char *publicId)
3957 XML_Char *p = publicId;
3958 XML_Char *s;
3959 for (s = publicId; *s; s++) {
3960 switch (*s) {
3961 case 0x20:
3962 case 0xD:
3963 case 0xA:
3964 if (p != publicId && p[-1] != 0x20)
3965 *p++ = 0x20;
3966 break;
3967 default:
3968 *p++ = *s;
3971 if (p != publicId && p[-1] == 0x20)
3972 --p;
3973 *p = XML_T('\0');
3976 static int dtdInit(DTD *p, XML_Parser parser)
3978 XML_Memory_Handling_Suite *ms = &((Parser *) parser)->m_mem;
3979 poolInit(&(p->pool), ms);
3980 hashTableInit(&(p->generalEntities), ms);
3981 hashTableInit(&(p->elementTypes), ms);
3982 hashTableInit(&(p->attributeIds), ms);
3983 hashTableInit(&(p->prefixes), ms);
3984 p->complete = 1;
3985 p->standalone = 0;
3986 #ifdef XML_DTD
3987 hashTableInit(&(p->paramEntities), ms);
3988 #endif /* XML_DTD */
3989 p->defaultPrefix.name = 0;
3990 p->defaultPrefix.binding = 0;
3992 p->in_eldecl = 0;
3993 p->scaffIndex = 0;
3994 p->scaffLevel = 0;
3995 p->scaffold = 0;
3996 p->contentStringLen = 0;
3997 p->scaffSize = 0;
3998 p->scaffCount = 0;
4000 return 1;
#ifdef XML_DTD

/* Exchange the complete contents of two DTD structures. */
static void dtdSwap(DTD *p1, DTD *p2)
{
  DTD saved = *p1;

  *p1 = *p2;
  *p2 = saved;
}

#endif /* XML_DTD */
4015 static void dtdDestroy(DTD *p, XML_Parser parser)
4017 HASH_TABLE_ITER iter;
4018 hashTableIterInit(&iter, &(p->elementTypes));
4019 for (;;) {
4020 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
4021 if (!e)
4022 break;
4023 if (e->allocDefaultAtts != 0)
4024 FREE(e->defaultAtts);
4026 hashTableDestroy(&(p->generalEntities));
4027 #ifdef XML_DTD
4028 hashTableDestroy(&(p->paramEntities));
4029 #endif /* XML_DTD */
4030 hashTableDestroy(&(p->elementTypes));
4031 hashTableDestroy(&(p->attributeIds));
4032 hashTableDestroy(&(p->prefixes));
4033 poolDestroy(&(p->pool));
4034 if (p->scaffIndex)
4035 FREE(p->scaffIndex);
4036 if (p->scaffold)
4037 FREE(p->scaffold);
4040 /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
4041 The new DTD has already been initialized. */
4043 static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser)
4045 HASH_TABLE_ITER iter;
4047 /* Copy the prefix table. */
4049 hashTableIterInit(&iter, &(oldDtd->prefixes));
4050 for (;;) {
4051 const XML_Char *name;
4052 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
4053 if (!oldP)
4054 break;
4055 name = poolCopyString(&(newDtd->pool), oldP->name);
4056 if (!name)
4057 return 0;
4058 if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
4059 return 0;
4062 hashTableIterInit(&iter, &(oldDtd->attributeIds));
4064 /* Copy the attribute id table. */
4066 for (;;) {
4067 ATTRIBUTE_ID *newA;
4068 const XML_Char *name;
4069 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
4071 if (!oldA)
4072 break;
4073 /* Remember to allocate the scratch byte before the name. */
4074 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
4075 return 0;
4076 name = poolCopyString(&(newDtd->pool), oldA->name);
4077 if (!name)
4078 return 0;
4079 ++name;
4080 newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
4081 if (!newA)
4082 return 0;
4083 newA->maybeTokenized = oldA->maybeTokenized;
4084 if (oldA->prefix) {
4085 newA->xmlns = oldA->xmlns;
4086 if (oldA->prefix == &oldDtd->defaultPrefix)
4087 newA->prefix = &newDtd->defaultPrefix;
4088 else
4089 newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
4093 /* Copy the element type table. */
4095 hashTableIterInit(&iter, &(oldDtd->elementTypes));
4097 for (;;) {
4098 int i;
4099 ELEMENT_TYPE *newE;
4100 const XML_Char *name;
4101 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
4102 if (!oldE)
4103 break;
4104 name = poolCopyString(&(newDtd->pool), oldE->name);
4105 if (!name)
4106 return 0;
4107 newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
4108 if (!newE)
4109 return 0;
4110 if (oldE->nDefaultAtts) {
4111 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
4112 if (!newE->defaultAtts)
4113 return 0;
4115 if (oldE->idAtt)
4116 newE->idAtt = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0);
4117 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
4118 if (oldE->prefix)
4119 newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
4120 for (i = 0; i < newE->nDefaultAtts; i++) {
4121 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
4122 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
4123 if (oldE->defaultAtts[i].value) {
4124 newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
4125 if (!newE->defaultAtts[i].value)
4126 return 0;
4128 else
4129 newE->defaultAtts[i].value = 0;
4133 /* Copy the entity tables. */
4134 if (!copyEntityTable(&(newDtd->generalEntities),
4135 &(newDtd->pool),
4136 &(oldDtd->generalEntities), parser))
4137 return 0;
4139 #ifdef XML_DTD
4140 if (!copyEntityTable(&(newDtd->paramEntities),
4141 &(newDtd->pool),
4142 &(oldDtd->paramEntities), parser))
4143 return 0;
4144 #endif /* XML_DTD */
4146 newDtd->complete = oldDtd->complete;
4147 newDtd->standalone = oldDtd->standalone;
4149 /* Don't want deep copying for scaffolding */
4150 newDtd->in_eldecl = oldDtd->in_eldecl;
4151 newDtd->scaffold = oldDtd->scaffold;
4152 newDtd->contentStringLen = oldDtd->contentStringLen;
4153 newDtd->scaffSize = oldDtd->scaffSize;
4154 newDtd->scaffLevel = oldDtd->scaffLevel;
4155 newDtd->scaffIndex = oldDtd->scaffIndex;
4157 return 1;
4158 } /* End dtdCopy */
4160 static int copyEntityTable(HASH_TABLE *newTable,
4161 STRING_POOL *newPool,
4162 const HASH_TABLE *oldTable,
4163 XML_Parser parser)
4165 HASH_TABLE_ITER iter;
4166 const XML_Char *cachedOldBase = 0;
4167 const XML_Char *cachedNewBase = 0;
4169 hashTableIterInit(&iter, oldTable);
4171 for (;;) {
4172 ENTITY *newE;
4173 const XML_Char *name;
4174 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
4175 if (!oldE)
4176 break;
4177 name = poolCopyString(newPool, oldE->name);
4178 if (!name)
4179 return 0;
4180 newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY));
4181 if (!newE)
4182 return 0;
4183 if (oldE->systemId) {
4184 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
4185 if (!tem)
4186 return 0;
4187 newE->systemId = tem;
4188 if (oldE->base) {
4189 if (oldE->base == cachedOldBase)
4190 newE->base = cachedNewBase;
4191 else {
4192 cachedOldBase = oldE->base;
4193 tem = poolCopyString(newPool, cachedOldBase);
4194 if (!tem)
4195 return 0;
4196 cachedNewBase = newE->base = tem;
4200 else {
4201 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
4202 if (!tem)
4203 return 0;
4204 newE->textPtr = tem;
4205 newE->textLen = oldE->textLen;
4207 if (oldE->notation) {
4208 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
4209 if (!tem)
4210 return 0;
4211 newE->notation = tem;
4214 return 1;
4217 #define INIT_SIZE 64
4219 static
4220 int keyeq(KEY s1, KEY s2)
4222 for (; *s1 == *s2; s1++, s2++)
4223 if (*s1 == 0)
4224 return 1;
4225 return 0;
4228 static
4229 unsigned long hash(KEY s)
4231 unsigned long h = 0;
4232 while (*s)
4233 h = (h << 5) + h + (unsigned char)*s++;
4234 return h;
4237 static
4238 NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize)
4240 size_t i;
4241 if (table->size == 0) {
4242 size_t tsize;
4244 if (!createSize)
4245 return 0;
4246 tsize = INIT_SIZE * sizeof(NAMED *);
4247 table->v = table->mem->malloc_fcn(tsize);
4248 if (!table->v)
4249 return 0;
4250 memset(table->v, 0, tsize);
4251 table->size = INIT_SIZE;
4252 table->usedLim = INIT_SIZE / 2;
4253 i = hash(name) & (table->size - 1);
4255 else {
4256 unsigned long h = hash(name);
4257 for (i = h & (table->size - 1);
4258 table->v[i];
4259 i == 0 ? i = table->size - 1 : --i) {
4260 if (keyeq(name, table->v[i]->name))
4261 return table->v[i];
4263 if (!createSize)
4264 return 0;
4265 if (table->used == table->usedLim) {
4266 /* check for overflow */
4267 size_t newSize = table->size * 2;
4268 size_t tsize = newSize * sizeof(NAMED *);
4269 NAMED **newV = table->mem->malloc_fcn(tsize);
4270 if (!newV)
4271 return 0;
4272 memset(newV, 0, tsize);
4273 for (i = 0; i < table->size; i++)
4274 if (table->v[i]) {
4275 size_t j;
4276 for (j = hash(table->v[i]->name) & (newSize - 1);
4277 newV[j];
4278 j == 0 ? j = newSize - 1 : --j)
4280 newV[j] = table->v[i];
4282 table->mem->free_fcn(table->v);
4283 table->v = newV;
4284 table->size = newSize;
4285 table->usedLim = newSize/2;
4286 for (i = h & (table->size - 1);
4287 table->v[i];
4288 i == 0 ? i = table->size - 1 : --i)
4292 table->v[i] = table->mem->malloc_fcn(createSize);
4293 if (!table->v[i])
4294 return 0;
4295 memset(table->v[i], 0, createSize);
4296 table->v[i]->name = name;
4297 (table->used)++;
4298 return table->v[i];
4301 static
4302 void hashTableDestroy(HASH_TABLE *table)
4304 size_t i;
4305 for (i = 0; i < table->size; i++) {
4306 NAMED *p = table->v[i];
4307 if (p)
4308 table->mem->free_fcn(p);
4310 if (table->v)
4311 table->mem->free_fcn(table->v);
4314 static
4315 void hashTableInit(HASH_TABLE *p, XML_Memory_Handling_Suite *ms)
4317 p->size = 0;
4318 p->usedLim = 0;
4319 p->used = 0;
4320 p->v = 0;
4321 p->mem = ms;
4324 static
4325 void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
4327 iter->p = table->v;
4328 iter->end = iter->p + table->size;
4331 static
4332 NAMED *hashTableIterNext(HASH_TABLE_ITER *iter)
4334 while (iter->p != iter->end) {
4335 NAMED *tem = *(iter->p)++;
4336 if (tem)
4337 return tem;
4339 return 0;
4343 static
4344 void poolInit(STRING_POOL *pool, XML_Memory_Handling_Suite *ms)
4346 pool->blocks = 0;
4347 pool->freeBlocks = 0;
4348 pool->start = 0;
4349 pool->ptr = 0;
4350 pool->end = 0;
4351 pool->mem = ms;
4354 static
4355 void poolClear(STRING_POOL *pool)
4357 if (!pool->freeBlocks)
4358 pool->freeBlocks = pool->blocks;
4359 else {
4360 BLOCK *p = pool->blocks;
4361 while (p) {
4362 BLOCK *tem = p->next;
4363 p->next = pool->freeBlocks;
4364 pool->freeBlocks = p;
4365 p = tem;
4368 pool->blocks = 0;
4369 pool->start = 0;
4370 pool->ptr = 0;
4371 pool->end = 0;
4374 static
4375 void poolDestroy(STRING_POOL *pool)
4377 BLOCK *p = pool->blocks;
4378 while (p) {
4379 BLOCK *tem = p->next;
4380 pool->mem->free_fcn(p);
4381 p = tem;
4383 pool->blocks = 0;
4384 p = pool->freeBlocks;
4385 while (p) {
4386 BLOCK *tem = p->next;
4387 pool->mem->free_fcn(p);
4388 p = tem;
4390 pool->freeBlocks = 0;
4391 pool->ptr = 0;
4392 pool->start = 0;
4393 pool->end = 0;
4396 static
4397 XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
4398 const char *ptr, const char *end)
4400 if (!pool->ptr && !poolGrow(pool))
4401 return 0;
4402 for (;;) {
4403 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
4404 if (ptr == end)
4405 break;
4406 if (!poolGrow(pool))
4407 return 0;
4409 return pool->start;
4412 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
4414 do {
4415 if (!poolAppendChar(pool, *s))
4416 return 0;
4417 } while (*s++);
4418 s = pool->start;
4419 poolFinish(pool);
4420 return s;
4423 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
4425 if (!pool->ptr && !poolGrow(pool))
4426 return 0;
4427 for (; n > 0; --n, s++) {
4428 if (!poolAppendChar(pool, *s))
4429 return 0;
4432 s = pool->start;
4433 poolFinish(pool);
4434 return s;
4437 static
4438 const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s)
4440 while (*s) {
4441 if (!poolAppendChar(pool, *s))
4442 return 0;
4443 s++;
4445 return pool->start;
4446 } /* End poolAppendString */
4448 static
4449 XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
4450 const char *ptr, const char *end)
4452 if (!poolAppend(pool, enc, ptr, end))
4453 return 0;
4454 if (pool->ptr == pool->end && !poolGrow(pool))
4455 return 0;
4456 *(pool->ptr)++ = 0;
4457 return pool->start;
4460 static
4461 int poolGrow(STRING_POOL *pool)
4463 if (pool->freeBlocks) {
4464 if (pool->start == 0) {
4465 pool->blocks = pool->freeBlocks;
4466 pool->freeBlocks = pool->freeBlocks->next;
4467 pool->blocks->next = 0;
4468 pool->start = pool->blocks->s;
4469 pool->end = pool->start + pool->blocks->size;
4470 pool->ptr = pool->start;
4471 return 1;
4473 if (pool->end - pool->start < pool->freeBlocks->size) {
4474 BLOCK *tem = pool->freeBlocks->next;
4475 pool->freeBlocks->next = pool->blocks;
4476 pool->blocks = pool->freeBlocks;
4477 pool->freeBlocks = tem;
4478 memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
4479 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
4480 pool->start = pool->blocks->s;
4481 pool->end = pool->start + pool->blocks->size;
4482 return 1;
4485 if (pool->blocks && pool->start == pool->blocks->s) {
4486 int blockSize = (pool->end - pool->start)*2;
4487 pool->blocks = pool->mem->realloc_fcn(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
4488 if (!pool->blocks)
4489 return 0;
4490 pool->blocks->size = blockSize;
4491 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
4492 pool->start = pool->blocks->s;
4493 pool->end = pool->start + blockSize;
4495 else {
4496 BLOCK *tem;
4497 int blockSize = pool->end - pool->start;
4498 if (blockSize < INIT_BLOCK_SIZE)
4499 blockSize = INIT_BLOCK_SIZE;
4500 else
4501 blockSize *= 2;
4502 tem = pool->mem->malloc_fcn(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
4503 if (!tem)
4504 return 0;
4505 tem->size = blockSize;
4506 tem->next = pool->blocks;
4507 pool->blocks = tem;
4508 if (pool->ptr != pool->start)
4509 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
4510 pool->ptr = tem->s + (pool->ptr - pool->start);
4511 pool->start = tem->s;
4512 pool->end = tem->s + blockSize;
4514 return 1;
4517 static int
4518 nextScaffoldPart(XML_Parser parser)
4520 CONTENT_SCAFFOLD * me;
4521 int next;
4523 if (! dtd.scaffIndex) {
4524 dtd.scaffIndex = MALLOC(groupSize * sizeof(int));
4525 if (! dtd.scaffIndex)
4526 return -1;
4527 dtd.scaffIndex[0] = 0;
4530 if (dtd.scaffCount >= dtd.scaffSize) {
4531 if (dtd.scaffold) {
4532 dtd.scaffSize *= 2;
4533 dtd.scaffold = (CONTENT_SCAFFOLD *) REALLOC(dtd.scaffold,
4534 dtd.scaffSize * sizeof(CONTENT_SCAFFOLD));
4536 else {
4537 dtd.scaffSize = 32;
4538 dtd.scaffold = (CONTENT_SCAFFOLD *) MALLOC(dtd.scaffSize * sizeof(CONTENT_SCAFFOLD));
4540 if (! dtd.scaffold)
4541 return -1;
4543 next = dtd.scaffCount++;
4544 me = &dtd.scaffold[next];
4545 if (dtd.scaffLevel) {
4546 CONTENT_SCAFFOLD *parent = &dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]];
4547 if (parent->lastchild) {
4548 dtd.scaffold[parent->lastchild].nextsib = next;
4550 if (! parent->childcnt)
4551 parent->firstchild = next;
4552 parent->lastchild = next;
4553 parent->childcnt++;
4555 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
4556 return next;
4557 } /* End nextScaffoldPart */
4559 static void
4560 build_node (XML_Parser parser,
4561 int src_node,
4562 XML_Content *dest,
4563 XML_Content **contpos,
4564 char **strpos)
4566 dest->type = dtd.scaffold[src_node].type;
4567 dest->quant = dtd.scaffold[src_node].quant;
4568 if (dest->type == XML_CTYPE_NAME) {
4569 const char *src;
4570 dest->name = *strpos;
4571 src = dtd.scaffold[src_node].name;
4572 for (;;) {
4573 *(*strpos)++ = *src;
4574 if (! *src)
4575 break;
4576 src++;
4578 dest->numchildren = 0;
4579 dest->children = 0;
4581 else {
4582 unsigned int i;
4583 int cn;
4584 dest->numchildren = dtd.scaffold[src_node].childcnt;
4585 dest->children = *contpos;
4586 *contpos += dest->numchildren;
4587 for (i = 0, cn = dtd.scaffold[src_node].firstchild;
4588 i < dest->numchildren;
4589 i++, cn = dtd.scaffold[cn].nextsib) {
4590 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
4592 dest->name = 0;
4594 } /* End build_node */
4596 static XML_Content *
4597 build_model (XML_Parser parser)
4599 XML_Content *ret;
4600 XML_Content *cpos;
4601 char * str;
4602 int allocsize = dtd.scaffCount * sizeof(XML_Content) + dtd.contentStringLen;
4604 ret = MALLOC(allocsize);
4605 if (! ret)
4606 return 0;
4608 str = (char *) (&ret[dtd.scaffCount]);
4609 cpos = &ret[1];
4611 build_node(parser, 0, ret, &cpos, &str);
4612 return ret;
4613 } /* End build_model */
4615 static ELEMENT_TYPE *
4616 getElementType(XML_Parser parser,
4617 const ENCODING *enc,
4618 const char *ptr,
4619 const char *end)
4621 const XML_Char *name = poolStoreString(&dtd.pool, enc, ptr, end);
4622 ELEMENT_TYPE *ret;
4624 if (! name)
4625 return 0;
4626 ret = (ELEMENT_TYPE *) lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
4627 if (! ret)
4628 return 0;
4629 if (ret->name != name)
4630 poolDiscard(&dtd.pool);
4631 else {
4632 poolFinish(&dtd.pool);
4633 if (!setElementTypePrefix(parser, ret))
4634 return 0;
4636 return ret;
4637 } /* End getElementType */