Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / expat / files / lib / xmlparse.c
blobede7b5bb6673eed3fecbe83056da3a906cdafe41
1 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
3 */
5 #include <stddef.h>
6 #include <string.h> /* memset(), memcpy() */
7 #include <assert.h>
8 #include <limits.h> /* UINT_MAX */
9 #include <time.h> /* time() */
11 #define XML_BUILDING_EXPAT 1
13 #ifdef COMPILED_FROM_DSP
14 #include "winconfig.h"
15 #elif defined(MACOS_CLASSIC)
16 #include "macconfig.h"
17 #elif defined(__amigaos__)
18 #include "amigaconfig.h"
19 #elif defined(__WATCOMC__)
20 #include "watcomconfig.h"
21 #elif defined(HAVE_EXPAT_CONFIG_H)
22 #include <expat_config.h>
23 #endif /* ndef COMPILED_FROM_DSP */
25 #include "ascii.h"
26 #include "expat.h"
/* Select the tokenizer entry points and the internal character type that
   match the XML_Char width chosen at build time (UTF-16 when XML_UNICODE
   is defined, UTF-8 otherwise).
*/
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* Using pointer subtraction to convert to integer type. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif


/* Without namespace support the *NS entry points are plain aliases of
   their non-namespace counterparts.
*/
#ifndef XML_NS

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif

/* XML_T() wraps a character literal and XML_L() a string literal so that
   both get the type that matches XML_Char.
*/
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) (const wchar_t)x
#define XML_L(x) L ## x
#else
#define XML_T(x) (const unsigned short)x
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Both arguments are evaluated more than once, so they must be free of
   side effects.
*/
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
78 /* Handle the case where memmove() doesn't exist. */
79 #ifndef HAVE_MEMMOVE
80 #ifdef HAVE_BCOPY
81 #define memmove(d,s,l) bcopy((s),(d),(l))
82 #else
83 #error memmove does not exist on this platform, nor is a substitute available
84 #endif /* HAVE_BCOPY */
85 #endif /* HAVE_MEMMOVE */
87 #include "internal.h"
88 #include "xmltok.h"
89 #include "xmlrole.h"
91 typedef const XML_Char *KEY;
93 typedef struct {
94 KEY name;
95 } NAMED;
97 typedef struct {
98 NAMED **v;
99 unsigned char power;
100 size_t size;
101 size_t used;
102 const XML_Memory_Handling_Suite *mem;
103 } HASH_TABLE;
/* Basic character hash algorithm, taken from Python's string hash:
   h = h * 1000003 ^ character, the constant being a prime number.
   (0xF4243 == 1000003.)
*/
#ifdef XML_UNICODE
#define CHAR_HASH(h, c) \
  (((h) * 0xF4243) ^ (unsigned short)(c))
#else
#define CHAR_HASH(h, c) \
  (((h) * 0xF4243) ^ (unsigned char)(c))
#endif

/* For probing (after a collision) we need a step size relative prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relative prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
130 typedef struct {
131 NAMED **p;
132 NAMED **end;
133 } HASH_TABLE_ITER;
/* Initial sizes for the parser's growable buffers and tables. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
144 typedef struct binding {
145 struct prefix *prefix;
146 struct binding *nextTagBinding;
147 struct binding *prevPrefixBinding;
148 const struct attribute_id *attId;
149 XML_Char *uri;
150 int uriLen;
151 int uriAlloc;
152 } BINDING;
154 typedef struct prefix {
155 const XML_Char *name;
156 BINDING *binding;
157 } PREFIX;
159 typedef struct {
160 const XML_Char *str;
161 const XML_Char *localPart;
162 const XML_Char *prefix;
163 int strLen;
164 int uriLen;
165 int prefixLen;
166 } TAG_NAME;
168 /* TAG represents an open element.
169 The name of the element is stored in both the document and API
170 encodings. The memory buffer 'buf' is a separately-allocated
171 memory area which stores the name. During the XML_Parse()/
172 XMLParseBuffer() when the element is open, the memory for the 'raw'
173 version of the name (in the document encoding) is shared with the
174 document buffer. If the element is open across calls to
175 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
176 contain the 'raw' name as well.
178 A parser re-uses these structures, maintaining a list of allocated
179 TAG objects in a free list.
181 typedef struct tag {
182 struct tag *parent; /* parent of this element */
183 const char *rawName; /* tagName in the original encoding */
184 int rawNameLength;
185 TAG_NAME name; /* tagName in the API encoding */
186 char *buf; /* buffer for name components */
187 char *bufEnd; /* end of the buffer */
188 BINDING *bindings;
189 } TAG;
191 typedef struct {
192 const XML_Char *name;
193 const XML_Char *textPtr;
194 int textLen; /* length in XML_Chars */
195 int processed; /* # of processed bytes - when suspended */
196 const XML_Char *systemId;
197 const XML_Char *base;
198 const XML_Char *publicId;
199 const XML_Char *notation;
200 XML_Bool open;
201 XML_Bool is_param;
202 XML_Bool is_internal; /* true if declared in internal subset outside PE */
203 } ENTITY;
205 typedef struct {
206 enum XML_Content_Type type;
207 enum XML_Content_Quant quant;
208 const XML_Char * name;
209 int firstchild;
210 int lastchild;
211 int childcnt;
212 int nextsib;
213 } CONTENT_SCAFFOLD;
215 #define INIT_SCAFFOLD_ELEMENTS 32
217 typedef struct block {
218 struct block *next;
219 int size;
220 XML_Char s[1];
221 } BLOCK;
223 typedef struct {
224 BLOCK *blocks;
225 BLOCK *freeBlocks;
226 const XML_Char *end;
227 XML_Char *ptr;
228 XML_Char *start;
229 const XML_Memory_Handling_Suite *mem;
230 } STRING_POOL;
232 /* The XML_Char before the name is used to determine whether
233 an attribute has been specified. */
234 typedef struct attribute_id {
235 XML_Char *name;
236 PREFIX *prefix;
237 XML_Bool maybeTokenized;
238 XML_Bool xmlns;
239 } ATTRIBUTE_ID;
241 typedef struct {
242 const ATTRIBUTE_ID *id;
243 XML_Bool isCdata;
244 const XML_Char *value;
245 } DEFAULT_ATTRIBUTE;
247 typedef struct {
248 unsigned long version;
249 unsigned long hash;
250 const XML_Char *uriName;
251 } NS_ATT;
253 typedef struct {
254 const XML_Char *name;
255 PREFIX *prefix;
256 const ATTRIBUTE_ID *idAtt;
257 int nDefaultAtts;
258 int allocDefaultAtts;
259 DEFAULT_ATTRIBUTE *defaultAtts;
260 } ELEMENT_TYPE;
262 typedef struct {
263 HASH_TABLE generalEntities;
264 HASH_TABLE elementTypes;
265 HASH_TABLE attributeIds;
266 HASH_TABLE prefixes;
267 STRING_POOL pool;
268 STRING_POOL entityValuePool;
269 /* false once a parameter entity reference has been skipped */
270 XML_Bool keepProcessing;
271 /* true once an internal or external PE reference has been encountered;
272 this includes the reference to an external subset */
273 XML_Bool hasParamEntityRefs;
274 XML_Bool standalone;
275 #ifdef XML_DTD
276 /* indicates if external PE has been read */
277 XML_Bool paramEntityRead;
278 HASH_TABLE paramEntities;
279 #endif /* XML_DTD */
280 PREFIX defaultPrefix;
281 /* === scaffolding for building content model === */
282 XML_Bool in_eldecl;
283 CONTENT_SCAFFOLD *scaffold;
284 unsigned contentStringLen;
285 unsigned scaffSize;
286 unsigned scaffCount;
287 int scaffLevel;
288 int *scaffIndex;
289 } DTD;
291 typedef struct open_internal_entity {
292 const char *internalEventPtr;
293 const char *internalEventEndPtr;
294 struct open_internal_entity *next;
295 ENTITY *entity;
296 int startTagLevel;
297 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
298 } OPEN_INTERNAL_ENTITY;
300 typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
301 const char *start,
302 const char *end,
303 const char **endPtr);
305 static Processor prologProcessor;
306 static Processor prologInitProcessor;
307 static Processor contentProcessor;
308 static Processor cdataSectionProcessor;
309 #ifdef XML_DTD
310 static Processor ignoreSectionProcessor;
311 static Processor externalParEntProcessor;
312 static Processor externalParEntInitProcessor;
313 static Processor entityValueProcessor;
314 static Processor entityValueInitProcessor;
315 #endif /* XML_DTD */
316 static Processor epilogProcessor;
317 static Processor errorProcessor;
318 static Processor externalEntityInitProcessor;
319 static Processor externalEntityInitProcessor2;
320 static Processor externalEntityInitProcessor3;
321 static Processor externalEntityContentProcessor;
322 static Processor internalEntityProcessor;
324 static enum XML_Error
325 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
326 static enum XML_Error
327 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
328 const char *s, const char *next);
329 static enum XML_Error
330 initializeEncoding(XML_Parser parser);
331 static enum XML_Error
332 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
333 const char *end, int tok, const char *next, const char **nextPtr,
334 XML_Bool haveMore);
335 static enum XML_Error
336 processInternalEntity(XML_Parser parser, ENTITY *entity,
337 XML_Bool betweenDecl);
338 static enum XML_Error
339 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
340 const char *start, const char *end, const char **endPtr,
341 XML_Bool haveMore);
342 static enum XML_Error
343 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
344 const char *end, const char **nextPtr, XML_Bool haveMore);
345 #ifdef XML_DTD
346 static enum XML_Error
347 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
348 const char *end, const char **nextPtr, XML_Bool haveMore);
349 #endif /* XML_DTD */
351 static enum XML_Error
352 storeAtts(XML_Parser parser, const ENCODING *, const char *s,
353 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
354 static enum XML_Error
355 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
356 const XML_Char *uri, BINDING **bindingsPtr);
357 static int
358 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
359 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
360 static enum XML_Error
361 storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
362 const char *, const char *, STRING_POOL *);
363 static enum XML_Error
364 appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
365 const char *, const char *, STRING_POOL *);
366 static ATTRIBUTE_ID *
367 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
368 const char *end);
369 static int
370 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
371 static enum XML_Error
372 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
373 const char *end);
374 static int
375 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
376 const char *start, const char *end);
377 static int
378 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
379 const char *end);
380 static void
381 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
382 const char *end);
384 static const XML_Char * getContext(XML_Parser parser);
385 static XML_Bool
386 setContext(XML_Parser parser, const XML_Char *context);
388 static void FASTCALL normalizePublicId(XML_Char *s);
390 static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
391 /* do not call if parentParser != NULL */
392 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
393 static void
394 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
395 static int
396 dtdCopy(XML_Parser oldParser,
397 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
398 static int
399 copyEntityTable(XML_Parser oldParser,
400 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
401 static NAMED *
402 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
403 static void FASTCALL
404 hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
405 static void FASTCALL hashTableClear(HASH_TABLE *);
406 static void FASTCALL hashTableDestroy(HASH_TABLE *);
407 static void FASTCALL
408 hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
409 static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
411 static void FASTCALL
412 poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
413 static void FASTCALL poolClear(STRING_POOL *);
414 static void FASTCALL poolDestroy(STRING_POOL *);
415 static XML_Char *
416 poolAppend(STRING_POOL *pool, const ENCODING *enc,
417 const char *ptr, const char *end);
418 static XML_Char *
419 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
420 const char *ptr, const char *end);
421 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
422 static const XML_Char * FASTCALL
423 poolCopyString(STRING_POOL *pool, const XML_Char *s);
424 static const XML_Char *
425 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
426 static const XML_Char * FASTCALL
427 poolAppendString(STRING_POOL *pool, const XML_Char *s);
429 static int FASTCALL nextScaffoldPart(XML_Parser parser);
430 static XML_Content * build_model(XML_Parser parser);
431 static ELEMENT_TYPE *
432 getElementType(XML_Parser parser, const ENCODING *enc,
433 const char *ptr, const char *end);
435 static unsigned long generate_hash_secret_salt(void);
436 static XML_Bool startParsing(XML_Parser parser);
438 static XML_Parser
439 parserCreate(const XML_Char *encodingName,
440 const XML_Memory_Handling_Suite *memsuite,
441 const XML_Char *nameSep,
442 DTD *dtd);
444 static void
445 parserInit(XML_Parser parser, const XML_Char *encodingName);
/* Accessors for a STRING_POOL.  The string under construction occupies
   [start, ptr).  Every macro takes a pointer to the pool and may evaluate
   it more than once, so the argument must be free of side effects.

   poolChop      drops the last character
   poolDiscard   abandons the string under construction
   poolFinish    keeps it and starts a new string right after it
   poolAppendChar appends c, calling poolGrow() when the pool is full;
                 evaluates to 0 if poolGrow() fails and to 1 otherwise
*/
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
459 struct XML_ParserStruct {
460 /* The first member must be userData so that the XML_GetUserData
461 macro works. */
462 void *m_userData;
463 void *m_handlerArg;
464 char *m_buffer;
465 const XML_Memory_Handling_Suite m_mem;
466 /* first character to be parsed */
467 const char *m_bufferPtr;
468 /* past last character to be parsed */
469 char *m_bufferEnd;
470 /* allocated end of buffer */
471 const char *m_bufferLim;
472 XML_Index m_parseEndByteIndex;
473 const char *m_parseEndPtr;
474 XML_Char *m_dataBuf;
475 XML_Char *m_dataBufEnd;
476 XML_StartElementHandler m_startElementHandler;
477 XML_EndElementHandler m_endElementHandler;
478 XML_CharacterDataHandler m_characterDataHandler;
479 XML_ProcessingInstructionHandler m_processingInstructionHandler;
480 XML_CommentHandler m_commentHandler;
481 XML_StartCdataSectionHandler m_startCdataSectionHandler;
482 XML_EndCdataSectionHandler m_endCdataSectionHandler;
483 XML_DefaultHandler m_defaultHandler;
484 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
485 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
486 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
487 XML_NotationDeclHandler m_notationDeclHandler;
488 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
489 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
490 XML_NotStandaloneHandler m_notStandaloneHandler;
491 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
492 XML_Parser m_externalEntityRefHandlerArg;
493 XML_SkippedEntityHandler m_skippedEntityHandler;
494 XML_UnknownEncodingHandler m_unknownEncodingHandler;
495 XML_ElementDeclHandler m_elementDeclHandler;
496 XML_AttlistDeclHandler m_attlistDeclHandler;
497 XML_EntityDeclHandler m_entityDeclHandler;
498 XML_XmlDeclHandler m_xmlDeclHandler;
499 const ENCODING *m_encoding;
500 INIT_ENCODING m_initEncoding;
501 const ENCODING *m_internalEncoding;
502 const XML_Char *m_protocolEncodingName;
503 XML_Bool m_ns;
504 XML_Bool m_ns_triplets;
505 void *m_unknownEncodingMem;
506 void *m_unknownEncodingData;
507 void *m_unknownEncodingHandlerData;
508 void (XMLCALL *m_unknownEncodingRelease)(void *);
509 PROLOG_STATE m_prologState;
510 Processor *m_processor;
511 enum XML_Error m_errorCode;
512 const char *m_eventPtr;
513 const char *m_eventEndPtr;
514 const char *m_positionPtr;
515 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
516 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
517 XML_Bool m_defaultExpandInternalEntities;
518 int m_tagLevel;
519 ENTITY *m_declEntity;
520 const XML_Char *m_doctypeName;
521 const XML_Char *m_doctypeSysid;
522 const XML_Char *m_doctypePubid;
523 const XML_Char *m_declAttributeType;
524 const XML_Char *m_declNotationName;
525 const XML_Char *m_declNotationPublicId;
526 ELEMENT_TYPE *m_declElementType;
527 ATTRIBUTE_ID *m_declAttributeId;
528 XML_Bool m_declAttributeIsCdata;
529 XML_Bool m_declAttributeIsId;
530 DTD *m_dtd;
531 const XML_Char *m_curBase;
532 TAG *m_tagStack;
533 TAG *m_freeTagList;
534 BINDING *m_inheritedBindings;
535 BINDING *m_freeBindingList;
536 int m_attsSize;
537 int m_nSpecifiedAtts;
538 int m_idAttIndex;
539 ATTRIBUTE *m_atts;
540 NS_ATT *m_nsAtts;
541 unsigned long m_nsAttsVersion;
542 unsigned char m_nsAttsPower;
543 #ifdef XML_ATTR_INFO
544 XML_AttrInfo *m_attInfo;
545 #endif
546 POSITION m_position;
547 STRING_POOL m_tempPool;
548 STRING_POOL m_temp2Pool;
549 char *m_groupConnector;
550 unsigned int m_groupSize;
551 XML_Char m_namespaceSeparator;
552 XML_Parser m_parentParser;
553 XML_ParsingStatus m_parsingStatus;
554 #ifdef XML_DTD
555 XML_Bool m_isParamEntity;
556 XML_Bool m_useForeignDTD;
557 enum XML_ParamEntityParsing m_paramEntityParsing;
558 #endif
559 unsigned long m_hash_secret_salt;
/* Allocation through the parser's own memory suite.  These macros, like
   every accessor below, expand to an expression that uses a variable
   named 'parser', which must therefore be in scope wherever they are used.
*/
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s)))
#define FREE(p) (parser->m_mem.free_fcn((p)))

/* Short names for the members of struct XML_ParserStruct. */
#define userData (parser->m_userData)
#define handlerArg (parser->m_handlerArg)
#define startElementHandler (parser->m_startElementHandler)
#define endElementHandler (parser->m_endElementHandler)
#define characterDataHandler (parser->m_characterDataHandler)
#define processingInstructionHandler \
        (parser->m_processingInstructionHandler)
#define commentHandler (parser->m_commentHandler)
#define startCdataSectionHandler \
        (parser->m_startCdataSectionHandler)
#define endCdataSectionHandler (parser->m_endCdataSectionHandler)
#define defaultHandler (parser->m_defaultHandler)
#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler \
        (parser->m_unparsedEntityDeclHandler)
#define notationDeclHandler (parser->m_notationDeclHandler)
#define startNamespaceDeclHandler \
        (parser->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
#define notStandaloneHandler (parser->m_notStandaloneHandler)
#define externalEntityRefHandler \
        (parser->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg \
        (parser->m_externalEntityRefHandlerArg)
/* NOTE(review): struct XML_ParserStruct has no m_internalEntityRefHandler
   member, so any use of this macro would fail to compile; it looks like a
   leftover. */
#define internalEntityRefHandler \
        (parser->m_internalEntityRefHandler)
#define skippedEntityHandler (parser->m_skippedEntityHandler)
#define unknownEncodingHandler (parser->m_unknownEncodingHandler)
#define elementDeclHandler (parser->m_elementDeclHandler)
#define attlistDeclHandler (parser->m_attlistDeclHandler)
#define entityDeclHandler (parser->m_entityDeclHandler)
#define xmlDeclHandler (parser->m_xmlDeclHandler)
#define encoding (parser->m_encoding)
#define initEncoding (parser->m_initEncoding)
#define internalEncoding (parser->m_internalEncoding)
#define unknownEncodingMem (parser->m_unknownEncodingMem)
#define unknownEncodingData (parser->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (parser->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (parser->m_unknownEncodingRelease)
#define protocolEncodingName (parser->m_protocolEncodingName)
#define ns (parser->m_ns)
#define ns_triplets (parser->m_ns_triplets)
#define prologState (parser->m_prologState)
#define processor (parser->m_processor)
#define errorCode (parser->m_errorCode)
#define eventPtr (parser->m_eventPtr)
#define eventEndPtr (parser->m_eventEndPtr)
#define positionPtr (parser->m_positionPtr)
#define position (parser->m_position)
#define openInternalEntities (parser->m_openInternalEntities)
#define freeInternalEntities (parser->m_freeInternalEntities)
#define defaultExpandInternalEntities \
        (parser->m_defaultExpandInternalEntities)
#define tagLevel (parser->m_tagLevel)
#define buffer (parser->m_buffer)
#define bufferPtr (parser->m_bufferPtr)
#define bufferEnd (parser->m_bufferEnd)
#define parseEndByteIndex (parser->m_parseEndByteIndex)
#define parseEndPtr (parser->m_parseEndPtr)
#define bufferLim (parser->m_bufferLim)
#define dataBuf (parser->m_dataBuf)
#define dataBufEnd (parser->m_dataBufEnd)
#define _dtd (parser->m_dtd)
#define curBase (parser->m_curBase)
#define declEntity (parser->m_declEntity)
#define doctypeName (parser->m_doctypeName)
#define doctypeSysid (parser->m_doctypeSysid)
#define doctypePubid (parser->m_doctypePubid)
#define declAttributeType (parser->m_declAttributeType)
#define declNotationName (parser->m_declNotationName)
#define declNotationPublicId (parser->m_declNotationPublicId)
#define declElementType (parser->m_declElementType)
#define declAttributeId (parser->m_declAttributeId)
#define declAttributeIsCdata (parser->m_declAttributeIsCdata)
#define declAttributeIsId (parser->m_declAttributeIsId)
#define freeTagList (parser->m_freeTagList)
#define freeBindingList (parser->m_freeBindingList)
#define inheritedBindings (parser->m_inheritedBindings)
#define tagStack (parser->m_tagStack)
#define atts (parser->m_atts)
#define attsSize (parser->m_attsSize)
#define nSpecifiedAtts (parser->m_nSpecifiedAtts)
#define idAttIndex (parser->m_idAttIndex)
#define nsAtts (parser->m_nsAtts)
#define nsAttsVersion (parser->m_nsAttsVersion)
#define nsAttsPower (parser->m_nsAttsPower)
#define attInfo (parser->m_attInfo)
#define tempPool (parser->m_tempPool)
#define temp2Pool (parser->m_temp2Pool)
#define groupConnector (parser->m_groupConnector)
#define groupSize (parser->m_groupSize)
#define namespaceSeparator (parser->m_namespaceSeparator)
#define parentParser (parser->m_parentParser)
#define ps_parsing (parser->m_parsingStatus.parsing)
#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
#ifdef XML_DTD
#define isParamEntity (parser->m_isParamEntity)
#define useForeignDTD (parser->m_useForeignDTD)
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)
670 XML_Parser XMLCALL
671 XML_ParserCreate(const XML_Char *encodingName)
673 return XML_ParserCreate_MM(encodingName, NULL, NULL);
676 XML_Parser XMLCALL
677 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
679 XML_Char tmp[2];
680 *tmp = nsSep;
681 return XML_ParserCreate_MM(encodingName, NULL, tmp);
684 static const XML_Char implicitContext[] = {
685 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
686 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
687 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
688 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
689 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
690 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
/* Produce a salt for the name hash.

   The value is derived from the wall-clock time, the processor time used
   so far and a stack address, then scrambled.  Unlike the former
   srand(time)/rand() implementation this does not reseed the C library's
   global pseudo-random generator behind the application's back, and it
   does not depend on the current second alone.  It is still not a
   cryptographically strong source.

   Returns a non-zero value, because startParsing() treats a salt of 0 as
   "not yet generated".
*/
static unsigned long
generate_hash_secret_salt(void)
{
  unsigned long entropy = (unsigned long)time(NULL);

  entropy ^= (unsigned long)clock() << 11;
  entropy ^= (unsigned long)(size_t)&entropy;

  /* spread the few varying input bits over the whole word */
  entropy ^= entropy >> 15;
  entropy *= 2654435761UL;
  entropy ^= entropy >> 13;

  return entropy != 0 ? entropy : 1UL;
}
701 static XML_Bool /* only valid for root parser */
702 startParsing(XML_Parser parser)
704 /* hash functions must be initialized before setContext() is called */
705 if (hash_secret_salt == 0)
706 hash_secret_salt = generate_hash_secret_salt();
707 if (ns) {
708 /* implicit context only set for root parser, since child
709 parsers (i.e. external entity parsers) will inherit it
711 return setContext(parser, implicitContext);
713 return XML_TRUE;
716 XML_Parser XMLCALL
717 XML_ParserCreate_MM(const XML_Char *encodingName,
718 const XML_Memory_Handling_Suite *memsuite,
719 const XML_Char *nameSep)
721 return parserCreate(encodingName, memsuite, nameSep, NULL);
724 static XML_Parser
725 parserCreate(const XML_Char *encodingName,
726 const XML_Memory_Handling_Suite *memsuite,
727 const XML_Char *nameSep,
728 DTD *dtd)
730 XML_Parser parser;
732 if (memsuite) {
733 XML_Memory_Handling_Suite *mtemp;
734 parser = (XML_Parser)
735 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
736 if (parser != NULL) {
737 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
738 mtemp->malloc_fcn = memsuite->malloc_fcn;
739 mtemp->realloc_fcn = memsuite->realloc_fcn;
740 mtemp->free_fcn = memsuite->free_fcn;
743 else {
744 XML_Memory_Handling_Suite *mtemp;
745 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
746 if (parser != NULL) {
747 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
748 mtemp->malloc_fcn = malloc;
749 mtemp->realloc_fcn = realloc;
750 mtemp->free_fcn = free;
754 if (!parser)
755 return parser;
757 buffer = NULL;
758 bufferLim = NULL;
760 attsSize = INIT_ATTS_SIZE;
761 atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
762 if (atts == NULL) {
763 FREE(parser);
764 return NULL;
766 #ifdef XML_ATTR_INFO
767 attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
768 if (attInfo == NULL) {
769 FREE(atts);
770 FREE(parser);
771 return NULL;
773 #endif
774 dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
775 if (dataBuf == NULL) {
776 FREE(atts);
777 #ifdef XML_ATTR_INFO
778 FREE(attInfo);
779 #endif
780 FREE(parser);
781 return NULL;
783 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
785 if (dtd)
786 _dtd = dtd;
787 else {
788 _dtd = dtdCreate(&parser->m_mem);
789 if (_dtd == NULL) {
790 FREE(dataBuf);
791 FREE(atts);
792 #ifdef XML_ATTR_INFO
793 FREE(attInfo);
794 #endif
795 FREE(parser);
796 return NULL;
800 freeBindingList = NULL;
801 freeTagList = NULL;
802 freeInternalEntities = NULL;
804 groupSize = 0;
805 groupConnector = NULL;
807 unknownEncodingHandler = NULL;
808 unknownEncodingHandlerData = NULL;
810 namespaceSeparator = ASCII_EXCL;
811 ns = XML_FALSE;
812 ns_triplets = XML_FALSE;
814 nsAtts = NULL;
815 nsAttsVersion = 0;
816 nsAttsPower = 0;
818 poolInit(&tempPool, &(parser->m_mem));
819 poolInit(&temp2Pool, &(parser->m_mem));
820 parserInit(parser, encodingName);
822 if (encodingName && !protocolEncodingName) {
823 XML_ParserFree(parser);
824 return NULL;
827 if (nameSep) {
828 ns = XML_TRUE;
829 internalEncoding = XmlGetInternalEncodingNS();
830 namespaceSeparator = *nameSep;
832 else {
833 internalEncoding = XmlGetInternalEncoding();
836 return parser;
839 static void
840 parserInit(XML_Parser parser, const XML_Char *encodingName)
842 processor = prologInitProcessor;
843 XmlPrologStateInit(&prologState);
844 protocolEncodingName = (encodingName != NULL
845 ? poolCopyString(&tempPool, encodingName)
846 : NULL);
847 curBase = NULL;
848 XmlInitEncoding(&initEncoding, &encoding, 0);
849 userData = NULL;
850 handlerArg = NULL;
851 startElementHandler = NULL;
852 endElementHandler = NULL;
853 characterDataHandler = NULL;
854 processingInstructionHandler = NULL;
855 commentHandler = NULL;
856 startCdataSectionHandler = NULL;
857 endCdataSectionHandler = NULL;
858 defaultHandler = NULL;
859 startDoctypeDeclHandler = NULL;
860 endDoctypeDeclHandler = NULL;
861 unparsedEntityDeclHandler = NULL;
862 notationDeclHandler = NULL;
863 startNamespaceDeclHandler = NULL;
864 endNamespaceDeclHandler = NULL;
865 notStandaloneHandler = NULL;
866 externalEntityRefHandler = NULL;
867 externalEntityRefHandlerArg = parser;
868 skippedEntityHandler = NULL;
869 elementDeclHandler = NULL;
870 attlistDeclHandler = NULL;
871 entityDeclHandler = NULL;
872 xmlDeclHandler = NULL;
873 bufferPtr = buffer;
874 bufferEnd = buffer;
875 parseEndByteIndex = 0;
876 parseEndPtr = NULL;
877 declElementType = NULL;
878 declAttributeId = NULL;
879 declEntity = NULL;
880 doctypeName = NULL;
881 doctypeSysid = NULL;
882 doctypePubid = NULL;
883 declAttributeType = NULL;
884 declNotationName = NULL;
885 declNotationPublicId = NULL;
886 declAttributeIsCdata = XML_FALSE;
887 declAttributeIsId = XML_FALSE;
888 memset(&position, 0, sizeof(POSITION));
889 errorCode = XML_ERROR_NONE;
890 eventPtr = NULL;
891 eventEndPtr = NULL;
892 positionPtr = NULL;
893 openInternalEntities = NULL;
894 defaultExpandInternalEntities = XML_TRUE;
895 tagLevel = 0;
896 tagStack = NULL;
897 inheritedBindings = NULL;
898 nSpecifiedAtts = 0;
899 unknownEncodingMem = NULL;
900 unknownEncodingRelease = NULL;
901 unknownEncodingData = NULL;
902 parentParser = NULL;
903 ps_parsing = XML_INITIALIZED;
904 #ifdef XML_DTD
905 isParamEntity = XML_FALSE;
906 useForeignDTD = XML_FALSE;
907 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
908 #endif
909 hash_secret_salt = 0;
912 /* moves list of bindings to freeBindingList */
913 static void FASTCALL
914 moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
916 while (bindings) {
917 BINDING *b = bindings;
918 bindings = bindings->nextTagBinding;
919 b->nextTagBinding = freeBindingList;
920 freeBindingList = b;
924 XML_Bool XMLCALL
925 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
927 TAG *tStk;
928 OPEN_INTERNAL_ENTITY *openEntityList;
929 if (parentParser)
930 return XML_FALSE;
931 /* move tagStack to freeTagList */
932 tStk = tagStack;
933 while (tStk) {
934 TAG *tag = tStk;
935 tStk = tStk->parent;
936 tag->parent = freeTagList;
937 moveToFreeBindingList(parser, tag->bindings);
938 tag->bindings = NULL;
939 freeTagList = tag;
941 /* move openInternalEntities to freeInternalEntities */
942 openEntityList = openInternalEntities;
943 while (openEntityList) {
944 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
945 openEntityList = openEntity->next;
946 openEntity->next = freeInternalEntities;
947 freeInternalEntities = openEntity;
949 moveToFreeBindingList(parser, inheritedBindings);
950 FREE(unknownEncodingMem);
951 if (unknownEncodingRelease)
952 unknownEncodingRelease(unknownEncodingData);
953 poolClear(&tempPool);
954 poolClear(&temp2Pool);
955 parserInit(parser, encodingName);
956 dtdReset(_dtd, &parser->m_mem);
957 return XML_TRUE;
960 enum XML_Status XMLCALL
961 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
963 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
964 XXX There's no way for the caller to determine which of the
965 XXX possible error cases caused the XML_STATUS_ERROR return.
967 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
968 return XML_STATUS_ERROR;
969 if (encodingName == NULL)
970 protocolEncodingName = NULL;
971 else {
972 protocolEncodingName = poolCopyString(&tempPool, encodingName);
973 if (!protocolEncodingName)
974 return XML_STATUS_ERROR;
976 return XML_STATUS_OK;
979 XML_Parser XMLCALL
980 XML_ExternalEntityParserCreate(XML_Parser oldParser,
981 const XML_Char *context,
982 const XML_Char *encodingName)
984 XML_Parser parser = oldParser;
985 DTD *newDtd = NULL;
986 DTD *oldDtd = _dtd;
987 XML_StartElementHandler oldStartElementHandler = startElementHandler;
988 XML_EndElementHandler oldEndElementHandler = endElementHandler;
989 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
990 XML_ProcessingInstructionHandler oldProcessingInstructionHandler
991 = processingInstructionHandler;
992 XML_CommentHandler oldCommentHandler = commentHandler;
993 XML_StartCdataSectionHandler oldStartCdataSectionHandler
994 = startCdataSectionHandler;
995 XML_EndCdataSectionHandler oldEndCdataSectionHandler
996 = endCdataSectionHandler;
997 XML_DefaultHandler oldDefaultHandler = defaultHandler;
998 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler
999 = unparsedEntityDeclHandler;
1000 XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler;
1001 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler
1002 = startNamespaceDeclHandler;
1003 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler
1004 = endNamespaceDeclHandler;
1005 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
1006 XML_ExternalEntityRefHandler oldExternalEntityRefHandler
1007 = externalEntityRefHandler;
1008 XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler;
1009 XML_UnknownEncodingHandler oldUnknownEncodingHandler
1010 = unknownEncodingHandler;
1011 XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler;
1012 XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler;
1013 XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler;
1014 XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler;
1015 ELEMENT_TYPE * oldDeclElementType = declElementType;
1017 void *oldUserData = userData;
1018 void *oldHandlerArg = handlerArg;
1019 XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
1020 XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1021 #ifdef XML_DTD
1022 enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing;
1023 int oldInEntityValue = prologState.inEntityValue;
1024 #endif
1025 XML_Bool oldns_triplets = ns_triplets;
1026 /* Note that the new parser shares the same hash secret as the old
1027 parser, so that dtdCopy and copyEntityTable can lookup values
1028 from hash tables associated with either parser without us having
1029 to worry which hash secrets each table has.
1031 unsigned long oldhash_secret_salt = hash_secret_salt;
1033 #ifdef XML_DTD
1034 if (!context)
1035 newDtd = oldDtd;
1036 #endif /* XML_DTD */
1038 /* Note that the magical uses of the pre-processor to make field
1039 access look more like C++ require that `parser' be overwritten
1040 here. This makes this function more painful to follow than it
1041 would be otherwise.
1043 if (ns) {
1044 XML_Char tmp[2];
1045 *tmp = namespaceSeparator;
1046 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1048 else {
1049 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1052 if (!parser)
1053 return NULL;
1055 startElementHandler = oldStartElementHandler;
1056 endElementHandler = oldEndElementHandler;
1057 characterDataHandler = oldCharacterDataHandler;
1058 processingInstructionHandler = oldProcessingInstructionHandler;
1059 commentHandler = oldCommentHandler;
1060 startCdataSectionHandler = oldStartCdataSectionHandler;
1061 endCdataSectionHandler = oldEndCdataSectionHandler;
1062 defaultHandler = oldDefaultHandler;
1063 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1064 notationDeclHandler = oldNotationDeclHandler;
1065 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1066 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1067 notStandaloneHandler = oldNotStandaloneHandler;
1068 externalEntityRefHandler = oldExternalEntityRefHandler;
1069 skippedEntityHandler = oldSkippedEntityHandler;
1070 unknownEncodingHandler = oldUnknownEncodingHandler;
1071 elementDeclHandler = oldElementDeclHandler;
1072 attlistDeclHandler = oldAttlistDeclHandler;
1073 entityDeclHandler = oldEntityDeclHandler;
1074 xmlDeclHandler = oldXmlDeclHandler;
1075 declElementType = oldDeclElementType;
1076 userData = oldUserData;
1077 if (oldUserData == oldHandlerArg)
1078 handlerArg = userData;
1079 else
1080 handlerArg = parser;
1081 if (oldExternalEntityRefHandlerArg != oldParser)
1082 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1083 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1084 ns_triplets = oldns_triplets;
1085 hash_secret_salt = oldhash_secret_salt;
1086 parentParser = oldParser;
1087 #ifdef XML_DTD
1088 paramEntityParsing = oldParamEntityParsing;
1089 prologState.inEntityValue = oldInEntityValue;
1090 if (context) {
1091 #endif /* XML_DTD */
1092 if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem)
1093 || !setContext(parser, context)) {
1094 XML_ParserFree(parser);
1095 return NULL;
1097 processor = externalEntityInitProcessor;
1098 #ifdef XML_DTD
1100 else {
1101 /* The DTD instance referenced by _dtd is shared between the document's
1102 root parser and external PE parsers, therefore one does not need to
1103 call setContext. In addition, one also *must* not call setContext,
1104 because this would overwrite existing prefix->binding pointers in
1105 _dtd with ones that get destroyed with the external PE parser.
1106 This would leave those prefixes with dangling pointers.
1108 isParamEntity = XML_TRUE;
1109 XmlPrologStateInitExternalEntity(&prologState);
1110 processor = externalParEntInitProcessor;
1112 #endif /* XML_DTD */
1113 return parser;
1116 static void FASTCALL
1117 destroyBindings(BINDING *bindings, XML_Parser parser)
1119 for (;;) {
1120 BINDING *b = bindings;
1121 if (!b)
1122 break;
1123 bindings = b->nextTagBinding;
1124 FREE(b->uri);
1125 FREE(b);
1129 void XMLCALL
1130 XML_ParserFree(XML_Parser parser)
1132 TAG *tagList;
1133 OPEN_INTERNAL_ENTITY *entityList;
1134 if (parser == NULL)
1135 return;
1136 /* free tagStack and freeTagList */
1137 tagList = tagStack;
1138 for (;;) {
1139 TAG *p;
1140 if (tagList == NULL) {
1141 if (freeTagList == NULL)
1142 break;
1143 tagList = freeTagList;
1144 freeTagList = NULL;
1146 p = tagList;
1147 tagList = tagList->parent;
1148 FREE(p->buf);
1149 destroyBindings(p->bindings, parser);
1150 FREE(p);
1152 /* free openInternalEntities and freeInternalEntities */
1153 entityList = openInternalEntities;
1154 for (;;) {
1155 OPEN_INTERNAL_ENTITY *openEntity;
1156 if (entityList == NULL) {
1157 if (freeInternalEntities == NULL)
1158 break;
1159 entityList = freeInternalEntities;
1160 freeInternalEntities = NULL;
1162 openEntity = entityList;
1163 entityList = entityList->next;
1164 FREE(openEntity);
1167 destroyBindings(freeBindingList, parser);
1168 destroyBindings(inheritedBindings, parser);
1169 poolDestroy(&tempPool);
1170 poolDestroy(&temp2Pool);
1171 #ifdef XML_DTD
1172 /* external parameter entity parsers share the DTD structure
1173 parser->m_dtd with the root parser, so we must not destroy it
1175 if (!isParamEntity && _dtd)
1176 #else
1177 if (_dtd)
1178 #endif /* XML_DTD */
1179 dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
1180 FREE((void *)atts);
1181 #ifdef XML_ATTR_INFO
1182 FREE((void *)attInfo);
1183 #endif
1184 FREE(groupConnector);
1185 FREE(buffer);
1186 FREE(dataBuf);
1187 FREE(nsAtts);
1188 FREE(unknownEncodingMem);
1189 if (unknownEncodingRelease)
1190 unknownEncodingRelease(unknownEncodingData);
1191 FREE(parser);
1194 void XMLCALL
1195 XML_UseParserAsHandlerArg(XML_Parser parser)
1197 handlerArg = parser;
1200 enum XML_Error XMLCALL
1201 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1203 #ifdef XML_DTD
1204 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1205 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1206 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1207 useForeignDTD = useDTD;
1208 return XML_ERROR_NONE;
1209 #else
1210 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1211 #endif
1214 void XMLCALL
1215 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1217 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1218 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1219 return;
1220 ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1223 void XMLCALL
1224 XML_SetUserData(XML_Parser parser, void *p)
1226 if (handlerArg == userData)
1227 handlerArg = userData = p;
1228 else
1229 userData = p;
1232 enum XML_Status XMLCALL
1233 XML_SetBase(XML_Parser parser, const XML_Char *p)
1235 if (p) {
1236 p = poolCopyString(&_dtd->pool, p);
1237 if (!p)
1238 return XML_STATUS_ERROR;
1239 curBase = p;
1241 else
1242 curBase = NULL;
1243 return XML_STATUS_OK;
1246 const XML_Char * XMLCALL
1247 XML_GetBase(XML_Parser parser)
1249 return curBase;
1252 int XMLCALL
1253 XML_GetSpecifiedAttributeCount(XML_Parser parser)
1255 return nSpecifiedAtts;
1258 int XMLCALL
1259 XML_GetIdAttributeIndex(XML_Parser parser)
1261 return idAttIndex;
#ifdef XML_ATTR_INFO
/* Returns the parser's attInfo array (XML_ATTR_INFO builds only). */
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  return attInfo;
}
#endif
1272 void XMLCALL
1273 XML_SetElementHandler(XML_Parser parser,
1274 XML_StartElementHandler start,
1275 XML_EndElementHandler end)
1277 startElementHandler = start;
1278 endElementHandler = end;
1281 void XMLCALL
1282 XML_SetStartElementHandler(XML_Parser parser,
1283 XML_StartElementHandler start) {
1284 startElementHandler = start;
1287 void XMLCALL
1288 XML_SetEndElementHandler(XML_Parser parser,
1289 XML_EndElementHandler end) {
1290 endElementHandler = end;
1293 void XMLCALL
1294 XML_SetCharacterDataHandler(XML_Parser parser,
1295 XML_CharacterDataHandler handler)
1297 characterDataHandler = handler;
1300 void XMLCALL
1301 XML_SetProcessingInstructionHandler(XML_Parser parser,
1302 XML_ProcessingInstructionHandler handler)
1304 processingInstructionHandler = handler;
1307 void XMLCALL
1308 XML_SetCommentHandler(XML_Parser parser,
1309 XML_CommentHandler handler)
1311 commentHandler = handler;
1314 void XMLCALL
1315 XML_SetCdataSectionHandler(XML_Parser parser,
1316 XML_StartCdataSectionHandler start,
1317 XML_EndCdataSectionHandler end)
1319 startCdataSectionHandler = start;
1320 endCdataSectionHandler = end;
1323 void XMLCALL
1324 XML_SetStartCdataSectionHandler(XML_Parser parser,
1325 XML_StartCdataSectionHandler start) {
1326 startCdataSectionHandler = start;
1329 void XMLCALL
1330 XML_SetEndCdataSectionHandler(XML_Parser parser,
1331 XML_EndCdataSectionHandler end) {
1332 endCdataSectionHandler = end;
1335 void XMLCALL
1336 XML_SetDefaultHandler(XML_Parser parser,
1337 XML_DefaultHandler handler)
1339 defaultHandler = handler;
1340 defaultExpandInternalEntities = XML_FALSE;
1343 void XMLCALL
1344 XML_SetDefaultHandlerExpand(XML_Parser parser,
1345 XML_DefaultHandler handler)
1347 defaultHandler = handler;
1348 defaultExpandInternalEntities = XML_TRUE;
1351 void XMLCALL
1352 XML_SetDoctypeDeclHandler(XML_Parser parser,
1353 XML_StartDoctypeDeclHandler start,
1354 XML_EndDoctypeDeclHandler end)
1356 startDoctypeDeclHandler = start;
1357 endDoctypeDeclHandler = end;
1360 void XMLCALL
1361 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1362 XML_StartDoctypeDeclHandler start) {
1363 startDoctypeDeclHandler = start;
1366 void XMLCALL
1367 XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1368 XML_EndDoctypeDeclHandler end) {
1369 endDoctypeDeclHandler = end;
1372 void XMLCALL
1373 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1374 XML_UnparsedEntityDeclHandler handler)
1376 unparsedEntityDeclHandler = handler;
1379 void XMLCALL
1380 XML_SetNotationDeclHandler(XML_Parser parser,
1381 XML_NotationDeclHandler handler)
1383 notationDeclHandler = handler;
1386 void XMLCALL
1387 XML_SetNamespaceDeclHandler(XML_Parser parser,
1388 XML_StartNamespaceDeclHandler start,
1389 XML_EndNamespaceDeclHandler end)
1391 startNamespaceDeclHandler = start;
1392 endNamespaceDeclHandler = end;
1395 void XMLCALL
1396 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1397 XML_StartNamespaceDeclHandler start) {
1398 startNamespaceDeclHandler = start;
1401 void XMLCALL
1402 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1403 XML_EndNamespaceDeclHandler end) {
1404 endNamespaceDeclHandler = end;
1407 void XMLCALL
1408 XML_SetNotStandaloneHandler(XML_Parser parser,
1409 XML_NotStandaloneHandler handler)
1411 notStandaloneHandler = handler;
1414 void XMLCALL
1415 XML_SetExternalEntityRefHandler(XML_Parser parser,
1416 XML_ExternalEntityRefHandler handler)
1418 externalEntityRefHandler = handler;
1421 void XMLCALL
1422 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
1424 if (arg)
1425 externalEntityRefHandlerArg = (XML_Parser)arg;
1426 else
1427 externalEntityRefHandlerArg = parser;
1430 void XMLCALL
1431 XML_SetSkippedEntityHandler(XML_Parser parser,
1432 XML_SkippedEntityHandler handler)
1434 skippedEntityHandler = handler;
1437 void XMLCALL
1438 XML_SetUnknownEncodingHandler(XML_Parser parser,
1439 XML_UnknownEncodingHandler handler,
1440 void *data)
1442 unknownEncodingHandler = handler;
1443 unknownEncodingHandlerData = data;
1446 void XMLCALL
1447 XML_SetElementDeclHandler(XML_Parser parser,
1448 XML_ElementDeclHandler eldecl)
1450 elementDeclHandler = eldecl;
1453 void XMLCALL
1454 XML_SetAttlistDeclHandler(XML_Parser parser,
1455 XML_AttlistDeclHandler attdecl)
1457 attlistDeclHandler = attdecl;
1460 void XMLCALL
1461 XML_SetEntityDeclHandler(XML_Parser parser,
1462 XML_EntityDeclHandler handler)
1464 entityDeclHandler = handler;
1467 void XMLCALL
1468 XML_SetXmlDeclHandler(XML_Parser parser,
1469 XML_XmlDeclHandler handler) {
1470 xmlDeclHandler = handler;
1473 int XMLCALL
1474 XML_SetParamEntityParsing(XML_Parser parser,
1475 enum XML_ParamEntityParsing peParsing)
1477 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1478 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1479 return 0;
1480 #ifdef XML_DTD
1481 paramEntityParsing = peParsing;
1482 return 1;
1483 #else
1484 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1485 #endif
1488 int XMLCALL
1489 XML_SetHashSalt(XML_Parser parser,
1490 unsigned long hash_salt)
1492 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1493 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1494 return 0;
1495 hash_secret_salt = hash_salt;
1496 return 1;
1499 enum XML_Status XMLCALL
1500 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
1502 switch (ps_parsing) {
1503 case XML_SUSPENDED:
1504 errorCode = XML_ERROR_SUSPENDED;
1505 return XML_STATUS_ERROR;
1506 case XML_FINISHED:
1507 errorCode = XML_ERROR_FINISHED;
1508 return XML_STATUS_ERROR;
1509 case XML_INITIALIZED:
1510 if (parentParser == NULL && !startParsing(parser)) {
1511 errorCode = XML_ERROR_NO_MEMORY;
1512 return XML_STATUS_ERROR;
1514 default:
1515 ps_parsing = XML_PARSING;
1518 if (len == 0) {
1519 ps_finalBuffer = (XML_Bool)isFinal;
1520 if (!isFinal)
1521 return XML_STATUS_OK;
1522 positionPtr = bufferPtr;
1523 parseEndPtr = bufferEnd;
1525 /* If data are left over from last buffer, and we now know that these
1526 data are the final chunk of input, then we have to check them again
1527 to detect errors based on that fact.
1529 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1531 if (errorCode == XML_ERROR_NONE) {
1532 switch (ps_parsing) {
1533 case XML_SUSPENDED:
1534 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1535 positionPtr = bufferPtr;
1536 return XML_STATUS_SUSPENDED;
1537 case XML_INITIALIZED:
1538 case XML_PARSING:
1539 ps_parsing = XML_FINISHED;
1540 /* fall through */
1541 default:
1542 return XML_STATUS_OK;
1545 eventEndPtr = eventPtr;
1546 processor = errorProcessor;
1547 return XML_STATUS_ERROR;
1549 #ifndef XML_CONTEXT_BYTES
1550 else if (bufferPtr == bufferEnd) {
1551 const char *end;
1552 int nLeftOver;
1553 enum XML_Error result;
1554 parseEndByteIndex += len;
1555 positionPtr = s;
1556 ps_finalBuffer = (XML_Bool)isFinal;
1558 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
1560 if (errorCode != XML_ERROR_NONE) {
1561 eventEndPtr = eventPtr;
1562 processor = errorProcessor;
1563 return XML_STATUS_ERROR;
1565 else {
1566 switch (ps_parsing) {
1567 case XML_SUSPENDED:
1568 result = XML_STATUS_SUSPENDED;
1569 break;
1570 case XML_INITIALIZED:
1571 case XML_PARSING:
1572 if (isFinal) {
1573 ps_parsing = XML_FINISHED;
1574 return XML_STATUS_OK;
1576 /* fall through */
1577 default:
1578 result = XML_STATUS_OK;
1582 XmlUpdatePosition(encoding, positionPtr, end, &position);
1583 nLeftOver = s + len - end;
1584 if (nLeftOver) {
1585 if (buffer == NULL || nLeftOver > bufferLim - buffer) {
1586 /* FIXME avoid integer overflow */
1587 char *temp;
1588 temp = (buffer == NULL
1589 ? (char *)MALLOC(len * 2)
1590 : (char *)REALLOC(buffer, len * 2));
1591 if (temp == NULL) {
1592 errorCode = XML_ERROR_NO_MEMORY;
1593 eventPtr = eventEndPtr = NULL;
1594 processor = errorProcessor;
1595 return XML_STATUS_ERROR;
1597 buffer = temp;
1598 bufferLim = buffer + len * 2;
1600 memcpy(buffer, end, nLeftOver);
1602 bufferPtr = buffer;
1603 bufferEnd = buffer + nLeftOver;
1604 positionPtr = bufferPtr;
1605 parseEndPtr = bufferEnd;
1606 eventPtr = bufferPtr;
1607 eventEndPtr = bufferPtr;
1608 return result;
1610 #endif /* not defined XML_CONTEXT_BYTES */
1611 else {
1612 void *buff = XML_GetBuffer(parser, len);
1613 if (buff == NULL)
1614 return XML_STATUS_ERROR;
1615 else {
1616 memcpy(buff, s, len);
1617 return XML_ParseBuffer(parser, len, isFinal);
1622 enum XML_Status XMLCALL
1623 XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
1625 const char *start;
1626 enum XML_Status result = XML_STATUS_OK;
1628 switch (ps_parsing) {
1629 case XML_SUSPENDED:
1630 errorCode = XML_ERROR_SUSPENDED;
1631 return XML_STATUS_ERROR;
1632 case XML_FINISHED:
1633 errorCode = XML_ERROR_FINISHED;
1634 return XML_STATUS_ERROR;
1635 case XML_INITIALIZED:
1636 if (parentParser == NULL && !startParsing(parser)) {
1637 errorCode = XML_ERROR_NO_MEMORY;
1638 return XML_STATUS_ERROR;
1640 default:
1641 ps_parsing = XML_PARSING;
1644 start = bufferPtr;
1645 positionPtr = start;
1646 bufferEnd += len;
1647 parseEndPtr = bufferEnd;
1648 parseEndByteIndex += len;
1649 ps_finalBuffer = (XML_Bool)isFinal;
1651 errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
1653 if (errorCode != XML_ERROR_NONE) {
1654 eventEndPtr = eventPtr;
1655 processor = errorProcessor;
1656 return XML_STATUS_ERROR;
1658 else {
1659 switch (ps_parsing) {
1660 case XML_SUSPENDED:
1661 result = XML_STATUS_SUSPENDED;
1662 break;
1663 case XML_INITIALIZED:
1664 case XML_PARSING:
1665 if (isFinal) {
1666 ps_parsing = XML_FINISHED;
1667 return result;
1669 default: ; /* should not happen */
1673 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1674 positionPtr = bufferPtr;
1675 return result;
1678 void * XMLCALL
1679 XML_GetBuffer(XML_Parser parser, int len)
1681 /* BEGIN MOZILLA CHANGE (sanity check len) */
1682 if (len < 0) {
1683 errorCode = XML_ERROR_NO_MEMORY;
1684 return NULL;
1686 /* END MOZILLA CHANGE */
1687 switch (ps_parsing) {
1688 case XML_SUSPENDED:
1689 errorCode = XML_ERROR_SUSPENDED;
1690 return NULL;
1691 case XML_FINISHED:
1692 errorCode = XML_ERROR_FINISHED;
1693 return NULL;
1694 default: ;
1697 if (len > bufferLim - bufferEnd) {
1698 int neededSize = len + (int)(bufferEnd - bufferPtr);
1699 /* BEGIN MOZILLA CHANGE (sanity check neededSize) */
1700 if (neededSize < 0) {
1701 errorCode = XML_ERROR_NO_MEMORY;
1702 return NULL;
1704 /* END MOZILLA CHANGE */
1705 #ifdef XML_CONTEXT_BYTES
1706 int keep = (int)(bufferPtr - buffer);
1708 if (keep > XML_CONTEXT_BYTES)
1709 keep = XML_CONTEXT_BYTES;
1710 neededSize += keep;
1711 #endif /* defined XML_CONTEXT_BYTES */
1712 if (neededSize <= bufferLim - buffer) {
1713 #ifdef XML_CONTEXT_BYTES
1714 if (keep < bufferPtr - buffer) {
1715 int offset = (int)(bufferPtr - buffer) - keep;
1716 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
1717 bufferEnd -= offset;
1718 bufferPtr -= offset;
1720 #else
1721 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
1722 bufferEnd = buffer + (bufferEnd - bufferPtr);
1723 bufferPtr = buffer;
1724 #endif /* not defined XML_CONTEXT_BYTES */
1726 else {
1727 char *newBuf;
1728 int bufferSize = (int)(bufferLim - bufferPtr);
1729 if (bufferSize == 0)
1730 bufferSize = INIT_BUFFER_SIZE;
1731 do {
1732 bufferSize *= 2;
1733 /* BEGIN MOZILLA CHANGE (prevent infinite loop on overflow) */
1734 } while (bufferSize < neededSize && bufferSize > 0);
1735 /* END MOZILLA CHANGE */
1736 /* BEGIN MOZILLA CHANGE (sanity check bufferSize) */
1737 if (bufferSize <= 0) {
1738 errorCode = XML_ERROR_NO_MEMORY;
1739 return NULL;
1741 /* END MOZILLA CHANGE */
1742 newBuf = (char *)MALLOC(bufferSize);
1743 if (newBuf == 0) {
1744 errorCode = XML_ERROR_NO_MEMORY;
1745 return NULL;
1747 bufferLim = newBuf + bufferSize;
1748 #ifdef XML_CONTEXT_BYTES
1749 if (bufferPtr) {
1750 int keep = (int)(bufferPtr - buffer);
1751 if (keep > XML_CONTEXT_BYTES)
1752 keep = XML_CONTEXT_BYTES;
1753 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
1754 FREE(buffer);
1755 buffer = newBuf;
1756 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
1757 bufferPtr = buffer + keep;
1759 else {
1760 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1761 bufferPtr = buffer = newBuf;
1763 #else
1764 if (bufferPtr) {
1765 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
1766 FREE(buffer);
1768 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1769 bufferPtr = buffer = newBuf;
1770 #endif /* not defined XML_CONTEXT_BYTES */
1772 eventPtr = eventEndPtr = NULL;
1773 positionPtr = NULL;
1775 return bufferEnd;
1778 enum XML_Status XMLCALL
1779 XML_StopParser(XML_Parser parser, XML_Bool resumable)
1781 switch (ps_parsing) {
1782 case XML_SUSPENDED:
1783 if (resumable) {
1784 errorCode = XML_ERROR_SUSPENDED;
1785 return XML_STATUS_ERROR;
1787 ps_parsing = XML_FINISHED;
1788 break;
1789 case XML_FINISHED:
1790 errorCode = XML_ERROR_FINISHED;
1791 return XML_STATUS_ERROR;
1792 default:
1793 if (resumable) {
1794 #ifdef XML_DTD
1795 if (isParamEntity) {
1796 errorCode = XML_ERROR_SUSPEND_PE;
1797 return XML_STATUS_ERROR;
1799 #endif
1800 ps_parsing = XML_SUSPENDED;
1802 else
1803 ps_parsing = XML_FINISHED;
1805 return XML_STATUS_OK;
1808 enum XML_Status XMLCALL
1809 XML_ResumeParser(XML_Parser parser)
1811 enum XML_Status result = XML_STATUS_OK;
1813 if (ps_parsing != XML_SUSPENDED) {
1814 errorCode = XML_ERROR_NOT_SUSPENDED;
1815 return XML_STATUS_ERROR;
1817 ps_parsing = XML_PARSING;
1819 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1821 if (errorCode != XML_ERROR_NONE) {
1822 eventEndPtr = eventPtr;
1823 processor = errorProcessor;
1824 return XML_STATUS_ERROR;
1826 else {
1827 switch (ps_parsing) {
1828 case XML_SUSPENDED:
1829 result = XML_STATUS_SUSPENDED;
1830 break;
1831 case XML_INITIALIZED:
1832 case XML_PARSING:
1833 if (ps_finalBuffer) {
1834 ps_parsing = XML_FINISHED;
1835 return result;
1837 default: ;
1841 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1842 positionPtr = bufferPtr;
1843 return result;
1846 void XMLCALL
1847 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
1849 assert(status != NULL);
1850 *status = parser->m_parsingStatus;
1853 enum XML_Error XMLCALL
1854 XML_GetErrorCode(XML_Parser parser)
1856 return errorCode;
1859 XML_Index XMLCALL
1860 XML_GetCurrentByteIndex(XML_Parser parser)
1862 if (eventPtr)
1863 return parseEndByteIndex - (parseEndPtr - eventPtr);
1864 return -1;
1867 int XMLCALL
1868 XML_GetCurrentByteCount(XML_Parser parser)
1870 if (eventEndPtr && eventPtr)
1871 return (int)(eventEndPtr - eventPtr);
1872 return 0;
1875 const char * XMLCALL
1876 XML_GetInputContext(XML_Parser parser, int *offset, int *size)
1878 #ifdef XML_CONTEXT_BYTES
1879 if (eventPtr && buffer) {
1880 *offset = (int)(eventPtr - buffer);
1881 *size = (int)(bufferEnd - buffer);
1882 return buffer;
1884 #endif /* defined XML_CONTEXT_BYTES */
1885 return (char *) 0;
1888 XML_Size XMLCALL
1889 XML_GetCurrentLineNumber(XML_Parser parser)
1891 if (eventPtr && eventPtr >= positionPtr) {
1892 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1893 positionPtr = eventPtr;
1895 return position.lineNumber + 1;
1898 XML_Size XMLCALL
1899 XML_GetCurrentColumnNumber(XML_Parser parser)
1901 if (eventPtr && eventPtr >= positionPtr) {
1902 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1903 positionPtr = eventPtr;
1905 return position.columnNumber;
1908 void XMLCALL
1909 XML_FreeContentModel(XML_Parser parser, XML_Content *model)
1911 FREE(model);
1914 void * XMLCALL
1915 XML_MemMalloc(XML_Parser parser, size_t size)
1917 return MALLOC(size);
1920 void * XMLCALL
1921 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
1923 return REALLOC(ptr, size);
1926 void XMLCALL
1927 XML_MemFree(XML_Parser parser, void *ptr)
1929 FREE(ptr);
1932 void XMLCALL
1933 XML_DefaultCurrent(XML_Parser parser)
1935 if (defaultHandler) {
1936 if (openInternalEntities)
1937 reportDefault(parser,
1938 internalEncoding,
1939 openInternalEntities->internalEventPtr,
1940 openInternalEntities->internalEventEndPtr);
1941 else
1942 reportDefault(parser, encoding, eventPtr, eventEndPtr);
1946 const XML_LChar * XMLCALL
1947 XML_ErrorString(enum XML_Error code)
1949 static const XML_LChar* const message[] = {
1951 XML_L("out of memory"),
1952 XML_L("syntax error"),
1953 XML_L("no element found"),
1954 XML_L("not well-formed (invalid token)"),
1955 XML_L("unclosed token"),
1956 XML_L("partial character"),
1957 XML_L("mismatched tag"),
1958 XML_L("duplicate attribute"),
1959 XML_L("junk after document element"),
1960 XML_L("illegal parameter entity reference"),
1961 XML_L("undefined entity"),
1962 XML_L("recursive entity reference"),
1963 XML_L("asynchronous entity"),
1964 XML_L("reference to invalid character number"),
1965 XML_L("reference to binary entity"),
1966 XML_L("reference to external entity in attribute"),
1967 XML_L("XML or text declaration not at start of entity"),
1968 XML_L("unknown encoding"),
1969 XML_L("encoding specified in XML declaration is incorrect"),
1970 XML_L("unclosed CDATA section"),
1971 XML_L("error in processing external entity reference"),
1972 XML_L("document is not standalone"),
1973 XML_L("unexpected parser state - please send a bug report"),
1974 XML_L("entity declared in parameter entity"),
1975 XML_L("requested feature requires XML_DTD support in Expat"),
1976 XML_L("cannot change setting once parsing has begun"),
1977 XML_L("unbound prefix"),
1978 XML_L("must not undeclare prefix"),
1979 XML_L("incomplete markup in parameter entity"),
1980 XML_L("XML declaration not well-formed"),
1981 XML_L("text declaration not well-formed"),
1982 XML_L("illegal character(s) in public id"),
1983 XML_L("parser suspended"),
1984 XML_L("parser not suspended"),
1985 XML_L("parsing aborted"),
1986 XML_L("parsing finished"),
1987 XML_L("cannot suspend in external parameter entity"),
1988 XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
1989 XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
1990 XML_L("prefix must not be bound to one of the reserved namespace names")
1992 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1993 return message[code];
1994 return NULL;
1997 const XML_LChar * XMLCALL
1998 XML_ExpatVersion(void) {
2000 /* V1 is used to string-ize the version number. However, it would
2001 string-ize the actual version macro *names* unless we get them
2002 substituted before being passed to V1. CPP is defined to expand
2003 a macro, then rescan for more expansions. Thus, we use V2 to expand
2004 the version macros, then CPP will expand the resulting V1() macro
2005 with the correct numerals. */
2006 /* ### I'm assuming cpp is portable in this respect... */
2008 #define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2009 #define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2011 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2013 #undef V1
2014 #undef V2
2017 XML_Expat_Version XMLCALL
2018 XML_ExpatVersionInfo(void)
2020 XML_Expat_Version version;
2022 version.major = XML_MAJOR_VERSION;
2023 version.minor = XML_MINOR_VERSION;
2024 version.micro = XML_MICRO_VERSION;
2026 return version;
2029 const XML_Feature * XMLCALL
2030 XML_GetFeatureList(void)
2032 static const XML_Feature features[] = {
2033 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2034 sizeof(XML_Char)},
2035 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2036 sizeof(XML_LChar)},
2037 #ifdef XML_UNICODE
2038 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2039 #endif
2040 #ifdef XML_UNICODE_WCHAR_T
2041 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2042 #endif
2043 #ifdef XML_DTD
2044 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2045 #endif
2046 #ifdef XML_CONTEXT_BYTES
2047 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2048 XML_CONTEXT_BYTES},
2049 #endif
2050 #ifdef XML_MIN_SIZE
2051 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2052 #endif
2053 #ifdef XML_NS
2054 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2055 #endif
2056 #ifdef XML_LARGE_SIZE
2057 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2058 #endif
2059 #ifdef XML_ATTR_INFO
2060 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2061 #endif
2062 {XML_FEATURE_END, NULL, 0}
2065 return features;
2068 /* Initially tag->rawName always points into the parse buffer;
2069 for those TAG instances opened while the current parse buffer was
2070 processed, and not yet closed, we need to store tag->rawName in a more
2071 permanent location, since the parse buffer is about to be discarded.
2073 static XML_Bool
2074 storeRawNames(XML_Parser parser)
2076 TAG *tag = tagStack;
2077 while (tag) {
2078 int bufSize;
2079 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2080 char *rawNameBuf = tag->buf + nameLen;
2081 /* Stop if already stored. Since tagStack is a stack, we can stop
2082 at the first entry that has already been copied; everything
2083 below it in the stack is already been accounted for in a
2084 previous call to this function.
2086 if (tag->rawName == rawNameBuf)
2087 break;
2088 /* For re-use purposes we need to ensure that the
2089 size of tag->buf is a multiple of sizeof(XML_Char).
2091 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2092 if (bufSize > tag->bufEnd - tag->buf) {
2093 char *temp = (char *)REALLOC(tag->buf, bufSize);
2094 if (temp == NULL)
2095 return XML_FALSE;
2096 /* if tag->name.str points to tag->buf (only when namespace
2097 processing is off) then we have to update it
2099 if (tag->name.str == (XML_Char *)tag->buf)
2100 tag->name.str = (XML_Char *)temp;
2101 /* if tag->name.localPart is set (when namespace processing is on)
2102 then update it as well, since it will always point into tag->buf
2104 if (tag->name.localPart)
2105 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2106 (XML_Char *)tag->buf);
2107 tag->buf = temp;
2108 tag->bufEnd = temp + bufSize;
2109 rawNameBuf = temp + nameLen;
2111 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2112 tag->rawName = rawNameBuf;
2113 tag = tag->parent;
2115 return XML_TRUE;
2118 static enum XML_Error PTRCALL
2119 contentProcessor(XML_Parser parser,
2120 const char *start,
2121 const char *end,
2122 const char **endPtr)
2124 enum XML_Error result = doContent(parser, 0, encoding, start, end,
2125 endPtr, (XML_Bool)!ps_finalBuffer);
2126 if (result == XML_ERROR_NONE) {
2127 if (!storeRawNames(parser))
2128 return XML_ERROR_NO_MEMORY;
2130 return result;
2133 static enum XML_Error PTRCALL
2134 externalEntityInitProcessor(XML_Parser parser,
2135 const char *start,
2136 const char *end,
2137 const char **endPtr)
2139 enum XML_Error result = initializeEncoding(parser);
2140 if (result != XML_ERROR_NONE)
2141 return result;
2142 processor = externalEntityInitProcessor2;
2143 return externalEntityInitProcessor2(parser, start, end, endPtr);
2146 static enum XML_Error PTRCALL
2147 externalEntityInitProcessor2(XML_Parser parser,
2148 const char *start,
2149 const char *end,
2150 const char **endPtr)
2152 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2153 int tok = XmlContentTok(encoding, start, end, &next);
2154 switch (tok) {
2155 case XML_TOK_BOM:
2156 /* If we are at the end of the buffer, this would cause the next stage,
2157 i.e. externalEntityInitProcessor3, to pass control directly to
2158 doContent (by detecting XML_TOK_NONE) without processing any xml text
2159 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2161 if (next == end && !ps_finalBuffer) {
2162 *endPtr = next;
2163 return XML_ERROR_NONE;
2165 start = next;
2166 break;
2167 case XML_TOK_PARTIAL:
2168 if (!ps_finalBuffer) {
2169 *endPtr = start;
2170 return XML_ERROR_NONE;
2172 eventPtr = start;
2173 return XML_ERROR_UNCLOSED_TOKEN;
2174 case XML_TOK_PARTIAL_CHAR:
2175 if (!ps_finalBuffer) {
2176 *endPtr = start;
2177 return XML_ERROR_NONE;
2179 eventPtr = start;
2180 return XML_ERROR_PARTIAL_CHAR;
2182 processor = externalEntityInitProcessor3;
2183 return externalEntityInitProcessor3(parser, start, end, endPtr);
2186 static enum XML_Error PTRCALL
2187 externalEntityInitProcessor3(XML_Parser parser,
2188 const char *start,
2189 const char *end,
2190 const char **endPtr)
2192 int tok;
2193 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2194 eventPtr = start;
2195 tok = XmlContentTok(encoding, start, end, &next);
2196 eventEndPtr = next;
2198 switch (tok) {
2199 case XML_TOK_XML_DECL:
2201 enum XML_Error result;
2202 result = processXmlDecl(parser, 1, start, next);
2203 if (result != XML_ERROR_NONE)
2204 return result;
2205 switch (ps_parsing) {
2206 case XML_SUSPENDED:
2207 *endPtr = next;
2208 return XML_ERROR_NONE;
2209 case XML_FINISHED:
2210 return XML_ERROR_ABORTED;
2211 default:
2212 start = next;
2215 break;
2216 case XML_TOK_PARTIAL:
2217 if (!ps_finalBuffer) {
2218 *endPtr = start;
2219 return XML_ERROR_NONE;
2221 return XML_ERROR_UNCLOSED_TOKEN;
2222 case XML_TOK_PARTIAL_CHAR:
2223 if (!ps_finalBuffer) {
2224 *endPtr = start;
2225 return XML_ERROR_NONE;
2227 return XML_ERROR_PARTIAL_CHAR;
2229 processor = externalEntityContentProcessor;
2230 tagLevel = 1;
2231 return externalEntityContentProcessor(parser, start, end, endPtr);
2234 static enum XML_Error PTRCALL
2235 externalEntityContentProcessor(XML_Parser parser,
2236 const char *start,
2237 const char *end,
2238 const char **endPtr)
2240 enum XML_Error result = doContent(parser, 1, encoding, start, end,
2241 endPtr, (XML_Bool)!ps_finalBuffer);
2242 if (result == XML_ERROR_NONE) {
2243 if (!storeRawNames(parser))
2244 return XML_ERROR_NO_MEMORY;
2246 return result;
2249 static enum XML_Error
2250 doContent(XML_Parser parser,
2251 int startTagLevel,
2252 const ENCODING *enc,
2253 const char *s,
2254 const char *end,
2255 const char **nextPtr,
2256 XML_Bool haveMore)
2258 /* save one level of indirection */
2259 DTD * const dtd = _dtd;
2261 const char **eventPP;
2262 const char **eventEndPP;
2263 if (enc == encoding) {
2264 eventPP = &eventPtr;
2265 eventEndPP = &eventEndPtr;
2267 else {
2268 eventPP = &(openInternalEntities->internalEventPtr);
2269 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2271 *eventPP = s;
2273 for (;;) {
2274 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2275 int tok = XmlContentTok(enc, s, end, &next);
2276 *eventEndPP = next;
2277 switch (tok) {
2278 case XML_TOK_TRAILING_CR:
2279 if (haveMore) {
2280 *nextPtr = s;
2281 return XML_ERROR_NONE;
2283 *eventEndPP = end;
2284 if (characterDataHandler) {
2285 XML_Char c = 0xA;
2286 characterDataHandler(handlerArg, &c, 1);
2288 else if (defaultHandler)
2289 reportDefault(parser, enc, s, end);
2290 /* We are at the end of the final buffer, should we check for
2291 XML_SUSPENDED, XML_FINISHED?
2293 if (startTagLevel == 0)
2294 return XML_ERROR_NO_ELEMENTS;
2295 if (tagLevel != startTagLevel)
2296 return XML_ERROR_ASYNC_ENTITY;
2297 *nextPtr = end;
2298 return XML_ERROR_NONE;
2299 case XML_TOK_NONE:
2300 if (haveMore) {
2301 *nextPtr = s;
2302 return XML_ERROR_NONE;
2304 if (startTagLevel > 0) {
2305 if (tagLevel != startTagLevel)
2306 return XML_ERROR_ASYNC_ENTITY;
2307 *nextPtr = s;
2308 return XML_ERROR_NONE;
2310 return XML_ERROR_NO_ELEMENTS;
2311 case XML_TOK_INVALID:
2312 *eventPP = next;
2313 return XML_ERROR_INVALID_TOKEN;
2314 case XML_TOK_PARTIAL:
2315 if (haveMore) {
2316 *nextPtr = s;
2317 return XML_ERROR_NONE;
2319 return XML_ERROR_UNCLOSED_TOKEN;
2320 case XML_TOK_PARTIAL_CHAR:
2321 if (haveMore) {
2322 *nextPtr = s;
2323 return XML_ERROR_NONE;
2325 return XML_ERROR_PARTIAL_CHAR;
2326 case XML_TOK_ENTITY_REF:
2328 const XML_Char *name;
2329 ENTITY *entity;
2330 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2331 s + enc->minBytesPerChar,
2332 next - enc->minBytesPerChar);
2333 if (ch) {
2334 if (characterDataHandler)
2335 characterDataHandler(handlerArg, &ch, 1);
2336 else if (defaultHandler)
2337 reportDefault(parser, enc, s, next);
2338 break;
2340 name = poolStoreString(&dtd->pool, enc,
2341 s + enc->minBytesPerChar,
2342 next - enc->minBytesPerChar);
2343 if (!name)
2344 return XML_ERROR_NO_MEMORY;
2345 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2346 poolDiscard(&dtd->pool);
2347 /* First, determine if a check for an existing declaration is needed;
2348 if yes, check that the entity exists, and that it is internal,
2349 otherwise call the skipped entity or default handler.
2351 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2352 if (!entity)
2353 return XML_ERROR_UNDEFINED_ENTITY;
2354 else if (!entity->is_internal)
2355 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2357 else if (!entity) {
2358 if (skippedEntityHandler)
2359 skippedEntityHandler(handlerArg, name, 0);
2360 else if (defaultHandler)
2361 reportDefault(parser, enc, s, next);
2362 break;
2364 if (entity->open)
2365 return XML_ERROR_RECURSIVE_ENTITY_REF;
2366 if (entity->notation)
2367 return XML_ERROR_BINARY_ENTITY_REF;
2368 if (entity->textPtr) {
2369 enum XML_Error result;
2370 if (!defaultExpandInternalEntities) {
2371 if (skippedEntityHandler)
2372 skippedEntityHandler(handlerArg, entity->name, 0);
2373 else if (defaultHandler)
2374 reportDefault(parser, enc, s, next);
2375 break;
2377 result = processInternalEntity(parser, entity, XML_FALSE);
2378 if (result != XML_ERROR_NONE)
2379 return result;
2381 else if (externalEntityRefHandler) {
2382 const XML_Char *context;
2383 entity->open = XML_TRUE;
2384 context = getContext(parser);
2385 entity->open = XML_FALSE;
2386 if (!context)
2387 return XML_ERROR_NO_MEMORY;
2388 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2389 context,
2390 entity->base,
2391 entity->systemId,
2392 entity->publicId))
2393 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2394 poolDiscard(&tempPool);
2396 else if (defaultHandler)
2397 reportDefault(parser, enc, s, next);
2398 break;
2400 case XML_TOK_START_TAG_NO_ATTS:
2401 /* fall through */
2402 case XML_TOK_START_TAG_WITH_ATTS:
2404 TAG *tag;
2405 enum XML_Error result;
2406 XML_Char *toPtr;
2407 if (freeTagList) {
2408 tag = freeTagList;
2409 freeTagList = freeTagList->parent;
2411 else {
2412 tag = (TAG *)MALLOC(sizeof(TAG));
2413 if (!tag)
2414 return XML_ERROR_NO_MEMORY;
2415 tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2416 if (!tag->buf) {
2417 FREE(tag);
2418 return XML_ERROR_NO_MEMORY;
2420 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2422 tag->bindings = NULL;
2423 tag->parent = tagStack;
2424 tagStack = tag;
2425 tag->name.localPart = NULL;
2426 tag->name.prefix = NULL;
2427 tag->rawName = s + enc->minBytesPerChar;
2428 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2429 ++tagLevel;
2431 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2432 const char *fromPtr = tag->rawName;
2433 toPtr = (XML_Char *)tag->buf;
2434 for (;;) {
2435 int bufSize;
2436 int convLen;
2437 XmlConvert(enc,
2438 &fromPtr, rawNameEnd,
2439 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
2440 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2441 if (fromPtr == rawNameEnd) {
2442 tag->name.strLen = convLen;
2443 break;
2445 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2447 char *temp = (char *)REALLOC(tag->buf, bufSize);
2448 if (temp == NULL)
2449 return XML_ERROR_NO_MEMORY;
2450 tag->buf = temp;
2451 tag->bufEnd = temp + bufSize;
2452 toPtr = (XML_Char *)temp + convLen;
2456 tag->name.str = (XML_Char *)tag->buf;
2457 *toPtr = XML_T('\0');
2458 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2459 if (result)
2460 return result;
2461 if (startElementHandler)
2462 startElementHandler(handlerArg, tag->name.str,
2463 (const XML_Char **)atts);
2464 else if (defaultHandler)
2465 reportDefault(parser, enc, s, next);
2466 poolClear(&tempPool);
2467 break;
2469 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2470 /* fall through */
2471 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2473 const char *rawName = s + enc->minBytesPerChar;
2474 enum XML_Error result;
2475 BINDING *bindings = NULL;
2476 XML_Bool noElmHandlers = XML_TRUE;
2477 TAG_NAME name;
2478 name.str = poolStoreString(&tempPool, enc, rawName,
2479 rawName + XmlNameLength(enc, rawName));
2480 if (!name.str)
2481 return XML_ERROR_NO_MEMORY;
2482 poolFinish(&tempPool);
2483 result = storeAtts(parser, enc, s, &name, &bindings);
2484 if (result)
2485 return result;
2486 poolFinish(&tempPool);
2487 if (startElementHandler) {
2488 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2489 noElmHandlers = XML_FALSE;
2491 if (endElementHandler) {
2492 if (startElementHandler)
2493 *eventPP = *eventEndPP;
2494 endElementHandler(handlerArg, name.str);
2495 noElmHandlers = XML_FALSE;
2497 if (noElmHandlers && defaultHandler)
2498 reportDefault(parser, enc, s, next);
2499 poolClear(&tempPool);
2500 while (bindings) {
2501 BINDING *b = bindings;
2502 if (endNamespaceDeclHandler)
2503 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2504 bindings = bindings->nextTagBinding;
2505 b->nextTagBinding = freeBindingList;
2506 freeBindingList = b;
2507 b->prefix->binding = b->prevPrefixBinding;
2510 if (tagLevel == 0)
2511 return epilogProcessor(parser, next, end, nextPtr);
2512 break;
2513 case XML_TOK_END_TAG:
2514 if (tagLevel == startTagLevel)
2515 return XML_ERROR_ASYNC_ENTITY;
2516 else {
2517 int len;
2518 const char *rawName;
2519 TAG *tag = tagStack;
2520 tagStack = tag->parent;
2521 tag->parent = freeTagList;
2522 freeTagList = tag;
2523 rawName = s + enc->minBytesPerChar*2;
2524 len = XmlNameLength(enc, rawName);
2525 if (len != tag->rawNameLength
2526 || memcmp(tag->rawName, rawName, len) != 0) {
2527 *eventPP = rawName;
2528 return XML_ERROR_TAG_MISMATCH;
2530 --tagLevel;
2531 if (endElementHandler) {
2532 const XML_Char *localPart;
2533 const XML_Char *prefix;
2534 XML_Char *uri;
2535 localPart = tag->name.localPart;
2536 if (ns && localPart) {
2537 /* localPart and prefix may have been overwritten in
2538 tag->name.str, since this points to the binding->uri
2539 buffer which gets re-used; so we have to add them again
2541 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2542 /* don't need to check for space - already done in storeAtts() */
2543 while (*localPart) *uri++ = *localPart++;
2544 prefix = (XML_Char *)tag->name.prefix;
2545 if (ns_triplets && prefix) {
2546 *uri++ = namespaceSeparator;
2547 while (*prefix) *uri++ = *prefix++;
2549 *uri = XML_T('\0');
2551 endElementHandler(handlerArg, tag->name.str);
2553 else if (defaultHandler)
2554 reportDefault(parser, enc, s, next);
2555 while (tag->bindings) {
2556 BINDING *b = tag->bindings;
2557 if (endNamespaceDeclHandler)
2558 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2559 tag->bindings = tag->bindings->nextTagBinding;
2560 b->nextTagBinding = freeBindingList;
2561 freeBindingList = b;
2562 b->prefix->binding = b->prevPrefixBinding;
2564 if (tagLevel == 0)
2565 return epilogProcessor(parser, next, end, nextPtr);
2567 break;
2568 case XML_TOK_CHAR_REF:
2570 int n = XmlCharRefNumber(enc, s);
2571 if (n < 0)
2572 return XML_ERROR_BAD_CHAR_REF;
2573 if (characterDataHandler) {
2574 XML_Char buf[XML_ENCODE_MAX];
2575 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
2577 else if (defaultHandler)
2578 reportDefault(parser, enc, s, next);
2580 break;
2581 case XML_TOK_XML_DECL:
2582 return XML_ERROR_MISPLACED_XML_PI;
2583 case XML_TOK_DATA_NEWLINE:
2584 if (characterDataHandler) {
2585 XML_Char c = 0xA;
2586 characterDataHandler(handlerArg, &c, 1);
2588 else if (defaultHandler)
2589 reportDefault(parser, enc, s, next);
2590 break;
2591 case XML_TOK_CDATA_SECT_OPEN:
2593 enum XML_Error result;
2594 if (startCdataSectionHandler)
2595 startCdataSectionHandler(handlerArg);
2596 #if 0
2597 /* Suppose you doing a transformation on a document that involves
2598 changing only the character data. You set up a defaultHandler
2599 and a characterDataHandler. The defaultHandler simply copies
2600 characters through. The characterDataHandler does the
2601 transformation and writes the characters out escaping them as
2602 necessary. This case will fail to work if we leave out the
2603 following two lines (because & and < inside CDATA sections will
2604 be incorrectly escaped).
2606 However, now we have a start/endCdataSectionHandler, so it seems
2607 easier to let the user deal with this.
2609 else if (characterDataHandler)
2610 characterDataHandler(handlerArg, dataBuf, 0);
2611 #endif
2612 else if (defaultHandler)
2613 reportDefault(parser, enc, s, next);
2614 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2615 if (result != XML_ERROR_NONE)
2616 return result;
2617 else if (!next) {
2618 processor = cdataSectionProcessor;
2619 return result;
2622 break;
2623 case XML_TOK_TRAILING_RSQB:
2624 if (haveMore) {
2625 *nextPtr = s;
2626 return XML_ERROR_NONE;
2628 if (characterDataHandler) {
2629 if (MUST_CONVERT(enc, s)) {
2630 ICHAR *dataPtr = (ICHAR *)dataBuf;
2631 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2632 characterDataHandler(handlerArg, dataBuf,
2633 (int)(dataPtr - (ICHAR *)dataBuf));
2635 else
2636 characterDataHandler(handlerArg,
2637 (XML_Char *)s,
2638 (int)((XML_Char *)end - (XML_Char *)s));
2640 else if (defaultHandler)
2641 reportDefault(parser, enc, s, end);
2642 /* We are at the end of the final buffer, should we check for
2643 XML_SUSPENDED, XML_FINISHED?
2645 if (startTagLevel == 0) {
2646 *eventPP = end;
2647 return XML_ERROR_NO_ELEMENTS;
2649 if (tagLevel != startTagLevel) {
2650 *eventPP = end;
2651 return XML_ERROR_ASYNC_ENTITY;
2653 *nextPtr = end;
2654 return XML_ERROR_NONE;
2655 case XML_TOK_DATA_CHARS:
2657 XML_CharacterDataHandler charDataHandler = characterDataHandler;
2658 if (charDataHandler) {
2659 if (MUST_CONVERT(enc, s)) {
2660 for (;;) {
2661 ICHAR *dataPtr = (ICHAR *)dataBuf;
2662 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
2663 *eventEndPP = s;
2664 charDataHandler(handlerArg, dataBuf,
2665 (int)(dataPtr - (ICHAR *)dataBuf));
2666 if (s == next)
2667 break;
2668 *eventPP = s;
2671 else
2672 charDataHandler(handlerArg,
2673 (XML_Char *)s,
2674 (int)((XML_Char *)next - (XML_Char *)s));
2676 else if (defaultHandler)
2677 reportDefault(parser, enc, s, next);
2679 break;
2680 case XML_TOK_PI:
2681 if (!reportProcessingInstruction(parser, enc, s, next))
2682 return XML_ERROR_NO_MEMORY;
2683 break;
2684 case XML_TOK_COMMENT:
2685 if (!reportComment(parser, enc, s, next))
2686 return XML_ERROR_NO_MEMORY;
2687 break;
2688 default:
2689 if (defaultHandler)
2690 reportDefault(parser, enc, s, next);
2691 break;
2693 *eventPP = s = next;
2694 switch (ps_parsing) {
2695 case XML_SUSPENDED:
2696 *nextPtr = next;
2697 return XML_ERROR_NONE;
2698 case XML_FINISHED:
2699 return XML_ERROR_ABORTED;
2700 default: ;
2703 /* not reached */
2706 /* Precondition: all arguments must be non-NULL;
2707 Purpose:
2708 - normalize attributes
2709 - check attributes for well-formedness
2710 - generate namespace aware attribute names (URI, prefix)
2711 - build list of attributes for startElementHandler
2712 - default attributes
2713 - process namespace declarations (check and report them)
2714 - generate namespace aware element name (URI, prefix)
2716 static enum XML_Error
2717 storeAtts(XML_Parser parser, const ENCODING *enc,
2718 const char *attStr, TAG_NAME *tagNamePtr,
2719 BINDING **bindingsPtr)
2721 DTD * const dtd = _dtd; /* save one level of indirection */
2722 ELEMENT_TYPE *elementType;
2723 int nDefaultAtts;
2724 const XML_Char **appAtts; /* the attribute list for the application */
2725 int attIndex = 0;
2726 int prefixLen;
2727 int i;
2728 int n;
2729 XML_Char *uri;
2730 int nPrefixes = 0;
2731 BINDING *binding;
2732 const XML_Char *localPart;
2734 /* lookup the element type name */
2735 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
2736 if (!elementType) {
2737 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
2738 if (!name)
2739 return XML_ERROR_NO_MEMORY;
2740 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
2741 sizeof(ELEMENT_TYPE));
2742 if (!elementType)
2743 return XML_ERROR_NO_MEMORY;
2744 if (ns && !setElementTypePrefix(parser, elementType))
2745 return XML_ERROR_NO_MEMORY;
2747 nDefaultAtts = elementType->nDefaultAtts;
2749 /* get the attributes from the tokenizer */
2750 n = XmlGetAttributes(enc, attStr, attsSize, atts);
2751 if (n + nDefaultAtts > attsSize) {
2752 int oldAttsSize = attsSize;
2753 ATTRIBUTE *temp;
2754 #ifdef XML_ATTR_INFO
2755 XML_AttrInfo *temp2;
2756 #endif
2757 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
2758 temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
2759 if (temp == NULL)
2760 return XML_ERROR_NO_MEMORY;
2761 atts = temp;
2762 #ifdef XML_ATTR_INFO
2763 temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
2764 if (temp2 == NULL)
2765 return XML_ERROR_NO_MEMORY;
2766 attInfo = temp2;
2767 #endif
2768 if (n > oldAttsSize)
2769 XmlGetAttributes(enc, attStr, n, atts);
2772 appAtts = (const XML_Char **)atts;
2773 for (i = 0; i < n; i++) {
2774 ATTRIBUTE *currAtt = &atts[i];
2775 #ifdef XML_ATTR_INFO
2776 XML_AttrInfo *currAttInfo = &attInfo[i];
2777 #endif
2778 /* add the name and value to the attribute list */
2779 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
2780 currAtt->name
2781 + XmlNameLength(enc, currAtt->name));
2782 if (!attId)
2783 return XML_ERROR_NO_MEMORY;
2784 #ifdef XML_ATTR_INFO
2785 currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
2786 currAttInfo->nameEnd = currAttInfo->nameStart +
2787 XmlNameLength(enc, currAtt->name);
2788 currAttInfo->valueStart = parseEndByteIndex -
2789 (parseEndPtr - currAtt->valuePtr);
2790 currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
2791 #endif
2792 /* Detect duplicate attributes by their QNames. This does not work when
2793 namespace processing is turned on and different prefixes for the same
2794 namespace are used. For this case we have a check further down.
2796 if ((attId->name)[-1]) {
2797 if (enc == encoding)
2798 eventPtr = atts[i].name;
2799 return XML_ERROR_DUPLICATE_ATTRIBUTE;
2801 (attId->name)[-1] = 1;
2802 appAtts[attIndex++] = attId->name;
2803 if (!atts[i].normalized) {
2804 enum XML_Error result;
2805 XML_Bool isCdata = XML_TRUE;
2807 /* figure out whether declared as other than CDATA */
2808 if (attId->maybeTokenized) {
2809 int j;
2810 for (j = 0; j < nDefaultAtts; j++) {
2811 if (attId == elementType->defaultAtts[j].id) {
2812 isCdata = elementType->defaultAtts[j].isCdata;
2813 break;
2818 /* normalize the attribute value */
2819 result = storeAttributeValue(parser, enc, isCdata,
2820 atts[i].valuePtr, atts[i].valueEnd,
2821 &tempPool);
2822 if (result)
2823 return result;
2824 appAtts[attIndex] = poolStart(&tempPool);
2825 poolFinish(&tempPool);
2827 else {
2828 /* the value did not need normalizing */
2829 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
2830 atts[i].valueEnd);
2831 if (appAtts[attIndex] == 0)
2832 return XML_ERROR_NO_MEMORY;
2833 poolFinish(&tempPool);
2835 /* handle prefixed attribute names */
2836 if (attId->prefix) {
2837 if (attId->xmlns) {
2838 /* deal with namespace declarations here */
2839 enum XML_Error result = addBinding(parser, attId->prefix, attId,
2840 appAtts[attIndex], bindingsPtr);
2841 if (result)
2842 return result;
2843 --attIndex;
2845 else {
2846 /* deal with other prefixed names later */
2847 attIndex++;
2848 nPrefixes++;
2849 (attId->name)[-1] = 2;
2852 else
2853 attIndex++;
2856 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
2857 nSpecifiedAtts = attIndex;
2858 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
2859 for (i = 0; i < attIndex; i += 2)
2860 if (appAtts[i] == elementType->idAtt->name) {
2861 idAttIndex = i;
2862 break;
2865 else
2866 idAttIndex = -1;
2868 /* do attribute defaulting */
2869 for (i = 0; i < nDefaultAtts; i++) {
2870 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
2871 if (!(da->id->name)[-1] && da->value) {
2872 if (da->id->prefix) {
2873 if (da->id->xmlns) {
2874 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
2875 da->value, bindingsPtr);
2876 if (result)
2877 return result;
2879 else {
2880 (da->id->name)[-1] = 2;
2881 nPrefixes++;
2882 appAtts[attIndex++] = da->id->name;
2883 appAtts[attIndex++] = da->value;
2886 else {
2887 (da->id->name)[-1] = 1;
2888 appAtts[attIndex++] = da->id->name;
2889 appAtts[attIndex++] = da->value;
2893 appAtts[attIndex] = 0;
2895 /* expand prefixed attribute names, check for duplicates,
2896 and clear flags that say whether attributes were specified */
2897 i = 0;
2898 if (nPrefixes) {
2899 int j; /* hash table index */
2900 unsigned long version = nsAttsVersion;
2901 int nsAttsSize = (int)1 << nsAttsPower;
2902 /* size of hash table must be at least 2 * (# of prefixed attributes) */
2903 if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
2904 NS_ATT *temp;
2905 /* hash table size must also be a power of 2 and >= 8 */
2906 while (nPrefixes >> nsAttsPower++);
2907 if (nsAttsPower < 3)
2908 nsAttsPower = 3;
2909 nsAttsSize = (int)1 << nsAttsPower;
2910 temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
2911 if (!temp)
2912 return XML_ERROR_NO_MEMORY;
2913 nsAtts = temp;
2914 version = 0; /* force re-initialization of nsAtts hash table */
2916 /* using a version flag saves us from initializing nsAtts every time */
2917 if (!version) { /* initialize version flags when version wraps around */
2918 version = INIT_ATTS_VERSION;
2919 for (j = nsAttsSize; j != 0; )
2920 nsAtts[--j].version = version;
2922 nsAttsVersion = --version;
2924 /* expand prefixed names and check for duplicates */
2925 for (; i < attIndex; i += 2) {
2926 const XML_Char *s = appAtts[i];
2927 if (s[-1] == 2) { /* prefixed */
2928 ATTRIBUTE_ID *id;
2929 const BINDING *b;
2930 unsigned long uriHash = hash_secret_salt;
2931 ((XML_Char *)s)[-1] = 0; /* clear flag */
2932 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
2933 b = id->prefix->binding;
2934 if (!b)
2935 return XML_ERROR_UNBOUND_PREFIX;
2937 /* as we expand the name we also calculate its hash value */
2938 for (j = 0; j < b->uriLen; j++) {
2939 const XML_Char c = b->uri[j];
2940 if (!poolAppendChar(&tempPool, c))
2941 return XML_ERROR_NO_MEMORY;
2942 uriHash = CHAR_HASH(uriHash, c);
2944 while (*s++ != XML_T(ASCII_COLON))
2946 do { /* copies null terminator */
2947 const XML_Char c = *s;
2948 if (!poolAppendChar(&tempPool, *s))
2949 return XML_ERROR_NO_MEMORY;
2950 uriHash = CHAR_HASH(uriHash, c);
2951 } while (*s++);
2953 { /* Check hash table for duplicate of expanded name (uriName).
2954 Derived from code in lookup(parser, HASH_TABLE *table, ...).
2956 unsigned char step = 0;
2957 unsigned long mask = nsAttsSize - 1;
2958 j = uriHash & mask; /* index into hash table */
2959 while (nsAtts[j].version == version) {
2960 /* for speed we compare stored hash values first */
2961 if (uriHash == nsAtts[j].hash) {
2962 const XML_Char *s1 = poolStart(&tempPool);
2963 const XML_Char *s2 = nsAtts[j].uriName;
2964 /* s1 is null terminated, but not s2 */
2965 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
2966 if (*s1 == 0)
2967 return XML_ERROR_DUPLICATE_ATTRIBUTE;
2969 if (!step)
2970 step = PROBE_STEP(uriHash, mask, nsAttsPower);
2971 j < step ? (j += nsAttsSize - step) : (j -= step);
2975 if (ns_triplets) { /* append namespace separator and prefix */
2976 tempPool.ptr[-1] = namespaceSeparator;
2977 s = b->prefix->name;
2978 do {
2979 if (!poolAppendChar(&tempPool, *s))
2980 return XML_ERROR_NO_MEMORY;
2981 } while (*s++);
2984 /* store expanded name in attribute list */
2985 s = poolStart(&tempPool);
2986 poolFinish(&tempPool);
2987 appAtts[i] = s;
2989 /* fill empty slot with new version, uriName and hash value */
2990 nsAtts[j].version = version;
2991 nsAtts[j].hash = uriHash;
2992 nsAtts[j].uriName = s;
2994 if (!--nPrefixes) {
2995 i += 2;
2996 break;
2999 else /* not prefixed */
3000 ((XML_Char *)s)[-1] = 0; /* clear flag */
3003 /* clear flags for the remaining attributes */
3004 for (; i < attIndex; i += 2)
3005 ((XML_Char *)(appAtts[i]))[-1] = 0;
3006 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3007 binding->attId->name[-1] = 0;
3009 if (!ns)
3010 return XML_ERROR_NONE;
3012 /* expand the element type name */
3013 if (elementType->prefix) {
3014 binding = elementType->prefix->binding;
3015 if (!binding)
3016 return XML_ERROR_UNBOUND_PREFIX;
3017 localPart = tagNamePtr->str;
3018 while (*localPart++ != XML_T(ASCII_COLON))
3021 else if (dtd->defaultPrefix.binding) {
3022 binding = dtd->defaultPrefix.binding;
3023 localPart = tagNamePtr->str;
3025 else
3026 return XML_ERROR_NONE;
3027 prefixLen = 0;
3028 if (ns_triplets && binding->prefix->name) {
3029 for (; binding->prefix->name[prefixLen++];)
3030 ; /* prefixLen includes null terminator */
3032 tagNamePtr->localPart = localPart;
3033 tagNamePtr->uriLen = binding->uriLen;
3034 tagNamePtr->prefix = binding->prefix->name;
3035 tagNamePtr->prefixLen = prefixLen;
3036 for (i = 0; localPart[i++];)
3037 ; /* i includes null terminator */
3038 n = i + binding->uriLen + prefixLen;
3039 if (n > binding->uriAlloc) {
3040 TAG *p;
3041 uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
3042 if (!uri)
3043 return XML_ERROR_NO_MEMORY;
3044 binding->uriAlloc = n + EXPAND_SPARE;
3045 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3046 for (p = tagStack; p; p = p->parent)
3047 if (p->name.str == binding->uri)
3048 p->name.str = uri;
3049 FREE(binding->uri);
3050 binding->uri = uri;
3052 /* if namespaceSeparator != '\0' then uri includes it already */
3053 uri = binding->uri + binding->uriLen;
3054 memcpy(uri, localPart, i * sizeof(XML_Char));
3055 /* we always have a namespace separator between localPart and prefix */
3056 if (prefixLen) {
3057 uri += i - 1;
3058 *uri = namespaceSeparator; /* replace null terminator */
3059 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3061 tagNamePtr->str = binding->uri;
3062 return XML_ERROR_NONE;
3065 /* addBinding() overwrites the value of prefix->binding without checking.
3066 Therefore one must keep track of the old value outside of addBinding().
3068 static enum XML_Error
3069 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3070 const XML_Char *uri, BINDING **bindingsPtr)
3072 static const XML_Char xmlNamespace[] = {
3073 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3074 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3075 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3076 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3077 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3078 ASCII_e, '\0'
3080 static const int xmlLen =
3081 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3082 static const XML_Char xmlnsNamespace[] = {
3083 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3084 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3085 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3086 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3087 ASCII_SLASH, '\0'
3089 static const int xmlnsLen =
3090 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3092 XML_Bool mustBeXML = XML_FALSE;
3093 XML_Bool isXML = XML_TRUE;
3094 XML_Bool isXMLNS = XML_TRUE;
3096 BINDING *b;
3097 int len;
3099 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3100 if (*uri == XML_T('\0') && prefix->name)
3101 return XML_ERROR_UNDECLARING_PREFIX;
3103 if (prefix->name
3104 && prefix->name[0] == XML_T(ASCII_x)
3105 && prefix->name[1] == XML_T(ASCII_m)
3106 && prefix->name[2] == XML_T(ASCII_l)) {
3108 /* Not allowed to bind xmlns */
3109 if (prefix->name[3] == XML_T(ASCII_n)
3110 && prefix->name[4] == XML_T(ASCII_s)
3111 && prefix->name[5] == XML_T('\0'))
3112 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3114 if (prefix->name[3] == XML_T('\0'))
3115 mustBeXML = XML_TRUE;
3118 for (len = 0; uri[len]; len++) {
3119 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3120 isXML = XML_FALSE;
3122 if (!mustBeXML && isXMLNS
3123 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3124 isXMLNS = XML_FALSE;
3126 isXML = isXML && len == xmlLen;
3127 isXMLNS = isXMLNS && len == xmlnsLen;
3129 if (mustBeXML != isXML)
3130 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3131 : XML_ERROR_RESERVED_NAMESPACE_URI;
3133 if (isXMLNS)
3134 return XML_ERROR_RESERVED_NAMESPACE_URI;
3136 if (namespaceSeparator)
3137 len++;
3138 if (freeBindingList) {
3139 b = freeBindingList;
3140 if (len > b->uriAlloc) {
3141 XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3142 sizeof(XML_Char) * (len + EXPAND_SPARE));
3143 if (temp == NULL)
3144 return XML_ERROR_NO_MEMORY;
3145 b->uri = temp;
3146 b->uriAlloc = len + EXPAND_SPARE;
3148 freeBindingList = b->nextTagBinding;
3150 else {
3151 b = (BINDING *)MALLOC(sizeof(BINDING));
3152 if (!b)
3153 return XML_ERROR_NO_MEMORY;
3154 b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
3155 if (!b->uri) {
3156 FREE(b);
3157 return XML_ERROR_NO_MEMORY;
3159 b->uriAlloc = len + EXPAND_SPARE;
3161 b->uriLen = len;
3162 memcpy(b->uri, uri, len * sizeof(XML_Char));
3163 if (namespaceSeparator)
3164 b->uri[len - 1] = namespaceSeparator;
3165 b->prefix = prefix;
3166 b->attId = attId;
3167 b->prevPrefixBinding = prefix->binding;
3168 /* NULL binding when default namespace undeclared */
3169 if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3170 prefix->binding = NULL;
3171 else
3172 prefix->binding = b;
3173 b->nextTagBinding = *bindingsPtr;
3174 *bindingsPtr = b;
3175 /* if attId == NULL then we are not starting a namespace scope */
3176 if (attId && startNamespaceDeclHandler)
3177 startNamespaceDeclHandler(handlerArg, prefix->name,
3178 prefix->binding ? uri : 0);
3179 return XML_ERROR_NONE;
3182 /* The idea here is to avoid using stack for each CDATA section when
3183 the whole file is parsed with one call.
3185 static enum XML_Error PTRCALL
3186 cdataSectionProcessor(XML_Parser parser,
3187 const char *start,
3188 const char *end,
3189 const char **endPtr)
3191 enum XML_Error result = doCdataSection(parser, encoding, &start, end,
3192 endPtr, (XML_Bool)!ps_finalBuffer);
3193 if (result != XML_ERROR_NONE)
3194 return result;
3195 if (start) {
3196 if (parentParser) { /* we are parsing an external entity */
3197 processor = externalEntityContentProcessor;
3198 return externalEntityContentProcessor(parser, start, end, endPtr);
3200 else {
3201 processor = contentProcessor;
3202 return contentProcessor(parser, start, end, endPtr);
3205 return result;
3208 /* startPtr gets set to non-null if the section is closed, and to null if
3209 the section is not yet closed.
3211 static enum XML_Error
3212 doCdataSection(XML_Parser parser,
3213 const ENCODING *enc,
3214 const char **startPtr,
3215 const char *end,
3216 const char **nextPtr,
3217 XML_Bool haveMore)
3219 const char *s = *startPtr;
3220 const char **eventPP;
3221 const char **eventEndPP;
3222 if (enc == encoding) {
3223 eventPP = &eventPtr;
3224 *eventPP = s;
3225 eventEndPP = &eventEndPtr;
3227 else {
3228 eventPP = &(openInternalEntities->internalEventPtr);
3229 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3231 *eventPP = s;
3232 *startPtr = NULL;
3234 for (;;) {
3235 const char *next;
3236 int tok = XmlCdataSectionTok(enc, s, end, &next);
3237 *eventEndPP = next;
3238 switch (tok) {
3239 case XML_TOK_CDATA_SECT_CLOSE:
3240 if (endCdataSectionHandler)
3241 endCdataSectionHandler(handlerArg);
3242 #if 0
3243 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3244 else if (characterDataHandler)
3245 characterDataHandler(handlerArg, dataBuf, 0);
3246 #endif
3247 else if (defaultHandler)
3248 reportDefault(parser, enc, s, next);
3249 *startPtr = next;
3250 *nextPtr = next;
3251 if (ps_parsing == XML_FINISHED)
3252 return XML_ERROR_ABORTED;
3253 else
3254 return XML_ERROR_NONE;
3255 case XML_TOK_DATA_NEWLINE:
3256 if (characterDataHandler) {
3257 XML_Char c = 0xA;
3258 characterDataHandler(handlerArg, &c, 1);
3260 else if (defaultHandler)
3261 reportDefault(parser, enc, s, next);
3262 break;
3263 case XML_TOK_DATA_CHARS:
3265 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3266 if (charDataHandler) {
3267 if (MUST_CONVERT(enc, s)) {
3268 for (;;) {
3269 ICHAR *dataPtr = (ICHAR *)dataBuf;
3270 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
3271 *eventEndPP = next;
3272 charDataHandler(handlerArg, dataBuf,
3273 (int)(dataPtr - (ICHAR *)dataBuf));
3274 if (s == next)
3275 break;
3276 *eventPP = s;
3279 else
3280 charDataHandler(handlerArg,
3281 (XML_Char *)s,
3282 (int)((XML_Char *)next - (XML_Char *)s));
3284 else if (defaultHandler)
3285 reportDefault(parser, enc, s, next);
3287 break;
3288 case XML_TOK_INVALID:
3289 *eventPP = next;
3290 return XML_ERROR_INVALID_TOKEN;
3291 case XML_TOK_PARTIAL_CHAR:
3292 if (haveMore) {
3293 *nextPtr = s;
3294 return XML_ERROR_NONE;
3296 return XML_ERROR_PARTIAL_CHAR;
3297 case XML_TOK_PARTIAL:
3298 case XML_TOK_NONE:
3299 if (haveMore) {
3300 *nextPtr = s;
3301 return XML_ERROR_NONE;
3303 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3304 default:
3305 *eventPP = next;
3306 return XML_ERROR_UNEXPECTED_STATE;
3309 *eventPP = s = next;
3310 switch (ps_parsing) {
3311 case XML_SUSPENDED:
3312 *nextPtr = next;
3313 return XML_ERROR_NONE;
3314 case XML_FINISHED:
3315 return XML_ERROR_ABORTED;
3316 default: ;
3319 /* not reached */
3322 #ifdef XML_DTD
3324 /* The idea here is to avoid using stack for each IGNORE section when
3325 the whole file is parsed with one call.
3327 static enum XML_Error PTRCALL
3328 ignoreSectionProcessor(XML_Parser parser,
3329 const char *start,
3330 const char *end,
3331 const char **endPtr)
3333 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
3334 endPtr, (XML_Bool)!ps_finalBuffer);
3335 if (result != XML_ERROR_NONE)
3336 return result;
3337 if (start) {
3338 processor = prologProcessor;
3339 return prologProcessor(parser, start, end, endPtr);
3341 return result;
3344 /* startPtr gets set to non-null is the section is closed, and to null
3345 if the section is not yet closed.
3347 static enum XML_Error
3348 doIgnoreSection(XML_Parser parser,
3349 const ENCODING *enc,
3350 const char **startPtr,
3351 const char *end,
3352 const char **nextPtr,
3353 XML_Bool haveMore)
3355 const char *next;
3356 int tok;
3357 const char *s = *startPtr;
3358 const char **eventPP;
3359 const char **eventEndPP;
3360 if (enc == encoding) {
3361 eventPP = &eventPtr;
3362 *eventPP = s;
3363 eventEndPP = &eventEndPtr;
3365 else {
3366 eventPP = &(openInternalEntities->internalEventPtr);
3367 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3369 *eventPP = s;
3370 *startPtr = NULL;
3371 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3372 *eventEndPP = next;
3373 switch (tok) {
3374 case XML_TOK_IGNORE_SECT:
3375 if (defaultHandler)
3376 reportDefault(parser, enc, s, next);
3377 *startPtr = next;
3378 *nextPtr = next;
3379 if (ps_parsing == XML_FINISHED)
3380 return XML_ERROR_ABORTED;
3381 else
3382 return XML_ERROR_NONE;
3383 case XML_TOK_INVALID:
3384 *eventPP = next;
3385 return XML_ERROR_INVALID_TOKEN;
3386 case XML_TOK_PARTIAL_CHAR:
3387 if (haveMore) {
3388 *nextPtr = s;
3389 return XML_ERROR_NONE;
3391 return XML_ERROR_PARTIAL_CHAR;
3392 case XML_TOK_PARTIAL:
3393 case XML_TOK_NONE:
3394 if (haveMore) {
3395 *nextPtr = s;
3396 return XML_ERROR_NONE;
3398 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3399 default:
3400 *eventPP = next;
3401 return XML_ERROR_UNEXPECTED_STATE;
3403 /* not reached */
3406 #endif /* XML_DTD */
3408 static enum XML_Error
3409 initializeEncoding(XML_Parser parser)
3411 const char *s;
3412 #ifdef XML_UNICODE
3413 char encodingBuf[128];
3414 if (!protocolEncodingName)
3415 s = NULL;
3416 else {
3417 int i;
3418 for (i = 0; protocolEncodingName[i]; i++) {
3419 if (i == sizeof(encodingBuf) - 1
3420 || (protocolEncodingName[i] & ~0x7f) != 0) {
3421 encodingBuf[0] = '\0';
3422 break;
3424 encodingBuf[i] = (char)protocolEncodingName[i];
3426 encodingBuf[i] = '\0';
3427 s = encodingBuf;
3429 #else
3430 s = protocolEncodingName;
3431 #endif
3432 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
3433 return XML_ERROR_NONE;
3434 return handleUnknownEncoding(parser, protocolEncodingName);
3437 static enum XML_Error
3438 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
3439 const char *s, const char *next)
3441 const char *encodingName = NULL;
3442 const XML_Char *storedEncName = NULL;
3443 const ENCODING *newEncoding = NULL;
3444 const char *version = NULL;
3445 const char *versionend;
3446 const XML_Char *storedversion = NULL;
3447 int standalone = -1;
3448 if (!(ns
3449 ? XmlParseXmlDeclNS
3450 : XmlParseXmlDecl)(isGeneralTextEntity,
3451 encoding,
3453 next,
3454 &eventPtr,
3455 &version,
3456 &versionend,
3457 &encodingName,
3458 &newEncoding,
3459 &standalone)) {
3460 if (isGeneralTextEntity)
3461 return XML_ERROR_TEXT_DECL;
3462 else
3463 return XML_ERROR_XML_DECL;
3465 if (!isGeneralTextEntity && standalone == 1) {
3466 _dtd->standalone = XML_TRUE;
3467 #ifdef XML_DTD
3468 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3469 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3470 #endif /* XML_DTD */
3472 if (xmlDeclHandler) {
3473 if (encodingName != NULL) {
3474 storedEncName = poolStoreString(&temp2Pool,
3475 encoding,
3476 encodingName,
3477 encodingName
3478 + XmlNameLength(encoding, encodingName));
3479 if (!storedEncName)
3480 return XML_ERROR_NO_MEMORY;
3481 poolFinish(&temp2Pool);
3483 if (version) {
3484 storedversion = poolStoreString(&temp2Pool,
3485 encoding,
3486 version,
3487 versionend - encoding->minBytesPerChar);
3488 if (!storedversion)
3489 return XML_ERROR_NO_MEMORY;
3491 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
3493 else if (defaultHandler)
3494 reportDefault(parser, encoding, s, next);
3495 if (protocolEncodingName == NULL) {
3496 if (newEncoding) {
3497 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
3498 eventPtr = encodingName;
3499 return XML_ERROR_INCORRECT_ENCODING;
3501 encoding = newEncoding;
3503 else if (encodingName) {
3504 enum XML_Error result;
3505 if (!storedEncName) {
3506 storedEncName = poolStoreString(
3507 &temp2Pool, encoding, encodingName,
3508 encodingName + XmlNameLength(encoding, encodingName));
3509 if (!storedEncName)
3510 return XML_ERROR_NO_MEMORY;
3512 result = handleUnknownEncoding(parser, storedEncName);
3513 poolClear(&temp2Pool);
3514 if (result == XML_ERROR_UNKNOWN_ENCODING)
3515 eventPtr = encodingName;
3516 return result;
3520 if (storedEncName || storedversion)
3521 poolClear(&temp2Pool);
3523 return XML_ERROR_NONE;
3526 static enum XML_Error
3527 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
3529 if (unknownEncodingHandler) {
3530 XML_Encoding info;
3531 int i;
3532 for (i = 0; i < 256; i++)
3533 info.map[i] = -1;
3534 info.convert = NULL;
3535 info.data = NULL;
3536 info.release = NULL;
3537 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
3538 &info)) {
3539 ENCODING *enc;
3540 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
3541 if (!unknownEncodingMem) {
3542 if (info.release)
3543 info.release(info.data);
3544 return XML_ERROR_NO_MEMORY;
3546 enc = (ns
3547 ? XmlInitUnknownEncodingNS
3548 : XmlInitUnknownEncoding)(unknownEncodingMem,
3549 info.map,
3550 info.convert,
3551 info.data);
3552 if (enc) {
3553 unknownEncodingData = info.data;
3554 unknownEncodingRelease = info.release;
3555 encoding = enc;
3556 return XML_ERROR_NONE;
3559 if (info.release != NULL)
3560 info.release(info.data);
3562 return XML_ERROR_UNKNOWN_ENCODING;
3565 static enum XML_Error PTRCALL
3566 prologInitProcessor(XML_Parser parser,
3567 const char *s,
3568 const char *end,
3569 const char **nextPtr)
3571 enum XML_Error result = initializeEncoding(parser);
3572 if (result != XML_ERROR_NONE)
3573 return result;
3574 processor = prologProcessor;
3575 return prologProcessor(parser, s, end, nextPtr);
3578 #ifdef XML_DTD
3580 static enum XML_Error PTRCALL
3581 externalParEntInitProcessor(XML_Parser parser,
3582 const char *s,
3583 const char *end,
3584 const char **nextPtr)
3586 enum XML_Error result = initializeEncoding(parser);
3587 if (result != XML_ERROR_NONE)
3588 return result;
3590 /* we know now that XML_Parse(Buffer) has been called,
3591 so we consider the external parameter entity read */
3592 _dtd->paramEntityRead = XML_TRUE;
3594 if (prologState.inEntityValue) {
3595 processor = entityValueInitProcessor;
3596 return entityValueInitProcessor(parser, s, end, nextPtr);
3598 else {
3599 processor = externalParEntProcessor;
3600 return externalParEntProcessor(parser, s, end, nextPtr);
3604 static enum XML_Error PTRCALL
3605 entityValueInitProcessor(XML_Parser parser,
3606 const char *s,
3607 const char *end,
3608 const char **nextPtr)
3610 int tok;
3611 const char *start = s;
3612 const char *next = start;
3613 eventPtr = start;
3615 for (;;) {
3616 tok = XmlPrologTok(encoding, start, end, &next);
3617 eventEndPtr = next;
3618 if (tok <= 0) {
3619 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
3620 *nextPtr = s;
3621 return XML_ERROR_NONE;
3623 switch (tok) {
3624 case XML_TOK_INVALID:
3625 return XML_ERROR_INVALID_TOKEN;
3626 case XML_TOK_PARTIAL:
3627 return XML_ERROR_UNCLOSED_TOKEN;
3628 case XML_TOK_PARTIAL_CHAR:
3629 return XML_ERROR_PARTIAL_CHAR;
3630 case XML_TOK_NONE: /* start == end */
3631 default:
3632 break;
3634 /* found end of entity value - can store it now */
3635 return storeEntityValue(parser, encoding, s, end);
3637 else if (tok == XML_TOK_XML_DECL) {
3638 enum XML_Error result;
3639 result = processXmlDecl(parser, 0, start, next);
3640 if (result != XML_ERROR_NONE)
3641 return result;
3642 switch (ps_parsing) {
3643 case XML_SUSPENDED:
3644 *nextPtr = next;
3645 return XML_ERROR_NONE;
3646 case XML_FINISHED:
3647 return XML_ERROR_ABORTED;
3648 default:
3649 *nextPtr = next;
3651 /* stop scanning for text declaration - we found one */
3652 processor = entityValueProcessor;
3653 return entityValueProcessor(parser, next, end, nextPtr);
3655 /* If we are at the end of the buffer, this would cause XmlPrologTok to
3656 return XML_TOK_NONE on the next call, which would then cause the
3657 function to exit with *nextPtr set to s - that is what we want for other
3658 tokens, but not for the BOM - we would rather like to skip it;
3659 then, when this routine is entered the next time, XmlPrologTok will
3660 return XML_TOK_INVALID, since the BOM is still in the buffer
3662 else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
3663 *nextPtr = next;
3664 return XML_ERROR_NONE;
3666 start = next;
3667 eventPtr = start;
3671 static enum XML_Error PTRCALL
3672 externalParEntProcessor(XML_Parser parser,
3673 const char *s,
3674 const char *end,
3675 const char **nextPtr)
3677 const char *next = s;
3678 int tok;
3680 tok = XmlPrologTok(encoding, s, end, &next);
3681 if (tok <= 0) {
3682 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
3683 *nextPtr = s;
3684 return XML_ERROR_NONE;
3686 switch (tok) {
3687 case XML_TOK_INVALID:
3688 return XML_ERROR_INVALID_TOKEN;
3689 case XML_TOK_PARTIAL:
3690 return XML_ERROR_UNCLOSED_TOKEN;
3691 case XML_TOK_PARTIAL_CHAR:
3692 return XML_ERROR_PARTIAL_CHAR;
3693 case XML_TOK_NONE: /* start == end */
3694 default:
3695 break;
3698 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
3699 However, when parsing an external subset, doProlog will not accept a BOM
3700 as valid, and report a syntax error, so we have to skip the BOM
3702 else if (tok == XML_TOK_BOM) {
3703 s = next;
3704 tok = XmlPrologTok(encoding, s, end, &next);
3707 processor = prologProcessor;
3708 return doProlog(parser, encoding, s, end, tok, next,
3709 nextPtr, (XML_Bool)!ps_finalBuffer);
3712 static enum XML_Error PTRCALL
3713 entityValueProcessor(XML_Parser parser,
3714 const char *s,
3715 const char *end,
3716 const char **nextPtr)
3718 const char *start = s;
3719 const char *next = s;
3720 const ENCODING *enc = encoding;
3721 int tok;
3723 for (;;) {
3724 tok = XmlPrologTok(enc, start, end, &next);
3725 if (tok <= 0) {
3726 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
3727 *nextPtr = s;
3728 return XML_ERROR_NONE;
3730 switch (tok) {
3731 case XML_TOK_INVALID:
3732 return XML_ERROR_INVALID_TOKEN;
3733 case XML_TOK_PARTIAL:
3734 return XML_ERROR_UNCLOSED_TOKEN;
3735 case XML_TOK_PARTIAL_CHAR:
3736 return XML_ERROR_PARTIAL_CHAR;
3737 case XML_TOK_NONE: /* start == end */
3738 default:
3739 break;
3741 /* found end of entity value - can store it now */
3742 return storeEntityValue(parser, enc, s, end);
3744 start = next;
3748 #endif /* XML_DTD */
3750 static enum XML_Error PTRCALL
3751 prologProcessor(XML_Parser parser,
3752 const char *s,
3753 const char *end,
3754 const char **nextPtr)
3756 const char *next = s;
3757 int tok = XmlPrologTok(encoding, s, end, &next);
3758 return doProlog(parser, encoding, s, end, tok, next,
3759 nextPtr, (XML_Bool)!ps_finalBuffer);
3762 static enum XML_Error
3763 doProlog(XML_Parser parser,
3764 const ENCODING *enc,
3765 const char *s,
3766 const char *end,
3767 int tok,
3768 const char *next,
3769 const char **nextPtr,
3770 XML_Bool haveMore)
3772 #ifdef XML_DTD
3773 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
3774 #endif /* XML_DTD */
3775 static const XML_Char atypeCDATA[] =
3776 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
3777 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
3778 static const XML_Char atypeIDREF[] =
3779 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
3780 static const XML_Char atypeIDREFS[] =
3781 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
3782 static const XML_Char atypeENTITY[] =
3783 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
3784 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
3785 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
3786 static const XML_Char atypeNMTOKEN[] = {
3787 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
3788 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
3789 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
3790 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
3791 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
3792 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
3793 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
3795 /* save one level of indirection */
3796 DTD * const dtd = _dtd;
3798 const char **eventPP;
3799 const char **eventEndPP;
3800 enum XML_Content_Quant quant;
3802 if (enc == encoding) {
3803 eventPP = &eventPtr;
3804 eventEndPP = &eventEndPtr;
3806 else {
3807 eventPP = &(openInternalEntities->internalEventPtr);
3808 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3811 for (;;) {
3812 int role;
3813 XML_Bool handleDefault = XML_TRUE;
3814 *eventPP = s;
3815 *eventEndPP = next;
3816 if (tok <= 0) {
3817 if (haveMore && tok != XML_TOK_INVALID) {
3818 *nextPtr = s;
3819 return XML_ERROR_NONE;
3821 switch (tok) {
3822 case XML_TOK_INVALID:
3823 *eventPP = next;
3824 return XML_ERROR_INVALID_TOKEN;
3825 case XML_TOK_PARTIAL:
3826 return XML_ERROR_UNCLOSED_TOKEN;
3827 case XML_TOK_PARTIAL_CHAR:
3828 return XML_ERROR_PARTIAL_CHAR;
3829 case -XML_TOK_PROLOG_S:
3830 tok = -tok;
3831 break;
3832 case XML_TOK_NONE:
3833 #ifdef XML_DTD
3834 /* for internal PE NOT referenced between declarations */
3835 if (enc != encoding && !openInternalEntities->betweenDecl) {
3836 *nextPtr = s;
3837 return XML_ERROR_NONE;
3839 /* WFC: PE Between Declarations - must check that PE contains
3840 complete markup, not only for external PEs, but also for
3841 internal PEs if the reference occurs between declarations.
3843 if (isParamEntity || enc != encoding) {
3844 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
3845 == XML_ROLE_ERROR)
3846 return XML_ERROR_INCOMPLETE_PE;
3847 *nextPtr = s;
3848 return XML_ERROR_NONE;
3850 #endif /* XML_DTD */
3851 return XML_ERROR_NO_ELEMENTS;
3852 default:
3853 tok = -tok;
3854 next = end;
3855 break;
3858 role = XmlTokenRole(&prologState, tok, s, next, enc);
3859 switch (role) {
3860 case XML_ROLE_XML_DECL:
3862 enum XML_Error result = processXmlDecl(parser, 0, s, next);
3863 if (result != XML_ERROR_NONE)
3864 return result;
3865 enc = encoding;
3866 handleDefault = XML_FALSE;
3868 break;
3869 case XML_ROLE_DOCTYPE_NAME:
3870 if (startDoctypeDeclHandler) {
3871 doctypeName = poolStoreString(&tempPool, enc, s, next);
3872 if (!doctypeName)
3873 return XML_ERROR_NO_MEMORY;
3874 poolFinish(&tempPool);
3875 doctypePubid = NULL;
3876 handleDefault = XML_FALSE;
3878 doctypeSysid = NULL; /* always initialize to NULL */
3879 break;
3880 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
3881 if (startDoctypeDeclHandler) {
3882 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
3883 doctypePubid, 1);
3884 doctypeName = NULL;
3885 poolClear(&tempPool);
3886 handleDefault = XML_FALSE;
3888 break;
3889 #ifdef XML_DTD
3890 case XML_ROLE_TEXT_DECL:
3892 enum XML_Error result = processXmlDecl(parser, 1, s, next);
3893 if (result != XML_ERROR_NONE)
3894 return result;
3895 enc = encoding;
3896 handleDefault = XML_FALSE;
3898 break;
3899 #endif /* XML_DTD */
3900 case XML_ROLE_DOCTYPE_PUBLIC_ID:
3901 #ifdef XML_DTD
3902 useForeignDTD = XML_FALSE;
3903 declEntity = (ENTITY *)lookup(parser,
3904 &dtd->paramEntities,
3905 externalSubsetName,
3906 sizeof(ENTITY));
3907 if (!declEntity)
3908 return XML_ERROR_NO_MEMORY;
3909 #endif /* XML_DTD */
3910 dtd->hasParamEntityRefs = XML_TRUE;
3911 if (startDoctypeDeclHandler) {
3912 XML_Char *pubId;
3913 if (!XmlIsPublicId(enc, s, next, eventPP))
3914 return XML_ERROR_PUBLICID;
3915 pubId = poolStoreString(&tempPool, enc,
3916 s + enc->minBytesPerChar,
3917 next - enc->minBytesPerChar);
3918 if (!pubId)
3919 return XML_ERROR_NO_MEMORY;
3920 normalizePublicId(pubId);
3921 poolFinish(&tempPool);
3922 doctypePubid = pubId;
3923 handleDefault = XML_FALSE;
3924 goto alreadyChecked;
3926 /* fall through */
3927 case XML_ROLE_ENTITY_PUBLIC_ID:
3928 if (!XmlIsPublicId(enc, s, next, eventPP))
3929 return XML_ERROR_PUBLICID;
3930 alreadyChecked:
3931 if (dtd->keepProcessing && declEntity) {
3932 XML_Char *tem = poolStoreString(&dtd->pool,
3933 enc,
3934 s + enc->minBytesPerChar,
3935 next - enc->minBytesPerChar);
3936 if (!tem)
3937 return XML_ERROR_NO_MEMORY;
3938 normalizePublicId(tem);
3939 declEntity->publicId = tem;
3940 poolFinish(&dtd->pool);
3941 if (entityDeclHandler)
3942 handleDefault = XML_FALSE;
3944 break;
3945 case XML_ROLE_DOCTYPE_CLOSE:
3946 if (doctypeName) {
3947 startDoctypeDeclHandler(handlerArg, doctypeName,
3948 doctypeSysid, doctypePubid, 0);
3949 poolClear(&tempPool);
3950 handleDefault = XML_FALSE;
3952 /* doctypeSysid will be non-NULL in the case of a previous
3953 XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
3954 was not set, indicating an external subset
3956 #ifdef XML_DTD
3957 if (doctypeSysid || useForeignDTD) {
3958 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
3959 dtd->hasParamEntityRefs = XML_TRUE;
3960 if (paramEntityParsing && externalEntityRefHandler) {
3961 ENTITY *entity = (ENTITY *)lookup(parser,
3962 &dtd->paramEntities,
3963 externalSubsetName,
3964 sizeof(ENTITY));
3965 if (!entity)
3966 return XML_ERROR_NO_MEMORY;
3967 if (useForeignDTD)
3968 entity->base = curBase;
3969 dtd->paramEntityRead = XML_FALSE;
3970 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
3972 entity->base,
3973 entity->systemId,
3974 entity->publicId))
3975 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3976 if (dtd->paramEntityRead) {
3977 if (!dtd->standalone &&
3978 notStandaloneHandler &&
3979 !notStandaloneHandler(handlerArg))
3980 return XML_ERROR_NOT_STANDALONE;
3982 /* if we didn't read the foreign DTD then this means that there
3983 is no external subset and we must reset dtd->hasParamEntityRefs
3985 else if (!doctypeSysid)
3986 dtd->hasParamEntityRefs = hadParamEntityRefs;
3987 /* end of DTD - no need to update dtd->keepProcessing */
3989 useForeignDTD = XML_FALSE;
3991 #endif /* XML_DTD */
3992 if (endDoctypeDeclHandler) {
3993 endDoctypeDeclHandler(handlerArg);
3994 handleDefault = XML_FALSE;
3996 break;
3997 case XML_ROLE_INSTANCE_START:
3998 #ifdef XML_DTD
3999 /* if there is no DOCTYPE declaration then now is the
4000 last chance to read the foreign DTD
4002 if (useForeignDTD) {
4003 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4004 dtd->hasParamEntityRefs = XML_TRUE;
4005 if (paramEntityParsing && externalEntityRefHandler) {
4006 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4007 externalSubsetName,
4008 sizeof(ENTITY));
4009 if (!entity)
4010 return XML_ERROR_NO_MEMORY;
4011 entity->base = curBase;
4012 dtd->paramEntityRead = XML_FALSE;
4013 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4015 entity->base,
4016 entity->systemId,
4017 entity->publicId))
4018 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4019 if (dtd->paramEntityRead) {
4020 if (!dtd->standalone &&
4021 notStandaloneHandler &&
4022 !notStandaloneHandler(handlerArg))
4023 return XML_ERROR_NOT_STANDALONE;
4025 /* if we didn't read the foreign DTD then this means that there
4026 is no external subset and we must reset dtd->hasParamEntityRefs
4028 else
4029 dtd->hasParamEntityRefs = hadParamEntityRefs;
4030 /* end of DTD - no need to update dtd->keepProcessing */
4033 #endif /* XML_DTD */
4034 processor = contentProcessor;
4035 return contentProcessor(parser, s, end, nextPtr);
4036 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4037 declElementType = getElementType(parser, enc, s, next);
4038 if (!declElementType)
4039 return XML_ERROR_NO_MEMORY;
4040 goto checkAttListDeclHandler;
4041 case XML_ROLE_ATTRIBUTE_NAME:
4042 declAttributeId = getAttributeId(parser, enc, s, next);
4043 if (!declAttributeId)
4044 return XML_ERROR_NO_MEMORY;
4045 declAttributeIsCdata = XML_FALSE;
4046 declAttributeType = NULL;
4047 declAttributeIsId = XML_FALSE;
4048 goto checkAttListDeclHandler;
4049 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4050 declAttributeIsCdata = XML_TRUE;
4051 declAttributeType = atypeCDATA;
4052 goto checkAttListDeclHandler;
4053 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4054 declAttributeIsId = XML_TRUE;
4055 declAttributeType = atypeID;
4056 goto checkAttListDeclHandler;
4057 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4058 declAttributeType = atypeIDREF;
4059 goto checkAttListDeclHandler;
4060 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4061 declAttributeType = atypeIDREFS;
4062 goto checkAttListDeclHandler;
4063 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4064 declAttributeType = atypeENTITY;
4065 goto checkAttListDeclHandler;
4066 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4067 declAttributeType = atypeENTITIES;
4068 goto checkAttListDeclHandler;
4069 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4070 declAttributeType = atypeNMTOKEN;
4071 goto checkAttListDeclHandler;
4072 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4073 declAttributeType = atypeNMTOKENS;
4074 checkAttListDeclHandler:
4075 if (dtd->keepProcessing && attlistDeclHandler)
4076 handleDefault = XML_FALSE;
4077 break;
4078 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4079 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4080 if (dtd->keepProcessing && attlistDeclHandler) {
4081 const XML_Char *prefix;
4082 if (declAttributeType) {
4083 prefix = enumValueSep;
4085 else {
4086 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4087 ? notationPrefix
4088 : enumValueStart);
4090 if (!poolAppendString(&tempPool, prefix))
4091 return XML_ERROR_NO_MEMORY;
4092 if (!poolAppend(&tempPool, enc, s, next))
4093 return XML_ERROR_NO_MEMORY;
4094 declAttributeType = tempPool.start;
4095 handleDefault = XML_FALSE;
4097 break;
4098 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4099 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4100 if (dtd->keepProcessing) {
4101 if (!defineAttribute(declElementType, declAttributeId,
4102 declAttributeIsCdata, declAttributeIsId,
4103 0, parser))
4104 return XML_ERROR_NO_MEMORY;
4105 if (attlistDeclHandler && declAttributeType) {
4106 if (*declAttributeType == XML_T(ASCII_LPAREN)
4107 || (*declAttributeType == XML_T(ASCII_N)
4108 && declAttributeType[1] == XML_T(ASCII_O))) {
4109 /* Enumerated or Notation type */
4110 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
4111 || !poolAppendChar(&tempPool, XML_T('\0')))
4112 return XML_ERROR_NO_MEMORY;
4113 declAttributeType = tempPool.start;
4114 poolFinish(&tempPool);
4116 *eventEndPP = s;
4117 attlistDeclHandler(handlerArg, declElementType->name,
4118 declAttributeId->name, declAttributeType,
4119 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4120 poolClear(&tempPool);
4121 handleDefault = XML_FALSE;
4124 break;
4125 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4126 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4127 if (dtd->keepProcessing) {
4128 const XML_Char *attVal;
4129 enum XML_Error result =
4130 storeAttributeValue(parser, enc, declAttributeIsCdata,
4131 s + enc->minBytesPerChar,
4132 next - enc->minBytesPerChar,
4133 &dtd->pool);
4134 if (result)
4135 return result;
4136 attVal = poolStart(&dtd->pool);
4137 poolFinish(&dtd->pool);
4138 /* ID attributes aren't allowed to have a default */
4139 if (!defineAttribute(declElementType, declAttributeId,
4140 declAttributeIsCdata, XML_FALSE, attVal, parser))
4141 return XML_ERROR_NO_MEMORY;
4142 if (attlistDeclHandler && declAttributeType) {
4143 if (*declAttributeType == XML_T(ASCII_LPAREN)
4144 || (*declAttributeType == XML_T(ASCII_N)
4145 && declAttributeType[1] == XML_T(ASCII_O))) {
4146 /* Enumerated or Notation type */
4147 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
4148 || !poolAppendChar(&tempPool, XML_T('\0')))
4149 return XML_ERROR_NO_MEMORY;
4150 declAttributeType = tempPool.start;
4151 poolFinish(&tempPool);
4153 *eventEndPP = s;
4154 attlistDeclHandler(handlerArg, declElementType->name,
4155 declAttributeId->name, declAttributeType,
4156 attVal,
4157 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4158 poolClear(&tempPool);
4159 handleDefault = XML_FALSE;
4162 break;
4163 case XML_ROLE_ENTITY_VALUE:
4164 if (dtd->keepProcessing) {
4165 enum XML_Error result = storeEntityValue(parser, enc,
4166 s + enc->minBytesPerChar,
4167 next - enc->minBytesPerChar);
4168 if (declEntity) {
4169 declEntity->textPtr = poolStart(&dtd->entityValuePool);
4170 declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
4171 poolFinish(&dtd->entityValuePool);
4172 if (entityDeclHandler) {
4173 *eventEndPP = s;
4174 entityDeclHandler(handlerArg,
4175 declEntity->name,
4176 declEntity->is_param,
4177 declEntity->textPtr,
4178 declEntity->textLen,
4179 curBase, 0, 0, 0);
4180 handleDefault = XML_FALSE;
4183 else
4184 poolDiscard(&dtd->entityValuePool);
4185 if (result != XML_ERROR_NONE)
4186 return result;
4188 break;
4189 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4190 #ifdef XML_DTD
4191 useForeignDTD = XML_FALSE;
4192 #endif /* XML_DTD */
4193 dtd->hasParamEntityRefs = XML_TRUE;
4194 if (startDoctypeDeclHandler) {
4195 doctypeSysid = poolStoreString(&tempPool, enc,
4196 s + enc->minBytesPerChar,
4197 next - enc->minBytesPerChar);
4198 if (doctypeSysid == NULL)
4199 return XML_ERROR_NO_MEMORY;
4200 poolFinish(&tempPool);
4201 handleDefault = XML_FALSE;
4203 #ifdef XML_DTD
4204 else
4205 /* use externalSubsetName to make doctypeSysid non-NULL
4206 for the case where no startDoctypeDeclHandler is set */
4207 doctypeSysid = externalSubsetName;
4208 #endif /* XML_DTD */
4209 if (!dtd->standalone
4210 #ifdef XML_DTD
4211 && !paramEntityParsing
4212 #endif /* XML_DTD */
4213 && notStandaloneHandler
4214 && !notStandaloneHandler(handlerArg))
4215 return XML_ERROR_NOT_STANDALONE;
4216 #ifndef XML_DTD
4217 break;
4218 #else /* XML_DTD */
4219 if (!declEntity) {
4220 declEntity = (ENTITY *)lookup(parser,
4221 &dtd->paramEntities,
4222 externalSubsetName,
4223 sizeof(ENTITY));
4224 if (!declEntity)
4225 return XML_ERROR_NO_MEMORY;
4226 declEntity->publicId = NULL;
4228 /* fall through */
4229 #endif /* XML_DTD */
4230 case XML_ROLE_ENTITY_SYSTEM_ID:
4231 if (dtd->keepProcessing && declEntity) {
4232 declEntity->systemId = poolStoreString(&dtd->pool, enc,
4233 s + enc->minBytesPerChar,
4234 next - enc->minBytesPerChar);
4235 if (!declEntity->systemId)
4236 return XML_ERROR_NO_MEMORY;
4237 declEntity->base = curBase;
4238 poolFinish(&dtd->pool);
4239 if (entityDeclHandler)
4240 handleDefault = XML_FALSE;
4242 break;
4243 case XML_ROLE_ENTITY_COMPLETE:
4244 if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4245 *eventEndPP = s;
4246 entityDeclHandler(handlerArg,
4247 declEntity->name,
4248 declEntity->is_param,
4249 0,0,
4250 declEntity->base,
4251 declEntity->systemId,
4252 declEntity->publicId,
4254 handleDefault = XML_FALSE;
4256 break;
4257 case XML_ROLE_ENTITY_NOTATION_NAME:
4258 if (dtd->keepProcessing && declEntity) {
4259 declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4260 if (!declEntity->notation)
4261 return XML_ERROR_NO_MEMORY;
4262 poolFinish(&dtd->pool);
4263 if (unparsedEntityDeclHandler) {
4264 *eventEndPP = s;
4265 unparsedEntityDeclHandler(handlerArg,
4266 declEntity->name,
4267 declEntity->base,
4268 declEntity->systemId,
4269 declEntity->publicId,
4270 declEntity->notation);
4271 handleDefault = XML_FALSE;
4273 else if (entityDeclHandler) {
4274 *eventEndPP = s;
4275 entityDeclHandler(handlerArg,
4276 declEntity->name,
4277 0,0,0,
4278 declEntity->base,
4279 declEntity->systemId,
4280 declEntity->publicId,
4281 declEntity->notation);
4282 handleDefault = XML_FALSE;
4285 break;
4286 case XML_ROLE_GENERAL_ENTITY_NAME:
4288 if (XmlPredefinedEntityName(enc, s, next)) {
4289 declEntity = NULL;
4290 break;
4292 if (dtd->keepProcessing) {
4293 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4294 if (!name)
4295 return XML_ERROR_NO_MEMORY;
4296 declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
4297 sizeof(ENTITY));
4298 if (!declEntity)
4299 return XML_ERROR_NO_MEMORY;
4300 if (declEntity->name != name) {
4301 poolDiscard(&dtd->pool);
4302 declEntity = NULL;
4304 else {
4305 poolFinish(&dtd->pool);
4306 declEntity->publicId = NULL;
4307 declEntity->is_param = XML_FALSE;
4308 /* if we have a parent parser or are reading an internal parameter
4309 entity, then the entity declaration is not considered "internal"
4311 declEntity->is_internal = !(parentParser || openInternalEntities);
4312 if (entityDeclHandler)
4313 handleDefault = XML_FALSE;
4316 else {
4317 poolDiscard(&dtd->pool);
4318 declEntity = NULL;
4321 break;
4322 case XML_ROLE_PARAM_ENTITY_NAME:
4323 #ifdef XML_DTD
4324 if (dtd->keepProcessing) {
4325 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4326 if (!name)
4327 return XML_ERROR_NO_MEMORY;
4328 declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4329 name, sizeof(ENTITY));
4330 if (!declEntity)
4331 return XML_ERROR_NO_MEMORY;
4332 if (declEntity->name != name) {
4333 poolDiscard(&dtd->pool);
4334 declEntity = NULL;
4336 else {
4337 poolFinish(&dtd->pool);
4338 declEntity->publicId = NULL;
4339 declEntity->is_param = XML_TRUE;
4340 /* if we have a parent parser or are reading an internal parameter
4341 entity, then the entity declaration is not considered "internal"
4343 declEntity->is_internal = !(parentParser || openInternalEntities);
4344 if (entityDeclHandler)
4345 handleDefault = XML_FALSE;
4348 else {
4349 poolDiscard(&dtd->pool);
4350 declEntity = NULL;
4352 #else /* not XML_DTD */
4353 declEntity = NULL;
4354 #endif /* XML_DTD */
4355 break;
4356 case XML_ROLE_NOTATION_NAME:
4357 declNotationPublicId = NULL;
4358 declNotationName = NULL;
4359 if (notationDeclHandler) {
4360 declNotationName = poolStoreString(&tempPool, enc, s, next);
4361 if (!declNotationName)
4362 return XML_ERROR_NO_MEMORY;
4363 poolFinish(&tempPool);
4364 handleDefault = XML_FALSE;
4366 break;
4367 case XML_ROLE_NOTATION_PUBLIC_ID:
4368 if (!XmlIsPublicId(enc, s, next, eventPP))
4369 return XML_ERROR_PUBLICID;
4370 if (declNotationName) { /* means notationDeclHandler != NULL */
4371 XML_Char *tem = poolStoreString(&tempPool,
4372 enc,
4373 s + enc->minBytesPerChar,
4374 next - enc->minBytesPerChar);
4375 if (!tem)
4376 return XML_ERROR_NO_MEMORY;
4377 normalizePublicId(tem);
4378 declNotationPublicId = tem;
4379 poolFinish(&tempPool);
4380 handleDefault = XML_FALSE;
4382 break;
4383 case XML_ROLE_NOTATION_SYSTEM_ID:
4384 if (declNotationName && notationDeclHandler) {
4385 const XML_Char *systemId
4386 = poolStoreString(&tempPool, enc,
4387 s + enc->minBytesPerChar,
4388 next - enc->minBytesPerChar);
4389 if (!systemId)
4390 return XML_ERROR_NO_MEMORY;
4391 *eventEndPP = s;
4392 notationDeclHandler(handlerArg,
4393 declNotationName,
4394 curBase,
4395 systemId,
4396 declNotationPublicId);
4397 handleDefault = XML_FALSE;
4399 poolClear(&tempPool);
4400 break;
4401 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4402 if (declNotationPublicId && notationDeclHandler) {
4403 *eventEndPP = s;
4404 notationDeclHandler(handlerArg,
4405 declNotationName,
4406 curBase,
4408 declNotationPublicId);
4409 handleDefault = XML_FALSE;
4411 poolClear(&tempPool);
4412 break;
4413 case XML_ROLE_ERROR:
4414 switch (tok) {
4415 case XML_TOK_PARAM_ENTITY_REF:
4416 /* PE references in internal subset are
4417 not allowed within declarations. */
4418 return XML_ERROR_PARAM_ENTITY_REF;
4419 case XML_TOK_XML_DECL:
4420 return XML_ERROR_MISPLACED_XML_PI;
4421 default:
4422 return XML_ERROR_SYNTAX;
4424 #ifdef XML_DTD
4425 case XML_ROLE_IGNORE_SECT:
4427 enum XML_Error result;
4428 if (defaultHandler)
4429 reportDefault(parser, enc, s, next);
4430 handleDefault = XML_FALSE;
4431 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4432 if (result != XML_ERROR_NONE)
4433 return result;
4434 else if (!next) {
4435 processor = ignoreSectionProcessor;
4436 return result;
4439 break;
4440 #endif /* XML_DTD */
4441 case XML_ROLE_GROUP_OPEN:
4442 if (prologState.level >= groupSize) {
4443 if (groupSize) {
4444 char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
4445 if (temp == NULL)
4446 return XML_ERROR_NO_MEMORY;
4447 groupConnector = temp;
4448 if (dtd->scaffIndex) {
4449 int *temp = (int *)REALLOC(dtd->scaffIndex,
4450 groupSize * sizeof(int));
4451 if (temp == NULL)
4452 return XML_ERROR_NO_MEMORY;
4453 dtd->scaffIndex = temp;
4456 else {
4457 groupConnector = (char *)MALLOC(groupSize = 32);
4458 if (!groupConnector)
4459 return XML_ERROR_NO_MEMORY;
4462 groupConnector[prologState.level] = 0;
4463 if (dtd->in_eldecl) {
4464 int myindex = nextScaffoldPart(parser);
4465 if (myindex < 0)
4466 return XML_ERROR_NO_MEMORY;
4467 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4468 dtd->scaffLevel++;
4469 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4470 if (elementDeclHandler)
4471 handleDefault = XML_FALSE;
4473 break;
4474 case XML_ROLE_GROUP_SEQUENCE:
4475 if (groupConnector[prologState.level] == ASCII_PIPE)
4476 return XML_ERROR_SYNTAX;
4477 groupConnector[prologState.level] = ASCII_COMMA;
4478 if (dtd->in_eldecl && elementDeclHandler)
4479 handleDefault = XML_FALSE;
4480 break;
4481 case XML_ROLE_GROUP_CHOICE:
4482 if (groupConnector[prologState.level] == ASCII_COMMA)
4483 return XML_ERROR_SYNTAX;
4484 if (dtd->in_eldecl
4485 && !groupConnector[prologState.level]
4486 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4487 != XML_CTYPE_MIXED)
4489 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4490 = XML_CTYPE_CHOICE;
4491 if (elementDeclHandler)
4492 handleDefault = XML_FALSE;
4494 groupConnector[prologState.level] = ASCII_PIPE;
4495 break;
4496 case XML_ROLE_PARAM_ENTITY_REF:
4497 #ifdef XML_DTD
4498 case XML_ROLE_INNER_PARAM_ENTITY_REF:
4499 dtd->hasParamEntityRefs = XML_TRUE;
4500 if (!paramEntityParsing)
4501 dtd->keepProcessing = dtd->standalone;
4502 else {
4503 const XML_Char *name;
4504 ENTITY *entity;
4505 name = poolStoreString(&dtd->pool, enc,
4506 s + enc->minBytesPerChar,
4507 next - enc->minBytesPerChar);
4508 if (!name)
4509 return XML_ERROR_NO_MEMORY;
4510 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
4511 poolDiscard(&dtd->pool);
4512 /* first, determine if a check for an existing declaration is needed;
4513 if yes, check that the entity exists, and that it is internal,
4514 otherwise call the skipped entity handler
4516 if (prologState.documentEntity &&
4517 (dtd->standalone
4518 ? !openInternalEntities
4519 : !dtd->hasParamEntityRefs)) {
4520 if (!entity)
4521 return XML_ERROR_UNDEFINED_ENTITY;
4522 else if (!entity->is_internal)
4523 return XML_ERROR_ENTITY_DECLARED_IN_PE;
4525 else if (!entity) {
4526 dtd->keepProcessing = dtd->standalone;
4527 /* cannot report skipped entities in declarations */
4528 if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) {
4529 skippedEntityHandler(handlerArg, name, 1);
4530 handleDefault = XML_FALSE;
4532 break;
4534 if (entity->open)
4535 return XML_ERROR_RECURSIVE_ENTITY_REF;
4536 if (entity->textPtr) {
4537 enum XML_Error result;
4538 XML_Bool betweenDecl =
4539 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
4540 result = processInternalEntity(parser, entity, betweenDecl);
4541 if (result != XML_ERROR_NONE)
4542 return result;
4543 handleDefault = XML_FALSE;
4544 break;
4546 if (externalEntityRefHandler) {
4547 dtd->paramEntityRead = XML_FALSE;
4548 entity->open = XML_TRUE;
4549 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4551 entity->base,
4552 entity->systemId,
4553 entity->publicId)) {
4554 entity->open = XML_FALSE;
4555 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4557 entity->open = XML_FALSE;
4558 handleDefault = XML_FALSE;
4559 if (!dtd->paramEntityRead) {
4560 dtd->keepProcessing = dtd->standalone;
4561 break;
4564 else {
4565 dtd->keepProcessing = dtd->standalone;
4566 break;
4569 #endif /* XML_DTD */
4570 if (!dtd->standalone &&
4571 notStandaloneHandler &&
4572 !notStandaloneHandler(handlerArg))
4573 return XML_ERROR_NOT_STANDALONE;
4574 break;
4576 /* Element declaration stuff */
4578 case XML_ROLE_ELEMENT_NAME:
4579 if (elementDeclHandler) {
4580 declElementType = getElementType(parser, enc, s, next);
4581 if (!declElementType)
4582 return XML_ERROR_NO_MEMORY;
4583 dtd->scaffLevel = 0;
4584 dtd->scaffCount = 0;
4585 dtd->in_eldecl = XML_TRUE;
4586 handleDefault = XML_FALSE;
4588 break;
4590 case XML_ROLE_CONTENT_ANY:
4591 case XML_ROLE_CONTENT_EMPTY:
4592 if (dtd->in_eldecl) {
4593 if (elementDeclHandler) {
4594 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
4595 if (!content)
4596 return XML_ERROR_NO_MEMORY;
4597 content->quant = XML_CQUANT_NONE;
4598 content->name = NULL;
4599 content->numchildren = 0;
4600 content->children = NULL;
4601 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
4602 XML_CTYPE_ANY :
4603 XML_CTYPE_EMPTY);
4604 *eventEndPP = s;
4605 elementDeclHandler(handlerArg, declElementType->name, content);
4606 handleDefault = XML_FALSE;
4608 dtd->in_eldecl = XML_FALSE;
4610 break;
4612 case XML_ROLE_CONTENT_PCDATA:
4613 if (dtd->in_eldecl) {
4614 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4615 = XML_CTYPE_MIXED;
4616 if (elementDeclHandler)
4617 handleDefault = XML_FALSE;
4619 break;
4621 case XML_ROLE_CONTENT_ELEMENT:
4622 quant = XML_CQUANT_NONE;
4623 goto elementContent;
4624 case XML_ROLE_CONTENT_ELEMENT_OPT:
4625 quant = XML_CQUANT_OPT;
4626 goto elementContent;
4627 case XML_ROLE_CONTENT_ELEMENT_REP:
4628 quant = XML_CQUANT_REP;
4629 goto elementContent;
4630 case XML_ROLE_CONTENT_ELEMENT_PLUS:
4631 quant = XML_CQUANT_PLUS;
4632 elementContent:
4633 if (dtd->in_eldecl) {
4634 ELEMENT_TYPE *el;
4635 const XML_Char *name;
4636 int nameLen;
4637 const char *nxt = (quant == XML_CQUANT_NONE
4638 ? next
4639 : next - enc->minBytesPerChar);
4640 int myindex = nextScaffoldPart(parser);
4641 if (myindex < 0)
4642 return XML_ERROR_NO_MEMORY;
4643 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
4644 dtd->scaffold[myindex].quant = quant;
4645 el = getElementType(parser, enc, s, nxt);
4646 if (!el)
4647 return XML_ERROR_NO_MEMORY;
4648 name = el->name;
4649 dtd->scaffold[myindex].name = name;
4650 nameLen = 0;
4651 for (; name[nameLen++]; );
4652 dtd->contentStringLen += nameLen;
4653 if (elementDeclHandler)
4654 handleDefault = XML_FALSE;
4656 break;
4658 case XML_ROLE_GROUP_CLOSE:
4659 quant = XML_CQUANT_NONE;
4660 goto closeGroup;
4661 case XML_ROLE_GROUP_CLOSE_OPT:
4662 quant = XML_CQUANT_OPT;
4663 goto closeGroup;
4664 case XML_ROLE_GROUP_CLOSE_REP:
4665 quant = XML_CQUANT_REP;
4666 goto closeGroup;
4667 case XML_ROLE_GROUP_CLOSE_PLUS:
4668 quant = XML_CQUANT_PLUS;
4669 closeGroup:
4670 if (dtd->in_eldecl) {
4671 if (elementDeclHandler)
4672 handleDefault = XML_FALSE;
4673 dtd->scaffLevel--;
4674 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
4675 if (dtd->scaffLevel == 0) {
4676 if (!handleDefault) {
4677 XML_Content *model = build_model(parser);
4678 if (!model)
4679 return XML_ERROR_NO_MEMORY;
4680 *eventEndPP = s;
4681 elementDeclHandler(handlerArg, declElementType->name, model);
4683 dtd->in_eldecl = XML_FALSE;
4684 dtd->contentStringLen = 0;
4687 break;
4688 /* End element declaration stuff */
4690 case XML_ROLE_PI:
4691 if (!reportProcessingInstruction(parser, enc, s, next))
4692 return XML_ERROR_NO_MEMORY;
4693 handleDefault = XML_FALSE;
4694 break;
4695 case XML_ROLE_COMMENT:
4696 if (!reportComment(parser, enc, s, next))
4697 return XML_ERROR_NO_MEMORY;
4698 handleDefault = XML_FALSE;
4699 break;
4700 case XML_ROLE_NONE:
4701 switch (tok) {
4702 case XML_TOK_BOM:
4703 handleDefault = XML_FALSE;
4704 break;
4706 break;
4707 case XML_ROLE_DOCTYPE_NONE:
4708 if (startDoctypeDeclHandler)
4709 handleDefault = XML_FALSE;
4710 break;
4711 case XML_ROLE_ENTITY_NONE:
4712 if (dtd->keepProcessing && entityDeclHandler)
4713 handleDefault = XML_FALSE;
4714 break;
4715 case XML_ROLE_NOTATION_NONE:
4716 if (notationDeclHandler)
4717 handleDefault = XML_FALSE;
4718 break;
4719 case XML_ROLE_ATTLIST_NONE:
4720 if (dtd->keepProcessing && attlistDeclHandler)
4721 handleDefault = XML_FALSE;
4722 break;
4723 case XML_ROLE_ELEMENT_NONE:
4724 if (elementDeclHandler)
4725 handleDefault = XML_FALSE;
4726 break;
4727 } /* end of big switch */
4729 if (handleDefault && defaultHandler)
4730 reportDefault(parser, enc, s, next);
4732 switch (ps_parsing) {
4733 case XML_SUSPENDED:
4734 *nextPtr = next;
4735 return XML_ERROR_NONE;
4736 case XML_FINISHED:
4737 return XML_ERROR_ABORTED;
4738 default:
4739 s = next;
4740 tok = XmlPrologTok(enc, s, end, &next);
4743 /* not reached */
4746 static enum XML_Error PTRCALL
4747 epilogProcessor(XML_Parser parser,
4748 const char *s,
4749 const char *end,
4750 const char **nextPtr)
4752 processor = epilogProcessor;
4753 eventPtr = s;
4754 for (;;) {
4755 const char *next = NULL;
4756 int tok = XmlPrologTok(encoding, s, end, &next);
4757 eventEndPtr = next;
4758 switch (tok) {
4759 /* report partial linebreak - it might be the last token */
4760 case -XML_TOK_PROLOG_S:
4761 if (defaultHandler) {
4762 reportDefault(parser, encoding, s, next);
4763 if (ps_parsing == XML_FINISHED)
4764 return XML_ERROR_ABORTED;
4766 *nextPtr = next;
4767 return XML_ERROR_NONE;
4768 case XML_TOK_NONE:
4769 *nextPtr = s;
4770 return XML_ERROR_NONE;
4771 case XML_TOK_PROLOG_S:
4772 if (defaultHandler)
4773 reportDefault(parser, encoding, s, next);
4774 break;
4775 case XML_TOK_PI:
4776 if (!reportProcessingInstruction(parser, encoding, s, next))
4777 return XML_ERROR_NO_MEMORY;
4778 break;
4779 case XML_TOK_COMMENT:
4780 if (!reportComment(parser, encoding, s, next))
4781 return XML_ERROR_NO_MEMORY;
4782 break;
4783 case XML_TOK_INVALID:
4784 eventPtr = next;
4785 return XML_ERROR_INVALID_TOKEN;
4786 case XML_TOK_PARTIAL:
4787 if (!ps_finalBuffer) {
4788 *nextPtr = s;
4789 return XML_ERROR_NONE;
4791 return XML_ERROR_UNCLOSED_TOKEN;
4792 case XML_TOK_PARTIAL_CHAR:
4793 if (!ps_finalBuffer) {
4794 *nextPtr = s;
4795 return XML_ERROR_NONE;
4797 return XML_ERROR_PARTIAL_CHAR;
4798 default:
4799 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
4801 eventPtr = s = next;
4802 switch (ps_parsing) {
4803 case XML_SUSPENDED:
4804 *nextPtr = next;
4805 return XML_ERROR_NONE;
4806 case XML_FINISHED:
4807 return XML_ERROR_ABORTED;
4808 default: ;
4813 static enum XML_Error
4814 processInternalEntity(XML_Parser parser, ENTITY *entity,
4815 XML_Bool betweenDecl)
4817 const char *textStart, *textEnd;
4818 const char *next;
4819 enum XML_Error result;
4820 OPEN_INTERNAL_ENTITY *openEntity;
4822 if (freeInternalEntities) {
4823 openEntity = freeInternalEntities;
4824 freeInternalEntities = openEntity->next;
4826 else {
4827 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY));
4828 if (!openEntity)
4829 return XML_ERROR_NO_MEMORY;
4831 entity->open = XML_TRUE;
4832 entity->processed = 0;
4833 openEntity->next = openInternalEntities;
4834 openInternalEntities = openEntity;
4835 openEntity->entity = entity;
4836 openEntity->startTagLevel = tagLevel;
4837 openEntity->betweenDecl = betweenDecl;
4838 openEntity->internalEventPtr = NULL;
4839 openEntity->internalEventEndPtr = NULL;
4840 textStart = (char *)entity->textPtr;
4841 textEnd = (char *)(entity->textPtr + entity->textLen);
4843 #ifdef XML_DTD
4844 if (entity->is_param) {
4845 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
4846 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
4847 next, &next, XML_FALSE);
4849 else
4850 #endif /* XML_DTD */
4851 result = doContent(parser, tagLevel, internalEncoding, textStart,
4852 textEnd, &next, XML_FALSE);
4854 if (result == XML_ERROR_NONE) {
4855 if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4856 entity->processed = (int)(next - textStart);
4857 processor = internalEntityProcessor;
4859 else {
4860 entity->open = XML_FALSE;
4861 openInternalEntities = openEntity->next;
4862 /* put openEntity back in list of free instances */
4863 openEntity->next = freeInternalEntities;
4864 freeInternalEntities = openEntity;
4867 return result;
4870 static enum XML_Error PTRCALL
4871 internalEntityProcessor(XML_Parser parser,
4872 const char *s,
4873 const char *end,
4874 const char **nextPtr)
4876 ENTITY *entity;
4877 const char *textStart, *textEnd;
4878 const char *next;
4879 enum XML_Error result;
4880 OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities;
4881 if (!openEntity)
4882 return XML_ERROR_UNEXPECTED_STATE;
4884 entity = openEntity->entity;
4885 textStart = ((char *)entity->textPtr) + entity->processed;
4886 textEnd = (char *)(entity->textPtr + entity->textLen);
4888 #ifdef XML_DTD
4889 if (entity->is_param) {
4890 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
4891 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
4892 next, &next, XML_FALSE);
4894 else
4895 #endif /* XML_DTD */
4896 result = doContent(parser, openEntity->startTagLevel, internalEncoding,
4897 textStart, textEnd, &next, XML_FALSE);
4899 if (result != XML_ERROR_NONE)
4900 return result;
4901 else if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4902 entity->processed = (int)(next - (char *)entity->textPtr);
4903 return result;
4905 else {
4906 entity->open = XML_FALSE;
4907 openInternalEntities = openEntity->next;
4908 /* put openEntity back in list of free instances */
4909 openEntity->next = freeInternalEntities;
4910 freeInternalEntities = openEntity;
4913 #ifdef XML_DTD
4914 if (entity->is_param) {
4915 int tok;
4916 processor = prologProcessor;
4917 tok = XmlPrologTok(encoding, s, end, &next);
4918 return doProlog(parser, encoding, s, end, tok, next, nextPtr,
4919 (XML_Bool)!ps_finalBuffer);
4921 else
4922 #endif /* XML_DTD */
4924 processor = contentProcessor;
4925 /* see externalEntityContentProcessor vs contentProcessor */
4926 return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
4927 nextPtr, (XML_Bool)!ps_finalBuffer);
4931 static enum XML_Error PTRCALL
4932 errorProcessor(XML_Parser parser,
4933 const char *s,
4934 const char *end,
4935 const char **nextPtr)
4937 return errorCode;
4940 static enum XML_Error
4941 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4942 const char *ptr, const char *end,
4943 STRING_POOL *pool)
4945 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
4946 end, pool);
4947 if (result)
4948 return result;
4949 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
4950 poolChop(pool);
4951 if (!poolAppendChar(pool, XML_T('\0')))
4952 return XML_ERROR_NO_MEMORY;
4953 return XML_ERROR_NONE;
4956 static enum XML_Error
4957 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4958 const char *ptr, const char *end,
4959 STRING_POOL *pool)
4961 DTD * const dtd = _dtd; /* save one level of indirection */
4962 for (;;) {
4963 const char *next;
4964 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
4965 switch (tok) {
4966 case XML_TOK_NONE:
4967 return XML_ERROR_NONE;
4968 case XML_TOK_INVALID:
4969 if (enc == encoding)
4970 eventPtr = next;
4971 return XML_ERROR_INVALID_TOKEN;
4972 case XML_TOK_PARTIAL:
4973 if (enc == encoding)
4974 eventPtr = ptr;
4975 return XML_ERROR_INVALID_TOKEN;
4976 case XML_TOK_CHAR_REF:
4978 XML_Char buf[XML_ENCODE_MAX];
4979 int i;
4980 int n = XmlCharRefNumber(enc, ptr);
4981 if (n < 0) {
4982 if (enc == encoding)
4983 eventPtr = ptr;
4984 return XML_ERROR_BAD_CHAR_REF;
4986 if (!isCdata
4987 && n == 0x20 /* space */
4988 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
4989 break;
4990 n = XmlEncode(n, (ICHAR *)buf);
4991 if (!n) {
4992 if (enc == encoding)
4993 eventPtr = ptr;
4994 return XML_ERROR_BAD_CHAR_REF;
4996 for (i = 0; i < n; i++) {
4997 if (!poolAppendChar(pool, buf[i]))
4998 return XML_ERROR_NO_MEMORY;
5001 break;
5002 case XML_TOK_DATA_CHARS:
5003 if (!poolAppend(pool, enc, ptr, next))
5004 return XML_ERROR_NO_MEMORY;
5005 break;
5006 case XML_TOK_TRAILING_CR:
5007 next = ptr + enc->minBytesPerChar;
5008 /* fall through */
5009 case XML_TOK_ATTRIBUTE_VALUE_S:
5010 case XML_TOK_DATA_NEWLINE:
5011 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5012 break;
5013 if (!poolAppendChar(pool, 0x20))
5014 return XML_ERROR_NO_MEMORY;
5015 break;
5016 case XML_TOK_ENTITY_REF:
5018 const XML_Char *name;
5019 ENTITY *entity;
5020 char checkEntityDecl;
5021 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5022 ptr + enc->minBytesPerChar,
5023 next - enc->minBytesPerChar);
5024 if (ch) {
5025 if (!poolAppendChar(pool, ch))
5026 return XML_ERROR_NO_MEMORY;
5027 break;
5029 name = poolStoreString(&temp2Pool, enc,
5030 ptr + enc->minBytesPerChar,
5031 next - enc->minBytesPerChar);
5032 if (!name)
5033 return XML_ERROR_NO_MEMORY;
5034 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5035 poolDiscard(&temp2Pool);
5036 /* First, determine if a check for an existing declaration is needed;
5037 if yes, check that the entity exists, and that it is internal.
5039 if (pool == &dtd->pool) /* are we called from prolog? */
5040 checkEntityDecl =
5041 #ifdef XML_DTD
5042 prologState.documentEntity &&
5043 #endif /* XML_DTD */
5044 (dtd->standalone
5045 ? !openInternalEntities
5046 : !dtd->hasParamEntityRefs);
5047 else /* if (pool == &tempPool): we are called from content */
5048 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5049 if (checkEntityDecl) {
5050 if (!entity)
5051 return XML_ERROR_UNDEFINED_ENTITY;
5052 else if (!entity->is_internal)
5053 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5055 else if (!entity) {
5056 /* Cannot report skipped entity here - see comments on
5057 skippedEntityHandler.
5058 if (skippedEntityHandler)
5059 skippedEntityHandler(handlerArg, name, 0);
5061 /* Cannot call the default handler because this would be
5062 out of sync with the call to the startElementHandler.
5063 if ((pool == &tempPool) && defaultHandler)
5064 reportDefault(parser, enc, ptr, next);
5066 break;
5068 if (entity->open) {
5069 if (enc == encoding)
5070 eventPtr = ptr;
5071 return XML_ERROR_RECURSIVE_ENTITY_REF;
5073 if (entity->notation) {
5074 if (enc == encoding)
5075 eventPtr = ptr;
5076 return XML_ERROR_BINARY_ENTITY_REF;
5078 if (!entity->textPtr) {
5079 if (enc == encoding)
5080 eventPtr = ptr;
5081 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5083 else {
5084 enum XML_Error result;
5085 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5086 entity->open = XML_TRUE;
5087 result = appendAttributeValue(parser, internalEncoding, isCdata,
5088 (char *)entity->textPtr,
5089 (char *)textEnd, pool);
5090 entity->open = XML_FALSE;
5091 if (result)
5092 return result;
5095 break;
5096 default:
5097 if (enc == encoding)
5098 eventPtr = ptr;
5099 return XML_ERROR_UNEXPECTED_STATE;
5101 ptr = next;
5103 /* not reached */
5106 static enum XML_Error
5107 storeEntityValue(XML_Parser parser,
5108 const ENCODING *enc,
5109 const char *entityTextPtr,
5110 const char *entityTextEnd)
5112 DTD * const dtd = _dtd; /* save one level of indirection */
5113 STRING_POOL *pool = &(dtd->entityValuePool);
5114 enum XML_Error result = XML_ERROR_NONE;
5115 #ifdef XML_DTD
5116 int oldInEntityValue = prologState.inEntityValue;
5117 prologState.inEntityValue = 1;
5118 #endif /* XML_DTD */
5119 /* never return Null for the value argument in EntityDeclHandler,
5120 since this would indicate an external entity; therefore we
5121 have to make sure that entityValuePool.start is not null */
5122 if (!pool->blocks) {
5123 if (!poolGrow(pool))
5124 return XML_ERROR_NO_MEMORY;
5127 for (;;) {
5128 const char *next;
5129 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5130 switch (tok) {
5131 case XML_TOK_PARAM_ENTITY_REF:
5132 #ifdef XML_DTD
5133 if (isParamEntity || enc != encoding) {
5134 const XML_Char *name;
5135 ENTITY *entity;
5136 name = poolStoreString(&tempPool, enc,
5137 entityTextPtr + enc->minBytesPerChar,
5138 next - enc->minBytesPerChar);
5139 if (!name) {
5140 result = XML_ERROR_NO_MEMORY;
5141 goto endEntityValue;
5143 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5144 poolDiscard(&tempPool);
5145 if (!entity) {
5146 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5147 /* cannot report skipped entity here - see comments on
5148 skippedEntityHandler
5149 if (skippedEntityHandler)
5150 skippedEntityHandler(handlerArg, name, 0);
5152 dtd->keepProcessing = dtd->standalone;
5153 goto endEntityValue;
5155 if (entity->open) {
5156 if (enc == encoding)
5157 eventPtr = entityTextPtr;
5158 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5159 goto endEntityValue;
5161 if (entity->systemId) {
5162 if (externalEntityRefHandler) {
5163 dtd->paramEntityRead = XML_FALSE;
5164 entity->open = XML_TRUE;
5165 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5167 entity->base,
5168 entity->systemId,
5169 entity->publicId)) {
5170 entity->open = XML_FALSE;
5171 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5172 goto endEntityValue;
5174 entity->open = XML_FALSE;
5175 if (!dtd->paramEntityRead)
5176 dtd->keepProcessing = dtd->standalone;
5178 else
5179 dtd->keepProcessing = dtd->standalone;
5181 else {
5182 entity->open = XML_TRUE;
5183 result = storeEntityValue(parser,
5184 internalEncoding,
5185 (char *)entity->textPtr,
5186 (char *)(entity->textPtr
5187 + entity->textLen));
5188 entity->open = XML_FALSE;
5189 if (result)
5190 goto endEntityValue;
5192 break;
5194 #endif /* XML_DTD */
5195 /* In the internal subset, PE references are not legal
5196 within markup declarations, e.g entity values in this case. */
5197 eventPtr = entityTextPtr;
5198 result = XML_ERROR_PARAM_ENTITY_REF;
5199 goto endEntityValue;
5200 case XML_TOK_NONE:
5201 result = XML_ERROR_NONE;
5202 goto endEntityValue;
5203 case XML_TOK_ENTITY_REF:
5204 case XML_TOK_DATA_CHARS:
5205 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5206 result = XML_ERROR_NO_MEMORY;
5207 goto endEntityValue;
5209 break;
5210 case XML_TOK_TRAILING_CR:
5211 next = entityTextPtr + enc->minBytesPerChar;
5212 /* fall through */
5213 case XML_TOK_DATA_NEWLINE:
5214 if (pool->end == pool->ptr && !poolGrow(pool)) {
5215 result = XML_ERROR_NO_MEMORY;
5216 goto endEntityValue;
5218 *(pool->ptr)++ = 0xA;
5219 break;
5220 case XML_TOK_CHAR_REF:
5222 XML_Char buf[XML_ENCODE_MAX];
5223 int i;
5224 int n = XmlCharRefNumber(enc, entityTextPtr);
5225 if (n < 0) {
5226 if (enc == encoding)
5227 eventPtr = entityTextPtr;
5228 result = XML_ERROR_BAD_CHAR_REF;
5229 goto endEntityValue;
5231 n = XmlEncode(n, (ICHAR *)buf);
5232 if (!n) {
5233 if (enc == encoding)
5234 eventPtr = entityTextPtr;
5235 result = XML_ERROR_BAD_CHAR_REF;
5236 goto endEntityValue;
5238 for (i = 0; i < n; i++) {
5239 if (pool->end == pool->ptr && !poolGrow(pool)) {
5240 result = XML_ERROR_NO_MEMORY;
5241 goto endEntityValue;
5243 *(pool->ptr)++ = buf[i];
5246 break;
5247 case XML_TOK_PARTIAL:
5248 if (enc == encoding)
5249 eventPtr = entityTextPtr;
5250 result = XML_ERROR_INVALID_TOKEN;
5251 goto endEntityValue;
5252 case XML_TOK_INVALID:
5253 if (enc == encoding)
5254 eventPtr = next;
5255 result = XML_ERROR_INVALID_TOKEN;
5256 goto endEntityValue;
5257 default:
5258 if (enc == encoding)
5259 eventPtr = entityTextPtr;
5260 result = XML_ERROR_UNEXPECTED_STATE;
5261 goto endEntityValue;
5263 entityTextPtr = next;
5265 endEntityValue:
5266 #ifdef XML_DTD
5267 prologState.inEntityValue = oldInEntityValue;
5268 #endif /* XML_DTD */
5269 return result;
5272 static void FASTCALL
5273 normalizeLines(XML_Char *s)
5275 XML_Char *p;
5276 for (;; s++) {
5277 if (*s == XML_T('\0'))
5278 return;
5279 if (*s == 0xD)
5280 break;
5282 p = s;
5283 do {
5284 if (*s == 0xD) {
5285 *p++ = 0xA;
5286 if (*++s == 0xA)
5287 s++;
5289 else
5290 *p++ = *s++;
5291 } while (*s);
5292 *p = XML_T('\0');
5295 static int
5296 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5297 const char *start, const char *end)
5299 const XML_Char *target;
5300 XML_Char *data;
5301 const char *tem;
5302 if (!processingInstructionHandler) {
5303 if (defaultHandler)
5304 reportDefault(parser, enc, start, end);
5305 return 1;
5307 start += enc->minBytesPerChar * 2;
5308 tem = start + XmlNameLength(enc, start);
5309 target = poolStoreString(&tempPool, enc, start, tem);
5310 if (!target)
5311 return 0;
5312 poolFinish(&tempPool);
5313 data = poolStoreString(&tempPool, enc,
5314 XmlSkipS(enc, tem),
5315 end - enc->minBytesPerChar*2);
5316 if (!data)
5317 return 0;
5318 normalizeLines(data);
5319 processingInstructionHandler(handlerArg, target, data);
5320 poolClear(&tempPool);
5321 return 1;
5324 static int
5325 reportComment(XML_Parser parser, const ENCODING *enc,
5326 const char *start, const char *end)
5328 XML_Char *data;
5329 if (!commentHandler) {
5330 if (defaultHandler)
5331 reportDefault(parser, enc, start, end);
5332 return 1;
5334 data = poolStoreString(&tempPool,
5335 enc,
5336 start + enc->minBytesPerChar * 4,
5337 end - enc->minBytesPerChar * 3);
5338 if (!data)
5339 return 0;
5340 normalizeLines(data);
5341 commentHandler(handlerArg, data);
5342 poolClear(&tempPool);
5343 return 1;
5346 static void
5347 reportDefault(XML_Parser parser, const ENCODING *enc,
5348 const char *s, const char *end)
5350 if (MUST_CONVERT(enc, s)) {
5351 const char **eventPP;
5352 const char **eventEndPP;
5353 if (enc == encoding) {
5354 eventPP = &eventPtr;
5355 eventEndPP = &eventEndPtr;
5357 else {
5358 eventPP = &(openInternalEntities->internalEventPtr);
5359 eventEndPP = &(openInternalEntities->internalEventEndPtr);
5361 do {
5362 ICHAR *dataPtr = (ICHAR *)dataBuf;
5363 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
5364 *eventEndPP = s;
5365 defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
5366 *eventPP = s;
5367 } while (s != end);
5369 else
5370 defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
5374 static int
5375 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5376 XML_Bool isId, const XML_Char *value, XML_Parser parser)
5378 DEFAULT_ATTRIBUTE *att;
5379 if (value || isId) {
5380 /* The handling of default attributes gets messed up if we have
5381 a default which duplicates a non-default. */
5382 int i;
5383 for (i = 0; i < type->nDefaultAtts; i++)
5384 if (attId == type->defaultAtts[i].id)
5385 return 1;
5386 if (isId && !type->idAtt && !attId->xmlns)
5387 type->idAtt = attId;
5389 if (type->nDefaultAtts == type->allocDefaultAtts) {
5390 if (type->allocDefaultAtts == 0) {
5391 type->allocDefaultAtts = 8;
5392 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts
5393 * sizeof(DEFAULT_ATTRIBUTE));
5394 if (!type->defaultAtts)
5395 return 0;
5397 else {
5398 DEFAULT_ATTRIBUTE *temp;
5399 int count = type->allocDefaultAtts * 2;
5400 temp = (DEFAULT_ATTRIBUTE *)
5401 REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
5402 if (temp == NULL)
5403 return 0;
5404 type->allocDefaultAtts = count;
5405 type->defaultAtts = temp;
5408 att = type->defaultAtts + type->nDefaultAtts;
5409 att->id = attId;
5410 att->value = value;
5411 att->isCdata = isCdata;
5412 if (!isCdata)
5413 attId->maybeTokenized = XML_TRUE;
5414 type->nDefaultAtts += 1;
5415 return 1;
5418 static int
5419 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
5421 DTD * const dtd = _dtd; /* save one level of indirection */
5422 const XML_Char *name;
5423 for (name = elementType->name; *name; name++) {
5424 if (*name == XML_T(ASCII_COLON)) {
5425 PREFIX *prefix;
5426 const XML_Char *s;
5427 for (s = elementType->name; s != name; s++) {
5428 if (!poolAppendChar(&dtd->pool, *s))
5429 return 0;
5431 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5432 return 0;
5433 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
5434 sizeof(PREFIX));
5435 if (!prefix)
5436 return 0;
5437 if (prefix->name == poolStart(&dtd->pool))
5438 poolFinish(&dtd->pool);
5439 else
5440 poolDiscard(&dtd->pool);
5441 elementType->prefix = prefix;
5445 return 1;
5448 static ATTRIBUTE_ID *
5449 getAttributeId(XML_Parser parser, const ENCODING *enc,
5450 const char *start, const char *end)
5452 DTD * const dtd = _dtd; /* save one level of indirection */
5453 ATTRIBUTE_ID *id;
5454 const XML_Char *name;
5455 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5456 return NULL;
5457 name = poolStoreString(&dtd->pool, enc, start, end);
5458 if (!name)
5459 return NULL;
5460 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
5461 ++name;
5462 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
5463 if (!id)
5464 return NULL;
5465 if (id->name != name)
5466 poolDiscard(&dtd->pool);
5467 else {
5468 poolFinish(&dtd->pool);
5469 if (!ns)
5471 else if (name[0] == XML_T(ASCII_x)
5472 && name[1] == XML_T(ASCII_m)
5473 && name[2] == XML_T(ASCII_l)
5474 && name[3] == XML_T(ASCII_n)
5475 && name[4] == XML_T(ASCII_s)
5476 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
5477 if (name[5] == XML_T('\0'))
5478 id->prefix = &dtd->defaultPrefix;
5479 else
5480 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
5481 id->xmlns = XML_TRUE;
5483 else {
5484 int i;
5485 for (i = 0; name[i]; i++) {
5486 /* attributes without prefix are *not* in the default namespace */
5487 if (name[i] == XML_T(ASCII_COLON)) {
5488 int j;
5489 for (j = 0; j < i; j++) {
5490 if (!poolAppendChar(&dtd->pool, name[j]))
5491 return NULL;
5493 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5494 return NULL;
5495 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
5496 sizeof(PREFIX));
5497 if (id->prefix->name == poolStart(&dtd->pool))
5498 poolFinish(&dtd->pool);
5499 else
5500 poolDiscard(&dtd->pool);
5501 break;
5506 return id;
5509 #define CONTEXT_SEP XML_T(ASCII_FF)
5511 static const XML_Char *
5512 getContext(XML_Parser parser)
5514 DTD * const dtd = _dtd; /* save one level of indirection */
5515 HASH_TABLE_ITER iter;
5516 XML_Bool needSep = XML_FALSE;
5518 if (dtd->defaultPrefix.binding) {
5519 int i;
5520 int len;
5521 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
5522 return NULL;
5523 len = dtd->defaultPrefix.binding->uriLen;
5524 if (namespaceSeparator)
5525 len--;
5526 for (i = 0; i < len; i++)
5527 if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i]))
5528 return NULL;
5529 needSep = XML_TRUE;
5532 hashTableIterInit(&iter, &(dtd->prefixes));
5533 for (;;) {
5534 int i;
5535 int len;
5536 const XML_Char *s;
5537 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
5538 if (!prefix)
5539 break;
5540 if (!prefix->binding)
5541 continue;
5542 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
5543 return NULL;
5544 for (s = prefix->name; *s; s++)
5545 if (!poolAppendChar(&tempPool, *s))
5546 return NULL;
5547 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
5548 return NULL;
5549 len = prefix->binding->uriLen;
5550 if (namespaceSeparator)
5551 len--;
5552 for (i = 0; i < len; i++)
5553 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
5554 return NULL;
5555 needSep = XML_TRUE;
5559 hashTableIterInit(&iter, &(dtd->generalEntities));
5560 for (;;) {
5561 const XML_Char *s;
5562 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
5563 if (!e)
5564 break;
5565 if (!e->open)
5566 continue;
5567 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
5568 return NULL;
5569 for (s = e->name; *s; s++)
5570 if (!poolAppendChar(&tempPool, *s))
5571 return 0;
5572 needSep = XML_TRUE;
5575 if (!poolAppendChar(&tempPool, XML_T('\0')))
5576 return NULL;
5577 return tempPool.start;
5580 static XML_Bool
5581 setContext(XML_Parser parser, const XML_Char *context)
5583 DTD * const dtd = _dtd; /* save one level of indirection */
5584 const XML_Char *s = context;
5586 while (*context != XML_T('\0')) {
5587 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
5588 ENTITY *e;
5589 if (!poolAppendChar(&tempPool, XML_T('\0')))
5590 return XML_FALSE;
5591 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0);
5592 if (e)
5593 e->open = XML_TRUE;
5594 if (*s != XML_T('\0'))
5595 s++;
5596 context = s;
5597 poolDiscard(&tempPool);
5599 else if (*s == XML_T(ASCII_EQUALS)) {
5600 PREFIX *prefix;
5601 if (poolLength(&tempPool) == 0)
5602 prefix = &dtd->defaultPrefix;
5603 else {
5604 if (!poolAppendChar(&tempPool, XML_T('\0')))
5605 return XML_FALSE;
5606 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool),
5607 sizeof(PREFIX));
5608 if (!prefix)
5609 return XML_FALSE;
5610 if (prefix->name == poolStart(&tempPool)) {
5611 prefix->name = poolCopyString(&dtd->pool, prefix->name);
5612 if (!prefix->name)
5613 return XML_FALSE;
5615 poolDiscard(&tempPool);
5617 for (context = s + 1;
5618 *context != CONTEXT_SEP && *context != XML_T('\0');
5619 context++)
5620 if (!poolAppendChar(&tempPool, *context))
5621 return XML_FALSE;
5622 if (!poolAppendChar(&tempPool, XML_T('\0')))
5623 return XML_FALSE;
5624 if (addBinding(parser, prefix, NULL, poolStart(&tempPool),
5625 &inheritedBindings) != XML_ERROR_NONE)
5626 return XML_FALSE;
5627 poolDiscard(&tempPool);
5628 if (*context != XML_T('\0'))
5629 ++context;
5630 s = context;
5632 else {
5633 if (!poolAppendChar(&tempPool, *s))
5634 return XML_FALSE;
5635 s++;
5638 return XML_TRUE;
5641 static void FASTCALL
5642 normalizePublicId(XML_Char *publicId)
5644 XML_Char *p = publicId;
5645 XML_Char *s;
5646 for (s = publicId; *s; s++) {
5647 switch (*s) {
5648 case 0x20:
5649 case 0xD:
5650 case 0xA:
5651 if (p != publicId && p[-1] != 0x20)
5652 *p++ = 0x20;
5653 break;
5654 default:
5655 *p++ = *s;
5658 if (p != publicId && p[-1] == 0x20)
5659 --p;
5660 *p = XML_T('\0');
5663 static DTD *
5664 dtdCreate(const XML_Memory_Handling_Suite *ms)
5666 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
5667 if (p == NULL)
5668 return p;
5669 poolInit(&(p->pool), ms);
5670 poolInit(&(p->entityValuePool), ms);
5671 hashTableInit(&(p->generalEntities), ms);
5672 hashTableInit(&(p->elementTypes), ms);
5673 hashTableInit(&(p->attributeIds), ms);
5674 hashTableInit(&(p->prefixes), ms);
5675 #ifdef XML_DTD
5676 p->paramEntityRead = XML_FALSE;
5677 hashTableInit(&(p->paramEntities), ms);
5678 #endif /* XML_DTD */
5679 p->defaultPrefix.name = NULL;
5680 p->defaultPrefix.binding = NULL;
5682 p->in_eldecl = XML_FALSE;
5683 p->scaffIndex = NULL;
5684 p->scaffold = NULL;
5685 p->scaffLevel = 0;
5686 p->scaffSize = 0;
5687 p->scaffCount = 0;
5688 p->contentStringLen = 0;
5690 p->keepProcessing = XML_TRUE;
5691 p->hasParamEntityRefs = XML_FALSE;
5692 p->standalone = XML_FALSE;
5693 return p;
5696 static void
5697 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
5699 HASH_TABLE_ITER iter;
5700 hashTableIterInit(&iter, &(p->elementTypes));
5701 for (;;) {
5702 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5703 if (!e)
5704 break;
5705 if (e->allocDefaultAtts != 0)
5706 ms->free_fcn(e->defaultAtts);
5708 hashTableClear(&(p->generalEntities));
5709 #ifdef XML_DTD
5710 p->paramEntityRead = XML_FALSE;
5711 hashTableClear(&(p->paramEntities));
5712 #endif /* XML_DTD */
5713 hashTableClear(&(p->elementTypes));
5714 hashTableClear(&(p->attributeIds));
5715 hashTableClear(&(p->prefixes));
5716 poolClear(&(p->pool));
5717 poolClear(&(p->entityValuePool));
5718 p->defaultPrefix.name = NULL;
5719 p->defaultPrefix.binding = NULL;
5721 p->in_eldecl = XML_FALSE;
5723 ms->free_fcn(p->scaffIndex);
5724 p->scaffIndex = NULL;
5725 ms->free_fcn(p->scaffold);
5726 p->scaffold = NULL;
5728 p->scaffLevel = 0;
5729 p->scaffSize = 0;
5730 p->scaffCount = 0;
5731 p->contentStringLen = 0;
5733 p->keepProcessing = XML_TRUE;
5734 p->hasParamEntityRefs = XML_FALSE;
5735 p->standalone = XML_FALSE;
5738 static void
5739 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
5741 HASH_TABLE_ITER iter;
5742 hashTableIterInit(&iter, &(p->elementTypes));
5743 for (;;) {
5744 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5745 if (!e)
5746 break;
5747 if (e->allocDefaultAtts != 0)
5748 ms->free_fcn(e->defaultAtts);
5750 hashTableDestroy(&(p->generalEntities));
5751 #ifdef XML_DTD
5752 hashTableDestroy(&(p->paramEntities));
5753 #endif /* XML_DTD */
5754 hashTableDestroy(&(p->elementTypes));
5755 hashTableDestroy(&(p->attributeIds));
5756 hashTableDestroy(&(p->prefixes));
5757 poolDestroy(&(p->pool));
5758 poolDestroy(&(p->entityValuePool));
5759 if (isDocEntity) {
5760 ms->free_fcn(p->scaffIndex);
5761 ms->free_fcn(p->scaffold);
5763 ms->free_fcn(p);
5766 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
5767 The new DTD has already been initialized.
5769 static int
5770 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
5772 HASH_TABLE_ITER iter;
5774 /* Copy the prefix table. */
5776 hashTableIterInit(&iter, &(oldDtd->prefixes));
5777 for (;;) {
5778 const XML_Char *name;
5779 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
5780 if (!oldP)
5781 break;
5782 name = poolCopyString(&(newDtd->pool), oldP->name);
5783 if (!name)
5784 return 0;
5785 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
5786 return 0;
5789 hashTableIterInit(&iter, &(oldDtd->attributeIds));
5791 /* Copy the attribute id table. */
5793 for (;;) {
5794 ATTRIBUTE_ID *newA;
5795 const XML_Char *name;
5796 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
5798 if (!oldA)
5799 break;
5800 /* Remember to allocate the scratch byte before the name. */
5801 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
5802 return 0;
5803 name = poolCopyString(&(newDtd->pool), oldA->name);
5804 if (!name)
5805 return 0;
5806 ++name;
5807 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
5808 sizeof(ATTRIBUTE_ID));
5809 if (!newA)
5810 return 0;
5811 newA->maybeTokenized = oldA->maybeTokenized;
5812 if (oldA->prefix) {
5813 newA->xmlns = oldA->xmlns;
5814 if (oldA->prefix == &oldDtd->defaultPrefix)
5815 newA->prefix = &newDtd->defaultPrefix;
5816 else
5817 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
5818 oldA->prefix->name, 0);
5822 /* Copy the element type table. */
5824 hashTableIterInit(&iter, &(oldDtd->elementTypes));
5826 for (;;) {
5827 int i;
5828 ELEMENT_TYPE *newE;
5829 const XML_Char *name;
5830 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5831 if (!oldE)
5832 break;
5833 name = poolCopyString(&(newDtd->pool), oldE->name);
5834 if (!name)
5835 return 0;
5836 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
5837 sizeof(ELEMENT_TYPE));
5838 if (!newE)
5839 return 0;
5840 if (oldE->nDefaultAtts) {
5841 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
5842 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
5843 if (!newE->defaultAtts) {
5844 ms->free_fcn(newE);
5845 return 0;
5848 if (oldE->idAtt)
5849 newE->idAtt = (ATTRIBUTE_ID *)
5850 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
5851 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
5852 if (oldE->prefix)
5853 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
5854 oldE->prefix->name, 0);
5855 for (i = 0; i < newE->nDefaultAtts; i++) {
5856 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
5857 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
5858 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
5859 if (oldE->defaultAtts[i].value) {
5860 newE->defaultAtts[i].value
5861 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
5862 if (!newE->defaultAtts[i].value)
5863 return 0;
5865 else
5866 newE->defaultAtts[i].value = NULL;
5870 /* Copy the entity tables. */
5871 if (!copyEntityTable(oldParser,
5872 &(newDtd->generalEntities),
5873 &(newDtd->pool),
5874 &(oldDtd->generalEntities)))
5875 return 0;
5877 #ifdef XML_DTD
5878 if (!copyEntityTable(oldParser,
5879 &(newDtd->paramEntities),
5880 &(newDtd->pool),
5881 &(oldDtd->paramEntities)))
5882 return 0;
5883 newDtd->paramEntityRead = oldDtd->paramEntityRead;
5884 #endif /* XML_DTD */
5886 newDtd->keepProcessing = oldDtd->keepProcessing;
5887 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
5888 newDtd->standalone = oldDtd->standalone;
5890 /* Don't want deep copying for scaffolding */
5891 newDtd->in_eldecl = oldDtd->in_eldecl;
5892 newDtd->scaffold = oldDtd->scaffold;
5893 newDtd->contentStringLen = oldDtd->contentStringLen;
5894 newDtd->scaffSize = oldDtd->scaffSize;
5895 newDtd->scaffLevel = oldDtd->scaffLevel;
5896 newDtd->scaffIndex = oldDtd->scaffIndex;
5898 return 1;
5899 } /* End dtdCopy */
5901 static int
5902 copyEntityTable(XML_Parser oldParser,
5903 HASH_TABLE *newTable,
5904 STRING_POOL *newPool,
5905 const HASH_TABLE *oldTable)
5907 HASH_TABLE_ITER iter;
5908 const XML_Char *cachedOldBase = NULL;
5909 const XML_Char *cachedNewBase = NULL;
5911 hashTableIterInit(&iter, oldTable);
5913 for (;;) {
5914 ENTITY *newE;
5915 const XML_Char *name;
5916 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
5917 if (!oldE)
5918 break;
5919 name = poolCopyString(newPool, oldE->name);
5920 if (!name)
5921 return 0;
5922 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
5923 if (!newE)
5924 return 0;
5925 if (oldE->systemId) {
5926 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
5927 if (!tem)
5928 return 0;
5929 newE->systemId = tem;
5930 if (oldE->base) {
5931 if (oldE->base == cachedOldBase)
5932 newE->base = cachedNewBase;
5933 else {
5934 cachedOldBase = oldE->base;
5935 tem = poolCopyString(newPool, cachedOldBase);
5936 if (!tem)
5937 return 0;
5938 cachedNewBase = newE->base = tem;
5941 if (oldE->publicId) {
5942 tem = poolCopyString(newPool, oldE->publicId);
5943 if (!tem)
5944 return 0;
5945 newE->publicId = tem;
5948 else {
5949 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
5950 oldE->textLen);
5951 if (!tem)
5952 return 0;
5953 newE->textPtr = tem;
5954 newE->textLen = oldE->textLen;
5956 if (oldE->notation) {
5957 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
5958 if (!tem)
5959 return 0;
5960 newE->notation = tem;
5962 newE->is_param = oldE->is_param;
5963 newE->is_internal = oldE->is_internal;
5965 return 1;
5968 #define INIT_POWER 6
5970 static XML_Bool FASTCALL
5971 keyeq(KEY s1, KEY s2)
5973 for (; *s1 == *s2; s1++, s2++)
5974 if (*s1 == 0)
5975 return XML_TRUE;
5976 return XML_FALSE;
5979 static unsigned long FASTCALL
5980 hash(XML_Parser parser, KEY s)
5982 unsigned long h = hash_secret_salt;
5983 while (*s)
5984 h = CHAR_HASH(h, *s++);
5985 return h;
5988 static NAMED *
5989 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
5991 size_t i;
5992 if (table->size == 0) {
5993 size_t tsize;
5994 if (!createSize)
5995 return NULL;
5996 table->power = INIT_POWER;
5997 /* table->size is a power of 2 */
5998 table->size = (size_t)1 << INIT_POWER;
5999 tsize = table->size * sizeof(NAMED *);
6000 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
6001 if (!table->v) {
6002 table->size = 0;
6003 return NULL;
6005 memset(table->v, 0, tsize);
6006 i = hash(parser, name) & ((unsigned long)table->size - 1);
6008 else {
6009 unsigned long h = hash(parser, name);
6010 unsigned long mask = (unsigned long)table->size - 1;
6011 unsigned char step = 0;
6012 i = h & mask;
6013 while (table->v[i]) {
6014 if (keyeq(name, table->v[i]->name))
6015 return table->v[i];
6016 if (!step)
6017 step = PROBE_STEP(h, mask, table->power);
6018 i < step ? (i += table->size - step) : (i -= step);
6020 if (!createSize)
6021 return NULL;
6023 /* check for overflow (table is half full) */
6024 if (table->used >> (table->power - 1)) {
6025 unsigned char newPower = table->power + 1;
6026 size_t newSize = (size_t)1 << newPower;
6027 unsigned long newMask = (unsigned long)newSize - 1;
6028 size_t tsize = newSize * sizeof(NAMED *);
6029 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6030 if (!newV)
6031 return NULL;
6032 memset(newV, 0, tsize);
6033 for (i = 0; i < table->size; i++)
6034 if (table->v[i]) {
6035 unsigned long newHash = hash(parser, table->v[i]->name);
6036 size_t j = newHash & newMask;
6037 step = 0;
6038 while (newV[j]) {
6039 if (!step)
6040 step = PROBE_STEP(newHash, newMask, newPower);
6041 j < step ? (j += newSize - step) : (j -= step);
6043 newV[j] = table->v[i];
6045 table->mem->free_fcn(table->v);
6046 table->v = newV;
6047 table->power = newPower;
6048 table->size = newSize;
6049 i = h & newMask;
6050 step = 0;
6051 while (table->v[i]) {
6052 if (!step)
6053 step = PROBE_STEP(h, newMask, newPower);
6054 i < step ? (i += newSize - step) : (i -= step);
6058 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6059 if (!table->v[i])
6060 return NULL;
6061 memset(table->v[i], 0, createSize);
6062 table->v[i]->name = name;
6063 (table->used)++;
6064 return table->v[i];
6067 static void FASTCALL
6068 hashTableClear(HASH_TABLE *table)
6070 size_t i;
6071 for (i = 0; i < table->size; i++) {
6072 table->mem->free_fcn(table->v[i]);
6073 table->v[i] = NULL;
6075 table->used = 0;
6078 static void FASTCALL
6079 hashTableDestroy(HASH_TABLE *table)
6081 size_t i;
6082 for (i = 0; i < table->size; i++)
6083 table->mem->free_fcn(table->v[i]);
6084 table->mem->free_fcn(table->v);
6087 static void FASTCALL
6088 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
6090 p->power = 0;
6091 p->size = 0;
6092 p->used = 0;
6093 p->v = NULL;
6094 p->mem = ms;
6097 static void FASTCALL
6098 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
6100 iter->p = table->v;
6101 iter->end = iter->p + table->size;
6104 static NAMED * FASTCALL
6105 hashTableIterNext(HASH_TABLE_ITER *iter)
6107 while (iter->p != iter->end) {
6108 NAMED *tem = *(iter->p)++;
6109 if (tem)
6110 return tem;
6112 return NULL;
6115 static void FASTCALL
6116 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
6118 pool->blocks = NULL;
6119 pool->freeBlocks = NULL;
6120 pool->start = NULL;
6121 pool->ptr = NULL;
6122 pool->end = NULL;
6123 pool->mem = ms;
6126 static void FASTCALL
6127 poolClear(STRING_POOL *pool)
6129 if (!pool->freeBlocks)
6130 pool->freeBlocks = pool->blocks;
6131 else {
6132 BLOCK *p = pool->blocks;
6133 while (p) {
6134 BLOCK *tem = p->next;
6135 p->next = pool->freeBlocks;
6136 pool->freeBlocks = p;
6137 p = tem;
6140 pool->blocks = NULL;
6141 pool->start = NULL;
6142 pool->ptr = NULL;
6143 pool->end = NULL;
6146 static void FASTCALL
6147 poolDestroy(STRING_POOL *pool)
6149 BLOCK *p = pool->blocks;
6150 while (p) {
6151 BLOCK *tem = p->next;
6152 pool->mem->free_fcn(p);
6153 p = tem;
6155 p = pool->freeBlocks;
6156 while (p) {
6157 BLOCK *tem = p->next;
6158 pool->mem->free_fcn(p);
6159 p = tem;
6163 static XML_Char *
6164 poolAppend(STRING_POOL *pool, const ENCODING *enc,
6165 const char *ptr, const char *end)
6167 if (!pool->ptr && !poolGrow(pool))
6168 return NULL;
6169 for (;;) {
6170 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6171 if (ptr == end)
6172 break;
6173 if (!poolGrow(pool))
6174 return NULL;
6176 return pool->start;
6179 static const XML_Char * FASTCALL
6180 poolCopyString(STRING_POOL *pool, const XML_Char *s)
6182 do {
6183 if (!poolAppendChar(pool, *s))
6184 return NULL;
6185 } while (*s++);
6186 s = pool->start;
6187 poolFinish(pool);
6188 return s;
6191 static const XML_Char *
6192 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
6194 if (!pool->ptr && !poolGrow(pool))
6195 return NULL;
6196 for (; n > 0; --n, s++) {
6197 if (!poolAppendChar(pool, *s))
6198 return NULL;
6200 s = pool->start;
6201 poolFinish(pool);
6202 return s;
6205 static const XML_Char * FASTCALL
6206 poolAppendString(STRING_POOL *pool, const XML_Char *s)
6208 while (*s) {
6209 if (!poolAppendChar(pool, *s))
6210 return NULL;
6211 s++;
6213 return pool->start;
6216 static XML_Char *
6217 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6218 const char *ptr, const char *end)
6220 if (!poolAppend(pool, enc, ptr, end))
6221 return NULL;
6222 if (pool->ptr == pool->end && !poolGrow(pool))
6223 return NULL;
6224 *(pool->ptr)++ = 0;
6225 return pool->start;
6228 static XML_Bool FASTCALL
6229 poolGrow(STRING_POOL *pool)
6231 if (pool->freeBlocks) {
6232 if (pool->start == 0) {
6233 pool->blocks = pool->freeBlocks;
6234 pool->freeBlocks = pool->freeBlocks->next;
6235 pool->blocks->next = NULL;
6236 pool->start = pool->blocks->s;
6237 pool->end = pool->start + pool->blocks->size;
6238 pool->ptr = pool->start;
6239 return XML_TRUE;
6241 if (pool->end - pool->start < pool->freeBlocks->size) {
6242 BLOCK *tem = pool->freeBlocks->next;
6243 pool->freeBlocks->next = pool->blocks;
6244 pool->blocks = pool->freeBlocks;
6245 pool->freeBlocks = tem;
6246 memcpy(pool->blocks->s, pool->start,
6247 (pool->end - pool->start) * sizeof(XML_Char));
6248 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6249 pool->start = pool->blocks->s;
6250 pool->end = pool->start + pool->blocks->size;
6251 return XML_TRUE;
6254 if (pool->blocks && pool->start == pool->blocks->s) {
6255 int blockSize = (int)(pool->end - pool->start)*2;
6256 BLOCK *temp = (BLOCK *)
6257 pool->mem->realloc_fcn(pool->blocks,
6258 (offsetof(BLOCK, s)
6259 + blockSize * sizeof(XML_Char)));
6260 if (temp == NULL)
6261 return XML_FALSE;
6262 pool->blocks = temp;
6263 pool->blocks->size = blockSize;
6264 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6265 pool->start = pool->blocks->s;
6266 pool->end = pool->start + blockSize;
6268 else {
6269 BLOCK *tem;
6270 int blockSize = (int)(pool->end - pool->start);
6271 if (blockSize < INIT_BLOCK_SIZE)
6272 blockSize = INIT_BLOCK_SIZE;
6273 else
6274 blockSize *= 2;
6275 tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s)
6276 + blockSize * sizeof(XML_Char));
6277 if (!tem)
6278 return XML_FALSE;
6279 tem->size = blockSize;
6280 tem->next = pool->blocks;
6281 pool->blocks = tem;
6282 if (pool->ptr != pool->start)
6283 memcpy(tem->s, pool->start,
6284 (pool->ptr - pool->start) * sizeof(XML_Char));
6285 pool->ptr = tem->s + (pool->ptr - pool->start);
6286 pool->start = tem->s;
6287 pool->end = tem->s + blockSize;
6289 return XML_TRUE;
6292 static int FASTCALL
6293 nextScaffoldPart(XML_Parser parser)
6295 DTD * const dtd = _dtd; /* save one level of indirection */
6296 CONTENT_SCAFFOLD * me;
6297 int next;
6299 if (!dtd->scaffIndex) {
6300 dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int));
6301 if (!dtd->scaffIndex)
6302 return -1;
6303 dtd->scaffIndex[0] = 0;
6306 if (dtd->scaffCount >= dtd->scaffSize) {
6307 CONTENT_SCAFFOLD *temp;
6308 if (dtd->scaffold) {
6309 temp = (CONTENT_SCAFFOLD *)
6310 REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
6311 if (temp == NULL)
6312 return -1;
6313 dtd->scaffSize *= 2;
6315 else {
6316 temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS
6317 * sizeof(CONTENT_SCAFFOLD));
6318 if (temp == NULL)
6319 return -1;
6320 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
6322 dtd->scaffold = temp;
6324 next = dtd->scaffCount++;
6325 me = &dtd->scaffold[next];
6326 if (dtd->scaffLevel) {
6327 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
6328 if (parent->lastchild) {
6329 dtd->scaffold[parent->lastchild].nextsib = next;
6331 if (!parent->childcnt)
6332 parent->firstchild = next;
6333 parent->lastchild = next;
6334 parent->childcnt++;
6336 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
6337 return next;
6340 static void
6341 build_node(XML_Parser parser,
6342 int src_node,
6343 XML_Content *dest,
6344 XML_Content **contpos,
6345 XML_Char **strpos)
6347 DTD * const dtd = _dtd; /* save one level of indirection */
6348 dest->type = dtd->scaffold[src_node].type;
6349 dest->quant = dtd->scaffold[src_node].quant;
6350 if (dest->type == XML_CTYPE_NAME) {
6351 const XML_Char *src;
6352 dest->name = *strpos;
6353 src = dtd->scaffold[src_node].name;
6354 for (;;) {
6355 *(*strpos)++ = *src;
6356 if (!*src)
6357 break;
6358 src++;
6360 dest->numchildren = 0;
6361 dest->children = NULL;
6363 else {
6364 unsigned int i;
6365 int cn;
6366 dest->numchildren = dtd->scaffold[src_node].childcnt;
6367 dest->children = *contpos;
6368 *contpos += dest->numchildren;
6369 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
6370 i < dest->numchildren;
6371 i++, cn = dtd->scaffold[cn].nextsib) {
6372 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
6374 dest->name = NULL;
6378 static XML_Content *
6379 build_model (XML_Parser parser)
6381 DTD * const dtd = _dtd; /* save one level of indirection */
6382 XML_Content *ret;
6383 XML_Content *cpos;
6384 XML_Char * str;
6385 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
6386 + (dtd->contentStringLen * sizeof(XML_Char)));
6388 ret = (XML_Content *)MALLOC(allocsize);
6389 if (!ret)
6390 return NULL;
6392 str = (XML_Char *) (&ret[dtd->scaffCount]);
6393 cpos = &ret[1];
6395 build_node(parser, 0, ret, &cpos, &str);
6396 return ret;
6399 static ELEMENT_TYPE *
6400 getElementType(XML_Parser parser,
6401 const ENCODING *enc,
6402 const char *ptr,
6403 const char *end)
6405 DTD * const dtd = _dtd; /* save one level of indirection */
6406 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
6407 ELEMENT_TYPE *ret;
6409 if (!name)
6410 return NULL;
6411 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
6412 if (!ret)
6413 return NULL;
6414 if (ret->name != name)
6415 poolDiscard(&dtd->pool);
6416 else {
6417 poolFinish(&dtd->pool);
6418 if (!setElementTypePrefix(parser, ret))
6419 return NULL;
6421 return ret;