dsrc isn't necessary for this repo
[client-tools.git] / src / external / 3rd / library / libxml / parser.c
blob41e4caf1fc2c263c2bf9885d8d64f299c2a6f47b
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
28 * See Copyright for the status of this software.
30 * daniel@veillard.com
33 #define IN_LIBXML
34 #include "libxml.h"
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
42 #include <stdlib.h>
43 #include <string.h>
44 #include <libxml/xmlmemory.h>
45 #include <libxml/threads.h>
46 #include <libxml/globals.h>
47 #include <libxml/tree.h>
48 #include <libxml/parser.h>
49 #include <libxml/parserInternals.h>
50 #include <libxml/valid.h>
51 #include <libxml/entities.h>
52 #include <libxml/xmlerror.h>
53 #include <libxml/encoding.h>
54 #include <libxml/xmlIO.h>
55 #include <libxml/uri.h>
56 #ifdef LIBXML_CATALOG_ENABLED
57 #include <libxml/catalog.h>
58 #endif
60 #ifdef HAVE_CTYPE_H
61 #include <ctype.h>
62 #endif
63 #ifdef HAVE_STDLIB_H
64 #include <stdlib.h>
65 #endif
66 #ifdef HAVE_SYS_STAT_H
67 #include <sys/stat.h>
68 #endif
69 #ifdef HAVE_FCNTL_H
70 #include <fcntl.h>
71 #endif
72 #ifdef HAVE_UNISTD_H
73 #include <unistd.h>
74 #endif
75 #ifdef HAVE_ZLIB_H
76 #include <zlib.h>
77 #endif
80 #define XML_PARSER_BIG_BUFFER_SIZE 300
81 #define XML_PARSER_BUFFER_SIZE 100
83 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
86 * List of XML prefixed PI allowed by W3C specs
/* Table of PI target names; the final NULL entry acts as the end marker. */
89 static const char *xmlW3CPIs[] = {
90 "xml-stylesheet",
91 NULL
94 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
95 xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
98 static int
99 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
101 void *user_data, int depth, const xmlChar *URL,
102 const xmlChar *ID, xmlNodePtr *list);
104 static void
105 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
108 static int
109 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
111 /************************************************************************
113 * Parser stacks related functions and macros *
115 ************************************************************************/
117 xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions operating on the
 * context fields name##Tab (storage), name##Nr (used entries), name##Max
 * (capacity) and name (copy of the top entry):
 *
 *   scope int  name##Push(ctxt, value)  - returns the index used, or 0 when
 *                                         the table could not be enlarged
 *   scope type name##Pop(ctxt)          - returns the removed entry, or 0
 *                                         when the stack is empty
 *
 * The table is enlarged through a temporary pointer so that a failed
 * xmlRealloc() leaves the existing table and its recorded capacity intact
 * instead of leaking the table and recording a capacity that was never
 * allocated.
 */
#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        type *tmp;                                                      \
        tmp = (type *) xmlRealloc(ctxt->name##Tab,                      \
                 ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));     \
        if (tmp == NULL) {                                              \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = tmp;                                          \
        ctxt->name##Max *= 2;                                           \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
154 * inputPop:
155 * @ctxt: an XML parser context
157 * Pops the top parser input from the input stack
159 * Returns the input just removed
162 * inputPush:
163 * @ctxt: an XML parser context
164 * @value: the parser input
166 * Pushes a new parser input on top of the input stack
168 * Returns 0 in case of error, the index in the stack otherwise
171 * namePop:
172 * @ctxt: an XML parser context
174 * Pops the top element name from the name stack
176 * Returns the name just removed
179 * namePush:
180 * @ctxt: an XML parser context
181 * @value: the element name
183 * Pushes a new element name on top of the name stack
185 * Returns 0 in case of error, the index in the stack otherwise
188 * nodePop:
189 * @ctxt: an XML parser context
191 * Pops the top element node from the node stack
193 * Returns the node just removed
196 * nodePush:
197 * @ctxt: an XML parser context
198 * @value: the element node
200 * Pushes a new element node on top of the node stack
202 * Returns 0 in case of error, the index in the stack otherwise
205 * Those macros actually generate the functions
/*
 * Each line expands to a Push/Pop pair with extern linkage:
 * inputPush/inputPop, nodePush/nodePop and namePush/namePop.
 */
207 PUSH_AND_POP(extern, xmlParserInputPtr, input)
208 PUSH_AND_POP(extern, xmlNodePtr, node)
209 PUSH_AND_POP(extern, xmlChar*, name)
211 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
212 if (ctxt->spaceNr >= ctxt->spaceMax) {
213 ctxt->spaceMax *= 2;
214 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
215 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
216 if (ctxt->spaceTab == NULL) {
217 xmlGenericError(xmlGenericErrorContext,
218 "realloc failed !\n");
219 return(0);
222 ctxt->spaceTab[ctxt->spaceNr] = val;
223 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
224 return(ctxt->spaceNr++);
227 static int spacePop(xmlParserCtxtPtr ctxt) {
228 int ret;
229 if (ctxt->spaceNr <= 0) return(0);
230 ctxt->spaceNr--;
231 if (ctxt->spaceNr > 0)
232 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
233 else
234 ctxt->space = NULL;
235 ret = ctxt->spaceTab[ctxt->spaceNr];
236 ctxt->spaceTab[ctxt->spaceNr] = -1;
237 return(ret);
241 * Macros for accessing the content. Those should be used only by the parser,
242 * and not exported.
244 * Dirty macros, i.e. one often need to make assumption on the context to
245 * use them
247 * CUR_PTR return the current pointer to the xmlChar to be parsed.
248 * To be used with extreme caution since operations consuming
249 * characters may move the input buffer to a different location !
250 * CUR returns the current xmlChar value, i.e. an 8 bit value.
251 * This should be used internally by the parser
252 * only to compare to ASCII values otherwise it would break when
253 * running with UTF-8 encoding.
254 * RAW same as CUR but in the input buffer, bypass any token
255 * extraction that may have been done
256 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
257 * to compare on ASCII based substring.
258 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
259 * strings within the parser.
261 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
263 * NEXT Skip to the next character, this does the proper decoding
264 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
265 * NEXTL(l) Skip l xmlChar in the input buffer
266 * CUR_CHAR(l) returns the current unicode character (int), set l
267 * to the number of xmlChars used for the encoding [0-5].
268 * CUR_SCHAR same but operate on a string instead of the context
269 * COPY_BUF copy the current unicode char to the target buffer, increment
270 * the index
271 * GROW, SHRINK handling of input buffers
/* RAW and CUR expand to the same expression: the xmlChar under the input cursor. */
274 #define RAW (*ctxt->input->cur)
275 #define CUR (*ctxt->input->cur)
/* NXT(val): the xmlChar val positions after the cursor; no bounds check is done here. */
276 #define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the input cursor pointer itself. */
277 #define CUR_PTR ctxt->input->cur
/*
 * SKIP(val): advance the cursor and ctxt->nbChars by val, then
 *  - hand over to xmlParserHandlePEReference() if the new current char is '%';
 *  - if the new current char is 0 and xmlParserInputGrow() returns <= 0,
 *    call xmlPopInput().
 * val is evaluated twice.
 */
279 #define SKIP(val) do { \
280 ctxt->nbChars += (val),ctxt->input->cur += (val); \
281 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
282 if ((*ctxt->input->cur == 0) && \
283 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
284 xmlPopInput(ctxt); \
285 } while (0)
287 #define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
288 xmlSHRINK (ctxt);
290 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
291 xmlParserInputShrink(ctxt->input);
292 if ((*ctxt->input->cur == 0) &&
293 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
294 xmlPopInput(ctxt);
297 #define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
298 xmlGROW (ctxt);
300 static void xmlGROW (xmlParserCtxtPtr ctxt) {
301 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
302 if ((*ctxt->input->cur == 0) &&
303 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
304 xmlPopInput(ctxt);
/*
 * SKIP_BLANKS      expands to xmlSkipBlankChars(ctxt).
 * NEXT             expands to xmlNextChar(ctxt).
 * NEXT1            advances the cursor and ctxt->nbChars by one, and calls
 *                  xmlParserInputGrow() when the new current byte is 0.
 * NEXTL(l)         updates line/col from the byte under the cursor ('\n'
 *                  starts a new line, anything else bumps col), advances the
 *                  cursor by l, then calls xmlParserHandlePEReference() if
 *                  the new current byte is '%'.
 * CUR_CHAR(l)      expands to xmlCurrentChar(ctxt, &l).
 * CUR_SCHAR(s, l)  expands to xmlStringCurrentChar(ctxt, s, &l).
 * COPY_BUF(l,b,i,v) stores v as a single xmlChar at b[i++] when l == 1,
 *                  otherwise calls xmlCopyCharMultiByte(&b[i], v) and adds
 *                  its return value to i. It expands to an unbraced if/else,
 *                  so it must not be used as the body of an outer unbraced if.
 */
307 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
309 #define NEXT xmlNextChar(ctxt)
311 #define NEXT1 { \
312 ctxt->input->cur++; \
313 ctxt->nbChars++; \
314 if (*ctxt->input->cur == 0) \
315 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
318 #define NEXTL(l) do { \
319 if (*(ctxt->input->cur) == '\n') { \
320 ctxt->input->line++; ctxt->input->col = 1; \
321 } else ctxt->input->col++; \
322 ctxt->input->cur += l; \
323 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
324 } while (0)
326 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
327 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
329 #define COPY_BUF(l,b,i,v) \
330 if (l == 1) b[i++] = (xmlChar) v; \
331 else i += xmlCopyCharMultiByte(&b[i],v)
334 * xmlSkipBlankChars:
335 * @ctxt: the XML parser context
337 * skip all blanks character found at that point in the input streams.
338 * It pops up finished entities in the process if allowable at that point.
340 * Returns the number of space chars skipped
344 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
345 int res = 0;
348 * It's Okay to use CUR/NEXT here since all the blanks are on
349 * the ASCII range.
351 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
352 const xmlChar *cur;
354 * if we are in the document content, go really fast
356 cur = ctxt->input->cur;
357 while (IS_BLANK(*cur)) {
358 if (*cur == '\n') {
359 ctxt->input->line++; ctxt->input->col = 1;
361 cur++;
362 res++;
363 if (*cur == 0) {
364 ctxt->input->cur = cur;
365 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
366 cur = ctxt->input->cur;
369 ctxt->input->cur = cur;
370 } else {
371 int cur;
372 do {
373 cur = CUR;
374 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
375 NEXT;
376 cur = CUR;
377 res++;
379 while ((cur == 0) && (ctxt->inputNr > 1) &&
380 (ctxt->instate != XML_PARSER_COMMENT)) {
381 xmlPopInput(ctxt);
382 cur = CUR;
385 * Need to handle support of entities branching here
387 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
388 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
390 return(res);
393 /************************************************************************
395 * Commodity functions to handle entities *
397 ************************************************************************/
400 * xmlPopInput:
401 * @ctxt: an XML parser context
403 * xmlPopInput: the current input pointed by ctxt->input came to an end
404 * pop it and return the next char.
406 * Returns the current xmlChar in the parser context
408 xmlChar
409 xmlPopInput(xmlParserCtxtPtr ctxt) {
410 if (ctxt->inputNr == 1) return(0); /* End of main Input */
411 if (xmlParserDebugEntities)
412 xmlGenericError(xmlGenericErrorContext,
413 "Popping input %d\n", ctxt->inputNr);
414 xmlFreeInputStream(inputPop(ctxt));
415 if ((*ctxt->input->cur == 0) &&
416 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
417 return(xmlPopInput(ctxt));
418 return(CUR);
422 * xmlPushInput:
423 * @ctxt: an XML parser context
424 * @input: an XML parser input fragment (entity, XML fragment ...).
426 * xmlPushInput: switch to a new input stream which is stacked on top
427 * of the previous one(s).
429 void
430 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
431 if (input == NULL) return;
433 if (xmlParserDebugEntities) {
434 if ((ctxt->input != NULL) && (ctxt->input->filename))
435 xmlGenericError(xmlGenericErrorContext,
436 "%s(%d): ", ctxt->input->filename,
437 ctxt->input->line);
438 xmlGenericError(xmlGenericErrorContext,
439 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
441 inputPush(ctxt, input);
442 GROW;
446 * xmlParseCharRef:
447 * @ctxt: an XML parser context
449 * parse Reference declarations
451 * [66] CharRef ::= '&#' [0-9]+ ';' |
452 * '&#x' [0-9a-fA-F]+ ';'
454 * [ WFC: Legal Character ]
455 * Characters referred to using character references must match the
456 * production for Char.
458 * Returns the value parsed (as an int), 0 in case of error
461 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
462 unsigned int val = 0;
463 int count = 0;
466 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
468 if ((RAW == '&') && (NXT(1) == '#') &&
469 (NXT(2) == 'x')) {
470 SKIP(3);
471 GROW;
472 while (RAW != ';') { /* loop blocked by count */
473 if (count++ > 20) {
474 count = 0;
475 GROW;
477 if ((RAW >= '0') && (RAW <= '9'))
478 val = val * 16 + (CUR - '0');
479 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
480 val = val * 16 + (CUR - 'a') + 10;
481 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
482 val = val * 16 + (CUR - 'A') + 10;
483 else {
484 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
486 ctxt->sax->error(ctxt->userData,
487 "xmlParseCharRef: invalid hexadecimal value\n");
488 ctxt->wellFormed = 0;
489 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
490 val = 0;
491 break;
493 NEXT;
494 count++;
496 if (RAW == ';') {
497 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
498 ctxt->nbChars ++;
499 ctxt->input->cur++;
501 } else if ((RAW == '&') && (NXT(1) == '#')) {
502 SKIP(2);
503 GROW;
504 while (RAW != ';') { /* loop blocked by count */
505 if (count++ > 20) {
506 count = 0;
507 GROW;
509 if ((RAW >= '0') && (RAW <= '9'))
510 val = val * 10 + (CUR - '0');
511 else {
512 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid decimal value\n");
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
518 val = 0;
519 break;
521 NEXT;
522 count++;
524 if (RAW == ';') {
525 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
526 ctxt->nbChars ++;
527 ctxt->input->cur++;
529 } else {
530 ctxt->errNo = XML_ERR_INVALID_CHARREF;
531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
532 ctxt->sax->error(ctxt->userData,
533 "xmlParseCharRef: invalid value\n");
534 ctxt->wellFormed = 0;
535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
539 * [ WFC: Legal Character ]
540 * Characters referred to using character references must match the
541 * production for Char.
543 if (IS_CHAR(val)) {
544 return(val);
545 } else {
546 ctxt->errNo = XML_ERR_INVALID_CHAR;
547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
548 ctxt->sax->error(ctxt->userData,
549 "xmlParseCharRef: invalid xmlChar value %d\n",
550 val);
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
554 return(0);
558 * xmlParseStringCharRef:
559 * @ctxt: an XML parser context
560 * @str: a pointer to an index in the string
562 * parse Reference declarations, variant parsing from a string rather
563 * than an an input flow.
565 * [66] CharRef ::= '&#' [0-9]+ ';' |
566 * '&#x' [0-9a-fA-F]+ ';'
568 * [ WFC: Legal Character ]
569 * Characters referred to using character references must match the
570 * production for Char.
572 * Returns the value parsed (as an int), 0 in case of error, str will be
573 * updated to the current value of the index
575 static int
576 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
577 const xmlChar *ptr;
578 xmlChar cur;
579 int val = 0;
581 if ((str == NULL) || (*str == NULL)) return(0);
582 ptr = *str;
583 cur = *ptr;
584 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
585 ptr += 3;
586 cur = *ptr;
587 while (cur != ';') { /* Non input consuming loop */
588 if ((cur >= '0') && (cur <= '9'))
589 val = val * 16 + (cur - '0');
590 else if ((cur >= 'a') && (cur <= 'f'))
591 val = val * 16 + (cur - 'a') + 10;
592 else if ((cur >= 'A') && (cur <= 'F'))
593 val = val * 16 + (cur - 'A') + 10;
594 else {
595 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
597 ctxt->sax->error(ctxt->userData,
598 "xmlParseStringCharRef: invalid hexadecimal value\n");
599 ctxt->wellFormed = 0;
600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
601 val = 0;
602 break;
604 ptr++;
605 cur = *ptr;
607 if (cur == ';')
608 ptr++;
609 } else if ((cur == '&') && (ptr[1] == '#')){
610 ptr += 2;
611 cur = *ptr;
612 while (cur != ';') { /* Non input consuming loops */
613 if ((cur >= '0') && (cur <= '9'))
614 val = val * 10 + (cur - '0');
615 else {
616 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
618 ctxt->sax->error(ctxt->userData,
619 "xmlParseStringCharRef: invalid decimal value\n");
620 ctxt->wellFormed = 0;
621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
622 val = 0;
623 break;
625 ptr++;
626 cur = *ptr;
628 if (cur == ';')
629 ptr++;
630 } else {
631 ctxt->errNo = XML_ERR_INVALID_CHARREF;
632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
633 ctxt->sax->error(ctxt->userData,
634 "xmlParseStringCharRef: invalid value\n");
635 ctxt->wellFormed = 0;
636 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
637 return(0);
639 *str = ptr;
642 * [ WFC: Legal Character ]
643 * Characters referred to using character references must match the
644 * production for Char.
646 if (IS_CHAR(val)) {
647 return(val);
648 } else {
649 ctxt->errNo = XML_ERR_INVALID_CHAR;
650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
651 ctxt->sax->error(ctxt->userData,
652 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
653 ctxt->wellFormed = 0;
654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
656 return(0);
660 * xmlNewBlanksWrapperInputStream:
661 * @ctxt: an XML parser context
662 * @entity: an Entity pointer
664 * Create a new input stream for wrapping
665 * blanks around a PEReference
667 * Returns the new input stream or NULL
670 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
672 static xmlParserInputPtr
673 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
674 xmlParserInputPtr input;
675 xmlChar *buffer;
676 size_t length;
677 if (entity == NULL) {
678 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
680 ctxt->sax->error(ctxt->userData,
681 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
682 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
683 return(NULL);
685 if (xmlParserDebugEntities)
686 xmlGenericError(xmlGenericErrorContext,
687 "new blanks wrapper for entity: %s\n", entity->name);
688 input = xmlNewInputStream(ctxt);
689 if (input == NULL) {
690 return(NULL);
692 length = xmlStrlen(entity->name) + 5;
693 buffer = xmlMalloc(length);
694 if (buffer == NULL) {
695 return(NULL);
697 buffer [0] = ' ';
698 buffer [1] = '%';
699 buffer [length-3] = ';';
700 buffer [length-2] = ' ';
701 buffer [length-1] = 0;
702 memcpy(buffer + 2, entity->name, length - 5);
703 input->free = deallocblankswrapper;
704 input->base = buffer;
705 input->cur = buffer;
706 input->length = length;
707 input->end = &buffer[length];
708 return(input);
712 * xmlParserHandlePEReference:
713 * @ctxt: the parser context
715 * [69] PEReference ::= '%' Name ';'
717 * [ WFC: No Recursion ]
718 * A parsed entity must not contain a recursive
719 * reference to itself, either directly or indirectly.
721 * [ WFC: Entity Declared ]
722 * In a document without any DTD, a document with only an internal DTD
723 * subset which contains no parameter entity references, or a document
724 * with "standalone='yes'", ... ... The declaration of a parameter
725 * entity must precede any reference to it...
727 * [ VC: Entity Declared ]
728 * In a document with an external subset or external parameter entities
729 * with "standalone='no'", ... ... The declaration of a parameter entity
730 * must precede any reference to it...
732 * [ WFC: In DTD ]
733 * Parameter-entity references may only appear in the DTD.
734 * NOTE: misleading but this is handled.
736 * A PEReference may have been detected in the current input stream
737 * the handling is done accordingly to
738 * http://www.w3.org/TR/REC-xml#entproc
739 * i.e.
740 * - Included in literal in entity values
741 * - Included as Parameter Entity reference within DTDs
743 void
744 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
745 xmlChar *name;
746 xmlEntityPtr entity = NULL;
747 xmlParserInputPtr input;
749 if (RAW != '%') return;
750 switch(ctxt->instate) {
751 case XML_PARSER_CDATA_SECTION:
752 return;
753 case XML_PARSER_COMMENT:
754 return;
755 case XML_PARSER_START_TAG:
756 return;
757 case XML_PARSER_END_TAG:
758 return;
759 case XML_PARSER_EOF:
760 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
762 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
763 ctxt->wellFormed = 0;
764 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
765 return;
766 case XML_PARSER_PROLOG:
767 case XML_PARSER_START:
768 case XML_PARSER_MISC:
769 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
771 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
772 ctxt->wellFormed = 0;
773 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
774 return;
775 case XML_PARSER_ENTITY_DECL:
776 case XML_PARSER_CONTENT:
777 case XML_PARSER_ATTRIBUTE_VALUE:
778 case XML_PARSER_PI:
779 case XML_PARSER_SYSTEM_LITERAL:
780 case XML_PARSER_PUBLIC_LITERAL:
781 /* we just ignore it there */
782 return;
783 case XML_PARSER_EPILOG:
784 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
786 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
787 ctxt->wellFormed = 0;
788 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
789 return;
790 case XML_PARSER_ENTITY_VALUE:
792 * NOTE: in the case of entity values, we don't do the
793 * substitution here since we need the literal
794 * entity value to be able to save the internal
795 * subset of the document.
796 * This will be handled by xmlStringDecodeEntities
798 return;
799 case XML_PARSER_DTD:
801 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
802 * In the internal DTD subset, parameter-entity references
803 * can occur only where markup declarations can occur, not
804 * within markup declarations.
805 * In that case this is handled in xmlParseMarkupDecl
807 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
808 return;
809 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
810 return;
811 break;
812 case XML_PARSER_IGNORE:
813 return;
816 NEXT;
817 name = xmlParseName(ctxt);
818 if (xmlParserDebugEntities)
819 xmlGenericError(xmlGenericErrorContext,
820 "PEReference: %s\n", name);
821 if (name == NULL) {
822 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
825 ctxt->wellFormed = 0;
826 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
827 } else {
828 if (RAW == ';') {
829 NEXT;
830 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
831 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
832 if (entity == NULL) {
835 * [ WFC: Entity Declared ]
836 * In a document without any DTD, a document with only an
837 * internal DTD subset which contains no parameter entity
838 * references, or a document with "standalone='yes'", ...
839 * ... The declaration of a parameter entity must precede
840 * any reference to it...
842 if ((ctxt->standalone == 1) ||
843 ((ctxt->hasExternalSubset == 0) &&
844 (ctxt->hasPErefs == 0))) {
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
847 "PEReference: %%%s; not found\n", name);
848 ctxt->wellFormed = 0;
849 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
850 } else {
852 * [ VC: Entity Declared ]
853 * In a document with an external subset or external
854 * parameter entities with "standalone='no'", ...
855 * ... The declaration of a parameter entity must precede
856 * any reference to it...
858 if ((!ctxt->disableSAX) &&
859 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
860 ctxt->vctxt.error(ctxt->vctxt.userData,
861 "PEReference: %%%s; not found\n", name);
862 } else if ((!ctxt->disableSAX) &&
863 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
864 ctxt->sax->warning(ctxt->userData,
865 "PEReference: %%%s; not found\n", name);
866 ctxt->valid = 0;
868 } else if (ctxt->input->free != deallocblankswrapper) {
869 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
870 xmlPushInput(ctxt, input);
871 } else {
872 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
873 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
874 xmlChar start[4];
875 xmlCharEncoding enc;
878 * handle the extra spaces added before and after
879 * c.f. http://www.w3.org/TR/REC-xml#as-PE
880 * this is done independently.
882 input = xmlNewEntityInputStream(ctxt, entity);
883 xmlPushInput(ctxt, input);
886 * Get the 4 first bytes and decode the charset
887 * if enc != XML_CHAR_ENCODING_NONE
888 * plug some encoding conversion routines.
890 GROW
891 if (entity->length >= 4) {
892 start[0] = RAW;
893 start[1] = NXT(1);
894 start[2] = NXT(2);
895 start[3] = NXT(3);
896 enc = xmlDetectCharEncoding(start, 4);
897 if (enc != XML_CHAR_ENCODING_NONE) {
898 xmlSwitchEncoding(ctxt, enc);
902 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
903 (RAW == '<') && (NXT(1) == '?') &&
904 (NXT(2) == 'x') && (NXT(3) == 'm') &&
905 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
906 xmlParseTextDecl(ctxt);
908 } else {
909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
910 ctxt->sax->error(ctxt->userData,
911 "xmlParserHandlePEReference: %s is not a parameter entity\n",
912 name);
913 ctxt->wellFormed = 0;
914 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
917 } else {
918 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
920 ctxt->sax->error(ctxt->userData,
921 "xmlParserHandlePEReference: expecting ';'\n");
922 ctxt->wellFormed = 0;
923 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
925 xmlFree(name);
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the capacity of `buffer`, whose current
 * capacity must be held in a variable named buffer##_size. On allocation
 * failure it reports the error, releases the old buffer (the enclosing
 * function is left immediately, so nobody else could free it) and makes the
 * enclosing function return NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown = (xmlChar *)                               \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (buffer##_grown == NULL) {                                       \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
}
943 * xmlStringDecodeEntities:
944 * @ctxt: the parser context
945 * @str: the input string
946 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
947 * @end: an end marker xmlChar, 0 if none
948 * @end2: an end marker xmlChar, 0 if none
949 * @end3: an end marker xmlChar, 0 if none
951 * Takes a entity string content and process to do the adequate substitutions.
953 * [67] Reference ::= EntityRef | CharRef
955 * [69] PEReference ::= '%' Name ';'
957 * Returns A newly allocated string with the substitution done. The caller
958 * must deallocate it !
960 xmlChar *
961 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
962 xmlChar end, xmlChar end2, xmlChar end3) {
963 xmlChar *buffer = NULL;
964 int buffer_size = 0;
966 xmlChar *current = NULL;
967 xmlEntityPtr ent;
968 int c,l;
969 int nbchars = 0;
971 if (str == NULL)
972 return(NULL);
974 if (ctxt->depth > 40) {
975 ctxt->errNo = XML_ERR_ENTITY_LOOP;
976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
977 ctxt->sax->error(ctxt->userData,
978 "Detected entity reference loop\n");
979 ctxt->wellFormed = 0;
980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
981 return(NULL);
985 * allocate a translation buffer.
987 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
988 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
989 if (buffer == NULL) {
990 xmlGenericError(xmlGenericErrorContext,
991 "xmlStringDecodeEntities: malloc failed");
992 return(NULL);
996 * OK loop until we reach one of the ending char or a size limit.
997 * we are operating on already parsed values.
999 c = CUR_SCHAR(str, l);
1000 while ((c != 0) && (c != end) && /* non input consuming loop */
1001 (c != end2) && (c != end3)) {
1003 if (c == 0) break;
1004 if ((c == '&') && (str[1] == '#')) {
1005 int val = xmlParseStringCharRef(ctxt, &str);
1006 if (val != 0) {
1007 COPY_BUF(0,buffer,nbchars,val);
1009 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1010 if (xmlParserDebugEntities)
1011 xmlGenericError(xmlGenericErrorContext,
1012 "String decoding Entity Reference: %.30s\n",
1013 str);
1014 ent = xmlParseStringEntityRef(ctxt, &str);
1015 if ((ent != NULL) &&
1016 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1017 if (ent->content != NULL) {
1018 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1019 } else {
1020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1021 ctxt->sax->error(ctxt->userData,
1022 "internal error entity has no content\n");
1024 } else if ((ent != NULL) && (ent->content != NULL)) {
1025 xmlChar *rep;
1027 ctxt->depth++;
1028 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1029 0, 0, 0);
1030 ctxt->depth--;
1031 if (rep != NULL) {
1032 current = rep;
1033 while (*current != 0) { /* non input consuming loop */
1034 buffer[nbchars++] = *current++;
1035 if (nbchars >
1036 buffer_size - XML_PARSER_BUFFER_SIZE) {
1037 growBuffer(buffer);
1040 xmlFree(rep);
1042 } else if (ent != NULL) {
1043 int i = xmlStrlen(ent->name);
1044 const xmlChar *cur = ent->name;
1046 buffer[nbchars++] = '&';
1047 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1048 growBuffer(buffer);
1050 for (;i > 0;i--)
1051 buffer[nbchars++] = *cur++;
1052 buffer[nbchars++] = ';';
1054 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1055 if (xmlParserDebugEntities)
1056 xmlGenericError(xmlGenericErrorContext,
1057 "String decoding PE Reference: %.30s\n", str);
1058 ent = xmlParseStringPEReference(ctxt, &str);
1059 if (ent != NULL) {
1060 xmlChar *rep;
1062 ctxt->depth++;
1063 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1064 0, 0, 0);
1065 ctxt->depth--;
1066 if (rep != NULL) {
1067 current = rep;
1068 while (*current != 0) { /* non input consuming loop */
1069 buffer[nbchars++] = *current++;
1070 if (nbchars >
1071 buffer_size - XML_PARSER_BUFFER_SIZE) {
1072 growBuffer(buffer);
1075 xmlFree(rep);
1078 } else {
1079 COPY_BUF(l,buffer,nbchars,c);
1080 str += l;
1081 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1082 growBuffer(buffer);
1085 c = CUR_SCHAR(str, l);
1087 buffer[nbchars++] = 0;
1088 return(buffer);
1092 /************************************************************************
1094 * Commodity functions to handle xmlChars *
1096 ************************************************************************/
1099 * xmlStrndup:
1100 * @cur: the input xmlChar *
1101 * @len: the len of @cur
1103 * a strndup for array of xmlChar's
1105 * Returns a new xmlChar * or NULL
1107 xmlChar *
1108 xmlStrndup(const xmlChar *cur, int len) {
1109 xmlChar *ret;
1111 if ((cur == NULL) || (len < 0)) return(NULL);
1112 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1113 if (ret == NULL) {
1114 xmlGenericError(xmlGenericErrorContext,
1115 "malloc of %ld byte failed\n",
1116 (len + 1) * (long)sizeof(xmlChar));
1117 return(NULL);
1119 memcpy(ret, cur, len * sizeof(xmlChar));
1120 ret[len] = 0;
1121 return(ret);
1125 * xmlStrdup:
1126 * @cur: the input xmlChar *
1128 * a strdup for array of xmlChar's. Since they are supposed to be
1129 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1130 * a termination mark of '0'.
1132 * Returns a new xmlChar * or NULL
1134 xmlChar *
1135 xmlStrdup(const xmlChar *cur) {
1136 const xmlChar *p = cur;
1138 if (cur == NULL) return(NULL);
1139 while (*p != 0) p++; /* non input consuming */
1140 return(xmlStrndup(cur, p - cur));
1144 * xmlCharStrndup:
1145 * @cur: the input char *
1146 * @len: the len of @cur
1148 * a strndup for char's to xmlChar's
1150 * Returns a new xmlChar * or NULL
1153 xmlChar *
1154 xmlCharStrndup(const char *cur, int len) {
1155 int i;
1156 xmlChar *ret;
1158 if ((cur == NULL) || (len < 0)) return(NULL);
1159 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1160 if (ret == NULL) {
1161 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1162 (len + 1) * (long)sizeof(xmlChar));
1163 return(NULL);
1165 for (i = 0;i < len;i++)
1166 ret[i] = (xmlChar) cur[i];
1167 ret[len] = 0;
1168 return(ret);
1172 * xmlCharStrdup:
1173 * @cur: the input char *
1174 * @len: the len of @cur
1176 * a strdup for char's to xmlChar's
1178 * Returns a new xmlChar * or NULL
1181 xmlChar *
1182 xmlCharStrdup(const char *cur) {
1183 const char *p = cur;
1185 if (cur == NULL) return(NULL);
1186 while (*p != '\0') p++; /* non input consuming */
1187 return(xmlCharStrndup(cur, p - cur));
1191 * xmlStrcmp:
1192 * @str1: the first xmlChar *
1193 * @str2: the second xmlChar *
1195 * a strcmp for xmlChar's
1197 * Returns the integer result of the comparison
1201 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1202 register int tmp;
1204 if (str1 == str2) return(0);
1205 if (str1 == NULL) return(-1);
1206 if (str2 == NULL) return(1);
1207 do {
1208 tmp = *str1++ - *str2;
1209 if (tmp != 0) return(tmp);
1210 } while (*str2++ != 0);
1211 return 0;
1215 * xmlStrEqual:
1216 * @str1: the first xmlChar *
1217 * @str2: the second xmlChar *
1219 * Check if both string are equal of have same content
1220 * Should be a bit more readable and faster than xmlStrEqual()
1222 * Returns 1 if they are equal, 0 if they are different
1226 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1227 if (str1 == str2) return(1);
1228 if (str1 == NULL) return(0);
1229 if (str2 == NULL) return(0);
1230 do {
1231 if (*str1++ != *str2) return(0);
1232 } while (*str2++);
1233 return(1);
1237 * xmlStrncmp:
1238 * @str1: the first xmlChar *
1239 * @str2: the second xmlChar *
1240 * @len: the max comparison length
1242 * a strncmp for xmlChar's
1244 * Returns the integer result of the comparison
1248 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1249 register int tmp;
1251 if (len <= 0) return(0);
1252 if (str1 == str2) return(0);
1253 if (str1 == NULL) return(-1);
1254 if (str2 == NULL) return(1);
1255 do {
1256 tmp = *str1++ - *str2;
1257 if (tmp != 0 || --len == 0) return(tmp);
1258 } while (*str2++ != 0);
1259 return 0;
1262 static const xmlChar casemap[256] = {
1263 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1264 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1265 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1266 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1267 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1268 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1269 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1270 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1271 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1272 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1273 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1274 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1275 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1276 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1277 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1278 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1279 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1280 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1281 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1282 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1283 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1284 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1285 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1286 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1287 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1288 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1289 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1290 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1291 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1292 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1293 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1294 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1298 * xmlStrcasecmp:
1299 * @str1: the first xmlChar *
1300 * @str2: the second xmlChar *
1302 * a strcasecmp for xmlChar's
1304 * Returns the integer result of the comparison
1308 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1309 register int tmp;
1311 if (str1 == str2) return(0);
1312 if (str1 == NULL) return(-1);
1313 if (str2 == NULL) return(1);
1314 do {
1315 tmp = casemap[*str1++] - casemap[*str2];
1316 if (tmp != 0) return(tmp);
1317 } while (*str2++ != 0);
1318 return 0;
1322 * xmlStrncasecmp:
1323 * @str1: the first xmlChar *
1324 * @str2: the second xmlChar *
1325 * @len: the max comparison length
1327 * a strncasecmp for xmlChar's
1329 * Returns the integer result of the comparison
1333 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1334 register int tmp;
1336 if (len <= 0) return(0);
1337 if (str1 == str2) return(0);
1338 if (str1 == NULL) return(-1);
1339 if (str2 == NULL) return(1);
1340 do {
1341 tmp = casemap[*str1++] - casemap[*str2];
1342 if (tmp != 0 || --len == 0) return(tmp);
1343 } while (*str2++ != 0);
1344 return 0;
1348 * xmlStrchr:
1349 * @str: the xmlChar * array
1350 * @val: the xmlChar to search
1352 * a strchr for xmlChar's
1354 * Returns the xmlChar * for the first occurrence or NULL.
1357 const xmlChar *
1358 xmlStrchr(const xmlChar *str, xmlChar val) {
1359 if (str == NULL) return(NULL);
1360 while (*str != 0) { /* non input consuming */
1361 if (*str == val) return((xmlChar *) str);
1362 str++;
1364 return(NULL);
1368 * xmlStrstr:
1369 * @str: the xmlChar * array (haystack)
1370 * @val: the xmlChar to search (needle)
1372 * a strstr for xmlChar's
1374 * Returns the xmlChar * for the first occurrence or NULL.
1377 const xmlChar *
1378 xmlStrstr(const xmlChar *str, const xmlChar *val) {
1379 int n;
1381 if (str == NULL) return(NULL);
1382 if (val == NULL) return(NULL);
1383 n = xmlStrlen(val);
1385 if (n == 0) return(str);
1386 while (*str != 0) { /* non input consuming */
1387 if (*str == *val) {
1388 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1390 str++;
1392 return(NULL);
1396 * xmlStrcasestr:
1397 * @str: the xmlChar * array (haystack)
1398 * @val: the xmlChar to search (needle)
1400 * a case-ignoring strstr for xmlChar's
1402 * Returns the xmlChar * for the first occurrence or NULL.
1405 const xmlChar *
1406 xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1407 int n;
1409 if (str == NULL) return(NULL);
1410 if (val == NULL) return(NULL);
1411 n = xmlStrlen(val);
1413 if (n == 0) return(str);
1414 while (*str != 0) { /* non input consuming */
1415 if (casemap[*str] == casemap[*val])
1416 if (!xmlStrncasecmp(str, val, n)) return(str);
1417 str++;
1419 return(NULL);
1423 * xmlStrsub:
1424 * @str: the xmlChar * array (haystack)
1425 * @start: the index of the first char (zero based)
1426 * @len: the length of the substring
1428 * Extract a substring of a given string
1430 * Returns the xmlChar * for the first occurrence or NULL.
1433 xmlChar *
1434 xmlStrsub(const xmlChar *str, int start, int len) {
1435 int i;
1437 if (str == NULL) return(NULL);
1438 if (start < 0) return(NULL);
1439 if (len < 0) return(NULL);
1441 for (i = 0;i < start;i++) {
1442 if (*str == 0) return(NULL);
1443 str++;
1445 if (*str == 0) return(NULL);
1446 return(xmlStrndup(str, len));
1450 * xmlStrlen:
1451 * @str: the xmlChar * array
1453 * length of a xmlChar's string
1455 * Returns the number of xmlChar contained in the ARRAY.
1459 xmlStrlen(const xmlChar *str) {
1460 int len = 0;
1462 if (str == NULL) return(0);
1463 while (*str != 0) { /* non input consuming */
1464 str++;
1465 len++;
1467 return(len);
1471 * xmlStrncat:
1472 * @cur: the original xmlChar * array
1473 * @add: the xmlChar * array added
1474 * @len: the length of @add
1476 * a strncat for array of xmlChar's, it will extend @cur with the len
1477 * first bytes of @add.
1479 * Returns a new xmlChar *, the original @cur is reallocated if needed
1480 * and should not be freed
1483 xmlChar *
1484 xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1485 int size;
1486 xmlChar *ret;
1488 if ((add == NULL) || (len == 0))
1489 return(cur);
1490 if (cur == NULL)
1491 return(xmlStrndup(add, len));
1493 size = xmlStrlen(cur);
1494 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1495 if (ret == NULL) {
1496 xmlGenericError(xmlGenericErrorContext,
1497 "xmlStrncat: realloc of %ld byte failed\n",
1498 (size + len + 1) * (long)sizeof(xmlChar));
1499 return(cur);
1501 memcpy(&ret[size], add, len * sizeof(xmlChar));
1502 ret[size + len] = 0;
1503 return(ret);
1507 * xmlStrcat:
1508 * @cur: the original xmlChar * array
1509 * @add: the xmlChar * array added
1511 * a strcat for array of xmlChar's. Since they are supposed to be
1512 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1513 * a termination mark of '0'.
1515 * Returns a new xmlChar * containing the concatenated string.
1517 xmlChar *
1518 xmlStrcat(xmlChar *cur, const xmlChar *add) {
1519 const xmlChar *p = add;
1521 if (add == NULL) return(cur);
1522 if (cur == NULL)
1523 return(xmlStrdup(add));
1525 while (*p != 0) p++; /* non input consuming */
1526 return(xmlStrncat(cur, add, p - add));
1529 /************************************************************************
1531 * Commodity functions, cleanup needed ? *
1533 ************************************************************************/
1536 * areBlanks:
1537 * @ctxt: an XML parser context
1538 * @str: a xmlChar *
1539 * @len: the size of @str
1541 * Is this a sequence of blank chars that one can ignore ?
1543 * Returns 1 if ignorable 0 otherwise.
1546 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1547 int i, ret;
1548 xmlNodePtr lastChild;
1551 * Don't spend time trying to differentiate them, the same callback is
1552 * used !
1554 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
1555 return(0);
1558 * Check for xml:space value.
1560 if (*(ctxt->space) == 1)
1561 return(0);
1564 * Check that the string is made of blanks
1566 for (i = 0;i < len;i++)
1567 if (!(IS_BLANK(str[i]))) return(0);
1570 * Look if the element is mixed content in the DTD if available
1572 if (ctxt->node == NULL) return(0);
1573 if (ctxt->myDoc != NULL) {
1574 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1575 if (ret == 0) return(1);
1576 if (ret == 1) return(0);
1580 * Otherwise, heuristic :-\
1582 if (RAW != '<') return(0);
1583 if ((ctxt->node->children == NULL) &&
1584 (RAW == '<') && (NXT(1) == '/')) return(0);
1586 lastChild = xmlGetLastChild(ctxt->node);
1587 if (lastChild == NULL) {
1588 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1589 (ctxt->node->content != NULL)) return(0);
1590 } else if (xmlNodeIsText(lastChild))
1591 return(0);
1592 else if ((ctxt->node->children != NULL) &&
1593 (xmlNodeIsText(ctxt->node->children)))
1594 return(0);
1595 return(1);
1598 /************************************************************************
1600 * Extra stuff for namespace support *
1601 * Relates to http://www.w3.org/TR/WD-xml-names *
1603 ************************************************************************/
1606 * xmlSplitQName:
1607 * @ctxt: an XML parser context
1608 * @name: an XML parser context
1609 * @prefix: a xmlChar **
1611 * parse an UTF8 encoded XML qualified name string
1613 * [NS 5] QName ::= (Prefix ':')? LocalPart
1615 * [NS 6] Prefix ::= NCName
1617 * [NS 7] LocalPart ::= NCName
1619 * Returns the local part, and prefix is updated
1620 * to get the Prefix if any.
1623 xmlChar *
1624 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1625 xmlChar buf[XML_MAX_NAMELEN + 5];
1626 xmlChar *buffer = NULL;
1627 int len = 0;
1628 int max = XML_MAX_NAMELEN;
1629 xmlChar *ret = NULL;
1630 const xmlChar *cur = name;
1631 int c;
1633 *prefix = NULL;
1635 #ifndef XML_XML_NAMESPACE
1636 /* xml: prefix is not really a namespace */
1637 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1638 (cur[2] == 'l') && (cur[3] == ':'))
1639 return(xmlStrdup(name));
1640 #endif
1642 /* nasty but valid */
1643 if (cur[0] == ':')
1644 return(xmlStrdup(name));
1646 c = *cur++;
1647 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1648 buf[len++] = c;
1649 c = *cur++;
1651 if (len >= max) {
1653 * Okay someone managed to make a huge name, so he's ready to pay
1654 * for the processing speed.
1656 max = len * 2;
1658 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1659 if (buffer == NULL) {
1660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1661 ctxt->sax->error(ctxt->userData,
1662 "xmlSplitQName: out of memory\n");
1663 return(NULL);
1665 memcpy(buffer, buf, len);
1666 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1667 if (len + 10 > max) {
1668 max *= 2;
1669 buffer = (xmlChar *) xmlRealloc(buffer,
1670 max * sizeof(xmlChar));
1671 if (buffer == NULL) {
1672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1673 ctxt->sax->error(ctxt->userData,
1674 "xmlSplitQName: out of memory\n");
1675 return(NULL);
1678 buffer[len++] = c;
1679 c = *cur++;
1681 buffer[len] = 0;
1684 if (buffer == NULL)
1685 ret = xmlStrndup(buf, len);
1686 else {
1687 ret = buffer;
1688 buffer = NULL;
1689 max = XML_MAX_NAMELEN;
1693 if (c == ':') {
1694 c = *cur;
1695 if (c == 0) return(ret);
1696 *prefix = ret;
1697 len = 0;
1700 * Check that the first character is proper to start
1701 * a new name
1703 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1704 ((c >= 0x41) && (c <= 0x5A)) ||
1705 (c == '_') || (c == ':'))) {
1706 int l;
1707 int first = CUR_SCHAR(cur, l);
1709 if (!IS_LETTER(first) && (first != '_')) {
1710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1711 ctxt->sax->error(ctxt->userData,
1712 "Name %s is not XML Namespace compliant\n",
1713 name);
1716 cur++;
1718 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1719 buf[len++] = c;
1720 c = *cur++;
1722 if (len >= max) {
1724 * Okay someone managed to make a huge name, so he's ready to pay
1725 * for the processing speed.
1727 max = len * 2;
1729 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1730 if (buffer == NULL) {
1731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1732 ctxt->sax->error(ctxt->userData,
1733 "xmlSplitQName: out of memory\n");
1734 return(NULL);
1736 memcpy(buffer, buf, len);
1737 while (c != 0) { /* tested bigname2.xml */
1738 if (len + 10 > max) {
1739 max *= 2;
1740 buffer = (xmlChar *) xmlRealloc(buffer,
1741 max * sizeof(xmlChar));
1742 if (buffer == NULL) {
1743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1744 ctxt->sax->error(ctxt->userData,
1745 "xmlSplitQName: out of memory\n");
1746 return(NULL);
1749 buffer[len++] = c;
1750 c = *cur++;
1752 buffer[len] = 0;
1755 if (buffer == NULL)
1756 ret = xmlStrndup(buf, len);
1757 else {
1758 ret = buffer;
1762 return(ret);
1765 /************************************************************************
1767 * The parser itself *
1768 * Relates to http://www.w3.org/TR/REC-xml *
1770 ************************************************************************/
1772 static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
1774 * xmlParseName:
1775 * @ctxt: an XML parser context
1777 * parse an XML name.
1779 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1780 * CombiningChar | Extender
1782 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1784 * [6] Names ::= Name (S Name)*
1786 * Returns the Name parsed or NULL
1789 xmlChar *
1790 xmlParseName(xmlParserCtxtPtr ctxt) {
1791 const xmlChar *in;
1792 xmlChar *ret;
1793 int count = 0;
1795 GROW;
1798 * Accelerator for simple ASCII names
1800 in = ctxt->input->cur;
1801 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1802 ((*in >= 0x41) && (*in <= 0x5A)) ||
1803 (*in == '_') || (*in == ':')) {
1804 in++;
1805 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1806 ((*in >= 0x41) && (*in <= 0x5A)) ||
1807 ((*in >= 0x30) && (*in <= 0x39)) ||
1808 (*in == '_') || (*in == '-') ||
1809 (*in == ':') || (*in == '.'))
1810 in++;
1811 if ((*in > 0) && (*in < 0x80)) {
1812 count = in - ctxt->input->cur;
1813 ret = xmlStrndup(ctxt->input->cur, count);
1814 ctxt->input->cur = in;
1815 return(ret);
1818 return(xmlParseNameComplex(ctxt));
1822 * xmlParseNameAndCompare:
1823 * @ctxt: an XML parser context
1825 * parse an XML name and compares for match
1826 * (specialized for endtag parsing)
1829 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1830 * and the name for mismatch
1833 static xmlChar *
1834 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1835 const xmlChar *cmp = other;
1836 const xmlChar *in;
1837 xmlChar *ret;
1839 GROW;
1841 in = ctxt->input->cur;
1842 while (*in != 0 && *in == *cmp) {
1843 ++in;
1844 ++cmp;
1846 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1847 /* success */
1848 ctxt->input->cur = in;
1849 return (xmlChar*) 1;
1851 /* failure (or end of input buffer), check with full function */
1852 ret = xmlParseName (ctxt);
1853 if (ret != 0 && xmlStrEqual (ret, other)) {
1854 xmlFree (ret);
1855 return (xmlChar*) 1;
1857 return ret;
1860 static xmlChar *
1861 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1862 xmlChar buf[XML_MAX_NAMELEN + 5];
1863 int len = 0, l;
1864 int c;
1865 int count = 0;
1868 * Handler for more complex cases
1870 GROW;
1871 c = CUR_CHAR(l);
1872 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1873 (!IS_LETTER(c) && (c != '_') &&
1874 (c != ':'))) {
1875 return(NULL);
1878 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1879 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c)))) {
1884 if (count++ > 100) {
1885 count = 0;
1886 GROW;
1888 COPY_BUF(l,buf,len,c);
1889 NEXTL(l);
1890 c = CUR_CHAR(l);
1891 if (len >= XML_MAX_NAMELEN) {
1893 * Okay someone managed to make a huge name, so he's ready to pay
1894 * for the processing speed.
1896 xmlChar *buffer;
1897 int max = len * 2;
1899 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1900 if (buffer == NULL) {
1901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1902 ctxt->sax->error(ctxt->userData,
1903 "xmlParseNameComplex: out of memory\n");
1904 return(NULL);
1906 memcpy(buffer, buf, len);
1907 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1908 (c == '.') || (c == '-') ||
1909 (c == '_') || (c == ':') ||
1910 (IS_COMBINING(c)) ||
1911 (IS_EXTENDER(c))) {
1912 if (count++ > 100) {
1913 count = 0;
1914 GROW;
1916 if (len + 10 > max) {
1917 max *= 2;
1918 buffer = (xmlChar *) xmlRealloc(buffer,
1919 max * sizeof(xmlChar));
1920 if (buffer == NULL) {
1921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1922 ctxt->sax->error(ctxt->userData,
1923 "xmlParseNameComplex: out of memory\n");
1924 return(NULL);
1927 COPY_BUF(l,buffer,len,c);
1928 NEXTL(l);
1929 c = CUR_CHAR(l);
1931 buffer[len] = 0;
1932 return(buffer);
1935 return(xmlStrndup(buf, len));
1939 * xmlParseStringName:
1940 * @ctxt: an XML parser context
1941 * @str: a pointer to the string pointer (IN/OUT)
1943 * parse an XML name.
1945 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1946 * CombiningChar | Extender
1948 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1950 * [6] Names ::= Name (S Name)*
1952 * Returns the Name parsed or NULL. The @str pointer
1953 * is updated to the current location in the string.
1956 static xmlChar *
1957 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1958 xmlChar buf[XML_MAX_NAMELEN + 5];
1959 const xmlChar *cur = *str;
1960 int len = 0, l;
1961 int c;
1963 c = CUR_SCHAR(cur, l);
1964 if (!IS_LETTER(c) && (c != '_') &&
1965 (c != ':')) {
1966 return(NULL);
1969 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1970 (c == '.') || (c == '-') ||
1971 (c == '_') || (c == ':') ||
1972 (IS_COMBINING(c)) ||
1973 (IS_EXTENDER(c))) {
1974 COPY_BUF(l,buf,len,c);
1975 cur += l;
1976 c = CUR_SCHAR(cur, l);
1977 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1979 * Okay someone managed to make a huge name, so he's ready to pay
1980 * for the processing speed.
1982 xmlChar *buffer;
1983 int max = len * 2;
1985 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1986 if (buffer == NULL) {
1987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1988 ctxt->sax->error(ctxt->userData,
1989 "xmlParseStringName: out of memory\n");
1990 return(NULL);
1992 memcpy(buffer, buf, len);
1993 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1994 (c == '.') || (c == '-') ||
1995 (c == '_') || (c == ':') ||
1996 (IS_COMBINING(c)) ||
1997 (IS_EXTENDER(c))) {
1998 if (len + 10 > max) {
1999 max *= 2;
2000 buffer = (xmlChar *) xmlRealloc(buffer,
2001 max * sizeof(xmlChar));
2002 if (buffer == NULL) {
2003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2004 ctxt->sax->error(ctxt->userData,
2005 "xmlParseStringName: out of memory\n");
2006 return(NULL);
2009 COPY_BUF(l,buffer,len,c);
2010 cur += l;
2011 c = CUR_SCHAR(cur, l);
2013 buffer[len] = 0;
2014 *str = cur;
2015 return(buffer);
2018 *str = cur;
2019 return(xmlStrndup(buf, len));
2023 * xmlParseNmtoken:
2024 * @ctxt: an XML parser context
2026 * parse an XML Nmtoken.
2028 * [7] Nmtoken ::= (NameChar)+
2030 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2032 * Returns the Nmtoken parsed or NULL
2035 xmlChar *
2036 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2037 xmlChar buf[XML_MAX_NAMELEN + 5];
2038 int len = 0, l;
2039 int c;
2040 int count = 0;
2042 GROW;
2043 c = CUR_CHAR(l);
2045 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2046 (c == '.') || (c == '-') ||
2047 (c == '_') || (c == ':') ||
2048 (IS_COMBINING(c)) ||
2049 (IS_EXTENDER(c))) {
2050 if (count++ > 100) {
2051 count = 0;
2052 GROW;
2054 COPY_BUF(l,buf,len,c);
2055 NEXTL(l);
2056 c = CUR_CHAR(l);
2057 if (len >= XML_MAX_NAMELEN) {
2059 * Okay someone managed to make a huge token, so he's ready to pay
2060 * for the processing speed.
2062 xmlChar *buffer;
2063 int max = len * 2;
2065 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2066 if (buffer == NULL) {
2067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2068 ctxt->sax->error(ctxt->userData,
2069 "xmlParseNmtoken: out of memory\n");
2070 return(NULL);
2072 memcpy(buffer, buf, len);
2073 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2074 (c == '.') || (c == '-') ||
2075 (c == '_') || (c == ':') ||
2076 (IS_COMBINING(c)) ||
2077 (IS_EXTENDER(c))) {
2078 if (count++ > 100) {
2079 count = 0;
2080 GROW;
2082 if (len + 10 > max) {
2083 max *= 2;
2084 buffer = (xmlChar *) xmlRealloc(buffer,
2085 max * sizeof(xmlChar));
2086 if (buffer == NULL) {
2087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2088 ctxt->sax->error(ctxt->userData,
2089 "xmlParseNmtoken: out of memory\n");
2090 return(NULL);
2093 COPY_BUF(l,buffer,len,c);
2094 NEXTL(l);
2095 c = CUR_CHAR(l);
2097 buffer[len] = 0;
2098 return(buffer);
2101 if (len == 0)
2102 return(NULL);
2103 return(xmlStrndup(buf, len));
2107 * xmlParseEntityValue:
2108 * @ctxt: an XML parser context
2109 * @orig: if non-NULL store a copy of the original entity value
2111 * parse a value for ENTITY declarations
2113 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2114 * "'" ([^%&'] | PEReference | Reference)* "'"
2116 * Returns the EntityValue parsed with reference substituted or NULL
2119 xmlChar *
2120 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2121 xmlChar *buf = NULL;
2122 int len = 0;
2123 int size = XML_PARSER_BUFFER_SIZE;
2124 int c, l;
2125 xmlChar stop;
2126 xmlChar *ret = NULL;
2127 const xmlChar *cur = NULL;
2128 xmlParserInputPtr input;
2130 if (RAW == '"') stop = '"';
2131 else if (RAW == '\'') stop = '\'';
2132 else {
2133 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2135 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2136 ctxt->wellFormed = 0;
2137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2138 return(NULL);
2140 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2141 if (buf == NULL) {
2142 xmlGenericError(xmlGenericErrorContext,
2143 "malloc of %d byte failed\n", size);
2144 return(NULL);
2148 * The content of the entity definition is copied in a buffer.
2151 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2152 input = ctxt->input;
2153 GROW;
2154 NEXT;
2155 c = CUR_CHAR(l);
2157 * NOTE: 4.4.5 Included in Literal
2158 * When a parameter entity reference appears in a literal entity
2159 * value, ... a single or double quote character in the replacement
2160 * text is always treated as a normal data character and will not
2161 * terminate the literal.
2162 * In practice it means we stop the loop only when back at parsing
2163 * the initial entity and the quote is found
2165 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2166 (ctxt->input != input))) {
2167 if (len + 5 >= size) {
2168 size *= 2;
2169 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2170 if (buf == NULL) {
2171 xmlGenericError(xmlGenericErrorContext,
2172 "realloc of %d byte failed\n", size);
2173 return(NULL);
2176 COPY_BUF(l,buf,len,c);
2177 NEXTL(l);
2179 * Pop-up of finished entities.
2181 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2182 xmlPopInput(ctxt);
2184 GROW;
2185 c = CUR_CHAR(l);
2186 if (c == 0) {
2187 GROW;
2188 c = CUR_CHAR(l);
2191 buf[len] = 0;
2194 * Raise problem w.r.t. '&' and '%' being used in non-entities
2195 * reference constructs. Note Charref will be handled in
2196 * xmlStringDecodeEntities()
2198 cur = buf;
2199 while (*cur != 0) { /* non input consuming */
2200 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2201 xmlChar *name;
2202 xmlChar tmp = *cur;
2204 cur++;
2205 name = xmlParseStringName(ctxt, &cur);
2206 if ((name == NULL) || (*cur != ';')) {
2207 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2209 ctxt->sax->error(ctxt->userData,
2210 "EntityValue: '%c' forbidden except for entities references\n",
2211 tmp);
2212 ctxt->wellFormed = 0;
2213 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2215 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2216 (ctxt->inputNr == 1)) {
2217 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2219 ctxt->sax->error(ctxt->userData,
2220 "EntityValue: PEReferences forbidden in internal subset\n",
2221 tmp);
2222 ctxt->wellFormed = 0;
2223 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2225 if (name != NULL)
2226 xmlFree(name);
2228 cur++;
2232 * Then PEReference entities are substituted.
2234 if (c != stop) {
2235 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2237 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2238 ctxt->wellFormed = 0;
2239 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2240 xmlFree(buf);
2241 } else {
2242 NEXT;
2244 * NOTE: 4.4.7 Bypassed
2245 * When a general entity reference appears in the EntityValue in
2246 * an entity declaration, it is bypassed and left as is.
2247 * so XML_SUBSTITUTE_REF is not set here.
2249 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2250 0, 0, 0);
2251 if (orig != NULL)
2252 *orig = buf;
2253 else
2254 xmlFree(buf);
2257 return(ret);
2261 * xmlParseAttValue:
2262 * @ctxt: an XML parser context
2264 * parse a value for an attribute
2265 * Note: the parser won't do substitution of entities here, this
2266 * will be handled later in xmlStringGetNodeList
2268 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2269 * "'" ([^<&'] | Reference)* "'"
2271 * 3.3.3 Attribute-Value Normalization:
2272 * Before the value of an attribute is passed to the application or
2273 * checked for validity, the XML processor must normalize it as follows:
2274 * - a character reference is processed by appending the referenced
2275 * character to the attribute value
2276 * - an entity reference is processed by recursively processing the
2277 * replacement text of the entity
2278 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2279 * appending #x20 to the normalized value, except that only a single
2280 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2281 * parsed entity or the literal entity value of an internal parsed entity
2282 * - other characters are processed by appending them to the normalized value
2283 * If the declared value is not CDATA, then the XML processor must further
2284 * process the normalized attribute value by discarding any leading and
2285 * trailing space (#x20) characters, and by replacing sequences of space
2286 * (#x20) characters by a single space (#x20) character.
2287 * All attributes for which no declaration has been read should be treated
2288 * by a non-validating parser as if declared CDATA.
2290 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2293 xmlChar *
2294 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2296 xmlChar *
2297 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2298 xmlChar limit = 0;
2299 const xmlChar *in = NULL;
2300 xmlChar *ret = NULL;
2301 SHRINK;
2302 GROW;
2303 in = (xmlChar *) CUR_PTR;
2304 if (*in != '"' && *in != '\'') {
2305 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2307 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2308 ctxt->wellFormed = 0;
2309 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2310 return(NULL);
2312 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2313 limit = *in;
2314 ++in;
2316 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2317 *in != '&' && *in != '<'
2319 ++in;
2321 if (*in != limit) {
2322 return xmlParseAttValueComplex(ctxt);
2324 ++in;
2325 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2326 CUR_PTR = in;
2327 return ret;
2330 xmlChar *
2331 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2332 xmlChar limit = 0;
2333 xmlChar *buf = NULL;
2334 int len = 0;
2335 int buf_size = 0;
2336 int c, l;
2337 xmlChar *current = NULL;
2338 xmlEntityPtr ent;
2341 SHRINK;
2342 if (NXT(0) == '"') {
2343 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2344 limit = '"';
2345 NEXT;
2346 } else if (NXT(0) == '\'') {
2347 limit = '\'';
2348 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2349 NEXT;
2350 } else {
2351 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2354 ctxt->wellFormed = 0;
2355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2356 return(NULL);
2360 * allocate a translation buffer.
2362 buf_size = XML_PARSER_BUFFER_SIZE;
2363 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2364 if (buf == NULL) {
2365 xmlGenericError(xmlGenericErrorContext,
2366 "xmlParseAttValue: malloc failed");
2367 return(NULL);
2371 * OK loop until we reach one of the ending char or a size limit.
2373 c = CUR_CHAR(l);
2374 while ((NXT(0) != limit) && /* checked */
2375 (c != '<')) {
2376 if (c == 0) break;
2377 if (c == '&') {
2378 if (NXT(1) == '#') {
2379 int val = xmlParseCharRef(ctxt);
2380 if (val == '&') {
2381 if (ctxt->replaceEntities) {
2382 if (len > buf_size - 10) {
2383 growBuffer(buf);
2385 buf[len++] = '&';
2386 } else {
2388 * The reparsing will be done in xmlStringGetNodeList()
2389 * called by the attribute() function in SAX.c
2391 static xmlChar buffer[6] = "&#38;";
2393 if (len > buf_size - 10) {
2394 growBuffer(buf);
2396 current = &buffer[0];
2397 while (*current != 0) { /* non input consuming */
2398 buf[len++] = *current++;
2401 } else {
2402 if (len > buf_size - 10) {
2403 growBuffer(buf);
2405 len += xmlCopyChar(0, &buf[len], val);
2407 } else {
2408 ent = xmlParseEntityRef(ctxt);
2409 if ((ent != NULL) &&
2410 (ctxt->replaceEntities != 0)) {
2411 xmlChar *rep;
2413 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2414 rep = xmlStringDecodeEntities(ctxt, ent->content,
2415 XML_SUBSTITUTE_REF, 0, 0, 0);
2416 if (rep != NULL) {
2417 current = rep;
2418 while (*current != 0) { /* non input consuming */
2419 buf[len++] = *current++;
2420 if (len > buf_size - 10) {
2421 growBuffer(buf);
2424 xmlFree(rep);
2426 } else {
2427 if (len > buf_size - 10) {
2428 growBuffer(buf);
2430 if (ent->content != NULL)
2431 buf[len++] = ent->content[0];
2433 } else if (ent != NULL) {
2434 int i = xmlStrlen(ent->name);
2435 const xmlChar *cur = ent->name;
2438 * This may look absurd but is needed to detect
2439 * entities problems
2441 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2442 (ent->content != NULL)) {
2443 xmlChar *rep;
2444 rep = xmlStringDecodeEntities(ctxt, ent->content,
2445 XML_SUBSTITUTE_REF, 0, 0, 0);
2446 if (rep != NULL)
2447 xmlFree(rep);
2451 * Just output the reference
2453 buf[len++] = '&';
2454 if (len > buf_size - i - 10) {
2455 growBuffer(buf);
2457 for (;i > 0;i--)
2458 buf[len++] = *cur++;
2459 buf[len++] = ';';
2462 } else {
2463 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2464 COPY_BUF(l,buf,len,0x20);
2465 if (len > buf_size - 10) {
2466 growBuffer(buf);
2468 } else {
2469 COPY_BUF(l,buf,len,c);
2470 if (len > buf_size - 10) {
2471 growBuffer(buf);
2474 NEXTL(l);
2476 GROW;
2477 c = CUR_CHAR(l);
2479 buf[len++] = 0;
2480 if (RAW == '<') {
2481 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2483 ctxt->sax->error(ctxt->userData,
2484 "Unescaped '<' not allowed in attributes values\n");
2485 ctxt->wellFormed = 0;
2486 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2487 } else if (RAW != limit) {
2488 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2490 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2491 ctxt->wellFormed = 0;
2492 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2493 } else
2494 NEXT;
2495 return(buf);
2499 * xmlParseSystemLiteral:
2500 * @ctxt: an XML parser context
2502 * parse an XML Literal
2504 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2506 * Returns the SystemLiteral parsed or NULL
2509 xmlChar *
2510 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2511 xmlChar *buf = NULL;
2512 int len = 0;
2513 int size = XML_PARSER_BUFFER_SIZE;
2514 int cur, l;
2515 xmlChar stop;
2516 int state = ctxt->instate;
2517 int count = 0;
2519 SHRINK;
2520 if (RAW == '"') {
2521 NEXT;
2522 stop = '"';
2523 } else if (RAW == '\'') {
2524 NEXT;
2525 stop = '\'';
2526 } else {
2527 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2529 ctxt->sax->error(ctxt->userData,
2530 "SystemLiteral \" or ' expected\n");
2531 ctxt->wellFormed = 0;
2532 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2533 return(NULL);
2536 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2537 if (buf == NULL) {
2538 xmlGenericError(xmlGenericErrorContext,
2539 "malloc of %d byte failed\n", size);
2540 return(NULL);
2542 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2543 cur = CUR_CHAR(l);
2544 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2545 if (len + 5 >= size) {
2546 size *= 2;
2547 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2548 if (buf == NULL) {
2549 xmlGenericError(xmlGenericErrorContext,
2550 "realloc of %d byte failed\n", size);
2551 ctxt->instate = (xmlParserInputState) state;
2552 return(NULL);
2555 count++;
2556 if (count > 50) {
2557 GROW;
2558 count = 0;
2560 COPY_BUF(l,buf,len,cur);
2561 NEXTL(l);
2562 cur = CUR_CHAR(l);
2563 if (cur == 0) {
2564 GROW;
2565 SHRINK;
2566 cur = CUR_CHAR(l);
2569 buf[len] = 0;
2570 ctxt->instate = (xmlParserInputState) state;
2571 if (!IS_CHAR(cur)) {
2572 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2574 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2575 ctxt->wellFormed = 0;
2576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2577 } else {
2578 NEXT;
2580 return(buf);
2584 * xmlParsePubidLiteral:
2585 * @ctxt: an XML parser context
2587 * parse an XML public literal
2589 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2591 * Returns the PubidLiteral parsed or NULL.
2594 xmlChar *
2595 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2596 xmlChar *buf = NULL;
2597 int len = 0;
2598 int size = XML_PARSER_BUFFER_SIZE;
2599 xmlChar cur;
2600 xmlChar stop;
2601 int count = 0;
2602 xmlParserInputState oldstate = ctxt->instate;
2604 SHRINK;
2605 if (RAW == '"') {
2606 NEXT;
2607 stop = '"';
2608 } else if (RAW == '\'') {
2609 NEXT;
2610 stop = '\'';
2611 } else {
2612 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2614 ctxt->sax->error(ctxt->userData,
2615 "SystemLiteral \" or ' expected\n");
2616 ctxt->wellFormed = 0;
2617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2618 return(NULL);
2620 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2621 if (buf == NULL) {
2622 xmlGenericError(xmlGenericErrorContext,
2623 "malloc of %d byte failed\n", size);
2624 return(NULL);
2626 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
2627 cur = CUR;
2628 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2629 if (len + 1 >= size) {
2630 size *= 2;
2631 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2632 if (buf == NULL) {
2633 xmlGenericError(xmlGenericErrorContext,
2634 "realloc of %d byte failed\n", size);
2635 return(NULL);
2638 buf[len++] = cur;
2639 count++;
2640 if (count > 50) {
2641 GROW;
2642 count = 0;
2644 NEXT;
2645 cur = CUR;
2646 if (cur == 0) {
2647 GROW;
2648 SHRINK;
2649 cur = CUR;
2652 buf[len] = 0;
2653 if (cur != stop) {
2654 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2656 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2657 ctxt->wellFormed = 0;
2658 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2659 } else {
2660 NEXT;
2662 ctxt->instate = oldstate;
2663 return(buf);
2666 void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
2668 * xmlParseCharData:
2669 * @ctxt: an XML parser context
2670 * @cdata: int indicating whether we are within a CDATA section
2672 * parse a CharData section.
2673 * if we are within a CDATA section ']]>' marks an end of section.
2675 * The right angle bracket (>) may be represented using the string "&gt;",
2676 * and must, for compatibility, be escaped using "&gt;" or a character
2677 * reference when it appears in the string "]]>" in content, when that
2678 * string is not marking the end of a CDATA section.
2680 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2683 void
2684 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
2685 const xmlChar *in;
2686 int nbchar = 0;
2687 int line = ctxt->input->line;
2688 int col = ctxt->input->col;
2690 SHRINK;
2691 GROW;
2693 * Accelerated common case where input don't need to be
2694 * modified before passing it to the handler.
2696 if (!cdata) {
2697 in = ctxt->input->cur;
2698 do {
2699 get_more:
2700 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2701 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2702 in++;
2703 if (*in == 0xA) {
2704 ctxt->input->line++;
2705 in++;
2706 while (*in == 0xA) {
2707 ctxt->input->line++;
2708 in++;
2710 goto get_more;
2712 if (*in == ']') {
2713 if ((in[1] == ']') && (in[2] == '>')) {
2714 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "Sequence ']]>' not allowed in content\n");
2718 ctxt->input->cur = in;
2719 ctxt->wellFormed = 0;
2720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2721 return;
2723 in++;
2724 goto get_more;
2726 nbchar = in - ctxt->input->cur;
2727 if (nbchar > 0) {
2728 if (IS_BLANK(*ctxt->input->cur)) {
2729 const xmlChar *tmp = ctxt->input->cur;
2730 ctxt->input->cur = in;
2731 if (areBlanks(ctxt, tmp, nbchar)) {
2732 if (ctxt->sax->ignorableWhitespace != NULL)
2733 ctxt->sax->ignorableWhitespace(ctxt->userData,
2734 tmp, nbchar);
2735 } else {
2736 if (ctxt->sax->characters != NULL)
2737 ctxt->sax->characters(ctxt->userData,
2738 tmp, nbchar);
2740 line = ctxt->input->line;
2741 col = ctxt->input->col;
2742 } else {
2743 if (ctxt->sax->characters != NULL)
2744 ctxt->sax->characters(ctxt->userData,
2745 ctxt->input->cur, nbchar);
2746 line = ctxt->input->line;
2747 col = ctxt->input->col;
2750 ctxt->input->cur = in;
2751 if (*in == 0xD) {
2752 in++;
2753 if (*in == 0xA) {
2754 ctxt->input->cur = in;
2755 in++;
2756 ctxt->input->line++;
2757 continue; /* while */
2759 in--;
2761 if (*in == '<') {
2762 return;
2764 if (*in == '&') {
2765 return;
2767 SHRINK;
2768 GROW;
2769 in = ctxt->input->cur;
2770 } while ((*in >= 0x20) && (*in <= 0x7F));
2771 nbchar = 0;
2773 ctxt->input->line = line;
2774 ctxt->input->col = col;
2775 xmlParseCharDataComplex(ctxt, cdata);
2778 void
2779 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
2780 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2781 int nbchar = 0;
2782 int cur, l;
2783 int count = 0;
2785 SHRINK;
2786 GROW;
2787 cur = CUR_CHAR(l);
2788 while ((cur != '<') && /* checked */
2789 (cur != '&') &&
2790 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2791 if ((cur == ']') && (NXT(1) == ']') &&
2792 (NXT(2) == '>')) {
2793 if (cdata) break;
2794 else {
2795 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2797 ctxt->sax->error(ctxt->userData,
2798 "Sequence ']]>' not allowed in content\n");
2799 /* Should this be relaxed ??? I see a "must here */
2800 ctxt->wellFormed = 0;
2801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2804 COPY_BUF(l,buf,nbchar,cur);
2805 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2807 * OK the segment is to be consumed as chars.
2809 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2810 if (areBlanks(ctxt, buf, nbchar)) {
2811 if (ctxt->sax->ignorableWhitespace != NULL)
2812 ctxt->sax->ignorableWhitespace(ctxt->userData,
2813 buf, nbchar);
2814 } else {
2815 if (ctxt->sax->characters != NULL)
2816 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2819 nbchar = 0;
2821 count++;
2822 if (count > 50) {
2823 GROW;
2824 count = 0;
2826 NEXTL(l);
2827 cur = CUR_CHAR(l);
2829 if (nbchar != 0) {
2831 * OK the segment is to be consumed as chars.
2833 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2834 if (areBlanks(ctxt, buf, nbchar)) {
2835 if (ctxt->sax->ignorableWhitespace != NULL)
2836 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2837 } else {
2838 if (ctxt->sax->characters != NULL)
2839 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2846 * xmlParseExternalID:
2847 * @ctxt: an XML parser context
2848 * @publicID: a xmlChar** receiving PubidLiteral
2849 * @strict: indicate whether we should restrict parsing to only
2850 * production [75], see NOTE below
2852 * Parse an External ID or a Public ID
2854 * NOTE: Productions [75] and [83] interact badly since [75] can generate
2855 * 'PUBLIC' S PubidLiteral S SystemLiteral
2857 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2858 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2860 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2862 * Returns the function returns SystemLiteral and in the second
2863 * case publicID receives PubidLiteral, is strict is off
2864 * it is possible to return NULL and have publicID set.
2867 xmlChar *
2868 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2869 xmlChar *URI = NULL;
2871 SHRINK;
2873 *publicID = NULL;
2874 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2875 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2876 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2877 SKIP(6);
2878 if (!IS_BLANK(CUR)) {
2879 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Space required after 'SYSTEM'\n");
2883 ctxt->wellFormed = 0;
2884 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2886 SKIP_BLANKS;
2887 URI = xmlParseSystemLiteral(ctxt);
2888 if (URI == NULL) {
2889 ctxt->errNo = XML_ERR_URI_REQUIRED;
2890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2891 ctxt->sax->error(ctxt->userData,
2892 "xmlParseExternalID: SYSTEM, no URI\n");
2893 ctxt->wellFormed = 0;
2894 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2896 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2897 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2898 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2899 SKIP(6);
2900 if (!IS_BLANK(CUR)) {
2901 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2903 ctxt->sax->error(ctxt->userData,
2904 "Space required after 'PUBLIC'\n");
2905 ctxt->wellFormed = 0;
2906 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2908 SKIP_BLANKS;
2909 *publicID = xmlParsePubidLiteral(ctxt);
2910 if (*publicID == NULL) {
2911 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2913 ctxt->sax->error(ctxt->userData,
2914 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2915 ctxt->wellFormed = 0;
2916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2918 if (strict) {
2920 * We don't handle [83] so "S SystemLiteral" is required.
2922 if (!IS_BLANK(CUR)) {
2923 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2925 ctxt->sax->error(ctxt->userData,
2926 "Space required after the Public Identifier\n");
2927 ctxt->wellFormed = 0;
2928 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2930 } else {
2932 * We handle [83] so we return immediately, if
2933 * "S SystemLiteral" is not detected. From a purely parsing
2934 * point of view that's a nice mess.
2936 const xmlChar *ptr;
2937 GROW;
2939 ptr = CUR_PTR;
2940 if (!IS_BLANK(*ptr)) return(NULL);
2942 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2943 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2945 SKIP_BLANKS;
2946 URI = xmlParseSystemLiteral(ctxt);
2947 if (URI == NULL) {
2948 ctxt->errNo = XML_ERR_URI_REQUIRED;
2949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2950 ctxt->sax->error(ctxt->userData,
2951 "xmlParseExternalID: PUBLIC, no URI\n");
2952 ctxt->wellFormed = 0;
2953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2956 return(URI);
2960 * xmlParseComment:
2961 * @ctxt: an XML parser context
2963 * Skip an XML (SGML) comment <!-- .... -->
2964 * The spec says that "For compatibility, the string "--" (double-hyphen)
2965 * must not occur within comments. "
2967 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2969 void
2970 xmlParseComment(xmlParserCtxtPtr ctxt) {
2971 xmlChar *buf = NULL;
2972 int len;
2973 int size = XML_PARSER_BUFFER_SIZE;
2974 int q, ql;
2975 int r, rl;
2976 int cur, l;
2977 xmlParserInputState state;
2978 xmlParserInputPtr input = ctxt->input;
2979 int count = 0;
2982 * Check that there is a comment right here.
2984 if ((RAW != '<') || (NXT(1) != '!') ||
2985 (NXT(2) != '-') || (NXT(3) != '-')) return;
2987 state = ctxt->instate;
2988 ctxt->instate = XML_PARSER_COMMENT;
2989 SHRINK;
2990 SKIP(4);
2991 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2992 if (buf == NULL) {
2993 xmlGenericError(xmlGenericErrorContext,
2994 "malloc of %d byte failed\n", size);
2995 ctxt->instate = state;
2996 return;
2998 q = CUR_CHAR(ql);
2999 NEXTL(ql);
3000 r = CUR_CHAR(rl);
3001 NEXTL(rl);
3002 cur = CUR_CHAR(l);
3003 len = 0;
3004 while (IS_CHAR(cur) && /* checked */
3005 ((cur != '>') ||
3006 (r != '-') || (q != '-'))) {
3007 if ((r == '-') && (q == '-')) {
3008 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3010 ctxt->sax->error(ctxt->userData,
3011 "Comment must not contain '--' (double-hyphen)`\n");
3012 ctxt->wellFormed = 0;
3013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3015 if (len + 5 >= size) {
3016 size *= 2;
3017 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3018 if (buf == NULL) {
3019 xmlGenericError(xmlGenericErrorContext,
3020 "realloc of %d byte failed\n", size);
3021 ctxt->instate = state;
3022 return;
3025 COPY_BUF(ql,buf,len,q);
3026 q = r;
3027 ql = rl;
3028 r = cur;
3029 rl = l;
3031 count++;
3032 if (count > 50) {
3033 GROW;
3034 count = 0;
3036 NEXTL(l);
3037 cur = CUR_CHAR(l);
3038 if (cur == 0) {
3039 SHRINK;
3040 GROW;
3041 cur = CUR_CHAR(l);
3044 buf[len] = 0;
3045 if (!IS_CHAR(cur)) {
3046 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3048 ctxt->sax->error(ctxt->userData,
3049 "Comment not terminated \n<!--%.50s\n", buf);
3050 ctxt->wellFormed = 0;
3051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3052 xmlFree(buf);
3053 } else {
3054 if (input != ctxt->input) {
3055 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3057 ctxt->sax->error(ctxt->userData,
3058 "Comment doesn't start and stop in the same entity\n");
3059 ctxt->wellFormed = 0;
3060 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3062 NEXT;
3063 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3064 (!ctxt->disableSAX))
3065 ctxt->sax->comment(ctxt->userData, buf);
3066 xmlFree(buf);
3068 ctxt->instate = state;
3072 * xmlParsePITarget:
3073 * @ctxt: an XML parser context
3075 * parse the name of a PI
3077 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3079 * Returns the PITarget name or NULL
3082 xmlChar *
3083 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3084 xmlChar *name;
3086 name = xmlParseName(ctxt);
3087 if ((name != NULL) &&
3088 ((name[0] == 'x') || (name[0] == 'X')) &&
3089 ((name[1] == 'm') || (name[1] == 'M')) &&
3090 ((name[2] == 'l') || (name[2] == 'L'))) {
3091 int i;
3092 if ((name[0] == 'x') && (name[1] == 'm') &&
3093 (name[2] == 'l') && (name[3] == 0)) {
3094 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3096 ctxt->sax->error(ctxt->userData,
3097 "XML declaration allowed only at the start of the document\n");
3098 ctxt->wellFormed = 0;
3099 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3100 return(name);
3101 } else if (name[3] == 0) {
3102 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3104 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3105 ctxt->wellFormed = 0;
3106 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3107 return(name);
3109 for (i = 0;;i++) {
3110 if (xmlW3CPIs[i] == NULL) break;
3111 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3112 return(name);
3114 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3115 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3116 ctxt->sax->warning(ctxt->userData,
3117 "xmlParsePITarget: invalid name prefix 'xml'\n");
3120 return(name);
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form  catalog = "URL"  (either quote style,
 * blanks allowed around each token, nothing after the closing quote).
 * Any other shape produces a warning, except a missing '=' after the
 * keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* keyword */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* quoted URL */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3187 * xmlParsePI:
3188 * @ctxt: an XML parser context
3190 * parse an XML Processing Instruction.
3192 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3194 * The processing is transfered to SAX once parsed.
3197 void
3198 xmlParsePI(xmlParserCtxtPtr ctxt) {
3199 xmlChar *buf = NULL;
3200 int len = 0;
3201 int size = XML_PARSER_BUFFER_SIZE;
3202 int cur, l;
3203 xmlChar *target;
3204 xmlParserInputState state;
3205 int count = 0;
3207 if ((RAW == '<') && (NXT(1) == '?')) {
3208 xmlParserInputPtr input = ctxt->input;
3209 state = ctxt->instate;
3210 ctxt->instate = XML_PARSER_PI;
3212 * this is a Processing Instruction.
3214 SKIP(2);
3215 SHRINK;
3218 * Parse the target name and check for special support like
3219 * namespace.
3221 target = xmlParsePITarget(ctxt);
3222 if (target != NULL) {
3223 if ((RAW == '?') && (NXT(1) == '>')) {
3224 if (input != ctxt->input) {
3225 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3227 ctxt->sax->error(ctxt->userData,
3228 "PI declaration doesn't start and stop in the same entity\n");
3229 ctxt->wellFormed = 0;
3230 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3232 SKIP(2);
3235 * SAX: PI detected.
3237 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3238 (ctxt->sax->processingInstruction != NULL))
3239 ctxt->sax->processingInstruction(ctxt->userData,
3240 target, NULL);
3241 ctxt->instate = state;
3242 xmlFree(target);
3243 return;
3245 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3246 if (buf == NULL) {
3247 xmlGenericError(xmlGenericErrorContext,
3248 "malloc of %d byte failed\n", size);
3249 ctxt->instate = state;
3250 return;
3252 cur = CUR;
3253 if (!IS_BLANK(cur)) {
3254 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData,
3257 "xmlParsePI: PI %s space expected\n", target);
3258 ctxt->wellFormed = 0;
3259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3261 SKIP_BLANKS;
3262 cur = CUR_CHAR(l);
3263 while (IS_CHAR(cur) && /* checked */
3264 ((cur != '?') || (NXT(1) != '>'))) {
3265 if (len + 5 >= size) {
3266 size *= 2;
3267 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3268 if (buf == NULL) {
3269 xmlGenericError(xmlGenericErrorContext,
3270 "realloc of %d byte failed\n", size);
3271 ctxt->instate = state;
3272 return;
3275 count++;
3276 if (count > 50) {
3277 GROW;
3278 count = 0;
3280 COPY_BUF(l,buf,len,cur);
3281 NEXTL(l);
3282 cur = CUR_CHAR(l);
3283 if (cur == 0) {
3284 SHRINK;
3285 GROW;
3286 cur = CUR_CHAR(l);
3289 buf[len] = 0;
3290 if (cur != '?') {
3291 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3293 ctxt->sax->error(ctxt->userData,
3294 "xmlParsePI: PI %s never end ...\n", target);
3295 ctxt->wellFormed = 0;
3296 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3297 } else {
3298 if (input != ctxt->input) {
3299 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "PI declaration doesn't start and stop in the same entity\n");
3303 ctxt->wellFormed = 0;
3304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3306 SKIP(2);
3308 #ifdef LIBXML_CATALOG_ENABLED
3309 if (((state == XML_PARSER_MISC) ||
3310 (state == XML_PARSER_START)) &&
3311 (xmlStrEqual(target, XML_CATALOG_PI))) {
3312 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3313 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3314 (allow == XML_CATA_ALLOW_ALL))
3315 xmlParseCatalogPI(ctxt, buf);
3317 #endif
3321 * SAX: PI detected.
3323 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3324 (ctxt->sax->processingInstruction != NULL))
3325 ctxt->sax->processingInstruction(ctxt->userData,
3326 target, buf);
3328 xmlFree(buf);
3329 xmlFree(target);
3330 } else {
3331 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333 ctxt->sax->error(ctxt->userData,
3334 "xmlParsePI : no target name\n");
3335 ctxt->wellFormed = 0;
3336 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3338 ctxt->instate = state;
3343 * xmlParseNotationDecl:
3344 * @ctxt: an XML parser context
3346 * parse a notation declaration
3348 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3350 * Hence there is actually 3 choices:
3351 * 'PUBLIC' S PubidLiteral
3352 * 'PUBLIC' S PubidLiteral S SystemLiteral
3353 * and 'SYSTEM' S SystemLiteral
3355 * See the NOTE on xmlParseExternalID().
3358 void
3359 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3360 xmlChar *name;
3361 xmlChar *Pubid;
3362 xmlChar *Systemid;
3364 if ((RAW == '<') && (NXT(1) == '!') &&
3365 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3366 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3367 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3368 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3369 xmlParserInputPtr input = ctxt->input;
3370 SHRINK;
3371 SKIP(10);
3372 if (!IS_BLANK(CUR)) {
3373 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData,
3376 "Space required after '<!NOTATION'\n");
3377 ctxt->wellFormed = 0;
3378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3379 return;
3381 SKIP_BLANKS;
3383 name = xmlParseName(ctxt);
3384 if (name == NULL) {
3385 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3387 ctxt->sax->error(ctxt->userData,
3388 "NOTATION: Name expected here\n");
3389 ctxt->wellFormed = 0;
3390 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3391 return;
3393 if (!IS_BLANK(CUR)) {
3394 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3396 ctxt->sax->error(ctxt->userData,
3397 "Space required after the NOTATION name'\n");
3398 ctxt->wellFormed = 0;
3399 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3400 return;
3402 SKIP_BLANKS;
3405 * Parse the IDs.
3407 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3408 SKIP_BLANKS;
3410 if (RAW == '>') {
3411 if (input != ctxt->input) {
3412 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3414 ctxt->sax->error(ctxt->userData,
3415 "Notation declaration doesn't start and stop in the same entity\n");
3416 ctxt->wellFormed = 0;
3417 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3419 NEXT;
3420 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3421 (ctxt->sax->notationDecl != NULL))
3422 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3423 } else {
3424 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3426 ctxt->sax->error(ctxt->userData,
3427 "'>' required to close NOTATION declaration\n");
3428 ctxt->wellFormed = 0;
3429 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3431 xmlFree(name);
3432 if (Systemid != NULL) xmlFree(Systemid);
3433 if (Pubid != NULL) xmlFree(Pubid);
3438 * xmlParseEntityDecl:
3439 * @ctxt: an XML parser context
3441 * parse <!ENTITY declarations
3443 * [70] EntityDecl ::= GEDecl | PEDecl
3445 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3447 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3449 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3451 * [74] PEDef ::= EntityValue | ExternalID
3453 * [76] NDataDecl ::= S 'NDATA' S Name
3455 * [ VC: Notation Declared ]
3456 * The Name must match the declared name of a notation.
3459 void
3460 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3461 xmlChar *name = NULL;
3462 xmlChar *value = NULL;
3463 xmlChar *URI = NULL, *literal = NULL;
3464 xmlChar *ndata = NULL;
3465 int isParameter = 0;
3466 xmlChar *orig = NULL;
3467 int skipped;
3469 GROW;
3470 if ((RAW == '<') && (NXT(1) == '!') &&
3471 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3472 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3473 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3474 xmlParserInputPtr input = ctxt->input;
3475 SHRINK;
3476 SKIP(8);
3477 skipped = SKIP_BLANKS;
3478 if (skipped == 0) {
3479 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Space required after '<!ENTITY'\n");
3483 ctxt->wellFormed = 0;
3484 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3487 if (RAW == '%') {
3488 NEXT;
3489 skipped = SKIP_BLANKS;
3490 if (skipped == 0) {
3491 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3493 ctxt->sax->error(ctxt->userData,
3494 "Space required after '%'\n");
3495 ctxt->wellFormed = 0;
3496 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3498 isParameter = 1;
3501 name = xmlParseName(ctxt);
3502 if (name == NULL) {
3503 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3505 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3506 ctxt->wellFormed = 0;
3507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3508 return;
3510 skipped = SKIP_BLANKS;
3511 if (skipped == 0) {
3512 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3514 ctxt->sax->error(ctxt->userData,
3515 "Space required after the entity name\n");
3516 ctxt->wellFormed = 0;
3517 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3520 ctxt->instate = XML_PARSER_ENTITY_DECL;
3522 * handle the various case of definitions...
3524 if (isParameter) {
3525 if ((RAW == '"') || (RAW == '\'')) {
3526 value = xmlParseEntityValue(ctxt, &orig);
3527 if (value) {
3528 if ((ctxt->sax != NULL) &&
3529 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3530 ctxt->sax->entityDecl(ctxt->userData, name,
3531 XML_INTERNAL_PARAMETER_ENTITY,
3532 NULL, NULL, value);
3534 } else {
3535 URI = xmlParseExternalID(ctxt, &literal, 1);
3536 if ((URI == NULL) && (literal == NULL)) {
3537 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "Entity value required\n");
3541 ctxt->wellFormed = 0;
3542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3544 if (URI) {
3545 xmlURIPtr uri;
3547 uri = xmlParseURI((const char *) URI);
3548 if (uri == NULL) {
3549 ctxt->errNo = XML_ERR_INVALID_URI;
3550 if ((ctxt->sax != NULL) &&
3551 (!ctxt->disableSAX) &&
3552 (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554 "Invalid URI: %s\n", URI);
3556 * This really ought to be a well formedness error
3557 * but the XML Core WG decided otherwise c.f. issue
3558 * E26 of the XML erratas.
3560 } else {
3561 if (uri->fragment != NULL) {
3562 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3563 if ((ctxt->sax != NULL) &&
3564 (!ctxt->disableSAX) &&
3565 (ctxt->sax->error != NULL))
3566 ctxt->sax->error(ctxt->userData,
3567 "Fragment not allowed: %s\n", URI);
3569 * Okay this is foolish to block those but not
3570 * invalid URIs.
3572 ctxt->wellFormed = 0;
3573 } else {
3574 if ((ctxt->sax != NULL) &&
3575 (!ctxt->disableSAX) &&
3576 (ctxt->sax->entityDecl != NULL))
3577 ctxt->sax->entityDecl(ctxt->userData, name,
3578 XML_EXTERNAL_PARAMETER_ENTITY,
3579 literal, URI, NULL);
3581 xmlFreeURI(uri);
3585 } else {
3586 if ((RAW == '"') || (RAW == '\'')) {
3587 value = xmlParseEntityValue(ctxt, &orig);
3588 if ((ctxt->sax != NULL) &&
3589 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3590 ctxt->sax->entityDecl(ctxt->userData, name,
3591 XML_INTERNAL_GENERAL_ENTITY,
3592 NULL, NULL, value);
3594 * For expat compatibility in SAX mode.
3596 if ((ctxt->myDoc == NULL) ||
3597 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3598 if (ctxt->myDoc == NULL) {
3599 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3601 if (ctxt->myDoc->intSubset == NULL)
3602 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3603 BAD_CAST "fake", NULL, NULL);
3605 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3606 NULL, NULL, value);
3608 } else {
3609 URI = xmlParseExternalID(ctxt, &literal, 1);
3610 if ((URI == NULL) && (literal == NULL)) {
3611 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3613 ctxt->sax->error(ctxt->userData,
3614 "Entity value required\n");
3615 ctxt->wellFormed = 0;
3616 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3618 if (URI) {
3619 xmlURIPtr uri;
3621 uri = xmlParseURI((const char *)URI);
3622 if (uri == NULL) {
3623 ctxt->errNo = XML_ERR_INVALID_URI;
3624 if ((ctxt->sax != NULL) &&
3625 (!ctxt->disableSAX) &&
3626 (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt->userData,
3628 "Invalid URI: %s\n", URI);
3630 * This really ought to be a well formedness error
3631 * but the XML Core WG decided otherwise c.f. issue
3632 * E26 of the XML erratas.
3634 } else {
3635 if (uri->fragment != NULL) {
3636 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3637 if ((ctxt->sax != NULL) &&
3638 (!ctxt->disableSAX) &&
3639 (ctxt->sax->error != NULL))
3640 ctxt->sax->error(ctxt->userData,
3641 "Fragment not allowed: %s\n", URI);
3643 * Okay this is foolish to block those but not
3644 * invalid URIs.
3646 ctxt->wellFormed = 0;
3648 xmlFreeURI(uri);
3651 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3652 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3654 ctxt->sax->error(ctxt->userData,
3655 "Space required before 'NDATA'\n");
3656 ctxt->wellFormed = 0;
3657 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3659 SKIP_BLANKS;
3660 if ((RAW == 'N') && (NXT(1) == 'D') &&
3661 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3662 (NXT(4) == 'A')) {
3663 SKIP(5);
3664 if (!IS_BLANK(CUR)) {
3665 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3667 ctxt->sax->error(ctxt->userData,
3668 "Space required after 'NDATA'\n");
3669 ctxt->wellFormed = 0;
3670 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3672 SKIP_BLANKS;
3673 ndata = xmlParseName(ctxt);
3674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3675 (ctxt->sax->unparsedEntityDecl != NULL))
3676 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3677 literal, URI, ndata);
3678 } else {
3679 if ((ctxt->sax != NULL) &&
3680 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3681 ctxt->sax->entityDecl(ctxt->userData, name,
3682 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3683 literal, URI, NULL);
3685 * For expat compatibility in SAX mode.
3686 * assuming the entity replacement was asked for
3688 if ((ctxt->replaceEntities != 0) &&
3689 ((ctxt->myDoc == NULL) ||
3690 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3691 if (ctxt->myDoc == NULL) {
3692 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3695 if (ctxt->myDoc->intSubset == NULL)
3696 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3697 BAD_CAST "fake", NULL, NULL);
3698 entityDecl(ctxt, name,
3699 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3700 literal, URI, NULL);
3705 SKIP_BLANKS;
3706 if (RAW != '>') {
3707 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "xmlParseEntityDecl: entity %s not terminated\n", name);
3711 ctxt->wellFormed = 0;
3712 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3713 } else {
3714 if (input != ctxt->input) {
3715 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "Entity declaration doesn't start and stop in the same entity\n");
3719 ctxt->wellFormed = 0;
3720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3722 NEXT;
3724 if (orig != NULL) {
3726 * Ugly mechanism to save the raw entity value.
3728 xmlEntityPtr cur = NULL;
3730 if (isParameter) {
3731 if ((ctxt->sax != NULL) &&
3732 (ctxt->sax->getParameterEntity != NULL))
3733 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3734 } else {
3735 if ((ctxt->sax != NULL) &&
3736 (ctxt->sax->getEntity != NULL))
3737 cur = ctxt->sax->getEntity(ctxt->userData, name);
3738 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3739 cur = getEntity(ctxt, name);
3742 if (cur != NULL) {
3743 if (cur->orig != NULL)
3744 xmlFree(orig);
3745 else
3746 cur->orig = orig;
3747 } else
3748 xmlFree(orig);
3750 if (name != NULL) xmlFree(name);
3751 if (value != NULL) xmlFree(value);
3752 if (URI != NULL) xmlFree(URI);
3753 if (literal != NULL) xmlFree(literal);
3754 if (ndata != NULL) xmlFree(ndata);
3759 * xmlParseDefaultDecl:
3760 * @ctxt: an XML parser context
3761 * @value: Receive a possible fixed default value for the attribute
3763 * Parse an attribute default declaration
3765 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3767 * [ VC: Required Attribute ]
3768 * if the default declaration is the keyword #REQUIRED, then the
3769 * attribute must be specified for all elements of the type in the
3770 * attribute-list declaration.
3772 * [ VC: Attribute Default Legal ]
3773 * The declared default value must meet the lexical constraints of
3774 * the declared attribute type c.f. xmlValidateAttributeDecl()
3776 * [ VC: Fixed Attribute Default ]
3777 * if an attribute has a default value declared with the #FIXED
3778 * keyword, instances of that attribute must match the default value.
3780 * [ WFC: No < in Attribute Values ]
3781 * handled in xmlParseAttValue()
3783 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3784 * or XML_ATTRIBUTE_FIXED.
3788 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3789 int val;
3790 xmlChar *ret;
3792 *value = NULL;
3793 if ((RAW == '#') && (NXT(1) == 'R') &&
3794 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3795 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3796 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3797 (NXT(8) == 'D')) {
3798 SKIP(9);
3799 return(XML_ATTRIBUTE_REQUIRED);
3801 if ((RAW == '#') && (NXT(1) == 'I') &&
3802 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3803 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3804 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3805 SKIP(8);
3806 return(XML_ATTRIBUTE_IMPLIED);
3808 val = XML_ATTRIBUTE_NONE;
3809 if ((RAW == '#') && (NXT(1) == 'F') &&
3810 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3811 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3812 SKIP(6);
3813 val = XML_ATTRIBUTE_FIXED;
3814 if (!IS_BLANK(CUR)) {
3815 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3817 ctxt->sax->error(ctxt->userData,
3818 "Space required after '#FIXED'\n");
3819 ctxt->wellFormed = 0;
3820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3822 SKIP_BLANKS;
3824 ret = xmlParseAttValue(ctxt);
3825 ctxt->instate = XML_PARSER_DTD;
3826 if (ret == NULL) {
3827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3828 ctxt->sax->error(ctxt->userData,
3829 "Attribute default value declaration error\n");
3830 ctxt->wellFormed = 0;
3831 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3832 } else
3833 *value = ret;
3834 return(val);
3838 * xmlParseNotationType:
3839 * @ctxt: an XML parser context
3841 * parse an Notation attribute type.
3843 * Note: the leading 'NOTATION' S part has already being parsed...
3845 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3847 * [ VC: Notation Attributes ]
3848 * Values of this type must match one of the notation names included
3849 * in the declaration; all notation names in the declaration must be declared.
3851 * Returns: the notation attribute tree built while parsing
3854 xmlEnumerationPtr
3855 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3856 xmlChar *name;
3857 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3859 if (RAW != '(') {
3860 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3862 ctxt->sax->error(ctxt->userData,
3863 "'(' required to start 'NOTATION'\n");
3864 ctxt->wellFormed = 0;
3865 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3866 return(NULL);
3868 SHRINK;
3869 do {
3870 NEXT;
3871 SKIP_BLANKS;
3872 name = xmlParseName(ctxt);
3873 if (name == NULL) {
3874 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3876 ctxt->sax->error(ctxt->userData,
3877 "Name expected in NOTATION declaration\n");
3878 ctxt->wellFormed = 0;
3879 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3880 return(ret);
3882 cur = xmlCreateEnumeration(name);
3883 xmlFree(name);
3884 if (cur == NULL) return(ret);
3885 if (last == NULL) ret = last = cur;
3886 else {
3887 last->next = cur;
3888 last = cur;
3890 SKIP_BLANKS;
3891 } while (RAW == '|');
3892 if (RAW != ')') {
3893 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3895 ctxt->sax->error(ctxt->userData,
3896 "')' required to finish NOTATION declaration\n");
3897 ctxt->wellFormed = 0;
3898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3899 if ((last != NULL) && (last != ret))
3900 xmlFreeEnumeration(last);
3901 return(ret);
3903 NEXT;
3904 return(ret);
3908 * xmlParseEnumerationType:
3909 * @ctxt: an XML parser context
3911 * parse an Enumeration attribute type.
3913 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3915 * [ VC: Enumeration ]
3916 * Values of this type must match one of the Nmtoken tokens in
3917 * the declaration
3919 * Returns: the enumeration attribute tree built while parsing
3922 xmlEnumerationPtr
3923 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3924 xmlChar *name;
3925 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3927 if (RAW != '(') {
3928 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3930 ctxt->sax->error(ctxt->userData,
3931 "'(' required to start ATTLIST enumeration\n");
3932 ctxt->wellFormed = 0;
3933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3934 return(NULL);
3936 SHRINK;
3937 do {
3938 NEXT;
3939 SKIP_BLANKS;
3940 name = xmlParseNmtoken(ctxt);
3941 if (name == NULL) {
3942 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3944 ctxt->sax->error(ctxt->userData,
3945 "NmToken expected in ATTLIST enumeration\n");
3946 ctxt->wellFormed = 0;
3947 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3948 return(ret);
3950 cur = xmlCreateEnumeration(name);
3951 xmlFree(name);
3952 if (cur == NULL) return(ret);
3953 if (last == NULL) ret = last = cur;
3954 else {
3955 last->next = cur;
3956 last = cur;
3958 SKIP_BLANKS;
3959 } while (RAW == '|');
3960 if (RAW != ')') {
3961 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963 ctxt->sax->error(ctxt->userData,
3964 "')' required to finish ATTLIST enumeration\n");
3965 ctxt->wellFormed = 0;
3966 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3967 return(ret);
3969 NEXT;
3970 return(ret);
3974 * xmlParseEnumeratedType:
3975 * @ctxt: an XML parser context
3976 * @tree: the enumeration tree built while parsing
3978 * parse an Enumerated attribute type.
3980 * [57] EnumeratedType ::= NotationType | Enumeration
3982 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3985 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3989 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3990 if ((RAW == 'N') && (NXT(1) == 'O') &&
3991 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3992 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3993 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3994 SKIP(8);
3995 if (!IS_BLANK(CUR)) {
3996 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "Space required after 'NOTATION'\n");
4000 ctxt->wellFormed = 0;
4001 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4002 return(0);
4004 SKIP_BLANKS;
4005 *tree = xmlParseNotationType(ctxt);
4006 if (*tree == NULL) return(0);
4007 return(XML_ATTRIBUTE_NOTATION);
4009 *tree = xmlParseEnumerationType(ctxt);
4010 if (*tree == NULL) return(0);
4011 return(XML_ATTRIBUTE_ENUMERATION);
4015 * xmlParseAttributeType:
4016 * @ctxt: an XML parser context
4017 * @tree: the enumeration tree built while parsing
4019 * parse the Attribute list def for an element
4021 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4023 * [55] StringType ::= 'CDATA'
4025 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4026 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4028 * Validity constraints for attribute values syntax are checked in
4029 * xmlValidateAttributeValue()
4031 * [ VC: ID ]
4032 * Values of type ID must match the Name production. A name must not
4033 * appear more than once in an XML document as a value of this type;
4034 * i.e., ID values must uniquely identify the elements which bear them.
4036 * [ VC: One ID per Element Type ]
4037 * No element type may have more than one ID attribute specified.
4039 * [ VC: ID Attribute Default ]
4040 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4042 * [ VC: IDREF ]
4043 * Values of type IDREF must match the Name production, and values
4044 * of type IDREFS must match Names; each IDREF Name must match the value
4045 * of an ID attribute on some element in the XML document; i.e. IDREF
4046 * values must match the value of some ID attribute.
4048 * [ VC: Entity Name ]
4049 * Values of type ENTITY must match the Name production, values
4050 * of type ENTITIES must match Names; each Entity Name must match the
4051 * name of an unparsed entity declared in the DTD.
4053 * [ VC: Name Token ]
4054 * Values of type NMTOKEN must match the Nmtoken production; values
4055 * of type NMTOKENS must match Nmtokens.
4057 * Returns the attribute type
4059 int
4060 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4061 SHRINK;
4062 if ((RAW == 'C') && (NXT(1) == 'D') &&
4063 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4064 (NXT(4) == 'A')) {
4065 SKIP(5);
4066 return(XML_ATTRIBUTE_CDATA);
4067 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4068 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4069 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4070 SKIP(6);
4071 return(XML_ATTRIBUTE_IDREFS);
4072 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4073 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4074 (NXT(4) == 'F')) {
4075 SKIP(5);
4076 return(XML_ATTRIBUTE_IDREF);
4077 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4078 SKIP(2);
4079 return(XML_ATTRIBUTE_ID);
4080 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4081 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4082 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4083 SKIP(6);
4084 return(XML_ATTRIBUTE_ENTITY);
4085 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4086 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4087 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4088 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4089 SKIP(8);
4090 return(XML_ATTRIBUTE_ENTITIES);
4091 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4092 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4093 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4094 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4095 SKIP(8);
4096 return(XML_ATTRIBUTE_NMTOKENS);
4097 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4098 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4099 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4100 (NXT(6) == 'N')) {
4101 SKIP(7);
4102 return(XML_ATTRIBUTE_NMTOKEN);
4104 return(xmlParseEnumeratedType(ctxt, tree));
4108 * xmlParseAttributeListDecl:
4109 * @ctxt: an XML parser context
4111 * : parse the Attribute list def for an element
4113 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4115 * [53] AttDef ::= S Name S AttType S DefaultDecl
4118 void
4119 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4120 xmlChar *elemName;
4121 xmlChar *attrName;
4122 xmlEnumerationPtr tree;
4124 if ((RAW == '<') && (NXT(1) == '!') &&
4125 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4126 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4127 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4128 (NXT(8) == 'T')) {
4129 xmlParserInputPtr input = ctxt->input;
4131 SKIP(9);
4132 if (!IS_BLANK(CUR)) {
4133 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4135 ctxt->sax->error(ctxt->userData,
4136 "Space required after '<!ATTLIST'\n");
4137 ctxt->wellFormed = 0;
4138 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4140 SKIP_BLANKS;
4141 elemName = xmlParseName(ctxt);
4142 if (elemName == NULL) {
4143 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4145 ctxt->sax->error(ctxt->userData,
4146 "ATTLIST: no name for Element\n");
4147 ctxt->wellFormed = 0;
4148 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4149 return;
4151 SKIP_BLANKS;
4152 GROW;
4153 while (RAW != '>') {
4154 const xmlChar *check = CUR_PTR;
4155 int type;
4156 int def;
4157 xmlChar *defaultValue = NULL;
4159 GROW;
4160 tree = NULL;
4161 attrName = xmlParseName(ctxt);
4162 if (attrName == NULL) {
4163 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4165 ctxt->sax->error(ctxt->userData,
4166 "ATTLIST: no name for Attribute\n");
4167 ctxt->wellFormed = 0;
4168 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4169 break;
4171 GROW;
4172 if (!IS_BLANK(CUR)) {
4173 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4175 ctxt->sax->error(ctxt->userData,
4176 "Space required after the attribute name\n");
4177 ctxt->wellFormed = 0;
4178 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4179 if (attrName != NULL)
4180 xmlFree(attrName);
4181 if (defaultValue != NULL)
4182 xmlFree(defaultValue);
4183 break;
4185 SKIP_BLANKS;
4187 type = xmlParseAttributeType(ctxt, &tree);
4188 if (type <= 0) {
4189 if (attrName != NULL)
4190 xmlFree(attrName);
4191 if (defaultValue != NULL)
4192 xmlFree(defaultValue);
4193 break;
4196 GROW;
4197 if (!IS_BLANK(CUR)) {
4198 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4200 ctxt->sax->error(ctxt->userData,
4201 "Space required after the attribute type\n");
4202 ctxt->wellFormed = 0;
4203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4204 if (attrName != NULL)
4205 xmlFree(attrName);
4206 if (defaultValue != NULL)
4207 xmlFree(defaultValue);
4208 if (tree != NULL)
4209 xmlFreeEnumeration(tree);
4210 break;
4212 SKIP_BLANKS;
4214 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4215 if (def <= 0) {
4216 if (attrName != NULL)
4217 xmlFree(attrName);
4218 if (defaultValue != NULL)
4219 xmlFree(defaultValue);
4220 if (tree != NULL)
4221 xmlFreeEnumeration(tree);
4222 break;
4225 GROW;
4226 if (RAW != '>') {
4227 if (!IS_BLANK(CUR)) {
4228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4230 ctxt->sax->error(ctxt->userData,
4231 "Space required after the attribute default value\n");
4232 ctxt->wellFormed = 0;
4233 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4234 if (attrName != NULL)
4235 xmlFree(attrName);
4236 if (defaultValue != NULL)
4237 xmlFree(defaultValue);
4238 if (tree != NULL)
4239 xmlFreeEnumeration(tree);
4240 break;
4242 SKIP_BLANKS;
4244 if (check == CUR_PTR) {
4245 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4247 ctxt->sax->error(ctxt->userData,
4248 "xmlParseAttributeListDecl: detected internal error\n");
4249 if (attrName != NULL)
4250 xmlFree(attrName);
4251 if (defaultValue != NULL)
4252 xmlFree(defaultValue);
4253 if (tree != NULL)
4254 xmlFreeEnumeration(tree);
4255 break;
4257 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4258 (ctxt->sax->attributeDecl != NULL))
4259 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4260 type, def, defaultValue, tree);
4261 if (attrName != NULL)
4262 xmlFree(attrName);
4263 if (defaultValue != NULL)
4264 xmlFree(defaultValue);
4265 GROW;
4267 if (RAW == '>') {
4268 if (input != ctxt->input) {
4269 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4271 ctxt->sax->error(ctxt->userData,
4272 "Attribute list declaration doesn't start and stop in the same entity\n");
4273 ctxt->wellFormed = 0;
4274 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4276 NEXT;
4279 xmlFree(elemName);
4284 * xmlParseElementMixedContentDecl:
4285 * @ctxt: an XML parser context
4287 * parse the declaration for a Mixed Element content
4288 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4290 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4291 * '(' S? '#PCDATA' S? ')'
4293 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4295 * [ VC: No Duplicate Types ]
4296 * The same name must not appear more than once in a single
4297 * mixed-content declaration.
4299 * returns: the list of the xmlElementContentPtr describing the element choices
4301 xmlElementContentPtr
4302 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
4303 xmlElementContentPtr ret = NULL, cur = NULL, n;
4304 xmlChar *elem = NULL;
4306 GROW;
4307 if ((RAW == '#') && (NXT(1) == 'P') &&
4308 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4309 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4310 (NXT(6) == 'A')) {
4311 SKIP(7);
4312 SKIP_BLANKS;
4313 SHRINK;
4314 if (RAW == ')') {
4315 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4316 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4317 if (ctxt->vctxt.error != NULL)
4318 ctxt->vctxt.error(ctxt->vctxt.userData,
4319 "Element content declaration doesn't start and stop in the same entity\n");
4320 ctxt->valid = 0;
4322 NEXT;
4323 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4324 if (RAW == '*') {
4325 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4326 NEXT;
4328 return(ret);
4330 if ((RAW == '(') || (RAW == '|')) {
4331 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4332 if (ret == NULL) return(NULL);
4334 while (RAW == '|') {
4335 NEXT;
4336 if (elem == NULL) {
4337 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4338 if (ret == NULL) return(NULL);
4339 ret->c1 = cur;
4340 if (cur != NULL)
4341 cur->parent = ret;
4342 cur = ret;
4343 } else {
4344 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4345 if (n == NULL) return(NULL);
4346 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4347 if (n->c1 != NULL)
4348 n->c1->parent = n;
4349 cur->c2 = n;
4350 if (n != NULL)
4351 n->parent = cur;
4352 cur = n;
4353 xmlFree(elem);
4355 SKIP_BLANKS;
4356 elem = xmlParseName(ctxt);
4357 if (elem == NULL) {
4358 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4360 ctxt->sax->error(ctxt->userData,
4361 "xmlParseElementMixedContentDecl : Name expected\n");
4362 ctxt->wellFormed = 0;
4363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4364 xmlFreeElementContent(cur);
4365 return(NULL);
4367 SKIP_BLANKS;
4368 GROW;
4370 if ((RAW == ')') && (NXT(1) == '*')) {
4371 if (elem != NULL) {
4372 cur->c2 = xmlNewElementContent(elem,
4373 XML_ELEMENT_CONTENT_ELEMENT);
4374 if (cur->c2 != NULL)
4375 cur->c2->parent = cur;
4376 xmlFree(elem);
4378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4379 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4380 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4381 if (ctxt->vctxt.error != NULL)
4382 ctxt->vctxt.error(ctxt->vctxt.userData,
4383 "Element content declaration doesn't start and stop in the same entity\n");
4384 ctxt->valid = 0;
4386 SKIP(2);
4387 } else {
4388 if (elem != NULL) xmlFree(elem);
4389 xmlFreeElementContent(ret);
4390 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4392 ctxt->sax->error(ctxt->userData,
4393 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4394 ctxt->wellFormed = 0;
4395 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4396 return(NULL);
4399 } else {
4400 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4402 ctxt->sax->error(ctxt->userData,
4403 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4404 ctxt->wellFormed = 0;
4405 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4407 return(ret);
4411 * xmlParseElementChildrenContentDecl:
4412 * @ctxt: an XML parser context
4414 * parse the declaration for a Mixed Element content
4415 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4418 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4420 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4422 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4424 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4426 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4427 * TODO Parameter-entity replacement text must be properly nested
4428 * with parenthesized groups. That is to say, if either of the
4429 * opening or closing parentheses in a choice, seq, or Mixed
4430 * construct is contained in the replacement text for a parameter
4431 * entity, both must be contained in the same replacement text. For
4432 * interoperability, if a parameter-entity reference appears in a
4433 * choice, seq, or Mixed construct, its replacement text should not
4434 * be empty, and neither the first nor last non-blank character of
4435 * the replacement text should be a connector (| or ,).
4437 * Returns the tree of xmlElementContentPtr describing the element
4438 * hierarchy.
4440 xmlElementContentPtr
4441 xmlParseElementChildrenContentDecl
4442 (xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
4443 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4444 xmlChar *elem;
4445 xmlChar type = 0;
4447 SKIP_BLANKS;
4448 GROW;
4449 if (RAW == '(') {
4450 xmlParserInputPtr input = ctxt->input;
4452 /* Recurse on first child */
4453 NEXT;
4454 SKIP_BLANKS;
4455 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
4456 SKIP_BLANKS;
4457 GROW;
4458 } else {
4459 elem = xmlParseName(ctxt);
4460 if (elem == NULL) {
4461 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4463 ctxt->sax->error(ctxt->userData,
4464 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4465 ctxt->wellFormed = 0;
4466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4467 return(NULL);
4469 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4470 GROW;
4471 if (RAW == '?') {
4472 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4473 NEXT;
4474 } else if (RAW == '*') {
4475 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4476 NEXT;
4477 } else if (RAW == '+') {
4478 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4479 NEXT;
4480 } else {
4481 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4483 xmlFree(elem);
4484 GROW;
4486 SKIP_BLANKS;
4487 SHRINK;
4488 while (RAW != ')') {
4490 * Each loop we parse one separator and one element.
4492 if (RAW == ',') {
4493 if (type == 0) type = CUR;
4496 * Detect "Name | Name , Name" error
4498 else if (type != CUR) {
4499 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4501 ctxt->sax->error(ctxt->userData,
4502 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4503 type);
4504 ctxt->wellFormed = 0;
4505 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4506 if ((last != NULL) && (last != ret))
4507 xmlFreeElementContent(last);
4508 if (ret != NULL)
4509 xmlFreeElementContent(ret);
4510 return(NULL);
4512 NEXT;
4514 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4515 if (op == NULL) {
4516 if ((last != NULL) && (last != ret))
4517 xmlFreeElementContent(last);
4518 xmlFreeElementContent(ret);
4519 return(NULL);
4521 if (last == NULL) {
4522 op->c1 = ret;
4523 if (ret != NULL)
4524 ret->parent = op;
4525 ret = cur = op;
4526 } else {
4527 cur->c2 = op;
4528 if (op != NULL)
4529 op->parent = cur;
4530 op->c1 = last;
4531 if (last != NULL)
4532 last->parent = op;
4533 cur =op;
4534 last = NULL;
4536 } else if (RAW == '|') {
4537 if (type == 0) type = CUR;
4540 * Detect "Name , Name | Name" error
4542 else if (type != CUR) {
4543 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4545 ctxt->sax->error(ctxt->userData,
4546 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4547 type);
4548 ctxt->wellFormed = 0;
4549 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4550 if ((last != NULL) && (last != ret))
4551 xmlFreeElementContent(last);
4552 if (ret != NULL)
4553 xmlFreeElementContent(ret);
4554 return(NULL);
4556 NEXT;
4558 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4559 if (op == NULL) {
4560 if ((last != NULL) && (last != ret))
4561 xmlFreeElementContent(last);
4562 if (ret != NULL)
4563 xmlFreeElementContent(ret);
4564 return(NULL);
4566 if (last == NULL) {
4567 op->c1 = ret;
4568 if (ret != NULL)
4569 ret->parent = op;
4570 ret = cur = op;
4571 } else {
4572 cur->c2 = op;
4573 if (op != NULL)
4574 op->parent = cur;
4575 op->c1 = last;
4576 if (last != NULL)
4577 last->parent = op;
4578 cur =op;
4579 last = NULL;
4581 } else {
4582 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584 ctxt->sax->error(ctxt->userData,
4585 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4586 ctxt->wellFormed = 0;
4587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4588 if (ret != NULL)
4589 xmlFreeElementContent(ret);
4590 return(NULL);
4592 GROW;
4593 SKIP_BLANKS;
4594 GROW;
4595 if (RAW == '(') {
4596 xmlParserInputPtr input = ctxt->input;
4597 /* Recurse on second child */
4598 NEXT;
4599 SKIP_BLANKS;
4600 last = xmlParseElementChildrenContentDecl(ctxt, input);
4601 SKIP_BLANKS;
4602 } else {
4603 elem = xmlParseName(ctxt);
4604 if (elem == NULL) {
4605 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4609 ctxt->wellFormed = 0;
4610 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4611 if (ret != NULL)
4612 xmlFreeElementContent(ret);
4613 return(NULL);
4615 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4616 xmlFree(elem);
4617 if (RAW == '?') {
4618 last->ocur = XML_ELEMENT_CONTENT_OPT;
4619 NEXT;
4620 } else if (RAW == '*') {
4621 last->ocur = XML_ELEMENT_CONTENT_MULT;
4622 NEXT;
4623 } else if (RAW == '+') {
4624 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4625 NEXT;
4626 } else {
4627 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4630 SKIP_BLANKS;
4631 GROW;
4633 if ((cur != NULL) && (last != NULL)) {
4634 cur->c2 = last;
4635 if (last != NULL)
4636 last->parent = cur;
4638 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4639 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4640 if (ctxt->vctxt.error != NULL)
4641 ctxt->vctxt.error(ctxt->vctxt.userData,
4642 "Element content declaration doesn't start and stop in the same entity\n");
4643 ctxt->valid = 0;
4645 NEXT;
4646 if (RAW == '?') {
4647 if (ret != NULL)
4648 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4649 NEXT;
4650 } else if (RAW == '*') {
4651 if (ret != NULL) {
4652 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4653 cur = ret;
4655 * Some normalization:
4656 * (a | b* | c?)* == (a | b | c)*
4658 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4659 if ((cur->c1 != NULL) &&
4660 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4661 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4662 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4663 if ((cur->c2 != NULL) &&
4664 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4665 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4666 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4667 cur = cur->c2;
4670 NEXT;
4671 } else if (RAW == '+') {
4672 if (ret != NULL) {
4673 int found = 0;
4675 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4677 * Some normalization:
4678 * (a | b*)+ == (a | b)*
4679 * (a | b?)+ == (a | b)*
4681 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4682 if ((cur->c1 != NULL) &&
4683 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4684 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4685 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4686 found = 1;
4688 if ((cur->c2 != NULL) &&
4689 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4690 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4691 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4692 found = 1;
4694 cur = cur->c2;
4696 if (found)
4697 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4699 NEXT;
4701 return(ret);
4705 * xmlParseElementContentDecl:
4706 * @ctxt: an XML parser context
4707 * @name: the name of the element being defined.
4708 * @result: the Element Content pointer will be stored here if any
4710 * parse the declaration for an Element content either Mixed or Children,
4711 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4713 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4715 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4719 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4720 xmlElementContentPtr *result) {
4722 xmlElementContentPtr tree = NULL;
4723 xmlParserInputPtr input = ctxt->input;
4724 int res;
4726 *result = NULL;
4728 if (RAW != '(') {
4729 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4731 ctxt->sax->error(ctxt->userData,
4732 "xmlParseElementContentDecl : %s '(' expected\n", name);
4733 ctxt->wellFormed = 0;
4734 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4735 return(-1);
4737 NEXT;
4738 GROW;
4739 SKIP_BLANKS;
4740 if ((RAW == '#') && (NXT(1) == 'P') &&
4741 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4742 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4743 (NXT(6) == 'A')) {
4744 tree = xmlParseElementMixedContentDecl(ctxt, input);
4745 res = XML_ELEMENT_TYPE_MIXED;
4746 } else {
4747 tree = xmlParseElementChildrenContentDecl(ctxt, input);
4748 res = XML_ELEMENT_TYPE_ELEMENT;
4750 SKIP_BLANKS;
4751 *result = tree;
4752 return(res);
4756 * xmlParseElementDecl:
4757 * @ctxt: an XML parser context
4759 * parse an Element declaration.
4761 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4763 * [ VC: Unique Element Type Declaration ]
4764 * No element type may be declared more than once
4766 * Returns the type of the element, or -1 in case of error
4769 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4770 xmlChar *name;
4771 int ret = -1;
4772 xmlElementContentPtr content = NULL;
4774 GROW;
4775 if ((RAW == '<') && (NXT(1) == '!') &&
4776 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4777 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4778 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4779 (NXT(8) == 'T')) {
4780 xmlParserInputPtr input = ctxt->input;
4782 SKIP(9);
4783 if (!IS_BLANK(CUR)) {
4784 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4786 ctxt->sax->error(ctxt->userData,
4787 "Space required after 'ELEMENT'\n");
4788 ctxt->wellFormed = 0;
4789 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4791 SKIP_BLANKS;
4792 name = xmlParseName(ctxt);
4793 if (name == NULL) {
4794 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4796 ctxt->sax->error(ctxt->userData,
4797 "xmlParseElementDecl: no name for Element\n");
4798 ctxt->wellFormed = 0;
4799 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4800 return(-1);
4802 while ((RAW == 0) && (ctxt->inputNr > 1))
4803 xmlPopInput(ctxt);
4804 if (!IS_BLANK(CUR)) {
4805 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "Space required after the element name\n");
4809 ctxt->wellFormed = 0;
4810 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4812 SKIP_BLANKS;
4813 if ((RAW == 'E') && (NXT(1) == 'M') &&
4814 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4815 (NXT(4) == 'Y')) {
4816 SKIP(5);
4818 * Element must always be empty.
4820 ret = XML_ELEMENT_TYPE_EMPTY;
4821 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4822 (NXT(2) == 'Y')) {
4823 SKIP(3);
4825 * Element is a generic container.
4827 ret = XML_ELEMENT_TYPE_ANY;
4828 } else if (RAW == '(') {
4829 ret = xmlParseElementContentDecl(ctxt, name, &content);
4830 } else {
4832 * [ WFC: PEs in Internal Subset ] error handling.
4834 if ((RAW == '%') && (ctxt->external == 0) &&
4835 (ctxt->inputNr == 1)) {
4836 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4838 ctxt->sax->error(ctxt->userData,
4839 "PEReference: forbidden within markup decl in internal subset\n");
4840 } else {
4841 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4843 ctxt->sax->error(ctxt->userData,
4844 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4846 ctxt->wellFormed = 0;
4847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4848 if (name != NULL) xmlFree(name);
4849 return(-1);
4852 SKIP_BLANKS;
4854 * Pop-up of finished entities.
4856 while ((RAW == 0) && (ctxt->inputNr > 1))
4857 xmlPopInput(ctxt);
4858 SKIP_BLANKS;
4860 if (RAW != '>') {
4861 ctxt->errNo = XML_ERR_GT_REQUIRED;
4862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4863 ctxt->sax->error(ctxt->userData,
4864 "xmlParseElementDecl: expected '>' at the end\n");
4865 ctxt->wellFormed = 0;
4866 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4867 } else {
4868 if (input != ctxt->input) {
4869 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4871 ctxt->sax->error(ctxt->userData,
4872 "Element declaration doesn't start and stop in the same entity\n");
4873 ctxt->wellFormed = 0;
4874 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4877 NEXT;
4878 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4879 (ctxt->sax->elementDecl != NULL))
4880 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4881 content);
4883 if (content != NULL) {
4884 xmlFreeElementContent(content);
4886 if (name != NULL) {
4887 xmlFree(name);
4890 return(ret);
4894 * xmlParseConditionalSections
4895 * @ctxt: an XML parser context
4897 * [61] conditionalSect ::= includeSect | ignoreSect
4898 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4899 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4900 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4901 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4904 static void
4905 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4906 SKIP(3);
4907 SKIP_BLANKS;
4908 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4909 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4910 (NXT(6) == 'E')) {
4911 SKIP(7);
4912 SKIP_BLANKS;
4913 if (RAW != '[') {
4914 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4916 ctxt->sax->error(ctxt->userData,
4917 "XML conditional section '[' expected\n");
4918 ctxt->wellFormed = 0;
4919 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4920 } else {
4921 NEXT;
4923 if (xmlParserDebugEntities) {
4924 if ((ctxt->input != NULL) && (ctxt->input->filename))
4925 xmlGenericError(xmlGenericErrorContext,
4926 "%s(%d): ", ctxt->input->filename,
4927 ctxt->input->line);
4928 xmlGenericError(xmlGenericErrorContext,
4929 "Entering INCLUDE Conditional Section\n");
4932 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4933 (NXT(2) != '>'))) {
4934 const xmlChar *check = CUR_PTR;
4935 int cons = ctxt->input->consumed;
4937 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4938 xmlParseConditionalSections(ctxt);
4939 } else if (IS_BLANK(CUR)) {
4940 NEXT;
4941 } else if (RAW == '%') {
4942 xmlParsePEReference(ctxt);
4943 } else
4944 xmlParseMarkupDecl(ctxt);
4947 * Pop-up of finished entities.
4949 while ((RAW == 0) && (ctxt->inputNr > 1))
4950 xmlPopInput(ctxt);
4952 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
4953 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4955 ctxt->sax->error(ctxt->userData,
4956 "Content error in the external subset\n");
4957 ctxt->wellFormed = 0;
4958 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4959 break;
4962 if (xmlParserDebugEntities) {
4963 if ((ctxt->input != NULL) && (ctxt->input->filename))
4964 xmlGenericError(xmlGenericErrorContext,
4965 "%s(%d): ", ctxt->input->filename,
4966 ctxt->input->line);
4967 xmlGenericError(xmlGenericErrorContext,
4968 "Leaving INCLUDE Conditional Section\n");
4971 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4972 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4973 int state;
4974 int instate;
4975 int depth = 0;
4977 SKIP(6);
4978 SKIP_BLANKS;
4979 if (RAW != '[') {
4980 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4982 ctxt->sax->error(ctxt->userData,
4983 "XML conditional section '[' expected\n");
4984 ctxt->wellFormed = 0;
4985 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4986 } else {
4987 NEXT;
4989 if (xmlParserDebugEntities) {
4990 if ((ctxt->input != NULL) && (ctxt->input->filename))
4991 xmlGenericError(xmlGenericErrorContext,
4992 "%s(%d): ", ctxt->input->filename,
4993 ctxt->input->line);
4994 xmlGenericError(xmlGenericErrorContext,
4995 "Entering IGNORE Conditional Section\n");
4999 * Parse up to the end of the conditional section
5000 * But disable SAX event generating DTD building in the meantime
5002 state = ctxt->disableSAX;
5003 instate = ctxt->instate;
5004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5005 ctxt->instate = XML_PARSER_IGNORE;
5007 while ((depth >= 0) && (RAW != 0)) {
5008 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5009 depth++;
5010 SKIP(3);
5011 continue;
5013 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5014 if (--depth >= 0) SKIP(3);
5015 continue;
5017 NEXT;
5018 continue;
5021 ctxt->disableSAX = state;
5022 ctxt->instate = instate;
5024 if (xmlParserDebugEntities) {
5025 if ((ctxt->input != NULL) && (ctxt->input->filename))
5026 xmlGenericError(xmlGenericErrorContext,
5027 "%s(%d): ", ctxt->input->filename,
5028 ctxt->input->line);
5029 xmlGenericError(xmlGenericErrorContext,
5030 "Leaving IGNORE Conditional Section\n");
5033 } else {
5034 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5036 ctxt->sax->error(ctxt->userData,
5037 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5038 ctxt->wellFormed = 0;
5039 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5042 if (RAW == 0)
5043 SHRINK;
5045 if (RAW == 0) {
5046 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5048 ctxt->sax->error(ctxt->userData,
5049 "XML conditional section not closed\n");
5050 ctxt->wellFormed = 0;
5051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5052 } else {
5053 SKIP(3);
5058 * xmlParseMarkupDecl:
5059 * @ctxt: an XML parser context
5061 * parse Markup declarations
5063 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5064 * NotationDecl | PI | Comment
5066 * [ VC: Proper Declaration/PE Nesting ]
5067 * Parameter-entity replacement text must be properly nested with
5068 * markup declarations. That is to say, if either the first character
5069 * or the last character of a markup declaration (markupdecl above) is
5070 * contained in the replacement text for a parameter-entity reference,
5071 * both must be contained in the same replacement text.
5073 * [ WFC: PEs in Internal Subset ]
5074 * In the internal DTD subset, parameter-entity references can occur
5075 * only where markup declarations can occur, not within markup declarations.
5076 * (This does not apply to references that occur in external parameter
5077 * entities or to the external subset.)
5079 void
5080 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5081 GROW;
5082 xmlParseElementDecl(ctxt);
5083 xmlParseAttributeListDecl(ctxt);
5084 xmlParseEntityDecl(ctxt);
5085 xmlParseNotationDecl(ctxt);
5086 xmlParsePI(ctxt);
5087 xmlParseComment(ctxt);
5089 * This is only for internal subset. On external entities,
5090 * the replacement is done before parsing stage
5092 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5093 xmlParsePEReference(ctxt);
5096 * Conditional sections are allowed from entities included
5097 * by PE References in the internal subset.
5099 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5100 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5101 xmlParseConditionalSections(ctxt);
5105 ctxt->instate = XML_PARSER_DTD;
5109 * xmlParseTextDecl:
5110 * @ctxt: an XML parser context
5112 * parse an XML declaration header for external entities
5114 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5116 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5119 void
5120 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5121 xmlChar *version;
5124 * We know that '<?xml' is here.
5126 if ((RAW == '<') && (NXT(1) == '?') &&
5127 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5128 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5129 SKIP(5);
5130 } else {
5131 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5133 ctxt->sax->error(ctxt->userData,
5134 "Text declaration '<?xml' required\n");
5135 ctxt->wellFormed = 0;
5136 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5138 return;
5141 if (!IS_BLANK(CUR)) {
5142 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5144 ctxt->sax->error(ctxt->userData,
5145 "Space needed after '<?xml'\n");
5146 ctxt->wellFormed = 0;
5147 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5149 SKIP_BLANKS;
5152 * We may have the VersionInfo here.
5154 version = xmlParseVersionInfo(ctxt);
5155 if (version == NULL)
5156 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5157 else {
5158 if (!IS_BLANK(CUR)) {
5159 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5161 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5162 ctxt->wellFormed = 0;
5163 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5166 ctxt->input->version = version;
5169 * We must have the encoding declaration
5171 xmlParseEncodingDecl(ctxt);
5172 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5174 * The XML REC instructs us to stop parsing right here
5176 return;
5179 SKIP_BLANKS;
5180 if ((RAW == '?') && (NXT(1) == '>')) {
5181 SKIP(2);
5182 } else if (RAW == '>') {
5183 /* Deprecated old WD ... */
5184 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5186 ctxt->sax->error(ctxt->userData,
5187 "XML declaration must end-up with '?>'\n");
5188 ctxt->wellFormed = 0;
5189 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5190 NEXT;
5191 } else {
5192 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5194 ctxt->sax->error(ctxt->userData,
5195 "parsing XML declaration: '?>' expected\n");
5196 ctxt->wellFormed = 0;
5197 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5198 MOVETO_ENDTAG(CUR_PTR);
5199 NEXT;
5204 * xmlParseExternalSubset:
5205 * @ctxt: an XML parser context
5206 * @ExternalID: the external identifier
5207 * @SystemID: the system identifier (or URL)
5209 * parse Markup declarations from an external subset
5211 * [30] extSubset ::= textDecl? extSubsetDecl
5213 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5215 void
5216 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5217 const xmlChar *SystemID) {
5218 GROW;
5219 if ((RAW == '<') && (NXT(1) == '?') &&
5220 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5221 (NXT(4) == 'l')) {
5222 xmlParseTextDecl(ctxt);
5223 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5225 * The XML REC instructs us to stop parsing right here
5227 ctxt->instate = XML_PARSER_EOF;
5228 return;
5231 if (ctxt->myDoc == NULL) {
5232 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5234 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5235 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5237 ctxt->instate = XML_PARSER_DTD;
5238 ctxt->external = 1;
5239 while (((RAW == '<') && (NXT(1) == '?')) ||
5240 ((RAW == '<') && (NXT(1) == '!')) ||
5241 (RAW == '%') || IS_BLANK(CUR)) {
5242 const xmlChar *check = CUR_PTR;
5243 int cons = ctxt->input->consumed;
5245 GROW;
5246 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5247 xmlParseConditionalSections(ctxt);
5248 } else if (IS_BLANK(CUR)) {
5249 NEXT;
5250 } else if (RAW == '%') {
5251 xmlParsePEReference(ctxt);
5252 } else
5253 xmlParseMarkupDecl(ctxt);
5256 * Pop-up of finished entities.
5258 while ((RAW == 0) && (ctxt->inputNr > 1))
5259 xmlPopInput(ctxt);
5261 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5262 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5264 ctxt->sax->error(ctxt->userData,
5265 "Content error in the external subset\n");
5266 ctxt->wellFormed = 0;
5267 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5268 break;
5272 if (RAW != 0) {
5273 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5275 ctxt->sax->error(ctxt->userData,
5276 "Extra content at the end of the document\n");
5277 ctxt->wellFormed = 0;
5278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5284 * xmlParseReference:
5285 * @ctxt: an XML parser context
5287 * parse and handle entity references in content, depending on the SAX
5288 * interface, this may end-up in a call to character() if this is a
5289 * CharRef, a predefined entity, if there is no reference() callback.
5290 * or if the parser was asked to switch to that mode.
5292 * [67] Reference ::= EntityRef | CharRef
5294 void
5295 xmlParseReference(xmlParserCtxtPtr ctxt) {
5296 xmlEntityPtr ent;
5297 xmlChar *val;
5298 if (RAW != '&') return;
5300 if (NXT(1) == '#') {
5301 int i = 0;
5302 xmlChar out[10];
5303 int hex = NXT(2);
5304 int value = xmlParseCharRef(ctxt);
5306 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5308 * So we are using non-UTF-8 buffers
5309 * Check that the char fit on 8bits, if not
5310 * generate a CharRef.
5312 if (value <= 0xFF) {
5313 out[0] = value;
5314 out[1] = 0;
5315 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5316 (!ctxt->disableSAX))
5317 ctxt->sax->characters(ctxt->userData, out, 1);
5318 } else {
5319 if ((hex == 'x') || (hex == 'X'))
5320 snprintf((char *)out, sizeof(out), "#x%X", value);
5321 else
5322 snprintf((char *)out, sizeof(out), "#%d", value);
5323 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5324 (!ctxt->disableSAX))
5325 ctxt->sax->reference(ctxt->userData, out);
5327 } else {
5329 * Just encode the value in UTF-8
5331 COPY_BUF(0 ,out, i, value);
5332 out[i] = 0;
5333 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5334 (!ctxt->disableSAX))
5335 ctxt->sax->characters(ctxt->userData, out, i);
5337 } else {
5338 ent = xmlParseEntityRef(ctxt);
5339 if (ent == NULL) return;
5340 if (!ctxt->wellFormed)
5341 return;
5342 if ((ent->name != NULL) &&
5343 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5344 xmlNodePtr list = NULL;
5345 int ret;
5349 * The first reference to the entity trigger a parsing phase
5350 * where the ent->children is filled with the result from
5351 * the parsing.
5353 if (ent->children == NULL) {
5354 xmlChar *value;
5355 value = ent->content;
5358 * Check that this entity is well formed
5360 if ((value != NULL) &&
5361 (value[1] == 0) && (value[0] == '<') &&
5362 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5364 * DONE: get definite answer on this !!!
5365 * Lots of entity decls are used to declare a single
5366 * char
5367 * <!ENTITY lt "<">
5368 * Which seems to be valid since
5369 * 2.4: The ampersand character (&) and the left angle
5370 * bracket (<) may appear in their literal form only
5371 * when used ... They are also legal within the literal
5372 * entity value of an internal entity declaration;i
5373 * see "4.3.2 Well-Formed Parsed Entities".
5374 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5375 * Looking at the OASIS test suite and James Clark
5376 * tests, this is broken. However the XML REC uses
5377 * it. Is the XML REC not well-formed ????
5378 * This is a hack to avoid this problem
5380 * ANSWER: since lt gt amp .. are already defined,
5381 * this is a redefinition and hence the fact that the
5382 * content is not well balanced is not a Wf error, this
5383 * is lousy but acceptable.
5385 list = xmlNewDocText(ctxt->myDoc, value);
5386 if (list != NULL) {
5387 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5388 (ent->children == NULL)) {
5389 ent->children = list;
5390 ent->last = list;
5391 list->parent = (xmlNodePtr) ent;
5392 } else {
5393 xmlFreeNodeList(list);
5395 } else if (list != NULL) {
5396 xmlFreeNodeList(list);
5398 } else {
5400 * 4.3.2: An internal general parsed entity is well-formed
5401 * if its replacement text matches the production labeled
5402 * content.
5405 void *user_data;
5407 * This is a bit hackish but this seems the best
5408 * way to make sure both SAX and DOM entity support
5409 * behaves okay.
5411 if (ctxt->userData == ctxt)
5412 user_data = NULL;
5413 else
5414 user_data = ctxt->userData;
5416 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5417 ctxt->depth++;
5418 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5419 value, user_data, &list);
5420 ctxt->depth--;
5421 } else if (ent->etype ==
5422 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5423 ctxt->depth++;
5424 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
5425 ctxt->sax, user_data, ctxt->depth,
5426 ent->URI, ent->ExternalID, &list);
5427 ctxt->depth--;
5428 } else {
5429 ret = -1;
5430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5431 ctxt->sax->error(ctxt->userData,
5432 "Internal: invalid entity type\n");
5434 if (ret == XML_ERR_ENTITY_LOOP) {
5435 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5437 ctxt->sax->error(ctxt->userData,
5438 "Detected entity reference loop\n");
5439 ctxt->wellFormed = 0;
5440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5441 return;
5442 } else if ((ret == 0) && (list != NULL)) {
5443 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5444 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
5445 (ent->children == NULL)) {
5446 ent->children = list;
5447 if (ctxt->replaceEntities) {
5449 * Prune it directly in the generated document
5450 * except for single text nodes.
5452 if ((list->type == XML_TEXT_NODE) &&
5453 (list->next == NULL)) {
5454 list->parent = (xmlNodePtr) ent;
5455 list = NULL;
5456 } else {
5457 while (list != NULL) {
5458 list->parent = (xmlNodePtr) ctxt->node;
5459 list->doc = ctxt->myDoc;
5460 if (list->next == NULL)
5461 ent->last = list;
5462 list = list->next;
5464 list = ent->children;
5465 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5466 xmlAddEntityReference(ent, list, NULL);
5468 } else {
5469 while (list != NULL) {
5470 list->parent = (xmlNodePtr) ent;
5471 if (list->next == NULL)
5472 ent->last = list;
5473 list = list->next;
5476 } else {
5477 xmlFreeNodeList(list);
5478 list = NULL;
5480 } else if (ret > 0) {
5481 ctxt->errNo = ret;
5482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5483 ctxt->sax->error(ctxt->userData,
5484 "Entity value required\n");
5485 ctxt->wellFormed = 0;
5486 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5487 } else if (list != NULL) {
5488 xmlFreeNodeList(list);
5489 list = NULL;
5493 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5494 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5496 * Create a node.
5498 ctxt->sax->reference(ctxt->userData, ent->name);
5499 return;
5500 } else if (ctxt->replaceEntities) {
5501 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5503 * Seems we are generating the DOM content, do
5504 * a simple tree copy for all references except the first
5505 * In the first occurrence list contains the replacement
5507 if (list == NULL) {
5508 xmlNodePtr new = NULL, cur, firstChild = NULL;
5509 cur = ent->children;
5510 while (cur != NULL) {
5511 new = xmlCopyNode(cur, 1);
5512 if (firstChild == NULL){
5513 firstChild = new;
5515 xmlAddChild(ctxt->node, new);
5516 if (cur == ent->last)
5517 break;
5518 cur = cur->next;
5520 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5521 xmlAddEntityReference(ent, firstChild, new);
5522 } else {
5524 * the name change is to avoid coalescing of the
5525 * node with a possible previous text one which
5526 * would make ent->children a dangling pointer
5528 if (ent->children->type == XML_TEXT_NODE)
5529 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5530 if ((ent->last != ent->children) &&
5531 (ent->last->type == XML_TEXT_NODE))
5532 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5533 xmlAddChildList(ctxt->node, ent->children);
5537 * This is to avoid a nasty side effect, see
5538 * characters() in SAX.c
5540 ctxt->nodemem = 0;
5541 ctxt->nodelen = 0;
5542 return;
5543 } else {
5545 * Probably running in SAX mode
5547 xmlParserInputPtr input;
5549 input = xmlNewEntityInputStream(ctxt, ent);
5550 xmlPushInput(ctxt, input);
5551 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5552 (RAW == '<') && (NXT(1) == '?') &&
5553 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5554 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5555 xmlParseTextDecl(ctxt);
5556 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5558 * The XML REC instructs us to stop parsing right here
5560 ctxt->instate = XML_PARSER_EOF;
5561 return;
5563 if (input->standalone == 1) {
5564 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5566 ctxt->sax->error(ctxt->userData,
5567 "external parsed entities cannot be standalone\n");
5568 ctxt->wellFormed = 0;
5569 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5572 return;
5575 } else {
5576 val = ent->content;
5577 if (val == NULL) return;
5579 * inline the entity.
5581 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5582 (!ctxt->disableSAX))
5583 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5589 * xmlParseEntityRef:
5590 * @ctxt: an XML parser context
5592 * parse ENTITY references declarations
5594 * [68] EntityRef ::= '&' Name ';'
5596 * [ WFC: Entity Declared ]
5597 * In a document without any DTD, a document with only an internal DTD
5598 * subset which contains no parameter entity references, or a document
5599 * with "standalone='yes'", the Name given in the entity reference
5600 * must match that in an entity declaration, except that well-formed
5601 * documents need not declare any of the following entities: amp, lt,
5602 * gt, apos, quot. The declaration of a parameter entity must precede
5603 * any reference to it. Similarly, the declaration of a general entity
5604 * must precede any reference to it which appears in a default value in an
5605 * attribute-list declaration. Note that if entities are declared in the
5606 * external subset or in external parameter entities, a non-validating
5607 * processor is not obligated to read and process their declarations;
5608 * for such documents, the rule that an entity must be declared is a
5609 * well-formedness constraint only if standalone='yes'.
5611 * [ WFC: Parsed Entity ]
5612 * An entity reference must not contain the name of an unparsed entity
5614 * Returns the xmlEntityPtr if found, or NULL otherwise.
5616 xmlEntityPtr
5617 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5618 xmlChar *name;
5619 xmlEntityPtr ent = NULL;
5621 GROW;
5623 if (RAW == '&') {
5624 NEXT;
5625 name = xmlParseName(ctxt);
5626 if (name == NULL) {
5627 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5629 ctxt->sax->error(ctxt->userData,
5630 "xmlParseEntityRef: no name\n");
5631 ctxt->wellFormed = 0;
5632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5633 } else {
5634 if (RAW == ';') {
5635 NEXT;
5637 * Ask first SAX for entity resolution, otherwise try the
5638 * predefined set.
5640 if (ctxt->sax != NULL) {
5641 if (ctxt->sax->getEntity != NULL)
5642 ent = ctxt->sax->getEntity(ctxt->userData, name);
5643 if (ent == NULL)
5644 ent = xmlGetPredefinedEntity(name);
5645 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5646 ent = getEntity(ctxt, name);
5650 * [ WFC: Entity Declared ]
5651 * In a document without any DTD, a document with only an
5652 * internal DTD subset which contains no parameter entity
5653 * references, or a document with "standalone='yes'", the
5654 * Name given in the entity reference must match that in an
5655 * entity declaration, except that well-formed documents
5656 * need not declare any of the following entities: amp, lt,
5657 * gt, apos, quot.
5658 * The declaration of a parameter entity must precede any
5659 * reference to it.
5660 * Similarly, the declaration of a general entity must
5661 * precede any reference to it which appears in a default
5662 * value in an attribute-list declaration. Note that if
5663 * entities are declared in the external subset or in
5664 * external parameter entities, a non-validating processor
5665 * is not obligated to read and process their declarations;
5666 * for such documents, the rule that an entity must be
5667 * declared is a well-formedness constraint only if
5668 * standalone='yes'.
5670 if (ent == NULL) {
5671 if ((ctxt->standalone == 1) ||
5672 ((ctxt->hasExternalSubset == 0) &&
5673 (ctxt->hasPErefs == 0))) {
5674 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5676 ctxt->sax->error(ctxt->userData,
5677 "Entity '%s' not defined\n", name);
5678 ctxt->wellFormed = 0;
5679 ctxt->valid = 0;
5680 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5681 } else {
5682 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5684 ctxt->sax->error(ctxt->userData,
5685 "Entity '%s' not defined\n", name);
5686 ctxt->valid = 0;
5691 * [ WFC: Parsed Entity ]
5692 * An entity reference must not contain the name of an
5693 * unparsed entity
5695 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5696 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5698 ctxt->sax->error(ctxt->userData,
5699 "Entity reference to unparsed entity %s\n", name);
5700 ctxt->wellFormed = 0;
5701 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5705 * [ WFC: No External Entity References ]
5706 * Attribute values cannot contain direct or indirect
5707 * entity references to external entities.
5709 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5710 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5711 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5713 ctxt->sax->error(ctxt->userData,
5714 "Attribute references external entity '%s'\n", name);
5715 ctxt->wellFormed = 0;
5716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5719 * [ WFC: No < in Attribute Values ]
5720 * The replacement text of any entity referred to directly or
5721 * indirectly in an attribute value (other than "&lt;") must
5722 * not contain a <.
5724 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5725 (ent != NULL) &&
5726 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5727 (ent->content != NULL) &&
5728 (xmlStrchr(ent->content, '<'))) {
5729 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5731 ctxt->sax->error(ctxt->userData,
5732 "'<' in entity '%s' is not allowed in attributes values\n", name);
5733 ctxt->wellFormed = 0;
5734 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5738 * Internal check, no parameter entities here ...
5740 else {
5741 switch (ent->etype) {
5742 case XML_INTERNAL_PARAMETER_ENTITY:
5743 case XML_EXTERNAL_PARAMETER_ENTITY:
5744 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5746 ctxt->sax->error(ctxt->userData,
5747 "Attempt to reference the parameter entity '%s'\n", name);
5748 ctxt->wellFormed = 0;
5749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5750 break;
5751 default:
5752 break;
5757 * [ WFC: No Recursion ]
5758 * A parsed entity must not contain a recursive reference
5759 * to itself, either directly or indirectly.
5760 * Done somewhere else
5763 } else {
5764 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5766 ctxt->sax->error(ctxt->userData,
5767 "xmlParseEntityRef: expecting ';'\n");
5768 ctxt->wellFormed = 0;
5769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5771 xmlFree(name);
5774 return(ent);
5778 * xmlParseStringEntityRef:
5779 * @ctxt: an XML parser context
5780 * @str: a pointer to an index in the string
5782 * parse ENTITY references declarations, but this version parses it from
5783 * a string value.
5785 * [68] EntityRef ::= '&' Name ';'
5787 * [ WFC: Entity Declared ]
5788 * In a document without any DTD, a document with only an internal DTD
5789 * subset which contains no parameter entity references, or a document
5790 * with "standalone='yes'", the Name given in the entity reference
5791 * must match that in an entity declaration, except that well-formed
5792 * documents need not declare any of the following entities: amp, lt,
5793 * gt, apos, quot. The declaration of a parameter entity must precede
5794 * any reference to it. Similarly, the declaration of a general entity
5795 * must precede any reference to it which appears in a default value in an
5796 * attribute-list declaration. Note that if entities are declared in the
5797 * external subset or in external parameter entities, a non-validating
5798 * processor is not obligated to read and process their declarations;
5799 * for such documents, the rule that an entity must be declared is a
5800 * well-formedness constraint only if standalone='yes'.
5802 * [ WFC: Parsed Entity ]
5803 * An entity reference must not contain the name of an unparsed entity
5805 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5806 * is updated to the current location in the string.
5808 xmlEntityPtr
5809 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5810 xmlChar *name;
5811 const xmlChar *ptr;
5812 xmlChar cur;
5813 xmlEntityPtr ent = NULL;
5815 if ((str == NULL) || (*str == NULL))
5816 return(NULL);
5817 ptr = *str;
5818 cur = *ptr;
5819 if (cur == '&') {
5820 ptr++;
5821 cur = *ptr;
5822 name = xmlParseStringName(ctxt, &ptr);
5823 if (name == NULL) {
5824 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
5827 "xmlParseStringEntityRef: no name\n");
5828 ctxt->wellFormed = 0;
5829 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5830 } else {
5831 if (*ptr == ';') {
5832 ptr++;
5834 * Ask first SAX for entity resolution, otherwise try the
5835 * predefined set.
5837 if (ctxt->sax != NULL) {
5838 if (ctxt->sax->getEntity != NULL)
5839 ent = ctxt->sax->getEntity(ctxt->userData, name);
5840 if (ent == NULL)
5841 ent = xmlGetPredefinedEntity(name);
5842 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5843 ent = getEntity(ctxt, name);
5847 * [ WFC: Entity Declared ]
5848 * In a document without any DTD, a document with only an
5849 * internal DTD subset which contains no parameter entity
5850 * references, or a document with "standalone='yes'", the
5851 * Name given in the entity reference must match that in an
5852 * entity declaration, except that well-formed documents
5853 * need not declare any of the following entities: amp, lt,
5854 * gt, apos, quot.
5855 * The declaration of a parameter entity must precede any
5856 * reference to it.
5857 * Similarly, the declaration of a general entity must
5858 * precede any reference to it which appears in a default
5859 * value in an attribute-list declaration. Note that if
5860 * entities are declared in the external subset or in
5861 * external parameter entities, a non-validating processor
5862 * is not obligated to read and process their declarations;
5863 * for such documents, the rule that an entity must be
5864 * declared is a well-formedness constraint only if
5865 * standalone='yes'.
5867 if (ent == NULL) {
5868 if ((ctxt->standalone == 1) ||
5869 ((ctxt->hasExternalSubset == 0) &&
5870 (ctxt->hasPErefs == 0))) {
5871 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5873 ctxt->sax->error(ctxt->userData,
5874 "Entity '%s' not defined\n", name);
5875 ctxt->wellFormed = 0;
5876 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5877 } else {
5878 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5879 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5880 ctxt->sax->warning(ctxt->userData,
5881 "Entity '%s' not defined\n", name);
5886 * [ WFC: Parsed Entity ]
5887 * An entity reference must not contain the name of an
5888 * unparsed entity
5890 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5891 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5893 ctxt->sax->error(ctxt->userData,
5894 "Entity reference to unparsed entity %s\n", name);
5895 ctxt->wellFormed = 0;
5896 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5900 * [ WFC: No External Entity References ]
5901 * Attribute values cannot contain direct or indirect
5902 * entity references to external entities.
5904 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5905 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5906 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5908 ctxt->sax->error(ctxt->userData,
5909 "Attribute references external entity '%s'\n", name);
5910 ctxt->wellFormed = 0;
5911 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5914 * [ WFC: No < in Attribute Values ]
5915 * The replacement text of any entity referred to directly or
5916 * indirectly in an attribute value (other than "&lt;") must
5917 * not contain a <.
5919 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5920 (ent != NULL) &&
5921 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5922 (ent->content != NULL) &&
5923 (xmlStrchr(ent->content, '<'))) {
5924 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5926 ctxt->sax->error(ctxt->userData,
5927 "'<' in entity '%s' is not allowed in attributes values\n", name);
5928 ctxt->wellFormed = 0;
5929 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5933 * Internal check, no parameter entities here ...
5935 else {
5936 switch (ent->etype) {
5937 case XML_INTERNAL_PARAMETER_ENTITY:
5938 case XML_EXTERNAL_PARAMETER_ENTITY:
5939 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941 ctxt->sax->error(ctxt->userData,
5942 "Attempt to reference the parameter entity '%s'\n", name);
5943 ctxt->wellFormed = 0;
5944 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5945 break;
5946 default:
5947 break;
5952 * [ WFC: No Recursion ]
5953 * A parsed entity must not contain a recursive reference
5954 * to itself, either directly or indirectly.
5955 * Done somewhere else
5958 } else {
5959 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5961 ctxt->sax->error(ctxt->userData,
5962 "xmlParseStringEntityRef: expecting ';'\n");
5963 ctxt->wellFormed = 0;
5964 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5966 xmlFree(name);
5969 *str = ptr;
5970 return(ent);
5974 * xmlParsePEReference:
5975 * @ctxt: an XML parser context
5977 * parse PEReference declarations
5978 * The entity content is handled directly by pushing it's content as
5979 * a new input stream.
5981 * [69] PEReference ::= '%' Name ';'
5983 * [ WFC: No Recursion ]
5984 * A parsed entity must not contain a recursive
5985 * reference to itself, either directly or indirectly.
5987 * [ WFC: Entity Declared ]
5988 * In a document without any DTD, a document with only an internal DTD
5989 * subset which contains no parameter entity references, or a document
5990 * with "standalone='yes'", ... ... The declaration of a parameter
5991 * entity must precede any reference to it...
5993 * [ VC: Entity Declared ]
5994 * In a document with an external subset or external parameter entities
5995 * with "standalone='no'", ... ... The declaration of a parameter entity
5996 * must precede any reference to it...
5998 * [ WFC: In DTD ]
5999 * Parameter-entity references may only appear in the DTD.
6000 * NOTE: misleading but this is handled.
6002 void
6003 xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6004 xmlChar *name;
6005 xmlEntityPtr entity = NULL;
6006 xmlParserInputPtr input;
6008 if (RAW == '%') {
6009 NEXT;
6010 name = xmlParseName(ctxt);
6011 if (name == NULL) {
6012 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6014 ctxt->sax->error(ctxt->userData,
6015 "xmlParsePEReference: no name\n");
6016 ctxt->wellFormed = 0;
6017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6018 } else {
6019 if (RAW == ';') {
6020 NEXT;
6021 if ((ctxt->sax != NULL) &&
6022 (ctxt->sax->getParameterEntity != NULL))
6023 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6024 name);
6025 if (entity == NULL) {
6027 * [ WFC: Entity Declared ]
6028 * In a document without any DTD, a document with only an
6029 * internal DTD subset which contains no parameter entity
6030 * references, or a document with "standalone='yes'", ...
6031 * ... The declaration of a parameter entity must precede
6032 * any reference to it...
6034 if ((ctxt->standalone == 1) ||
6035 ((ctxt->hasExternalSubset == 0) &&
6036 (ctxt->hasPErefs == 0))) {
6037 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6038 if ((!ctxt->disableSAX) &&
6039 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6040 ctxt->sax->error(ctxt->userData,
6041 "PEReference: %%%s; not found\n", name);
6042 ctxt->wellFormed = 0;
6043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6044 } else {
6046 * [ VC: Entity Declared ]
6047 * In a document with an external subset or external
6048 * parameter entities with "standalone='no'", ...
6049 * ... The declaration of a parameter entity must precede
6050 * any reference to it...
6052 if ((!ctxt->disableSAX) &&
6053 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6054 ctxt->sax->warning(ctxt->userData,
6055 "PEReference: %%%s; not found\n", name);
6056 ctxt->valid = 0;
6058 } else {
6060 * Internal checking in case the entity quest barfed
6062 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6063 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6064 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6065 ctxt->sax->warning(ctxt->userData,
6066 "Internal: %%%s; is not a parameter entity\n", name);
6067 } else if (ctxt->input->free != deallocblankswrapper) {
6068 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6069 xmlPushInput(ctxt, input);
6070 } else {
6072 * TODO !!!
6073 * handle the extra spaces added before and after
6074 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6076 input = xmlNewEntityInputStream(ctxt, entity);
6077 xmlPushInput(ctxt, input);
6078 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6079 (RAW == '<') && (NXT(1) == '?') &&
6080 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6081 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6082 xmlParseTextDecl(ctxt);
6083 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6085 * The XML REC instructs us to stop parsing
6086 * right here
6088 ctxt->instate = XML_PARSER_EOF;
6089 xmlFree(name);
6090 return;
6095 ctxt->hasPErefs = 1;
6096 } else {
6097 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6099 ctxt->sax->error(ctxt->userData,
6100 "xmlParsePEReference: expecting ';'\n");
6101 ctxt->wellFormed = 0;
6102 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6104 xmlFree(name);
6110 * xmlParseStringPEReference:
6111 * @ctxt: an XML parser context
6112 * @str: a pointer to an index in the string
6114 * parse PEReference declarations
6116 * [69] PEReference ::= '%' Name ';'
6118 * [ WFC: No Recursion ]
6119 * A parsed entity must not contain a recursive
6120 * reference to itself, either directly or indirectly.
6122 * [ WFC: Entity Declared ]
6123 * In a document without any DTD, a document with only an internal DTD
6124 * subset which contains no parameter entity references, or a document
6125 * with "standalone='yes'", ... ... The declaration of a parameter
6126 * entity must precede any reference to it...
6128 * [ VC: Entity Declared ]
6129 * In a document with an external subset or external parameter entities
6130 * with "standalone='no'", ... ... The declaration of a parameter entity
6131 * must precede any reference to it...
6133 * [ WFC: In DTD ]
6134 * Parameter-entity references may only appear in the DTD.
6135 * NOTE: misleading but this is handled.
6137 * Returns the string of the entity content.
6138 * str is updated to the current value of the index
6140 xmlEntityPtr
6141 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6142 const xmlChar *ptr;
6143 xmlChar cur;
6144 xmlChar *name;
6145 xmlEntityPtr entity = NULL;
6147 if ((str == NULL) || (*str == NULL)) return(NULL);
6148 ptr = *str;
6149 cur = *ptr;
6150 if (cur == '%') {
6151 ptr++;
6152 cur = *ptr;
6153 name = xmlParseStringName(ctxt, &ptr);
6154 if (name == NULL) {
6155 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6157 ctxt->sax->error(ctxt->userData,
6158 "xmlParseStringPEReference: no name\n");
6159 ctxt->wellFormed = 0;
6160 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6161 } else {
6162 cur = *ptr;
6163 if (cur == ';') {
6164 ptr++;
6165 cur = *ptr;
6166 if ((ctxt->sax != NULL) &&
6167 (ctxt->sax->getParameterEntity != NULL))
6168 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6169 name);
6170 if (entity == NULL) {
6172 * [ WFC: Entity Declared ]
6173 * In a document without any DTD, a document with only an
6174 * internal DTD subset which contains no parameter entity
6175 * references, or a document with "standalone='yes'", ...
6176 * ... The declaration of a parameter entity must precede
6177 * any reference to it...
6179 if ((ctxt->standalone == 1) ||
6180 ((ctxt->hasExternalSubset == 0) &&
6181 (ctxt->hasPErefs == 0))) {
6182 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6184 ctxt->sax->error(ctxt->userData,
6185 "PEReference: %%%s; not found\n", name);
6186 ctxt->wellFormed = 0;
6187 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6188 } else {
6190 * [ VC: Entity Declared ]
6191 * In a document with an external subset or external
6192 * parameter entities with "standalone='no'", ...
6193 * ... The declaration of a parameter entity must
6194 * precede any reference to it...
6196 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6197 ctxt->sax->warning(ctxt->userData,
6198 "PEReference: %%%s; not found\n", name);
6199 ctxt->valid = 0;
6201 } else {
6203 * Internal checking in case the entity quest barfed
6205 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6206 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6207 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6208 ctxt->sax->warning(ctxt->userData,
6209 "Internal: %%%s; is not a parameter entity\n", name);
6212 ctxt->hasPErefs = 1;
6213 } else {
6214 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6216 ctxt->sax->error(ctxt->userData,
6217 "xmlParseStringPEReference: expecting ';'\n");
6218 ctxt->wellFormed = 0;
6219 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6221 xmlFree(name);
6224 *str = ptr;
6225 return(entity);
6229 * xmlParseDocTypeDecl:
6230 * @ctxt: an XML parser context
6232 * parse a DOCTYPE declaration
6234 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6235 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6237 * [ VC: Root Element Type ]
6238 * The Name in the document type declaration must match the element
6239 * type of the root element.
6242 void
6243 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6244 xmlChar *name = NULL;
6245 xmlChar *ExternalID = NULL;
6246 xmlChar *URI = NULL;
6249 * We know that '<!DOCTYPE' has been detected.
6251 SKIP(9);
6253 SKIP_BLANKS;
6256 * Parse the DOCTYPE name.
6258 name = xmlParseName(ctxt);
6259 if (name == NULL) {
6260 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6262 ctxt->sax->error(ctxt->userData,
6263 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6264 ctxt->wellFormed = 0;
6265 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6267 ctxt->intSubName = name;
6269 SKIP_BLANKS;
6272 * Check for SystemID and ExternalID
6274 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6276 if ((URI != NULL) || (ExternalID != NULL)) {
6277 ctxt->hasExternalSubset = 1;
6279 ctxt->extSubURI = URI;
6280 ctxt->extSubSystem = ExternalID;
6282 SKIP_BLANKS;
6285 * Create and update the internal subset.
6287 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6288 (!ctxt->disableSAX))
6289 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6292 * Is there any internal subset declarations ?
6293 * they are handled separately in xmlParseInternalSubset()
6295 if (RAW == '[')
6296 return;
6299 * We should be at the end of the DOCTYPE declaration.
6301 if (RAW != '>') {
6302 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6304 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
6305 ctxt->wellFormed = 0;
6306 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6308 NEXT;
6312 * xmlParseInternalSubset:
6313 * @ctxt: an XML parser context
6315 * parse the internal subset declaration
6317 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6320 static void
6321 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6323 * Is there any DTD definition ?
6325 if (RAW == '[') {
6326 ctxt->instate = XML_PARSER_DTD;
6327 NEXT;
6329 * Parse the succession of Markup declarations and
6330 * PEReferences.
6331 * Subsequence (markupdecl | PEReference | S)*
6333 while (RAW != ']') {
6334 const xmlChar *check = CUR_PTR;
6335 int cons = ctxt->input->consumed;
6337 SKIP_BLANKS;
6338 xmlParseMarkupDecl(ctxt);
6339 xmlParsePEReference(ctxt);
6342 * Pop-up of finished entities.
6344 while ((RAW == 0) && (ctxt->inputNr > 1))
6345 xmlPopInput(ctxt);
6347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6348 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6350 ctxt->sax->error(ctxt->userData,
6351 "xmlParseInternalSubset: error detected in Markup declaration\n");
6352 ctxt->wellFormed = 0;
6353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6354 break;
6357 if (RAW == ']') {
6358 NEXT;
6359 SKIP_BLANKS;
6364 * We should be at the end of the DOCTYPE declaration.
6366 if (RAW != '>') {
6367 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6369 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
6370 ctxt->wellFormed = 0;
6371 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6373 NEXT;
6377 * xmlParseAttribute:
6378 * @ctxt: an XML parser context
6379 * @value: a xmlChar ** used to store the value of the attribute
6381 * parse an attribute
6383 * [41] Attribute ::= Name Eq AttValue
6385 * [ WFC: No External Entity References ]
6386 * Attribute values cannot contain direct or indirect entity references
6387 * to external entities.
6389 * [ WFC: No < in Attribute Values ]
6390 * The replacement text of any entity referred to directly or indirectly in
6391 * an attribute value (other than "&lt;") must not contain a <.
6393 * [ VC: Attribute Value Type ]
6394 * The attribute must have been declared; the value must be of the type
6395 * declared for it.
6397 * [25] Eq ::= S? '=' S?
6399 * With namespace:
6401 * [NS 11] Attribute ::= QName Eq AttValue
6403 * Also the case QName == xmlns:??? is handled independently as a namespace
6404 * definition.
6406 * Returns the attribute name, and the value in *value.
6409 xmlChar *
6410 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6411 xmlChar *name, *val;
6413 *value = NULL;
6414 GROW;
6415 name = xmlParseName(ctxt);
6416 if (name == NULL) {
6417 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6419 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6420 ctxt->wellFormed = 0;
6421 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6422 return(NULL);
6426 * read the value
6428 SKIP_BLANKS;
6429 if (RAW == '=') {
6430 NEXT;
6431 SKIP_BLANKS;
6432 val = xmlParseAttValue(ctxt);
6433 ctxt->instate = XML_PARSER_CONTENT;
6434 } else {
6435 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6437 ctxt->sax->error(ctxt->userData,
6438 "Specification mandate value for attribute %s\n", name);
6439 ctxt->wellFormed = 0;
6440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6441 xmlFree(name);
6442 return(NULL);
6446 * Check that xml:lang conforms to the specification
6447 * No more registered as an error, just generate a warning now
6448 * since this was deprecated in XML second edition
6450 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6451 if (!xmlCheckLanguageID(val)) {
6452 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6453 ctxt->sax->warning(ctxt->userData,
6454 "Malformed value for xml:lang : %s\n", val);
6459 * Check that xml:space conforms to the specification
6461 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6462 if (xmlStrEqual(val, BAD_CAST "default"))
6463 *(ctxt->space) = 0;
6464 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6465 *(ctxt->space) = 1;
6466 else {
6467 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6469 ctxt->sax->error(ctxt->userData,
6470 "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6471 val);
6472 ctxt->wellFormed = 0;
6473 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6477 *value = val;
6478 return(name);
6482 * xmlParseStartTag:
6483 * @ctxt: an XML parser context
6485 * parse a start of tag either for rule element or
6486 * EmptyElement. In both case we don't parse the tag closing chars.
6488 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6490 * [ WFC: Unique Att Spec ]
6491 * No attribute name may appear more than once in the same start-tag or
6492 * empty-element tag.
6494 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6496 * [ WFC: Unique Att Spec ]
6497 * No attribute name may appear more than once in the same start-tag or
6498 * empty-element tag.
6500 * With namespace:
6502 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6504 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6506 * Returns the element name parsed
6509 xmlChar *
6510 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6511 xmlChar *name;
6512 xmlChar *attname;
6513 xmlChar *attvalue;
6514 const xmlChar **atts = NULL;
6515 int nbatts = 0;
6516 int maxatts = 0;
6517 int i;
6519 if (RAW != '<') return(NULL);
6520 NEXT1;
6522 name = xmlParseName(ctxt);
6523 if (name == NULL) {
6524 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6526 ctxt->sax->error(ctxt->userData,
6527 "xmlParseStartTag: invalid element name\n");
6528 ctxt->wellFormed = 0;
6529 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6530 return(NULL);
6534 * Now parse the attributes, it ends up with the ending
6536 * (S Attribute)* S?
6538 SKIP_BLANKS;
6539 GROW;
6541 while ((RAW != '>') &&
6542 ((RAW != '/') || (NXT(1) != '>')) &&
6543 (IS_CHAR(RAW))) {
6544 const xmlChar *q = CUR_PTR;
6545 int cons = ctxt->input->consumed;
6547 attname = xmlParseAttribute(ctxt, &attvalue);
6548 if ((attname != NULL) && (attvalue != NULL)) {
6550 * [ WFC: Unique Att Spec ]
6551 * No attribute name may appear more than once in the same
6552 * start-tag or empty-element tag.
6554 for (i = 0; i < nbatts;i += 2) {
6555 if (xmlStrEqual(atts[i], attname)) {
6556 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6558 ctxt->sax->error(ctxt->userData,
6559 "Attribute %s redefined\n",
6560 attname);
6561 ctxt->wellFormed = 0;
6562 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6563 xmlFree(attname);
6564 xmlFree(attvalue);
6565 goto failed;
6570 * Add the pair to atts
6572 if (atts == NULL) {
6573 maxatts = 10;
6574 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6575 if (atts == NULL) {
6576 xmlGenericError(xmlGenericErrorContext,
6577 "malloc of %ld byte failed\n",
6578 maxatts * (long)sizeof(xmlChar *));
6579 return(NULL);
6581 } else if (nbatts + 4 > maxatts) {
6582 maxatts *= 2;
6583 atts = (const xmlChar **) xmlRealloc((void *) atts,
6584 maxatts * sizeof(xmlChar *));
6585 if (atts == NULL) {
6586 xmlGenericError(xmlGenericErrorContext,
6587 "realloc of %ld byte failed\n",
6588 maxatts * (long)sizeof(xmlChar *));
6589 return(NULL);
6592 atts[nbatts++] = attname;
6593 atts[nbatts++] = attvalue;
6594 atts[nbatts] = NULL;
6595 atts[nbatts + 1] = NULL;
6596 } else {
6597 if (attname != NULL)
6598 xmlFree(attname);
6599 if (attvalue != NULL)
6600 xmlFree(attvalue);
6603 failed:
6605 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6606 break;
6607 if (!IS_BLANK(RAW)) {
6608 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6610 ctxt->sax->error(ctxt->userData,
6611 "attributes construct error\n");
6612 ctxt->wellFormed = 0;
6613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6615 SKIP_BLANKS;
6616 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6617 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "xmlParseStartTag: problem parsing attributes\n");
6621 ctxt->wellFormed = 0;
6622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6623 break;
6625 GROW;
6629 * SAX: Start of Element !
6631 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6632 (!ctxt->disableSAX))
6633 ctxt->sax->startElement(ctxt->userData, name, atts);
6635 if (atts != NULL) {
6636 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6637 xmlFree((void *) atts);
6639 return(name);
6643 * xmlParseEndTag:
6644 * @ctxt: an XML parser context
6646 * parse an end of tag
6648 * [42] ETag ::= '</' Name S? '>'
6650 * With namespace
6652 * [NS 9] ETag ::= '</' QName S? '>'
6655 void
6656 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6657 xmlChar *name;
6658 xmlChar *oldname;
6660 GROW;
6661 if ((RAW != '<') || (NXT(1) != '/')) {
6662 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6664 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6665 ctxt->wellFormed = 0;
6666 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6667 return;
6669 SKIP(2);
6671 name = xmlParseNameAndCompare(ctxt,ctxt->name);
6674 * We should definitely be at the ending "S? '>'" part
6676 GROW;
6677 SKIP_BLANKS;
6678 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6679 ctxt->errNo = XML_ERR_GT_REQUIRED;
6680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6681 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6682 ctxt->wellFormed = 0;
6683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6684 } else
6685 NEXT1;
6688 * [ WFC: Element Type Match ]
6689 * The Name in an element's end-tag must match the element type in the
6690 * start-tag.
6693 if (name != (xmlChar*)1) {
6694 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6696 if (name != NULL) {
6697 ctxt->sax->error(ctxt->userData,
6698 "Opening and ending tag mismatch: %s and %s\n",
6699 ctxt->name, name);
6700 } else {
6701 ctxt->sax->error(ctxt->userData,
6702 "Ending tag error for: %s\n", ctxt->name);
6706 ctxt->wellFormed = 0;
6707 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6708 #if 0
6709 else {
6711 * Recover in case of one missing close
6713 if ((ctxt->nameNr > 2) &&
6714 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6715 namePop(ctxt);
6716 spacePop(ctxt);
6719 #endif
6720 if (name != NULL)
6721 xmlFree(name);
6725 * SAX: End of Tag
6727 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6728 (!ctxt->disableSAX))
6729 ctxt->sax->endElement(ctxt->userData, ctxt->name);
6731 oldname = namePop(ctxt);
6732 spacePop(ctxt);
6733 if (oldname != NULL) {
6734 #ifdef DEBUG_STACK
6735 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6736 #endif
6737 xmlFree(oldname);
6739 return;
6743 * xmlParseCDSect:
6744 * @ctxt: an XML parser context
6746 * Parse escaped pure raw content.
6748 * [18] CDSect ::= CDStart CData CDEnd
6750 * [19] CDStart ::= '<![CDATA['
6752 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6754 * [21] CDEnd ::= ']]>'
6756 void
6757 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6758 xmlChar *buf = NULL;
6759 int len = 0;
6760 int size = XML_PARSER_BUFFER_SIZE;
6761 int r, rl;
6762 int s, sl;
6763 int cur, l;
6764 int count = 0;
6766 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6767 (NXT(2) == '[') && (NXT(3) == 'C') &&
6768 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6769 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6770 (NXT(8) == '[')) {
6771 SKIP(9);
6772 } else
6773 return;
6775 ctxt->instate = XML_PARSER_CDATA_SECTION;
6776 r = CUR_CHAR(rl);
6777 if (!IS_CHAR(r)) {
6778 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6780 ctxt->sax->error(ctxt->userData,
6781 "CData section not finished\n");
6782 ctxt->wellFormed = 0;
6783 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6784 ctxt->instate = XML_PARSER_CONTENT;
6785 return;
6787 NEXTL(rl);
6788 s = CUR_CHAR(sl);
6789 if (!IS_CHAR(s)) {
6790 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6792 ctxt->sax->error(ctxt->userData,
6793 "CData section not finished\n");
6794 ctxt->wellFormed = 0;
6795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6796 ctxt->instate = XML_PARSER_CONTENT;
6797 return;
6799 NEXTL(sl);
6800 cur = CUR_CHAR(l);
6801 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6802 if (buf == NULL) {
6803 xmlGenericError(xmlGenericErrorContext,
6804 "malloc of %d byte failed\n", size);
6805 return;
6807 while (IS_CHAR(cur) &&
6808 ((r != ']') || (s != ']') || (cur != '>'))) {
6809 if (len + 5 >= size) {
6810 size *= 2;
6811 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6812 if (buf == NULL) {
6813 xmlGenericError(xmlGenericErrorContext,
6814 "realloc of %d byte failed\n", size);
6815 return;
6818 COPY_BUF(rl,buf,len,r);
6819 r = s;
6820 rl = sl;
6821 s = cur;
6822 sl = l;
6823 count++;
6824 if (count > 50) {
6825 GROW;
6826 count = 0;
6828 NEXTL(l);
6829 cur = CUR_CHAR(l);
6831 buf[len] = 0;
6832 ctxt->instate = XML_PARSER_CONTENT;
6833 if (cur != '>') {
6834 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6836 ctxt->sax->error(ctxt->userData,
6837 "CData section not finished\n%.50s\n", buf);
6838 ctxt->wellFormed = 0;
6839 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6840 xmlFree(buf);
6841 return;
6843 NEXTL(l);
6846 * OK the buffer is to be consumed as cdata.
6848 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6849 if (ctxt->sax->cdataBlock != NULL)
6850 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6851 else if (ctxt->sax->characters != NULL)
6852 ctxt->sax->characters(ctxt->userData, buf, len);
6854 xmlFree(buf);
6858 * xmlParseContent:
6859 * @ctxt: an XML parser context
6861 * Parse a content:
6863 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6866 void
6867 xmlParseContent(xmlParserCtxtPtr ctxt) {
6868 GROW;
6869 while ((RAW != 0) &&
6870 ((RAW != '<') || (NXT(1) != '/'))) {
6871 const xmlChar *test = CUR_PTR;
6872 int cons = ctxt->input->consumed;
6873 const xmlChar *cur = ctxt->input->cur;
6876 * First case : a Processing Instruction.
6878 if ((*cur == '<') && (cur[1] == '?')) {
6879 xmlParsePI(ctxt);
6883 * Second case : a CDSection
6885 else if ((*cur == '<') && (NXT(1) == '!') &&
6886 (NXT(2) == '[') && (NXT(3) == 'C') &&
6887 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6888 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6889 (NXT(8) == '[')) {
6890 xmlParseCDSect(ctxt);
6894 * Third case : a comment
6896 else if ((*cur == '<') && (NXT(1) == '!') &&
6897 (NXT(2) == '-') && (NXT(3) == '-')) {
6898 xmlParseComment(ctxt);
6899 ctxt->instate = XML_PARSER_CONTENT;
6903 * Fourth case : a sub-element.
6905 else if (*cur == '<') {
6906 xmlParseElement(ctxt);
6910 * Fifth case : a reference. If if has not been resolved,
6911 * parsing returns it's Name, create the node
6914 else if (*cur == '&') {
6915 xmlParseReference(ctxt);
6919 * Last case, text. Note that References are handled directly.
6921 else {
6922 xmlParseCharData(ctxt, 0);
6925 GROW;
6927 * Pop-up of finished entities.
6929 while ((RAW == 0) && (ctxt->inputNr > 1))
6930 xmlPopInput(ctxt);
6931 SHRINK;
6933 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
6934 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6936 ctxt->sax->error(ctxt->userData,
6937 "detected an error in element content\n");
6938 ctxt->wellFormed = 0;
6939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6940 ctxt->instate = XML_PARSER_EOF;
6941 break;
6947 * xmlParseElement:
6948 * @ctxt: an XML parser context
6950 * parse an XML element, this is highly recursive
6952 * [39] element ::= EmptyElemTag | STag content ETag
6954 * [ WFC: Element Type Match ]
6955 * The Name in an element's end-tag must match the element type in the
6956 * start-tag.
6958 * [ VC: Element Valid ]
6959 * An element is valid if there is a declaration matching elementdecl
6960 * where the Name matches the element type and one of the following holds:
6961 * - The declaration matches EMPTY and the element has no content.
6962 * - The declaration matches children and the sequence of child elements
6963 * belongs to the language generated by the regular expression in the
6964 * content model, with optional white space (characters matching the
6965 * nonterminal S) between each pair of child elements.
6966 * - The declaration matches Mixed and the content consists of character
6967 * data and child elements whose types match names in the content model.
6968 * - The declaration matches ANY, and the types of any child elements have
6969 * been declared.
6972 void
6973 xmlParseElement(xmlParserCtxtPtr ctxt) {
6974 xmlChar *name;
6975 xmlChar *oldname;
6976 xmlParserNodeInfo node_info;
6977 xmlNodePtr ret;
6979 /* Capture start position */
6980 if (ctxt->record_info) {
6981 node_info.begin_pos = ctxt->input->consumed +
6982 (CUR_PTR - ctxt->input->base);
6983 node_info.begin_line = ctxt->input->line;
6986 if (ctxt->spaceNr == 0)
6987 spacePush(ctxt, -1);
6988 else
6989 spacePush(ctxt, *ctxt->space);
6991 name = xmlParseStartTag(ctxt);
6992 if (name == NULL) {
6993 spacePop(ctxt);
6994 return;
6996 namePush(ctxt, name);
6997 ret = ctxt->node;
7000 * [ VC: Root Element Type ]
7001 * The Name in the document type declaration must match the element
7002 * type of the root element.
7004 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7005 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7006 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7009 * Check for an Empty Element.
7011 if ((RAW == '/') && (NXT(1) == '>')) {
7012 SKIP(2);
7013 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7014 (!ctxt->disableSAX))
7015 ctxt->sax->endElement(ctxt->userData, name);
7016 oldname = namePop(ctxt);
7017 spacePop(ctxt);
7018 if (oldname != NULL) {
7019 #ifdef DEBUG_STACK
7020 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7021 #endif
7022 xmlFree(oldname);
7024 if ( ret != NULL && ctxt->record_info ) {
7025 node_info.end_pos = ctxt->input->consumed +
7026 (CUR_PTR - ctxt->input->base);
7027 node_info.end_line = ctxt->input->line;
7028 node_info.node = ret;
7029 xmlParserAddNodeInfo(ctxt, &node_info);
7031 return;
7033 if (RAW == '>') {
7034 NEXT1;
7035 } else {
7036 ctxt->errNo = XML_ERR_GT_REQUIRED;
7037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7038 ctxt->sax->error(ctxt->userData,
7039 "Couldn't find end of Start Tag %s\n",
7040 name);
7041 ctxt->wellFormed = 0;
7042 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7045 * end of parsing of this node.
7047 nodePop(ctxt);
7048 oldname = namePop(ctxt);
7049 spacePop(ctxt);
7050 if (oldname != NULL) {
7051 #ifdef DEBUG_STACK
7052 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7053 #endif
7054 xmlFree(oldname);
7058 * Capture end position and add node
7060 if ( ret != NULL && ctxt->record_info ) {
7061 node_info.end_pos = ctxt->input->consumed +
7062 (CUR_PTR - ctxt->input->base);
7063 node_info.end_line = ctxt->input->line;
7064 node_info.node = ret;
7065 xmlParserAddNodeInfo(ctxt, &node_info);
7067 return;
7071 * Parse the content of the element:
7073 xmlParseContent(ctxt);
7074 if (!IS_CHAR(RAW)) {
7075 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
7076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7077 ctxt->sax->error(ctxt->userData,
7078 "Premature end of data in tag %s\n", name);
7079 ctxt->wellFormed = 0;
7080 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7083 * end of parsing of this node.
7085 nodePop(ctxt);
7086 oldname = namePop(ctxt);
7087 spacePop(ctxt);
7088 if (oldname != NULL) {
7089 #ifdef DEBUG_STACK
7090 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7091 #endif
7092 xmlFree(oldname);
7094 return;
7098 * parse the end of tag: '</' should be here.
7100 xmlParseEndTag(ctxt);
7103 * Capture end position and add node
7105 if ( ret != NULL && ctxt->record_info ) {
7106 node_info.end_pos = ctxt->input->consumed +
7107 (CUR_PTR - ctxt->input->base);
7108 node_info.end_line = ctxt->input->line;
7109 node_info.node = ret;
7110 xmlParserAddNodeInfo(ctxt, &node_info);
7115 * xmlParseVersionNum:
7116 * @ctxt: an XML parser context
7118 * parse the XML version value.
7120 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7122 * Returns the string giving the XML version number, or NULL
7124 xmlChar *
7125 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7126 xmlChar *buf = NULL;
7127 int len = 0;
7128 int size = 10;
7129 xmlChar cur;
7131 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7132 if (buf == NULL) {
7133 xmlGenericError(xmlGenericErrorContext,
7134 "malloc of %d byte failed\n", size);
7135 return(NULL);
7137 cur = CUR;
7138 while (((cur >= 'a') && (cur <= 'z')) ||
7139 ((cur >= 'A') && (cur <= 'Z')) ||
7140 ((cur >= '0') && (cur <= '9')) ||
7141 (cur == '_') || (cur == '.') ||
7142 (cur == ':') || (cur == '-')) {
7143 if (len + 1 >= size) {
7144 size *= 2;
7145 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7146 if (buf == NULL) {
7147 xmlGenericError(xmlGenericErrorContext,
7148 "realloc of %d byte failed\n", size);
7149 return(NULL);
7152 buf[len++] = cur;
7153 NEXT;
7154 cur=CUR;
7156 buf[len] = 0;
7157 return(buf);
7161 * xmlParseVersionInfo:
7162 * @ctxt: an XML parser context
7164 * parse the XML version.
7166 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7168 * [25] Eq ::= S? '=' S?
7170 * Returns the version string, e.g. "1.0"
7173 xmlChar *
7174 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7175 xmlChar *version = NULL;
7176 const xmlChar *q;
7178 if ((RAW == 'v') && (NXT(1) == 'e') &&
7179 (NXT(2) == 'r') && (NXT(3) == 's') &&
7180 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7181 (NXT(6) == 'n')) {
7182 SKIP(7);
7183 SKIP_BLANKS;
7184 if (RAW != '=') {
7185 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7187 ctxt->sax->error(ctxt->userData,
7188 "xmlParseVersionInfo : expected '='\n");
7189 ctxt->wellFormed = 0;
7190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7191 return(NULL);
7193 NEXT;
7194 SKIP_BLANKS;
7195 if (RAW == '"') {
7196 NEXT;
7197 q = CUR_PTR;
7198 version = xmlParseVersionNum(ctxt);
7199 if (RAW != '"') {
7200 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7202 ctxt->sax->error(ctxt->userData,
7203 "String not closed\n%.50s\n", q);
7204 ctxt->wellFormed = 0;
7205 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7206 } else
7207 NEXT;
7208 } else if (RAW == '\''){
7209 NEXT;
7210 q = CUR_PTR;
7211 version = xmlParseVersionNum(ctxt);
7212 if (RAW != '\'') {
7213 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7215 ctxt->sax->error(ctxt->userData,
7216 "String not closed\n%.50s\n", q);
7217 ctxt->wellFormed = 0;
7218 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7219 } else
7220 NEXT;
7221 } else {
7222 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7224 ctxt->sax->error(ctxt->userData,
7225 "xmlParseVersionInfo : expected ' or \"\n");
7226 ctxt->wellFormed = 0;
7227 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7230 return(version);
7234 * xmlParseEncName:
7235 * @ctxt: an XML parser context
7237 * parse the XML encoding name
7239 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7241 * Returns the encoding name value or NULL
7243 xmlChar *
7244 xmlParseEncName(xmlParserCtxtPtr ctxt) {
7245 xmlChar *buf = NULL;
7246 int len = 0;
7247 int size = 10;
7248 xmlChar cur;
7250 cur = CUR;
7251 if (((cur >= 'a') && (cur <= 'z')) ||
7252 ((cur >= 'A') && (cur <= 'Z'))) {
7253 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7254 if (buf == NULL) {
7255 xmlGenericError(xmlGenericErrorContext,
7256 "malloc of %d byte failed\n", size);
7257 return(NULL);
7260 buf[len++] = cur;
7261 NEXT;
7262 cur = CUR;
7263 while (((cur >= 'a') && (cur <= 'z')) ||
7264 ((cur >= 'A') && (cur <= 'Z')) ||
7265 ((cur >= '0') && (cur <= '9')) ||
7266 (cur == '.') || (cur == '_') ||
7267 (cur == '-')) {
7268 if (len + 1 >= size) {
7269 size *= 2;
7270 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7271 if (buf == NULL) {
7272 xmlGenericError(xmlGenericErrorContext,
7273 "realloc of %d byte failed\n", size);
7274 return(NULL);
7277 buf[len++] = cur;
7278 NEXT;
7279 cur = CUR;
7280 if (cur == 0) {
7281 SHRINK;
7282 GROW;
7283 cur = CUR;
7286 buf[len] = 0;
7287 } else {
7288 ctxt->errNo = XML_ERR_ENCODING_NAME;
7289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7290 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7291 ctxt->wellFormed = 0;
7292 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7294 return(buf);
7298 * xmlParseEncodingDecl:
7299 * @ctxt: an XML parser context
7301 * parse the XML encoding declaration
7303 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7305 * this setups the conversion filters.
7307 * Returns the encoding value or NULL
7310 xmlChar *
7311 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7312 xmlChar *encoding = NULL;
7313 const xmlChar *q;
7315 SKIP_BLANKS;
7316 if ((RAW == 'e') && (NXT(1) == 'n') &&
7317 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7318 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7319 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7320 SKIP(8);
7321 SKIP_BLANKS;
7322 if (RAW != '=') {
7323 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7325 ctxt->sax->error(ctxt->userData,
7326 "xmlParseEncodingDecl : expected '='\n");
7327 ctxt->wellFormed = 0;
7328 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7329 return(NULL);
7331 NEXT;
7332 SKIP_BLANKS;
7333 if (RAW == '"') {
7334 NEXT;
7335 q = CUR_PTR;
7336 encoding = xmlParseEncName(ctxt);
7337 if (RAW != '"') {
7338 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7340 ctxt->sax->error(ctxt->userData,
7341 "String not closed\n%.50s\n", q);
7342 ctxt->wellFormed = 0;
7343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7344 } else
7345 NEXT;
7346 } else if (RAW == '\''){
7347 NEXT;
7348 q = CUR_PTR;
7349 encoding = xmlParseEncName(ctxt);
7350 if (RAW != '\'') {
7351 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7353 ctxt->sax->error(ctxt->userData,
7354 "String not closed\n%.50s\n", q);
7355 ctxt->wellFormed = 0;
7356 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7357 } else
7358 NEXT;
7359 } else {
7360 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7362 ctxt->sax->error(ctxt->userData,
7363 "xmlParseEncodingDecl : expected ' or \"\n");
7364 ctxt->wellFormed = 0;
7365 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7367 if (encoding != NULL) {
7368 xmlCharEncoding enc;
7369 xmlCharEncodingHandlerPtr handler;
7371 if (ctxt->input->encoding != NULL)
7372 xmlFree((xmlChar *) ctxt->input->encoding);
7373 ctxt->input->encoding = encoding;
7375 enc = xmlParseCharEncoding((const char *) encoding);
7377 * registered set of known encodings
7379 if (enc != XML_CHAR_ENCODING_ERROR) {
7380 xmlSwitchEncoding(ctxt, enc);
7381 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7382 ctxt->input->encoding = NULL;
7383 xmlFree(encoding);
7384 return(NULL);
7386 } else {
7388 * fallback for unknown encodings
7390 handler = xmlFindCharEncodingHandler((const char *) encoding);
7391 if (handler != NULL) {
7392 xmlSwitchToEncoding(ctxt, handler);
7393 } else {
7394 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7396 ctxt->sax->error(ctxt->userData,
7397 "Unsupported encoding %s\n", encoding);
7398 return(NULL);
7403 return(encoding);
7407 * xmlParseSDDecl:
7408 * @ctxt: an XML parser context
7410 * parse the XML standalone declaration
7412 * [32] SDDecl ::= S 'standalone' Eq
7413 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7415 * [ VC: Standalone Document Declaration ]
7416 * TODO The standalone document declaration must have the value "no"
7417 * if any external markup declarations contain declarations of:
7418 * - attributes with default values, if elements to which these
7419 * attributes apply appear in the document without specifications
7420 * of values for these attributes, or
7421 * - entities (other than amp, lt, gt, apos, quot), if references
7422 * to those entities appear in the document, or
7423 * - attributes with values subject to normalization, where the
7424 * attribute appears in the document with a value which will change
7425 * as a result of normalization, or
7426 * - element types with element content, if white space occurs directly
7427 * within any instance of those types.
7429 * Returns 1 if standalone, 0 otherwise
7433 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7434 int standalone = -1;
7436 SKIP_BLANKS;
7437 if ((RAW == 's') && (NXT(1) == 't') &&
7438 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7439 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7440 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7441 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7442 SKIP(10);
7443 SKIP_BLANKS;
7444 if (RAW != '=') {
7445 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7447 ctxt->sax->error(ctxt->userData,
7448 "XML standalone declaration : expected '='\n");
7449 ctxt->wellFormed = 0;
7450 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7451 return(standalone);
7453 NEXT;
7454 SKIP_BLANKS;
7455 if (RAW == '\''){
7456 NEXT;
7457 if ((RAW == 'n') && (NXT(1) == 'o')) {
7458 standalone = 0;
7459 SKIP(2);
7460 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7461 (NXT(2) == 's')) {
7462 standalone = 1;
7463 SKIP(3);
7464 } else {
7465 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7467 ctxt->sax->error(ctxt->userData,
7468 "standalone accepts only 'yes' or 'no'\n");
7469 ctxt->wellFormed = 0;
7470 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7472 if (RAW != '\'') {
7473 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7475 ctxt->sax->error(ctxt->userData, "String not closed\n");
7476 ctxt->wellFormed = 0;
7477 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7478 } else
7479 NEXT;
7480 } else if (RAW == '"'){
7481 NEXT;
7482 if ((RAW == 'n') && (NXT(1) == 'o')) {
7483 standalone = 0;
7484 SKIP(2);
7485 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7486 (NXT(2) == 's')) {
7487 standalone = 1;
7488 SKIP(3);
7489 } else {
7490 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7492 ctxt->sax->error(ctxt->userData,
7493 "standalone accepts only 'yes' or 'no'\n");
7494 ctxt->wellFormed = 0;
7495 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7497 if (RAW != '"') {
7498 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7500 ctxt->sax->error(ctxt->userData, "String not closed\n");
7501 ctxt->wellFormed = 0;
7502 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7503 } else
7504 NEXT;
7505 } else {
7506 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7508 ctxt->sax->error(ctxt->userData,
7509 "Standalone value not found\n");
7510 ctxt->wellFormed = 0;
7511 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7514 return(standalone);
7518 * xmlParseXMLDecl:
7519 * @ctxt: an XML parser context
7521 * parse an XML declaration header
7523 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7526 void
7527 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7528 xmlChar *version;
7531 * We know that '<?xml' is here.
7533 SKIP(5);
7535 if (!IS_BLANK(RAW)) {
7536 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7538 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7539 ctxt->wellFormed = 0;
7540 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7542 SKIP_BLANKS;
7545 * We must have the VersionInfo here.
7547 version = xmlParseVersionInfo(ctxt);
7548 if (version == NULL) {
7549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7550 ctxt->sax->error(ctxt->userData,
7551 "Malformed declaration expecting version\n");
7552 ctxt->wellFormed = 0;
7553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7554 } else {
7555 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7557 * TODO: Blueberry should be detected here
7559 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7560 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7561 version);
7563 if (ctxt->version != NULL)
7564 xmlFree((void *) ctxt->version);
7565 ctxt->version = version;
7569 * We may have the encoding declaration
7571 if (!IS_BLANK(RAW)) {
7572 if ((RAW == '?') && (NXT(1) == '>')) {
7573 SKIP(2);
7574 return;
7576 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7578 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7579 ctxt->wellFormed = 0;
7580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7582 xmlParseEncodingDecl(ctxt);
7583 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7585 * The XML REC instructs us to stop parsing right here
7587 return;
7591 * We may have the standalone status.
7593 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7594 if ((RAW == '?') && (NXT(1) == '>')) {
7595 SKIP(2);
7596 return;
7598 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7600 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7601 ctxt->wellFormed = 0;
7602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7604 SKIP_BLANKS;
7605 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7607 SKIP_BLANKS;
7608 if ((RAW == '?') && (NXT(1) == '>')) {
7609 SKIP(2);
7610 } else if (RAW == '>') {
7611 /* Deprecated old WD ... */
7612 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7614 ctxt->sax->error(ctxt->userData,
7615 "XML declaration must end-up with '?>'\n");
7616 ctxt->wellFormed = 0;
7617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7618 NEXT;
7619 } else {
7620 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7622 ctxt->sax->error(ctxt->userData,
7623 "parsing XML declaration: '?>' expected\n");
7624 ctxt->wellFormed = 0;
7625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7626 MOVETO_ENDTAG(CUR_PTR);
7627 NEXT;
7632 * xmlParseMisc:
7633 * @ctxt: an XML parser context
7635 * parse an XML Misc* optional field.
7637 * [27] Misc ::= Comment | PI | S
7640 void
7641 xmlParseMisc(xmlParserCtxtPtr ctxt) {
7642 while (((RAW == '<') && (NXT(1) == '?')) ||
7643 ((RAW == '<') && (NXT(1) == '!') &&
7644 (NXT(2) == '-') && (NXT(3) == '-')) ||
7645 IS_BLANK(CUR)) {
7646 if ((RAW == '<') && (NXT(1) == '?')) {
7647 xmlParsePI(ctxt);
7648 } else if (IS_BLANK(CUR)) {
7649 NEXT;
7650 } else
7651 xmlParseComment(ctxt);
7656 * xmlParseDocument:
7657 * @ctxt: an XML parser context
7659 * parse an XML document (and build a tree if using the standard SAX
7660 * interface).
7662 * [1] document ::= prolog element Misc*
7664 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7666 * Returns 0, -1 in case of error. the parser context is augmented
7667 * as a result of the parsing.
7671 xmlParseDocument(xmlParserCtxtPtr ctxt) {
7672 xmlChar start[4];
7673 xmlCharEncoding enc;
7675 xmlInitParser();
7677 GROW;
7680 * SAX: beginning of the document processing.
7682 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7683 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7685 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
7687 * Get the 4 first bytes and decode the charset
7688 * if enc != XML_CHAR_ENCODING_NONE
7689 * plug some encoding conversion routines.
7691 start[0] = RAW;
7692 start[1] = NXT(1);
7693 start[2] = NXT(2);
7694 start[3] = NXT(3);
7695 enc = xmlDetectCharEncoding(start, 4);
7696 if (enc != XML_CHAR_ENCODING_NONE) {
7697 xmlSwitchEncoding(ctxt, enc);
7702 if (CUR == 0) {
7703 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7705 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7706 ctxt->wellFormed = 0;
7707 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7711 * Check for the XMLDecl in the Prolog.
7713 GROW;
7714 if ((RAW == '<') && (NXT(1) == '?') &&
7715 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7716 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7719 * Note that we will switch encoding on the fly.
7721 xmlParseXMLDecl(ctxt);
7722 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7724 * The XML REC instructs us to stop parsing right here
7726 return(-1);
7728 ctxt->standalone = ctxt->input->standalone;
7729 SKIP_BLANKS;
7730 } else {
7731 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7733 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7734 ctxt->sax->startDocument(ctxt->userData);
7737 * The Misc part of the Prolog
7739 GROW;
7740 xmlParseMisc(ctxt);
7743 * Then possibly doc type declaration(s) and more Misc
7744 * (doctypedecl Misc*)?
7746 GROW;
7747 if ((RAW == '<') && (NXT(1) == '!') &&
7748 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7749 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7750 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7751 (NXT(8) == 'E')) {
7753 ctxt->inSubset = 1;
7754 xmlParseDocTypeDecl(ctxt);
7755 if (RAW == '[') {
7756 ctxt->instate = XML_PARSER_DTD;
7757 xmlParseInternalSubset(ctxt);
7761 * Create and update the external subset.
7763 ctxt->inSubset = 2;
7764 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7765 (!ctxt->disableSAX))
7766 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7767 ctxt->extSubSystem, ctxt->extSubURI);
7768 ctxt->inSubset = 0;
7771 ctxt->instate = XML_PARSER_PROLOG;
7772 xmlParseMisc(ctxt);
7776 * Time to start parsing the tree itself
7778 GROW;
7779 if (RAW != '<') {
7780 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7782 ctxt->sax->error(ctxt->userData,
7783 "Start tag expected, '<' not found\n");
7784 ctxt->wellFormed = 0;
7785 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7786 ctxt->instate = XML_PARSER_EOF;
7787 } else {
7788 ctxt->instate = XML_PARSER_CONTENT;
7789 xmlParseElement(ctxt);
7790 ctxt->instate = XML_PARSER_EPILOG;
7794 * The Misc part at the end
7796 xmlParseMisc(ctxt);
7798 if (RAW != 0) {
7799 ctxt->errNo = XML_ERR_DOCUMENT_END;
7800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7801 ctxt->sax->error(ctxt->userData,
7802 "Extra content at the end of the document\n");
7803 ctxt->wellFormed = 0;
7804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7806 ctxt->instate = XML_PARSER_EOF;
7810 * SAX: end of the document processing.
7812 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7813 ctxt->sax->endDocument(ctxt->userData);
7816 * Remove locally kept entity definitions if the tree was not built
7818 if ((ctxt->myDoc != NULL) &&
7819 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7820 xmlFreeDoc(ctxt->myDoc);
7821 ctxt->myDoc = NULL;
7824 if (! ctxt->wellFormed) {
7825 ctxt->valid = 0;
7826 return(-1);
7828 return(0);
7832 * xmlParseExtParsedEnt:
7833 * @ctxt: an XML parser context
7835 * parse a general parsed entity
7836 * An external general parsed entity is well-formed if it matches the
7837 * production labeled extParsedEnt.
7839 * [78] extParsedEnt ::= TextDecl? content
7841 * Returns 0, -1 in case of error. the parser context is augmented
7842 * as a result of the parsing.
7846 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7847 xmlChar start[4];
7848 xmlCharEncoding enc;
7850 xmlDefaultSAXHandlerInit();
7852 GROW;
7855 * SAX: beginning of the document processing.
7857 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7858 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7861 * Get the 4 first bytes and decode the charset
7862 * if enc != XML_CHAR_ENCODING_NONE
7863 * plug some encoding conversion routines.
7865 start[0] = RAW;
7866 start[1] = NXT(1);
7867 start[2] = NXT(2);
7868 start[3] = NXT(3);
7869 enc = xmlDetectCharEncoding(start, 4);
7870 if (enc != XML_CHAR_ENCODING_NONE) {
7871 xmlSwitchEncoding(ctxt, enc);
7875 if (CUR == 0) {
7876 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7878 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7879 ctxt->wellFormed = 0;
7880 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7884 * Check for the XMLDecl in the Prolog.
7886 GROW;
7887 if ((RAW == '<') && (NXT(1) == '?') &&
7888 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7889 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7892 * Note that we will switch encoding on the fly.
7894 xmlParseXMLDecl(ctxt);
7895 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7897 * The XML REC instructs us to stop parsing right here
7899 return(-1);
7901 SKIP_BLANKS;
7902 } else {
7903 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7905 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7906 ctxt->sax->startDocument(ctxt->userData);
7909 * Doing validity checking on chunk doesn't make sense
7911 ctxt->instate = XML_PARSER_CONTENT;
7912 ctxt->validate = 0;
7913 ctxt->loadsubset = 0;
7914 ctxt->depth = 0;
7916 xmlParseContent(ctxt);
7918 if ((RAW == '<') && (NXT(1) == '/')) {
7919 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7921 ctxt->sax->error(ctxt->userData,
7922 "chunk is not well balanced\n");
7923 ctxt->wellFormed = 0;
7924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7925 } else if (RAW != 0) {
7926 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7928 ctxt->sax->error(ctxt->userData,
7929 "extra content at the end of well balanced chunk\n");
7930 ctxt->wellFormed = 0;
7931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7935 * SAX: end of the document processing.
7937 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7938 ctxt->sax->endDocument(ctxt->userData);
7940 if (! ctxt->wellFormed) return(-1);
7941 return(0);
7944 /************************************************************************
7946 * Progressive parsing interfaces *
7948 ************************************************************************/
7951 * xmlParseLookupSequence:
7952 * @ctxt: an XML parser context
7953 * @first: the first char to lookup
7954 * @next: the next char to lookup or zero
7955 * @third: the next char to lookup or zero
7957 * Try to find if a sequence (first, next, third) or just (first next) or
7958 * (first) is available in the input stream.
7959 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7960 * to avoid rescanning sequences of bytes, it DOES change the state of the
7961 * parser, do not use liberally.
7963 * Returns the index to the current parsing point if the full sequence
7964 * is available, -1 otherwise.
7966 static int
7967 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7968 xmlChar next, xmlChar third) {
7969 int base, len;
7970 xmlParserInputPtr in;
7971 const xmlChar *buf;
7973 in = ctxt->input;
7974 if (in == NULL) return(-1);
7975 base = in->cur - in->base;
7976 if (base < 0) return(-1);
7977 if (ctxt->checkIndex > base)
7978 base = ctxt->checkIndex;
7979 if (in->buf == NULL) {
7980 buf = in->base;
7981 len = in->length;
7982 } else {
7983 buf = in->buf->buffer->content;
7984 len = in->buf->buffer->use;
7986 /* take into account the sequence length */
7987 if (third) len -= 2;
7988 else if (next) len --;
7989 for (;base < len;base++) {
7990 if (buf[base] == first) {
7991 if (third != 0) {
7992 if ((buf[base + 1] != next) ||
7993 (buf[base + 2] != third)) continue;
7994 } else if (next != 0) {
7995 if (buf[base + 1] != next) continue;
7997 ctxt->checkIndex = 0;
7998 #ifdef DEBUG_PUSH
7999 if (next == 0)
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: lookup '%c' found at %d\n",
8002 first, base);
8003 else if (third == 0)
8004 xmlGenericError(xmlGenericErrorContext,
8005 "PP: lookup '%c%c' found at %d\n",
8006 first, next, base);
8007 else
8008 xmlGenericError(xmlGenericErrorContext,
8009 "PP: lookup '%c%c%c' found at %d\n",
8010 first, next, third, base);
8011 #endif
8012 return(base - (in->cur - in->base));
8015 ctxt->checkIndex = base;
8016 #ifdef DEBUG_PUSH
8017 if (next == 0)
8018 xmlGenericError(xmlGenericErrorContext,
8019 "PP: lookup '%c' failed\n", first);
8020 else if (third == 0)
8021 xmlGenericError(xmlGenericErrorContext,
8022 "PP: lookup '%c%c' failed\n", first, next);
8023 else
8024 xmlGenericError(xmlGenericErrorContext,
8025 "PP: lookup '%c%c%c' failed\n", first, next, third);
8026 #endif
8027 return(-1);
8031 * xmlParseTryOrFinish:
8032 * @ctxt: an XML parser context
8033 * @terminate: last chunk indicator
8035 * Try to progress on parsing
8037 * Returns zero if no parsing was possible
/*
 * xmlParseTryOrFinish: state machine driving the push parser.
 *
 * NOTE(review): this listing is a web extraction. Every line carries its
 * original file line number as a prefix, and lines holding only a brace,
 * a comment delimiter or nothing at all are absent. The lines below are
 * reproduced untouched; only standalone comments like this one are new.
 *
 * The function loops on ctxt->instate and performs one step per turn. It
 * leaves through the done label when the bytes after the cursor are not
 * enough for the current state, or returns directly on a few paths.
 *
 * ctxt:       parser context; instate, input and checkIndex are read and
 *             updated here.
 * terminate:  when non-zero, most states skip the xmlParseLookupSequence()
 *             availability test and parse whatever is buffered. The
 *             CDATA_SECTION and DTD states scan regardless of it.
 *
 * Returns ret. It starts at 0 and is only changed by the ret += 5 on the
 * XML declaration path. The unsupported-encoding path and the negative
 * offset test in the DTD state return 0 directly.
 *
 * The COMMENT, PI, ENTITY_DECL, ENTITY_VALUE, ATTRIBUTE_VALUE,
 * SYSTEM_LITERAL, PUBLIC_LITERAL and IGNORE states are not handled as
 * real states: each prints an internal error and moves instate to
 * another state.
 */
8039 static int
8040 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8041 int ret = 0;
8042 int avail;
8043 xmlChar cur, next;
/* DEBUG_PUSH builds only: trace the state the parser is in on entry. */
8045 #ifdef DEBUG_PUSH
8046 switch (ctxt->instate) {
8047 case XML_PARSER_EOF:
8048 xmlGenericError(xmlGenericErrorContext,
8049 "PP: try EOF\n"); break;
8050 case XML_PARSER_START:
8051 xmlGenericError(xmlGenericErrorContext,
8052 "PP: try START\n"); break;
8053 case XML_PARSER_MISC:
8054 xmlGenericError(xmlGenericErrorContext,
8055 "PP: try MISC\n");break;
8056 case XML_PARSER_COMMENT:
8057 xmlGenericError(xmlGenericErrorContext,
8058 "PP: try COMMENT\n");break;
8059 case XML_PARSER_PROLOG:
8060 xmlGenericError(xmlGenericErrorContext,
8061 "PP: try PROLOG\n");break;
8062 case XML_PARSER_START_TAG:
8063 xmlGenericError(xmlGenericErrorContext,
8064 "PP: try START_TAG\n");break;
8065 case XML_PARSER_CONTENT:
8066 xmlGenericError(xmlGenericErrorContext,
8067 "PP: try CONTENT\n");break;
8068 case XML_PARSER_CDATA_SECTION:
8069 xmlGenericError(xmlGenericErrorContext,
8070 "PP: try CDATA_SECTION\n");break;
8071 case XML_PARSER_END_TAG:
8072 xmlGenericError(xmlGenericErrorContext,
8073 "PP: try END_TAG\n");break;
8074 case XML_PARSER_ENTITY_DECL:
8075 xmlGenericError(xmlGenericErrorContext,
8076 "PP: try ENTITY_DECL\n");break;
8077 case XML_PARSER_ENTITY_VALUE:
8078 xmlGenericError(xmlGenericErrorContext,
8079 "PP: try ENTITY_VALUE\n");break;
8080 case XML_PARSER_ATTRIBUTE_VALUE:
8081 xmlGenericError(xmlGenericErrorContext,
8082 "PP: try ATTRIBUTE_VALUE\n");break;
8083 case XML_PARSER_DTD:
8084 xmlGenericError(xmlGenericErrorContext,
8085 "PP: try DTD\n");break;
8086 case XML_PARSER_EPILOG:
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: try EPILOG\n");break;
8089 case XML_PARSER_PI:
8090 xmlGenericError(xmlGenericErrorContext,
8091 "PP: try PI\n");break;
8092 case XML_PARSER_IGNORE:
8093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: try IGNORE\n");break;
8096 #endif
/* Main loop: one state-machine step per iteration. */
8098 while (1) {
8099 SHRINK;
8102 * Pop-up of finished entities.
8104 while ((RAW == 0) && (ctxt->inputNr > 1))
8105 xmlPopInput(ctxt);
/*
 * Compute avail, the number of bytes after the cursor: taken from
 * input->length for an input without a buffer, from the buffer use
 * count otherwise.
 */
8107 if (ctxt->input ==NULL) break;
8108 if (ctxt->input->buf == NULL)
8109 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8110 else {
8112 * If we are operating on converted input, try to flush
8113 * remainng chars to avoid them stalling in the non-converted
8114 * buffer.
8116 if ((ctxt->input->buf->raw != NULL) &&
8117 (ctxt->input->buf->raw->use > 0)) {
/*
 * base and current are saved as offsets and the input pointers are
 * rebuilt from buffer->content after the zero-length push below.
 * NOTE(review): presumably because the push may move the content
 * buffer - confirm against xmlParserInputBufferPush.
 */
8118 int base = ctxt->input->base -
8119 ctxt->input->buf->buffer->content;
8120 int current = ctxt->input->cur - ctxt->input->base;
8122 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8123 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8124 ctxt->input->cur = ctxt->input->base + current;
8125 ctxt->input->end =
8126 &ctxt->input->buf->buffer->content[
8127 ctxt->input->buf->buffer->use];
8129 avail = ctxt->input->buf->buffer->use -
8130 (ctxt->input->cur - ctxt->input->base);
/* Nothing left after the cursor: leave through done. */
8132 if (avail < 1)
8133 goto done;
8134 switch (ctxt->instate) {
8135 case XML_PARSER_EOF:
8137 * Document parsing is done !
8139 goto done;
/*
 * START: with no charset selected yet, four bytes are required to run
 * xmlDetectCharEncoding(); the encoding is switched when one is
 * recognised and the switch is left. Otherwise the first two bytes
 * decide between an empty document, an XML declaration / PI opener and
 * anything else; the last two paths emit startDocument and move to MISC.
 */
8140 case XML_PARSER_START:
8141 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8142 xmlChar start[4];
8143 xmlCharEncoding enc;
8146 * Very first chars read from the document flow.
8148 if (avail < 4)
8149 goto done;
8152 * Get the 4 first bytes and decode the charset
8153 * if enc != XML_CHAR_ENCODING_NONE
8154 * plug some encoding conversion routines.
8156 start[0] = RAW;
8157 start[1] = NXT(1);
8158 start[2] = NXT(2);
8159 start[3] = NXT(3);
8160 enc = xmlDetectCharEncoding(start, 4);
8161 if (enc != XML_CHAR_ENCODING_NONE) {
8162 xmlSwitchEncoding(ctxt, enc);
8164 break;
8167 cur = ctxt->input->cur[0];
8168 next = ctxt->input->cur[1];
/* A zero first byte is reported as an empty document and ends parsing. */
8169 if (cur == 0) {
8170 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8171 ctxt->sax->setDocumentLocator(ctxt->userData,
8172 &xmlDefaultSAXLocator);
8173 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8175 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8176 ctxt->wellFormed = 0;
8177 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8178 ctxt->instate = XML_PARSER_EOF;
8179 #ifdef DEBUG_PUSH
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: entering EOF\n");
8182 #endif
8183 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8184 ctxt->sax->endDocument(ctxt->userData);
8185 goto done;
8187 if ((cur == '<') && (next == '?')) {
8188 /* PI or XML decl */
/*
 * Needs five bytes and, unless terminating, the closing question mark
 * plus greater-than pair somewhere in the buffer before going on.
 */
8189 if (avail < 5) return(ret);
8190 if ((!terminate) &&
8191 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8192 return(ret);
8193 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8194 ctxt->sax->setDocumentLocator(ctxt->userData,
8195 &xmlDefaultSAXLocator);
8196 if ((ctxt->input->cur[2] == 'x') &&
8197 (ctxt->input->cur[3] == 'm') &&
8198 (ctxt->input->cur[4] == 'l') &&
8199 (IS_BLANK(ctxt->input->cur[5]))) {
8200 ret += 5;
8201 #ifdef DEBUG_PUSH
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: Parsing XML Decl\n");
8204 #endif
8205 xmlParseXMLDecl(ctxt);
8206 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8208 * The XML REC instructs us to stop parsing right
8209 * here
8211 ctxt->instate = XML_PARSER_EOF;
8212 return(0);
8214 ctxt->standalone = ctxt->input->standalone;
8215 if ((ctxt->encoding == NULL) &&
8216 (ctxt->input->encoding != NULL))
8217 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8218 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8219 (!ctxt->disableSAX))
8220 ctxt->sax->startDocument(ctxt->userData);
8221 ctxt->instate = XML_PARSER_MISC;
8222 #ifdef DEBUG_PUSH
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: entering MISC\n");
8225 #endif
8226 } else {
8227 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8228 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8229 (!ctxt->disableSAX))
8230 ctxt->sax->startDocument(ctxt->userData);
8231 ctxt->instate = XML_PARSER_MISC;
8232 #ifdef DEBUG_PUSH
8233 xmlGenericError(xmlGenericErrorContext,
8234 "PP: entering MISC\n");
8235 #endif
8237 } else {
8238 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8239 ctxt->sax->setDocumentLocator(ctxt->userData,
8240 &xmlDefaultSAXLocator);
8241 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8242 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8243 (!ctxt->disableSAX))
8244 ctxt->sax->startDocument(ctxt->userData);
8245 ctxt->instate = XML_PARSER_MISC;
8246 #ifdef DEBUG_PUSH
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: entering MISC\n");
8249 #endif
8251 break;
/*
 * MISC: avail is recomputed after the SKIP_BLANKS macro. Handles a PI,
 * a comment or a DOCTYPE declaration; a DOCTYPE followed by an opening
 * square bracket moves to DTD, otherwise externalSubset is reported and
 * the state becomes PROLOG. Anything else moves to START_TAG.
 */
8252 case XML_PARSER_MISC:
8253 SKIP_BLANKS;
8254 if (ctxt->input->buf == NULL)
8255 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8256 else
8257 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8258 if (avail < 2)
8259 goto done;
8260 cur = ctxt->input->cur[0];
8261 next = ctxt->input->cur[1];
8262 if ((cur == '<') && (next == '?')) {
8263 if ((!terminate) &&
8264 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8265 goto done;
8266 #ifdef DEBUG_PUSH
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: Parsing PI\n");
8269 #endif
8270 xmlParsePI(ctxt);
8271 } else if ((cur == '<') && (next == '!') &&
8272 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8273 if ((!terminate) &&
8274 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8275 goto done;
8276 #ifdef DEBUG_PUSH
8277 xmlGenericError(xmlGenericErrorContext,
8278 "PP: Parsing Comment\n");
8279 #endif
8280 xmlParseComment(ctxt);
8281 ctxt->instate = XML_PARSER_MISC;
8282 } else if ((cur == '<') && (next == '!') &&
8283 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8284 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8285 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8286 (ctxt->input->cur[8] == 'E')) {
8287 if ((!terminate) &&
8288 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8289 goto done;
8290 #ifdef DEBUG_PUSH
8291 xmlGenericError(xmlGenericErrorContext,
8292 "PP: Parsing internal subset\n");
8293 #endif
8294 ctxt->inSubset = 1;
8295 xmlParseDocTypeDecl(ctxt);
8296 if (RAW == '[') {
8297 ctxt->instate = XML_PARSER_DTD;
8298 #ifdef DEBUG_PUSH
8299 xmlGenericError(xmlGenericErrorContext,
8300 "PP: entering DTD\n");
8301 #endif
8302 } else {
8304 * Create and update the external subset.
8306 ctxt->inSubset = 2;
8307 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8308 (ctxt->sax->externalSubset != NULL))
8309 ctxt->sax->externalSubset(ctxt->userData,
8310 ctxt->intSubName, ctxt->extSubSystem,
8311 ctxt->extSubURI);
8312 ctxt->inSubset = 0;
8313 ctxt->instate = XML_PARSER_PROLOG;
8314 #ifdef DEBUG_PUSH
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: entering PROLOG\n");
8317 #endif
/* Fewer than nine bytes after a markup declaration opener: cannot tell
 * a comment from a DOCTYPE yet, so stop here. */
8319 } else if ((cur == '<') && (next == '!') &&
8320 (avail < 9)) {
8321 goto done;
8322 } else {
8323 ctxt->instate = XML_PARSER_START_TAG;
8324 #ifdef DEBUG_PUSH
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: entering START_TAG\n");
8327 #endif
8329 break;
/* IGNORE is not expected here: report it and fall back to DTD. */
8330 case XML_PARSER_IGNORE:
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: internal error, state == IGNORE");
8333 ctxt->instate = XML_PARSER_DTD;
8334 #ifdef DEBUG_PUSH
8335 xmlGenericError(xmlGenericErrorContext,
8336 "PP: entering DTD\n");
8337 #endif
8338 break;
/*
 * PROLOG: same shape as MISC without the DOCTYPE branch. PIs and
 * comments are parsed in place; anything else moves to START_TAG.
 */
8339 case XML_PARSER_PROLOG:
8340 SKIP_BLANKS;
8341 if (ctxt->input->buf == NULL)
8342 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8343 else
8344 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8345 if (avail < 2)
8346 goto done;
8347 cur = ctxt->input->cur[0];
8348 next = ctxt->input->cur[1];
8349 if ((cur == '<') && (next == '?')) {
8350 if ((!terminate) &&
8351 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8352 goto done;
8353 #ifdef DEBUG_PUSH
8354 xmlGenericError(xmlGenericErrorContext,
8355 "PP: Parsing PI\n");
8356 #endif
8357 xmlParsePI(ctxt);
8358 } else if ((cur == '<') && (next == '!') &&
8359 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8360 if ((!terminate) &&
8361 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8362 goto done;
8363 #ifdef DEBUG_PUSH
8364 xmlGenericError(xmlGenericErrorContext,
8365 "PP: Parsing Comment\n");
8366 #endif
8367 xmlParseComment(ctxt);
8368 ctxt->instate = XML_PARSER_PROLOG;
8369 } else if ((cur == '<') && (next == '!') &&
8370 (avail < 4)) {
8371 goto done;
8372 } else {
8373 ctxt->instate = XML_PARSER_START_TAG;
8374 #ifdef DEBUG_PUSH
8375 xmlGenericError(xmlGenericErrorContext,
8376 "PP: entering START_TAG\n");
8377 #endif
8379 break;
/*
 * EPILOG: only PIs and comments are accepted. Anything else raises
 * XML_ERR_DOCUMENT_END, emits endDocument and moves to EOF.
 */
8380 case XML_PARSER_EPILOG:
8381 SKIP_BLANKS;
8382 if (ctxt->input->buf == NULL)
8383 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8384 else
8385 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8386 if (avail < 2)
8387 goto done;
8388 cur = ctxt->input->cur[0];
8389 next = ctxt->input->cur[1];
8390 if ((cur == '<') && (next == '?')) {
8391 if ((!terminate) &&
8392 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8393 goto done;
8394 #ifdef DEBUG_PUSH
8395 xmlGenericError(xmlGenericErrorContext,
8396 "PP: Parsing PI\n");
8397 #endif
8398 xmlParsePI(ctxt);
8399 ctxt->instate = XML_PARSER_EPILOG;
8400 } else if ((cur == '<') && (next == '!') &&
8401 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8402 if ((!terminate) &&
8403 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8404 goto done;
8405 #ifdef DEBUG_PUSH
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: Parsing Comment\n");
8408 #endif
8409 xmlParseComment(ctxt);
8410 ctxt->instate = XML_PARSER_EPILOG;
8411 } else if ((cur == '<') && (next == '!') &&
8412 (avail < 4)) {
8413 goto done;
8414 } else {
8415 ctxt->errNo = XML_ERR_DOCUMENT_END;
8416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8417 ctxt->sax->error(ctxt->userData,
8418 "Extra content at the end of the document\n");
8419 ctxt->wellFormed = 0;
8420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8421 ctxt->instate = XML_PARSER_EOF;
8422 #ifdef DEBUG_PUSH
8423 xmlGenericError(xmlGenericErrorContext,
8424 "PP: entering EOF\n");
8425 #endif
8426 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8427 ctxt->sax->endDocument(ctxt->userData);
8428 goto done;
8430 break;
/*
 * START_TAG: requires a less-than sign at the cursor and, unless
 * terminating, a greater-than sign somewhere in the buffer. A space
 * stack entry is pushed before xmlParseStartTag(); a NULL name pops it
 * again and ends parsing. A copy of the name goes on the name stack.
 * An empty element emits endElement at once and moves to EPILOG when
 * the name stack is empty afterwards, CONTENT otherwise. A regular
 * start tag moves to CONTENT.
 */
8431 case XML_PARSER_START_TAG: {
8432 xmlChar *name, *oldname;
8434 if ((avail < 2) && (ctxt->inputNr == 1))
8435 goto done;
8436 cur = ctxt->input->cur[0];
8437 if (cur != '<') {
8438 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8440 ctxt->sax->error(ctxt->userData,
8441 "Start tag expect, '<' not found\n");
8442 ctxt->wellFormed = 0;
8443 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8444 ctxt->instate = XML_PARSER_EOF;
8445 #ifdef DEBUG_PUSH
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: entering EOF\n");
8448 #endif
8449 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8450 ctxt->sax->endDocument(ctxt->userData);
8451 goto done;
8453 if ((!terminate) &&
8454 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8455 goto done;
8456 if (ctxt->spaceNr == 0)
8457 spacePush(ctxt, -1);
8458 else
8459 spacePush(ctxt, *ctxt->space);
8460 name = xmlParseStartTag(ctxt);
8461 if (name == NULL) {
8462 spacePop(ctxt);
8463 ctxt->instate = XML_PARSER_EOF;
8464 #ifdef DEBUG_PUSH
8465 xmlGenericError(xmlGenericErrorContext,
8466 "PP: entering EOF\n");
8467 #endif
8468 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8469 ctxt->sax->endDocument(ctxt->userData);
8470 goto done;
8472 namePush(ctxt, xmlStrdup(name));
8475 * [ VC: Root Element Type ]
8476 * The Name in the document type declaration must match
8477 * the element type of the root element.
8479 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8480 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8481 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8484 * Check for an Empty Element.
8486 if ((RAW == '/') && (NXT(1) == '>')) {
8487 SKIP(2);
8488 if ((ctxt->sax != NULL) &&
8489 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8490 ctxt->sax->endElement(ctxt->userData, name);
8491 xmlFree(name);
8492 oldname = namePop(ctxt);
8493 spacePop(ctxt);
8494 if (oldname != NULL) {
8495 #ifdef DEBUG_STACK
8496 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8497 #endif
8498 xmlFree(oldname);
8500 if (ctxt->name == NULL) {
8501 ctxt->instate = XML_PARSER_EPILOG;
8502 #ifdef DEBUG_PUSH
8503 xmlGenericError(xmlGenericErrorContext,
8504 "PP: entering EPILOG\n");
8505 #endif
8506 } else {
8507 ctxt->instate = XML_PARSER_CONTENT;
8508 #ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: entering CONTENT\n");
8511 #endif
8513 break;
8515 if (RAW == '>') {
8516 NEXT;
8517 } else {
8518 ctxt->errNo = XML_ERR_GT_REQUIRED;
8519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8520 ctxt->sax->error(ctxt->userData,
8521 "Couldn't find end of Start Tag %s\n",
8522 name);
8523 ctxt->wellFormed = 0;
8524 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8527 * end of parsing of this node.
8529 nodePop(ctxt);
8530 oldname = namePop(ctxt);
8531 spacePop(ctxt);
8532 if (oldname != NULL) {
8533 #ifdef DEBUG_STACK
8534 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8535 #endif
8536 xmlFree(oldname);
8539 xmlFree(name);
8540 ctxt->instate = XML_PARSER_CONTENT;
8541 #ifdef DEBUG_PUSH
8542 xmlGenericError(xmlGenericErrorContext,
8543 "PP: entering CONTENT\n");
8544 #endif
8545 break;
/*
 * CONTENT: dispatches on the first bytes to PI, comment, CDATA section,
 * end tag, start tag, reference or character data. test and cons
 * snapshot the cursor and the consumed count so that a step which made
 * no progress is reported as an internal error further down.
 */
8547 case XML_PARSER_CONTENT: {
8548 const xmlChar *test;
8549 int cons;
8550 if ((avail < 2) && (ctxt->inputNr == 1))
8551 goto done;
8552 cur = ctxt->input->cur[0];
8553 next = ctxt->input->cur[1];
8555 test = CUR_PTR;
8556 cons = ctxt->input->consumed;
8557 if ((cur == '<') && (next == '?')) {
8558 if ((!terminate) &&
8559 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8560 goto done;
8561 #ifdef DEBUG_PUSH
8562 xmlGenericError(xmlGenericErrorContext,
8563 "PP: Parsing PI\n");
8564 #endif
8565 xmlParsePI(ctxt);
8566 } else if ((cur == '<') && (next == '!') &&
8567 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8568 if ((!terminate) &&
8569 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8570 goto done;
8571 #ifdef DEBUG_PUSH
8572 xmlGenericError(xmlGenericErrorContext,
8573 "PP: Parsing Comment\n");
8574 #endif
8575 xmlParseComment(ctxt);
8576 ctxt->instate = XML_PARSER_CONTENT;
8577 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8578 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8579 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8580 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8581 (ctxt->input->cur[8] == '[')) {
/* Skip the nine byte CDATA opener; the payload is handled by the
 * CDATA_SECTION state. */
8582 SKIP(9);
8583 ctxt->instate = XML_PARSER_CDATA_SECTION;
8584 #ifdef DEBUG_PUSH
8585 xmlGenericError(xmlGenericErrorContext,
8586 "PP: entering CDATA_SECTION\n");
8587 #endif
8588 break;
8589 } else if ((cur == '<') && (next == '!') &&
8590 (avail < 9)) {
8591 goto done;
8592 } else if ((cur == '<') && (next == '/')) {
8593 ctxt->instate = XML_PARSER_END_TAG;
8594 #ifdef DEBUG_PUSH
8595 xmlGenericError(xmlGenericErrorContext,
8596 "PP: entering END_TAG\n");
8597 #endif
8598 break;
8599 } else if (cur == '<') {
8600 ctxt->instate = XML_PARSER_START_TAG;
8601 #ifdef DEBUG_PUSH
8602 xmlGenericError(xmlGenericErrorContext,
8603 "PP: entering START_TAG\n");
8604 #endif
8605 break;
8606 } else if (cur == '&') {
8607 if ((!terminate) &&
8608 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8609 goto done;
8610 #ifdef DEBUG_PUSH
8611 xmlGenericError(xmlGenericErrorContext,
8612 "PP: Parsing Reference\n");
8613 #endif
8614 xmlParseReference(ctxt);
8615 } else {
8616 /* TODO Avoid the extra copy, handle directly !!! */
8618 * Goal of the following test is:
8619 * - minimize calls to the SAX 'character' callback
8620 * when they are mergeable
8621 * - handle an problem for isBlank when we only parse
8622 * a sequence of blank chars and the next one is
8623 * not available to check against '<' presence.
8624 * - tries to homogenize the differences in SAX
8625 * callbacks between the push and pull versions
8626 * of the parser.
8628 if ((ctxt->inputNr == 1) &&
8629 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8630 if ((!terminate) &&
8631 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8632 goto done;
8634 ctxt->checkIndex = 0;
8635 #ifdef DEBUG_PUSH
8636 xmlGenericError(xmlGenericErrorContext,
8637 "PP: Parsing char data\n");
8638 #endif
8639 xmlParseCharData(ctxt, 0);
8642 * Pop-up of finished entities.
8644 while ((RAW == 0) && (ctxt->inputNr > 1))
8645 xmlPopInput(ctxt);
/* Neither the consumed count nor the cursor moved: give up with
 * XML_ERR_INTERNAL_ERROR and move to EOF. */
8646 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8647 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8649 ctxt->sax->error(ctxt->userData,
8650 "detected an error in element content\n");
8651 ctxt->wellFormed = 0;
8652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8653 ctxt->instate = XML_PARSER_EOF;
8654 break;
8656 break;
/*
 * CDATA_SECTION: looks for the three byte terminator regardless of
 * terminate. When it is missing and at least
 * XML_PARSER_BIG_BUFFER_SIZE + 2 bytes are pending, one block of
 * XML_PARSER_BIG_BUFFER_SIZE bytes is handed to cdataBlock and skipped,
 * then the function leaves through done. When it is found, the bytes
 * before it are handed to cdataBlock (only when base is positive), the
 * terminator is skipped and the state returns to CONTENT.
 */
8658 case XML_PARSER_CDATA_SECTION: {
8660 * The Push mode need to have the SAX callback for
8661 * cdataBlock merge back contiguous callbacks.
8663 int base;
8665 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8666 if (base < 0) {
8667 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8669 if (ctxt->sax->cdataBlock != NULL)
8670 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8671 XML_PARSER_BIG_BUFFER_SIZE);
8673 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8674 ctxt->checkIndex = 0;
8676 goto done;
8677 } else {
8678 if ((ctxt->sax != NULL) && (base > 0) &&
8679 (!ctxt->disableSAX)) {
8680 if (ctxt->sax->cdataBlock != NULL)
8681 ctxt->sax->cdataBlock(ctxt->userData,
8682 ctxt->input->cur, base);
8684 SKIP(base + 3);
8685 ctxt->checkIndex = 0;
8686 ctxt->instate = XML_PARSER_CONTENT;
8687 #ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext,
8689 "PP: entering CONTENT\n");
8690 #endif
8692 break;
/*
 * END_TAG: needs two bytes and, unless terminating, a greater-than sign
 * in the buffer. After xmlParseEndTag() a NULL ctxt->name moves to
 * EPILOG, anything else back to CONTENT.
 */
8694 case XML_PARSER_END_TAG:
8695 if (avail < 2)
8696 goto done;
8697 if ((!terminate) &&
8698 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8699 goto done;
8700 xmlParseEndTag(ctxt);
8701 if (ctxt->name == NULL) {
8702 ctxt->instate = XML_PARSER_EPILOG;
8703 #ifdef DEBUG_PUSH
8704 xmlGenericError(xmlGenericErrorContext,
8705 "PP: entering EPILOG\n");
8706 #endif
8707 } else {
8708 ctxt->instate = XML_PARSER_CONTENT;
8709 #ifdef DEBUG_PUSH
8710 xmlGenericError(xmlGenericErrorContext,
8711 "PP: entering CONTENT\n");
8712 #endif
8714 break;
/*
 * DTD: scans buffer->content from the cursor offset, or from
 * ctxt->checkIndex when that is further on, for the end of the internal
 * subset. quote holds the delimiter of a literal being crossed, so that
 * brackets inside it are ignored. Two closing square brackets in a row
 * are skipped as a pair. After a single closing square bracket the rest
 * of the buffer is searched for a greater-than sign; when one exists the
 * code jumps to found_end_int_subset, runs xmlParseInternalSubset(),
 * reports externalSubset and moves to PROLOG. When the end is not found
 * the scan position is stored in ctxt->checkIndex (only outside a
 * literal) and the function leaves through done.
 * NOTE(review): ctxt->input->buf is dereferenced here without the NULL
 * test used at the top of the loop - confirm that this state is only
 * reachable with a buffered input.
 * NOTE(review): the DEBUG_PUSH block in the failure path tests next,
 * which this state never assigns - confirm whether that is intended.
 */
8715 case XML_PARSER_DTD: {
8717 * Sorry but progressive parsing of the internal subset
8718 * is not expected to be supported. We first check that
8719 * the full content of the internal subset is available and
8720 * the parsing is launched only at that point.
8721 * Internal subset ends up with "']' S? '>'" in an unescaped
8722 * section and not in a ']]>' sequence which are conditional
8723 * sections (whoever argued to keep that crap in XML deserve
8724 * a place in hell !).
8726 int base, i;
8727 xmlChar *buf;
8728 xmlChar quote = 0;
8730 base = ctxt->input->cur - ctxt->input->base;
8731 if (base < 0) return(0);
8732 if (ctxt->checkIndex > base)
8733 base = ctxt->checkIndex;
8734 buf = ctxt->input->buf->buffer->content;
8735 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8736 base++) {
8737 if (quote != 0) {
8738 if (buf[base] == quote)
8739 quote = 0;
8740 continue;
8742 if (buf[base] == '"') {
8743 quote = '"';
8744 continue;
8746 if (buf[base] == '\'') {
8747 quote = '\'';
8748 continue;
8750 if (buf[base] == ']') {
8751 if ((unsigned int) base +1 >=
8752 ctxt->input->buf->buffer->use)
8753 break;
8754 if (buf[base + 1] == ']') {
8755 /* conditional crap, skip both ']' ! */
8756 base++;
8757 continue;
8759 for (i = 0;
8760 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8761 i++) {
8762 if (buf[base + i] == '>')
8763 goto found_end_int_subset;
8765 break;
8769 * We didn't found the end of the Internal subset
8771 if (quote == 0)
8772 ctxt->checkIndex = base;
8773 #ifdef DEBUG_PUSH
8774 if (next == 0)
8775 xmlGenericError(xmlGenericErrorContext,
8776 "PP: lookup of int subset end filed\n");
8777 #endif
8778 goto done;
8780 found_end_int_subset:
8781 xmlParseInternalSubset(ctxt);
8782 ctxt->inSubset = 2;
8783 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8784 (ctxt->sax->externalSubset != NULL))
8785 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8786 ctxt->extSubSystem, ctxt->extSubURI);
8787 ctxt->inSubset = 0;
8788 ctxt->instate = XML_PARSER_PROLOG;
8789 ctxt->checkIndex = 0;
8790 #ifdef DEBUG_PUSH
8791 xmlGenericError(xmlGenericErrorContext,
8792 "PP: entering PROLOG\n");
8793 #endif
8794 break;
8796 case XML_PARSER_COMMENT:
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: internal error, state == COMMENT\n");
8799 ctxt->instate = XML_PARSER_CONTENT;
8800 #ifdef DEBUG_PUSH
8801 xmlGenericError(xmlGenericErrorContext,
8802 "PP: entering CONTENT\n");
8803 #endif
8804 break;
8805 case XML_PARSER_PI:
8806 xmlGenericError(xmlGenericErrorContext,
8807 "PP: internal error, state == PI\n");
8808 ctxt->instate = XML_PARSER_CONTENT;
8809 #ifdef DEBUG_PUSH
8810 xmlGenericError(xmlGenericErrorContext,
8811 "PP: entering CONTENT\n");
8812 #endif
8813 break;
8814 case XML_PARSER_ENTITY_DECL:
8815 xmlGenericError(xmlGenericErrorContext,
8816 "PP: internal error, state == ENTITY_DECL\n");
8817 ctxt->instate = XML_PARSER_DTD;
8818 #ifdef DEBUG_PUSH
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: entering DTD\n");
8821 #endif
8822 break;
8823 case XML_PARSER_ENTITY_VALUE:
8824 xmlGenericError(xmlGenericErrorContext,
8825 "PP: internal error, state == ENTITY_VALUE\n");
8826 ctxt->instate = XML_PARSER_CONTENT;
8827 #ifdef DEBUG_PUSH
8828 xmlGenericError(xmlGenericErrorContext,
8829 "PP: entering DTD\n");
8830 #endif
8831 break;
8832 case XML_PARSER_ATTRIBUTE_VALUE:
8833 xmlGenericError(xmlGenericErrorContext,
8834 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8835 ctxt->instate = XML_PARSER_START_TAG;
8836 #ifdef DEBUG_PUSH
8837 xmlGenericError(xmlGenericErrorContext,
8838 "PP: entering START_TAG\n");
8839 #endif
8840 break;
8841 case XML_PARSER_SYSTEM_LITERAL:
8842 xmlGenericError(xmlGenericErrorContext,
8843 "PP: internal error, state == SYSTEM_LITERAL\n");
8844 ctxt->instate = XML_PARSER_START_TAG;
8845 #ifdef DEBUG_PUSH
8846 xmlGenericError(xmlGenericErrorContext,
8847 "PP: entering START_TAG\n");
8848 #endif
8849 break;
8850 case XML_PARSER_PUBLIC_LITERAL:
8851 xmlGenericError(xmlGenericErrorContext,
8852 "PP: internal error, state == PUBLIC_LITERAL\n");
8853 ctxt->instate = XML_PARSER_START_TAG;
8854 #ifdef DEBUG_PUSH
8855 xmlGenericError(xmlGenericErrorContext,
8856 "PP: entering START_TAG\n");
8857 #endif
8858 break;
8861 done:
8862 #ifdef DEBUG_PUSH
8863 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8864 #endif
8865 return(ret);
8869 * xmlParseChunk:
8870 * @ctxt: an XML parser context
8871 * @chunk: an char array
8872 * @size: the size in byte of the chunk
8873 * @terminate: last chunk indicator
8875 * Parse a Chunk of memory
8877 * Returns zero if no error, the xmlParserErrors otherwise.
8880 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8881 int terminate) {
8882 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8883 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8884 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8885 int cur = ctxt->input->cur - ctxt->input->base;
8887 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8888 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8889 ctxt->input->cur = ctxt->input->base + cur;
8890 ctxt->input->end =
8891 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
8892 #ifdef DEBUG_PUSH
8893 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8894 #endif
8896 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8897 xmlParseTryOrFinish(ctxt, terminate);
8898 } else if (ctxt->instate != XML_PARSER_EOF) {
8899 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8900 xmlParserInputBufferPtr in = ctxt->input->buf;
8901 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8902 (in->raw != NULL)) {
8903 int nbchars;
8905 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8906 if (nbchars < 0) {
8907 xmlGenericError(xmlGenericErrorContext,
8908 "xmlParseChunk: encoder error\n");
8909 return(XML_ERR_INVALID_ENCODING);
8914 xmlParseTryOrFinish(ctxt, terminate);
8915 if (terminate) {
8917 * Check for termination
8919 int avail = 0;
8920 if (ctxt->input->buf == NULL)
8921 avail = ctxt->input->length -
8922 (ctxt->input->cur - ctxt->input->base);
8923 else
8924 avail = ctxt->input->buf->buffer->use -
8925 (ctxt->input->cur - ctxt->input->base);
8927 if ((ctxt->instate != XML_PARSER_EOF) &&
8928 (ctxt->instate != XML_PARSER_EPILOG)) {
8929 ctxt->errNo = XML_ERR_DOCUMENT_END;
8930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8931 ctxt->sax->error(ctxt->userData,
8932 "Extra content at the end of the document\n");
8933 ctxt->wellFormed = 0;
8934 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8936 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
8937 ctxt->errNo = XML_ERR_DOCUMENT_END;
8938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8939 ctxt->sax->error(ctxt->userData,
8940 "Extra content at the end of the document\n");
8941 ctxt->wellFormed = 0;
8942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8945 if (ctxt->instate != XML_PARSER_EOF) {
8946 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8947 ctxt->sax->endDocument(ctxt->userData);
8949 ctxt->instate = XML_PARSER_EOF;
8951 return((xmlParserErrors) ctxt->errNo);
8954 /************************************************************************
8956 * I/O front end functions to the parser *
8958 ************************************************************************/
8961 * xmlStopParser:
8962 * @ctxt: an XML parser context
8964 * Blocks further parser processing
8966 void
8967 xmlStopParser(xmlParserCtxtPtr ctxt) {
8968 ctxt->instate = XML_PARSER_EOF;
8969 if (ctxt->input != NULL)
8970 ctxt->input->cur = BAD_CAST"";
8974 * xmlCreatePushParserCtxt:
8975 * @sax: a SAX handler
8976 * @user_data: The user data returned on SAX callbacks
8977 * @chunk: a pointer to an array of chars
8978 * @size: number of chars in the array
8979 * @filename: an optional file name or URI
8981 * Create a parser context for using the XML parser in push mode.
8982 * If @buffer and @size are non-NULL, the data is used to detect
8983 * the encoding. The remaining characters will be parsed so they
8984 * don't need to be fed in again through xmlParseChunk.
8985 * To allow content encoding detection, @size should be >= 4
8986 * The value of @filename is used for fetching external entities
8987 * and error/warning reports.
8989 * Returns the new parser context or NULL
8992 xmlParserCtxtPtr
8993 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8994 const char *chunk, int size, const char *filename) {
8995 xmlParserCtxtPtr ctxt;
8996 xmlParserInputPtr inputStream;
8997 xmlParserInputBufferPtr buf;
8998 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9001 * plug some encoding conversion routines
9003 if ((chunk != NULL) && (size >= 4))
9004 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9006 buf = xmlAllocParserInputBuffer(enc);
9007 if (buf == NULL) return(NULL);
9009 ctxt = xmlNewParserCtxt();
9010 if (ctxt == NULL) {
9011 xmlFree(buf);
9012 return(NULL);
9014 if (sax != NULL) {
9015 if (ctxt->sax != &xmlDefaultSAXHandler)
9016 xmlFree(ctxt->sax);
9017 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9018 if (ctxt->sax == NULL) {
9019 xmlFree(buf);
9020 xmlFree(ctxt);
9021 return(NULL);
9023 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9024 if (user_data != NULL)
9025 ctxt->userData = user_data;
9027 if (filename == NULL) {
9028 ctxt->directory = NULL;
9029 } else {
9030 ctxt->directory = xmlParserGetDirectory(filename);
9033 inputStream = xmlNewInputStream(ctxt);
9034 if (inputStream == NULL) {
9035 xmlFreeParserCtxt(ctxt);
9036 return(NULL);
9039 if (filename == NULL)
9040 inputStream->filename = NULL;
9041 else
9042 inputStream->filename = (char *)
9043 xmlNormalizeWindowsPath((const xmlChar *) filename);
9044 inputStream->buf = buf;
9045 inputStream->base = inputStream->buf->buffer->content;
9046 inputStream->cur = inputStream->buf->buffer->content;
9047 inputStream->end =
9048 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
9050 inputPush(ctxt, inputStream);
9052 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9053 (ctxt->input->buf != NULL)) {
9054 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9055 int cur = ctxt->input->cur - ctxt->input->base;
9057 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9059 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9060 ctxt->input->cur = ctxt->input->base + cur;
9061 ctxt->input->end =
9062 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
9063 #ifdef DEBUG_PUSH
9064 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9065 #endif
9068 if (enc != XML_CHAR_ENCODING_NONE) {
9069 xmlSwitchEncoding(ctxt, enc);
9072 return(ctxt);
9076 * xmlCreateIOParserCtxt:
9077 * @sax: a SAX handler
9078 * @user_data: The user data returned on SAX callbacks
9079 * @ioread: an I/O read function
9080 * @ioclose: an I/O close function
9081 * @ioctx: an I/O handler
9082 * @enc: the charset encoding if known
9084 * Create a parser context for using the XML parser with an existing
9085 * I/O stream
9087 * Returns the new parser context or NULL
9089 xmlParserCtxtPtr
9090 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9091 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9092 void *ioctx, xmlCharEncoding enc) {
9093 xmlParserCtxtPtr ctxt;
9094 xmlParserInputPtr inputStream;
9095 xmlParserInputBufferPtr buf;
9097 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9098 if (buf == NULL) return(NULL);
9100 ctxt = xmlNewParserCtxt();
9101 if (ctxt == NULL) {
9102 xmlFree(buf);
9103 return(NULL);
9105 if (sax != NULL) {
9106 if (ctxt->sax != &xmlDefaultSAXHandler)
9107 xmlFree(ctxt->sax);
9108 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9109 if (ctxt->sax == NULL) {
9110 xmlFree(buf);
9111 xmlFree(ctxt);
9112 return(NULL);
9114 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9115 if (user_data != NULL)
9116 ctxt->userData = user_data;
9119 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9120 if (inputStream == NULL) {
9121 xmlFreeParserCtxt(ctxt);
9122 return(NULL);
9124 inputPush(ctxt, inputStream);
9126 return(ctxt);
/************************************************************************
 *									*
 * 		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
9136 * xmlIOParseDTD:
9137 * @sax: the SAX handler block or NULL
9138 * @input: an Input Buffer
9139 * @enc: the charset encoding if known
9141 * Load and parse a DTD
9143 * Returns the resulting xmlDtdPtr or NULL in case of error.
9144 * @input will be freed at parsing end.
9147 xmlDtdPtr
9148 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9149 xmlCharEncoding enc) {
9150 xmlDtdPtr ret = NULL;
9151 xmlParserCtxtPtr ctxt;
9152 xmlParserInputPtr pinput = NULL;
9153 xmlChar start[4];
9155 if (input == NULL)
9156 return(NULL);
9158 ctxt = xmlNewParserCtxt();
9159 if (ctxt == NULL) {
9160 return(NULL);
9164 * Set-up the SAX context
9166 if (sax != NULL) {
9167 if (ctxt->sax != NULL)
9168 xmlFree(ctxt->sax);
9169 ctxt->sax = sax;
9170 ctxt->userData = NULL;
9174 * generate a parser input from the I/O handler
9177 pinput = xmlNewIOInputStream(ctxt, input, enc);
9178 if (pinput == NULL) {
9179 if (sax != NULL) ctxt->sax = NULL;
9180 xmlFreeParserCtxt(ctxt);
9181 return(NULL);
9185 * plug some encoding conversion routines here.
9187 xmlPushInput(ctxt, pinput);
9189 pinput->filename = NULL;
9190 pinput->line = 1;
9191 pinput->col = 1;
9192 pinput->base = ctxt->input->cur;
9193 pinput->cur = ctxt->input->cur;
9194 pinput->free = NULL;
9197 * let's parse that entity knowing it's an external subset.
9199 ctxt->inSubset = 2;
9200 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9201 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9202 BAD_CAST "none", BAD_CAST "none");
9204 if (enc == XML_CHAR_ENCODING_NONE) {
9206 * Get the 4 first bytes and decode the charset
9207 * if enc != XML_CHAR_ENCODING_NONE
9208 * plug some encoding conversion routines.
9210 start[0] = RAW;
9211 start[1] = NXT(1);
9212 start[2] = NXT(2);
9213 start[3] = NXT(3);
9214 enc = xmlDetectCharEncoding(start, 4);
9215 if (enc != XML_CHAR_ENCODING_NONE) {
9216 xmlSwitchEncoding(ctxt, enc);
9220 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9222 if (ctxt->myDoc != NULL) {
9223 if (ctxt->wellFormed) {
9224 ret = ctxt->myDoc->extSubset;
9225 ctxt->myDoc->extSubset = NULL;
9226 } else {
9227 ret = NULL;
9229 xmlFreeDoc(ctxt->myDoc);
9230 ctxt->myDoc = NULL;
9232 if (sax != NULL) ctxt->sax = NULL;
9233 xmlFreeParserCtxt(ctxt);
9235 return(ret);
9239 * xmlSAXParseDTD:
9240 * @sax: the SAX handler block
9241 * @ExternalID: a NAME* containing the External ID of the DTD
9242 * @SystemID: a NAME* containing the URL to the DTD
9244 * Load and parse an external subset.
9246 * Returns the resulting xmlDtdPtr or NULL in case of error.
9249 xmlDtdPtr
9250 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9251 const xmlChar *SystemID) {
9252 xmlDtdPtr ret = NULL;
9253 xmlParserCtxtPtr ctxt;
9254 xmlParserInputPtr input = NULL;
9255 xmlCharEncoding enc;
9257 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9259 ctxt = xmlNewParserCtxt();
9260 if (ctxt == NULL) {
9261 return(NULL);
9265 * Set-up the SAX context
9267 if (sax != NULL) {
9268 if (ctxt->sax != NULL)
9269 xmlFree(ctxt->sax);
9270 ctxt->sax = sax;
9271 ctxt->userData = NULL;
9275 * Ask the Entity resolver to load the damn thing
9278 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9279 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9280 if (input == NULL) {
9281 if (sax != NULL) ctxt->sax = NULL;
9282 xmlFreeParserCtxt(ctxt);
9283 return(NULL);
9287 * plug some encoding conversion routines here.
9289 xmlPushInput(ctxt, input);
9290 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9291 xmlSwitchEncoding(ctxt, enc);
9293 if (input->filename == NULL)
9294 input->filename = (char *) xmlStrdup(SystemID);
9295 input->line = 1;
9296 input->col = 1;
9297 input->base = ctxt->input->cur;
9298 input->cur = ctxt->input->cur;
9299 input->free = NULL;
9302 * let's parse that entity knowing it's an external subset.
9304 ctxt->inSubset = 2;
9305 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9306 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9307 ExternalID, SystemID);
9308 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9310 if (ctxt->myDoc != NULL) {
9311 if (ctxt->wellFormed) {
9312 ret = ctxt->myDoc->extSubset;
9313 ctxt->myDoc->extSubset = NULL;
9314 } else {
9315 ret = NULL;
9317 xmlFreeDoc(ctxt->myDoc);
9318 ctxt->myDoc = NULL;
9320 if (sax != NULL) ctxt->sax = NULL;
9321 xmlFreeParserCtxt(ctxt);
9323 return(ret);
9327 * xmlParseDTD:
9328 * @ExternalID: a NAME* containing the External ID of the DTD
9329 * @SystemID: a NAME* containing the URL to the DTD
9331 * Load and parse an external subset.
9333 * Returns the resulting xmlDtdPtr or NULL in case of error.
9336 xmlDtdPtr
9337 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9338 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
/************************************************************************
 *									*
 * 		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
9348 * xmlParseCtxtExternalEntity:
9349 * @ctx: the existing parsing context
9350 * @URL: the URL for the entity to load
9351 * @ID: the System ID for the entity to load
9352 * @lst: the return value for the set of parsed nodes
9354 * Parse an external general entity within an existing parsing context
9355 * An external general parsed entity is well-formed if it matches the
9356 * production labeled extParsedEnt.
9358 * [78] extParsedEnt ::= TextDecl? content
9360 * Returns 0 if the entity is well formed, -1 in case of args problem and
9361 * the parser error code otherwise
9365 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
9366 const xmlChar *ID, xmlNodePtr *lst) {
9367 xmlParserCtxtPtr ctxt;
9368 xmlDocPtr newDoc;
9369 xmlSAXHandlerPtr oldsax = NULL;
9370 int ret = 0;
9371 xmlChar start[4];
9372 xmlCharEncoding enc;
9374 if (ctx->depth > 40) {
9375 return(XML_ERR_ENTITY_LOOP);
9378 if (lst != NULL)
9379 *lst = NULL;
9380 if ((URL == NULL) && (ID == NULL))
9381 return(-1);
9382 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9383 return(-1);
9386 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9387 if (ctxt == NULL) return(-1);
9388 ctxt->userData = ctxt;
9389 oldsax = ctxt->sax;
9390 ctxt->sax = ctx->sax;
9391 newDoc = xmlNewDoc(BAD_CAST "1.0");
9392 if (newDoc == NULL) {
9393 xmlFreeParserCtxt(ctxt);
9394 return(-1);
9396 if (ctx->myDoc != NULL) {
9397 newDoc->intSubset = ctx->myDoc->intSubset;
9398 newDoc->extSubset = ctx->myDoc->extSubset;
9400 if (ctx->myDoc->URL != NULL) {
9401 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9403 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9404 if (newDoc->children == NULL) {
9405 ctxt->sax = oldsax;
9406 xmlFreeParserCtxt(ctxt);
9407 newDoc->intSubset = NULL;
9408 newDoc->extSubset = NULL;
9409 xmlFreeDoc(newDoc);
9410 return(-1);
9412 nodePush(ctxt, newDoc->children);
9413 if (ctx->myDoc == NULL) {
9414 ctxt->myDoc = newDoc;
9415 } else {
9416 ctxt->myDoc = ctx->myDoc;
9417 newDoc->children->doc = ctx->myDoc;
9421 * Get the 4 first bytes and decode the charset
9422 * if enc != XML_CHAR_ENCODING_NONE
9423 * plug some encoding conversion routines.
9425 GROW
9426 start[0] = RAW;
9427 start[1] = NXT(1);
9428 start[2] = NXT(2);
9429 start[3] = NXT(3);
9430 enc = xmlDetectCharEncoding(start, 4);
9431 if (enc != XML_CHAR_ENCODING_NONE) {
9432 xmlSwitchEncoding(ctxt, enc);
9436 * Parse a possible text declaration first
9438 if ((RAW == '<') && (NXT(1) == '?') &&
9439 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9440 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9441 xmlParseTextDecl(ctxt);
9445 * Doing validity checking on chunk doesn't make sense
9447 ctxt->instate = XML_PARSER_CONTENT;
9448 ctxt->validate = ctx->validate;
9449 ctxt->loadsubset = ctx->loadsubset;
9450 ctxt->depth = ctx->depth + 1;
9451 ctxt->replaceEntities = ctx->replaceEntities;
9452 if (ctxt->validate) {
9453 ctxt->vctxt.error = ctx->vctxt.error;
9454 ctxt->vctxt.warning = ctx->vctxt.warning;
9455 } else {
9456 ctxt->vctxt.error = NULL;
9457 ctxt->vctxt.warning = NULL;
9459 ctxt->vctxt.nodeTab = NULL;
9460 ctxt->vctxt.nodeNr = 0;
9461 ctxt->vctxt.nodeMax = 0;
9462 ctxt->vctxt.node = NULL;
9464 xmlParseContent(ctxt);
9466 if ((RAW == '<') && (NXT(1) == '/')) {
9467 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9469 ctxt->sax->error(ctxt->userData,
9470 "chunk is not well balanced\n");
9471 ctxt->wellFormed = 0;
9472 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9473 } else if (RAW != 0) {
9474 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9476 ctxt->sax->error(ctxt->userData,
9477 "extra content at the end of well balanced chunk\n");
9478 ctxt->wellFormed = 0;
9479 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9481 if (ctxt->node != newDoc->children) {
9482 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9484 ctxt->sax->error(ctxt->userData,
9485 "chunk is not well balanced\n");
9486 ctxt->wellFormed = 0;
9487 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9490 if (!ctxt->wellFormed) {
9491 if (ctxt->errNo == 0)
9492 ret = 1;
9493 else
9494 ret = ctxt->errNo;
9495 } else {
9496 if (lst != NULL) {
9497 xmlNodePtr cur;
9500 * Return the newly created nodeset after unlinking it from
9501 * they pseudo parent.
9503 cur = newDoc->children->children;
9504 *lst = cur;
9505 while (cur != NULL) {
9506 cur->parent = NULL;
9507 cur = cur->next;
9509 newDoc->children->children = NULL;
9511 ret = 0;
9513 ctxt->sax = oldsax;
9514 xmlFreeParserCtxt(ctxt);
9515 newDoc->intSubset = NULL;
9516 newDoc->extSubset = NULL;
9517 xmlFreeDoc(newDoc);
9519 return(ret);
9523 * xmlParseExternalEntityPrivate:
9524 * @doc: the document the chunk pertains to
9525 * @oldctxt: the previous parser context if available
9526 * @sax: the SAX handler bloc (possibly NULL)
9527 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9528 * @depth: Used for loop detection, use 0
9529 * @URL: the URL for the entity to load
9530 * @ID: the System ID for the entity to load
9531 * @list: the return value for the set of parsed nodes
9533 * Private version of xmlParseExternalEntity()
9535 * Returns 0 if the entity is well formed, -1 in case of args problem and
9536 * the parser error code otherwise
9539 static int
9540 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9541 xmlSAXHandlerPtr sax,
9542 void *user_data, int depth, const xmlChar *URL,
9543 const xmlChar *ID, xmlNodePtr *list) {
9544 xmlParserCtxtPtr ctxt;
9545 xmlDocPtr newDoc;
9546 xmlSAXHandlerPtr oldsax = NULL;
9547 int ret = 0;
9548 xmlChar start[4];
9549 xmlCharEncoding enc;
9551 if (depth > 40) {
9552 return(XML_ERR_ENTITY_LOOP);
9557 if (list != NULL)
9558 *list = NULL;
9559 if ((URL == NULL) && (ID == NULL))
9560 return(-1);
9561 if (doc == NULL) /* @@ relax but check for dereferences */
9562 return(-1);
9565 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9566 if (ctxt == NULL) return(-1);
9567 ctxt->userData = ctxt;
9568 if (oldctxt != NULL) {
9569 ctxt->_private = oldctxt->_private;
9570 ctxt->loadsubset = oldctxt->loadsubset;
9571 ctxt->validate = oldctxt->validate;
9572 ctxt->external = oldctxt->external;
9573 } else {
9575 * Doing validity checking on chunk without context
9576 * doesn't make sense
9578 ctxt->_private = NULL;
9579 ctxt->validate = 0;
9580 ctxt->external = 2;
9581 ctxt->loadsubset = 0;
9583 if (sax != NULL) {
9584 oldsax = ctxt->sax;
9585 ctxt->sax = sax;
9586 if (user_data != NULL)
9587 ctxt->userData = user_data;
9589 newDoc = xmlNewDoc(BAD_CAST "1.0");
9590 if (newDoc == NULL) {
9591 xmlFreeParserCtxt(ctxt);
9592 return(-1);
9594 if (doc != NULL) {
9595 newDoc->intSubset = doc->intSubset;
9596 newDoc->extSubset = doc->extSubset;
9598 if (doc->URL != NULL) {
9599 newDoc->URL = xmlStrdup(doc->URL);
9601 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9602 if (newDoc->children == NULL) {
9603 if (sax != NULL)
9604 ctxt->sax = oldsax;
9605 xmlFreeParserCtxt(ctxt);
9606 newDoc->intSubset = NULL;
9607 newDoc->extSubset = NULL;
9608 xmlFreeDoc(newDoc);
9609 return(-1);
9611 nodePush(ctxt, newDoc->children);
9612 if (doc == NULL) {
9613 ctxt->myDoc = newDoc;
9614 } else {
9615 ctxt->myDoc = doc;
9616 newDoc->children->doc = doc;
9620 * Get the 4 first bytes and decode the charset
9621 * if enc != XML_CHAR_ENCODING_NONE
9622 * plug some encoding conversion routines.
9624 GROW;
9625 start[0] = RAW;
9626 start[1] = NXT(1);
9627 start[2] = NXT(2);
9628 start[3] = NXT(3);
9629 enc = xmlDetectCharEncoding(start, 4);
9630 if (enc != XML_CHAR_ENCODING_NONE) {
9631 xmlSwitchEncoding(ctxt, enc);
9635 * Parse a possible text declaration first
9637 if ((RAW == '<') && (NXT(1) == '?') &&
9638 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9639 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9640 xmlParseTextDecl(ctxt);
9643 ctxt->instate = XML_PARSER_CONTENT;
9644 ctxt->depth = depth;
9646 xmlParseContent(ctxt);
9648 if ((RAW == '<') && (NXT(1) == '/')) {
9649 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9651 ctxt->sax->error(ctxt->userData,
9652 "chunk is not well balanced\n");
9653 ctxt->wellFormed = 0;
9654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9655 } else if (RAW != 0) {
9656 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9658 ctxt->sax->error(ctxt->userData,
9659 "extra content at the end of well balanced chunk\n");
9660 ctxt->wellFormed = 0;
9661 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9663 if (ctxt->node != newDoc->children) {
9664 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9666 ctxt->sax->error(ctxt->userData,
9667 "chunk is not well balanced\n");
9668 ctxt->wellFormed = 0;
9669 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9672 if (!ctxt->wellFormed) {
9673 if (ctxt->errNo == 0)
9674 ret = 1;
9675 else
9676 ret = ctxt->errNo;
9677 } else {
9678 if (list != NULL) {
9679 xmlNodePtr cur;
9682 * Return the newly created nodeset after unlinking it from
9683 * they pseudo parent.
9685 cur = newDoc->children->children;
9686 *list = cur;
9687 while (cur != NULL) {
9688 cur->parent = NULL;
9689 cur = cur->next;
9691 newDoc->children->children = NULL;
9693 ret = 0;
9695 if (sax != NULL)
9696 ctxt->sax = oldsax;
9697 xmlFreeParserCtxt(ctxt);
9698 newDoc->intSubset = NULL;
9699 newDoc->extSubset = NULL;
9700 xmlFreeDoc(newDoc);
9702 return(ret);
9706 * xmlParseExternalEntity:
9707 * @doc: the document the chunk pertains to
9708 * @sax: the SAX handler bloc (possibly NULL)
9709 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9710 * @depth: Used for loop detection, use 0
9711 * @URL: the URL for the entity to load
9712 * @ID: the System ID for the entity to load
9713 * @lst: the return value for the set of parsed nodes
9715 * Parse an external general entity
9716 * An external general parsed entity is well-formed if it matches the
9717 * production labeled extParsedEnt.
9719 * [78] extParsedEnt ::= TextDecl? content
9721 * Returns 0 if the entity is well formed, -1 in case of args problem and
9722 * the parser error code otherwise
9726 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9727 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
9728 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9729 ID, lst));
9733 * xmlParseBalancedChunkMemory:
9734 * @doc: the document the chunk pertains to
9735 * @sax: the SAX handler bloc (possibly NULL)
9736 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9737 * @depth: Used for loop detection, use 0
9738 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9739 * @lst: the return value for the set of parsed nodes
9741 * Parse a well-balanced chunk of an XML document
9742 * called by the parser
9743 * The allowed sequence for the Well Balanced Chunk is the one defined by
9744 * the content production in the XML grammar:
9746 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9748 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9749 * the parser error code otherwise
9753 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9754 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
9755 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9756 depth, string, lst, 0 );
9760 * xmlParseBalancedChunkMemoryInternal:
9761 * @oldctxt: the existing parsing context
9762 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9763 * @user_data: the user data field for the parser context
9764 * @lst: the return value for the set of parsed nodes
9767 * Parse a well-balanced chunk of an XML document
9768 * called by the parser
9769 * The allowed sequence for the Well Balanced Chunk is the one defined by
9770 * the content production in the XML grammar:
9772 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9774 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9775 * the parser error code otherwise
9777 * In case recover is set to 1, the nodelist will not be empty even if
9778 * the parsed chunk is not well balanced.
9780 static int
9781 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9782 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9783 xmlParserCtxtPtr ctxt;
9784 xmlDocPtr newDoc = NULL;
9785 xmlSAXHandlerPtr oldsax = NULL;
9786 xmlNodePtr content = NULL;
9787 int size;
9788 int ret = 0;
9790 if (oldctxt->depth > 40) {
9791 return(XML_ERR_ENTITY_LOOP);
9795 if (lst != NULL)
9796 *lst = NULL;
9797 if (string == NULL)
9798 return(-1);
9800 size = xmlStrlen(string);
9802 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9803 if (ctxt == NULL) return(-1);
9804 if (user_data != NULL)
9805 ctxt->userData = user_data;
9806 else
9807 ctxt->userData = ctxt;
9809 oldsax = ctxt->sax;
9810 ctxt->sax = oldctxt->sax;
9811 if (oldctxt->myDoc == NULL) {
9812 newDoc = xmlNewDoc(BAD_CAST "1.0");
9813 if (newDoc == NULL) {
9814 ctxt->sax = oldsax;
9815 xmlFreeParserCtxt(ctxt);
9816 return(-1);
9818 ctxt->myDoc = newDoc;
9819 } else {
9820 ctxt->myDoc = oldctxt->myDoc;
9821 content = ctxt->myDoc->children;
9823 ctxt->myDoc->children = xmlNewDocNode(newDoc, NULL,
9824 BAD_CAST "pseudoroot", NULL);
9825 if (ctxt->myDoc->children == NULL) {
9826 ctxt->sax = oldsax;
9827 xmlFreeParserCtxt(ctxt);
9828 if (newDoc != NULL)
9829 xmlFreeDoc(newDoc);
9830 return(-1);
9832 nodePush(ctxt, ctxt->myDoc->children);
9833 ctxt->instate = XML_PARSER_CONTENT;
9834 ctxt->depth = oldctxt->depth + 1;
9837 * Doing validity checking on chunk doesn't make sense
9839 ctxt->validate = 0;
9840 ctxt->loadsubset = oldctxt->loadsubset;
9842 xmlParseContent(ctxt);
9843 if ((RAW == '<') && (NXT(1) == '/')) {
9844 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9846 ctxt->sax->error(ctxt->userData,
9847 "chunk is not well balanced\n");
9848 ctxt->wellFormed = 0;
9849 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9850 } else if (RAW != 0) {
9851 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9853 ctxt->sax->error(ctxt->userData,
9854 "extra content at the end of well balanced chunk\n");
9855 ctxt->wellFormed = 0;
9856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9858 if (ctxt->node != ctxt->myDoc->children) {
9859 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9861 ctxt->sax->error(ctxt->userData,
9862 "chunk is not well balanced\n");
9863 ctxt->wellFormed = 0;
9864 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9867 if (!ctxt->wellFormed) {
9868 if (ctxt->errNo == 0)
9869 ret = 1;
9870 else
9871 ret = ctxt->errNo;
9872 } else {
9873 ret = 0;
9876 if ((lst != NULL) && (ret == 0)) {
9877 xmlNodePtr cur;
9880 * Return the newly created nodeset after unlinking it from
9881 * they pseudo parent.
9883 cur = ctxt->myDoc->children->children;
9884 *lst = cur;
9885 while (cur != NULL) {
9886 cur->parent = NULL;
9887 cur = cur->next;
9889 ctxt->myDoc->children->children = NULL;
9891 if (ctxt->myDoc != NULL) {
9892 xmlFreeNode(ctxt->myDoc->children);
9893 ctxt->myDoc->children = content;
9896 ctxt->sax = oldsax;
9897 xmlFreeParserCtxt(ctxt);
9898 if (newDoc != NULL)
9899 xmlFreeDoc(newDoc);
9901 return(ret);
9905 * xmlParseBalancedChunkMemoryRecover:
9906 * @doc: the document the chunk pertains to
9907 * @sax: the SAX handler bloc (possibly NULL)
9908 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9909 * @depth: Used for loop detection, use 0
9910 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9911 * @lst: the return value for the set of parsed nodes
9912 * @recover: return nodes even if the data is broken (use 0)
9915 * Parse a well-balanced chunk of an XML document
9916 * called by the parser
9917 * The allowed sequence for the Well Balanced Chunk is the one defined by
9918 * the content production in the XML grammar:
9920 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9922 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9923 * the parser error code otherwise
9925 * In case recover is set to 1, the nodelist will not be empty even if
9926 * the parsed chunk is not well balanced.
9929 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9930 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
9931 int recover) {
9932 xmlParserCtxtPtr ctxt;
9933 xmlDocPtr newDoc;
9934 xmlSAXHandlerPtr oldsax = NULL;
9935 xmlNodePtr content;
9936 int size;
9937 int ret = 0;
9939 if (depth > 40) {
9940 return(XML_ERR_ENTITY_LOOP);
9944 if (lst != NULL)
9945 *lst = NULL;
9946 if (string == NULL)
9947 return(-1);
9949 size = xmlStrlen(string);
9951 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9952 if (ctxt == NULL) return(-1);
9953 ctxt->userData = ctxt;
9954 if (sax != NULL) {
9955 oldsax = ctxt->sax;
9956 ctxt->sax = sax;
9957 if (user_data != NULL)
9958 ctxt->userData = user_data;
9960 newDoc = xmlNewDoc(BAD_CAST "1.0");
9961 if (newDoc == NULL) {
9962 xmlFreeParserCtxt(ctxt);
9963 return(-1);
9965 if (doc != NULL) {
9966 newDoc->intSubset = doc->intSubset;
9967 newDoc->extSubset = doc->extSubset;
9969 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9970 if (newDoc->children == NULL) {
9971 if (sax != NULL)
9972 ctxt->sax = oldsax;
9973 xmlFreeParserCtxt(ctxt);
9974 newDoc->intSubset = NULL;
9975 newDoc->extSubset = NULL;
9976 xmlFreeDoc(newDoc);
9977 return(-1);
9979 nodePush(ctxt, newDoc->children);
9980 if (doc == NULL) {
9981 ctxt->myDoc = newDoc;
9982 } else {
9983 ctxt->myDoc = newDoc;
9984 newDoc->children->doc = doc;
9986 ctxt->instate = XML_PARSER_CONTENT;
9987 ctxt->depth = depth;
9990 * Doing validity checking on chunk doesn't make sense
9992 ctxt->validate = 0;
9993 ctxt->loadsubset = 0;
9995 if ( doc != NULL ){
9996 content = doc->children;
9997 doc->children = NULL;
9998 xmlParseContent(ctxt);
9999 doc->children = content;
10001 else {
10002 xmlParseContent(ctxt);
10004 if ((RAW == '<') && (NXT(1) == '/')) {
10005 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10007 ctxt->sax->error(ctxt->userData,
10008 "chunk is not well balanced\n");
10009 ctxt->wellFormed = 0;
10010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10011 } else if (RAW != 0) {
10012 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10014 ctxt->sax->error(ctxt->userData,
10015 "extra content at the end of well balanced chunk\n");
10016 ctxt->wellFormed = 0;
10017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10019 if (ctxt->node != newDoc->children) {
10020 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10022 ctxt->sax->error(ctxt->userData,
10023 "chunk is not well balanced\n");
10024 ctxt->wellFormed = 0;
10025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10028 if (!ctxt->wellFormed) {
10029 if (ctxt->errNo == 0)
10030 ret = 1;
10031 else
10032 ret = ctxt->errNo;
10033 } else {
10034 ret = 0;
10037 if (lst != NULL && (ret == 0 || recover == 1)) {
10038 xmlNodePtr cur;
10041 * Return the newly created nodeset after unlinking it from
10042 * they pseudo parent.
10044 cur = newDoc->children->children;
10045 *lst = cur;
10046 while (cur != NULL) {
10047 cur->parent = NULL;
10048 cur = cur->next;
10050 newDoc->children->children = NULL;
10053 if (sax != NULL)
10054 ctxt->sax = oldsax;
10055 xmlFreeParserCtxt(ctxt);
10056 newDoc->intSubset = NULL;
10057 newDoc->extSubset = NULL;
10058 xmlFreeDoc(newDoc);
10060 return(ret);
10064 * xmlSAXParseEntity:
10065 * @sax: the SAX handler block
10066 * @filename: the filename
10068 * parse an XML external entity out of context and build a tree.
10069 * It use the given SAX function block to handle the parsing callback.
10070 * If sax is NULL, fallback to the default DOM tree building routines.
10072 * [78] extParsedEnt ::= TextDecl? content
10074 * This correspond to a "Well Balanced" chunk
10076 * Returns the resulting document tree
10079 xmlDocPtr
10080 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10081 xmlDocPtr ret;
10082 xmlParserCtxtPtr ctxt;
10083 char *directory = NULL;
10085 ctxt = xmlCreateFileParserCtxt(filename);
10086 if (ctxt == NULL) {
10087 return(NULL);
10089 if (sax != NULL) {
10090 if (ctxt->sax != NULL)
10091 xmlFree(ctxt->sax);
10092 ctxt->sax = sax;
10093 ctxt->userData = NULL;
10096 if ((ctxt->directory == NULL) && (directory == NULL))
10097 directory = xmlParserGetDirectory(filename);
10099 xmlParseExtParsedEnt(ctxt);
10101 if (ctxt->wellFormed)
10102 ret = ctxt->myDoc;
10103 else {
10104 ret = NULL;
10105 xmlFreeDoc(ctxt->myDoc);
10106 ctxt->myDoc = NULL;
10108 if (sax != NULL)
10109 ctxt->sax = NULL;
10110 xmlFreeParserCtxt(ctxt);
10112 return(ret);
10116 * xmlParseEntity:
10117 * @filename: the filename
10119 * parse an XML external entity out of context and build a tree.
10121 * [78] extParsedEnt ::= TextDecl? content
10123 * This correspond to a "Well Balanced" chunk
10125 * Returns the resulting document tree
10128 xmlDocPtr
10129 xmlParseEntity(const char *filename) {
10130 return(xmlSAXParseEntity(NULL, filename));
10134 * xmlCreateEntityParserCtxt:
10135 * @URL: the entity URL
10136 * @ID: the entity PUBLIC ID
10137 * @base: a possible base for the target URI
10139 * Create a parser context for an external entity
10140 * Automatic support for ZLIB/Compress compressed document is provided
10141 * by default if found at compile-time.
10143 * Returns the new parser context or NULL
10145 xmlParserCtxtPtr
10146 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10147 const xmlChar *base) {
10148 xmlParserCtxtPtr ctxt;
10149 xmlParserInputPtr inputStream;
10150 char *directory = NULL;
10151 xmlChar *uri;
10153 ctxt = xmlNewParserCtxt();
10154 if (ctxt == NULL) {
10155 return(NULL);
10158 uri = xmlBuildURI(URL, base);
10160 if (uri == NULL) {
10161 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10162 if (inputStream == NULL) {
10163 xmlFreeParserCtxt(ctxt);
10164 return(NULL);
10167 inputPush(ctxt, inputStream);
10169 if ((ctxt->directory == NULL) && (directory == NULL))
10170 directory = xmlParserGetDirectory((char *)URL);
10171 if ((ctxt->directory == NULL) && (directory != NULL))
10172 ctxt->directory = directory;
10173 } else {
10174 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10175 if (inputStream == NULL) {
10176 xmlFree(uri);
10177 xmlFreeParserCtxt(ctxt);
10178 return(NULL);
10181 inputPush(ctxt, inputStream);
10183 if ((ctxt->directory == NULL) && (directory == NULL))
10184 directory = xmlParserGetDirectory((char *)uri);
10185 if ((ctxt->directory == NULL) && (directory != NULL))
10186 ctxt->directory = directory;
10187 xmlFree(uri);
10190 return(ctxt);
/************************************************************************
 *									*
 * 		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
10200 * xmlCreateFileParserCtxt:
10201 * @filename: the filename
10203 * Create a parser context for a file content.
10204 * Automatic support for ZLIB/Compress compressed document is provided
10205 * by default if found at compile-time.
10207 * Returns the new parser context or NULL
10209 xmlParserCtxtPtr
10210 xmlCreateFileParserCtxt(const char *filename)
10212 xmlParserCtxtPtr ctxt;
10213 xmlParserInputPtr inputStream;
10214 char *directory = NULL;
10215 xmlChar *normalized;
10217 ctxt = xmlNewParserCtxt();
10218 if (ctxt == NULL) {
10219 if (xmlDefaultSAXHandler.error != NULL) {
10220 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10222 return(NULL);
10225 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10226 if (normalized == NULL) {
10227 xmlFreeParserCtxt(ctxt);
10228 return(NULL);
10230 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
10231 if (inputStream == NULL) {
10232 xmlFreeParserCtxt(ctxt);
10233 xmlFree(normalized);
10234 return(NULL);
10237 inputPush(ctxt, inputStream);
10238 if ((ctxt->directory == NULL) && (directory == NULL))
10239 directory = xmlParserGetDirectory((char *) normalized);
10240 if ((ctxt->directory == NULL) && (directory != NULL))
10241 ctxt->directory = directory;
10243 xmlFree(normalized);
10245 return(ctxt);
10249 * xmlSAXParseFileWithData:
10250 * @sax: the SAX handler block
10251 * @filename: the filename
10252 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10253 * documents
10254 * @data: the userdata
10256 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10257 * compressed document is provided by default if found at compile-time.
10258 * It use the given SAX function block to handle the parsing callback.
10259 * If sax is NULL, fallback to the default DOM tree building routines.
10261 * User data (void *) is stored within the parser context in the
10262 * context's _private member, so it is available nearly everywhere in libxml
10264 * Returns the resulting document tree
10267 xmlDocPtr
10268 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10269 int recovery, void *data) {
10270 xmlDocPtr ret;
10271 xmlParserCtxtPtr ctxt;
10272 char *directory = NULL;
10274 xmlInitParser();
10276 ctxt = xmlCreateFileParserCtxt(filename);
10277 if (ctxt == NULL) {
10278 return(NULL);
10280 if (sax != NULL) {
10281 if (ctxt->sax != NULL)
10282 xmlFree(ctxt->sax);
10283 ctxt->sax = sax;
10285 if (data!=NULL) {
10286 ctxt->_private=data;
10289 if ((ctxt->directory == NULL) && (directory == NULL))
10290 directory = xmlParserGetDirectory(filename);
10291 if ((ctxt->directory == NULL) && (directory != NULL))
10292 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10294 ctxt->recovery = recovery;
10296 xmlParseDocument(ctxt);
10298 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10299 else {
10300 ret = NULL;
10301 xmlFreeDoc(ctxt->myDoc);
10302 ctxt->myDoc = NULL;
10304 if (sax != NULL)
10305 ctxt->sax = NULL;
10306 xmlFreeParserCtxt(ctxt);
10308 return(ret);
10312 * xmlSAXParseFile:
10313 * @sax: the SAX handler block
10314 * @filename: the filename
10315 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10316 * documents
10318 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10319 * compressed document is provided by default if found at compile-time.
10320 * It use the given SAX function block to handle the parsing callback.
10321 * If sax is NULL, fallback to the default DOM tree building routines.
10323 * Returns the resulting document tree
10326 xmlDocPtr
10327 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10328 int recovery) {
10329 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10333 * xmlRecoverDoc:
10334 * @cur: a pointer to an array of xmlChar
10336 * parse an XML in-memory document and build a tree.
10337 * In the case the document is not Well Formed, a tree is built anyway
10339 * Returns the resulting document tree
10342 xmlDocPtr
10343 xmlRecoverDoc(xmlChar *cur) {
10344 return(xmlSAXParseDoc(NULL, cur, 1));
10348 * xmlParseFile:
10349 * @filename: the filename
10351 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10352 * compressed document is provided by default if found at compile-time.
10354 * Returns the resulting document tree if the file was wellformed,
10355 * NULL otherwise.
10358 xmlDocPtr
10359 xmlParseFile(const char *filename) {
10360 return(xmlSAXParseFile(NULL, filename, 0));
10364 * xmlRecoverFile:
10365 * @filename: the filename
10367 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10368 * compressed document is provided by default if found at compile-time.
10369 * In the case the document is not Well Formed, a tree is built anyway
10371 * Returns the resulting document tree
10374 xmlDocPtr
10375 xmlRecoverFile(const char *filename) {
10376 return(xmlSAXParseFile(NULL, filename, 1));
10381 * xmlSetupParserForBuffer:
10382 * @ctxt: an XML parser context
10383 * @buffer: a xmlChar * buffer
10384 * @filename: a file name
10386 * Setup the parser context to parse a new buffer; Clears any prior
10387 * contents from the parser context. The buffer parameter must not be
10388 * NULL, but the filename parameter can be
10390 void
10391 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10392 const char* filename)
10394 xmlParserInputPtr input;
10396 input = xmlNewInputStream(ctxt);
10397 if (input == NULL) {
10398 xmlGenericError(xmlGenericErrorContext,
10399 "malloc");
10400 xmlFree(ctxt);
10401 return;
10404 xmlClearParserCtxt(ctxt);
10405 if (filename != NULL)
10406 input->filename = xmlMemStrdup(filename);
10407 input->base = buffer;
10408 input->cur = buffer;
10409 input->end = &buffer[xmlStrlen(buffer)];
10410 inputPush(ctxt, input);
10414 * xmlSAXUserParseFile:
10415 * @sax: a SAX handler
10416 * @user_data: The user data returned on SAX callbacks
10417 * @filename: a file name
10419 * parse an XML file and call the given SAX handler routines.
10420 * Automatic support for ZLIB/Compress compressed document is provided
10422 * Returns 0 in case of success or a error number otherwise
10425 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10426 const char *filename) {
10427 int ret = 0;
10428 xmlParserCtxtPtr ctxt;
10430 ctxt = xmlCreateFileParserCtxt(filename);
10431 if (ctxt == NULL) return -1;
10432 if (ctxt->sax != &xmlDefaultSAXHandler)
10433 xmlFree(ctxt->sax);
10434 ctxt->sax = sax;
10435 if (user_data != NULL)
10436 ctxt->userData = user_data;
10438 xmlParseDocument(ctxt);
10440 if (ctxt->wellFormed)
10441 ret = 0;
10442 else {
10443 if (ctxt->errNo != 0)
10444 ret = ctxt->errNo;
10445 else
10446 ret = -1;
10448 if (sax != NULL)
10449 ctxt->sax = NULL;
10450 xmlFreeParserCtxt(ctxt);
10452 return ret;
/************************************************************************
 *									*
 *		Front ends when parsing from memory			*
 *									*
 ************************************************************************/
10462 * xmlCreateMemoryParserCtxt:
10463 * @buffer: a pointer to a char array
10464 * @size: the size of the array
10466 * Create a parser context for an XML in-memory document.
10468 * Returns the new parser context or NULL
10470 xmlParserCtxtPtr
10471 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
10472 xmlParserCtxtPtr ctxt;
10473 xmlParserInputPtr input;
10474 xmlParserInputBufferPtr buf;
10476 if (buffer == NULL)
10477 return(NULL);
10478 if (size <= 0)
10479 return(NULL);
10481 ctxt = xmlNewParserCtxt();
10482 if (ctxt == NULL)
10483 return(NULL);
10485 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10486 if (buf == NULL) {
10487 xmlFreeParserCtxt(ctxt);
10488 return(NULL);
10491 input = xmlNewInputStream(ctxt);
10492 if (input == NULL) {
10493 xmlFreeParserInputBuffer(buf);
10494 xmlFreeParserCtxt(ctxt);
10495 return(NULL);
10498 input->filename = NULL;
10499 input->buf = buf;
10500 input->base = input->buf->buffer->content;
10501 input->cur = input->buf->buffer->content;
10502 input->end = &input->buf->buffer->content[input->buf->buffer->use];
10504 inputPush(ctxt, input);
10505 return(ctxt);
10509 * xmlSAXParseMemoryWithData:
10510 * @sax: the SAX handler block
10511 * @buffer: an pointer to a char array
10512 * @size: the size of the array
10513 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10514 * documents
10515 * @data: the userdata
10517 * parse an XML in-memory block and use the given SAX function block
10518 * to handle the parsing callback. If sax is NULL, fallback to the default
10519 * DOM tree building routines.
10521 * User data (void *) is stored within the parser context in the
10522 * context's _private member, so it is available nearly everywhere in libxml
10524 * Returns the resulting document tree
10527 xmlDocPtr
10528 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10529 int size, int recovery, void *data) {
10530 xmlDocPtr ret;
10531 xmlParserCtxtPtr ctxt;
10533 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10534 if (ctxt == NULL) return(NULL);
10535 if (sax != NULL) {
10536 if (ctxt->sax != NULL)
10537 xmlFree(ctxt->sax);
10538 ctxt->sax = sax;
10540 if (data!=NULL) {
10541 ctxt->_private=data;
10544 xmlParseDocument(ctxt);
10546 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10547 else {
10548 ret = NULL;
10549 xmlFreeDoc(ctxt->myDoc);
10550 ctxt->myDoc = NULL;
10552 if (sax != NULL)
10553 ctxt->sax = NULL;
10554 xmlFreeParserCtxt(ctxt);
10556 return(ret);
10560 * xmlSAXParseMemory:
10561 * @sax: the SAX handler block
10562 * @buffer: an pointer to a char array
10563 * @size: the size of the array
10564 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10565 * documents
10567 * parse an XML in-memory block and use the given SAX function block
10568 * to handle the parsing callback. If sax is NULL, fallback to the default
10569 * DOM tree building routines.
10571 * Returns the resulting document tree
10573 xmlDocPtr
10574 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10575 int size, int recovery) {
10576 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
10580 * xmlParseMemory:
10581 * @buffer: an pointer to a char array
10582 * @size: the size of the array
10584 * parse an XML in-memory block and build a tree.
10586 * Returns the resulting document tree
10589 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
10590 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10594 * xmlRecoverMemory:
10595 * @buffer: an pointer to a char array
10596 * @size: the size of the array
10598 * parse an XML in-memory block and build a tree.
10599 * In the case the document is not Well Formed, a tree is built anyway
10601 * Returns the resulting document tree
10604 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
10605 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10609 * xmlSAXUserParseMemory:
10610 * @sax: a SAX handler
10611 * @user_data: The user data returned on SAX callbacks
10612 * @buffer: an in-memory XML document input
10613 * @size: the length of the XML document in bytes
10615 * A better SAX parsing routine.
10616 * parse an XML in-memory buffer and call the given SAX handler routines.
10618 * Returns 0 in case of success or a error number otherwise
10620 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10621 const char *buffer, int size) {
10622 int ret = 0;
10623 xmlParserCtxtPtr ctxt;
10624 xmlSAXHandlerPtr oldsax = NULL;
10626 if (sax == NULL) return -1;
10627 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10628 if (ctxt == NULL) return -1;
10629 oldsax = ctxt->sax;
10630 ctxt->sax = sax;
10631 if (user_data != NULL)
10632 ctxt->userData = user_data;
10634 xmlParseDocument(ctxt);
10636 if (ctxt->wellFormed)
10637 ret = 0;
10638 else {
10639 if (ctxt->errNo != 0)
10640 ret = ctxt->errNo;
10641 else
10642 ret = -1;
10644 ctxt->sax = oldsax;
10645 xmlFreeParserCtxt(ctxt);
10647 return ret;
10651 * xmlCreateDocParserCtxt:
10652 * @cur: a pointer to an array of xmlChar
10654 * Creates a parser context for an XML in-memory document.
10656 * Returns the new parser context or NULL
10658 xmlParserCtxtPtr
10659 xmlCreateDocParserCtxt(xmlChar *cur) {
10660 int len;
10662 if (cur == NULL)
10663 return(NULL);
10664 len = xmlStrlen(cur);
10665 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10669 * xmlSAXParseDoc:
10670 * @sax: the SAX handler block
10671 * @cur: a pointer to an array of xmlChar
10672 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10673 * documents
10675 * parse an XML in-memory document and build a tree.
10676 * It use the given SAX function block to handle the parsing callback.
10677 * If sax is NULL, fallback to the default DOM tree building routines.
10679 * Returns the resulting document tree
10682 xmlDocPtr
10683 xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10684 xmlDocPtr ret;
10685 xmlParserCtxtPtr ctxt;
10687 if (cur == NULL) return(NULL);
10690 ctxt = xmlCreateDocParserCtxt(cur);
10691 if (ctxt == NULL) return(NULL);
10692 if (sax != NULL) {
10693 ctxt->sax = sax;
10694 ctxt->userData = NULL;
10697 xmlParseDocument(ctxt);
10698 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10699 else {
10700 ret = NULL;
10701 xmlFreeDoc(ctxt->myDoc);
10702 ctxt->myDoc = NULL;
10704 if (sax != NULL)
10705 ctxt->sax = NULL;
10706 xmlFreeParserCtxt(ctxt);
10708 return(ret);
10712 * xmlParseDoc:
10713 * @cur: a pointer to an array of xmlChar
10715 * parse an XML in-memory document and build a tree.
10717 * Returns the resulting document tree
10720 xmlDocPtr
10721 xmlParseDoc(xmlChar *cur) {
10722 return(xmlSAXParseDoc(NULL, cur, 0));
/************************************************************************
 *									*
 *	Specific function to keep track of entities references		*
 *	and used by the XSLT debugger					*
 *									*
 ************************************************************************/
10732 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10735 * xmlAddEntityReference:
10736 * @ent : A valid entity
10737 * @firstNode : A valid first node for children of entity
10738 * @lastNode : A valid last node of children entity
10740 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10742 static void
10743 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10744 xmlNodePtr lastNode)
10746 if (xmlEntityRefFunc != NULL) {
10747 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10753 * xmlSetEntityReferenceFunc:
10754 * @func : A valid function
10756 * Set the function to call call back when a xml reference has been made
10758 void
10759 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10761 xmlEntityRefFunc = func;
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
10770 #ifdef LIBXML_XPATH_ENABLED
10771 #include <libxml/xpath.h>
10772 #endif
10774 extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
10775 static int xmlParserInitialized = 0;
10778 * xmlInitParser:
10780 * Initialization function for the XML parser.
10781 * This is not reentrant. Call once before processing in case of
10782 * use in multithreaded programs.
10785 void
10786 xmlInitParser(void) {
10787 if (xmlParserInitialized != 0)
10788 return;
10790 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10791 (xmlGenericError == NULL))
10792 initGenericErrorDefaultFunc(NULL);
10793 xmlInitThreads();
10794 xmlInitMemory();
10795 xmlInitCharEncodingHandlers();
10796 xmlInitializePredefinedEntities();
10797 xmlDefaultSAXHandlerInit();
10798 xmlRegisterDefaultInputCallbacks();
10799 xmlRegisterDefaultOutputCallbacks();
10800 #ifdef LIBXML_HTML_ENABLED
10801 htmlInitAutoClose();
10802 htmlDefaultSAXHandlerInit();
10803 #endif
10804 #ifdef LIBXML_XPATH_ENABLED
10805 xmlXPathInit();
10806 #endif
10807 xmlParserInitialized = 1;
10811 * xmlCleanupParser:
10813 * Cleanup function for the XML parser. It tries to reclaim all
10814 * parsing related global memory allocated for the parser processing.
10815 * It doesn't deallocate any document related memory. Calling this
10816 * function should not prevent reusing the parser.
10819 void
10820 xmlCleanupParser(void) {
10821 xmlCleanupCharEncodingHandlers();
10822 xmlCleanupPredefinedEntities();
10823 #ifdef LIBXML_CATALOG_ENABLED
10824 xmlCatalogCleanup();
10825 #endif
10826 xmlCleanupThreads();
10827 xmlParserInitialized = 0;