2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
44 #include <libxml/xmlmemory.h>
45 #include <libxml/threads.h>
46 #include <libxml/globals.h>
47 #include <libxml/tree.h>
48 #include <libxml/parser.h>
49 #include <libxml/parserInternals.h>
50 #include <libxml/valid.h>
51 #include <libxml/entities.h>
52 #include <libxml/xmlerror.h>
53 #include <libxml/encoding.h>
54 #include <libxml/xmlIO.h>
55 #include <libxml/uri.h>
56 #ifdef LIBXML_CATALOG_ENABLED
57 #include <libxml/catalog.h>
66 #ifdef HAVE_SYS_STAT_H
80 #define XML_PARSER_BIG_BUFFER_SIZE 300
81 #define XML_PARSER_BUFFER_SIZE 100
83 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
86 * List of XML prefixed PI allowed by W3C specs
89 static const char *xmlW3CPIs
[] = {
94 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
95 xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
99 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
100 xmlSAXHandlerPtr sax
,
101 void *user_data
, int depth
, const xmlChar
*URL
,
102 const xmlChar
*ID
, xmlNodePtr
*list
);
105 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
106 xmlNodePtr lastNode
);
109 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
110 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
111 /************************************************************************
113 * Parser stacks related functions and macros *
115 ************************************************************************/
117 xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
118 const xmlChar
** str
);
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions working on the
 * ctxt->name (current top), ctxt->nameNr (number of entries),
 * ctxt->nameMax (allocated slots) and ctxt->nameTab (storage) fields:
 *
 *   scope int  namePush(ctxt, value)  appends value, doubling the table
 *                                     when it is full; returns the index
 *                                     used, or 0 if the table could not
 *                                     be grown.
 *   scope type namePop(ctxt)          removes and returns the top entry,
 *                                     or 0 if the stack is empty.
 *
 * The grown table is only committed to the context once xmlRealloc has
 * succeeded, so a failed growth leaves the previous table, its capacity
 * and its content intact instead of losing the only pointer to it.
 */
#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
	int grownMax = ctxt->name##Max * 2;				\
	type *grownTab = (type *) xmlRealloc(ctxt->name##Tab,		\
	             grownMax * sizeof(ctxt->name##Tab[0]));		\
	if (grownTab == NULL) {						\
	    xmlGenericError(xmlGenericErrorContext,			\
		    "realloc failed !\n");				\
	    return(0);							\
	}								\
	ctxt->name##Tab = grownTab;					\
	ctxt->name##Max = grownMax;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
	ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
155 * @ctxt: an XML parser context
157 * Pops the top parser input from the input stack
159 * Returns the input just removed
163 * @ctxt: an XML parser context
164 * @value: the parser input
166 * Pushes a new parser input on top of the input stack
168 * Returns 0 in case of error, the index in the stack otherwise
172 * @ctxt: an XML parser context
174 * Pops the top element name from the name stack
176 * Returns the name just removed
180 * @ctxt: an XML parser context
181 * @value: the element name
183 * Pushes a new element name on top of the name stack
185 * Returns 0 in case of error, the index in the stack otherwise
189 * @ctxt: an XML parser context
191 * Pops the top element node from the node stack
193 * Returns the node just removed
197 * @ctxt: an XML parser context
198 * @value: the element node
200 * Pushes a new element node on top of the node stack
202 * Returns 0 in case of error, the index in the stack otherwise
205 * Those macros actually generate the functions
207 PUSH_AND_POP(extern, xmlParserInputPtr
, input
)
208 PUSH_AND_POP(extern, xmlNodePtr
, node
)
209 PUSH_AND_POP(extern, xmlChar
*, name
)
211 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
212 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
214 ctxt
->spaceTab
= (int *) xmlRealloc(ctxt
->spaceTab
,
215 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
216 if (ctxt
->spaceTab
== NULL
) {
217 xmlGenericError(xmlGenericErrorContext
,
218 "realloc failed !\n");
222 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
223 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
224 return(ctxt
->spaceNr
++);
227 static int spacePop(xmlParserCtxtPtr ctxt
) {
229 if (ctxt
->spaceNr
<= 0) return(0);
231 if (ctxt
->spaceNr
> 0)
232 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
235 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
236 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
241 * Macros for accessing the content. Those should be used only by the parser,
244 * Dirty macros, i.e. one often need to make assumption on the context to
247 * CUR_PTR return the current pointer to the xmlChar to be parsed.
248 * To be used with extreme caution since operations consuming
249 * characters may move the input buffer to a different location !
250 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
251 * This should be used internally by the parser
252 * only to compare to ASCII values otherwise it would break when
253 * running with UTF-8 encoding.
254 * RAW same as CUR but in the input buffer, bypass any token
255 * extraction that may have been done
256 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
257 * to compare on ASCII based substring.
258 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
259 * strings within the parser.
261 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
263 * NEXT Skip to the next character, this does the proper decoding
264 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
265 * NEXTL(l) Skip l xmlChar in the input buffer
266 * CUR_CHAR(l) returns the current unicode character (int), set l
267 * to the number of xmlChars used for the encoding [0-5].
268 * CUR_SCHAR same but operate on a string instead of the context
269 * COPY_BUF copy the current unicode char to the target buffer, increment
271 * GROW, SHRINK handling of input buffers
274 #define RAW (*ctxt->input->cur)
275 #define CUR (*ctxt->input->cur)
276 #define NXT(val) ctxt->input->cur[(val)]
277 #define CUR_PTR ctxt->input->cur
279 #define SKIP(val) do { \
280 ctxt->nbChars += (val),ctxt->input->cur += (val); \
281 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
282 if ((*ctxt->input->cur == 0) && \
283 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
287 #define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
290 static void xmlSHRINK (xmlParserCtxtPtr ctxt
) {
291 xmlParserInputShrink(ctxt
->input
);
292 if ((*ctxt
->input
->cur
== 0) &&
293 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
297 #define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
300 static void xmlGROW (xmlParserCtxtPtr ctxt
) {
301 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
302 if ((*ctxt
->input
->cur
== 0) &&
303 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
307 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
309 #define NEXT xmlNextChar(ctxt)
312 ctxt->input->cur++; \
314 if (*ctxt->input->cur == 0) \
315 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
318 #define NEXTL(l) do { \
319 if (*(ctxt->input->cur) == '\n') { \
320 ctxt->input->line++; ctxt->input->col = 1; \
321 } else ctxt->input->col++; \
322 ctxt->input->cur += l; \
323 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
326 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
327 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
329 #define COPY_BUF(l,b,i,v) \
330 if (l == 1) b[i++] = (xmlChar) v; \
331 else i += xmlCopyCharMultiByte(&b[i],v)
335 * @ctxt: the XML parser context
337 * skip all blank characters found at that point in the input stream.
338 * It pops up finished entities in the process if allowable at that point.
340 * Returns the number of space chars skipped
344 xmlSkipBlankChars(xmlParserCtxtPtr ctxt
) {
348 * It's Okay to use CUR/NEXT here since all the blanks are on
351 if ((ctxt
->inputNr
== 1) && (ctxt
->instate
!= XML_PARSER_DTD
)) {
354 * if we are in the document content, go really fast
356 cur
= ctxt
->input
->cur
;
357 while (IS_BLANK(*cur
)) {
359 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
364 ctxt
->input
->cur
= cur
;
365 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
366 cur
= ctxt
->input
->cur
;
369 ctxt
->input
->cur
= cur
;
374 while (IS_BLANK(cur
)) { /* CHECKED tstblanks.xml */
379 while ((cur
== 0) && (ctxt
->inputNr
> 1) &&
380 (ctxt
->instate
!= XML_PARSER_COMMENT
)) {
385 * Need to handle support of entities branching here
387 if (*ctxt
->input
->cur
== '%') xmlParserHandlePEReference(ctxt
);
388 } while (IS_BLANK(cur
)); /* CHECKED tstblanks.xml */
393 /************************************************************************
395 * Commodity functions to handle entities *
397 ************************************************************************/
401 * @ctxt: an XML parser context
403 * xmlPopInput: the current input pointed by ctxt->input came to an end
404 * pop it and return the next char.
406 * Returns the current xmlChar in the parser context
409 xmlPopInput(xmlParserCtxtPtr ctxt
) {
410 if (ctxt
->inputNr
== 1) return(0); /* End of main Input */
411 if (xmlParserDebugEntities
)
412 xmlGenericError(xmlGenericErrorContext
,
413 "Popping input %d\n", ctxt
->inputNr
);
414 xmlFreeInputStream(inputPop(ctxt
));
415 if ((*ctxt
->input
->cur
== 0) &&
416 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
417 return(xmlPopInput(ctxt
));
423 * @ctxt: an XML parser context
424 * @input: an XML parser input fragment (entity, XML fragment ...).
426 * xmlPushInput: switch to a new input stream which is stacked on top
427 * of the previous one(s).
430 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
431 if (input
== NULL
) return;
433 if (xmlParserDebugEntities
) {
434 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
435 xmlGenericError(xmlGenericErrorContext
,
436 "%s(%d): ", ctxt
->input
->filename
,
438 xmlGenericError(xmlGenericErrorContext
,
439 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
441 inputPush(ctxt
, input
);
447 * @ctxt: an XML parser context
449 * parse Reference declarations
451 * [66] CharRef ::= '&#' [0-9]+ ';' |
452 * '&#x' [0-9a-fA-F]+ ';'
454 * [ WFC: Legal Character ]
455 * Characters referred to using character references must match the
456 * production for Char.
458 * Returns the value parsed (as an int), 0 in case of error
461 xmlParseCharRef(xmlParserCtxtPtr ctxt
) {
462 unsigned int val
= 0;
466 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
468 if ((RAW
== '&') && (NXT(1) == '#') &&
472 while (RAW
!= ';') { /* loop blocked by count */
477 if ((RAW
>= '0') && (RAW
<= '9'))
478 val
= val
* 16 + (CUR
- '0');
479 else if ((RAW
>= 'a') && (RAW
<= 'f') && (count
< 20))
480 val
= val
* 16 + (CUR
- 'a') + 10;
481 else if ((RAW
>= 'A') && (RAW
<= 'F') && (count
< 20))
482 val
= val
* 16 + (CUR
- 'A') + 10;
484 ctxt
->errNo
= XML_ERR_INVALID_HEX_CHARREF
;
485 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
486 ctxt
->sax
->error(ctxt
->userData
,
487 "xmlParseCharRef: invalid hexadecimal value\n");
488 ctxt
->wellFormed
= 0;
489 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
497 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
501 } else if ((RAW
== '&') && (NXT(1) == '#')) {
504 while (RAW
!= ';') { /* loop blocked by count */
509 if ((RAW
>= '0') && (RAW
<= '9'))
510 val
= val
* 10 + (CUR
- '0');
512 ctxt
->errNo
= XML_ERR_INVALID_DEC_CHARREF
;
513 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
514 ctxt
->sax
->error(ctxt
->userData
,
515 "xmlParseCharRef: invalid decimal value\n");
516 ctxt
->wellFormed
= 0;
517 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
525 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
530 ctxt
->errNo
= XML_ERR_INVALID_CHARREF
;
531 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
532 ctxt
->sax
->error(ctxt
->userData
,
533 "xmlParseCharRef: invalid value\n");
534 ctxt
->wellFormed
= 0;
535 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
539 * [ WFC: Legal Character ]
540 * Characters referred to using character references must match the
541 * production for Char.
546 ctxt
->errNo
= XML_ERR_INVALID_CHAR
;
547 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
548 ctxt
->sax
->error(ctxt
->userData
,
549 "xmlParseCharRef: invalid xmlChar value %d\n",
551 ctxt
->wellFormed
= 0;
552 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
558 * xmlParseStringCharRef:
559 * @ctxt: an XML parser context
560 * @str: a pointer to an index in the string
562 * parse Reference declarations, variant parsing from a string rather
563 * than an input flow.
565 * [66] CharRef ::= '&#' [0-9]+ ';' |
566 * '&#x' [0-9a-fA-F]+ ';'
568 * [ WFC: Legal Character ]
569 * Characters referred to using character references must match the
570 * production for Char.
572 * Returns the value parsed (as an int), 0 in case of error, str will be
573 * updated to the current value of the index
576 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
581 if ((str
== NULL
) || (*str
== NULL
)) return(0);
584 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
587 while (cur
!= ';') { /* Non input consuming loop */
588 if ((cur
>= '0') && (cur
<= '9'))
589 val
= val
* 16 + (cur
- '0');
590 else if ((cur
>= 'a') && (cur
<= 'f'))
591 val
= val
* 16 + (cur
- 'a') + 10;
592 else if ((cur
>= 'A') && (cur
<= 'F'))
593 val
= val
* 16 + (cur
- 'A') + 10;
595 ctxt
->errNo
= XML_ERR_INVALID_HEX_CHARREF
;
596 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
597 ctxt
->sax
->error(ctxt
->userData
,
598 "xmlParseStringCharRef: invalid hexadecimal value\n");
599 ctxt
->wellFormed
= 0;
600 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
609 } else if ((cur
== '&') && (ptr
[1] == '#')){
612 while (cur
!= ';') { /* Non input consuming loops */
613 if ((cur
>= '0') && (cur
<= '9'))
614 val
= val
* 10 + (cur
- '0');
616 ctxt
->errNo
= XML_ERR_INVALID_DEC_CHARREF
;
617 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
618 ctxt
->sax
->error(ctxt
->userData
,
619 "xmlParseStringCharRef: invalid decimal value\n");
620 ctxt
->wellFormed
= 0;
621 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
631 ctxt
->errNo
= XML_ERR_INVALID_CHARREF
;
632 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
633 ctxt
->sax
->error(ctxt
->userData
,
634 "xmlParseStringCharRef: invalid value\n");
635 ctxt
->wellFormed
= 0;
636 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
642 * [ WFC: Legal Character ]
643 * Characters referred to using character references must match the
644 * production for Char.
649 ctxt
->errNo
= XML_ERR_INVALID_CHAR
;
650 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
651 ctxt
->sax
->error(ctxt
->userData
,
652 "xmlParseStringCharRef: invalid xmlChar value %d\n", val
);
653 ctxt
->wellFormed
= 0;
654 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
660 * xmlNewBlanksWrapperInputStream:
661 * @ctxt: an XML parser context
662 * @entity: an Entity pointer
664 * Create a new input stream for wrapping
665 * blanks around a PEReference
667 * Returns the new input stream or NULL
670 static void deallocblankswrapper (xmlChar
*str
) {xmlFree(str
);}
672 static xmlParserInputPtr
673 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
674 xmlParserInputPtr input
;
677 if (entity
== NULL
) {
678 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
679 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
680 ctxt
->sax
->error(ctxt
->userData
,
681 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
682 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
685 if (xmlParserDebugEntities
)
686 xmlGenericError(xmlGenericErrorContext
,
687 "new blanks wrapper for entity: %s\n", entity
->name
);
688 input
= xmlNewInputStream(ctxt
);
692 length
= xmlStrlen(entity
->name
) + 5;
693 buffer
= xmlMalloc(length
);
694 if (buffer
== NULL
) {
699 buffer
[length
-3] = ';';
700 buffer
[length
-2] = ' ';
701 buffer
[length
-1] = 0;
702 memcpy(buffer
+ 2, entity
->name
, length
- 5);
703 input
->free
= deallocblankswrapper
;
704 input
->base
= buffer
;
706 input
->length
= length
;
707 input
->end
= &buffer
[length
];
712 * xmlParserHandlePEReference:
713 * @ctxt: the parser context
715 * [69] PEReference ::= '%' Name ';'
717 * [ WFC: No Recursion ]
718 * A parsed entity must not contain a recursive
719 * reference to itself, either directly or indirectly.
721 * [ WFC: Entity Declared ]
722 * In a document without any DTD, a document with only an internal DTD
723 * subset which contains no parameter entity references, or a document
724 * with "standalone='yes'", ... ... The declaration of a parameter
725 * entity must precede any reference to it...
727 * [ VC: Entity Declared ]
728 * In a document with an external subset or external parameter entities
729 * with "standalone='no'", ... ... The declaration of a parameter entity
730 * must precede any reference to it...
733 * Parameter-entity references may only appear in the DTD.
734 * NOTE: misleading but this is handled.
736 * A PEReference may have been detected in the current input stream
737 * the handling is done accordingly to
738 * http://www.w3.org/TR/REC-xml#entproc
740 * - Included in literal in entity values
741 * - Included as Parameter Entity reference within DTDs
744 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
746 xmlEntityPtr entity
= NULL
;
747 xmlParserInputPtr input
;
749 if (RAW
!= '%') return;
750 switch(ctxt
->instate
) {
751 case XML_PARSER_CDATA_SECTION
:
753 case XML_PARSER_COMMENT
:
755 case XML_PARSER_START_TAG
:
757 case XML_PARSER_END_TAG
:
760 ctxt
->errNo
= XML_ERR_PEREF_AT_EOF
;
761 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
762 ctxt
->sax
->error(ctxt
->userData
, "PEReference at EOF\n");
763 ctxt
->wellFormed
= 0;
764 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
766 case XML_PARSER_PROLOG
:
767 case XML_PARSER_START
:
768 case XML_PARSER_MISC
:
769 ctxt
->errNo
= XML_ERR_PEREF_IN_PROLOG
;
770 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
771 ctxt
->sax
->error(ctxt
->userData
, "PEReference in prolog!\n");
772 ctxt
->wellFormed
= 0;
773 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
775 case XML_PARSER_ENTITY_DECL
:
776 case XML_PARSER_CONTENT
:
777 case XML_PARSER_ATTRIBUTE_VALUE
:
779 case XML_PARSER_SYSTEM_LITERAL
:
780 case XML_PARSER_PUBLIC_LITERAL
:
781 /* we just ignore it there */
783 case XML_PARSER_EPILOG
:
784 ctxt
->errNo
= XML_ERR_PEREF_IN_EPILOG
;
785 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
786 ctxt
->sax
->error(ctxt
->userData
, "PEReference in epilog!\n");
787 ctxt
->wellFormed
= 0;
788 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
790 case XML_PARSER_ENTITY_VALUE
:
792 * NOTE: in the case of entity values, we don't do the
793 * substitution here since we need the literal
794 * entity value to be able to save the internal
795 * subset of the document.
796 * This will be handled by xmlStringDecodeEntities
801 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
802 * In the internal DTD subset, parameter-entity references
803 * can occur only where markup declarations can occur, not
804 * within markup declarations.
805 * In that case this is handled in xmlParseMarkupDecl
807 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
809 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
812 case XML_PARSER_IGNORE
:
817 name
= xmlParseName(ctxt
);
818 if (xmlParserDebugEntities
)
819 xmlGenericError(xmlGenericErrorContext
,
820 "PEReference: %s\n", name
);
822 ctxt
->errNo
= XML_ERR_PEREF_NO_NAME
;
823 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
824 ctxt
->sax
->error(ctxt
->userData
, "xmlParserHandlePEReference: no name\n");
825 ctxt
->wellFormed
= 0;
826 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
830 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->getParameterEntity
!= NULL
))
831 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
832 if (entity
== NULL
) {
835 * [ WFC: Entity Declared ]
836 * In a document without any DTD, a document with only an
837 * internal DTD subset which contains no parameter entity
838 * references, or a document with "standalone='yes'", ...
839 * ... The declaration of a parameter entity must precede
840 * any reference to it...
842 if ((ctxt
->standalone
== 1) ||
843 ((ctxt
->hasExternalSubset
== 0) &&
844 (ctxt
->hasPErefs
== 0))) {
845 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
846 ctxt
->sax
->error(ctxt
->userData
,
847 "PEReference: %%%s; not found\n", name
);
848 ctxt
->wellFormed
= 0;
849 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
852 * [ VC: Entity Declared ]
853 * In a document with an external subset or external
854 * parameter entities with "standalone='no'", ...
855 * ... The declaration of a parameter entity must precede
856 * any reference to it...
858 if ((!ctxt
->disableSAX
) &&
859 (ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
860 ctxt
->vctxt
.error(ctxt
->vctxt
.userData
,
861 "PEReference: %%%s; not found\n", name
);
862 } else if ((!ctxt
->disableSAX
) &&
863 (ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
864 ctxt
->sax
->warning(ctxt
->userData
,
865 "PEReference: %%%s; not found\n", name
);
868 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
869 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
870 xmlPushInput(ctxt
, input
);
872 if ((entity
->etype
== XML_INTERNAL_PARAMETER_ENTITY
) ||
873 (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
)) {
878 * handle the extra spaces added before and after
879 * c.f. http://www.w3.org/TR/REC-xml#as-PE
880 * this is done independently.
882 input
= xmlNewEntityInputStream(ctxt
, entity
);
883 xmlPushInput(ctxt
, input
);
886 * Get the 4 first bytes and decode the charset
887 * if enc != XML_CHAR_ENCODING_NONE
888 * plug some encoding conversion routines.
891 if (entity
->length
>= 4) {
896 enc
= xmlDetectCharEncoding(start
, 4);
897 if (enc
!= XML_CHAR_ENCODING_NONE
) {
898 xmlSwitchEncoding(ctxt
, enc
);
902 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
903 (RAW
== '<') && (NXT(1) == '?') &&
904 (NXT(2) == 'x') && (NXT(3) == 'm') &&
905 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
906 xmlParseTextDecl(ctxt
);
909 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
910 ctxt
->sax
->error(ctxt
->userData
,
911 "xmlParserHandlePEReference: %s is not a parameter entity\n",
913 ctxt
->wellFormed
= 0;
914 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
918 ctxt
->errNo
= XML_ERR_PEREF_SEMICOL_MISSING
;
919 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
920 ctxt
->sax
->error(ctxt
->userData
,
921 "xmlParserHandlePEReference: expecting ';'\n");
922 ctxt
->wellFormed
= 0;
923 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
930 * Macro used to grow the current buffer.
932 #define growBuffer(buffer) { \
933 buffer##_size *= 2; \
934 buffer = (xmlChar *) \
935 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
936 if (buffer == NULL) { \
937 xmlGenericError(xmlGenericErrorContext, "realloc failed"); \
943 * xmlStringDecodeEntities:
944 * @ctxt: the parser context
945 * @str: the input string
946 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
947 * @end: an end marker xmlChar, 0 if none
948 * @end2: an end marker xmlChar, 0 if none
949 * @end3: an end marker xmlChar, 0 if none
951 * Takes an entity string content and processes it to perform the adequate substitutions.
953 * [67] Reference ::= EntityRef | CharRef
955 * [69] PEReference ::= '%' Name ';'
957 * Returns A newly allocated string with the substitution done. The caller
958 * must deallocate it !
961 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
962 xmlChar end
, xmlChar end2
, xmlChar end3
) {
963 xmlChar
*buffer
= NULL
;
966 xmlChar
*current
= NULL
;
974 if (ctxt
->depth
> 40) {
975 ctxt
->errNo
= XML_ERR_ENTITY_LOOP
;
976 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
977 ctxt
->sax
->error(ctxt
->userData
,
978 "Detected entity reference loop\n");
979 ctxt
->wellFormed
= 0;
980 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
985 * allocate a translation buffer.
987 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
988 buffer
= (xmlChar
*) xmlMalloc(buffer_size
* sizeof(xmlChar
));
989 if (buffer
== NULL
) {
990 xmlGenericError(xmlGenericErrorContext
,
991 "xmlStringDecodeEntities: malloc failed");
996 * OK loop until we reach one of the ending char or a size limit.
997 * we are operating on already parsed values.
999 c
= CUR_SCHAR(str
, l
);
1000 while ((c
!= 0) && (c
!= end
) && /* non input consuming loop */
1001 (c
!= end2
) && (c
!= end3
)) {
1004 if ((c
== '&') && (str
[1] == '#')) {
1005 int val
= xmlParseStringCharRef(ctxt
, &str
);
1007 COPY_BUF(0,buffer
,nbchars
,val
);
1009 } else if ((c
== '&') && (what
& XML_SUBSTITUTE_REF
)) {
1010 if (xmlParserDebugEntities
)
1011 xmlGenericError(xmlGenericErrorContext
,
1012 "String decoding Entity Reference: %.30s\n",
1014 ent
= xmlParseStringEntityRef(ctxt
, &str
);
1015 if ((ent
!= NULL
) &&
1016 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
1017 if (ent
->content
!= NULL
) {
1018 COPY_BUF(0,buffer
,nbchars
,ent
->content
[0]);
1020 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1021 ctxt
->sax
->error(ctxt
->userData
,
1022 "internal error entity has no content\n");
1024 } else if ((ent
!= NULL
) && (ent
->content
!= NULL
)) {
1028 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
1033 while (*current
!= 0) { /* non input consuming loop */
1034 buffer
[nbchars
++] = *current
++;
1036 buffer_size
- XML_PARSER_BUFFER_SIZE
) {
1042 } else if (ent
!= NULL
) {
1043 int i
= xmlStrlen(ent
->name
);
1044 const xmlChar
*cur
= ent
->name
;
1046 buffer
[nbchars
++] = '&';
1047 if (nbchars
> buffer_size
- i
- XML_PARSER_BUFFER_SIZE
) {
1051 buffer
[nbchars
++] = *cur
++;
1052 buffer
[nbchars
++] = ';';
1054 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
1055 if (xmlParserDebugEntities
)
1056 xmlGenericError(xmlGenericErrorContext
,
1057 "String decoding PE Reference: %.30s\n", str
);
1058 ent
= xmlParseStringPEReference(ctxt
, &str
);
1063 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
1068 while (*current
!= 0) { /* non input consuming loop */
1069 buffer
[nbchars
++] = *current
++;
1071 buffer_size
- XML_PARSER_BUFFER_SIZE
) {
1079 COPY_BUF(l
,buffer
,nbchars
,c
);
1081 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
1085 c
= CUR_SCHAR(str
, l
);
1087 buffer
[nbchars
++] = 0;
1092 /************************************************************************
1094 * Commodity functions to handle xmlChars *
1096 ************************************************************************/
1100 * @cur: the input xmlChar *
1101 * @len: the len of @cur
1103 * a strndup for array of xmlChar's
1105 * Returns a new xmlChar * or NULL
1108 xmlStrndup(const xmlChar
*cur
, int len
) {
1111 if ((cur
== NULL
) || (len
< 0)) return(NULL
);
1112 ret
= (xmlChar
*) xmlMalloc((len
+ 1) * sizeof(xmlChar
));
1114 xmlGenericError(xmlGenericErrorContext
,
1115 "malloc of %ld byte failed\n",
1116 (len
+ 1) * (long)sizeof(xmlChar
));
1119 memcpy(ret
, cur
, len
* sizeof(xmlChar
));
1126 * @cur: the input xmlChar *
1128 * a strdup for array of xmlChar's. Since they are supposed to be
1129 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1130 * a termination mark of '0'.
1132 * Returns a new xmlChar * or NULL
1135 xmlStrdup(const xmlChar
*cur
) {
1136 const xmlChar
*p
= cur
;
1138 if (cur
== NULL
) return(NULL
);
1139 while (*p
!= 0) p
++; /* non input consuming */
1140 return(xmlStrndup(cur
, p
- cur
));
1145 * @cur: the input char *
1146 * @len: the len of @cur
1148 * a strndup for char's to xmlChar's
1150 * Returns a new xmlChar * or NULL
1154 xmlCharStrndup(const char *cur
, int len
) {
1158 if ((cur
== NULL
) || (len
< 0)) return(NULL
);
1159 ret
= (xmlChar
*) xmlMalloc((len
+ 1) * sizeof(xmlChar
));
1161 xmlGenericError(xmlGenericErrorContext
, "malloc of %ld byte failed\n",
1162 (len
+ 1) * (long)sizeof(xmlChar
));
1165 for (i
= 0;i
< len
;i
++)
1166 ret
[i
] = (xmlChar
) cur
[i
];
1173 * @cur: the input char *
1174 * @len: the len of @cur
1176 * a strdup for char's to xmlChar's
1178 * Returns a new xmlChar * or NULL
1182 xmlCharStrdup(const char *cur
) {
1183 const char *p
= cur
;
1185 if (cur
== NULL
) return(NULL
);
1186 while (*p
!= '\0') p
++; /* non input consuming */
1187 return(xmlCharStrndup(cur
, p
- cur
));
/**
 * xmlStrcmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 *
 * a strcmp for xmlChar's
 *
 * Two identical pointers (including two NULLs) compare equal; a NULL
 * string sorts before any non-NULL string. Otherwise the result is the
 * difference between the first pair of xmlChars that differ.
 *
 * Returns the integer result of the comparison
 */
int
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
    int tmp;

    if (str1 == str2) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
	tmp = *str1++ - *str2;
	if (tmp != 0) return(tmp);
    } while (*str2++ != 0);
    return(0);
}
/**
 * xmlStrEqual:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 *
 * Check if both strings are equal or have the same content.
 * Should be a bit more readable and faster than xmlStrcmp()
 *
 * Two identical pointers (including two NULLs) are equal; a NULL string
 * is never equal to a non-NULL one.
 *
 * Returns 1 if they are equal, 0 if they are different
 */
int
xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
    if (str1 == str2) return(1);
    if (str1 == NULL) return(0);
    if (str2 == NULL) return(0);
    do {
	if (*str1++ != *str2) return(0);
    } while (*str2++ != 0);
    return(1);
}
/**
 * xmlStrncmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 * @len:  the max comparison length
 *
 * a strncmp for xmlChar's
 *
 * A @len of 0 or less always compares equal. Two identical pointers
 * compare equal; a NULL string sorts before any non-NULL string.
 * Otherwise at most @len xmlChars are compared and the result is the
 * difference between the first pair that differ.
 *
 * Returns the integer result of the comparison
 */
int
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
    int tmp;

    if (len <= 0) return(0);
    if (str1 == str2) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
	tmp = *str1++ - *str2;
	if (tmp != 0 || --len == 0) return(tmp);
    } while (*str2++ != 0);
    return(0);
}
/*
 * Case folding table used by the case-insensitive comparisons: it maps
 * 'A'..'Z' (0x41..0x5A) to 'a'..'z' (0x61..0x7A) and every other value
 * to itself. In particular '[' (0x5B) maps to itself and not to '{'
 * (0x7B): only letters may be folded.
 */
static const xmlChar casemap[256] = {
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
1299 * @str1: the first xmlChar *
1300 * @str2: the second xmlChar *
1302 * a strcasecmp for xmlChar's
1304 * Returns the integer result of the comparison
1308 xmlStrcasecmp(const xmlChar
*str1
, const xmlChar
*str2
) {
1311 if (str1
== str2
) return(0);
1312 if (str1
== NULL
) return(-1);
1313 if (str2
== NULL
) return(1);
1315 tmp
= casemap
[*str1
++] - casemap
[*str2
];
1316 if (tmp
!= 0) return(tmp
);
1317 } while (*str2
++ != 0);
1323 * @str1: the first xmlChar *
1324 * @str2: the second xmlChar *
1325 * @len: the max comparison length
1327 * a strncasecmp for xmlChar's
1329 * Returns the integer result of the comparison
1333 xmlStrncasecmp(const xmlChar
*str1
, const xmlChar
*str2
, int len
) {
1336 if (len
<= 0) return(0);
1337 if (str1
== str2
) return(0);
1338 if (str1
== NULL
) return(-1);
1339 if (str2
== NULL
) return(1);
1341 tmp
= casemap
[*str1
++] - casemap
[*str2
];
1342 if (tmp
!= 0 || --len
== 0) return(tmp
);
1343 } while (*str2
++ != 0);
1349 * @str: the xmlChar * array
1350 * @val: the xmlChar to search
1352 * a strchr for xmlChar's
1354 * Returns the xmlChar * for the first occurrence or NULL.
1358 xmlStrchr(const xmlChar
*str
, xmlChar val
) {
1359 if (str
== NULL
) return(NULL
);
1360 while (*str
!= 0) { /* non input consuming */
1361 if (*str
== val
) return((xmlChar
*) str
);
1369 * @str: the xmlChar * array (haystack)
1370 * @val: the xmlChar to search (needle)
1372 * a strstr for xmlChar's
1374 * Returns the xmlChar * for the first occurrence or NULL.
1378 xmlStrstr(const xmlChar
*str
, const xmlChar
*val
) {
1381 if (str
== NULL
) return(NULL
);
1382 if (val
== NULL
) return(NULL
);
1385 if (n
== 0) return(str
);
1386 while (*str
!= 0) { /* non input consuming */
1388 if (!xmlStrncmp(str
, val
, n
)) return((const xmlChar
*) str
);
1397 * @str: the xmlChar * array (haystack)
1398 * @val: the xmlChar to search (needle)
1400 * a case-ignoring strstr for xmlChar's
1402 * Returns the xmlChar * for the first occurrence or NULL.
1406 xmlStrcasestr(const xmlChar
*str
, xmlChar
*val
) {
1409 if (str
== NULL
) return(NULL
);
1410 if (val
== NULL
) return(NULL
);
1413 if (n
== 0) return(str
);
1414 while (*str
!= 0) { /* non input consuming */
1415 if (casemap
[*str
] == casemap
[*val
])
1416 if (!xmlStrncasecmp(str
, val
, n
)) return(str
);
1424 * @str: the xmlChar * array (haystack)
1425 * @start: the index of the first char (zero based)
1426 * @len: the length of the substring
1428 * Extract a substring of a given string
1430 * Returns the xmlChar * for the first occurrence or NULL.
1434 xmlStrsub(const xmlChar
*str
, int start
, int len
) {
1437 if (str
== NULL
) return(NULL
);
1438 if (start
< 0) return(NULL
);
1439 if (len
< 0) return(NULL
);
1441 for (i
= 0;i
< start
;i
++) {
1442 if (*str
== 0) return(NULL
);
1445 if (*str
== 0) return(NULL
);
1446 return(xmlStrndup(str
, len
));
1451 * @str: the xmlChar * array
1453 * length of a xmlChar's string
1455 * Returns the number of xmlChar contained in the ARRAY.
1459 xmlStrlen(const xmlChar
*str
) {
1462 if (str
== NULL
) return(0);
1463 while (*str
!= 0) { /* non input consuming */
1472 * @cur: the original xmlChar * array
1473 * @add: the xmlChar * array added
1474 * @len: the length of @add
1476 * a strncat for array of xmlChar's, it will extend @cur with the len
1477 * first bytes of @add.
1479 * Returns a new xmlChar *, the original @cur is reallocated if needed
1480 * and should not be freed
1484 xmlStrncat(xmlChar
*cur
, const xmlChar
*add
, int len
) {
1488 if ((add
== NULL
) || (len
== 0))
1491 return(xmlStrndup(add
, len
));
1493 size
= xmlStrlen(cur
);
1494 ret
= (xmlChar
*) xmlRealloc(cur
, (size
+ len
+ 1) * sizeof(xmlChar
));
1496 xmlGenericError(xmlGenericErrorContext
,
1497 "xmlStrncat: realloc of %ld byte failed\n",
1498 (size
+ len
+ 1) * (long)sizeof(xmlChar
));
1501 memcpy(&ret
[size
], add
, len
* sizeof(xmlChar
));
1502 ret
[size
+ len
] = 0;
1508 * @cur: the original xmlChar * array
1509 * @add: the xmlChar * array added
1511 * a strcat for array of xmlChar's. Since they are supposed to be
1512 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1513 * a termination mark of '0'.
1515 * Returns a new xmlChar * containing the concatenated string.
1518 xmlStrcat(xmlChar
*cur
, const xmlChar
*add
) {
1519 const xmlChar
*p
= add
;
1521 if (add
== NULL
) return(cur
);
1523 return(xmlStrdup(add
));
1525 while (*p
!= 0) p
++; /* non input consuming */
1526 return(xmlStrncat(cur
, add
, p
- add
));
1529 /************************************************************************
1531 * Commodity functions, cleanup needed ? *
1533 ************************************************************************/
1537 * @ctxt: an XML parser context
1539 * @len: the size of @str
1541 * Is this a sequence of blank chars that one can ignore ?
1543 * Returns 1 if ignorable 0 otherwise.
1546 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
) {
1548 xmlNodePtr lastChild
;
1551 * Don't spend time trying to differentiate them, the same callback is
1554 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
1558 * Check for xml:space value.
1560 if (*(ctxt
->space
) == 1)
1564 * Check that the string is made of blanks
1566 for (i
= 0;i
< len
;i
++)
1567 if (!(IS_BLANK(str
[i
]))) return(0);
1570 * Look if the element is mixed content in the DTD if available
1572 if (ctxt
->node
== NULL
) return(0);
1573 if (ctxt
->myDoc
!= NULL
) {
1574 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
1575 if (ret
== 0) return(1);
1576 if (ret
== 1) return(0);
1580 * Otherwise, heuristic :-\
1582 if (RAW
!= '<') return(0);
1583 if ((ctxt
->node
->children
== NULL
) &&
1584 (RAW
== '<') && (NXT(1) == '/')) return(0);
1586 lastChild
= xmlGetLastChild(ctxt
->node
);
1587 if (lastChild
== NULL
) {
1588 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
1589 (ctxt
->node
->content
!= NULL
)) return(0);
1590 } else if (xmlNodeIsText(lastChild
))
1592 else if ((ctxt
->node
->children
!= NULL
) &&
1593 (xmlNodeIsText(ctxt
->node
->children
)))
1598 /************************************************************************
1600 * Extra stuff for namespace support *
1601 * Relates to http://www.w3.org/TR/WD-xml-names *
1603 ************************************************************************/
1607 * @ctxt: an XML parser context
1608 * @name: an XML qualified name (UTF-8 encoded)
1609 * @prefix: a xmlChar **
1611 * parse an UTF8 encoded XML qualified name string
1613 * [NS 5] QName ::= (Prefix ':')? LocalPart
1615 * [NS 6] Prefix ::= NCName
1617 * [NS 7] LocalPart ::= NCName
1619 * Returns the local part, and prefix is updated
1620 * to get the Prefix if any.
1624 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
1625 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
1626 xmlChar
*buffer
= NULL
;
1628 int max
= XML_MAX_NAMELEN
;
1629 xmlChar
*ret
= NULL
;
1630 const xmlChar
*cur
= name
;
1635 #ifndef XML_XML_NAMESPACE
1636 /* xml: prefix is not really a namespace */
1637 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
1638 (cur
[2] == 'l') && (cur
[3] == ':'))
1639 return(xmlStrdup(name
));
1642 /* nasty but valid */
1644 return(xmlStrdup(name
));
1647 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
1653 * Okay someone managed to make a huge name, so he's ready to pay
1654 * for the processing speed.
1658 buffer
= (xmlChar
*) xmlMalloc(max
* sizeof(xmlChar
));
1659 if (buffer
== NULL
) {
1660 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1661 ctxt
->sax
->error(ctxt
->userData
,
1662 "xmlSplitQName: out of memory\n");
1665 memcpy(buffer
, buf
, len
);
1666 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
1667 if (len
+ 10 > max
) {
1669 buffer
= (xmlChar
*) xmlRealloc(buffer
,
1670 max
* sizeof(xmlChar
));
1671 if (buffer
== NULL
) {
1672 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1673 ctxt
->sax
->error(ctxt
->userData
,
1674 "xmlSplitQName: out of memory\n");
1685 ret
= xmlStrndup(buf
, len
);
1689 max
= XML_MAX_NAMELEN
;
1695 if (c
== 0) return(ret
);
1700 * Check that the first character is proper to start
1703 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
1704 ((c
>= 0x41) && (c
<= 0x5A)) ||
1705 (c
== '_') || (c
== ':'))) {
1707 int first
= CUR_SCHAR(cur
, l
);
1709 if (!IS_LETTER(first
) && (first
!= '_')) {
1710 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1711 ctxt
->sax
->error(ctxt
->userData
,
1712 "Name %s is not XML Namespace compliant\n",
1718 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
1724 * Okay someone managed to make a huge name, so he's ready to pay
1725 * for the processing speed.
1729 buffer
= (xmlChar
*) xmlMalloc(max
* sizeof(xmlChar
));
1730 if (buffer
== NULL
) {
1731 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1732 ctxt
->sax
->error(ctxt
->userData
,
1733 "xmlSplitQName: out of memory\n");
1736 memcpy(buffer
, buf
, len
);
1737 while (c
!= 0) { /* tested bigname2.xml */
1738 if (len
+ 10 > max
) {
1740 buffer
= (xmlChar
*) xmlRealloc(buffer
,
1741 max
* sizeof(xmlChar
));
1742 if (buffer
== NULL
) {
1743 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1744 ctxt
->sax
->error(ctxt
->userData
,
1745 "xmlSplitQName: out of memory\n");
1756 ret
= xmlStrndup(buf
, len
);
1765 /************************************************************************
1767 * The parser itself *
1768 * Relates to http://www.w3.org/TR/REC-xml *
1770 ************************************************************************/
1772 static xmlChar
* xmlParseNameComplex(xmlParserCtxtPtr ctxt
);
1775 * @ctxt: an XML parser context
1777 * parse an XML name.
1779 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1780 * CombiningChar | Extender
1782 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1784 * [6] Names ::= Name (S Name)*
1786 * Returns the Name parsed or NULL
1790 xmlParseName(xmlParserCtxtPtr ctxt
) {
1798 * Accelerator for simple ASCII names
1800 in
= ctxt
->input
->cur
;
1801 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
1802 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
1803 (*in
== '_') || (*in
== ':')) {
1805 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
1806 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
1807 ((*in
>= 0x30) && (*in
<= 0x39)) ||
1808 (*in
== '_') || (*in
== '-') ||
1809 (*in
== ':') || (*in
== '.'))
1811 if ((*in
> 0) && (*in
< 0x80)) {
1812 count
= in
- ctxt
->input
->cur
;
1813 ret
= xmlStrndup(ctxt
->input
->cur
, count
);
1814 ctxt
->input
->cur
= in
;
1818 return(xmlParseNameComplex(ctxt
));
1822 * xmlParseNameAndCompare:
1823 * @ctxt: an XML parser context
1825 * parse an XML name and compares for match
1826 * (specialized for endtag parsing)
1829 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1830 * and the name for mismatch
1834 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
1835 const xmlChar
*cmp
= other
;
1841 in
= ctxt
->input
->cur
;
1842 while (*in
!= 0 && *in
== *cmp
) {
1846 if (*cmp
== 0 && (*in
== '>' || IS_BLANK (*in
))) {
1848 ctxt
->input
->cur
= in
;
1849 return (xmlChar
*) 1;
1851 /* failure (or end of input buffer), check with full function */
1852 ret
= xmlParseName (ctxt
);
1853 if (ret
!= 0 && xmlStrEqual (ret
, other
)) {
1855 return (xmlChar
*) 1;
1861 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
1862 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
1868 * Handler for more complex cases
1872 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
1873 (!IS_LETTER(c
) && (c
!= '_') &&
1878 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
1879 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
1880 (c
== '.') || (c
== '-') ||
1881 (c
== '_') || (c
== ':') ||
1882 (IS_COMBINING(c
)) ||
1883 (IS_EXTENDER(c
)))) {
1884 if (count
++ > 100) {
1888 COPY_BUF(l
,buf
,len
,c
);
1891 if (len
>= XML_MAX_NAMELEN
) {
1893 * Okay someone managed to make a huge name, so he's ready to pay
1894 * for the processing speed.
1899 buffer
= (xmlChar
*) xmlMalloc(max
* sizeof(xmlChar
));
1900 if (buffer
== NULL
) {
1901 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1902 ctxt
->sax
->error(ctxt
->userData
,
1903 "xmlParseNameComplex: out of memory\n");
1906 memcpy(buffer
, buf
, len
);
1907 while ((IS_LETTER(c
)) || (IS_DIGIT(c
)) || /* test bigname.xml */
1908 (c
== '.') || (c
== '-') ||
1909 (c
== '_') || (c
== ':') ||
1910 (IS_COMBINING(c
)) ||
1912 if (count
++ > 100) {
1916 if (len
+ 10 > max
) {
1918 buffer
= (xmlChar
*) xmlRealloc(buffer
,
1919 max
* sizeof(xmlChar
));
1920 if (buffer
== NULL
) {
1921 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1922 ctxt
->sax
->error(ctxt
->userData
,
1923 "xmlParseNameComplex: out of memory\n");
1927 COPY_BUF(l
,buffer
,len
,c
);
1935 return(xmlStrndup(buf
, len
));
1939 * xmlParseStringName:
1940 * @ctxt: an XML parser context
1941 * @str: a pointer to the string pointer (IN/OUT)
1943 * parse an XML name.
1945 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1946 * CombiningChar | Extender
1948 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1950 * [6] Names ::= Name (S Name)*
1952 * Returns the Name parsed or NULL. The @str pointer
1953 * is updated to the current location in the string.
1957 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
1958 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
1959 const xmlChar
*cur
= *str
;
1963 c
= CUR_SCHAR(cur
, l
);
1964 if (!IS_LETTER(c
) && (c
!= '_') &&
1969 while ((IS_LETTER(c
)) || (IS_DIGIT(c
)) || /* test bigentname.xml */
1970 (c
== '.') || (c
== '-') ||
1971 (c
== '_') || (c
== ':') ||
1972 (IS_COMBINING(c
)) ||
1974 COPY_BUF(l
,buf
,len
,c
);
1976 c
= CUR_SCHAR(cur
, l
);
1977 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
1979 * Okay someone managed to make a huge name, so he's ready to pay
1980 * for the processing speed.
1985 buffer
= (xmlChar
*) xmlMalloc(max
* sizeof(xmlChar
));
1986 if (buffer
== NULL
) {
1987 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1988 ctxt
->sax
->error(ctxt
->userData
,
1989 "xmlParseStringName: out of memory\n");
1992 memcpy(buffer
, buf
, len
);
1993 while ((IS_LETTER(c
)) || (IS_DIGIT(c
)) || /* test bigentname.xml */
1994 (c
== '.') || (c
== '-') ||
1995 (c
== '_') || (c
== ':') ||
1996 (IS_COMBINING(c
)) ||
1998 if (len
+ 10 > max
) {
2000 buffer
= (xmlChar
*) xmlRealloc(buffer
,
2001 max
* sizeof(xmlChar
));
2002 if (buffer
== NULL
) {
2003 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2004 ctxt
->sax
->error(ctxt
->userData
,
2005 "xmlParseStringName: out of memory\n");
2009 COPY_BUF(l
,buffer
,len
,c
);
2011 c
= CUR_SCHAR(cur
, l
);
2019 return(xmlStrndup(buf
, len
));
2024 * @ctxt: an XML parser context
2026 * parse an XML Nmtoken.
2028 * [7] Nmtoken ::= (NameChar)+
2030 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2032 * Returns the Nmtoken parsed or NULL
2036 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
2037 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2045 while ((IS_LETTER(c
)) || (IS_DIGIT(c
)) || /* test bigtoken.xml */
2046 (c
== '.') || (c
== '-') ||
2047 (c
== '_') || (c
== ':') ||
2048 (IS_COMBINING(c
)) ||
2050 if (count
++ > 100) {
2054 COPY_BUF(l
,buf
,len
,c
);
2057 if (len
>= XML_MAX_NAMELEN
) {
2059 * Okay someone managed to make a huge token, so he's ready to pay
2060 * for the processing speed.
2065 buffer
= (xmlChar
*) xmlMalloc(max
* sizeof(xmlChar
));
2066 if (buffer
== NULL
) {
2067 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2068 ctxt
->sax
->error(ctxt
->userData
,
2069 "xmlParseNmtoken: out of memory\n");
2072 memcpy(buffer
, buf
, len
);
2073 while ((IS_LETTER(c
)) || (IS_DIGIT(c
)) || /* test bigtoken.xml */
2074 (c
== '.') || (c
== '-') ||
2075 (c
== '_') || (c
== ':') ||
2076 (IS_COMBINING(c
)) ||
2078 if (count
++ > 100) {
2082 if (len
+ 10 > max
) {
2084 buffer
= (xmlChar
*) xmlRealloc(buffer
,
2085 max
* sizeof(xmlChar
));
2086 if (buffer
== NULL
) {
2087 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2088 ctxt
->sax
->error(ctxt
->userData
,
2089 "xmlParseNmtoken: out of memory\n");
2093 COPY_BUF(l
,buffer
,len
,c
);
2103 return(xmlStrndup(buf
, len
));
2107 * xmlParseEntityValue:
2108 * @ctxt: an XML parser context
2109 * @orig: if non-NULL store a copy of the original entity value
2111 * parse a value for ENTITY declarations
2113 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2114 * "'" ([^%&'] | PEReference | Reference)* "'"
2116 * Returns the EntityValue parsed with reference substituted or NULL
2120 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
2121 xmlChar
*buf
= NULL
;
2123 int size
= XML_PARSER_BUFFER_SIZE
;
2126 xmlChar
*ret
= NULL
;
2127 const xmlChar
*cur
= NULL
;
2128 xmlParserInputPtr input
;
2130 if (RAW
== '"') stop
= '"';
2131 else if (RAW
== '\'') stop
= '\'';
2133 ctxt
->errNo
= XML_ERR_ENTITY_NOT_STARTED
;
2134 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2135 ctxt
->sax
->error(ctxt
->userData
, "EntityValue: \" or ' expected\n");
2136 ctxt
->wellFormed
= 0;
2137 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2140 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
2142 xmlGenericError(xmlGenericErrorContext
,
2143 "malloc of %d byte failed\n", size
);
2148 * The content of the entity definition is copied in a buffer.
2151 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
2152 input
= ctxt
->input
;
2157 * NOTE: 4.4.5 Included in Literal
2158 * When a parameter entity reference appears in a literal entity
2159 * value, ... a single or double quote character in the replacement
2160 * text is always treated as a normal data character and will not
2161 * terminate the literal.
2162 * In practice it means we stop the loop only when back at parsing
2163 * the initial entity and the quote is found
2165 while ((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
2166 (ctxt
->input
!= input
))) {
2167 if (len
+ 5 >= size
) {
2169 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
2171 xmlGenericError(xmlGenericErrorContext
,
2172 "realloc of %d byte failed\n", size
);
2176 COPY_BUF(l
,buf
,len
,c
);
2179 * Pop-up of finished entities.
2181 while ((RAW
== 0) && (ctxt
->inputNr
> 1)) /* non input consuming */
2194 * Raise problem w.r.t. '&' and '%' being used in non-entities
2195 * reference constructs. Note Charref will be handled in
2196 * xmlStringDecodeEntities()
2199 while (*cur
!= 0) { /* non input consuming */
2200 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
2205 name
= xmlParseStringName(ctxt
, &cur
);
2206 if ((name
== NULL
) || (*cur
!= ';')) {
2207 ctxt
->errNo
= XML_ERR_ENTITY_CHAR_ERROR
;
2208 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2209 ctxt
->sax
->error(ctxt
->userData
,
2210 "EntityValue: '%c' forbidden except for entities references\n",
2212 ctxt
->wellFormed
= 0;
2213 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2215 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
2216 (ctxt
->inputNr
== 1)) {
2217 ctxt
->errNo
= XML_ERR_ENTITY_PE_INTERNAL
;
2218 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2219 ctxt
->sax
->error(ctxt
->userData
,
2220 "EntityValue: PEReferences forbidden in internal subset\n",
2222 ctxt
->wellFormed
= 0;
2223 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2232 * Then PEReference entities are substituted.
2235 ctxt
->errNo
= XML_ERR_ENTITY_NOT_FINISHED
;
2236 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2237 ctxt
->sax
->error(ctxt
->userData
, "EntityValue: \" expected\n");
2238 ctxt
->wellFormed
= 0;
2239 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2244 * NOTE: 4.4.7 Bypassed
2245 * When a general entity reference appears in the EntityValue in
2246 * an entity declaration, it is bypassed and left as is.
2247 * so XML_SUBSTITUTE_REF is not set here.
2249 ret
= xmlStringDecodeEntities(ctxt
, buf
, XML_SUBSTITUTE_PEREF
,
2262 * @ctxt: an XML parser context
2264 * parse a value for an attribute
2265 * Note: the parser won't do substitution of entities here, this
2266 * will be handled later in xmlStringGetNodeList
2268 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2269 * "'" ([^<&'] | Reference)* "'"
2271 * 3.3.3 Attribute-Value Normalization:
2272 * Before the value of an attribute is passed to the application or
2273 * checked for validity, the XML processor must normalize it as follows:
2274 * - a character reference is processed by appending the referenced
2275 * character to the attribute value
2276 * - an entity reference is processed by recursively processing the
2277 * replacement text of the entity
2278 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2279 * appending #x20 to the normalized value, except that only a single
2280 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2281 * parsed entity or the literal entity value of an internal parsed entity
2282 * - other characters are processed by appending them to the normalized value
2283 * If the declared value is not CDATA, then the XML processor must further
2284 * process the normalized attribute value by discarding any leading and
2285 * trailing space (#x20) characters, and by replacing sequences of space
2286 * (#x20) characters by a single space (#x20) character.
2287 * All attributes for which no declaration has been read should be treated
2288 * by a non-validating parser as if declared CDATA.
2290 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2294 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
);
2297 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
2299 const xmlChar
*in
= NULL
;
2300 xmlChar
*ret
= NULL
;
2303 in
= (xmlChar
*) CUR_PTR
;
2304 if (*in
!= '"' && *in
!= '\'') {
2305 ctxt
->errNo
= XML_ERR_ATTRIBUTE_NOT_STARTED
;
2306 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2307 ctxt
->sax
->error(ctxt
->userData
, "AttValue: \" or ' expected\n");
2308 ctxt
->wellFormed
= 0;
2309 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2312 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
2316 while (*in
!= limit
&& *in
>= 0x20 && *in
<= 0x7f &&
2317 *in
!= '&' && *in
!= '<'
2322 return xmlParseAttValueComplex(ctxt
);
2325 ret
= xmlStrndup (CUR_PTR
+ 1, in
- CUR_PTR
- 2);
2331 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
) {
2333 xmlChar
*buf
= NULL
;
2337 xmlChar
*current
= NULL
;
2342 if (NXT(0) == '"') {
2343 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
2346 } else if (NXT(0) == '\'') {
2348 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
2351 ctxt
->errNo
= XML_ERR_ATTRIBUTE_NOT_STARTED
;
2352 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2353 ctxt
->sax
->error(ctxt
->userData
, "AttValue: \" or ' expected\n");
2354 ctxt
->wellFormed
= 0;
2355 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2360 * allocate a translation buffer.
2362 buf_size
= XML_PARSER_BUFFER_SIZE
;
2363 buf
= (xmlChar
*) xmlMalloc(buf_size
* sizeof(xmlChar
));
2365 xmlGenericError(xmlGenericErrorContext
,
2366 "xmlParseAttValue: malloc failed");
2371 * OK loop until we reach one of the ending char or a size limit.
2374 while ((NXT(0) != limit
) && /* checked */
2378 if (NXT(1) == '#') {
2379 int val
= xmlParseCharRef(ctxt
);
2381 if (ctxt
->replaceEntities
) {
2382 if (len
> buf_size
- 10) {
2388 * The reparsing will be done in xmlStringGetNodeList()
2389 * called by the attribute() function in SAX.c
2391 static xmlChar buffer
[6] = "&";
2393 if (len
> buf_size
- 10) {
2396 current
= &buffer
[0];
2397 while (*current
!= 0) { /* non input consuming */
2398 buf
[len
++] = *current
++;
2402 if (len
> buf_size
- 10) {
2405 len
+= xmlCopyChar(0, &buf
[len
], val
);
2408 ent
= xmlParseEntityRef(ctxt
);
2409 if ((ent
!= NULL
) &&
2410 (ctxt
->replaceEntities
!= 0)) {
2413 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
2414 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
2415 XML_SUBSTITUTE_REF
, 0, 0, 0);
2418 while (*current
!= 0) { /* non input consuming */
2419 buf
[len
++] = *current
++;
2420 if (len
> buf_size
- 10) {
2427 if (len
> buf_size
- 10) {
2430 if (ent
->content
!= NULL
)
2431 buf
[len
++] = ent
->content
[0];
2433 } else if (ent
!= NULL
) {
2434 int i
= xmlStrlen(ent
->name
);
2435 const xmlChar
*cur
= ent
->name
;
2438 * This may look absurd but is needed to detect
2441 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
2442 (ent
->content
!= NULL
)) {
2444 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
2445 XML_SUBSTITUTE_REF
, 0, 0, 0);
2451 * Just output the reference
2454 if (len
> buf_size
- i
- 10) {
2458 buf
[len
++] = *cur
++;
2463 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
2464 COPY_BUF(l
,buf
,len
,0x20);
2465 if (len
> buf_size
- 10) {
2469 COPY_BUF(l
,buf
,len
,c
);
2470 if (len
> buf_size
- 10) {
2481 ctxt
->errNo
= XML_ERR_LT_IN_ATTRIBUTE
;
2482 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2483 ctxt
->sax
->error(ctxt
->userData
,
2484 "Unescaped '<' not allowed in attributes values\n");
2485 ctxt
->wellFormed
= 0;
2486 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2487 } else if (RAW
!= limit
) {
2488 ctxt
->errNo
= XML_ERR_ATTRIBUTE_NOT_FINISHED
;
2489 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2490 ctxt
->sax
->error(ctxt
->userData
, "AttValue: ' expected\n");
2491 ctxt
->wellFormed
= 0;
2492 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2499 * xmlParseSystemLiteral:
2500 * @ctxt: an XML parser context
2502 * parse an XML Literal
2504 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2506 * Returns the SystemLiteral parsed or NULL
2510 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
2511 xmlChar
*buf
= NULL
;
2513 int size
= XML_PARSER_BUFFER_SIZE
;
2516 int state
= ctxt
->instate
;
2523 } else if (RAW
== '\'') {
2527 ctxt
->errNo
= XML_ERR_LITERAL_NOT_STARTED
;
2528 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2529 ctxt
->sax
->error(ctxt
->userData
,
2530 "SystemLiteral \" or ' expected\n");
2531 ctxt
->wellFormed
= 0;
2532 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2536 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
2538 xmlGenericError(xmlGenericErrorContext
,
2539 "malloc of %d byte failed\n", size
);
2542 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
2544 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
2545 if (len
+ 5 >= size
) {
2547 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
2549 xmlGenericError(xmlGenericErrorContext
,
2550 "realloc of %d byte failed\n", size
);
2551 ctxt
->instate
= (xmlParserInputState
) state
;
2560 COPY_BUF(l
,buf
,len
,cur
);
2570 ctxt
->instate
= (xmlParserInputState
) state
;
2571 if (!IS_CHAR(cur
)) {
2572 ctxt
->errNo
= XML_ERR_LITERAL_NOT_FINISHED
;
2573 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2574 ctxt
->sax
->error(ctxt
->userData
, "Unfinished SystemLiteral\n");
2575 ctxt
->wellFormed
= 0;
2576 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2584 * xmlParsePubidLiteral:
2585 * @ctxt: an XML parser context
2587 * parse an XML public literal
2589 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2591 * Returns the PubidLiteral parsed or NULL.
2595 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
2596 xmlChar
*buf
= NULL
;
2598 int size
= XML_PARSER_BUFFER_SIZE
;
2602 xmlParserInputState oldstate
= ctxt
->instate
;
2608 } else if (RAW
== '\'') {
2612 ctxt
->errNo
= XML_ERR_LITERAL_NOT_STARTED
;
2613 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2614 ctxt
->sax
->error(ctxt
->userData
,
2615 "SystemLiteral \" or ' expected\n");
2616 ctxt
->wellFormed
= 0;
2617 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2620 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
2622 xmlGenericError(xmlGenericErrorContext
,
2623 "malloc of %d byte failed\n", size
);
2626 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
2628 while ((IS_PUBIDCHAR(cur
)) && (cur
!= stop
)) { /* checked */
2629 if (len
+ 1 >= size
) {
2631 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
2633 xmlGenericError(xmlGenericErrorContext
,
2634 "realloc of %d byte failed\n", size
);
2654 ctxt
->errNo
= XML_ERR_LITERAL_NOT_FINISHED
;
2655 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2656 ctxt
->sax
->error(ctxt
->userData
, "Unfinished PubidLiteral\n");
2657 ctxt
->wellFormed
= 0;
2658 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2662 ctxt
->instate
= oldstate
;
2666 void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
);
2669 * @ctxt: an XML parser context
2670 * @cdata: int indicating whether we are within a CDATA section
2672 * parse a CharData section.
2673 * if we are within a CDATA section ']]>' marks an end of section.
2675 * The right angle bracket (>) may be represented using the string "&gt;",
2676 * and must, for compatibility, be escaped using "&gt;" or a character
2677 * reference when it appears in the string "]]>" in content, when that
2678 * string is not marking the end of a CDATA section.
2680 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2684 xmlParseCharData(xmlParserCtxtPtr ctxt
, int cdata
) {
2687 int line
= ctxt
->input
->line
;
2688 int col
= ctxt
->input
->col
;
2693 * Accelerated common case where input don't need to be
2694 * modified before passing it to the handler.
2697 in
= ctxt
->input
->cur
;
2700 while (((*in
>= 0x20) && (*in
!= '<') && (*in
!= ']') &&
2701 (*in
!= '&') && (*in
<= 0x7F)) || (*in
== 0x09))
2704 ctxt
->input
->line
++;
2706 while (*in
== 0xA) {
2707 ctxt
->input
->line
++;
2713 if ((in
[1] == ']') && (in
[2] == '>')) {
2714 ctxt
->errNo
= XML_ERR_MISPLACED_CDATA_END
;
2715 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2716 ctxt
->sax
->error(ctxt
->userData
,
2717 "Sequence ']]>' not allowed in content\n");
2718 ctxt
->input
->cur
= in
;
2719 ctxt
->wellFormed
= 0;
2720 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2726 nbchar
= in
- ctxt
->input
->cur
;
2728 if (IS_BLANK(*ctxt
->input
->cur
)) {
2729 const xmlChar
*tmp
= ctxt
->input
->cur
;
2730 ctxt
->input
->cur
= in
;
2731 if (areBlanks(ctxt
, tmp
, nbchar
)) {
2732 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
2733 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
2736 if (ctxt
->sax
->characters
!= NULL
)
2737 ctxt
->sax
->characters(ctxt
->userData
,
2740 line
= ctxt
->input
->line
;
2741 col
= ctxt
->input
->col
;
2743 if (ctxt
->sax
->characters
!= NULL
)
2744 ctxt
->sax
->characters(ctxt
->userData
,
2745 ctxt
->input
->cur
, nbchar
);
2746 line
= ctxt
->input
->line
;
2747 col
= ctxt
->input
->col
;
2750 ctxt
->input
->cur
= in
;
2754 ctxt
->input
->cur
= in
;
2756 ctxt
->input
->line
++;
2757 continue; /* while */
2769 in
= ctxt
->input
->cur
;
2770 } while ((*in
>= 0x20) && (*in
<= 0x7F));
2773 ctxt
->input
->line
= line
;
2774 ctxt
->input
->col
= col
;
2775 xmlParseCharDataComplex(ctxt
, cdata
);
2779 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
) {
2780 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
2788 while ((cur
!= '<') && /* checked */
2790 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
2791 if ((cur
== ']') && (NXT(1) == ']') &&
2795 ctxt
->errNo
= XML_ERR_MISPLACED_CDATA_END
;
2796 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2797 ctxt
->sax
->error(ctxt
->userData
,
2798 "Sequence ']]>' not allowed in content\n");
2799 /* Should this be relaxed ??? I see a "must here */
2800 ctxt
->wellFormed
= 0;
2801 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2804 COPY_BUF(l
,buf
,nbchar
,cur
);
2805 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
2807 * OK the segment is to be consumed as chars.
2809 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
2810 if (areBlanks(ctxt
, buf
, nbchar
)) {
2811 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
2812 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
2815 if (ctxt
->sax
->characters
!= NULL
)
2816 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
2831 * OK the segment is to be consumed as chars.
2833 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
2834 if (areBlanks(ctxt
, buf
, nbchar
)) {
2835 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
2836 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
2838 if (ctxt
->sax
->characters
!= NULL
)
2839 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
2846 * xmlParseExternalID:
2847 * @ctxt: an XML parser context
2848 * @publicID: a xmlChar** receiving PubidLiteral
2849 * @strict: indicate whether we should restrict parsing to only
2850 * production [75], see NOTE below
2852 * Parse an External ID or a Public ID
2854 * NOTE: Productions [75] and [83] interact badly since [75] can generate
2855 * 'PUBLIC' S PubidLiteral S SystemLiteral
2857 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2858 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2860 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2862 * Returns the SystemLiteral; in the second
2863 * case publicID receives the PubidLiteral. If strict is off
2864 * it is possible to return NULL and have publicID set.
2868 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
2869 xmlChar
*URI
= NULL
;
2874 if ((RAW
== 'S') && (NXT(1) == 'Y') &&
2875 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2876 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2878 if (!IS_BLANK(CUR
)) {
2879 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
2880 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2881 ctxt
->sax
->error(ctxt
->userData
,
2882 "Space required after 'SYSTEM'\n");
2883 ctxt
->wellFormed
= 0;
2884 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2887 URI
= xmlParseSystemLiteral(ctxt
);
2889 ctxt
->errNo
= XML_ERR_URI_REQUIRED
;
2890 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2891 ctxt
->sax
->error(ctxt
->userData
,
2892 "xmlParseExternalID: SYSTEM, no URI\n");
2893 ctxt
->wellFormed
= 0;
2894 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2896 } else if ((RAW
== 'P') && (NXT(1) == 'U') &&
2897 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2898 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2900 if (!IS_BLANK(CUR
)) {
2901 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
2902 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2903 ctxt
->sax
->error(ctxt
->userData
,
2904 "Space required after 'PUBLIC'\n");
2905 ctxt
->wellFormed
= 0;
2906 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2909 *publicID
= xmlParsePubidLiteral(ctxt
);
2910 if (*publicID
== NULL
) {
2911 ctxt
->errNo
= XML_ERR_PUBID_REQUIRED
;
2912 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2913 ctxt
->sax
->error(ctxt
->userData
,
2914 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2915 ctxt
->wellFormed
= 0;
2916 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2920 * We don't handle [83] so "S SystemLiteral" is required.
2922 if (!IS_BLANK(CUR
)) {
2923 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
2924 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2925 ctxt
->sax
->error(ctxt
->userData
,
2926 "Space required after the Public Identifier\n");
2927 ctxt
->wellFormed
= 0;
2928 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2932 * We handle [83] so we return immediately, if
2933 * "S SystemLiteral" is not detected. From a purely parsing
2934 * point of view that's a nice mess.
2940 if (!IS_BLANK(*ptr
)) return(NULL
);
2942 while (IS_BLANK(*ptr
)) ptr
++; /* TODO: dangerous, fix ! */
2943 if ((*ptr
!= '\'') && (*ptr
!= '"')) return(NULL
);
2946 URI
= xmlParseSystemLiteral(ctxt
);
2948 ctxt
->errNo
= XML_ERR_URI_REQUIRED
;
2949 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2950 ctxt
->sax
->error(ctxt
->userData
,
2951 "xmlParseExternalID: PUBLIC, no URI\n");
2952 ctxt
->wellFormed
= 0;
2953 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2961 * @ctxt: an XML parser context
2963 * Skip an XML (SGML) comment <!-- .... -->
2964 * The spec says that "For compatibility, the string "--" (double-hyphen)
2965 * must not occur within comments. "
2967 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2970 xmlParseComment(xmlParserCtxtPtr ctxt
) {
2971 xmlChar
*buf
= NULL
;
2973 int size
= XML_PARSER_BUFFER_SIZE
;
2977 xmlParserInputState state
;
2978 xmlParserInputPtr input
= ctxt
->input
;
2982 * Check that there is a comment right here.
2984 if ((RAW
!= '<') || (NXT(1) != '!') ||
2985 (NXT(2) != '-') || (NXT(3) != '-')) return;
2987 state
= ctxt
->instate
;
2988 ctxt
->instate
= XML_PARSER_COMMENT
;
2991 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
2993 xmlGenericError(xmlGenericErrorContext
,
2994 "malloc of %d byte failed\n", size
);
2995 ctxt
->instate
= state
;
3004 while (IS_CHAR(cur
) && /* checked */
3006 (r
!= '-') || (q
!= '-'))) {
3007 if ((r
== '-') && (q
== '-')) {
3008 ctxt
->errNo
= XML_ERR_HYPHEN_IN_COMMENT
;
3009 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3010 ctxt
->sax
->error(ctxt
->userData
,
3011 "Comment must not contain '--' (double-hyphen)`\n");
3012 ctxt
->wellFormed
= 0;
3013 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3015 if (len
+ 5 >= size
) {
3017 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3019 xmlGenericError(xmlGenericErrorContext
,
3020 "realloc of %d byte failed\n", size
);
3021 ctxt
->instate
= state
;
3025 COPY_BUF(ql
,buf
,len
,q
);
3045 if (!IS_CHAR(cur
)) {
3046 ctxt
->errNo
= XML_ERR_COMMENT_NOT_FINISHED
;
3047 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3048 ctxt
->sax
->error(ctxt
->userData
,
3049 "Comment not terminated \n<!--%.50s\n", buf
);
3050 ctxt
->wellFormed
= 0;
3051 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3054 if (input
!= ctxt
->input
) {
3055 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
3056 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3057 ctxt
->sax
->error(ctxt
->userData
,
3058 "Comment doesn't start and stop in the same entity\n");
3059 ctxt
->wellFormed
= 0;
3060 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3063 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
3064 (!ctxt
->disableSAX
))
3065 ctxt
->sax
->comment(ctxt
->userData
, buf
);
3068 ctxt
->instate
= state
;
3073 * @ctxt: an XML parser context
3075 * parse the name of a PI
3077 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3079 * Returns the PITarget name or NULL
3083 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
3086 name
= xmlParseName(ctxt
);
3087 if ((name
!= NULL
) &&
3088 ((name
[0] == 'x') || (name
[0] == 'X')) &&
3089 ((name
[1] == 'm') || (name
[1] == 'M')) &&
3090 ((name
[2] == 'l') || (name
[2] == 'L'))) {
3092 if ((name
[0] == 'x') && (name
[1] == 'm') &&
3093 (name
[2] == 'l') && (name
[3] == 0)) {
3094 ctxt
->errNo
= XML_ERR_RESERVED_XML_NAME
;
3095 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3096 ctxt
->sax
->error(ctxt
->userData
,
3097 "XML declaration allowed only at the start of the document\n");
3098 ctxt
->wellFormed
= 0;
3099 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3101 } else if (name
[3] == 0) {
3102 ctxt
->errNo
= XML_ERR_RESERVED_XML_NAME
;
3103 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3104 ctxt
->sax
->error(ctxt
->userData
, "Invalid PI name\n");
3105 ctxt
->wellFormed
= 0;
3106 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3110 if (xmlW3CPIs
[i
] == NULL
) break;
3111 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
3114 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
)) {
3115 ctxt
->errNo
= XML_ERR_RESERVED_XML_NAME
;
3116 ctxt
->sax
->warning(ctxt
->userData
,
3117 "xmlParsePITarget: invalid name prefix 'xml'\n");
3123 #ifdef LIBXML_CATALOG_ENABLED
3125 * xmlParseCatalogPI:
3126 * @ctxt: an XML parser context
3127 * @catalog: the PI value string
3129 * parse an XML Catalog Processing Instruction.
3131 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3133 * Occurs only if allowed by the user and if happening in the Misc
3134 * part of the document before any doctype information.
3135 * This will add the given catalog to the parsing context in order
3136 * to be used if a resolution is needed further down in the document
3140 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
3141 xmlChar
*URL
= NULL
;
3142 const xmlChar
*tmp
, *base
;
3146 while (IS_BLANK(*tmp
)) tmp
++;
3147 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
3150 while (IS_BLANK(*tmp
)) tmp
++;
3155 while (IS_BLANK(*tmp
)) tmp
++;
3157 if ((marker
!= '\'') && (marker
!= '"'))
3161 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
3164 URL
= xmlStrndup(base
, tmp
- base
);
3166 while (IS_BLANK(*tmp
)) tmp
++;
3171 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
3177 ctxt
->errNo
= XML_WAR_CATALOG_PI
;
3178 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
3179 ctxt
->sax
->warning(ctxt
->userData
,
3180 "Catalog PI syntax error: %s\n", catalog
);
3188 * @ctxt: an XML parser context
3190 * parse an XML Processing Instruction.
3192 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3194 * The processing is transferred to SAX once parsed.
3198 xmlParsePI(xmlParserCtxtPtr ctxt
) {
3199 xmlChar
*buf
= NULL
;
3201 int size
= XML_PARSER_BUFFER_SIZE
;
3204 xmlParserInputState state
;
3207 if ((RAW
== '<') && (NXT(1) == '?')) {
3208 xmlParserInputPtr input
= ctxt
->input
;
3209 state
= ctxt
->instate
;
3210 ctxt
->instate
= XML_PARSER_PI
;
3212 * this is a Processing Instruction.
3218 * Parse the target name and check for special support like
3221 target
= xmlParsePITarget(ctxt
);
3222 if (target
!= NULL
) {
3223 if ((RAW
== '?') && (NXT(1) == '>')) {
3224 if (input
!= ctxt
->input
) {
3225 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
3226 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3227 ctxt
->sax
->error(ctxt
->userData
,
3228 "PI declaration doesn't start and stop in the same entity\n");
3229 ctxt
->wellFormed
= 0;
3230 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3237 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
3238 (ctxt
->sax
->processingInstruction
!= NULL
))
3239 ctxt
->sax
->processingInstruction(ctxt
->userData
,
3241 ctxt
->instate
= state
;
3245 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
3247 xmlGenericError(xmlGenericErrorContext
,
3248 "malloc of %d byte failed\n", size
);
3249 ctxt
->instate
= state
;
3253 if (!IS_BLANK(cur
)) {
3254 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3255 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3256 ctxt
->sax
->error(ctxt
->userData
,
3257 "xmlParsePI: PI %s space expected\n", target
);
3258 ctxt
->wellFormed
= 0;
3259 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3263 while (IS_CHAR(cur
) && /* checked */
3264 ((cur
!= '?') || (NXT(1) != '>'))) {
3265 if (len
+ 5 >= size
) {
3267 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3269 xmlGenericError(xmlGenericErrorContext
,
3270 "realloc of %d byte failed\n", size
);
3271 ctxt
->instate
= state
;
3280 COPY_BUF(l
,buf
,len
,cur
);
3291 ctxt
->errNo
= XML_ERR_PI_NOT_FINISHED
;
3292 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3293 ctxt
->sax
->error(ctxt
->userData
,
3294 "xmlParsePI: PI %s never end ...\n", target
);
3295 ctxt
->wellFormed
= 0;
3296 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3298 if (input
!= ctxt
->input
) {
3299 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
3300 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3301 ctxt
->sax
->error(ctxt
->userData
,
3302 "PI declaration doesn't start and stop in the same entity\n");
3303 ctxt
->wellFormed
= 0;
3304 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3308 #ifdef LIBXML_CATALOG_ENABLED
3309 if (((state
== XML_PARSER_MISC
) ||
3310 (state
== XML_PARSER_START
)) &&
3311 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
3312 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
3313 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
3314 (allow
== XML_CATA_ALLOW_ALL
))
3315 xmlParseCatalogPI(ctxt
, buf
);
3323 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
3324 (ctxt
->sax
->processingInstruction
!= NULL
))
3325 ctxt
->sax
->processingInstruction(ctxt
->userData
,
3331 ctxt
->errNo
= XML_ERR_PI_NOT_STARTED
;
3332 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3333 ctxt
->sax
->error(ctxt
->userData
,
3334 "xmlParsePI : no target name\n");
3335 ctxt
->wellFormed
= 0;
3336 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3338 ctxt
->instate
= state
;
3343 * xmlParseNotationDecl:
3344 * @ctxt: an XML parser context
3346 * parse a notation declaration
3348 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3350 * Hence there are actually 3 choices:
3351 * 'PUBLIC' S PubidLiteral
3352 * 'PUBLIC' S PubidLiteral S SystemLiteral
3353 * and 'SYSTEM' S SystemLiteral
3355 * See the NOTE on xmlParseExternalID().
3359 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
3364 if ((RAW
== '<') && (NXT(1) == '!') &&
3365 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3366 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3367 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3368 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3369 xmlParserInputPtr input
= ctxt
->input
;
3372 if (!IS_BLANK(CUR
)) {
3373 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3374 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3375 ctxt
->sax
->error(ctxt
->userData
,
3376 "Space required after '<!NOTATION'\n");
3377 ctxt
->wellFormed
= 0;
3378 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3383 name
= xmlParseName(ctxt
);
3385 ctxt
->errNo
= XML_ERR_NOTATION_NOT_STARTED
;
3386 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3387 ctxt
->sax
->error(ctxt
->userData
,
3388 "NOTATION: Name expected here\n");
3389 ctxt
->wellFormed
= 0;
3390 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3393 if (!IS_BLANK(CUR
)) {
3394 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3395 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3396 ctxt
->sax
->error(ctxt
->userData
,
3397 "Space required after the NOTATION name'\n");
3398 ctxt
->wellFormed
= 0;
3399 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3407 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
3411 if (input
!= ctxt
->input
) {
3412 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
3413 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3414 ctxt
->sax
->error(ctxt
->userData
,
3415 "Notation declaration doesn't start and stop in the same entity\n");
3416 ctxt
->wellFormed
= 0;
3417 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3420 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
3421 (ctxt
->sax
->notationDecl
!= NULL
))
3422 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
3424 ctxt
->errNo
= XML_ERR_NOTATION_NOT_FINISHED
;
3425 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3426 ctxt
->sax
->error(ctxt
->userData
,
3427 "'>' required to close NOTATION declaration\n");
3428 ctxt
->wellFormed
= 0;
3429 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3432 if (Systemid
!= NULL
) xmlFree(Systemid
);
3433 if (Pubid
!= NULL
) xmlFree(Pubid
);
3438 * xmlParseEntityDecl:
3439 * @ctxt: an XML parser context
3441 * parse <!ENTITY declarations
3443 * [70] EntityDecl ::= GEDecl | PEDecl
3445 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3447 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3449 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3451 * [74] PEDef ::= EntityValue | ExternalID
3453 * [76] NDataDecl ::= S 'NDATA' S Name
3455 * [ VC: Notation Declared ]
3456 * The Name must match the declared name of a notation.
3460 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
3461 xmlChar
*name
= NULL
;
3462 xmlChar
*value
= NULL
;
3463 xmlChar
*URI
= NULL
, *literal
= NULL
;
3464 xmlChar
*ndata
= NULL
;
3465 int isParameter
= 0;
3466 xmlChar
*orig
= NULL
;
3470 if ((RAW
== '<') && (NXT(1) == '!') &&
3471 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3472 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3473 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3474 xmlParserInputPtr input
= ctxt
->input
;
3477 skipped
= SKIP_BLANKS
;
3479 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3480 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3481 ctxt
->sax
->error(ctxt
->userData
,
3482 "Space required after '<!ENTITY'\n");
3483 ctxt
->wellFormed
= 0;
3484 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3489 skipped
= SKIP_BLANKS
;
3491 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3492 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3493 ctxt
->sax
->error(ctxt
->userData
,
3494 "Space required after '%'\n");
3495 ctxt
->wellFormed
= 0;
3496 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3501 name
= xmlParseName(ctxt
);
3503 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
3504 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3505 ctxt
->sax
->error(ctxt
->userData
, "xmlParseEntityDecl: no name\n");
3506 ctxt
->wellFormed
= 0;
3507 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3510 skipped
= SKIP_BLANKS
;
3512 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3513 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3514 ctxt
->sax
->error(ctxt
->userData
,
3515 "Space required after the entity name\n");
3516 ctxt
->wellFormed
= 0;
3517 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3520 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
3522 * handle the various case of definitions...
3525 if ((RAW
== '"') || (RAW
== '\'')) {
3526 value
= xmlParseEntityValue(ctxt
, &orig
);
3528 if ((ctxt
->sax
!= NULL
) &&
3529 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
3530 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
3531 XML_INTERNAL_PARAMETER_ENTITY
,
3535 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
3536 if ((URI
== NULL
) && (literal
== NULL
)) {
3537 ctxt
->errNo
= XML_ERR_VALUE_REQUIRED
;
3538 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3539 ctxt
->sax
->error(ctxt
->userData
,
3540 "Entity value required\n");
3541 ctxt
->wellFormed
= 0;
3542 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3547 uri
= xmlParseURI((const char *) URI
);
3549 ctxt
->errNo
= XML_ERR_INVALID_URI
;
3550 if ((ctxt
->sax
!= NULL
) &&
3551 (!ctxt
->disableSAX
) &&
3552 (ctxt
->sax
->error
!= NULL
))
3553 ctxt
->sax
->error(ctxt
->userData
,
3554 "Invalid URI: %s\n", URI
);
3556 * This really ought to be a well-formedness error
3557 * but the XML Core WG decided otherwise, cf. issue
3558 * E26 of the XML errata.
3561 if (uri
->fragment
!= NULL
) {
3562 ctxt
->errNo
= XML_ERR_URI_FRAGMENT
;
3563 if ((ctxt
->sax
!= NULL
) &&
3564 (!ctxt
->disableSAX
) &&
3565 (ctxt
->sax
->error
!= NULL
))
3566 ctxt
->sax
->error(ctxt
->userData
,
3567 "Fragment not allowed: %s\n", URI
);
3569 * Okay this is foolish to block those but not
3572 ctxt
->wellFormed
= 0;
3574 if ((ctxt
->sax
!= NULL
) &&
3575 (!ctxt
->disableSAX
) &&
3576 (ctxt
->sax
->entityDecl
!= NULL
))
3577 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
3578 XML_EXTERNAL_PARAMETER_ENTITY
,
3579 literal
, URI
, NULL
);
3586 if ((RAW
== '"') || (RAW
== '\'')) {
3587 value
= xmlParseEntityValue(ctxt
, &orig
);
3588 if ((ctxt
->sax
!= NULL
) &&
3589 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
3590 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
3591 XML_INTERNAL_GENERAL_ENTITY
,
3594 * For expat compatibility in SAX mode.
3596 if ((ctxt
->myDoc
== NULL
) ||
3597 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
3598 if (ctxt
->myDoc
== NULL
) {
3599 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
3601 if (ctxt
->myDoc
->intSubset
== NULL
)
3602 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
3603 BAD_CAST
"fake", NULL
, NULL
);
3605 entityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
3609 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
3610 if ((URI
== NULL
) && (literal
== NULL
)) {
3611 ctxt
->errNo
= XML_ERR_VALUE_REQUIRED
;
3612 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3613 ctxt
->sax
->error(ctxt
->userData
,
3614 "Entity value required\n");
3615 ctxt
->wellFormed
= 0;
3616 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3621 uri
= xmlParseURI((const char *)URI
);
3623 ctxt
->errNo
= XML_ERR_INVALID_URI
;
3624 if ((ctxt
->sax
!= NULL
) &&
3625 (!ctxt
->disableSAX
) &&
3626 (ctxt
->sax
->error
!= NULL
))
3627 ctxt
->sax
->error(ctxt
->userData
,
3628 "Invalid URI: %s\n", URI
);
3630 * This really ought to be a well-formedness error
3631 * but the XML Core WG decided otherwise, cf. issue
3632 * E26 of the XML errata.
3635 if (uri
->fragment
!= NULL
) {
3636 ctxt
->errNo
= XML_ERR_URI_FRAGMENT
;
3637 if ((ctxt
->sax
!= NULL
) &&
3638 (!ctxt
->disableSAX
) &&
3639 (ctxt
->sax
->error
!= NULL
))
3640 ctxt
->sax
->error(ctxt
->userData
,
3641 "Fragment not allowed: %s\n", URI
);
3643 * Okay this is foolish to block those but not
3646 ctxt
->wellFormed
= 0;
3651 if ((RAW
!= '>') && (!IS_BLANK(CUR
))) {
3652 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3653 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3654 ctxt
->sax
->error(ctxt
->userData
,
3655 "Space required before 'NDATA'\n");
3656 ctxt
->wellFormed
= 0;
3657 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3660 if ((RAW
== 'N') && (NXT(1) == 'D') &&
3661 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3664 if (!IS_BLANK(CUR
)) {
3665 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3666 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3667 ctxt
->sax
->error(ctxt
->userData
,
3668 "Space required after 'NDATA'\n");
3669 ctxt
->wellFormed
= 0;
3670 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3673 ndata
= xmlParseName(ctxt
);
3674 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
3675 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
3676 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
3677 literal
, URI
, ndata
);
3679 if ((ctxt
->sax
!= NULL
) &&
3680 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
3681 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
3682 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
3683 literal
, URI
, NULL
);
3685 * For expat compatibility in SAX mode.
3686 * assuming the entity replacement was asked for
3688 if ((ctxt
->replaceEntities
!= 0) &&
3689 ((ctxt
->myDoc
== NULL
) ||
3690 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
3691 if (ctxt
->myDoc
== NULL
) {
3692 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
3695 if (ctxt
->myDoc
->intSubset
== NULL
)
3696 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
3697 BAD_CAST
"fake", NULL
, NULL
);
3698 entityDecl(ctxt
, name
,
3699 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
3700 literal
, URI
, NULL
);
3707 ctxt
->errNo
= XML_ERR_ENTITY_NOT_FINISHED
;
3708 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3709 ctxt
->sax
->error(ctxt
->userData
,
3710 "xmlParseEntityDecl: entity %s not terminated\n", name
);
3711 ctxt
->wellFormed
= 0;
3712 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3714 if (input
!= ctxt
->input
) {
3715 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
3716 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3717 ctxt
->sax
->error(ctxt
->userData
,
3718 "Entity declaration doesn't start and stop in the same entity\n");
3719 ctxt
->wellFormed
= 0;
3720 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3726 * Ugly mechanism to save the raw entity value.
3728 xmlEntityPtr cur
= NULL
;
3731 if ((ctxt
->sax
!= NULL
) &&
3732 (ctxt
->sax
->getParameterEntity
!= NULL
))
3733 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
3735 if ((ctxt
->sax
!= NULL
) &&
3736 (ctxt
->sax
->getEntity
!= NULL
))
3737 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
3738 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
3739 cur
= getEntity(ctxt
, name
);
3743 if (cur
->orig
!= NULL
)
3750 if (name
!= NULL
) xmlFree(name
);
3751 if (value
!= NULL
) xmlFree(value
);
3752 if (URI
!= NULL
) xmlFree(URI
);
3753 if (literal
!= NULL
) xmlFree(literal
);
3754 if (ndata
!= NULL
) xmlFree(ndata
);
3759 * xmlParseDefaultDecl:
3760 * @ctxt: an XML parser context
3761 * @value: Receive a possible fixed default value for the attribute
3763 * Parse an attribute default declaration
3765 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3767 * [ VC: Required Attribute ]
3768 * if the default declaration is the keyword #REQUIRED, then the
3769 * attribute must be specified for all elements of the type in the
3770 * attribute-list declaration.
3772 * [ VC: Attribute Default Legal ]
3773 * The declared default value must meet the lexical constraints of
3774 * the declared attribute type c.f. xmlValidateAttributeDecl()
3776 * [ VC: Fixed Attribute Default ]
3777 * if an attribute has a default value declared with the #FIXED
3778 * keyword, instances of that attribute must match the default value.
3780 * [ WFC: No < in Attribute Values ]
3781 * handled in xmlParseAttValue()
3783 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3784 * or XML_ATTRIBUTE_FIXED.
3788 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
3793 if ((RAW
== '#') && (NXT(1) == 'R') &&
3794 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3795 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3796 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3799 return(XML_ATTRIBUTE_REQUIRED
);
3801 if ((RAW
== '#') && (NXT(1) == 'I') &&
3802 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3803 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3804 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3806 return(XML_ATTRIBUTE_IMPLIED
);
3808 val
= XML_ATTRIBUTE_NONE
;
3809 if ((RAW
== '#') && (NXT(1) == 'F') &&
3810 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3811 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3813 val
= XML_ATTRIBUTE_FIXED
;
3814 if (!IS_BLANK(CUR
)) {
3815 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3816 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3817 ctxt
->sax
->error(ctxt
->userData
,
3818 "Space required after '#FIXED'\n");
3819 ctxt
->wellFormed
= 0;
3820 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3824 ret
= xmlParseAttValue(ctxt
);
3825 ctxt
->instate
= XML_PARSER_DTD
;
3827 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3828 ctxt
->sax
->error(ctxt
->userData
,
3829 "Attribute default value declaration error\n");
3830 ctxt
->wellFormed
= 0;
3831 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3838 * xmlParseNotationType:
3839 * @ctxt: an XML parser context
3841 * parse a Notation attribute type.
3843 * Note: the leading 'NOTATION' S part has already been parsed...
3845 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3847 * [ VC: Notation Attributes ]
3848 * Values of this type must match one of the notation names included
3849 * in the declaration; all notation names in the declaration must be declared.
3851 * Returns: the notation attribute tree built while parsing
3855 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
3857 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
;
3860 ctxt
->errNo
= XML_ERR_NOTATION_NOT_STARTED
;
3861 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3862 ctxt
->sax
->error(ctxt
->userData
,
3863 "'(' required to start 'NOTATION'\n");
3864 ctxt
->wellFormed
= 0;
3865 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3872 name
= xmlParseName(ctxt
);
3874 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
3875 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3876 ctxt
->sax
->error(ctxt
->userData
,
3877 "Name expected in NOTATION declaration\n");
3878 ctxt
->wellFormed
= 0;
3879 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3882 cur
= xmlCreateEnumeration(name
);
3884 if (cur
== NULL
) return(ret
);
3885 if (last
== NULL
) ret
= last
= cur
;
3891 } while (RAW
== '|');
3893 ctxt
->errNo
= XML_ERR_NOTATION_NOT_FINISHED
;
3894 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3895 ctxt
->sax
->error(ctxt
->userData
,
3896 "')' required to finish NOTATION declaration\n");
3897 ctxt
->wellFormed
= 0;
3898 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3899 if ((last
!= NULL
) && (last
!= ret
))
3900 xmlFreeEnumeration(last
);
3908 * xmlParseEnumerationType:
3909 * @ctxt: an XML parser context
3911 * parse an Enumeration attribute type.
3913 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3915 * [ VC: Enumeration ]
3916 * Values of this type must match one of the Nmtoken tokens in
3919 * Returns: the enumeration attribute tree built while parsing
3923 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
3925 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
;
3928 ctxt
->errNo
= XML_ERR_ATTLIST_NOT_STARTED
;
3929 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3930 ctxt
->sax
->error(ctxt
->userData
,
3931 "'(' required to start ATTLIST enumeration\n");
3932 ctxt
->wellFormed
= 0;
3933 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3940 name
= xmlParseNmtoken(ctxt
);
3942 ctxt
->errNo
= XML_ERR_NMTOKEN_REQUIRED
;
3943 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3944 ctxt
->sax
->error(ctxt
->userData
,
3945 "NmToken expected in ATTLIST enumeration\n");
3946 ctxt
->wellFormed
= 0;
3947 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3950 cur
= xmlCreateEnumeration(name
);
3952 if (cur
== NULL
) return(ret
);
3953 if (last
== NULL
) ret
= last
= cur
;
3959 } while (RAW
== '|');
3961 ctxt
->errNo
= XML_ERR_ATTLIST_NOT_FINISHED
;
3962 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3963 ctxt
->sax
->error(ctxt
->userData
,
3964 "')' required to finish ATTLIST enumeration\n");
3965 ctxt
->wellFormed
= 0;
3966 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3974 * xmlParseEnumeratedType:
3975 * @ctxt: an XML parser context
3976 * @tree: the enumeration tree built while parsing
3978 * parse an Enumerated attribute type.
3980 * [57] EnumeratedType ::= NotationType | Enumeration
3982 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3985 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3989 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
3990 if ((RAW
== 'N') && (NXT(1) == 'O') &&
3991 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3992 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3993 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3995 if (!IS_BLANK(CUR
)) {
3996 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
3997 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3998 ctxt
->sax
->error(ctxt
->userData
,
3999 "Space required after 'NOTATION'\n");
4000 ctxt
->wellFormed
= 0;
4001 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4005 *tree
= xmlParseNotationType(ctxt
);
4006 if (*tree
== NULL
) return(0);
4007 return(XML_ATTRIBUTE_NOTATION
);
4009 *tree
= xmlParseEnumerationType(ctxt
);
4010 if (*tree
== NULL
) return(0);
4011 return(XML_ATTRIBUTE_ENUMERATION
);
4015 * xmlParseAttributeType:
4016 * @ctxt: an XML parser context
4017 * @tree: the enumeration tree built while parsing
4019 * parse the Attribute list def for an element
4021 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4023 * [55] StringType ::= 'CDATA'
4025 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4026 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4028 * Validity constraints for attribute values syntax are checked in
4029 * xmlValidateAttributeValue()
4032 * Values of type ID must match the Name production. A name must not
4033 * appear more than once in an XML document as a value of this type;
4034 * i.e., ID values must uniquely identify the elements which bear them.
4036 * [ VC: One ID per Element Type ]
4037 * No element type may have more than one ID attribute specified.
4039 * [ VC: ID Attribute Default ]
4040 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4043 * Values of type IDREF must match the Name production, and values
4044 * of type IDREFS must match Names; each IDREF Name must match the value
4045 * of an ID attribute on some element in the XML document; i.e. IDREF
4046 * values must match the value of some ID attribute.
4048 * [ VC: Entity Name ]
4049 * Values of type ENTITY must match the Name production, values
4050 * of type ENTITIES must match Names; each Entity Name must match the
4051 * name of an unparsed entity declared in the DTD.
4053 * [ VC: Name Token ]
4054 * Values of type NMTOKEN must match the Nmtoken production; values
4055 * of type NMTOKENS must match Nmtokens.
4057 * Returns the attribute type
4060 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
4062 if ((RAW
== 'C') && (NXT(1) == 'D') &&
4063 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4066 return(XML_ATTRIBUTE_CDATA
);
4067 } else if ((RAW
== 'I') && (NXT(1) == 'D') &&
4068 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4069 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4071 return(XML_ATTRIBUTE_IDREFS
);
4072 } else if ((RAW
== 'I') && (NXT(1) == 'D') &&
4073 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4076 return(XML_ATTRIBUTE_IDREF
);
4077 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
4079 return(XML_ATTRIBUTE_ID
);
4080 } else if ((RAW
== 'E') && (NXT(1) == 'N') &&
4081 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4082 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4084 return(XML_ATTRIBUTE_ENTITY
);
4085 } else if ((RAW
== 'E') && (NXT(1) == 'N') &&
4086 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4087 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4088 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4090 return(XML_ATTRIBUTE_ENTITIES
);
4091 } else if ((RAW
== 'N') && (NXT(1) == 'M') &&
4092 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4093 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4094 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4096 return(XML_ATTRIBUTE_NMTOKENS
);
4097 } else if ((RAW
== 'N') && (NXT(1) == 'M') &&
4098 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4099 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4102 return(XML_ATTRIBUTE_NMTOKEN
);
4104 return(xmlParseEnumeratedType(ctxt
, tree
));
4108 * xmlParseAttributeListDecl:
4109 * @ctxt: an XML parser context
4111 * parse the Attribute list def for an element
4113 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4115 * [53] AttDef ::= S Name S AttType S DefaultDecl
4119 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
4122 xmlEnumerationPtr tree
;
4124 if ((RAW
== '<') && (NXT(1) == '!') &&
4125 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4126 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4127 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4129 xmlParserInputPtr input
= ctxt
->input
;
4132 if (!IS_BLANK(CUR
)) {
4133 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
4134 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4135 ctxt
->sax
->error(ctxt
->userData
,
4136 "Space required after '<!ATTLIST'\n");
4137 ctxt
->wellFormed
= 0;
4138 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4141 elemName
= xmlParseName(ctxt
);
4142 if (elemName
== NULL
) {
4143 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
4144 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4145 ctxt
->sax
->error(ctxt
->userData
,
4146 "ATTLIST: no name for Element\n");
4147 ctxt
->wellFormed
= 0;
4148 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4153 while (RAW
!= '>') {
4154 const xmlChar
*check
= CUR_PTR
;
4157 xmlChar
*defaultValue
= NULL
;
4161 attrName
= xmlParseName(ctxt
);
4162 if (attrName
== NULL
) {
4163 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
4164 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4165 ctxt
->sax
->error(ctxt
->userData
,
4166 "ATTLIST: no name for Attribute\n");
4167 ctxt
->wellFormed
= 0;
4168 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4172 if (!IS_BLANK(CUR
)) {
4173 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
4174 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4175 ctxt
->sax
->error(ctxt
->userData
,
4176 "Space required after the attribute name\n");
4177 ctxt
->wellFormed
= 0;
4178 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4179 if (attrName
!= NULL
)
4181 if (defaultValue
!= NULL
)
4182 xmlFree(defaultValue
);
4187 type
= xmlParseAttributeType(ctxt
, &tree
);
4189 if (attrName
!= NULL
)
4191 if (defaultValue
!= NULL
)
4192 xmlFree(defaultValue
);
4197 if (!IS_BLANK(CUR
)) {
4198 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
4199 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4200 ctxt
->sax
->error(ctxt
->userData
,
4201 "Space required after the attribute type\n");
4202 ctxt
->wellFormed
= 0;
4203 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4204 if (attrName
!= NULL
)
4206 if (defaultValue
!= NULL
)
4207 xmlFree(defaultValue
);
4209 xmlFreeEnumeration(tree
);
4214 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
4216 if (attrName
!= NULL
)
4218 if (defaultValue
!= NULL
)
4219 xmlFree(defaultValue
);
4221 xmlFreeEnumeration(tree
);
4227 if (!IS_BLANK(CUR
)) {
4228 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
4229 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4230 ctxt
->sax
->error(ctxt
->userData
,
4231 "Space required after the attribute default value\n");
4232 ctxt
->wellFormed
= 0;
4233 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4234 if (attrName
!= NULL
)
4236 if (defaultValue
!= NULL
)
4237 xmlFree(defaultValue
);
4239 xmlFreeEnumeration(tree
);
4244 if (check
== CUR_PTR
) {
4245 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
4246 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4247 ctxt
->sax
->error(ctxt
->userData
,
4248 "xmlParseAttributeListDecl: detected internal error\n");
4249 if (attrName
!= NULL
)
4251 if (defaultValue
!= NULL
)
4252 xmlFree(defaultValue
);
4254 xmlFreeEnumeration(tree
);
4257 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
4258 (ctxt
->sax
->attributeDecl
!= NULL
))
4259 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
4260 type
, def
, defaultValue
, tree
);
4261 if (attrName
!= NULL
)
4263 if (defaultValue
!= NULL
)
4264 xmlFree(defaultValue
);
4268 if (input
!= ctxt
->input
) {
4269 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
4270 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4271 ctxt
->sax
->error(ctxt
->userData
,
4272 "Attribute list declaration doesn't start and stop in the same entity\n");
4273 ctxt
->wellFormed
= 0;
4274 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4284 * xmlParseElementMixedContentDecl:
4285 * @ctxt: an XML parser context
4287 * parse the declaration for a Mixed Element content
4288 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4290 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4291 * '(' S? '#PCDATA' S? ')'
4293 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4295 * [ VC: No Duplicate Types ]
4296 * The same name must not appear more than once in a single
4297 * mixed-content declaration.
4299 * returns: the list of the xmlElementContentPtr describing the element choices
4301 xmlElementContentPtr
4302 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, xmlParserInputPtr inputchk
) {
4303 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
4304 xmlChar
*elem
= NULL
;
4307 if ((RAW
== '#') && (NXT(1) == 'P') &&
4308 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4309 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4315 if ((ctxt
->validate
) && (ctxt
->input
!= inputchk
)) {
4316 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
4317 if (ctxt
->vctxt
.error
!= NULL
)
4318 ctxt
->vctxt
.error(ctxt
->vctxt
.userData
,
4319 "Element content declaration doesn't start and stop in the same entity\n");
4323 ret
= xmlNewElementContent(NULL
, XML_ELEMENT_CONTENT_PCDATA
);
4325 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
4330 if ((RAW
== '(') || (RAW
== '|')) {
4331 ret
= cur
= xmlNewElementContent(NULL
, XML_ELEMENT_CONTENT_PCDATA
);
4332 if (ret
== NULL
) return(NULL
);
4334 while (RAW
== '|') {
4337 ret
= xmlNewElementContent(NULL
, XML_ELEMENT_CONTENT_OR
);
4338 if (ret
== NULL
) return(NULL
);
4344 n
= xmlNewElementContent(NULL
, XML_ELEMENT_CONTENT_OR
);
4345 if (n
== NULL
) return(NULL
);
4346 n
->c1
= xmlNewElementContent(elem
, XML_ELEMENT_CONTENT_ELEMENT
);
4356 elem
= xmlParseName(ctxt
);
4358 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
4359 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4360 ctxt
->sax
->error(ctxt
->userData
,
4361 "xmlParseElementMixedContentDecl : Name expected\n");
4362 ctxt
->wellFormed
= 0;
4363 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4364 xmlFreeElementContent(cur
);
4370 if ((RAW
== ')') && (NXT(1) == '*')) {
4372 cur
->c2
= xmlNewElementContent(elem
,
4373 XML_ELEMENT_CONTENT_ELEMENT
);
4374 if (cur
->c2
!= NULL
)
4375 cur
->c2
->parent
= cur
;
4378 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
4379 if ((ctxt
->validate
) && (ctxt
->input
!= inputchk
)) {
4380 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
4381 if (ctxt
->vctxt
.error
!= NULL
)
4382 ctxt
->vctxt
.error(ctxt
->vctxt
.userData
,
4383 "Element content declaration doesn't start and stop in the same entity\n");
4388 if (elem
!= NULL
) xmlFree(elem
);
4389 xmlFreeElementContent(ret
);
4390 ctxt
->errNo
= XML_ERR_MIXED_NOT_STARTED
;
4391 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4392 ctxt
->sax
->error(ctxt
->userData
,
4393 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4394 ctxt
->wellFormed
= 0;
4395 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4400 ctxt
->errNo
= XML_ERR_PCDATA_REQUIRED
;
4401 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4402 ctxt
->sax
->error(ctxt
->userData
,
4403 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4404 ctxt
->wellFormed
= 0;
4405 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4411 * xmlParseElementChildrenContentDecl:
4412 * @ctxt: an XML parser context
4414 * parse the declaration for a Children Element content
4415 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4418 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4420 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4422 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4424 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4426 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4427 * TODO Parameter-entity replacement text must be properly nested
4428 * with parenthesized groups. That is to say, if either of the
4429 * opening or closing parentheses in a choice, seq, or Mixed
4430 * construct is contained in the replacement text for a parameter
4431 * entity, both must be contained in the same replacement text. For
4432 * interoperability, if a parameter-entity reference appears in a
4433 * choice, seq, or Mixed construct, its replacement text should not
4434 * be empty, and neither the first nor last non-blank character of
4435 * the replacement text should be a connector (| or ,).
4437 * Returns the tree of xmlElementContentPtr describing the element
4440 xmlElementContentPtr
4441 xmlParseElementChildrenContentDecl
4442 (xmlParserCtxtPtr ctxt
, xmlParserInputPtr inputchk
) {
4443 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
4450 xmlParserInputPtr input
= ctxt
->input
;
4452 /* Recurse on first child */
4455 cur
= ret
= xmlParseElementChildrenContentDecl(ctxt
, input
);
4459 elem
= xmlParseName(ctxt
);
4461 ctxt
->errNo
= XML_ERR_ELEMCONTENT_NOT_STARTED
;
4462 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4463 ctxt
->sax
->error(ctxt
->userData
,
4464 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4465 ctxt
->wellFormed
= 0;
4466 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4469 cur
= ret
= xmlNewElementContent(elem
, XML_ELEMENT_CONTENT_ELEMENT
);
4472 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
4474 } else if (RAW
== '*') {
4475 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
4477 } else if (RAW
== '+') {
4478 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
4481 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
4488 while (RAW
!= ')') {
4490 * Each loop we parse one separator and one element.
4493 if (type
== 0) type
= CUR
;
4496 * Detect "Name | Name , Name" error
4498 else if (type
!= CUR
) {
4499 ctxt
->errNo
= XML_ERR_SEPARATOR_REQUIRED
;
4500 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4501 ctxt
->sax
->error(ctxt
->userData
,
4502 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4504 ctxt
->wellFormed
= 0;
4505 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4506 if ((last
!= NULL
) && (last
!= ret
))
4507 xmlFreeElementContent(last
);
4509 xmlFreeElementContent(ret
);
4514 op
= xmlNewElementContent(NULL
, XML_ELEMENT_CONTENT_SEQ
);
4516 if ((last
!= NULL
) && (last
!= ret
))
4517 xmlFreeElementContent(last
);
4518 xmlFreeElementContent(ret
);
4536 } else if (RAW
== '|') {
4537 if (type
== 0) type
= CUR
;
4540 * Detect "Name , Name | Name" error
4542 else if (type
!= CUR
) {
4543 ctxt
->errNo
= XML_ERR_SEPARATOR_REQUIRED
;
4544 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4545 ctxt
->sax
->error(ctxt
->userData
,
4546 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4548 ctxt
->wellFormed
= 0;
4549 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4550 if ((last
!= NULL
) && (last
!= ret
))
4551 xmlFreeElementContent(last
);
4553 xmlFreeElementContent(ret
);
4558 op
= xmlNewElementContent(NULL
, XML_ELEMENT_CONTENT_OR
);
4560 if ((last
!= NULL
) && (last
!= ret
))
4561 xmlFreeElementContent(last
);
4563 xmlFreeElementContent(ret
);
4582 ctxt
->errNo
= XML_ERR_ELEMCONTENT_NOT_FINISHED
;
4583 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4584 ctxt
->sax
->error(ctxt
->userData
,
4585 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4586 ctxt
->wellFormed
= 0;
4587 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4589 xmlFreeElementContent(ret
);
4596 xmlParserInputPtr input
= ctxt
->input
;
4597 /* Recurse on second child */
4600 last
= xmlParseElementChildrenContentDecl(ctxt
, input
);
4603 elem
= xmlParseName(ctxt
);
4605 ctxt
->errNo
= XML_ERR_ELEMCONTENT_NOT_STARTED
;
4606 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4607 ctxt
->sax
->error(ctxt
->userData
,
4608 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4609 ctxt
->wellFormed
= 0;
4610 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4612 xmlFreeElementContent(ret
);
4615 last
= xmlNewElementContent(elem
, XML_ELEMENT_CONTENT_ELEMENT
);
4618 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
4620 } else if (RAW
== '*') {
4621 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
4623 } else if (RAW
== '+') {
4624 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
4627 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
4633 if ((cur
!= NULL
) && (last
!= NULL
)) {
4638 if ((ctxt
->validate
) && (ctxt
->input
!= inputchk
)) {
4639 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
4640 if (ctxt
->vctxt
.error
!= NULL
)
4641 ctxt
->vctxt
.error(ctxt
->vctxt
.userData
,
4642 "Element content declaration doesn't start and stop in the same entity\n");
4648 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
4650 } else if (RAW
== '*') {
4652 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
4655 * Some normalization:
4656 * (a | b* | c?)* == (a | b | c)*
4658 while (cur
->type
== XML_ELEMENT_CONTENT_OR
) {
4659 if ((cur
->c1
!= NULL
) &&
4660 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
4661 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
4662 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
4663 if ((cur
->c2
!= NULL
) &&
4664 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
4665 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
4666 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
4671 } else if (RAW
== '+') {
4675 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
4677 * Some normalization:
4678 * (a | b*)+ == (a | b)*
4679 * (a | b?)+ == (a | b)*
4681 while (cur
->type
== XML_ELEMENT_CONTENT_OR
) {
4682 if ((cur
->c1
!= NULL
) &&
4683 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
4684 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
4685 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
4688 if ((cur
->c2
!= NULL
) &&
4689 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
4690 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
4691 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
4697 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
4705 * xmlParseElementContentDecl:
4706 * @ctxt: an XML parser context
4707 * @name: the name of the element being defined.
4708 * @result: the Element Content pointer will be stored here if any
4710 * parse the declaration for an Element content either Mixed or Children,
4711 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4713 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4715 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4719 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, xmlChar
*name
,
4720 xmlElementContentPtr
*result
) {
4722 xmlElementContentPtr tree
= NULL
;
4723 xmlParserInputPtr input
= ctxt
->input
;
4729 ctxt
->errNo
= XML_ERR_ELEMCONTENT_NOT_STARTED
;
4730 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4731 ctxt
->sax
->error(ctxt
->userData
,
4732 "xmlParseElementContentDecl : %s '(' expected\n", name
);
4733 ctxt
->wellFormed
= 0;
4734 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4740 if ((RAW
== '#') && (NXT(1) == 'P') &&
4741 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4742 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4744 tree
= xmlParseElementMixedContentDecl(ctxt
, input
);
4745 res
= XML_ELEMENT_TYPE_MIXED
;
4747 tree
= xmlParseElementChildrenContentDecl(ctxt
, input
);
4748 res
= XML_ELEMENT_TYPE_ELEMENT
;
4756 * xmlParseElementDecl:
4757 * @ctxt: an XML parser context
4759 * parse an Element declaration.
4761 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4763 * [ VC: Unique Element Type Declaration ]
4764 * No element type may be declared more than once
4766 * Returns the type of the element, or -1 in case of error
4769 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
4772 xmlElementContentPtr content
= NULL
;
4775 if ((RAW
== '<') && (NXT(1) == '!') &&
4776 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4777 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4778 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4780 xmlParserInputPtr input
= ctxt
->input
;
4783 if (!IS_BLANK(CUR
)) {
4784 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
4785 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4786 ctxt
->sax
->error(ctxt
->userData
,
4787 "Space required after 'ELEMENT'\n");
4788 ctxt
->wellFormed
= 0;
4789 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4792 name
= xmlParseName(ctxt
);
4794 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
4795 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4796 ctxt
->sax
->error(ctxt
->userData
,
4797 "xmlParseElementDecl: no name for Element\n");
4798 ctxt
->wellFormed
= 0;
4799 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4802 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
4804 if (!IS_BLANK(CUR
)) {
4805 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
4806 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4807 ctxt
->sax
->error(ctxt
->userData
,
4808 "Space required after the element name\n");
4809 ctxt
->wellFormed
= 0;
4810 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4813 if ((RAW
== 'E') && (NXT(1) == 'M') &&
4814 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4818 * Element must always be empty.
4820 ret
= XML_ELEMENT_TYPE_EMPTY
;
4821 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
4825 * Element is a generic container.
4827 ret
= XML_ELEMENT_TYPE_ANY
;
4828 } else if (RAW
== '(') {
4829 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
4832 * [ WFC: PEs in Internal Subset ] error handling.
4834 if ((RAW
== '%') && (ctxt
->external
== 0) &&
4835 (ctxt
->inputNr
== 1)) {
4836 ctxt
->errNo
= XML_ERR_PEREF_IN_INT_SUBSET
;
4837 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4838 ctxt
->sax
->error(ctxt
->userData
,
4839 "PEReference: forbidden within markup decl in internal subset\n");
4841 ctxt
->errNo
= XML_ERR_ELEMCONTENT_NOT_STARTED
;
4842 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4843 ctxt
->sax
->error(ctxt
->userData
,
4844 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4846 ctxt
->wellFormed
= 0;
4847 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4848 if (name
!= NULL
) xmlFree(name
);
4854 * Pop-up of finished entities.
4856 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
4861 ctxt
->errNo
= XML_ERR_GT_REQUIRED
;
4862 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4863 ctxt
->sax
->error(ctxt
->userData
,
4864 "xmlParseElementDecl: expected '>' at the end\n");
4865 ctxt
->wellFormed
= 0;
4866 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4868 if (input
!= ctxt
->input
) {
4869 ctxt
->errNo
= XML_ERR_ENTITY_BOUNDARY
;
4870 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4871 ctxt
->sax
->error(ctxt
->userData
,
4872 "Element declaration doesn't start and stop in the same entity\n");
4873 ctxt
->wellFormed
= 0;
4874 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4878 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
4879 (ctxt
->sax
->elementDecl
!= NULL
))
4880 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
4883 if (content
!= NULL
) {
4884 xmlFreeElementContent(content
);
4894 * xmlParseConditionalSections
4895 * @ctxt: an XML parser context
4897 * [61] conditionalSect ::= includeSect | ignoreSect
4898 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4899 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4900 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4901 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4905 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
4908 if ((RAW
== 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4909 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4914 ctxt
->errNo
= XML_ERR_CONDSEC_INVALID
;
4915 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4916 ctxt
->sax
->error(ctxt
->userData
,
4917 "XML conditional section '[' expected\n");
4918 ctxt
->wellFormed
= 0;
4919 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4923 if (xmlParserDebugEntities
) {
4924 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
4925 xmlGenericError(xmlGenericErrorContext
,
4926 "%s(%d): ", ctxt
->input
->filename
,
4928 xmlGenericError(xmlGenericErrorContext
,
4929 "Entering INCLUDE Conditional Section\n");
4932 while ((RAW
!= 0) && ((RAW
!= ']') || (NXT(1) != ']') ||
4934 const xmlChar
*check
= CUR_PTR
;
4935 int cons
= ctxt
->input
->consumed
;
4937 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4938 xmlParseConditionalSections(ctxt
);
4939 } else if (IS_BLANK(CUR
)) {
4941 } else if (RAW
== '%') {
4942 xmlParsePEReference(ctxt
);
4944 xmlParseMarkupDecl(ctxt
);
4947 * Pop-up of finished entities.
4949 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
4952 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
4953 ctxt
->errNo
= XML_ERR_EXT_SUBSET_NOT_FINISHED
;
4954 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4955 ctxt
->sax
->error(ctxt
->userData
,
4956 "Content error in the external subset\n");
4957 ctxt
->wellFormed
= 0;
4958 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4962 if (xmlParserDebugEntities
) {
4963 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
4964 xmlGenericError(xmlGenericErrorContext
,
4965 "%s(%d): ", ctxt
->input
->filename
,
4967 xmlGenericError(xmlGenericErrorContext
,
4968 "Leaving INCLUDE Conditional Section\n");
4971 } else if ((RAW
== 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4972 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4980 ctxt
->errNo
= XML_ERR_CONDSEC_INVALID
;
4981 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
4982 ctxt
->sax
->error(ctxt
->userData
,
4983 "XML conditional section '[' expected\n");
4984 ctxt
->wellFormed
= 0;
4985 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
4989 if (xmlParserDebugEntities
) {
4990 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
4991 xmlGenericError(xmlGenericErrorContext
,
4992 "%s(%d): ", ctxt
->input
->filename
,
4994 xmlGenericError(xmlGenericErrorContext
,
4995 "Entering IGNORE Conditional Section\n");
4999 * Parse up to the end of the conditional section
5000 * But disable SAX event generating DTD building in the meantime
5002 state
= ctxt
->disableSAX
;
5003 instate
= ctxt
->instate
;
5004 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5005 ctxt
->instate
= XML_PARSER_IGNORE
;
5007 while ((depth
>= 0) && (RAW
!= 0)) {
5008 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5013 if ((RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5014 if (--depth
>= 0) SKIP(3);
5021 ctxt
->disableSAX
= state
;
5022 ctxt
->instate
= instate
;
5024 if (xmlParserDebugEntities
) {
5025 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
5026 xmlGenericError(xmlGenericErrorContext
,
5027 "%s(%d): ", ctxt
->input
->filename
,
5029 xmlGenericError(xmlGenericErrorContext
,
5030 "Leaving IGNORE Conditional Section\n");
5034 ctxt
->errNo
= XML_ERR_CONDSEC_INVALID
;
5035 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5036 ctxt
->sax
->error(ctxt
->userData
,
5037 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5038 ctxt
->wellFormed
= 0;
5039 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5046 ctxt
->errNo
= XML_ERR_CONDSEC_NOT_FINISHED
;
5047 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5048 ctxt
->sax
->error(ctxt
->userData
,
5049 "XML conditional section not closed\n");
5050 ctxt
->wellFormed
= 0;
5051 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5058 * xmlParseMarkupDecl:
5059 * @ctxt: an XML parser context
5061 * parse Markup declarations
5063 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5064 * NotationDecl | PI | Comment
5066 * [ VC: Proper Declaration/PE Nesting ]
5067 * Parameter-entity replacement text must be properly nested with
5068 * markup declarations. That is to say, if either the first character
5069 * or the last character of a markup declaration (markupdecl above) is
5070 * contained in the replacement text for a parameter-entity reference,
5071 * both must be contained in the same replacement text.
5073 * [ WFC: PEs in Internal Subset ]
5074 * In the internal DTD subset, parameter-entity references can occur
5075 * only where markup declarations can occur, not within markup declarations.
5076 * (This does not apply to references that occur in external parameter
5077 * entities or to the external subset.)
5080 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
5082 xmlParseElementDecl(ctxt
);
5083 xmlParseAttributeListDecl(ctxt
);
5084 xmlParseEntityDecl(ctxt
);
5085 xmlParseNotationDecl(ctxt
);
5087 xmlParseComment(ctxt
);
5089 * This is only for internal subset. On external entities,
5090 * the replacement is done before parsing stage
5092 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
5093 xmlParsePEReference(ctxt
);
5096 * Conditional sections are allowed from entities included
5097 * by PE References in the internal subset.
5099 if ((ctxt
->external
== 0) && (ctxt
->inputNr
> 1)) {
5100 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5101 xmlParseConditionalSections(ctxt
);
5105 ctxt
->instate
= XML_PARSER_DTD
;
5110 * @ctxt: an XML parser context
5112 * parse an XML declaration header for external entities
5114 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5116 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5120 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
5124 * We know that '<?xml' is here.
5126 if ((RAW
== '<') && (NXT(1) == '?') &&
5127 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5128 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5131 ctxt
->errNo
= XML_ERR_XMLDECL_NOT_STARTED
;
5132 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5133 ctxt
->sax
->error(ctxt
->userData
,
5134 "Text declaration '<?xml' required\n");
5135 ctxt
->wellFormed
= 0;
5136 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5141 if (!IS_BLANK(CUR
)) {
5142 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
5143 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5144 ctxt
->sax
->error(ctxt
->userData
,
5145 "Space needed after '<?xml'\n");
5146 ctxt
->wellFormed
= 0;
5147 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5152 * We may have the VersionInfo here.
5154 version
= xmlParseVersionInfo(ctxt
);
5155 if (version
== NULL
)
5156 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
5158 if (!IS_BLANK(CUR
)) {
5159 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
5160 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5161 ctxt
->sax
->error(ctxt
->userData
, "Space needed here\n");
5162 ctxt
->wellFormed
= 0;
5163 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5166 ctxt
->input
->version
= version
;
5169 * We must have the encoding declaration
5171 xmlParseEncodingDecl(ctxt
);
5172 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
5174 * The XML REC instructs us to stop parsing right here
5180 if ((RAW
== '?') && (NXT(1) == '>')) {
5182 } else if (RAW
== '>') {
5183 /* Deprecated old WD ... */
5184 ctxt
->errNo
= XML_ERR_XMLDECL_NOT_FINISHED
;
5185 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5186 ctxt
->sax
->error(ctxt
->userData
,
5187 "XML declaration must end-up with '?>'\n");
5188 ctxt
->wellFormed
= 0;
5189 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5192 ctxt
->errNo
= XML_ERR_XMLDECL_NOT_FINISHED
;
5193 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5194 ctxt
->sax
->error(ctxt
->userData
,
5195 "parsing XML declaration: '?>' expected\n");
5196 ctxt
->wellFormed
= 0;
5197 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5198 MOVETO_ENDTAG(CUR_PTR
);
5204 * xmlParseExternalSubset:
5205 * @ctxt: an XML parser context
5206 * @ExternalID: the external identifier
5207 * @SystemID: the system identifier (or URL)
5209 * parse Markup declarations from an external subset
5211 * [30] extSubset ::= textDecl? extSubsetDecl
5213 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5216 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
5217 const xmlChar
*SystemID
) {
5219 if ((RAW
== '<') && (NXT(1) == '?') &&
5220 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5222 xmlParseTextDecl(ctxt
);
5223 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
5225 * The XML REC instructs us to stop parsing right here
5227 ctxt
->instate
= XML_PARSER_EOF
;
5231 if (ctxt
->myDoc
== NULL
) {
5232 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
5234 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
5235 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
5237 ctxt
->instate
= XML_PARSER_DTD
;
5239 while (((RAW
== '<') && (NXT(1) == '?')) ||
5240 ((RAW
== '<') && (NXT(1) == '!')) ||
5241 (RAW
== '%') || IS_BLANK(CUR
)) {
5242 const xmlChar
*check
= CUR_PTR
;
5243 int cons
= ctxt
->input
->consumed
;
5246 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5247 xmlParseConditionalSections(ctxt
);
5248 } else if (IS_BLANK(CUR
)) {
5250 } else if (RAW
== '%') {
5251 xmlParsePEReference(ctxt
);
5253 xmlParseMarkupDecl(ctxt
);
5256 * Pop-up of finished entities.
5258 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
5261 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
5262 ctxt
->errNo
= XML_ERR_EXT_SUBSET_NOT_FINISHED
;
5263 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5264 ctxt
->sax
->error(ctxt
->userData
,
5265 "Content error in the external subset\n");
5266 ctxt
->wellFormed
= 0;
5267 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5273 ctxt
->errNo
= XML_ERR_EXT_SUBSET_NOT_FINISHED
;
5274 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5275 ctxt
->sax
->error(ctxt
->userData
,
5276 "Extra content at the end of the document\n");
5277 ctxt
->wellFormed
= 0;
5278 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5284 * xmlParseReference:
5285 * @ctxt: an XML parser context
5287 * parse and handle entity references in content. Depending on the SAX
5288 * interface, this may end up in a call to characters() if this is a
5289 * CharRef or a predefined entity, if there is no reference() callback,
5290 * or if the parser was asked to switch to that mode.
5292 * [67] Reference ::= EntityRef | CharRef
5295 xmlParseReference(xmlParserCtxtPtr ctxt
) {
5298 if (RAW
!= '&') return;
5300 if (NXT(1) == '#') {
5304 int value
= xmlParseCharRef(ctxt
);
5306 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
5308 * So we are using non-UTF-8 buffers
5309 * Check that the char fit on 8bits, if not
5310 * generate a CharRef.
5312 if (value
<= 0xFF) {
5315 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
5316 (!ctxt
->disableSAX
))
5317 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
5319 if ((hex
== 'x') || (hex
== 'X'))
5320 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
5322 snprintf((char *)out
, sizeof(out
), "#%d", value
);
5323 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
5324 (!ctxt
->disableSAX
))
5325 ctxt
->sax
->reference(ctxt
->userData
, out
);
5329 * Just encode the value in UTF-8
5331 COPY_BUF(0 ,out
, i
, value
);
5333 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
5334 (!ctxt
->disableSAX
))
5335 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
5338 ent
= xmlParseEntityRef(ctxt
);
5339 if (ent
== NULL
) return;
5340 if (!ctxt
->wellFormed
)
5342 if ((ent
->name
!= NULL
) &&
5343 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
5344 xmlNodePtr list
= NULL
;
5349 * The first reference to the entity trigger a parsing phase
5350 * where the ent->children is filled with the result from
5353 if (ent
->children
== NULL
) {
5355 value
= ent
->content
;
5358 * Check that this entity is well formed
5360 if ((value
!= NULL
) &&
5361 (value
[1] == 0) && (value
[0] == '<') &&
5362 (xmlStrEqual(ent
->name
, BAD_CAST
"lt"))) {
5364 * DONE: get definite answer on this !!!
5365 * Lots of entity decls are used to declare a single
5368 * Which seems to be valid since
5369 * 2.4: The ampersand character (&) and the left angle
5370 * bracket (<) may appear in their literal form only
5371 * when used ... They are also legal within the literal
5372 * entity value of an internal entity declaration;i
5373 * see "4.3.2 Well-Formed Parsed Entities".
5374 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5375 * Looking at the OASIS test suite and James Clark
5376 * tests, this is broken. However the XML REC uses
5377 * it. Is the XML REC not well-formed ????
5378 * This is a hack to avoid this problem
5380 * ANSWER: since lt gt amp .. are already defined,
5381 * this is a redefinition and hence the fact that the
5382 * content is not well balanced is not a Wf error, this
5383 * is lousy but acceptable.
5385 list
= xmlNewDocText(ctxt
->myDoc
, value
);
5387 if ((ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) &&
5388 (ent
->children
== NULL
)) {
5389 ent
->children
= list
;
5391 list
->parent
= (xmlNodePtr
) ent
;
5393 xmlFreeNodeList(list
);
5395 } else if (list
!= NULL
) {
5396 xmlFreeNodeList(list
);
5400 * 4.3.2: An internal general parsed entity is well-formed
5401 * if its replacement text matches the production labeled
5407 * This is a bit hackish but this seems the best
5408 * way to make sure both SAX and DOM entity support
5411 if (ctxt
->userData
== ctxt
)
5414 user_data
= ctxt
->userData
;
5416 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
5418 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
5419 value
, user_data
, &list
);
5421 } else if (ent
->etype
==
5422 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
5424 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
5425 ctxt
->sax
, user_data
, ctxt
->depth
,
5426 ent
->URI
, ent
->ExternalID
, &list
);
5430 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5431 ctxt
->sax
->error(ctxt
->userData
,
5432 "Internal: invalid entity type\n");
5434 if (ret
== XML_ERR_ENTITY_LOOP
) {
5435 ctxt
->errNo
= XML_ERR_ENTITY_LOOP
;
5436 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5437 ctxt
->sax
->error(ctxt
->userData
,
5438 "Detected entity reference loop\n");
5439 ctxt
->wellFormed
= 0;
5440 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5442 } else if ((ret
== 0) && (list
!= NULL
)) {
5443 if (((ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) ||
5444 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
))&&
5445 (ent
->children
== NULL
)) {
5446 ent
->children
= list
;
5447 if (ctxt
->replaceEntities
) {
5449 * Prune it directly in the generated document
5450 * except for single text nodes.
5452 if ((list
->type
== XML_TEXT_NODE
) &&
5453 (list
->next
== NULL
)) {
5454 list
->parent
= (xmlNodePtr
) ent
;
5457 while (list
!= NULL
) {
5458 list
->parent
= (xmlNodePtr
) ctxt
->node
;
5459 list
->doc
= ctxt
->myDoc
;
5460 if (list
->next
== NULL
)
5464 list
= ent
->children
;
5465 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
5466 xmlAddEntityReference(ent
, list
, NULL
);
5469 while (list
!= NULL
) {
5470 list
->parent
= (xmlNodePtr
) ent
;
5471 if (list
->next
== NULL
)
5477 xmlFreeNodeList(list
);
5480 } else if (ret
> 0) {
5482 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5483 ctxt
->sax
->error(ctxt
->userData
,
5484 "Entity value required\n");
5485 ctxt
->wellFormed
= 0;
5486 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5487 } else if (list
!= NULL
) {
5488 xmlFreeNodeList(list
);
5493 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
5494 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
5498 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
5500 } else if (ctxt
->replaceEntities
) {
5501 if ((ctxt
->node
!= NULL
) && (ent
->children
!= NULL
)) {
5503 * Seems we are generating the DOM content, do
5504 * a simple tree copy for all references except the first
5505 * In the first occurrence list contains the replacement
5508 xmlNodePtr
new = NULL
, cur
, firstChild
= NULL
;
5509 cur
= ent
->children
;
5510 while (cur
!= NULL
) {
5511 new = xmlCopyNode(cur
, 1);
5512 if (firstChild
== NULL
){
5515 xmlAddChild(ctxt
->node
, new);
5516 if (cur
== ent
->last
)
5520 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
5521 xmlAddEntityReference(ent
, firstChild
, new);
5524 * the name change is to avoid coalescing of the
5525 * node with a possible previous text one which
5526 * would make ent->children a dangling pointer
5528 if (ent
->children
->type
== XML_TEXT_NODE
)
5529 ent
->children
->name
= xmlStrdup(BAD_CAST
"nbktext");
5530 if ((ent
->last
!= ent
->children
) &&
5531 (ent
->last
->type
== XML_TEXT_NODE
))
5532 ent
->last
->name
= xmlStrdup(BAD_CAST
"nbktext");
5533 xmlAddChildList(ctxt
->node
, ent
->children
);
5537 * This is to avoid a nasty side effect, see
5538 * characters() in SAX.c
5545 * Probably running in SAX mode
5547 xmlParserInputPtr input
;
5549 input
= xmlNewEntityInputStream(ctxt
, ent
);
5550 xmlPushInput(ctxt
, input
);
5551 if ((ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) &&
5552 (RAW
== '<') && (NXT(1) == '?') &&
5553 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5554 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5555 xmlParseTextDecl(ctxt
);
5556 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
5558 * The XML REC instructs us to stop parsing right here
5560 ctxt
->instate
= XML_PARSER_EOF
;
5563 if (input
->standalone
== 1) {
5564 ctxt
->errNo
= XML_ERR_EXT_ENTITY_STANDALONE
;
5565 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5566 ctxt
->sax
->error(ctxt
->userData
,
5567 "external parsed entities cannot be standalone\n");
5568 ctxt
->wellFormed
= 0;
5569 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5577 if (val
== NULL
) return;
5579 * inline the entity.
5581 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
5582 (!ctxt
->disableSAX
))
5583 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
5589 * xmlParseEntityRef:
5590 * @ctxt: an XML parser context
5592 * parse ENTITY references declarations
5594 * [68] EntityRef ::= '&' Name ';'
5596 * [ WFC: Entity Declared ]
5597 * In a document without any DTD, a document with only an internal DTD
5598 * subset which contains no parameter entity references, or a document
5599 * with "standalone='yes'", the Name given in the entity reference
5600 * must match that in an entity declaration, except that well-formed
5601 * documents need not declare any of the following entities: amp, lt,
5602 * gt, apos, quot. The declaration of a parameter entity must precede
5603 * any reference to it. Similarly, the declaration of a general entity
5604 * must precede any reference to it which appears in a default value in an
5605 * attribute-list declaration. Note that if entities are declared in the
5606 * external subset or in external parameter entities, a non-validating
5607 * processor is not obligated to read and process their declarations;
5608 * for such documents, the rule that an entity must be declared is a
5609 * well-formedness constraint only if standalone='yes'.
5611 * [ WFC: Parsed Entity ]
5612 * An entity reference must not contain the name of an unparsed entity
5614 * Returns the xmlEntityPtr if found, or NULL otherwise.
5617 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
5619 xmlEntityPtr ent
= NULL
;
5625 name
= xmlParseName(ctxt
);
5627 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
5628 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5629 ctxt
->sax
->error(ctxt
->userData
,
5630 "xmlParseEntityRef: no name\n");
5631 ctxt
->wellFormed
= 0;
5632 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5637 * Ask first SAX for entity resolution, otherwise try the
5640 if (ctxt
->sax
!= NULL
) {
5641 if (ctxt
->sax
->getEntity
!= NULL
)
5642 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5644 ent
= xmlGetPredefinedEntity(name
);
5645 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
5646 ent
= getEntity(ctxt
, name
);
5650 * [ WFC: Entity Declared ]
5651 * In a document without any DTD, a document with only an
5652 * internal DTD subset which contains no parameter entity
5653 * references, or a document with "standalone='yes'", the
5654 * Name given in the entity reference must match that in an
5655 * entity declaration, except that well-formed documents
5656 * need not declare any of the following entities: amp, lt,
5658 * The declaration of a parameter entity must precede any
5660 * Similarly, the declaration of a general entity must
5661 * precede any reference to it which appears in a default
5662 * value in an attribute-list declaration. Note that if
5663 * entities are declared in the external subset or in
5664 * external parameter entities, a non-validating processor
5665 * is not obligated to read and process their declarations;
5666 * for such documents, the rule that an entity must be
5667 * declared is a well-formedness constraint only if
5671 if ((ctxt
->standalone
== 1) ||
5672 ((ctxt
->hasExternalSubset
== 0) &&
5673 (ctxt
->hasPErefs
== 0))) {
5674 ctxt
->errNo
= XML_ERR_UNDECLARED_ENTITY
;
5675 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5676 ctxt
->sax
->error(ctxt
->userData
,
5677 "Entity '%s' not defined\n", name
);
5678 ctxt
->wellFormed
= 0;
5680 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5682 ctxt
->errNo
= XML_WAR_UNDECLARED_ENTITY
;
5683 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5684 ctxt
->sax
->error(ctxt
->userData
,
5685 "Entity '%s' not defined\n", name
);
5691 * [ WFC: Parsed Entity ]
5692 * An entity reference must not contain the name of an
5695 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
5696 ctxt
->errNo
= XML_ERR_UNPARSED_ENTITY
;
5697 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5698 ctxt
->sax
->error(ctxt
->userData
,
5699 "Entity reference to unparsed entity %s\n", name
);
5700 ctxt
->wellFormed
= 0;
5701 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5705 * [ WFC: No External Entity References ]
5706 * Attribute values cannot contain direct or indirect
5707 * entity references to external entities.
5709 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
5710 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
5711 ctxt
->errNo
= XML_ERR_ENTITY_IS_EXTERNAL
;
5712 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5713 ctxt
->sax
->error(ctxt
->userData
,
5714 "Attribute references external entity '%s'\n", name
);
5715 ctxt
->wellFormed
= 0;
5716 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5719 * [ WFC: No < in Attribute Values ]
5720 * The replacement text of any entity referred to directly or
5721 * indirectly in an attribute value (other than "<") must
5724 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
5726 (!xmlStrEqual(ent
->name
, BAD_CAST
"lt")) &&
5727 (ent
->content
!= NULL
) &&
5728 (xmlStrchr(ent
->content
, '<'))) {
5729 ctxt
->errNo
= XML_ERR_LT_IN_ATTRIBUTE
;
5730 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5731 ctxt
->sax
->error(ctxt
->userData
,
5732 "'<' in entity '%s' is not allowed in attributes values\n", name
);
5733 ctxt
->wellFormed
= 0;
5734 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5738 * Internal check, no parameter entities here ...
5741 switch (ent
->etype
) {
5742 case XML_INTERNAL_PARAMETER_ENTITY
:
5743 case XML_EXTERNAL_PARAMETER_ENTITY
:
5744 ctxt
->errNo
= XML_ERR_ENTITY_IS_PARAMETER
;
5745 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5746 ctxt
->sax
->error(ctxt
->userData
,
5747 "Attempt to reference the parameter entity '%s'\n", name
);
5748 ctxt
->wellFormed
= 0;
5749 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5757 * [ WFC: No Recursion ]
5758 * A parsed entity must not contain a recursive reference
5759 * to itself, either directly or indirectly.
5760 * Done somewhere else
5764 ctxt
->errNo
= XML_ERR_ENTITYREF_SEMICOL_MISSING
;
5765 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5766 ctxt
->sax
->error(ctxt
->userData
,
5767 "xmlParseEntityRef: expecting ';'\n");
5768 ctxt
->wellFormed
= 0;
5769 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5778 * xmlParseStringEntityRef:
5779 * @ctxt: an XML parser context
5780 * @str: a pointer to an index in the string
5782 * parse ENTITY references declarations, but this version parses it from
5785 * [68] EntityRef ::= '&' Name ';'
5787 * [ WFC: Entity Declared ]
5788 * In a document without any DTD, a document with only an internal DTD
5789 * subset which contains no parameter entity references, or a document
5790 * with "standalone='yes'", the Name given in the entity reference
5791 * must match that in an entity declaration, except that well-formed
5792 * documents need not declare any of the following entities: amp, lt,
5793 * gt, apos, quot. The declaration of a parameter entity must precede
5794 * any reference to it. Similarly, the declaration of a general entity
5795 * must precede any reference to it which appears in a default value in an
5796 * attribute-list declaration. Note that if entities are declared in the
5797 * external subset or in external parameter entities, a non-validating
5798 * processor is not obligated to read and process their declarations;
5799 * for such documents, the rule that an entity must be declared is a
5800 * well-formedness constraint only if standalone='yes'.
5802 * [ WFC: Parsed Entity ]
5803 * An entity reference must not contain the name of an unparsed entity
5805 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5806 * is updated to the current location in the string.
5809 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
5813 xmlEntityPtr ent
= NULL
;
5815 if ((str
== NULL
) || (*str
== NULL
))
5822 name
= xmlParseStringName(ctxt
, &ptr
);
5824 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
5825 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5826 ctxt
->sax
->error(ctxt
->userData
,
5827 "xmlParseStringEntityRef: no name\n");
5828 ctxt
->wellFormed
= 0;
5829 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5834 * Ask first SAX for entity resolution, otherwise try the
5837 if (ctxt
->sax
!= NULL
) {
5838 if (ctxt
->sax
->getEntity
!= NULL
)
5839 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5841 ent
= xmlGetPredefinedEntity(name
);
5842 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
5843 ent
= getEntity(ctxt
, name
);
5847 * [ WFC: Entity Declared ]
5848 * In a document without any DTD, a document with only an
5849 * internal DTD subset which contains no parameter entity
5850 * references, or a document with "standalone='yes'", the
5851 * Name given in the entity reference must match that in an
5852 * entity declaration, except that well-formed documents
5853 * need not declare any of the following entities: amp, lt,
5855 * The declaration of a parameter entity must precede any
5857 * Similarly, the declaration of a general entity must
5858 * precede any reference to it which appears in a default
5859 * value in an attribute-list declaration. Note that if
5860 * entities are declared in the external subset or in
5861 * external parameter entities, a non-validating processor
5862 * is not obligated to read and process their declarations;
5863 * for such documents, the rule that an entity must be
5864 * declared is a well-formedness constraint only if
5868 if ((ctxt
->standalone
== 1) ||
5869 ((ctxt
->hasExternalSubset
== 0) &&
5870 (ctxt
->hasPErefs
== 0))) {
5871 ctxt
->errNo
= XML_ERR_UNDECLARED_ENTITY
;
5872 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5873 ctxt
->sax
->error(ctxt
->userData
,
5874 "Entity '%s' not defined\n", name
);
5875 ctxt
->wellFormed
= 0;
5876 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5878 ctxt
->errNo
= XML_WAR_UNDECLARED_ENTITY
;
5879 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
5880 ctxt
->sax
->warning(ctxt
->userData
,
5881 "Entity '%s' not defined\n", name
);
5886 * [ WFC: Parsed Entity ]
5887 * An entity reference must not contain the name of an
5890 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
5891 ctxt
->errNo
= XML_ERR_UNPARSED_ENTITY
;
5892 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5893 ctxt
->sax
->error(ctxt
->userData
,
5894 "Entity reference to unparsed entity %s\n", name
);
5895 ctxt
->wellFormed
= 0;
5896 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5900 * [ WFC: No External Entity References ]
5901 * Attribute values cannot contain direct or indirect
5902 * entity references to external entities.
5904 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
5905 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
5906 ctxt
->errNo
= XML_ERR_ENTITY_IS_EXTERNAL
;
5907 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5908 ctxt
->sax
->error(ctxt
->userData
,
5909 "Attribute references external entity '%s'\n", name
);
5910 ctxt
->wellFormed
= 0;
5911 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5914 * [ WFC: No < in Attribute Values ]
5915 * The replacement text of any entity referred to directly or
5916 * indirectly in an attribute value (other than "<") must
5919 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
5921 (!xmlStrEqual(ent
->name
, BAD_CAST
"lt")) &&
5922 (ent
->content
!= NULL
) &&
5923 (xmlStrchr(ent
->content
, '<'))) {
5924 ctxt
->errNo
= XML_ERR_LT_IN_ATTRIBUTE
;
5925 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5926 ctxt
->sax
->error(ctxt
->userData
,
5927 "'<' in entity '%s' is not allowed in attributes values\n", name
);
5928 ctxt
->wellFormed
= 0;
5929 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5933 * Internal check, no parameter entities here ...
5936 switch (ent
->etype
) {
5937 case XML_INTERNAL_PARAMETER_ENTITY
:
5938 case XML_EXTERNAL_PARAMETER_ENTITY
:
5939 ctxt
->errNo
= XML_ERR_ENTITY_IS_PARAMETER
;
5940 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5941 ctxt
->sax
->error(ctxt
->userData
,
5942 "Attempt to reference the parameter entity '%s'\n", name
);
5943 ctxt
->wellFormed
= 0;
5944 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5952 * [ WFC: No Recursion ]
5953 * A parsed entity must not contain a recursive reference
5954 * to itself, either directly or indirectly.
5955 * Done somewhere else
5959 ctxt
->errNo
= XML_ERR_ENTITYREF_SEMICOL_MISSING
;
5960 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
5961 ctxt
->sax
->error(ctxt
->userData
,
5962 "xmlParseStringEntityRef: expecting ';'\n");
5963 ctxt
->wellFormed
= 0;
5964 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
5974 * xmlParsePEReference:
5975 * @ctxt: an XML parser context
5977 * parse PEReference declarations
5978 * The entity content is handled directly by pushing it's content as
5979 * a new input stream.
5981 * [69] PEReference ::= '%' Name ';'
5983 * [ WFC: No Recursion ]
5984 * A parsed entity must not contain a recursive
5985 * reference to itself, either directly or indirectly.
5987 * [ WFC: Entity Declared ]
5988 * In a document without any DTD, a document with only an internal DTD
5989 * subset which contains no parameter entity references, or a document
5990 * with "standalone='yes'", ... ... The declaration of a parameter
5991 * entity must precede any reference to it...
5993 * [ VC: Entity Declared ]
5994 * In a document with an external subset or external parameter entities
5995 * with "standalone='no'", ... ... The declaration of a parameter entity
5996 * must precede any reference to it...
5999 * Parameter-entity references may only appear in the DTD.
6000 * NOTE: misleading but this is handled.
6003 xmlParsePEReference(xmlParserCtxtPtr ctxt
) {
6005 xmlEntityPtr entity
= NULL
;
6006 xmlParserInputPtr input
;
6010 name
= xmlParseName(ctxt
);
6012 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
6013 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6014 ctxt
->sax
->error(ctxt
->userData
,
6015 "xmlParsePEReference: no name\n");
6016 ctxt
->wellFormed
= 0;
6017 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6021 if ((ctxt
->sax
!= NULL
) &&
6022 (ctxt
->sax
->getParameterEntity
!= NULL
))
6023 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
,
6025 if (entity
== NULL
) {
6027 * [ WFC: Entity Declared ]
6028 * In a document without any DTD, a document with only an
6029 * internal DTD subset which contains no parameter entity
6030 * references, or a document with "standalone='yes'", ...
6031 * ... The declaration of a parameter entity must precede
6032 * any reference to it...
6034 if ((ctxt
->standalone
== 1) ||
6035 ((ctxt
->hasExternalSubset
== 0) &&
6036 (ctxt
->hasPErefs
== 0))) {
6037 ctxt
->errNo
= XML_ERR_UNDECLARED_ENTITY
;
6038 if ((!ctxt
->disableSAX
) &&
6039 (ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6040 ctxt
->sax
->error(ctxt
->userData
,
6041 "PEReference: %%%s; not found\n", name
);
6042 ctxt
->wellFormed
= 0;
6043 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6046 * [ VC: Entity Declared ]
6047 * In a document with an external subset or external
6048 * parameter entities with "standalone='no'", ...
6049 * ... The declaration of a parameter entity must precede
6050 * any reference to it...
6052 if ((!ctxt
->disableSAX
) &&
6053 (ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
6054 ctxt
->sax
->warning(ctxt
->userData
,
6055 "PEReference: %%%s; not found\n", name
);
6060 * Internal checking in case the entity quest barfed
6062 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
6063 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
6064 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
6065 ctxt
->sax
->warning(ctxt
->userData
,
6066 "Internal: %%%s; is not a parameter entity\n", name
);
6067 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
6068 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
6069 xmlPushInput(ctxt
, input
);
6073 * handle the extra spaces added before and after
6074 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6076 input
= xmlNewEntityInputStream(ctxt
, entity
);
6077 xmlPushInput(ctxt
, input
);
6078 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
6079 (RAW
== '<') && (NXT(1) == '?') &&
6080 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6081 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6082 xmlParseTextDecl(ctxt
);
6083 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6085 * The XML REC instructs us to stop parsing
6088 ctxt
->instate
= XML_PARSER_EOF
;
6095 ctxt
->hasPErefs
= 1;
6097 ctxt
->errNo
= XML_ERR_ENTITYREF_SEMICOL_MISSING
;
6098 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6099 ctxt
->sax
->error(ctxt
->userData
,
6100 "xmlParsePEReference: expecting ';'\n");
6101 ctxt
->wellFormed
= 0;
6102 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6110 * xmlParseStringPEReference:
6111 * @ctxt: an XML parser context
6112 * @str: a pointer to an index in the string
6114 * parse PEReference declarations
6116 * [69] PEReference ::= '%' Name ';'
6118 * [ WFC: No Recursion ]
6119 * A parsed entity must not contain a recursive
6120 * reference to itself, either directly or indirectly.
6122 * [ WFC: Entity Declared ]
6123 * In a document without any DTD, a document with only an internal DTD
6124 * subset which contains no parameter entity references, or a document
6125 * with "standalone='yes'", ... ... The declaration of a parameter
6126 * entity must precede any reference to it...
6128 * [ VC: Entity Declared ]
6129 * In a document with an external subset or external parameter entities
6130 * with "standalone='no'", ... ... The declaration of a parameter entity
6131 * must precede any reference to it...
6134 * Parameter-entity references may only appear in the DTD.
6135 * NOTE: misleading but this is handled.
6137 * Returns the string of the entity content.
6138 * str is updated to the current value of the index
6141 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
6145 xmlEntityPtr entity
= NULL
;
6147 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
6153 name
= xmlParseStringName(ctxt
, &ptr
);
6155 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
6156 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6157 ctxt
->sax
->error(ctxt
->userData
,
6158 "xmlParseStringPEReference: no name\n");
6159 ctxt
->wellFormed
= 0;
6160 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6166 if ((ctxt
->sax
!= NULL
) &&
6167 (ctxt
->sax
->getParameterEntity
!= NULL
))
6168 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
,
6170 if (entity
== NULL
) {
6172 * [ WFC: Entity Declared ]
6173 * In a document without any DTD, a document with only an
6174 * internal DTD subset which contains no parameter entity
6175 * references, or a document with "standalone='yes'", ...
6176 * ... The declaration of a parameter entity must precede
6177 * any reference to it...
6179 if ((ctxt
->standalone
== 1) ||
6180 ((ctxt
->hasExternalSubset
== 0) &&
6181 (ctxt
->hasPErefs
== 0))) {
6182 ctxt
->errNo
= XML_ERR_UNDECLARED_ENTITY
;
6183 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6184 ctxt
->sax
->error(ctxt
->userData
,
6185 "PEReference: %%%s; not found\n", name
);
6186 ctxt
->wellFormed
= 0;
6187 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6190 * [ VC: Entity Declared ]
6191 * In a document with an external subset or external
6192 * parameter entities with "standalone='no'", ...
6193 * ... The declaration of a parameter entity must
6194 * precede any reference to it...
6196 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
6197 ctxt
->sax
->warning(ctxt
->userData
,
6198 "PEReference: %%%s; not found\n", name
);
6203 * Internal checking in case the entity quest barfed
6205 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
6206 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
6207 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
6208 ctxt
->sax
->warning(ctxt
->userData
,
6209 "Internal: %%%s; is not a parameter entity\n", name
);
6212 ctxt
->hasPErefs
= 1;
6214 ctxt
->errNo
= XML_ERR_ENTITYREF_SEMICOL_MISSING
;
6215 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6216 ctxt
->sax
->error(ctxt
->userData
,
6217 "xmlParseStringPEReference: expecting ';'\n");
6218 ctxt
->wellFormed
= 0;
6219 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6229 * xmlParseDocTypeDecl:
6230 * @ctxt: an XML parser context
6232 * parse a DOCTYPE declaration
6234 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6235 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6237 * [ VC: Root Element Type ]
6238 * The Name in the document type declaration must match the element
6239 * type of the root element.
6243 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
6244 xmlChar
*name
= NULL
;
6245 xmlChar
*ExternalID
= NULL
;
6246 xmlChar
*URI
= NULL
;
6249 * We know that '<!DOCTYPE' has been detected.
6256 * Parse the DOCTYPE name.
6258 name
= xmlParseName(ctxt
);
6260 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
6261 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6262 ctxt
->sax
->error(ctxt
->userData
,
6263 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6264 ctxt
->wellFormed
= 0;
6265 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6267 ctxt
->intSubName
= name
;
6272 * Check for SystemID and ExternalID
6274 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
6276 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
6277 ctxt
->hasExternalSubset
= 1;
6279 ctxt
->extSubURI
= URI
;
6280 ctxt
->extSubSystem
= ExternalID
;
6285 * Create and update the internal subset.
6287 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
6288 (!ctxt
->disableSAX
))
6289 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
6292 * Is there any internal subset declarations ?
6293 * they are handled separately in xmlParseInternalSubset()
6299 * We should be at the end of the DOCTYPE declaration.
6302 ctxt
->errNo
= XML_ERR_DOCTYPE_NOT_FINISHED
;
6303 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6304 ctxt
->sax
->error(ctxt
->userData
, "DOCTYPE improperly terminated\n");
6305 ctxt
->wellFormed
= 0;
6306 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6312 * xmlParseInternalSubset:
6313 * @ctxt: an XML parser context
6315 * parse the internal subset declaration
6317 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6321 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
6323 * Is there any DTD definition ?
6326 ctxt
->instate
= XML_PARSER_DTD
;
6329 * Parse the succession of Markup declarations and
6331 * Subsequence (markupdecl | PEReference | S)*
6333 while (RAW
!= ']') {
6334 const xmlChar
*check
= CUR_PTR
;
6335 int cons
= ctxt
->input
->consumed
;
6338 xmlParseMarkupDecl(ctxt
);
6339 xmlParsePEReference(ctxt
);
6342 * Pop-up of finished entities.
6344 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6347 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
6348 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
6349 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6350 ctxt
->sax
->error(ctxt
->userData
,
6351 "xmlParseInternalSubset: error detected in Markup declaration\n");
6352 ctxt
->wellFormed
= 0;
6353 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6364 * We should be at the end of the DOCTYPE declaration.
6367 ctxt
->errNo
= XML_ERR_DOCTYPE_NOT_FINISHED
;
6368 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6369 ctxt
->sax
->error(ctxt
->userData
, "DOCTYPE improperly terminated\n");
6370 ctxt
->wellFormed
= 0;
6371 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6377 * xmlParseAttribute:
6378 * @ctxt: an XML parser context
6379 * @value: a xmlChar ** used to store the value of the attribute
6381 * parse an attribute
6383 * [41] Attribute ::= Name Eq AttValue
6385 * [ WFC: No External Entity References ]
6386 * Attribute values cannot contain direct or indirect entity references
6387 * to external entities.
6389 * [ WFC: No < in Attribute Values ]
6390 * The replacement text of any entity referred to directly or indirectly in
6391 * an attribute value (other than "<") must not contain a <.
6393 * [ VC: Attribute Value Type ]
6394 * The attribute must have been declared; the value must be of the type
6397 * [25] Eq ::= S? '=' S?
6401 * [NS 11] Attribute ::= QName Eq AttValue
6403 * Also the case QName == xmlns:??? is handled independently as a namespace
6406 * Returns the attribute name, and the value in *value.
6410 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
6411 xmlChar
*name
, *val
;
6415 name
= xmlParseName(ctxt
);
6417 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
6418 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6419 ctxt
->sax
->error(ctxt
->userData
, "error parsing attribute name\n");
6420 ctxt
->wellFormed
= 0;
6421 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6432 val
= xmlParseAttValue(ctxt
);
6433 ctxt
->instate
= XML_PARSER_CONTENT
;
6435 ctxt
->errNo
= XML_ERR_ATTRIBUTE_WITHOUT_VALUE
;
6436 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6437 ctxt
->sax
->error(ctxt
->userData
,
6438 "Specification mandate value for attribute %s\n", name
);
6439 ctxt
->wellFormed
= 0;
6440 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6446 * Check that xml:lang conforms to the specification
6447 * No more registered as an error, just generate a warning now
6448 * since this was deprecated in XML second edition
6450 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
6451 if (!xmlCheckLanguageID(val
)) {
6452 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
6453 ctxt
->sax
->warning(ctxt
->userData
,
6454 "Malformed value for xml:lang : %s\n", val
);
6459 * Check that xml:space conforms to the specification
6461 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
6462 if (xmlStrEqual(val
, BAD_CAST
"default"))
6464 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
6467 ctxt
->errNo
= XML_ERR_ATTRIBUTE_WITHOUT_VALUE
;
6468 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6469 ctxt
->sax
->error(ctxt
->userData
,
6470 "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6472 ctxt
->wellFormed
= 0;
6473 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6483 * @ctxt: an XML parser context
6485 * parse a start of tag either for rule element or
6486 * EmptyElement. In both case we don't parse the tag closing chars.
6488 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6490 * [ WFC: Unique Att Spec ]
6491 * No attribute name may appear more than once in the same start-tag or
6492 * empty-element tag.
6494 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6496 * [ WFC: Unique Att Spec ]
6497 * No attribute name may appear more than once in the same start-tag or
6498 * empty-element tag.
6502 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6504 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6506 * Returns the element name parsed
6510 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
6514 const xmlChar
**atts
= NULL
;
6519 if (RAW
!= '<') return(NULL
);
6522 name
= xmlParseName(ctxt
);
6524 ctxt
->errNo
= XML_ERR_NAME_REQUIRED
;
6525 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6526 ctxt
->sax
->error(ctxt
->userData
,
6527 "xmlParseStartTag: invalid element name\n");
6528 ctxt
->wellFormed
= 0;
6529 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6534 * Now parse the attributes, it ends up with the ending
6541 while ((RAW
!= '>') &&
6542 ((RAW
!= '/') || (NXT(1) != '>')) &&
6544 const xmlChar
*q
= CUR_PTR
;
6545 int cons
= ctxt
->input
->consumed
;
6547 attname
= xmlParseAttribute(ctxt
, &attvalue
);
6548 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
6550 * [ WFC: Unique Att Spec ]
6551 * No attribute name may appear more than once in the same
6552 * start-tag or empty-element tag.
6554 for (i
= 0; i
< nbatts
;i
+= 2) {
6555 if (xmlStrEqual(atts
[i
], attname
)) {
6556 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
6557 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6558 ctxt
->sax
->error(ctxt
->userData
,
6559 "Attribute %s redefined\n",
6561 ctxt
->wellFormed
= 0;
6562 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6570 * Add the pair to atts
6574 atts
= (const xmlChar
**) xmlMalloc(maxatts
* sizeof(xmlChar
*));
6576 xmlGenericError(xmlGenericErrorContext
,
6577 "malloc of %ld byte failed\n",
6578 maxatts
* (long)sizeof(xmlChar
*));
6581 } else if (nbatts
+ 4 > maxatts
) {
6583 atts
= (const xmlChar
**) xmlRealloc((void *) atts
,
6584 maxatts
* sizeof(xmlChar
*));
6586 xmlGenericError(xmlGenericErrorContext
,
6587 "realloc of %ld byte failed\n",
6588 maxatts
* (long)sizeof(xmlChar
*));
6592 atts
[nbatts
++] = attname
;
6593 atts
[nbatts
++] = attvalue
;
6594 atts
[nbatts
] = NULL
;
6595 atts
[nbatts
+ 1] = NULL
;
6597 if (attname
!= NULL
)
6599 if (attvalue
!= NULL
)
6605 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
6607 if (!IS_BLANK(RAW
)) {
6608 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
6609 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6610 ctxt
->sax
->error(ctxt
->userData
,
6611 "attributes construct error\n");
6612 ctxt
->wellFormed
= 0;
6613 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6616 if ((cons
== ctxt
->input
->consumed
) && (q
== CUR_PTR
)) {
6617 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
6618 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6619 ctxt
->sax
->error(ctxt
->userData
,
6620 "xmlParseStartTag: problem parsing attributes\n");
6621 ctxt
->wellFormed
= 0;
6622 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6629 * SAX: Start of Element !
6631 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
6632 (!ctxt
->disableSAX
))
6633 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
6636 for (i
= 0;i
< nbatts
;i
++) xmlFree((xmlChar
*) atts
[i
]);
6637 xmlFree((void *) atts
);
6644 * @ctxt: an XML parser context
6646 * parse an end of tag
6648 * [42] ETag ::= '</' Name S? '>'
6652 * [NS 9] ETag ::= '</' QName S? '>'
6656 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
6661 if ((RAW
!= '<') || (NXT(1) != '/')) {
6662 ctxt
->errNo
= XML_ERR_LTSLASH_REQUIRED
;
6663 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6664 ctxt
->sax
->error(ctxt
->userData
, "xmlParseEndTag: '</' not found\n");
6665 ctxt
->wellFormed
= 0;
6666 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6671 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
6674 * We should definitely be at the ending "S? '>'" part
6678 if ((!IS_CHAR(RAW
)) || (RAW
!= '>')) {
6679 ctxt
->errNo
= XML_ERR_GT_REQUIRED
;
6680 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6681 ctxt
->sax
->error(ctxt
->userData
, "End tag : expected '>'\n");
6682 ctxt
->wellFormed
= 0;
6683 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6688 * [ WFC: Element Type Match ]
6689 * The Name in an element's end-tag must match the element type in the
6693 if (name
!= (xmlChar
*)1) {
6694 ctxt
->errNo
= XML_ERR_TAG_NAME_MISMATCH
;
6695 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
)) {
6697 ctxt
->sax
->error(ctxt
->userData
,
6698 "Opening and ending tag mismatch: %s and %s\n",
6701 ctxt
->sax
->error(ctxt
->userData
,
6702 "Ending tag error for: %s\n", ctxt
->name
);
6706 ctxt
->wellFormed
= 0;
6707 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6711 * Recover in case of one missing close
6713 if ((ctxt
->nameNr
> 2) &&
6714 (xmlStrEqual(ctxt
->nameTab
[ctxt
->nameNr
-2], name
))) {
6727 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
6728 (!ctxt
->disableSAX
))
6729 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
6731 oldname
= namePop(ctxt
);
6733 if (oldname
!= NULL
) {
6735 xmlGenericError(xmlGenericErrorContext
,"Close: popped %s\n", oldname
);
6744 * @ctxt: an XML parser context
6746 * Parse escaped pure raw content.
6748 * [18] CDSect ::= CDStart CData CDEnd
6750 * [19] CDStart ::= '<![CDATA['
6752 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6754 * [21] CDEnd ::= ']]>'
6757 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
6758 xmlChar
*buf
= NULL
;
6760 int size
= XML_PARSER_BUFFER_SIZE
;
6766 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6767 (NXT(2) == '[') && (NXT(3) == 'C') &&
6768 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6769 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6775 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
6778 ctxt
->errNo
= XML_ERR_CDATA_NOT_FINISHED
;
6779 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6780 ctxt
->sax
->error(ctxt
->userData
,
6781 "CData section not finished\n");
6782 ctxt
->wellFormed
= 0;
6783 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6784 ctxt
->instate
= XML_PARSER_CONTENT
;
6790 ctxt
->errNo
= XML_ERR_CDATA_NOT_FINISHED
;
6791 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6792 ctxt
->sax
->error(ctxt
->userData
,
6793 "CData section not finished\n");
6794 ctxt
->wellFormed
= 0;
6795 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6796 ctxt
->instate
= XML_PARSER_CONTENT
;
6801 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
6803 xmlGenericError(xmlGenericErrorContext
,
6804 "malloc of %d byte failed\n", size
);
6807 while (IS_CHAR(cur
) &&
6808 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
6809 if (len
+ 5 >= size
) {
6811 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
6813 xmlGenericError(xmlGenericErrorContext
,
6814 "realloc of %d byte failed\n", size
);
6818 COPY_BUF(rl
,buf
,len
,r
);
6832 ctxt
->instate
= XML_PARSER_CONTENT
;
6834 ctxt
->errNo
= XML_ERR_CDATA_NOT_FINISHED
;
6835 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6836 ctxt
->sax
->error(ctxt
->userData
,
6837 "CData section not finished\n%.50s\n", buf
);
6838 ctxt
->wellFormed
= 0;
6839 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6846 * OK the buffer is to be consumed as cdata.
6848 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
6849 if (ctxt
->sax
->cdataBlock
!= NULL
)
6850 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
6851 else if (ctxt
->sax
->characters
!= NULL
)
6852 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
6859 * @ctxt: an XML parser context
6863 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6867 xmlParseContent(xmlParserCtxtPtr ctxt
) {
6869 while ((RAW
!= 0) &&
6870 ((RAW
!= '<') || (NXT(1) != '/'))) {
6871 const xmlChar
*test
= CUR_PTR
;
6872 int cons
= ctxt
->input
->consumed
;
6873 const xmlChar
*cur
= ctxt
->input
->cur
;
6876 * First case : a Processing Instruction.
6878 if ((*cur
== '<') && (cur
[1] == '?')) {
6883 * Second case : a CDSection
6885 else if ((*cur
== '<') && (NXT(1) == '!') &&
6886 (NXT(2) == '[') && (NXT(3) == 'C') &&
6887 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6888 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6890 xmlParseCDSect(ctxt
);
6894 * Third case : a comment
6896 else if ((*cur
== '<') && (NXT(1) == '!') &&
6897 (NXT(2) == '-') && (NXT(3) == '-')) {
6898 xmlParseComment(ctxt
);
6899 ctxt
->instate
= XML_PARSER_CONTENT
;
6903 * Fourth case : a sub-element.
6905 else if (*cur
== '<') {
6906 xmlParseElement(ctxt
);
6910 * Fifth case : a reference. If if has not been resolved,
6911 * parsing returns it's Name, create the node
6914 else if (*cur
== '&') {
6915 xmlParseReference(ctxt
);
6919 * Last case, text. Note that References are handled directly.
6922 xmlParseCharData(ctxt
, 0);
6927 * Pop-up of finished entities.
6929 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6933 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
6934 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
6935 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
6936 ctxt
->sax
->error(ctxt
->userData
,
6937 "detected an error in element content\n");
6938 ctxt
->wellFormed
= 0;
6939 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6940 ctxt
->instate
= XML_PARSER_EOF
;
6948 * @ctxt: an XML parser context
6950 * parse an XML element, this is highly recursive
6952 * [39] element ::= EmptyElemTag | STag content ETag
6954 * [ WFC: Element Type Match ]
6955 * The Name in an element's end-tag must match the element type in the
6958 * [ VC: Element Valid ]
6959 * An element is valid if there is a declaration matching elementdecl
6960 * where the Name matches the element type and one of the following holds:
6961 * - The declaration matches EMPTY and the element has no content.
6962 * - The declaration matches children and the sequence of child elements
6963 * belongs to the language generated by the regular expression in the
6964 * content model, with optional white space (characters matching the
6965 * nonterminal S) between each pair of child elements.
6966 * - The declaration matches Mixed and the content consists of character
6967 * data and child elements whose types match names in the content model.
6968 * - The declaration matches ANY, and the types of any child elements have
6973 xmlParseElement(xmlParserCtxtPtr ctxt
) {
6976 xmlParserNodeInfo node_info
;
6979 /* Capture start position */
6980 if (ctxt
->record_info
) {
6981 node_info
.begin_pos
= ctxt
->input
->consumed
+
6982 (CUR_PTR
- ctxt
->input
->base
);
6983 node_info
.begin_line
= ctxt
->input
->line
;
6986 if (ctxt
->spaceNr
== 0)
6987 spacePush(ctxt
, -1);
6989 spacePush(ctxt
, *ctxt
->space
);
6991 name
= xmlParseStartTag(ctxt
);
6996 namePush(ctxt
, name
);
7000 * [ VC: Root Element Type ]
7001 * The Name in the document type declaration must match the element
7002 * type of the root element.
7004 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
7005 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
7006 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
7009 * Check for an Empty Element.
7011 if ((RAW
== '/') && (NXT(1) == '>')) {
7013 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
7014 (!ctxt
->disableSAX
))
7015 ctxt
->sax
->endElement(ctxt
->userData
, name
);
7016 oldname
= namePop(ctxt
);
7018 if (oldname
!= NULL
) {
7020 xmlGenericError(xmlGenericErrorContext
,"Close: popped %s\n", oldname
);
7024 if ( ret
!= NULL
&& ctxt
->record_info
) {
7025 node_info
.end_pos
= ctxt
->input
->consumed
+
7026 (CUR_PTR
- ctxt
->input
->base
);
7027 node_info
.end_line
= ctxt
->input
->line
;
7028 node_info
.node
= ret
;
7029 xmlParserAddNodeInfo(ctxt
, &node_info
);
7036 ctxt
->errNo
= XML_ERR_GT_REQUIRED
;
7037 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7038 ctxt
->sax
->error(ctxt
->userData
,
7039 "Couldn't find end of Start Tag %s\n",
7041 ctxt
->wellFormed
= 0;
7042 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7045 * end of parsing of this node.
7048 oldname
= namePop(ctxt
);
7050 if (oldname
!= NULL
) {
7052 xmlGenericError(xmlGenericErrorContext
,"Close: popped %s\n", oldname
);
7058 * Capture end position and add node
7060 if ( ret
!= NULL
&& ctxt
->record_info
) {
7061 node_info
.end_pos
= ctxt
->input
->consumed
+
7062 (CUR_PTR
- ctxt
->input
->base
);
7063 node_info
.end_line
= ctxt
->input
->line
;
7064 node_info
.node
= ret
;
7065 xmlParserAddNodeInfo(ctxt
, &node_info
);
7071 * Parse the content of the element:
7073 xmlParseContent(ctxt
);
7074 if (!IS_CHAR(RAW
)) {
7075 ctxt
->errNo
= XML_ERR_TAG_NOT_FINISHED
;
7076 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7077 ctxt
->sax
->error(ctxt
->userData
,
7078 "Premature end of data in tag %s\n", name
);
7079 ctxt
->wellFormed
= 0;
7080 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7083 * end of parsing of this node.
7086 oldname
= namePop(ctxt
);
7088 if (oldname
!= NULL
) {
7090 xmlGenericError(xmlGenericErrorContext
,"Close: popped %s\n", oldname
);
7098 * parse the end of tag: '</' should be here.
7100 xmlParseEndTag(ctxt
);
7103 * Capture end position and add node
7105 if ( ret
!= NULL
&& ctxt
->record_info
) {
7106 node_info
.end_pos
= ctxt
->input
->consumed
+
7107 (CUR_PTR
- ctxt
->input
->base
);
7108 node_info
.end_line
= ctxt
->input
->line
;
7109 node_info
.node
= ret
;
7110 xmlParserAddNodeInfo(ctxt
, &node_info
);
7115 * xmlParseVersionNum:
7116 * @ctxt: an XML parser context
7118 * parse the XML version value.
7120 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7122 * Returns the string giving the XML version number, or NULL
7125 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
7126 xmlChar
*buf
= NULL
;
7131 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
7133 xmlGenericError(xmlGenericErrorContext
,
7134 "malloc of %d byte failed\n", size
);
7138 while (((cur
>= 'a') && (cur
<= 'z')) ||
7139 ((cur
>= 'A') && (cur
<= 'Z')) ||
7140 ((cur
>= '0') && (cur
<= '9')) ||
7141 (cur
== '_') || (cur
== '.') ||
7142 (cur
== ':') || (cur
== '-')) {
7143 if (len
+ 1 >= size
) {
7145 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
7147 xmlGenericError(xmlGenericErrorContext
,
7148 "realloc of %d byte failed\n", size
);
7161 * xmlParseVersionInfo:
7162 * @ctxt: an XML parser context
7164 * parse the XML version.
7166 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7168 * [25] Eq ::= S? '=' S?
7170 * Returns the version string, e.g. "1.0"
7174 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
7175 xmlChar
*version
= NULL
;
7178 if ((RAW
== 'v') && (NXT(1) == 'e') &&
7179 (NXT(2) == 'r') && (NXT(3) == 's') &&
7180 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7185 ctxt
->errNo
= XML_ERR_EQUAL_REQUIRED
;
7186 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7187 ctxt
->sax
->error(ctxt
->userData
,
7188 "xmlParseVersionInfo : expected '='\n");
7189 ctxt
->wellFormed
= 0;
7190 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7198 version
= xmlParseVersionNum(ctxt
);
7200 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
7201 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7202 ctxt
->sax
->error(ctxt
->userData
,
7203 "String not closed\n%.50s\n", q
);
7204 ctxt
->wellFormed
= 0;
7205 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7208 } else if (RAW
== '\''){
7211 version
= xmlParseVersionNum(ctxt
);
7213 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
7214 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7215 ctxt
->sax
->error(ctxt
->userData
,
7216 "String not closed\n%.50s\n", q
);
7217 ctxt
->wellFormed
= 0;
7218 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7222 ctxt
->errNo
= XML_ERR_STRING_NOT_STARTED
;
7223 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7224 ctxt
->sax
->error(ctxt
->userData
,
7225 "xmlParseVersionInfo : expected ' or \"\n");
7226 ctxt
->wellFormed
= 0;
7227 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7235 * @ctxt: an XML parser context
7237 * parse the XML encoding name
7239 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7241 * Returns the encoding name value or NULL
7244 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
7245 xmlChar
*buf
= NULL
;
7251 if (((cur
>= 'a') && (cur
<= 'z')) ||
7252 ((cur
>= 'A') && (cur
<= 'Z'))) {
7253 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
7255 xmlGenericError(xmlGenericErrorContext
,
7256 "malloc of %d byte failed\n", size
);
7263 while (((cur
>= 'a') && (cur
<= 'z')) ||
7264 ((cur
>= 'A') && (cur
<= 'Z')) ||
7265 ((cur
>= '0') && (cur
<= '9')) ||
7266 (cur
== '.') || (cur
== '_') ||
7268 if (len
+ 1 >= size
) {
7270 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
7272 xmlGenericError(xmlGenericErrorContext
,
7273 "realloc of %d byte failed\n", size
);
7288 ctxt
->errNo
= XML_ERR_ENCODING_NAME
;
7289 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7290 ctxt
->sax
->error(ctxt
->userData
, "Invalid XML encoding name\n");
7291 ctxt
->wellFormed
= 0;
7292 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7298 * xmlParseEncodingDecl:
7299 * @ctxt: an XML parser context
7301 * parse the XML encoding declaration
7303 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7305 * this setups the conversion filters.
7307 * Returns the encoding value or NULL
7311 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
7312 xmlChar
*encoding
= NULL
;
7316 if ((RAW
== 'e') && (NXT(1) == 'n') &&
7317 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7318 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7319 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7323 ctxt
->errNo
= XML_ERR_EQUAL_REQUIRED
;
7324 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7325 ctxt
->sax
->error(ctxt
->userData
,
7326 "xmlParseEncodingDecl : expected '='\n");
7327 ctxt
->wellFormed
= 0;
7328 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7336 encoding
= xmlParseEncName(ctxt
);
7338 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
7339 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7340 ctxt
->sax
->error(ctxt
->userData
,
7341 "String not closed\n%.50s\n", q
);
7342 ctxt
->wellFormed
= 0;
7343 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7346 } else if (RAW
== '\''){
7349 encoding
= xmlParseEncName(ctxt
);
7351 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
7352 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7353 ctxt
->sax
->error(ctxt
->userData
,
7354 "String not closed\n%.50s\n", q
);
7355 ctxt
->wellFormed
= 0;
7356 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7360 ctxt
->errNo
= XML_ERR_STRING_NOT_STARTED
;
7361 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7362 ctxt
->sax
->error(ctxt
->userData
,
7363 "xmlParseEncodingDecl : expected ' or \"\n");
7364 ctxt
->wellFormed
= 0;
7365 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7367 if (encoding
!= NULL
) {
7368 xmlCharEncoding enc
;
7369 xmlCharEncodingHandlerPtr handler
;
7371 if (ctxt
->input
->encoding
!= NULL
)
7372 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
7373 ctxt
->input
->encoding
= encoding
;
7375 enc
= xmlParseCharEncoding((const char *) encoding
);
7377 * registered set of known encodings
7379 if (enc
!= XML_CHAR_ENCODING_ERROR
) {
7380 xmlSwitchEncoding(ctxt
, enc
);
7381 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7382 ctxt
->input
->encoding
= NULL
;
7388 * fallback for unknown encodings
7390 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
7391 if (handler
!= NULL
) {
7392 xmlSwitchToEncoding(ctxt
, handler
);
7394 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
7395 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7396 ctxt
->sax
->error(ctxt
->userData
,
7397 "Unsupported encoding %s\n", encoding
);
7408 * @ctxt: an XML parser context
7410 * parse the XML standalone declaration
7412 * [32] SDDecl ::= S 'standalone' Eq
7413 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7415 * [ VC: Standalone Document Declaration ]
7416 * TODO The standalone document declaration must have the value "no"
7417 * if any external markup declarations contain declarations of:
7418 * - attributes with default values, if elements to which these
7419 * attributes apply appear in the document without specifications
7420 * of values for these attributes, or
7421 * - entities (other than amp, lt, gt, apos, quot), if references
7422 * to those entities appear in the document, or
7423 * - attributes with values subject to normalization, where the
7424 * attribute appears in the document with a value which will change
7425 * as a result of normalization, or
7426 * - element types with element content, if white space occurs directly
7427 * within any instance of those types.
7429 * Returns 1 if standalone, 0 otherwise
7433 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
7434 int standalone
= -1;
7437 if ((RAW
== 's') && (NXT(1) == 't') &&
7438 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7439 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7440 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7441 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7445 ctxt
->errNo
= XML_ERR_EQUAL_REQUIRED
;
7446 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7447 ctxt
->sax
->error(ctxt
->userData
,
7448 "XML standalone declaration : expected '='\n");
7449 ctxt
->wellFormed
= 0;
7450 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7457 if ((RAW
== 'n') && (NXT(1) == 'o')) {
7460 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
7465 ctxt
->errNo
= XML_ERR_STANDALONE_VALUE
;
7466 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7467 ctxt
->sax
->error(ctxt
->userData
,
7468 "standalone accepts only 'yes' or 'no'\n");
7469 ctxt
->wellFormed
= 0;
7470 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7473 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
7474 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7475 ctxt
->sax
->error(ctxt
->userData
, "String not closed\n");
7476 ctxt
->wellFormed
= 0;
7477 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7480 } else if (RAW
== '"'){
7482 if ((RAW
== 'n') && (NXT(1) == 'o')) {
7485 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
7490 ctxt
->errNo
= XML_ERR_STANDALONE_VALUE
;
7491 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7492 ctxt
->sax
->error(ctxt
->userData
,
7493 "standalone accepts only 'yes' or 'no'\n");
7494 ctxt
->wellFormed
= 0;
7495 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7498 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
7499 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7500 ctxt
->sax
->error(ctxt
->userData
, "String not closed\n");
7501 ctxt
->wellFormed
= 0;
7502 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7506 ctxt
->errNo
= XML_ERR_STRING_NOT_STARTED
;
7507 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7508 ctxt
->sax
->error(ctxt
->userData
,
7509 "Standalone value not found\n");
7510 ctxt
->wellFormed
= 0;
7511 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7519 * @ctxt: an XML parser context
7521 * parse an XML declaration header
7523 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7527 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
7531 * We know that '<?xml' is here.
7535 if (!IS_BLANK(RAW
)) {
7536 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
7537 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7538 ctxt
->sax
->error(ctxt
->userData
, "Blank needed after '<?xml'\n");
7539 ctxt
->wellFormed
= 0;
7540 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7545 * We must have the VersionInfo here.
7547 version
= xmlParseVersionInfo(ctxt
);
7548 if (version
== NULL
) {
7549 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7550 ctxt
->sax
->error(ctxt
->userData
,
7551 "Malformed declaration expecting version\n");
7552 ctxt
->wellFormed
= 0;
7553 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7555 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
7557 * TODO: Blueberry should be detected here
7559 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->warning
!= NULL
))
7560 ctxt
->sax
->warning(ctxt
->userData
, "Unsupported version '%s'\n",
7563 if (ctxt
->version
!= NULL
)
7564 xmlFree((void *) ctxt
->version
);
7565 ctxt
->version
= version
;
7569 * We may have the encoding declaration
7571 if (!IS_BLANK(RAW
)) {
7572 if ((RAW
== '?') && (NXT(1) == '>')) {
7576 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
7577 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7578 ctxt
->sax
->error(ctxt
->userData
, "Blank needed here\n");
7579 ctxt
->wellFormed
= 0;
7580 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7582 xmlParseEncodingDecl(ctxt
);
7583 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7585 * The XML REC instructs us to stop parsing right here
7591 * We may have the standalone status.
7593 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK(RAW
))) {
7594 if ((RAW
== '?') && (NXT(1) == '>')) {
7598 ctxt
->errNo
= XML_ERR_SPACE_REQUIRED
;
7599 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7600 ctxt
->sax
->error(ctxt
->userData
, "Blank needed here\n");
7601 ctxt
->wellFormed
= 0;
7602 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7605 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
7608 if ((RAW
== '?') && (NXT(1) == '>')) {
7610 } else if (RAW
== '>') {
7611 /* Deprecated old WD ... */
7612 ctxt
->errNo
= XML_ERR_XMLDECL_NOT_FINISHED
;
7613 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7614 ctxt
->sax
->error(ctxt
->userData
,
7615 "XML declaration must end-up with '?>'\n");
7616 ctxt
->wellFormed
= 0;
7617 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7620 ctxt
->errNo
= XML_ERR_XMLDECL_NOT_FINISHED
;
7621 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7622 ctxt
->sax
->error(ctxt
->userData
,
7623 "parsing XML declaration: '?>' expected\n");
7624 ctxt
->wellFormed
= 0;
7625 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7626 MOVETO_ENDTAG(CUR_PTR
);
7633 * @ctxt: an XML parser context
7635 * parse an XML Misc* optional field.
7637 * [27] Misc ::= Comment | PI | S
7641 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
7642 while (((RAW
== '<') && (NXT(1) == '?')) ||
7643 ((RAW
== '<') && (NXT(1) == '!') &&
7644 (NXT(2) == '-') && (NXT(3) == '-')) ||
7646 if ((RAW
== '<') && (NXT(1) == '?')) {
7648 } else if (IS_BLANK(CUR
)) {
7651 xmlParseComment(ctxt
);
7657 * @ctxt: an XML parser context
7659 * parse an XML document (and build a tree if using the standard SAX
7662 * [1] document ::= prolog element Misc*
7664 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7666 * Returns 0, -1 in case of error. the parser context is augmented
7667 * as a result of the parsing.
7671 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
7673 xmlCharEncoding enc
;
7680 * SAX: beginning of the document processing.
7682 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
7683 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
7685 if (ctxt
->encoding
== (const xmlChar
*)XML_CHAR_ENCODING_NONE
) {
7687 * Get the 4 first bytes and decode the charset
7688 * if enc != XML_CHAR_ENCODING_NONE
7689 * plug some encoding conversion routines.
7695 enc
= xmlDetectCharEncoding(start
, 4);
7696 if (enc
!= XML_CHAR_ENCODING_NONE
) {
7697 xmlSwitchEncoding(ctxt
, enc
);
7703 ctxt
->errNo
= XML_ERR_DOCUMENT_EMPTY
;
7704 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7705 ctxt
->sax
->error(ctxt
->userData
, "Document is empty\n");
7706 ctxt
->wellFormed
= 0;
7707 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7711 * Check for the XMLDecl in the Prolog.
7714 if ((RAW
== '<') && (NXT(1) == '?') &&
7715 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7716 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7719 * Note that we will switch encoding on the fly.
7721 xmlParseXMLDecl(ctxt
);
7722 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7724 * The XML REC instructs us to stop parsing right here
7728 ctxt
->standalone
= ctxt
->input
->standalone
;
7731 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
7733 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
7734 ctxt
->sax
->startDocument(ctxt
->userData
);
7737 * The Misc part of the Prolog
7743 * Then possibly doc type declaration(s) and more Misc
7744 * (doctypedecl Misc*)?
7747 if ((RAW
== '<') && (NXT(1) == '!') &&
7748 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7749 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7750 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7754 xmlParseDocTypeDecl(ctxt
);
7756 ctxt
->instate
= XML_PARSER_DTD
;
7757 xmlParseInternalSubset(ctxt
);
7761 * Create and update the external subset.
7764 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
7765 (!ctxt
->disableSAX
))
7766 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
7767 ctxt
->extSubSystem
, ctxt
->extSubURI
);
7771 ctxt
->instate
= XML_PARSER_PROLOG
;
7776 * Time to start parsing the tree itself
7780 ctxt
->errNo
= XML_ERR_DOCUMENT_EMPTY
;
7781 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7782 ctxt
->sax
->error(ctxt
->userData
,
7783 "Start tag expected, '<' not found\n");
7784 ctxt
->wellFormed
= 0;
7785 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7786 ctxt
->instate
= XML_PARSER_EOF
;
7788 ctxt
->instate
= XML_PARSER_CONTENT
;
7789 xmlParseElement(ctxt
);
7790 ctxt
->instate
= XML_PARSER_EPILOG
;
7794 * The Misc part at the end
7799 ctxt
->errNo
= XML_ERR_DOCUMENT_END
;
7800 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7801 ctxt
->sax
->error(ctxt
->userData
,
7802 "Extra content at the end of the document\n");
7803 ctxt
->wellFormed
= 0;
7804 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7806 ctxt
->instate
= XML_PARSER_EOF
;
7810 * SAX: end of the document processing.
7812 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
7813 ctxt
->sax
->endDocument(ctxt
->userData
);
7816 * Remove locally kept entity definitions if the tree was not built
7818 if ((ctxt
->myDoc
!= NULL
) &&
7819 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
7820 xmlFreeDoc(ctxt
->myDoc
);
7824 if (! ctxt
->wellFormed
) {
7832 * xmlParseExtParsedEnt:
7833 * @ctxt: an XML parser context
7835 * parse a general parsed entity
7836 * An external general parsed entity is well-formed if it matches the
7837 * production labeled extParsedEnt.
7839 * [78] extParsedEnt ::= TextDecl? content
7841 * Returns 0, or -1 in case of error. The parser context is augmented
7842 * as a result of the parsing.
7846 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
7848 xmlCharEncoding enc
;
7850 xmlDefaultSAXHandlerInit();
7855 * SAX: beginning of the document processing.
7857 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
7858 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
7861 * Get the 4 first bytes and decode the charset
7862 * if enc != XML_CHAR_ENCODING_NONE
7863 * plug some encoding conversion routines.
7869 enc
= xmlDetectCharEncoding(start
, 4);
7870 if (enc
!= XML_CHAR_ENCODING_NONE
) {
7871 xmlSwitchEncoding(ctxt
, enc
);
7876 ctxt
->errNo
= XML_ERR_DOCUMENT_EMPTY
;
7877 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7878 ctxt
->sax
->error(ctxt
->userData
, "Document is empty\n");
7879 ctxt
->wellFormed
= 0;
7880 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7884 * Check for the XMLDecl in the Prolog.
7887 if ((RAW
== '<') && (NXT(1) == '?') &&
7888 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7889 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7892 * Note that we will switch encoding on the fly.
7894 xmlParseXMLDecl(ctxt
);
7895 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7897 * The XML REC instructs us to stop parsing right here
7903 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
7905 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
7906 ctxt
->sax
->startDocument(ctxt
->userData
);
7909 * Doing validity checking on chunk doesn't make sense
7911 ctxt
->instate
= XML_PARSER_CONTENT
;
7913 ctxt
->loadsubset
= 0;
7916 xmlParseContent(ctxt
);
7918 if ((RAW
== '<') && (NXT(1) == '/')) {
7919 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
7920 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7921 ctxt
->sax
->error(ctxt
->userData
,
7922 "chunk is not well balanced\n");
7923 ctxt
->wellFormed
= 0;
7924 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7925 } else if (RAW
!= 0) {
7926 ctxt
->errNo
= XML_ERR_EXTRA_CONTENT
;
7927 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
7928 ctxt
->sax
->error(ctxt
->userData
,
7929 "extra content at the end of well balanced chunk\n");
7930 ctxt
->wellFormed
= 0;
7931 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
7935 * SAX: end of the document processing.
7937 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
7938 ctxt
->sax
->endDocument(ctxt
->userData
);
7940 if (! ctxt
->wellFormed
) return(-1);
7944 /************************************************************************
7946 * Progressive parsing interfaces *
7948 ************************************************************************/
7951 * xmlParseLookupSequence:
7952 * @ctxt: an XML parser context
7953 * @first: the first char to lookup
7954 * @next: the next char to lookup or zero
7955 * @third: the third char to lookup or zero
7957 * Try to find if a sequence (first, next, third) or just (first next) or
7958 * (first) is available in the input stream.
7959 * This function has the side effect of (possibly) advancing ctxt->checkIndex
7960 * to avoid rescanning sequences of bytes; it DOES change the state of the
7961 * parser, so do not use it liberally.
7963 * Returns the offset of the sequence from the current parsing point if the
7964 * full sequence is available, -1 otherwise.
7967 xmlParseLookupSequence(xmlParserCtxtPtr ctxt
, xmlChar first
,
7968 xmlChar next
, xmlChar third
) {
7970 xmlParserInputPtr in
;
7974 if (in
== NULL
) return(-1);
7975 base
= in
->cur
- in
->base
;
7976 if (base
< 0) return(-1);
7977 if (ctxt
->checkIndex
> base
)
7978 base
= ctxt
->checkIndex
;
7979 if (in
->buf
== NULL
) {
7983 buf
= in
->buf
->buffer
->content
;
7984 len
= in
->buf
->buffer
->use
;
7986 /* take into account the sequence length */
7987 if (third
) len
-= 2;
7988 else if (next
) len
--;
7989 for (;base
< len
;base
++) {
7990 if (buf
[base
] == first
) {
7992 if ((buf
[base
+ 1] != next
) ||
7993 (buf
[base
+ 2] != third
)) continue;
7994 } else if (next
!= 0) {
7995 if (buf
[base
+ 1] != next
) continue;
7997 ctxt
->checkIndex
= 0;
8000 xmlGenericError(xmlGenericErrorContext
,
8001 "PP: lookup '%c' found at %d\n",
8003 else if (third
== 0)
8004 xmlGenericError(xmlGenericErrorContext
,
8005 "PP: lookup '%c%c' found at %d\n",
8008 xmlGenericError(xmlGenericErrorContext
,
8009 "PP: lookup '%c%c%c' found at %d\n",
8010 first
, next
, third
, base
);
8012 return(base
- (in
->cur
- in
->base
));
8015 ctxt
->checkIndex
= base
;
8018 xmlGenericError(xmlGenericErrorContext
,
8019 "PP: lookup '%c' failed\n", first
);
8020 else if (third
== 0)
8021 xmlGenericError(xmlGenericErrorContext
,
8022 "PP: lookup '%c%c' failed\n", first
, next
);
8024 xmlGenericError(xmlGenericErrorContext
,
8025 "PP: lookup '%c%c%c' failed\n", first
, next
, third
);
8031 * xmlParseTryOrFinish:
8032 * @ctxt: an XML parser context
8033 * @terminate: last chunk indicator
8035 * Try to progress on parsing
8037 * Returns zero if no parsing was possible
8040 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
8046 switch (ctxt
->instate
) {
8047 case XML_PARSER_EOF
:
8048 xmlGenericError(xmlGenericErrorContext
,
8049 "PP: try EOF\n"); break;
8050 case XML_PARSER_START
:
8051 xmlGenericError(xmlGenericErrorContext
,
8052 "PP: try START\n"); break;
8053 case XML_PARSER_MISC
:
8054 xmlGenericError(xmlGenericErrorContext
,
8055 "PP: try MISC\n");break;
8056 case XML_PARSER_COMMENT
:
8057 xmlGenericError(xmlGenericErrorContext
,
8058 "PP: try COMMENT\n");break;
8059 case XML_PARSER_PROLOG
:
8060 xmlGenericError(xmlGenericErrorContext
,
8061 "PP: try PROLOG\n");break;
8062 case XML_PARSER_START_TAG
:
8063 xmlGenericError(xmlGenericErrorContext
,
8064 "PP: try START_TAG\n");break;
8065 case XML_PARSER_CONTENT
:
8066 xmlGenericError(xmlGenericErrorContext
,
8067 "PP: try CONTENT\n");break;
8068 case XML_PARSER_CDATA_SECTION
:
8069 xmlGenericError(xmlGenericErrorContext
,
8070 "PP: try CDATA_SECTION\n");break;
8071 case XML_PARSER_END_TAG
:
8072 xmlGenericError(xmlGenericErrorContext
,
8073 "PP: try END_TAG\n");break;
8074 case XML_PARSER_ENTITY_DECL
:
8075 xmlGenericError(xmlGenericErrorContext
,
8076 "PP: try ENTITY_DECL\n");break;
8077 case XML_PARSER_ENTITY_VALUE
:
8078 xmlGenericError(xmlGenericErrorContext
,
8079 "PP: try ENTITY_VALUE\n");break;
8080 case XML_PARSER_ATTRIBUTE_VALUE
:
8081 xmlGenericError(xmlGenericErrorContext
,
8082 "PP: try ATTRIBUTE_VALUE\n");break;
8083 case XML_PARSER_DTD
:
8084 xmlGenericError(xmlGenericErrorContext
,
8085 "PP: try DTD\n");break;
8086 case XML_PARSER_EPILOG
:
8087 xmlGenericError(xmlGenericErrorContext
,
8088 "PP: try EPILOG\n");break;
8090 xmlGenericError(xmlGenericErrorContext
,
8091 "PP: try PI\n");break;
8092 case XML_PARSER_IGNORE
:
8093 xmlGenericError(xmlGenericErrorContext
,
8094 "PP: try IGNORE\n");break;
8102 * Pop-up of finished entities.
8104 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
8107 if (ctxt
->input
==NULL
) break;
8108 if (ctxt
->input
->buf
== NULL
)
8109 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
8112 * If we are operating on converted input, try to flush
8113 * remaining chars to avoid them stalling in the non-converted
8116 if ((ctxt
->input
->buf
->raw
!= NULL
) &&
8117 (ctxt
->input
->buf
->raw
->use
> 0)) {
8118 int base
= ctxt
->input
->base
-
8119 ctxt
->input
->buf
->buffer
->content
;
8120 int current
= ctxt
->input
->cur
- ctxt
->input
->base
;
8122 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
8123 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
8124 ctxt
->input
->cur
= ctxt
->input
->base
+ current
;
8126 &ctxt
->input
->buf
->buffer
->content
[
8127 ctxt
->input
->buf
->buffer
->use
];
8129 avail
= ctxt
->input
->buf
->buffer
->use
-
8130 (ctxt
->input
->cur
- ctxt
->input
->base
);
8134 switch (ctxt
->instate
) {
8135 case XML_PARSER_EOF
:
8137 * Document parsing is done !
8140 case XML_PARSER_START
:
8141 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
8143 xmlCharEncoding enc
;
8146 * Very first chars read from the document flow.
8152 * Get the 4 first bytes and decode the charset
8153 * if enc != XML_CHAR_ENCODING_NONE
8154 * plug some encoding conversion routines.
8160 enc
= xmlDetectCharEncoding(start
, 4);
8161 if (enc
!= XML_CHAR_ENCODING_NONE
) {
8162 xmlSwitchEncoding(ctxt
, enc
);
8167 cur
= ctxt
->input
->cur
[0];
8168 next
= ctxt
->input
->cur
[1];
8170 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
8171 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
8172 &xmlDefaultSAXLocator
);
8173 ctxt
->errNo
= XML_ERR_DOCUMENT_EMPTY
;
8174 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
8175 ctxt
->sax
->error(ctxt
->userData
, "Document is empty\n");
8176 ctxt
->wellFormed
= 0;
8177 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
8178 ctxt
->instate
= XML_PARSER_EOF
;
8180 xmlGenericError(xmlGenericErrorContext
,
8181 "PP: entering EOF\n");
8183 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
8184 ctxt
->sax
->endDocument(ctxt
->userData
);
8187 if ((cur
== '<') && (next
== '?')) {
8188 /* PI or XML decl */
8189 if (avail
< 5) return(ret
);
8191 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
8193 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
8194 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
8195 &xmlDefaultSAXLocator
);
8196 if ((ctxt
->input
->cur
[2] == 'x') &&
8197 (ctxt
->input
->cur
[3] == 'm') &&
8198 (ctxt
->input
->cur
[4] == 'l') &&
8199 (IS_BLANK(ctxt
->input
->cur
[5]))) {
8202 xmlGenericError(xmlGenericErrorContext
,
8203 "PP: Parsing XML Decl\n");
8205 xmlParseXMLDecl(ctxt
);
8206 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
8208 * The XML REC instructs us to stop parsing right
8211 ctxt
->instate
= XML_PARSER_EOF
;
8214 ctxt
->standalone
= ctxt
->input
->standalone
;
8215 if ((ctxt
->encoding
== NULL
) &&
8216 (ctxt
->input
->encoding
!= NULL
))
8217 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
8218 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
8219 (!ctxt
->disableSAX
))
8220 ctxt
->sax
->startDocument(ctxt
->userData
);
8221 ctxt
->instate
= XML_PARSER_MISC
;
8223 xmlGenericError(xmlGenericErrorContext
,
8224 "PP: entering MISC\n");
8227 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
8228 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
8229 (!ctxt
->disableSAX
))
8230 ctxt
->sax
->startDocument(ctxt
->userData
);
8231 ctxt
->instate
= XML_PARSER_MISC
;
8233 xmlGenericError(xmlGenericErrorContext
,
8234 "PP: entering MISC\n");
8238 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
8239 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
8240 &xmlDefaultSAXLocator
);
8241 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
8242 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
8243 (!ctxt
->disableSAX
))
8244 ctxt
->sax
->startDocument(ctxt
->userData
);
8245 ctxt
->instate
= XML_PARSER_MISC
;
8247 xmlGenericError(xmlGenericErrorContext
,
8248 "PP: entering MISC\n");
8252 case XML_PARSER_MISC
:
8254 if (ctxt
->input
->buf
== NULL
)
8255 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
8257 avail
= ctxt
->input
->buf
->buffer
->use
- (ctxt
->input
->cur
- ctxt
->input
->base
);
8260 cur
= ctxt
->input
->cur
[0];
8261 next
= ctxt
->input
->cur
[1];
8262 if ((cur
== '<') && (next
== '?')) {
8264 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
8267 xmlGenericError(xmlGenericErrorContext
,
8268 "PP: Parsing PI\n");
8271 } else if ((cur
== '<') && (next
== '!') &&
8272 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
8274 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
8277 xmlGenericError(xmlGenericErrorContext
,
8278 "PP: Parsing Comment\n");
8280 xmlParseComment(ctxt
);
8281 ctxt
->instate
= XML_PARSER_MISC
;
8282 } else if ((cur
== '<') && (next
== '!') &&
8283 (ctxt
->input
->cur
[2] == 'D') && (ctxt
->input
->cur
[3] == 'O') &&
8284 (ctxt
->input
->cur
[4] == 'C') && (ctxt
->input
->cur
[5] == 'T') &&
8285 (ctxt
->input
->cur
[6] == 'Y') && (ctxt
->input
->cur
[7] == 'P') &&
8286 (ctxt
->input
->cur
[8] == 'E')) {
8288 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0))
8291 xmlGenericError(xmlGenericErrorContext
,
8292 "PP: Parsing internal subset\n");
8295 xmlParseDocTypeDecl(ctxt
);
8297 ctxt
->instate
= XML_PARSER_DTD
;
8299 xmlGenericError(xmlGenericErrorContext
,
8300 "PP: entering DTD\n");
8304 * Create and update the external subset.
8307 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
8308 (ctxt
->sax
->externalSubset
!= NULL
))
8309 ctxt
->sax
->externalSubset(ctxt
->userData
,
8310 ctxt
->intSubName
, ctxt
->extSubSystem
,
8313 ctxt
->instate
= XML_PARSER_PROLOG
;
8315 xmlGenericError(xmlGenericErrorContext
,
8316 "PP: entering PROLOG\n");
8319 } else if ((cur
== '<') && (next
== '!') &&
8323 ctxt
->instate
= XML_PARSER_START_TAG
;
8325 xmlGenericError(xmlGenericErrorContext
,
8326 "PP: entering START_TAG\n");
8330 case XML_PARSER_IGNORE
:
8331 xmlGenericError(xmlGenericErrorContext
,
8332 "PP: internal error, state == IGNORE");
8333 ctxt
->instate
= XML_PARSER_DTD
;
8335 xmlGenericError(xmlGenericErrorContext
,
8336 "PP: entering DTD\n");
8339 case XML_PARSER_PROLOG
:
8341 if (ctxt
->input
->buf
== NULL
)
8342 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
8344 avail
= ctxt
->input
->buf
->buffer
->use
- (ctxt
->input
->cur
- ctxt
->input
->base
);
8347 cur
= ctxt
->input
->cur
[0];
8348 next
= ctxt
->input
->cur
[1];
8349 if ((cur
== '<') && (next
== '?')) {
8351 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
8354 xmlGenericError(xmlGenericErrorContext
,
8355 "PP: Parsing PI\n");
8358 } else if ((cur
== '<') && (next
== '!') &&
8359 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
8361 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
8364 xmlGenericError(xmlGenericErrorContext
,
8365 "PP: Parsing Comment\n");
8367 xmlParseComment(ctxt
);
8368 ctxt
->instate
= XML_PARSER_PROLOG
;
8369 } else if ((cur
== '<') && (next
== '!') &&
8373 ctxt
->instate
= XML_PARSER_START_TAG
;
8375 xmlGenericError(xmlGenericErrorContext
,
8376 "PP: entering START_TAG\n");
8380 case XML_PARSER_EPILOG
:
8382 if (ctxt
->input
->buf
== NULL
)
8383 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
8385 avail
= ctxt
->input
->buf
->buffer
->use
- (ctxt
->input
->cur
- ctxt
->input
->base
);
8388 cur
= ctxt
->input
->cur
[0];
8389 next
= ctxt
->input
->cur
[1];
8390 if ((cur
== '<') && (next
== '?')) {
8392 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
8395 xmlGenericError(xmlGenericErrorContext
,
8396 "PP: Parsing PI\n");
8399 ctxt
->instate
= XML_PARSER_EPILOG
;
8400 } else if ((cur
== '<') && (next
== '!') &&
8401 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
8403 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
8406 xmlGenericError(xmlGenericErrorContext
,
8407 "PP: Parsing Comment\n");
8409 xmlParseComment(ctxt
);
8410 ctxt
->instate
= XML_PARSER_EPILOG
;
8411 } else if ((cur
== '<') && (next
== '!') &&
8415 ctxt
->errNo
= XML_ERR_DOCUMENT_END
;
8416 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
8417 ctxt
->sax
->error(ctxt
->userData
,
8418 "Extra content at the end of the document\n");
8419 ctxt
->wellFormed
= 0;
8420 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
8421 ctxt
->instate
= XML_PARSER_EOF
;
8423 xmlGenericError(xmlGenericErrorContext
,
8424 "PP: entering EOF\n");
8426 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
8427 ctxt
->sax
->endDocument(ctxt
->userData
);
8431 case XML_PARSER_START_TAG
: {
8432 xmlChar
*name
, *oldname
;
8434 if ((avail
< 2) && (ctxt
->inputNr
== 1))
8436 cur
= ctxt
->input
->cur
[0];
8438 ctxt
->errNo
= XML_ERR_DOCUMENT_EMPTY
;
8439 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
8440 ctxt
->sax
->error(ctxt
->userData
,
8441 "Start tag expect, '<' not found\n");
8442 ctxt
->wellFormed
= 0;
8443 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
8444 ctxt
->instate
= XML_PARSER_EOF
;
8446 xmlGenericError(xmlGenericErrorContext
,
8447 "PP: entering EOF\n");
8449 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
8450 ctxt
->sax
->endDocument(ctxt
->userData
);
8454 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0))
8456 if (ctxt
->spaceNr
== 0)
8457 spacePush(ctxt
, -1);
8459 spacePush(ctxt
, *ctxt
->space
);
8460 name
= xmlParseStartTag(ctxt
);
8463 ctxt
->instate
= XML_PARSER_EOF
;
8465 xmlGenericError(xmlGenericErrorContext
,
8466 "PP: entering EOF\n");
8468 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
8469 ctxt
->sax
->endDocument(ctxt
->userData
);
8472 namePush(ctxt
, xmlStrdup(name
));
8475 * [ VC: Root Element Type ]
8476 * The Name in the document type declaration must match
8477 * the element type of the root element.
8479 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
8480 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
8481 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
8484 * Check for an Empty Element.
8486 if ((RAW
== '/') && (NXT(1) == '>')) {
8488 if ((ctxt
->sax
!= NULL
) &&
8489 (ctxt
->sax
->endElement
!= NULL
) && (!ctxt
->disableSAX
))
8490 ctxt
->sax
->endElement(ctxt
->userData
, name
);
8492 oldname
= namePop(ctxt
);
8494 if (oldname
!= NULL
) {
8496 xmlGenericError(xmlGenericErrorContext
,"Close: popped %s\n", oldname
);
8500 if (ctxt
->name
== NULL
) {
8501 ctxt
->instate
= XML_PARSER_EPILOG
;
8503 xmlGenericError(xmlGenericErrorContext
,
8504 "PP: entering EPILOG\n");
8507 ctxt
->instate
= XML_PARSER_CONTENT
;
8509 xmlGenericError(xmlGenericErrorContext
,
8510 "PP: entering CONTENT\n");
8518 ctxt
->errNo
= XML_ERR_GT_REQUIRED
;
8519 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
8520 ctxt
->sax
->error(ctxt
->userData
,
8521 "Couldn't find end of Start Tag %s\n",
8523 ctxt
->wellFormed
= 0;
8524 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
8527 * end of parsing of this node.
8530 oldname
= namePop(ctxt
);
8532 if (oldname
!= NULL
) {
8534 xmlGenericError(xmlGenericErrorContext
,"Close: popped %s\n", oldname
);
8540 ctxt
->instate
= XML_PARSER_CONTENT
;
8542 xmlGenericError(xmlGenericErrorContext
,
8543 "PP: entering CONTENT\n");
8547 case XML_PARSER_CONTENT
: {
8548 const xmlChar
*test
;
8550 if ((avail
< 2) && (ctxt
->inputNr
== 1))
8552 cur
= ctxt
->input
->cur
[0];
8553 next
= ctxt
->input
->cur
[1];
8556 cons
= ctxt
->input
->consumed
;
8557 if ((cur
== '<') && (next
== '?')) {
8559 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
8562 xmlGenericError(xmlGenericErrorContext
,
8563 "PP: Parsing PI\n");
8566 } else if ((cur
== '<') && (next
== '!') &&
8567 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
8569 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
8572 xmlGenericError(xmlGenericErrorContext
,
8573 "PP: Parsing Comment\n");
8575 xmlParseComment(ctxt
);
8576 ctxt
->instate
= XML_PARSER_CONTENT
;
8577 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
8578 (ctxt
->input
->cur
[2] == '[') && (NXT(3) == 'C') &&
8579 (ctxt
->input
->cur
[4] == 'D') && (NXT(5) == 'A') &&
8580 (ctxt
->input
->cur
[6] == 'T') && (NXT(7) == 'A') &&
8581 (ctxt
->input
->cur
[8] == '[')) {
8583 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
8585 xmlGenericError(xmlGenericErrorContext
,
8586 "PP: entering CDATA_SECTION\n");
8589 } else if ((cur
== '<') && (next
== '!') &&
8592 } else if ((cur
== '<') && (next
== '/')) {
8593 ctxt
->instate
= XML_PARSER_END_TAG
;
8595 xmlGenericError(xmlGenericErrorContext
,
8596 "PP: entering END_TAG\n");
8599 } else if (cur
== '<') {
8600 ctxt
->instate
= XML_PARSER_START_TAG
;
8602 xmlGenericError(xmlGenericErrorContext
,
8603 "PP: entering START_TAG\n");
8606 } else if (cur
== '&') {
8608 (xmlParseLookupSequence(ctxt
, ';', 0, 0) < 0))
8611 xmlGenericError(xmlGenericErrorContext
,
8612 "PP: Parsing Reference\n");
8614 xmlParseReference(ctxt
);
8616 /* TODO Avoid the extra copy, handle directly !!! */
8618 * Goal of the following test is:
8619 * - minimize calls to the SAX 'character' callback
8620 * when they are mergeable
8621 * - handle a problem for isBlank when we only parse
8622 * a sequence of blank chars and the next one is
8623 * not available to check against '<' presence.
8624 * - tries to homogenize the differences in SAX
8625 * callbacks between the push and pull versions
8628 if ((ctxt
->inputNr
== 1) &&
8629 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
8631 (xmlParseLookupSequence(ctxt
, '<', 0, 0) < 0))
8634 ctxt
->checkIndex
= 0;
8636 xmlGenericError(xmlGenericErrorContext
,
8637 "PP: Parsing char data\n");
8639 xmlParseCharData(ctxt
, 0);
8642 * Pop-up of finished entities.
8644 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
8646 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
8647 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
8648 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
8649 ctxt
->sax
->error(ctxt
->userData
,
8650 "detected an error in element content\n");
8651 ctxt
->wellFormed
= 0;
8652 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
8653 ctxt
->instate
= XML_PARSER_EOF
;
8658 case XML_PARSER_CDATA_SECTION
: {
8660 * Push mode needs the SAX callback for
8661 * cdataBlock to merge back contiguous callbacks.
8665 base
= xmlParseLookupSequence(ctxt
, ']', ']', '>');
8667 if (avail
>= XML_PARSER_BIG_BUFFER_SIZE
+ 2) {
8668 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
8669 if (ctxt
->sax
->cdataBlock
!= NULL
)
8670 ctxt
->sax
->cdataBlock(ctxt
->userData
, ctxt
->input
->cur
,
8671 XML_PARSER_BIG_BUFFER_SIZE
);
8673 SKIP(XML_PARSER_BIG_BUFFER_SIZE
);
8674 ctxt
->checkIndex
= 0;
8678 if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
8679 (!ctxt
->disableSAX
)) {
8680 if (ctxt
->sax
->cdataBlock
!= NULL
)
8681 ctxt
->sax
->cdataBlock(ctxt
->userData
,
8682 ctxt
->input
->cur
, base
);
8685 ctxt
->checkIndex
= 0;
8686 ctxt
->instate
= XML_PARSER_CONTENT
;
8688 xmlGenericError(xmlGenericErrorContext
,
8689 "PP: entering CONTENT\n");
8694 case XML_PARSER_END_TAG
:
8698 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0))
8700 xmlParseEndTag(ctxt
);
8701 if (ctxt
->name
== NULL
) {
8702 ctxt
->instate
= XML_PARSER_EPILOG
;
8704 xmlGenericError(xmlGenericErrorContext
,
8705 "PP: entering EPILOG\n");
8708 ctxt
->instate
= XML_PARSER_CONTENT
;
8710 xmlGenericError(xmlGenericErrorContext
,
8711 "PP: entering CONTENT\n");
8715 case XML_PARSER_DTD
: {
8717 * Sorry but progressive parsing of the internal subset
8718 * is not expected to be supported. We first check that
8719 * the full content of the internal subset is available and
8720 * the parsing is launched only at that point.
8721 * Internal subset ends up with "']' S? '>'" in an unescaped
8722 * section and not in a ']]>' sequence which are conditional
8723 * sections (whoever argued to keep that crap in XML deserve
8724 * a place in hell !).
8730 base
= ctxt
->input
->cur
- ctxt
->input
->base
;
8731 if (base
< 0) return(0);
8732 if (ctxt
->checkIndex
> base
)
8733 base
= ctxt
->checkIndex
;
8734 buf
= ctxt
->input
->buf
->buffer
->content
;
8735 for (;(unsigned int) base
< ctxt
->input
->buf
->buffer
->use
;
8738 if (buf
[base
] == quote
)
8742 if (buf
[base
] == '"') {
8746 if (buf
[base
] == '\'') {
8750 if (buf
[base
] == ']') {
8751 if ((unsigned int) base
+1 >=
8752 ctxt
->input
->buf
->buffer
->use
)
8754 if (buf
[base
+ 1] == ']') {
8755 /* conditional crap, skip both ']' ! */
8760 (unsigned int) base
+ i
< ctxt
->input
->buf
->buffer
->use
;
8762 if (buf
[base
+ i
] == '>')
8763 goto found_end_int_subset
;
8769 * We didn't find the end of the internal subset
8772 ctxt
->checkIndex
= base
;
8775 xmlGenericError(xmlGenericErrorContext
,
8776 "PP: lookup of int subset end filed\n");
8780 found_end_int_subset
:
8781 xmlParseInternalSubset(ctxt
);
8783 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
8784 (ctxt
->sax
->externalSubset
!= NULL
))
8785 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
8786 ctxt
->extSubSystem
, ctxt
->extSubURI
);
8788 ctxt
->instate
= XML_PARSER_PROLOG
;
8789 ctxt
->checkIndex
= 0;
8791 xmlGenericError(xmlGenericErrorContext
,
8792 "PP: entering PROLOG\n");
8796 case XML_PARSER_COMMENT
:
8797 xmlGenericError(xmlGenericErrorContext
,
8798 "PP: internal error, state == COMMENT\n");
8799 ctxt
->instate
= XML_PARSER_CONTENT
;
8801 xmlGenericError(xmlGenericErrorContext
,
8802 "PP: entering CONTENT\n");
8806 xmlGenericError(xmlGenericErrorContext
,
8807 "PP: internal error, state == PI\n");
8808 ctxt
->instate
= XML_PARSER_CONTENT
;
8810 xmlGenericError(xmlGenericErrorContext
,
8811 "PP: entering CONTENT\n");
8814 case XML_PARSER_ENTITY_DECL
:
8815 xmlGenericError(xmlGenericErrorContext
,
8816 "PP: internal error, state == ENTITY_DECL\n");
8817 ctxt
->instate
= XML_PARSER_DTD
;
8819 xmlGenericError(xmlGenericErrorContext
,
8820 "PP: entering DTD\n");
8823 case XML_PARSER_ENTITY_VALUE
:
8824 xmlGenericError(xmlGenericErrorContext
,
8825 "PP: internal error, state == ENTITY_VALUE\n");
8826 ctxt
->instate
= XML_PARSER_CONTENT
;
8828 xmlGenericError(xmlGenericErrorContext
,
8829 "PP: entering DTD\n");
8832 case XML_PARSER_ATTRIBUTE_VALUE
:
8833 xmlGenericError(xmlGenericErrorContext
,
8834 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8835 ctxt
->instate
= XML_PARSER_START_TAG
;
8837 xmlGenericError(xmlGenericErrorContext
,
8838 "PP: entering START_TAG\n");
8841 case XML_PARSER_SYSTEM_LITERAL
:
8842 xmlGenericError(xmlGenericErrorContext
,
8843 "PP: internal error, state == SYSTEM_LITERAL\n");
8844 ctxt
->instate
= XML_PARSER_START_TAG
;
8846 xmlGenericError(xmlGenericErrorContext
,
8847 "PP: entering START_TAG\n");
8850 case XML_PARSER_PUBLIC_LITERAL
:
8851 xmlGenericError(xmlGenericErrorContext
,
8852 "PP: internal error, state == PUBLIC_LITERAL\n");
8853 ctxt
->instate
= XML_PARSER_START_TAG
;
8855 xmlGenericError(xmlGenericErrorContext
,
8856 "PP: entering START_TAG\n");
8863 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
8870 * @ctxt: an XML parser context
8871 * @chunk: a char array
8872 * @size: the size in bytes of the chunk
8873 * @terminate: last chunk indicator
8875 * Parse a Chunk of memory
8877 * Returns zero if no error, the xmlParserErrors otherwise.
8880 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
8882 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
8883 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8884 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
8885 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
8887 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
8888 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
8889 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
8891 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->use
];
8893 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
8896 if ((terminate
) || (ctxt
->input
->buf
->buffer
->use
> 80))
8897 xmlParseTryOrFinish(ctxt
, terminate
);
8898 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
8899 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
8900 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
8901 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
8902 (in
->raw
!= NULL
)) {
8905 nbchars
= xmlCharEncInFunc(in
->encoder
, in
->buffer
, in
->raw
);
8907 xmlGenericError(xmlGenericErrorContext
,
8908 "xmlParseChunk: encoder error\n");
8909 return(XML_ERR_INVALID_ENCODING
);
8914 xmlParseTryOrFinish(ctxt
, terminate
);
8917 * Check for termination
8920 if (ctxt
->input
->buf
== NULL
)
8921 avail
= ctxt
->input
->length
-
8922 (ctxt
->input
->cur
- ctxt
->input
->base
);
8924 avail
= ctxt
->input
->buf
->buffer
->use
-
8925 (ctxt
->input
->cur
- ctxt
->input
->base
);
8927 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
8928 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
8929 ctxt
->errNo
= XML_ERR_DOCUMENT_END
;
8930 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
8931 ctxt
->sax
->error(ctxt
->userData
,
8932 "Extra content at the end of the document\n");
8933 ctxt
->wellFormed
= 0;
8934 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
8936 if ((ctxt
->instate
== XML_PARSER_EPILOG
) && (avail
> 0)) {
8937 ctxt
->errNo
= XML_ERR_DOCUMENT_END
;
8938 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
8939 ctxt
->sax
->error(ctxt
->userData
,
8940 "Extra content at the end of the document\n");
8941 ctxt
->wellFormed
= 0;
8942 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
8945 if (ctxt
->instate
!= XML_PARSER_EOF
) {
8946 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
8947 ctxt
->sax
->endDocument(ctxt
->userData
);
8949 ctxt
->instate
= XML_PARSER_EOF
;
8951 return((xmlParserErrors
) ctxt
->errNo
);
8954 /************************************************************************
8956 * I/O front end functions to the parser *
8958 ************************************************************************/
8962 * @ctxt: an XML parser context
8964 * Blocks further parser processing
8967 xmlStopParser(xmlParserCtxtPtr ctxt
) {
8968 ctxt
->instate
= XML_PARSER_EOF
;
8969 if (ctxt
->input
!= NULL
)
8970 ctxt
->input
->cur
= BAD_CAST
"";
8974 * xmlCreatePushParserCtxt:
8975 * @sax: a SAX handler
8976 * @user_data: The user data returned on SAX callbacks
8977 * @chunk: a pointer to an array of chars
8978 * @size: number of chars in the array
8979 * @filename: an optional file name or URI
8981 * Create a parser context for using the XML parser in push mode.
8982 * If @chunk is non-NULL and @size is positive, the data is used to detect
8983 * the encoding. The remaining characters will be parsed so they
8984 * don't need to be fed in again through xmlParseChunk.
8985 * To allow content encoding detection, @size should be >= 4
8986 * The value of @filename is used for fetching external entities
8987 * and error/warning reports.
8989 * Returns the new parser context or NULL
/*
 * xmlCreatePushParserCtxt: set up a parser context that is fed data in
 * chunks. An input buffer is allocated with xmlAllocParserInputBuffer(),
 * attached to a new input stream and pushed on the context; when an
 * initial chunk is supplied it is pushed into that buffer before the
 * function finishes.
 *
 * sax:       handler block whose contents are copied with memcpy() into a
 *            newly allocated ctxt->sax
 * user_data: stored in ctxt->userData when not NULL
 * chunk:     optional first bytes of the document
 * size:      number of bytes in chunk
 * filename:  used for ctxt->directory and the input stream filename,
 *            may be NULL
 *
 * NOTE(review): this listing skips several source lines (for example
 * 9010-9014 and 9019-9022), so the failure paths are only partly
 * visible here - confirm them against the complete file.
 */
8993 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
8994 const char *chunk
, int size
, const char *filename
) {
8995 xmlParserCtxtPtr ctxt
;
8996 xmlParserInputPtr inputStream
;
8997 xmlParserInputBufferPtr buf
;
8998 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
9001 * plug some encoding conversion routines
/* The encoding is only sniffed when at least 4 bytes of data are given. */
9003 if ((chunk
!= NULL
) && (size
>= 4))
9004 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
9006 buf
= xmlAllocParserInputBuffer(enc
);
9007 if (buf
== NULL
) return(NULL
);
9009 ctxt
= xmlNewParserCtxt();
9015 if (ctxt
->sax
!= &xmlDefaultSAXHandler
)
/* Give the context its own handler block, filled from sax below. */
9017 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
9018 if (ctxt
->sax
== NULL
) {
9023 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
9024 if (user_data
!= NULL
)
9025 ctxt
->userData
= user_data
;
/* The base directory is derived from filename when one is given. */
9027 if (filename
== NULL
) {
9028 ctxt
->directory
= NULL
;
9030 ctxt
->directory
= xmlParserGetDirectory(filename
);
9033 inputStream
= xmlNewInputStream(ctxt
);
9034 if (inputStream
== NULL
) {
9035 xmlFreeParserCtxt(ctxt
);
9039 if (filename
== NULL
)
9040 inputStream
->filename
= NULL
;
9042 inputStream
->filename
= (char *)
9043 xmlNormalizeWindowsPath((const xmlChar
*) filename
);
/* The stream reads straight from the content of the allocated buffer. */
9044 inputStream
->buf
= buf
;
9045 inputStream
->base
= inputStream
->buf
->buffer
->content
;
9046 inputStream
->cur
= inputStream
->buf
->buffer
->content
;
9048 &inputStream
->buf
->buffer
->content
[inputStream
->buf
->buffer
->use
];
9050 inputPush(ctxt
, inputStream
);
9052 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
9053 (ctxt
->input
->buf
!= NULL
)) {
/*
 * base and cur are saved as offsets and re-derived from the buffer
 * content after the push - presumably because the push may move the
 * content (TODO confirm).
 */
9054 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
9055 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
9057 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
9059 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
9060 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
9062 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->use
];
9064 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
/* Apply the encoding detected from the initial chunk, if any. */
9068 if (enc
!= XML_CHAR_ENCODING_NONE
) {
9069 xmlSwitchEncoding(ctxt
, enc
);
9076 * xmlCreateIOParserCtxt:
9077 * @sax: a SAX handler
9078 * @user_data: The user data returned on SAX callbacks
9079 * @ioread: an I/O read function
9080 * @ioclose: an I/O close function
9081 * @ioctx: an I/O handler
9082 * @enc: the charset encoding if known
9084 * Create a parser context for using the XML parser with an existing
9087 * Returns the new parser context or NULL
/*
 * xmlCreateIOParserCtxt: set up a parser context that reads through
 * caller-provided I/O callbacks. The callbacks are wrapped in an input
 * buffer by xmlParserInputBufferCreateIO(), which is turned into an input
 * stream and pushed on the new context.
 *
 * sax:       handler block whose contents are copied with memcpy() into a
 *            newly allocated ctxt->sax
 * user_data: stored in ctxt->userData when not NULL
 * ioread, ioclose, ioctx: passed unchanged to xmlParserInputBufferCreateIO()
 * enc:       passed to both the buffer and the input stream constructors
 *
 * NOTE(review): the listing skips source lines (for example 9101-9105),
 * so the failure paths are only partly visible here.
 */
9090 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
9091 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
9092 void *ioctx
, xmlCharEncoding enc
) {
9093 xmlParserCtxtPtr ctxt
;
9094 xmlParserInputPtr inputStream
;
9095 xmlParserInputBufferPtr buf
;
/* Without an input buffer nothing else is attempted. */
9097 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
9098 if (buf
== NULL
) return(NULL
);
9100 ctxt
= xmlNewParserCtxt();
9106 if (ctxt
->sax
!= &xmlDefaultSAXHandler
)
/* Give the context its own handler block, filled from sax below. */
9108 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
9109 if (ctxt
->sax
== NULL
) {
9114 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
9115 if (user_data
!= NULL
)
9116 ctxt
->userData
= user_data
;
/* Wrap the buffer in an input stream; the context is freed on failure. */
9119 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
9120 if (inputStream
== NULL
) {
9121 xmlFreeParserCtxt(ctxt
);
9124 inputPush(ctxt
, inputStream
);
9129 /************************************************************************
9131 * Front ends when parsing a DTD *
9133 ************************************************************************/
9137 * @sax: the SAX handler block or NULL
9138 * @input: an Input Buffer
9139 * @enc: the charset encoding if known
9141 * Load and parse a DTD
9143 * Returns the resulting xmlDtdPtr or NULL in case of error.
9144 * @input will be freed at parsing end.
/*
 * xmlIOParseDTD: parse a DTD read from an existing parser input buffer.
 * A scratch document with a placeholder external subset is created,
 * xmlParseExternalSubset() is run on the input, and when the context is
 * still well formed the external subset is unlinked from the scratch
 * document and becomes the return value (ret starts out as NULL).
 *
 * sax:   optional handler block
 * input: buffer to read from
 * enc:   encoding if known, XML_CHAR_ENCODING_NONE to have it detected
 *
 * NOTE(review): the listing skips source lines (for example 9153-9157 and
 * 9168-9169), so the argument checks and the installation of sax on the
 * context are not visible here.
 */
9148 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
9149 xmlCharEncoding enc
) {
9150 xmlDtdPtr ret
= NULL
;
9151 xmlParserCtxtPtr ctxt
;
9152 xmlParserInputPtr pinput
= NULL
;
9158 ctxt
= xmlNewParserCtxt();
9164 * Set-up the SAX context
9167 if (ctxt
->sax
!= NULL
)
9170 ctxt
->userData
= NULL
;
9174 * generate a parser input from the I/O handler
9177 pinput
= xmlNewIOInputStream(ctxt
, input
, enc
);
9178 if (pinput
== NULL
) {
/*
 * ctxt->sax is cleared before the context is freed when a handler was
 * passed in - presumably so that the caller-owned block is not released
 * together with the context (TODO confirm).
 */
9179 if (sax
!= NULL
) ctxt
->sax
= NULL
;
9180 xmlFreeParserCtxt(ctxt
);
9185 * plug some encoding conversion routines here.
9187 xmlPushInput(ctxt
, pinput
);
9189 pinput
->filename
= NULL
;
/* base and cur both start at the current position of the pushed input. */
9192 pinput
->base
= ctxt
->input
->cur
;
9193 pinput
->cur
= ctxt
->input
->cur
;
9194 pinput
->free
= NULL
;
9197 * let's parse that entity knowing it's an external subset.
/* Scratch document that only exists to hold the external subset. */
9200 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
9201 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
9202 BAD_CAST
"none", BAD_CAST
"none");
/* No encoding given: detect it from 4 bytes (start is set in lines not shown). */
9204 if (enc
== XML_CHAR_ENCODING_NONE
) {
9206 * Get the 4 first bytes and decode the charset
9207 * if enc != XML_CHAR_ENCODING_NONE
9208 * plug some encoding conversion routines.
9214 enc
= xmlDetectCharEncoding(start
, 4);
9215 if (enc
!= XML_CHAR_ENCODING_NONE
) {
9216 xmlSwitchEncoding(ctxt
, enc
);
9220 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
/*
 * The subset is handed out only when parsing stayed well formed; it is
 * unlinked from myDoc first so that xmlFreeDoc() below does not see it.
 */
9222 if (ctxt
->myDoc
!= NULL
) {
9223 if (ctxt
->wellFormed
) {
9224 ret
= ctxt
->myDoc
->extSubset
;
9225 ctxt
->myDoc
->extSubset
= NULL
;
9229 xmlFreeDoc(ctxt
->myDoc
);
9232 if (sax
!= NULL
) ctxt
->sax
= NULL
;
9233 xmlFreeParserCtxt(ctxt
);
9240 * @sax: the SAX handler block
9241 * @ExternalID: a NAME* containing the External ID of the DTD
9242 * @SystemID: a NAME* containing the URL to the DTD
9244 * Load and parse an external subset.
9246 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * xmlSAXParseDTD: load an external subset through the resolveEntity
 * callback of the context handler and parse it. A scratch document holds
 * the DTD while xmlParseExternalSubset() runs; when the context is still
 * well formed the external subset is unlinked from the scratch document
 * and becomes the return value (ret starts out as NULL).
 *
 * sax:        optional handler block
 * ExternalID: external identifier of the DTD
 * SystemID:   system identifier of the DTD, also used as the input filename
 *             when the resolver did not set one
 *
 * NOTE(review): the listing skips source lines (for example 9260-9264 and
 * 9269-9270), so the installation of sax on the context is not visible.
 */
9250 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
9251 const xmlChar
*SystemID
) {
9252 xmlDtdPtr ret
= NULL
;
9253 xmlParserCtxtPtr ctxt
;
9254 xmlParserInputPtr input
= NULL
;
9255 xmlCharEncoding enc
;
/* At least one of the two identifiers is required. */
9257 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
9259 ctxt
= xmlNewParserCtxt();
9265 * Set-up the SAX context
9268 if (ctxt
->sax
!= NULL
)
9271 ctxt
->userData
= NULL
;
9275 * Ask the Entity resolver to load the damn thing
9278 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
9279 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
, SystemID
);
9280 if (input
== NULL
) {
/*
 * ctxt->sax is cleared before the context is freed when a handler was
 * passed in - presumably so that the caller-owned block is not released
 * together with the context (TODO confirm).
 */
9281 if (sax
!= NULL
) ctxt
->sax
= NULL
;
9282 xmlFreeParserCtxt(ctxt
);
9287 * plug some encoding conversion routines here.
/* The encoding is always detected from 4 bytes at the input position. */
9289 xmlPushInput(ctxt
, input
);
9290 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
9291 xmlSwitchEncoding(ctxt
, enc
);
9293 if (input
->filename
== NULL
)
9294 input
->filename
= (char *) xmlStrdup(SystemID
);
9297 input
->base
= ctxt
->input
->cur
;
9298 input
->cur
= ctxt
->input
->cur
;
9302 * let's parse that entity knowing it's an external subset.
/* Scratch document that only exists to hold the external subset. */
9305 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
9306 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
9307 ExternalID
, SystemID
);
9308 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
/*
 * The subset is handed out only when parsing stayed well formed; it is
 * unlinked from myDoc first so that xmlFreeDoc() below does not see it.
 */
9310 if (ctxt
->myDoc
!= NULL
) {
9311 if (ctxt
->wellFormed
) {
9312 ret
= ctxt
->myDoc
->extSubset
;
9313 ctxt
->myDoc
->extSubset
= NULL
;
9317 xmlFreeDoc(ctxt
->myDoc
);
9320 if (sax
!= NULL
) ctxt
->sax
= NULL
;
9321 xmlFreeParserCtxt(ctxt
);
9328 * @ExternalID: a NAME* containing the External ID of the DTD
9329 * @SystemID: a NAME* containing the URL to the DTD
9331 * Load and parse an external subset.
9333 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * xmlParseDTD: convenience front end that forwards to xmlSAXParseDTD()
 * with a NULL handler block and returns its result unchanged.
 */
9337 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
9338 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
9341 /************************************************************************
9343 * Front ends when parsing an Entity *
9345 ************************************************************************/
9348 * xmlParseCtxtExternalEntity:
9349 * @ctx: the existing parsing context
9350 * @URL: the URL for the entity to load
9351 * @ID: the System ID for the entity to load
9352 * @lst: the return value for the set of parsed nodes
9354 * Parse an external general entity within an existing parsing context
9355 * An external general parsed entity is well-formed if it matches the
9356 * production labeled extParsedEnt.
9358 * [78] extParsedEnt ::= TextDecl? content
9360 * Returns 0 if the entity is well formed, -1 in case of args problem and
9361 * the parser error code otherwise
/*
 * xmlParseCtxtExternalEntity: parse an external general entity on behalf
 * of an existing parser context.
 *
 * Visible steps: a separate context is created from URL/ID with
 * xmlCreateEntityParserCtxt(); it shares the handler block of ctx; a
 * temporary document with a pseudoroot element collects the parsed
 * nodes; the encoding is detected and an optional text declaration is
 * consumed; validate, loadsubset, replaceEntities and depth + 1 are
 * copied from ctx; xmlParseContent() runs; leftover input or an
 * unbalanced node stack clears wellFormed and sets errNo; finally the
 * children of the pseudoroot are walked and unlinked, the new context is
 * freed and the borrowed DTD subsets are reset to NULL on newDoc.
 *
 * NOTE(review): the listing skips many source lines (declarations of
 * newDoc, ret, cur and start, several return statements, closing
 * braces), so return values cannot be confirmed from here.
 */
9365 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
9366 const xmlChar
*ID
, xmlNodePtr
*lst
) {
9367 xmlParserCtxtPtr ctxt
;
9369 xmlSAXHandlerPtr oldsax
= NULL
;
9372 xmlCharEncoding enc
;
/* Nesting deeper than 40 levels is reported as an entity loop. */
9374 if (ctx
->depth
> 40) {
9375 return(XML_ERR_ENTITY_LOOP
);
9380 if ((URL
== NULL
) && (ID
== NULL
))
9382 if (ctx
->myDoc
== NULL
) /* @@ relax but check for dereferences */
/* The entity gets its own parser context, built from URL and ID. */
9386 ctxt
= xmlCreateEntityParserCtxt(URL
, ID
, NULL
);
9387 if (ctxt
== NULL
) return(-1);
/* Callbacks receive the new context but use the handler block of ctx. */
9388 ctxt
->userData
= ctxt
;
9390 ctxt
->sax
= ctx
->sax
;
9391 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
9392 if (newDoc
== NULL
) {
9393 xmlFreeParserCtxt(ctxt
);
/*
 * The DTD subsets of the parent document are borrowed, not copied; they
 * are reset to NULL on newDoc further down.
 */
9396 if (ctx
->myDoc
!= NULL
) {
9397 newDoc
->intSubset
= ctx
->myDoc
->intSubset
;
9398 newDoc
->extSubset
= ctx
->myDoc
->extSubset
;
9400 if (ctx
->myDoc
->URL
!= NULL
) {
9401 newDoc
->URL
= xmlStrdup(ctx
->myDoc
->URL
);
/* Parsed nodes are collected under a temporary pseudoroot element. */
9403 newDoc
->children
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
9404 if (newDoc
->children
== NULL
) {
9406 xmlFreeParserCtxt(ctxt
);
9407 newDoc
->intSubset
= NULL
;
9408 newDoc
->extSubset
= NULL
;
9412 nodePush(ctxt
, newDoc
->children
);
/* Nodes are built for the parent document when there is one. */
9413 if (ctx
->myDoc
== NULL
) {
9414 ctxt
->myDoc
= newDoc
;
9416 ctxt
->myDoc
= ctx
->myDoc
;
9417 newDoc
->children
->doc
= ctx
->myDoc
;
9421 * Get the 4 first bytes and decode the charset
9422 * if enc != XML_CHAR_ENCODING_NONE
9423 * plug some encoding conversion routines.
9430 enc
= xmlDetectCharEncoding(start
, 4);
9431 if (enc
!= XML_CHAR_ENCODING_NONE
) {
9432 xmlSwitchEncoding(ctxt
, enc
);
9436 * Parse a possible text declaration first
9438 if ((RAW
== '<') && (NXT(1) == '?') &&
9439 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9440 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9441 xmlParseTextDecl(ctxt
);
9445 * Doing validity checking on chunk doesn't make sense
9447 ctxt
->instate
= XML_PARSER_CONTENT
;
9448 ctxt
->validate
= ctx
->validate
;
9449 ctxt
->loadsubset
= ctx
->loadsubset
;
9450 ctxt
->depth
= ctx
->depth
+ 1;
9451 ctxt
->replaceEntities
= ctx
->replaceEntities
;
9452 if (ctxt
->validate
) {
9453 ctxt
->vctxt
.error
= ctx
->vctxt
.error
;
9454 ctxt
->vctxt
.warning
= ctx
->vctxt
.warning
;
9456 ctxt
->vctxt
.error
= NULL
;
9457 ctxt
->vctxt
.warning
= NULL
;
9459 ctxt
->vctxt
.nodeTab
= NULL
;
9460 ctxt
->vctxt
.nodeNr
= 0;
9461 ctxt
->vctxt
.nodeMax
= 0;
9462 ctxt
->vctxt
.node
= NULL
;
9464 xmlParseContent(ctxt
);
9466 if ((RAW
== '<') && (NXT(1) == '/')) {
9467 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
9468 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9469 ctxt
->sax
->error(ctxt
->userData
,
9470 "chunk is not well balanced\n");
9471 ctxt
->wellFormed
= 0;
9472 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9473 } else if (RAW
!= 0) {
9474 ctxt
->errNo
= XML_ERR_EXTRA_CONTENT
;
9475 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9476 ctxt
->sax
->error(ctxt
->userData
,
9477 "extra content at the end of well balanced chunk\n");
9478 ctxt
->wellFormed
= 0;
9479 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9481 if (ctxt
->node
!= newDoc
->children
) {
9482 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
9483 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9484 ctxt
->sax
->error(ctxt
->userData
,
9485 "chunk is not well balanced\n");
9486 ctxt
->wellFormed
= 0;
9487 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9490 if (!ctxt
->wellFormed
) {
9491 if (ctxt
->errNo
== 0)
9500 * Return the newly created nodeset after unlinking it from
9501 * they pseudo parent.
9503 cur
= newDoc
->children
->children
;
9505 while (cur
!= NULL
) {
9509 newDoc
->children
->children
= NULL
;
9514 xmlFreeParserCtxt(ctxt
);
9515 newDoc
->intSubset
= NULL
;
9516 newDoc
->extSubset
= NULL
;
9523 * xmlParseExternalEntityPrivate:
9524 * @doc: the document the chunk pertains to
9525 * @oldctxt: the previous parser context if available
9526 * @sax: the SAX handler block (possibly NULL)
9527 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9528 * @depth: Used for loop detection, use 0
9529 * @URL: the URL for the entity to load
9530 * @ID: the System ID for the entity to load
9531 * @list: the return value for the set of parsed nodes
9533 * Private version of xmlParseExternalEntity()
9535 * Returns 0 if the entity is well formed, -1 in case of args problem and
9536 * the parser error code otherwise
/*
 * xmlParseExternalEntityPrivate: shared implementation used to parse an
 * external general entity, with or without an existing parser context.
 *
 * Visible steps: a separate context is created from URL/ID with
 * xmlCreateEntityParserCtxt(); when oldctxt is given, its _private,
 * loadsubset, validate and external fields are inherited; a temporary
 * document borrows the DTD subsets of doc and gets a pseudoroot element
 * that collects the parsed nodes; the encoding is detected and an
 * optional text declaration is consumed; xmlParseContent() runs with
 * ctxt->depth set to depth; leftover input or an unbalanced node stack
 * clears wellFormed and sets errNo; finally the children of the
 * pseudoroot are walked and unlinked, the context is freed and the
 * borrowed subsets are reset to NULL on newDoc.
 *
 * NOTE(review): the listing skips many source lines (declarations of
 * newDoc, ret, cur and start, the handling of sax and oldsax, several
 * return statements), so return values cannot be confirmed from here.
 */
9540 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
9541 xmlSAXHandlerPtr sax
,
9542 void *user_data
, int depth
, const xmlChar
*URL
,
9543 const xmlChar
*ID
, xmlNodePtr
*list
) {
9544 xmlParserCtxtPtr ctxt
;
9546 xmlSAXHandlerPtr oldsax
= NULL
;
9549 xmlCharEncoding enc
;
/* NOTE(review): the test guarding this return is not part of the listing. */
9552 return(XML_ERR_ENTITY_LOOP
);
9559 if ((URL
== NULL
) && (ID
== NULL
))
9561 if (doc
== NULL
) /* @@ relax but check for dereferences */
/* The entity gets its own parser context, built from URL and ID. */
9565 ctxt
= xmlCreateEntityParserCtxt(URL
, ID
, NULL
);
9566 if (ctxt
== NULL
) return(-1);
9567 ctxt
->userData
= ctxt
;
/* Settings are inherited from the calling context when there is one. */
9568 if (oldctxt
!= NULL
) {
9569 ctxt
->_private
= oldctxt
->_private
;
9570 ctxt
->loadsubset
= oldctxt
->loadsubset
;
9571 ctxt
->validate
= oldctxt
->validate
;
9572 ctxt
->external
= oldctxt
->external
;
9575 * Doing validity checking on chunk without context
9576 * doesn't make sense
9578 ctxt
->_private
= NULL
;
9581 ctxt
->loadsubset
= 0;
9586 if (user_data
!= NULL
)
9587 ctxt
->userData
= user_data
;
9589 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
9590 if (newDoc
== NULL
) {
9591 xmlFreeParserCtxt(ctxt
);
9595 newDoc
->intSubset
= doc
->intSubset
;
9596 newDoc
->extSubset
= doc
->extSubset
;
9598 if (doc
->URL
!= NULL
) {
9599 newDoc
->URL
= xmlStrdup(doc
->URL
);
9601 newDoc
->children
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
9602 if (newDoc
->children
== NULL
) {
9605 xmlFreeParserCtxt(ctxt
);
9606 newDoc
->intSubset
= NULL
;
9607 newDoc
->extSubset
= NULL
;
9611 nodePush(ctxt
, newDoc
->children
);
9613 ctxt
->myDoc
= newDoc
;
9616 newDoc
->children
->doc
= doc
;
9620 * Get the 4 first bytes and decode the charset
9621 * if enc != XML_CHAR_ENCODING_NONE
9622 * plug some encoding conversion routines.
9629 enc
= xmlDetectCharEncoding(start
, 4);
9630 if (enc
!= XML_CHAR_ENCODING_NONE
) {
9631 xmlSwitchEncoding(ctxt
, enc
);
9635 * Parse a possible text declaration first
9637 if ((RAW
== '<') && (NXT(1) == '?') &&
9638 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9639 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9640 xmlParseTextDecl(ctxt
);
9643 ctxt
->instate
= XML_PARSER_CONTENT
;
9644 ctxt
->depth
= depth
;
9646 xmlParseContent(ctxt
);
9648 if ((RAW
== '<') && (NXT(1) == '/')) {
9649 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
9650 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9651 ctxt
->sax
->error(ctxt
->userData
,
9652 "chunk is not well balanced\n");
9653 ctxt
->wellFormed
= 0;
9654 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9655 } else if (RAW
!= 0) {
9656 ctxt
->errNo
= XML_ERR_EXTRA_CONTENT
;
9657 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9658 ctxt
->sax
->error(ctxt
->userData
,
9659 "extra content at the end of well balanced chunk\n");
9660 ctxt
->wellFormed
= 0;
9661 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9663 if (ctxt
->node
!= newDoc
->children
) {
9664 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
9665 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9666 ctxt
->sax
->error(ctxt
->userData
,
9667 "chunk is not well balanced\n");
9668 ctxt
->wellFormed
= 0;
9669 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9672 if (!ctxt
->wellFormed
) {
9673 if (ctxt
->errNo
== 0)
9682 * Return the newly created nodeset after unlinking it from
9683 * they pseudo parent.
9685 cur
= newDoc
->children
->children
;
9687 while (cur
!= NULL
) {
9691 newDoc
->children
->children
= NULL
;
9697 xmlFreeParserCtxt(ctxt
);
9698 newDoc
->intSubset
= NULL
;
9699 newDoc
->extSubset
= NULL
;
9706 * xmlParseExternalEntity:
9707 * @doc: the document the chunk pertains to
9708 * @sax: the SAX handler block (possibly NULL)
9709 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9710 * @depth: Used for loop detection, use 0
9711 * @URL: the URL for the entity to load
9712 * @ID: the System ID for the entity to load
9713 * @lst: the return value for the set of parsed nodes
9715 * Parse an external general entity
9716 * An external general parsed entity is well-formed if it matches the
9717 * production labeled extParsedEnt.
9719 * [78] extParsedEnt ::= TextDecl? content
9721 * Returns 0 if the entity is well formed, -1 in case of args problem and
9722 * the parser error code otherwise
/*
 * xmlParseExternalEntity: public front end that forwards to
 * xmlParseExternalEntityPrivate() with NULL as the previous parser
 * context and returns its result.
 *
 * NOTE(review): the last line of the call is missing from this listing,
 * so the trailing arguments cannot be read here.
 */
9726 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
9727 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
9728 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
9733 * xmlParseBalancedChunkMemory:
9734 * @doc: the document the chunk pertains to
9735 * @sax: the SAX handler block (possibly NULL)
9736 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9737 * @depth: Used for loop detection, use 0
9738 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9739 * @lst: the return value for the set of parsed nodes
9741 * Parse a well-balanced chunk of an XML document
9742 * called by the parser
9743 * The allowed sequence for the Well Balanced Chunk is the one defined by
9744 * the content production in the XML grammar:
9746 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9748 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9749 * the parser error code otherwise
/*
 * xmlParseBalancedChunkMemory: forwards all arguments to
 * xmlParseBalancedChunkMemoryRecover() with the recover argument set
 * to 0 and returns its result unchanged.
 */
9753 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
9754 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
9755 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
9756 depth
, string
, lst
, 0 );
9760 * xmlParseBalancedChunkMemoryInternal:
9761 * @oldctxt: the existing parsing context
9762 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9763 * @user_data: the user data field for the parser context
9764 * @lst: the return value for the set of parsed nodes
9767 * Parse a well-balanced chunk of an XML document
9768 * called by the parser
9769 * The allowed sequence for the Well Balanced Chunk is the one defined by
9770 * the content production in the XML grammar:
9772 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9774 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9775 * the parser error code otherwise
9777 * In case recover is set to 1, the nodelist will not be empty even if
9778 * the parsed chunk is not well balanced.
/*
 * xmlParseBalancedChunkMemoryInternal: parse a zero terminated string as
 * a well-balanced chunk on behalf of an existing parser context.
 *
 * Visible steps: a memory parser context is created over string; it
 * shares the handler block of oldctxt; nodes are built in the document
 * of oldctxt, or in a scratch document when oldctxt has none; the
 * document children are swapped for a temporary pseudoroot element
 * while xmlParseContent() runs with depth set to oldctxt->depth + 1;
 * leftover input or an unbalanced node stack clears wellFormed and sets
 * errNo; when lst is not NULL and ret is 0 the children of the
 * pseudoroot are walked and unlinked; the pseudoroot is then freed, the
 * saved children are put back and the context is freed.
 *
 * NOTE(review): the listing skips many source lines (declarations of
 * size, ret and cur, argument checks, return statements), so return
 * values cannot be confirmed from here.
 */
9781 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
9782 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
9783 xmlParserCtxtPtr ctxt
;
9784 xmlDocPtr newDoc
= NULL
;
9785 xmlSAXHandlerPtr oldsax
= NULL
;
9786 xmlNodePtr content
= NULL
;
/* Nesting deeper than 40 levels is reported as an entity loop. */
9790 if (oldctxt
->depth
> 40) {
9791 return(XML_ERR_ENTITY_LOOP
);
9800 size
= xmlStrlen(string
);
9802 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
9803 if (ctxt
== NULL
) return(-1);
9804 if (user_data
!= NULL
)
9805 ctxt
->userData
= user_data
;
9807 ctxt
->userData
= ctxt
;
/* Callbacks go through the handler block of the calling context. */
9810 ctxt
->sax
= oldctxt
->sax
;
/*
 * A scratch document is only created when the calling context has no
 * document; otherwise that document is reused and its current children
 * are remembered in content.
 */
9811 if (oldctxt
->myDoc
== NULL
) {
9812 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
9813 if (newDoc
== NULL
) {
9815 xmlFreeParserCtxt(ctxt
);
9818 ctxt
->myDoc
= newDoc
;
9820 ctxt
->myDoc
= oldctxt
->myDoc
;
9821 content
= ctxt
->myDoc
->children
;
/* The document children are replaced by a pseudoroot during the parse. */
9823 ctxt
->myDoc
->children
= xmlNewDocNode(newDoc
, NULL
,
9824 BAD_CAST
"pseudoroot", NULL
);
9825 if (ctxt
->myDoc
->children
== NULL
) {
9827 xmlFreeParserCtxt(ctxt
);
9832 nodePush(ctxt
, ctxt
->myDoc
->children
);
9833 ctxt
->instate
= XML_PARSER_CONTENT
;
9834 ctxt
->depth
= oldctxt
->depth
+ 1;
9837 * Doing validity checking on chunk doesn't make sense
9840 ctxt
->loadsubset
= oldctxt
->loadsubset
;
9842 xmlParseContent(ctxt
);
9843 if ((RAW
== '<') && (NXT(1) == '/')) {
9844 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
9845 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9846 ctxt
->sax
->error(ctxt
->userData
,
9847 "chunk is not well balanced\n");
9848 ctxt
->wellFormed
= 0;
9849 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9850 } else if (RAW
!= 0) {
9851 ctxt
->errNo
= XML_ERR_EXTRA_CONTENT
;
9852 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9853 ctxt
->sax
->error(ctxt
->userData
,
9854 "extra content at the end of well balanced chunk\n");
9855 ctxt
->wellFormed
= 0;
9856 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9858 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
9859 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
9860 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
9861 ctxt
->sax
->error(ctxt
->userData
,
9862 "chunk is not well balanced\n");
9863 ctxt
->wellFormed
= 0;
9864 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
9867 if (!ctxt
->wellFormed
) {
9868 if (ctxt
->errNo
== 0)
9876 if ((lst
!= NULL
) && (ret
== 0)) {
9880 * Return the newly created nodeset after unlinking it from
9881 * they pseudo parent.
9883 cur
= ctxt
->myDoc
->children
->children
;
9885 while (cur
!= NULL
) {
9889 ctxt
->myDoc
->children
->children
= NULL
;
9891 if (ctxt
->myDoc
!= NULL
) {
9892 xmlFreeNode(ctxt
->myDoc
->children
);
9893 ctxt
->myDoc
->children
= content
;
9897 xmlFreeParserCtxt(ctxt
);
9905 * xmlParseBalancedChunkMemoryRecover:
9906 * @doc: the document the chunk pertains to
9907 * @sax: the SAX handler block (possibly NULL)
9908 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9909 * @depth: Used for loop detection, use 0
9910 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9911 * @lst: the return value for the set of parsed nodes
9912 * @recover: return nodes even if the data is broken (use 0)
9915 * Parse a well-balanced chunk of an XML document
9916 * called by the parser
9917 * The allowed sequence for the Well Balanced Chunk is the one defined by
9918 * the content production in the XML grammar:
9920 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9922 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9923 * the parser error code otherwise
9925 * In case recover is set to 1, the nodelist will not be empty even if
9926 * the parsed chunk is not well balanced.
/*
 * xmlParseBalancedChunkMemoryRecover: parse a zero terminated string as
 * a well-balanced chunk belonging to doc.
 *
 * Visible steps: a memory parser context is created over string; a
 * temporary document borrows the DTD subsets of doc and gets a
 * pseudoroot element that collects the parsed nodes; xmlParseContent()
 * runs with ctxt->depth set to depth and loadsubset set to 0; leftover
 * input or an unbalanced node stack clears wellFormed and sets errNo;
 * when lst is not NULL and either ret is 0 or recover is 1, the children
 * of the pseudoroot are walked with their parent reset to NULL; finally
 * ctxt->sax is restored from oldsax, the context is freed, the borrowed
 * subsets are reset to NULL and the temporary document is freed.
 *
 * NOTE(review): the listing skips many source lines (the end of the
 * parameter list, declarations of newDoc, size, ret, cur and content,
 * the handling of sax, return statements). xmlParseContent() appears
 * twice below, around a save and restore of doc->children; the
 * condition choosing between the two calls is not visible here.
 */
9929 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
9930 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
,
9932 xmlParserCtxtPtr ctxt
;
9934 xmlSAXHandlerPtr oldsax
= NULL
;
/* NOTE(review): the test guarding this return is not part of the listing. */
9940 return(XML_ERR_ENTITY_LOOP
);
9949 size
= xmlStrlen(string
);
9951 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
9952 if (ctxt
== NULL
) return(-1);
/* The context is its own user data unless the caller supplies one. */
9953 ctxt
->userData
= ctxt
;
9957 if (user_data
!= NULL
)
9958 ctxt
->userData
= user_data
;
9960 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
9961 if (newDoc
== NULL
) {
9962 xmlFreeParserCtxt(ctxt
);
/*
 * The DTD subsets of doc are borrowed, not copied; they are reset to
 * NULL on newDoc before newDoc is freed at the end.
 */
9966 newDoc
->intSubset
= doc
->intSubset
;
9967 newDoc
->extSubset
= doc
->extSubset
;
/* Parsed nodes are collected under a temporary pseudoroot element. */
9969 newDoc
->children
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
9970 if (newDoc
->children
== NULL
) {
9973 xmlFreeParserCtxt(ctxt
);
9974 newDoc
->intSubset
= NULL
;
9975 newDoc
->extSubset
= NULL
;
9979 nodePush(ctxt
, newDoc
->children
);
9981 ctxt
->myDoc
= newDoc
;
9983 ctxt
->myDoc
= newDoc
;
9984 newDoc
->children
->doc
= doc
;
9986 ctxt
->instate
= XML_PARSER_CONTENT
;
9987 ctxt
->depth
= depth
;
9990 * Doing validity checking on chunk doesn't make sense
9993 ctxt
->loadsubset
= 0;
9996 content
= doc
->children
;
9997 doc
->children
= NULL
;
9998 xmlParseContent(ctxt
);
9999 doc
->children
= content
;
10002 xmlParseContent(ctxt
);
10004 if ((RAW
== '<') && (NXT(1) == '/')) {
10005 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
10006 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
10007 ctxt
->sax
->error(ctxt
->userData
,
10008 "chunk is not well balanced\n");
10009 ctxt
->wellFormed
= 0;
10010 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
10011 } else if (RAW
!= 0) {
10012 ctxt
->errNo
= XML_ERR_EXTRA_CONTENT
;
10013 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
10014 ctxt
->sax
->error(ctxt
->userData
,
10015 "extra content at the end of well balanced chunk\n");
10016 ctxt
->wellFormed
= 0;
10017 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
10019 if (ctxt
->node
!= newDoc
->children
) {
10020 ctxt
->errNo
= XML_ERR_NOT_WELL_BALANCED
;
10021 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
10022 ctxt
->sax
->error(ctxt
->userData
,
10023 "chunk is not well balanced\n");
10024 ctxt
->wellFormed
= 0;
10025 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
10028 if (!ctxt
->wellFormed
) {
10029 if (ctxt
->errNo
== 0)
10037 if (lst
!= NULL
&& (ret
== 0 || recover
== 1)) {
10041 * Return the newly created nodeset after unlinking it from
10042 * they pseudo parent.
10044 cur
= newDoc
->children
->children
;
10046 while (cur
!= NULL
) {
10047 cur
->parent
= NULL
;
10050 newDoc
->children
->children
= NULL
;
10054 ctxt
->sax
= oldsax
;
10055 xmlFreeParserCtxt(ctxt
);
10056 newDoc
->intSubset
= NULL
;
10057 newDoc
->extSubset
= NULL
;
10058 xmlFreeDoc(newDoc
);
10064 * xmlSAXParseEntity:
10065 * @sax: the SAX handler block
10066 * @filename: the filename
10068 * parse an XML external entity out of context and build a tree.
10069 * It uses the given SAX function block to handle the parsing callbacks.
10070 * If sax is NULL, fallback to the default DOM tree building routines.
10072 * [78] extParsedEnt ::= TextDecl? content
10074 * This corresponds to a "Well Balanced" chunk
10076 * Returns the resulting document tree
/*
 * xmlSAXParseEntity: parse the file named filename as an external parsed
 * entity. A file parser context is created with
 * xmlCreateFileParserCtxt(), xmlParseExtParsedEnt() is run on it, and
 * the context is freed afterwards. The visible lines free ctxt->myDoc
 * and reset it to NULL on one path that follows the wellFormed test.
 *
 * NOTE(review): the listing skips source lines (declaration of ret, the
 * test on sax, the assignments of the result), so the exact return value
 * per path cannot be confirmed from here.
 */
10080 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
10082 xmlParserCtxtPtr ctxt
;
10083 char *directory
= NULL
;
10085 ctxt
= xmlCreateFileParserCtxt(filename
);
10086 if (ctxt
== NULL
) {
/* The handler block currently attached to the context is released. */
10090 if (ctxt
->sax
!= NULL
)
10091 xmlFree(ctxt
->sax
);
10093 ctxt
->userData
= NULL
;
/*
 * NOTE(review): directory is filled in here, but no later use or release
 * of it appears in the visible lines - check the complete source for a
 * possible leak.
 */
10096 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
10097 directory
= xmlParserGetDirectory(filename
);
10099 xmlParseExtParsedEnt(ctxt
);
10101 if (ctxt
->wellFormed
)
10105 xmlFreeDoc(ctxt
->myDoc
);
10106 ctxt
->myDoc
= NULL
;
10110 xmlFreeParserCtxt(ctxt
);
10117 * @filename: the filename
10119 * parse an XML external entity out of context and build a tree.
10121 * [78] extParsedEnt ::= TextDecl? content
10123 * This corresponds to a "Well Balanced" chunk
10125 * Returns the resulting document tree
/*
 * xmlParseEntity: convenience front end that forwards to
 * xmlSAXParseEntity() with a NULL handler block and returns its result
 * unchanged.
 */
10129 xmlParseEntity(const char *filename
) {
10130 return(xmlSAXParseEntity(NULL
, filename
));
10134 * xmlCreateEntityParserCtxt:
10135 * @URL: the entity URL
10136 * @ID: the entity PUBLIC ID
10137 * @base: a possible base for the target URI
10139 * Create a parser context for an external entity
10140 * Automatic support for ZLIB/Compress compressed document is provided
10141 * by default if found at compile-time.
10143 * Returns the new parser context or NULL
/*
 * xmlCreateEntityParserCtxt: create a parser context whose input is an
 * external entity loaded with xmlLoadExternalEntity().
 *
 * Two loading paths are visible: one loads from URL as given, the other
 * from uri, which is built with xmlBuildURI(URL, base). On both paths
 * the input is pushed on the context and, when the context has no
 * directory yet, one is derived with xmlParserGetDirectory() and stored
 * in ctxt->directory. A failed load frees the context.
 *
 * NOTE(review): the listing skips source lines (declaration of uri, the
 * condition selecting between the two paths, return statements and any
 * release of uri), so those parts cannot be confirmed from here.
 */
10146 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
10147 const xmlChar
*base
) {
10148 xmlParserCtxtPtr ctxt
;
10149 xmlParserInputPtr inputStream
;
10150 char *directory
= NULL
;
10153 ctxt
= xmlNewParserCtxt();
10154 if (ctxt
== NULL
) {
/* Resolve URL against base. */
10158 uri
= xmlBuildURI(URL
, base
);
/* Path 1: load from URL as given. */
10161 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
10162 if (inputStream
== NULL
) {
10163 xmlFreeParserCtxt(ctxt
);
10167 inputPush(ctxt
, inputStream
);
/* The directory string obtained here is stored in the context. */
10169 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
10170 directory
= xmlParserGetDirectory((char *)URL
);
10171 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
10172 ctxt
->directory
= directory
;
/* Path 2: load from the URI built above. */
10174 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
10175 if (inputStream
== NULL
) {
10177 xmlFreeParserCtxt(ctxt
);
10181 inputPush(ctxt
, inputStream
);
10183 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
10184 directory
= xmlParserGetDirectory((char *)uri
);
10185 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
10186 ctxt
->directory
= directory
;
10193 /************************************************************************
10195 * Front ends when parsing from a file *
10197 ************************************************************************/
10200 * xmlCreateFileParserCtxt:
10201 * @filename: the filename
10203 * Create a parser context for a file content.
10204 * Automatic support for ZLIB/Compress compressed document is provided
10205 * by default if found at compile-time.
10207 * Returns the new parser context or NULL
/*
 * xmlCreateFileParserCtxt: create a parser context whose input is the
 * file named filename.
 *
 * The name is first passed through xmlNormalizeWindowsPath(); the
 * normalized copy is used to load the input with
 * xmlLoadExternalEntity() and to derive ctxt->directory, and it is
 * released with xmlFree() both on the load-failure path and at the end.
 * When the context cannot be allocated, an out of memory message is sent
 * to the error callback of xmlDefaultSAXHandler if one is set.
 *
 * NOTE(review): the listing skips source lines (opening brace, return
 * statements), so return values cannot be confirmed from here.
 */
10210 xmlCreateFileParserCtxt(const char *filename
)
10212 xmlParserCtxtPtr ctxt
;
10213 xmlParserInputPtr inputStream
;
10214 char *directory
= NULL
;
10215 xmlChar
*normalized
;
10217 ctxt
= xmlNewParserCtxt();
10218 if (ctxt
== NULL
) {
10219 if (xmlDefaultSAXHandler
.error
!= NULL
) {
10220 xmlDefaultSAXHandler
.error(NULL
, "out of memory\n");
/* Work on a normalized copy of the file name from here on. */
10225 normalized
= xmlNormalizeWindowsPath((const xmlChar
*) filename
);
10226 if (normalized
== NULL
) {
10227 xmlFreeParserCtxt(ctxt
);
10230 inputStream
= xmlLoadExternalEntity((char *) normalized
, NULL
, ctxt
);
10231 if (inputStream
== NULL
) {
10232 xmlFreeParserCtxt(ctxt
);
10233 xmlFree(normalized
);
10237 inputPush(ctxt
, inputStream
);
/* The directory string obtained here is stored in the context. */
10238 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
10239 directory
= xmlParserGetDirectory((char *) normalized
);
10240 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
10241 ctxt
->directory
= directory
;
10243 xmlFree(normalized
);
10249 * xmlSAXParseFileWithData:
10250 * @sax: the SAX handler block
10251 * @filename: the filename
10252 * @recovery: work in recovery mode, i.e. tries to read non Well Formed
10254 * @data: the userdata
10256 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10257 * compressed document is provided by default if found at compile-time.
10258 * It uses the given SAX function block to handle the parsing callbacks.
10259 * If sax is NULL, fallback to the default DOM tree building routines.
10261 * User data (void *) is stored within the parser context in the
10262 * context's _private member, so it is available nearly everywhere in libxml
10264 * Returns the resulting document tree
/*
 * xmlSAXParseFileWithData: parse the file named filename as a complete
 * document. A file parser context is created with
 * xmlCreateFileParserCtxt(), data is stored in ctxt->_private, recovery
 * is copied to ctxt->recovery and xmlParseDocument() is run. The
 * document of the context becomes the result when the context is well
 * formed or recovery is set; the visible lines also free ctxt->myDoc and
 * reset it to NULL on another path. The context is freed at the end.
 *
 * NOTE(review): the listing skips source lines (declaration of ret, the
 * test on sax, the else branch, the return statement), so the exact
 * return value per path cannot be confirmed from here.
 */
10268 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
10269 int recovery
, void *data
) {
10271 xmlParserCtxtPtr ctxt
;
10272 char *directory
= NULL
;
10276 ctxt
= xmlCreateFileParserCtxt(filename
);
10277 if (ctxt
== NULL
) {
/* The handler block currently attached to the context is released. */
10281 if (ctxt
->sax
!= NULL
)
10282 xmlFree(ctxt
->sax
);
10286 ctxt
->_private
=data
;
/*
 * NOTE(review): the context receives a copy of directory made with
 * xmlStrdup(), and no release of the local string appears in the visible
 * lines - check the complete source for a possible leak.
 */
10289 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
10290 directory
= xmlParserGetDirectory(filename
);
10291 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
10292 ctxt
->directory
= (char *) xmlStrdup((xmlChar
*) directory
);
10294 ctxt
->recovery
= recovery
;
10296 xmlParseDocument(ctxt
);
/* In recovery mode the tree is kept even when it is not well formed. */
10298 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
10301 xmlFreeDoc(ctxt
->myDoc
);
10302 ctxt
->myDoc
= NULL
;
10306 xmlFreeParserCtxt(ctxt
);
10313 * @sax: the SAX handler block
10314 * @filename: the filename
10315 * @recovery: work in recovery mode, i.e. tries to read non Well Formed
10318 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10319 * compressed document is provided by default if found at compile-time.
10320 * It uses the given SAX function block to handle the parsing callbacks.
10321 * If sax is NULL, fallback to the default DOM tree building routines.
10323 * Returns the resulting document tree
/*
 * xmlSAXParseFile: forwards to xmlSAXParseFileWithData() with NULL as
 * the user data and returns its result unchanged.
 *
 * NOTE(review): the end of the parameter list is missing from this
 * listing; the body uses a recovery argument.
 */
10327 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
10329 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
10334 * @cur: a pointer to an array of xmlChar
10336 * parse an XML in-memory document and build a tree.
10337 * In the case the document is not Well Formed, a tree is built anyway
10339 * Returns the resulting document tree
/*
 * xmlRecoverDoc: forwards to xmlSAXParseDoc() with a NULL handler block
 * and the recovery argument set to 1, and returns its result unchanged.
 */
10343 xmlRecoverDoc(xmlChar
*cur
) {
10344 return(xmlSAXParseDoc(NULL
, cur
, 1));
10349 * @filename: the filename
10351 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10352 * compressed document is provided by default if found at compile-time.
10354 * Returns the resulting document tree if the file was wellformed,
/*
 * xmlParseFile: forwards to xmlSAXParseFile() with a NULL handler block
 * and the recovery argument set to 0, and returns its result unchanged.
 */
10359 xmlParseFile(const char *filename
) {
10360 return(xmlSAXParseFile(NULL
, filename
, 0));
10365 * @filename: the filename
10367 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10368 * compressed document is provided by default if found at compile-time.
10369 * In the case the document is not Well Formed, a tree is built anyway
10371 * Returns the resulting document tree
/*
 * xmlRecoverFile: forwards to xmlSAXParseFile() with a NULL handler block
 * and the recovery argument set to 1, and returns its result unchanged.
 */
10375 xmlRecoverFile(const char *filename
) {
10376 return(xmlSAXParseFile(NULL
, filename
, 1));
10381 * xmlSetupParserForBuffer:
10382 * @ctxt: an XML parser context
10383 * @buffer: a xmlChar * buffer
10384 * @filename: a file name
10386 * Setup the parser context to parse a new buffer; Clears any prior
10387 * contents from the parser context. The buffer parameter must not be
10388 * NULL, but the filename parameter can be
/*
 * xmlSetupParserForBuffer: point an existing parser context at a zero
 * terminated in-memory buffer. A new input stream is created, the
 * context is cleared with xmlClearParserCtxt(), and the stream is pushed
 * with base and cur at the start of buffer and end at
 * buffer + xmlStrlen(buffer). The stream refers to buffer directly; no
 * copy is made in the visible lines. filename, when not NULL, is
 * duplicated with xmlMemStrdup() into the stream.
 *
 * NOTE(review): the listing skips source lines (opening brace, the rest
 * of the error report and the failure path after it).
 */
10391 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
10392 const char* filename
)
10394 xmlParserInputPtr input
;
10396 input
= xmlNewInputStream(ctxt
);
10397 if (input
== NULL
) {
10398 xmlGenericError(xmlGenericErrorContext
,
/* Prior contents of the context are dropped before the new input is set. */
10404 xmlClearParserCtxt(ctxt
);
10405 if (filename
!= NULL
)
10406 input
->filename
= xmlMemStrdup(filename
);
10407 input
->base
= buffer
;
10408 input
->cur
= buffer
;
10409 input
->end
= &buffer
[xmlStrlen(buffer
)];
10410 inputPush(ctxt
, input
);
10414 * xmlSAXUserParseFile:
10415 * @sax: a SAX handler
10416 * @user_data: The user data returned on SAX callbacks
10417 * @filename: a file name
10419 * parse an XML file and call the given SAX handler routines.
10420 * Automatic support for ZLIB/Compress compressed document is provided
10422 * Returns 0 in case of success or an error number otherwise
/*
 * xmlSAXUserParseFile: create a file parser context for filename, run
 * xmlParseDocument() on it, then free the context.
 * Returns -1 immediately when the context cannot be created.
 * NOTE(review): the lines installing sax on the context, the statements
 * that choose the result after parsing, and the final return are not
 * visible in this listing -- confirm them against the full file.
 */
10425 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
10426 const char *filename
) {
10428 xmlParserCtxtPtr ctxt
;
10430 ctxt
= xmlCreateFileParserCtxt(filename
);
10431 if (ctxt
== NULL
) return -1;
/* Free the handler block that came with the context, unless it is the
 * address of the global xmlDefaultSAXHandler. */
10432 if (ctxt
->sax
!= &xmlDefaultSAXHandler
)
10433 xmlFree(ctxt
->sax
);
/* Caller user data overrides the context value only when it is non-NULL. */
10435 if (user_data
!= NULL
)
10436 ctxt
->userData
= user_data
;
10438 xmlParseDocument(ctxt
);
/* Result selection consults wellFormed first, then errNo. */
10440 if (ctxt
->wellFormed
)
10443 if (ctxt
->errNo
!= 0)
10450 xmlFreeParserCtxt(ctxt
);
10455 /************************************************************************
10457 * Front ends when parsing from memory *
10459 ************************************************************************/
10462 * xmlCreateMemoryParserCtxt:
10463 * @buffer: a pointer to a char array
10464 * @size: the size of the array
10466 * Create a parser context for an XML in-memory document.
10468 * Returns the new parser context or NULL
/*
 * xmlCreateMemoryParserCtxt: build a parser context whose current input
 * reads from a caller-supplied memory block.
 *   buffer: start of the block (checked against NULL)
 *   size:   number of bytes handed to xmlParserInputBufferCreateMem()
 * NOTE(review): several guard lines and early returns, and the statement
 * attaching buf to the input, are not visible in this listing -- confirm
 * them against the full file.
 */
10471 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
10472 xmlParserCtxtPtr ctxt
;
10473 xmlParserInputPtr input
;
10474 xmlParserInputBufferPtr buf
;
10476 if (buffer
== NULL
)
10481 ctxt
= xmlNewParserCtxt();
/* Wrap the memory block in an input buffer; no encoding is declared here. */
10485 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
/* Cleanup of the context on a failure path (the guard is not visible here). */
10487 xmlFreeParserCtxt(ctxt
);
10491 input
= xmlNewInputStream(ctxt
);
/* No input stream: release both the buffer and the context. */
10492 if (input
== NULL
) {
10493 xmlFreeParserInputBuffer(buf
);
10494 xmlFreeParserCtxt(ctxt
);
/* An in-memory document has no file name. */
10498 input
->filename
= NULL
;
/* base and cur point at the start of the buffer content; end points
 * `use` bytes past it. */
10500 input
->base
= input
->buf
->buffer
->content
;
10501 input
->cur
= input
->buf
->buffer
->content
;
10502 input
->end
= &input
->buf
->buffer
->content
[input
->buf
->buffer
->use
];
/* Make this input the current one for the new context. */
10504 inputPush(ctxt
, input
);
10509 * xmlSAXParseMemoryWithData:
10510 * @sax: the SAX handler block
10511 * @buffer: a pointer to a char array
10512 * @size: the size of the array
10513 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10515 * @data: the userdata
10517 * parse an XML in-memory block and use the given SAX function block
10518 * to handle the parsing callback. If sax is NULL, fallback to the default
10519 * DOM tree building routines.
10521 * User data (void *) is stored within the parser context in the
10522 * context's _private member, so it is available nearly everywhere in libxml
10524 * Returns the resulting document tree
/*
 * xmlSAXParseMemoryWithData: parse an in-memory block through a context
 * created by xmlCreateMemoryParserCtxt(), storing data in the _private
 * member of the context before parsing.
 * Returns NULL when the context cannot be created.
 * NOTE(review): the declaration of ret, the lines installing sax, the
 * else branch around xmlFreeDoc and the final return are not visible in
 * this listing -- confirm them against the full file.
 */
10528 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
10529 int size
, int recovery
, void *data
) {
10531 xmlParserCtxtPtr ctxt
;
10533 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
10534 if (ctxt
== NULL
) return(NULL
);
/* Drop the handler block that came with the context when there is one. */
10536 if (ctxt
->sax
!= NULL
)
10537 xmlFree(ctxt
->sax
);
/* Expose the caller data through the context for the duration of the parse. */
10541 ctxt
->_private
=data
;
10544 xmlParseDocument(ctxt
);
/* The tree is handed back when the input was well formed or when the
 * caller asked for recovery mode. */
10546 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
/* Otherwise the tree is freed and the context pointer cleared. */
10549 xmlFreeDoc(ctxt
->myDoc
);
10550 ctxt
->myDoc
= NULL
;
10554 xmlFreeParserCtxt(ctxt
);
10560 * xmlSAXParseMemory:
10561 * @sax: the SAX handler block
10562 * @buffer: a pointer to a char array
10563 * @size: the size of the array
10564 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10567 * parse an XML in-memory block and use the given SAX function block
10568 * to handle the parsing callback. If sax is NULL, fallback to the default
10569 * DOM tree building routines.
10571 * Returns the resulting document tree
/*
 * xmlSAXParseMemory: same as xmlSAXParseMemoryWithData() with NULL passed
 * as the data argument; all other arguments are forwarded unchanged and
 * the result is returned as is.
 */
10574 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
10575 int size
, int recovery
) {
10576 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
10581 * @buffer: a pointer to a char array
10582 * @size: the size of the array
10584 * parse an XML in-memory block and build a tree.
10586 * Returns the resulting document tree
/*
 * xmlParseMemory: strict front end for an in-memory block.
 * Calls xmlSAXParseMemory() with a NULL SAX handler and recovery set to 0
 * and returns the resulting xmlDocPtr.
 */
10589 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
10590 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
10594 * xmlRecoverMemory:
10595 * @buffer: a pointer to a char array
10596 * @size: the size of the array
10598 * parse an XML in-memory block and build a tree.
10599 * In the case the document is not Well Formed, a tree is built anyway
10601 * Returns the resulting document tree
/*
 * xmlRecoverMemory: recovery-mode front end for an in-memory block.
 * Same call as xmlParseMemory() except that recovery is set to 1.
 */
10604 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
10605 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
10609 * xmlSAXUserParseMemory:
10610 * @sax: a SAX handler
10611 * @user_data: The user data returned on SAX callbacks
10612 * @buffer: an in-memory XML document input
10613 * @size: the length of the XML document in bytes
10615 * A better SAX parsing routine.
10616 * parse an XML in-memory buffer and call the given SAX handler routines.
10618 * Returns 0 in case of success or an error number otherwise
/*
 * xmlSAXUserParseMemory: parse an in-memory block with a caller-supplied
 * SAX handler.
 * Returns -1 when sax is NULL or when the parser context cannot be
 * created.
 * NOTE(review): the line installing sax on the context, the statements
 * that choose the result after parsing, and the final return are not
 * visible in this listing -- confirm them against the full file.
 */
10620 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
10621 const char *buffer
, int size
) {
10623 xmlParserCtxtPtr ctxt
;
10624 xmlSAXHandlerPtr oldsax
= NULL
;
/* A handler is mandatory for this entry point. */
10626 if (sax
== NULL
) return -1;
10627 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
10628 if (ctxt
== NULL
) return -1;
/* Remember the handler the context was created with; it is put back
 * before the context is freed. */
10629 oldsax
= ctxt
->sax
;
/* Caller user data overrides the context value only when it is non-NULL. */
10631 if (user_data
!= NULL
)
10632 ctxt
->userData
= user_data
;
10634 xmlParseDocument(ctxt
);
/* Result selection consults wellFormed first, then errNo. */
10636 if (ctxt
->wellFormed
)
10639 if (ctxt
->errNo
!= 0)
/* Restore the original handler, presumably so that the caller-owned one
 * is not released together with the context -- confirm. */
10644 ctxt
->sax
= oldsax
;
10645 xmlFreeParserCtxt(ctxt
);
10651 * xmlCreateDocParserCtxt:
10652 * @cur: a pointer to an array of xmlChar
10654 * Creates a parser context for an XML in-memory document.
10656 * Returns the new parser context or NULL
/*
 * xmlCreateDocParserCtxt: create a parser context for an xmlChar string
 * by measuring it with xmlStrlen() and delegating to
 * xmlCreateMemoryParserCtxt() with the pointer cast to char *.
 * NOTE(review): the declaration of len and any guard on cur are not
 * visible in this listing -- confirm them against the full file.
 */
10659 xmlCreateDocParserCtxt(xmlChar
*cur
) {
10664 len
= xmlStrlen(cur
);
10665 return(xmlCreateMemoryParserCtxt((char *)cur
, len
));
10670 * @sax: the SAX handler block
10671 * @cur: a pointer to an array of xmlChar
10672 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10675 * parse an XML in-memory document and build a tree.
10676 * It uses the given SAX function block to handle the parsing callback.
10677 * If sax is NULL, fallback to the default DOM tree building routines.
10679 * Returns the resulting document tree
/*
 * xmlSAXParseDoc: parse an in-memory xmlChar string through a context
 * created by xmlCreateDocParserCtxt().
 * Returns NULL when cur is NULL or when the context cannot be created.
 * NOTE(review): the declaration of ret, the lines installing sax, the
 * else branch around xmlFreeDoc and the final return are not visible in
 * this listing -- confirm them against the full file.
 */
10683 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, xmlChar
*cur
, int recovery
) {
10685 xmlParserCtxtPtr ctxt
;
10687 if (cur
== NULL
) return(NULL
);
10690 ctxt
= xmlCreateDocParserCtxt(cur
);
10691 if (ctxt
== NULL
) return(NULL
);
/* User data on the context is cleared before parsing. */
10694 ctxt
->userData
= NULL
;
10697 xmlParseDocument(ctxt
);
/* The tree is handed back when the input was well formed or when the
 * caller asked for recovery mode. */
10698 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
/* Otherwise the tree is freed and the context pointer cleared. */
10701 xmlFreeDoc(ctxt
->myDoc
);
10702 ctxt
->myDoc
= NULL
;
10706 xmlFreeParserCtxt(ctxt
);
10713 * @cur: a pointer to an array of xmlChar
10715 * parse an XML in-memory document and build a tree.
10717 * Returns the resulting document tree
/*
 * xmlParseDoc: strict front end for an in-memory xmlChar string.
 * Delegates to xmlSAXParseDoc() with a NULL SAX handler and recovery set
 * to 0, and returns whatever that call returns.
 */
10721 xmlParseDoc(xmlChar
*cur
) {
10722 return(xmlSAXParseDoc(NULL
, cur
, 0));
10725 /************************************************************************
10727 * Specific function to keep track of entities references *
10728 * and used by the XSLT debugger *
10730 ************************************************************************/
/*
 * File-local hook invoked by xmlAddEntityReference(); NULL means no hook
 * is installed. It is changed through xmlSetEntityReferenceFunc().
 */
10732 static xmlEntityReferenceFunc xmlEntityRefFunc
= NULL
;
10735 * xmlAddEntityReference:
10736 * @ent : A valid entity
10737 * @firstNode : A valid first node for children of entity
10738 * @lastNode : A valid last node of children entity
10740 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
/*
 * xmlAddEntityReference: forward ent, firstNode and lastNode to the hook
 * stored in xmlEntityRefFunc. Does nothing when no hook is installed.
 */
10743 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
10744 xmlNodePtr lastNode
)
/* Only call through the pointer when a hook has been registered. */
10746 if (xmlEntityRefFunc
!= NULL
) {
10747 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
10753 * xmlSetEntityReferenceFunc:
10754 * @func : A valid function
10756 * Set the function to call back when an XML entity reference has been made
/*
 * xmlSetEntityReferenceFunc: store func in the file-local
 * xmlEntityRefFunc hook, replacing any previous value. The assignment is
 * unconditional, so passing NULL removes the hook.
 */
10759 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func
)
10761 xmlEntityRefFunc
= func
;
10764 /************************************************************************
10768 ************************************************************************/
10770 #ifdef LIBXML_XPATH_ENABLED
10771 #include <libxml/xpath.h>
/* Declared here so that xmlInitParser() can compare xmlGenericError
 * against the default error function. */
10774 extern void xmlGenericErrorDefaultFunc(void *ctx
, const char *msg
, ...);
/* Set to 1 at the end of xmlInitParser() and back to 0 by
 * xmlCleanupParser(); xmlInitParser() tests it on entry. */
10775 static int xmlParserInitialized
= 0;
10780 * Initialization function for the XML parser.
10781 * This is not reentrant. Call once before processing in case of
10782 * use in multithreaded programs.
/*
 * xmlInitParser: one-time setup of the global state used by the parser.
 * Guarded by the xmlParserInitialized flag, which is set to 1 at the end.
 * NOTE(review): the statement following the flag test, some
 * initialisation calls, the #endif lines and the body of the XPath
 * section are not visible in this listing -- confirm them against the
 * full file.
 */
10786 xmlInitParser(void) {
/* Test whether initialisation already happened. */
10787 if (xmlParserInitialized
!= 0)
/* Install the default generic error function when the current one is
 * the default or is unset. */
10790 if ((xmlGenericError
== xmlGenericErrorDefaultFunc
) ||
10791 (xmlGenericError
== NULL
))
10792 initGenericErrorDefaultFunc(NULL
);
/* Encoding handlers, predefined entities, default SAX handler and the
 * default I/O callbacks. */
10795 xmlInitCharEncodingHandlers();
10796 xmlInitializePredefinedEntities();
10797 xmlDefaultSAXHandlerInit();
10798 xmlRegisterDefaultInputCallbacks();
10799 xmlRegisterDefaultOutputCallbacks();
/* HTML parser setup, compiled in only with LIBXML_HTML_ENABLED. */
10800 #ifdef LIBXML_HTML_ENABLED
10801 htmlInitAutoClose();
10802 htmlDefaultSAXHandlerInit();
10804 #ifdef LIBXML_XPATH_ENABLED
/* Record that initialisation has been done. */
10807 xmlParserInitialized
= 1;
10811 * xmlCleanupParser:
10813 * Cleanup function for the XML parser. It tries to reclaim all
10814 * parsing related global memory allocated for the parser processing.
10815 * It doesn't deallocate any document related memory. Calling this
10816 * function should not prevent reusing the parser.
/*
 * xmlCleanupParser: release global parser state (encoding handlers,
 * predefined entities, the catalog when LIBXML_CATALOG_ENABLED is
 * defined, thread data) and reset xmlParserInitialized to 0 so that
 * xmlInitParser() runs its setup again on the next call.
 * NOTE(review): the #endif and the closing lines of this function are not
 * visible in this listing.
 */
10820 xmlCleanupParser(void) {
10821 xmlCleanupCharEncodingHandlers();
10822 xmlCleanupPredefinedEntities();
10823 #ifdef LIBXML_CATALOG_ENABLED
10824 xmlCatalogCleanup();
10826 xmlCleanupThreads();
/* Allow a later xmlInitParser() call to initialise everything again. */
10827 xmlParserInitialized
= 0;