2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
70 #ifdef HAVE_SYS_STAT_H
84 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
);
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
88 const xmlChar
*base
, xmlParserCtxtPtr pctx
);
90 /************************************************************************
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
94 ************************************************************************/
96 #define XML_PARSER_BIG_ENTITY 1000
97 #define XML_PARSER_LOT_ENTITY 5000
100 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
101 * replacement over the size in bytes of the input indicates that you have
102 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
103 * replacements per byte of input.
105 #define XML_PARSER_NON_LINEAR 10
108 * xmlParserEntityCheck
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt
, unsigned long size
,
120 unsigned long consumed
= 0;
122 if ((ctxt
== NULL
) || (ctxt
->options
& XML_PARSE_HUGE
))
124 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
128 * Do the check based on the replacement size of the entity
130 if (size
< XML_PARSER_BIG_ENTITY
)
134 * A limit on the amount of text data reasonably used
136 if (ctxt
->input
!= NULL
) {
137 consumed
= ctxt
->input
->consumed
+
138 (ctxt
->input
->cur
- ctxt
->input
->base
);
140 consumed
+= ctxt
->sizeentities
;
142 if ((size
< XML_PARSER_NON_LINEAR
* consumed
) &&
143 (ctxt
->nbentities
* 3 < XML_PARSER_NON_LINEAR
* consumed
))
145 } else if (ent
!= NULL
) {
147 * use the number of parsed entities in the replacement
152 * The amount of data parsed counting entities size only once
154 if (ctxt
->input
!= NULL
) {
155 consumed
= ctxt
->input
->consumed
+
156 (ctxt
->input
->cur
- ctxt
->input
->base
);
158 consumed
+= ctxt
->sizeentities
;
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
164 if (size
* 3 < consumed
* XML_PARSER_NON_LINEAR
)
168 * strange we got no data for checking just return
173 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
185 unsigned int xmlParserMaxDepth
= 256;
190 #define XML_PARSER_BIG_BUFFER_SIZE 300
191 #define XML_PARSER_BUFFER_SIZE 100
192 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
195 * List of XML prefixed PI allowed by W3C specs
198 static const char *xmlW3CPIs
[] = {
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
206 const xmlChar
**str
);
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
210 xmlSAXHandlerPtr sax
,
211 void *user_data
, int depth
, const xmlChar
*URL
,
212 const xmlChar
*ID
, xmlNodePtr
*list
);
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
,
216 const char *encoding
);
217 #ifdef LIBXML_LEGACY_ENABLED
219 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
220 xmlNodePtr lastNode
);
221 #endif /* LIBXML_LEGACY_ENABLED */
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
225 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
);
230 /************************************************************************
232 * Some factorized error routines *
234 ************************************************************************/
237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
242 * Handle a redefinition of attribute error
245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt
, const xmlChar
* prefix
,
246 const xmlChar
* localname
)
248 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
249 (ctxt
->instate
== XML_PARSER_EOF
))
252 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
255 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
256 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
257 (const char *) localname
, NULL
, NULL
, 0, 0,
258 "Attribute %s redefined\n", localname
);
260 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
261 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
262 (const char *) prefix
, (const char *) localname
,
263 NULL
, 0, 0, "Attribute %s:%s redefined\n", prefix
,
266 ctxt
->wellFormed
= 0;
267 if (ctxt
->recovery
== 0)
268 ctxt
->disableSAX
= 1;
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
281 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
)
285 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
286 (ctxt
->instate
== XML_PARSER_EOF
))
289 case XML_ERR_INVALID_HEX_CHARREF
:
290 errmsg
= "CharRef: invalid hexadecimal value\n";
292 case XML_ERR_INVALID_DEC_CHARREF
:
293 errmsg
= "CharRef: invalid decimal value\n";
295 case XML_ERR_INVALID_CHARREF
:
296 errmsg
= "CharRef: invalid value\n";
298 case XML_ERR_INTERNAL_ERROR
:
299 errmsg
= "internal error";
301 case XML_ERR_PEREF_AT_EOF
:
302 errmsg
= "PEReference at end of document\n";
304 case XML_ERR_PEREF_IN_PROLOG
:
305 errmsg
= "PEReference in prolog\n";
307 case XML_ERR_PEREF_IN_EPILOG
:
308 errmsg
= "PEReference in epilog\n";
310 case XML_ERR_PEREF_NO_NAME
:
311 errmsg
= "PEReference: no name\n";
313 case XML_ERR_PEREF_SEMICOL_MISSING
:
314 errmsg
= "PEReference: expecting ';'\n";
316 case XML_ERR_ENTITY_LOOP
:
317 errmsg
= "Detected an entity reference loop\n";
319 case XML_ERR_ENTITY_NOT_STARTED
:
320 errmsg
= "EntityValue: \" or ' expected\n";
322 case XML_ERR_ENTITY_PE_INTERNAL
:
323 errmsg
= "PEReferences forbidden in internal subset\n";
325 case XML_ERR_ENTITY_NOT_FINISHED
:
326 errmsg
= "EntityValue: \" or ' expected\n";
328 case XML_ERR_ATTRIBUTE_NOT_STARTED
:
329 errmsg
= "AttValue: \" or ' expected\n";
331 case XML_ERR_LT_IN_ATTRIBUTE
:
332 errmsg
= "Unescaped '<' not allowed in attributes values\n";
334 case XML_ERR_LITERAL_NOT_STARTED
:
335 errmsg
= "SystemLiteral \" or ' expected\n";
337 case XML_ERR_LITERAL_NOT_FINISHED
:
338 errmsg
= "Unfinished System or Public ID \" or ' expected\n";
340 case XML_ERR_MISPLACED_CDATA_END
:
341 errmsg
= "Sequence ']]>' not allowed in content\n";
343 case XML_ERR_URI_REQUIRED
:
344 errmsg
= "SYSTEM or PUBLIC, the URI is missing\n";
346 case XML_ERR_PUBID_REQUIRED
:
347 errmsg
= "PUBLIC, the Public Identifier is missing\n";
349 case XML_ERR_HYPHEN_IN_COMMENT
:
350 errmsg
= "Comment must not contain '--' (double-hyphen)\n";
352 case XML_ERR_PI_NOT_STARTED
:
353 errmsg
= "xmlParsePI : no target name\n";
355 case XML_ERR_RESERVED_XML_NAME
:
356 errmsg
= "Invalid PI name\n";
358 case XML_ERR_NOTATION_NOT_STARTED
:
359 errmsg
= "NOTATION: Name expected here\n";
361 case XML_ERR_NOTATION_NOT_FINISHED
:
362 errmsg
= "'>' required to close NOTATION declaration\n";
364 case XML_ERR_VALUE_REQUIRED
:
365 errmsg
= "Entity value required\n";
367 case XML_ERR_URI_FRAGMENT
:
368 errmsg
= "Fragment not allowed";
370 case XML_ERR_ATTLIST_NOT_STARTED
:
371 errmsg
= "'(' required to start ATTLIST enumeration\n";
373 case XML_ERR_NMTOKEN_REQUIRED
:
374 errmsg
= "NmToken expected in ATTLIST enumeration\n";
376 case XML_ERR_ATTLIST_NOT_FINISHED
:
377 errmsg
= "')' required to finish ATTLIST enumeration\n";
379 case XML_ERR_MIXED_NOT_STARTED
:
380 errmsg
= "MixedContentDecl : '|' or ')*' expected\n";
382 case XML_ERR_PCDATA_REQUIRED
:
383 errmsg
= "MixedContentDecl : '#PCDATA' expected\n";
385 case XML_ERR_ELEMCONTENT_NOT_STARTED
:
386 errmsg
= "ContentDecl : Name or '(' expected\n";
388 case XML_ERR_ELEMCONTENT_NOT_FINISHED
:
389 errmsg
= "ContentDecl : ',' '|' or ')' expected\n";
391 case XML_ERR_PEREF_IN_INT_SUBSET
:
393 "PEReference: forbidden within markup decl in internal subset\n";
395 case XML_ERR_GT_REQUIRED
:
396 errmsg
= "expected '>'\n";
398 case XML_ERR_CONDSEC_INVALID
:
399 errmsg
= "XML conditional section '[' expected\n";
401 case XML_ERR_EXT_SUBSET_NOT_FINISHED
:
402 errmsg
= "Content error in the external subset\n";
404 case XML_ERR_CONDSEC_INVALID_KEYWORD
:
406 "conditional section INCLUDE or IGNORE keyword expected\n";
408 case XML_ERR_CONDSEC_NOT_FINISHED
:
409 errmsg
= "XML conditional section not closed\n";
411 case XML_ERR_XMLDECL_NOT_STARTED
:
412 errmsg
= "Text declaration '<?xml' required\n";
414 case XML_ERR_XMLDECL_NOT_FINISHED
:
415 errmsg
= "parsing XML declaration: '?>' expected\n";
417 case XML_ERR_EXT_ENTITY_STANDALONE
:
418 errmsg
= "external parsed entities cannot be standalone\n";
420 case XML_ERR_ENTITYREF_SEMICOL_MISSING
:
421 errmsg
= "EntityRef: expecting ';'\n";
423 case XML_ERR_DOCTYPE_NOT_FINISHED
:
424 errmsg
= "DOCTYPE improperly terminated\n";
426 case XML_ERR_LTSLASH_REQUIRED
:
427 errmsg
= "EndTag: '</' not found\n";
429 case XML_ERR_EQUAL_REQUIRED
:
430 errmsg
= "expected '='\n";
432 case XML_ERR_STRING_NOT_CLOSED
:
433 errmsg
= "String not closed expecting \" or '\n";
435 case XML_ERR_STRING_NOT_STARTED
:
436 errmsg
= "String not started expecting ' or \"\n";
438 case XML_ERR_ENCODING_NAME
:
439 errmsg
= "Invalid XML encoding name\n";
441 case XML_ERR_STANDALONE_VALUE
:
442 errmsg
= "standalone accepts only 'yes' or 'no'\n";
444 case XML_ERR_DOCUMENT_EMPTY
:
445 errmsg
= "Document is empty\n";
447 case XML_ERR_DOCUMENT_END
:
448 errmsg
= "Extra content at the end of the document\n";
450 case XML_ERR_NOT_WELL_BALANCED
:
451 errmsg
= "chunk is not well balanced\n";
453 case XML_ERR_EXTRA_CONTENT
:
454 errmsg
= "extra content at the end of well balanced chunk\n";
456 case XML_ERR_VERSION_MISSING
:
457 errmsg
= "Malformed declaration expecting version\n";
465 errmsg
= "Unregistered error message\n";
469 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
470 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, errmsg
,
473 ctxt
->wellFormed
= 0;
474 if (ctxt
->recovery
== 0)
475 ctxt
->disableSAX
= 1;
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
491 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
492 (ctxt
->instate
== XML_PARSER_EOF
))
496 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
497 XML_ERR_FATAL
, NULL
, 0, NULL
, NULL
, NULL
, 0, 0, "%s", msg
);
499 ctxt
->wellFormed
= 0;
500 if (ctxt
->recovery
== 0)
501 ctxt
->disableSAX
= 1;
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
516 xmlWarningMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
517 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
519 xmlStructuredErrorFunc schannel
= NULL
;
521 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
522 (ctxt
->instate
== XML_PARSER_EOF
))
524 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
525 (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
526 schannel
= ctxt
->sax
->serror
;
528 __xmlRaiseError(schannel
,
529 (ctxt
->sax
) ? ctxt
->sax
->warning
: NULL
,
531 ctxt
, NULL
, XML_FROM_PARSER
, error
,
532 XML_ERR_WARNING
, NULL
, 0,
533 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
534 msg
, (const char *) str1
, (const char *) str2
);
536 __xmlRaiseError(schannel
, NULL
, NULL
,
537 ctxt
, NULL
, XML_FROM_PARSER
, error
,
538 XML_ERR_WARNING
, NULL
, 0,
539 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
540 msg
, (const char *) str1
, (const char *) str2
);
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
551 * Handle a validity error.
554 xmlValidityError(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
555 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
557 xmlStructuredErrorFunc schannel
= NULL
;
559 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
560 (ctxt
->instate
== XML_PARSER_EOF
))
564 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
565 schannel
= ctxt
->sax
->serror
;
568 __xmlRaiseError(schannel
,
569 ctxt
->vctxt
.error
, ctxt
->vctxt
.userData
,
570 ctxt
, NULL
, XML_FROM_DTD
, error
,
571 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
572 (const char *) str2
, NULL
, 0, 0,
573 msg
, (const char *) str1
, (const char *) str2
);
576 __xmlRaiseError(schannel
, NULL
, NULL
,
577 ctxt
, NULL
, XML_FROM_DTD
, error
,
578 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
579 (const char *) str2
, NULL
, 0, 0,
580 msg
, (const char *) str1
, (const char *) str2
);
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
595 const char *msg
, int val
)
597 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
598 (ctxt
->instate
== XML_PARSER_EOF
))
602 __xmlRaiseError(NULL
, NULL
, NULL
,
603 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
604 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
606 ctxt
->wellFormed
= 0;
607 if (ctxt
->recovery
== 0)
608 ctxt
->disableSAX
= 1;
613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
625 const char *msg
, const xmlChar
*str1
, int val
,
628 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
629 (ctxt
->instate
== XML_PARSER_EOF
))
633 __xmlRaiseError(NULL
, NULL
, NULL
,
634 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
635 NULL
, 0, (const char *) str1
, (const char *) str2
,
636 NULL
, val
, 0, msg
, str1
, val
, str2
);
638 ctxt
->wellFormed
= 0;
639 if (ctxt
->recovery
== 0)
640 ctxt
->disableSAX
= 1;
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
655 const char *msg
, const xmlChar
* val
)
657 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
658 (ctxt
->instate
== XML_PARSER_EOF
))
662 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
663 XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
664 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
667 ctxt
->wellFormed
= 0;
668 if (ctxt
->recovery
== 0)
669 ctxt
->disableSAX
= 1;
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
680 * Handle a non fatal parser error
683 xmlErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
684 const char *msg
, const xmlChar
* val
)
686 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
687 (ctxt
->instate
== XML_PARSER_EOF
))
691 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
692 XML_FROM_PARSER
, error
, XML_ERR_ERROR
,
693 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
699 * @ctxt: an XML parser context
700 * @error: the error number
702 * @info1: extra information string
703 * @info2: extra information string
705 * Handle a namespace error (reported at XML_ERR_ERROR level, clears nsWellFormed)
708 xmlNsErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
710 const xmlChar
* info1
, const xmlChar
* info2
,
711 const xmlChar
* info3
)
713 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
714 (ctxt
->instate
== XML_PARSER_EOF
))
718 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
719 XML_ERR_ERROR
, NULL
, 0, (const char *) info1
,
720 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
721 info1
, info2
, info3
);
723 ctxt
->nsWellFormed
= 0;
728 * @ctxt: an XML parser context
729 * @error: the error number
731 * @info1: extra information string
732 * @info2: extra information string
734 * Handle a namespace warning (reported at XML_ERR_WARNING level)
737 xmlNsWarn(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
739 const xmlChar
* info1
, const xmlChar
* info2
,
740 const xmlChar
* info3
)
742 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
743 (ctxt
->instate
== XML_PARSER_EOF
))
745 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
746 XML_ERR_WARNING
, NULL
, 0, (const char *) info1
,
747 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
748 info1
, info2
, info3
);
751 /************************************************************************
753 * Library wide options *
755 ************************************************************************/
759 * @feature: the feature to be examined
761 * Examines if the library has been compiled with a given feature.
763 * Returns a non-zero value if the feature exists, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an
765 * unknown feature is requested, non-zero otherwise.
768 xmlHasFeature(xmlFeature feature
)
771 case XML_WITH_THREAD
:
772 #ifdef LIBXML_THREAD_ENABLED
778 #ifdef LIBXML_TREE_ENABLED
783 case XML_WITH_OUTPUT
:
784 #ifdef LIBXML_OUTPUT_ENABLED
790 #ifdef LIBXML_PUSH_ENABLED
795 case XML_WITH_READER
:
796 #ifdef LIBXML_READER_ENABLED
801 case XML_WITH_PATTERN
:
802 #ifdef LIBXML_PATTERN_ENABLED
807 case XML_WITH_WRITER
:
808 #ifdef LIBXML_WRITER_ENABLED
814 #ifdef LIBXML_SAX1_ENABLED
820 #ifdef LIBXML_FTP_ENABLED
826 #ifdef LIBXML_HTTP_ENABLED
832 #ifdef LIBXML_VALID_ENABLED
838 #ifdef LIBXML_HTML_ENABLED
843 case XML_WITH_LEGACY
:
844 #ifdef LIBXML_LEGACY_ENABLED
850 #ifdef LIBXML_C14N_ENABLED
855 case XML_WITH_CATALOG
:
856 #ifdef LIBXML_CATALOG_ENABLED
862 #ifdef LIBXML_XPATH_ENABLED
868 #ifdef LIBXML_XPTR_ENABLED
873 case XML_WITH_XINCLUDE
:
874 #ifdef LIBXML_XINCLUDE_ENABLED
880 #ifdef LIBXML_ICONV_ENABLED
885 case XML_WITH_ISO8859X
:
886 #ifdef LIBXML_ISO8859X_ENABLED
891 case XML_WITH_UNICODE
:
892 #ifdef LIBXML_UNICODE_ENABLED
897 case XML_WITH_REGEXP
:
898 #ifdef LIBXML_REGEXP_ENABLED
903 case XML_WITH_AUTOMATA
:
904 #ifdef LIBXML_AUTOMATA_ENABLED
910 #ifdef LIBXML_EXPR_ENABLED
915 case XML_WITH_SCHEMAS
:
916 #ifdef LIBXML_SCHEMAS_ENABLED
921 case XML_WITH_SCHEMATRON
:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
927 case XML_WITH_MODULES
:
928 #ifdef LIBXML_MODULES_ENABLED
934 #ifdef LIBXML_DEBUG_ENABLED
939 case XML_WITH_DEBUG_MEM
:
940 #ifdef DEBUG_MEMORY_LOCATION
945 case XML_WITH_DEBUG_RUN
:
946 #ifdef LIBXML_DEBUG_RUNTIME
952 #ifdef LIBXML_ZLIB_ENABLED
958 #ifdef LIBXML_ICU_ENABLED
969 /************************************************************************
971 * SAX2 defaulted attributes handling *
973 ************************************************************************/
977 * @ctxt: an XML parser context
979 * Do the SAX2 detection and specific initialization
982 xmlDetectSAX2(xmlParserCtxtPtr ctxt
) {
983 if (ctxt
== NULL
) return;
984 #ifdef LIBXML_SAX1_ENABLED
985 if ((ctxt
->sax
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
) &&
986 ((ctxt
->sax
->startElementNs
!= NULL
) ||
987 (ctxt
->sax
->endElementNs
!= NULL
))) ctxt
->sax2
= 1;
990 #endif /* LIBXML_SAX1_ENABLED */
992 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
993 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
994 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
995 if ((ctxt
->str_xml
==NULL
) || (ctxt
->str_xmlns
==NULL
) ||
996 (ctxt
->str_xml_ns
== NULL
)) {
997 xmlErrMemory(ctxt
, NULL
);
1001 typedef struct _xmlDefAttrs xmlDefAttrs
;
1002 typedef xmlDefAttrs
*xmlDefAttrsPtr
;
1003 struct _xmlDefAttrs
{
1004 int nbAttrs
; /* number of defaulted attributes on that element */
1005 int maxAttrs
; /* the size of the array */
1006 const xmlChar
*values
[5]; /* array of localname/prefix/values/external */
1010 * xmlAttrNormalizeSpace:
1011 * @src: the source string
1012 * @dst: the target string
1014 * Normalize the space in non CDATA attribute values:
1015 * If the attribute type is not CDATA, then the XML processor MUST further
1016 * process the normalized attribute value by discarding any leading and
1017 * trailing space (#x20) characters, and by replacing sequences of space
1018 * (#x20) characters by a single space (#x20) character.
1019 * Note that the size of dst needs to be at least that of src, and if one
1020 * doesn't need to preserve dst (and it doesn't come from a dictionary or
1021 * read-only memory) then passing src as dst is just fine.
1023 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1027 xmlAttrNormalizeSpace(const xmlChar
*src
, xmlChar
*dst
)
1029 if ((src
== NULL
) || (dst
== NULL
))
1032 while (*src
== 0x20) src
++;
1035 while (*src
== 0x20) src
++;
1049 * xmlAttrNormalizeSpace2:
1050 * @src: the source string
1052 * Normalize the space in non CDATA attribute values, a slightly more complex
1053 * front end to avoid allocation problems when running on attribute values
1054 * coming from the input.
1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1059 static const xmlChar
*
1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt
, xmlChar
*src
, int *len
)
1063 int remove_head
= 0;
1064 int need_realloc
= 0;
1067 if ((ctxt
== NULL
) || (src
== NULL
) || (len
== NULL
))
1074 while (*cur
== 0x20) {
1081 if ((*cur
== 0x20) || (*cur
== 0)) {
1091 ret
= xmlStrndup(src
+ remove_head
, i
- remove_head
+ 1);
1093 xmlErrMemory(ctxt
, NULL
);
1096 xmlAttrNormalizeSpace(ret
, ret
);
1097 *len
= (int) strlen((const char *)ret
);
1099 } else if (remove_head
) {
1100 *len
-= remove_head
;
1101 memmove(src
, src
+ remove_head
, 1 + *len
);
1109 * @ctxt: an XML parser context
1110 * @fullname: the element fullname
1111 * @fullattr: the attribute fullname
1112 * @value: the attribute value
1114 * Add a defaulted attribute for an element
1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt
,
1118 const xmlChar
*fullname
,
1119 const xmlChar
*fullattr
,
1120 const xmlChar
*value
) {
1121 xmlDefAttrsPtr defaults
;
1123 const xmlChar
*name
;
1124 const xmlChar
*prefix
;
1127 * Allows to detect attribute redefinitions
1129 if (ctxt
->attsSpecial
!= NULL
) {
1130 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1134 if (ctxt
->attsDefault
== NULL
) {
1135 ctxt
->attsDefault
= xmlHashCreateDict(10, ctxt
->dict
);
1136 if (ctxt
->attsDefault
== NULL
)
1141 * split the element name into prefix:localname , the string found
1142 * are within the DTD and then not associated to namespace names.
1144 name
= xmlSplitQName3(fullname
, &len
);
1146 name
= xmlDictLookup(ctxt
->dict
, fullname
, -1);
1149 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1150 prefix
= xmlDictLookup(ctxt
->dict
, fullname
, len
);
1154 * make sure there is some storage
1156 defaults
= xmlHashLookup2(ctxt
->attsDefault
, name
, prefix
);
1157 if (defaults
== NULL
) {
1158 defaults
= (xmlDefAttrsPtr
) xmlMalloc(sizeof(xmlDefAttrs
) +
1159 (4 * 5) * sizeof(const xmlChar
*));
1160 if (defaults
== NULL
)
1162 defaults
->nbAttrs
= 0;
1163 defaults
->maxAttrs
= 4;
1164 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1165 defaults
, NULL
) < 0) {
1169 } else if (defaults
->nbAttrs
>= defaults
->maxAttrs
) {
1170 xmlDefAttrsPtr temp
;
1172 temp
= (xmlDefAttrsPtr
) xmlRealloc(defaults
, sizeof(xmlDefAttrs
) +
1173 (2 * defaults
->maxAttrs
* 5) * sizeof(const xmlChar
*));
1177 defaults
->maxAttrs
*= 2;
1178 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1179 defaults
, NULL
) < 0) {
1186 * Split the attribute name into prefix:localname, the strings found
1187 * are within the DTD and then not associated to namespace names.
1189 name
= xmlSplitQName3(fullattr
, &len
);
1191 name
= xmlDictLookup(ctxt
->dict
, fullattr
, -1);
1194 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1195 prefix
= xmlDictLookup(ctxt
->dict
, fullattr
, len
);
1198 defaults
->values
[5 * defaults
->nbAttrs
] = name
;
1199 defaults
->values
[5 * defaults
->nbAttrs
+ 1] = prefix
;
1200 /* intern the string and precompute the end */
1201 len
= xmlStrlen(value
);
1202 value
= xmlDictLookup(ctxt
->dict
, value
, len
);
1203 defaults
->values
[5 * defaults
->nbAttrs
+ 2] = value
;
1204 defaults
->values
[5 * defaults
->nbAttrs
+ 3] = value
+ len
;
1206 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = BAD_CAST
"external";
1208 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = NULL
;
1209 defaults
->nbAttrs
++;
1214 xmlErrMemory(ctxt
, NULL
);
1219 * xmlAddSpecialAttr:
1220 * @ctxt: an XML parser context
1221 * @fullname: the element fullname
1222 * @fullattr: the attribute fullname
1223 * @type: the attribute type
1225 * Register this attribute type
1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt
,
1229 const xmlChar
*fullname
,
1230 const xmlChar
*fullattr
,
1233 if (ctxt
->attsSpecial
== NULL
) {
1234 ctxt
->attsSpecial
= xmlHashCreateDict(10, ctxt
->dict
);
1235 if (ctxt
->attsSpecial
== NULL
)
1239 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1242 xmlHashAddEntry2(ctxt
->attsSpecial
, fullname
, fullattr
,
1243 (void *) (long) type
);
1247 xmlErrMemory(ctxt
, NULL
);
1252 * xmlCleanSpecialAttrCallback:
1254 * Removes CDATA attributes from the special attribute table
1257 xmlCleanSpecialAttrCallback(void *payload
, void *data
,
1258 const xmlChar
*fullname
, const xmlChar
*fullattr
,
1259 const xmlChar
*unused ATTRIBUTE_UNUSED
) {
1260 xmlParserCtxtPtr ctxt
= (xmlParserCtxtPtr
) data
;
1262 if (((long) payload
) == XML_ATTRIBUTE_CDATA
) {
1263 xmlHashRemoveEntry2(ctxt
->attsSpecial
, fullname
, fullattr
, NULL
);
1268 * xmlCleanSpecialAttr:
1269 * @ctxt: an XML parser context
1271 * Trim the list of attributes defined to remove all those of type
1272 * CDATA as they are not special. This call should be done when finishing
1273 * to parse the DTD and before starting to parse the document root.
1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt
)
1278 if (ctxt
->attsSpecial
== NULL
)
1281 xmlHashScanFull(ctxt
->attsSpecial
, xmlCleanSpecialAttrCallback
, ctxt
);
1283 if (xmlHashSize(ctxt
->attsSpecial
) == 0) {
1284 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1285 ctxt
->attsSpecial
= NULL
;
1291 * xmlCheckLanguageID:
1292 * @lang: pointer to the string value
1294 * Checks that the value conforms to the LanguageID production:
1296 * NOTE: this is somewhat deprecated, those productions were removed from
1297 * the XML Second edition.
1299 * [33] LanguageID ::= Langcode ('-' Subcode)*
1300 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1301 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304 * [38] Subcode ::= ([a-z] | [A-Z])+
1306 * Returns 1 if correct 0 otherwise
1309 xmlCheckLanguageID(const xmlChar
* lang
)
1311 const xmlChar
*cur
= lang
;
1315 if (((cur
[0] == 'i') && (cur
[1] == '-')) ||
1316 ((cur
[0] == 'I') && (cur
[1] == '-'))) {
1321 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
1322 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1324 } else if (((cur
[0] == 'x') && (cur
[1] == '-')) ||
1325 ((cur
[0] == 'X') && (cur
[1] == '-'))) {
1330 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
1331 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1333 } else if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1334 ((cur
[0] >= 'a') && (cur
[0] <= 'z'))) {
1339 if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1340 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1346 while (cur
[0] != 0) { /* non input consuming */
1350 if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1351 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1355 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
1356 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1362 /************************************************************************
1364 * Parser stacks related functions and macros *
1366 ************************************************************************/
1368 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
1369 const xmlChar
** str
);
1374 * @ctxt: an XML parser context
1375 * @prefix: the namespace prefix or NULL
1376 * @URL: the namespace name
1378 * Pushes a new parser namespace on top of the ns stack
1380 * Returns -1 in case of error, -2 if the namespace should be discarded
1381 * and the index in the stack otherwise.
1384 nsPush(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
, const xmlChar
*URL
)
1386 if (ctxt
->options
& XML_PARSE_NSCLEAN
) {
1388 for (i
= 0;i
< ctxt
->nsNr
;i
+= 2) {
1389 if (ctxt
->nsTab
[i
] == prefix
) {
1391 if (ctxt
->nsTab
[i
+ 1] == URL
)
1393 /* out of scope keep it */
1398 if ((ctxt
->nsMax
== 0) || (ctxt
->nsTab
== NULL
)) {
1401 ctxt
->nsTab
= (const xmlChar
**)
1402 xmlMalloc(ctxt
->nsMax
* sizeof(xmlChar
*));
1403 if (ctxt
->nsTab
== NULL
) {
1404 xmlErrMemory(ctxt
, NULL
);
1408 } else if (ctxt
->nsNr
>= ctxt
->nsMax
) {
1409 const xmlChar
** tmp
;
1411 tmp
= (const xmlChar
**) xmlRealloc((char *) ctxt
->nsTab
,
1412 ctxt
->nsMax
* sizeof(ctxt
->nsTab
[0]));
1414 xmlErrMemory(ctxt
, NULL
);
1420 ctxt
->nsTab
[ctxt
->nsNr
++] = prefix
;
1421 ctxt
->nsTab
[ctxt
->nsNr
++] = URL
;
1422 return (ctxt
->nsNr
);
1426 * @ctxt: an XML parser context
1427 * @nr: the number to pop
1429 * Pops the top @nr parser prefix/namespace from the ns stack
1431 * Returns the number of namespaces removed
1434 nsPop(xmlParserCtxtPtr ctxt
, int nr
)
1438 if (ctxt
->nsTab
== NULL
) return(0);
1439 if (ctxt
->nsNr
< nr
) {
1440 xmlGenericError(xmlGenericErrorContext
, "Pbm popping %d NS\n", nr
);
1443 if (ctxt
->nsNr
<= 0)
1446 for (i
= 0;i
< nr
;i
++) {
1448 ctxt
->nsTab
[ctxt
->nsNr
] = NULL
;
1455 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt
, int nr
) {
1456 const xmlChar
**atts
;
1460 if (ctxt
->atts
== NULL
) {
1461 maxatts
= 55; /* allow for 10 attrs by default */
1462 atts
= (const xmlChar
**)
1463 xmlMalloc(maxatts
* sizeof(xmlChar
*));
1464 if (atts
== NULL
) goto mem_error
;
1466 attallocs
= (int *) xmlMalloc((maxatts
/ 5) * sizeof(int));
1467 if (attallocs
== NULL
) goto mem_error
;
1468 ctxt
->attallocs
= attallocs
;
1469 ctxt
->maxatts
= maxatts
;
1470 } else if (nr
+ 5 > ctxt
->maxatts
) {
1471 maxatts
= (nr
+ 5) * 2;
1472 atts
= (const xmlChar
**) xmlRealloc((void *) ctxt
->atts
,
1473 maxatts
* sizeof(const xmlChar
*));
1474 if (atts
== NULL
) goto mem_error
;
1476 attallocs
= (int *) xmlRealloc((void *) ctxt
->attallocs
,
1477 (maxatts
/ 5) * sizeof(int));
1478 if (attallocs
== NULL
) goto mem_error
;
1479 ctxt
->attallocs
= attallocs
;
1480 ctxt
->maxatts
= maxatts
;
1482 return(ctxt
->maxatts
);
1484 xmlErrMemory(ctxt
, NULL
);
1490 * @ctxt: an XML parser context
1491 * @value: the parser input
1493 * Pushes a new parser input on top of the input stack
1495 * Returns -1 in case of error, the index in the stack otherwise
1498 inputPush(xmlParserCtxtPtr ctxt
, xmlParserInputPtr value
)
1500 if ((ctxt
== NULL
) || (value
== NULL
))
1502 if (ctxt
->inputNr
>= ctxt
->inputMax
) {
1503 ctxt
->inputMax
*= 2;
1505 (xmlParserInputPtr
*) xmlRealloc(ctxt
->inputTab
,
1507 sizeof(ctxt
->inputTab
[0]));
1508 if (ctxt
->inputTab
== NULL
) {
1509 xmlErrMemory(ctxt
, NULL
);
1510 xmlFreeInputStream(value
);
1511 ctxt
->inputMax
/= 2;
1516 ctxt
->inputTab
[ctxt
->inputNr
] = value
;
1517 ctxt
->input
= value
;
1518 return (ctxt
->inputNr
++);
1522 * @ctxt: an XML parser context
1524 * Pops the top parser input from the input stack
1526 * Returns the input just removed
1529 inputPop(xmlParserCtxtPtr ctxt
)
1531 xmlParserInputPtr ret
;
1535 if (ctxt
->inputNr
<= 0)
1538 if (ctxt
->inputNr
> 0)
1539 ctxt
->input
= ctxt
->inputTab
[ctxt
->inputNr
- 1];
1542 ret
= ctxt
->inputTab
[ctxt
->inputNr
];
1543 ctxt
->inputTab
[ctxt
->inputNr
] = NULL
;
1548 * @ctxt: an XML parser context
1549 * @value: the element node
1551 * Pushes a new element node on top of the node stack
1553 * Returns -1 in case of error, the index in the stack otherwise
1556 nodePush(xmlParserCtxtPtr ctxt
, xmlNodePtr value
)
1558 if (ctxt
== NULL
) return(0);
1559 if (ctxt
->nodeNr
>= ctxt
->nodeMax
) {
1562 tmp
= (xmlNodePtr
*) xmlRealloc(ctxt
->nodeTab
,
1564 sizeof(ctxt
->nodeTab
[0]));
1566 xmlErrMemory(ctxt
, NULL
);
1569 ctxt
->nodeTab
= tmp
;
1572 if ((((unsigned int) ctxt
->nodeNr
) > xmlParserMaxDepth
) &&
1573 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
1574 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
1575 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1577 ctxt
->instate
= XML_PARSER_EOF
;
1580 ctxt
->nodeTab
[ctxt
->nodeNr
] = value
;
1582 return (ctxt
->nodeNr
++);
1587 * @ctxt: an XML parser context
1589 * Pops the top element node from the node stack
1591 * Returns the node just removed
1594 nodePop(xmlParserCtxtPtr ctxt
)
1598 if (ctxt
== NULL
) return(NULL
);
1599 if (ctxt
->nodeNr
<= 0)
1602 if (ctxt
->nodeNr
> 0)
1603 ctxt
->node
= ctxt
->nodeTab
[ctxt
->nodeNr
- 1];
1606 ret
= ctxt
->nodeTab
[ctxt
->nodeNr
];
1607 ctxt
->nodeTab
[ctxt
->nodeNr
] = NULL
;
1611 #ifdef LIBXML_PUSH_ENABLED
1614 * @ctxt: an XML parser context
1615 * @value: the element name
1616 * @prefix: the element prefix
1617 * @URI: the element namespace name
1619 * Pushes a new element name/prefix/URL on top of the name stack
1621 * Returns -1 in case of error, the index in the stack otherwise
1624 nameNsPush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
,
1625 const xmlChar
*prefix
, const xmlChar
*URI
, int nsNr
)
1627 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1628 const xmlChar
* *tmp
;
1631 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1633 sizeof(ctxt
->nameTab
[0]));
1638 ctxt
->nameTab
= tmp
;
1639 tmp2
= (void **) xmlRealloc((void * *)ctxt
->pushTab
,
1641 sizeof(ctxt
->pushTab
[0]));
1646 ctxt
->pushTab
= tmp2
;
1648 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1650 ctxt
->pushTab
[ctxt
->nameNr
* 3] = (void *) prefix
;
1651 ctxt
->pushTab
[ctxt
->nameNr
* 3 + 1] = (void *) URI
;
1652 ctxt
->pushTab
[ctxt
->nameNr
* 3 + 2] = (void *) (long) nsNr
;
1653 return (ctxt
->nameNr
++);
1655 xmlErrMemory(ctxt
, NULL
);
1660 * @ctxt: an XML parser context
1662 * Pops the top element/prefix/URI name from the name stack
1664 * Returns the name just removed
1666 static const xmlChar
*
1667 nameNsPop(xmlParserCtxtPtr ctxt
)
1671 if (ctxt
->nameNr
<= 0)
1674 if (ctxt
->nameNr
> 0)
1675 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1678 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1679 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1682 #endif /* LIBXML_PUSH_ENABLED */
1686 * @ctxt: an XML parser context
1687 * @value: the element name
1689 * Pushes a new element name on top of the name stack
1691 * Returns -1 in case of error, the index in the stack otherwise
1694 namePush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
)
1696 if (ctxt
== NULL
) return (-1);
1698 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1699 const xmlChar
* *tmp
;
1701 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1703 sizeof(ctxt
->nameTab
[0]));
1708 ctxt
->nameTab
= tmp
;
1710 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1712 return (ctxt
->nameNr
++);
1714 xmlErrMemory(ctxt
, NULL
);
1719 * @ctxt: an XML parser context
1721 * Pops the top element name from the name stack
1723 * Returns the name just removed
1726 namePop(xmlParserCtxtPtr ctxt
)
1730 if ((ctxt
== NULL
) || (ctxt
->nameNr
<= 0))
1733 if (ctxt
->nameNr
> 0)
1734 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1737 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1738 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1742 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
1743 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
1746 ctxt
->spaceMax
*= 2;
1747 tmp
= (int *) xmlRealloc(ctxt
->spaceTab
,
1748 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
1750 xmlErrMemory(ctxt
, NULL
);
1754 ctxt
->spaceTab
= tmp
;
1756 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
1757 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
1758 return(ctxt
->spaceNr
++);
1761 static int spacePop(xmlParserCtxtPtr ctxt
) {
1763 if (ctxt
->spaceNr
<= 0) return(0);
1765 if (ctxt
->spaceNr
> 0)
1766 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
1768 ctxt
->space
= &ctxt
->spaceTab
[0];
1769 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
1770 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
1775 * Macros for accessing the content. Those should be used only by the parser,
1778 * Dirty macros, i.e. one often needs to make assumptions about the context to
1781 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1782 * To be used with extreme caution since operations consuming
1783 * characters may move the input buffer to a different location !
1784 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1785 * This should be used internally by the parser
1786 * only to compare to ASCII values otherwise it would break when
1787 * running with UTF-8 encoding.
1788 * RAW same as CUR but in the input buffer, bypass any token
1789 * extraction that may have been done
1790 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1791 * to compare on ASCII based substring.
1792 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1793 * strings without newlines within the parser.
1794 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1795 * defined char within the parser.
1796 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
1798 * NEXT Skip to the next character, this does the proper decoding
1799 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1800 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1801 * CUR_CHAR(l) returns the current unicode character (int), set l
1802 * to the number of xmlChars used for the encoding [0-5].
1803 * CUR_SCHAR same but operate on a string instead of the context
1804 * COPY_BUF copy the current unicode char to the target buffer, increment
1806 * GROW, SHRINK handling of input buffers
/*
 * Direct accessors on the current input buffer. They all expect a
 * variable named ctxt to be in scope at the point of use. Expansions are
 * fully parenthesized so they can sit inside any larger expression;
 * CUR_PTR remains usable as an lvalue.
 */
/* byte under the cursor */
#define RAW (*ctxt->input->cur)
/* byte under the cursor (same expansion as RAW) */
#define CUR (*ctxt->input->cur)
/* byte at offset val from the cursor */
#define NXT(val) (ctxt->input->cur[(val)])
/* the cursor itself */
#define CUR_PTR (ctxt->input->cur)
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char and evaluation stops at the first
 * mismatch. Every argument is parenthesized in the expansion and the
 * cast keeps the const qualifier, so s may be any pointer expression.
 * s is evaluated once per compared byte: pass an expression without
 * side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1832 #define SKIP(val) do { \
1833 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1834 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1835 if ((*ctxt->input->cur == 0) && \
1836 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1837 xmlPopInput(ctxt); \
1840 #define SKIPL(val) do { \
1842 for(skipl=0; skipl<val; skipl++) { \
1843 if (*(ctxt->input->cur) == '\n') { \
1844 ctxt->input->line++; ctxt->input->col = 1; \
1845 } else ctxt->input->col++; \
1847 ctxt->input->cur++; \
1849 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1850 if ((*ctxt->input->cur == 0) && \
1851 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1852 xmlPopInput(ctxt); \
1855 #define SHRINK if ((ctxt->progressive == 0) && \
1856 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1857 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1860 static void xmlSHRINK (xmlParserCtxtPtr ctxt
) {
1861 xmlParserInputShrink(ctxt
->input
);
1862 if ((*ctxt
->input
->cur
== 0) &&
1863 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
1867 #define GROW if ((ctxt->progressive == 0) && \
1868 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1871 static void xmlGROW (xmlParserCtxtPtr ctxt
) {
1872 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1873 if ((ctxt
->input
->cur
!= NULL
) && (*ctxt
->input
->cur
== 0) &&
1874 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
1878 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1880 #define NEXT xmlNextChar(ctxt)
1883 ctxt->input->col++; \
1884 ctxt->input->cur++; \
1886 if (*ctxt->input->cur == 0) \
1887 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1890 #define NEXTL(l) do { \
1891 if (*(ctxt->input->cur) == '\n') { \
1892 ctxt->input->line++; ctxt->input->col = 1; \
1893 } else ctxt->input->col++; \
1894 ctxt->input->cur += l; \
1895 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1898 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1899 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1901 #define COPY_BUF(l,b,i,v) \
1902 if (l == 1) b[i++] = (xmlChar) v; \
1903 else i += xmlCopyCharMultiByte(&b[i],v)
1906 * xmlSkipBlankChars:
1907 * @ctxt: the XML parser context
1909 * Skips all blank characters found at that point in the input stream.
1910 * It pops up finished entities in the process if allowable at that point.
1912 * Returns the number of space chars skipped
1916 xmlSkipBlankChars(xmlParserCtxtPtr ctxt
) {
1920 * It's Okay to use CUR/NEXT here since all the blanks are on
1923 if ((ctxt
->inputNr
== 1) && (ctxt
->instate
!= XML_PARSER_DTD
)) {
1926 * if we are in the document content, go really fast
1928 cur
= ctxt
->input
->cur
;
1929 while (IS_BLANK_CH(*cur
)) {
1931 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
1936 ctxt
->input
->cur
= cur
;
1937 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1938 cur
= ctxt
->input
->cur
;
1941 ctxt
->input
->cur
= cur
;
1946 while (IS_BLANK_CH(cur
)) { /* CHECKED tstblanks.xml */
1951 while ((cur
== 0) && (ctxt
->inputNr
> 1) &&
1952 (ctxt
->instate
!= XML_PARSER_COMMENT
)) {
1957 * Need to handle support of entities branching here
1959 if (*ctxt
->input
->cur
== '%') xmlParserHandlePEReference(ctxt
);
1960 } while (IS_BLANK(cur
)); /* CHECKED tstblanks.xml */
1965 /************************************************************************
1967 * Commodity functions to handle entities *
1969 ************************************************************************/
1973 * @ctxt: an XML parser context
1975 * xmlPopInput: the current input pointed by ctxt->input came to an end
1976 * pop it and return the next char.
1978 * Returns the current xmlChar in the parser context
1981 xmlPopInput(xmlParserCtxtPtr ctxt
) {
1982 if ((ctxt
== NULL
) || (ctxt
->inputNr
<= 1)) return(0);
1983 if (xmlParserDebugEntities
)
1984 xmlGenericError(xmlGenericErrorContext
,
1985 "Popping input %d\n", ctxt
->inputNr
);
1986 xmlFreeInputStream(inputPop(ctxt
));
1987 if ((*ctxt
->input
->cur
== 0) &&
1988 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
1989 return(xmlPopInput(ctxt
));
1995 * @ctxt: an XML parser context
1996 * @input: an XML parser input fragment (entity, XML fragment ...).
1998 * xmlPushInput: switch to a new input stream which is stacked on top
1999 * of the previous one(s).
2000 * Returns -1 in case of error or the index in the input stack
2003 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
2005 if (input
== NULL
) return(-1);
2007 if (xmlParserDebugEntities
) {
2008 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
2009 xmlGenericError(xmlGenericErrorContext
,
2010 "%s(%d): ", ctxt
->input
->filename
,
2012 xmlGenericError(xmlGenericErrorContext
,
2013 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
2015 ret
= inputPush(ctxt
, input
);
2016 if (ctxt
->instate
== XML_PARSER_EOF
)
2024 * @ctxt: an XML parser context
2026 * parse Reference declarations
2028 * [66] CharRef ::= '&#' [0-9]+ ';' |
2029 * '&#x' [0-9a-fA-F]+ ';'
2031 * [ WFC: Legal Character ]
2032 * Characters referred to using character references must match the
2033 * production for Char.
2035 * Returns the value parsed (as an int), 0 in case of error
2038 xmlParseCharRef(xmlParserCtxtPtr ctxt
) {
2039 unsigned int val
= 0;
2041 unsigned int outofrange
= 0;
2044 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2046 if ((RAW
== '&') && (NXT(1) == '#') &&
2050 while (RAW
!= ';') { /* loop blocked by count */
2054 if (ctxt
->instate
== XML_PARSER_EOF
)
2057 if ((RAW
>= '0') && (RAW
<= '9'))
2058 val
= val
* 16 + (CUR
- '0');
2059 else if ((RAW
>= 'a') && (RAW
<= 'f') && (count
< 20))
2060 val
= val
* 16 + (CUR
- 'a') + 10;
2061 else if ((RAW
>= 'A') && (RAW
<= 'F') && (count
< 20))
2062 val
= val
* 16 + (CUR
- 'A') + 10;
2064 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2080 } else if ((RAW
== '&') && (NXT(1) == '#')) {
2083 while (RAW
!= ';') { /* loop blocked by count */
2087 if (ctxt
->instate
== XML_PARSER_EOF
)
2090 if ((RAW
>= '0') && (RAW
<= '9'))
2091 val
= val
* 10 + (CUR
- '0');
2093 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2104 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2110 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2114 * [ WFC: Legal Character ]
2115 * Characters referred to using character references must match the
2116 * production for Char.
2118 if ((IS_CHAR(val
) && (outofrange
== 0))) {
2121 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2122 "xmlParseCharRef: invalid xmlChar value %d\n",
2129 * xmlParseStringCharRef:
2130 * @ctxt: an XML parser context
2131 * @str: a pointer to an index in the string
2133 * parse Reference declarations, variant parsing from a string rather
2134 * than an input flow.
2136 * [66] CharRef ::= '&#' [0-9]+ ';' |
2137 * '&#x' [0-9a-fA-F]+ ';'
2139 * [ WFC: Legal Character ]
2140 * Characters referred to using character references must match the
2141 * production for Char.
2143 * Returns the value parsed (as an int), 0 in case of error, str will be
2144 * updated to the current value of the index
2147 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
2150 unsigned int val
= 0;
2151 unsigned int outofrange
= 0;
2153 if ((str
== NULL
) || (*str
== NULL
)) return(0);
2156 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
2159 while (cur
!= ';') { /* Non input consuming loop */
2160 if ((cur
>= '0') && (cur
<= '9'))
2161 val
= val
* 16 + (cur
- '0');
2162 else if ((cur
>= 'a') && (cur
<= 'f'))
2163 val
= val
* 16 + (cur
- 'a') + 10;
2164 else if ((cur
>= 'A') && (cur
<= 'F'))
2165 val
= val
* 16 + (cur
- 'A') + 10;
2167 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2179 } else if ((cur
== '&') && (ptr
[1] == '#')){
2182 while (cur
!= ';') { /* Non input consuming loops */
2183 if ((cur
>= '0') && (cur
<= '9'))
2184 val
= val
* 10 + (cur
- '0');
2186 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2199 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2205 * [ WFC: Legal Character ]
2206 * Characters referred to using character references must match the
2207 * production for Char.
2209 if ((IS_CHAR(val
) && (outofrange
== 0))) {
2212 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2213 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2220 * xmlNewBlanksWrapperInputStream:
2221 * @ctxt: an XML parser context
2222 * @entity: an Entity pointer
2224 * Create a new input stream for wrapping
2225 * blanks around a PEReference
2227 * Returns the new input stream or NULL
2230 static void deallocblankswrapper (xmlChar
*str
) {xmlFree(str
);}
2232 static xmlParserInputPtr
2233 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
2234 xmlParserInputPtr input
;
2237 if (entity
== NULL
) {
2238 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
2239 "xmlNewBlanksWrapperInputStream entity\n");
2242 if (xmlParserDebugEntities
)
2243 xmlGenericError(xmlGenericErrorContext
,
2244 "new blanks wrapper for entity: %s\n", entity
->name
);
2245 input
= xmlNewInputStream(ctxt
);
2246 if (input
== NULL
) {
2249 length
= xmlStrlen(entity
->name
) + 5;
2250 buffer
= xmlMallocAtomic(length
);
2251 if (buffer
== NULL
) {
2252 xmlErrMemory(ctxt
, NULL
);
2258 buffer
[length
-3] = ';';
2259 buffer
[length
-2] = ' ';
2260 buffer
[length
-1] = 0;
2261 memcpy(buffer
+ 2, entity
->name
, length
- 5);
2262 input
->free
= deallocblankswrapper
;
2263 input
->base
= buffer
;
2264 input
->cur
= buffer
;
2265 input
->length
= length
;
2266 input
->end
= &buffer
[length
];
2271 * xmlParserHandlePEReference:
2272 * @ctxt: the parser context
2274 * [69] PEReference ::= '%' Name ';'
2276 * [ WFC: No Recursion ]
2277 * A parsed entity must not contain a recursive
2278 * reference to itself, either directly or indirectly.
2280 * [ WFC: Entity Declared ]
2281 * In a document without any DTD, a document with only an internal DTD
2282 * subset which contains no parameter entity references, or a document
2283 * with "standalone='yes'", ... ... The declaration of a parameter
2284 * entity must precede any reference to it...
2286 * [ VC: Entity Declared ]
2287 * In a document with an external subset or external parameter entities
2288 * with "standalone='no'", ... ... The declaration of a parameter entity
2289 * must precede any reference to it...
2292 * Parameter-entity references may only appear in the DTD.
2293 * NOTE: misleading but this is handled.
2295 * A PEReference may have been detected in the current input stream
2296 * the handling is done accordingly to
2297 * http://www.w3.org/TR/REC-xml#entproc
2299 * - Included in literal in entity values
2300 * - Included as Parameter Entity reference within DTDs
2303 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
2304 const xmlChar
*name
;
2305 xmlEntityPtr entity
= NULL
;
2306 xmlParserInputPtr input
;
2308 if (RAW
!= '%') return;
2309 switch(ctxt
->instate
) {
2310 case XML_PARSER_CDATA_SECTION
:
2312 case XML_PARSER_COMMENT
:
2314 case XML_PARSER_START_TAG
:
2316 case XML_PARSER_END_TAG
:
2318 case XML_PARSER_EOF
:
2319 xmlFatalErr(ctxt
, XML_ERR_PEREF_AT_EOF
, NULL
);
2321 case XML_PARSER_PROLOG
:
2322 case XML_PARSER_START
:
2323 case XML_PARSER_MISC
:
2324 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_PROLOG
, NULL
);
2326 case XML_PARSER_ENTITY_DECL
:
2327 case XML_PARSER_CONTENT
:
2328 case XML_PARSER_ATTRIBUTE_VALUE
:
2330 case XML_PARSER_SYSTEM_LITERAL
:
2331 case XML_PARSER_PUBLIC_LITERAL
:
2332 /* we just ignore it there */
2334 case XML_PARSER_EPILOG
:
2335 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_EPILOG
, NULL
);
2337 case XML_PARSER_ENTITY_VALUE
:
2339 * NOTE: in the case of entity values, we don't do the
2340 * substitution here since we need the literal
2341 * entity value to be able to save the internal
2342 * subset of the document.
2343 * This will be handled by xmlStringDecodeEntities
2346 case XML_PARSER_DTD
:
2348 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2349 * In the internal DTD subset, parameter-entity references
2350 * can occur only where markup declarations can occur, not
2351 * within markup declarations.
2352 * In that case this is handled in xmlParseMarkupDecl
2354 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
2356 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2359 case XML_PARSER_IGNORE
:
2364 name
= xmlParseName(ctxt
);
2365 if (xmlParserDebugEntities
)
2366 xmlGenericError(xmlGenericErrorContext
,
2367 "PEReference: %s\n", name
);
2369 xmlFatalErr(ctxt
, XML_ERR_PEREF_NO_NAME
, NULL
);
2373 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->getParameterEntity
!= NULL
))
2374 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
2375 if (ctxt
->instate
== XML_PARSER_EOF
)
2377 if (entity
== NULL
) {
2380 * [ WFC: Entity Declared ]
2381 * In a document without any DTD, a document with only an
2382 * internal DTD subset which contains no parameter entity
2383 * references, or a document with "standalone='yes'", ...
2384 * ... The declaration of a parameter entity must precede
2385 * any reference to it...
2387 if ((ctxt
->standalone
== 1) ||
2388 ((ctxt
->hasExternalSubset
== 0) &&
2389 (ctxt
->hasPErefs
== 0))) {
2390 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
2391 "PEReference: %%%s; not found\n", name
);
2394 * [ VC: Entity Declared ]
2395 * In a document with an external subset or external
2396 * parameter entities with "standalone='no'", ...
2397 * ... The declaration of a parameter entity must precede
2398 * any reference to it...
2400 if ((ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
2401 xmlValidityError(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
2402 "PEReference: %%%s; not found\n",
2405 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
2406 "PEReference: %%%s; not found\n",
2410 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
2411 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
2412 if (xmlPushInput(ctxt
, input
) < 0)
2415 if ((entity
->etype
== XML_INTERNAL_PARAMETER_ENTITY
) ||
2416 (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
)) {
2418 xmlCharEncoding enc
;
2421 * handle the extra spaces added before and after
2422 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2423 * this is done independently.
2425 input
= xmlNewEntityInputStream(ctxt
, entity
);
2426 if (xmlPushInput(ctxt
, input
) < 0)
2430 * Get the 4 first bytes and decode the charset
2431 * if enc != XML_CHAR_ENCODING_NONE
2432 * plug some encoding conversion routines.
2433 * Note that, since we may have some non-UTF8
2434 * encoding (like UTF16, bug 135229), the 'length'
2435 * is not known, but we can calculate based upon
2436 * the amount of data in the buffer.
2439 if (ctxt
->instate
== XML_PARSER_EOF
)
2441 if ((ctxt
->input
->end
- ctxt
->input
->cur
)>=4) {
2446 enc
= xmlDetectCharEncoding(start
, 4);
2447 if (enc
!= XML_CHAR_ENCODING_NONE
) {
2448 xmlSwitchEncoding(ctxt
, enc
);
2452 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
2453 (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l' )) &&
2454 (IS_BLANK_CH(NXT(5)))) {
2455 xmlParseTextDecl(ctxt
);
2458 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
2459 "PEReference: %s is not a parameter entity\n",
2464 xmlFatalErr(ctxt
, XML_ERR_PEREF_SEMICOL_MISSING
, NULL
);
2470 * Macro used to grow the current buffer.
2472 #define growBuffer(buffer, n) { \
2474 buffer##_size *= 2; \
2475 buffer##_size += n; \
2477 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
2478 if (tmp == NULL) goto mem_error; \
2483 * xmlStringLenDecodeEntities:
2484 * @ctxt: the parser context
2485 * @str: the input string
2486 * @len: the string length
2487 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2488 * @end: an end marker xmlChar, 0 if none
2489 * @end2: an end marker xmlChar, 0 if none
2490 * @end3: an end marker xmlChar, 0 if none
2492 * Takes an entity string's content and performs the appropriate substitutions.
2494 * [67] Reference ::= EntityRef | CharRef
2496 * [69] PEReference ::= '%' Name ';'
2498 * Returns A newly allocated string with the substitution done. The caller
2499 * must deallocate it !
2502 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2503 int what
, xmlChar end
, xmlChar end2
, xmlChar end3
) {
2504 xmlChar
*buffer
= NULL
;
2505 int buffer_size
= 0;
2507 xmlChar
*current
= NULL
;
2508 xmlChar
*rep
= NULL
;
2509 const xmlChar
*last
;
2514 if ((ctxt
== NULL
) || (str
== NULL
) || (len
< 0))
2518 if (((ctxt
->depth
> 40) &&
2519 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2520 (ctxt
->depth
> 1024)) {
2521 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2526 * allocate a translation buffer.
2528 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
2529 buffer
= (xmlChar
*) xmlMallocAtomic(buffer_size
* sizeof(xmlChar
));
2530 if (buffer
== NULL
) goto mem_error
;
2533 * OK loop until we reach one of the ending char or a size limit.
2534 * we are operating on already parsed values.
2537 c
= CUR_SCHAR(str
, l
);
2540 while ((c
!= 0) && (c
!= end
) && /* non input consuming loop */
2541 (c
!= end2
) && (c
!= end3
)) {
2544 if ((c
== '&') && (str
[1] == '#')) {
2545 int val
= xmlParseStringCharRef(ctxt
, &str
);
2547 COPY_BUF(0,buffer
,nbchars
,val
);
2549 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2550 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2552 } else if ((c
== '&') && (what
& XML_SUBSTITUTE_REF
)) {
2553 if (xmlParserDebugEntities
)
2554 xmlGenericError(xmlGenericErrorContext
,
2555 "String decoding Entity Reference: %.30s\n",
2557 ent
= xmlParseStringEntityRef(ctxt
, &str
);
2558 if ((ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
) ||
2559 (ctxt
->lastError
.code
== XML_ERR_INTERNAL_ERROR
))
2562 ctxt
->nbentities
+= ent
->checked
;
2563 if ((ent
!= NULL
) &&
2564 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
2565 if (ent
->content
!= NULL
) {
2566 COPY_BUF(0,buffer
,nbchars
,ent
->content
[0]);
2567 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2568 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2571 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
2572 "predefined entity has no content\n");
2574 } else if ((ent
!= NULL
) && (ent
->content
!= NULL
)) {
2576 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2582 while (*current
!= 0) { /* non input consuming loop */
2583 buffer
[nbchars
++] = *current
++;
2585 buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2586 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
))
2588 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2594 } else if (ent
!= NULL
) {
2595 int i
= xmlStrlen(ent
->name
);
2596 const xmlChar
*cur
= ent
->name
;
2598 buffer
[nbchars
++] = '&';
2599 if (nbchars
> buffer_size
- i
- XML_PARSER_BUFFER_SIZE
) {
2600 growBuffer(buffer
, i
+ XML_PARSER_BUFFER_SIZE
);
2603 buffer
[nbchars
++] = *cur
++;
2604 buffer
[nbchars
++] = ';';
2606 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
2607 if (xmlParserDebugEntities
)
2608 xmlGenericError(xmlGenericErrorContext
,
2609 "String decoding PE Reference: %.30s\n", str
);
2610 ent
= xmlParseStringPEReference(ctxt
, &str
);
2611 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
2614 ctxt
->nbentities
+= ent
->checked
;
2616 if (ent
->content
== NULL
) {
2617 xmlLoadEntityContent(ctxt
, ent
);
2620 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2625 while (*current
!= 0) { /* non input consuming loop */
2626 buffer
[nbchars
++] = *current
++;
2628 buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2629 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
))
2631 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2639 COPY_BUF(l
,buffer
,nbchars
,c
);
2641 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2642 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2646 c
= CUR_SCHAR(str
, l
);
2650 buffer
[nbchars
] = 0;
2654 xmlErrMemory(ctxt
, NULL
);
2664 * xmlStringDecodeEntities:
2665 * @ctxt: the parser context
2666 * @str: the input string
2667 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2668 * @end: an end marker xmlChar, 0 if none
2669 * @end2: an end marker xmlChar, 0 if none
2670 * @end3: an end marker xmlChar, 0 if none
2672 * Takes a entity string content and process to do the adequate substitutions.
2674 * [67] Reference ::= EntityRef | CharRef
2676 * [69] PEReference ::= '%' Name ';'
2678 * Returns A newly allocated string with the substitution done. The caller
2679 * must deallocate it !
2682 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
2683 xmlChar end
, xmlChar end2
, xmlChar end3
) {
2684 if ((ctxt
== NULL
) || (str
== NULL
)) return(NULL
);
2685 return(xmlStringLenDecodeEntities(ctxt
, str
, xmlStrlen(str
), what
,
2689 /************************************************************************
2691 * Commodity functions, cleanup needed ? *
2693 ************************************************************************/
2697 * @ctxt: an XML parser context
2699 * @len: the size of @str
2700 * @blank_chars: we know the chars are blanks
2702 * Is this a sequence of blank chars that one can ignore ?
2704 * Returns 1 if ignorable 0 otherwise.
2707 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2710 xmlNodePtr lastChild
;
2713 * Don't spend time trying to differentiate them, the same callback is
2716 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
2720 * Check for xml:space value.
2722 if ((ctxt
->space
== NULL
) || (*(ctxt
->space
) == 1) ||
2723 (*(ctxt
->space
) == -2))
2727 * Check that the string is made of blanks
2729 if (blank_chars
== 0) {
2730 for (i
= 0;i
< len
;i
++)
2731 if (!(IS_BLANK_CH(str
[i
]))) return(0);
2735 * Look if the element is mixed content in the DTD if available
2737 if (ctxt
->node
== NULL
) return(0);
2738 if (ctxt
->myDoc
!= NULL
) {
2739 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
2740 if (ret
== 0) return(1);
2741 if (ret
== 1) return(0);
2745 * Otherwise, heuristic :-\
2747 if ((RAW
!= '<') && (RAW
!= 0xD)) return(0);
2748 if ((ctxt
->node
->children
== NULL
) &&
2749 (RAW
== '<') && (NXT(1) == '/')) return(0);
2751 lastChild
= xmlGetLastChild(ctxt
->node
);
2752 if (lastChild
== NULL
) {
2753 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
2754 (ctxt
->node
->content
!= NULL
)) return(0);
2755 } else if (xmlNodeIsText(lastChild
))
2757 else if ((ctxt
->node
->children
!= NULL
) &&
2758 (xmlNodeIsText(ctxt
->node
->children
)))
2763 /************************************************************************
2765 * Extra stuff for namespace support *
2766 * Relates to http://www.w3.org/TR/WD-xml-names *
2768 ************************************************************************/
2772 * @ctxt: an XML parser context
2773 * @name: the UTF8 encoded XML qualified name to split
2774 * @prefix: a xmlChar **
2776 * parse an UTF8 encoded XML qualified name string
2778 * [NS 5] QName ::= (Prefix ':')? LocalPart
2780 * [NS 6] Prefix ::= NCName
2782 * [NS 7] LocalPart ::= NCName
2784 * Returns the local part, and prefix is updated
2785 * to get the Prefix if any.
2789 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
2790 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2791 xmlChar
*buffer
= NULL
;
2793 int max
= XML_MAX_NAMELEN
;
2794 xmlChar
*ret
= NULL
;
2795 const xmlChar
*cur
= name
;
2798 if (prefix
== NULL
) return(NULL
);
2801 if (cur
== NULL
) return(NULL
);
2803 #ifndef XML_XML_NAMESPACE
2804 /* xml: prefix is not really a namespace */
2805 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
2806 (cur
[2] == 'l') && (cur
[3] == ':'))
2807 return(xmlStrdup(name
));
2810 /* nasty but well-formed */
2812 return(xmlStrdup(name
));
2815 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
2821 * Okay someone managed to make a huge name, so he's ready to pay
2822 * for the processing speed.
2826 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
2827 if (buffer
== NULL
) {
2828 xmlErrMemory(ctxt
, NULL
);
2831 memcpy(buffer
, buf
, len
);
2832 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
2833 if (len
+ 10 > max
) {
2837 tmp
= (xmlChar
*) xmlRealloc(buffer
,
2838 max
* sizeof(xmlChar
));
2841 xmlErrMemory(ctxt
, NULL
);
2852 if ((c
== ':') && (*cur
== 0)) {
2856 return(xmlStrdup(name
));
2860 ret
= xmlStrndup(buf
, len
);
2864 max
= XML_MAX_NAMELEN
;
2872 return(xmlStrndup(BAD_CAST
"", 0));
2877 * Check that the first character is proper to start
2880 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
2881 ((c
>= 0x41) && (c
<= 0x5A)) ||
2882 (c
== '_') || (c
== ':'))) {
2884 int first
= CUR_SCHAR(cur
, l
);
2886 if (!IS_LETTER(first
) && (first
!= '_')) {
2887 xmlFatalErrMsgStr(ctxt
, XML_NS_ERR_QNAME
,
2888 "Name %s is not XML Namespace compliant\n",
2894 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
2900 * Okay someone managed to make a huge name, so he's ready to pay
2901 * for the processing speed.
2905 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
2906 if (buffer
== NULL
) {
2907 xmlErrMemory(ctxt
, NULL
);
2910 memcpy(buffer
, buf
, len
);
2911 while (c
!= 0) { /* tested bigname2.xml */
2912 if (len
+ 10 > max
) {
2916 tmp
= (xmlChar
*) xmlRealloc(buffer
,
2917 max
* sizeof(xmlChar
));
2919 xmlErrMemory(ctxt
, NULL
);
2932 ret
= xmlStrndup(buf
, len
);
2941 /************************************************************************
2943 * The parser itself *
2944 * Relates to http://www.w3.org/TR/REC-xml *
2946 ************************************************************************/
2948 /************************************************************************
2950 * Routines to parse Name, NCName and NmToken *
2952 ************************************************************************/
2954 static unsigned long nbParseName
= 0;
2955 static unsigned long nbParseNmToken
= 0;
2956 static unsigned long nbParseNCName
= 0;
2957 static unsigned long nbParseNCNameComplex
= 0;
2958 static unsigned long nbParseNameComplex
= 0;
2959 static unsigned long nbParseStringName
= 0;
2963 * The two following functions are related to the change of accepted
2964 * characters for Name and NmToken in the Revision 5 of XML-1.0
2965 * They correspond to the modified production [4] and the new production [4a]
2966 * changes in that revision. Also note that the macros used for the
2967 * productions Letter, Digit, CombiningChar and Extender are not needed
2969 * We still keep compatibility to pre-revision5 parsing semantic if the
2970 * new XML_PARSE_OLD10 option is given to the parser.
2973 xmlIsNameStartChar(xmlParserCtxtPtr ctxt
, int c
) {
2974 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
2976 * Use the new checks of production [4] [4a] and [5] of the
2977 * Update 5 of XML-1.0
2979 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
2980 (((c
>= 'a') && (c
<= 'z')) ||
2981 ((c
>= 'A') && (c
<= 'Z')) ||
2982 (c
== '_') || (c
== ':') ||
2983 ((c
>= 0xC0) && (c
<= 0xD6)) ||
2984 ((c
>= 0xD8) && (c
<= 0xF6)) ||
2985 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
2986 ((c
>= 0x370) && (c
<= 0x37D)) ||
2987 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
2988 ((c
>= 0x200C) && (c
<= 0x200D)) ||
2989 ((c
>= 0x2070) && (c
<= 0x218F)) ||
2990 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
2991 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
2992 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
2993 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
2994 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
2997 if (IS_LETTER(c
) || (c
== '_') || (c
== ':'))
3004 xmlIsNameChar(xmlParserCtxtPtr ctxt
, int c
) {
3005 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3007 * Use the new checks of production [4] [4a] and [5] of the
3008 * Update 5 of XML-1.0
3010 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3011 (((c
>= 'a') && (c
<= 'z')) ||
3012 ((c
>= 'A') && (c
<= 'Z')) ||
3013 ((c
>= '0') && (c
<= '9')) || /* !start */
3014 (c
== '_') || (c
== ':') ||
3015 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3016 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3017 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3018 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3019 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3020 ((c
>= 0x370) && (c
<= 0x37D)) ||
3021 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3022 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3023 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3024 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3025 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3026 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3027 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3028 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3029 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3032 if ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3033 (c
== '.') || (c
== '-') ||
3034 (c
== '_') || (c
== ':') ||
3035 (IS_COMBINING(c
)) ||
3042 static xmlChar
* xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
,
3043 int *len
, int *alloc
, int normalize
);
3045 static const xmlChar
*
3046 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
3052 nbParseNameComplex
++;
3056 * Handler for more complex cases
3059 if (ctxt
->instate
== XML_PARSER_EOF
)
3062 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3064 * Use the new checks of production [4] [4a] and [5] of the
3065 * Update 5 of XML-1.0
3067 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3068 (!(((c
>= 'a') && (c
<= 'z')) ||
3069 ((c
>= 'A') && (c
<= 'Z')) ||
3070 (c
== '_') || (c
== ':') ||
3071 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3072 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3073 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3074 ((c
>= 0x370) && (c
<= 0x37D)) ||
3075 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3076 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3077 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3078 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3079 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3080 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3081 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3082 ((c
>= 0x10000) && (c
<= 0xEFFFF))))) {
3088 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3089 (((c
>= 'a') && (c
<= 'z')) ||
3090 ((c
>= 'A') && (c
<= 'Z')) ||
3091 ((c
>= '0') && (c
<= '9')) || /* !start */
3092 (c
== '_') || (c
== ':') ||
3093 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3094 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3095 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3096 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3097 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3098 ((c
>= 0x370) && (c
<= 0x37D)) ||
3099 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3100 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3101 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3102 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3103 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3104 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3105 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3106 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3107 ((c
>= 0x10000) && (c
<= 0xEFFFF))
3109 if (count
++ > 100) {
3112 if (ctxt
->instate
== XML_PARSER_EOF
)
3120 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3121 (!IS_LETTER(c
) && (c
!= '_') &&
3129 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3130 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3131 (c
== '.') || (c
== '-') ||
3132 (c
== '_') || (c
== ':') ||
3133 (IS_COMBINING(c
)) ||
3134 (IS_EXTENDER(c
)))) {
3135 if (count
++ > 100) {
3138 if (ctxt
->instate
== XML_PARSER_EOF
)
3146 if ((*ctxt
->input
->cur
== '\n') && (ctxt
->input
->cur
[-1] == '\r'))
3147 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- (len
+ 1), len
));
3148 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3153 * @ctxt: an XML parser context
3155 * parse an XML name.
3157 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3158 * CombiningChar | Extender
3160 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3162 * [6] Names ::= Name (#x20 Name)*
3164 * Returns the Name parsed or NULL
3168 xmlParseName(xmlParserCtxtPtr ctxt
) {
3180 * Accelerator for simple ASCII names
3182 in
= ctxt
->input
->cur
;
3183 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3184 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3185 (*in
== '_') || (*in
== ':')) {
3187 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3188 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3189 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3190 (*in
== '_') || (*in
== '-') ||
3191 (*in
== ':') || (*in
== '.'))
3193 if ((*in
> 0) && (*in
< 0x80)) {
3194 count
= in
- ctxt
->input
->cur
;
3195 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3196 ctxt
->input
->cur
= in
;
3197 ctxt
->nbChars
+= count
;
3198 ctxt
->input
->col
+= count
;
3200 xmlErrMemory(ctxt
, NULL
);
3204 /* accelerator for special cases */
3205 return(xmlParseNameComplex(ctxt
));
3208 static const xmlChar
*
3209 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt
) {
3215 nbParseNCNameComplex
++;
3219 * Handler for more complex cases
3223 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3224 (!xmlIsNameStartChar(ctxt
, c
) || (c
== ':'))) {
3228 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3229 (xmlIsNameChar(ctxt
, c
) && (c
!= ':'))) {
3230 if (count
++ > 100) {
3233 if (ctxt
->instate
== XML_PARSER_EOF
)
3240 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3245 * @ctxt: an XML parser context
3246 * @len: length of the string parsed
3248 * parse an XML name.
3250 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3251 * CombiningChar | Extender
3253 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3255 * Returns the Name parsed or NULL
3258 static const xmlChar
*
3259 xmlParseNCName(xmlParserCtxtPtr ctxt
) {
3269 * Accelerator for simple ASCII names
3271 in
= ctxt
->input
->cur
;
3272 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3273 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3276 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3277 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3278 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3279 (*in
== '_') || (*in
== '-') ||
3282 if ((*in
> 0) && (*in
< 0x80)) {
3283 count
= in
- ctxt
->input
->cur
;
3284 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3285 ctxt
->input
->cur
= in
;
3286 ctxt
->nbChars
+= count
;
3287 ctxt
->input
->col
+= count
;
3289 xmlErrMemory(ctxt
, NULL
);
3294 return(xmlParseNCNameComplex(ctxt
));
3298 * xmlParseNameAndCompare:
3299 * @ctxt: an XML parser context
3301 * parse an XML name and compares for match
3302 * (specialized for endtag parsing)
3304 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3305 * and the name for mismatch
3308 static const xmlChar
*
3309 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
3310 register const xmlChar
*cmp
= other
;
3311 register const xmlChar
*in
;
3315 if (ctxt
->instate
== XML_PARSER_EOF
)
3318 in
= ctxt
->input
->cur
;
3319 while (*in
!= 0 && *in
== *cmp
) {
3324 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
3326 ctxt
->input
->cur
= in
;
3327 return (const xmlChar
*) 1;
3329 /* failure (or end of input buffer), check with full function */
3330 ret
= xmlParseName (ctxt
);
3331 /* strings coming from the dictionary, direct compare possible */
3333 return (const xmlChar
*) 1;
3339 * xmlParseStringName:
3340 * @ctxt: an XML parser context
3341 * @str: a pointer to the string pointer (IN/OUT)
3343 * parse an XML name.
3345 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3346 * CombiningChar | Extender
3348 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3350 * [6] Names ::= Name (#x20 Name)*
3352 * Returns the Name parsed or NULL. The @str pointer
3353 * is updated to the current location in the string.
3357 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
3358 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3359 const xmlChar
*cur
= *str
;
3364 nbParseStringName
++;
3367 c
= CUR_SCHAR(cur
, l
);
3368 if (!xmlIsNameStartChar(ctxt
, c
)) {
3372 COPY_BUF(l
,buf
,len
,c
);
3374 c
= CUR_SCHAR(cur
, l
);
3375 while (xmlIsNameChar(ctxt
, c
)) {
3376 COPY_BUF(l
,buf
,len
,c
);
3378 c
= CUR_SCHAR(cur
, l
);
3379 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
3381 * Okay someone managed to make a huge name, so he's ready to pay
3382 * for the processing speed.
3387 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3388 if (buffer
== NULL
) {
3389 xmlErrMemory(ctxt
, NULL
);
3392 memcpy(buffer
, buf
, len
);
3393 while (xmlIsNameChar(ctxt
, c
)) {
3394 if (len
+ 10 > max
) {
3397 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3398 max
* sizeof(xmlChar
));
3400 xmlErrMemory(ctxt
, NULL
);
3406 COPY_BUF(l
,buffer
,len
,c
);
3408 c
= CUR_SCHAR(cur
, l
);
3416 return(xmlStrndup(buf
, len
));
3421 * @ctxt: an XML parser context
3423 * parse an XML Nmtoken.
3425 * [7] Nmtoken ::= (NameChar)+
3427 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3429 * Returns the Nmtoken parsed or NULL
3433 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
3434 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3444 if (ctxt
->instate
== XML_PARSER_EOF
)
3448 while (xmlIsNameChar(ctxt
, c
)) {
3449 if (count
++ > 100) {
3453 COPY_BUF(l
,buf
,len
,c
);
3456 if (len
>= XML_MAX_NAMELEN
) {
3458 * Okay someone managed to make a huge token, so he's ready to pay
3459 * for the processing speed.
3464 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3465 if (buffer
== NULL
) {
3466 xmlErrMemory(ctxt
, NULL
);
3469 memcpy(buffer
, buf
, len
);
3470 while (xmlIsNameChar(ctxt
, c
)) {
3471 if (count
++ > 100) {
3474 if (ctxt
->instate
== XML_PARSER_EOF
) {
3479 if (len
+ 10 > max
) {
3483 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3484 max
* sizeof(xmlChar
));
3486 xmlErrMemory(ctxt
, NULL
);
3492 COPY_BUF(l
,buffer
,len
,c
);
3502 return(xmlStrndup(buf
, len
));
3506 * xmlParseEntityValue:
3507 * @ctxt: an XML parser context
3508 * @orig: if non-NULL store a copy of the original entity value
3510 * parse a value for ENTITY declarations
3512 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3513 * "'" ([^%&'] | PEReference | Reference)* "'"
3515 * Returns the EntityValue parsed with reference substituted or NULL
3519 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
3520 xmlChar
*buf
= NULL
;
3522 int size
= XML_PARSER_BUFFER_SIZE
;
3525 xmlChar
*ret
= NULL
;
3526 const xmlChar
*cur
= NULL
;
3527 xmlParserInputPtr input
;
3529 if (RAW
== '"') stop
= '"';
3530 else if (RAW
== '\'') stop
= '\'';
3532 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_STARTED
, NULL
);
3535 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3537 xmlErrMemory(ctxt
, NULL
);
3542 * The content of the entity definition is copied in a buffer.
3545 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
3546 input
= ctxt
->input
;
3548 if (ctxt
->instate
== XML_PARSER_EOF
) {
3555 * NOTE: 4.4.5 Included in Literal
3556 * When a parameter entity reference appears in a literal entity
3557 * value, ... a single or double quote character in the replacement
3558 * text is always treated as a normal data character and will not
3559 * terminate the literal.
3560 * In practice it means we stop the loop only when back at parsing
3561 * the initial entity and the quote is found
3563 while (((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
3564 (ctxt
->input
!= input
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
3565 if (len
+ 5 >= size
) {
3569 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3571 xmlErrMemory(ctxt
, NULL
);
3577 COPY_BUF(l
,buf
,len
,c
);
3580 * Pop-up of finished entities.
3582 while ((RAW
== 0) && (ctxt
->inputNr
> 1)) /* non input consuming */
3593 if (ctxt
->instate
== XML_PARSER_EOF
) {
3599 * Raise problem w.r.t. '&' and '%' being used in non-entities
3600 * reference constructs. Note Charref will be handled in
3601 * xmlStringDecodeEntities()
3604 while (*cur
!= 0) { /* non input consuming */
3605 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
3610 name
= xmlParseStringName(ctxt
, &cur
);
3611 if ((name
== NULL
) || (*cur
!= ';')) {
3612 xmlFatalErrMsgInt(ctxt
, XML_ERR_ENTITY_CHAR_ERROR
,
3613 "EntityValue: '%c' forbidden except for entities references\n",
3616 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
3617 (ctxt
->inputNr
== 1)) {
3618 xmlFatalErr(ctxt
, XML_ERR_ENTITY_PE_INTERNAL
, NULL
);
3629 * Then PEReference entities are substituted.
3632 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
, NULL
);
3637 * NOTE: 4.4.7 Bypassed
3638 * When a general entity reference appears in the EntityValue in
3639 * an entity declaration, it is bypassed and left as is.
3640 * so XML_SUBSTITUTE_REF is not set here.
3642 ret
= xmlStringDecodeEntities(ctxt
, buf
, XML_SUBSTITUTE_PEREF
,
3654 * xmlParseAttValueComplex:
3655 * @ctxt: an XML parser context
3656 * @attlen: the resulting attribute length
3657 * @normalize: whether to apply the inner normalization
3659 * parse a value for an attribute, this is the fallback function
3660 * of xmlParseAttValue() when the attribute parsing requires handling
3661 * of non-ASCII characters, or normalization compaction.
3663 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3666 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
, int *attlen
, int normalize
) {
3668 xmlChar
*buf
= NULL
;
3669 xmlChar
*rep
= NULL
;
3672 int c
, l
, in_space
= 0;
3673 xmlChar
*current
= NULL
;
3676 if (NXT(0) == '"') {
3677 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3680 } else if (NXT(0) == '\'') {
3682 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3685 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
3690 * allocate a translation buffer.
3692 buf_size
= XML_PARSER_BUFFER_SIZE
;
3693 buf
= (xmlChar
*) xmlMallocAtomic(buf_size
* sizeof(xmlChar
));
3694 if (buf
== NULL
) goto mem_error
;
3697 * OK loop until we reach one of the ending char or a size limit.
3700 while (((NXT(0) != limit
) && /* checked */
3701 (IS_CHAR(c
)) && (c
!= '<')) &&
3702 (ctxt
->instate
!= XML_PARSER_EOF
)) {
3706 if (NXT(1) == '#') {
3707 int val
= xmlParseCharRef(ctxt
);
3710 if (ctxt
->replaceEntities
) {
3711 if (len
> buf_size
- 10) {
3712 growBuffer(buf
, 10);
3717 * The reparsing will be done in xmlStringGetNodeList()
3718 * called by the attribute() function in SAX.c
3720 if (len
> buf_size
- 10) {
3721 growBuffer(buf
, 10);
3729 } else if (val
!= 0) {
3730 if (len
> buf_size
- 10) {
3731 growBuffer(buf
, 10);
3733 len
+= xmlCopyChar(0, &buf
[len
], val
);
3736 ent
= xmlParseEntityRef(ctxt
);
3739 ctxt
->nbentities
+= ent
->owner
;
3740 if ((ent
!= NULL
) &&
3741 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
3742 if (len
> buf_size
- 10) {
3743 growBuffer(buf
, 10);
3745 if ((ctxt
->replaceEntities
== 0) &&
3746 (ent
->content
[0] == '&')) {
3753 buf
[len
++] = ent
->content
[0];
3755 } else if ((ent
!= NULL
) &&
3756 (ctxt
->replaceEntities
!= 0)) {
3757 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
3758 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
3763 while (*current
!= 0) { /* non input consuming */
3764 if ((*current
== 0xD) || (*current
== 0xA) ||
3765 (*current
== 0x9)) {
3769 buf
[len
++] = *current
++;
3770 if (len
> buf_size
- 10) {
3771 growBuffer(buf
, 10);
3778 if (len
> buf_size
- 10) {
3779 growBuffer(buf
, 10);
3781 if (ent
->content
!= NULL
)
3782 buf
[len
++] = ent
->content
[0];
3784 } else if (ent
!= NULL
) {
3785 int i
= xmlStrlen(ent
->name
);
3786 const xmlChar
*cur
= ent
->name
;
3789 * This may look absurd but is needed to detect
3792 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
3793 (ent
->content
!= NULL
)) {
3794 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
3795 XML_SUBSTITUTE_REF
, 0, 0, 0);
3803 * Just output the reference
3806 while (len
> buf_size
- i
- 10) {
3807 growBuffer(buf
, i
+ 10);
3810 buf
[len
++] = *cur
++;
3815 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
3816 if ((len
!= 0) || (!normalize
)) {
3817 if ((!normalize
) || (!in_space
)) {
3818 COPY_BUF(l
,buf
,len
,0x20);
3819 while (len
> buf_size
- 10) {
3820 growBuffer(buf
, 10);
3827 COPY_BUF(l
,buf
,len
,c
);
3828 if (len
> buf_size
- 10) {
3829 growBuffer(buf
, 10);
3837 if (ctxt
->instate
== XML_PARSER_EOF
)
3840 if ((in_space
) && (normalize
)) {
3841 while ((len
> 0) && (buf
[len
- 1] == 0x20)) len
--;
3845 xmlFatalErr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
, NULL
);
3846 } else if (RAW
!= limit
) {
3847 if ((c
!= 0) && (!IS_CHAR(c
))) {
3848 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_CHAR
,
3849 "invalid character in attribute value\n");
3851 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
3852 "AttValue: ' expected\n");
3856 if (attlen
!= NULL
) *attlen
= len
;
3860 xmlErrMemory(ctxt
, NULL
);
3871 * @ctxt: an XML parser context
3873 * parse a value for an attribute
3874 * Note: the parser won't do substitution of entities here, this
3875 * will be handled later in xmlStringGetNodeList
3877 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3878 * "'" ([^<&'] | Reference)* "'"
3880 * 3.3.3 Attribute-Value Normalization:
3881 * Before the value of an attribute is passed to the application or
3882 * checked for validity, the XML processor must normalize it as follows:
3883 * - a character reference is processed by appending the referenced
3884 * character to the attribute value
3885 * - an entity reference is processed by recursively processing the
3886 * replacement text of the entity
3887 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3888 * appending #x20 to the normalized value, except that only a single
3889 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3890 * parsed entity or the literal entity value of an internal parsed entity
3891 * - other characters are processed by appending them to the normalized value
3892 * If the declared value is not CDATA, then the XML processor must further
3893 * process the normalized attribute value by discarding any leading and
3894 * trailing space (#x20) characters, and by replacing sequences of space
3895 * (#x20) characters by a single space (#x20) character.
3896 * All attributes for which no declaration has been read should be treated
3897 * by a non-validating parser as if declared CDATA.
3899 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3904 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
3905 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
)) return(NULL
);
3906 return(xmlParseAttValueInternal(ctxt
, NULL
, NULL
, 0));
3910 * xmlParseSystemLiteral:
3911 * @ctxt: an XML parser context
3913 * parse an XML Literal
3915 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3917 * Returns the SystemLiteral parsed or NULL
3921 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
3922 xmlChar
*buf
= NULL
;
3924 int size
= XML_PARSER_BUFFER_SIZE
;
3927 int state
= ctxt
->instate
;
3934 } else if (RAW
== '\'') {
3938 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
3942 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3944 xmlErrMemory(ctxt
, NULL
);
3947 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
3949 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
3950 if (len
+ 5 >= size
) {
3954 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3957 xmlErrMemory(ctxt
, NULL
);
3958 ctxt
->instate
= (xmlParserInputState
) state
;
3967 if (ctxt
->instate
== XML_PARSER_EOF
) {
3972 COPY_BUF(l
,buf
,len
,cur
);
3982 ctxt
->instate
= (xmlParserInputState
) state
;
3983 if (!IS_CHAR(cur
)) {
3984 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
3992 * xmlParsePubidLiteral:
3993 * @ctxt: an XML parser context
3995 * parse an XML public literal
3997 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3999 * Returns the PubidLiteral parsed or NULL.
4003 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
4004 xmlChar
*buf
= NULL
;
4006 int size
= XML_PARSER_BUFFER_SIZE
;
4010 xmlParserInputState oldstate
= ctxt
->instate
;
4016 } else if (RAW
== '\'') {
4020 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4023 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4025 xmlErrMemory(ctxt
, NULL
);
4028 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
4030 while ((IS_PUBIDCHAR_CH(cur
)) && (cur
!= stop
)) { /* checked */
4031 if (len
+ 1 >= size
) {
4035 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4037 xmlErrMemory(ctxt
, NULL
);
4048 if (ctxt
->instate
== XML_PARSER_EOF
) {
4063 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4067 ctxt
->instate
= oldstate
;
4071 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
);
4074 * used for the test in the inner loop of the char data testing
4076 static const unsigned char test_char_data
[256] = {
4077 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4078 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4079 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4080 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4081 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4082 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4083 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4084 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4085 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4086 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4087 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4088 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4089 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4090 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4091 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4092 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4093 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4094 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4095 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4096 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4097 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4098 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4099 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4100 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4101 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4102 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4103 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4104 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4105 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4106 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4107 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4108 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4113 * @ctxt: an XML parser context
4114 * @cdata: int indicating whether we are within a CDATA section
4116 * parse a CharData section.
4117 * if we are within a CDATA section ']]>' marks an end of section.
4119 * The right angle bracket (>) may be represented using the string "&gt;",
4120 * and must, for compatibility, be escaped using "&gt;" or a character
4121 * reference when it appears in the string "]]>" in content, when that
4122 * string is not marking the end of a CDATA section.
4124 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4128 xmlParseCharData(xmlParserCtxtPtr ctxt
, int cdata
) {
4131 int line
= ctxt
->input
->line
;
4132 int col
= ctxt
->input
->col
;
4138 * Accelerated common case where input doesn't need to be
4139 * modified before passing it to the handler.
4142 in
= ctxt
->input
->cur
;
4145 while (*in
== 0x20) { in
++; ctxt
->input
->col
++; }
4148 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4150 } while (*in
== 0xA);
4151 goto get_more_space
;
4154 nbchar
= in
- ctxt
->input
->cur
;
4156 const xmlChar
*tmp
= ctxt
->input
->cur
;
4157 ctxt
->input
->cur
= in
;
4159 if ((ctxt
->sax
!= NULL
) &&
4160 (ctxt
->sax
->ignorableWhitespace
!=
4161 ctxt
->sax
->characters
)) {
4162 if (areBlanks(ctxt
, tmp
, nbchar
, 1)) {
4163 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4164 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4167 if (ctxt
->sax
->characters
!= NULL
)
4168 ctxt
->sax
->characters(ctxt
->userData
,
4170 if (*ctxt
->space
== -1)
4173 } else if ((ctxt
->sax
!= NULL
) &&
4174 (ctxt
->sax
->characters
!= NULL
)) {
4175 ctxt
->sax
->characters(ctxt
->userData
,
4183 ccol
= ctxt
->input
->col
;
4184 while (test_char_data
[*in
]) {
4188 ctxt
->input
->col
= ccol
;
4191 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4193 } while (*in
== 0xA);
4197 if ((in
[1] == ']') && (in
[2] == '>')) {
4198 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4199 ctxt
->input
->cur
= in
;
4206 nbchar
= in
- ctxt
->input
->cur
;
4208 if ((ctxt
->sax
!= NULL
) &&
4209 (ctxt
->sax
->ignorableWhitespace
!=
4210 ctxt
->sax
->characters
) &&
4211 (IS_BLANK_CH(*ctxt
->input
->cur
))) {
4212 const xmlChar
*tmp
= ctxt
->input
->cur
;
4213 ctxt
->input
->cur
= in
;
4215 if (areBlanks(ctxt
, tmp
, nbchar
, 0)) {
4216 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4217 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4220 if (ctxt
->sax
->characters
!= NULL
)
4221 ctxt
->sax
->characters(ctxt
->userData
,
4223 if (*ctxt
->space
== -1)
4226 line
= ctxt
->input
->line
;
4227 col
= ctxt
->input
->col
;
4228 } else if (ctxt
->sax
!= NULL
) {
4229 if (ctxt
->sax
->characters
!= NULL
)
4230 ctxt
->sax
->characters(ctxt
->userData
,
4231 ctxt
->input
->cur
, nbchar
);
4232 line
= ctxt
->input
->line
;
4233 col
= ctxt
->input
->col
;
4235 /* something really bad happened in the SAX callback */
4236 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4239 ctxt
->input
->cur
= in
;
4243 ctxt
->input
->cur
= in
;
4245 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4246 continue; /* while */
4258 if (ctxt
->instate
== XML_PARSER_EOF
)
4260 in
= ctxt
->input
->cur
;
4261 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09));
4264 ctxt
->input
->line
= line
;
4265 ctxt
->input
->col
= col
;
4266 xmlParseCharDataComplex(ctxt
, cdata
);
4270 * xmlParseCharDataComplex:
4271 * @ctxt: an XML parser context
4272 * @cdata: int indicating whether we are within a CDATA section
4274 * parse a CharData section. This is the fallback function
4275 * of xmlParseCharData() when the parsing requires handling
4276 * of non-ASCII characters.
4279 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
) {
4280 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
4288 while ((cur
!= '<') && /* checked */
4290 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
4291 if ((cur
== ']') && (NXT(1) == ']') &&
4295 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4298 COPY_BUF(l
,buf
,nbchar
,cur
);
4299 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
4303 * OK the segment is to be consumed as chars.
4305 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4306 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4307 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4308 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4311 if (ctxt
->sax
->characters
!= NULL
)
4312 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4313 if ((ctxt
->sax
->characters
!=
4314 ctxt
->sax
->ignorableWhitespace
) &&
4315 (*ctxt
->space
== -1))
4320 /* something really bad happened in the SAX callback */
4321 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4328 if (ctxt
->instate
== XML_PARSER_EOF
)
4337 * OK the segment is to be consumed as chars.
4339 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4340 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4341 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4342 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
4344 if (ctxt
->sax
->characters
!= NULL
)
4345 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4346 if ((ctxt
->sax
->characters
!= ctxt
->sax
->ignorableWhitespace
) &&
4347 (*ctxt
->space
== -1))
4352 if ((cur
!= 0) && (!IS_CHAR(cur
))) {
4353 /* Generate the error and skip the offending character */
4354 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4355 "PCDATA invalid Char value %d\n",
4362 * xmlParseExternalID:
4363 * @ctxt: an XML parser context
4364 * @publicID: a xmlChar** receiving PubidLiteral
4365 * @strict: indicate whether we should restrict parsing to only
4366 * production [75], see NOTE below
4368 * Parse an External ID or a Public ID
4370 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4371 * 'PUBLIC' S PubidLiteral S SystemLiteral
4373 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4374 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4376 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4378 * Returns the SystemLiteral; in the second case publicID also
4379 * receives the PubidLiteral. If strict is off,
4380 * it is possible to return NULL and have publicID set.
4384 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
4385 xmlChar
*URI
= NULL
;
4390 if (CMP6(CUR_PTR
, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4392 if (!IS_BLANK_CH(CUR
)) {
4393 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4394 "Space required after 'SYSTEM'\n");
4397 URI
= xmlParseSystemLiteral(ctxt
);
4399 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4401 } else if (CMP6(CUR_PTR
, 'P', 'U', 'B', 'L', 'I', 'C')) {
4403 if (!IS_BLANK_CH(CUR
)) {
4404 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4405 "Space required after 'PUBLIC'\n");
4408 *publicID
= xmlParsePubidLiteral(ctxt
);
4409 if (*publicID
== NULL
) {
4410 xmlFatalErr(ctxt
, XML_ERR_PUBID_REQUIRED
, NULL
);
4414 * We don't handle [83] so "S SystemLiteral" is required.
4416 if (!IS_BLANK_CH(CUR
)) {
4417 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4418 "Space required after the Public Identifier\n");
4422 * We handle [83] so we return immediately, if
4423 * "S SystemLiteral" is not detected. From a purely parsing
4424 * point of view that's a nice mess.
4430 if (!IS_BLANK_CH(*ptr
)) return(NULL
);
4432 while (IS_BLANK_CH(*ptr
)) ptr
++; /* TODO: dangerous, fix ! */
4433 if ((*ptr
!= '\'') && (*ptr
!= '"')) return(NULL
);
4436 URI
= xmlParseSystemLiteral(ctxt
);
4438 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4445 * xmlParseCommentComplex:
4446 * @ctxt: an XML parser context
4447 * @buf: the already parsed part of the buffer
4448 * @len: number of bytes filled in the buffer
4449 * @size: allocated size of the buffer
4451 * Skip an XML (SGML) comment <!-- .... -->
4452 * The spec says that "For compatibility, the string "--" (double-hyphen)
4453 * must not occur within comments. "
4454 * This is the slow routine in case the accelerator for ascii didn't work
4456 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4459 xmlParseCommentComplex(xmlParserCtxtPtr ctxt
, xmlChar
*buf
, int len
, int size
) {
4466 inputid
= ctxt
->input
->id
;
4470 size
= XML_PARSER_BUFFER_SIZE
;
4471 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4473 xmlErrMemory(ctxt
, NULL
);
4477 GROW
; /* Assure there's enough input data */
4480 goto not_terminated
;
4482 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4483 "xmlParseComment: invalid xmlChar value %d\n",
4491 goto not_terminated
;
4493 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4494 "xmlParseComment: invalid xmlChar value %d\n",
4502 goto not_terminated
;
4503 while (IS_CHAR(cur
) && /* checked */
4505 (r
!= '-') || (q
!= '-'))) {
4506 if ((r
== '-') && (q
== '-')) {
4507 xmlFatalErr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
, NULL
);
4509 if (len
+ 5 >= size
) {
4512 new_buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4513 if (new_buf
== NULL
) {
4515 xmlErrMemory(ctxt
, NULL
);
4520 COPY_BUF(ql
,buf
,len
,q
);
4530 if (ctxt
->instate
== XML_PARSER_EOF
) {
4545 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4546 "Comment not terminated \n<!--%.50s\n", buf
);
4547 } else if (!IS_CHAR(cur
)) {
4548 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4549 "xmlParseComment: invalid xmlChar value %d\n",
4552 if (inputid
!= ctxt
->input
->id
) {
4553 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4554 "Comment doesn't start and stop in the same entity\n");
4557 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4558 (!ctxt
->disableSAX
))
4559 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4564 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4565 "Comment not terminated\n", NULL
);
4572 * @ctxt: an XML parser context
4574 * Skip an XML (SGML) comment <!-- .... -->
4575 * The spec says that "For compatibility, the string "--" (double-hyphen)
4576 * must not occur within comments. "
4578 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4581 xmlParseComment(xmlParserCtxtPtr ctxt
) {
4582 xmlChar
*buf
= NULL
;
4583 int size
= XML_PARSER_BUFFER_SIZE
;
4585 xmlParserInputState state
;
4587 int nbchar
= 0, ccol
;
4591 * Check that there is a comment right here.
4593 if ((RAW
!= '<') || (NXT(1) != '!') ||
4594 (NXT(2) != '-') || (NXT(3) != '-')) return;
4595 state
= ctxt
->instate
;
4596 ctxt
->instate
= XML_PARSER_COMMENT
;
4597 inputid
= ctxt
->input
->id
;
4603 * Accelerated common case where the input doesn't need to be
4604 * modified before passing it to the handler.
4606 in
= ctxt
->input
->cur
;
4610 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4612 } while (*in
== 0xA);
4615 ccol
= ctxt
->input
->col
;
4616 while (((*in
> '-') && (*in
<= 0x7F)) ||
4617 ((*in
>= 0x20) && (*in
< '-')) ||
4622 ctxt
->input
->col
= ccol
;
4625 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4627 } while (*in
== 0xA);
4630 nbchar
= in
- ctxt
->input
->cur
;
4632 * save current set of data
4635 if ((ctxt
->sax
!= NULL
) &&
4636 (ctxt
->sax
->comment
!= NULL
)) {
4638 if ((*in
== '-') && (in
[1] == '-'))
4641 size
= XML_PARSER_BUFFER_SIZE
+ nbchar
;
4642 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4644 xmlErrMemory(ctxt
, NULL
);
4645 ctxt
->instate
= state
;
4649 } else if (len
+ nbchar
+ 1 >= size
) {
4651 size
+= len
+ nbchar
+ XML_PARSER_BUFFER_SIZE
;
4652 new_buf
= (xmlChar
*) xmlRealloc(buf
,
4653 size
* sizeof(xmlChar
));
4654 if (new_buf
== NULL
) {
4656 xmlErrMemory(ctxt
, NULL
);
4657 ctxt
->instate
= state
;
4662 memcpy(&buf
[len
], ctxt
->input
->cur
, nbchar
);
4667 ctxt
->input
->cur
= in
;
4670 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4675 ctxt
->input
->cur
= in
;
4677 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4678 continue; /* while */
4684 if (ctxt
->instate
== XML_PARSER_EOF
) {
4688 in
= ctxt
->input
->cur
;
4692 if (ctxt
->input
->id
!= inputid
) {
4693 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4694 "comment doesn't start and stop in the same entity\n");
4697 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4698 (!ctxt
->disableSAX
)) {
4700 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4702 ctxt
->sax
->comment(ctxt
->userData
, BAD_CAST
"");
4706 if (ctxt
->instate
!= XML_PARSER_EOF
)
4707 ctxt
->instate
= state
;
4711 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4712 "Comment not terminated \n<!--%.50s\n",
4715 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4716 "Comment not terminated \n", NULL
);
4724 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09));
4725 xmlParseCommentComplex(ctxt
, buf
, len
, size
);
4726 ctxt
->instate
= state
;
4733 * @ctxt: an XML parser context
4735 * parse the name of a PI
4737 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4739 * Returns the PITarget name or NULL
4743 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
4744 const xmlChar
*name
;
4746 name
= xmlParseName(ctxt
);
4747 if ((name
!= NULL
) &&
4748 ((name
[0] == 'x') || (name
[0] == 'X')) &&
4749 ((name
[1] == 'm') || (name
[1] == 'M')) &&
4750 ((name
[2] == 'l') || (name
[2] == 'L'))) {
4752 if ((name
[0] == 'x') && (name
[1] == 'm') &&
4753 (name
[2] == 'l') && (name
[3] == 0)) {
4754 xmlFatalErrMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
4755 "XML declaration allowed only at the start of the document\n");
4757 } else if (name
[3] == 0) {
4758 xmlFatalErr(ctxt
, XML_ERR_RESERVED_XML_NAME
, NULL
);
4762 if (xmlW3CPIs
[i
] == NULL
) break;
4763 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
4766 xmlWarningMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
4767 "xmlParsePITarget: invalid name prefix 'xml'\n",
4770 if ((name
!= NULL
) && (xmlStrchr(name
, ':') != NULL
)) {
4771 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
4772 "colon are forbidden from PI names '%s'\n", name
, NULL
, NULL
);
4777 #ifdef LIBXML_CATALOG_ENABLED
4779 * xmlParseCatalogPI:
4780 * @ctxt: an XML parser context
4781 * @catalog: the PI value string
4783 * parse an XML Catalog Processing Instruction.
4785 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4787 * Occurs only if allowed by the user and if happening in the Misc
4788 * part of the document before any doctype information
4789 * This will add the given catalog to the parsing context in order
4790 * to be used if there is a resolution need further down in the document
4794 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
4795 xmlChar
*URL
= NULL
;
4796 const xmlChar
*tmp
, *base
;
4800 while (IS_BLANK_CH(*tmp
)) tmp
++;
4801 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
4804 while (IS_BLANK_CH(*tmp
)) tmp
++;
4809 while (IS_BLANK_CH(*tmp
)) tmp
++;
4811 if ((marker
!= '\'') && (marker
!= '"'))
4815 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
4818 URL
= xmlStrndup(base
, tmp
- base
);
4820 while (IS_BLANK_CH(*tmp
)) tmp
++;
4825 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
4831 xmlWarningMsg(ctxt
, XML_WAR_CATALOG_PI
,
4832 "Catalog PI syntax error: %s\n",
4841 * @ctxt: an XML parser context
4843 * parse an XML Processing Instruction.
4845 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4847 * The processing is transferred to SAX once parsed.
4851 xmlParsePI(xmlParserCtxtPtr ctxt
) {
4852 xmlChar
*buf
= NULL
;
4854 int size
= XML_PARSER_BUFFER_SIZE
;
4856 const xmlChar
*target
;
4857 xmlParserInputState state
;
4860 if ((RAW
== '<') && (NXT(1) == '?')) {
4861 xmlParserInputPtr input
= ctxt
->input
;
4862 state
= ctxt
->instate
;
4863 ctxt
->instate
= XML_PARSER_PI
;
4865 * this is a Processing Instruction.
4871 * Parse the target name and check for special support like
4874 target
= xmlParsePITarget(ctxt
);
4875 if (target
!= NULL
) {
4876 if ((RAW
== '?') && (NXT(1) == '>')) {
4877 if (input
!= ctxt
->input
) {
4878 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4879 "PI declaration doesn't start and stop in the same entity\n");
4886 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
4887 (ctxt
->sax
->processingInstruction
!= NULL
))
4888 ctxt
->sax
->processingInstruction(ctxt
->userData
,
4890 if (ctxt
->instate
!= XML_PARSER_EOF
)
4891 ctxt
->instate
= state
;
4894 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4896 xmlErrMemory(ctxt
, NULL
);
4897 ctxt
->instate
= state
;
4901 if (!IS_BLANK(cur
)) {
4902 xmlFatalErrMsgStr(ctxt
, XML_ERR_SPACE_REQUIRED
,
4903 "ParsePI: PI %s space expected\n", target
);
4907 while (IS_CHAR(cur
) && /* checked */
4908 ((cur
!= '?') || (NXT(1) != '>'))) {
4909 if (len
+ 5 >= size
) {
4913 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4915 xmlErrMemory(ctxt
, NULL
);
4917 ctxt
->instate
= state
;
4925 if (ctxt
->instate
== XML_PARSER_EOF
) {
4931 COPY_BUF(l
,buf
,len
,cur
);
4942 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
4943 "ParsePI: PI %s never end ...\n", target
);
4945 if (input
!= ctxt
->input
) {
4946 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4947 "PI declaration doesn't start and stop in the same entity\n");
4951 #ifdef LIBXML_CATALOG_ENABLED
4952 if (((state
== XML_PARSER_MISC
) ||
4953 (state
== XML_PARSER_START
)) &&
4954 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
4955 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
4956 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
4957 (allow
== XML_CATA_ALLOW_ALL
))
4958 xmlParseCatalogPI(ctxt
, buf
);
4966 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
4967 (ctxt
->sax
->processingInstruction
!= NULL
))
4968 ctxt
->sax
->processingInstruction(ctxt
->userData
,
4973 xmlFatalErr(ctxt
, XML_ERR_PI_NOT_STARTED
, NULL
);
4975 if (ctxt
->instate
!= XML_PARSER_EOF
)
4976 ctxt
->instate
= state
;
4981 * xmlParseNotationDecl:
4982 * @ctxt: an XML parser context
4984 * parse a notation declaration
4986 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4988 * Hence there are actually 3 choices:
4989 * 'PUBLIC' S PubidLiteral
4990 * 'PUBLIC' S PubidLiteral S SystemLiteral
4991 * and 'SYSTEM' S SystemLiteral
4993 * See the NOTE on xmlParseExternalID().
4997 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
4998 const xmlChar
*name
;
5002 if (CMP10(CUR_PTR
, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5003 xmlParserInputPtr input
= ctxt
->input
;
5006 if (!IS_BLANK_CH(CUR
)) {
5007 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5008 "Space required after '<!NOTATION'\n");
5013 name
= xmlParseName(ctxt
);
5015 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5018 if (!IS_BLANK_CH(CUR
)) {
5019 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5020 "Space required after the NOTATION name'\n");
5023 if (xmlStrchr(name
, ':') != NULL
) {
5024 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5025 "colon are forbidden from notation names '%s'\n",
5033 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
5037 if (input
!= ctxt
->input
) {
5038 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5039 "Notation declaration doesn't start and stop in the same entity\n");
5042 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5043 (ctxt
->sax
->notationDecl
!= NULL
))
5044 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
5046 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5048 if (Systemid
!= NULL
) xmlFree(Systemid
);
5049 if (Pubid
!= NULL
) xmlFree(Pubid
);
5054 * xmlParseEntityDecl:
5055 * @ctxt: an XML parser context
5057 * parse <!ENTITY declarations
5059 * [70] EntityDecl ::= GEDecl | PEDecl
5061 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5063 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5065 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5067 * [74] PEDef ::= EntityValue | ExternalID
5069 * [76] NDataDecl ::= S 'NDATA' S Name
5071 * [ VC: Notation Declared ]
5072 * The Name must match the declared name of a notation.
5076 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
5077 const xmlChar
*name
= NULL
;
5078 xmlChar
*value
= NULL
;
5079 xmlChar
*URI
= NULL
, *literal
= NULL
;
5080 const xmlChar
*ndata
= NULL
;
5081 int isParameter
= 0;
5082 xmlChar
*orig
= NULL
;
5085 /* GROW; done in the caller */
5086 if (CMP8(CUR_PTR
, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5087 xmlParserInputPtr input
= ctxt
->input
;
5090 skipped
= SKIP_BLANKS
;
5092 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5093 "Space required after '<!ENTITY'\n");
5098 skipped
= SKIP_BLANKS
;
5100 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5101 "Space required after '%'\n");
5106 name
= xmlParseName(ctxt
);
5108 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5109 "xmlParseEntityDecl: no name\n");
5112 if (xmlStrchr(name
, ':') != NULL
) {
5113 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5114 "colon are forbidden from entities names '%s'\n",
5117 skipped
= SKIP_BLANKS
;
5119 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5120 "Space required after the entity name\n");
5123 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
5125 * handle the various case of definitions...
5128 if ((RAW
== '"') || (RAW
== '\'')) {
5129 value
= xmlParseEntityValue(ctxt
, &orig
);
5131 if ((ctxt
->sax
!= NULL
) &&
5132 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5133 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5134 XML_INTERNAL_PARAMETER_ENTITY
,
5138 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5139 if ((URI
== NULL
) && (literal
== NULL
)) {
5140 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5145 uri
= xmlParseURI((const char *) URI
);
5147 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5148 "Invalid URI: %s\n", URI
);
5150 * This really ought to be a well formedness error
5151 * but the XML Core WG decided otherwise c.f. issue
5152 * E26 of the XML erratas.
5155 if (uri
->fragment
!= NULL
) {
5157 * Okay this is foolish to block those but not
5160 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5162 if ((ctxt
->sax
!= NULL
) &&
5163 (!ctxt
->disableSAX
) &&
5164 (ctxt
->sax
->entityDecl
!= NULL
))
5165 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5166 XML_EXTERNAL_PARAMETER_ENTITY
,
5167 literal
, URI
, NULL
);
5174 if ((RAW
== '"') || (RAW
== '\'')) {
5175 value
= xmlParseEntityValue(ctxt
, &orig
);
5176 if ((ctxt
->sax
!= NULL
) &&
5177 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5178 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5179 XML_INTERNAL_GENERAL_ENTITY
,
5182 * For expat compatibility in SAX mode.
5184 if ((ctxt
->myDoc
== NULL
) ||
5185 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
5186 if (ctxt
->myDoc
== NULL
) {
5187 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5188 if (ctxt
->myDoc
== NULL
) {
5189 xmlErrMemory(ctxt
, "New Doc failed");
5192 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5194 if (ctxt
->myDoc
->intSubset
== NULL
)
5195 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5196 BAD_CAST
"fake", NULL
, NULL
);
5198 xmlSAX2EntityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
5202 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5203 if ((URI
== NULL
) && (literal
== NULL
)) {
5204 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5209 uri
= xmlParseURI((const char *)URI
);
5211 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5212 "Invalid URI: %s\n", URI
);
5214 * This really ought to be a well formedness error
5215 * but the XML Core WG decided otherwise c.f. issue
5216 * E26 of the XML erratas.
5219 if (uri
->fragment
!= NULL
) {
5221 * Okay this is foolish to block those but not
5224 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5229 if ((RAW
!= '>') && (!IS_BLANK_CH(CUR
))) {
5230 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5231 "Space required before 'NDATA'\n");
5234 if (CMP5(CUR_PTR
, 'N', 'D', 'A', 'T', 'A')) {
5236 if (!IS_BLANK_CH(CUR
)) {
5237 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5238 "Space required after 'NDATA'\n");
5241 ndata
= xmlParseName(ctxt
);
5242 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5243 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
5244 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
5245 literal
, URI
, ndata
);
5247 if ((ctxt
->sax
!= NULL
) &&
5248 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5249 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5250 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5251 literal
, URI
, NULL
);
5253 * For expat compatibility in SAX mode.
5254 * assuming the entity replacement was asked for
5256 if ((ctxt
->replaceEntities
!= 0) &&
5257 ((ctxt
->myDoc
== NULL
) ||
5258 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
5259 if (ctxt
->myDoc
== NULL
) {
5260 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5261 if (ctxt
->myDoc
== NULL
) {
5262 xmlErrMemory(ctxt
, "New Doc failed");
5265 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5268 if (ctxt
->myDoc
->intSubset
== NULL
)
5269 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5270 BAD_CAST
"fake", NULL
, NULL
);
5271 xmlSAX2EntityDecl(ctxt
, name
,
5272 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5273 literal
, URI
, NULL
);
5278 if (ctxt
->instate
== XML_PARSER_EOF
)
5282 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
5283 "xmlParseEntityDecl: entity %s not terminated\n", name
);
5285 if (input
!= ctxt
->input
) {
5286 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5287 "Entity declaration doesn't start and stop in the same entity\n");
5293 * Ugly mechanism to save the raw entity value.
5295 xmlEntityPtr cur
= NULL
;
5298 if ((ctxt
->sax
!= NULL
) &&
5299 (ctxt
->sax
->getParameterEntity
!= NULL
))
5300 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
5302 if ((ctxt
->sax
!= NULL
) &&
5303 (ctxt
->sax
->getEntity
!= NULL
))
5304 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5305 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
5306 cur
= xmlSAX2GetEntity(ctxt
, name
);
5310 if (cur
->orig
!= NULL
)
5317 if (value
!= NULL
) xmlFree(value
);
5318 if (URI
!= NULL
) xmlFree(URI
);
5319 if (literal
!= NULL
) xmlFree(literal
);
5324 * xmlParseDefaultDecl:
5325 * @ctxt: an XML parser context
5326 * @value: Receive a possible fixed default value for the attribute
5328 * Parse an attribute default declaration
5330 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5332 * [ VC: Required Attribute ]
5333 * if the default declaration is the keyword #REQUIRED, then the
5334 * attribute must be specified for all elements of the type in the
5335 * attribute-list declaration.
5337 * [ VC: Attribute Default Legal ]
5338 * The declared default value must meet the lexical constraints of
5339 * the declared attribute type c.f. xmlValidateAttributeDecl()
5341 * [ VC: Fixed Attribute Default ]
5342 * if an attribute has a default value declared with the #FIXED
5343 * keyword, instances of that attribute must match the default value.
5345 * [ WFC: No < in Attribute Values ]
5346 * handled in xmlParseAttValue()
5348 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5349 * or XML_ATTRIBUTE_FIXED.
5353 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
5358 if (CMP9(CUR_PTR
, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5360 return(XML_ATTRIBUTE_REQUIRED
);
5362 if (CMP8(CUR_PTR
, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5364 return(XML_ATTRIBUTE_IMPLIED
);
5366 val
= XML_ATTRIBUTE_NONE
;
5367 if (CMP6(CUR_PTR
, '#', 'F', 'I', 'X', 'E', 'D')) {
5369 val
= XML_ATTRIBUTE_FIXED
;
5370 if (!IS_BLANK_CH(CUR
)) {
5371 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5372 "Space required after '#FIXED'\n");
5376 ret
= xmlParseAttValue(ctxt
);
5377 ctxt
->instate
= XML_PARSER_DTD
;
5379 xmlFatalErrMsg(ctxt
, (xmlParserErrors
)ctxt
->errNo
,
5380 "Attribute default value declaration error\n");
5387 * xmlParseNotationType:
5388 * @ctxt: an XML parser context
5390 * parse a Notation attribute type.
5392 * Note: the leading 'NOTATION' S part has already been parsed...
5394 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5396 * [ VC: Notation Attributes ]
5397 * Values of this type must match one of the notation names included
5398 * in the declaration; all notation names in the declaration must be declared.
5400 * Returns: the notation attribute tree built while parsing
5404 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
5405 const xmlChar
*name
;
5406 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5409 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5416 name
= xmlParseName(ctxt
);
5418 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5419 "Name expected in NOTATION declaration\n");
5420 xmlFreeEnumeration(ret
);
5424 while (tmp
!= NULL
) {
5425 if (xmlStrEqual(name
, tmp
->name
)) {
5426 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5427 "standalone: attribute notation value token %s duplicated\n",
5429 if (!xmlDictOwns(ctxt
->dict
, name
))
5430 xmlFree((xmlChar
*) name
);
5436 cur
= xmlCreateEnumeration(name
);
5438 xmlFreeEnumeration(ret
);
5441 if (last
== NULL
) ret
= last
= cur
;
5448 } while (RAW
== '|');
5450 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5451 xmlFreeEnumeration(ret
);
5459 * xmlParseEnumerationType:
5460 * @ctxt: an XML parser context
5462 * parse an Enumeration attribute type.
5464 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5466 * [ VC: Enumeration ]
5467 * Values of this type must match one of the Nmtoken tokens in
5470 * Returns: the enumeration attribute tree built while parsing
5474 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
5476 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5479 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_STARTED
, NULL
);
5486 name
= xmlParseNmtoken(ctxt
);
5488 xmlFatalErr(ctxt
, XML_ERR_NMTOKEN_REQUIRED
, NULL
);
5492 while (tmp
!= NULL
) {
5493 if (xmlStrEqual(name
, tmp
->name
)) {
5494 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5495 "standalone: attribute enumeration value token %s duplicated\n",
5497 if (!xmlDictOwns(ctxt
->dict
, name
))
5504 cur
= xmlCreateEnumeration(name
);
5505 if (!xmlDictOwns(ctxt
->dict
, name
))
5508 xmlFreeEnumeration(ret
);
5511 if (last
== NULL
) ret
= last
= cur
;
5518 } while (RAW
== '|');
5520 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_FINISHED
, NULL
);
5528 * xmlParseEnumeratedType:
5529 * @ctxt: an XML parser context
5530 * @tree: the enumeration tree built while parsing
5532 * parse an Enumerated attribute type.
5534 * [57] EnumeratedType ::= NotationType | Enumeration
5536 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5539 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5543 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5544 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5546 if (!IS_BLANK_CH(CUR
)) {
5547 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5548 "Space required after 'NOTATION'\n");
5552 *tree
= xmlParseNotationType(ctxt
);
5553 if (*tree
== NULL
) return(0);
5554 return(XML_ATTRIBUTE_NOTATION
);
5556 *tree
= xmlParseEnumerationType(ctxt
);
5557 if (*tree
== NULL
) return(0);
5558 return(XML_ATTRIBUTE_ENUMERATION
);
5562 * xmlParseAttributeType:
5563 * @ctxt: an XML parser context
5564 * @tree: the enumeration tree built while parsing
5566 * parse the Attribute list def for an element
5568 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5570 * [55] StringType ::= 'CDATA'
5572 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5573 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5575 * Validity constraints for attribute values syntax are checked in
5576 * xmlValidateAttributeValue()
5579 * Values of type ID must match the Name production. A name must not
5580 * appear more than once in an XML document as a value of this type;
5581 * i.e., ID values must uniquely identify the elements which bear them.
5583 * [ VC: One ID per Element Type ]
5584 * No element type may have more than one ID attribute specified.
5586 * [ VC: ID Attribute Default ]
5587 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5590 * Values of type IDREF must match the Name production, and values
5591 * of type IDREFS must match Names; each IDREF Name must match the value
5592 * of an ID attribute on some element in the XML document; i.e. IDREF
5593 * values must match the value of some ID attribute.
5595 * [ VC: Entity Name ]
5596 * Values of type ENTITY must match the Name production, values
5597 * of type ENTITIES must match Names; each Entity Name must match the
5598 * name of an unparsed entity declared in the DTD.
5600 * [ VC: Name Token ]
5601 * Values of type NMTOKEN must match the Nmtoken production; values
5602 * of type NMTOKENS must match Nmtokens.
5604 * Returns the attribute type
5607 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5609 if (CMP5(CUR_PTR
, 'C', 'D', 'A', 'T', 'A')) {
5611 return(XML_ATTRIBUTE_CDATA
);
5612 } else if (CMP6(CUR_PTR
, 'I', 'D', 'R', 'E', 'F', 'S')) {
5614 return(XML_ATTRIBUTE_IDREFS
);
5615 } else if (CMP5(CUR_PTR
, 'I', 'D', 'R', 'E', 'F')) {
5617 return(XML_ATTRIBUTE_IDREF
);
5618 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
5620 return(XML_ATTRIBUTE_ID
);
5621 } else if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5623 return(XML_ATTRIBUTE_ENTITY
);
5624 } else if (CMP8(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5626 return(XML_ATTRIBUTE_ENTITIES
);
5627 } else if (CMP8(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5629 return(XML_ATTRIBUTE_NMTOKENS
);
5630 } else if (CMP7(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5632 return(XML_ATTRIBUTE_NMTOKEN
);
5634 return(xmlParseEnumeratedType(ctxt
, tree
));
5638 * xmlParseAttributeListDecl:
5639 * @ctxt: an XML parser context
5641 * parse the Attribute list def for an element
5643 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5645 * [53] AttDef ::= S Name S AttType S DefaultDecl
5649 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
5650 const xmlChar
*elemName
;
5651 const xmlChar
*attrName
;
5652 xmlEnumerationPtr tree
;
5654 if (CMP9(CUR_PTR
, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5655 xmlParserInputPtr input
= ctxt
->input
;
5658 if (!IS_BLANK_CH(CUR
)) {
5659 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5660 "Space required after '<!ATTLIST'\n");
5663 elemName
= xmlParseName(ctxt
);
5664 if (elemName
== NULL
) {
5665 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5666 "ATTLIST: no name for Element\n");
5671 while ((RAW
!= '>') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
5672 const xmlChar
*check
= CUR_PTR
;
5675 xmlChar
*defaultValue
= NULL
;
5679 attrName
= xmlParseName(ctxt
);
5680 if (attrName
== NULL
) {
5681 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5682 "ATTLIST: no name for Attribute\n");
5686 if (!IS_BLANK_CH(CUR
)) {
5687 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5688 "Space required after the attribute name\n");
5693 type
= xmlParseAttributeType(ctxt
, &tree
);
5699 if (!IS_BLANK_CH(CUR
)) {
5700 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5701 "Space required after the attribute type\n");
5703 xmlFreeEnumeration(tree
);
5708 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
5710 if (defaultValue
!= NULL
)
5711 xmlFree(defaultValue
);
5713 xmlFreeEnumeration(tree
);
5716 if ((type
!= XML_ATTRIBUTE_CDATA
) && (defaultValue
!= NULL
))
5717 xmlAttrNormalizeSpace(defaultValue
, defaultValue
);
5721 if (!IS_BLANK_CH(CUR
)) {
5722 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5723 "Space required after the attribute default value\n");
5724 if (defaultValue
!= NULL
)
5725 xmlFree(defaultValue
);
5727 xmlFreeEnumeration(tree
);
5732 if (check
== CUR_PTR
) {
5733 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
5734 "in xmlParseAttributeListDecl\n");
5735 if (defaultValue
!= NULL
)
5736 xmlFree(defaultValue
);
5738 xmlFreeEnumeration(tree
);
5741 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5742 (ctxt
->sax
->attributeDecl
!= NULL
))
5743 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
5744 type
, def
, defaultValue
, tree
);
5745 else if (tree
!= NULL
)
5746 xmlFreeEnumeration(tree
);
5748 if ((ctxt
->sax2
) && (defaultValue
!= NULL
) &&
5749 (def
!= XML_ATTRIBUTE_IMPLIED
) &&
5750 (def
!= XML_ATTRIBUTE_REQUIRED
)) {
5751 xmlAddDefAttrs(ctxt
, elemName
, attrName
, defaultValue
);
5754 xmlAddSpecialAttr(ctxt
, elemName
, attrName
, type
);
5756 if (defaultValue
!= NULL
)
5757 xmlFree(defaultValue
);
5761 if (input
!= ctxt
->input
) {
5762 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5763 "Attribute list declaration doesn't start and stop in the same entity\n",
5772 * xmlParseElementMixedContentDecl:
5773 * @ctxt: an XML parser context
5774 * @inputchk: the input used for the current entity, needed for boundary checks
5776 * parse the declaration for a Mixed Element content
5777 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5779 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5780 * '(' S? '#PCDATA' S? ')'
5782 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5784 * [ VC: No Duplicate Types ]
5785 * The same name must not appear more than once in a single
5786 * mixed-content declaration.
5788 * returns: the list of the xmlElementContentPtr describing the element choices
5790 xmlElementContentPtr
5791 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
5792 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
5793 const xmlChar
*elem
= NULL
;
5796 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5801 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
5802 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5803 "Element content declaration doesn't start and stop in the same entity\n",
5807 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
5811 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
5816 if ((RAW
== '(') || (RAW
== '|')) {
5817 ret
= cur
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
5818 if (ret
== NULL
) return(NULL
);
5820 while ((RAW
== '|') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
5823 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
5824 if (ret
== NULL
) return(NULL
);
5830 n
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
5831 if (n
== NULL
) return(NULL
);
5832 n
->c1
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
5841 elem
= xmlParseName(ctxt
);
5843 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5844 "xmlParseElementMixedContentDecl : Name expected\n");
5845 xmlFreeDocElementContent(ctxt
->myDoc
, cur
);
5851 if ((RAW
== ')') && (NXT(1) == '*')) {
5853 cur
->c2
= xmlNewDocElementContent(ctxt
->myDoc
, elem
,
5854 XML_ELEMENT_CONTENT_ELEMENT
);
5855 if (cur
->c2
!= NULL
)
5856 cur
->c2
->parent
= cur
;
5859 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
5860 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
5861 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5862 "Element content declaration doesn't start and stop in the same entity\n",
5867 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5868 xmlFatalErr(ctxt
, XML_ERR_MIXED_NOT_STARTED
, NULL
);
5873 xmlFatalErr(ctxt
, XML_ERR_PCDATA_REQUIRED
, NULL
);
5879 * xmlParseElementChildrenContentDeclPriv:
5880 * @ctxt: an XML parser context
5881 * @inputchk: the input used for the current entity, needed for boundary checks
5882 * @depth: the level of recursion
5884 * parse the declaration for an element-only (children) content model
5885 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5888 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5890 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5892 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5894 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5896 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5897 * TODO Parameter-entity replacement text must be properly nested
5898 * with parenthesized groups. That is to say, if either of the
5899 * opening or closing parentheses in a choice, seq, or Mixed
5900 * construct is contained in the replacement text for a parameter
5901 * entity, both must be contained in the same replacement text. For
5902 * interoperability, if a parameter-entity reference appears in a
5903 * choice, seq, or Mixed construct, its replacement text should not
5904 * be empty, and neither the first nor last non-blank character of
5905 * the replacement text should be a connector (| or ,).
5907 * Returns the tree of xmlElementContentPtr describing the element
5910 static xmlElementContentPtr
5911 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt
, int inputchk
,
5913 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
5914 const xmlChar
*elem
;
5917 if (((depth
> 128) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
5919 xmlFatalErrMsgInt(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
,
5920 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5927 int inputid
= ctxt
->input
->id
;
5929 /* Recurse on first child */
5932 cur
= ret
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
5937 elem
= xmlParseName(ctxt
);
5939 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
5942 cur
= ret
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
5944 xmlErrMemory(ctxt
, NULL
);
5949 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
5951 } else if (RAW
== '*') {
5952 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
5954 } else if (RAW
== '+') {
5955 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
5958 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
5964 while ((RAW
!= ')') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
5966 * Each loop we parse one separator and one element.
5969 if (type
== 0) type
= CUR
;
5972 * Detect "Name | Name , Name" error
5974 else if (type
!= CUR
) {
5975 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
5976 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5978 if ((last
!= NULL
) && (last
!= ret
))
5979 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
5981 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5986 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_SEQ
);
5988 if ((last
!= NULL
) && (last
!= ret
))
5989 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
5990 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6008 } else if (RAW
== '|') {
6009 if (type
== 0) type
= CUR
;
6012 * Detect "Name , Name | Name" error
6014 else if (type
!= CUR
) {
6015 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6016 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6018 if ((last
!= NULL
) && (last
!= ret
))
6019 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6021 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6026 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6028 if ((last
!= NULL
) && (last
!= ret
))
6029 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6031 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6050 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
, NULL
);
6051 if ((last
!= NULL
) && (last
!= ret
))
6052 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6054 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6061 int inputid
= ctxt
->input
->id
;
6062 /* Recurse on second child */
6065 last
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6069 elem
= xmlParseName(ctxt
);
6071 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6073 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6076 last
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6079 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6083 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6085 } else if (RAW
== '*') {
6086 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6088 } else if (RAW
== '+') {
6089 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6092 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6098 if ((cur
!= NULL
) && (last
!= NULL
)) {
6103 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
6104 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6105 "Element content declaration doesn't start and stop in the same entity\n",
6111 if ((ret
->ocur
== XML_ELEMENT_CONTENT_PLUS
) ||
6112 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6113 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6115 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6118 } else if (RAW
== '*') {
6120 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6123 * Some normalization:
6124 * (a | b* | c?)* == (a | b | c)*
6126 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6127 if ((cur
->c1
!= NULL
) &&
6128 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6129 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6130 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6131 if ((cur
->c2
!= NULL
) &&
6132 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6133 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6134 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6139 } else if (RAW
== '+') {
6143 if ((ret
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6144 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6145 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6147 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6149 * Some normalization:
6150 * (a | b*)+ == (a | b)*
6151 * (a | b?)+ == (a | b)*
6153 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6154 if ((cur
->c1
!= NULL
) &&
6155 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6156 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6157 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6160 if ((cur
->c2
!= NULL
) &&
6161 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6162 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6163 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6169 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6177 * xmlParseElementChildrenContentDecl:
6178 * @ctxt: an XML parser context
6179 * @inputchk: the input used for the current entity, needed for boundary checks
6181 * parse the declaration for an element-only (children) content model
6182 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6184 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6186 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6188 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6190 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6192 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6193 * TODO Parameter-entity replacement text must be properly nested
6194 * with parenthesized groups. That is to say, if either of the
6195 * opening or closing parentheses in a choice, seq, or Mixed
6196 * construct is contained in the replacement text for a parameter
6197 * entity, both must be contained in the same replacement text. For
6198 * interoperability, if a parameter-entity reference appears in a
6199 * choice, seq, or Mixed construct, its replacement text should not
6200 * be empty, and neither the first nor last non-blank character of
6201 * the replacement text should be a connector (| or ,).
6203 * Returns the tree of xmlElementContentPtr describing the element
6206 xmlElementContentPtr
6207 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6208 /* stub left for API/ABI compat */
6209 return(xmlParseElementChildrenContentDeclPriv(ctxt
, inputchk
, 1));
6213 * xmlParseElementContentDecl:
6214 * @ctxt: an XML parser context
6215 * @name: the name of the element being defined.
6216 * @result: the Element Content pointer will be stored here if any
6218 * parse the declaration for an Element content either Mixed or Children,
6219 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6221 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6223 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6227 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, const xmlChar
*name
,
6228 xmlElementContentPtr
*result
) {
6230 xmlElementContentPtr tree
= NULL
;
6231 int inputid
= ctxt
->input
->id
;
6237 xmlFatalErrMsgStr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6238 "xmlParseElementContentDecl : %s '(' expected\n", name
);
6243 if (ctxt
->instate
== XML_PARSER_EOF
)
6246 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6247 tree
= xmlParseElementMixedContentDecl(ctxt
, inputid
);
6248 res
= XML_ELEMENT_TYPE_MIXED
;
6250 tree
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
, 1);
6251 res
= XML_ELEMENT_TYPE_ELEMENT
;
6259 * xmlParseElementDecl:
6260 * @ctxt: an XML parser context
6262 * parse an Element declaration.
6264 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6266 * [ VC: Unique Element Type Declaration ]
6267 * No element type may be declared more than once
6269 * Returns the type of the element, or -1 in case of error
6272 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
6273 const xmlChar
*name
;
6275 xmlElementContentPtr content
= NULL
;
6277 /* GROW; done in the caller */
6278 if (CMP9(CUR_PTR
, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6279 xmlParserInputPtr input
= ctxt
->input
;
6282 if (!IS_BLANK_CH(CUR
)) {
6283 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6284 "Space required after 'ELEMENT'\n");
6287 name
= xmlParseName(ctxt
);
6289 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6290 "xmlParseElementDecl: no name for Element\n");
6293 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6295 if (!IS_BLANK_CH(CUR
)) {
6296 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6297 "Space required after the element name\n");
6300 if (CMP5(CUR_PTR
, 'E', 'M', 'P', 'T', 'Y')) {
6303 * Element must always be empty.
6305 ret
= XML_ELEMENT_TYPE_EMPTY
;
6306 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
6310 * Element is a generic container.
6312 ret
= XML_ELEMENT_TYPE_ANY
;
6313 } else if (RAW
== '(') {
6314 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
6317 * [ WFC: PEs in Internal Subset ] error handling.
6319 if ((RAW
== '%') && (ctxt
->external
== 0) &&
6320 (ctxt
->inputNr
== 1)) {
6321 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_IN_INT_SUBSET
,
6322 "PEReference: forbidden within markup decl in internal subset\n");
6324 xmlFatalErrMsg(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6325 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6332 * Pop-up of finished entities.
6334 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6339 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
6340 if (content
!= NULL
) {
6341 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6344 if (input
!= ctxt
->input
) {
6345 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6346 "Element declaration doesn't start and stop in the same entity\n");
6350 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6351 (ctxt
->sax
->elementDecl
!= NULL
)) {
6352 if (content
!= NULL
)
6353 content
->parent
= NULL
;
6354 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
6356 if ((content
!= NULL
) && (content
->parent
== NULL
)) {
6358 * this is a trick: if xmlAddElementDecl is called,
6359 * instead of copying the full tree it is plugged directly
6360 * if called from the parser. Avoid duplicating the
6361 * interfaces or change the API/ABI
6363 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6365 } else if (content
!= NULL
) {
6366 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6374 * xmlParseConditionalSections
6375 * @ctxt: an XML parser context
6377 * [61] conditionalSect ::= includeSect | ignoreSect
6378 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6379 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6380 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6381 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6385 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
6386 int id
= ctxt
->input
->id
;
6390 if (CMP7(CUR_PTR
, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6394 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6396 if (ctxt
->input
->id
!= id
) {
6397 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6398 "All markup of the conditional section is not in the same entity\n",
6403 if (xmlParserDebugEntities
) {
6404 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6405 xmlGenericError(xmlGenericErrorContext
,
6406 "%s(%d): ", ctxt
->input
->filename
,
6408 xmlGenericError(xmlGenericErrorContext
,
6409 "Entering INCLUDE Conditional Section\n");
6412 while (((RAW
!= 0) && ((RAW
!= ']') || (NXT(1) != ']') ||
6413 (NXT(2) != '>'))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6414 const xmlChar
*check
= CUR_PTR
;
6415 unsigned int cons
= ctxt
->input
->consumed
;
6417 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6418 xmlParseConditionalSections(ctxt
);
6419 } else if (IS_BLANK_CH(CUR
)) {
6421 } else if (RAW
== '%') {
6422 xmlParsePEReference(ctxt
);
6424 xmlParseMarkupDecl(ctxt
);
6427 * Pop-up of finished entities.
6429 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6432 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
6433 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6437 if (xmlParserDebugEntities
) {
6438 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6439 xmlGenericError(xmlGenericErrorContext
,
6440 "%s(%d): ", ctxt
->input
->filename
,
6442 xmlGenericError(xmlGenericErrorContext
,
6443 "Leaving INCLUDE Conditional Section\n");
6446 } else if (CMP6(CUR_PTR
, 'I', 'G', 'N', 'O', 'R', 'E')) {
6448 xmlParserInputState instate
;
6454 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6456 if (ctxt
->input
->id
!= id
) {
6457 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6458 "All markup of the conditional section is not in the same entity\n",
6463 if (xmlParserDebugEntities
) {
6464 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6465 xmlGenericError(xmlGenericErrorContext
,
6466 "%s(%d): ", ctxt
->input
->filename
,
6468 xmlGenericError(xmlGenericErrorContext
,
6469 "Entering IGNORE Conditional Section\n");
6473 * Parse up to the end of the conditional section
6474 * But disable SAX event generating DTD building in the meantime
6476 state
= ctxt
->disableSAX
;
6477 instate
= ctxt
->instate
;
6478 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6479 ctxt
->instate
= XML_PARSER_IGNORE
;
6481 while (((depth
>= 0) && (RAW
!= 0)) &&
6482 (ctxt
->instate
!= XML_PARSER_EOF
)) {
6483 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6488 if ((RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6489 if (--depth
>= 0) SKIP(3);
6496 ctxt
->disableSAX
= state
;
6497 ctxt
->instate
= instate
;
6499 if (xmlParserDebugEntities
) {
6500 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6501 xmlGenericError(xmlGenericErrorContext
,
6502 "%s(%d): ", ctxt
->input
->filename
,
6504 xmlGenericError(xmlGenericErrorContext
,
6505 "Leaving IGNORE Conditional Section\n");
6509 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID_KEYWORD
, NULL
);
6516 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_NOT_FINISHED
, NULL
);
6518 if (ctxt
->input
->id
!= id
) {
6519 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6520 "All markup of the conditional section is not in the same entity\n",
6528 * xmlParseMarkupDecl:
6529 * @ctxt: an XML parser context
6531 * parse Markup declarations
6533 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6534 * NotationDecl | PI | Comment
6536 * [ VC: Proper Declaration/PE Nesting ]
6537 * Parameter-entity replacement text must be properly nested with
6538 * markup declarations. That is to say, if either the first character
6539 * or the last character of a markup declaration (markupdecl above) is
6540 * contained in the replacement text for a parameter-entity reference,
6541 * both must be contained in the same replacement text.
6543 * [ WFC: PEs in Internal Subset ]
6544 * In the internal DTD subset, parameter-entity references can occur
6545 * only where markup declarations can occur, not within markup declarations.
6546 * (This does not apply to references that occur in external parameter
6547 * entities or to the external subset.)
6550 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
6553 if (NXT(1) == '!') {
6557 xmlParseElementDecl(ctxt
);
6558 else if (NXT(3) == 'N')
6559 xmlParseEntityDecl(ctxt
);
6562 xmlParseAttributeListDecl(ctxt
);
6565 xmlParseNotationDecl(ctxt
);
6568 xmlParseComment(ctxt
);
6571 /* there is an error but it will be detected later */
6574 } else if (NXT(1) == '?') {
6579 * This is only for internal subset. On external entities,
6580 * the replacement is done before parsing stage
6582 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
6583 xmlParsePEReference(ctxt
);
6586 * Conditional sections are allowed from entities included
6587 * by PE References in the internal subset.
6589 if ((ctxt
->external
== 0) && (ctxt
->inputNr
> 1)) {
6590 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6591 xmlParseConditionalSections(ctxt
);
6595 ctxt
->instate
= XML_PARSER_DTD
;
6600 * @ctxt: an XML parser context
6602 * parse an XML declaration header for external entities
6604 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6608 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
6610 const xmlChar
*encoding
;
6613 * We know that '<?xml' is here.
6615 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6618 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_STARTED
, NULL
);
6622 if (!IS_BLANK_CH(CUR
)) {
6623 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6624 "Space needed after '<?xml'\n");
6629 * We may have the VersionInfo here.
6631 version
= xmlParseVersionInfo(ctxt
);
6632 if (version
== NULL
)
6633 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
6635 if (!IS_BLANK_CH(CUR
)) {
6636 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6637 "Space needed here\n");
6640 ctxt
->input
->version
= version
;
6643 * We must have the encoding declaration
6645 encoding
= xmlParseEncodingDecl(ctxt
);
6646 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6648 * The XML REC instructs us to stop parsing right here
6652 if ((encoding
== NULL
) && (ctxt
->errNo
== XML_ERR_OK
)) {
6653 xmlFatalErrMsg(ctxt
, XML_ERR_MISSING_ENCODING
,
6654 "Missing encoding in text declaration\n");
6658 if ((RAW
== '?') && (NXT(1) == '>')) {
6660 } else if (RAW
== '>') {
6661 /* Deprecated old WD ... */
6662 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6665 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6666 MOVETO_ENDTAG(CUR_PTR
);
6672 * xmlParseExternalSubset:
6673 * @ctxt: an XML parser context
6674 * @ExternalID: the external identifier
6675 * @SystemID: the system identifier (or URL)
6677 * parse Markup declarations from an external subset
6679 * [30] extSubset ::= textDecl? extSubsetDecl
6681 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6684 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
6685 const xmlChar
*SystemID
) {
6686 xmlDetectSAX2(ctxt
);
6689 if ((ctxt
->encoding
== (const xmlChar
*)XML_CHAR_ENCODING_NONE
) &&
6690 (ctxt
->input
->end
- ctxt
->input
->cur
>= 4)) {
6692 xmlCharEncoding enc
;
6698 enc
= xmlDetectCharEncoding(start
, 4);
6699 if (enc
!= XML_CHAR_ENCODING_NONE
)
6700 xmlSwitchEncoding(ctxt
, enc
);
6703 if (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) {
6704 xmlParseTextDecl(ctxt
);
6705 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6707 * The XML REC instructs us to stop parsing right here
6709 ctxt
->instate
= XML_PARSER_EOF
;
6713 if (ctxt
->myDoc
== NULL
) {
6714 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
6715 if (ctxt
->myDoc
== NULL
) {
6716 xmlErrMemory(ctxt
, "New Doc failed");
6719 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
6721 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
6722 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
6724 ctxt
->instate
= XML_PARSER_DTD
;
6726 while (((RAW
== '<') && (NXT(1) == '?')) ||
6727 ((RAW
== '<') && (NXT(1) == '!')) ||
6728 (RAW
== '%') || IS_BLANK_CH(CUR
)) {
6729 const xmlChar
*check
= CUR_PTR
;
6730 unsigned int cons
= ctxt
->input
->consumed
;
6733 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6734 xmlParseConditionalSections(ctxt
);
6735 } else if (IS_BLANK_CH(CUR
)) {
6737 } else if (RAW
== '%') {
6738 xmlParsePEReference(ctxt
);
6740 xmlParseMarkupDecl(ctxt
);
6743 * Pop-up of finished entities.
6745 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6748 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
6749 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6755 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6761 * xmlParseReference:
6762 * @ctxt: an XML parser context
6764 * Parse and handle entity references in content. Depending on the SAX
6765 * interface, this may end up in a call to characters() if this is a
6766 * CharRef or a predefined entity, if there is no reference() callback,
6767 * or if the parser was asked to switch to that mode.
6769 * [67] Reference ::= EntityRef | CharRef
6772 xmlParseReference(xmlParserCtxtPtr ctxt
) {
6776 xmlNodePtr list
= NULL
;
6777 xmlParserErrors ret
= XML_ERR_OK
;
6784 * Simple case of a CharRef
6786 if (NXT(1) == '#') {
6790 int value
= xmlParseCharRef(ctxt
);
6794 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
6796 * So we are using non-UTF-8 buffers
6797 * Check that the char fit on 8bits, if not
6798 * generate a CharRef.
6800 if (value
<= 0xFF) {
6803 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
6804 (!ctxt
->disableSAX
))
6805 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
6807 if ((hex
== 'x') || (hex
== 'X'))
6808 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
6810 snprintf((char *)out
, sizeof(out
), "#%d", value
);
6811 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
6812 (!ctxt
->disableSAX
))
6813 ctxt
->sax
->reference(ctxt
->userData
, out
);
6817 * Just encode the value in UTF-8
6819 COPY_BUF(0 ,out
, i
, value
);
6821 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
6822 (!ctxt
->disableSAX
))
6823 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
6829 * We are seeing an entity reference
6831 ent
= xmlParseEntityRef(ctxt
);
6832 if (ent
== NULL
) return;
6833 if (!ctxt
->wellFormed
)
6835 was_checked
= ent
->checked
;
6837 /* special case of predefined entities */
6838 if ((ent
->name
== NULL
) ||
6839 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
6841 if (val
== NULL
) return;
6843 * inline the entity.
6845 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
6846 (!ctxt
->disableSAX
))
6847 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
6852 * The first reference to the entity triggers a parsing phase
6853 * where the ent->children is filled with the result from
6856 if (ent
->checked
== 0) {
6857 unsigned long oldnbent
= ctxt
->nbentities
;
6860 * This is a bit hackish but this seems the best
6861 * way to make sure both SAX and DOM entity support
6865 if (ctxt
->userData
== ctxt
)
6868 user_data
= ctxt
->userData
;
6871 * Check that this entity is well formed
6872 * 4.3.2: An internal general parsed entity is well-formed
6873 * if its replacement text matches the production labeled
6876 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
6878 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
, ent
->content
,
6882 } else if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
6884 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
, ctxt
->sax
,
6885 user_data
, ctxt
->depth
, ent
->URI
,
6886 ent
->ExternalID
, &list
);
6889 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
6890 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
6891 "invalid entity type found\n", NULL
);
6895 * Store the number of entities needing parsing for this entity
6896 * content and do checkings
6898 ent
->checked
= ctxt
->nbentities
- oldnbent
;
6899 if (ret
== XML_ERR_ENTITY_LOOP
) {
6900 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
6901 xmlFreeNodeList(list
);
6904 if (xmlParserEntityCheck(ctxt
, 0, ent
)) {
6905 xmlFreeNodeList(list
);
6909 if ((ret
== XML_ERR_OK
) && (list
!= NULL
)) {
6910 if (((ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) ||
6911 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
))&&
6912 (ent
->children
== NULL
)) {
6913 ent
->children
= list
;
6914 if (ctxt
->replaceEntities
) {
6916 * Prune it directly in the generated document
6917 * except for single text nodes.
6919 if (((list
->type
== XML_TEXT_NODE
) &&
6920 (list
->next
== NULL
)) ||
6921 (ctxt
->parseMode
== XML_PARSE_READER
)) {
6922 list
->parent
= (xmlNodePtr
) ent
;
6927 while (list
!= NULL
) {
6928 list
->parent
= (xmlNodePtr
) ctxt
->node
;
6929 list
->doc
= ctxt
->myDoc
;
6930 if (list
->next
== NULL
)
6934 list
= ent
->children
;
6935 #ifdef LIBXML_LEGACY_ENABLED
6936 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
6937 xmlAddEntityReference(ent
, list
, NULL
);
6938 #endif /* LIBXML_LEGACY_ENABLED */
6942 while (list
!= NULL
) {
6943 list
->parent
= (xmlNodePtr
) ent
;
6944 if (list
->next
== NULL
)
6950 xmlFreeNodeList(list
);
6953 } else if ((ret
!= XML_ERR_OK
) &&
6954 (ret
!= XML_WAR_UNDECLARED_ENTITY
)) {
6955 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
6956 "Entity '%s' failed to parse\n", ent
->name
);
6957 } else if (list
!= NULL
) {
6958 xmlFreeNodeList(list
);
6961 if (ent
->checked
== 0)
6963 } else if (ent
->checked
!= 1) {
6964 ctxt
->nbentities
+= ent
->checked
;
6968 * Now that the entity content has been gathered
6969 * provide it to the application, this can take different forms based
6970 * on the parsing modes.
6972 if (ent
->children
== NULL
) {
6974 * Probably running in SAX mode and the callbacks don't
6975 * build the entity content. So unless we already went
6976 * through parsing for the first check, go through the entity
6977 * content to generate callbacks associated to the entity
6979 if (was_checked
!= 0) {
6982 * This is a bit hackish but this seems the best
6983 * way to make sure both SAX and DOM entity support
6986 if (ctxt
->userData
== ctxt
)
6989 user_data
= ctxt
->userData
;
6991 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
6993 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
6994 ent
->content
, user_data
, NULL
);
6996 } else if (ent
->etype
==
6997 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
6999 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
7000 ctxt
->sax
, user_data
, ctxt
->depth
,
7001 ent
->URI
, ent
->ExternalID
, NULL
);
7004 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7005 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7006 "invalid entity type found\n", NULL
);
7008 if (ret
== XML_ERR_ENTITY_LOOP
) {
7009 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7013 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7014 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7016 * Entity reference callback comes second, it's somewhat
7017 * superfluous but a compatibility to historical behaviour
7019 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7025 * If we didn't get any children for the entity being built
7027 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7028 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7032 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7036 if ((ctxt
->replaceEntities
) || (ent
->children
== NULL
)) {
7038 * There is a problem on the handling of _private for entities
7039 * (bug 155816): Should we copy the content of the field from
7040 * the entity (possibly overwriting some value set by the user
7041 * when a copy is created), should we leave it alone, or should
7042 * we try to take care of different situations? The problem
7043 * is exacerbated by the usage of this field by the xmlReader.
7044 * To fix this bug, we look at _private on the created node
7045 * and, if it's NULL, we copy in whatever was in the entity.
7046 * If it's not NULL we leave it alone. This is somewhat of a
7047 * hack - maybe we should have further tests to determine
7050 if ((ctxt
->node
!= NULL
) && (ent
->children
!= NULL
)) {
7052 * Seems we are generating the DOM content, do
7053 * a simple tree copy for all references except the first
7054 * In the first occurrence list contains the replacement.
7055 * progressive == 2 means we are operating on the Reader
7056 * and since nodes are discarded we must copy all the time.
7058 if (((list
== NULL
) && (ent
->owner
== 0)) ||
7059 (ctxt
->parseMode
== XML_PARSE_READER
)) {
7060 xmlNodePtr nw
= NULL
, cur
, firstChild
= NULL
;
7063 * when operating on a reader, the entities definitions
7064 * are always owning the entities subtree.
7065 if (ctxt->parseMode == XML_PARSE_READER)
7069 cur
= ent
->children
;
7070 while (cur
!= NULL
) {
7071 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7073 if (nw
->_private
== NULL
)
7074 nw
->_private
= cur
->_private
;
7075 if (firstChild
== NULL
){
7078 nw
= xmlAddChild(ctxt
->node
, nw
);
7080 if (cur
== ent
->last
) {
7082 * needed to detect some strange empty
7083 * node cases in the reader tests
7085 if ((ctxt
->parseMode
== XML_PARSE_READER
) &&
7087 (nw
->type
== XML_ELEMENT_NODE
) &&
7088 (nw
->children
== NULL
))
7095 #ifdef LIBXML_LEGACY_ENABLED
7096 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7097 xmlAddEntityReference(ent
, firstChild
, nw
);
7098 #endif /* LIBXML_LEGACY_ENABLED */
7099 } else if (list
== NULL
) {
7100 xmlNodePtr nw
= NULL
, cur
, next
, last
,
7103 * Copy the entity child list and make it the new
7104 * entity child list. The goal is to make sure any
7105 * ID or REF referenced will be the one from the
7106 * document content and not the entity copy.
7108 cur
= ent
->children
;
7109 ent
->children
= NULL
;
7112 while (cur
!= NULL
) {
7116 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7118 if (nw
->_private
== NULL
)
7119 nw
->_private
= cur
->_private
;
7120 if (firstChild
== NULL
){
7123 xmlAddChild((xmlNodePtr
) ent
, nw
);
7124 xmlAddChild(ctxt
->node
, cur
);
7130 if (ent
->owner
== 0)
7132 #ifdef LIBXML_LEGACY_ENABLED
7133 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7134 xmlAddEntityReference(ent
, firstChild
, nw
);
7135 #endif /* LIBXML_LEGACY_ENABLED */
7137 const xmlChar
*nbktext
;
7140 * the name change is to avoid coalescing of the
7141 * node with a possible previous text one which
7142 * would make ent->children a dangling pointer
7144 nbktext
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"nbktext",
7146 if (ent
->children
->type
== XML_TEXT_NODE
)
7147 ent
->children
->name
= nbktext
;
7148 if ((ent
->last
!= ent
->children
) &&
7149 (ent
->last
->type
== XML_TEXT_NODE
))
7150 ent
->last
->name
= nbktext
;
7151 xmlAddChildList(ctxt
->node
, ent
->children
);
7155 * This is to avoid a nasty side effect, see
7156 * characters() in SAX.c
7166 * xmlParseEntityRef:
7167 * @ctxt: an XML parser context
7169 * parse ENTITY references declarations
7171 * [68] EntityRef ::= '&' Name ';'
7173 * [ WFC: Entity Declared ]
7174 * In a document without any DTD, a document with only an internal DTD
7175 * subset which contains no parameter entity references, or a document
7176 * with "standalone='yes'", the Name given in the entity reference
7177 * must match that in an entity declaration, except that well-formed
7178 * documents need not declare any of the following entities: amp, lt,
7179 * gt, apos, quot. The declaration of a parameter entity must precede
7180 * any reference to it. Similarly, the declaration of a general entity
7181 * must precede any reference to it which appears in a default value in an
7182 * attribute-list declaration. Note that if entities are declared in the
7183 * external subset or in external parameter entities, a non-validating
7184 * processor is not obligated to read and process their declarations;
7185 * for such documents, the rule that an entity must be declared is a
7186 * well-formedness constraint only if standalone='yes'.
7188 * [ WFC: Parsed Entity ]
7189 * An entity reference must not contain the name of an unparsed entity
7191 * Returns the xmlEntityPtr if found, or NULL otherwise.
7194 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
7195 const xmlChar
*name
;
7196 xmlEntityPtr ent
= NULL
;
7199 if (ctxt
->instate
== XML_PARSER_EOF
)
7205 name
= xmlParseName(ctxt
);
7207 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7208 "xmlParseEntityRef: no name\n");
7212 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7218 * Predefined entities override any extra definition
7220 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7221 ent
= xmlGetPredefinedEntity(name
);
7227 * Increase the number of entity references parsed
7232 * Ask first SAX for entity resolution, otherwise try the
7233 * entities which may have been stored in the parser context.
7235 if (ctxt
->sax
!= NULL
) {
7236 if (ctxt
->sax
->getEntity
!= NULL
)
7237 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7238 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7239 (ctxt
->options
& XML_PARSE_OLDSAX
))
7240 ent
= xmlGetPredefinedEntity(name
);
7241 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7242 (ctxt
->userData
==ctxt
)) {
7243 ent
= xmlSAX2GetEntity(ctxt
, name
);
7246 if (ctxt
->instate
== XML_PARSER_EOF
)
7249 * [ WFC: Entity Declared ]
7250 * In a document without any DTD, a document with only an
7251 * internal DTD subset which contains no parameter entity
7252 * references, or a document with "standalone='yes'", the
7253 * Name given in the entity reference must match that in an
7254 * entity declaration, except that well-formed documents
7255 * need not declare any of the following entities: amp, lt,
7257 * The declaration of a parameter entity must precede any
7259 * Similarly, the declaration of a general entity must
7260 * precede any reference to it which appears in a default
7261 * value in an attribute-list declaration. Note that if
7262 * entities are declared in the external subset or in
7263 * external parameter entities, a non-validating processor
7264 * is not obligated to read and process their declarations;
7265 * for such documents, the rule that an entity must be
7266 * declared is a well-formedness constraint only if
7270 if ((ctxt
->standalone
== 1) ||
7271 ((ctxt
->hasExternalSubset
== 0) &&
7272 (ctxt
->hasPErefs
== 0))) {
7273 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7274 "Entity '%s' not defined\n", name
);
7276 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7277 "Entity '%s' not defined\n", name
);
7278 if ((ctxt
->inSubset
== 0) &&
7279 (ctxt
->sax
!= NULL
) &&
7280 (ctxt
->sax
->reference
!= NULL
)) {
7281 ctxt
->sax
->reference(ctxt
->userData
, name
);
7288 * [ WFC: Parsed Entity ]
7289 * An entity reference must not contain the name of an
7292 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7293 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7294 "Entity reference to unparsed entity %s\n", name
);
7298 * [ WFC: No External Entity References ]
7299 * Attribute values cannot contain direct or indirect
7300 * entity references to external entities.
7302 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7303 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7304 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7305 "Attribute references external entity '%s'\n", name
);
7308 * [ WFC: No < in Attribute Values ]
7309 * The replacement text of any entity referred to directly or
7310 * indirectly in an attribute value (other than "&lt;") must
7313 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7314 (ent
!= NULL
) && (ent
->content
!= NULL
) &&
7315 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
7316 (xmlStrchr(ent
->content
, '<'))) {
7317 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7318 "'<' in entity '%s' is not allowed in attributes values\n", name
);
7322 * Internal check, no parameter entities here ...
7325 switch (ent
->etype
) {
7326 case XML_INTERNAL_PARAMETER_ENTITY
:
7327 case XML_EXTERNAL_PARAMETER_ENTITY
:
7328 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7329 "Attempt to reference the parameter entity '%s'\n",
7338 * [ WFC: No Recursion ]
7339 * A parsed entity must not contain a recursive reference
7340 * to itself, either directly or indirectly.
7341 * Done somewhere else
7347 * xmlParseStringEntityRef:
7348 * @ctxt: an XML parser context
7349 * @str: a pointer to an index in the string
7351 * parse ENTITY references declarations, but this version parses it from
7354 * [68] EntityRef ::= '&' Name ';'
7356 * [ WFC: Entity Declared ]
7357 * In a document without any DTD, a document with only an internal DTD
7358 * subset which contains no parameter entity references, or a document
7359 * with "standalone='yes'", the Name given in the entity reference
7360 * must match that in an entity declaration, except that well-formed
7361 * documents need not declare any of the following entities: amp, lt,
7362 * gt, apos, quot. The declaration of a parameter entity must precede
7363 * any reference to it. Similarly, the declaration of a general entity
7364 * must precede any reference to it which appears in a default value in an
7365 * attribute-list declaration. Note that if entities are declared in the
7366 * external subset or in external parameter entities, a non-validating
7367 * processor is not obligated to read and process their declarations;
7368 * for such documents, the rule that an entity must be declared is a
7369 * well-formedness constraint only if standalone='yes'.
7371 * [ WFC: Parsed Entity ]
7372 * An entity reference must not contain the name of an unparsed entity
7374 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7375 * is updated to the current location in the string.
7378 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
7382 xmlEntityPtr ent
= NULL
;
7384 if ((str
== NULL
) || (*str
== NULL
))
7392 name
= xmlParseStringName(ctxt
, &ptr
);
7394 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7395 "xmlParseStringEntityRef: no name\n");
7400 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7409 * Predefined entities override any extra definition
7411 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7412 ent
= xmlGetPredefinedEntity(name
);
7421 * Increase the number of entity references parsed
7426 * Ask first SAX for entity resolution, otherwise try the
7427 * entities which may have been stored in the parser context.
7429 if (ctxt
->sax
!= NULL
) {
7430 if (ctxt
->sax
->getEntity
!= NULL
)
7431 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7432 if ((ent
== NULL
) && (ctxt
->options
& XML_PARSE_OLDSAX
))
7433 ent
= xmlGetPredefinedEntity(name
);
7434 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
7435 ent
= xmlSAX2GetEntity(ctxt
, name
);
7438 if (ctxt
->instate
== XML_PARSER_EOF
) {
7444 * [ WFC: Entity Declared ]
7445 * In a document without any DTD, a document with only an
7446 * internal DTD subset which contains no parameter entity
7447 * references, or a document with "standalone='yes'", the
7448 * Name given in the entity reference must match that in an
7449 * entity declaration, except that well-formed documents
7450 * need not declare any of the following entities: amp, lt,
7452 * The declaration of a parameter entity must precede any
7454 * Similarly, the declaration of a general entity must
7455 * precede any reference to it which appears in a default
7456 * value in an attribute-list declaration. Note that if
7457 * entities are declared in the external subset or in
7458 * external parameter entities, a non-validating processor
7459 * is not obligated to read and process their declarations;
7460 * for such documents, the rule that an entity must be
7461 * declared is a well-formedness constraint only if
7465 if ((ctxt
->standalone
== 1) ||
7466 ((ctxt
->hasExternalSubset
== 0) &&
7467 (ctxt
->hasPErefs
== 0))) {
7468 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7469 "Entity '%s' not defined\n", name
);
7471 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7472 "Entity '%s' not defined\n",
7475 /* TODO ? check regressions ctxt->valid = 0; */
7479 * [ WFC: Parsed Entity ]
7480 * An entity reference must not contain the name of an
7483 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7484 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7485 "Entity reference to unparsed entity %s\n", name
);
7489 * [ WFC: No External Entity References ]
7490 * Attribute values cannot contain direct or indirect
7491 * entity references to external entities.
7493 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7494 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7495 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7496 "Attribute references external entity '%s'\n", name
);
7499 * [ WFC: No < in Attribute Values ]
7500 * The replacement text of any entity referred to directly or
7501 * indirectly in an attribute value (other than "&lt;") must
7504 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7505 (ent
!= NULL
) && (ent
->content
!= NULL
) &&
7506 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
7507 (xmlStrchr(ent
->content
, '<'))) {
7508 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7509 "'<' in entity '%s' is not allowed in attributes values\n",
7514 * Internal check, no parameter entities here ...
7517 switch (ent
->etype
) {
7518 case XML_INTERNAL_PARAMETER_ENTITY
:
7519 case XML_EXTERNAL_PARAMETER_ENTITY
:
7520 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7521 "Attempt to reference the parameter entity '%s'\n",
7530 * [ WFC: No Recursion ]
7531 * A parsed entity must not contain a recursive reference
7532 * to itself, either directly or indirectly.
7533 * Done somewhere else
7542 * xmlParsePEReference:
7543 * @ctxt: an XML parser context
7545 * parse PEReference declarations
7546 * The entity content is handled directly by pushing its content as
7547 * a new input stream.
7549 * [69] PEReference ::= '%' Name ';'
7551 * [ WFC: No Recursion ]
7552 * A parsed entity must not contain a recursive
7553 * reference to itself, either directly or indirectly.
7555 * [ WFC: Entity Declared ]
7556 * In a document without any DTD, a document with only an internal DTD
7557 * subset which contains no parameter entity references, or a document
7558 * with "standalone='yes'", ... ... The declaration of a parameter
7559 * entity must precede any reference to it...
7561 * [ VC: Entity Declared ]
7562 * In a document with an external subset or external parameter entities
7563 * with "standalone='no'", ... ... The declaration of a parameter entity
7564 * must precede any reference to it...
7567 * Parameter-entity references may only appear in the DTD.
7568 * NOTE: misleading but this is handled.
7571 xmlParsePEReference(xmlParserCtxtPtr ctxt
)
7573 const xmlChar
*name
;
7574 xmlEntityPtr entity
= NULL
;
7575 xmlParserInputPtr input
;
7580 name
= xmlParseName(ctxt
);
7582 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7583 "xmlParsePEReference: no name\n");
7587 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7594 * Increase the number of entity references parsed
7599 * Request the entity from SAX
7601 if ((ctxt
->sax
!= NULL
) &&
7602 (ctxt
->sax
->getParameterEntity
!= NULL
))
7603 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
7604 if (ctxt
->instate
== XML_PARSER_EOF
)
7606 if (entity
== NULL
) {
7608 * [ WFC: Entity Declared ]
7609 * In a document without any DTD, a document with only an
7610 * internal DTD subset which contains no parameter entity
7611 * references, or a document with "standalone='yes'", ...
7612 * ... The declaration of a parameter entity must precede
7613 * any reference to it...
7615 if ((ctxt
->standalone
== 1) ||
7616 ((ctxt
->hasExternalSubset
== 0) &&
7617 (ctxt
->hasPErefs
== 0))) {
7618 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7619 "PEReference: %%%s; not found\n",
7623 * [ VC: Entity Declared ]
7624 * In a document with an external subset or external
7625 * parameter entities with "standalone='no'", ...
7626 * ... The declaration of a parameter entity must
7627 * precede any reference to it...
7629 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7630 "PEReference: %%%s; not found\n",
7636 * Internal checking in case the entity quest barfed
7638 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7639 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7640 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7641 "Internal: %%%s; is not a parameter entity\n",
7643 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
7644 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
7645 if (xmlPushInput(ctxt
, input
) < 0)
7650 * handle the extra spaces added before and after
7651 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7653 input
= xmlNewEntityInputStream(ctxt
, entity
);
7654 if (xmlPushInput(ctxt
, input
) < 0)
7656 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
7657 (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) &&
7658 (IS_BLANK_CH(NXT(5)))) {
7659 xmlParseTextDecl(ctxt
);
7661 XML_ERR_UNSUPPORTED_ENCODING
) {
7663 * The XML REC instructs us to stop parsing
7666 ctxt
->instate
= XML_PARSER_EOF
;
7672 ctxt
->hasPErefs
= 1;
7676 * xmlLoadEntityContent:
7677 * @ctxt: an XML parser context
7678 * @entity: an unloaded system entity
7680 * Load the original content of the given system entity from the
7681 * ExternalID/SystemID given. This is to be used for Included in Literal
7682 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7684 * Returns 0 in case of success and -1 in case of failure
7687 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
7688 xmlParserInputPtr input
;
7693 if ((ctxt
== NULL
) || (entity
== NULL
) ||
7694 ((entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
) &&
7695 (entity
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) ||
7696 (entity
->content
!= NULL
)) {
7697 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7698 "xmlLoadEntityContent parameter error");
7702 if (xmlParserDebugEntities
)
7703 xmlGenericError(xmlGenericErrorContext
,
7704 "Reading %s entity content input\n", entity
->name
);
7706 buf
= xmlBufferCreate();
7708 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7709 "xmlLoadEntityContent parameter error");
7713 input
= xmlNewEntityInputStream(ctxt
, entity
);
7714 if (input
== NULL
) {
7715 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7716 "xmlLoadEntityContent input error");
7722 * Push the entity as the current input, read char by char
7723 * saving to the buffer until the end of the entity or an error
7725 if (xmlPushInput(ctxt
, input
) < 0) {
7732 while ((ctxt
->input
== input
) && (ctxt
->input
->cur
< ctxt
->input
->end
) &&
7734 xmlBufferAdd(buf
, ctxt
->input
->cur
, l
);
7735 if (count
++ > 100) {
7738 if (ctxt
->instate
== XML_PARSER_EOF
) {
7747 if ((ctxt
->input
== input
) && (ctxt
->input
->cur
>= ctxt
->input
->end
)) {
7749 } else if (!IS_CHAR(c
)) {
7750 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
7751 "xmlLoadEntityContent: invalid char value %d\n",
7756 entity
->content
= buf
->content
;
7757 buf
->content
= NULL
;
7764 * xmlParseStringPEReference:
7765 * @ctxt: an XML parser context
7766 * @str: a pointer to an index in the string
7768 * parse PEReference declarations
7770 * [69] PEReference ::= '%' Name ';'
7772 * [ WFC: No Recursion ]
7773 * A parsed entity must not contain a recursive
7774 * reference to itself, either directly or indirectly.
7776 * [ WFC: Entity Declared ]
7777 * In a document without any DTD, a document with only an internal DTD
7778 * subset which contains no parameter entity references, or a document
7779 * with "standalone='yes'", ... ... The declaration of a parameter
7780 * entity must precede any reference to it...
7782 * [ VC: Entity Declared ]
7783 * In a document with an external subset or external parameter entities
7784 * with "standalone='no'", ... ... The declaration of a parameter entity
7785 * must precede any reference to it...
7788 * Parameter-entity references may only appear in the DTD.
7789 * NOTE: misleading but this is handled.
7791 * Returns the string of the entity content.
7792 * str is updated to the current value of the index
7795 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
7799 xmlEntityPtr entity
= NULL
;
7801 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
7807 name
= xmlParseStringName(ctxt
, &ptr
);
7809 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7810 "xmlParseStringPEReference: no name\n");
7816 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7824 * Increase the number of entity references parsed
7829 * Request the entity from SAX
7831 if ((ctxt
->sax
!= NULL
) &&
7832 (ctxt
->sax
->getParameterEntity
!= NULL
))
7833 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
7834 if (ctxt
->instate
== XML_PARSER_EOF
) {
7838 if (entity
== NULL
) {
7840 * [ WFC: Entity Declared ]
7841 * In a document without any DTD, a document with only an
7842 * internal DTD subset which contains no parameter entity
7843 * references, or a document with "standalone='yes'", ...
7844 * ... The declaration of a parameter entity must precede
7845 * any reference to it...
7847 if ((ctxt
->standalone
== 1) ||
7848 ((ctxt
->hasExternalSubset
== 0) && (ctxt
->hasPErefs
== 0))) {
7849 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7850 "PEReference: %%%s; not found\n", name
);
7853 * [ VC: Entity Declared ]
7854 * In a document with an external subset or external
7855 * parameter entities with "standalone='no'", ...
7856 * ... The declaration of a parameter entity must
7857 * precede any reference to it...
7859 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7860 "PEReference: %%%s; not found\n",
7866 * Internal checking in case the entity quest barfed
7868 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7869 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7870 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7871 "%%%s; is not a parameter entity\n",
7875 ctxt
->hasPErefs
= 1;
7882 * xmlParseDocTypeDecl:
7883 * @ctxt: an XML parser context
7885 * parse a DOCTYPE declaration
7887 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7888 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7890 * [ VC: Root Element Type ]
7891 * The Name in the document type declaration must match the element
7892 * type of the root element.
7896 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
7897 const xmlChar
*name
= NULL
;
7898 xmlChar
*ExternalID
= NULL
;
7899 xmlChar
*URI
= NULL
;
7902 * We know that '<!DOCTYPE' has been detected.
7909 * Parse the DOCTYPE name.
7911 name
= xmlParseName(ctxt
);
7913 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7914 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7916 ctxt
->intSubName
= name
;
7921 * Check for SystemID and ExternalID
7923 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
7925 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
7926 ctxt
->hasExternalSubset
= 1;
7928 ctxt
->extSubURI
= URI
;
7929 ctxt
->extSubSystem
= ExternalID
;
7934 * Create and update the internal subset.
7936 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
7937 (!ctxt
->disableSAX
))
7938 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
7939 if (ctxt
->instate
== XML_PARSER_EOF
)
7943 * Are there any internal subset declarations?
7944 * they are handled separately in xmlParseInternalSubset()
7950 * We should be at the end of the DOCTYPE declaration.
7953 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
7959 * xmlParseInternalSubset:
7960 * @ctxt: an XML parser context
7962 * parse the internal subset declaration
7964 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7968 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
7970 * Is there any DTD definition ?
7973 ctxt
->instate
= XML_PARSER_DTD
;
7976 * Parse the succession of Markup declarations and
7978 * Subsequence (markupdecl | PEReference | S)*
7980 while ((RAW
!= ']') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
7981 const xmlChar
*check
= CUR_PTR
;
7982 unsigned int cons
= ctxt
->input
->consumed
;
7985 xmlParseMarkupDecl(ctxt
);
7986 xmlParsePEReference(ctxt
);
7989 * Pop-up of finished entities.
7991 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
7994 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
7995 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7996 "xmlParseInternalSubset: error detected in Markup declaration\n");
8007 * We should be at the end of the DOCTYPE declaration.
8010 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8015 #ifdef LIBXML_SAX1_ENABLED
8017 * xmlParseAttribute:
8018 * @ctxt: an XML parser context
8019 * @value: a xmlChar ** used to store the value of the attribute
8021 * parse an attribute
8023 * [41] Attribute ::= Name Eq AttValue
8025 * [ WFC: No External Entity References ]
8026 * Attribute values cannot contain direct or indirect entity references
8027 * to external entities.
8029 * [ WFC: No < in Attribute Values ]
8030 * The replacement text of any entity referred to directly or indirectly in
8031 * an attribute value (other than "&lt;") must not contain a <.
8033 * [ VC: Attribute Value Type ]
8034 * The attribute must have been declared; the value must be of the type
8037 * [25] Eq ::= S? '=' S?
8041 * [NS 11] Attribute ::= QName Eq AttValue
8043 * Also the case QName == xmlns:??? is handled independently as a namespace
8046 * Returns the attribute name, and the value in *value.
8050 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
8051 const xmlChar
*name
;
8056 name
= xmlParseName(ctxt
);
8058 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8059 "error parsing attribute name\n");
8070 val
= xmlParseAttValue(ctxt
);
8071 ctxt
->instate
= XML_PARSER_CONTENT
;
8073 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8074 "Specification mandate value for attribute %s\n", name
);
8079 * Check that xml:lang conforms to the specification
8080 * No more registered as an error, just generate a warning now
8081 * since this was deprecated in XML second edition
8083 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
8084 if (!xmlCheckLanguageID(val
)) {
8085 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8086 "Malformed value for xml:lang : %s\n",
8092 * Check that xml:space conforms to the specification
8094 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
8095 if (xmlStrEqual(val
, BAD_CAST
"default"))
8097 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
8100 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8101 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8112 * @ctxt: an XML parser context
8114 * parse a start of tag either for rule element or
8115 * EmptyElement. In both cases we don't parse the tag closing chars.
8117 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8119 * [ WFC: Unique Att Spec ]
8120 * No attribute name may appear more than once in the same start-tag or
8121 * empty-element tag.
8123 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8125 * [ WFC: Unique Att Spec ]
8126 * No attribute name may appear more than once in the same start-tag or
8127 * empty-element tag.
8131 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8133 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8135 * Returns the element name parsed
8139 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
8140 const xmlChar
*name
;
8141 const xmlChar
*attname
;
8143 const xmlChar
**atts
= ctxt
->atts
;
8145 int maxatts
= ctxt
->maxatts
;
8148 if (RAW
!= '<') return(NULL
);
8151 name
= xmlParseName(ctxt
);
8153 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8154 "xmlParseStartTag: invalid element name\n");
8159 * Now parse the attributes, it ends up with the ending
8166 while (((RAW
!= '>') &&
8167 ((RAW
!= '/') || (NXT(1) != '>')) &&
8168 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8169 const xmlChar
*q
= CUR_PTR
;
8170 unsigned int cons
= ctxt
->input
->consumed
;
8172 attname
= xmlParseAttribute(ctxt
, &attvalue
);
8173 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
8175 * [ WFC: Unique Att Spec ]
8176 * No attribute name may appear more than once in the same
8177 * start-tag or empty-element tag.
8179 for (i
= 0; i
< nbatts
;i
+= 2) {
8180 if (xmlStrEqual(atts
[i
], attname
)) {
8181 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8187 * Add the pair to atts
8190 maxatts
= 22; /* allow for 10 attrs by default */
8191 atts
= (const xmlChar
**)
8192 xmlMalloc(maxatts
* sizeof(xmlChar
*));
8194 xmlErrMemory(ctxt
, NULL
);
8195 if (attvalue
!= NULL
)
8200 ctxt
->maxatts
= maxatts
;
8201 } else if (nbatts
+ 4 > maxatts
) {
8205 n
= (const xmlChar
**) xmlRealloc((void *) atts
,
8206 maxatts
* sizeof(const xmlChar
*));
8208 xmlErrMemory(ctxt
, NULL
);
8209 if (attvalue
!= NULL
)
8215 ctxt
->maxatts
= maxatts
;
8217 atts
[nbatts
++] = attname
;
8218 atts
[nbatts
++] = attvalue
;
8219 atts
[nbatts
] = NULL
;
8220 atts
[nbatts
+ 1] = NULL
;
8222 if (attvalue
!= NULL
)
8229 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
8231 if (!IS_BLANK_CH(RAW
)) {
8232 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8233 "attributes construct error\n");
8236 if ((cons
== ctxt
->input
->consumed
) && (q
== CUR_PTR
) &&
8237 (attname
== NULL
) && (attvalue
== NULL
)) {
8238 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
8239 "xmlParseStartTag: problem parsing attributes\n");
8247 * SAX: Start of Element !
8249 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
8250 (!ctxt
->disableSAX
)) {
8252 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
8254 ctxt
->sax
->startElement(ctxt
->userData
, name
, NULL
);
8258 /* Free only the content strings */
8259 for (i
= 1;i
< nbatts
;i
+=2)
8260 if (atts
[i
] != NULL
)
8261 xmlFree((xmlChar
*) atts
[i
]);
8268 * @ctxt: an XML parser context
8269 * @line: line of the start tag
8270 * @nsNr: number of namespaces on the start tag
8272 * parse an end of tag
8274 * [42] ETag ::= '</' Name S? '>'
8278 * [NS 9] ETag ::= '</' QName S? '>'
8282 xmlParseEndTag1(xmlParserCtxtPtr ctxt
, int line
) {
8283 const xmlChar
*name
;
8286 if ((RAW
!= '<') || (NXT(1) != '/')) {
8287 xmlFatalErrMsg(ctxt
, XML_ERR_LTSLASH_REQUIRED
,
8288 "xmlParseEndTag: '</' not found\n");
8293 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
8296 * We should definitely be at the ending "S? '>'" part
8300 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
8301 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
8306 * [ WFC: Element Type Match ]
8307 * The Name in an element's end-tag must match the element type in the
8311 if (name
!= (xmlChar
*)1) {
8312 if (name
== NULL
) name
= BAD_CAST
"unparseable";
8313 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
8314 "Opening and ending tag mismatch: %s line %d and %s\n",
8315 ctxt
->name
, line
, name
);
8321 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
8322 (!ctxt
->disableSAX
))
8323 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
8332 * @ctxt: an XML parser context
8334 * parse an end of tag
8336 * [42] ETag ::= '</' Name S? '>'
8340 * [NS 9] ETag ::= '</' QName S? '>'
8344 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
8345 xmlParseEndTag1(ctxt
, 0);
8347 #endif /* LIBXML_SAX1_ENABLED */
8349 /************************************************************************
8351 * SAX 2 specific operations *
8353 ************************************************************************/
8357 * @ctxt: an XML parser context
8358 * @prefix: the prefix to lookup
8360 * Lookup the namespace name for the @prefix (which can be NULL)
8361 * The prefix must come from the @ctxt->dict dictionary
8363 * Returns the namespace name or NULL if not bound
8365 static const xmlChar
*
8366 xmlGetNamespace(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
) {
8369 if (prefix
== ctxt
->str_xml
) return(ctxt
->str_xml_ns
);
8370 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-=2)
8371 if (ctxt
->nsTab
[i
] == prefix
) {
8372 if ((prefix
== NULL
) && (*ctxt
->nsTab
[i
+ 1] == 0))
8374 return(ctxt
->nsTab
[i
+ 1]);
8381 * @ctxt: an XML parser context
8382 * @prefix: pointer to store the prefix part
8384 * parse an XML Namespace QName
8386 * [6] QName ::= (Prefix ':')? LocalPart
8387 * [7] Prefix ::= NCName
8388 * [8] LocalPart ::= NCName
8390 * Returns the Name parsed or NULL
8393 static const xmlChar
*
8394 xmlParseQName(xmlParserCtxtPtr ctxt
, const xmlChar
**prefix
) {
8395 const xmlChar
*l
, *p
;
8399 l
= xmlParseNCName(ctxt
);
8402 l
= xmlParseName(ctxt
);
8404 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8405 "Failed to parse QName '%s'\n", l
, NULL
, NULL
);
8415 l
= xmlParseNCName(ctxt
);
8419 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8420 "Failed to parse QName '%s:'\n", p
, NULL
, NULL
);
8421 l
= xmlParseNmtoken(ctxt
);
8423 tmp
= xmlBuildQName(BAD_CAST
"", p
, NULL
, 0);
8425 tmp
= xmlBuildQName(l
, p
, NULL
, 0);
8428 p
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8429 if (tmp
!= NULL
) xmlFree(tmp
);
8436 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8437 "Failed to parse QName '%s:%s:'\n", p
, l
, NULL
);
8439 tmp
= (xmlChar
*) xmlParseName(ctxt
);
8441 tmp
= xmlBuildQName(tmp
, l
, NULL
, 0);
8442 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8443 if (tmp
!= NULL
) xmlFree(tmp
);
8447 tmp
= xmlBuildQName(BAD_CAST
"", l
, NULL
, 0);
8448 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8449 if (tmp
!= NULL
) xmlFree(tmp
);
8460 * xmlParseQNameAndCompare:
8461 * @ctxt: an XML parser context
8462 * @name: the localname
8463 * @prefix: the prefix, if any.
8465 * parse an XML name and compares for match
8466 * (specialized for endtag parsing)
8468 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8469 * and the name for mismatch
8472 static const xmlChar
*
8473 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *name
,
8474 xmlChar
const *prefix
) {
8478 const xmlChar
*prefix2
;
8480 if (prefix
== NULL
) return(xmlParseNameAndCompare(ctxt
, name
));
8483 in
= ctxt
->input
->cur
;
8486 while (*in
!= 0 && *in
== *cmp
) {
8490 if ((*cmp
== 0) && (*in
== ':')) {
8493 while (*in
!= 0 && *in
== *cmp
) {
8497 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
8499 ctxt
->input
->cur
= in
;
8500 return((const xmlChar
*) 1);
8504 * all strings come from the dictionary, equality can be done directly
8506 ret
= xmlParseQName (ctxt
, &prefix2
);
8507 if ((ret
== name
) && (prefix
== prefix2
))
8508 return((const xmlChar
*) 1);
8513 * xmlParseAttValueInternal:
8514 * @ctxt: an XML parser context
8515 * @len: attribute len result
8516 * @alloc: whether the attribute was reallocated as a new string
8517 * @normalize: if 1 then further non-CDATA normalization must be done
8519 * parse a value for an attribute.
8520 * NOTE: if no normalization is needed, the routine will return pointers
8521 * directly from the data buffer.
8523 * 3.3.3 Attribute-Value Normalization:
8524 * Before the value of an attribute is passed to the application or
8525 * checked for validity, the XML processor must normalize it as follows:
8526 * - a character reference is processed by appending the referenced
8527 * character to the attribute value
8528 * - an entity reference is processed by recursively processing the
8529 * replacement text of the entity
8530 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8531 * appending #x20 to the normalized value, except that only a single
8532 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8533 * parsed entity or the literal entity value of an internal parsed entity
8534 * - other characters are processed by appending them to the normalized value
8535 * If the declared value is not CDATA, then the XML processor must further
8536 * process the normalized attribute value by discarding any leading and
8537 * trailing space (#x20) characters, and by replacing sequences of space
8538 * (#x20) characters by a single space (#x20) character.
8539 * All attributes for which no declaration has been read should be treated
8540 * by a non-validating parser as if declared CDATA.
8542 * Returns the AttValue parsed or NULL. The value has to be freed by the
8543 * caller if it was copied, this can be detected by val[*len] == 0.
8547 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
, int *len
, int *alloc
,
8551 const xmlChar
*in
= NULL
, *start
, *end
, *last
;
8552 xmlChar
*ret
= NULL
;
8555 in
= (xmlChar
*) CUR_PTR
;
8556 if (*in
!= '"' && *in
!= '\'') {
8557 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
8560 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
8563 * try to handle in this routine the most common case where no
8564 * allocation of a new string is required and where content is
8568 end
= ctxt
->input
->end
;
8571 const xmlChar
*oldbase
= ctxt
->input
->base
;
8573 if (oldbase
!= ctxt
->input
->base
) {
8574 long delta
= ctxt
->input
->base
- oldbase
;
8575 start
= start
+ delta
;
8578 end
= ctxt
->input
->end
;
8582 * Skip any leading spaces
8584 while ((in
< end
) && (*in
!= limit
) &&
8585 ((*in
== 0x20) || (*in
== 0x9) ||
8586 (*in
== 0xA) || (*in
== 0xD))) {
8590 const xmlChar
*oldbase
= ctxt
->input
->base
;
8592 if (ctxt
->instate
== XML_PARSER_EOF
)
8594 if (oldbase
!= ctxt
->input
->base
) {
8595 long delta
= ctxt
->input
->base
- oldbase
;
8596 start
= start
+ delta
;
8599 end
= ctxt
->input
->end
;
8602 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
8603 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
8604 if ((*in
++ == 0x20) && (*in
== 0x20)) break;
8606 const xmlChar
*oldbase
= ctxt
->input
->base
;
8608 if (ctxt
->instate
== XML_PARSER_EOF
)
8610 if (oldbase
!= ctxt
->input
->base
) {
8611 long delta
= ctxt
->input
->base
- oldbase
;
8612 start
= start
+ delta
;
8615 end
= ctxt
->input
->end
;
8620 * skip the trailing blanks
8622 while ((last
[-1] == 0x20) && (last
> start
)) last
--;
8623 while ((in
< end
) && (*in
!= limit
) &&
8624 ((*in
== 0x20) || (*in
== 0x9) ||
8625 (*in
== 0xA) || (*in
== 0xD))) {
8628 const xmlChar
*oldbase
= ctxt
->input
->base
;
8630 if (ctxt
->instate
== XML_PARSER_EOF
)
8632 if (oldbase
!= ctxt
->input
->base
) {
8633 long delta
= ctxt
->input
->base
- oldbase
;
8634 start
= start
+ delta
;
8636 last
= last
+ delta
;
8638 end
= ctxt
->input
->end
;
8641 if (*in
!= limit
) goto need_complex
;
8643 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
8644 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
8647 const xmlChar
*oldbase
= ctxt
->input
->base
;
8649 if (ctxt
->instate
== XML_PARSER_EOF
)
8651 if (oldbase
!= ctxt
->input
->base
) {
8652 long delta
= ctxt
->input
->base
- oldbase
;
8653 start
= start
+ delta
;
8656 end
= ctxt
->input
->end
;
8660 if (*in
!= limit
) goto need_complex
;
8664 *len
= last
- start
;
8665 ret
= (xmlChar
*) start
;
8667 if (alloc
) *alloc
= 1;
8668 ret
= xmlStrndup(start
, last
- start
);
8671 if (alloc
) *alloc
= 0;
8674 if (alloc
) *alloc
= 1;
8675 return xmlParseAttValueComplex(ctxt
, len
, normalize
);
8679 * xmlParseAttribute2:
8680 * @ctxt: an XML parser context
8681 * @pref: the element prefix
8682 * @elem: the element name
8683 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8684 * @value: a xmlChar ** used to store the value of the attribute
8685 * @len: an int * to save the length of the attribute
8686 * @alloc: an int * to indicate if the attribute was allocated
8688 * parse an attribute in the new SAX2 framework.
8690 * Returns the attribute name, and the value in *value.
8693 static const xmlChar
*
8694 xmlParseAttribute2(xmlParserCtxtPtr ctxt
,
8695 const xmlChar
* pref
, const xmlChar
* elem
,
8696 const xmlChar
** prefix
, xmlChar
** value
,
8697 int *len
, int *alloc
)
8699 const xmlChar
*name
;
8700 xmlChar
*val
, *internal_val
= NULL
;
8705 name
= xmlParseQName(ctxt
, prefix
);
8707 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8708 "error parsing attribute name\n");
8713 * get the type if needed
8715 if (ctxt
->attsSpecial
!= NULL
) {
8718 type
= (int) (long) xmlHashQLookup2(ctxt
->attsSpecial
,
8719 pref
, elem
, *prefix
, name
);
8731 val
= xmlParseAttValueInternal(ctxt
, len
, alloc
, normalize
);
8734 * Sometimes a second normalisation pass for spaces is needed
8735 * but that only happens if charrefs or entity references
8736 * have been used in the attribute value, i.e. the attribute
8737 * value has been extracted in an allocated string already.
8740 const xmlChar
*val2
;
8742 val2
= xmlAttrNormalizeSpace2(ctxt
, val
, len
);
8743 if ((val2
!= NULL
) && (val2
!= val
)) {
8745 val
= (xmlChar
*) val2
;
8749 ctxt
->instate
= XML_PARSER_CONTENT
;
8751 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8752 "Specification mandate value for attribute %s\n",
8757 if (*prefix
== ctxt
->str_xml
) {
8759 * Check that xml:lang conforms to the specification
8760 * No more registered as an error, just generate a warning now
8761 * since this was deprecated in XML second edition
8763 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"lang"))) {
8764 internal_val
= xmlStrndup(val
, *len
);
8765 if (!xmlCheckLanguageID(internal_val
)) {
8766 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8767 "Malformed value for xml:lang : %s\n",
8768 internal_val
, NULL
);
8773 * Check that xml:space conforms to the specification
8775 if (xmlStrEqual(name
, BAD_CAST
"space")) {
8776 internal_val
= xmlStrndup(val
, *len
);
8777 if (xmlStrEqual(internal_val
, BAD_CAST
"default"))
8779 else if (xmlStrEqual(internal_val
, BAD_CAST
"preserve"))
8782 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8783 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8784 internal_val
, NULL
);
8788 xmlFree(internal_val
);
8796 * xmlParseStartTag2:
8797 * @ctxt: an XML parser context
8799 * parse a start of tag either for rule element or
8800 * EmptyElement. In both cases we don't parse the tag closing chars.
8801 * This routine is called when running SAX2 parsing
8803 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8805 * [ WFC: Unique Att Spec ]
8806 * No attribute name may appear more than once in the same start-tag or
8807 * empty-element tag.
8809 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8811 * [ WFC: Unique Att Spec ]
8812 * No attribute name may appear more than once in the same start-tag or
8813 * empty-element tag.
8817 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8819 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8821 * Returns the element name parsed
8824 static const xmlChar
*
8825 xmlParseStartTag2(xmlParserCtxtPtr ctxt
, const xmlChar
**pref
,
8826 const xmlChar
**URI
, int *tlen
) {
8827 const xmlChar
*localname
;
8828 const xmlChar
*prefix
;
8829 const xmlChar
*attname
;
8830 const xmlChar
*aprefix
;
8831 const xmlChar
*nsname
;
8833 const xmlChar
**atts
= ctxt
->atts
;
8834 int maxatts
= ctxt
->maxatts
;
8835 int nratts
, nbatts
, nbdef
;
8836 int i
, j
, nbNs
, attval
, oldline
, oldcol
;
8837 const xmlChar
*base
;
8839 int nsNr
= ctxt
->nsNr
;
8841 if (RAW
!= '<') return(NULL
);
8845 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8846 * point since the attribute values may be stored as pointers to
8847 * the buffer and calling SHRINK would destroy them !
8848 * The Shrinking is only possible once the full set of attribute
8849 * callbacks have been done.
8853 base
= ctxt
->input
->base
;
8854 cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
8855 oldline
= ctxt
->input
->line
;
8856 oldcol
= ctxt
->input
->col
;
8862 /* Forget any namespaces added during an earlier parse of this element. */
8865 localname
= xmlParseQName(ctxt
, &prefix
);
8866 if (localname
== NULL
) {
8867 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8868 "StartTag: invalid element name\n");
8871 *tlen
= ctxt
->input
->cur
- ctxt
->input
->base
- cur
;
8874 * Now parse the attributes, it ends up with the ending
8880 if (ctxt
->input
->base
!= base
) goto base_changed
;
8882 while (((RAW
!= '>') &&
8883 ((RAW
!= '/') || (NXT(1) != '>')) &&
8884 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8885 const xmlChar
*q
= CUR_PTR
;
8886 unsigned int cons
= ctxt
->input
->consumed
;
8887 int len
= -1, alloc
= 0;
8889 attname
= xmlParseAttribute2(ctxt
, prefix
, localname
,
8890 &aprefix
, &attvalue
, &len
, &alloc
);
8891 if (ctxt
->input
->base
!= base
) {
8892 if ((attvalue
!= NULL
) && (alloc
!= 0))
8897 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
8898 if (len
< 0) len
= xmlStrlen(attvalue
);
8899 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
8900 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
8904 uri
= xmlParseURI((const char *) URL
);
8906 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
8907 "xmlns: '%s' is not a valid URI\n",
8910 if (uri
->scheme
== NULL
) {
8911 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
8912 "xmlns: URI %s is not absolute\n",
8917 if (URL
== ctxt
->str_xml_ns
) {
8918 if (attname
!= ctxt
->str_xml
) {
8919 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8920 "xml namespace URI cannot be the default namespace\n",
8923 goto skip_default_ns
;
8927 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
8928 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8929 "reuse of the xmlns namespace name is forbidden\n",
8931 goto skip_default_ns
;
8935 * check that it's not a defined namespace
8937 for (j
= 1;j
<= nbNs
;j
++)
8938 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
8941 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8943 if (nsPush(ctxt
, NULL
, URL
) > 0) nbNs
++;
8945 if (alloc
!= 0) xmlFree(attvalue
);
8949 if (aprefix
== ctxt
->str_xmlns
) {
8950 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
8953 if (attname
== ctxt
->str_xml
) {
8954 if (URL
!= ctxt
->str_xml_ns
) {
8955 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8956 "xml namespace prefix mapped to wrong URI\n",
8960 * Do not keep a namespace definition node
8964 if (URL
== ctxt
->str_xml_ns
) {
8965 if (attname
!= ctxt
->str_xml
) {
8966 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8967 "xml namespace URI mapped to wrong prefix\n",
8972 if (attname
== ctxt
->str_xmlns
) {
8973 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8974 "redefinition of the xmlns prefix is forbidden\n",
8980 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
8981 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8982 "reuse of the xmlns namespace name is forbidden\n",
8986 if ((URL
== NULL
) || (URL
[0] == 0)) {
8987 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8988 "xmlns:%s: Empty XML namespace is not allowed\n",
8989 attname
, NULL
, NULL
);
8992 uri
= xmlParseURI((const char *) URL
);
8994 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
8995 "xmlns:%s: '%s' is not a valid URI\n",
8996 attname
, URL
, NULL
);
8998 if ((ctxt
->pedantic
) && (uri
->scheme
== NULL
)) {
8999 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9000 "xmlns:%s: URI %s is not absolute\n",
9001 attname
, URL
, NULL
);
9008 * check that it's not a defined namespace
9010 for (j
= 1;j
<= nbNs
;j
++)
9011 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9014 xmlErrAttributeDup(ctxt
, aprefix
, attname
);
9016 if (nsPush(ctxt
, attname
, URL
) > 0) nbNs
++;
9018 if (alloc
!= 0) xmlFree(attvalue
);
9020 if (ctxt
->input
->base
!= base
) goto base_changed
;
9025 * Add the pair to atts
9027 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9028 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9029 if (attvalue
[len
] == 0)
9033 maxatts
= ctxt
->maxatts
;
9036 ctxt
->attallocs
[nratts
++] = alloc
;
9037 atts
[nbatts
++] = attname
;
9038 atts
[nbatts
++] = aprefix
;
9039 atts
[nbatts
++] = NULL
; /* the URI will be fetched later */
9040 atts
[nbatts
++] = attvalue
;
9042 atts
[nbatts
++] = attvalue
;
9044 * tag if some deallocation is needed
9046 if (alloc
!= 0) attval
= 1;
9048 if ((attvalue
!= NULL
) && (attvalue
[len
] == 0))
9055 if (ctxt
->instate
== XML_PARSER_EOF
)
9057 if (ctxt
->input
->base
!= base
) goto base_changed
;
9058 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9060 if (!IS_BLANK_CH(RAW
)) {
9061 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9062 "attributes construct error\n");
9066 if ((cons
== ctxt
->input
->consumed
) && (q
== CUR_PTR
) &&
9067 (attname
== NULL
) && (attvalue
== NULL
)) {
9068 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9069 "xmlParseStartTag: problem parsing attributes\n");
9073 if (ctxt
->input
->base
!= base
) goto base_changed
;
9077 * The attributes defaulting
9079 if (ctxt
->attsDefault
!= NULL
) {
9080 xmlDefAttrsPtr defaults
;
9082 defaults
= xmlHashLookup2(ctxt
->attsDefault
, localname
, prefix
);
9083 if (defaults
!= NULL
) {
9084 for (i
= 0;i
< defaults
->nbAttrs
;i
++) {
9085 attname
= defaults
->values
[5 * i
];
9086 aprefix
= defaults
->values
[5 * i
+ 1];
9089 * special work for namespaces defaulted defs
9091 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9093 * check that it's not a defined namespace
9095 for (j
= 1;j
<= nbNs
;j
++)
9096 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9098 if (j
<= nbNs
) continue;
9100 nsname
= xmlGetNamespace(ctxt
, NULL
);
9101 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9102 if (nsPush(ctxt
, NULL
,
9103 defaults
->values
[5 * i
+ 2]) > 0)
9106 } else if (aprefix
== ctxt
->str_xmlns
) {
9108 * check that it's not a defined namespace
9110 for (j
= 1;j
<= nbNs
;j
++)
9111 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9113 if (j
<= nbNs
) continue;
9115 nsname
= xmlGetNamespace(ctxt
, attname
);
9116 if (nsname
!= defaults
->values
[2]) {
9117 if (nsPush(ctxt
, attname
,
9118 defaults
->values
[5 * i
+ 2]) > 0)
9123 * check that it's not a defined attribute
9125 for (j
= 0;j
< nbatts
;j
+=5) {
9126 if ((attname
== atts
[j
]) && (aprefix
== atts
[j
+1]))
9129 if (j
< nbatts
) continue;
9131 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9132 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9135 maxatts
= ctxt
->maxatts
;
9138 atts
[nbatts
++] = attname
;
9139 atts
[nbatts
++] = aprefix
;
9140 if (aprefix
== NULL
)
9141 atts
[nbatts
++] = NULL
;
9143 atts
[nbatts
++] = xmlGetNamespace(ctxt
, aprefix
);
9144 atts
[nbatts
++] = defaults
->values
[5 * i
+ 2];
9145 atts
[nbatts
++] = defaults
->values
[5 * i
+ 3];
9146 if ((ctxt
->standalone
== 1) &&
9147 (defaults
->values
[5 * i
+ 4] != NULL
)) {
9148 xmlValidityError(ctxt
, XML_DTD_STANDALONE_DEFAULTED
,
9149 "standalone: attribute %s on %s defaulted from external subset\n",
9150 attname
, localname
);
9159 * The attributes checkings
9161 for (i
= 0; i
< nbatts
;i
+= 5) {
9163 * The default namespace does not apply to attribute names.
9165 if (atts
[i
+ 1] != NULL
) {
9166 nsname
= xmlGetNamespace(ctxt
, atts
[i
+ 1]);
9167 if (nsname
== NULL
) {
9168 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9169 "Namespace prefix %s for %s on %s is not defined\n",
9170 atts
[i
+ 1], atts
[i
], localname
);
9172 atts
[i
+ 2] = nsname
;
9176 * [ WFC: Unique Att Spec ]
9177 * No attribute name may appear more than once in the same
9178 * start-tag or empty-element tag.
9179 * As extended by the Namespace in XML REC.
9181 for (j
= 0; j
< i
;j
+= 5) {
9182 if (atts
[i
] == atts
[j
]) {
9183 if (atts
[i
+1] == atts
[j
+1]) {
9184 xmlErrAttributeDup(ctxt
, atts
[i
+1], atts
[i
]);
9187 if ((nsname
!= NULL
) && (atts
[j
+ 2] == nsname
)) {
9188 xmlNsErr(ctxt
, XML_NS_ERR_ATTRIBUTE_REDEFINED
,
9189 "Namespaced Attribute %s in '%s' redefined\n",
9190 atts
[i
], nsname
, NULL
);
9197 nsname
= xmlGetNamespace(ctxt
, prefix
);
9198 if ((prefix
!= NULL
) && (nsname
== NULL
)) {
9199 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9200 "Namespace prefix %s on %s is not defined\n",
9201 prefix
, localname
, NULL
);
9207 * SAX: Start of Element !
9209 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElementNs
!= NULL
) &&
9210 (!ctxt
->disableSAX
)) {
9212 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9213 nsname
, nbNs
, &ctxt
->nsTab
[ctxt
->nsNr
- 2 * nbNs
],
9214 nbatts
/ 5, nbdef
, atts
);
9216 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9217 nsname
, 0, NULL
, nbatts
/ 5, nbdef
, atts
);
9221 * Free up attribute allocated strings if needed
9224 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9225 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9226 xmlFree((xmlChar
*) atts
[i
]);
9233 * the attribute strings are valid iff the base didn't change
9236 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9237 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9238 xmlFree((xmlChar
*) atts
[i
]);
9240 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
9241 ctxt
->input
->line
= oldline
;
9242 ctxt
->input
->col
= oldcol
;
9243 if (ctxt
->wellFormed
== 1) {
9251 * @ctxt: an XML parser context
9252 * @line: line of the start tag
9253 * @nsNr: number of namespaces on the start tag
9255 * parse an end of tag
9257 * [42] ETag ::= '</' Name S? '>'
9261 * [NS 9] ETag ::= '</' QName S? '>'
9265 xmlParseEndTag2(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
,
9266 const xmlChar
*URI
, int line
, int nsNr
, int tlen
) {
9267 const xmlChar
*name
;
9270 if ((RAW
!= '<') || (NXT(1) != '/')) {
9271 xmlFatalErr(ctxt
, XML_ERR_LTSLASH_REQUIRED
, NULL
);
9276 if ((tlen
> 0) && (xmlStrncmp(ctxt
->input
->cur
, ctxt
->name
, tlen
) == 0)) {
9277 if (ctxt
->input
->cur
[tlen
] == '>') {
9278 ctxt
->input
->cur
+= tlen
+ 1;
9281 ctxt
->input
->cur
+= tlen
;
9285 name
= xmlParseNameAndCompare(ctxt
, ctxt
->name
);
9287 name
= xmlParseQNameAndCompare(ctxt
, ctxt
->name
, prefix
);
9291 * We should definitely be at the ending "S? '>'" part
9294 if (ctxt
->instate
== XML_PARSER_EOF
)
9297 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
9298 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
9303 * [ WFC: Element Type Match ]
9304 * The Name in an element's end-tag must match the element type in the
9308 if (name
!= (xmlChar
*)1) {
9309 if (name
== NULL
) name
= BAD_CAST
"unparseable";
9310 if ((line
== 0) && (ctxt
->node
!= NULL
))
9311 line
= ctxt
->node
->line
;
9312 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
9313 "Opening and ending tag mismatch: %s line %d and %s\n",
9314 ctxt
->name
, line
, name
);
9321 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9322 (!ctxt
->disableSAX
))
9323 ctxt
->sax
->endElementNs(ctxt
->userData
, ctxt
->name
, prefix
, URI
);
9333 * @ctxt: an XML parser context
9335 * Parse escaped pure raw content.
9337 * [18] CDSect ::= CDStart CData CDEnd
9339 * [19] CDStart ::= '<![CDATA['
9341 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9343 * [21] CDEnd ::= ']]>'
9346 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
9347 xmlChar
*buf
= NULL
;
9349 int size
= XML_PARSER_BUFFER_SIZE
;
9355 /* Check 2.6.0 was NXT(0) not RAW */
9356 if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9361 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
9364 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9365 ctxt
->instate
= XML_PARSER_CONTENT
;
9371 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9372 ctxt
->instate
= XML_PARSER_CONTENT
;
9377 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9379 xmlErrMemory(ctxt
, NULL
);
9382 while (IS_CHAR(cur
) &&
9383 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
9384 if (len
+ 5 >= size
) {
9388 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
9391 xmlErrMemory(ctxt
, NULL
);
9396 COPY_BUF(rl
,buf
,len
,r
);
9404 if (ctxt
->instate
== XML_PARSER_EOF
) {
9414 ctxt
->instate
= XML_PARSER_CONTENT
;
9416 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9417 "CData section not finished\n%.50s\n", buf
);
9424 * OK the buffer is to be consumed as cdata.
9426 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
9427 if (ctxt
->sax
->cdataBlock
!= NULL
)
9428 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
9429 else if (ctxt
->sax
->characters
!= NULL
)
9430 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
9437 * @ctxt: an XML parser context
9441 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9445 xmlParseContent(xmlParserCtxtPtr ctxt
) {
9447 while ((RAW
!= 0) &&
9448 ((RAW
!= '<') || (NXT(1) != '/')) &&
9449 (ctxt
->instate
!= XML_PARSER_EOF
)) {
9450 const xmlChar
*test
= CUR_PTR
;
9451 unsigned int cons
= ctxt
->input
->consumed
;
9452 const xmlChar
*cur
= ctxt
->input
->cur
;
9455 * First case : a Processing Instruction.
9457 if ((*cur
== '<') && (cur
[1] == '?')) {
9462 * Second case : a CDSection
9464 /* 2.6.0 test was *cur not RAW */
9465 else if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9466 xmlParseCDSect(ctxt
);
9470 * Third case : a comment
9472 else if ((*cur
== '<') && (NXT(1) == '!') &&
9473 (NXT(2) == '-') && (NXT(3) == '-')) {
9474 xmlParseComment(ctxt
);
9475 ctxt
->instate
= XML_PARSER_CONTENT
;
9479 * Fourth case : a sub-element.
9481 else if (*cur
== '<') {
9482 xmlParseElement(ctxt
);
9486 * Fifth case : a reference. If it has not been resolved,
9487 * parsing returns its Name, create the node
9490 else if (*cur
== '&') {
9491 xmlParseReference(ctxt
);
9495 * Last case, text. Note that References are handled directly.
9498 xmlParseCharData(ctxt
, 0);
9503 * Pop-up of finished entities.
9505 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
9509 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
9510 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9511 "detected an error in element content\n");
9512 ctxt
->instate
= XML_PARSER_EOF
;
9520 * @ctxt: an XML parser context
9522 * parse an XML element, this is highly recursive
9524 * [39] element ::= EmptyElemTag | STag content ETag
9526 * [ WFC: Element Type Match ]
9527 * The Name in an element's end-tag must match the element type in the
9533 xmlParseElement(xmlParserCtxtPtr ctxt
) {
9534 const xmlChar
*name
;
9535 const xmlChar
*prefix
= NULL
;
9536 const xmlChar
*URI
= NULL
;
9537 xmlParserNodeInfo node_info
;
9540 int nsNr
= ctxt
->nsNr
;
9542 if (((unsigned int) ctxt
->nameNr
> xmlParserMaxDepth
) &&
9543 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9544 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
9545 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9547 ctxt
->instate
= XML_PARSER_EOF
;
9551 /* Capture start position */
9552 if (ctxt
->record_info
) {
9553 node_info
.begin_pos
= ctxt
->input
->consumed
+
9554 (CUR_PTR
- ctxt
->input
->base
);
9555 node_info
.begin_line
= ctxt
->input
->line
;
9558 if (ctxt
->spaceNr
== 0)
9559 spacePush(ctxt
, -1);
9560 else if (*ctxt
->space
== -2)
9561 spacePush(ctxt
, -1);
9563 spacePush(ctxt
, *ctxt
->space
);
9565 line
= ctxt
->input
->line
;
9566 #ifdef LIBXML_SAX1_ENABLED
9568 #endif /* LIBXML_SAX1_ENABLED */
9569 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
9570 #ifdef LIBXML_SAX1_ENABLED
9572 name
= xmlParseStartTag(ctxt
);
9573 #endif /* LIBXML_SAX1_ENABLED */
9574 if (ctxt
->instate
== XML_PARSER_EOF
)
9580 namePush(ctxt
, name
);
9583 #ifdef LIBXML_VALID_ENABLED
9585 * [ VC: Root Element Type ]
9586 * The Name in the document type declaration must match the element
9587 * type of the root element.
9589 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
9590 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
9591 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
9592 #endif /* LIBXML_VALID_ENABLED */
9595 * Check for an Empty Element.
9597 if ((RAW
== '/') && (NXT(1) == '>')) {
9600 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9601 (!ctxt
->disableSAX
))
9602 ctxt
->sax
->endElementNs(ctxt
->userData
, name
, prefix
, URI
);
9603 #ifdef LIBXML_SAX1_ENABLED
9605 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
9606 (!ctxt
->disableSAX
))
9607 ctxt
->sax
->endElement(ctxt
->userData
, name
);
9608 #endif /* LIBXML_SAX1_ENABLED */
9612 if (nsNr
!= ctxt
->nsNr
)
9613 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
9614 if ( ret
!= NULL
&& ctxt
->record_info
) {
9615 node_info
.end_pos
= ctxt
->input
->consumed
+
9616 (CUR_PTR
- ctxt
->input
->base
);
9617 node_info
.end_line
= ctxt
->input
->line
;
9618 node_info
.node
= ret
;
9619 xmlParserAddNodeInfo(ctxt
, &node_info
);
9626 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_GT_REQUIRED
,
9627 "Couldn't find end of Start Tag %s line %d\n",
9631 * end of parsing of this node.
9636 if (nsNr
!= ctxt
->nsNr
)
9637 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
9640 * Capture end position and add node
9642 if ( ret
!= NULL
&& ctxt
->record_info
) {
9643 node_info
.end_pos
= ctxt
->input
->consumed
+
9644 (CUR_PTR
- ctxt
->input
->base
);
9645 node_info
.end_line
= ctxt
->input
->line
;
9646 node_info
.node
= ret
;
9647 xmlParserAddNodeInfo(ctxt
, &node_info
);
9653 * Parse the content of the element:
9655 xmlParseContent(ctxt
);
9656 if (ctxt
->instate
== XML_PARSER_EOF
)
9658 if (!IS_BYTE_CHAR(RAW
)) {
9659 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9660 "Premature end of data in tag %s line %d\n",
9664 * end of parsing of this node.
9669 if (nsNr
!= ctxt
->nsNr
)
9670 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
9675 * parse the end of tag: '</' should be here.
9678 xmlParseEndTag2(ctxt
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
, tlen
);
9681 #ifdef LIBXML_SAX1_ENABLED
9683 xmlParseEndTag1(ctxt
, line
);
9684 #endif /* LIBXML_SAX1_ENABLED */
9687 * Capture end position and add node
9689 if ( ret
!= NULL
&& ctxt
->record_info
) {
9690 node_info
.end_pos
= ctxt
->input
->consumed
+
9691 (CUR_PTR
- ctxt
->input
->base
);
9692 node_info
.end_line
= ctxt
->input
->line
;
9693 node_info
.node
= ret
;
9694 xmlParserAddNodeInfo(ctxt
, &node_info
);
9699 * xmlParseVersionNum:
9700 * @ctxt: an XML parser context
9702 * parse the XML version value.
9704 * [26] VersionNum ::= '1.' [0-9]+
9706 * In practice allow [0-9].[0-9]+ at that level
9708 * Returns the string giving the XML version number, or NULL
9711 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
9712 xmlChar
*buf
= NULL
;
9717 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9719 xmlErrMemory(ctxt
, NULL
);
9723 if (!((cur
>= '0') && (cur
<= '9'))) {
9737 while ((cur
>= '0') && (cur
<= '9')) {
9738 if (len
+ 1 >= size
) {
9742 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
9745 xmlErrMemory(ctxt
, NULL
);
9759 * xmlParseVersionInfo:
9760 * @ctxt: an XML parser context
9762 * parse the XML version.
9764 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9766 * [25] Eq ::= S? '=' S?
9768 * Returns the version string, e.g. "1.0"
9772 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
9773 xmlChar
*version
= NULL
;
9775 if (CMP7(CUR_PTR
, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9779 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
9786 version
= xmlParseVersionNum(ctxt
);
9788 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9791 } else if (RAW
== '\''){
9793 version
= xmlParseVersionNum(ctxt
);
9795 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9799 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
9807 * @ctxt: an XML parser context
9809 * parse the XML encoding name
9811 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9813 * Returns the encoding name value or NULL
9816 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
9817 xmlChar
*buf
= NULL
;
9823 if (((cur
>= 'a') && (cur
<= 'z')) ||
9824 ((cur
>= 'A') && (cur
<= 'Z'))) {
9825 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9827 xmlErrMemory(ctxt
, NULL
);
9834 while (((cur
>= 'a') && (cur
<= 'z')) ||
9835 ((cur
>= 'A') && (cur
<= 'Z')) ||
9836 ((cur
>= '0') && (cur
<= '9')) ||
9837 (cur
== '.') || (cur
== '_') ||
9839 if (len
+ 1 >= size
) {
9843 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
9845 xmlErrMemory(ctxt
, NULL
);
9862 xmlFatalErr(ctxt
, XML_ERR_ENCODING_NAME
, NULL
);
9868 * xmlParseEncodingDecl:
9869 * @ctxt: an XML parser context
9871 * parse the XML encoding declaration
9873 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9875 * this setups the conversion filters.
9877 * Returns the encoding value or NULL
9881 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
9882 xmlChar
*encoding
= NULL
;
9885 if (CMP8(CUR_PTR
, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9889 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
9896 encoding
= xmlParseEncName(ctxt
);
9898 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9901 } else if (RAW
== '\''){
9903 encoding
= xmlParseEncName(ctxt
);
9905 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9909 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
9912 * UTF-16 encoding switch has already taken place at this stage,
9913 * moreover the little-endian/big-endian selection is already done
9915 if ((encoding
!= NULL
) &&
9916 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-16")) ||
9917 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF16")))) {
9919 * If no encoding was passed to the parser, the document is
9920 * labelled UTF-16 and no decoder is present, i.e. the
9921 * document is apparently UTF-8 compatible, then raise an
9922 * encoding mismatch fatal error
9924 if ((ctxt
->encoding
== NULL
) &&
9925 (ctxt
->input
->buf
!= NULL
) &&
9926 (ctxt
->input
->buf
->encoder
== NULL
)) {
9927 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_ENCODING
,
9928 "Document labelled UTF-16 but has UTF-8 content\n");
9930 if (ctxt
->encoding
!= NULL
)
9931 xmlFree((xmlChar
*) ctxt
->encoding
);
9932 ctxt
->encoding
= encoding
;
9935 * UTF-8 encoding is handled natively
9937 else if ((encoding
!= NULL
) &&
9938 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-8")) ||
9939 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF8")))) {
9940 if (ctxt
->encoding
!= NULL
)
9941 xmlFree((xmlChar
*) ctxt
->encoding
);
9942 ctxt
->encoding
= encoding
;
9944 else if (encoding
!= NULL
) {
9945 xmlCharEncodingHandlerPtr handler
;
9947 if (ctxt
->input
->encoding
!= NULL
)
9948 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
9949 ctxt
->input
->encoding
= encoding
;
9951 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
9952 if (handler
!= NULL
) {
9953 xmlSwitchToEncoding(ctxt
, handler
);
9955 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
9956 "Unsupported encoding %s\n", encoding
);
9966 * @ctxt: an XML parser context
9968 * parse the XML standalone declaration
9970 * [32] SDDecl ::= S 'standalone' Eq
9971 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9973 * [ VC: Standalone Document Declaration ]
9974 * TODO The standalone document declaration must have the value "no"
9975 * if any external markup declarations contain declarations of:
9976 * - attributes with default values, if elements to which these
9977 * attributes apply appear in the document without specifications
9978 * of values for these attributes, or
9979 * - entities (other than amp, lt, gt, apos, quot), if references
9980 * to those entities appear in the document, or
9981 * - attributes with values subject to normalization, where the
9982 * attribute appears in the document with a value which will change
9983 * as a result of normalization, or
9984 * - element types with element content, if white space occurs directly
9985 * within any instance of those types.
9988 * 1 if standalone="yes"
9989 * 0 if standalone="no"
9990 * -2 if standalone attribute is missing or invalid
9991 * (A standalone value of -2 means that the XML declaration was found,
9992 * but no value was specified for the standalone attribute).
9996 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
9997 int standalone
= -2;
10000 if (CMP10(CUR_PTR
, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10004 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10005 return(standalone
);
10011 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10014 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10019 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10022 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10025 } else if (RAW
== '"'){
10027 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10030 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10035 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10038 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10042 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10045 return(standalone
);
10050 * @ctxt: an XML parser context
10052 * parse an XML declaration header
10054 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10058 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
10062 * This value for standalone indicates that the document has an
10063 * XML declaration but it does not have a standalone attribute.
10064 * It will be overwritten later if a standalone attribute is found.
10066 ctxt
->input
->standalone
= -2;
10069 * We know that '<?xml' is here.
10073 if (!IS_BLANK_CH(RAW
)) {
10074 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
10075 "Blank needed after '<?xml'\n");
10080 * We must have the VersionInfo here.
10082 version
= xmlParseVersionInfo(ctxt
);
10083 if (version
== NULL
) {
10084 xmlFatalErr(ctxt
, XML_ERR_VERSION_MISSING
, NULL
);
10086 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
10088 * Changed here for XML-1.0 5th edition
10090 if (ctxt
->options
& XML_PARSE_OLD10
) {
10091 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10092 "Unsupported version '%s'\n",
10095 if ((version
[0] == '1') && ((version
[1] == '.'))) {
10096 xmlWarningMsg(ctxt
, XML_WAR_UNKNOWN_VERSION
,
10097 "Unsupported version '%s'\n",
10100 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10101 "Unsupported version '%s'\n",
10106 if (ctxt
->version
!= NULL
)
10107 xmlFree((void *) ctxt
->version
);
10108 ctxt
->version
= version
;
10112 * We may have the encoding declaration
10114 if (!IS_BLANK_CH(RAW
)) {
10115 if ((RAW
== '?') && (NXT(1) == '>')) {
10119 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10121 xmlParseEncodingDecl(ctxt
);
10122 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10124 * The XML REC instructs us to stop parsing right here
10130 * We may have the standalone status.
10132 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK_CH(RAW
))) {
10133 if ((RAW
== '?') && (NXT(1) == '>')) {
10137 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10141 * We can grow the input buffer freely at that point
10146 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
10149 if ((RAW
== '?') && (NXT(1) == '>')) {
10151 } else if (RAW
== '>') {
10152 /* Deprecated old WD ... */
10153 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10156 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10157 MOVETO_ENDTAG(CUR_PTR
);
10164 * @ctxt: an XML parser context
10166 * parse an XML Misc* optional field.
10168 * [27] Misc ::= Comment | PI | S
10172 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
10173 while ((ctxt
->instate
!= XML_PARSER_EOF
) &&
10174 (((RAW
== '<') && (NXT(1) == '?')) ||
10175 (CMP4(CUR_PTR
, '<', '!', '-', '-')) ||
10176 IS_BLANK_CH(CUR
))) {
10177 if ((RAW
== '<') && (NXT(1) == '?')) {
10179 } else if (IS_BLANK_CH(CUR
)) {
10182 xmlParseComment(ctxt
);
10187 * xmlParseDocument:
10188 * @ctxt: an XML parser context
10190 * parse an XML document (and build a tree if using the standard SAX
10193 * [1] document ::= prolog element Misc*
10195 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10197 * Returns 0, or -1 in case of error. The parser context is augmented
10198 * as a result of the parsing.
10202 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
10204 xmlCharEncoding enc
;
10208 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10214 * SAX: detecting the level.
10216 xmlDetectSAX2(ctxt
);
10219 * SAX: beginning of the document processing.
10221 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10222 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10223 if (ctxt
->instate
== XML_PARSER_EOF
)
10226 if ((ctxt
->encoding
== (const xmlChar
*)XML_CHAR_ENCODING_NONE
) &&
10227 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
10229 * Get the 4 first bytes and decode the charset
10230 * if enc != XML_CHAR_ENCODING_NONE
10231 * plug some encoding conversion routines.
10237 enc
= xmlDetectCharEncoding(&start
[0], 4);
10238 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10239 xmlSwitchEncoding(ctxt
, enc
);
10245 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10249 * Check for the XMLDecl in the Prolog.
10250 * Do not GROW here, to keep the detected encoder from decoding more
10251 * than just the first line, unless the amount of data is really
10252 * too small to hold "<?xml version="1.0" encoding="foo"
10254 if ((ctxt
->input
->end
- ctxt
->input
->cur
) < 35) {
10257 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10260 * Note that we will switch encoding on the fly.
10262 xmlParseXMLDecl(ctxt
);
10263 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10265 * The XML REC instructs us to stop parsing right here
10269 ctxt
->standalone
= ctxt
->input
->standalone
;
10272 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10274 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10275 ctxt
->sax
->startDocument(ctxt
->userData
);
10276 if (ctxt
->instate
== XML_PARSER_EOF
)
10280 * The Misc part of the Prolog
10283 xmlParseMisc(ctxt
);
10286 * Then possibly doc type declaration(s) and more Misc
10287 * (doctypedecl Misc*)?
10290 if (CMP9(CUR_PTR
, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10292 ctxt
->inSubset
= 1;
10293 xmlParseDocTypeDecl(ctxt
);
10295 ctxt
->instate
= XML_PARSER_DTD
;
10296 xmlParseInternalSubset(ctxt
);
10297 if (ctxt
->instate
== XML_PARSER_EOF
)
10302 * Create and update the external subset.
10304 ctxt
->inSubset
= 2;
10305 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
10306 (!ctxt
->disableSAX
))
10307 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
10308 ctxt
->extSubSystem
, ctxt
->extSubURI
);
10309 if (ctxt
->instate
== XML_PARSER_EOF
)
10311 ctxt
->inSubset
= 0;
10313 xmlCleanSpecialAttr(ctxt
);
10315 ctxt
->instate
= XML_PARSER_PROLOG
;
10316 xmlParseMisc(ctxt
);
10320 * Time to start parsing the tree itself
10324 xmlFatalErrMsg(ctxt
, XML_ERR_DOCUMENT_EMPTY
,
10325 "Start tag expected, '<' not found\n");
10327 ctxt
->instate
= XML_PARSER_CONTENT
;
10328 xmlParseElement(ctxt
);
10329 ctxt
->instate
= XML_PARSER_EPILOG
;
10333 * The Misc part at the end
10335 xmlParseMisc(ctxt
);
10338 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
10340 ctxt
->instate
= XML_PARSER_EOF
;
10344 * SAX: end of the document processing.
10346 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10347 ctxt
->sax
->endDocument(ctxt
->userData
);
10350 * Remove locally kept entity definitions if the tree was not built
10352 if ((ctxt
->myDoc
!= NULL
) &&
10353 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
10354 xmlFreeDoc(ctxt
->myDoc
);
10355 ctxt
->myDoc
= NULL
;
10358 if ((ctxt
->wellFormed
) && (ctxt
->myDoc
!= NULL
)) {
10359 ctxt
->myDoc
->properties
|= XML_DOC_WELLFORMED
;
10361 ctxt
->myDoc
->properties
|= XML_DOC_DTDVALID
;
10362 if (ctxt
->nsWellFormed
)
10363 ctxt
->myDoc
->properties
|= XML_DOC_NSVALID
;
10364 if (ctxt
->options
& XML_PARSE_OLD10
)
10365 ctxt
->myDoc
->properties
|= XML_DOC_OLD10
;
10367 if (! ctxt
->wellFormed
) {
10375 * xmlParseExtParsedEnt:
10376 * @ctxt: an XML parser context
10378 * parse a general parsed entity
10379 * An external general parsed entity is well-formed if it matches the
10380 * production labeled extParsedEnt.
10382 * [78] extParsedEnt ::= TextDecl? content
10384 * Returns 0, or -1 in case of error. The parser context is augmented
10385 * as a result of the parsing.
10389 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
10391 xmlCharEncoding enc
;
10393 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10396 xmlDefaultSAXHandlerInit();
10398 xmlDetectSAX2(ctxt
);
10403 * SAX: beginning of the document processing.
10405 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10406 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10409 * Get the 4 first bytes and decode the charset
10410 * if enc != XML_CHAR_ENCODING_NONE
10411 * plug some encoding conversion routines.
10413 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
10418 enc
= xmlDetectCharEncoding(start
, 4);
10419 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10420 xmlSwitchEncoding(ctxt
, enc
);
10426 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10430 * Check for the XMLDecl in the Prolog.
10433 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10436 * Note that we will switch encoding on the fly.
10438 xmlParseXMLDecl(ctxt
);
10439 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10441 * The XML REC instructs us to stop parsing right here
10447 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10449 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10450 ctxt
->sax
->startDocument(ctxt
->userData
);
10451 if (ctxt
->instate
== XML_PARSER_EOF
)
10455 * Doing validity checking on chunk doesn't make sense
10457 ctxt
->instate
= XML_PARSER_CONTENT
;
10458 ctxt
->validate
= 0;
10459 ctxt
->loadsubset
= 0;
10462 xmlParseContent(ctxt
);
10463 if (ctxt
->instate
== XML_PARSER_EOF
)
10466 if ((RAW
== '<') && (NXT(1) == '/')) {
10467 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
10468 } else if (RAW
!= 0) {
10469 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
10473 * SAX: end of the document processing.
10475 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10476 ctxt
->sax
->endDocument(ctxt
->userData
);
10478 if (! ctxt
->wellFormed
) return(-1);
10482 #ifdef LIBXML_PUSH_ENABLED
10483 /************************************************************************
10485 * Progressive parsing interfaces *
10487 ************************************************************************/
10490 * xmlParseLookupSequence:
10491 * @ctxt: an XML parser context
10492 * @first: the first char to lookup
10493 * @next: the next char to lookup or zero
10494 * @third: the next char to lookup or zero
10496 * Try to find if a sequence (first, next, third) or just (first next) or
10497 * (first) is available in the input stream.
10498 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10499 * to avoid rescanning sequences of bytes. It DOES change the state of the
10500 * parser; do not use liberally.
10502 * Returns the index to the current parsing point if the full sequence
10503 * is available, -1 otherwise.
10506 xmlParseLookupSequence(xmlParserCtxtPtr ctxt
, xmlChar first
,
10507 xmlChar next
, xmlChar third
) {
10509 xmlParserInputPtr in
;
10510 const xmlChar
*buf
;
10513 if (in
== NULL
) return(-1);
10514 base
= in
->cur
- in
->base
;
10515 if (base
< 0) return(-1);
10516 if (ctxt
->checkIndex
> base
)
10517 base
= ctxt
->checkIndex
;
10518 if (in
->buf
== NULL
) {
10522 buf
= in
->buf
->buffer
->content
;
10523 len
= in
->buf
->buffer
->use
;
10525 /* take into account the sequence length */
10526 if (third
) len
-= 2;
10527 else if (next
) len
--;
10528 for (;base
< len
;base
++) {
10529 if (buf
[base
] == first
) {
10531 if ((buf
[base
+ 1] != next
) ||
10532 (buf
[base
+ 2] != third
)) continue;
10533 } else if (next
!= 0) {
10534 if (buf
[base
+ 1] != next
) continue;
10536 ctxt
->checkIndex
= 0;
10539 xmlGenericError(xmlGenericErrorContext
,
10540 "PP: lookup '%c' found at %d\n",
10542 else if (third
== 0)
10543 xmlGenericError(xmlGenericErrorContext
,
10544 "PP: lookup '%c%c' found at %d\n",
10545 first
, next
, base
);
10547 xmlGenericError(xmlGenericErrorContext
,
10548 "PP: lookup '%c%c%c' found at %d\n",
10549 first
, next
, third
, base
);
10551 return(base
- (in
->cur
- in
->base
));
10554 ctxt
->checkIndex
= base
;
10557 xmlGenericError(xmlGenericErrorContext
,
10558 "PP: lookup '%c' failed\n", first
);
10559 else if (third
== 0)
10560 xmlGenericError(xmlGenericErrorContext
,
10561 "PP: lookup '%c%c' failed\n", first
, next
);
10563 xmlGenericError(xmlGenericErrorContext
,
10564 "PP: lookup '%c%c%c' failed\n", first
, next
, third
);
10570 * xmlParseGetLasts:
10571 * @ctxt: an XML parser context
10572 * @lastlt: pointer to store the last '<' from the input
10573 * @lastgt: pointer to store the last '>' from the input
10575 * Lookup the last < and > in the current chunk
10578 xmlParseGetLasts(xmlParserCtxtPtr ctxt
, const xmlChar
**lastlt
,
10579 const xmlChar
**lastgt
) {
10580 const xmlChar
*tmp
;
10582 if ((ctxt
== NULL
) || (lastlt
== NULL
) || (lastgt
== NULL
)) {
10583 xmlGenericError(xmlGenericErrorContext
,
10584 "Internal error: xmlParseGetLasts\n");
10587 if ((ctxt
->progressive
!= 0) && (ctxt
->inputNr
== 1)) {
10588 tmp
= ctxt
->input
->end
;
10590 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '<')) tmp
--;
10591 if (tmp
< ctxt
->input
->base
) {
10597 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '>')) {
10598 if (*tmp
== '\'') {
10600 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '\'')) tmp
++;
10601 if (tmp
< ctxt
->input
->end
) tmp
++;
10602 } else if (*tmp
== '"') {
10604 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '"')) tmp
++;
10605 if (tmp
< ctxt
->input
->end
) tmp
++;
10609 if (tmp
< ctxt
->input
->end
)
10614 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '>')) tmp
--;
10615 if (tmp
>= ctxt
->input
->base
)
10627 * xmlCheckCdataPush:
10628 * @utf: pointer to the block of characters
10629 * @len: length of the block in bytes
10631 * Check that the block of characters is okay as SCdata content [20]
10633 * Returns the number of bytes to pass if okay, a negative index where an
10634 * UTF-8 error occurred otherwise
10637 xmlCheckCdataPush(const xmlChar
*utf
, int len
) {
10642 if ((utf
== NULL
) || (len
<= 0))
10645 for (ix
= 0; ix
< len
;) { /* string is 0-terminated */
10647 if ((c
& 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10650 else if ((c
== 0xA) || (c
== 0xD) || (c
== 0x9))
10654 } else if ((c
& 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10655 if (ix
+ 2 > len
) return(ix
);
10656 if ((utf
[ix
+1] & 0xc0 ) != 0x80)
10658 codepoint
= (utf
[ix
] & 0x1f) << 6;
10659 codepoint
|= utf
[ix
+1] & 0x3f;
10660 if (!xmlIsCharQ(codepoint
))
10663 } else if ((c
& 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10664 if (ix
+ 3 > len
) return(ix
);
10665 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
10666 ((utf
[ix
+2] & 0xc0) != 0x80))
10668 codepoint
= (utf
[ix
] & 0xf) << 12;
10669 codepoint
|= (utf
[ix
+1] & 0x3f) << 6;
10670 codepoint
|= utf
[ix
+2] & 0x3f;
10671 if (!xmlIsCharQ(codepoint
))
10674 } else if ((c
& 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10675 if (ix
+ 4 > len
) return(ix
);
10676 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
10677 ((utf
[ix
+2] & 0xc0) != 0x80) ||
10678 ((utf
[ix
+3] & 0xc0) != 0x80))
10680 codepoint
= (utf
[ix
] & 0x7) << 18;
10681 codepoint
|= (utf
[ix
+1] & 0x3f) << 12;
10682 codepoint
|= (utf
[ix
+2] & 0x3f) << 6;
10683 codepoint
|= utf
[ix
+3] & 0x3f;
10684 if (!xmlIsCharQ(codepoint
))
10687 } else /* unknown encoding */
10694 * xmlParseTryOrFinish:
10695 * @ctxt: an XML parser context
10696 * @terminate: last chunk indicator
10698 * Try to progress on parsing
10700 * Returns zero if no parsing was possible
10703 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
10707 const xmlChar
*lastlt
, *lastgt
;
10709 if (ctxt
->input
== NULL
)
10713 switch (ctxt
->instate
) {
10714 case XML_PARSER_EOF
:
10715 xmlGenericError(xmlGenericErrorContext
,
10716 "PP: try EOF\n"); break;
10717 case XML_PARSER_START
:
10718 xmlGenericError(xmlGenericErrorContext
,
10719 "PP: try START\n"); break;
10720 case XML_PARSER_MISC
:
10721 xmlGenericError(xmlGenericErrorContext
,
10722 "PP: try MISC\n");break;
10723 case XML_PARSER_COMMENT
:
10724 xmlGenericError(xmlGenericErrorContext
,
10725 "PP: try COMMENT\n");break;
10726 case XML_PARSER_PROLOG
:
10727 xmlGenericError(xmlGenericErrorContext
,
10728 "PP: try PROLOG\n");break;
10729 case XML_PARSER_START_TAG
:
10730 xmlGenericError(xmlGenericErrorContext
,
10731 "PP: try START_TAG\n");break;
10732 case XML_PARSER_CONTENT
:
10733 xmlGenericError(xmlGenericErrorContext
,
10734 "PP: try CONTENT\n");break;
10735 case XML_PARSER_CDATA_SECTION
:
10736 xmlGenericError(xmlGenericErrorContext
,
10737 "PP: try CDATA_SECTION\n");break;
10738 case XML_PARSER_END_TAG
:
10739 xmlGenericError(xmlGenericErrorContext
,
10740 "PP: try END_TAG\n");break;
10741 case XML_PARSER_ENTITY_DECL
:
10742 xmlGenericError(xmlGenericErrorContext
,
10743 "PP: try ENTITY_DECL\n");break;
10744 case XML_PARSER_ENTITY_VALUE
:
10745 xmlGenericError(xmlGenericErrorContext
,
10746 "PP: try ENTITY_VALUE\n");break;
10747 case XML_PARSER_ATTRIBUTE_VALUE
:
10748 xmlGenericError(xmlGenericErrorContext
,
10749 "PP: try ATTRIBUTE_VALUE\n");break;
10750 case XML_PARSER_DTD
:
10751 xmlGenericError(xmlGenericErrorContext
,
10752 "PP: try DTD\n");break;
10753 case XML_PARSER_EPILOG
:
10754 xmlGenericError(xmlGenericErrorContext
,
10755 "PP: try EPILOG\n");break;
10756 case XML_PARSER_PI
:
10757 xmlGenericError(xmlGenericErrorContext
,
10758 "PP: try PI\n");break;
10759 case XML_PARSER_IGNORE
:
10760 xmlGenericError(xmlGenericErrorContext
,
10761 "PP: try IGNORE\n");break;
10765 if ((ctxt
->input
!= NULL
) &&
10766 (ctxt
->input
->cur
- ctxt
->input
->base
> 4096)) {
10768 ctxt
->checkIndex
= 0;
10770 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
10772 while (ctxt
->instate
!= XML_PARSER_EOF
) {
10773 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
10778 * Pop-up of finished entities.
10780 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
10783 if (ctxt
->input
== NULL
) break;
10784 if (ctxt
->input
->buf
== NULL
)
10785 avail
= ctxt
->input
->length
-
10786 (ctxt
->input
->cur
- ctxt
->input
->base
);
10789 * If we are operating on converted input, try to flush
10790 * remaining chars to avoid them stalling in the non-converted
10793 if ((ctxt
->input
->buf
->raw
!= NULL
) &&
10794 (ctxt
->input
->buf
->raw
->use
> 0)) {
10795 int base
= ctxt
->input
->base
-
10796 ctxt
->input
->buf
->buffer
->content
;
10797 int current
= ctxt
->input
->cur
- ctxt
->input
->base
;
10799 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
10800 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
10801 ctxt
->input
->cur
= ctxt
->input
->base
+ current
;
10803 &ctxt
->input
->buf
->buffer
->content
[
10804 ctxt
->input
->buf
->buffer
->use
];
10806 avail
= ctxt
->input
->buf
->buffer
->use
-
10807 (ctxt
->input
->cur
- ctxt
->input
->base
);
10811 switch (ctxt
->instate
) {
10812 case XML_PARSER_EOF
:
10814 * Document parsing is done !
10817 case XML_PARSER_START
:
10818 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
10820 xmlCharEncoding enc
;
10823 * Very first chars read from the document flow.
10829 * Get the 4 first bytes and decode the charset
10830 * if enc != XML_CHAR_ENCODING_NONE
10831 * plug some encoding conversion routines,
10832 * else xmlSwitchEncoding will set to (default)
10839 enc
= xmlDetectCharEncoding(start
, 4);
10840 xmlSwitchEncoding(ctxt
, enc
);
10846 cur
= ctxt
->input
->cur
[0];
10847 next
= ctxt
->input
->cur
[1];
10849 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10850 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
10851 &xmlDefaultSAXLocator
);
10852 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10853 ctxt
->instate
= XML_PARSER_EOF
;
10855 xmlGenericError(xmlGenericErrorContext
,
10856 "PP: entering EOF\n");
10858 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10859 ctxt
->sax
->endDocument(ctxt
->userData
);
10862 if ((cur
== '<') && (next
== '?')) {
10863 /* PI or XML decl */
10864 if (avail
< 5) return(ret
);
10865 if ((!terminate
) &&
10866 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
10868 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10869 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
10870 &xmlDefaultSAXLocator
);
10871 if ((ctxt
->input
->cur
[2] == 'x') &&
10872 (ctxt
->input
->cur
[3] == 'm') &&
10873 (ctxt
->input
->cur
[4] == 'l') &&
10874 (IS_BLANK_CH(ctxt
->input
->cur
[5]))) {
10877 xmlGenericError(xmlGenericErrorContext
,
10878 "PP: Parsing XML Decl\n");
10880 xmlParseXMLDecl(ctxt
);
10881 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10883 * The XML REC instructs us to stop parsing right
10886 ctxt
->instate
= XML_PARSER_EOF
;
10889 ctxt
->standalone
= ctxt
->input
->standalone
;
10890 if ((ctxt
->encoding
== NULL
) &&
10891 (ctxt
->input
->encoding
!= NULL
))
10892 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
10893 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
10894 (!ctxt
->disableSAX
))
10895 ctxt
->sax
->startDocument(ctxt
->userData
);
10896 ctxt
->instate
= XML_PARSER_MISC
;
10898 xmlGenericError(xmlGenericErrorContext
,
10899 "PP: entering MISC\n");
10902 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10903 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
10904 (!ctxt
->disableSAX
))
10905 ctxt
->sax
->startDocument(ctxt
->userData
);
10906 ctxt
->instate
= XML_PARSER_MISC
;
10908 xmlGenericError(xmlGenericErrorContext
,
10909 "PP: entering MISC\n");
10913 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10914 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
10915 &xmlDefaultSAXLocator
);
10916 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10917 if (ctxt
->version
== NULL
) {
10918 xmlErrMemory(ctxt
, NULL
);
10921 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
10922 (!ctxt
->disableSAX
))
10923 ctxt
->sax
->startDocument(ctxt
->userData
);
10924 ctxt
->instate
= XML_PARSER_MISC
;
10926 xmlGenericError(xmlGenericErrorContext
,
10927 "PP: entering MISC\n");
10931 case XML_PARSER_START_TAG
: {
10932 const xmlChar
*name
;
10933 const xmlChar
*prefix
= NULL
;
10934 const xmlChar
*URI
= NULL
;
10935 int nsNr
= ctxt
->nsNr
;
10937 if ((avail
< 2) && (ctxt
->inputNr
== 1))
10939 cur
= ctxt
->input
->cur
[0];
10941 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10942 ctxt
->instate
= XML_PARSER_EOF
;
10943 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10944 ctxt
->sax
->endDocument(ctxt
->userData
);
10948 if (ctxt
->progressive
) {
10949 /* > can be found unescaped in attribute values */
10950 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
10952 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
10956 if (ctxt
->spaceNr
== 0)
10957 spacePush(ctxt
, -1);
10958 else if (*ctxt
->space
== -2)
10959 spacePush(ctxt
, -1);
10961 spacePush(ctxt
, *ctxt
->space
);
10962 #ifdef LIBXML_SAX1_ENABLED
10964 #endif /* LIBXML_SAX1_ENABLED */
10965 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
10966 #ifdef LIBXML_SAX1_ENABLED
10968 name
= xmlParseStartTag(ctxt
);
10969 #endif /* LIBXML_SAX1_ENABLED */
10970 if (ctxt
->instate
== XML_PARSER_EOF
)
10972 if (name
== NULL
) {
10974 ctxt
->instate
= XML_PARSER_EOF
;
10975 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10976 ctxt
->sax
->endDocument(ctxt
->userData
);
10979 #ifdef LIBXML_VALID_ENABLED
10981 * [ VC: Root Element Type ]
10982 * The Name in the document type declaration must match
10983 * the element type of the root element.
10985 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
10986 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
10987 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
10988 #endif /* LIBXML_VALID_ENABLED */
10991 * Check for an Empty Element.
10993 if ((RAW
== '/') && (NXT(1) == '>')) {
10997 if ((ctxt
->sax
!= NULL
) &&
10998 (ctxt
->sax
->endElementNs
!= NULL
) &&
10999 (!ctxt
->disableSAX
))
11000 ctxt
->sax
->endElementNs(ctxt
->userData
, name
,
11002 if (ctxt
->nsNr
- nsNr
> 0)
11003 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
11004 #ifdef LIBXML_SAX1_ENABLED
11006 if ((ctxt
->sax
!= NULL
) &&
11007 (ctxt
->sax
->endElement
!= NULL
) &&
11008 (!ctxt
->disableSAX
))
11009 ctxt
->sax
->endElement(ctxt
->userData
, name
);
11010 #endif /* LIBXML_SAX1_ENABLED */
11012 if (ctxt
->instate
== XML_PARSER_EOF
)
11015 if (ctxt
->nameNr
== 0) {
11016 ctxt
->instate
= XML_PARSER_EPILOG
;
11018 ctxt
->instate
= XML_PARSER_CONTENT
;
11025 xmlFatalErrMsgStr(ctxt
, XML_ERR_GT_REQUIRED
,
11026 "Couldn't find end of Start Tag %s\n",
11032 nameNsPush(ctxt
, name
, prefix
, URI
, ctxt
->nsNr
- nsNr
);
11033 #ifdef LIBXML_SAX1_ENABLED
11035 namePush(ctxt
, name
);
11036 #endif /* LIBXML_SAX1_ENABLED */
11038 ctxt
->instate
= XML_PARSER_CONTENT
;
11041 case XML_PARSER_CONTENT
: {
11042 const xmlChar
*test
;
11044 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11046 cur
= ctxt
->input
->cur
[0];
11047 next
= ctxt
->input
->cur
[1];
11050 cons
= ctxt
->input
->consumed
;
11051 if ((cur
== '<') && (next
== '/')) {
11052 ctxt
->instate
= XML_PARSER_END_TAG
;
11054 } else if ((cur
== '<') && (next
== '?')) {
11055 if ((!terminate
) &&
11056 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11059 } else if ((cur
== '<') && (next
!= '!')) {
11060 ctxt
->instate
= XML_PARSER_START_TAG
;
11062 } else if ((cur
== '<') && (next
== '!') &&
11063 (ctxt
->input
->cur
[2] == '-') &&
11064 (ctxt
->input
->cur
[3] == '-')) {
11069 ctxt
->input
->cur
+= 4;
11070 term
= xmlParseLookupSequence(ctxt
, '-', '-', '>');
11071 ctxt
->input
->cur
-= 4;
11072 if ((!terminate
) && (term
< 0))
11074 xmlParseComment(ctxt
);
11075 ctxt
->instate
= XML_PARSER_CONTENT
;
11076 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
11077 (ctxt
->input
->cur
[2] == '[') &&
11078 (ctxt
->input
->cur
[3] == 'C') &&
11079 (ctxt
->input
->cur
[4] == 'D') &&
11080 (ctxt
->input
->cur
[5] == 'A') &&
11081 (ctxt
->input
->cur
[6] == 'T') &&
11082 (ctxt
->input
->cur
[7] == 'A') &&
11083 (ctxt
->input
->cur
[8] == '[')) {
11085 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
11087 } else if ((cur
== '<') && (next
== '!') &&
11090 } else if (cur
== '&') {
11091 if ((!terminate
) &&
11092 (xmlParseLookupSequence(ctxt
, ';', 0, 0) < 0))
11094 xmlParseReference(ctxt
);
11096 /* TODO Avoid the extra copy, handle directly !!! */
11098 * Goal of the following test is:
11099 * - minimize calls to the SAX 'character' callback
11100 * when they are mergeable
11101 * - handle a problem for isBlank when we only parse
11102 * a sequence of blank chars and the next one is
11103 * not available to check against '<' presence.
11104 * - tries to homogenize the differences in SAX
11105 * callbacks between the push and pull versions
11108 if ((ctxt
->inputNr
== 1) &&
11109 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
11111 if (ctxt
->progressive
) {
11112 if ((lastlt
== NULL
) ||
11113 (ctxt
->input
->cur
> lastlt
))
11115 } else if (xmlParseLookupSequence(ctxt
,
11121 ctxt
->checkIndex
= 0;
11122 xmlParseCharData(ctxt
, 0);
11125 * Pop-up of finished entities.
11127 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
11129 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
11130 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
11131 "detected an error in element content\n");
11132 ctxt
->instate
= XML_PARSER_EOF
;
11137 case XML_PARSER_END_TAG
:
11141 if (ctxt
->progressive
) {
11142 /* > can be found unescaped in attribute values */
11143 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11145 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11150 xmlParseEndTag2(ctxt
,
11151 (void *) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 3],
11152 (void *) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 2], 0,
11153 (int) (long) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 1], 0);
11156 #ifdef LIBXML_SAX1_ENABLED
11158 xmlParseEndTag1(ctxt
, 0);
11159 #endif /* LIBXML_SAX1_ENABLED */
11160 if (ctxt
->instate
== XML_PARSER_EOF
) {
11162 } else if (ctxt
->nameNr
== 0) {
11163 ctxt
->instate
= XML_PARSER_EPILOG
;
11165 ctxt
->instate
= XML_PARSER_CONTENT
;
11168 case XML_PARSER_CDATA_SECTION
: {
11170 * The Push mode needs to have the SAX callback for
11171 * cdataBlock merge back contiguous callbacks.
11175 base
= xmlParseLookupSequence(ctxt
, ']', ']', '>');
11177 if (avail
>= XML_PARSER_BIG_BUFFER_SIZE
+ 2) {
11180 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
,
11181 XML_PARSER_BIG_BUFFER_SIZE
);
11184 ctxt
->input
->cur
+= tmp
;
11185 goto encoding_error
;
11187 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
11188 if (ctxt
->sax
->cdataBlock
!= NULL
)
11189 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11190 ctxt
->input
->cur
, tmp
);
11191 else if (ctxt
->sax
->characters
!= NULL
)
11192 ctxt
->sax
->characters(ctxt
->userData
,
11193 ctxt
->input
->cur
, tmp
);
11195 if (ctxt
->instate
== XML_PARSER_EOF
)
11198 ctxt
->checkIndex
= 0;
11204 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, base
);
11205 if ((tmp
< 0) || (tmp
!= base
)) {
11207 ctxt
->input
->cur
+= tmp
;
11208 goto encoding_error
;
11210 if ((ctxt
->sax
!= NULL
) && (base
== 0) &&
11211 (ctxt
->sax
->cdataBlock
!= NULL
) &&
11212 (!ctxt
->disableSAX
)) {
11214 * Special case to provide identical behaviour
11215 * between pull and push parsers on empty CDATA
11218 if ((ctxt
->input
->cur
- ctxt
->input
->base
>= 9) &&
11219 (!strncmp((const char *)&ctxt
->input
->cur
[-9],
11221 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11223 } else if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
11224 (!ctxt
->disableSAX
)) {
11225 if (ctxt
->sax
->cdataBlock
!= NULL
)
11226 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11227 ctxt
->input
->cur
, base
);
11228 else if (ctxt
->sax
->characters
!= NULL
)
11229 ctxt
->sax
->characters(ctxt
->userData
,
11230 ctxt
->input
->cur
, base
);
11232 if (ctxt
->instate
== XML_PARSER_EOF
)
11235 ctxt
->checkIndex
= 0;
11236 ctxt
->instate
= XML_PARSER_CONTENT
;
11238 xmlGenericError(xmlGenericErrorContext
,
11239 "PP: entering CONTENT\n");
11244 case XML_PARSER_MISC
:
11246 if (ctxt
->input
->buf
== NULL
)
11247 avail
= ctxt
->input
->length
-
11248 (ctxt
->input
->cur
- ctxt
->input
->base
);
11250 avail
= ctxt
->input
->buf
->buffer
->use
-
11251 (ctxt
->input
->cur
- ctxt
->input
->base
);
11254 cur
= ctxt
->input
->cur
[0];
11255 next
= ctxt
->input
->cur
[1];
11256 if ((cur
== '<') && (next
== '?')) {
11257 if ((!terminate
) &&
11258 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11261 xmlGenericError(xmlGenericErrorContext
,
11262 "PP: Parsing PI\n");
11265 if (ctxt
->instate
== XML_PARSER_EOF
)
11267 ctxt
->checkIndex
= 0;
11268 } else if ((cur
== '<') && (next
== '!') &&
11269 (ctxt
->input
->cur
[2] == '-') &&
11270 (ctxt
->input
->cur
[3] == '-')) {
11271 if ((!terminate
) &&
11272 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
11275 xmlGenericError(xmlGenericErrorContext
,
11276 "PP: Parsing Comment\n");
11278 xmlParseComment(ctxt
);
11279 if (ctxt
->instate
== XML_PARSER_EOF
)
11281 ctxt
->instate
= XML_PARSER_MISC
;
11282 ctxt
->checkIndex
= 0;
11283 } else if ((cur
== '<') && (next
== '!') &&
11284 (ctxt
->input
->cur
[2] == 'D') &&
11285 (ctxt
->input
->cur
[3] == 'O') &&
11286 (ctxt
->input
->cur
[4] == 'C') &&
11287 (ctxt
->input
->cur
[5] == 'T') &&
11288 (ctxt
->input
->cur
[6] == 'Y') &&
11289 (ctxt
->input
->cur
[7] == 'P') &&
11290 (ctxt
->input
->cur
[8] == 'E')) {
11291 if ((!terminate
) &&
11292 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0))
11295 xmlGenericError(xmlGenericErrorContext
,
11296 "PP: Parsing internal subset\n");
11298 ctxt
->inSubset
= 1;
11299 xmlParseDocTypeDecl(ctxt
);
11300 if (ctxt
->instate
== XML_PARSER_EOF
)
11303 ctxt
->instate
= XML_PARSER_DTD
;
11305 xmlGenericError(xmlGenericErrorContext
,
11306 "PP: entering DTD\n");
11310 * Create and update the external subset.
11312 ctxt
->inSubset
= 2;
11313 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11314 (ctxt
->sax
->externalSubset
!= NULL
))
11315 ctxt
->sax
->externalSubset(ctxt
->userData
,
11316 ctxt
->intSubName
, ctxt
->extSubSystem
,
11318 ctxt
->inSubset
= 0;
11319 xmlCleanSpecialAttr(ctxt
);
11320 ctxt
->instate
= XML_PARSER_PROLOG
;
11322 xmlGenericError(xmlGenericErrorContext
,
11323 "PP: entering PROLOG\n");
11326 } else if ((cur
== '<') && (next
== '!') &&
11330 ctxt
->instate
= XML_PARSER_START_TAG
;
11331 ctxt
->progressive
= 1;
11332 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11334 xmlGenericError(xmlGenericErrorContext
,
11335 "PP: entering START_TAG\n");
11339 case XML_PARSER_PROLOG
:
11341 if (ctxt
->input
->buf
== NULL
)
11342 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11344 avail
= ctxt
->input
->buf
->buffer
->use
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11347 cur
= ctxt
->input
->cur
[0];
11348 next
= ctxt
->input
->cur
[1];
11349 if ((cur
== '<') && (next
== '?')) {
11350 if ((!terminate
) &&
11351 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11354 xmlGenericError(xmlGenericErrorContext
,
11355 "PP: Parsing PI\n");
11358 if (ctxt
->instate
== XML_PARSER_EOF
)
11360 } else if ((cur
== '<') && (next
== '!') &&
11361 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11362 if ((!terminate
) &&
11363 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
11366 xmlGenericError(xmlGenericErrorContext
,
11367 "PP: Parsing Comment\n");
11369 xmlParseComment(ctxt
);
11370 if (ctxt
->instate
== XML_PARSER_EOF
)
11372 ctxt
->instate
= XML_PARSER_PROLOG
;
11373 } else if ((cur
== '<') && (next
== '!') &&
11377 ctxt
->instate
= XML_PARSER_START_TAG
;
11378 if (ctxt
->progressive
== 0)
11379 ctxt
->progressive
= 1;
11380 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11382 xmlGenericError(xmlGenericErrorContext
,
11383 "PP: entering START_TAG\n");
11387 case XML_PARSER_EPILOG
:
11389 if (ctxt
->input
->buf
== NULL
)
11390 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11392 avail
= ctxt
->input
->buf
->buffer
->use
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11395 cur
= ctxt
->input
->cur
[0];
11396 next
= ctxt
->input
->cur
[1];
11397 if ((cur
== '<') && (next
== '?')) {
11398 if ((!terminate
) &&
11399 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11402 xmlGenericError(xmlGenericErrorContext
,
11403 "PP: Parsing PI\n");
11406 if (ctxt
->instate
== XML_PARSER_EOF
)
11408 ctxt
->instate
= XML_PARSER_EPILOG
;
11409 } else if ((cur
== '<') && (next
== '!') &&
11410 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11411 if ((!terminate
) &&
11412 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
11415 xmlGenericError(xmlGenericErrorContext
,
11416 "PP: Parsing Comment\n");
11418 xmlParseComment(ctxt
);
11419 if (ctxt
->instate
== XML_PARSER_EOF
)
11421 ctxt
->instate
= XML_PARSER_EPILOG
;
11422 } else if ((cur
== '<') && (next
== '!') &&
11426 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11427 ctxt
->instate
= XML_PARSER_EOF
;
11429 xmlGenericError(xmlGenericErrorContext
,
11430 "PP: entering EOF\n");
11432 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11433 ctxt
->sax
->endDocument(ctxt
->userData
);
11437 case XML_PARSER_DTD
: {
11439 * Sorry but progressive parsing of the internal subset
11440 * is not expected to be supported. We first check that
11441 * the full content of the internal subset is available and
11442 * the parsing is launched only at that point.
11443 * Internal subset ends up with "']' S? '>'" in an unescaped
11444 * section and not in a ']]>' sequence which are conditional
11445 * sections (whoever argued to keep that crap in XML deserve
11446 * a place in hell !).
11452 base
= ctxt
->input
->cur
- ctxt
->input
->base
;
11453 if (base
< 0) return(0);
11454 if (ctxt
->checkIndex
> base
)
11455 base
= ctxt
->checkIndex
;
11456 buf
= ctxt
->input
->buf
->buffer
->content
;
11457 for (;(unsigned int) base
< ctxt
->input
->buf
->buffer
->use
;
11460 if (buf
[base
] == quote
)
11464 if ((quote
== 0) && (buf
[base
] == '<')) {
11466 /* special handling of comments */
11467 if (((unsigned int) base
+ 4 <
11468 ctxt
->input
->buf
->buffer
->use
) &&
11469 (buf
[base
+ 1] == '!') &&
11470 (buf
[base
+ 2] == '-') &&
11471 (buf
[base
+ 3] == '-')) {
11472 for (;(unsigned int) base
+ 3 <
11473 ctxt
->input
->buf
->buffer
->use
; base
++) {
11474 if ((buf
[base
] == '-') &&
11475 (buf
[base
+ 1] == '-') &&
11476 (buf
[base
+ 2] == '>')) {
11484 fprintf(stderr
, "unfinished comment\n");
11491 if (buf
[base
] == '"') {
11495 if (buf
[base
] == '\'') {
11499 if (buf
[base
] == ']') {
11501 fprintf(stderr
, "%c%c%c%c: ", buf
[base
],
11502 buf
[base
+ 1], buf
[base
+ 2], buf
[base
+ 3]);
11504 if ((unsigned int) base
+1 >=
11505 ctxt
->input
->buf
->buffer
->use
)
11507 if (buf
[base
+ 1] == ']') {
11508 /* conditional crap, skip both ']' ! */
11513 (unsigned int) base
+ i
< ctxt
->input
->buf
->buffer
->use
;
11515 if (buf
[base
+ i
] == '>') {
11517 fprintf(stderr
, "found\n");
11519 goto found_end_int_subset
;
11521 if (!IS_BLANK_CH(buf
[base
+ i
])) {
11523 fprintf(stderr
, "not found\n");
11525 goto not_end_of_int_subset
;
11529 fprintf(stderr
, "end of stream\n");
11534 not_end_of_int_subset
:
11535 continue; /* for */
11538 * We didn't find the end of the Internal subset
11542 xmlGenericError(xmlGenericErrorContext
,
11543 "PP: lookup of int subset end filed\n");
11547 found_end_int_subset
:
11548 xmlParseInternalSubset(ctxt
);
11549 if (ctxt
->instate
== XML_PARSER_EOF
)
11551 ctxt
->inSubset
= 2;
11552 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11553 (ctxt
->sax
->externalSubset
!= NULL
))
11554 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
11555 ctxt
->extSubSystem
, ctxt
->extSubURI
);
11556 ctxt
->inSubset
= 0;
11557 xmlCleanSpecialAttr(ctxt
);
11558 if (ctxt
->instate
== XML_PARSER_EOF
)
11560 ctxt
->instate
= XML_PARSER_PROLOG
;
11561 ctxt
->checkIndex
= 0;
11563 xmlGenericError(xmlGenericErrorContext
,
11564 "PP: entering PROLOG\n");
11568 case XML_PARSER_COMMENT
:
11569 xmlGenericError(xmlGenericErrorContext
,
11570 "PP: internal error, state == COMMENT\n");
11571 ctxt
->instate
= XML_PARSER_CONTENT
;
11573 xmlGenericError(xmlGenericErrorContext
,
11574 "PP: entering CONTENT\n");
11577 case XML_PARSER_IGNORE
:
11578 xmlGenericError(xmlGenericErrorContext
,
11579 "PP: internal error, state == IGNORE");
11580 ctxt
->instate
= XML_PARSER_DTD
;
11582 xmlGenericError(xmlGenericErrorContext
,
11583 "PP: entering DTD\n");
11586 case XML_PARSER_PI
:
11587 xmlGenericError(xmlGenericErrorContext
,
11588 "PP: internal error, state == PI\n");
11589 ctxt
->instate
= XML_PARSER_CONTENT
;
11591 xmlGenericError(xmlGenericErrorContext
,
11592 "PP: entering CONTENT\n");
11595 case XML_PARSER_ENTITY_DECL
:
11596 xmlGenericError(xmlGenericErrorContext
,
11597 "PP: internal error, state == ENTITY_DECL\n");
11598 ctxt
->instate
= XML_PARSER_DTD
;
11600 xmlGenericError(xmlGenericErrorContext
,
11601 "PP: entering DTD\n");
11604 case XML_PARSER_ENTITY_VALUE
:
11605 xmlGenericError(xmlGenericErrorContext
,
11606 "PP: internal error, state == ENTITY_VALUE\n");
11607 ctxt
->instate
= XML_PARSER_CONTENT
;
11609 xmlGenericError(xmlGenericErrorContext
,
11610 "PP: entering DTD\n");
11613 case XML_PARSER_ATTRIBUTE_VALUE
:
11614 xmlGenericError(xmlGenericErrorContext
,
11615 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11616 ctxt
->instate
= XML_PARSER_START_TAG
;
11618 xmlGenericError(xmlGenericErrorContext
,
11619 "PP: entering START_TAG\n");
11622 case XML_PARSER_SYSTEM_LITERAL
:
11623 xmlGenericError(xmlGenericErrorContext
,
11624 "PP: internal error, state == SYSTEM_LITERAL\n");
11625 ctxt
->instate
= XML_PARSER_START_TAG
;
11627 xmlGenericError(xmlGenericErrorContext
,
11628 "PP: entering START_TAG\n");
11631 case XML_PARSER_PUBLIC_LITERAL
:
11632 xmlGenericError(xmlGenericErrorContext
,
11633 "PP: internal error, state == PUBLIC_LITERAL\n");
11634 ctxt
->instate
= XML_PARSER_START_TAG
;
11636 xmlGenericError(xmlGenericErrorContext
,
11637 "PP: entering START_TAG\n");
11644 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
11651 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11652 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
11653 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
11654 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
11655 "Input is not proper UTF-8, indicate encoding !\n%s",
11656 BAD_CAST buffer
, NULL
);
11663 * @ctxt: an XML parser context
11664 * @chunk: an char array
11665 * @size: the size in byte of the chunk
11666 * @terminate: last chunk indicator
11668 * Parse a Chunk of memory
11670 * Returns zero if no error, the xmlParserErrors otherwise.
11673 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
11679 return(XML_ERR_INTERNAL_ERROR
);
11680 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11681 return(ctxt
->errNo
);
11682 if (ctxt
->instate
== XML_PARSER_EOF
)
11684 if (ctxt
->instate
== XML_PARSER_START
)
11685 xmlDetectSAX2(ctxt
);
11686 if ((size
> 0) && (chunk
!= NULL
) && (!terminate
) &&
11687 (chunk
[size
- 1] == '\r')) {
11694 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
11695 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
11696 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
11697 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
11701 * Specific handling if we autodetected an encoding, we should not
11702 * push more than the first line ... which depend on the encoding
11703 * And only push the rest once the final encoding was detected
11705 if ((ctxt
->instate
== XML_PARSER_START
) && (ctxt
->input
!= NULL
) &&
11706 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->encoder
!= NULL
)) {
11707 unsigned int len
= 45;
11709 if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11710 BAD_CAST
"UTF-16")) ||
11711 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11712 BAD_CAST
"UTF16")))
11714 else if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11715 BAD_CAST
"UCS-4")) ||
11716 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11720 if (ctxt
->input
->buf
->rawconsumed
< len
)
11721 len
-= ctxt
->input
->buf
->rawconsumed
;
11724 * Change size for reading the initial declaration only
11725 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11726 * will blindly copy extra bytes from memory.
11729 remain
= size
- len
;
11735 res
=xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
11737 ctxt
->errNo
= XML_PARSER_EOF
;
11738 ctxt
->disableSAX
= 1;
11739 return (XML_PARSER_EOF
);
11741 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
11742 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
11744 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->use
];
11746 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
11749 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
11750 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
11751 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
11752 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
11753 (in
->raw
!= NULL
)) {
11756 nbchars
= xmlCharEncInFunc(in
->encoder
, in
->buffer
, in
->raw
);
11759 xmlGenericError(xmlGenericErrorContext
,
11760 "xmlParseChunk: encoder error\n");
11761 return(XML_ERR_INVALID_ENCODING
);
11767 xmlParseTryOrFinish(ctxt
, 0);
11769 xmlParseTryOrFinish(ctxt
, terminate
);
11770 if (ctxt
->instate
== XML_PARSER_EOF
)
11771 return(ctxt
->errNo
);
11772 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11773 return(ctxt
->errNo
);
11781 if ((end_in_lf
== 1) && (ctxt
->input
!= NULL
) &&
11782 (ctxt
->input
->buf
!= NULL
)) {
11783 xmlParserInputBufferPush(ctxt
->input
->buf
, 1, "\r");
11787 * Check for termination
11791 if (ctxt
->input
!= NULL
) {
11792 if (ctxt
->input
->buf
== NULL
)
11793 avail
= ctxt
->input
->length
-
11794 (ctxt
->input
->cur
- ctxt
->input
->base
);
11796 avail
= ctxt
->input
->buf
->buffer
->use
-
11797 (ctxt
->input
->cur
- ctxt
->input
->base
);
11800 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
11801 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
11802 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11804 if ((ctxt
->instate
== XML_PARSER_EPILOG
) && (avail
> 0)) {
11805 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11807 if (ctxt
->instate
!= XML_PARSER_EOF
) {
11808 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11809 ctxt
->sax
->endDocument(ctxt
->userData
);
11811 ctxt
->instate
= XML_PARSER_EOF
;
11813 return((xmlParserErrors
) ctxt
->errNo
);
11816 /************************************************************************
11818 * I/O front end functions to the parser *
11820 ************************************************************************/
11823 * xmlCreatePushParserCtxt:
11824 * @sax: a SAX handler
11825 * @user_data: The user data returned on SAX callbacks
11826 * @chunk: a pointer to an array of chars
11827 * @size: number of chars in the array
11828 * @filename: an optional file name or URI
11830 * Create a parser context for using the XML parser in push mode.
11831 * If @buffer and @size are non-NULL, the data is used to detect
11832 * the encoding. The remaining characters will be parsed so they
11833 * don't need to be fed in again through xmlParseChunk.
11834 * To allow content encoding detection, @size should be >= 4
11835 * The value of @filename is used for fetching external entities
11836 * and error/warning reports.
11838 * Returns the new parser context or NULL
11842 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
11843 const char *chunk
, int size
, const char *filename
) {
11844 xmlParserCtxtPtr ctxt
;
11845 xmlParserInputPtr inputStream
;
11846 xmlParserInputBufferPtr buf
;
11847 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
11850 * plug some encoding conversion routines
11852 if ((chunk
!= NULL
) && (size
>= 4))
11853 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
11855 buf
= xmlAllocParserInputBuffer(enc
);
11856 if (buf
== NULL
) return(NULL
);
11858 ctxt
= xmlNewParserCtxt();
11859 if (ctxt
== NULL
) {
11860 xmlErrMemory(NULL
, "creating parser: out of memory\n");
11861 xmlFreeParserInputBuffer(buf
);
11864 ctxt
->dictNames
= 1;
11865 ctxt
->pushTab
= (void **) xmlMalloc(ctxt
->nameMax
* 3 * sizeof(xmlChar
*));
11866 if (ctxt
->pushTab
== NULL
) {
11867 xmlErrMemory(ctxt
, NULL
);
11868 xmlFreeParserInputBuffer(buf
);
11869 xmlFreeParserCtxt(ctxt
);
11873 #ifdef LIBXML_SAX1_ENABLED
11874 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
11875 #endif /* LIBXML_SAX1_ENABLED */
11876 xmlFree(ctxt
->sax
);
11877 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
11878 if (ctxt
->sax
== NULL
) {
11879 xmlErrMemory(ctxt
, NULL
);
11880 xmlFreeParserInputBuffer(buf
);
11881 xmlFreeParserCtxt(ctxt
);
11884 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
11885 if (sax
->initialized
== XML_SAX2_MAGIC
)
11886 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
11888 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
11889 if (user_data
!= NULL
)
11890 ctxt
->userData
= user_data
;
11892 if (filename
== NULL
) {
11893 ctxt
->directory
= NULL
;
11895 ctxt
->directory
= xmlParserGetDirectory(filename
);
11898 inputStream
= xmlNewInputStream(ctxt
);
11899 if (inputStream
== NULL
) {
11900 xmlFreeParserCtxt(ctxt
);
11901 xmlFreeParserInputBuffer(buf
);
11905 if (filename
== NULL
)
11906 inputStream
->filename
= NULL
;
11908 inputStream
->filename
= (char *)
11909 xmlCanonicPath((const xmlChar
*) filename
);
11910 if (inputStream
->filename
== NULL
) {
11911 xmlFreeParserCtxt(ctxt
);
11912 xmlFreeParserInputBuffer(buf
);
11916 inputStream
->buf
= buf
;
11917 inputStream
->base
= inputStream
->buf
->buffer
->content
;
11918 inputStream
->cur
= inputStream
->buf
->buffer
->content
;
11920 &inputStream
->buf
->buffer
->content
[inputStream
->buf
->buffer
->use
];
11922 inputPush(ctxt
, inputStream
);
11925 * If the caller didn't provide an initial 'chunk' for determining
11926 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11927 * that it can be automatically determined later
11929 if ((size
== 0) || (chunk
== NULL
)) {
11930 ctxt
->charset
= XML_CHAR_ENCODING_NONE
;
11931 } else if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
11932 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
11933 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
11935 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
11937 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
11938 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
11940 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->use
];
11942 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
11946 if (enc
!= XML_CHAR_ENCODING_NONE
) {
11947 xmlSwitchEncoding(ctxt
, enc
);
11952 #endif /* LIBXML_PUSH_ENABLED */
11956 * @ctxt: an XML parser context
11958 * Blocks further parser processing
11961 xmlStopParser(xmlParserCtxtPtr ctxt
) {
11964 ctxt
->instate
= XML_PARSER_EOF
;
11965 ctxt
->errNo
= XML_ERR_USER_STOP
;
11966 ctxt
->disableSAX
= 1;
11967 if (ctxt
->input
!= NULL
) {
11968 ctxt
->input
->cur
= BAD_CAST
"";
11969 ctxt
->input
->base
= ctxt
->input
->cur
;
11974 * xmlCreateIOParserCtxt:
11975 * @sax: a SAX handler
11976 * @user_data: The user data returned on SAX callbacks
11977 * @ioread: an I/O read function
11978 * @ioclose: an I/O close function
11979 * @ioctx: an I/O handler
11980 * @enc: the charset encoding if known
11982 * Create a parser context for using the XML parser with an existing
11985 * Returns the new parser context or NULL
11988 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
11989 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
11990 void *ioctx
, xmlCharEncoding enc
) {
11991 xmlParserCtxtPtr ctxt
;
11992 xmlParserInputPtr inputStream
;
11993 xmlParserInputBufferPtr buf
;
11995 if (ioread
== NULL
) return(NULL
);
11997 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
11998 if (buf
== NULL
) return(NULL
);
12000 ctxt
= xmlNewParserCtxt();
12001 if (ctxt
== NULL
) {
12002 xmlFreeParserInputBuffer(buf
);
12006 #ifdef LIBXML_SAX1_ENABLED
12007 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
12008 #endif /* LIBXML_SAX1_ENABLED */
12009 xmlFree(ctxt
->sax
);
12010 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
12011 if (ctxt
->sax
== NULL
) {
12012 xmlErrMemory(ctxt
, NULL
);
12013 xmlFreeParserCtxt(ctxt
);
12016 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
12017 if (sax
->initialized
== XML_SAX2_MAGIC
)
12018 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
12020 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
12021 if (user_data
!= NULL
)
12022 ctxt
->userData
= user_data
;
12025 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
12026 if (inputStream
== NULL
) {
12027 xmlFreeParserCtxt(ctxt
);
12030 inputPush(ctxt
, inputStream
);
12035 #ifdef LIBXML_VALID_ENABLED
12036 /************************************************************************
12038 * Front ends when parsing a DTD *
12040 ************************************************************************/
12044 * @sax: the SAX handler block or NULL
12045 * @input: an Input Buffer
12046 * @enc: the charset encoding if known
12048 * Load and parse a DTD
12050 * Returns the resulting xmlDtdPtr or NULL in case of error.
12051 * @input will be freed by the function in any case.
12055 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
12056 xmlCharEncoding enc
) {
12057 xmlDtdPtr ret
= NULL
;
12058 xmlParserCtxtPtr ctxt
;
12059 xmlParserInputPtr pinput
= NULL
;
12065 ctxt
= xmlNewParserCtxt();
12066 if (ctxt
== NULL
) {
12067 xmlFreeParserInputBuffer(input
);
12072 * Set-up the SAX context
12075 if (ctxt
->sax
!= NULL
)
12076 xmlFree(ctxt
->sax
);
12078 ctxt
->userData
= ctxt
;
12080 xmlDetectSAX2(ctxt
);
12083 * generate a parser input from the I/O handler
12086 pinput
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
12087 if (pinput
== NULL
) {
12088 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12089 xmlFreeParserInputBuffer(input
);
12090 xmlFreeParserCtxt(ctxt
);
12095 * plug some encoding conversion routines here.
12097 if (xmlPushInput(ctxt
, pinput
) < 0) {
12098 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12099 xmlFreeParserCtxt(ctxt
);
12102 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12103 xmlSwitchEncoding(ctxt
, enc
);
12106 pinput
->filename
= NULL
;
12109 pinput
->base
= ctxt
->input
->cur
;
12110 pinput
->cur
= ctxt
->input
->cur
;
12111 pinput
->free
= NULL
;
12114 * let's parse that entity knowing it's an external subset.
12116 ctxt
->inSubset
= 2;
12117 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12118 if (ctxt
->myDoc
== NULL
) {
12119 xmlErrMemory(ctxt
, "New Doc failed");
12122 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12123 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12124 BAD_CAST
"none", BAD_CAST
"none");
12126 if ((enc
== XML_CHAR_ENCODING_NONE
) &&
12127 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
12129 * Get the 4 first bytes and decode the charset
12130 * if enc != XML_CHAR_ENCODING_NONE
12131 * plug some encoding conversion routines.
12137 enc
= xmlDetectCharEncoding(start
, 4);
12138 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12139 xmlSwitchEncoding(ctxt
, enc
);
12143 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
12145 if (ctxt
->myDoc
!= NULL
) {
12146 if (ctxt
->wellFormed
) {
12147 ret
= ctxt
->myDoc
->extSubset
;
12148 ctxt
->myDoc
->extSubset
= NULL
;
12153 tmp
= ret
->children
;
12154 while (tmp
!= NULL
) {
12162 xmlFreeDoc(ctxt
->myDoc
);
12163 ctxt
->myDoc
= NULL
;
12165 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12166 xmlFreeParserCtxt(ctxt
);
12173 * @sax: the SAX handler block
12174 * @ExternalID: a NAME* containing the External ID of the DTD
12175 * @SystemID: a NAME* containing the URL to the DTD
12177 * Load and parse an external subset.
12179 * Returns the resulting xmlDtdPtr or NULL in case of error.
12183 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
12184 const xmlChar
*SystemID
) {
12185 xmlDtdPtr ret
= NULL
;
12186 xmlParserCtxtPtr ctxt
;
12187 xmlParserInputPtr input
= NULL
;
12188 xmlCharEncoding enc
;
12189 xmlChar
* systemIdCanonic
;
12191 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
12193 ctxt
= xmlNewParserCtxt();
12194 if (ctxt
== NULL
) {
12199 * Set-up the SAX context
12202 if (ctxt
->sax
!= NULL
)
12203 xmlFree(ctxt
->sax
);
12205 ctxt
->userData
= ctxt
;
12209 * Canonicalise the system ID
12211 systemIdCanonic
= xmlCanonicPath(SystemID
);
12212 if ((SystemID
!= NULL
) && (systemIdCanonic
== NULL
)) {
12213 xmlFreeParserCtxt(ctxt
);
12218 * Ask the Entity resolver to load the damn thing
12221 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
12222 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
,
12224 if (input
== NULL
) {
12225 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12226 xmlFreeParserCtxt(ctxt
);
12227 if (systemIdCanonic
!= NULL
)
12228 xmlFree(systemIdCanonic
);
12233 * plug some encoding conversion routines here.
12235 if (xmlPushInput(ctxt
, input
) < 0) {
12236 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12237 xmlFreeParserCtxt(ctxt
);
12238 if (systemIdCanonic
!= NULL
)
12239 xmlFree(systemIdCanonic
);
12242 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12243 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
12244 xmlSwitchEncoding(ctxt
, enc
);
12247 if (input
->filename
== NULL
)
12248 input
->filename
= (char *) systemIdCanonic
;
12250 xmlFree(systemIdCanonic
);
12253 input
->base
= ctxt
->input
->cur
;
12254 input
->cur
= ctxt
->input
->cur
;
12255 input
->free
= NULL
;
12258 * let's parse that entity knowing it's an external subset.
12260 ctxt
->inSubset
= 2;
12261 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12262 if (ctxt
->myDoc
== NULL
) {
12263 xmlErrMemory(ctxt
, "New Doc failed");
12264 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12265 xmlFreeParserCtxt(ctxt
);
12268 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12269 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12270 ExternalID
, SystemID
);
12271 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
12273 if (ctxt
->myDoc
!= NULL
) {
12274 if (ctxt
->wellFormed
) {
12275 ret
= ctxt
->myDoc
->extSubset
;
12276 ctxt
->myDoc
->extSubset
= NULL
;
12281 tmp
= ret
->children
;
12282 while (tmp
!= NULL
) {
12290 xmlFreeDoc(ctxt
->myDoc
);
12291 ctxt
->myDoc
= NULL
;
12293 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12294 xmlFreeParserCtxt(ctxt
);
12302 * @ExternalID: a NAME* containing the External ID of the DTD
12303 * @SystemID: a NAME* containing the URL to the DTD
12305 * Load and parse an external subset.
12307 * Returns the resulting xmlDtdPtr or NULL in case of error.
12311 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
12312 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
12314 #endif /* LIBXML_VALID_ENABLED */
12316 /************************************************************************
12318 * Front ends when parsing an Entity *
12320 ************************************************************************/
12323 * xmlParseCtxtExternalEntity:
12324 * @ctx: the existing parsing context
12325 * @URL: the URL for the entity to load
12326 * @ID: the System ID for the entity to load
12327 * @lst: the return value for the set of parsed nodes
12329 * Parse an external general entity within an existing parsing context
12330 * An external general parsed entity is well-formed if it matches the
12331 * production labeled extParsedEnt.
12333 * [78] extParsedEnt ::= TextDecl? content
12335 * Returns 0 if the entity is well formed, -1 in case of args problem and
12336 * the parser error code otherwise
12340 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
12341 const xmlChar
*ID
, xmlNodePtr
*lst
) {
12342 xmlParserCtxtPtr ctxt
;
12344 xmlNodePtr newRoot
;
12345 xmlSAXHandlerPtr oldsax
= NULL
;
12348 xmlCharEncoding enc
;
12350 if (ctx
== NULL
) return(-1);
12352 if (((ctx
->depth
> 40) && ((ctx
->options
& XML_PARSE_HUGE
) == 0)) ||
12353 (ctx
->depth
> 1024)) {
12354 return(XML_ERR_ENTITY_LOOP
);
12359 if ((URL
== NULL
) && (ID
== NULL
))
12361 if (ctx
->myDoc
== NULL
) /* @@ relax but check for dereferences */
12364 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, ctx
);
12365 if (ctxt
== NULL
) {
12369 oldsax
= ctxt
->sax
;
12370 ctxt
->sax
= ctx
->sax
;
12371 xmlDetectSAX2(ctxt
);
12372 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12373 if (newDoc
== NULL
) {
12374 xmlFreeParserCtxt(ctxt
);
12377 newDoc
->properties
= XML_DOC_INTERNAL
;
12378 if (ctx
->myDoc
->dict
) {
12379 newDoc
->dict
= ctx
->myDoc
->dict
;
12380 xmlDictReference(newDoc
->dict
);
12382 if (ctx
->myDoc
!= NULL
) {
12383 newDoc
->intSubset
= ctx
->myDoc
->intSubset
;
12384 newDoc
->extSubset
= ctx
->myDoc
->extSubset
;
12386 if (ctx
->myDoc
->URL
!= NULL
) {
12387 newDoc
->URL
= xmlStrdup(ctx
->myDoc
->URL
);
12389 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12390 if (newRoot
== NULL
) {
12391 ctxt
->sax
= oldsax
;
12392 xmlFreeParserCtxt(ctxt
);
12393 newDoc
->intSubset
= NULL
;
12394 newDoc
->extSubset
= NULL
;
12395 xmlFreeDoc(newDoc
);
12398 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
12399 nodePush(ctxt
, newDoc
->children
);
12400 if (ctx
->myDoc
== NULL
) {
12401 ctxt
->myDoc
= newDoc
;
12403 ctxt
->myDoc
= ctx
->myDoc
;
12404 newDoc
->children
->doc
= ctx
->myDoc
;
12408 * Get the 4 first bytes and decode the charset
12409 * if enc != XML_CHAR_ENCODING_NONE
12410 * plug some encoding conversion routines.
12413 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12418 enc
= xmlDetectCharEncoding(start
, 4);
12419 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12420 xmlSwitchEncoding(ctxt
, enc
);
12425 * Parse a possible text declaration first
12427 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12428 xmlParseTextDecl(ctxt
);
12430 * An XML-1.0 document can't reference an entity not XML-1.0
12432 if ((xmlStrEqual(ctx
->version
, BAD_CAST
"1.0")) &&
12433 (!xmlStrEqual(ctxt
->input
->version
, BAD_CAST
"1.0"))) {
12434 xmlFatalErrMsg(ctxt
, XML_ERR_VERSION_MISMATCH
,
12435 "Version mismatch between document and entity\n");
12440 * Doing validity checking on chunk doesn't make sense
12442 ctxt
->instate
= XML_PARSER_CONTENT
;
12443 ctxt
->validate
= ctx
->validate
;
12444 ctxt
->valid
= ctx
->valid
;
12445 ctxt
->loadsubset
= ctx
->loadsubset
;
12446 ctxt
->depth
= ctx
->depth
+ 1;
12447 ctxt
->replaceEntities
= ctx
->replaceEntities
;
12448 if (ctxt
->validate
) {
12449 ctxt
->vctxt
.error
= ctx
->vctxt
.error
;
12450 ctxt
->vctxt
.warning
= ctx
->vctxt
.warning
;
12452 ctxt
->vctxt
.error
= NULL
;
12453 ctxt
->vctxt
.warning
= NULL
;
12455 ctxt
->vctxt
.nodeTab
= NULL
;
12456 ctxt
->vctxt
.nodeNr
= 0;
12457 ctxt
->vctxt
.nodeMax
= 0;
12458 ctxt
->vctxt
.node
= NULL
;
12459 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
12460 ctxt
->dict
= ctx
->dict
;
12461 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12462 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12463 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12464 ctxt
->dictNames
= ctx
->dictNames
;
12465 ctxt
->attsDefault
= ctx
->attsDefault
;
12466 ctxt
->attsSpecial
= ctx
->attsSpecial
;
12467 ctxt
->linenumbers
= ctx
->linenumbers
;
12469 xmlParseContent(ctxt
);
12471 ctx
->validate
= ctxt
->validate
;
12472 ctx
->valid
= ctxt
->valid
;
12473 if ((RAW
== '<') && (NXT(1) == '/')) {
12474 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12475 } else if (RAW
!= 0) {
12476 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12478 if (ctxt
->node
!= newDoc
->children
) {
12479 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12482 if (!ctxt
->wellFormed
) {
12483 if (ctxt
->errNo
== 0)
12492 * Return the newly created nodeset after unlinking it from
12493 * they pseudo parent.
12495 cur
= newDoc
->children
->children
;
12497 while (cur
!= NULL
) {
12498 cur
->parent
= NULL
;
12501 newDoc
->children
->children
= NULL
;
12505 ctxt
->sax
= oldsax
;
12507 ctxt
->attsDefault
= NULL
;
12508 ctxt
->attsSpecial
= NULL
;
12509 xmlFreeParserCtxt(ctxt
);
12510 newDoc
->intSubset
= NULL
;
12511 newDoc
->extSubset
= NULL
;
12512 xmlFreeDoc(newDoc
);
12518 * xmlParseExternalEntityPrivate:
12519 * @doc: the document the chunk pertains to
12520 * @oldctxt: the previous parser context if available
12521 * @sax: the SAX handler bloc (possibly NULL)
12522 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12523 * @depth: Used for loop detection, use 0
12524 * @URL: the URL for the entity to load
12525 * @ID: the System ID for the entity to load
12526 * @list: the return value for the set of parsed nodes
12528 * Private version of xmlParseExternalEntity()
12530 * Returns 0 if the entity is well formed, -1 in case of args problem and
12531 * the parser error code otherwise
12534 static xmlParserErrors
12535 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
12536 xmlSAXHandlerPtr sax
,
12537 void *user_data
, int depth
, const xmlChar
*URL
,
12538 const xmlChar
*ID
, xmlNodePtr
*list
) {
12539 xmlParserCtxtPtr ctxt
;
12541 xmlNodePtr newRoot
;
12542 xmlSAXHandlerPtr oldsax
= NULL
;
12543 xmlParserErrors ret
= XML_ERR_OK
;
12545 xmlCharEncoding enc
;
12547 if (((depth
> 40) &&
12548 ((oldctxt
== NULL
) || (oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12550 return(XML_ERR_ENTITY_LOOP
);
12555 if ((URL
== NULL
) && (ID
== NULL
))
12556 return(XML_ERR_INTERNAL_ERROR
);
12558 return(XML_ERR_INTERNAL_ERROR
);
12561 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, oldctxt
);
12562 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
12563 ctxt
->userData
= ctxt
;
12564 if (oldctxt
!= NULL
) {
12565 ctxt
->_private
= oldctxt
->_private
;
12566 ctxt
->loadsubset
= oldctxt
->loadsubset
;
12567 ctxt
->validate
= oldctxt
->validate
;
12568 ctxt
->external
= oldctxt
->external
;
12569 ctxt
->record_info
= oldctxt
->record_info
;
12570 ctxt
->node_seq
.maximum
= oldctxt
->node_seq
.maximum
;
12571 ctxt
->node_seq
.length
= oldctxt
->node_seq
.length
;
12572 ctxt
->node_seq
.buffer
= oldctxt
->node_seq
.buffer
;
12575 * Doing validity checking on chunk without context
12576 * doesn't make sense
12578 ctxt
->_private
= NULL
;
12579 ctxt
->validate
= 0;
12580 ctxt
->external
= 2;
12581 ctxt
->loadsubset
= 0;
12584 oldsax
= ctxt
->sax
;
12586 if (user_data
!= NULL
)
12587 ctxt
->userData
= user_data
;
12589 xmlDetectSAX2(ctxt
);
12590 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12591 if (newDoc
== NULL
) {
12592 ctxt
->node_seq
.maximum
= 0;
12593 ctxt
->node_seq
.length
= 0;
12594 ctxt
->node_seq
.buffer
= NULL
;
12595 xmlFreeParserCtxt(ctxt
);
12596 return(XML_ERR_INTERNAL_ERROR
);
12598 newDoc
->properties
= XML_DOC_INTERNAL
;
12599 newDoc
->intSubset
= doc
->intSubset
;
12600 newDoc
->extSubset
= doc
->extSubset
;
12601 newDoc
->dict
= doc
->dict
;
12602 xmlDictReference(newDoc
->dict
);
12604 if (doc
->URL
!= NULL
) {
12605 newDoc
->URL
= xmlStrdup(doc
->URL
);
12607 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12608 if (newRoot
== NULL
) {
12610 ctxt
->sax
= oldsax
;
12611 ctxt
->node_seq
.maximum
= 0;
12612 ctxt
->node_seq
.length
= 0;
12613 ctxt
->node_seq
.buffer
= NULL
;
12614 xmlFreeParserCtxt(ctxt
);
12615 newDoc
->intSubset
= NULL
;
12616 newDoc
->extSubset
= NULL
;
12617 xmlFreeDoc(newDoc
);
12618 return(XML_ERR_INTERNAL_ERROR
);
12620 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
12621 nodePush(ctxt
, newDoc
->children
);
12623 newRoot
->doc
= doc
;
12626 * Get the 4 first bytes and decode the charset
12627 * if enc != XML_CHAR_ENCODING_NONE
12628 * plug some encoding conversion routines.
12631 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12636 enc
= xmlDetectCharEncoding(start
, 4);
12637 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12638 xmlSwitchEncoding(ctxt
, enc
);
12643 * Parse a possible text declaration first
12645 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12646 xmlParseTextDecl(ctxt
);
12649 ctxt
->instate
= XML_PARSER_CONTENT
;
12650 ctxt
->depth
= depth
;
12652 xmlParseContent(ctxt
);
12654 if ((RAW
== '<') && (NXT(1) == '/')) {
12655 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12656 } else if (RAW
!= 0) {
12657 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12659 if (ctxt
->node
!= newDoc
->children
) {
12660 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12663 if (!ctxt
->wellFormed
) {
12664 if (ctxt
->errNo
== 0)
12665 ret
= XML_ERR_INTERNAL_ERROR
;
12667 ret
= (xmlParserErrors
)ctxt
->errNo
;
12669 if (list
!= NULL
) {
12673 * Return the newly created nodeset after unlinking it from
12674 * they pseudo parent.
12676 cur
= newDoc
->children
->children
;
12678 while (cur
!= NULL
) {
12679 cur
->parent
= NULL
;
12682 newDoc
->children
->children
= NULL
;
12688 * Record in the parent context the number of entities replacement
12689 * done when parsing that reference.
12691 if (oldctxt
!= NULL
)
12692 oldctxt
->nbentities
+= ctxt
->nbentities
;
12695 * Also record the size of the entity parsed
12697 if (ctxt
->input
!= NULL
) {
12698 oldctxt
->sizeentities
+= ctxt
->input
->consumed
;
12699 oldctxt
->sizeentities
+= (ctxt
->input
->cur
- ctxt
->input
->base
);
12702 * And record the last error if any
12704 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
12705 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
12708 ctxt
->sax
= oldsax
;
12709 oldctxt
->node_seq
.maximum
= ctxt
->node_seq
.maximum
;
12710 oldctxt
->node_seq
.length
= ctxt
->node_seq
.length
;
12711 oldctxt
->node_seq
.buffer
= ctxt
->node_seq
.buffer
;
12712 ctxt
->node_seq
.maximum
= 0;
12713 ctxt
->node_seq
.length
= 0;
12714 ctxt
->node_seq
.buffer
= NULL
;
12715 xmlFreeParserCtxt(ctxt
);
12716 newDoc
->intSubset
= NULL
;
12717 newDoc
->extSubset
= NULL
;
12718 xmlFreeDoc(newDoc
);
12723 #ifdef LIBXML_SAX1_ENABLED
12725 * xmlParseExternalEntity:
12726 * @doc: the document the chunk pertains to
12727 * @sax: the SAX handler bloc (possibly NULL)
12728 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12729 * @depth: Used for loop detection, use 0
12730 * @URL: the URL for the entity to load
12731 * @ID: the System ID for the entity to load
12732 * @lst: the return value for the set of parsed nodes
12734 * Parse an external general entity
12735 * An external general parsed entity is well-formed if it matches the
12736 * production labeled extParsedEnt.
12738 * [78] extParsedEnt ::= TextDecl? content
12740 * Returns 0 if the entity is well formed, -1 in case of args problem and
12741 * the parser error code otherwise
12745 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
12746 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
12747 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
12752 * xmlParseBalancedChunkMemory:
12753 * @doc: the document the chunk pertains to
12754 * @sax: the SAX handler bloc (possibly NULL)
12755 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12756 * @depth: Used for loop detection, use 0
12757 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12758 * @lst: the return value for the set of parsed nodes
12760 * Parse a well-balanced chunk of an XML document
12761 * called by the parser
12762 * The allowed sequence for the Well Balanced Chunk is the one defined by
12763 * the content production in the XML grammar:
12765 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12767 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12768 * the parser error code otherwise
12772 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
12773 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
12774 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
12775 depth
, string
, lst
, 0 );
12777 #endif /* LIBXML_SAX1_ENABLED */
12780 * xmlParseBalancedChunkMemoryInternal:
12781 * @oldctxt: the existing parsing context
12782 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12783 * @user_data: the user data field for the parser context
12784 * @lst: the return value for the set of parsed nodes
12787 * Parse a well-balanced chunk of an XML document
12788 * called by the parser
12789 * The allowed sequence for the Well Balanced Chunk is the one defined by
12790 * the content production in the XML grammar:
12792 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12794 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12795 * error code otherwise
12797 * In case recover is set to 1, the nodelist will not be empty even if
12798 * the parsed chunk is not well balanced.
12800 static xmlParserErrors
12801 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
12802 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
12803 xmlParserCtxtPtr ctxt
;
12804 xmlDocPtr newDoc
= NULL
;
12805 xmlNodePtr newRoot
;
12806 xmlSAXHandlerPtr oldsax
= NULL
;
12807 xmlNodePtr content
= NULL
;
12808 xmlNodePtr last
= NULL
;
12810 xmlParserErrors ret
= XML_ERR_OK
;
12815 if (((oldctxt
->depth
> 40) && ((oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12816 (oldctxt
->depth
> 1024)) {
12817 return(XML_ERR_ENTITY_LOOP
);
12823 if (string
== NULL
)
12824 return(XML_ERR_INTERNAL_ERROR
);
12826 size
= xmlStrlen(string
);
12828 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
12829 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
12830 if (user_data
!= NULL
)
12831 ctxt
->userData
= user_data
;
12833 ctxt
->userData
= ctxt
;
12834 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
12835 ctxt
->dict
= oldctxt
->dict
;
12836 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12837 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12838 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12841 /* propagate namespaces down the entity */
12842 for (i
= 0;i
< oldctxt
->nsNr
;i
+= 2) {
12843 nsPush(ctxt
, oldctxt
->nsTab
[i
], oldctxt
->nsTab
[i
+1]);
12847 oldsax
= ctxt
->sax
;
12848 ctxt
->sax
= oldctxt
->sax
;
12849 xmlDetectSAX2(ctxt
);
12850 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
12851 ctxt
->options
= oldctxt
->options
;
12853 ctxt
->_private
= oldctxt
->_private
;
12854 if (oldctxt
->myDoc
== NULL
) {
12855 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12856 if (newDoc
== NULL
) {
12857 ctxt
->sax
= oldsax
;
12859 xmlFreeParserCtxt(ctxt
);
12860 return(XML_ERR_INTERNAL_ERROR
);
12862 newDoc
->properties
= XML_DOC_INTERNAL
;
12863 newDoc
->dict
= ctxt
->dict
;
12864 xmlDictReference(newDoc
->dict
);
12865 ctxt
->myDoc
= newDoc
;
12867 ctxt
->myDoc
= oldctxt
->myDoc
;
12868 content
= ctxt
->myDoc
->children
;
12869 last
= ctxt
->myDoc
->last
;
12871 newRoot
= xmlNewDocNode(ctxt
->myDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12872 if (newRoot
== NULL
) {
12873 ctxt
->sax
= oldsax
;
12875 xmlFreeParserCtxt(ctxt
);
12876 if (newDoc
!= NULL
) {
12877 xmlFreeDoc(newDoc
);
12879 return(XML_ERR_INTERNAL_ERROR
);
12881 ctxt
->myDoc
->children
= NULL
;
12882 ctxt
->myDoc
->last
= NULL
;
12883 xmlAddChild((xmlNodePtr
) ctxt
->myDoc
, newRoot
);
12884 nodePush(ctxt
, ctxt
->myDoc
->children
);
12885 ctxt
->instate
= XML_PARSER_CONTENT
;
12886 ctxt
->depth
= oldctxt
->depth
+ 1;
12888 ctxt
->validate
= 0;
12889 ctxt
->loadsubset
= oldctxt
->loadsubset
;
12890 if ((oldctxt
->validate
) || (oldctxt
->replaceEntities
!= 0)) {
12892 * ID/IDREF registration will be done in xmlValidateElement below
12894 ctxt
->loadsubset
|= XML_SKIP_IDS
;
12896 ctxt
->dictNames
= oldctxt
->dictNames
;
12897 ctxt
->attsDefault
= oldctxt
->attsDefault
;
12898 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
12900 xmlParseContent(ctxt
);
12901 if ((RAW
== '<') && (NXT(1) == '/')) {
12902 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12903 } else if (RAW
!= 0) {
12904 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12906 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
12907 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12910 if (!ctxt
->wellFormed
) {
12911 if (ctxt
->errNo
== 0)
12912 ret
= XML_ERR_INTERNAL_ERROR
;
12914 ret
= (xmlParserErrors
)ctxt
->errNo
;
12919 if ((lst
!= NULL
) && (ret
== XML_ERR_OK
)) {
12923 * Return the newly created nodeset after unlinking it from
12924 * they pseudo parent.
12926 cur
= ctxt
->myDoc
->children
->children
;
12928 while (cur
!= NULL
) {
12929 #ifdef LIBXML_VALID_ENABLED
12930 if ((oldctxt
->validate
) && (oldctxt
->wellFormed
) &&
12931 (oldctxt
->myDoc
) && (oldctxt
->myDoc
->intSubset
) &&
12932 (cur
->type
== XML_ELEMENT_NODE
)) {
12933 oldctxt
->valid
&= xmlValidateElement(&oldctxt
->vctxt
,
12934 oldctxt
->myDoc
, cur
);
12936 #endif /* LIBXML_VALID_ENABLED */
12937 cur
->parent
= NULL
;
12940 ctxt
->myDoc
->children
->children
= NULL
;
12942 if (ctxt
->myDoc
!= NULL
) {
12943 xmlFreeNode(ctxt
->myDoc
->children
);
12944 ctxt
->myDoc
->children
= content
;
12945 ctxt
->myDoc
->last
= last
;
12949 * Record in the parent context the number of entities replacement
12950 * done when parsing that reference.
12952 if (oldctxt
!= NULL
)
12953 oldctxt
->nbentities
+= ctxt
->nbentities
;
12956 * Also record the last error if any
12958 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
12959 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
12961 ctxt
->sax
= oldsax
;
12963 ctxt
->attsDefault
= NULL
;
12964 ctxt
->attsSpecial
= NULL
;
12965 xmlFreeParserCtxt(ctxt
);
12966 if (newDoc
!= NULL
) {
12967 xmlFreeDoc(newDoc
);
12974 * xmlParseInNodeContext:
12975 * @node: the context node
12976 * @data: the input string
12977 * @datalen: the input string length in bytes
12978 * @options: a combination of xmlParserOption
12979 * @lst: the return value for the set of parsed nodes
12981 * Parse a well-balanced chunk of an XML document
12982 * within the context (DTD, namespaces, etc ...) of the given node.
12984 * The allowed sequence for the data is a Well Balanced Chunk defined by
12985 * the content production in the XML grammar:
12987 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12989 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12990 * error code otherwise
12993 xmlParseInNodeContext(xmlNodePtr node
, const char *data
, int datalen
,
12994 int options
, xmlNodePtr
*lst
) {
12996 xmlParserCtxtPtr ctxt
;
12997 xmlDocPtr doc
= NULL
;
12998 xmlNodePtr fake
, cur
;
13001 xmlParserErrors ret
= XML_ERR_OK
;
13004 * check all input parameters, grab the document
13006 if ((lst
== NULL
) || (node
== NULL
) || (data
== NULL
) || (datalen
< 0))
13007 return(XML_ERR_INTERNAL_ERROR
);
13008 switch (node
->type
) {
13009 case XML_ELEMENT_NODE
:
13010 case XML_ATTRIBUTE_NODE
:
13011 case XML_TEXT_NODE
:
13012 case XML_CDATA_SECTION_NODE
:
13013 case XML_ENTITY_REF_NODE
:
13015 case XML_COMMENT_NODE
:
13016 case XML_DOCUMENT_NODE
:
13017 case XML_HTML_DOCUMENT_NODE
:
13020 return(XML_ERR_INTERNAL_ERROR
);
13023 while ((node
!= NULL
) && (node
->type
!= XML_ELEMENT_NODE
) &&
13024 (node
->type
!= XML_DOCUMENT_NODE
) &&
13025 (node
->type
!= XML_HTML_DOCUMENT_NODE
))
13026 node
= node
->parent
;
13028 return(XML_ERR_INTERNAL_ERROR
);
13029 if (node
->type
== XML_ELEMENT_NODE
)
13032 doc
= (xmlDocPtr
) node
;
13034 return(XML_ERR_INTERNAL_ERROR
);
13037 * allocate a context and set-up everything not related to the
13038 * node position in the tree
13040 if (doc
->type
== XML_DOCUMENT_NODE
)
13041 ctxt
= xmlCreateMemoryParserCtxt((char *) data
, datalen
);
13042 #ifdef LIBXML_HTML_ENABLED
13043 else if (doc
->type
== XML_HTML_DOCUMENT_NODE
) {
13044 ctxt
= htmlCreateMemoryParserCtxt((char *) data
, datalen
);
13046 * When parsing in context, it makes no sense to add implied
13047 * elements like html/body/etc...
13049 options
|= HTML_PARSE_NOIMPLIED
;
13053 return(XML_ERR_INTERNAL_ERROR
);
13056 return(XML_ERR_NO_MEMORY
);
13059 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13060 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13061 * we must wait until the last moment to free the original one.
13063 if (doc
->dict
!= NULL
) {
13064 if (ctxt
->dict
!= NULL
)
13065 xmlDictFree(ctxt
->dict
);
13066 ctxt
->dict
= doc
->dict
;
13068 options
|= XML_PARSE_NODICT
;
13070 if (doc
->encoding
!= NULL
) {
13071 xmlCharEncodingHandlerPtr hdlr
;
13073 if (ctxt
->encoding
!= NULL
)
13074 xmlFree((xmlChar
*) ctxt
->encoding
);
13075 ctxt
->encoding
= xmlStrdup((const xmlChar
*) doc
->encoding
);
13077 hdlr
= xmlFindCharEncodingHandler(doc
->encoding
);
13078 if (hdlr
!= NULL
) {
13079 xmlSwitchToEncoding(ctxt
, hdlr
);
13081 return(XML_ERR_UNSUPPORTED_ENCODING
);
13085 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13086 xmlDetectSAX2(ctxt
);
13089 fake
= xmlNewComment(NULL
);
13090 if (fake
== NULL
) {
13091 xmlFreeParserCtxt(ctxt
);
13092 return(XML_ERR_NO_MEMORY
);
13094 xmlAddChild(node
, fake
);
13096 if (node
->type
== XML_ELEMENT_NODE
) {
13097 nodePush(ctxt
, node
);
13099 * initialize the SAX2 namespaces stack
13102 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_NODE
)) {
13103 xmlNsPtr ns
= cur
->nsDef
;
13104 const xmlChar
*iprefix
, *ihref
;
13106 while (ns
!= NULL
) {
13108 iprefix
= xmlDictLookup(ctxt
->dict
, ns
->prefix
, -1);
13109 ihref
= xmlDictLookup(ctxt
->dict
, ns
->href
, -1);
13111 iprefix
= ns
->prefix
;
13115 if (xmlGetNamespace(ctxt
, iprefix
) == NULL
) {
13116 nsPush(ctxt
, iprefix
, ihref
);
13123 ctxt
->instate
= XML_PARSER_CONTENT
;
13126 if ((ctxt
->validate
) || (ctxt
->replaceEntities
!= 0)) {
13128 * ID/IDREF registration will be done in xmlValidateElement below
13130 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13133 #ifdef LIBXML_HTML_ENABLED
13134 if (doc
->type
== XML_HTML_DOCUMENT_NODE
)
13135 __htmlParseContent(ctxt
);
13138 xmlParseContent(ctxt
);
13141 if ((RAW
== '<') && (NXT(1) == '/')) {
13142 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13143 } else if (RAW
!= 0) {
13144 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13146 if ((ctxt
->node
!= NULL
) && (ctxt
->node
!= node
)) {
13147 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13148 ctxt
->wellFormed
= 0;
13151 if (!ctxt
->wellFormed
) {
13152 if (ctxt
->errNo
== 0)
13153 ret
= XML_ERR_INTERNAL_ERROR
;
13155 ret
= (xmlParserErrors
)ctxt
->errNo
;
13161 * Return the newly created nodeset after unlinking it from
13162 * the pseudo sibling.
13175 while (cur
!= NULL
) {
13176 cur
->parent
= NULL
;
13180 xmlUnlinkNode(fake
);
13184 if (ret
!= XML_ERR_OK
) {
13185 xmlFreeNodeList(*lst
);
13189 if (doc
->dict
!= NULL
)
13191 xmlFreeParserCtxt(ctxt
);
13195 return(XML_ERR_INTERNAL_ERROR
);
13199 #ifdef LIBXML_SAX1_ENABLED
13201 * xmlParseBalancedChunkMemoryRecover:
13202 * @doc: the document the chunk pertains to
13203 * @sax: the SAX handler bloc (possibly NULL)
13204 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13205 * @depth: Used for loop detection, use 0
13206 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13207 * @lst: the return value for the set of parsed nodes
13208 * @recover: return nodes even if the data is broken (use 0)
13211 * Parse a well-balanced chunk of an XML document
13212 * called by the parser
13213 * The allowed sequence for the Well Balanced Chunk is the one defined by
13214 * the content production in the XML grammar:
13216 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13218 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13219 * the parser error code otherwise
13221 * In case recover is set to 1, the nodelist will not be empty even if
13222 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13226 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13227 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
,
13229 xmlParserCtxtPtr ctxt
;
13231 xmlSAXHandlerPtr oldsax
= NULL
;
13232 xmlNodePtr content
, newRoot
;
13237 return(XML_ERR_ENTITY_LOOP
);
13243 if (string
== NULL
)
13246 size
= xmlStrlen(string
);
13248 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13249 if (ctxt
== NULL
) return(-1);
13250 ctxt
->userData
= ctxt
;
13252 oldsax
= ctxt
->sax
;
13254 if (user_data
!= NULL
)
13255 ctxt
->userData
= user_data
;
13257 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13258 if (newDoc
== NULL
) {
13259 xmlFreeParserCtxt(ctxt
);
13262 newDoc
->properties
= XML_DOC_INTERNAL
;
13263 if ((doc
!= NULL
) && (doc
->dict
!= NULL
)) {
13264 xmlDictFree(ctxt
->dict
);
13265 ctxt
->dict
= doc
->dict
;
13266 xmlDictReference(ctxt
->dict
);
13267 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13268 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13269 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13270 ctxt
->dictNames
= 1;
13272 xmlCtxtUseOptionsInternal(ctxt
, XML_PARSE_NODICT
, NULL
);
13275 newDoc
->intSubset
= doc
->intSubset
;
13276 newDoc
->extSubset
= doc
->extSubset
;
13278 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13279 if (newRoot
== NULL
) {
13281 ctxt
->sax
= oldsax
;
13282 xmlFreeParserCtxt(ctxt
);
13283 newDoc
->intSubset
= NULL
;
13284 newDoc
->extSubset
= NULL
;
13285 xmlFreeDoc(newDoc
);
13288 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13289 nodePush(ctxt
, newRoot
);
13291 ctxt
->myDoc
= newDoc
;
13293 ctxt
->myDoc
= newDoc
;
13294 newDoc
->children
->doc
= doc
;
13295 /* Ensure that doc has XML spec namespace */
13296 xmlSearchNsByHref(doc
, (xmlNodePtr
)doc
, XML_XML_NAMESPACE
);
13297 newDoc
->oldNs
= doc
->oldNs
;
13299 ctxt
->instate
= XML_PARSER_CONTENT
;
13300 ctxt
->depth
= depth
;
13303 * Doing validity checking on chunk doesn't make sense
13305 ctxt
->validate
= 0;
13306 ctxt
->loadsubset
= 0;
13307 xmlDetectSAX2(ctxt
);
13309 if ( doc
!= NULL
){
13310 content
= doc
->children
;
13311 doc
->children
= NULL
;
13312 xmlParseContent(ctxt
);
13313 doc
->children
= content
;
13316 xmlParseContent(ctxt
);
13318 if ((RAW
== '<') && (NXT(1) == '/')) {
13319 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13320 } else if (RAW
!= 0) {
13321 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13323 if (ctxt
->node
!= newDoc
->children
) {
13324 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13327 if (!ctxt
->wellFormed
) {
13328 if (ctxt
->errNo
== 0)
13336 if ((lst
!= NULL
) && ((ret
== 0) || (recover
== 1))) {
13340 * Return the newly created nodeset after unlinking it from
13341 * they pseudo parent.
13343 cur
= newDoc
->children
->children
;
13345 while (cur
!= NULL
) {
13346 xmlSetTreeDoc(cur
, doc
);
13347 cur
->parent
= NULL
;
13350 newDoc
->children
->children
= NULL
;
13354 ctxt
->sax
= oldsax
;
13355 xmlFreeParserCtxt(ctxt
);
13356 newDoc
->intSubset
= NULL
;
13357 newDoc
->extSubset
= NULL
;
13358 newDoc
->oldNs
= NULL
;
13359 xmlFreeDoc(newDoc
);
13365 * xmlSAXParseEntity:
13366 * @sax: the SAX handler block
13367 * @filename: the filename
13369 * parse an XML external entity out of context and build a tree.
13370 * It use the given SAX function block to handle the parsing callback.
13371 * If sax is NULL, fallback to the default DOM tree building routines.
13373 * [78] extParsedEnt ::= TextDecl? content
13375 * This correspond to a "Well Balanced" chunk
13377 * Returns the resulting document tree
13381 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
13383 xmlParserCtxtPtr ctxt
;
13385 ctxt
= xmlCreateFileParserCtxt(filename
);
13386 if (ctxt
== NULL
) {
13390 if (ctxt
->sax
!= NULL
)
13391 xmlFree(ctxt
->sax
);
13393 ctxt
->userData
= NULL
;
13396 xmlParseExtParsedEnt(ctxt
);
13398 if (ctxt
->wellFormed
)
13402 xmlFreeDoc(ctxt
->myDoc
);
13403 ctxt
->myDoc
= NULL
;
13407 xmlFreeParserCtxt(ctxt
);
13414 * @filename: the filename
13416 * parse an XML external entity out of context and build a tree.
13418 * [78] extParsedEnt ::= TextDecl? content
13420 * This correspond to a "Well Balanced" chunk
13422 * Returns the resulting document tree
13426 xmlParseEntity(const char *filename
) {
13427 return(xmlSAXParseEntity(NULL
, filename
));
13429 #endif /* LIBXML_SAX1_ENABLED */
13432 * xmlCreateEntityParserCtxtInternal:
13433 * @URL: the entity URL
13434 * @ID: the entity PUBLIC ID
13435 * @base: a possible base for the target URI
13436 * @pctx: parser context used to set options on new context
13438 * Create a parser context for an external entity
13439 * Automatic support for ZLIB/Compress compressed document is provided
13440 * by default if found at compile-time.
13442 * Returns the new parser context or NULL
13444 static xmlParserCtxtPtr
13445 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
13446 const xmlChar
*base
, xmlParserCtxtPtr pctx
) {
13447 xmlParserCtxtPtr ctxt
;
13448 xmlParserInputPtr inputStream
;
13449 char *directory
= NULL
;
13452 ctxt
= xmlNewParserCtxt();
13453 if (ctxt
== NULL
) {
13457 if (pctx
!= NULL
) {
13458 ctxt
->options
= pctx
->options
;
13459 ctxt
->_private
= pctx
->_private
;
13462 uri
= xmlBuildURI(URL
, base
);
13465 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
13466 if (inputStream
== NULL
) {
13467 xmlFreeParserCtxt(ctxt
);
13471 inputPush(ctxt
, inputStream
);
13473 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13474 directory
= xmlParserGetDirectory((char *)URL
);
13475 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13476 ctxt
->directory
= directory
;
13478 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
13479 if (inputStream
== NULL
) {
13481 xmlFreeParserCtxt(ctxt
);
13485 inputPush(ctxt
, inputStream
);
13487 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13488 directory
= xmlParserGetDirectory((char *)uri
);
13489 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13490 ctxt
->directory
= directory
;
13497 * xmlCreateEntityParserCtxt:
13498 * @URL: the entity URL
13499 * @ID: the entity PUBLIC ID
13500 * @base: a possible base for the target URI
13502 * Create a parser context for an external entity
13503 * Automatic support for ZLIB/Compress compressed document is provided
13504 * by default if found at compile-time.
13506 * Returns the new parser context or NULL
13509 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
13510 const xmlChar
*base
) {
13511 return xmlCreateEntityParserCtxtInternal(URL
, ID
, base
, NULL
);
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
13522 * xmlCreateURLParserCtxt:
13523 * @filename: the filename or URL
13524 * @options: a combination of xmlParserOption
13526 * Create a parser context for a file or URL content.
13527 * Automatic support for ZLIB/Compress compressed document is provided
13528 * by default if found at compile-time and for file accesses
13530 * Returns the new parser context or NULL
13533 xmlCreateURLParserCtxt(const char *filename
, int options
)
13535 xmlParserCtxtPtr ctxt
;
13536 xmlParserInputPtr inputStream
;
13537 char *directory
= NULL
;
13539 ctxt
= xmlNewParserCtxt();
13540 if (ctxt
== NULL
) {
13541 xmlErrMemory(NULL
, "cannot allocate parser context");
13546 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13547 ctxt
->linenumbers
= 1;
13549 inputStream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
13550 if (inputStream
== NULL
) {
13551 xmlFreeParserCtxt(ctxt
);
13555 inputPush(ctxt
, inputStream
);
13556 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13557 directory
= xmlParserGetDirectory(filename
);
13558 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13559 ctxt
->directory
= directory
;
13565 * xmlCreateFileParserCtxt:
13566 * @filename: the filename
13568 * Create a parser context for a file content.
13569 * Automatic support for ZLIB/Compress compressed document is provided
13570 * by default if found at compile-time.
13572 * Returns the new parser context or NULL
13575 xmlCreateFileParserCtxt(const char *filename
)
13577 return(xmlCreateURLParserCtxt(filename
, 0));
13580 #ifdef LIBXML_SAX1_ENABLED
13582 * xmlSAXParseFileWithData:
13583 * @sax: the SAX handler block
13584 * @filename: the filename
13585 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13587 * @data: the userdata
13589 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13590 * compressed document is provided by default if found at compile-time.
13591 * It use the given SAX function block to handle the parsing callback.
13592 * If sax is NULL, fallback to the default DOM tree building routines.
13594 * User data (void *) is stored within the parser context in the
13595 * context's _private member, so it is available nearly everywhere in libxml
13597 * Returns the resulting document tree
13601 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
13602 int recovery
, void *data
) {
13604 xmlParserCtxtPtr ctxt
;
13608 ctxt
= xmlCreateFileParserCtxt(filename
);
13609 if (ctxt
== NULL
) {
13613 if (ctxt
->sax
!= NULL
)
13614 xmlFree(ctxt
->sax
);
13617 xmlDetectSAX2(ctxt
);
13619 ctxt
->_private
= data
;
13622 if (ctxt
->directory
== NULL
)
13623 ctxt
->directory
= xmlParserGetDirectory(filename
);
13625 ctxt
->recovery
= recovery
;
13627 xmlParseDocument(ctxt
);
13629 if ((ctxt
->wellFormed
) || recovery
) {
13632 if (ctxt
->input
->buf
->compressed
> 0)
13633 ret
->compression
= 9;
13635 ret
->compression
= ctxt
->input
->buf
->compressed
;
13640 xmlFreeDoc(ctxt
->myDoc
);
13641 ctxt
->myDoc
= NULL
;
13645 xmlFreeParserCtxt(ctxt
);
13652 * @sax: the SAX handler block
13653 * @filename: the filename
13654 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13657 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13658 * compressed document is provided by default if found at compile-time.
13659 * It use the given SAX function block to handle the parsing callback.
13660 * If sax is NULL, fallback to the default DOM tree building routines.
13662 * Returns the resulting document tree
13666 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
13668 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
13673 * @cur: a pointer to an array of xmlChar
13675 * parse an XML in-memory document and build a tree.
13676 * In the case the document is not Well Formed, a attempt to build a
13677 * tree is tried anyway
13679 * Returns the resulting document tree or NULL in case of failure
13683 xmlRecoverDoc(const xmlChar
*cur
) {
13684 return(xmlSAXParseDoc(NULL
, cur
, 1));
13689 * @filename: the filename
13691 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13692 * compressed document is provided by default if found at compile-time.
13694 * Returns the resulting document tree if the file was wellformed,
13699 xmlParseFile(const char *filename
) {
13700 return(xmlSAXParseFile(NULL
, filename
, 0));
13705 * @filename: the filename
13707 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13708 * compressed document is provided by default if found at compile-time.
13709 * In the case the document is not Well Formed, it attempts to build
13712 * Returns the resulting document tree or NULL in case of failure
13716 xmlRecoverFile(const char *filename
) {
13717 return(xmlSAXParseFile(NULL
, filename
, 1));
13722 * xmlSetupParserForBuffer:
13723 * @ctxt: an XML parser context
13724 * @buffer: a xmlChar * buffer
13725 * @filename: a file name
13727 * Setup the parser context to parse a new buffer; Clears any prior
13728 * contents from the parser context. The buffer parameter must not be
13729 * NULL, but the filename parameter can be
13732 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
13733 const char* filename
)
13735 xmlParserInputPtr input
;
13737 if ((ctxt
== NULL
) || (buffer
== NULL
))
13740 input
= xmlNewInputStream(ctxt
);
13741 if (input
== NULL
) {
13742 xmlErrMemory(NULL
, "parsing new buffer: out of memory\n");
13743 xmlClearParserCtxt(ctxt
);
13747 xmlClearParserCtxt(ctxt
);
13748 if (filename
!= NULL
)
13749 input
->filename
= (char *) xmlCanonicPath((const xmlChar
*)filename
);
13750 input
->base
= buffer
;
13751 input
->cur
= buffer
;
13752 input
->end
= &buffer
[xmlStrlen(buffer
)];
13753 inputPush(ctxt
, input
);
13757 * xmlSAXUserParseFile:
13758 * @sax: a SAX handler
13759 * @user_data: The user data returned on SAX callbacks
13760 * @filename: a file name
13762 * parse an XML file and call the given SAX handler routines.
13763 * Automatic support for ZLIB/Compress compressed document is provided
13765 * Returns 0 in case of success or a error number otherwise
13768 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
13769 const char *filename
) {
13771 xmlParserCtxtPtr ctxt
;
13773 ctxt
= xmlCreateFileParserCtxt(filename
);
13774 if (ctxt
== NULL
) return -1;
13775 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
13776 xmlFree(ctxt
->sax
);
13778 xmlDetectSAX2(ctxt
);
13780 if (user_data
!= NULL
)
13781 ctxt
->userData
= user_data
;
13783 xmlParseDocument(ctxt
);
13785 if (ctxt
->wellFormed
)
13788 if (ctxt
->errNo
!= 0)
13795 if (ctxt
->myDoc
!= NULL
) {
13796 xmlFreeDoc(ctxt
->myDoc
);
13797 ctxt
->myDoc
= NULL
;
13799 xmlFreeParserCtxt(ctxt
);
13803 #endif /* LIBXML_SAX1_ENABLED */
13805 /************************************************************************
13807 * Front ends when parsing from memory *
13809 ************************************************************************/
13812 * xmlCreateMemoryParserCtxt:
13813 * @buffer: a pointer to a char array
13814 * @size: the size of the array
13816 * Create a parser context for an XML in-memory document.
13818 * Returns the new parser context or NULL
13821 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
13822 xmlParserCtxtPtr ctxt
;
13823 xmlParserInputPtr input
;
13824 xmlParserInputBufferPtr buf
;
13826 if (buffer
== NULL
)
13831 ctxt
= xmlNewParserCtxt();
13835 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13836 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
13838 xmlFreeParserCtxt(ctxt
);
13842 input
= xmlNewInputStream(ctxt
);
13843 if (input
== NULL
) {
13844 xmlFreeParserInputBuffer(buf
);
13845 xmlFreeParserCtxt(ctxt
);
13849 input
->filename
= NULL
;
13851 input
->base
= input
->buf
->buffer
->content
;
13852 input
->cur
= input
->buf
->buffer
->content
;
13853 input
->end
= &input
->buf
->buffer
->content
[input
->buf
->buffer
->use
];
13855 inputPush(ctxt
, input
);
13859 #ifdef LIBXML_SAX1_ENABLED
13861 * xmlSAXParseMemoryWithData:
13862 * @sax: the SAX handler block
13863 * @buffer: an pointer to a char array
13864 * @size: the size of the array
13865 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13867 * @data: the userdata
13869 * parse an XML in-memory block and use the given SAX function block
13870 * to handle the parsing callback. If sax is NULL, fallback to the default
13871 * DOM tree building routines.
13873 * User data (void *) is stored within the parser context in the
13874 * context's _private member, so it is available nearly everywhere in libxml
13876 * Returns the resulting document tree
13880 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
13881 int size
, int recovery
, void *data
) {
13883 xmlParserCtxtPtr ctxt
;
13887 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
13888 if (ctxt
== NULL
) return(NULL
);
13890 if (ctxt
->sax
!= NULL
)
13891 xmlFree(ctxt
->sax
);
13894 xmlDetectSAX2(ctxt
);
13896 ctxt
->_private
=data
;
13899 ctxt
->recovery
= recovery
;
13901 xmlParseDocument(ctxt
);
13903 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
13906 xmlFreeDoc(ctxt
->myDoc
);
13907 ctxt
->myDoc
= NULL
;
13911 xmlFreeParserCtxt(ctxt
);
13917 * xmlSAXParseMemory:
13918 * @sax: the SAX handler block
13919 * @buffer: an pointer to a char array
13920 * @size: the size of the array
13921 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13924 * parse an XML in-memory block and use the given SAX function block
13925 * to handle the parsing callback. If sax is NULL, fallback to the default
13926 * DOM tree building routines.
13928 * Returns the resulting document tree
13931 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
13932 int size
, int recovery
) {
13933 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
13938 * @buffer: an pointer to a char array
13939 * @size: the size of the array
13941 * parse an XML in-memory block and build a tree.
13943 * Returns the resulting document tree
13946 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
13947 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
13951 * xmlRecoverMemory:
13952 * @buffer: an pointer to a char array
13953 * @size: the size of the array
13955 * parse an XML in-memory block and build a tree.
13956 * In the case the document is not Well Formed, an attempt to
13957 * build a tree is tried anyway
13959 * Returns the resulting document tree or NULL in case of error
13962 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
13963 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
13967 * xmlSAXUserParseMemory:
13968 * @sax: a SAX handler
13969 * @user_data: The user data returned on SAX callbacks
13970 * @buffer: an in-memory XML document input
13971 * @size: the length of the XML document in bytes
13973 * A better SAX parsing routine.
13974 * parse an XML in-memory buffer and call the given SAX handler routines.
13976 * Returns 0 in case of success or a error number otherwise
13978 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
13979 const char *buffer
, int size
) {
13981 xmlParserCtxtPtr ctxt
;
13985 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
13986 if (ctxt
== NULL
) return -1;
13987 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
13988 xmlFree(ctxt
->sax
);
13990 xmlDetectSAX2(ctxt
);
13992 if (user_data
!= NULL
)
13993 ctxt
->userData
= user_data
;
13995 xmlParseDocument(ctxt
);
13997 if (ctxt
->wellFormed
)
14000 if (ctxt
->errNo
!= 0)
14007 if (ctxt
->myDoc
!= NULL
) {
14008 xmlFreeDoc(ctxt
->myDoc
);
14009 ctxt
->myDoc
= NULL
;
14011 xmlFreeParserCtxt(ctxt
);
14015 #endif /* LIBXML_SAX1_ENABLED */
14018 * xmlCreateDocParserCtxt:
14019 * @cur: a pointer to an array of xmlChar
14021 * Creates a parser context for an XML in-memory document.
14023 * Returns the new parser context or NULL
14026 xmlCreateDocParserCtxt(const xmlChar
*cur
) {
14031 len
= xmlStrlen(cur
);
14032 return(xmlCreateMemoryParserCtxt((const char *)cur
, len
));
14035 #ifdef LIBXML_SAX1_ENABLED
14038 * @sax: the SAX handler block
14039 * @cur: a pointer to an array of xmlChar
14040 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14043 * parse an XML in-memory document and build a tree.
14044 * It use the given SAX function block to handle the parsing callback.
14045 * If sax is NULL, fallback to the default DOM tree building routines.
14047 * Returns the resulting document tree
14051 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, const xmlChar
*cur
, int recovery
) {
14053 xmlParserCtxtPtr ctxt
;
14054 xmlSAXHandlerPtr oldsax
= NULL
;
14056 if (cur
== NULL
) return(NULL
);
14059 ctxt
= xmlCreateDocParserCtxt(cur
);
14060 if (ctxt
== NULL
) return(NULL
);
14062 oldsax
= ctxt
->sax
;
14064 ctxt
->userData
= NULL
;
14066 xmlDetectSAX2(ctxt
);
14068 xmlParseDocument(ctxt
);
14069 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14072 xmlFreeDoc(ctxt
->myDoc
);
14073 ctxt
->myDoc
= NULL
;
14076 ctxt
->sax
= oldsax
;
14077 xmlFreeParserCtxt(ctxt
);
14084 * @cur: a pointer to an array of xmlChar
14086 * parse an XML in-memory document and build a tree.
14088 * Returns the resulting document tree
14092 xmlParseDoc(const xmlChar
*cur
) {
14093 return(xmlSAXParseDoc(NULL
, cur
, 0));
14095 #endif /* LIBXML_SAX1_ENABLED */
14097 #ifdef LIBXML_LEGACY_ENABLED
14098 /************************************************************************
14100 * Specific function to keep track of entities references *
14101 * and used by the XSLT debugger *
14103 ************************************************************************/
14105 static xmlEntityReferenceFunc xmlEntityRefFunc
= NULL
;
14108 * xmlAddEntityReference:
14109 * @ent : A valid entity
14110 * @firstNode : A valid first node for children of entity
14111 * @lastNode : A valid last node of children entity
14113 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14116 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
14117 xmlNodePtr lastNode
)
14119 if (xmlEntityRefFunc
!= NULL
) {
14120 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
14126 * xmlSetEntityReferenceFunc:
14127 * @func: A valid function
14129 * Set the function to call call back when a xml reference has been made
14132 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func
)
14134 xmlEntityRefFunc
= func
;
14136 #endif /* LIBXML_LEGACY_ENABLED */
14138 /************************************************************************
14142 ************************************************************************/
14144 #ifdef LIBXML_XPATH_ENABLED
14145 #include <libxml/xpath.h>
14148 extern void XMLCDECL
xmlGenericErrorDefaultFunc(void *ctx
, const char *msg
, ...);
14149 static int xmlParserInitialized
= 0;
14154 * Initialization function for the XML parser.
14155 * This is not reentrant. Call once before processing in case of
14156 * use in multithreaded programs.
14160 xmlInitParser(void) {
14161 if (xmlParserInitialized
!= 0)
14164 #ifdef LIBXML_THREAD_ENABLED
14165 __xmlGlobalInitMutexLock();
14166 if (xmlParserInitialized
== 0) {
14170 if ((xmlGenericError
== xmlGenericErrorDefaultFunc
) ||
14171 (xmlGenericError
== NULL
))
14172 initGenericErrorDefaultFunc(NULL
);
14174 xmlInitCharEncodingHandlers();
14175 xmlDefaultSAXHandlerInit();
14176 xmlRegisterDefaultInputCallbacks();
14177 #ifdef LIBXML_OUTPUT_ENABLED
14178 xmlRegisterDefaultOutputCallbacks();
14179 #endif /* LIBXML_OUTPUT_ENABLED */
14180 #ifdef LIBXML_HTML_ENABLED
14181 htmlInitAutoClose();
14182 htmlDefaultSAXHandlerInit();
14184 #ifdef LIBXML_XPATH_ENABLED
14187 xmlParserInitialized
= 1;
14188 #ifdef LIBXML_THREAD_ENABLED
14190 __xmlGlobalInitMutexUnlock();
14195 * xmlCleanupParser:
14197 * This function name is somewhat misleading. It does not clean up
14198 * parser state, it cleans up memory allocated by the library itself.
14199 * It is a cleanup function for the XML library. It tries to reclaim all
14200 * related global memory allocated for the library processing.
14201 * It doesn't deallocate any document related memory. One should
14202 * call xmlCleanupParser() only when the process has finished using
14203 * the library and all XML/HTML documents built with it.
14204 * See also xmlInitParser() which has the opposite function of preparing
14205 * the library for operations.
14207 * WARNING: if your application is multithreaded or has plugin support
14208 * calling this may crash the application if another thread or
14209 * a plugin is still using libxml2. It's sometimes very hard to
14210 * guess if libxml2 is in use in the application, some libraries
14211 * or plugins may use it without notice. In case of doubt abstain
14212 * from calling this function or do it just before calling exit()
14213 * to avoid leak reports from valgrind !
14217 xmlCleanupParser(void) {
14218 if (!xmlParserInitialized
)
14221 xmlCleanupCharEncodingHandlers();
14222 #ifdef LIBXML_CATALOG_ENABLED
14223 xmlCatalogCleanup();
14226 xmlCleanupInputCallbacks();
14227 #ifdef LIBXML_OUTPUT_ENABLED
14228 xmlCleanupOutputCallbacks();
14230 #ifdef LIBXML_SCHEMAS_ENABLED
14231 xmlSchemaCleanupTypes();
14232 xmlRelaxNGCleanupTypes();
14234 xmlCleanupGlobals();
14235 xmlResetLastError();
14236 xmlCleanupThreads(); /* must be last if called not from the main thread */
14237 xmlCleanupMemory();
14238 xmlParserInitialized
= 0;
14241 /************************************************************************
14243 * New set (2.6.0) of simpler and more flexible APIs *
14245 ************************************************************************/
14251 * Free a string if it is not owned by the "dict" dictionnary in the
14254 #define DICT_FREE(str) \
14255 if ((str) && ((!dict) || \
14256 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
14257 xmlFree((char *)(str));
14261 * @ctxt: an XML parser context
14263 * Reset a parser context
14266 xmlCtxtReset(xmlParserCtxtPtr ctxt
)
14268 xmlParserInputPtr input
;
14276 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
14277 xmlFreeInputStream(input
);
14280 ctxt
->input
= NULL
;
14283 if (ctxt
->spaceTab
!= NULL
) {
14284 ctxt
->spaceTab
[0] = -1;
14285 ctxt
->space
= &ctxt
->spaceTab
[0];
14287 ctxt
->space
= NULL
;
14297 DICT_FREE(ctxt
->version
);
14298 ctxt
->version
= NULL
;
14299 DICT_FREE(ctxt
->encoding
);
14300 ctxt
->encoding
= NULL
;
14301 DICT_FREE(ctxt
->directory
);
14302 ctxt
->directory
= NULL
;
14303 DICT_FREE(ctxt
->extSubURI
);
14304 ctxt
->extSubURI
= NULL
;
14305 DICT_FREE(ctxt
->extSubSystem
);
14306 ctxt
->extSubSystem
= NULL
;
14307 if (ctxt
->myDoc
!= NULL
)
14308 xmlFreeDoc(ctxt
->myDoc
);
14309 ctxt
->myDoc
= NULL
;
14311 ctxt
->standalone
= -1;
14312 ctxt
->hasExternalSubset
= 0;
14313 ctxt
->hasPErefs
= 0;
14315 ctxt
->external
= 0;
14316 ctxt
->instate
= XML_PARSER_START
;
14319 ctxt
->wellFormed
= 1;
14320 ctxt
->nsWellFormed
= 1;
14321 ctxt
->disableSAX
= 0;
14324 ctxt
->vctxt
.userData
= ctxt
;
14325 ctxt
->vctxt
.error
= xmlParserValidityError
;
14326 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
14328 ctxt
->record_info
= 0;
14330 ctxt
->checkIndex
= 0;
14331 ctxt
->inSubset
= 0;
14332 ctxt
->errNo
= XML_ERR_OK
;
14334 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
14335 ctxt
->catalogs
= NULL
;
14336 ctxt
->nbentities
= 0;
14337 ctxt
->sizeentities
= 0;
14338 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
14340 if (ctxt
->attsDefault
!= NULL
) {
14341 xmlHashFree(ctxt
->attsDefault
, (xmlHashDeallocator
) xmlFree
);
14342 ctxt
->attsDefault
= NULL
;
14344 if (ctxt
->attsSpecial
!= NULL
) {
14345 xmlHashFree(ctxt
->attsSpecial
, NULL
);
14346 ctxt
->attsSpecial
= NULL
;
14349 #ifdef LIBXML_CATALOG_ENABLED
14350 if (ctxt
->catalogs
!= NULL
)
14351 xmlCatalogFreeLocal(ctxt
->catalogs
);
14353 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
14354 xmlResetError(&ctxt
->lastError
);
14358 * xmlCtxtResetPush:
14359 * @ctxt: an XML parser context
14360 * @chunk: a pointer to an array of chars
14361 * @size: number of chars in the array
14362 * @filename: an optional file name or URI
14363 * @encoding: the document encoding, or NULL
14365 * Reset a push parser context
14367 * Returns 0 in case of success and 1 in case of error
14370 xmlCtxtResetPush(xmlParserCtxtPtr ctxt
, const char *chunk
,
14371 int size
, const char *filename
, const char *encoding
)
14373 xmlParserInputPtr inputStream
;
14374 xmlParserInputBufferPtr buf
;
14375 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
14380 if ((encoding
== NULL
) && (chunk
!= NULL
) && (size
>= 4))
14381 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
14383 buf
= xmlAllocParserInputBuffer(enc
);
14387 if (ctxt
== NULL
) {
14388 xmlFreeParserInputBuffer(buf
);
14392 xmlCtxtReset(ctxt
);
14394 if (ctxt
->pushTab
== NULL
) {
14395 ctxt
->pushTab
= (void **) xmlMalloc(ctxt
->nameMax
* 3 *
14396 sizeof(xmlChar
*));
14397 if (ctxt
->pushTab
== NULL
) {
14398 xmlErrMemory(ctxt
, NULL
);
14399 xmlFreeParserInputBuffer(buf
);
14404 if (filename
== NULL
) {
14405 ctxt
->directory
= NULL
;
14407 ctxt
->directory
= xmlParserGetDirectory(filename
);
14410 inputStream
= xmlNewInputStream(ctxt
);
14411 if (inputStream
== NULL
) {
14412 xmlFreeParserInputBuffer(buf
);
14416 if (filename
== NULL
)
14417 inputStream
->filename
= NULL
;
14419 inputStream
->filename
= (char *)
14420 xmlCanonicPath((const xmlChar
*) filename
);
14421 inputStream
->buf
= buf
;
14422 inputStream
->base
= inputStream
->buf
->buffer
->content
;
14423 inputStream
->cur
= inputStream
->buf
->buffer
->content
;
14425 &inputStream
->buf
->buffer
->content
[inputStream
->buf
->buffer
->use
];
14427 inputPush(ctxt
, inputStream
);
14429 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
14430 (ctxt
->input
->buf
!= NULL
)) {
14431 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
14432 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
14434 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
14436 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
14437 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
14439 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->
14442 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
14446 if (encoding
!= NULL
) {
14447 xmlCharEncodingHandlerPtr hdlr
;
14449 if (ctxt
->encoding
!= NULL
)
14450 xmlFree((xmlChar
*) ctxt
->encoding
);
14451 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14453 hdlr
= xmlFindCharEncodingHandler(encoding
);
14454 if (hdlr
!= NULL
) {
14455 xmlSwitchToEncoding(ctxt
, hdlr
);
14457 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
14458 "Unsupported encoding %s\n", BAD_CAST encoding
);
14460 } else if (enc
!= XML_CHAR_ENCODING_NONE
) {
14461 xmlSwitchEncoding(ctxt
, enc
);
14469 * xmlCtxtUseOptionsInternal:
14470 * @ctxt: an XML parser context
14471 * @options: a combination of xmlParserOption
14472 * @encoding: the user provided encoding to use
14474 * Applies the options to the parser context
14476 * Returns 0 in case of success, the set of unknown or unimplemented options
14477 * in case of error.
14480 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
, const char *encoding
)
14484 if (encoding
!= NULL
) {
14485 if (ctxt
->encoding
!= NULL
)
14486 xmlFree((xmlChar
*) ctxt
->encoding
);
14487 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14489 if (options
& XML_PARSE_RECOVER
) {
14490 ctxt
->recovery
= 1;
14491 options
-= XML_PARSE_RECOVER
;
14492 ctxt
->options
|= XML_PARSE_RECOVER
;
14494 ctxt
->recovery
= 0;
14495 if (options
& XML_PARSE_DTDLOAD
) {
14496 ctxt
->loadsubset
= XML_DETECT_IDS
;
14497 options
-= XML_PARSE_DTDLOAD
;
14498 ctxt
->options
|= XML_PARSE_DTDLOAD
;
14500 ctxt
->loadsubset
= 0;
14501 if (options
& XML_PARSE_DTDATTR
) {
14502 ctxt
->loadsubset
|= XML_COMPLETE_ATTRS
;
14503 options
-= XML_PARSE_DTDATTR
;
14504 ctxt
->options
|= XML_PARSE_DTDATTR
;
14506 if (options
& XML_PARSE_NOENT
) {
14507 ctxt
->replaceEntities
= 1;
14508 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14509 options
-= XML_PARSE_NOENT
;
14510 ctxt
->options
|= XML_PARSE_NOENT
;
14512 ctxt
->replaceEntities
= 0;
14513 if (options
& XML_PARSE_PEDANTIC
) {
14514 ctxt
->pedantic
= 1;
14515 options
-= XML_PARSE_PEDANTIC
;
14516 ctxt
->options
|= XML_PARSE_PEDANTIC
;
14518 ctxt
->pedantic
= 0;
14519 if (options
& XML_PARSE_NOBLANKS
) {
14520 ctxt
->keepBlanks
= 0;
14521 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
14522 options
-= XML_PARSE_NOBLANKS
;
14523 ctxt
->options
|= XML_PARSE_NOBLANKS
;
14525 ctxt
->keepBlanks
= 1;
14526 if (options
& XML_PARSE_DTDVALID
) {
14527 ctxt
->validate
= 1;
14528 if (options
& XML_PARSE_NOWARNING
)
14529 ctxt
->vctxt
.warning
= NULL
;
14530 if (options
& XML_PARSE_NOERROR
)
14531 ctxt
->vctxt
.error
= NULL
;
14532 options
-= XML_PARSE_DTDVALID
;
14533 ctxt
->options
|= XML_PARSE_DTDVALID
;
14535 ctxt
->validate
= 0;
14536 if (options
& XML_PARSE_NOWARNING
) {
14537 ctxt
->sax
->warning
= NULL
;
14538 options
-= XML_PARSE_NOWARNING
;
14540 if (options
& XML_PARSE_NOERROR
) {
14541 ctxt
->sax
->error
= NULL
;
14542 ctxt
->sax
->fatalError
= NULL
;
14543 options
-= XML_PARSE_NOERROR
;
14545 #ifdef LIBXML_SAX1_ENABLED
14546 if (options
& XML_PARSE_SAX1
) {
14547 ctxt
->sax
->startElement
= xmlSAX2StartElement
;
14548 ctxt
->sax
->endElement
= xmlSAX2EndElement
;
14549 ctxt
->sax
->startElementNs
= NULL
;
14550 ctxt
->sax
->endElementNs
= NULL
;
14551 ctxt
->sax
->initialized
= 1;
14552 options
-= XML_PARSE_SAX1
;
14553 ctxt
->options
|= XML_PARSE_SAX1
;
14555 #endif /* LIBXML_SAX1_ENABLED */
14556 if (options
& XML_PARSE_NODICT
) {
14557 ctxt
->dictNames
= 0;
14558 options
-= XML_PARSE_NODICT
;
14559 ctxt
->options
|= XML_PARSE_NODICT
;
14561 ctxt
->dictNames
= 1;
14563 if (options
& XML_PARSE_NOCDATA
) {
14564 ctxt
->sax
->cdataBlock
= NULL
;
14565 options
-= XML_PARSE_NOCDATA
;
14566 ctxt
->options
|= XML_PARSE_NOCDATA
;
14568 if (options
& XML_PARSE_NSCLEAN
) {
14569 ctxt
->options
|= XML_PARSE_NSCLEAN
;
14570 options
-= XML_PARSE_NSCLEAN
;
14572 if (options
& XML_PARSE_NONET
) {
14573 ctxt
->options
|= XML_PARSE_NONET
;
14574 options
-= XML_PARSE_NONET
;
14576 if (options
& XML_PARSE_COMPACT
) {
14577 ctxt
->options
|= XML_PARSE_COMPACT
;
14578 options
-= XML_PARSE_COMPACT
;
14580 if (options
& XML_PARSE_OLD10
) {
14581 ctxt
->options
|= XML_PARSE_OLD10
;
14582 options
-= XML_PARSE_OLD10
;
14584 if (options
& XML_PARSE_NOBASEFIX
) {
14585 ctxt
->options
|= XML_PARSE_NOBASEFIX
;
14586 options
-= XML_PARSE_NOBASEFIX
;
14588 if (options
& XML_PARSE_HUGE
) {
14589 ctxt
->options
|= XML_PARSE_HUGE
;
14590 options
-= XML_PARSE_HUGE
;
14592 if (options
& XML_PARSE_OLDSAX
) {
14593 ctxt
->options
|= XML_PARSE_OLDSAX
;
14594 options
-= XML_PARSE_OLDSAX
;
14596 ctxt
->linenumbers
= 1;
14601 * xmlCtxtUseOptions:
14602 * @ctxt: an XML parser context
14603 * @options: a combination of xmlParserOption
14605 * Applies the options to the parser context
14607 * Returns 0 in case of success, the set of unknown or unimplemented options
14608 * in case of error.
14611 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt
, int options
)
14613 return(xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
));
14618 * @ctxt: an XML parser context
14619 * @URL: the base URL to use for the document
14620 * @encoding: the document encoding, or NULL
14621 * @options: a combination of xmlParserOption
14622 * @reuse: keep the context for reuse
14624 * Common front-end for the xmlRead functions
14626 * Returns the resulting document tree or NULL
14629 xmlDoRead(xmlParserCtxtPtr ctxt
, const char *URL
, const char *encoding
,
14630 int options
, int reuse
)
14634 xmlCtxtUseOptionsInternal(ctxt
, options
, encoding
);
14635 if (encoding
!= NULL
) {
14636 xmlCharEncodingHandlerPtr hdlr
;
14638 hdlr
= xmlFindCharEncodingHandler(encoding
);
14640 xmlSwitchToEncoding(ctxt
, hdlr
);
14642 if ((URL
!= NULL
) && (ctxt
->input
!= NULL
) &&
14643 (ctxt
->input
->filename
== NULL
))
14644 ctxt
->input
->filename
= (char *) xmlStrdup((const xmlChar
*) URL
);
14645 xmlParseDocument(ctxt
);
14646 if ((ctxt
->wellFormed
) || ctxt
->recovery
)
14650 if (ctxt
->myDoc
!= NULL
) {
14651 xmlFreeDoc(ctxt
->myDoc
);
14654 ctxt
->myDoc
= NULL
;
14656 xmlFreeParserCtxt(ctxt
);
14664 * @cur: a pointer to a zero terminated string
14665 * @URL: the base URL to use for the document
14666 * @encoding: the document encoding, or NULL
14667 * @options: a combination of xmlParserOption
14669 * parse an XML in-memory document and build a tree.
14671 * Returns the resulting document tree
14674 xmlReadDoc(const xmlChar
* cur
, const char *URL
, const char *encoding
, int options
)
14676 xmlParserCtxtPtr ctxt
;
14681 ctxt
= xmlCreateDocParserCtxt(cur
);
14684 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14689 * @filename: a file or URL
14690 * @encoding: the document encoding, or NULL
14691 * @options: a combination of xmlParserOption
14693 * parse an XML file from the filesystem or the network.
14695 * Returns the resulting document tree
14698 xmlReadFile(const char *filename
, const char *encoding
, int options
)
14700 xmlParserCtxtPtr ctxt
;
14702 ctxt
= xmlCreateURLParserCtxt(filename
, options
);
14705 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 0));
14710 * @buffer: a pointer to a char array
14711 * @size: the size of the array
14712 * @URL: the base URL to use for the document
14713 * @encoding: the document encoding, or NULL
14714 * @options: a combination of xmlParserOption
14716 * parse an XML in-memory document and build a tree.
14718 * Returns the resulting document tree
14721 xmlReadMemory(const char *buffer
, int size
, const char *URL
, const char *encoding
, int options
)
14723 xmlParserCtxtPtr ctxt
;
14725 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14728 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14733 * @fd: an open file descriptor
14734 * @URL: the base URL to use for the document
14735 * @encoding: the document encoding, or NULL
14736 * @options: a combination of xmlParserOption
14738 * parse an XML from a file descriptor and build a tree.
14739 * NOTE that the file descriptor will not be closed when the
14740 * reader is closed or reset.
14742 * Returns the resulting document tree
14745 xmlReadFd(int fd
, const char *URL
, const char *encoding
, int options
)
14747 xmlParserCtxtPtr ctxt
;
14748 xmlParserInputBufferPtr input
;
14749 xmlParserInputPtr stream
;
14754 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
14757 input
->closecallback
= NULL
;
14758 ctxt
= xmlNewParserCtxt();
14759 if (ctxt
== NULL
) {
14760 xmlFreeParserInputBuffer(input
);
14763 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14764 if (stream
== NULL
) {
14765 xmlFreeParserInputBuffer(input
);
14766 xmlFreeParserCtxt(ctxt
);
14769 inputPush(ctxt
, stream
);
14770 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14775 * @ioread: an I/O read function
14776 * @ioclose: an I/O close function
14777 * @ioctx: an I/O handler
14778 * @URL: the base URL to use for the document
14779 * @encoding: the document encoding, or NULL
14780 * @options: a combination of xmlParserOption
14782 * parse an XML document from I/O functions and source and build a tree.
14784 * Returns the resulting document tree
14787 xmlReadIO(xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
14788 void *ioctx
, const char *URL
, const char *encoding
, int options
)
14790 xmlParserCtxtPtr ctxt
;
14791 xmlParserInputBufferPtr input
;
14792 xmlParserInputPtr stream
;
14794 if (ioread
== NULL
)
14797 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
14798 XML_CHAR_ENCODING_NONE
);
14801 ctxt
= xmlNewParserCtxt();
14802 if (ctxt
== NULL
) {
14803 xmlFreeParserInputBuffer(input
);
14806 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14807 if (stream
== NULL
) {
14808 xmlFreeParserInputBuffer(input
);
14809 xmlFreeParserCtxt(ctxt
);
14812 inputPush(ctxt
, stream
);
14813 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14818 * @ctxt: an XML parser context
14819 * @cur: a pointer to a zero terminated string
14820 * @URL: the base URL to use for the document
14821 * @encoding: the document encoding, or NULL
14822 * @options: a combination of xmlParserOption
14824 * parse an XML in-memory document and build a tree.
14825 * This reuses the existing @ctxt parser context
14827 * Returns the resulting document tree
14830 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
,
14831 const char *URL
, const char *encoding
, int options
)
14833 xmlParserInputPtr stream
;
14840 xmlCtxtReset(ctxt
);
14842 stream
= xmlNewStringInputStream(ctxt
, cur
);
14843 if (stream
== NULL
) {
14846 inputPush(ctxt
, stream
);
14847 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
14852 * @ctxt: an XML parser context
14853 * @filename: a file or URL
14854 * @encoding: the document encoding, or NULL
14855 * @options: a combination of xmlParserOption
14857 * parse an XML file from the filesystem or the network.
14858 * This reuses the existing @ctxt parser context
14860 * Returns the resulting document tree
14863 xmlCtxtReadFile(xmlParserCtxtPtr ctxt
, const char *filename
,
14864 const char *encoding
, int options
)
14866 xmlParserInputPtr stream
;
14868 if (filename
== NULL
)
14873 xmlCtxtReset(ctxt
);
14875 stream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
14876 if (stream
== NULL
) {
14879 inputPush(ctxt
, stream
);
14880 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 1));
14884 * xmlCtxtReadMemory:
14885 * @ctxt: an XML parser context
14886 * @buffer: a pointer to a char array
14887 * @size: the size of the array
14888 * @URL: the base URL to use for the document
14889 * @encoding: the document encoding, or NULL
14890 * @options: a combination of xmlParserOption
14892 * parse an XML in-memory document and build a tree.
14893 * This reuses the existing @ctxt parser context
14895 * Returns the resulting document tree
14898 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt
, const char *buffer
, int size
,
14899 const char *URL
, const char *encoding
, int options
)
14901 xmlParserInputBufferPtr input
;
14902 xmlParserInputPtr stream
;
14906 if (buffer
== NULL
)
14909 xmlCtxtReset(ctxt
);
14911 input
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
14912 if (input
== NULL
) {
14916 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14917 if (stream
== NULL
) {
14918 xmlFreeParserInputBuffer(input
);
14922 inputPush(ctxt
, stream
);
14923 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
14928 * @ctxt: an XML parser context
14929 * @fd: an open file descriptor
14930 * @URL: the base URL to use for the document
14931 * @encoding: the document encoding, or NULL
14932 * @options: a combination of xmlParserOption
14934 * parse an XML from a file descriptor and build a tree.
14935 * This reuses the existing @ctxt parser context
14936 * NOTE that the file descriptor will not be closed when the
14937 * reader is closed or reset.
14939 * Returns the resulting document tree
14942 xmlCtxtReadFd(xmlParserCtxtPtr ctxt
, int fd
,
14943 const char *URL
, const char *encoding
, int options
)
14945 xmlParserInputBufferPtr input
;
14946 xmlParserInputPtr stream
;
14953 xmlCtxtReset(ctxt
);
14956 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
14959 input
->closecallback
= NULL
;
14960 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14961 if (stream
== NULL
) {
14962 xmlFreeParserInputBuffer(input
);
14965 inputPush(ctxt
, stream
);
14966 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
14971 * @ctxt: an XML parser context
14972 * @ioread: an I/O read function
14973 * @ioclose: an I/O close function
14974 * @ioctx: an I/O handler
14975 * @URL: the base URL to use for the document
14976 * @encoding: the document encoding, or NULL
14977 * @options: a combination of xmlParserOption
14979 * parse an XML document from I/O functions and source and build a tree.
14980 * This reuses the existing @ctxt parser context
14982 * Returns the resulting document tree
14985 xmlCtxtReadIO(xmlParserCtxtPtr ctxt
, xmlInputReadCallback ioread
,
14986 xmlInputCloseCallback ioclose
, void *ioctx
,
14988 const char *encoding
, int options
)
14990 xmlParserInputBufferPtr input
;
14991 xmlParserInputPtr stream
;
14993 if (ioread
== NULL
)
14998 xmlCtxtReset(ctxt
);
15000 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15001 XML_CHAR_ENCODING_NONE
);
15004 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15005 if (stream
== NULL
) {
15006 xmlFreeParserInputBuffer(input
);
15009 inputPush(ctxt
, stream
);
15010 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15013 #define bottom_parser
15014 #include "elfgcchack.h"