2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
71 #ifdef HAVE_SYS_STAT_H
91 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
);
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
95 const xmlChar
*base
, xmlParserCtxtPtr pctx
);
97 /************************************************************************
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
101 ************************************************************************/
103 #define XML_PARSER_BIG_ENTITY 1000
104 #define XML_PARSER_LOT_ENTITY 5000
107 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108 * replacement over the size in byte of the input indicates that you have
109 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
110 * replacement per byte of input.
112 #define XML_PARSER_NON_LINEAR 10
115 * xmlParserEntityCheck
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential (non-linear) entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt
, size_t size
,
125 xmlEntityPtr ent
, size_t replacement
)
129 if ((ctxt
== NULL
) || (ctxt
->options
& XML_PARSE_HUGE
))
131 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
135 * This may look absurd but is needed to detect
138 if ((ent
!= NULL
) && (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
139 (ent
->content
!= NULL
) && (ent
->checked
== 0)) {
140 unsigned long oldnbent
= ctxt
->nbentities
;
145 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
146 XML_SUBSTITUTE_REF
, 0, 0, 0);
148 ent
->checked
= (ctxt
->nbentities
- oldnbent
+ 1) * 2;
150 if (xmlStrchr(rep
, '<'))
156 if (replacement
!= 0) {
157 if (replacement
< XML_MAX_TEXT_LENGTH
)
161 * If the volume of entity copy reaches 10 times the
162 * amount of parsed data and over the large text threshold
163 * then that's very likely to be an abuse.
165 if (ctxt
->input
!= NULL
) {
166 consumed
= ctxt
->input
->consumed
+
167 (ctxt
->input
->cur
- ctxt
->input
->base
);
169 consumed
+= ctxt
->sizeentities
;
171 if (replacement
< XML_PARSER_NON_LINEAR
* consumed
)
173 } else if (size
!= 0) {
175 * Do the check based on the replacement size of the entity
177 if (size
< XML_PARSER_BIG_ENTITY
)
181 * A limit on the amount of text data reasonably used
183 if (ctxt
->input
!= NULL
) {
184 consumed
= ctxt
->input
->consumed
+
185 (ctxt
->input
->cur
- ctxt
->input
->base
);
187 consumed
+= ctxt
->sizeentities
;
189 if ((size
< XML_PARSER_NON_LINEAR
* consumed
) &&
190 (ctxt
->nbentities
* 3 < XML_PARSER_NON_LINEAR
* consumed
))
192 } else if (ent
!= NULL
) {
194 * use the number of parsed entities in the replacement
196 size
= ent
->checked
/ 2;
199 * The amount of data parsed counting entities size only once
201 if (ctxt
->input
!= NULL
) {
202 consumed
= ctxt
->input
->consumed
+
203 (ctxt
->input
->cur
- ctxt
->input
->base
);
205 consumed
+= ctxt
->sizeentities
;
208 * Check the density of entities for the amount of data
209 * knowing an entity reference will take at least 3 bytes
211 if (size
* 3 < consumed
* XML_PARSER_NON_LINEAR
)
215 * strange we got no data for checking
217 if (((ctxt
->lastError
.code
!= XML_ERR_UNDECLARED_ENTITY
) &&
218 (ctxt
->lastError
.code
!= XML_WAR_UNDECLARED_ENTITY
)) ||
219 (ctxt
->nbentities
<= 10000))
222 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
229 * arbitrary depth limit for the XML documents that we allow to
230 * process. This is not a limitation of the parser but a safety
231 * boundary feature. It can be disabled with the XML_PARSE_HUGE
234 unsigned int xmlParserMaxDepth
= 256;
239 #define XML_PARSER_BIG_BUFFER_SIZE 300
240 #define XML_PARSER_BUFFER_SIZE 100
241 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
244 * XML_PARSER_CHUNK_SIZE
246 * When calling GROW that's the minimal amount of data
247 * the parser expects to have received. It is not a hard
248 * limit but an optimization when reading strings like Names.
249 * It is not strictly needed as long as the input's available characters
250 * are followed by 0, which should be provided by the I/O level
252 #define XML_PARSER_CHUNK_SIZE 100
255 * List of XML prefixed PI allowed by W3C specs
258 static const char *xmlW3CPIs
[] = {
265 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
266 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
267 const xmlChar
**str
);
269 static xmlParserErrors
270 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
271 xmlSAXHandlerPtr sax
,
272 void *user_data
, int depth
, const xmlChar
*URL
,
273 const xmlChar
*ID
, xmlNodePtr
*list
);
276 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
,
277 const char *encoding
);
278 #ifdef LIBXML_LEGACY_ENABLED
280 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
281 xmlNodePtr lastNode
);
282 #endif /* LIBXML_LEGACY_ENABLED */
284 static xmlParserErrors
285 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
286 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
289 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
);
291 /************************************************************************
293 * Some factorized error routines *
295 ************************************************************************/
298 * xmlErrAttributeDup:
299 * @ctxt: an XML parser context
300 * @prefix: the attribute prefix
301 * @localname: the attribute localname
303 * Handle a redefinition of attribute error
306 xmlErrAttributeDup(xmlParserCtxtPtr ctxt
, const xmlChar
* prefix
,
307 const xmlChar
* localname
)
309 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
310 (ctxt
->instate
== XML_PARSER_EOF
))
313 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
316 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
317 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
318 (const char *) localname
, NULL
, NULL
, 0, 0,
319 "Attribute %s redefined\n", localname
);
321 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
322 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
323 (const char *) prefix
, (const char *) localname
,
324 NULL
, 0, 0, "Attribute %s:%s redefined\n", prefix
,
327 ctxt
->wellFormed
= 0;
328 if (ctxt
->recovery
== 0)
329 ctxt
->disableSAX
= 1;
335 * @ctxt: an XML parser context
336 * @error: the error number
337 * @extra: extra information string
339 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
342 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
)
345 char errstr
[129] = "";
347 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
348 (ctxt
->instate
== XML_PARSER_EOF
))
351 case XML_ERR_INVALID_HEX_CHARREF
:
352 errmsg
= "CharRef: invalid hexadecimal value";
354 case XML_ERR_INVALID_DEC_CHARREF
:
355 errmsg
= "CharRef: invalid decimal value";
357 case XML_ERR_INVALID_CHARREF
:
358 errmsg
= "CharRef: invalid value";
360 case XML_ERR_INTERNAL_ERROR
:
361 errmsg
= "internal error";
363 case XML_ERR_PEREF_AT_EOF
:
364 errmsg
= "PEReference at end of document";
366 case XML_ERR_PEREF_IN_PROLOG
:
367 errmsg
= "PEReference in prolog";
369 case XML_ERR_PEREF_IN_EPILOG
:
370 errmsg
= "PEReference in epilog";
372 case XML_ERR_PEREF_NO_NAME
:
373 errmsg
= "PEReference: no name";
375 case XML_ERR_PEREF_SEMICOL_MISSING
:
376 errmsg
= "PEReference: expecting ';'";
378 case XML_ERR_ENTITY_LOOP
:
379 errmsg
= "Detected an entity reference loop";
381 case XML_ERR_ENTITY_NOT_STARTED
:
382 errmsg
= "EntityValue: \" or ' expected";
384 case XML_ERR_ENTITY_PE_INTERNAL
:
385 errmsg
= "PEReferences forbidden in internal subset";
387 case XML_ERR_ENTITY_NOT_FINISHED
:
388 errmsg
= "EntityValue: \" or ' expected";
390 case XML_ERR_ATTRIBUTE_NOT_STARTED
:
391 errmsg
= "AttValue: \" or ' expected";
393 case XML_ERR_LT_IN_ATTRIBUTE
:
394 errmsg
= "Unescaped '<' not allowed in attributes values";
396 case XML_ERR_LITERAL_NOT_STARTED
:
397 errmsg
= "SystemLiteral \" or ' expected";
399 case XML_ERR_LITERAL_NOT_FINISHED
:
400 errmsg
= "Unfinished System or Public ID \" or ' expected";
402 case XML_ERR_MISPLACED_CDATA_END
:
403 errmsg
= "Sequence ']]>' not allowed in content";
405 case XML_ERR_URI_REQUIRED
:
406 errmsg
= "SYSTEM or PUBLIC, the URI is missing";
408 case XML_ERR_PUBID_REQUIRED
:
409 errmsg
= "PUBLIC, the Public Identifier is missing";
411 case XML_ERR_HYPHEN_IN_COMMENT
:
412 errmsg
= "Comment must not contain '--' (double-hyphen)";
414 case XML_ERR_PI_NOT_STARTED
:
415 errmsg
= "xmlParsePI : no target name";
417 case XML_ERR_RESERVED_XML_NAME
:
418 errmsg
= "Invalid PI name";
420 case XML_ERR_NOTATION_NOT_STARTED
:
421 errmsg
= "NOTATION: Name expected here";
423 case XML_ERR_NOTATION_NOT_FINISHED
:
424 errmsg
= "'>' required to close NOTATION declaration";
426 case XML_ERR_VALUE_REQUIRED
:
427 errmsg
= "Entity value required";
429 case XML_ERR_URI_FRAGMENT
:
430 errmsg
= "Fragment not allowed";
432 case XML_ERR_ATTLIST_NOT_STARTED
:
433 errmsg
= "'(' required to start ATTLIST enumeration";
435 case XML_ERR_NMTOKEN_REQUIRED
:
436 errmsg
= "NmToken expected in ATTLIST enumeration";
438 case XML_ERR_ATTLIST_NOT_FINISHED
:
439 errmsg
= "')' required to finish ATTLIST enumeration";
441 case XML_ERR_MIXED_NOT_STARTED
:
442 errmsg
= "MixedContentDecl : '|' or ')*' expected";
444 case XML_ERR_PCDATA_REQUIRED
:
445 errmsg
= "MixedContentDecl : '#PCDATA' expected";
447 case XML_ERR_ELEMCONTENT_NOT_STARTED
:
448 errmsg
= "ContentDecl : Name or '(' expected";
450 case XML_ERR_ELEMCONTENT_NOT_FINISHED
:
451 errmsg
= "ContentDecl : ',' '|' or ')' expected";
453 case XML_ERR_PEREF_IN_INT_SUBSET
:
455 "PEReference: forbidden within markup decl in internal subset";
457 case XML_ERR_GT_REQUIRED
:
458 errmsg
= "expected '>'";
460 case XML_ERR_CONDSEC_INVALID
:
461 errmsg
= "XML conditional section '[' expected";
463 case XML_ERR_EXT_SUBSET_NOT_FINISHED
:
464 errmsg
= "Content error in the external subset";
466 case XML_ERR_CONDSEC_INVALID_KEYWORD
:
468 "conditional section INCLUDE or IGNORE keyword expected";
470 case XML_ERR_CONDSEC_NOT_FINISHED
:
471 errmsg
= "XML conditional section not closed";
473 case XML_ERR_XMLDECL_NOT_STARTED
:
474 errmsg
= "Text declaration '<?xml' required";
476 case XML_ERR_XMLDECL_NOT_FINISHED
:
477 errmsg
= "parsing XML declaration: '?>' expected";
479 case XML_ERR_EXT_ENTITY_STANDALONE
:
480 errmsg
= "external parsed entities cannot be standalone";
482 case XML_ERR_ENTITYREF_SEMICOL_MISSING
:
483 errmsg
= "EntityRef: expecting ';'";
485 case XML_ERR_DOCTYPE_NOT_FINISHED
:
486 errmsg
= "DOCTYPE improperly terminated";
488 case XML_ERR_LTSLASH_REQUIRED
:
489 errmsg
= "EndTag: '</' not found";
491 case XML_ERR_EQUAL_REQUIRED
:
492 errmsg
= "expected '='";
494 case XML_ERR_STRING_NOT_CLOSED
:
495 errmsg
= "String not closed expecting \" or '";
497 case XML_ERR_STRING_NOT_STARTED
:
498 errmsg
= "String not started expecting ' or \"";
500 case XML_ERR_ENCODING_NAME
:
501 errmsg
= "Invalid XML encoding name";
503 case XML_ERR_STANDALONE_VALUE
:
504 errmsg
= "standalone accepts only 'yes' or 'no'";
506 case XML_ERR_DOCUMENT_EMPTY
:
507 errmsg
= "Document is empty";
509 case XML_ERR_DOCUMENT_END
:
510 errmsg
= "Extra content at the end of the document";
512 case XML_ERR_NOT_WELL_BALANCED
:
513 errmsg
= "chunk is not well balanced";
515 case XML_ERR_EXTRA_CONTENT
:
516 errmsg
= "extra content at the end of well balanced chunk";
518 case XML_ERR_VERSION_MISSING
:
519 errmsg
= "Malformed declaration expecting version";
521 case XML_ERR_NAME_TOO_LONG
:
522 errmsg
= "Name too long use XML_PARSE_HUGE option";
530 errmsg
= "Unregistered error message";
533 snprintf(errstr
, 128, "%s\n", errmsg
);
535 snprintf(errstr
, 128, "%s: %%s\n", errmsg
);
538 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
539 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, &errstr
[0],
542 ctxt
->wellFormed
= 0;
543 if (ctxt
->recovery
== 0)
544 ctxt
->disableSAX
= 1;
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
554 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
557 xmlFatalErrMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
560 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
561 (ctxt
->instate
== XML_PARSER_EOF
))
565 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
566 XML_ERR_FATAL
, NULL
, 0, NULL
, NULL
, NULL
, 0, 0, "%s", msg
);
568 ctxt
->wellFormed
= 0;
569 if (ctxt
->recovery
== 0)
570 ctxt
->disableSAX
= 1;
576 * @ctxt: an XML parser context
577 * @error: the error number
578 * @msg: the error message
585 xmlWarningMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
586 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
588 xmlStructuredErrorFunc schannel
= NULL
;
590 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
591 (ctxt
->instate
== XML_PARSER_EOF
))
593 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
594 (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
595 schannel
= ctxt
->sax
->serror
;
597 __xmlRaiseError(schannel
,
598 (ctxt
->sax
) ? ctxt
->sax
->warning
: NULL
,
600 ctxt
, NULL
, XML_FROM_PARSER
, error
,
601 XML_ERR_WARNING
, NULL
, 0,
602 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
603 msg
, (const char *) str1
, (const char *) str2
);
605 __xmlRaiseError(schannel
, NULL
, NULL
,
606 ctxt
, NULL
, XML_FROM_PARSER
, error
,
607 XML_ERR_WARNING
, NULL
, 0,
608 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
609 msg
, (const char *) str1
, (const char *) str2
);
615 * @ctxt: an XML parser context
616 * @error: the error number
617 * @msg: the error message
620 * Handle a validity error.
623 xmlValidityError(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
624 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
626 xmlStructuredErrorFunc schannel
= NULL
;
628 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
629 (ctxt
->instate
== XML_PARSER_EOF
))
633 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
634 schannel
= ctxt
->sax
->serror
;
637 __xmlRaiseError(schannel
,
638 ctxt
->vctxt
.error
, ctxt
->vctxt
.userData
,
639 ctxt
, NULL
, XML_FROM_DTD
, error
,
640 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
641 (const char *) str2
, NULL
, 0, 0,
642 msg
, (const char *) str1
, (const char *) str2
);
645 __xmlRaiseError(schannel
, NULL
, NULL
,
646 ctxt
, NULL
, XML_FROM_DTD
, error
,
647 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
648 (const char *) str2
, NULL
, 0, 0,
649 msg
, (const char *) str1
, (const char *) str2
);
655 * @ctxt: an XML parser context
656 * @error: the error number
657 * @msg: the error message
658 * @val: an integer value
660 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
663 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
664 const char *msg
, int val
)
666 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
667 (ctxt
->instate
== XML_PARSER_EOF
))
671 __xmlRaiseError(NULL
, NULL
, NULL
,
672 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
673 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
675 ctxt
->wellFormed
= 0;
676 if (ctxt
->recovery
== 0)
677 ctxt
->disableSAX
= 1;
682 * xmlFatalErrMsgStrIntStr:
683 * @ctxt: an XML parser context
684 * @error: the error number
685 * @msg: the error message
686 * @str1: a string info
687 * @val: an integer value
688 * @str2: a string info
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
693 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
694 const char *msg
, const xmlChar
*str1
, int val
,
697 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
698 (ctxt
->instate
== XML_PARSER_EOF
))
702 __xmlRaiseError(NULL
, NULL
, NULL
,
703 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
704 NULL
, 0, (const char *) str1
, (const char *) str2
,
705 NULL
, val
, 0, msg
, str1
, val
, str2
);
707 ctxt
->wellFormed
= 0;
708 if (ctxt
->recovery
== 0)
709 ctxt
->disableSAX
= 1;
715 * @ctxt: an XML parser context
716 * @error: the error number
717 * @msg: the error message
718 * @val: a string value
720 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
723 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
724 const char *msg
, const xmlChar
* val
)
726 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
727 (ctxt
->instate
== XML_PARSER_EOF
))
731 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
732 XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
733 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
736 ctxt
->wellFormed
= 0;
737 if (ctxt
->recovery
== 0)
738 ctxt
->disableSAX
= 1;
744 * @ctxt: an XML parser context
745 * @error: the error number
746 * @msg: the error message
747 * @val: a string value
749 * Handle a non fatal parser error
752 xmlErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
753 const char *msg
, const xmlChar
* val
)
755 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
756 (ctxt
->instate
== XML_PARSER_EOF
))
760 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
761 XML_FROM_PARSER
, error
, XML_ERR_ERROR
,
762 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
768 * @ctxt: an XML parser context
769 * @error: the error number
771 * @info1: extra information string
772 * @info2: extra information string
774 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
777 xmlNsErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
779 const xmlChar
* info1
, const xmlChar
* info2
,
780 const xmlChar
* info3
)
782 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
783 (ctxt
->instate
== XML_PARSER_EOF
))
787 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
788 XML_ERR_ERROR
, NULL
, 0, (const char *) info1
,
789 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
790 info1
, info2
, info3
);
792 ctxt
->nsWellFormed
= 0;
797 * @ctxt: an XML parser context
798 * @error: the error number
800 * @info1: extra information string
801 * @info2: extra information string
803 * Handle a namespace warning error
806 xmlNsWarn(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
808 const xmlChar
* info1
, const xmlChar
* info2
,
809 const xmlChar
* info3
)
811 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
812 (ctxt
->instate
== XML_PARSER_EOF
))
814 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
815 XML_ERR_WARNING
, NULL
, 0, (const char *) info1
,
816 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
817 info1
, info2
, info3
);
820 /************************************************************************
822 * Library wide options *
824 ************************************************************************/
828 * @feature: the feature to be examined
830 * Examines if the library has been compiled with a given feature.
832 * Returns a non-zero value if the feature exists, otherwise zero.
833 * Returns zero (0) if the feature does not exist or an
834 * unknown feature is requested, non-zero otherwise.
837 xmlHasFeature(xmlFeature feature
)
840 case XML_WITH_THREAD
:
841 #ifdef LIBXML_THREAD_ENABLED
847 #ifdef LIBXML_TREE_ENABLED
852 case XML_WITH_OUTPUT
:
853 #ifdef LIBXML_OUTPUT_ENABLED
859 #ifdef LIBXML_PUSH_ENABLED
864 case XML_WITH_READER
:
865 #ifdef LIBXML_READER_ENABLED
870 case XML_WITH_PATTERN
:
871 #ifdef LIBXML_PATTERN_ENABLED
876 case XML_WITH_WRITER
:
877 #ifdef LIBXML_WRITER_ENABLED
883 #ifdef LIBXML_SAX1_ENABLED
889 #ifdef LIBXML_FTP_ENABLED
895 #ifdef LIBXML_HTTP_ENABLED
901 #ifdef LIBXML_VALID_ENABLED
907 #ifdef LIBXML_HTML_ENABLED
912 case XML_WITH_LEGACY
:
913 #ifdef LIBXML_LEGACY_ENABLED
919 #ifdef LIBXML_C14N_ENABLED
924 case XML_WITH_CATALOG
:
925 #ifdef LIBXML_CATALOG_ENABLED
931 #ifdef LIBXML_XPATH_ENABLED
937 #ifdef LIBXML_XPTR_ENABLED
942 case XML_WITH_XINCLUDE
:
943 #ifdef LIBXML_XINCLUDE_ENABLED
949 #ifdef LIBXML_ICONV_ENABLED
954 case XML_WITH_ISO8859X
:
955 #ifdef LIBXML_ISO8859X_ENABLED
960 case XML_WITH_UNICODE
:
961 #ifdef LIBXML_UNICODE_ENABLED
966 case XML_WITH_REGEXP
:
967 #ifdef LIBXML_REGEXP_ENABLED
972 case XML_WITH_AUTOMATA
:
973 #ifdef LIBXML_AUTOMATA_ENABLED
979 #ifdef LIBXML_EXPR_ENABLED
984 case XML_WITH_SCHEMAS
:
985 #ifdef LIBXML_SCHEMAS_ENABLED
990 case XML_WITH_SCHEMATRON
:
991 #ifdef LIBXML_SCHEMATRON_ENABLED
996 case XML_WITH_MODULES
:
997 #ifdef LIBXML_MODULES_ENABLED
1002 case XML_WITH_DEBUG
:
1003 #ifdef LIBXML_DEBUG_ENABLED
1008 case XML_WITH_DEBUG_MEM
:
1009 #ifdef DEBUG_MEMORY_LOCATION
1014 case XML_WITH_DEBUG_RUN
:
1015 #ifdef LIBXML_DEBUG_RUNTIME
1021 #ifdef LIBXML_ZLIB_ENABLED
1027 #ifdef LIBXML_LZMA_ENABLED
1033 #ifdef LIBXML_ICU_ENABLED
1044 /************************************************************************
1046 * SAX2 defaulted attributes handling *
1048 ************************************************************************/
1052 * @ctxt: an XML parser context
1054 * Do the SAX2 detection and specific initialization
1057 xmlDetectSAX2(xmlParserCtxtPtr ctxt
) {
1058 if (ctxt
== NULL
) return;
1059 #ifdef LIBXML_SAX1_ENABLED
1060 if ((ctxt
->sax
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
) &&
1061 ((ctxt
->sax
->startElementNs
!= NULL
) ||
1062 (ctxt
->sax
->endElementNs
!= NULL
))) ctxt
->sax2
= 1;
1065 #endif /* LIBXML_SAX1_ENABLED */
1067 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
1068 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
1069 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
1070 if ((ctxt
->str_xml
==NULL
) || (ctxt
->str_xmlns
==NULL
) ||
1071 (ctxt
->str_xml_ns
== NULL
)) {
1072 xmlErrMemory(ctxt
, NULL
);
1076 typedef struct _xmlDefAttrs xmlDefAttrs
;
1077 typedef xmlDefAttrs
*xmlDefAttrsPtr
;
1078 struct _xmlDefAttrs
{
1079 int nbAttrs
; /* number of defaulted attributes on that element */
1080 int maxAttrs
; /* the size of the array */
1081 const xmlChar
*values
[5]; /* array of localname/prefix/values/external */
1085 * xmlAttrNormalizeSpace:
1086 * @src: the source string
1087 * @dst: the target string
1089 * Normalize the space in non CDATA attribute values:
1090 * If the attribute type is not CDATA, then the XML processor MUST further
1091 * process the normalized attribute value by discarding any leading and
1092 * trailing space (#x20) characters, and by replacing sequences of space
1093 * (#x20) characters by a single space (#x20) character.
1094 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1095 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1096 * passing src as dst is just fine.
1098 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1102 xmlAttrNormalizeSpace(const xmlChar
*src
, xmlChar
*dst
)
1104 if ((src
== NULL
) || (dst
== NULL
))
1107 while (*src
== 0x20) src
++;
1110 while (*src
== 0x20) src
++;
1124 * xmlAttrNormalizeSpace2:
1125 * @src: the source string
1127 * Normalize the space in non CDATA attribute values, a slightly more complex
1128 * front end to avoid allocation problems when running on attribute values
1129 * coming from the input.
1131 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1134 static const xmlChar
*
1135 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt
, xmlChar
*src
, int *len
)
1138 int remove_head
= 0;
1139 int need_realloc
= 0;
1142 if ((ctxt
== NULL
) || (src
== NULL
) || (len
== NULL
))
1149 while (*cur
== 0x20) {
1156 if ((*cur
== 0x20) || (*cur
== 0)) {
1166 ret
= xmlStrndup(src
+ remove_head
, i
- remove_head
+ 1);
1168 xmlErrMemory(ctxt
, NULL
);
1171 xmlAttrNormalizeSpace(ret
, ret
);
1172 *len
= (int) strlen((const char *)ret
);
1174 } else if (remove_head
) {
1175 *len
-= remove_head
;
1176 memmove(src
, src
+ remove_head
, 1 + *len
);
1184 * @ctxt: an XML parser context
1185 * @fullname: the element fullname
1186 * @fullattr: the attribute fullname
1187 * @value: the attribute value
1189 * Add a defaulted attribute for an element
1192 xmlAddDefAttrs(xmlParserCtxtPtr ctxt
,
1193 const xmlChar
*fullname
,
1194 const xmlChar
*fullattr
,
1195 const xmlChar
*value
) {
1196 xmlDefAttrsPtr defaults
;
1198 const xmlChar
*name
;
1199 const xmlChar
*prefix
;
1202 * Allows to detect attribute redefinitions
1204 if (ctxt
->attsSpecial
!= NULL
) {
1205 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1209 if (ctxt
->attsDefault
== NULL
) {
1210 ctxt
->attsDefault
= xmlHashCreateDict(10, ctxt
->dict
);
1211 if (ctxt
->attsDefault
== NULL
)
1216 * split the element name into prefix:localname , the string found
1217 * are within the DTD and then not associated to namespace names.
1219 name
= xmlSplitQName3(fullname
, &len
);
1221 name
= xmlDictLookup(ctxt
->dict
, fullname
, -1);
1224 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1225 prefix
= xmlDictLookup(ctxt
->dict
, fullname
, len
);
1229 * make sure there is some storage
1231 defaults
= xmlHashLookup2(ctxt
->attsDefault
, name
, prefix
);
1232 if (defaults
== NULL
) {
1233 defaults
= (xmlDefAttrsPtr
) xmlMalloc(sizeof(xmlDefAttrs
) +
1234 (4 * 5) * sizeof(const xmlChar
*));
1235 if (defaults
== NULL
)
1237 defaults
->nbAttrs
= 0;
1238 defaults
->maxAttrs
= 4;
1239 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1240 defaults
, NULL
) < 0) {
1244 } else if (defaults
->nbAttrs
>= defaults
->maxAttrs
) {
1245 xmlDefAttrsPtr temp
;
1247 temp
= (xmlDefAttrsPtr
) xmlRealloc(defaults
, sizeof(xmlDefAttrs
) +
1248 (2 * defaults
->maxAttrs
* 5) * sizeof(const xmlChar
*));
1252 defaults
->maxAttrs
*= 2;
1253 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1254 defaults
, NULL
) < 0) {
1261 * Split the attribute name into prefix:localname , the string found
1262 * are within the DTD and then not associated to namespace names.
1264 name
= xmlSplitQName3(fullattr
, &len
);
1266 name
= xmlDictLookup(ctxt
->dict
, fullattr
, -1);
1269 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1270 prefix
= xmlDictLookup(ctxt
->dict
, fullattr
, len
);
1273 defaults
->values
[5 * defaults
->nbAttrs
] = name
;
1274 defaults
->values
[5 * defaults
->nbAttrs
+ 1] = prefix
;
1275 /* intern the string and precompute the end */
1276 len
= xmlStrlen(value
);
1277 value
= xmlDictLookup(ctxt
->dict
, value
, len
);
1278 defaults
->values
[5 * defaults
->nbAttrs
+ 2] = value
;
1279 defaults
->values
[5 * defaults
->nbAttrs
+ 3] = value
+ len
;
1281 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = BAD_CAST
"external";
1283 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = NULL
;
1284 defaults
->nbAttrs
++;
1289 xmlErrMemory(ctxt
, NULL
);
1294 * xmlAddSpecialAttr:
1295 * @ctxt: an XML parser context
1296 * @fullname: the element fullname
1297 * @fullattr: the attribute fullname
1298 * @type: the attribute type
1300 * Register this attribute type
1303 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt
,
1304 const xmlChar
*fullname
,
1305 const xmlChar
*fullattr
,
1308 if (ctxt
->attsSpecial
== NULL
) {
1309 ctxt
->attsSpecial
= xmlHashCreateDict(10, ctxt
->dict
);
1310 if (ctxt
->attsSpecial
== NULL
)
1314 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1317 xmlHashAddEntry2(ctxt
->attsSpecial
, fullname
, fullattr
,
1318 (void *) (long) type
);
1322 xmlErrMemory(ctxt
, NULL
);
1327 * xmlCleanSpecialAttrCallback:
1329 * Removes CDATA attributes from the special attribute table
1332 xmlCleanSpecialAttrCallback(void *payload
, void *data
,
1333 const xmlChar
*fullname
, const xmlChar
*fullattr
,
1334 const xmlChar
*unused ATTRIBUTE_UNUSED
) {
1335 xmlParserCtxtPtr ctxt
= (xmlParserCtxtPtr
) data
;
1337 if (((long) payload
) == XML_ATTRIBUTE_CDATA
) {
1338 xmlHashRemoveEntry2(ctxt
->attsSpecial
, fullname
, fullattr
, NULL
);
1343 * xmlCleanSpecialAttr:
1344 * @ctxt: an XML parser context
1346 * Trim the list of attributes defined to remove all those of type
1347 * CDATA as they are not special. This call should be done when finishing
1348 * to parse the DTD and before starting to parse the document root.
1351 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt
)
1353 if (ctxt
->attsSpecial
== NULL
)
1356 xmlHashScanFull(ctxt
->attsSpecial
, xmlCleanSpecialAttrCallback
, ctxt
);
1358 if (xmlHashSize(ctxt
->attsSpecial
) == 0) {
1359 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1360 ctxt
->attsSpecial
= NULL
;
1366 * xmlCheckLanguageID:
1367 * @lang: pointer to the string value
1369 * Checks that the value conforms to the LanguageID production:
1371 * NOTE: this is somewhat deprecated, those productions were removed from
1372 * the XML Second edition.
1374 * [33] LanguageID ::= Langcode ('-' Subcode)*
1375 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1376 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1377 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1378 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1379 * [38] Subcode ::= ([a-z] | [A-Z])+
1381 * The current REC references the successors of RFC 1766, currently 5646
1383 * http://www.rfc-editor.org/rfc/rfc5646.txt
1384 * langtag = language
1390 * language = 2*3ALPHA ; shortest ISO 639 code
1391 * ["-" extlang] ; sometimes followed by
1392 * ; extended language subtags
1393 * / 4ALPHA ; or reserved for future use
1394 * / 5*8ALPHA ; or registered language subtag
1396 * extlang = 3ALPHA ; selected ISO 639 codes
1397 * *2("-" 3ALPHA) ; permanently reserved
1399 * script = 4ALPHA ; ISO 15924 code
1401 * region = 2ALPHA ; ISO 3166-1 code
1402 * / 3DIGIT ; UN M.49 code
1404 * variant = 5*8alphanum ; registered variants
1405 * / (DIGIT 3alphanum)
1407 * extension = singleton 1*("-" (2*8alphanum))
1409 * ; Single alphanumerics
1410 * ; "x" reserved for private use
1411 * singleton = DIGIT ; 0 - 9
1417 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1418 * The parser below doesn't try to cope with extension or privateuse
1419 * that could be added but that's not interoperable anyway
1421 * Returns 1 if correct 0 otherwise
1424 xmlCheckLanguageID(const xmlChar
* lang
)
1426 const xmlChar
*cur
= lang
, *nxt
;
1430 if (((cur
[0] == 'i') && (cur
[1] == '-')) ||
1431 ((cur
[0] == 'I') && (cur
[1] == '-')) ||
1432 ((cur
[0] == 'x') && (cur
[1] == '-')) ||
1433 ((cur
[0] == 'X') && (cur
[1] == '-'))) {
1435 * Still allow IANA code and user code which were coming
1436 * from the previous version of the XML-1.0 specification
1437 * it's deprecated but we should not fail
1440 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1441 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1443 return(cur
[0] == 0);
1446 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1447 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1449 if (nxt
- cur
>= 4) {
1453 if ((nxt
- cur
> 8) || (nxt
[0] != 0))
1459 /* we got an ISO 639 code */
1467 /* now we can have extlang or script or region or variant */
1468 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1471 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1472 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1478 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1482 /* we parsed an extlang */
1490 /* now we can have script or region or variant */
1491 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1494 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1495 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1499 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1503 /* we parsed a script */
1512 /* now we can have region or variant */
1513 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1516 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1517 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1520 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1524 /* we parsed a region */
1533 /* now we can just have a variant */
1534 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1535 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1538 if ((nxt
- cur
< 5) || (nxt
- cur
> 8))
1541 /* we parsed a variant */
1547 /* extensions and private use subtags not checked */
1551 if (((nxt
[1] >= '0') && (nxt
[1] <= '9')) &&
1552 ((nxt
[2] >= '0') && (nxt
[2] <= '9'))) {
1559 /************************************************************************
1561 * Parser stacks related functions and macros *
1563 ************************************************************************/
1565 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
1566 const xmlChar
** str
);
1571 * @ctxt: an XML parser context
1572 * @prefix: the namespace prefix or NULL
1573 * @URL: the namespace name
1575 * Pushes a new parser namespace on top of the ns stack
1577 * Returns -1 in case of error, -2 if the namespace should be discarded
1578 * and the index in the stack otherwise.
1581 nsPush(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
, const xmlChar
*URL
)
1583 if (ctxt
->options
& XML_PARSE_NSCLEAN
) {
1585 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-= 2) {
1586 if (ctxt
->nsTab
[i
] == prefix
) {
1588 if (ctxt
->nsTab
[i
+ 1] == URL
)
1590 /* out of scope keep it */
1595 if ((ctxt
->nsMax
== 0) || (ctxt
->nsTab
== NULL
)) {
1598 ctxt
->nsTab
= (const xmlChar
**)
1599 xmlMalloc(ctxt
->nsMax
* sizeof(xmlChar
*));
1600 if (ctxt
->nsTab
== NULL
) {
1601 xmlErrMemory(ctxt
, NULL
);
1605 } else if (ctxt
->nsNr
>= ctxt
->nsMax
) {
1606 const xmlChar
** tmp
;
1608 tmp
= (const xmlChar
**) xmlRealloc((char *) ctxt
->nsTab
,
1609 ctxt
->nsMax
* sizeof(ctxt
->nsTab
[0]));
1611 xmlErrMemory(ctxt
, NULL
);
1617 ctxt
->nsTab
[ctxt
->nsNr
++] = prefix
;
1618 ctxt
->nsTab
[ctxt
->nsNr
++] = URL
;
1619 return (ctxt
->nsNr
);
1623 * @ctxt: an XML parser context
1624 * @nr: the number to pop
1626 * Pops the top @nr parser prefix/namespace from the ns stack
1628 * Returns the number of namespaces removed
1631 nsPop(xmlParserCtxtPtr ctxt
, int nr
)
1635 if (ctxt
->nsTab
== NULL
) return(0);
1636 if (ctxt
->nsNr
< nr
) {
1637 xmlGenericError(xmlGenericErrorContext
, "Pbm popping %d NS\n", nr
);
1640 if (ctxt
->nsNr
<= 0)
1643 for (i
= 0;i
< nr
;i
++) {
1645 ctxt
->nsTab
[ctxt
->nsNr
] = NULL
;
1652 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt
, int nr
) {
1653 const xmlChar
**atts
;
1657 if (ctxt
->atts
== NULL
) {
1658 maxatts
= 55; /* allow for 10 attrs by default */
1659 atts
= (const xmlChar
**)
1660 xmlMalloc(maxatts
* sizeof(xmlChar
*));
1661 if (atts
== NULL
) goto mem_error
;
1663 attallocs
= (int *) xmlMalloc((maxatts
/ 5) * sizeof(int));
1664 if (attallocs
== NULL
) goto mem_error
;
1665 ctxt
->attallocs
= attallocs
;
1666 ctxt
->maxatts
= maxatts
;
1667 } else if (nr
+ 5 > ctxt
->maxatts
) {
1668 maxatts
= (nr
+ 5) * 2;
1669 atts
= (const xmlChar
**) xmlRealloc((void *) ctxt
->atts
,
1670 maxatts
* sizeof(const xmlChar
*));
1671 if (atts
== NULL
) goto mem_error
;
1673 attallocs
= (int *) xmlRealloc((void *) ctxt
->attallocs
,
1674 (maxatts
/ 5) * sizeof(int));
1675 if (attallocs
== NULL
) goto mem_error
;
1676 ctxt
->attallocs
= attallocs
;
1677 ctxt
->maxatts
= maxatts
;
1679 return(ctxt
->maxatts
);
1681 xmlErrMemory(ctxt
, NULL
);
1687 * @ctxt: an XML parser context
1688 * @value: the parser input
1690 * Pushes a new parser input on top of the input stack
1692 * Returns -1 in case of error, the index in the stack otherwise
1695 inputPush(xmlParserCtxtPtr ctxt
, xmlParserInputPtr value
)
1697 if ((ctxt
== NULL
) || (value
== NULL
))
1699 if (ctxt
->inputNr
>= ctxt
->inputMax
) {
1700 ctxt
->inputMax
*= 2;
1702 (xmlParserInputPtr
*) xmlRealloc(ctxt
->inputTab
,
1704 sizeof(ctxt
->inputTab
[0]));
1705 if (ctxt
->inputTab
== NULL
) {
1706 xmlErrMemory(ctxt
, NULL
);
1707 xmlFreeInputStream(value
);
1708 ctxt
->inputMax
/= 2;
1713 ctxt
->inputTab
[ctxt
->inputNr
] = value
;
1714 ctxt
->input
= value
;
1715 return (ctxt
->inputNr
++);
1719 * @ctxt: an XML parser context
1721 * Pops the top parser input from the input stack
1723 * Returns the input just removed
1726 inputPop(xmlParserCtxtPtr ctxt
)
1728 xmlParserInputPtr ret
;
1732 if (ctxt
->inputNr
<= 0)
1735 if (ctxt
->inputNr
> 0)
1736 ctxt
->input
= ctxt
->inputTab
[ctxt
->inputNr
- 1];
1739 ret
= ctxt
->inputTab
[ctxt
->inputNr
];
1740 ctxt
->inputTab
[ctxt
->inputNr
] = NULL
;
1745 * @ctxt: an XML parser context
1746 * @value: the element node
1748 * Pushes a new element node on top of the node stack
1750 * Returns -1 in case of error, the index in the stack otherwise
1753 nodePush(xmlParserCtxtPtr ctxt
, xmlNodePtr value
)
1755 if (ctxt
== NULL
) return(0);
1756 if (ctxt
->nodeNr
>= ctxt
->nodeMax
) {
1759 tmp
= (xmlNodePtr
*) xmlRealloc(ctxt
->nodeTab
,
1761 sizeof(ctxt
->nodeTab
[0]));
1763 xmlErrMemory(ctxt
, NULL
);
1766 ctxt
->nodeTab
= tmp
;
1769 if ((((unsigned int) ctxt
->nodeNr
) > xmlParserMaxDepth
) &&
1770 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
1771 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
1772 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1774 ctxt
->instate
= XML_PARSER_EOF
;
1777 ctxt
->nodeTab
[ctxt
->nodeNr
] = value
;
1779 return (ctxt
->nodeNr
++);
1784 * @ctxt: an XML parser context
1786 * Pops the top element node from the node stack
1788 * Returns the node just removed
1791 nodePop(xmlParserCtxtPtr ctxt
)
1795 if (ctxt
== NULL
) return(NULL
);
1796 if (ctxt
->nodeNr
<= 0)
1799 if (ctxt
->nodeNr
> 0)
1800 ctxt
->node
= ctxt
->nodeTab
[ctxt
->nodeNr
- 1];
1803 ret
= ctxt
->nodeTab
[ctxt
->nodeNr
];
1804 ctxt
->nodeTab
[ctxt
->nodeNr
] = NULL
;
1808 #ifdef LIBXML_PUSH_ENABLED
1811 * @ctxt: an XML parser context
1812 * @value: the element name
1813 * @prefix: the element prefix
1814 * @URI: the element namespace name
1816 * Pushes a new element name/prefix/URL on top of the name stack
1818 * Returns -1 in case of error, the index in the stack otherwise
1821 nameNsPush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
,
1822 const xmlChar
*prefix
, const xmlChar
*URI
, int nsNr
)
1824 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1825 const xmlChar
* *tmp
;
1828 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1830 sizeof(ctxt
->nameTab
[0]));
1835 ctxt
->nameTab
= tmp
;
1836 tmp2
= (void **) xmlRealloc((void * *)ctxt
->pushTab
,
1838 sizeof(ctxt
->pushTab
[0]));
1843 ctxt
->pushTab
= tmp2
;
1845 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1847 ctxt
->pushTab
[ctxt
->nameNr
* 3] = (void *) prefix
;
1848 ctxt
->pushTab
[ctxt
->nameNr
* 3 + 1] = (void *) URI
;
1849 ctxt
->pushTab
[ctxt
->nameNr
* 3 + 2] = (void *) (long) nsNr
;
1850 return (ctxt
->nameNr
++);
1852 xmlErrMemory(ctxt
, NULL
);
1857 * @ctxt: an XML parser context
1859 * Pops the top element/prefix/URI name from the name stack
1861 * Returns the name just removed
1863 static const xmlChar
*
1864 nameNsPop(xmlParserCtxtPtr ctxt
)
1868 if (ctxt
->nameNr
<= 0)
1871 if (ctxt
->nameNr
> 0)
1872 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1875 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1876 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1879 #endif /* LIBXML_PUSH_ENABLED */
1883 * @ctxt: an XML parser context
1884 * @value: the element name
1886 * Pushes a new element name on top of the name stack
1888 * Returns -1 in case of error, the index in the stack otherwise
1891 namePush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
)
1893 if (ctxt
== NULL
) return (-1);
1895 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1896 const xmlChar
* *tmp
;
1897 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1899 sizeof(ctxt
->nameTab
[0]));
1903 ctxt
->nameTab
= tmp
;
1906 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1908 return (ctxt
->nameNr
++);
1910 xmlErrMemory(ctxt
, NULL
);
1915 * @ctxt: an XML parser context
1917 * Pops the top element name from the name stack
1919 * Returns the name just removed
1922 namePop(xmlParserCtxtPtr ctxt
)
1926 if ((ctxt
== NULL
) || (ctxt
->nameNr
<= 0))
1929 if (ctxt
->nameNr
> 0)
1930 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1933 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1934 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1938 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
1939 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
1942 ctxt
->spaceMax
*= 2;
1943 tmp
= (int *) xmlRealloc(ctxt
->spaceTab
,
1944 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
1946 xmlErrMemory(ctxt
, NULL
);
1950 ctxt
->spaceTab
= tmp
;
1952 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
1953 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
1954 return(ctxt
->spaceNr
++);
1957 static int spacePop(xmlParserCtxtPtr ctxt
) {
1959 if (ctxt
->spaceNr
<= 0) return(0);
1961 if (ctxt
->spaceNr
> 0)
1962 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
1964 ctxt
->space
= &ctxt
->spaceTab
[0];
1965 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
1966 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/* All of these expect a variable named ctxt to be in scope. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Every parameter is parenthesized so that any expression can be passed
 * as the pointer or as a character without precedence surprises.
 * Note that s is evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
/*
 * SKIP(val): advance the current input by val bytes, updating the
 * nbChars and col counters; then handle a PE reference if the new
 * current byte is '%', and pop the input if it is exhausted and cannot
 * be grown. val is evaluated several times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks byte by byte so that line/col stay
 * correct across newlines. val is parenthesized so that an expression
 * argument cannot rebind against the loop comparison.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2051 #define SHRINK if ((ctxt->progressive == 0) && \
2052 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2053 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2056 static void xmlSHRINK (xmlParserCtxtPtr ctxt
) {
2057 xmlParserInputShrink(ctxt
->input
);
2058 if ((*ctxt
->input
->cur
== 0) &&
2059 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
2063 #define GROW if ((ctxt->progressive == 0) && \
2064 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2067 static void xmlGROW (xmlParserCtxtPtr ctxt
) {
2068 unsigned long curEnd
= ctxt
->input
->end
- ctxt
->input
->cur
;
2069 unsigned long curBase
= ctxt
->input
->cur
- ctxt
->input
->base
;
2071 if (((curEnd
> (unsigned long) XML_MAX_LOOKUP_LIMIT
) ||
2072 (curBase
> (unsigned long) XML_MAX_LOOKUP_LIMIT
)) &&
2073 ((ctxt
->input
->buf
) && (ctxt
->input
->buf
->readcallback
!= (xmlInputReadCallback
) xmlNop
)) &&
2074 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
2075 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
2076 ctxt
->instate
= XML_PARSER_EOF
;
2078 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2079 if ((ctxt
->input
->cur
!= NULL
) && (*ctxt
->input
->cur
== 0) &&
2080 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
/* Skip blanks / move to the next character through the parser helpers. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one byte (no newline handling) and try to refill
 * the input when the new current byte is 0.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l bytes long, keeping
 * line/col up to date, then handle a PE reference if one follows.
 * The parameter is parenthesized so an expression can be passed safely.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the byte length of the character. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; single-byte characters (l == 1) are stored directly, the
 * others go through xmlCopyCharMultiByte(). i must be an lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2112 * xmlSkipBlankChars:
2113 * @ctxt: the XML parser context
2115 * skip all blanks character found at that point in the input streams.
2116 * It pops up finished entities in the process if allowable at that point.
2118 * Returns the number of space chars skipped
2122 xmlSkipBlankChars(xmlParserCtxtPtr ctxt
) {
2126 * It's Okay to use CUR/NEXT here since all the blanks are on
2129 if ((ctxt
->inputNr
== 1) && (ctxt
->instate
!= XML_PARSER_DTD
)) {
2132 * if we are in the document content, go really fast
2134 cur
= ctxt
->input
->cur
;
2135 while (IS_BLANK_CH(*cur
)) {
2137 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
2144 ctxt
->input
->cur
= cur
;
2145 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2146 cur
= ctxt
->input
->cur
;
2149 ctxt
->input
->cur
= cur
;
2154 while (IS_BLANK_CH(cur
)) { /* CHECKED tstblanks.xml */
2159 while ((cur
== 0) && (ctxt
->inputNr
> 1) &&
2160 (ctxt
->instate
!= XML_PARSER_COMMENT
)) {
2165 * Need to handle support of entities branching here
2167 if (*ctxt
->input
->cur
== '%') xmlParserHandlePEReference(ctxt
);
2168 } while (IS_BLANK(cur
)); /* CHECKED tstblanks.xml */
2173 /************************************************************************
2175 * Commodity functions to handle entities *
2177 ************************************************************************/
2181 * @ctxt: an XML parser context
2183 * xmlPopInput: the current input pointed by ctxt->input came to an end
2184 * pop it and return the next char.
2186 * Returns the current xmlChar in the parser context
2189 xmlPopInput(xmlParserCtxtPtr ctxt
) {
2190 if ((ctxt
== NULL
) || (ctxt
->inputNr
<= 1)) return(0);
2191 if (xmlParserDebugEntities
)
2192 xmlGenericError(xmlGenericErrorContext
,
2193 "Popping input %d\n", ctxt
->inputNr
);
2194 xmlFreeInputStream(inputPop(ctxt
));
2195 if ((*ctxt
->input
->cur
== 0) &&
2196 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
2197 return(xmlPopInput(ctxt
));
2203 * @ctxt: an XML parser context
2204 * @input: an XML parser input fragment (entity, XML fragment ...).
2206 * xmlPushInput: switch to a new input stream which is stacked on top
2207 * of the previous one(s).
2208 * Returns -1 in case of error or the index in the input stack
2211 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
2213 if (input
== NULL
) return(-1);
2215 if (xmlParserDebugEntities
) {
2216 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
2217 xmlGenericError(xmlGenericErrorContext
,
2218 "%s(%d): ", ctxt
->input
->filename
,
2220 xmlGenericError(xmlGenericErrorContext
,
2221 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
2223 ret
= inputPush(ctxt
, input
);
2224 if (ctxt
->instate
== XML_PARSER_EOF
)
2232 * @ctxt: an XML parser context
2234 * parse Reference declarations
2236 * [66] CharRef ::= '&#' [0-9]+ ';' |
2237 * '&#x' [0-9a-fA-F]+ ';'
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2243 * Returns the value parsed (as an int), 0 in case of error
2246 xmlParseCharRef(xmlParserCtxtPtr ctxt
) {
2247 unsigned int val
= 0;
2249 unsigned int outofrange
= 0;
2252 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2254 if ((RAW
== '&') && (NXT(1) == '#') &&
2258 while (RAW
!= ';') { /* loop blocked by count */
2262 if (ctxt
->instate
== XML_PARSER_EOF
)
2265 if ((RAW
>= '0') && (RAW
<= '9'))
2266 val
= val
* 16 + (CUR
- '0');
2267 else if ((RAW
>= 'a') && (RAW
<= 'f') && (count
< 20))
2268 val
= val
* 16 + (CUR
- 'a') + 10;
2269 else if ((RAW
>= 'A') && (RAW
<= 'F') && (count
< 20))
2270 val
= val
* 16 + (CUR
- 'A') + 10;
2272 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2283 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2288 } else if ((RAW
== '&') && (NXT(1) == '#')) {
2291 while (RAW
!= ';') { /* loop blocked by count */
2295 if (ctxt
->instate
== XML_PARSER_EOF
)
2298 if ((RAW
>= '0') && (RAW
<= '9'))
2299 val
= val
* 10 + (CUR
- '0');
2301 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2312 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2318 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2322 * [ WFC: Legal Character ]
2323 * Characters referred to using character references must match the
2324 * production for Char.
2326 if ((IS_CHAR(val
) && (outofrange
== 0))) {
2329 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2330 "xmlParseCharRef: invalid xmlChar value %d\n",
2337 * xmlParseStringCharRef:
2338 * @ctxt: an XML parser context
2339 * @str: a pointer to an index in the string
2341 * parse Reference declarations, variant parsing from a string rather
2342 * than an an input flow.
2344 * [66] CharRef ::= '&#' [0-9]+ ';' |
2345 * '&#x' [0-9a-fA-F]+ ';'
2347 * [ WFC: Legal Character ]
2348 * Characters referred to using character references must match the
2349 * production for Char.
2351 * Returns the value parsed (as an int), 0 in case of error, str will be
2352 * updated to the current value of the index
2355 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
2358 unsigned int val
= 0;
2359 unsigned int outofrange
= 0;
2361 if ((str
== NULL
) || (*str
== NULL
)) return(0);
2364 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
2367 while (cur
!= ';') { /* Non input consuming loop */
2368 if ((cur
>= '0') && (cur
<= '9'))
2369 val
= val
* 16 + (cur
- '0');
2370 else if ((cur
>= 'a') && (cur
<= 'f'))
2371 val
= val
* 16 + (cur
- 'a') + 10;
2372 else if ((cur
>= 'A') && (cur
<= 'F'))
2373 val
= val
* 16 + (cur
- 'A') + 10;
2375 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2387 } else if ((cur
== '&') && (ptr
[1] == '#')){
2390 while (cur
!= ';') { /* Non input consuming loops */
2391 if ((cur
>= '0') && (cur
<= '9'))
2392 val
= val
* 10 + (cur
- '0');
2394 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2407 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2413 * [ WFC: Legal Character ]
2414 * Characters referred to using character references must match the
2415 * production for Char.
2417 if ((IS_CHAR(val
) && (outofrange
== 0))) {
2420 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2421 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2428 * xmlNewBlanksWrapperInputStream:
2429 * @ctxt: an XML parser context
2430 * @entity: an Entity pointer
2432 * Create a new input stream for wrapping
2433 * blanks around a PEReference
2435 * Returns the new input stream or NULL
2438 static void deallocblankswrapper (xmlChar
*str
) {xmlFree(str
);}
2440 static xmlParserInputPtr
2441 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
2442 xmlParserInputPtr input
;
2445 if (entity
== NULL
) {
2446 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
2447 "xmlNewBlanksWrapperInputStream entity\n");
2450 if (xmlParserDebugEntities
)
2451 xmlGenericError(xmlGenericErrorContext
,
2452 "new blanks wrapper for entity: %s\n", entity
->name
);
2453 input
= xmlNewInputStream(ctxt
);
2454 if (input
== NULL
) {
2457 length
= xmlStrlen(entity
->name
) + 5;
2458 buffer
= xmlMallocAtomic(length
);
2459 if (buffer
== NULL
) {
2460 xmlErrMemory(ctxt
, NULL
);
2466 buffer
[length
-3] = ';';
2467 buffer
[length
-2] = ' ';
2468 buffer
[length
-1] = 0;
2469 memcpy(buffer
+ 2, entity
->name
, length
- 5);
2470 input
->free
= deallocblankswrapper
;
2471 input
->base
= buffer
;
2472 input
->cur
= buffer
;
2473 input
->length
= length
;
2474 input
->end
= &buffer
[length
];
2479 * xmlParserHandlePEReference:
2480 * @ctxt: the parser context
2482 * [69] PEReference ::= '%' Name ';'
2484 * [ WFC: No Recursion ]
2485 * A parsed entity must not contain a recursive
2486 * reference to itself, either directly or indirectly.
2488 * [ WFC: Entity Declared ]
2489 * In a document without any DTD, a document with only an internal DTD
2490 * subset which contains no parameter entity references, or a document
2491 * with "standalone='yes'", ... ... The declaration of a parameter
2492 * entity must precede any reference to it...
2494 * [ VC: Entity Declared ]
2495 * In a document with an external subset or external parameter entities
2496 * with "standalone='no'", ... ... The declaration of a parameter entity
2497 * must precede any reference to it...
2500 * Parameter-entity references may only appear in the DTD.
2501 * NOTE: misleading but this is handled.
2503 * A PEReference may have been detected in the current input stream
2504 * the handling is done accordingly to
2505 * http://www.w3.org/TR/REC-xml#entproc
2507 * - Included in literal in entity values
2508 * - Included as Parameter Entity reference within DTDs
2511 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
2512 const xmlChar
*name
;
2513 xmlEntityPtr entity
= NULL
;
2514 xmlParserInputPtr input
;
2516 if (RAW
!= '%') return;
2517 switch(ctxt
->instate
) {
2518 case XML_PARSER_CDATA_SECTION
:
2520 case XML_PARSER_COMMENT
:
2522 case XML_PARSER_START_TAG
:
2524 case XML_PARSER_END_TAG
:
2526 case XML_PARSER_EOF
:
2527 xmlFatalErr(ctxt
, XML_ERR_PEREF_AT_EOF
, NULL
);
2529 case XML_PARSER_PROLOG
:
2530 case XML_PARSER_START
:
2531 case XML_PARSER_MISC
:
2532 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_PROLOG
, NULL
);
2534 case XML_PARSER_ENTITY_DECL
:
2535 case XML_PARSER_CONTENT
:
2536 case XML_PARSER_ATTRIBUTE_VALUE
:
2538 case XML_PARSER_SYSTEM_LITERAL
:
2539 case XML_PARSER_PUBLIC_LITERAL
:
2540 /* we just ignore it there */
2542 case XML_PARSER_EPILOG
:
2543 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_EPILOG
, NULL
);
2545 case XML_PARSER_ENTITY_VALUE
:
2547 * NOTE: in the case of entity values, we don't do the
2548 * substitution here since we need the literal
2549 * entity value to be able to save the internal
2550 * subset of the document.
2551 * This will be handled by xmlStringDecodeEntities
2554 case XML_PARSER_DTD
:
2556 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2557 * In the internal DTD subset, parameter-entity references
2558 * can occur only where markup declarations can occur, not
2559 * within markup declarations.
2560 * In that case this is handled in xmlParseMarkupDecl
2562 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
2564 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2567 case XML_PARSER_IGNORE
:
2572 name
= xmlParseName(ctxt
);
2573 if (xmlParserDebugEntities
)
2574 xmlGenericError(xmlGenericErrorContext
,
2575 "PEReference: %s\n", name
);
2577 xmlFatalErr(ctxt
, XML_ERR_PEREF_NO_NAME
, NULL
);
2581 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->getParameterEntity
!= NULL
))
2582 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
2583 if (ctxt
->instate
== XML_PARSER_EOF
)
2585 if (entity
== NULL
) {
2588 * [ WFC: Entity Declared ]
2589 * In a document without any DTD, a document with only an
2590 * internal DTD subset which contains no parameter entity
2591 * references, or a document with "standalone='yes'", ...
2592 * ... The declaration of a parameter entity must precede
2593 * any reference to it...
2595 if ((ctxt
->standalone
== 1) ||
2596 ((ctxt
->hasExternalSubset
== 0) &&
2597 (ctxt
->hasPErefs
== 0))) {
2598 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
2599 "PEReference: %%%s; not found\n", name
);
2602 * [ VC: Entity Declared ]
2603 * In a document with an external subset or external
2604 * parameter entities with "standalone='no'", ...
2605 * ... The declaration of a parameter entity must precede
2606 * any reference to it...
2608 if ((ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
2609 xmlValidityError(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
2610 "PEReference: %%%s; not found\n",
2613 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
2614 "PEReference: %%%s; not found\n",
2618 xmlParserEntityCheck(ctxt
, 0, NULL
, 0);
2619 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
2620 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
2621 if (xmlPushInput(ctxt
, input
) < 0)
2624 if ((entity
->etype
== XML_INTERNAL_PARAMETER_ENTITY
) ||
2625 (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
)) {
2627 xmlCharEncoding enc
;
2630 * Note: external parameter entities will not be loaded, it
2631 * is not required for a non-validating parser, unless the
2632 * option of validating, or substituting entities were
2633 * given. Doing so is far more secure as the parser will
2634 * only process data coming from the document entity by
2637 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
2638 ((ctxt
->options
& XML_PARSE_NOENT
) == 0) &&
2639 ((ctxt
->options
& XML_PARSE_DTDVALID
) == 0) &&
2640 ((ctxt
->options
& XML_PARSE_DTDLOAD
) == 0) &&
2641 ((ctxt
->options
& XML_PARSE_DTDATTR
) == 0) &&
2642 (ctxt
->replaceEntities
== 0) &&
2643 (ctxt
->validate
== 0))
2647 * handle the extra spaces added before and after
2648 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2649 * this is done independently.
2651 input
= xmlNewEntityInputStream(ctxt
, entity
);
2652 if (xmlPushInput(ctxt
, input
) < 0)
2656 * Get the 4 first bytes and decode the charset
2657 * if enc != XML_CHAR_ENCODING_NONE
2658 * plug some encoding conversion routines.
2659 * Note that, since we may have some non-UTF8
2660 * encoding (like UTF16, bug 135229), the 'length'
2661 * is not known, but we can calculate based upon
2662 * the amount of data in the buffer.
2665 if (ctxt
->instate
== XML_PARSER_EOF
)
2667 if ((ctxt
->input
->end
- ctxt
->input
->cur
)>=4) {
2672 enc
= xmlDetectCharEncoding(start
, 4);
2673 if (enc
!= XML_CHAR_ENCODING_NONE
) {
2674 xmlSwitchEncoding(ctxt
, enc
);
2678 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
2679 (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l' )) &&
2680 (IS_BLANK_CH(NXT(5)))) {
2681 xmlParseTextDecl(ctxt
);
2684 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
2685 "PEReference: %s is not a parameter entity\n",
2690 xmlFatalErr(ctxt
, XML_ERR_PEREF_SEMICOL_MISSING
, NULL
);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n; a wrap-around of that
 * computation is treated as an allocation failure. n is parenthesized
 * so that any expression can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2711 * xmlStringLenDecodeEntities:
2712 * @ctxt: the parser context
2713 * @str: the input string
2714 * @len: the string length
2715 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2716 * @end: an end marker xmlChar, 0 if none
2717 * @end2: an end marker xmlChar, 0 if none
2718 * @end3: an end marker xmlChar, 0 if none
2720 * Takes a entity string content and process to do the adequate substitutions.
2722 * [67] Reference ::= EntityRef | CharRef
2724 * [69] PEReference ::= '%' Name ';'
2726 * Returns A newly allocated string with the substitution done. The caller
2727 * must deallocate it !
/*
 * Builds a decoded copy of @str in a heap buffer. Character references are
 * always resolved; general entity references only when @what contains
 * XML_SUBSTITUTE_REF; parameter entity references only when @what contains
 * XML_SUBSTITUTE_PEREF. Scanning stops on a 0 character or on @end, @end2
 * or @end3. Allocation failures jump to mem_error (via the initial check
 * and growBuffer()), where xmlErrMemory() is called.
 */
2730 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2731 int what
, xmlChar end
, xmlChar end2
, xmlChar end3
) {
2732 xmlChar
*buffer
= NULL
;
2733 size_t buffer_size
= 0;
2736 xmlChar
*current
= NULL
;
2737 xmlChar
*rep
= NULL
;
2738 const xmlChar
*last
;
/* Guard: missing context, missing string or negative length. */
2742 if ((ctxt
== NULL
) || (str
== NULL
) || (len
< 0))
/*
 * Nesting limit: a depth above 40 without XML_PARSE_HUGE, or above 1024
 * in any case, is reported as XML_ERR_ENTITY_LOOP.
 */
2746 if (((ctxt
->depth
> 40) &&
2747 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2748 (ctxt
->depth
> 1024)) {
2749 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2754 * allocate a translation buffer.
2756 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
2757 buffer
= (xmlChar
*) xmlMallocAtomic(buffer_size
);
2758 if (buffer
== NULL
) goto mem_error
;
2761 * OK loop until we reach one of the ending char or a size limit.
2762 * we are operating on already parsed values.
2765 c
= CUR_SCHAR(str
, l
);
2768 while ((c
!= 0) && (c
!= end
) && /* non input consuming loop */
2769 (c
!= end2
) && (c
!= end3
)) {
/* Character reference: decode it and append the resulting value. */
2772 if ((c
== '&') && (str
[1] == '#')) {
2773 int val
= xmlParseStringCharRef(ctxt
, &str
);
2775 COPY_BUF(0,buffer
,nbchars
,val
);
2777 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2778 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
/* General entity reference, handled only when XML_SUBSTITUTE_REF is set. */
2780 } else if ((c
== '&') && (what
& XML_SUBSTITUTE_REF
)) {
2781 if (xmlParserDebugEntities
)
2782 xmlGenericError(xmlGenericErrorContext
,
2783 "String decoding Entity Reference: %.30s\n",
2785 ent
= xmlParseStringEntityRef(ctxt
, &str
);
2786 if ((ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
) ||
2787 (ctxt
->lastError
.code
== XML_ERR_INTERNAL_ERROR
))
2789 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
2791 ctxt
->nbentities
+= ent
->checked
/ 2;
/* Predefined entity: append the first character of its content. */
2792 if ((ent
!= NULL
) &&
2793 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
2794 if (ent
->content
!= NULL
) {
2795 COPY_BUF(0,buffer
,nbchars
,ent
->content
[0]);
2796 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2797 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2800 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
2801 "predefined entity has no content\n");
/* Entity with content: decode the content recursively, then append it. */
2803 } else if ((ent
!= NULL
) && (ent
->content
!= NULL
)) {
2805 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2811 while (*current
!= 0) { /* non input consuming loop */
2812 buffer
[nbchars
++] = *current
++;
2813 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2814 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
, 0))
2816 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
/* Entity without content: re-emit the reference as '&' name ';'. */
2822 } else if (ent
!= NULL
) {
2823 int i
= xmlStrlen(ent
->name
);
2824 const xmlChar
*cur
= ent
->name
;
2826 buffer
[nbchars
++] = '&';
2827 if (nbchars
+ i
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2828 growBuffer(buffer
, i
+ XML_PARSER_BUFFER_SIZE
);
2831 buffer
[nbchars
++] = *cur
++;
2832 buffer
[nbchars
++] = ';';
/* Parameter entity reference, handled only when XML_SUBSTITUTE_PEREF is set. */
2834 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
2835 if (xmlParserDebugEntities
)
2836 xmlGenericError(xmlGenericErrorContext
,
2837 "String decoding PE Reference: %.30s\n", str
);
2838 ent
= xmlParseStringPEReference(ctxt
, &str
);
2839 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
2841 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
2843 ctxt
->nbentities
+= ent
->checked
/ 2;
/* No content yet: xmlLoadEntityContent() is invoked before decoding. */
2845 if (ent
->content
== NULL
) {
2846 xmlLoadEntityContent(ctxt
, ent
);
2849 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2854 while (*current
!= 0) { /* non input consuming loop */
2855 buffer
[nbchars
++] = *current
++;
2856 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2857 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
, 0))
2859 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
/* Any other character is appended as read. */
2867 COPY_BUF(l
,buffer
,nbchars
,c
);
2869 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2870 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2874 c
= CUR_SCHAR(str
, l
);
/* Terminate the output string. */
2878 buffer
[nbchars
] = 0;
/* Out-of-memory report; presumably the mem_error target (label not visible here). */
2882 xmlErrMemory(ctxt
, NULL
);
2892 * xmlStringDecodeEntities:
2893 * @ctxt: the parser context
2894 * @str: the input string
2895 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2896 * @end: an end marker xmlChar, 0 if none
2897 * @end2: an end marker xmlChar, 0 if none
2898 * @end3: an end marker xmlChar, 0 if none
2900 * Takes an entity string content and processes it to perform the adequate substitutions.
2902 * [67] Reference ::= EntityRef | CharRef
2904 * [69] PEReference ::= '%' Name ';'
2906 * Returns A newly allocated string with the substitution done. The caller
2907 * must deallocate it !
2910 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
2911 xmlChar end
, xmlChar end2
, xmlChar end3
) {
2912 if ((ctxt
== NULL
) || (str
== NULL
)) return(NULL
);
2913 return(xmlStringLenDecodeEntities(ctxt
, str
, xmlStrlen(str
), what
,
2917 /************************************************************************
2919 * Commodity functions, cleanup needed ? *
2921 ************************************************************************/
2925 * @ctxt: an XML parser context
2927 * @len: the size of @str
2928 * @blank_chars: we know the chars are blanks
2930 * Is this a sequence of blank chars that one can ignore ?
2932 * Returns 1 if ignorable 0 otherwise.
/*
 * Decides whether a run of characters may be reported as ignorable
 * whitespace. Visible decision points, in order: whether the two SAX
 * callbacks are the same function, the xml:space value, a scan for
 * non-blank characters (skipped when blank_chars is non-zero), the
 * presence of a current node, the answer of xmlIsMixedElement() when a
 * document is attached, and finally a heuristic based on the upcoming
 * input and the neighbouring nodes.
 */
2935 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2938 xmlNodePtr lastChild
;
2941 * Don't spend time trying to differentiate them, the same callback is
2944 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
2948 * Check for xml:space value.
2950 if ((ctxt
->space
== NULL
) || (*(ctxt
->space
) == 1) ||
2951 (*(ctxt
->space
) == -2))
2955 * Check that the string is made of blanks
2957 if (blank_chars
== 0) {
2958 for (i
= 0;i
< len
;i
++)
2959 if (!(IS_BLANK_CH(str
[i
]))) return(0);
2963 * Look if the element is mixed content in the DTD if available
2965 if (ctxt
->node
== NULL
) return(0);
/* xmlIsMixedElement() result 0 yields 1 (ignorable); result 1 yields 0. */
2966 if (ctxt
->myDoc
!= NULL
) {
2967 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
2968 if (ret
== 0) return(1);
2969 if (ret
== 1) return(0);
2973 * Otherwise, heuristic :-\
/* Not ignorable unless the next raw byte is '<' or 0xD. */
2975 if ((RAW
!= '<') && (RAW
!= 0xD)) return(0);
/* Not ignorable when the node has no children and "</" follows. */
2976 if ((ctxt
->node
->children
== NULL
) &&
2977 (RAW
== '<') && (NXT(1) == '/')) return(0);
2979 lastChild
= xmlGetLastChild(ctxt
->node
);
2980 if (lastChild
== NULL
) {
2981 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
2982 (ctxt
->node
->content
!= NULL
)) return(0);
2983 } else if (xmlNodeIsText(lastChild
))
2985 else if ((ctxt
->node
->children
!= NULL
) &&
2986 (xmlNodeIsText(ctxt
->node
->children
)))
2991 /************************************************************************
2993 * Extra stuff for namespace support *
2994 * Relates to http://www.w3.org/TR/WD-xml-names *
2996 ************************************************************************/
3000 * @ctxt: an XML parser context
3001 * @name: the XML qualified name to split
3002 * @prefix: a xmlChar **
3004 * parse a UTF-8 encoded XML qualified name string
3006 * [NS 5] QName ::= (Prefix ':')? LocalPart
3008 * [NS 6] Prefix ::= NCName
3010 * [NS 7] LocalPart ::= NCName
3012 * Returns the local part, and prefix is updated
3013 * to get the Prefix if any.
/*
 * Splits the qualified name @name around its first ':'. Characters are
 * gathered in a stack buffer of XML_MAX_NAMELEN + 5 bytes; once that
 * limit is reached a heap buffer is allocated and enlarged with
 * xmlRealloc(). Visible special cases: a NULL @prefix or @name returns
 * NULL; unless XML_XML_NAMESPACE is defined, a name starting with "xml:"
 * is returned as a plain copy; a name whose ':' is the last character is
 * also returned as a plain copy. After the colon, the first character of
 * the local part is validated and XML_NS_ERR_QNAME is raised when it is
 * neither a letter nor '_'.
 */
3017 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
3018 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3019 xmlChar
*buffer
= NULL
;
3021 int max
= XML_MAX_NAMELEN
;
3022 xmlChar
*ret
= NULL
;
3023 const xmlChar
*cur
= name
;
3026 if (prefix
== NULL
) return(NULL
);
3029 if (cur
== NULL
) return(NULL
);
3031 #ifndef XML_XML_NAMESPACE
3032 /* xml: prefix is not really a namespace */
3033 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
3034 (cur
[2] == 'l') && (cur
[3] == ':'))
3035 return(xmlStrdup(name
));
3038 /* nasty but well=formed */
3040 return(xmlStrdup(name
));
/* First pass: collect characters up to ':' or the stack buffer limit. */
3043 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
3049 * Okay someone managed to make a huge name, so he's ready to pay
3050 * for the processing speed.
3054 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3055 if (buffer
== NULL
) {
3056 xmlErrMemory(ctxt
, NULL
);
3059 memcpy(buffer
, buf
, len
);
3060 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
3061 if (len
+ 10 > max
) {
3065 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3066 max
* sizeof(xmlChar
));
3069 xmlErrMemory(ctxt
, NULL
);
/* A trailing ':' with nothing after it: the whole name is copied as is. */
3080 if ((c
== ':') && (*cur
== 0)) {
3084 return(xmlStrdup(name
));
3088 ret
= xmlStrndup(buf
, len
);
3092 max
= XML_MAX_NAMELEN
;
3100 return(xmlStrndup(BAD_CAST
"", 0));
3105 * Check that the first character is proper to start
/* ASCII letters, '_' and ':' pass directly; anything else is decoded and tested. */
3108 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
3109 ((c
>= 0x41) && (c
<= 0x5A)) ||
3110 (c
== '_') || (c
== ':'))) {
3112 int first
= CUR_SCHAR(cur
, l
);
3114 if (!IS_LETTER(first
) && (first
!= '_')) {
3115 xmlFatalErrMsgStr(ctxt
, XML_NS_ERR_QNAME
,
3116 "Name %s is not XML Namespace compliant\n",
/* Second pass: collect the part after the colon, with the same buffer strategy. */
3122 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
3128 * Okay someone managed to make a huge name, so he's ready to pay
3129 * for the processing speed.
3133 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3134 if (buffer
== NULL
) {
3135 xmlErrMemory(ctxt
, NULL
);
3138 memcpy(buffer
, buf
, len
);
3139 while (c
!= 0) { /* tested bigname2.xml */
3140 if (len
+ 10 > max
) {
3144 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3145 max
* sizeof(xmlChar
));
3147 xmlErrMemory(ctxt
, NULL
);
3160 ret
= xmlStrndup(buf
, len
);
3169 /************************************************************************
3171 * The parser itself *
3172 * Relates to http://www.w3.org/TR/REC-xml *
3174 ************************************************************************/
3176 /************************************************************************
3178 * Routines to parse Name, NCName and NmToken *
3180 ************************************************************************/
/*
 * File-local call counters for the name-parsing routines. In the code
 * visible here nbParseNameComplex, nbParseNCNameComplex and
 * nbParseStringName are incremented on entry to the routine of the same
 * name; the other three are presumably bumped the same way (increments
 * not visible here -- TODO confirm).
 */
3182 static unsigned long nbParseName
= 0;
3183 static unsigned long nbParseNmToken
= 0;
3184 static unsigned long nbParseNCName
= 0;
3185 static unsigned long nbParseNCNameComplex
= 0;
3186 static unsigned long nbParseNameComplex
= 0;
3187 static unsigned long nbParseStringName
= 0;
3191 * The two following functions are related to the change of accepted
3192 * characters for Name and NmToken in the Revision 5 of XML-1.0
3193 * They correspond to the modified production [4] and the new production [4a]
3194 * changes in that revision. Also note that the macros used for the
3195 * productions Letter, Digit, CombiningChar and Extender are not needed
3197 * We still keep compatibility to pre-revision5 parsing semantic if the
3198 * new XML_PARSE_OLD10 option is given to the parser.
3201 xmlIsNameStartChar(xmlParserCtxtPtr ctxt
, int c
) {
3202 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3204 * Use the new checks of production [4] [4a] amd [5] of the
3205 * Update 5 of XML-1.0
3207 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3208 (((c
>= 'a') && (c
<= 'z')) ||
3209 ((c
>= 'A') && (c
<= 'Z')) ||
3210 (c
== '_') || (c
== ':') ||
3211 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3212 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3213 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3214 ((c
>= 0x370) && (c
<= 0x37D)) ||
3215 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3216 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3217 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3218 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3219 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3220 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3221 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3222 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3225 if (IS_LETTER(c
) || (c
== '_') || (c
== ':'))
3232 xmlIsNameChar(xmlParserCtxtPtr ctxt
, int c
) {
3233 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3235 * Use the new checks of production [4] [4a] amd [5] of the
3236 * Update 5 of XML-1.0
3238 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3239 (((c
>= 'a') && (c
<= 'z')) ||
3240 ((c
>= 'A') && (c
<= 'Z')) ||
3241 ((c
>= '0') && (c
<= '9')) || /* !start */
3242 (c
== '_') || (c
== ':') ||
3243 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3244 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3245 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3246 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3247 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3248 ((c
>= 0x370) && (c
<= 0x37D)) ||
3249 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3250 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3251 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3252 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3253 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3254 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3255 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3256 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3257 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3260 if ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3261 (c
== '.') || (c
== '-') ||
3262 (c
== '_') || (c
== ':') ||
3263 (IS_COMBINING(c
)) ||
/*
 * Forward declaration; the definition is not part of this section.
 * xmlParseAttValue() calls it with NULL, NULL and 0 for the last three
 * arguments.
 */
3270 static xmlChar
* xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
,
3271 int *len
, int *alloc
, int normalize
);
/*
 * Slow path for Name parsing, reached from xmlParseName() when the ASCII
 * accelerator does not apply. Two rule sets are used depending on
 * XML_PARSE_OLD10: explicit code point ranges when the option is clear,
 * the IS_LETTER / IS_DIGIT / IS_COMBINING / IS_EXTENDER classes when it
 * is set. A name longer than XML_MAX_NAME_LENGTH raises
 * XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set. The result is
 * looked up in ctxt->dict directly from the input buffer.
 */
3273 static const xmlChar
*
3274 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
3280 nbParseNameComplex
++;
3284 * Handler for more complex cases
3287 if (ctxt
->instate
== XML_PARSER_EOF
)
3290 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3292 * Use the new checks of production [4] [4a] amd [5] of the
3293 * Update 5 of XML-1.0
/* Reject a first character that is a delimiter or outside the start ranges. */
3295 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3296 (!(((c
>= 'a') && (c
<= 'z')) ||
3297 ((c
>= 'A') && (c
<= 'Z')) ||
3298 (c
== '_') || (c
== ':') ||
3299 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3300 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3301 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3302 ((c
>= 0x370) && (c
<= 0x37D)) ||
3303 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3304 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3305 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3306 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3307 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3308 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3309 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3310 ((c
>= 0x10000) && (c
<= 0xEFFFF))))) {
/* Consume following characters while they fall in the name-character ranges. */
3316 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3317 (((c
>= 'a') && (c
<= 'z')) ||
3318 ((c
>= 'A') && (c
<= 'Z')) ||
3319 ((c
>= '0') && (c
<= '9')) || /* !start */
3320 (c
== '_') || (c
== ':') ||
3321 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3322 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3323 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3324 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3325 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3326 ((c
>= 0x370) && (c
<= 0x37D)) ||
3327 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3328 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3329 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3330 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3331 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3332 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3333 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3334 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3335 ((c
>= 0x10000) && (c
<= 0xEFFFF))
3337 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3340 if (ctxt
->instate
== XML_PARSER_EOF
)
/* XML_PARSE_OLD10 branch: same two steps using the character-class macros. */
3348 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3349 (!IS_LETTER(c
) && (c
!= '_') &&
3357 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3358 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3359 (c
== '.') || (c
== '-') ||
3360 (c
== '_') || (c
== ':') ||
3361 (IS_COMBINING(c
)) ||
3362 (IS_EXTENDER(c
)))) {
3363 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3366 if (ctxt
->instate
== XML_PARSER_EOF
)
3375 if (ctxt
->instate
== XML_PARSER_EOF
)
3381 if ((len
> XML_MAX_NAME_LENGTH
) &&
3382 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3383 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
/*
 * When cur sits on the '\n' of a "\r\n" pair the name is taken from one
 * byte further back (len + 1); presumably because reading the character
 * after the name already stepped over the '\r' -- TODO confirm.
 */
3386 if ((*ctxt
->input
->cur
== '\n') && (ctxt
->input
->cur
[-1] == '\r'))
3387 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- (len
+ 1), len
));
3388 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3393 * @ctxt: an XML parser context
3395 * parse an XML name.
3397 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3398 * CombiningChar | Extender
3400 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3402 * [6] Names ::= Name (#x20 Name)*
3404 * Returns the Name parsed or NULL
/*
 * Fast path for Name parsing. Scans ASCII name characters directly in the
 * input buffer; when the byte that stops the scan is a non-zero ASCII
 * byte, the name is looked up in ctxt->dict and cur, nbChars and col are
 * advanced by its length. A name longer than XML_MAX_NAME_LENGTH raises
 * XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set. Every other case is
 * delegated to xmlParseNameComplex().
 */
3408 xmlParseName(xmlParserCtxtPtr ctxt
) {
3420 * Accelerator for simple ASCII names
3422 in
= ctxt
->input
->cur
;
/* First byte: a-z, A-Z, '_' or ':'. */
3423 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3424 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3425 (*in
== '_') || (*in
== ':')) {
/* Following bytes: additionally 0-9, '-' and '.'. */
3427 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3428 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3429 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3430 (*in
== '_') || (*in
== '-') ||
3431 (*in
== ':') || (*in
== '.'))
/* Accept only when the scan stopped on a non-zero ASCII byte. */
3433 if ((*in
> 0) && (*in
< 0x80)) {
3434 count
= in
- ctxt
->input
->cur
;
3435 if ((count
> XML_MAX_NAME_LENGTH
) &&
3436 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3437 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3440 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3441 ctxt
->input
->cur
= in
;
3442 ctxt
->nbChars
+= count
;
3443 ctxt
->input
->col
+= count
;
3445 xmlErrMemory(ctxt
, NULL
);
3449 /* accelerator for special cases */
3450 return(xmlParseNameComplex(ctxt
));
/*
 * Slow path for NCName parsing, reached from xmlParseNCName(). Uses
 * xmlIsNameStartChar() / xmlIsNameChar() but additionally rejects ':' both
 * as first and as subsequent character. A name longer than
 * XML_MAX_NAME_LENGTH raises XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE
 * is set. The result is looked up in ctxt->dict using the @end pointer,
 * which is refreshed from ctxt->input->cur at the points shown below.
 */
3453 static const xmlChar
*
3454 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt
) {
3458 const xmlChar
*end
; /* needed because CUR_CHAR() can move cur on \r\n */
3461 nbParseNCNameComplex
++;
3465 * Handler for more complex cases
3468 end
= ctxt
->input
->cur
;
/* First character: must be a name start character other than ':'. */
3470 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3471 (!xmlIsNameStartChar(ctxt
, c
) || (c
== ':'))) {
3475 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3476 (xmlIsNameChar(ctxt
, c
) && (c
!= ':'))) {
/* Length limit is checked inside the loop once per XML_PARSER_CHUNK_SIZE characters. */
3477 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3478 if ((len
> XML_MAX_NAME_LENGTH
) &&
3479 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3480 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3485 if (ctxt
->instate
== XML_PARSER_EOF
)
3490 end
= ctxt
->input
->cur
;
3495 if (ctxt
->instate
== XML_PARSER_EOF
)
3497 end
= ctxt
->input
->cur
;
3501 if ((len
> XML_MAX_NAME_LENGTH
) &&
3502 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3503 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3506 return(xmlDictLookup(ctxt
->dict
, end
- len
, len
));
3511 * @ctxt: an XML parser context
3512 * @len: length of the string parsed
3514 * parse an XML name.
3516 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3517 * CombiningChar | Extender
3519 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3521 * Returns the Name parsed or NULL
/*
 * Fast path for NCName parsing. Scans ASCII name characters directly in
 * the input buffer; when the byte that stops the scan is a non-zero ASCII
 * byte, the name is looked up in ctxt->dict and cur, nbChars and col are
 * advanced by its length. A name longer than XML_MAX_NAME_LENGTH raises
 * XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set. Every other case is
 * delegated to xmlParseNCNameComplex().
 */
3524 static const xmlChar
*
3525 xmlParseNCName(xmlParserCtxtPtr ctxt
) {
3535 * Accelerator for simple ASCII names
3537 in
= ctxt
->input
->cur
;
3538 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3539 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3542 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3543 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3544 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3545 (*in
== '_') || (*in
== '-') ||
/* Accept only when the scan stopped on a non-zero ASCII byte. */
3548 if ((*in
> 0) && (*in
< 0x80)) {
3549 count
= in
- ctxt
->input
->cur
;
3550 if ((count
> XML_MAX_NAME_LENGTH
) &&
3551 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3552 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3555 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3556 ctxt
->input
->cur
= in
;
3557 ctxt
->nbChars
+= count
;
3558 ctxt
->input
->col
+= count
;
3560 xmlErrMemory(ctxt
, NULL
);
3565 return(xmlParseNCNameComplex(ctxt
));
3569 * xmlParseNameAndCompare:
3570 * @ctxt: an XML parser context
3572 * parses an XML name and compares it with @other for a match
3573 * (specialized for endtag parsing)
3575 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3576 * and the name for mismatch
/*
 * Compares the bytes at the current input position with @other. When
 * @other is matched completely and the next input byte is '>' or a blank,
 * ctxt->input->cur is moved past the name and (const xmlChar *) 1 is
 * returned without going through the dictionary. In every other case the
 * name is parsed with xmlParseName() and the outcome is derived from that
 * result.
 */
3579 static const xmlChar
*
3580 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
3581 register const xmlChar
*cmp
= other
;
3582 register const xmlChar
*in
;
3586 if (ctxt
->instate
== XML_PARSER_EOF
)
3589 in
= ctxt
->input
->cur
;
/* Byte-wise comparison, stopping on the first difference or on a 0 input byte. */
3590 while (*in
!= 0 && *in
== *cmp
) {
/* Full match only if @other is exhausted and the name is properly delimited. */
3595 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
3597 ctxt
->input
->cur
= in
;
3598 return (const xmlChar
*) 1;
3600 /* failure (or end of input buffer), check with full function */
3601 ret
= xmlParseName (ctxt
);
3602 /* strings coming from the dictionnary direct compare possible */
3604 return (const xmlChar
*) 1;
3610 * xmlParseStringName:
3611 * @ctxt: an XML parser context
3612 * @str: a pointer to the string pointer (IN/OUT)
3614 * parse an XML name.
3616 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3617 * CombiningChar | Extender
3619 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3621 * [6] Names ::= Name (#x20 Name)*
3623 * Returns the Name parsed or NULL. The @str pointer
3624 * is updated to the current location in the string.
/*
 * Parses a Name from the string *str instead of the parser input. The
 * first character must satisfy xmlIsNameStartChar(); following characters
 * are accepted while xmlIsNameChar() holds. Characters are gathered in a
 * stack buffer of XML_MAX_NAMELEN + 5 bytes; once len reaches
 * XML_MAX_NAMELEN the data is moved to a heap buffer that is enlarged with
 * xmlRealloc(). A name longer than XML_MAX_NAME_LENGTH raises
 * XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set.
 */
3628 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
3629 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3630 const xmlChar
*cur
= *str
;
3635 nbParseStringName
++;
3638 c
= CUR_SCHAR(cur
, l
);
3639 if (!xmlIsNameStartChar(ctxt
, c
)) {
3643 COPY_BUF(l
,buf
,len
,c
);
3645 c
= CUR_SCHAR(cur
, l
);
3646 while (xmlIsNameChar(ctxt
, c
)) {
3647 COPY_BUF(l
,buf
,len
,c
);
3649 c
= CUR_SCHAR(cur
, l
);
/* Stack buffer exhausted: continue in a heap buffer. */
3650 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
3652 * Okay someone managed to make a huge name, so he's ready to pay
3653 * for the processing speed.
3658 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3659 if (buffer
== NULL
) {
3660 xmlErrMemory(ctxt
, NULL
);
3663 memcpy(buffer
, buf
, len
);
3664 while (xmlIsNameChar(ctxt
, c
)) {
3665 if (len
+ 10 > max
) {
/* NOTE(review): this routine parses a Name but the message argument says "NCName". */
3668 if ((len
> XML_MAX_NAME_LENGTH
) &&
3669 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3670 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3675 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3676 max
* sizeof(xmlChar
));
3678 xmlErrMemory(ctxt
, NULL
);
3684 COPY_BUF(l
,buffer
,len
,c
);
3686 c
= CUR_SCHAR(cur
, l
);
3693 if ((len
> XML_MAX_NAME_LENGTH
) &&
3694 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3695 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
/* Short name: duplicate it from the stack buffer. */
3699 return(xmlStrndup(buf
, len
));
3704 * @ctxt: an XML parser context
3706 * parse an XML Nmtoken.
3708 * [7] Nmtoken ::= (NameChar)+
3710 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3712 * Returns the Nmtoken parsed or NULL
/*
 * Parses an Nmtoken from the parser input: every character is accepted
 * while xmlIsNameChar() holds (there is no separate start-character
 * rule). Characters are gathered in a stack buffer of XML_MAX_NAMELEN + 5
 * bytes; once len reaches XML_MAX_NAMELEN the data is moved to a heap
 * buffer that is enlarged with xmlRealloc(). Exceeding
 * XML_MAX_NAME_LENGTH raises XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE
 * is set.
 */
3716 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
3717 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3727 if (ctxt
->instate
== XML_PARSER_EOF
)
3731 while (xmlIsNameChar(ctxt
, c
)) {
3732 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3736 COPY_BUF(l
,buf
,len
,c
);
3742 if (ctxt
->instate
== XML_PARSER_EOF
)
/* Stack buffer exhausted: continue in a heap buffer. */
3746 if (len
>= XML_MAX_NAMELEN
) {
3748 * Okay someone managed to make a huge token, so he's ready to pay
3749 * for the processing speed.
3754 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3755 if (buffer
== NULL
) {
3756 xmlErrMemory(ctxt
, NULL
);
3759 memcpy(buffer
, buf
, len
);
3760 while (xmlIsNameChar(ctxt
, c
)) {
3761 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3764 if (ctxt
->instate
== XML_PARSER_EOF
) {
3769 if (len
+ 10 > max
) {
/* Here the limit is tested against the buffer capacity (max), not len. */
3772 if ((max
> XML_MAX_NAME_LENGTH
) &&
3773 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3774 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3779 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3780 max
* sizeof(xmlChar
));
3782 xmlErrMemory(ctxt
, NULL
);
3788 COPY_BUF(l
,buffer
,len
,c
);
3798 if ((len
> XML_MAX_NAME_LENGTH
) &&
3799 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3800 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
/* Short token: duplicate it from the stack buffer. */
3803 return(xmlStrndup(buf
, len
));
3807 * xmlParseEntityValue:
3808 * @ctxt: an XML parser context
3809 * @orig: if non-NULL store a copy of the original entity value
3811 * parse a value for ENTITY declarations
3813 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3814 * "'" ([^%&'] | PEReference | Reference)* "'"
3816 * Returns the EntityValue parsed with reference substituted or NULL
/*
 * Parses the quoted value of an ENTITY declaration. The opening quote
 * (double or single) selects the terminator; anything else raises
 * XML_ERR_ENTITY_NOT_STARTED. The raw literal is first copied into a heap
 * buffer, then scanned so that every '%' or '&' (other than "&#") is
 * followed by a name and ';', and finally handed to
 * xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF only.
 */
3820 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
3821 xmlChar
*buf
= NULL
;
3823 int size
= XML_PARSER_BUFFER_SIZE
;
3826 xmlChar
*ret
= NULL
;
3827 const xmlChar
*cur
= NULL
;
3828 xmlParserInputPtr input
;
3830 if (RAW
== '"') stop
= '"';
3831 else if (RAW
== '\'') stop
= '\'';
3833 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_STARTED
, NULL
);
3836 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3838 xmlErrMemory(ctxt
, NULL
);
3843 * The content of the entity definition is copied in a buffer.
/* Remember the input in use now; the loop below compares against it. */
3846 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
3847 input
= ctxt
->input
;
3849 if (ctxt
->instate
== XML_PARSER_EOF
) {
3856 * NOTE: 4.4.5 Included in Literal
3857 * When a parameter entity reference appears in a literal entity
3858 * value, ... a single or double quote character in the replacement
3859 * text is always treated as a normal data character and will not
3860 * terminate the literal.
3861 * In practice it means we stop the loop only when back at parsing
3862 * the initial entity and the quote is found
3864 while (((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
3865 (ctxt
->input
!= input
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
/* Keep at least 5 spare bytes before appending a character. */
3866 if (len
+ 5 >= size
) {
3870 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3872 xmlErrMemory(ctxt
, NULL
);
3878 COPY_BUF(l
,buf
,len
,c
);
3881 * Pop-up of finished entities.
3883 while ((RAW
== 0) && (ctxt
->inputNr
> 1)) /* non input consuming */
3894 if (ctxt
->instate
== XML_PARSER_EOF
) {
3900 * Raise problem w.r.t. '&' and '%' being used in non-entities
3901 * reference constructs. Note Charref will be handled in
3902 * xmlStringDecodeEntities()
3905 while (*cur
!= 0) { /* non input consuming */
3906 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
3911 name
= xmlParseStringName(ctxt
, &cur
);
3912 if ((name
== NULL
) || (*cur
!= ';')) {
3913 xmlFatalErrMsgInt(ctxt
, XML_ERR_ENTITY_CHAR_ERROR
,
3914 "EntityValue: '%c' forbidden except for entities references\n",
/* '%' while inSubset == 1 and only one input is active is an error. */
3917 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
3918 (ctxt
->inputNr
== 1)) {
3919 xmlFatalErr(ctxt
, XML_ERR_ENTITY_PE_INTERNAL
, NULL
);
3930 * Then PEReference entities are substituted.
3933 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
, NULL
);
3938 * NOTE: 4.4.7 Bypassed
3939 * When a general entity reference appears in the EntityValue in
3940 * an entity declaration, it is bypassed and left as is.
3941 * so XML_SUBSTITUTE_REF is not set here.
3943 ret
= xmlStringDecodeEntities(ctxt
, buf
, XML_SUBSTITUTE_PEREF
,
3955 * xmlParseAttValueComplex:
3956 * @ctxt: an XML parser context
3957 * @attlen: the resulting attribute length
3958 * @normalize: whether to apply the inner normalization
3960 * parse a value for an attribute, this is the fallback function
3961 * of xmlParseAttValue() when the attribute parsing requires handling
3962 * of non-ASCII characters, or normalization compaction.
3964 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * Fallback attribute value parser. The opening quote (double or single)
 * selects the terminator; anything else raises
 * XML_ERR_ATTRIBUTE_NOT_STARTED. The value is accumulated in a heap
 * buffer that starts at XML_PARSER_BUFFER_SIZE bytes and is enlarged with
 * growBuffer(), whose failure path is the mem_error label. Inside the
 * loop: character references are decoded with xmlParseCharRef(), entity
 * references with xmlParseEntityRef() and, depending on
 * ctxt->replaceEntities, either expanded or written back as a reference;
 * white space characters are written as 0x20, with runs collapsed when
 * @normalize is set. The resulting length is stored through @attlen when
 * it is not NULL.
 */
3967 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
, int *attlen
, int normalize
) {
3969 xmlChar
*buf
= NULL
;
3970 xmlChar
*rep
= NULL
;
3972 size_t buf_size
= 0;
3973 int c
, l
, in_space
= 0;
3974 xmlChar
*current
= NULL
;
3977 if (NXT(0) == '"') {
3978 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3981 } else if (NXT(0) == '\'') {
3983 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3986 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
3991 * allocate a translation buffer.
3993 buf_size
= XML_PARSER_BUFFER_SIZE
;
3994 buf
= (xmlChar
*) xmlMallocAtomic(buf_size
);
3995 if (buf
== NULL
) goto mem_error
;
3998 * OK loop until we reach one of the ending char or a size limit.
4001 while (((NXT(0) != limit
) && /* checked */
4002 (IS_CHAR(c
)) && (c
!= '<')) &&
4003 (ctxt
->instate
!= XML_PARSER_EOF
)) {
4005 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4006 * special option is given
4008 if ((len
> XML_MAX_TEXT_LENGTH
) &&
4009 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4010 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4011 "AttValue length too long\n");
/* Character reference. */
4017 if (NXT(1) == '#') {
4018 int val
= xmlParseCharRef(ctxt
);
4021 if (ctxt
->replaceEntities
) {
4022 if (len
+ 10 > buf_size
) {
4023 growBuffer(buf
, 10);
4028 * The reparsing will be done in xmlStringGetNodeList()
4029 * called by the attribute() function in SAX.c
4031 if (len
+ 10 > buf_size
) {
4032 growBuffer(buf
, 10);
4040 } else if (val
!= 0) {
4041 if (len
+ 10 > buf_size
) {
4042 growBuffer(buf
, 10);
4044 len
+= xmlCopyChar(0, &buf
[len
], val
);
/* General entity reference. */
4047 ent
= xmlParseEntityRef(ctxt
);
4050 ctxt
->nbentities
+= ent
->owner
;
4051 if ((ent
!= NULL
) &&
4052 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
4053 if (len
+ 10 > buf_size
) {
4054 growBuffer(buf
, 10);
4056 if ((ctxt
->replaceEntities
== 0) &&
4057 (ent
->content
[0] == '&')) {
4064 buf
[len
++] = ent
->content
[0];
/* Entity substitution requested: expand the entity content in place. */
4066 } else if ((ent
!= NULL
) &&
4067 (ctxt
->replaceEntities
!= 0)) {
4068 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
4069 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
4074 while (*current
!= 0) { /* non input consuming */
4075 if ((*current
== 0xD) || (*current
== 0xA) ||
4076 (*current
== 0x9)) {
4080 buf
[len
++] = *current
++;
4081 if (len
+ 10 > buf_size
) {
4082 growBuffer(buf
, 10);
4089 if (len
+ 10 > buf_size
) {
4090 growBuffer(buf
, 10);
4092 if (ent
->content
!= NULL
)
4093 buf
[len
++] = ent
->content
[0];
/* No substitution: the reference itself is written to the output. */
4095 } else if (ent
!= NULL
) {
4096 int i
= xmlStrlen(ent
->name
);
4097 const xmlChar
*cur
= ent
->name
;
4100 * This may look absurd but is needed to detect
4103 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
4104 (ent
->content
!= NULL
) && (ent
->checked
== 0)) {
4105 unsigned long oldnbent
= ctxt
->nbentities
;
4107 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
4108 XML_SUBSTITUTE_REF
, 0, 0, 0);
4110 ent
->checked
= (ctxt
->nbentities
- oldnbent
+ 1) * 2;
4112 if (xmlStrchr(rep
, '<'))
4120 * Just output the reference
4123 while (len
+ i
+ 10 > buf_size
) {
4124 growBuffer(buf
, i
+ 10);
4127 buf
[len
++] = *cur
++;
/* White space is written as a single 0x20; runs collapse when normalizing. */
4132 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
4133 if ((len
!= 0) || (!normalize
)) {
4134 if ((!normalize
) || (!in_space
)) {
4135 COPY_BUF(l
,buf
,len
,0x20);
4136 while (len
+ 10 > buf_size
) {
4137 growBuffer(buf
, 10);
4144 COPY_BUF(l
,buf
,len
,c
);
4145 if (len
+ 10 > buf_size
) {
4146 growBuffer(buf
, 10);
4154 if (ctxt
->instate
== XML_PARSER_EOF
)
/* Normalizing: drop trailing 0x20 characters. */
4157 if ((in_space
) && (normalize
)) {
4158 while ((len
> 0) && (buf
[len
- 1] == 0x20)) len
--;
4162 xmlFatalErr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
, NULL
);
4163 } else if (RAW
!= limit
) {
4164 if ((c
!= 0) && (!IS_CHAR(c
))) {
4165 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_CHAR
,
4166 "invalid character in attribute value\n");
4168 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4169 "AttValue: ' expected\n");
4175 * There we potentially risk an overflow, don't allow attribute value of
4176 * length more than INT_MAX it is a very reasonnable assumption !
4178 if (len
>= INT_MAX
) {
4179 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4180 "AttValue length too long\n");
4184 if (attlen
!= NULL
) *attlen
= (int) len
;
/* Out-of-memory report; presumably the mem_error target (label not visible here). */
4188 xmlErrMemory(ctxt
, NULL
);
4199 * @ctxt: an XML parser context
4201 * parse a value for an attribute
4202 * Note: the parser won't do substitution of entities here, this
4203 * will be handled later in xmlStringGetNodeList
4205 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4206 * "'" ([^<&'] | Reference)* "'"
4208 * 3.3.3 Attribute-Value Normalization:
4209 * Before the value of an attribute is passed to the application or
4210 * checked for validity, the XML processor must normalize it as follows:
4211 * - a character reference is processed by appending the referenced
4212 * character to the attribute value
4213 * - an entity reference is processed by recursively processing the
4214 * replacement text of the entity
4215 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4216 * appending #x20 to the normalized value, except that only a single
4217 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4218 * parsed entity or the literal entity value of an internal parsed entity
4219 * - other characters are processed by appending them to the normalized value
4220 * If the declared value is not CDATA, then the XML processor must further
4221 * process the normalized attribute value by discarding any leading and
4222 * trailing space (#x20) characters, and by replacing sequences of space
4223 * (#x20) characters by a single space (#x20) character.
4224 * All attributes for which no declaration has been read should be treated
4225 * by a non-validating parser as if declared CDATA.
4227 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4232 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
4233 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
)) return(NULL
);
4234 return(xmlParseAttValueInternal(ctxt
, NULL
, NULL
, 0));
4238 * xmlParseSystemLiteral:
4239 * @ctxt: an XML parser context
4241 * parse an XML Literal
4243 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4245 * Returns the SystemLiteral parsed or NULL
/*
 * Parses a quoted SystemLiteral. The opening quote selects the
 * terminator; anything else raises XML_ERR_LITERAL_NOT_STARTED.
 * Characters are copied into a heap buffer that starts at
 * XML_PARSER_BUFFER_SIZE bytes and is enlarged with xmlRealloc(); once
 * the buffer size exceeds XML_MAX_NAME_LENGTH, XML_ERR_NAME_TOO_LONG is
 * raised unless XML_PARSE_HUGE is set. ctxt->instate is switched to
 * XML_PARSER_SYSTEM_LITERAL while parsing and restored from the saved
 * value on the paths visible here.
 */
4249 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
4250 xmlChar
*buf
= NULL
;
4252 int size
= XML_PARSER_BUFFER_SIZE
;
4255 int state
= ctxt
->instate
;
4262 } else if (RAW
== '\'') {
4266 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4270 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4272 xmlErrMemory(ctxt
, NULL
);
4275 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
4277 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
/* Keep at least 5 spare bytes before appending a character. */
4278 if (len
+ 5 >= size
) {
4281 if ((size
> XML_MAX_NAME_LENGTH
) &&
4282 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4283 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "SystemLiteral");
4285 ctxt
->instate
= (xmlParserInputState
) state
;
4289 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4292 xmlErrMemory(ctxt
, NULL
);
4293 ctxt
->instate
= (xmlParserInputState
) state
;
4302 if (ctxt
->instate
== XML_PARSER_EOF
) {
4307 COPY_BUF(l
,buf
,len
,cur
);
4317 ctxt
->instate
= (xmlParserInputState
) state
;
/* Loop ended on a non-character rather than on the closing quote. */
4318 if (!IS_CHAR(cur
)) {
4319 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4327 * xmlParsePubidLiteral:
4328 * @ctxt: an XML parser context
4330 * parse an XML public literal
4332 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4334 * Returns the PubidLiteral parsed or NULL.
4338 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
4339 xmlChar
*buf
= NULL
;
4341 int size
= XML_PARSER_BUFFER_SIZE
;
4345 xmlParserInputState oldstate
= ctxt
->instate
;
4351 } else if (RAW
== '\'') {
4355 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4358 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4360 xmlErrMemory(ctxt
, NULL
);
4363 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
4365 while ((IS_PUBIDCHAR_CH(cur
)) && (cur
!= stop
)) { /* checked */
4366 if (len
+ 1 >= size
) {
4369 if ((size
> XML_MAX_NAME_LENGTH
) &&
4370 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4371 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Public ID");
4376 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4378 xmlErrMemory(ctxt
, NULL
);
4389 if (ctxt
->instate
== XML_PARSER_EOF
) {
4404 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4408 ctxt
->instate
= oldstate
;
4412 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
);
4415 * used for the test in the inner loop of the char data testing
4417 static const unsigned char test_char_data
[256] = {
4418 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4419 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4420 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4421 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4422 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4423 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4424 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4425 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4426 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4427 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4428 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4429 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4430 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4431 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4432 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4433 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4434 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4435 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4436 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4437 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4438 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4448 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4449 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4454 * @ctxt: an XML parser context
4455 * @cdata: int indicating whether we are within a CDATA section
4457 * parse a CharData section.
4458 * if we are within a CDATA section ']]>' marks an end of section.
4460 * The right angle bracket (>) may be represented using the string "&gt;",
4461 * and must, for compatibility, be escaped using "&gt;" or a character
4462 * reference when it appears in the string "]]>" in content, when that
4463 * string is not marking the end of a CDATA section.
4465 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4469 xmlParseCharData(xmlParserCtxtPtr ctxt
, int cdata
) {
4472 int line
= ctxt
->input
->line
;
4473 int col
= ctxt
->input
->col
;
4479 * Accelerated common case where input doesn't need to be
4480 * modified before passing it to the handler.
4483 in
= ctxt
->input
->cur
;
4486 while (*in
== 0x20) { in
++; ctxt
->input
->col
++; }
4489 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4491 } while (*in
== 0xA);
4492 goto get_more_space
;
4495 nbchar
= in
- ctxt
->input
->cur
;
4497 const xmlChar
*tmp
= ctxt
->input
->cur
;
4498 ctxt
->input
->cur
= in
;
4500 if ((ctxt
->sax
!= NULL
) &&
4501 (ctxt
->sax
->ignorableWhitespace
!=
4502 ctxt
->sax
->characters
)) {
4503 if (areBlanks(ctxt
, tmp
, nbchar
, 1)) {
4504 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4505 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4508 if (ctxt
->sax
->characters
!= NULL
)
4509 ctxt
->sax
->characters(ctxt
->userData
,
4511 if (*ctxt
->space
== -1)
4514 } else if ((ctxt
->sax
!= NULL
) &&
4515 (ctxt
->sax
->characters
!= NULL
)) {
4516 ctxt
->sax
->characters(ctxt
->userData
,
4524 ccol
= ctxt
->input
->col
;
4525 while (test_char_data
[*in
]) {
4529 ctxt
->input
->col
= ccol
;
4532 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4534 } while (*in
== 0xA);
4538 if ((in
[1] == ']') && (in
[2] == '>')) {
4539 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4540 ctxt
->input
->cur
= in
;
4547 nbchar
= in
- ctxt
->input
->cur
;
4549 if ((ctxt
->sax
!= NULL
) &&
4550 (ctxt
->sax
->ignorableWhitespace
!=
4551 ctxt
->sax
->characters
) &&
4552 (IS_BLANK_CH(*ctxt
->input
->cur
))) {
4553 const xmlChar
*tmp
= ctxt
->input
->cur
;
4554 ctxt
->input
->cur
= in
;
4556 if (areBlanks(ctxt
, tmp
, nbchar
, 0)) {
4557 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4558 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4561 if (ctxt
->sax
->characters
!= NULL
)
4562 ctxt
->sax
->characters(ctxt
->userData
,
4564 if (*ctxt
->space
== -1)
4567 line
= ctxt
->input
->line
;
4568 col
= ctxt
->input
->col
;
4569 } else if (ctxt
->sax
!= NULL
) {
4570 if (ctxt
->sax
->characters
!= NULL
)
4571 ctxt
->sax
->characters(ctxt
->userData
,
4572 ctxt
->input
->cur
, nbchar
);
4573 line
= ctxt
->input
->line
;
4574 col
= ctxt
->input
->col
;
4576 /* something really bad happened in the SAX callback */
4577 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4580 ctxt
->input
->cur
= in
;
4584 ctxt
->input
->cur
= in
;
4586 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4587 continue; /* while */
4599 if (ctxt
->instate
== XML_PARSER_EOF
)
4601 in
= ctxt
->input
->cur
;
4602 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09));
4605 ctxt
->input
->line
= line
;
4606 ctxt
->input
->col
= col
;
4607 xmlParseCharDataComplex(ctxt
, cdata
);
4611 * xmlParseCharDataComplex:
4612 * @ctxt: an XML parser context
4613 * @cdata: int indicating whether we are within a CDATA section
4615 * parse a CharData section. This is the fallback function
4616 * of xmlParseCharData() when the parsing requires handling
4617 * of non-ASCII characters.
4620 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
) {
4621 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
4629 while ((cur
!= '<') && /* checked */
4631 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
4632 if ((cur
== ']') && (NXT(1) == ']') &&
4636 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4639 COPY_BUF(l
,buf
,nbchar
,cur
);
4640 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
4644 * OK the segment is to be consumed as chars.
4646 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4647 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4648 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4649 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4652 if (ctxt
->sax
->characters
!= NULL
)
4653 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4654 if ((ctxt
->sax
->characters
!=
4655 ctxt
->sax
->ignorableWhitespace
) &&
4656 (*ctxt
->space
== -1))
4661 /* something really bad happened in the SAX callback */
4662 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4669 if (ctxt
->instate
== XML_PARSER_EOF
)
4678 * OK the segment is to be consumed as chars.
4680 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4681 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4682 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4683 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
4685 if (ctxt
->sax
->characters
!= NULL
)
4686 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4687 if ((ctxt
->sax
->characters
!= ctxt
->sax
->ignorableWhitespace
) &&
4688 (*ctxt
->space
== -1))
4693 if ((cur
!= 0) && (!IS_CHAR(cur
))) {
4694 /* Generate the error and skip the offending character */
4695 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4696 "PCDATA invalid Char value %d\n",
4703 * xmlParseExternalID:
4704 * @ctxt: an XML parser context
4705 * @publicID: a xmlChar** receiving PubidLiteral
4706 * @strict: indicate whether we should restrict parsing to only
4707 * production [75], see NOTE below
4709 * Parse an External ID or a Public ID
4711 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4712 * 'PUBLIC' S PubidLiteral S SystemLiteral
4714 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4715 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4717 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4719 * Returns the SystemLiteral; in the second (PUBLIC) case
4720 * publicID receives the PubidLiteral. If strict is off,
4721 * it is possible to return NULL and have publicID set.
4725 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
4726 xmlChar
*URI
= NULL
;
4731 if (CMP6(CUR_PTR
, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4733 if (!IS_BLANK_CH(CUR
)) {
4734 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4735 "Space required after 'SYSTEM'\n");
4738 URI
= xmlParseSystemLiteral(ctxt
);
4740 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4742 } else if (CMP6(CUR_PTR
, 'P', 'U', 'B', 'L', 'I', 'C')) {
4744 if (!IS_BLANK_CH(CUR
)) {
4745 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4746 "Space required after 'PUBLIC'\n");
4749 *publicID
= xmlParsePubidLiteral(ctxt
);
4750 if (*publicID
== NULL
) {
4751 xmlFatalErr(ctxt
, XML_ERR_PUBID_REQUIRED
, NULL
);
4755 * We don't handle [83] so "S SystemLiteral" is required.
4757 if (!IS_BLANK_CH(CUR
)) {
4758 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4759 "Space required after the Public Identifier\n");
4763 * We handle [83] so we return immediately, if
4764 * "S SystemLiteral" is not detected. From a purely parsing
4765 * point of view that's a nice mess.
4771 if (!IS_BLANK_CH(*ptr
)) return(NULL
);
4773 while (IS_BLANK_CH(*ptr
)) ptr
++; /* TODO: dangerous, fix ! */
4774 if ((*ptr
!= '\'') && (*ptr
!= '"')) return(NULL
);
4777 URI
= xmlParseSystemLiteral(ctxt
);
4779 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4786 * xmlParseCommentComplex:
4787 * @ctxt: an XML parser context
4788 * @buf: the already parsed part of the buffer
4789 * @len: number of bytes filled in the buffer
4790 * @size: allocated size of the buffer
4792 * Skip an XML (SGML) comment <!-- .... -->
4793 * The spec says that "For compatibility, the string "--" (double-hyphen)
4794 * must not occur within comments. "
4795 * This is the slow routine in case the accelerator for ascii didn't work
4797 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4800 xmlParseCommentComplex(xmlParserCtxtPtr ctxt
, xmlChar
*buf
,
4801 size_t len
, size_t size
) {
4808 inputid
= ctxt
->input
->id
;
4812 size
= XML_PARSER_BUFFER_SIZE
;
4813 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4815 xmlErrMemory(ctxt
, NULL
);
4819 GROW
; /* Assure there's enough input data */
4822 goto not_terminated
;
4824 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4825 "xmlParseComment: invalid xmlChar value %d\n",
4833 goto not_terminated
;
4835 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4836 "xmlParseComment: invalid xmlChar value %d\n",
4844 goto not_terminated
;
4845 while (IS_CHAR(cur
) && /* checked */
4847 (r
!= '-') || (q
!= '-'))) {
4848 if ((r
== '-') && (q
== '-')) {
4849 xmlFatalErr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
, NULL
);
4851 if ((len
> XML_MAX_TEXT_LENGTH
) &&
4852 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4853 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4854 "Comment too big found", NULL
);
4858 if (len
+ 5 >= size
) {
4862 new_size
= size
* 2;
4863 new_buf
= (xmlChar
*) xmlRealloc(buf
, new_size
);
4864 if (new_buf
== NULL
) {
4866 xmlErrMemory(ctxt
, NULL
);
4872 COPY_BUF(ql
,buf
,len
,q
);
4882 if (ctxt
->instate
== XML_PARSER_EOF
) {
4897 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4898 "Comment not terminated \n<!--%.50s\n", buf
);
4899 } else if (!IS_CHAR(cur
)) {
4900 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4901 "xmlParseComment: invalid xmlChar value %d\n",
4904 if (inputid
!= ctxt
->input
->id
) {
4905 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4906 "Comment doesn't start and stop in the same entity\n");
4909 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4910 (!ctxt
->disableSAX
))
4911 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4916 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4917 "Comment not terminated\n", NULL
);
4924 * @ctxt: an XML parser context
4926 * Skip an XML (SGML) comment <!-- .... -->
4927 * The spec says that "For compatibility, the string "--" (double-hyphen)
4928 * must not occur within comments. "
4930 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4933 xmlParseComment(xmlParserCtxtPtr ctxt
) {
4934 xmlChar
*buf
= NULL
;
4935 size_t size
= XML_PARSER_BUFFER_SIZE
;
4937 xmlParserInputState state
;
4944 * Check that there is a comment right here.
4946 if ((RAW
!= '<') || (NXT(1) != '!') ||
4947 (NXT(2) != '-') || (NXT(3) != '-')) return;
4948 state
= ctxt
->instate
;
4949 ctxt
->instate
= XML_PARSER_COMMENT
;
4950 inputid
= ctxt
->input
->id
;
4956 * Accelerated common case where input doesn't need to be
4957 * modified before passing it to the handler.
4959 in
= ctxt
->input
->cur
;
4963 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4965 } while (*in
== 0xA);
4968 ccol
= ctxt
->input
->col
;
4969 while (((*in
> '-') && (*in
<= 0x7F)) ||
4970 ((*in
>= 0x20) && (*in
< '-')) ||
4975 ctxt
->input
->col
= ccol
;
4978 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4980 } while (*in
== 0xA);
4983 nbchar
= in
- ctxt
->input
->cur
;
4985 * save current set of data
4988 if ((ctxt
->sax
!= NULL
) &&
4989 (ctxt
->sax
->comment
!= NULL
)) {
4991 if ((*in
== '-') && (in
[1] == '-'))
4994 size
= XML_PARSER_BUFFER_SIZE
+ nbchar
;
4995 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4997 xmlErrMemory(ctxt
, NULL
);
4998 ctxt
->instate
= state
;
5002 } else if (len
+ nbchar
+ 1 >= size
) {
5004 size
+= len
+ nbchar
+ XML_PARSER_BUFFER_SIZE
;
5005 new_buf
= (xmlChar
*) xmlRealloc(buf
,
5006 size
* sizeof(xmlChar
));
5007 if (new_buf
== NULL
) {
5009 xmlErrMemory(ctxt
, NULL
);
5010 ctxt
->instate
= state
;
5015 memcpy(&buf
[len
], ctxt
->input
->cur
, nbchar
);
5020 if ((len
> XML_MAX_TEXT_LENGTH
) &&
5021 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
5022 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
5023 "Comment too big found", NULL
);
5027 ctxt
->input
->cur
= in
;
5030 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
5035 ctxt
->input
->cur
= in
;
5037 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
5038 continue; /* while */
5044 if (ctxt
->instate
== XML_PARSER_EOF
) {
5048 in
= ctxt
->input
->cur
;
5052 if (ctxt
->input
->id
!= inputid
) {
5053 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5054 "comment doesn't start and stop in the same entity\n");
5057 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
5058 (!ctxt
->disableSAX
)) {
5060 ctxt
->sax
->comment(ctxt
->userData
, buf
);
5062 ctxt
->sax
->comment(ctxt
->userData
, BAD_CAST
"");
5066 if (ctxt
->instate
!= XML_PARSER_EOF
)
5067 ctxt
->instate
= state
;
5071 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5072 "Double hyphen within comment: "
5076 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5077 "Double hyphen within comment\n", NULL
);
5085 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09));
5086 xmlParseCommentComplex(ctxt
, buf
, len
, size
);
5087 ctxt
->instate
= state
;
5094 * @ctxt: an XML parser context
5096 * parse the name of a PI
5098 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5100 * Returns the PITarget name or NULL
5104 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
5105 const xmlChar
*name
;
5107 name
= xmlParseName(ctxt
);
5108 if ((name
!= NULL
) &&
5109 ((name
[0] == 'x') || (name
[0] == 'X')) &&
5110 ((name
[1] == 'm') || (name
[1] == 'M')) &&
5111 ((name
[2] == 'l') || (name
[2] == 'L'))) {
5113 if ((name
[0] == 'x') && (name
[1] == 'm') &&
5114 (name
[2] == 'l') && (name
[3] == 0)) {
5115 xmlFatalErrMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5116 "XML declaration allowed only at the start of the document\n");
5118 } else if (name
[3] == 0) {
5119 xmlFatalErr(ctxt
, XML_ERR_RESERVED_XML_NAME
, NULL
);
5123 if (xmlW3CPIs
[i
] == NULL
) break;
5124 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
5127 xmlWarningMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5128 "xmlParsePITarget: invalid name prefix 'xml'\n",
5131 if ((name
!= NULL
) && (xmlStrchr(name
, ':') != NULL
)) {
5132 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5133 "colons are forbidden from PI names '%s'\n", name
, NULL
, NULL
);
5138 #ifdef LIBXML_CATALOG_ENABLED
5140 * xmlParseCatalogPI:
5141 * @ctxt: an XML parser context
5142 * @catalog: the PI value string
5144 * parse an XML Catalog Processing Instruction.
5146 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5148 * Occurs only if allowed by the user and if happening in the Misc
5149 * part of the document before any doctype information
5150 * This will add the given catalog to the parsing context in order
5151 * to be used if there is a resolution need further down in the document
5155 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
5156 xmlChar
*URL
= NULL
;
5157 const xmlChar
*tmp
, *base
;
5161 while (IS_BLANK_CH(*tmp
)) tmp
++;
5162 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
5165 while (IS_BLANK_CH(*tmp
)) tmp
++;
5170 while (IS_BLANK_CH(*tmp
)) tmp
++;
5172 if ((marker
!= '\'') && (marker
!= '"'))
5176 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
5179 URL
= xmlStrndup(base
, tmp
- base
);
5181 while (IS_BLANK_CH(*tmp
)) tmp
++;
5186 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
5192 xmlWarningMsg(ctxt
, XML_WAR_CATALOG_PI
,
5193 "Catalog PI syntax error: %s\n",
5202 * @ctxt: an XML parser context
5204 * parse an XML Processing Instruction.
5206 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5208 * The processing is transferred to SAX once parsed.
5212 xmlParsePI(xmlParserCtxtPtr ctxt
) {
5213 xmlChar
*buf
= NULL
;
5215 size_t size
= XML_PARSER_BUFFER_SIZE
;
5217 const xmlChar
*target
;
5218 xmlParserInputState state
;
5221 if ((RAW
== '<') && (NXT(1) == '?')) {
5222 xmlParserInputPtr input
= ctxt
->input
;
5223 state
= ctxt
->instate
;
5224 ctxt
->instate
= XML_PARSER_PI
;
5226 * this is a Processing Instruction.
5232 * Parse the target name and check for special support like
5235 target
= xmlParsePITarget(ctxt
);
5236 if (target
!= NULL
) {
5237 if ((RAW
== '?') && (NXT(1) == '>')) {
5238 if (input
!= ctxt
->input
) {
5239 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5240 "PI declaration doesn't start and stop in the same entity\n");
5247 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5248 (ctxt
->sax
->processingInstruction
!= NULL
))
5249 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5251 if (ctxt
->instate
!= XML_PARSER_EOF
)
5252 ctxt
->instate
= state
;
5255 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
5257 xmlErrMemory(ctxt
, NULL
);
5258 ctxt
->instate
= state
;
5262 if (!IS_BLANK(cur
)) {
5263 xmlFatalErrMsgStr(ctxt
, XML_ERR_SPACE_REQUIRED
,
5264 "ParsePI: PI %s space expected\n", target
);
5268 while (IS_CHAR(cur
) && /* checked */
5269 ((cur
!= '?') || (NXT(1) != '>'))) {
5270 if (len
+ 5 >= size
) {
5272 size_t new_size
= size
* 2;
5273 tmp
= (xmlChar
*) xmlRealloc(buf
, new_size
);
5275 xmlErrMemory(ctxt
, NULL
);
5277 ctxt
->instate
= state
;
5286 if (ctxt
->instate
== XML_PARSER_EOF
) {
5291 if ((len
> XML_MAX_TEXT_LENGTH
) &&
5292 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
5293 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5294 "PI %s too big found", target
);
5296 ctxt
->instate
= state
;
5300 COPY_BUF(l
,buf
,len
,cur
);
5309 if ((len
> XML_MAX_TEXT_LENGTH
) &&
5310 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
5311 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5312 "PI %s too big found", target
);
5314 ctxt
->instate
= state
;
5319 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5320 "ParsePI: PI %s never end ...\n", target
);
5322 if (input
!= ctxt
->input
) {
5323 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5324 "PI declaration doesn't start and stop in the same entity\n");
5328 #ifdef LIBXML_CATALOG_ENABLED
5329 if (((state
== XML_PARSER_MISC
) ||
5330 (state
== XML_PARSER_START
)) &&
5331 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
5332 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
5333 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
5334 (allow
== XML_CATA_ALLOW_ALL
))
5335 xmlParseCatalogPI(ctxt
, buf
);
5343 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5344 (ctxt
->sax
->processingInstruction
!= NULL
))
5345 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5350 xmlFatalErr(ctxt
, XML_ERR_PI_NOT_STARTED
, NULL
);
5352 if (ctxt
->instate
!= XML_PARSER_EOF
)
5353 ctxt
->instate
= state
;
5358 * xmlParseNotationDecl:
5359 * @ctxt: an XML parser context
5361 * parse a notation declaration
5363 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5365 * Hence there are actually 3 choices:
5366 * 'PUBLIC' S PubidLiteral
5367 * 'PUBLIC' S PubidLiteral S SystemLiteral
5368 * and 'SYSTEM' S SystemLiteral
5370 * See the NOTE on xmlParseExternalID().
5374 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
5375 const xmlChar
*name
;
5379 if (CMP10(CUR_PTR
, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5380 xmlParserInputPtr input
= ctxt
->input
;
5383 if (!IS_BLANK_CH(CUR
)) {
5384 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5385 "Space required after '<!NOTATION'\n");
5390 name
= xmlParseName(ctxt
);
5392 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5395 if (!IS_BLANK_CH(CUR
)) {
5396 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5397 "Space required after the NOTATION name'\n");
5400 if (xmlStrchr(name
, ':') != NULL
) {
5401 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5402 "colons are forbidden from notation names '%s'\n",
5410 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
5414 if (input
!= ctxt
->input
) {
5415 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5416 "Notation declaration doesn't start and stop in the same entity\n");
5419 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5420 (ctxt
->sax
->notationDecl
!= NULL
))
5421 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
5423 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5425 if (Systemid
!= NULL
) xmlFree(Systemid
);
5426 if (Pubid
!= NULL
) xmlFree(Pubid
);
5431 * xmlParseEntityDecl:
5432 * @ctxt: an XML parser context
5434 * parse <!ENTITY declarations
5436 * [70] EntityDecl ::= GEDecl | PEDecl
5438 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5440 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5442 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5444 * [74] PEDef ::= EntityValue | ExternalID
5446 * [76] NDataDecl ::= S 'NDATA' S Name
5448 * [ VC: Notation Declared ]
5449 * The Name must match the declared name of a notation.
5453 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
5454 const xmlChar
*name
= NULL
;
5455 xmlChar
*value
= NULL
;
5456 xmlChar
*URI
= NULL
, *literal
= NULL
;
5457 const xmlChar
*ndata
= NULL
;
5458 int isParameter
= 0;
5459 xmlChar
*orig
= NULL
;
5462 /* GROW; done in the caller */
5463 if (CMP8(CUR_PTR
, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5464 xmlParserInputPtr input
= ctxt
->input
;
5467 skipped
= SKIP_BLANKS
;
5469 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5470 "Space required after '<!ENTITY'\n");
5475 skipped
= SKIP_BLANKS
;
5477 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5478 "Space required after '%'\n");
5483 name
= xmlParseName(ctxt
);
5485 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5486 "xmlParseEntityDecl: no name\n");
5489 if (xmlStrchr(name
, ':') != NULL
) {
5490 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5491 "colons are forbidden from entities names '%s'\n",
5494 skipped
= SKIP_BLANKS
;
5496 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5497 "Space required after the entity name\n");
5500 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
5502 * handle the various case of definitions...
5505 if ((RAW
== '"') || (RAW
== '\'')) {
5506 value
= xmlParseEntityValue(ctxt
, &orig
);
5508 if ((ctxt
->sax
!= NULL
) &&
5509 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5510 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5511 XML_INTERNAL_PARAMETER_ENTITY
,
5515 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5516 if ((URI
== NULL
) && (literal
== NULL
)) {
5517 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5522 uri
= xmlParseURI((const char *) URI
);
5524 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5525 "Invalid URI: %s\n", URI
);
5527 * This really ought to be a well formedness error
5528 * but the XML Core WG decided otherwise c.f. issue
5529 * E26 of the XML errata.
5532 if (uri
->fragment
!= NULL
) {
5534 * Okay this is foolish to block those but not
5537 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5539 if ((ctxt
->sax
!= NULL
) &&
5540 (!ctxt
->disableSAX
) &&
5541 (ctxt
->sax
->entityDecl
!= NULL
))
5542 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5543 XML_EXTERNAL_PARAMETER_ENTITY
,
5544 literal
, URI
, NULL
);
5551 if ((RAW
== '"') || (RAW
== '\'')) {
5552 value
= xmlParseEntityValue(ctxt
, &orig
);
5553 if ((ctxt
->sax
!= NULL
) &&
5554 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5555 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5556 XML_INTERNAL_GENERAL_ENTITY
,
5559 * For expat compatibility in SAX mode.
5561 if ((ctxt
->myDoc
== NULL
) ||
5562 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
5563 if (ctxt
->myDoc
== NULL
) {
5564 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5565 if (ctxt
->myDoc
== NULL
) {
5566 xmlErrMemory(ctxt
, "New Doc failed");
5569 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5571 if (ctxt
->myDoc
->intSubset
== NULL
)
5572 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5573 BAD_CAST
"fake", NULL
, NULL
);
5575 xmlSAX2EntityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
5579 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5580 if ((URI
== NULL
) && (literal
== NULL
)) {
5581 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5586 uri
= xmlParseURI((const char *)URI
);
5588 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5589 "Invalid URI: %s\n", URI
);
5591 * This really ought to be a well formedness error
5592 * but the XML Core WG decided otherwise c.f. issue
5593 * E26 of the XML errata.
5596 if (uri
->fragment
!= NULL
) {
5598 * Okay this is foolish to block those but not
5601 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5606 if ((RAW
!= '>') && (!IS_BLANK_CH(CUR
))) {
5607 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5608 "Space required before 'NDATA'\n");
5611 if (CMP5(CUR_PTR
, 'N', 'D', 'A', 'T', 'A')) {
5613 if (!IS_BLANK_CH(CUR
)) {
5614 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5615 "Space required after 'NDATA'\n");
5618 ndata
= xmlParseName(ctxt
);
5619 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5620 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
5621 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
5622 literal
, URI
, ndata
);
5624 if ((ctxt
->sax
!= NULL
) &&
5625 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5626 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5627 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5628 literal
, URI
, NULL
);
5630 * For expat compatibility in SAX mode.
5631 * assuming the entity replacement was asked for
5633 if ((ctxt
->replaceEntities
!= 0) &&
5634 ((ctxt
->myDoc
== NULL
) ||
5635 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
5636 if (ctxt
->myDoc
== NULL
) {
5637 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5638 if (ctxt
->myDoc
== NULL
) {
5639 xmlErrMemory(ctxt
, "New Doc failed");
5642 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5645 if (ctxt
->myDoc
->intSubset
== NULL
)
5646 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5647 BAD_CAST
"fake", NULL
, NULL
);
5648 xmlSAX2EntityDecl(ctxt
, name
,
5649 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5650 literal
, URI
, NULL
);
5655 if (ctxt
->instate
== XML_PARSER_EOF
)
5659 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
5660 "xmlParseEntityDecl: entity %s not terminated\n", name
);
5662 if (input
!= ctxt
->input
) {
5663 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5664 "Entity declaration doesn't start and stop in the same entity\n");
5670 * Ugly mechanism to save the raw entity value.
5672 xmlEntityPtr cur
= NULL
;
5675 if ((ctxt
->sax
!= NULL
) &&
5676 (ctxt
->sax
->getParameterEntity
!= NULL
))
5677 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
5679 if ((ctxt
->sax
!= NULL
) &&
5680 (ctxt
->sax
->getEntity
!= NULL
))
5681 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5682 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
5683 cur
= xmlSAX2GetEntity(ctxt
, name
);
5687 if (cur
->orig
!= NULL
)
5694 if (value
!= NULL
) xmlFree(value
);
5695 if (URI
!= NULL
) xmlFree(URI
);
5696 if (literal
!= NULL
) xmlFree(literal
);
5701 * xmlParseDefaultDecl:
5702 * @ctxt: an XML parser context
5703 * @value: Receive a possible fixed default value for the attribute
5705 * Parse an attribute default declaration
5707 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5709 * [ VC: Required Attribute ]
5710 * if the default declaration is the keyword #REQUIRED, then the
5711 * attribute must be specified for all elements of the type in the
5712 * attribute-list declaration.
5714 * [ VC: Attribute Default Legal ]
5715 * The declared default value must meet the lexical constraints of
5716 * the declared attribute type c.f. xmlValidateAttributeDecl()
5718 * [ VC: Fixed Attribute Default ]
5719 * if an attribute has a default value declared with the #FIXED
5720 * keyword, instances of that attribute must match the default value.
5722 * [ WFC: No < in Attribute Values ]
5723 * handled in xmlParseAttValue()
5725 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5726 * or XML_ATTRIBUTE_FIXED.
5730 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
5735 if (CMP9(CUR_PTR
, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5737 return(XML_ATTRIBUTE_REQUIRED
);
5739 if (CMP8(CUR_PTR
, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5741 return(XML_ATTRIBUTE_IMPLIED
);
5743 val
= XML_ATTRIBUTE_NONE
;
5744 if (CMP6(CUR_PTR
, '#', 'F', 'I', 'X', 'E', 'D')) {
5746 val
= XML_ATTRIBUTE_FIXED
;
5747 if (!IS_BLANK_CH(CUR
)) {
5748 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5749 "Space required after '#FIXED'\n");
5753 ret
= xmlParseAttValue(ctxt
);
5754 ctxt
->instate
= XML_PARSER_DTD
;
5756 xmlFatalErrMsg(ctxt
, (xmlParserErrors
)ctxt
->errNo
,
5757 "Attribute default value declaration error\n");
5764 * xmlParseNotationType:
5765 * @ctxt: an XML parser context
5767 * parse a Notation attribute type.
5769 * Note: the leading 'NOTATION' S part has already been parsed...
5771 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5773 * [ VC: Notation Attributes ]
5774 * Values of this type must match one of the notation names included
5775 * in the declaration; all notation names in the declaration must be declared.
5777 * Returns: the notation attribute tree built while parsing
5781 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
5782 const xmlChar
*name
;
5783 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5786 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5793 name
= xmlParseName(ctxt
);
5795 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5796 "Name expected in NOTATION declaration\n");
5797 xmlFreeEnumeration(ret
);
5801 while (tmp
!= NULL
) {
5802 if (xmlStrEqual(name
, tmp
->name
)) {
5803 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5804 "standalone: attribute notation value token %s duplicated\n",
5806 if (!xmlDictOwns(ctxt
->dict
, name
))
5807 xmlFree((xmlChar
*) name
);
5813 cur
= xmlCreateEnumeration(name
);
5815 xmlFreeEnumeration(ret
);
5818 if (last
== NULL
) ret
= last
= cur
;
5825 } while (RAW
== '|');
5827 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5828 xmlFreeEnumeration(ret
);
5836 * xmlParseEnumerationType:
5837 * @ctxt: an XML parser context
5839 * parse an Enumeration attribute type.
5841 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5843 * [ VC: Enumeration ]
5844 * Values of this type must match one of the Nmtoken tokens in
5847 * Returns: the enumeration attribute tree built while parsing
5851 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
5853 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5856 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_STARTED
, NULL
);
5863 name
= xmlParseNmtoken(ctxt
);
5865 xmlFatalErr(ctxt
, XML_ERR_NMTOKEN_REQUIRED
, NULL
);
5869 while (tmp
!= NULL
) {
5870 if (xmlStrEqual(name
, tmp
->name
)) {
5871 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5872 "standalone: attribute enumeration value token %s duplicated\n",
5874 if (!xmlDictOwns(ctxt
->dict
, name
))
5881 cur
= xmlCreateEnumeration(name
);
5882 if (!xmlDictOwns(ctxt
->dict
, name
))
5885 xmlFreeEnumeration(ret
);
5888 if (last
== NULL
) ret
= last
= cur
;
5895 } while (RAW
== '|');
5897 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_FINISHED
, NULL
);
5905 * xmlParseEnumeratedType:
5906 * @ctxt: an XML parser context
5907 * @tree: the enumeration tree built while parsing
5909 * parse an Enumerated attribute type.
5911 * [57] EnumeratedType ::= NotationType | Enumeration
5913 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5916 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5920 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5921 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5923 if (!IS_BLANK_CH(CUR
)) {
5924 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5925 "Space required after 'NOTATION'\n");
5929 *tree
= xmlParseNotationType(ctxt
);
5930 if (*tree
== NULL
) return(0);
5931 return(XML_ATTRIBUTE_NOTATION
);
5933 *tree
= xmlParseEnumerationType(ctxt
);
5934 if (*tree
== NULL
) return(0);
5935 return(XML_ATTRIBUTE_ENUMERATION
);
5939 * xmlParseAttributeType:
5940 * @ctxt: an XML parser context
5941 * @tree: the enumeration tree built while parsing
5943 * parse the Attribute type of an attribute definition
5945 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5947 * [55] StringType ::= 'CDATA'
5949 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5950 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5952 * Validity constraints for attribute values syntax are checked in
5953 * xmlValidateAttributeValue()
5956 * Values of type ID must match the Name production. A name must not
5957 * appear more than once in an XML document as a value of this type;
5958 * i.e., ID values must uniquely identify the elements which bear them.
5960 * [ VC: One ID per Element Type ]
5961 * No element type may have more than one ID attribute specified.
5963 * [ VC: ID Attribute Default ]
5964 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5967 * Values of type IDREF must match the Name production, and values
5968 * of type IDREFS must match Names; each IDREF Name must match the value
5969 * of an ID attribute on some element in the XML document; i.e. IDREF
5970 * values must match the value of some ID attribute.
5972 * [ VC: Entity Name ]
5973 * Values of type ENTITY must match the Name production, values
5974 * of type ENTITIES must match Names; each Entity Name must match the
5975 * name of an unparsed entity declared in the DTD.
5977 * [ VC: Name Token ]
5978 * Values of type NMTOKEN must match the Nmtoken production; values
5979 * of type NMTOKENS must match Nmtokens.
5981 * Returns the attribute type
5984 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5986 if (CMP5(CUR_PTR
, 'C', 'D', 'A', 'T', 'A')) {
5988 return(XML_ATTRIBUTE_CDATA
);
5989 } else if (CMP6(CUR_PTR
, 'I', 'D', 'R', 'E', 'F', 'S')) {
5991 return(XML_ATTRIBUTE_IDREFS
);
5992 } else if (CMP5(CUR_PTR
, 'I', 'D', 'R', 'E', 'F')) {
5994 return(XML_ATTRIBUTE_IDREF
);
5995 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
5997 return(XML_ATTRIBUTE_ID
);
5998 } else if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6000 return(XML_ATTRIBUTE_ENTITY
);
6001 } else if (CMP8(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6003 return(XML_ATTRIBUTE_ENTITIES
);
6004 } else if (CMP8(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6006 return(XML_ATTRIBUTE_NMTOKENS
);
6007 } else if (CMP7(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6009 return(XML_ATTRIBUTE_NMTOKEN
);
6011 return(xmlParseEnumeratedType(ctxt
, tree
));
6015 * xmlParseAttributeListDecl:
6016 * @ctxt: an XML parser context
6018 * parse the Attribute list def for an element
6020 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6022 * [53] AttDef ::= S Name S AttType S DefaultDecl
6026 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
6027 const xmlChar
*elemName
;
6028 const xmlChar
*attrName
;
6029 xmlEnumerationPtr tree
;
6031 if (CMP9(CUR_PTR
, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6032 xmlParserInputPtr input
= ctxt
->input
;
6035 if (!IS_BLANK_CH(CUR
)) {
6036 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6037 "Space required after '<!ATTLIST'\n");
6040 elemName
= xmlParseName(ctxt
);
6041 if (elemName
== NULL
) {
6042 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6043 "ATTLIST: no name for Element\n");
6048 while ((RAW
!= '>') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6049 const xmlChar
*check
= CUR_PTR
;
6052 xmlChar
*defaultValue
= NULL
;
6056 attrName
= xmlParseName(ctxt
);
6057 if (attrName
== NULL
) {
6058 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6059 "ATTLIST: no name for Attribute\n");
6063 if (!IS_BLANK_CH(CUR
)) {
6064 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6065 "Space required after the attribute name\n");
6070 type
= xmlParseAttributeType(ctxt
, &tree
);
6076 if (!IS_BLANK_CH(CUR
)) {
6077 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6078 "Space required after the attribute type\n");
6080 xmlFreeEnumeration(tree
);
6085 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
6087 if (defaultValue
!= NULL
)
6088 xmlFree(defaultValue
);
6090 xmlFreeEnumeration(tree
);
6093 if ((type
!= XML_ATTRIBUTE_CDATA
) && (defaultValue
!= NULL
))
6094 xmlAttrNormalizeSpace(defaultValue
, defaultValue
);
6098 if (!IS_BLANK_CH(CUR
)) {
6099 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6100 "Space required after the attribute default value\n");
6101 if (defaultValue
!= NULL
)
6102 xmlFree(defaultValue
);
6104 xmlFreeEnumeration(tree
);
6109 if (check
== CUR_PTR
) {
6110 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
6111 "in xmlParseAttributeListDecl\n");
6112 if (defaultValue
!= NULL
)
6113 xmlFree(defaultValue
);
6115 xmlFreeEnumeration(tree
);
6118 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6119 (ctxt
->sax
->attributeDecl
!= NULL
))
6120 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
6121 type
, def
, defaultValue
, tree
);
6122 else if (tree
!= NULL
)
6123 xmlFreeEnumeration(tree
);
6125 if ((ctxt
->sax2
) && (defaultValue
!= NULL
) &&
6126 (def
!= XML_ATTRIBUTE_IMPLIED
) &&
6127 (def
!= XML_ATTRIBUTE_REQUIRED
)) {
6128 xmlAddDefAttrs(ctxt
, elemName
, attrName
, defaultValue
);
6131 xmlAddSpecialAttr(ctxt
, elemName
, attrName
, type
);
6133 if (defaultValue
!= NULL
)
6134 xmlFree(defaultValue
);
6138 if (input
!= ctxt
->input
) {
6139 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6140 "Attribute list declaration doesn't start and stop in the same entity\n",
6149 * xmlParseElementMixedContentDecl:
6150 * @ctxt: an XML parser context
6151 * @inputchk: the input used for the current entity, needed for boundary checks
6153 * parse the declaration for a Mixed Element content
6154 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6156 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6157 * '(' S? '#PCDATA' S? ')'
6159 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6161 * [ VC: No Duplicate Types ]
6162 * The same name must not appear more than once in a single
6163 * mixed-content declaration.
6165 * returns: the list of the xmlElementContentPtr describing the element choices
6167 xmlElementContentPtr
6168 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6169 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
6170 const xmlChar
*elem
= NULL
;
6173 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6178 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
6179 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6180 "Element content declaration doesn't start and stop in the same entity\n",
6184 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6188 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6193 if ((RAW
== '(') || (RAW
== '|')) {
6194 ret
= cur
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6195 if (ret
== NULL
) return(NULL
);
6197 while ((RAW
== '|') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6200 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6201 if (ret
== NULL
) return(NULL
);
6207 n
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6208 if (n
== NULL
) return(NULL
);
6209 n
->c1
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6218 elem
= xmlParseName(ctxt
);
6220 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6221 "xmlParseElementMixedContentDecl : Name expected\n");
6222 xmlFreeDocElementContent(ctxt
->myDoc
, cur
);
6228 if ((RAW
== ')') && (NXT(1) == '*')) {
6230 cur
->c2
= xmlNewDocElementContent(ctxt
->myDoc
, elem
,
6231 XML_ELEMENT_CONTENT_ELEMENT
);
6232 if (cur
->c2
!= NULL
)
6233 cur
->c2
->parent
= cur
;
6236 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6237 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
6238 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6239 "Element content declaration doesn't start and stop in the same entity\n",
6244 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6245 xmlFatalErr(ctxt
, XML_ERR_MIXED_NOT_STARTED
, NULL
);
6250 xmlFatalErr(ctxt
, XML_ERR_PCDATA_REQUIRED
, NULL
);
6256 * xmlParseElementChildrenContentDeclPriv:
6257 * @ctxt: an XML parser context
6258 * @inputchk: the input used for the current entity, needed for boundary checks
6259 * @depth: the level of recursion
6261 * parse the declaration for a Children Element content
6262 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6265 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6267 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6269 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6271 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6273 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6274 * TODO Parameter-entity replacement text must be properly nested
6275 * with parenthesized groups. That is to say, if either of the
6276 * opening or closing parentheses in a choice, seq, or Mixed
6277 * construct is contained in the replacement text for a parameter
6278 * entity, both must be contained in the same replacement text. For
6279 * interoperability, if a parameter-entity reference appears in a
6280 * choice, seq, or Mixed construct, its replacement text should not
6281 * be empty, and neither the first nor last non-blank character of
6282 * the replacement text should be a connector (| or ,).
6284 * Returns the tree of xmlElementContentPtr describing the element
6287 static xmlElementContentPtr
6288 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt
, int inputchk
,
6290 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
6291 const xmlChar
*elem
;
6294 if (((depth
> 128) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
6296 xmlFatalErrMsgInt(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
,
6297 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6304 int inputid
= ctxt
->input
->id
;
6306 /* Recurse on first child */
6309 cur
= ret
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6314 elem
= xmlParseName(ctxt
);
6316 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6319 cur
= ret
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6321 xmlErrMemory(ctxt
, NULL
);
6326 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6328 } else if (RAW
== '*') {
6329 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6331 } else if (RAW
== '+') {
6332 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6335 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6341 while ((RAW
!= ')') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6343 * Each loop we parse one separator and one element.
6346 if (type
== 0) type
= CUR
;
6349 * Detect "Name | Name , Name" error
6351 else if (type
!= CUR
) {
6352 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6353 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6355 if ((last
!= NULL
) && (last
!= ret
))
6356 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6358 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6363 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_SEQ
);
6365 if ((last
!= NULL
) && (last
!= ret
))
6366 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6367 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6385 } else if (RAW
== '|') {
6386 if (type
== 0) type
= CUR
;
6389 * Detect "Name , Name | Name" error
6391 else if (type
!= CUR
) {
6392 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6393 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6395 if ((last
!= NULL
) && (last
!= ret
))
6396 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6398 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6403 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6405 if ((last
!= NULL
) && (last
!= ret
))
6406 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6408 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6427 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
, NULL
);
6428 if ((last
!= NULL
) && (last
!= ret
))
6429 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6431 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6438 int inputid
= ctxt
->input
->id
;
6439 /* Recurse on second child */
6442 last
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6446 elem
= xmlParseName(ctxt
);
6448 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6450 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6453 last
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6456 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6460 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6462 } else if (RAW
== '*') {
6463 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6465 } else if (RAW
== '+') {
6466 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6469 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6475 if ((cur
!= NULL
) && (last
!= NULL
)) {
6480 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
6481 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6482 "Element content declaration doesn't start and stop in the same entity\n",
6488 if ((ret
->ocur
== XML_ELEMENT_CONTENT_PLUS
) ||
6489 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6490 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6492 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6495 } else if (RAW
== '*') {
6497 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6500 * Some normalization:
6501 * (a | b* | c?)* == (a | b | c)*
6503 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6504 if ((cur
->c1
!= NULL
) &&
6505 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6506 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6507 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6508 if ((cur
->c2
!= NULL
) &&
6509 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6510 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6511 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6516 } else if (RAW
== '+') {
6520 if ((ret
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6521 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6522 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6524 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6526 * Some normalization:
6527 * (a | b*)+ == (a | b)*
6528 * (a | b?)+ == (a | b)*
6530 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6531 if ((cur
->c1
!= NULL
) &&
6532 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6533 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6534 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6537 if ((cur
->c2
!= NULL
) &&
6538 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6539 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6540 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6546 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6554 * xmlParseElementChildrenContentDecl:
6555 * @ctxt: an XML parser context
6556 * @inputchk: the input used for the current entity, needed for boundary checks
6558 * parse the declaration for a Children Element content
6559 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6561 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6563 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6565 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6567 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6569 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6570 * TODO Parameter-entity replacement text must be properly nested
6571 * with parenthesized groups. That is to say, if either of the
6572 * opening or closing parentheses in a choice, seq, or Mixed
6573 * construct is contained in the replacement text for a parameter
6574 * entity, both must be contained in the same replacement text. For
6575 * interoperability, if a parameter-entity reference appears in a
6576 * choice, seq, or Mixed construct, its replacement text should not
6577 * be empty, and neither the first nor last non-blank character of
6578 * the replacement text should be a connector (| or ,).
6580 * Returns the tree of xmlElementContentPtr describing the element
6583 xmlElementContentPtr
6584 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6585 /* stub left for API/ABI compat */
6586 return(xmlParseElementChildrenContentDeclPriv(ctxt
, inputchk
, 1));
6590 * xmlParseElementContentDecl:
6591 * @ctxt: an XML parser context
6592 * @name: the name of the element being defined.
6593 * @result: the Element Content pointer will be stored here if any
6595 * parse the declaration for an Element content either Mixed or Children,
6596 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6598 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6600 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6604 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, const xmlChar
*name
,
6605 xmlElementContentPtr
*result
) {
6607 xmlElementContentPtr tree
= NULL
;
6608 int inputid
= ctxt
->input
->id
;
6614 xmlFatalErrMsgStr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6615 "xmlParseElementContentDecl : %s '(' expected\n", name
);
6620 if (ctxt
->instate
== XML_PARSER_EOF
)
6623 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6624 tree
= xmlParseElementMixedContentDecl(ctxt
, inputid
);
6625 res
= XML_ELEMENT_TYPE_MIXED
;
6627 tree
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
, 1);
6628 res
= XML_ELEMENT_TYPE_ELEMENT
;
6636 * xmlParseElementDecl:
6637 * @ctxt: an XML parser context
6639 * parse an Element declaration.
6641 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6643 * [ VC: Unique Element Type Declaration ]
6644 * No element type may be declared more than once
6646 * Returns the type of the element, or -1 in case of error
6649 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
6650 const xmlChar
*name
;
6652 xmlElementContentPtr content
= NULL
;
6654 /* GROW; done in the caller */
6655 if (CMP9(CUR_PTR
, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6656 xmlParserInputPtr input
= ctxt
->input
;
6659 if (!IS_BLANK_CH(CUR
)) {
6660 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6661 "Space required after 'ELEMENT'\n");
6664 name
= xmlParseName(ctxt
);
6666 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6667 "xmlParseElementDecl: no name for Element\n");
6670 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6672 if (!IS_BLANK_CH(CUR
)) {
6673 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6674 "Space required after the element name\n");
6677 if (CMP5(CUR_PTR
, 'E', 'M', 'P', 'T', 'Y')) {
6680 * Element must always be empty.
6682 ret
= XML_ELEMENT_TYPE_EMPTY
;
6683 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
6687 * Element is a generic container.
6689 ret
= XML_ELEMENT_TYPE_ANY
;
6690 } else if (RAW
== '(') {
6691 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
6694 * [ WFC: PEs in Internal Subset ] error handling.
6696 if ((RAW
== '%') && (ctxt
->external
== 0) &&
6697 (ctxt
->inputNr
== 1)) {
6698 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_IN_INT_SUBSET
,
6699 "PEReference: forbidden within markup decl in internal subset\n");
6701 xmlFatalErrMsg(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6702 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6709 * Pop-up of finished entities.
6711 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6716 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
6717 if (content
!= NULL
) {
6718 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6721 if (input
!= ctxt
->input
) {
6722 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6723 "Element declaration doesn't start and stop in the same entity\n");
6727 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6728 (ctxt
->sax
->elementDecl
!= NULL
)) {
6729 if (content
!= NULL
)
6730 content
->parent
= NULL
;
6731 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
6733 if ((content
!= NULL
) && (content
->parent
== NULL
)) {
6735 * this is a trick: if xmlAddElementDecl is called,
6736 * instead of copying the full tree it is plugged directly
6737 * if called from the parser. Avoid duplicating the
6738 * interfaces or change the API/ABI
6740 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6742 } else if (content
!= NULL
) {
6743 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6751 * xmlParseConditionalSections
6752 * @ctxt: an XML parser context
6754 * [61] conditionalSect ::= includeSect | ignoreSect
6755 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6756 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6757 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6758 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6762 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
6763 int id
= ctxt
->input
->id
;
6767 if (CMP7(CUR_PTR
, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6771 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6773 if (ctxt
->input
->id
!= id
) {
6774 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6775 "All markup of the conditional section is not in the same entity\n",
6780 if (xmlParserDebugEntities
) {
6781 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6782 xmlGenericError(xmlGenericErrorContext
,
6783 "%s(%d): ", ctxt
->input
->filename
,
6785 xmlGenericError(xmlGenericErrorContext
,
6786 "Entering INCLUDE Conditional Section\n");
6789 while (((RAW
!= 0) && ((RAW
!= ']') || (NXT(1) != ']') ||
6790 (NXT(2) != '>'))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6791 const xmlChar
*check
= CUR_PTR
;
6792 unsigned int cons
= ctxt
->input
->consumed
;
6794 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6795 xmlParseConditionalSections(ctxt
);
6796 } else if (IS_BLANK_CH(CUR
)) {
6798 } else if (RAW
== '%') {
6799 xmlParsePEReference(ctxt
);
6801 xmlParseMarkupDecl(ctxt
);
6804 * Pop-up of finished entities.
6806 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6809 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
6810 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6814 if (xmlParserDebugEntities
) {
6815 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6816 xmlGenericError(xmlGenericErrorContext
,
6817 "%s(%d): ", ctxt
->input
->filename
,
6819 xmlGenericError(xmlGenericErrorContext
,
6820 "Leaving INCLUDE Conditional Section\n");
6823 } else if (CMP6(CUR_PTR
, 'I', 'G', 'N', 'O', 'R', 'E')) {
6825 xmlParserInputState instate
;
6831 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6833 if (ctxt
->input
->id
!= id
) {
6834 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6835 "All markup of the conditional section is not in the same entity\n",
6840 if (xmlParserDebugEntities
) {
6841 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6842 xmlGenericError(xmlGenericErrorContext
,
6843 "%s(%d): ", ctxt
->input
->filename
,
6845 xmlGenericError(xmlGenericErrorContext
,
6846 "Entering IGNORE Conditional Section\n");
6850 * Parse up to the end of the conditional section
6851 * But disable SAX event generating DTD building in the meantime
6853 state
= ctxt
->disableSAX
;
6854 instate
= ctxt
->instate
;
6855 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6856 ctxt
->instate
= XML_PARSER_IGNORE
;
6858 while (((depth
>= 0) && (RAW
!= 0)) &&
6859 (ctxt
->instate
!= XML_PARSER_EOF
)) {
6860 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6865 if ((RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6866 if (--depth
>= 0) SKIP(3);
6873 ctxt
->disableSAX
= state
;
6874 ctxt
->instate
= instate
;
6876 if (xmlParserDebugEntities
) {
6877 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6878 xmlGenericError(xmlGenericErrorContext
,
6879 "%s(%d): ", ctxt
->input
->filename
,
6881 xmlGenericError(xmlGenericErrorContext
,
6882 "Leaving IGNORE Conditional Section\n");
6886 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID_KEYWORD
, NULL
);
6893 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_NOT_FINISHED
, NULL
);
6895 if (ctxt
->input
->id
!= id
) {
6896 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6897 "All markup of the conditional section is not in the same entity\n",
6905 * xmlParseMarkupDecl:
6906 * @ctxt: an XML parser context
6908 * parse Markup declarations
6910 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6911 * NotationDecl | PI | Comment
6913 * [ VC: Proper Declaration/PE Nesting ]
6914 * Parameter-entity replacement text must be properly nested with
6915 * markup declarations. That is to say, if either the first character
6916 * or the last character of a markup declaration (markupdecl above) is
6917 * contained in the replacement text for a parameter-entity reference,
6918 * both must be contained in the same replacement text.
6920 * [ WFC: PEs in Internal Subset ]
6921 * In the internal DTD subset, parameter-entity references can occur
6922 * only where markup declarations can occur, not within markup declarations.
6923 * (This does not apply to references that occur in external parameter
6924 * entities or to the external subset.)
6927 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
6930 if (NXT(1) == '!') {
6934 xmlParseElementDecl(ctxt
);
6935 else if (NXT(3) == 'N')
6936 xmlParseEntityDecl(ctxt
);
6939 xmlParseAttributeListDecl(ctxt
);
6942 xmlParseNotationDecl(ctxt
);
6945 xmlParseComment(ctxt
);
6948 /* there is an error but it will be detected later */
6951 } else if (NXT(1) == '?') {
6956 * This is only for internal subset. On external entities,
6957 * the replacement is done before parsing stage
6959 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
6960 xmlParsePEReference(ctxt
);
6963 * Conditional sections are allowed from entities included
6964 * by PE References in the internal subset.
6966 if ((ctxt
->external
== 0) && (ctxt
->inputNr
> 1)) {
6967 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6968 xmlParseConditionalSections(ctxt
);
6972 ctxt
->instate
= XML_PARSER_DTD
;
6977 * @ctxt: an XML parser context
6979 * parse an XML declaration header for external entities
6981 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6985 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
6987 const xmlChar
*encoding
;
6990 * We know that '<?xml' is here.
6992 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6995 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_STARTED
, NULL
);
6999 if (!IS_BLANK_CH(CUR
)) {
7000 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
7001 "Space needed after '<?xml'\n");
7006 * We may have the VersionInfo here.
7008 version
= xmlParseVersionInfo(ctxt
);
7009 if (version
== NULL
)
7010 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
7012 if (!IS_BLANK_CH(CUR
)) {
7013 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
7014 "Space needed here\n");
7017 ctxt
->input
->version
= version
;
7020 * We must have the encoding declaration
7022 encoding
= xmlParseEncodingDecl(ctxt
);
7023 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7025 * The XML REC instructs us to stop parsing right here
7029 if ((encoding
== NULL
) && (ctxt
->errNo
== XML_ERR_OK
)) {
7030 xmlFatalErrMsg(ctxt
, XML_ERR_MISSING_ENCODING
,
7031 "Missing encoding in text declaration\n");
7035 if ((RAW
== '?') && (NXT(1) == '>')) {
7037 } else if (RAW
== '>') {
7038 /* Deprecated old WD ... */
7039 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
7042 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
7043 MOVETO_ENDTAG(CUR_PTR
);
7049 * xmlParseExternalSubset:
7050 * @ctxt: an XML parser context
7051 * @ExternalID: the external identifier
7052 * @SystemID: the system identifier (or URL)
7054 * parse Markup declarations from an external subset
7056 * [30] extSubset ::= textDecl? extSubsetDecl
7058 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7061 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
7062 const xmlChar
*SystemID
) {
7063 xmlDetectSAX2(ctxt
);
7066 if ((ctxt
->encoding
== NULL
) &&
7067 (ctxt
->input
->end
- ctxt
->input
->cur
>= 4)) {
7069 xmlCharEncoding enc
;
7075 enc
= xmlDetectCharEncoding(start
, 4);
7076 if (enc
!= XML_CHAR_ENCODING_NONE
)
7077 xmlSwitchEncoding(ctxt
, enc
);
7080 if (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) {
7081 xmlParseTextDecl(ctxt
);
7082 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7084 * The XML REC instructs us to stop parsing right here
7086 ctxt
->instate
= XML_PARSER_EOF
;
7090 if (ctxt
->myDoc
== NULL
) {
7091 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
7092 if (ctxt
->myDoc
== NULL
) {
7093 xmlErrMemory(ctxt
, "New Doc failed");
7096 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
7098 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
7099 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
7101 ctxt
->instate
= XML_PARSER_DTD
;
7103 while (((RAW
== '<') && (NXT(1) == '?')) ||
7104 ((RAW
== '<') && (NXT(1) == '!')) ||
7105 (RAW
== '%') || IS_BLANK_CH(CUR
)) {
7106 const xmlChar
*check
= CUR_PTR
;
7107 unsigned int cons
= ctxt
->input
->consumed
;
7110 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7111 xmlParseConditionalSections(ctxt
);
7112 } else if (IS_BLANK_CH(CUR
)) {
7114 } else if (RAW
== '%') {
7115 xmlParsePEReference(ctxt
);
7117 xmlParseMarkupDecl(ctxt
);
7120 * Pop-up of finished entities.
7122 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
7125 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
7126 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7132 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7138 * xmlParseReference:
7139 * @ctxt: an XML parser context
7141 * parse and handle entity references in content, depending on the SAX
7142 * interface, this may end-up in a call to character() if this is a
7143 * CharRef, a predefined entity, if there is no reference() callback,
7144 * or if the parser was asked to switch to that mode.
7146 * [67] Reference ::= EntityRef | CharRef
7149 xmlParseReference(xmlParserCtxtPtr ctxt
) {
7153 xmlNodePtr list
= NULL
;
7154 xmlParserErrors ret
= XML_ERR_OK
;
7161 * Simple case of a CharRef
7163 if (NXT(1) == '#') {
7167 int value
= xmlParseCharRef(ctxt
);
7171 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
7173 * So we are using non-UTF-8 buffers
7174 * Check that the char fits in 8 bits; if not,
7175 * generate a CharRef.
7177 if (value
<= 0xFF) {
7180 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7181 (!ctxt
->disableSAX
))
7182 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
7184 if ((hex
== 'x') || (hex
== 'X'))
7185 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
7187 snprintf((char *)out
, sizeof(out
), "#%d", value
);
7188 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7189 (!ctxt
->disableSAX
))
7190 ctxt
->sax
->reference(ctxt
->userData
, out
);
7194 * Just encode the value in UTF-8
7196 COPY_BUF(0 ,out
, i
, value
);
7198 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7199 (!ctxt
->disableSAX
))
7200 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
7206 * We are seeing an entity reference
7208 ent
= xmlParseEntityRef(ctxt
);
7209 if (ent
== NULL
) return;
7210 if (!ctxt
->wellFormed
)
7212 was_checked
= ent
->checked
;
7214 /* special case of predefined entities */
7215 if ((ent
->name
== NULL
) ||
7216 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
7218 if (val
== NULL
) return;
7220 * inline the entity.
7222 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7223 (!ctxt
->disableSAX
))
7224 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
7229 * The first reference to the entity triggers a parsing phase
7230 * where the ent->children is filled with the result from
7232 * Note: external parsed entities will not be loaded, it is not
7233 * required for a non-validating parser, unless the parsing option
7234 * of validating, or substituting entities were given. Doing so is
7235 * far more secure as the parser will only process data coming from
7236 * the document entity by default.
7238 if ((ent
->checked
== 0) &&
7239 ((ent
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
) ||
7240 (ctxt
->options
& (XML_PARSE_NOENT
| XML_PARSE_DTDVALID
)))) {
7241 unsigned long oldnbent
= ctxt
->nbentities
;
7244 * This is a bit hackish but this seems the best
7245 * way to make sure both SAX and DOM entity support
7249 if (ctxt
->userData
== ctxt
)
7252 user_data
= ctxt
->userData
;
7255 * Check that this entity is well formed
7256 * 4.3.2: An internal general parsed entity is well-formed
7257 * if its replacement text matches the production labeled
7260 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7262 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
, ent
->content
,
7266 } else if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7268 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
, ctxt
->sax
,
7269 user_data
, ctxt
->depth
, ent
->URI
,
7270 ent
->ExternalID
, &list
);
7273 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7274 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7275 "invalid entity type found\n", NULL
);
7279 * Store the number of entities needing parsing for this entity
7280 * content and do checkings
7282 ent
->checked
= (ctxt
->nbentities
- oldnbent
+ 1) * 2;
7283 if ((ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<')))
7285 if (ret
== XML_ERR_ENTITY_LOOP
) {
7286 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7287 xmlFreeNodeList(list
);
7290 if (xmlParserEntityCheck(ctxt
, 0, ent
, 0)) {
7291 xmlFreeNodeList(list
);
7295 if ((ret
== XML_ERR_OK
) && (list
!= NULL
)) {
7296 if (((ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) ||
7297 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
))&&
7298 (ent
->children
== NULL
)) {
7299 ent
->children
= list
;
7300 if (ctxt
->replaceEntities
) {
7302 * Prune it directly in the generated document
7303 * except for single text nodes.
7305 if (((list
->type
== XML_TEXT_NODE
) &&
7306 (list
->next
== NULL
)) ||
7307 (ctxt
->parseMode
== XML_PARSE_READER
)) {
7308 list
->parent
= (xmlNodePtr
) ent
;
7313 while (list
!= NULL
) {
7314 list
->parent
= (xmlNodePtr
) ctxt
->node
;
7315 list
->doc
= ctxt
->myDoc
;
7316 if (list
->next
== NULL
)
7320 list
= ent
->children
;
7321 #ifdef LIBXML_LEGACY_ENABLED
7322 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7323 xmlAddEntityReference(ent
, list
, NULL
);
7324 #endif /* LIBXML_LEGACY_ENABLED */
7328 while (list
!= NULL
) {
7329 list
->parent
= (xmlNodePtr
) ent
;
7330 xmlSetTreeDoc(list
, ent
->doc
);
7331 if (list
->next
== NULL
)
7337 xmlFreeNodeList(list
);
7340 } else if ((ret
!= XML_ERR_OK
) &&
7341 (ret
!= XML_WAR_UNDECLARED_ENTITY
)) {
7342 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7343 "Entity '%s' failed to parse\n", ent
->name
);
7344 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7345 } else if (list
!= NULL
) {
7346 xmlFreeNodeList(list
);
7349 if (ent
->checked
== 0)
7351 } else if (ent
->checked
!= 1) {
7352 ctxt
->nbentities
+= ent
->checked
/ 2;
7356 * Now that the entity content has been gathered
7357 * provide it to the application, this can take different forms based
7358 * on the parsing modes.
7360 if (ent
->children
== NULL
) {
7362 * Probably running in SAX mode and the callbacks don't
7363 * build the entity content. So unless we already went
7364 * through parsing for first checking go through the entity
7365 * content to generate callbacks associated to the entity
7367 if (was_checked
!= 0) {
7370 * This is a bit hackish but this seems the best
7371 * way to make sure both SAX and DOM entity support
7374 if (ctxt
->userData
== ctxt
)
7377 user_data
= ctxt
->userData
;
7379 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7381 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
7382 ent
->content
, user_data
, NULL
);
7384 } else if (ent
->etype
==
7385 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7387 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
7388 ctxt
->sax
, user_data
, ctxt
->depth
,
7389 ent
->URI
, ent
->ExternalID
, NULL
);
7392 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7393 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7394 "invalid entity type found\n", NULL
);
7396 if (ret
== XML_ERR_ENTITY_LOOP
) {
7397 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7401 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7402 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7404 * Entity reference callback comes second, it's somewhat
7405 * superfluous but a compatibility to historical behaviour
7407 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7413 * If we didn't get any children for the entity being built
7415 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7416 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7420 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7424 if ((ctxt
->replaceEntities
) || (ent
->children
== NULL
)) {
7426 * There is a problem on the handling of _private for entities
7427 * (bug 155816): Should we copy the content of the field from
7428 * the entity (possibly overwriting some value set by the user
7429 * when a copy is created), should we leave it alone, or should
7430 * we try to take care of different situations? The problem
7431 * is exacerbated by the usage of this field by the xmlReader.
7432 * To fix this bug, we look at _private on the created node
7433 * and, if it's NULL, we copy in whatever was in the entity.
7434 * If it's not NULL we leave it alone. This is somewhat of a
7435 * hack - maybe we should have further tests to determine
7438 if ((ctxt
->node
!= NULL
) && (ent
->children
!= NULL
)) {
7440 * Seems we are generating the DOM content, do
7441 * a simple tree copy for all references except the first
7442 * In the first occurrence list contains the replacement.
7444 if (((list
== NULL
) && (ent
->owner
== 0)) ||
7445 (ctxt
->parseMode
== XML_PARSE_READER
)) {
7446 xmlNodePtr nw
= NULL
, cur
, firstChild
= NULL
;
7449 * We are copying here, make sure there is no abuse
7451 ctxt
->sizeentcopy
+= ent
->length
+ 5;
7452 if (xmlParserEntityCheck(ctxt
, 0, ent
, ctxt
->sizeentcopy
))
7456 * when operating on a reader, the entities definitions
7457 * are always owning the entities subtree.
7458 if (ctxt->parseMode == XML_PARSE_READER)
7462 cur
= ent
->children
;
7463 while (cur
!= NULL
) {
7464 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7466 if (nw
->_private
== NULL
)
7467 nw
->_private
= cur
->_private
;
7468 if (firstChild
== NULL
){
7471 nw
= xmlAddChild(ctxt
->node
, nw
);
7473 if (cur
== ent
->last
) {
7475 * needed to detect some strange empty
7476 * node cases in the reader tests
7478 if ((ctxt
->parseMode
== XML_PARSE_READER
) &&
7480 (nw
->type
== XML_ELEMENT_NODE
) &&
7481 (nw
->children
== NULL
))
7488 #ifdef LIBXML_LEGACY_ENABLED
7489 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7490 xmlAddEntityReference(ent
, firstChild
, nw
);
7491 #endif /* LIBXML_LEGACY_ENABLED */
7492 } else if ((list
== NULL
) || (ctxt
->inputNr
> 0)) {
7493 xmlNodePtr nw
= NULL
, cur
, next
, last
,
7497 * We are copying here, make sure there is no abuse
7499 ctxt
->sizeentcopy
+= ent
->length
+ 5;
7500 if (xmlParserEntityCheck(ctxt
, 0, ent
, ctxt
->sizeentcopy
))
7504 * Copy the entity child list and make it the new
7505 * entity child list. The goal is to make sure any
7506 * ID or REF referenced will be the one from the
7507 * document content and not the entity copy.
7509 cur
= ent
->children
;
7510 ent
->children
= NULL
;
7513 while (cur
!= NULL
) {
7517 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7519 if (nw
->_private
== NULL
)
7520 nw
->_private
= cur
->_private
;
7521 if (firstChild
== NULL
){
7524 xmlAddChild((xmlNodePtr
) ent
, nw
);
7525 xmlAddChild(ctxt
->node
, cur
);
7531 if (ent
->owner
== 0)
7533 #ifdef LIBXML_LEGACY_ENABLED
7534 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7535 xmlAddEntityReference(ent
, firstChild
, nw
);
7536 #endif /* LIBXML_LEGACY_ENABLED */
7538 const xmlChar
*nbktext
;
7541 * the name change is to avoid coalescing of the
7542 * node with a possible previous text one which
7543 * would make ent->children a dangling pointer
7545 nbktext
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"nbktext",
7547 if (ent
->children
->type
== XML_TEXT_NODE
)
7548 ent
->children
->name
= nbktext
;
7549 if ((ent
->last
!= ent
->children
) &&
7550 (ent
->last
->type
== XML_TEXT_NODE
))
7551 ent
->last
->name
= nbktext
;
7552 xmlAddChildList(ctxt
->node
, ent
->children
);
7556 * This is to avoid a nasty side effect, see
7557 * characters() in SAX.c
7567 * xmlParseEntityRef:
7568 * @ctxt: an XML parser context
7570 * parse ENTITY references declarations
7572 * [68] EntityRef ::= '&' Name ';'
7574 * [ WFC: Entity Declared ]
7575 * In a document without any DTD, a document with only an internal DTD
7576 * subset which contains no parameter entity references, or a document
7577 * with "standalone='yes'", the Name given in the entity reference
7578 * must match that in an entity declaration, except that well-formed
7579 * documents need not declare any of the following entities: amp, lt,
7580 * gt, apos, quot. The declaration of a parameter entity must precede
7581 * any reference to it. Similarly, the declaration of a general entity
7582 * must precede any reference to it which appears in a default value in an
7583 * attribute-list declaration. Note that if entities are declared in the
7584 * external subset or in external parameter entities, a non-validating
7585 * processor is not obligated to read and process their declarations;
7586 * for such documents, the rule that an entity must be declared is a
7587 * well-formedness constraint only if standalone='yes'.
7589 * [ WFC: Parsed Entity ]
7590 * An entity reference must not contain the name of an unparsed entity
7592 * Returns the xmlEntityPtr if found, or NULL otherwise.
7595 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
7596 const xmlChar
*name
;
7597 xmlEntityPtr ent
= NULL
;
7600 if (ctxt
->instate
== XML_PARSER_EOF
)
7606 name
= xmlParseName(ctxt
);
7608 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7609 "xmlParseEntityRef: no name\n");
7613 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7619 * Predefined entities override any extra definition
7621 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7622 ent
= xmlGetPredefinedEntity(name
);
7628 * Increase the number of entity references parsed
7633 * Ask first SAX for entity resolution, otherwise try the
7634 * entities which may have been stored in the parser context.
7636 if (ctxt
->sax
!= NULL
) {
7637 if (ctxt
->sax
->getEntity
!= NULL
)
7638 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7639 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7640 (ctxt
->options
& XML_PARSE_OLDSAX
))
7641 ent
= xmlGetPredefinedEntity(name
);
7642 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7643 (ctxt
->userData
==ctxt
)) {
7644 ent
= xmlSAX2GetEntity(ctxt
, name
);
7647 if (ctxt
->instate
== XML_PARSER_EOF
)
7650 * [ WFC: Entity Declared ]
7651 * In a document without any DTD, a document with only an
7652 * internal DTD subset which contains no parameter entity
7653 * references, or a document with "standalone='yes'", the
7654 * Name given in the entity reference must match that in an
7655 * entity declaration, except that well-formed documents
7656 * need not declare any of the following entities: amp, lt,
7658 * The declaration of a parameter entity must precede any
7660 * Similarly, the declaration of a general entity must
7661 * precede any reference to it which appears in a default
7662 * value in an attribute-list declaration. Note that if
7663 * entities are declared in the external subset or in
7664 * external parameter entities, a non-validating processor
7665 * is not obligated to read and process their declarations;
7666 * for such documents, the rule that an entity must be
7667 * declared is a well-formedness constraint only if
7671 if ((ctxt
->standalone
== 1) ||
7672 ((ctxt
->hasExternalSubset
== 0) &&
7673 (ctxt
->hasPErefs
== 0))) {
7674 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7675 "Entity '%s' not defined\n", name
);
7677 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7678 "Entity '%s' not defined\n", name
);
7679 if ((ctxt
->inSubset
== 0) &&
7680 (ctxt
->sax
!= NULL
) &&
7681 (ctxt
->sax
->reference
!= NULL
)) {
7682 ctxt
->sax
->reference(ctxt
->userData
, name
);
7685 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7690 * [ WFC: Parsed Entity ]
7691 * An entity reference must not contain the name of an
7694 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7695 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7696 "Entity reference to unparsed entity %s\n", name
);
7700 * [ WFC: No External Entity References ]
7701 * Attribute values cannot contain direct or indirect
7702 * entity references to external entities.
7704 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7705 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7706 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7707 "Attribute references external entity '%s'\n", name
);
7710 * [ WFC: No < in Attribute Values ]
7711 * The replacement text of any entity referred to directly or
7712 * indirectly in an attribute value (other than "<") must
7715 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7717 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
7718 if (((ent
->checked
& 1) || (ent
->checked
== 0)) &&
7719 (ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<'))) {
7720 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7721 "'<' in entity '%s' is not allowed in attributes values\n", name
);
7726 * Internal check, no parameter entities here ...
7729 switch (ent
->etype
) {
7730 case XML_INTERNAL_PARAMETER_ENTITY
:
7731 case XML_EXTERNAL_PARAMETER_ENTITY
:
7732 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7733 "Attempt to reference the parameter entity '%s'\n",
7742 * [ WFC: No Recursion ]
7743 * A parsed entity must not contain a recursive reference
7744 * to itself, either directly or indirectly.
7745 * Done somewhere else
7751 * xmlParseStringEntityRef:
7752 * @ctxt: an XML parser context
7753 * @str: a pointer to an index in the string
7755 * parse ENTITY references declarations, but this version parses it from
7758 * [68] EntityRef ::= '&' Name ';'
7760 * [ WFC: Entity Declared ]
7761 * In a document without any DTD, a document with only an internal DTD
7762 * subset which contains no parameter entity references, or a document
7763 * with "standalone='yes'", the Name given in the entity reference
7764 * must match that in an entity declaration, except that well-formed
7765 * documents need not declare any of the following entities: amp, lt,
7766 * gt, apos, quot. The declaration of a parameter entity must precede
7767 * any reference to it. Similarly, the declaration of a general entity
7768 * must precede any reference to it which appears in a default value in an
7769 * attribute-list declaration. Note that if entities are declared in the
7770 * external subset or in external parameter entities, a non-validating
7771 * processor is not obligated to read and process their declarations;
7772 * for such documents, the rule that an entity must be declared is a
7773 * well-formedness constraint only if standalone='yes'.
7775 * [ WFC: Parsed Entity ]
7776 * An entity reference must not contain the name of an unparsed entity
7778 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7779 * is updated to the current location in the string.
7782 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
7786 xmlEntityPtr ent
= NULL
;
7788 if ((str
== NULL
) || (*str
== NULL
))
7796 name
= xmlParseStringName(ctxt
, &ptr
);
7798 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7799 "xmlParseStringEntityRef: no name\n");
7804 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7813 * Predefined entities override any extra definition
7815 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7816 ent
= xmlGetPredefinedEntity(name
);
7825 * Increase the number of entity references parsed
7830 * Ask first SAX for entity resolution, otherwise try the
7831 * entities which may have been stored in the parser context.
7833 if (ctxt
->sax
!= NULL
) {
7834 if (ctxt
->sax
->getEntity
!= NULL
)
7835 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7836 if ((ent
== NULL
) && (ctxt
->options
& XML_PARSE_OLDSAX
))
7837 ent
= xmlGetPredefinedEntity(name
);
7838 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
7839 ent
= xmlSAX2GetEntity(ctxt
, name
);
7842 if (ctxt
->instate
== XML_PARSER_EOF
) {
7848 * [ WFC: Entity Declared ]
7849 * In a document without any DTD, a document with only an
7850 * internal DTD subset which contains no parameter entity
7851 * references, or a document with "standalone='yes'", the
7852 * Name given in the entity reference must match that in an
7853 * entity declaration, except that well-formed documents
7854 * need not declare any of the following entities: amp, lt,
7856 * The declaration of a parameter entity must precede any
7858 * Similarly, the declaration of a general entity must
7859 * precede any reference to it which appears in a default
7860 * value in an attribute-list declaration. Note that if
7861 * entities are declared in the external subset or in
7862 * external parameter entities, a non-validating processor
7863 * is not obligated to read and process their declarations;
7864 * for such documents, the rule that an entity must be
7865 * declared is a well-formedness constraint only if
7869 if ((ctxt
->standalone
== 1) ||
7870 ((ctxt
->hasExternalSubset
== 0) &&
7871 (ctxt
->hasPErefs
== 0))) {
7872 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7873 "Entity '%s' not defined\n", name
);
7875 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7876 "Entity '%s' not defined\n",
7879 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7880 /* TODO ? check regressions ctxt->valid = 0; */
7884 * [ WFC: Parsed Entity ]
7885 * An entity reference must not contain the name of an
7888 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7889 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7890 "Entity reference to unparsed entity %s\n", name
);
7894 * [ WFC: No External Entity References ]
7895 * Attribute values cannot contain direct or indirect
7896 * entity references to external entities.
7898 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7899 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7900 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7901 "Attribute references external entity '%s'\n", name
);
7904 * [ WFC: No < in Attribute Values ]
7905 * The replacement text of any entity referred to directly or
7906 * indirectly in an attribute value (other than "<") must
7909 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7910 (ent
!= NULL
) && (ent
->content
!= NULL
) &&
7911 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
7912 (xmlStrchr(ent
->content
, '<'))) {
7913 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7914 "'<' in entity '%s' is not allowed in attributes values\n",
7919 * Internal check, no parameter entities here ...
7922 switch (ent
->etype
) {
7923 case XML_INTERNAL_PARAMETER_ENTITY
:
7924 case XML_EXTERNAL_PARAMETER_ENTITY
:
7925 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7926 "Attempt to reference the parameter entity '%s'\n",
7935 * [ WFC: No Recursion ]
7936 * A parsed entity must not contain a recursive reference
7937 * to itself, either directly or indirectly.
7938 * Done somewhere else
7947 * xmlParsePEReference:
7948 * @ctxt: an XML parser context
7950 * parse PEReference declarations
7951 * The entity content is handled directly by pushing its content as
7952 * a new input stream.
7954 * [69] PEReference ::= '%' Name ';'
7956 * [ WFC: No Recursion ]
7957 * A parsed entity must not contain a recursive
7958 * reference to itself, either directly or indirectly.
7960 * [ WFC: Entity Declared ]
7961 * In a document without any DTD, a document with only an internal DTD
7962 * subset which contains no parameter entity references, or a document
7963 * with "standalone='yes'", ... ... The declaration of a parameter
7964 * entity must precede any reference to it...
7966 * [ VC: Entity Declared ]
7967 * In a document with an external subset or external parameter entities
7968 * with "standalone='no'", ... ... The declaration of a parameter entity
7969 * must precede any reference to it...
7972 * Parameter-entity references may only appear in the DTD.
7973 * NOTE: misleading but this is handled.
7976 xmlParsePEReference(xmlParserCtxtPtr ctxt
)
7978 const xmlChar
*name
;
7979 xmlEntityPtr entity
= NULL
;
7980 xmlParserInputPtr input
;
7985 name
= xmlParseName(ctxt
);
7987 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7988 "xmlParsePEReference: no name\n");
7992 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7999 * Increase the number of entity references parsed
8004 * Request the entity from SAX
8006 if ((ctxt
->sax
!= NULL
) &&
8007 (ctxt
->sax
->getParameterEntity
!= NULL
))
8008 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
8009 if (ctxt
->instate
== XML_PARSER_EOF
)
8011 if (entity
== NULL
) {
8013 * [ WFC: Entity Declared ]
8014 * In a document without any DTD, a document with only an
8015 * internal DTD subset which contains no parameter entity
8016 * references, or a document with "standalone='yes'", ...
8017 * ... The declaration of a parameter entity must precede
8018 * any reference to it...
8020 if ((ctxt
->standalone
== 1) ||
8021 ((ctxt
->hasExternalSubset
== 0) &&
8022 (ctxt
->hasPErefs
== 0))) {
8023 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
8024 "PEReference: %%%s; not found\n",
8028 * [ VC: Entity Declared ]
8029 * In a document with an external subset or external
8030 * parameter entities with "standalone='no'", ...
8031 * ... The declaration of a parameter entity must
8032 * precede any reference to it...
8034 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8035 "PEReference: %%%s; not found\n",
8039 xmlParserEntityCheck(ctxt
, 0, NULL
, 0);
8042 * Internal checking in case the entity quest barfed
8044 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
8045 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
8046 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8047 "Internal: %%%s; is not a parameter entity\n",
8049 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
8050 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
8051 if (xmlPushInput(ctxt
, input
) < 0)
8056 * handle the extra spaces added before and after
8057 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8059 input
= xmlNewEntityInputStream(ctxt
, entity
);
8060 if (xmlPushInput(ctxt
, input
) < 0)
8062 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
8063 (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) &&
8064 (IS_BLANK_CH(NXT(5)))) {
8065 xmlParseTextDecl(ctxt
);
8067 XML_ERR_UNSUPPORTED_ENCODING
) {
8069 * The XML REC instructs us to stop parsing
8072 ctxt
->instate
= XML_PARSER_EOF
;
8078 ctxt
->hasPErefs
= 1;
8082 * xmlLoadEntityContent:
8083 * @ctxt: an XML parser context
8084 * @entity: an unloaded system entity
8086 * Load the original content of the given system entity from the
8087 * ExternalID/SystemID given. This is to be used for Included in Literal
8088 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
8090 * Returns 0 in case of success and -1 in case of failure
8093 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
8094 xmlParserInputPtr input
;
8099 if ((ctxt
== NULL
) || (entity
== NULL
) ||
8100 ((entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
) &&
8101 (entity
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) ||
8102 (entity
->content
!= NULL
)) {
8103 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8104 "xmlLoadEntityContent parameter error");
8108 if (xmlParserDebugEntities
)
8109 xmlGenericError(xmlGenericErrorContext
,
8110 "Reading %s entity content input\n", entity
->name
);
8112 buf
= xmlBufferCreate();
8114 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8115 "xmlLoadEntityContent parameter error");
8119 input
= xmlNewEntityInputStream(ctxt
, entity
);
8120 if (input
== NULL
) {
8121 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8122 "xmlLoadEntityContent input error");
8128 * Push the entity as the current input, read char by char
8129 * saving to the buffer until the end of the entity or an error
8131 if (xmlPushInput(ctxt
, input
) < 0) {
8138 while ((ctxt
->input
== input
) && (ctxt
->input
->cur
< ctxt
->input
->end
) &&
8140 xmlBufferAdd(buf
, ctxt
->input
->cur
, l
);
8141 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
8144 if (ctxt
->instate
== XML_PARSER_EOF
) {
8154 if (ctxt
->instate
== XML_PARSER_EOF
) {
8162 if ((ctxt
->input
== input
) && (ctxt
->input
->cur
>= ctxt
->input
->end
)) {
8164 } else if (!IS_CHAR(c
)) {
8165 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
8166 "xmlLoadEntityContent: invalid char value %d\n",
8171 entity
->content
= buf
->content
;
8172 buf
->content
= NULL
;
8179 * xmlParseStringPEReference:
8180 * @ctxt: an XML parser context
8181 * @str: a pointer to an index in the string
8183 * parse PEReference declarations
8185 * [69] PEReference ::= '%' Name ';'
8187 * [ WFC: No Recursion ]
8188 * A parsed entity must not contain a recursive
8189 * reference to itself, either directly or indirectly.
8191 * [ WFC: Entity Declared ]
8192 * In a document without any DTD, a document with only an internal DTD
8193 * subset which contains no parameter entity references, or a document
8194 * with "standalone='yes'", ... ... The declaration of a parameter
8195 * entity must precede any reference to it...
8197 * [ VC: Entity Declared ]
8198 * In a document with an external subset or external parameter entities
8199 * with "standalone='no'", ... ... The declaration of a parameter entity
8200 * must precede any reference to it...
8203 * Parameter-entity references may only appear in the DTD.
8204 * NOTE: misleading but this is handled.
8206 * Returns the string of the entity content.
8207 * str is updated to the current value of the index
8210 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
8214 xmlEntityPtr entity
= NULL
;
8216 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
8222 name
= xmlParseStringName(ctxt
, &ptr
);
8224 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8225 "xmlParseStringPEReference: no name\n");
8231 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
8239 * Increase the number of entity references parsed
8244 * Request the entity from SAX
8246 if ((ctxt
->sax
!= NULL
) &&
8247 (ctxt
->sax
->getParameterEntity
!= NULL
))
8248 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
8249 if (ctxt
->instate
== XML_PARSER_EOF
) {
8253 if (entity
== NULL
) {
8255 * [ WFC: Entity Declared ]
8256 * In a document without any DTD, a document with only an
8257 * internal DTD subset which contains no parameter entity
8258 * references, or a document with "standalone='yes'", ...
8259 * ... The declaration of a parameter entity must precede
8260 * any reference to it...
8262 if ((ctxt
->standalone
== 1) ||
8263 ((ctxt
->hasExternalSubset
== 0) && (ctxt
->hasPErefs
== 0))) {
8264 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
8265 "PEReference: %%%s; not found\n", name
);
8268 * [ VC: Entity Declared ]
8269 * In a document with an external subset or external
8270 * parameter entities with "standalone='no'", ...
8271 * ... The declaration of a parameter entity must
8272 * precede any reference to it...
8274 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8275 "PEReference: %%%s; not found\n",
8279 xmlParserEntityCheck(ctxt
, 0, NULL
, 0);
8282 * Internal checking in case the entity quest barfed
8284 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
8285 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
8286 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8287 "%%%s; is not a parameter entity\n",
8291 ctxt
->hasPErefs
= 1;
8298 * xmlParseDocTypeDecl:
8299 * @ctxt: an XML parser context
8301 * parse a DOCTYPE declaration
8303 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8304 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8306 * [ VC: Root Element Type ]
8307 * The Name in the document type declaration must match the element
8308 * type of the root element.
8312 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
8313 const xmlChar
*name
= NULL
;
8314 xmlChar
*ExternalID
= NULL
;
8315 xmlChar
*URI
= NULL
;
8318 * We know that '<!DOCTYPE' has been detected.
8325 * Parse the DOCTYPE name.
8327 name
= xmlParseName(ctxt
);
8329 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8330 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8332 ctxt
->intSubName
= name
;
8337 * Check for SystemID and ExternalID
8339 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
8341 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
8342 ctxt
->hasExternalSubset
= 1;
8344 ctxt
->extSubURI
= URI
;
8345 ctxt
->extSubSystem
= ExternalID
;
8350 * Create and update the internal subset.
8352 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
8353 (!ctxt
->disableSAX
))
8354 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
8355 if (ctxt
->instate
== XML_PARSER_EOF
)
8359 * Are there any internal subset declarations ?
8360 * they are handled separately in xmlParseInternalSubset()
8366 * We should be at the end of the DOCTYPE declaration.
8369 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8375 * xmlParseInternalSubset:
8376 * @ctxt: an XML parser context
8378 * parse the internal subset declaration
8380 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8384 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
8386 * Is there any DTD definition ?
8389 ctxt
->instate
= XML_PARSER_DTD
;
8392 * Parse the succession of Markup declarations and
8394 * Subsequence (markupdecl | PEReference | S)*
8396 while ((RAW
!= ']') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8397 const xmlChar
*check
= CUR_PTR
;
8398 unsigned int cons
= ctxt
->input
->consumed
;
8401 xmlParseMarkupDecl(ctxt
);
8402 xmlParsePEReference(ctxt
);
8405 * Pop-up of finished entities.
8407 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
8410 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
8411 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8412 "xmlParseInternalSubset: error detected in Markup declaration\n");
8423 * We should be at the end of the DOCTYPE declaration.
8426 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8431 #ifdef LIBXML_SAX1_ENABLED
8433 * xmlParseAttribute:
8434 * @ctxt: an XML parser context
8435 * @value: a xmlChar ** used to store the value of the attribute
8437 * parse an attribute
8439 * [41] Attribute ::= Name Eq AttValue
8441 * [ WFC: No External Entity References ]
8442 * Attribute values cannot contain direct or indirect entity references
8443 * to external entities.
8445 * [ WFC: No < in Attribute Values ]
8446 * The replacement text of any entity referred to directly or indirectly in
8447 * an attribute value (other than "&lt;") must not contain a <.
8449 * [ VC: Attribute Value Type ]
8450 * The attribute must have been declared; the value must be of the type
8453 * [25] Eq ::= S? '=' S?
8457 * [NS 11] Attribute ::= QName Eq AttValue
8459 * Also the case QName == xmlns:??? is handled independently as a namespace
8462 * Returns the attribute name, and the value in *value.
8466 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
8467 const xmlChar
*name
;
8472 name
= xmlParseName(ctxt
);
8474 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8475 "error parsing attribute name\n");
8486 val
= xmlParseAttValue(ctxt
);
8487 ctxt
->instate
= XML_PARSER_CONTENT
;
8489 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8490 "Specification mandate value for attribute %s\n", name
);
8495 * Check that xml:lang conforms to the specification
8496 * No longer registered as an error, just generate a warning now
8497 * since this was deprecated in XML second edition
8499 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
8500 if (!xmlCheckLanguageID(val
)) {
8501 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8502 "Malformed value for xml:lang : %s\n",
8508 * Check that xml:space conforms to the specification
8510 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
8511 if (xmlStrEqual(val
, BAD_CAST
"default"))
8513 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
8516 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8517 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8528 * @ctxt: an XML parser context
8530 * parse a start of tag either for rule element or
8531 * EmptyElement. In both cases we don't parse the tag closing chars.
8533 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8535 * [ WFC: Unique Att Spec ]
8536 * No attribute name may appear more than once in the same start-tag or
8537 * empty-element tag.
8539 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8541 * [ WFC: Unique Att Spec ]
8542 * No attribute name may appear more than once in the same start-tag or
8543 * empty-element tag.
8547 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8549 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8551 * Returns the element name parsed
8555 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
8556 const xmlChar
*name
;
8557 const xmlChar
*attname
;
8559 const xmlChar
**atts
= ctxt
->atts
;
8561 int maxatts
= ctxt
->maxatts
;
8564 if (RAW
!= '<') return(NULL
);
8567 name
= xmlParseName(ctxt
);
8569 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8570 "xmlParseStartTag: invalid element name\n");
8575 * Now parse the attributes, it ends up with the ending
8582 while (((RAW
!= '>') &&
8583 ((RAW
!= '/') || (NXT(1) != '>')) &&
8584 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8585 const xmlChar
*q
= CUR_PTR
;
8586 unsigned int cons
= ctxt
->input
->consumed
;
8588 attname
= xmlParseAttribute(ctxt
, &attvalue
);
8589 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
8591 * [ WFC: Unique Att Spec ]
8592 * No attribute name may appear more than once in the same
8593 * start-tag or empty-element tag.
8595 for (i
= 0; i
< nbatts
;i
+= 2) {
8596 if (xmlStrEqual(atts
[i
], attname
)) {
8597 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8603 * Add the pair to atts
8606 maxatts
= 22; /* allow for 10 attrs by default */
8607 atts
= (const xmlChar
**)
8608 xmlMalloc(maxatts
* sizeof(xmlChar
*));
8610 xmlErrMemory(ctxt
, NULL
);
8611 if (attvalue
!= NULL
)
8616 ctxt
->maxatts
= maxatts
;
8617 } else if (nbatts
+ 4 > maxatts
) {
8621 n
= (const xmlChar
**) xmlRealloc((void *) atts
,
8622 maxatts
* sizeof(const xmlChar
*));
8624 xmlErrMemory(ctxt
, NULL
);
8625 if (attvalue
!= NULL
)
8631 ctxt
->maxatts
= maxatts
;
8633 atts
[nbatts
++] = attname
;
8634 atts
[nbatts
++] = attvalue
;
8635 atts
[nbatts
] = NULL
;
8636 atts
[nbatts
+ 1] = NULL
;
8638 if (attvalue
!= NULL
)
8645 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
8647 if (!IS_BLANK_CH(RAW
)) {
8648 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8649 "attributes construct error\n");
8652 if ((cons
== ctxt
->input
->consumed
) && (q
== CUR_PTR
) &&
8653 (attname
== NULL
) && (attvalue
== NULL
)) {
8654 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
8655 "xmlParseStartTag: problem parsing attributes\n");
8663 * SAX: Start of Element !
8665 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
8666 (!ctxt
->disableSAX
)) {
8668 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
8670 ctxt
->sax
->startElement(ctxt
->userData
, name
, NULL
);
8674 /* Free only the content strings */
8675 for (i
= 1;i
< nbatts
;i
+=2)
8676 if (atts
[i
] != NULL
)
8677 xmlFree((xmlChar
*) atts
[i
]);
8684 * @ctxt: an XML parser context
8685 * @line: line of the start tag
8686 * @nsNr: number of namespaces on the start tag
8688 * parse an end of tag
8690 * [42] ETag ::= '</' Name S? '>'
8694 * [NS 9] ETag ::= '</' QName S? '>'
8698 xmlParseEndTag1(xmlParserCtxtPtr ctxt
, int line
) {
8699 const xmlChar
*name
;
8702 if ((RAW
!= '<') || (NXT(1) != '/')) {
8703 xmlFatalErrMsg(ctxt
, XML_ERR_LTSLASH_REQUIRED
,
8704 "xmlParseEndTag: '</' not found\n");
8709 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
8712 * We should definitely be at the ending "S? '>'" part
8716 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
8717 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
8722 * [ WFC: Element Type Match ]
8723 * The Name in an element's end-tag must match the element type in the
8727 if (name
!= (xmlChar
*)1) {
8728 if (name
== NULL
) name
= BAD_CAST
"unparseable";
8729 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
8730 "Opening and ending tag mismatch: %s line %d and %s\n",
8731 ctxt
->name
, line
, name
);
8737 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
8738 (!ctxt
->disableSAX
))
8739 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
8748 * @ctxt: an XML parser context
8750 * parse an end of tag
8752 * [42] ETag ::= '</' Name S? '>'
8756 * [NS 9] ETag ::= '</' QName S? '>'
8760 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
8761 xmlParseEndTag1(ctxt
, 0);
8763 #endif /* LIBXML_SAX1_ENABLED */
8765 /************************************************************************
8767 * SAX 2 specific operations *
8769 ************************************************************************/
8773 * @ctxt: an XML parser context
8774 * @prefix: the prefix to lookup
8776 * Lookup the namespace name for the @prefix (which can be NULL)
8777 * The prefix must come from the @ctxt->dict dictionary
8779 * Returns the namespace name or NULL if not bound
8781 static const xmlChar
*
8782 xmlGetNamespace(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
) {
8785 if (prefix
== ctxt
->str_xml
) return(ctxt
->str_xml_ns
);
8786 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-=2)
8787 if (ctxt
->nsTab
[i
] == prefix
) {
8788 if ((prefix
== NULL
) && (*ctxt
->nsTab
[i
+ 1] == 0))
8790 return(ctxt
->nsTab
[i
+ 1]);
8797 * @ctxt: an XML parser context
8798 * @prefix: pointer to store the prefix part
8800 * parse an XML Namespace QName
8802 * [6] QName ::= (Prefix ':')? LocalPart
8803 * [7] Prefix ::= NCName
8804 * [8] LocalPart ::= NCName
8806 * Returns the Name parsed or NULL
8809 static const xmlChar
*
8810 xmlParseQName(xmlParserCtxtPtr ctxt
, const xmlChar
**prefix
) {
8811 const xmlChar
*l
, *p
;
8815 l
= xmlParseNCName(ctxt
);
8818 l
= xmlParseName(ctxt
);
8820 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8821 "Failed to parse QName '%s'\n", l
, NULL
, NULL
);
8831 l
= xmlParseNCName(ctxt
);
8835 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8836 "Failed to parse QName '%s:'\n", p
, NULL
, NULL
);
8837 l
= xmlParseNmtoken(ctxt
);
8839 tmp
= xmlBuildQName(BAD_CAST
"", p
, NULL
, 0);
8841 tmp
= xmlBuildQName(l
, p
, NULL
, 0);
8844 p
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8845 if (tmp
!= NULL
) xmlFree(tmp
);
8852 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8853 "Failed to parse QName '%s:%s:'\n", p
, l
, NULL
);
8855 tmp
= (xmlChar
*) xmlParseName(ctxt
);
8857 tmp
= xmlBuildQName(tmp
, l
, NULL
, 0);
8858 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8859 if (tmp
!= NULL
) xmlFree(tmp
);
8863 tmp
= xmlBuildQName(BAD_CAST
"", l
, NULL
, 0);
8864 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8865 if (tmp
!= NULL
) xmlFree(tmp
);
8876 * xmlParseQNameAndCompare:
8877 * @ctxt: an XML parser context
8878 * @name: the localname
8879 * @prefix: the prefix, if any.
8881 * parse an XML name and compares for match
8882 * (specialized for endtag parsing)
8884 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8885 * and the name for mismatch
8888 static const xmlChar
*
8889 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *name
,
8890 xmlChar
const *prefix
) {
8894 const xmlChar
*prefix2
;
8896 if (prefix
== NULL
) return(xmlParseNameAndCompare(ctxt
, name
));
8899 in
= ctxt
->input
->cur
;
8902 while (*in
!= 0 && *in
== *cmp
) {
8906 if ((*cmp
== 0) && (*in
== ':')) {
8909 while (*in
!= 0 && *in
== *cmp
) {
8913 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
8915 ctxt
->input
->cur
= in
;
8916 return((const xmlChar
*) 1);
8920 * all strings come from the dictionary, equality can be done directly
8922 ret
= xmlParseQName (ctxt
, &prefix2
);
8923 if ((ret
== name
) && (prefix
== prefix2
))
8924 return((const xmlChar
*) 1);
8929 * xmlParseAttValueInternal:
8930 * @ctxt: an XML parser context
8931 * @len: attribute len result
8932 * @alloc: whether the attribute was reallocated as a new string
8933 * @normalize: if 1 then further non-CDATA normalization must be done
8935 * parse a value for an attribute.
8936 * NOTE: if no normalization is needed, the routine will return pointers
8937 * directly from the data buffer.
8939 * 3.3.3 Attribute-Value Normalization:
8940 * Before the value of an attribute is passed to the application or
8941 * checked for validity, the XML processor must normalize it as follows:
8942 * - a character reference is processed by appending the referenced
8943 * character to the attribute value
8944 * - an entity reference is processed by recursively processing the
8945 * replacement text of the entity
8946 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8947 * appending #x20 to the normalized value, except that only a single
8948 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8949 * parsed entity or the literal entity value of an internal parsed entity
8950 * - other characters are processed by appending them to the normalized value
8951 * If the declared value is not CDATA, then the XML processor must further
8952 * process the normalized attribute value by discarding any leading and
8953 * trailing space (#x20) characters, and by replacing sequences of space
8954 * (#x20) characters by a single space (#x20) character.
8955 * All attributes for which no declaration has been read should be treated
8956 * by a non-validating parser as if declared CDATA.
8958 * Returns the AttValue parsed or NULL. The value has to be freed by the
8959 * caller if it was copied, this can be detected by val[*len] == 0.
8963 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
, int *len
, int *alloc
,
8967 const xmlChar
*in
= NULL
, *start
, *end
, *last
;
8968 xmlChar
*ret
= NULL
;
8972 in
= (xmlChar
*) CUR_PTR
;
8973 line
= ctxt
->input
->line
;
8974 col
= ctxt
->input
->col
;
8975 if (*in
!= '"' && *in
!= '\'') {
8976 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
8979 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
8982 * try to handle in this routine the most common case where no
8983 * allocation of a new string is required and where content is
8988 end
= ctxt
->input
->end
;
8991 const xmlChar
*oldbase
= ctxt
->input
->base
;
8993 if (oldbase
!= ctxt
->input
->base
) {
8994 long delta
= ctxt
->input
->base
- oldbase
;
8995 start
= start
+ delta
;
8998 end
= ctxt
->input
->end
;
9002 * Skip any leading spaces
9004 while ((in
< end
) && (*in
!= limit
) &&
9005 ((*in
== 0x20) || (*in
== 0x9) ||
9006 (*in
== 0xA) || (*in
== 0xD))) {
9015 const xmlChar
*oldbase
= ctxt
->input
->base
;
9017 if (ctxt
->instate
== XML_PARSER_EOF
)
9019 if (oldbase
!= ctxt
->input
->base
) {
9020 long delta
= ctxt
->input
->base
- oldbase
;
9021 start
= start
+ delta
;
9024 end
= ctxt
->input
->end
;
9025 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9026 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9027 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9028 "AttValue length too long\n");
9033 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9034 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9036 if ((*in
++ == 0x20) && (*in
== 0x20)) break;
9038 const xmlChar
*oldbase
= ctxt
->input
->base
;
9040 if (ctxt
->instate
== XML_PARSER_EOF
)
9042 if (oldbase
!= ctxt
->input
->base
) {
9043 long delta
= ctxt
->input
->base
- oldbase
;
9044 start
= start
+ delta
;
9047 end
= ctxt
->input
->end
;
9048 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9049 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9050 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9051 "AttValue length too long\n");
9058 * skip the trailing blanks
9060 while ((last
[-1] == 0x20) && (last
> start
)) last
--;
9061 while ((in
< end
) && (*in
!= limit
) &&
9062 ((*in
== 0x20) || (*in
== 0x9) ||
9063 (*in
== 0xA) || (*in
== 0xD))) {
9071 const xmlChar
*oldbase
= ctxt
->input
->base
;
9073 if (ctxt
->instate
== XML_PARSER_EOF
)
9075 if (oldbase
!= ctxt
->input
->base
) {
9076 long delta
= ctxt
->input
->base
- oldbase
;
9077 start
= start
+ delta
;
9079 last
= last
+ delta
;
9081 end
= ctxt
->input
->end
;
9082 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9083 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9084 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9085 "AttValue length too long\n");
9090 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9091 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9092 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9093 "AttValue length too long\n");
9096 if (*in
!= limit
) goto need_complex
;
9098 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9099 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9103 const xmlChar
*oldbase
= ctxt
->input
->base
;
9105 if (ctxt
->instate
== XML_PARSER_EOF
)
9107 if (oldbase
!= ctxt
->input
->base
) {
9108 long delta
= ctxt
->input
->base
- oldbase
;
9109 start
= start
+ delta
;
9112 end
= ctxt
->input
->end
;
9113 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9114 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9115 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9116 "AttValue length too long\n");
9122 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9123 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9124 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9125 "AttValue length too long\n");
9128 if (*in
!= limit
) goto need_complex
;
9133 *len
= last
- start
;
9134 ret
= (xmlChar
*) start
;
9136 if (alloc
) *alloc
= 1;
9137 ret
= xmlStrndup(start
, last
- start
);
9140 ctxt
->input
->line
= line
;
9141 ctxt
->input
->col
= col
;
9142 if (alloc
) *alloc
= 0;
9145 if (alloc
) *alloc
= 1;
9146 return xmlParseAttValueComplex(ctxt
, len
, normalize
);
9150 * xmlParseAttribute2:
9151 * @ctxt: an XML parser context
9152 * @pref: the element prefix
9153 * @elem: the element name
9154 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9155 * @value: a xmlChar ** used to store the value of the attribute
9156 * @len: an int * to save the length of the attribute
9157 * @alloc: an int * to indicate if the attribute was allocated
9159 * parse an attribute in the new SAX2 framework.
9161 * Returns the attribute name, and the value in *value.
9164 static const xmlChar
*
9165 xmlParseAttribute2(xmlParserCtxtPtr ctxt
,
9166 const xmlChar
* pref
, const xmlChar
* elem
,
9167 const xmlChar
** prefix
, xmlChar
** value
,
9168 int *len
, int *alloc
)
9170 const xmlChar
*name
;
9171 xmlChar
*val
, *internal_val
= NULL
;
9176 name
= xmlParseQName(ctxt
, prefix
);
9178 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9179 "error parsing attribute name\n");
9184 * get the type if needed
9186 if (ctxt
->attsSpecial
!= NULL
) {
9189 type
= (int) (long) xmlHashQLookup2(ctxt
->attsSpecial
,
9190 pref
, elem
, *prefix
, name
);
9202 val
= xmlParseAttValueInternal(ctxt
, len
, alloc
, normalize
);
9205 * Sometimes a second normalisation pass for spaces is needed
9206 * but that only happens if charrefs or entity references
9207 * have been used in the attribute value, i.e. the attribute
9208 * value has been extracted in an allocated string already.
9211 const xmlChar
*val2
;
9213 val2
= xmlAttrNormalizeSpace2(ctxt
, val
, len
);
9214 if ((val2
!= NULL
) && (val2
!= val
)) {
9216 val
= (xmlChar
*) val2
;
9220 ctxt
->instate
= XML_PARSER_CONTENT
;
9222 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
9223 "Specification mandate value for attribute %s\n",
9228 if (*prefix
== ctxt
->str_xml
) {
9230 * Check that xml:lang conforms to the specification
9231 * No longer registered as an error, just generate a warning now
9232 * since this was deprecated in XML second edition
9234 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"lang"))) {
9235 internal_val
= xmlStrndup(val
, *len
);
9236 if (!xmlCheckLanguageID(internal_val
)) {
9237 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
9238 "Malformed value for xml:lang : %s\n",
9239 internal_val
, NULL
);
9244 * Check that xml:space conforms to the specification
9246 if (xmlStrEqual(name
, BAD_CAST
"space")) {
9247 internal_val
= xmlStrndup(val
, *len
);
9248 if (xmlStrEqual(internal_val
, BAD_CAST
"default"))
9250 else if (xmlStrEqual(internal_val
, BAD_CAST
"preserve"))
9253 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
9254 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9255 internal_val
, NULL
);
9259 xmlFree(internal_val
);
9267 * xmlParseStartTag2:
9268 * @ctxt: an XML parser context
9270 * parse a start of tag either for rule element or
9271 * EmptyElement. In both cases we don't parse the tag closing chars.
9272 * This routine is called when running SAX2 parsing
9274 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9276 * [ WFC: Unique Att Spec ]
9277 * No attribute name may appear more than once in the same start-tag or
9278 * empty-element tag.
9280 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9282 * [ WFC: Unique Att Spec ]
9283 * No attribute name may appear more than once in the same start-tag or
9284 * empty-element tag.
9288 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9290 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9292 * Returns the element name parsed
9295 static const xmlChar
*
9296 xmlParseStartTag2(xmlParserCtxtPtr ctxt
, const xmlChar
**pref
,
9297 const xmlChar
**URI
, int *tlen
) {
9298 const xmlChar
*localname
;
9299 const xmlChar
*prefix
;
9300 const xmlChar
*attname
;
9301 const xmlChar
*aprefix
;
9302 const xmlChar
*nsname
;
9304 const xmlChar
**atts
= ctxt
->atts
;
9305 int maxatts
= ctxt
->maxatts
;
9306 int nratts
, nbatts
, nbdef
;
9307 int i
, j
, nbNs
, attval
, oldline
, oldcol
;
9308 const xmlChar
*base
;
9310 int nsNr
= ctxt
->nsNr
;
9312 if (RAW
!= '<') return(NULL
);
9316 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9317 * point since the attribute values may be stored as pointers to
9318 * the buffer and calling SHRINK would destroy them !
9319 * The Shrinking is only possible once the full set of attribute
9320 * callbacks have been done.
9324 base
= ctxt
->input
->base
;
9325 cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
9326 oldline
= ctxt
->input
->line
;
9327 oldcol
= ctxt
->input
->col
;
9333 /* Forget any namespaces added during an earlier parse of this element. */
9336 localname
= xmlParseQName(ctxt
, &prefix
);
9337 if (localname
== NULL
) {
9338 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9339 "StartTag: invalid element name\n");
9342 *tlen
= ctxt
->input
->cur
- ctxt
->input
->base
- cur
;
9345 * Now parse the attributes, it ends up with the ending
9351 if (ctxt
->input
->base
!= base
) goto base_changed
;
9353 while (((RAW
!= '>') &&
9354 ((RAW
!= '/') || (NXT(1) != '>')) &&
9355 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
9356 const xmlChar
*q
= CUR_PTR
;
9357 unsigned int cons
= ctxt
->input
->consumed
;
9358 int len
= -1, alloc
= 0;
9360 attname
= xmlParseAttribute2(ctxt
, prefix
, localname
,
9361 &aprefix
, &attvalue
, &len
, &alloc
);
9362 if (ctxt
->input
->base
!= base
) {
9363 if ((attvalue
!= NULL
) && (alloc
!= 0))
9368 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
9369 if (len
< 0) len
= xmlStrlen(attvalue
);
9370 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9371 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9375 xmlErrMemory(ctxt
, "dictionary allocation failure");
9376 if ((attvalue
!= NULL
) && (alloc
!= 0))
9381 uri
= xmlParseURI((const char *) URL
);
9383 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9384 "xmlns: '%s' is not a valid URI\n",
9387 if (uri
->scheme
== NULL
) {
9388 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9389 "xmlns: URI %s is not absolute\n",
9394 if (URL
== ctxt
->str_xml_ns
) {
9395 if (attname
!= ctxt
->str_xml
) {
9396 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9397 "xml namespace URI cannot be the default namespace\n",
9400 goto skip_default_ns
;
9404 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9405 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9406 "reuse of the xmlns namespace name is forbidden\n",
9408 goto skip_default_ns
;
9412 * check that it's not a defined namespace
9414 for (j
= 1;j
<= nbNs
;j
++)
9415 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9418 xmlErrAttributeDup(ctxt
, NULL
, attname
);
9420 if (nsPush(ctxt
, NULL
, URL
) > 0) nbNs
++;
9422 if (alloc
!= 0) xmlFree(attvalue
);
9423 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9425 if (!IS_BLANK_CH(RAW
)) {
9426 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9427 "attributes construct error\n");
9433 if (aprefix
== ctxt
->str_xmlns
) {
9434 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9437 if (attname
== ctxt
->str_xml
) {
9438 if (URL
!= ctxt
->str_xml_ns
) {
9439 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9440 "xml namespace prefix mapped to wrong URI\n",
9444 * Do not keep a namespace definition node
9448 if (URL
== ctxt
->str_xml_ns
) {
9449 if (attname
!= ctxt
->str_xml
) {
9450 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9451 "xml namespace URI mapped to wrong prefix\n",
9456 if (attname
== ctxt
->str_xmlns
) {
9457 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9458 "redefinition of the xmlns prefix is forbidden\n",
9464 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9465 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9466 "reuse of the xmlns namespace name is forbidden\n",
9470 if ((URL
== NULL
) || (URL
[0] == 0)) {
9471 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9472 "xmlns:%s: Empty XML namespace is not allowed\n",
9473 attname
, NULL
, NULL
);
9476 uri
= xmlParseURI((const char *) URL
);
9478 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9479 "xmlns:%s: '%s' is not a valid URI\n",
9480 attname
, URL
, NULL
);
9482 if ((ctxt
->pedantic
) && (uri
->scheme
== NULL
)) {
9483 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9484 "xmlns:%s: URI %s is not absolute\n",
9485 attname
, URL
, NULL
);
9492 * check that it's not a defined namespace
9494 for (j
= 1;j
<= nbNs
;j
++)
9495 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9498 xmlErrAttributeDup(ctxt
, aprefix
, attname
);
9500 if (nsPush(ctxt
, attname
, URL
) > 0) nbNs
++;
9502 if (alloc
!= 0) xmlFree(attvalue
);
9503 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9505 if (!IS_BLANK_CH(RAW
)) {
9506 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9507 "attributes construct error\n");
9511 if (ctxt
->input
->base
!= base
) goto base_changed
;
9516 * Add the pair to atts
9518 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9519 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9520 if (attvalue
[len
] == 0)
9524 maxatts
= ctxt
->maxatts
;
9527 ctxt
->attallocs
[nratts
++] = alloc
;
9528 atts
[nbatts
++] = attname
;
9529 atts
[nbatts
++] = aprefix
;
9530 atts
[nbatts
++] = NULL
; /* the URI will be fetched later */
9531 atts
[nbatts
++] = attvalue
;
9533 atts
[nbatts
++] = attvalue
;
9535 * tag if some deallocation is needed
9537 if (alloc
!= 0) attval
= 1;
9539 if ((attvalue
!= NULL
) && (attvalue
[len
] == 0))
9546 if (ctxt
->instate
== XML_PARSER_EOF
)
9548 if (ctxt
->input
->base
!= base
) goto base_changed
;
9549 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9551 if (!IS_BLANK_CH(RAW
)) {
9552 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9553 "attributes construct error\n");
9557 if ((cons
== ctxt
->input
->consumed
) && (q
== CUR_PTR
) &&
9558 (attname
== NULL
) && (attvalue
== NULL
)) {
9559 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9560 "xmlParseStartTag: problem parsing attributes\n");
9564 if (ctxt
->input
->base
!= base
) goto base_changed
;
9568 * The attributes defaulting
9570 if (ctxt
->attsDefault
!= NULL
) {
9571 xmlDefAttrsPtr defaults
;
9573 defaults
= xmlHashLookup2(ctxt
->attsDefault
, localname
, prefix
);
9574 if (defaults
!= NULL
) {
9575 for (i
= 0;i
< defaults
->nbAttrs
;i
++) {
9576 attname
= defaults
->values
[5 * i
];
9577 aprefix
= defaults
->values
[5 * i
+ 1];
9580 * special work for namespaces defaulted defs
9582 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9584 * check that it's not a defined namespace
9586 for (j
= 1;j
<= nbNs
;j
++)
9587 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9589 if (j
<= nbNs
) continue;
9591 nsname
= xmlGetNamespace(ctxt
, NULL
);
9592 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9593 if (nsPush(ctxt
, NULL
,
9594 defaults
->values
[5 * i
+ 2]) > 0)
9597 } else if (aprefix
== ctxt
->str_xmlns
) {
9599 * check that it's not a defined namespace
9601 for (j
= 1;j
<= nbNs
;j
++)
9602 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9604 if (j
<= nbNs
) continue;
9606 nsname
= xmlGetNamespace(ctxt
, attname
);
9607 if (nsname
!= defaults
->values
[2]) {
9608 if (nsPush(ctxt
, attname
,
9609 defaults
->values
[5 * i
+ 2]) > 0)
9614 * check that it's not a defined attribute
9616 for (j
= 0;j
< nbatts
;j
+=5) {
9617 if ((attname
== atts
[j
]) && (aprefix
== atts
[j
+1]))
9620 if (j
< nbatts
) continue;
9622 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9623 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9626 maxatts
= ctxt
->maxatts
;
9629 atts
[nbatts
++] = attname
;
9630 atts
[nbatts
++] = aprefix
;
9631 if (aprefix
== NULL
)
9632 atts
[nbatts
++] = NULL
;
9634 atts
[nbatts
++] = xmlGetNamespace(ctxt
, aprefix
);
9635 atts
[nbatts
++] = defaults
->values
[5 * i
+ 2];
9636 atts
[nbatts
++] = defaults
->values
[5 * i
+ 3];
9637 if ((ctxt
->standalone
== 1) &&
9638 (defaults
->values
[5 * i
+ 4] != NULL
)) {
9639 xmlValidityError(ctxt
, XML_DTD_STANDALONE_DEFAULTED
,
9640 "standalone: attribute %s on %s defaulted from external subset\n",
9641 attname
, localname
);
9650 * The attributes checkings
9652 for (i
= 0; i
< nbatts
;i
+= 5) {
9654 * The default namespace does not apply to attribute names.
9656 if (atts
[i
+ 1] != NULL
) {
9657 nsname
= xmlGetNamespace(ctxt
, atts
[i
+ 1]);
9658 if (nsname
== NULL
) {
9659 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9660 "Namespace prefix %s for %s on %s is not defined\n",
9661 atts
[i
+ 1], atts
[i
], localname
);
9663 atts
[i
+ 2] = nsname
;
9667 * [ WFC: Unique Att Spec ]
9668 * No attribute name may appear more than once in the same
9669 * start-tag or empty-element tag.
9670 * As extended by the Namespace in XML REC.
9672 for (j
= 0; j
< i
;j
+= 5) {
9673 if (atts
[i
] == atts
[j
]) {
9674 if (atts
[i
+1] == atts
[j
+1]) {
9675 xmlErrAttributeDup(ctxt
, atts
[i
+1], atts
[i
]);
9678 if ((nsname
!= NULL
) && (atts
[j
+ 2] == nsname
)) {
9679 xmlNsErr(ctxt
, XML_NS_ERR_ATTRIBUTE_REDEFINED
,
9680 "Namespaced Attribute %s in '%s' redefined\n",
9681 atts
[i
], nsname
, NULL
);
9688 nsname
= xmlGetNamespace(ctxt
, prefix
);
9689 if ((prefix
!= NULL
) && (nsname
== NULL
)) {
9690 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9691 "Namespace prefix %s on %s is not defined\n",
9692 prefix
, localname
, NULL
);
9698 * SAX: Start of Element !
9700 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElementNs
!= NULL
) &&
9701 (!ctxt
->disableSAX
)) {
9703 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9704 nsname
, nbNs
, &ctxt
->nsTab
[ctxt
->nsNr
- 2 * nbNs
],
9705 nbatts
/ 5, nbdef
, atts
);
9707 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9708 nsname
, 0, NULL
, nbatts
/ 5, nbdef
, atts
);
9712 * Free up attribute allocated strings if needed
9715 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9716 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9717 xmlFree((xmlChar
*) atts
[i
]);
9724 * the attribute strings are valid iff the base didn't change
9727 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9728 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9729 xmlFree((xmlChar
*) atts
[i
]);
9731 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
9732 ctxt
->input
->line
= oldline
;
9733 ctxt
->input
->col
= oldcol
;
9734 if (ctxt
->wellFormed
== 1) {
9742 * @ctxt: an XML parser context
9743 * @line: line of the start tag
9744 * @nsNr: number of namespaces on the start tag
9746 * parse an end of tag
9748 * [42] ETag ::= '</' Name S? '>'
9752 * [NS 9] ETag ::= '</' QName S? '>'
9756 xmlParseEndTag2(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
,
9757 const xmlChar
*URI
, int line
, int nsNr
, int tlen
) {
9758 const xmlChar
*name
;
9761 if ((RAW
!= '<') || (NXT(1) != '/')) {
9762 xmlFatalErr(ctxt
, XML_ERR_LTSLASH_REQUIRED
, NULL
);
9767 if ((tlen
> 0) && (xmlStrncmp(ctxt
->input
->cur
, ctxt
->name
, tlen
) == 0)) {
9768 if (ctxt
->input
->cur
[tlen
] == '>') {
9769 ctxt
->input
->cur
+= tlen
+ 1;
9770 ctxt
->input
->col
+= tlen
+ 1;
9773 ctxt
->input
->cur
+= tlen
;
9774 ctxt
->input
->col
+= tlen
;
9778 name
= xmlParseNameAndCompare(ctxt
, ctxt
->name
);
9780 name
= xmlParseQNameAndCompare(ctxt
, ctxt
->name
, prefix
);
9784 * We should definitely be at the ending "S? '>'" part
9787 if (ctxt
->instate
== XML_PARSER_EOF
)
9790 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
9791 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
9796 * [ WFC: Element Type Match ]
9797 * The Name in an element's end-tag must match the element type in the
9801 if (name
!= (xmlChar
*)1) {
9802 if (name
== NULL
) name
= BAD_CAST
"unparseable";
9803 if ((line
== 0) && (ctxt
->node
!= NULL
))
9804 line
= ctxt
->node
->line
;
9805 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
9806 "Opening and ending tag mismatch: %s line %d and %s\n",
9807 ctxt
->name
, line
, name
);
9814 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9815 (!ctxt
->disableSAX
))
9816 ctxt
->sax
->endElementNs(ctxt
->userData
, ctxt
->name
, prefix
, URI
);
9826 * @ctxt: an XML parser context
9828 * Parse escaped pure raw content.
9830 * [18] CDSect ::= CDStart CData CDEnd
9832 * [19] CDStart ::= '<![CDATA['
9834 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9836 * [21] CDEnd ::= ']]>'
9839 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
9840 xmlChar
*buf
= NULL
;
9842 int size
= XML_PARSER_BUFFER_SIZE
;
9848 /* Check 2.6.0 was NXT(0) not RAW */
9849 if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9854 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
9857 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9858 ctxt
->instate
= XML_PARSER_CONTENT
;
9864 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9865 ctxt
->instate
= XML_PARSER_CONTENT
;
9870 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9872 xmlErrMemory(ctxt
, NULL
);
9875 while (IS_CHAR(cur
) &&
9876 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
9877 if (len
+ 5 >= size
) {
9880 if ((size
> XML_MAX_TEXT_LENGTH
) &&
9881 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9882 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9883 "CData section too big found", NULL
);
9887 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* 2 * sizeof(xmlChar
));
9890 xmlErrMemory(ctxt
, NULL
);
9896 COPY_BUF(rl
,buf
,len
,r
);
9904 if (ctxt
->instate
== XML_PARSER_EOF
) {
9914 ctxt
->instate
= XML_PARSER_CONTENT
;
9916 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9917 "CData section not finished\n%.50s\n", buf
);
9924 * OK the buffer is to be consumed as cdata.
9926 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
9927 if (ctxt
->sax
->cdataBlock
!= NULL
)
9928 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
9929 else if (ctxt
->sax
->characters
!= NULL
)
9930 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
9937 * @ctxt: an XML parser context
9941 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9945 xmlParseContent(xmlParserCtxtPtr ctxt
) {
9947 while ((RAW
!= 0) &&
9948 ((RAW
!= '<') || (NXT(1) != '/')) &&
9949 (ctxt
->instate
!= XML_PARSER_EOF
)) {
9950 const xmlChar
*test
= CUR_PTR
;
9951 unsigned int cons
= ctxt
->input
->consumed
;
9952 const xmlChar
*cur
= ctxt
->input
->cur
;
9955 * First case : a Processing Instruction.
9957 if ((*cur
== '<') && (cur
[1] == '?')) {
9962 * Second case : a CDSection
9964 /* 2.6.0 test was *cur not RAW */
9965 else if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9966 xmlParseCDSect(ctxt
);
9970 * Third case : a comment
9972 else if ((*cur
== '<') && (NXT(1) == '!') &&
9973 (NXT(2) == '-') && (NXT(3) == '-')) {
9974 xmlParseComment(ctxt
);
9975 ctxt
->instate
= XML_PARSER_CONTENT
;
9979 * Fourth case : a sub-element.
9981 else if (*cur
== '<') {
9982 xmlParseElement(ctxt
);
9986 * Fifth case : a reference. If it has not been resolved,
9987 * parsing returns its Name, create the node
9990 else if (*cur
== '&') {
9991 xmlParseReference(ctxt
);
9995 * Last case, text. Note that References are handled directly.
9998 xmlParseCharData(ctxt
, 0);
10003 * Pop-up of finished entities.
10005 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
10009 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
10010 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
10011 "detected an error in element content\n");
10012 ctxt
->instate
= XML_PARSER_EOF
;
10020 * @ctxt: an XML parser context
10022 * parse an XML element, this is highly recursive
10024 * [39] element ::= EmptyElemTag | STag content ETag
10026 * [ WFC: Element Type Match ]
10027 * The Name in an element's end-tag must match the element type in the
10033 xmlParseElement(xmlParserCtxtPtr ctxt
) {
10034 const xmlChar
*name
;
10035 const xmlChar
*prefix
= NULL
;
10036 const xmlChar
*URI
= NULL
;
10037 xmlParserNodeInfo node_info
;
10038 int line
, tlen
= 0;
10040 int nsNr
= ctxt
->nsNr
;
10042 if (((unsigned int) ctxt
->nameNr
> xmlParserMaxDepth
) &&
10043 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
10044 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
10045 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10046 xmlParserMaxDepth
);
10047 ctxt
->instate
= XML_PARSER_EOF
;
10051 /* Capture start position */
10052 if (ctxt
->record_info
) {
10053 node_info
.begin_pos
= ctxt
->input
->consumed
+
10054 (CUR_PTR
- ctxt
->input
->base
);
10055 node_info
.begin_line
= ctxt
->input
->line
;
10058 if (ctxt
->spaceNr
== 0)
10059 spacePush(ctxt
, -1);
10060 else if (*ctxt
->space
== -2)
10061 spacePush(ctxt
, -1);
10063 spacePush(ctxt
, *ctxt
->space
);
10065 line
= ctxt
->input
->line
;
10066 #ifdef LIBXML_SAX1_ENABLED
10068 #endif /* LIBXML_SAX1_ENABLED */
10069 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
10070 #ifdef LIBXML_SAX1_ENABLED
10072 name
= xmlParseStartTag(ctxt
);
10073 #endif /* LIBXML_SAX1_ENABLED */
10074 if (ctxt
->instate
== XML_PARSER_EOF
)
10076 if (name
== NULL
) {
10080 namePush(ctxt
, name
);
10083 #ifdef LIBXML_VALID_ENABLED
10085 * [ VC: Root Element Type ]
10086 * The Name in the document type declaration must match the element
10087 * type of the root element.
10089 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
10090 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
10091 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
10092 #endif /* LIBXML_VALID_ENABLED */
10095 * Check for an Empty Element.
10097 if ((RAW
== '/') && (NXT(1) == '>')) {
10100 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
10101 (!ctxt
->disableSAX
))
10102 ctxt
->sax
->endElementNs(ctxt
->userData
, name
, prefix
, URI
);
10103 #ifdef LIBXML_SAX1_ENABLED
10105 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
10106 (!ctxt
->disableSAX
))
10107 ctxt
->sax
->endElement(ctxt
->userData
, name
);
10108 #endif /* LIBXML_SAX1_ENABLED */
10112 if (nsNr
!= ctxt
->nsNr
)
10113 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10114 if ( ret
!= NULL
&& ctxt
->record_info
) {
10115 node_info
.end_pos
= ctxt
->input
->consumed
+
10116 (CUR_PTR
- ctxt
->input
->base
);
10117 node_info
.end_line
= ctxt
->input
->line
;
10118 node_info
.node
= ret
;
10119 xmlParserAddNodeInfo(ctxt
, &node_info
);
10126 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_GT_REQUIRED
,
10127 "Couldn't find end of Start Tag %s line %d\n",
10131 * end of parsing of this node.
10136 if (nsNr
!= ctxt
->nsNr
)
10137 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10140 * Capture end position and add node
10142 if ( ret
!= NULL
&& ctxt
->record_info
) {
10143 node_info
.end_pos
= ctxt
->input
->consumed
+
10144 (CUR_PTR
- ctxt
->input
->base
);
10145 node_info
.end_line
= ctxt
->input
->line
;
10146 node_info
.node
= ret
;
10147 xmlParserAddNodeInfo(ctxt
, &node_info
);
10153 * Parse the content of the element:
10155 xmlParseContent(ctxt
);
10156 if (ctxt
->instate
== XML_PARSER_EOF
)
10158 if (!IS_BYTE_CHAR(RAW
)) {
10159 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
10160 "Premature end of data in tag %s line %d\n",
10164 * end of parsing of this node.
10169 if (nsNr
!= ctxt
->nsNr
)
10170 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10175 * parse the end of tag: '</' should be here.
10178 xmlParseEndTag2(ctxt
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
, tlen
);
10181 #ifdef LIBXML_SAX1_ENABLED
10183 xmlParseEndTag1(ctxt
, line
);
10184 #endif /* LIBXML_SAX1_ENABLED */
10187 * Capture end position and add node
10189 if ( ret
!= NULL
&& ctxt
->record_info
) {
10190 node_info
.end_pos
= ctxt
->input
->consumed
+
10191 (CUR_PTR
- ctxt
->input
->base
);
10192 node_info
.end_line
= ctxt
->input
->line
;
10193 node_info
.node
= ret
;
10194 xmlParserAddNodeInfo(ctxt
, &node_info
);
10199 * xmlParseVersionNum:
10200 * @ctxt: an XML parser context
10202 * parse the XML version value.
10204 * [26] VersionNum ::= '1.' [0-9]+
10206 * In practice allow [0-9].[0-9]+ at that level
10208 * Returns the string giving the XML version number, or NULL
10211 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
10212 xmlChar
*buf
= NULL
;
10217 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
10219 xmlErrMemory(ctxt
, NULL
);
10223 if (!((cur
>= '0') && (cur
<= '9'))) {
10237 while ((cur
>= '0') && (cur
<= '9')) {
10238 if (len
+ 1 >= size
) {
10242 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
10245 xmlErrMemory(ctxt
, NULL
);
10259 * xmlParseVersionInfo:
10260 * @ctxt: an XML parser context
10262 * parse the XML version.
10264 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10266 * [25] Eq ::= S? '=' S?
10268 * Returns the version string, e.g. "1.0"
10272 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
10273 xmlChar
*version
= NULL
;
10275 if (CMP7(CUR_PTR
, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10279 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10286 version
= xmlParseVersionNum(ctxt
);
10288 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10291 } else if (RAW
== '\''){
10293 version
= xmlParseVersionNum(ctxt
);
10295 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10299 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10307 * @ctxt: an XML parser context
10309 * parse the XML encoding name
10311 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10313 * Returns the encoding name value or NULL
10316 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
10317 xmlChar
*buf
= NULL
;
10323 if (((cur
>= 'a') && (cur
<= 'z')) ||
10324 ((cur
>= 'A') && (cur
<= 'Z'))) {
10325 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
10327 xmlErrMemory(ctxt
, NULL
);
10334 while (((cur
>= 'a') && (cur
<= 'z')) ||
10335 ((cur
>= 'A') && (cur
<= 'Z')) ||
10336 ((cur
>= '0') && (cur
<= '9')) ||
10337 (cur
== '.') || (cur
== '_') ||
10339 if (len
+ 1 >= size
) {
10343 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
10345 xmlErrMemory(ctxt
, NULL
);
10362 xmlFatalErr(ctxt
, XML_ERR_ENCODING_NAME
, NULL
);
10368 * xmlParseEncodingDecl:
10369 * @ctxt: an XML parser context
10371 * parse the XML encoding declaration
10373 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10375 * This sets up the conversion filters.
10377 * Returns the encoding value or NULL
10381 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
10382 xmlChar
*encoding
= NULL
;
10385 if (CMP8(CUR_PTR
, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10389 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10396 encoding
= xmlParseEncName(ctxt
);
10398 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10401 } else if (RAW
== '\''){
10403 encoding
= xmlParseEncName(ctxt
);
10405 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10409 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10413 * Non standard parsing, allowing the user to ignore encoding
10415 if (ctxt
->options
& XML_PARSE_IGNORE_ENC
) {
10416 xmlFree((xmlChar
*) encoding
);
10421 * UTF-16 encoding switch has already taken place at this stage,
10422 * moreover the little-endian/big-endian selection is already done
10424 if ((encoding
!= NULL
) &&
10425 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-16")) ||
10426 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF16")))) {
10428 * If no encoding was passed to the parser, we are
10429 * using UTF-16 and no decoder is present i.e. the
10430 * document is apparently UTF-8 compatible, then raise an
10431 * encoding mismatch fatal error
10433 if ((ctxt
->encoding
== NULL
) &&
10434 (ctxt
->input
->buf
!= NULL
) &&
10435 (ctxt
->input
->buf
->encoder
== NULL
)) {
10436 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_ENCODING
,
10437 "Document labelled UTF-16 but has UTF-8 content\n");
10439 if (ctxt
->encoding
!= NULL
)
10440 xmlFree((xmlChar
*) ctxt
->encoding
);
10441 ctxt
->encoding
= encoding
;
10444 * UTF-8 encoding is handled natively
10446 else if ((encoding
!= NULL
) &&
10447 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-8")) ||
10448 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF8")))) {
10449 if (ctxt
->encoding
!= NULL
)
10450 xmlFree((xmlChar
*) ctxt
->encoding
);
10451 ctxt
->encoding
= encoding
;
10453 else if (encoding
!= NULL
) {
10454 xmlCharEncodingHandlerPtr handler
;
10456 if (ctxt
->input
->encoding
!= NULL
)
10457 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
10458 ctxt
->input
->encoding
= encoding
;
10460 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
10461 if (handler
!= NULL
) {
10462 xmlSwitchToEncoding(ctxt
, handler
);
10464 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
10465 "Unsupported encoding %s\n", encoding
);
10475 * @ctxt: an XML parser context
10477 * parse the XML standalone declaration
10479 * [32] SDDecl ::= S 'standalone' Eq
10480 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10482 * [ VC: Standalone Document Declaration ]
10483 * TODO The standalone document declaration must have the value "no"
10484 * if any external markup declarations contain declarations of:
10485 * - attributes with default values, if elements to which these
10486 * attributes apply appear in the document without specifications
10487 * of values for these attributes, or
10488 * - entities (other than amp, lt, gt, apos, quot), if references
10489 * to those entities appear in the document, or
10490 * - attributes with values subject to normalization, where the
10491 * attribute appears in the document with a value which will change
10492 * as a result of normalization, or
10493 * - element types with element content, if white space occurs directly
10494 * within any instance of those types.
10497 * 1 if standalone="yes"
10498 * 0 if standalone="no"
10499 * -2 if standalone attribute is missing or invalid
10500 * (A standalone value of -2 means that the XML declaration was found,
10501 * but no value was specified for the standalone attribute).
10505 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
10506 int standalone
= -2;
10509 if (CMP10(CUR_PTR
, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10513 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10514 return(standalone
);
10520 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10523 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10528 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10531 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10534 } else if (RAW
== '"'){
10536 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10539 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10544 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10547 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10551 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10554 return(standalone
);
10559 * @ctxt: an XML parser context
10561 * parse an XML declaration header
10563 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10567 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
10571 * This value for standalone indicates that the document has an
10572 * XML declaration but it does not have a standalone attribute.
10573 * It will be overwritten later if a standalone attribute is found.
10575 ctxt
->input
->standalone
= -2;
10578 * We know that '<?xml' is here.
10582 if (!IS_BLANK_CH(RAW
)) {
10583 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
10584 "Blank needed after '<?xml'\n");
10589 * We must have the VersionInfo here.
10591 version
= xmlParseVersionInfo(ctxt
);
10592 if (version
== NULL
) {
10593 xmlFatalErr(ctxt
, XML_ERR_VERSION_MISSING
, NULL
);
10595 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
10597 * Changed here for XML-1.0 5th edition
10599 if (ctxt
->options
& XML_PARSE_OLD10
) {
10600 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10601 "Unsupported version '%s'\n",
10604 if ((version
[0] == '1') && ((version
[1] == '.'))) {
10605 xmlWarningMsg(ctxt
, XML_WAR_UNKNOWN_VERSION
,
10606 "Unsupported version '%s'\n",
10609 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10610 "Unsupported version '%s'\n",
10615 if (ctxt
->version
!= NULL
)
10616 xmlFree((void *) ctxt
->version
);
10617 ctxt
->version
= version
;
10621 * We may have the encoding declaration
10623 if (!IS_BLANK_CH(RAW
)) {
10624 if ((RAW
== '?') && (NXT(1) == '>')) {
10628 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10630 xmlParseEncodingDecl(ctxt
);
10631 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10633 * The XML REC instructs us to stop parsing right here
10639 * We may have the standalone status.
10641 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK_CH(RAW
))) {
10642 if ((RAW
== '?') && (NXT(1) == '>')) {
10646 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10650 * We can grow the input buffer freely at that point
10655 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
10658 if ((RAW
== '?') && (NXT(1) == '>')) {
10660 } else if (RAW
== '>') {
10661 /* Deprecated old WD ... */
10662 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10665 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10666 MOVETO_ENDTAG(CUR_PTR
);
10673 * @ctxt: an XML parser context
10675 * parse an XML Misc* optional field.
10677 * [27] Misc ::= Comment | PI | S
10681 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
10682 while ((ctxt
->instate
!= XML_PARSER_EOF
) &&
10683 (((RAW
== '<') && (NXT(1) == '?')) ||
10684 (CMP4(CUR_PTR
, '<', '!', '-', '-')) ||
10685 IS_BLANK_CH(CUR
))) {
10686 if ((RAW
== '<') && (NXT(1) == '?')) {
10688 } else if (IS_BLANK_CH(CUR
)) {
10691 xmlParseComment(ctxt
);
10696 * xmlParseDocument:
10697 * @ctxt: an XML parser context
10699 * parse an XML document (and build a tree if using the standard SAX
10702 * [1] document ::= prolog element Misc*
10704 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10706 * Returns 0, -1 in case of error. the parser context is augmented
10707 * as a result of the parsing.
10711 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
10713 xmlCharEncoding enc
;
10717 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10723 * SAX: detecting the level.
10725 xmlDetectSAX2(ctxt
);
10728 * SAX: beginning of the document processing.
10730 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10731 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10732 if (ctxt
->instate
== XML_PARSER_EOF
)
10735 if ((ctxt
->encoding
== NULL
) &&
10736 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
10738 * Get the 4 first bytes and decode the charset
10739 * if enc != XML_CHAR_ENCODING_NONE
10740 * plug some encoding conversion routines.
10746 enc
= xmlDetectCharEncoding(&start
[0], 4);
10747 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10748 xmlSwitchEncoding(ctxt
, enc
);
10754 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10758 * Check for the XMLDecl in the Prolog.
10759 * do not GROW here to avoid the detected encoder to decode more
10760 * than just the first line, unless the amount of data is really
10761 * too small to hold "<?xml version="1.0" encoding="foo"
10763 if ((ctxt
->input
->end
- ctxt
->input
->cur
) < 35) {
10766 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10769 * Note that we will switch encoding on the fly.
10771 xmlParseXMLDecl(ctxt
);
10772 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10774 * The XML REC instructs us to stop parsing right here
10778 ctxt
->standalone
= ctxt
->input
->standalone
;
10781 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10783 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10784 ctxt
->sax
->startDocument(ctxt
->userData
);
10785 if (ctxt
->instate
== XML_PARSER_EOF
)
10787 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->input
!= NULL
) &&
10788 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->compressed
>= 0)) {
10789 ctxt
->myDoc
->compression
= ctxt
->input
->buf
->compressed
;
10793 * The Misc part of the Prolog
10796 xmlParseMisc(ctxt
);
10799 * Then possibly doc type declaration(s) and more Misc
10800 * (doctypedecl Misc*)?
10803 if (CMP9(CUR_PTR
, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10805 ctxt
->inSubset
= 1;
10806 xmlParseDocTypeDecl(ctxt
);
10808 ctxt
->instate
= XML_PARSER_DTD
;
10809 xmlParseInternalSubset(ctxt
);
10810 if (ctxt
->instate
== XML_PARSER_EOF
)
10815 * Create and update the external subset.
10817 ctxt
->inSubset
= 2;
10818 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
10819 (!ctxt
->disableSAX
))
10820 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
10821 ctxt
->extSubSystem
, ctxt
->extSubURI
);
10822 if (ctxt
->instate
== XML_PARSER_EOF
)
10824 ctxt
->inSubset
= 0;
10826 xmlCleanSpecialAttr(ctxt
);
10828 ctxt
->instate
= XML_PARSER_PROLOG
;
10829 xmlParseMisc(ctxt
);
10833 * Time to start parsing the tree itself
10837 xmlFatalErrMsg(ctxt
, XML_ERR_DOCUMENT_EMPTY
,
10838 "Start tag expected, '<' not found\n");
10840 ctxt
->instate
= XML_PARSER_CONTENT
;
10841 xmlParseElement(ctxt
);
10842 ctxt
->instate
= XML_PARSER_EPILOG
;
10846 * The Misc part at the end
10848 xmlParseMisc(ctxt
);
10851 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
10853 ctxt
->instate
= XML_PARSER_EOF
;
10857 * SAX: end of the document processing.
10859 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10860 ctxt
->sax
->endDocument(ctxt
->userData
);
10863 * Remove locally kept entity definitions if the tree was not built
10865 if ((ctxt
->myDoc
!= NULL
) &&
10866 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
10867 xmlFreeDoc(ctxt
->myDoc
);
10868 ctxt
->myDoc
= NULL
;
10871 if ((ctxt
->wellFormed
) && (ctxt
->myDoc
!= NULL
)) {
10872 ctxt
->myDoc
->properties
|= XML_DOC_WELLFORMED
;
10874 ctxt
->myDoc
->properties
|= XML_DOC_DTDVALID
;
10875 if (ctxt
->nsWellFormed
)
10876 ctxt
->myDoc
->properties
|= XML_DOC_NSVALID
;
10877 if (ctxt
->options
& XML_PARSE_OLD10
)
10878 ctxt
->myDoc
->properties
|= XML_DOC_OLD10
;
10880 if (! ctxt
->wellFormed
) {
10888 * xmlParseExtParsedEnt:
10889 * @ctxt: an XML parser context
10891 * parse a general parsed entity
10892 * An external general parsed entity is well-formed if it matches the
10893 * production labeled extParsedEnt.
10895 * [78] extParsedEnt ::= TextDecl? content
10897 * Returns 0, -1 in case of error. the parser context is augmented
10898 * as a result of the parsing.
10902 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
10904 xmlCharEncoding enc
;
10906 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10909 xmlDefaultSAXHandlerInit();
10911 xmlDetectSAX2(ctxt
);
10916 * SAX: beginning of the document processing.
10918 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10919 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10922 * Get the 4 first bytes and decode the charset
10923 * if enc != XML_CHAR_ENCODING_NONE
10924 * plug some encoding conversion routines.
10926 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
10931 enc
= xmlDetectCharEncoding(start
, 4);
10932 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10933 xmlSwitchEncoding(ctxt
, enc
);
10939 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10943 * Check for the XMLDecl in the Prolog.
10946 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10949 * Note that we will switch encoding on the fly.
10951 xmlParseXMLDecl(ctxt
);
10952 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10954 * The XML REC instructs us to stop parsing right here
10960 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10962 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10963 ctxt
->sax
->startDocument(ctxt
->userData
);
10964 if (ctxt
->instate
== XML_PARSER_EOF
)
10968 * Doing validity checking on chunk doesn't make sense
10970 ctxt
->instate
= XML_PARSER_CONTENT
;
10971 ctxt
->validate
= 0;
10972 ctxt
->loadsubset
= 0;
10975 xmlParseContent(ctxt
);
10976 if (ctxt
->instate
== XML_PARSER_EOF
)
10979 if ((RAW
== '<') && (NXT(1) == '/')) {
10980 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
10981 } else if (RAW
!= 0) {
10982 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
10986 * SAX: end of the document processing.
10988 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10989 ctxt
->sax
->endDocument(ctxt
->userData
);
10991 if (! ctxt
->wellFormed
) return(-1);
10995 #ifdef LIBXML_PUSH_ENABLED
10996 /************************************************************************
10998 * Progressive parsing interfaces *
11000 ************************************************************************/
11003 * xmlParseLookupSequence:
11004 * @ctxt: an XML parser context
11005 * @first: the first char to lookup
11006 * @next: the next char to lookup or zero
11007 * @third: the next char to lookup or zero
11009 * Try to find if a sequence (first, next, third) or just (first next) or
11010 * (first) is available in the input stream.
11011 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11012 * to avoid rescanning sequences of bytes, it DOES change the state of the
11013 * parser, do not use liberally.
11015 * Returns the index to the current parsing point if the full sequence
11016 * is available, -1 otherwise.
11019 xmlParseLookupSequence(xmlParserCtxtPtr ctxt
, xmlChar first
,
11020 xmlChar next
, xmlChar third
) {
11022 xmlParserInputPtr in
;
11023 const xmlChar
*buf
;
11026 if (in
== NULL
) return(-1);
11027 base
= in
->cur
- in
->base
;
11028 if (base
< 0) return(-1);
11029 if (ctxt
->checkIndex
> base
)
11030 base
= ctxt
->checkIndex
;
11031 if (in
->buf
== NULL
) {
11035 buf
= xmlBufContent(in
->buf
->buffer
);
11036 len
= xmlBufUse(in
->buf
->buffer
);
11038 /* take into account the sequence length */
11039 if (third
) len
-= 2;
11040 else if (next
) len
--;
11041 for (;base
< len
;base
++) {
11042 if (buf
[base
] == first
) {
11044 if ((buf
[base
+ 1] != next
) ||
11045 (buf
[base
+ 2] != third
)) continue;
11046 } else if (next
!= 0) {
11047 if (buf
[base
+ 1] != next
) continue;
11049 ctxt
->checkIndex
= 0;
11052 xmlGenericError(xmlGenericErrorContext
,
11053 "PP: lookup '%c' found at %d\n",
11055 else if (third
== 0)
11056 xmlGenericError(xmlGenericErrorContext
,
11057 "PP: lookup '%c%c' found at %d\n",
11058 first
, next
, base
);
11060 xmlGenericError(xmlGenericErrorContext
,
11061 "PP: lookup '%c%c%c' found at %d\n",
11062 first
, next
, third
, base
);
11064 return(base
- (in
->cur
- in
->base
));
11067 ctxt
->checkIndex
= base
;
11070 xmlGenericError(xmlGenericErrorContext
,
11071 "PP: lookup '%c' failed\n", first
);
11072 else if (third
== 0)
11073 xmlGenericError(xmlGenericErrorContext
,
11074 "PP: lookup '%c%c' failed\n", first
, next
);
11076 xmlGenericError(xmlGenericErrorContext
,
11077 "PP: lookup '%c%c%c' failed\n", first
, next
, third
);
11083 * xmlParseGetLasts:
11084 * @ctxt: an XML parser context
11085 * @lastlt: pointer to store the last '<' from the input
11086 * @lastgt: pointer to store the last '>' from the input
11088 * Lookup the last < and > in the current chunk
11091 xmlParseGetLasts(xmlParserCtxtPtr ctxt
, const xmlChar
**lastlt
,
11092 const xmlChar
**lastgt
) {
11093 const xmlChar
*tmp
;
11095 if ((ctxt
== NULL
) || (lastlt
== NULL
) || (lastgt
== NULL
)) {
11096 xmlGenericError(xmlGenericErrorContext
,
11097 "Internal error: xmlParseGetLasts\n");
11100 if ((ctxt
->progressive
!= 0) && (ctxt
->inputNr
== 1)) {
11101 tmp
= ctxt
->input
->end
;
11103 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '<')) tmp
--;
11104 if (tmp
< ctxt
->input
->base
) {
11110 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '>')) {
11111 if (*tmp
== '\'') {
11113 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '\'')) tmp
++;
11114 if (tmp
< ctxt
->input
->end
) tmp
++;
11115 } else if (*tmp
== '"') {
11117 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '"')) tmp
++;
11118 if (tmp
< ctxt
->input
->end
) tmp
++;
11122 if (tmp
< ctxt
->input
->end
)
11127 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '>')) tmp
--;
11128 if (tmp
>= ctxt
->input
->base
)
11140 * xmlCheckCdataPush:
11141 * @utf: pointer to the block of characters
11142 * @len: length of the block in bytes
11144 * Check that the block of characters is okay as CData content [20]
11146 * Returns the number of bytes to pass if okay, a negative index where a
11147 * UTF-8 error occurred otherwise
11150 xmlCheckCdataPush(const xmlChar
*utf
, int len
) {
11155 if ((utf
== NULL
) || (len
<= 0))
11158 for (ix
= 0; ix
< len
;) { /* string is 0-terminated */
11160 if ((c
& 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11163 else if ((c
== 0xA) || (c
== 0xD) || (c
== 0x9))
11167 } else if ((c
& 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11168 if (ix
+ 2 > len
) return(ix
);
11169 if ((utf
[ix
+1] & 0xc0 ) != 0x80)
11171 codepoint
= (utf
[ix
] & 0x1f) << 6;
11172 codepoint
|= utf
[ix
+1] & 0x3f;
11173 if (!xmlIsCharQ(codepoint
))
11176 } else if ((c
& 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11177 if (ix
+ 3 > len
) return(ix
);
11178 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11179 ((utf
[ix
+2] & 0xc0) != 0x80))
11181 codepoint
= (utf
[ix
] & 0xf) << 12;
11182 codepoint
|= (utf
[ix
+1] & 0x3f) << 6;
11183 codepoint
|= utf
[ix
+2] & 0x3f;
11184 if (!xmlIsCharQ(codepoint
))
11187 } else if ((c
& 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11188 if (ix
+ 4 > len
) return(ix
);
11189 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11190 ((utf
[ix
+2] & 0xc0) != 0x80) ||
11191 ((utf
[ix
+3] & 0xc0) != 0x80))
11193 codepoint
= (utf
[ix
] & 0x7) << 18;
11194 codepoint
|= (utf
[ix
+1] & 0x3f) << 12;
11195 codepoint
|= (utf
[ix
+2] & 0x3f) << 6;
11196 codepoint
|= utf
[ix
+3] & 0x3f;
11197 if (!xmlIsCharQ(codepoint
))
11200 } else /* unknown encoding */
11207 * xmlParseTryOrFinish:
11208 * @ctxt: an XML parser context
11209 * @terminate: last chunk indicator
11211 * Try to progress on parsing
11213 * Returns zero if no parsing was possible
11216 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
11220 const xmlChar
*lastlt
, *lastgt
;
11222 if (ctxt
->input
== NULL
)
11226 switch (ctxt
->instate
) {
11227 case XML_PARSER_EOF
:
11228 xmlGenericError(xmlGenericErrorContext
,
11229 "PP: try EOF\n"); break;
11230 case XML_PARSER_START
:
11231 xmlGenericError(xmlGenericErrorContext
,
11232 "PP: try START\n"); break;
11233 case XML_PARSER_MISC
:
11234 xmlGenericError(xmlGenericErrorContext
,
11235 "PP: try MISC\n");break;
11236 case XML_PARSER_COMMENT
:
11237 xmlGenericError(xmlGenericErrorContext
,
11238 "PP: try COMMENT\n");break;
11239 case XML_PARSER_PROLOG
:
11240 xmlGenericError(xmlGenericErrorContext
,
11241 "PP: try PROLOG\n");break;
11242 case XML_PARSER_START_TAG
:
11243 xmlGenericError(xmlGenericErrorContext
,
11244 "PP: try START_TAG\n");break;
11245 case XML_PARSER_CONTENT
:
11246 xmlGenericError(xmlGenericErrorContext
,
11247 "PP: try CONTENT\n");break;
11248 case XML_PARSER_CDATA_SECTION
:
11249 xmlGenericError(xmlGenericErrorContext
,
11250 "PP: try CDATA_SECTION\n");break;
11251 case XML_PARSER_END_TAG
:
11252 xmlGenericError(xmlGenericErrorContext
,
11253 "PP: try END_TAG\n");break;
11254 case XML_PARSER_ENTITY_DECL
:
11255 xmlGenericError(xmlGenericErrorContext
,
11256 "PP: try ENTITY_DECL\n");break;
11257 case XML_PARSER_ENTITY_VALUE
:
11258 xmlGenericError(xmlGenericErrorContext
,
11259 "PP: try ENTITY_VALUE\n");break;
11260 case XML_PARSER_ATTRIBUTE_VALUE
:
11261 xmlGenericError(xmlGenericErrorContext
,
11262 "PP: try ATTRIBUTE_VALUE\n");break;
11263 case XML_PARSER_DTD
:
11264 xmlGenericError(xmlGenericErrorContext
,
11265 "PP: try DTD\n");break;
11266 case XML_PARSER_EPILOG
:
11267 xmlGenericError(xmlGenericErrorContext
,
11268 "PP: try EPILOG\n");break;
11269 case XML_PARSER_PI
:
11270 xmlGenericError(xmlGenericErrorContext
,
11271 "PP: try PI\n");break;
11272 case XML_PARSER_IGNORE
:
11273 xmlGenericError(xmlGenericErrorContext
,
11274 "PP: try IGNORE\n");break;
11278 if ((ctxt
->input
!= NULL
) &&
11279 (ctxt
->input
->cur
- ctxt
->input
->base
> 4096)) {
11281 ctxt
->checkIndex
= 0;
11283 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11285 while (ctxt
->instate
!= XML_PARSER_EOF
) {
11286 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11291 * Pop-up of finished entities.
11293 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
11296 if (ctxt
->input
== NULL
) break;
11297 if (ctxt
->input
->buf
== NULL
)
11298 avail
= ctxt
->input
->length
-
11299 (ctxt
->input
->cur
- ctxt
->input
->base
);
11302 * If we are operating on converted input, try to flush
11303 * remainng chars to avoid them stalling in the non-converted
11304 * buffer. But do not do this in document start where
11305 * encoding="..." may not have been read and we work on a
11306 * guessed encoding.
11308 if ((ctxt
->instate
!= XML_PARSER_START
) &&
11309 (ctxt
->input
->buf
->raw
!= NULL
) &&
11310 (xmlBufIsEmpty(ctxt
->input
->buf
->raw
) == 0)) {
11311 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
11313 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
11315 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
11316 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
11319 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11320 (ctxt
->input
->cur
- ctxt
->input
->base
);
11324 switch (ctxt
->instate
) {
11325 case XML_PARSER_EOF
:
11327 * Document parsing is done !
11330 case XML_PARSER_START
:
11331 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
11333 xmlCharEncoding enc
;
11336 * Very first chars read from the document flow.
11342 * Get the 4 first bytes and decode the charset
11343 * if enc != XML_CHAR_ENCODING_NONE
11344 * plug some encoding conversion routines,
11345 * else xmlSwitchEncoding will set to (default)
11352 enc
= xmlDetectCharEncoding(start
, 4);
11353 xmlSwitchEncoding(ctxt
, enc
);
11359 cur
= ctxt
->input
->cur
[0];
11360 next
= ctxt
->input
->cur
[1];
11362 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11363 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11364 &xmlDefaultSAXLocator
);
11365 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11366 ctxt
->instate
= XML_PARSER_EOF
;
11368 xmlGenericError(xmlGenericErrorContext
,
11369 "PP: entering EOF\n");
11371 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11372 ctxt
->sax
->endDocument(ctxt
->userData
);
11375 if ((cur
== '<') && (next
== '?')) {
11376 /* PI or XML decl */
11377 if (avail
< 5) return(ret
);
11378 if ((!terminate
) &&
11379 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11381 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11382 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11383 &xmlDefaultSAXLocator
);
11384 if ((ctxt
->input
->cur
[2] == 'x') &&
11385 (ctxt
->input
->cur
[3] == 'm') &&
11386 (ctxt
->input
->cur
[4] == 'l') &&
11387 (IS_BLANK_CH(ctxt
->input
->cur
[5]))) {
11390 xmlGenericError(xmlGenericErrorContext
,
11391 "PP: Parsing XML Decl\n");
11393 xmlParseXMLDecl(ctxt
);
11394 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
11396 * The XML REC instructs us to stop parsing right
11399 ctxt
->instate
= XML_PARSER_EOF
;
11402 ctxt
->standalone
= ctxt
->input
->standalone
;
11403 if ((ctxt
->encoding
== NULL
) &&
11404 (ctxt
->input
->encoding
!= NULL
))
11405 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
11406 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11407 (!ctxt
->disableSAX
))
11408 ctxt
->sax
->startDocument(ctxt
->userData
);
11409 ctxt
->instate
= XML_PARSER_MISC
;
11411 xmlGenericError(xmlGenericErrorContext
,
11412 "PP: entering MISC\n");
11415 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11416 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11417 (!ctxt
->disableSAX
))
11418 ctxt
->sax
->startDocument(ctxt
->userData
);
11419 ctxt
->instate
= XML_PARSER_MISC
;
11421 xmlGenericError(xmlGenericErrorContext
,
11422 "PP: entering MISC\n");
11426 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11427 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11428 &xmlDefaultSAXLocator
);
11429 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11430 if (ctxt
->version
== NULL
) {
11431 xmlErrMemory(ctxt
, NULL
);
11434 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11435 (!ctxt
->disableSAX
))
11436 ctxt
->sax
->startDocument(ctxt
->userData
);
11437 ctxt
->instate
= XML_PARSER_MISC
;
11439 xmlGenericError(xmlGenericErrorContext
,
11440 "PP: entering MISC\n");
11444 case XML_PARSER_START_TAG
: {
11445 const xmlChar
*name
;
11446 const xmlChar
*prefix
= NULL
;
11447 const xmlChar
*URI
= NULL
;
11448 int nsNr
= ctxt
->nsNr
;
11450 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11452 cur
= ctxt
->input
->cur
[0];
11454 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11455 ctxt
->instate
= XML_PARSER_EOF
;
11456 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11457 ctxt
->sax
->endDocument(ctxt
->userData
);
11461 if (ctxt
->progressive
) {
11462 /* > can be found unescaped in attribute values */
11463 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11465 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11469 if (ctxt
->spaceNr
== 0)
11470 spacePush(ctxt
, -1);
11471 else if (*ctxt
->space
== -2)
11472 spacePush(ctxt
, -1);
11474 spacePush(ctxt
, *ctxt
->space
);
11475 #ifdef LIBXML_SAX1_ENABLED
11477 #endif /* LIBXML_SAX1_ENABLED */
11478 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
11479 #ifdef LIBXML_SAX1_ENABLED
11481 name
= xmlParseStartTag(ctxt
);
11482 #endif /* LIBXML_SAX1_ENABLED */
11483 if (ctxt
->instate
== XML_PARSER_EOF
)
11485 if (name
== NULL
) {
11487 ctxt
->instate
= XML_PARSER_EOF
;
11488 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11489 ctxt
->sax
->endDocument(ctxt
->userData
);
11492 #ifdef LIBXML_VALID_ENABLED
11494 * [ VC: Root Element Type ]
11495 * The Name in the document type declaration must match
11496 * the element type of the root element.
11498 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
11499 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
11500 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
11501 #endif /* LIBXML_VALID_ENABLED */
11504 * Check for an Empty Element.
11506 if ((RAW
== '/') && (NXT(1) == '>')) {
11510 if ((ctxt
->sax
!= NULL
) &&
11511 (ctxt
->sax
->endElementNs
!= NULL
) &&
11512 (!ctxt
->disableSAX
))
11513 ctxt
->sax
->endElementNs(ctxt
->userData
, name
,
11515 if (ctxt
->nsNr
- nsNr
> 0)
11516 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
11517 #ifdef LIBXML_SAX1_ENABLED
11519 if ((ctxt
->sax
!= NULL
) &&
11520 (ctxt
->sax
->endElement
!= NULL
) &&
11521 (!ctxt
->disableSAX
))
11522 ctxt
->sax
->endElement(ctxt
->userData
, name
);
11523 #endif /* LIBXML_SAX1_ENABLED */
11525 if (ctxt
->instate
== XML_PARSER_EOF
)
11528 if (ctxt
->nameNr
== 0) {
11529 ctxt
->instate
= XML_PARSER_EPILOG
;
11531 ctxt
->instate
= XML_PARSER_CONTENT
;
11533 ctxt
->progressive
= 1;
11539 xmlFatalErrMsgStr(ctxt
, XML_ERR_GT_REQUIRED
,
11540 "Couldn't find end of Start Tag %s\n",
11546 nameNsPush(ctxt
, name
, prefix
, URI
, ctxt
->nsNr
- nsNr
);
11547 #ifdef LIBXML_SAX1_ENABLED
11549 namePush(ctxt
, name
);
11550 #endif /* LIBXML_SAX1_ENABLED */
11552 ctxt
->instate
= XML_PARSER_CONTENT
;
11553 ctxt
->progressive
= 1;
11556 case XML_PARSER_CONTENT
: {
11557 const xmlChar
*test
;
11559 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11561 cur
= ctxt
->input
->cur
[0];
11562 next
= ctxt
->input
->cur
[1];
11565 cons
= ctxt
->input
->consumed
;
11566 if ((cur
== '<') && (next
== '/')) {
11567 ctxt
->instate
= XML_PARSER_END_TAG
;
11569 } else if ((cur
== '<') && (next
== '?')) {
11570 if ((!terminate
) &&
11571 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11572 ctxt
->progressive
= XML_PARSER_PI
;
11576 ctxt
->instate
= XML_PARSER_CONTENT
;
11577 ctxt
->progressive
= 1;
11578 } else if ((cur
== '<') && (next
!= '!')) {
11579 ctxt
->instate
= XML_PARSER_START_TAG
;
11581 } else if ((cur
== '<') && (next
== '!') &&
11582 (ctxt
->input
->cur
[2] == '-') &&
11583 (ctxt
->input
->cur
[3] == '-')) {
11588 ctxt
->input
->cur
+= 4;
11589 term
= xmlParseLookupSequence(ctxt
, '-', '-', '>');
11590 ctxt
->input
->cur
-= 4;
11591 if ((!terminate
) && (term
< 0)) {
11592 ctxt
->progressive
= XML_PARSER_COMMENT
;
11595 xmlParseComment(ctxt
);
11596 ctxt
->instate
= XML_PARSER_CONTENT
;
11597 ctxt
->progressive
= 1;
11598 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
11599 (ctxt
->input
->cur
[2] == '[') &&
11600 (ctxt
->input
->cur
[3] == 'C') &&
11601 (ctxt
->input
->cur
[4] == 'D') &&
11602 (ctxt
->input
->cur
[5] == 'A') &&
11603 (ctxt
->input
->cur
[6] == 'T') &&
11604 (ctxt
->input
->cur
[7] == 'A') &&
11605 (ctxt
->input
->cur
[8] == '[')) {
11607 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
11609 } else if ((cur
== '<') && (next
== '!') &&
11612 } else if (cur
== '&') {
11613 if ((!terminate
) &&
11614 (xmlParseLookupSequence(ctxt
, ';', 0, 0) < 0))
11616 xmlParseReference(ctxt
);
11618 /* TODO Avoid the extra copy, handle directly !!! */
11620 * Goal of the following test is:
11621 * - minimize calls to the SAX 'character' callback
11622 * when they are mergeable
11623 * - handle an problem for isBlank when we only parse
11624 * a sequence of blank chars and the next one is
11625 * not available to check against '<' presence.
11626 * - tries to homogenize the differences in SAX
11627 * callbacks between the push and pull versions
11630 if ((ctxt
->inputNr
== 1) &&
11631 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
11633 if (ctxt
->progressive
) {
11634 if ((lastlt
== NULL
) ||
11635 (ctxt
->input
->cur
> lastlt
))
11637 } else if (xmlParseLookupSequence(ctxt
,
11643 ctxt
->checkIndex
= 0;
11644 xmlParseCharData(ctxt
, 0);
11647 * Pop-up of finished entities.
11649 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
11651 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
11652 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
11653 "detected an error in element content\n");
11654 ctxt
->instate
= XML_PARSER_EOF
;
11659 case XML_PARSER_END_TAG
:
11663 if (ctxt
->progressive
) {
11664 /* > can be found unescaped in attribute values */
11665 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11667 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11672 xmlParseEndTag2(ctxt
,
11673 (void *) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 3],
11674 (void *) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 2], 0,
11675 (int) (long) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 1], 0);
11678 #ifdef LIBXML_SAX1_ENABLED
11680 xmlParseEndTag1(ctxt
, 0);
11681 #endif /* LIBXML_SAX1_ENABLED */
11682 if (ctxt
->instate
== XML_PARSER_EOF
) {
11684 } else if (ctxt
->nameNr
== 0) {
11685 ctxt
->instate
= XML_PARSER_EPILOG
;
11687 ctxt
->instate
= XML_PARSER_CONTENT
;
11690 case XML_PARSER_CDATA_SECTION
: {
11692 * The Push mode need to have the SAX callback for
11693 * cdataBlock merge back contiguous callbacks.
11697 base
= xmlParseLookupSequence(ctxt
, ']', ']', '>');
11699 if (avail
>= XML_PARSER_BIG_BUFFER_SIZE
+ 2) {
11702 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
,
11703 XML_PARSER_BIG_BUFFER_SIZE
);
11706 ctxt
->input
->cur
+= tmp
;
11707 goto encoding_error
;
11709 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
11710 if (ctxt
->sax
->cdataBlock
!= NULL
)
11711 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11712 ctxt
->input
->cur
, tmp
);
11713 else if (ctxt
->sax
->characters
!= NULL
)
11714 ctxt
->sax
->characters(ctxt
->userData
,
11715 ctxt
->input
->cur
, tmp
);
11717 if (ctxt
->instate
== XML_PARSER_EOF
)
11720 ctxt
->checkIndex
= 0;
11726 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, base
);
11727 if ((tmp
< 0) || (tmp
!= base
)) {
11729 ctxt
->input
->cur
+= tmp
;
11730 goto encoding_error
;
11732 if ((ctxt
->sax
!= NULL
) && (base
== 0) &&
11733 (ctxt
->sax
->cdataBlock
!= NULL
) &&
11734 (!ctxt
->disableSAX
)) {
11736 * Special case to provide identical behaviour
11737 * between pull and push parsers on enpty CDATA
11740 if ((ctxt
->input
->cur
- ctxt
->input
->base
>= 9) &&
11741 (!strncmp((const char *)&ctxt
->input
->cur
[-9],
11743 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11745 } else if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
11746 (!ctxt
->disableSAX
)) {
11747 if (ctxt
->sax
->cdataBlock
!= NULL
)
11748 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11749 ctxt
->input
->cur
, base
);
11750 else if (ctxt
->sax
->characters
!= NULL
)
11751 ctxt
->sax
->characters(ctxt
->userData
,
11752 ctxt
->input
->cur
, base
);
11754 if (ctxt
->instate
== XML_PARSER_EOF
)
11757 ctxt
->checkIndex
= 0;
11758 ctxt
->instate
= XML_PARSER_CONTENT
;
11760 xmlGenericError(xmlGenericErrorContext
,
11761 "PP: entering CONTENT\n");
11766 case XML_PARSER_MISC
:
11768 if (ctxt
->input
->buf
== NULL
)
11769 avail
= ctxt
->input
->length
-
11770 (ctxt
->input
->cur
- ctxt
->input
->base
);
11772 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11773 (ctxt
->input
->cur
- ctxt
->input
->base
);
11776 cur
= ctxt
->input
->cur
[0];
11777 next
= ctxt
->input
->cur
[1];
11778 if ((cur
== '<') && (next
== '?')) {
11779 if ((!terminate
) &&
11780 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11781 ctxt
->progressive
= XML_PARSER_PI
;
11785 xmlGenericError(xmlGenericErrorContext
,
11786 "PP: Parsing PI\n");
11789 if (ctxt
->instate
== XML_PARSER_EOF
)
11791 ctxt
->instate
= XML_PARSER_MISC
;
11792 ctxt
->progressive
= 1;
11793 ctxt
->checkIndex
= 0;
11794 } else if ((cur
== '<') && (next
== '!') &&
11795 (ctxt
->input
->cur
[2] == '-') &&
11796 (ctxt
->input
->cur
[3] == '-')) {
11797 if ((!terminate
) &&
11798 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11799 ctxt
->progressive
= XML_PARSER_COMMENT
;
11803 xmlGenericError(xmlGenericErrorContext
,
11804 "PP: Parsing Comment\n");
11806 xmlParseComment(ctxt
);
11807 if (ctxt
->instate
== XML_PARSER_EOF
)
11809 ctxt
->instate
= XML_PARSER_MISC
;
11810 ctxt
->progressive
= 1;
11811 ctxt
->checkIndex
= 0;
11812 } else if ((cur
== '<') && (next
== '!') &&
11813 (ctxt
->input
->cur
[2] == 'D') &&
11814 (ctxt
->input
->cur
[3] == 'O') &&
11815 (ctxt
->input
->cur
[4] == 'C') &&
11816 (ctxt
->input
->cur
[5] == 'T') &&
11817 (ctxt
->input
->cur
[6] == 'Y') &&
11818 (ctxt
->input
->cur
[7] == 'P') &&
11819 (ctxt
->input
->cur
[8] == 'E')) {
11820 if ((!terminate
) &&
11821 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0)) {
11822 ctxt
->progressive
= XML_PARSER_DTD
;
11826 xmlGenericError(xmlGenericErrorContext
,
11827 "PP: Parsing internal subset\n");
11829 ctxt
->inSubset
= 1;
11830 ctxt
->progressive
= 0;
11831 ctxt
->checkIndex
= 0;
11832 xmlParseDocTypeDecl(ctxt
);
11833 if (ctxt
->instate
== XML_PARSER_EOF
)
11836 ctxt
->instate
= XML_PARSER_DTD
;
11838 xmlGenericError(xmlGenericErrorContext
,
11839 "PP: entering DTD\n");
11843 * Create and update the external subset.
11845 ctxt
->inSubset
= 2;
11846 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11847 (ctxt
->sax
->externalSubset
!= NULL
))
11848 ctxt
->sax
->externalSubset(ctxt
->userData
,
11849 ctxt
->intSubName
, ctxt
->extSubSystem
,
11851 ctxt
->inSubset
= 0;
11852 xmlCleanSpecialAttr(ctxt
);
11853 ctxt
->instate
= XML_PARSER_PROLOG
;
11855 xmlGenericError(xmlGenericErrorContext
,
11856 "PP: entering PROLOG\n");
11859 } else if ((cur
== '<') && (next
== '!') &&
11863 ctxt
->instate
= XML_PARSER_START_TAG
;
11864 ctxt
->progressive
= XML_PARSER_START_TAG
;
11865 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11867 xmlGenericError(xmlGenericErrorContext
,
11868 "PP: entering START_TAG\n");
11872 case XML_PARSER_PROLOG
:
11874 if (ctxt
->input
->buf
== NULL
)
11875 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11877 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11878 (ctxt
->input
->cur
- ctxt
->input
->base
);
11881 cur
= ctxt
->input
->cur
[0];
11882 next
= ctxt
->input
->cur
[1];
11883 if ((cur
== '<') && (next
== '?')) {
11884 if ((!terminate
) &&
11885 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11886 ctxt
->progressive
= XML_PARSER_PI
;
11890 xmlGenericError(xmlGenericErrorContext
,
11891 "PP: Parsing PI\n");
11894 if (ctxt
->instate
== XML_PARSER_EOF
)
11896 ctxt
->instate
= XML_PARSER_PROLOG
;
11897 ctxt
->progressive
= 1;
11898 } else if ((cur
== '<') && (next
== '!') &&
11899 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11900 if ((!terminate
) &&
11901 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11902 ctxt
->progressive
= XML_PARSER_COMMENT
;
11906 xmlGenericError(xmlGenericErrorContext
,
11907 "PP: Parsing Comment\n");
11909 xmlParseComment(ctxt
);
11910 if (ctxt
->instate
== XML_PARSER_EOF
)
11912 ctxt
->instate
= XML_PARSER_PROLOG
;
11913 ctxt
->progressive
= 1;
11914 } else if ((cur
== '<') && (next
== '!') &&
11918 ctxt
->instate
= XML_PARSER_START_TAG
;
11919 if (ctxt
->progressive
== 0)
11920 ctxt
->progressive
= XML_PARSER_START_TAG
;
11921 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11923 xmlGenericError(xmlGenericErrorContext
,
11924 "PP: entering START_TAG\n");
11928 case XML_PARSER_EPILOG
:
11930 if (ctxt
->input
->buf
== NULL
)
11931 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11933 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11934 (ctxt
->input
->cur
- ctxt
->input
->base
);
11937 cur
= ctxt
->input
->cur
[0];
11938 next
= ctxt
->input
->cur
[1];
11939 if ((cur
== '<') && (next
== '?')) {
11940 if ((!terminate
) &&
11941 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11942 ctxt
->progressive
= XML_PARSER_PI
;
11946 xmlGenericError(xmlGenericErrorContext
,
11947 "PP: Parsing PI\n");
11950 if (ctxt
->instate
== XML_PARSER_EOF
)
11952 ctxt
->instate
= XML_PARSER_EPILOG
;
11953 ctxt
->progressive
= 1;
11954 } else if ((cur
== '<') && (next
== '!') &&
11955 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11956 if ((!terminate
) &&
11957 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11958 ctxt
->progressive
= XML_PARSER_COMMENT
;
11962 xmlGenericError(xmlGenericErrorContext
,
11963 "PP: Parsing Comment\n");
11965 xmlParseComment(ctxt
);
11966 if (ctxt
->instate
== XML_PARSER_EOF
)
11968 ctxt
->instate
= XML_PARSER_EPILOG
;
11969 ctxt
->progressive
= 1;
11970 } else if ((cur
== '<') && (next
== '!') &&
11974 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11975 ctxt
->instate
= XML_PARSER_EOF
;
11977 xmlGenericError(xmlGenericErrorContext
,
11978 "PP: entering EOF\n");
11980 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11981 ctxt
->sax
->endDocument(ctxt
->userData
);
11985 case XML_PARSER_DTD
: {
11987 * Sorry but progressive parsing of the internal subset
11988 * is not expected to be supported. We first check that
11989 * the full content of the internal subset is available and
11990 * the parsing is launched only at that point.
11991 * Internal subset ends up with "']' S? '>'" in an unescaped
11992 * section and not in a ']]>' sequence which are conditional
11993 * sections (whoever argued to keep that crap in XML deserve
11994 * a place in hell !).
12001 base
= ctxt
->input
->cur
- ctxt
->input
->base
;
12002 if (base
< 0) return(0);
12003 if (ctxt
->checkIndex
> base
)
12004 base
= ctxt
->checkIndex
;
12005 buf
= xmlBufContent(ctxt
->input
->buf
->buffer
);
12006 use
= xmlBufUse(ctxt
->input
->buf
->buffer
);
12007 for (;(unsigned int) base
< use
; base
++) {
12009 if (buf
[base
] == quote
)
12013 if ((quote
== 0) && (buf
[base
] == '<')) {
12015 /* special handling of comments */
12016 if (((unsigned int) base
+ 4 < use
) &&
12017 (buf
[base
+ 1] == '!') &&
12018 (buf
[base
+ 2] == '-') &&
12019 (buf
[base
+ 3] == '-')) {
12020 for (;(unsigned int) base
+ 3 < use
; base
++) {
12021 if ((buf
[base
] == '-') &&
12022 (buf
[base
+ 1] == '-') &&
12023 (buf
[base
+ 2] == '>')) {
12031 fprintf(stderr
, "unfinished comment\n");
12038 if (buf
[base
] == '"') {
12042 if (buf
[base
] == '\'') {
12046 if (buf
[base
] == ']') {
12048 fprintf(stderr
, "%c%c%c%c: ", buf
[base
],
12049 buf
[base
+ 1], buf
[base
+ 2], buf
[base
+ 3]);
12051 if ((unsigned int) base
+1 >= use
)
12053 if (buf
[base
+ 1] == ']') {
12054 /* conditional crap, skip both ']' ! */
12058 for (i
= 1; (unsigned int) base
+ i
< use
; i
++) {
12059 if (buf
[base
+ i
] == '>') {
12061 fprintf(stderr
, "found\n");
12063 goto found_end_int_subset
;
12065 if (!IS_BLANK_CH(buf
[base
+ i
])) {
12067 fprintf(stderr
, "not found\n");
12069 goto not_end_of_int_subset
;
12073 fprintf(stderr
, "end of stream\n");
12078 not_end_of_int_subset
:
12079 continue; /* for */
12082 * We didn't found the end of the Internal subset
12085 ctxt
->checkIndex
= base
;
12087 ctxt
->checkIndex
= 0;
12090 xmlGenericError(xmlGenericErrorContext
,
12091 "PP: lookup of int subset end filed\n");
12095 found_end_int_subset
:
12096 ctxt
->checkIndex
= 0;
12097 xmlParseInternalSubset(ctxt
);
12098 if (ctxt
->instate
== XML_PARSER_EOF
)
12100 ctxt
->inSubset
= 2;
12101 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
12102 (ctxt
->sax
->externalSubset
!= NULL
))
12103 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
12104 ctxt
->extSubSystem
, ctxt
->extSubURI
);
12105 ctxt
->inSubset
= 0;
12106 xmlCleanSpecialAttr(ctxt
);
12107 if (ctxt
->instate
== XML_PARSER_EOF
)
12109 ctxt
->instate
= XML_PARSER_PROLOG
;
12110 ctxt
->checkIndex
= 0;
12112 xmlGenericError(xmlGenericErrorContext
,
12113 "PP: entering PROLOG\n");
12117 case XML_PARSER_COMMENT
:
12118 xmlGenericError(xmlGenericErrorContext
,
12119 "PP: internal error, state == COMMENT\n");
12120 ctxt
->instate
= XML_PARSER_CONTENT
;
12122 xmlGenericError(xmlGenericErrorContext
,
12123 "PP: entering CONTENT\n");
12126 case XML_PARSER_IGNORE
:
12127 xmlGenericError(xmlGenericErrorContext
,
12128 "PP: internal error, state == IGNORE");
12129 ctxt
->instate
= XML_PARSER_DTD
;
12131 xmlGenericError(xmlGenericErrorContext
,
12132 "PP: entering DTD\n");
12135 case XML_PARSER_PI
:
12136 xmlGenericError(xmlGenericErrorContext
,
12137 "PP: internal error, state == PI\n");
12138 ctxt
->instate
= XML_PARSER_CONTENT
;
12140 xmlGenericError(xmlGenericErrorContext
,
12141 "PP: entering CONTENT\n");
12144 case XML_PARSER_ENTITY_DECL
:
12145 xmlGenericError(xmlGenericErrorContext
,
12146 "PP: internal error, state == ENTITY_DECL\n");
12147 ctxt
->instate
= XML_PARSER_DTD
;
12149 xmlGenericError(xmlGenericErrorContext
,
12150 "PP: entering DTD\n");
12153 case XML_PARSER_ENTITY_VALUE
:
12154 xmlGenericError(xmlGenericErrorContext
,
12155 "PP: internal error, state == ENTITY_VALUE\n");
12156 ctxt
->instate
= XML_PARSER_CONTENT
;
12158 xmlGenericError(xmlGenericErrorContext
,
12159 "PP: entering DTD\n");
12162 case XML_PARSER_ATTRIBUTE_VALUE
:
12163 xmlGenericError(xmlGenericErrorContext
,
12164 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12165 ctxt
->instate
= XML_PARSER_START_TAG
;
12167 xmlGenericError(xmlGenericErrorContext
,
12168 "PP: entering START_TAG\n");
12171 case XML_PARSER_SYSTEM_LITERAL
:
12172 xmlGenericError(xmlGenericErrorContext
,
12173 "PP: internal error, state == SYSTEM_LITERAL\n");
12174 ctxt
->instate
= XML_PARSER_START_TAG
;
12176 xmlGenericError(xmlGenericErrorContext
,
12177 "PP: entering START_TAG\n");
12180 case XML_PARSER_PUBLIC_LITERAL
:
12181 xmlGenericError(xmlGenericErrorContext
,
12182 "PP: internal error, state == PUBLIC_LITERAL\n");
12183 ctxt
->instate
= XML_PARSER_START_TAG
;
12185 xmlGenericError(xmlGenericErrorContext
,
12186 "PP: entering START_TAG\n");
12193 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
12200 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12201 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
12202 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
12203 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
12204 "Input is not proper UTF-8, indicate encoding !\n%s",
12205 BAD_CAST buffer
, NULL
);
12211 * xmlParseCheckTransition:
12212 * @ctxt: an XML parser context
12213 * @chunk: a char array
12214 * @size: the size in byte of the chunk
12216 * Check depending on the current parser state if the chunk given must be
12217 * processed immediately or one need more data to advance on parsing.
12219 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12222 xmlParseCheckTransition(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
) {
12223 if ((ctxt
== NULL
) || (chunk
== NULL
) || (size
< 0))
12225 if (ctxt
->instate
== XML_PARSER_START_TAG
) {
12226 if (memchr(chunk
, '>', size
) != NULL
)
12230 if (ctxt
->progressive
== XML_PARSER_COMMENT
) {
12231 if (memchr(chunk
, '>', size
) != NULL
)
12235 if (ctxt
->instate
== XML_PARSER_CDATA_SECTION
) {
12236 if (memchr(chunk
, '>', size
) != NULL
)
12240 if (ctxt
->progressive
== XML_PARSER_PI
) {
12241 if (memchr(chunk
, '>', size
) != NULL
)
12245 if (ctxt
->instate
== XML_PARSER_END_TAG
) {
12246 if (memchr(chunk
, '>', size
) != NULL
)
12250 if ((ctxt
->progressive
== XML_PARSER_DTD
) ||
12251 (ctxt
->instate
== XML_PARSER_DTD
)) {
12252 if (memchr(chunk
, '>', size
) != NULL
)
12261 * @ctxt: an XML parser context
12262 * @chunk: an char array
12263 * @size: the size in byte of the chunk
12264 * @terminate: last chunk indicator
12266 * Parse a Chunk of memory
12268 * Returns zero if no error, the xmlParserErrors otherwise.
12271 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
12275 size_t old_avail
= 0;
12279 return(XML_ERR_INTERNAL_ERROR
);
12280 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12281 return(ctxt
->errNo
);
12282 if (ctxt
->instate
== XML_PARSER_EOF
)
12284 if (ctxt
->instate
== XML_PARSER_START
)
12285 xmlDetectSAX2(ctxt
);
12286 if ((size
> 0) && (chunk
!= NULL
) && (!terminate
) &&
12287 (chunk
[size
- 1] == '\r')) {
12294 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
12295 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
12296 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12297 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12300 old_avail
= xmlBufUse(ctxt
->input
->buf
->buffer
);
12302 * Specific handling if we autodetected an encoding, we should not
12303 * push more than the first line ... which depend on the encoding
12304 * And only push the rest once the final encoding was detected
12306 if ((ctxt
->instate
== XML_PARSER_START
) && (ctxt
->input
!= NULL
) &&
12307 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->encoder
!= NULL
)) {
12308 unsigned int len
= 45;
12310 if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12311 BAD_CAST
"UTF-16")) ||
12312 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12313 BAD_CAST
"UTF16")))
12315 else if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12316 BAD_CAST
"UCS-4")) ||
12317 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12321 if (ctxt
->input
->buf
->rawconsumed
< len
)
12322 len
-= ctxt
->input
->buf
->rawconsumed
;
12325 * Change size for reading the initial declaration only
12326 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12327 * will blindly copy extra bytes from memory.
12329 if ((unsigned int) size
> len
) {
12330 remain
= size
- len
;
12336 res
= xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12338 ctxt
->errNo
= XML_PARSER_EOF
;
12339 ctxt
->disableSAX
= 1;
12340 return (XML_PARSER_EOF
);
12342 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12344 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12347 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
12348 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
12349 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
12350 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
12351 (in
->raw
!= NULL
)) {
12353 size_t base
= xmlBufGetInputBase(in
->buffer
, ctxt
->input
);
12354 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12356 nbchars
= xmlCharEncInput(in
, terminate
);
12359 xmlGenericError(xmlGenericErrorContext
,
12360 "xmlParseChunk: encoder error\n");
12361 return(XML_ERR_INVALID_ENCODING
);
12363 xmlBufSetInputBaseCur(in
->buffer
, ctxt
->input
, base
, current
);
12368 xmlParseTryOrFinish(ctxt
, 0);
12370 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
))
12371 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
);
12373 * Depending on the current state it may not be such
12374 * a good idea to try parsing if there is nothing in the chunk
12375 * which would be worth doing a parser state transition and we
12376 * need to wait for more data
12378 if ((terminate
) || (avail
> XML_MAX_TEXT_LENGTH
) ||
12379 (old_avail
== 0) || (avail
== 0) ||
12380 (xmlParseCheckTransition(ctxt
,
12381 (const char *)&ctxt
->input
->base
[old_avail
],
12382 avail
- old_avail
)))
12383 xmlParseTryOrFinish(ctxt
, terminate
);
12385 if (ctxt
->instate
== XML_PARSER_EOF
)
12386 return(ctxt
->errNo
);
12388 if ((ctxt
->input
!= NULL
) &&
12389 (((ctxt
->input
->end
- ctxt
->input
->cur
) > XML_MAX_LOOKUP_LIMIT
) ||
12390 ((ctxt
->input
->cur
- ctxt
->input
->base
) > XML_MAX_LOOKUP_LIMIT
)) &&
12391 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
12392 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
12393 ctxt
->instate
= XML_PARSER_EOF
;
12395 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12396 return(ctxt
->errNo
);
12404 if ((end_in_lf
== 1) && (ctxt
->input
!= NULL
) &&
12405 (ctxt
->input
->buf
!= NULL
)) {
12406 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
12408 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12410 xmlParserInputBufferPush(ctxt
->input
->buf
, 1, "\r");
12412 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
12417 * Check for termination
12421 if (ctxt
->input
!= NULL
) {
12422 if (ctxt
->input
->buf
== NULL
)
12423 cur_avail
= ctxt
->input
->length
-
12424 (ctxt
->input
->cur
- ctxt
->input
->base
);
12426 cur_avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
12427 (ctxt
->input
->cur
- ctxt
->input
->base
);
12430 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
12431 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
12432 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12434 if ((ctxt
->instate
== XML_PARSER_EPILOG
) && (cur_avail
> 0)) {
12435 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12437 if (ctxt
->instate
!= XML_PARSER_EOF
) {
12438 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
12439 ctxt
->sax
->endDocument(ctxt
->userData
);
12441 ctxt
->instate
= XML_PARSER_EOF
;
12443 if (ctxt
->wellFormed
== 0)
12444 return((xmlParserErrors
) ctxt
->errNo
);
12449 /************************************************************************
12451 * I/O front end functions to the parser *
12453 ************************************************************************/
12456 * xmlCreatePushParserCtxt:
12457 * @sax: a SAX handler
12458 * @user_data: The user data returned on SAX callbacks
12459 * @chunk: a pointer to an array of chars
12460 * @size: number of chars in the array
12461 * @filename: an optional file name or URI
12463 * Create a parser context for using the XML parser in push mode.
12464 * If @buffer and @size are non-NULL, the data is used to detect
12465 * the encoding. The remaining characters will be parsed so they
12466 * don't need to be fed in again through xmlParseChunk.
12467 * To allow content encoding detection, @size should be >= 4
12468 * The value of @filename is used for fetching external entities
12469 * and error/warning reports.
12471 * Returns the new parser context or NULL
12475 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12476 const char *chunk
, int size
, const char *filename
) {
12477 xmlParserCtxtPtr ctxt
;
12478 xmlParserInputPtr inputStream
;
12479 xmlParserInputBufferPtr buf
;
12480 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
12483 * plug some encoding conversion routines
12485 if ((chunk
!= NULL
) && (size
>= 4))
12486 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
12488 buf
= xmlAllocParserInputBuffer(enc
);
12489 if (buf
== NULL
) return(NULL
);
12491 ctxt
= xmlNewParserCtxt();
12492 if (ctxt
== NULL
) {
12493 xmlErrMemory(NULL
, "creating parser: out of memory\n");
12494 xmlFreeParserInputBuffer(buf
);
12497 ctxt
->dictNames
= 1;
12498 ctxt
->pushTab
= (void **) xmlMalloc(ctxt
->nameMax
* 3 * sizeof(xmlChar
*));
12499 if (ctxt
->pushTab
== NULL
) {
12500 xmlErrMemory(ctxt
, NULL
);
12501 xmlFreeParserInputBuffer(buf
);
12502 xmlFreeParserCtxt(ctxt
);
12506 #ifdef LIBXML_SAX1_ENABLED
12507 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
12508 #endif /* LIBXML_SAX1_ENABLED */
12509 xmlFree(ctxt
->sax
);
12510 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
12511 if (ctxt
->sax
== NULL
) {
12512 xmlErrMemory(ctxt
, NULL
);
12513 xmlFreeParserInputBuffer(buf
);
12514 xmlFreeParserCtxt(ctxt
);
12517 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
12518 if (sax
->initialized
== XML_SAX2_MAGIC
)
12519 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
12521 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
12522 if (user_data
!= NULL
)
12523 ctxt
->userData
= user_data
;
12525 if (filename
== NULL
) {
12526 ctxt
->directory
= NULL
;
12528 ctxt
->directory
= xmlParserGetDirectory(filename
);
12531 inputStream
= xmlNewInputStream(ctxt
);
12532 if (inputStream
== NULL
) {
12533 xmlFreeParserCtxt(ctxt
);
12534 xmlFreeParserInputBuffer(buf
);
12538 if (filename
== NULL
)
12539 inputStream
->filename
= NULL
;
12541 inputStream
->filename
= (char *)
12542 xmlCanonicPath((const xmlChar
*) filename
);
12543 if (inputStream
->filename
== NULL
) {
12544 xmlFreeParserCtxt(ctxt
);
12545 xmlFreeParserInputBuffer(buf
);
12549 inputStream
->buf
= buf
;
12550 xmlBufResetInput(inputStream
->buf
->buffer
, inputStream
);
12551 inputPush(ctxt
, inputStream
);
12554 * If the caller didn't provide an initial 'chunk' for determining
12555 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12556 * that it can be automatically determined later
12558 if ((size
== 0) || (chunk
== NULL
)) {
12559 ctxt
->charset
= XML_CHAR_ENCODING_NONE
;
12560 } else if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
12561 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12562 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12564 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12566 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12568 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12572 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12573 xmlSwitchEncoding(ctxt
, enc
);
12578 #endif /* LIBXML_PUSH_ENABLED */
12582 * @ctxt: an XML parser context
12584 * Blocks further parser processing
12587 xmlStopParser(xmlParserCtxtPtr ctxt
) {
12590 ctxt
->instate
= XML_PARSER_EOF
;
12591 ctxt
->errNo
= XML_ERR_USER_STOP
;
12592 ctxt
->disableSAX
= 1;
12593 if (ctxt
->input
!= NULL
) {
12594 ctxt
->input
->cur
= BAD_CAST
"";
12595 ctxt
->input
->base
= ctxt
->input
->cur
;
12600 * xmlCreateIOParserCtxt:
12601 * @sax: a SAX handler
12602 * @user_data: The user data returned on SAX callbacks
12603 * @ioread: an I/O read function
12604 * @ioclose: an I/O close function
12605 * @ioctx: an I/O handler
12606 * @enc: the charset encoding if known
12608 * Create a parser context for using the XML parser with an existing
12611 * Returns the new parser context or NULL
12614 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12615 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
12616 void *ioctx
, xmlCharEncoding enc
) {
12617 xmlParserCtxtPtr ctxt
;
12618 xmlParserInputPtr inputStream
;
12619 xmlParserInputBufferPtr buf
;
12621 if (ioread
== NULL
) return(NULL
);
12623 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
12625 if (ioclose
!= NULL
)
12630 ctxt
= xmlNewParserCtxt();
12631 if (ctxt
== NULL
) {
12632 xmlFreeParserInputBuffer(buf
);
12636 #ifdef LIBXML_SAX1_ENABLED
12637 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
12638 #endif /* LIBXML_SAX1_ENABLED */
12639 xmlFree(ctxt
->sax
);
12640 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
12641 if (ctxt
->sax
== NULL
) {
12642 xmlErrMemory(ctxt
, NULL
);
12643 xmlFreeParserCtxt(ctxt
);
12646 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
12647 if (sax
->initialized
== XML_SAX2_MAGIC
)
12648 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
12650 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
12651 if (user_data
!= NULL
)
12652 ctxt
->userData
= user_data
;
12655 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
12656 if (inputStream
== NULL
) {
12657 xmlFreeParserCtxt(ctxt
);
12660 inputPush(ctxt
, inputStream
);
12665 #ifdef LIBXML_VALID_ENABLED
12666 /************************************************************************
12668 * Front ends when parsing a DTD *
12670 ************************************************************************/
12674 * @sax: the SAX handler block or NULL
12675 * @input: an Input Buffer
12676 * @enc: the charset encoding if known
12678 * Load and parse a DTD
12680 * Returns the resulting xmlDtdPtr or NULL in case of error.
12681 * @input will be freed by the function in any case.
12685 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
12686 xmlCharEncoding enc
) {
12687 xmlDtdPtr ret
= NULL
;
12688 xmlParserCtxtPtr ctxt
;
12689 xmlParserInputPtr pinput
= NULL
;
12695 ctxt
= xmlNewParserCtxt();
12696 if (ctxt
== NULL
) {
12697 xmlFreeParserInputBuffer(input
);
12701 /* We are loading a DTD */
12702 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12705 * Set-up the SAX context
12708 if (ctxt
->sax
!= NULL
)
12709 xmlFree(ctxt
->sax
);
12711 ctxt
->userData
= ctxt
;
12713 xmlDetectSAX2(ctxt
);
12716 * generate a parser input from the I/O handler
12719 pinput
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
12720 if (pinput
== NULL
) {
12721 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12722 xmlFreeParserInputBuffer(input
);
12723 xmlFreeParserCtxt(ctxt
);
12728 * plug some encoding conversion routines here.
12730 if (xmlPushInput(ctxt
, pinput
) < 0) {
12731 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12732 xmlFreeParserCtxt(ctxt
);
12735 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12736 xmlSwitchEncoding(ctxt
, enc
);
12739 pinput
->filename
= NULL
;
12742 pinput
->base
= ctxt
->input
->cur
;
12743 pinput
->cur
= ctxt
->input
->cur
;
12744 pinput
->free
= NULL
;
12747 * let's parse that entity knowing it's an external subset.
12749 ctxt
->inSubset
= 2;
12750 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12751 if (ctxt
->myDoc
== NULL
) {
12752 xmlErrMemory(ctxt
, "New Doc failed");
12755 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12756 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12757 BAD_CAST
"none", BAD_CAST
"none");
12759 if ((enc
== XML_CHAR_ENCODING_NONE
) &&
12760 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
12762 * Get the 4 first bytes and decode the charset
12763 * if enc != XML_CHAR_ENCODING_NONE
12764 * plug some encoding conversion routines.
12770 enc
= xmlDetectCharEncoding(start
, 4);
12771 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12772 xmlSwitchEncoding(ctxt
, enc
);
12776 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
12778 if (ctxt
->myDoc
!= NULL
) {
12779 if (ctxt
->wellFormed
) {
12780 ret
= ctxt
->myDoc
->extSubset
;
12781 ctxt
->myDoc
->extSubset
= NULL
;
12786 tmp
= ret
->children
;
12787 while (tmp
!= NULL
) {
12795 xmlFreeDoc(ctxt
->myDoc
);
12796 ctxt
->myDoc
= NULL
;
12798 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12799 xmlFreeParserCtxt(ctxt
);
12806 * @sax: the SAX handler block
12807 * @ExternalID: a NAME* containing the External ID of the DTD
12808 * @SystemID: a NAME* containing the URL to the DTD
12810 * Load and parse an external subset.
12812 * Returns the resulting xmlDtdPtr or NULL in case of error.
12816 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
12817 const xmlChar
*SystemID
) {
12818 xmlDtdPtr ret
= NULL
;
12819 xmlParserCtxtPtr ctxt
;
12820 xmlParserInputPtr input
= NULL
;
12821 xmlCharEncoding enc
;
12822 xmlChar
* systemIdCanonic
;
12824 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
12826 ctxt
= xmlNewParserCtxt();
12827 if (ctxt
== NULL
) {
12831 /* We are loading a DTD */
12832 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12835 * Set-up the SAX context
12838 if (ctxt
->sax
!= NULL
)
12839 xmlFree(ctxt
->sax
);
12841 ctxt
->userData
= ctxt
;
12845 * Canonicalise the system ID
12847 systemIdCanonic
= xmlCanonicPath(SystemID
);
12848 if ((SystemID
!= NULL
) && (systemIdCanonic
== NULL
)) {
12849 xmlFreeParserCtxt(ctxt
);
12854 * Ask the Entity resolver to load the damn thing
12857 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
12858 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
,
12860 if (input
== NULL
) {
12861 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12862 xmlFreeParserCtxt(ctxt
);
12863 if (systemIdCanonic
!= NULL
)
12864 xmlFree(systemIdCanonic
);
12869 * plug some encoding conversion routines here.
12871 if (xmlPushInput(ctxt
, input
) < 0) {
12872 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12873 xmlFreeParserCtxt(ctxt
);
12874 if (systemIdCanonic
!= NULL
)
12875 xmlFree(systemIdCanonic
);
12878 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12879 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
12880 xmlSwitchEncoding(ctxt
, enc
);
12883 if (input
->filename
== NULL
)
12884 input
->filename
= (char *) systemIdCanonic
;
12886 xmlFree(systemIdCanonic
);
12889 input
->base
= ctxt
->input
->cur
;
12890 input
->cur
= ctxt
->input
->cur
;
12891 input
->free
= NULL
;
12894 * let's parse that entity knowing it's an external subset.
12896 ctxt
->inSubset
= 2;
12897 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12898 if (ctxt
->myDoc
== NULL
) {
12899 xmlErrMemory(ctxt
, "New Doc failed");
12900 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12901 xmlFreeParserCtxt(ctxt
);
12904 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12905 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12906 ExternalID
, SystemID
);
12907 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
12909 if (ctxt
->myDoc
!= NULL
) {
12910 if (ctxt
->wellFormed
) {
12911 ret
= ctxt
->myDoc
->extSubset
;
12912 ctxt
->myDoc
->extSubset
= NULL
;
12917 tmp
= ret
->children
;
12918 while (tmp
!= NULL
) {
12926 xmlFreeDoc(ctxt
->myDoc
);
12927 ctxt
->myDoc
= NULL
;
12929 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12930 xmlFreeParserCtxt(ctxt
);
12938 * @ExternalID: a NAME* containing the External ID of the DTD
12939 * @SystemID: a NAME* containing the URL to the DTD
12941 * Load and parse an external subset.
12943 * Returns the resulting xmlDtdPtr or NULL in case of error.
12947 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
12948 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
12950 #endif /* LIBXML_VALID_ENABLED */
12952 /************************************************************************
12954 * Front ends when parsing an Entity *
12956 ************************************************************************/
12959 * xmlParseCtxtExternalEntity:
12960 * @ctx: the existing parsing context
12961 * @URL: the URL for the entity to load
12962 * @ID: the System ID for the entity to load
12963 * @lst: the return value for the set of parsed nodes
12965 * Parse an external general entity within an existing parsing context
12966 * An external general parsed entity is well-formed if it matches the
12967 * production labeled extParsedEnt.
12969 * [78] extParsedEnt ::= TextDecl? content
12971 * Returns 0 if the entity is well formed, -1 in case of args problem and
12972 * the parser error code otherwise
12976 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
12977 const xmlChar
*ID
, xmlNodePtr
*lst
) {
12978 xmlParserCtxtPtr ctxt
;
12980 xmlNodePtr newRoot
;
12981 xmlSAXHandlerPtr oldsax
= NULL
;
12984 xmlCharEncoding enc
;
12986 if (ctx
== NULL
) return(-1);
12988 if (((ctx
->depth
> 40) && ((ctx
->options
& XML_PARSE_HUGE
) == 0)) ||
12989 (ctx
->depth
> 1024)) {
12990 return(XML_ERR_ENTITY_LOOP
);
12995 if ((URL
== NULL
) && (ID
== NULL
))
12997 if (ctx
->myDoc
== NULL
) /* @@ relax but check for dereferences */
13000 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, ctx
);
13001 if (ctxt
== NULL
) {
13005 oldsax
= ctxt
->sax
;
13006 ctxt
->sax
= ctx
->sax
;
13007 xmlDetectSAX2(ctxt
);
13008 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13009 if (newDoc
== NULL
) {
13010 xmlFreeParserCtxt(ctxt
);
13013 newDoc
->properties
= XML_DOC_INTERNAL
;
13014 if (ctx
->myDoc
->dict
) {
13015 newDoc
->dict
= ctx
->myDoc
->dict
;
13016 xmlDictReference(newDoc
->dict
);
13018 if (ctx
->myDoc
!= NULL
) {
13019 newDoc
->intSubset
= ctx
->myDoc
->intSubset
;
13020 newDoc
->extSubset
= ctx
->myDoc
->extSubset
;
13022 if (ctx
->myDoc
->URL
!= NULL
) {
13023 newDoc
->URL
= xmlStrdup(ctx
->myDoc
->URL
);
13025 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13026 if (newRoot
== NULL
) {
13027 ctxt
->sax
= oldsax
;
13028 xmlFreeParserCtxt(ctxt
);
13029 newDoc
->intSubset
= NULL
;
13030 newDoc
->extSubset
= NULL
;
13031 xmlFreeDoc(newDoc
);
13034 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13035 nodePush(ctxt
, newDoc
->children
);
13036 if (ctx
->myDoc
== NULL
) {
13037 ctxt
->myDoc
= newDoc
;
13039 ctxt
->myDoc
= ctx
->myDoc
;
13040 newDoc
->children
->doc
= ctx
->myDoc
;
13044 * Get the 4 first bytes and decode the charset
13045 * if enc != XML_CHAR_ENCODING_NONE
13046 * plug some encoding conversion routines.
13049 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
13054 enc
= xmlDetectCharEncoding(start
, 4);
13055 if (enc
!= XML_CHAR_ENCODING_NONE
) {
13056 xmlSwitchEncoding(ctxt
, enc
);
13061 * Parse a possible text declaration first
13063 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13064 xmlParseTextDecl(ctxt
);
13066 * An XML-1.0 document can't reference an entity not XML-1.0
13068 if ((xmlStrEqual(ctx
->version
, BAD_CAST
"1.0")) &&
13069 (!xmlStrEqual(ctxt
->input
->version
, BAD_CAST
"1.0"))) {
13070 xmlFatalErrMsg(ctxt
, XML_ERR_VERSION_MISMATCH
,
13071 "Version mismatch between document and entity\n");
13076 * If the user provided its own SAX callbacks then reuse the
13077 * useData callback field, otherwise the expected setup in a
13078 * DOM builder is to have userData == ctxt
13080 if (ctx
->userData
== ctx
)
13081 ctxt
->userData
= ctxt
;
13083 ctxt
->userData
= ctx
->userData
;
13086 * Doing validity checking on chunk doesn't make sense
13088 ctxt
->instate
= XML_PARSER_CONTENT
;
13089 ctxt
->validate
= ctx
->validate
;
13090 ctxt
->valid
= ctx
->valid
;
13091 ctxt
->loadsubset
= ctx
->loadsubset
;
13092 ctxt
->depth
= ctx
->depth
+ 1;
13093 ctxt
->replaceEntities
= ctx
->replaceEntities
;
13094 if (ctxt
->validate
) {
13095 ctxt
->vctxt
.error
= ctx
->vctxt
.error
;
13096 ctxt
->vctxt
.warning
= ctx
->vctxt
.warning
;
13098 ctxt
->vctxt
.error
= NULL
;
13099 ctxt
->vctxt
.warning
= NULL
;
13101 ctxt
->vctxt
.nodeTab
= NULL
;
13102 ctxt
->vctxt
.nodeNr
= 0;
13103 ctxt
->vctxt
.nodeMax
= 0;
13104 ctxt
->vctxt
.node
= NULL
;
13105 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
13106 ctxt
->dict
= ctx
->dict
;
13107 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13108 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13109 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13110 ctxt
->dictNames
= ctx
->dictNames
;
13111 ctxt
->attsDefault
= ctx
->attsDefault
;
13112 ctxt
->attsSpecial
= ctx
->attsSpecial
;
13113 ctxt
->linenumbers
= ctx
->linenumbers
;
13115 xmlParseContent(ctxt
);
13117 ctx
->validate
= ctxt
->validate
;
13118 ctx
->valid
= ctxt
->valid
;
13119 if ((RAW
== '<') && (NXT(1) == '/')) {
13120 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13121 } else if (RAW
!= 0) {
13122 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13124 if (ctxt
->node
!= newDoc
->children
) {
13125 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13128 if (!ctxt
->wellFormed
) {
13129 if (ctxt
->errNo
== 0)
13138 * Return the newly created nodeset after unlinking it from
13139 * they pseudo parent.
13141 cur
= newDoc
->children
->children
;
13143 while (cur
!= NULL
) {
13144 cur
->parent
= NULL
;
13147 newDoc
->children
->children
= NULL
;
13151 ctxt
->sax
= oldsax
;
13153 ctxt
->attsDefault
= NULL
;
13154 ctxt
->attsSpecial
= NULL
;
13155 xmlFreeParserCtxt(ctxt
);
13156 newDoc
->intSubset
= NULL
;
13157 newDoc
->extSubset
= NULL
;
13158 xmlFreeDoc(newDoc
);
13164 * xmlParseExternalEntityPrivate:
13165 * @doc: the document the chunk pertains to
13166 * @oldctxt: the previous parser context if available
13167 * @sax: the SAX handler bloc (possibly NULL)
13168 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13169 * @depth: Used for loop detection, use 0
13170 * @URL: the URL for the entity to load
13171 * @ID: the System ID for the entity to load
13172 * @list: the return value for the set of parsed nodes
13174 * Private version of xmlParseExternalEntity()
13176 * Returns 0 if the entity is well formed, -1 in case of args problem and
13177 * the parser error code otherwise
13180 static xmlParserErrors
13181 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
13182 xmlSAXHandlerPtr sax
,
13183 void *user_data
, int depth
, const xmlChar
*URL
,
13184 const xmlChar
*ID
, xmlNodePtr
*list
) {
13185 xmlParserCtxtPtr ctxt
;
13187 xmlNodePtr newRoot
;
13188 xmlSAXHandlerPtr oldsax
= NULL
;
13189 xmlParserErrors ret
= XML_ERR_OK
;
13191 xmlCharEncoding enc
;
13193 if (((depth
> 40) &&
13194 ((oldctxt
== NULL
) || (oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
13196 return(XML_ERR_ENTITY_LOOP
);
13201 if ((URL
== NULL
) && (ID
== NULL
))
13202 return(XML_ERR_INTERNAL_ERROR
);
13204 return(XML_ERR_INTERNAL_ERROR
);
13207 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, oldctxt
);
13208 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
13209 ctxt
->userData
= ctxt
;
13210 if (oldctxt
!= NULL
) {
13211 ctxt
->_private
= oldctxt
->_private
;
13212 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13213 ctxt
->validate
= oldctxt
->validate
;
13214 ctxt
->external
= oldctxt
->external
;
13215 ctxt
->record_info
= oldctxt
->record_info
;
13216 ctxt
->node_seq
.maximum
= oldctxt
->node_seq
.maximum
;
13217 ctxt
->node_seq
.length
= oldctxt
->node_seq
.length
;
13218 ctxt
->node_seq
.buffer
= oldctxt
->node_seq
.buffer
;
13221 * Doing validity checking on chunk without context
13222 * doesn't make sense
13224 ctxt
->_private
= NULL
;
13225 ctxt
->validate
= 0;
13226 ctxt
->external
= 2;
13227 ctxt
->loadsubset
= 0;
13230 oldsax
= ctxt
->sax
;
13232 if (user_data
!= NULL
)
13233 ctxt
->userData
= user_data
;
13235 xmlDetectSAX2(ctxt
);
13236 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13237 if (newDoc
== NULL
) {
13238 ctxt
->node_seq
.maximum
= 0;
13239 ctxt
->node_seq
.length
= 0;
13240 ctxt
->node_seq
.buffer
= NULL
;
13241 xmlFreeParserCtxt(ctxt
);
13242 return(XML_ERR_INTERNAL_ERROR
);
13244 newDoc
->properties
= XML_DOC_INTERNAL
;
13245 newDoc
->intSubset
= doc
->intSubset
;
13246 newDoc
->extSubset
= doc
->extSubset
;
13247 newDoc
->dict
= doc
->dict
;
13248 xmlDictReference(newDoc
->dict
);
13250 if (doc
->URL
!= NULL
) {
13251 newDoc
->URL
= xmlStrdup(doc
->URL
);
13253 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13254 if (newRoot
== NULL
) {
13256 ctxt
->sax
= oldsax
;
13257 ctxt
->node_seq
.maximum
= 0;
13258 ctxt
->node_seq
.length
= 0;
13259 ctxt
->node_seq
.buffer
= NULL
;
13260 xmlFreeParserCtxt(ctxt
);
13261 newDoc
->intSubset
= NULL
;
13262 newDoc
->extSubset
= NULL
;
13263 xmlFreeDoc(newDoc
);
13264 return(XML_ERR_INTERNAL_ERROR
);
13266 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13267 nodePush(ctxt
, newDoc
->children
);
13269 newRoot
->doc
= doc
;
13272 * Get the 4 first bytes and decode the charset
13273 * if enc != XML_CHAR_ENCODING_NONE
13274 * plug some encoding conversion routines.
13277 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
13282 enc
= xmlDetectCharEncoding(start
, 4);
13283 if (enc
!= XML_CHAR_ENCODING_NONE
) {
13284 xmlSwitchEncoding(ctxt
, enc
);
13289 * Parse a possible text declaration first
13291 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13292 xmlParseTextDecl(ctxt
);
13295 ctxt
->instate
= XML_PARSER_CONTENT
;
13296 ctxt
->depth
= depth
;
13298 xmlParseContent(ctxt
);
13300 if ((RAW
== '<') && (NXT(1) == '/')) {
13301 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13302 } else if (RAW
!= 0) {
13303 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13305 if (ctxt
->node
!= newDoc
->children
) {
13306 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13309 if (!ctxt
->wellFormed
) {
13310 if (ctxt
->errNo
== 0)
13311 ret
= XML_ERR_INTERNAL_ERROR
;
13313 ret
= (xmlParserErrors
)ctxt
->errNo
;
13315 if (list
!= NULL
) {
13319 * Return the newly created nodeset after unlinking it from
13320 * they pseudo parent.
13322 cur
= newDoc
->children
->children
;
13324 while (cur
!= NULL
) {
13325 cur
->parent
= NULL
;
13328 newDoc
->children
->children
= NULL
;
13334 * Record in the parent context the number of entities replacement
13335 * done when parsing that reference.
13337 if (oldctxt
!= NULL
)
13338 oldctxt
->nbentities
+= ctxt
->nbentities
;
13341 * Also record the size of the entity parsed
13343 if (ctxt
->input
!= NULL
) {
13344 oldctxt
->sizeentities
+= ctxt
->input
->consumed
;
13345 oldctxt
->sizeentities
+= (ctxt
->input
->cur
- ctxt
->input
->base
);
13348 * And record the last error if any
13350 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
13351 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13354 ctxt
->sax
= oldsax
;
13355 oldctxt
->node_seq
.maximum
= ctxt
->node_seq
.maximum
;
13356 oldctxt
->node_seq
.length
= ctxt
->node_seq
.length
;
13357 oldctxt
->node_seq
.buffer
= ctxt
->node_seq
.buffer
;
13358 ctxt
->node_seq
.maximum
= 0;
13359 ctxt
->node_seq
.length
= 0;
13360 ctxt
->node_seq
.buffer
= NULL
;
13361 xmlFreeParserCtxt(ctxt
);
13362 newDoc
->intSubset
= NULL
;
13363 newDoc
->extSubset
= NULL
;
13364 xmlFreeDoc(newDoc
);
13369 #ifdef LIBXML_SAX1_ENABLED
13371 * xmlParseExternalEntity:
13372 * @doc: the document the chunk pertains to
13373 * @sax: the SAX handler bloc (possibly NULL)
13374 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13375 * @depth: Used for loop detection, use 0
13376 * @URL: the URL for the entity to load
13377 * @ID: the System ID for the entity to load
13378 * @lst: the return value for the set of parsed nodes
13380 * Parse an external general entity
13381 * An external general parsed entity is well-formed if it matches the
13382 * production labeled extParsedEnt.
13384 * [78] extParsedEnt ::= TextDecl? content
13386 * Returns 0 if the entity is well formed, -1 in case of args problem and
13387 * the parser error code otherwise
13391 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
13392 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
13393 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
13398 * xmlParseBalancedChunkMemory:
13399 * @doc: the document the chunk pertains to
13400 * @sax: the SAX handler bloc (possibly NULL)
13401 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13402 * @depth: Used for loop detection, use 0
13403 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13404 * @lst: the return value for the set of parsed nodes
13406 * Parse a well-balanced chunk of an XML document
13407 * called by the parser
13408 * The allowed sequence for the Well Balanced Chunk is the one defined by
13409 * the content production in the XML grammar:
13411 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13413 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13414 * the parser error code otherwise
13418 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13419 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
13420 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
13421 depth
, string
, lst
, 0 );
13423 #endif /* LIBXML_SAX1_ENABLED */
13426 * xmlParseBalancedChunkMemoryInternal:
13427 * @oldctxt: the existing parsing context
13428 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13429 * @user_data: the user data field for the parser context
13430 * @lst: the return value for the set of parsed nodes
13433 * Parse a well-balanced chunk of an XML document
13434 * called by the parser
13435 * The allowed sequence for the Well Balanced Chunk is the one defined by
13436 * the content production in the XML grammar:
13438 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13440 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13441 * error code otherwise
13443 * In case recover is set to 1, the nodelist will not be empty even if
13444 * the parsed chunk is not well balanced.
13446 static xmlParserErrors
13447 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
13448 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
13449 xmlParserCtxtPtr ctxt
;
13450 xmlDocPtr newDoc
= NULL
;
13451 xmlNodePtr newRoot
;
13452 xmlSAXHandlerPtr oldsax
= NULL
;
13453 xmlNodePtr content
= NULL
;
13454 xmlNodePtr last
= NULL
;
13456 xmlParserErrors ret
= XML_ERR_OK
;
13461 if (((oldctxt
->depth
> 40) && ((oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
13462 (oldctxt
->depth
> 1024)) {
13463 return(XML_ERR_ENTITY_LOOP
);
13469 if (string
== NULL
)
13470 return(XML_ERR_INTERNAL_ERROR
);
13472 size
= xmlStrlen(string
);
13474 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13475 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
13476 if (user_data
!= NULL
)
13477 ctxt
->userData
= user_data
;
13479 ctxt
->userData
= ctxt
;
13480 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
13481 ctxt
->dict
= oldctxt
->dict
;
13482 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13483 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13484 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13487 /* propagate namespaces down the entity */
13488 for (i
= 0;i
< oldctxt
->nsNr
;i
+= 2) {
13489 nsPush(ctxt
, oldctxt
->nsTab
[i
], oldctxt
->nsTab
[i
+1]);
13493 oldsax
= ctxt
->sax
;
13494 ctxt
->sax
= oldctxt
->sax
;
13495 xmlDetectSAX2(ctxt
);
13496 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
13497 ctxt
->options
= oldctxt
->options
;
13499 ctxt
->_private
= oldctxt
->_private
;
13500 if (oldctxt
->myDoc
== NULL
) {
13501 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13502 if (newDoc
== NULL
) {
13503 ctxt
->sax
= oldsax
;
13505 xmlFreeParserCtxt(ctxt
);
13506 return(XML_ERR_INTERNAL_ERROR
);
13508 newDoc
->properties
= XML_DOC_INTERNAL
;
13509 newDoc
->dict
= ctxt
->dict
;
13510 xmlDictReference(newDoc
->dict
);
13511 ctxt
->myDoc
= newDoc
;
13513 ctxt
->myDoc
= oldctxt
->myDoc
;
13514 content
= ctxt
->myDoc
->children
;
13515 last
= ctxt
->myDoc
->last
;
13517 newRoot
= xmlNewDocNode(ctxt
->myDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13518 if (newRoot
== NULL
) {
13519 ctxt
->sax
= oldsax
;
13521 xmlFreeParserCtxt(ctxt
);
13522 if (newDoc
!= NULL
) {
13523 xmlFreeDoc(newDoc
);
13525 return(XML_ERR_INTERNAL_ERROR
);
13527 ctxt
->myDoc
->children
= NULL
;
13528 ctxt
->myDoc
->last
= NULL
;
13529 xmlAddChild((xmlNodePtr
) ctxt
->myDoc
, newRoot
);
13530 nodePush(ctxt
, ctxt
->myDoc
->children
);
13531 ctxt
->instate
= XML_PARSER_CONTENT
;
13532 ctxt
->depth
= oldctxt
->depth
+ 1;
13534 ctxt
->validate
= 0;
13535 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13536 if ((oldctxt
->validate
) || (oldctxt
->replaceEntities
!= 0)) {
13538 * ID/IDREF registration will be done in xmlValidateElement below
13540 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13542 ctxt
->dictNames
= oldctxt
->dictNames
;
13543 ctxt
->attsDefault
= oldctxt
->attsDefault
;
13544 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
13546 xmlParseContent(ctxt
);
13547 if ((RAW
== '<') && (NXT(1) == '/')) {
13548 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13549 } else if (RAW
!= 0) {
13550 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13552 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
13553 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13556 if (!ctxt
->wellFormed
) {
13557 if (ctxt
->errNo
== 0)
13558 ret
= XML_ERR_INTERNAL_ERROR
;
13560 ret
= (xmlParserErrors
)ctxt
->errNo
;
13565 if ((lst
!= NULL
) && (ret
== XML_ERR_OK
)) {
13569 * Return the newly created nodeset after unlinking it from
13570 * they pseudo parent.
13572 cur
= ctxt
->myDoc
->children
->children
;
13574 while (cur
!= NULL
) {
13575 #ifdef LIBXML_VALID_ENABLED
13576 if ((oldctxt
->validate
) && (oldctxt
->wellFormed
) &&
13577 (oldctxt
->myDoc
) && (oldctxt
->myDoc
->intSubset
) &&
13578 (cur
->type
== XML_ELEMENT_NODE
)) {
13579 oldctxt
->valid
&= xmlValidateElement(&oldctxt
->vctxt
,
13580 oldctxt
->myDoc
, cur
);
13582 #endif /* LIBXML_VALID_ENABLED */
13583 cur
->parent
= NULL
;
13586 ctxt
->myDoc
->children
->children
= NULL
;
13588 if (ctxt
->myDoc
!= NULL
) {
13589 xmlFreeNode(ctxt
->myDoc
->children
);
13590 ctxt
->myDoc
->children
= content
;
13591 ctxt
->myDoc
->last
= last
;
13595 * Record in the parent context the number of entities replacement
13596 * done when parsing that reference.
13598 if (oldctxt
!= NULL
)
13599 oldctxt
->nbentities
+= ctxt
->nbentities
;
13602 * Also record the last error if any
13604 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
13605 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13607 ctxt
->sax
= oldsax
;
13609 ctxt
->attsDefault
= NULL
;
13610 ctxt
->attsSpecial
= NULL
;
13611 xmlFreeParserCtxt(ctxt
);
13612 if (newDoc
!= NULL
) {
13613 xmlFreeDoc(newDoc
);
13620 * xmlParseInNodeContext:
13621 * @node: the context node
13622 * @data: the input string
13623 * @datalen: the input string length in bytes
13624 * @options: a combination of xmlParserOption
13625 * @lst: the return value for the set of parsed nodes
13627 * Parse a well-balanced chunk of an XML document
13628 * within the context (DTD, namespaces, etc ...) of the given node.
13630 * The allowed sequence for the data is a Well Balanced Chunk defined by
13631 * the content production in the XML grammar:
13633 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13635 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13636 * error code otherwise
13639 xmlParseInNodeContext(xmlNodePtr node
, const char *data
, int datalen
,
13640 int options
, xmlNodePtr
*lst
) {
13642 xmlParserCtxtPtr ctxt
;
13643 xmlDocPtr doc
= NULL
;
13644 xmlNodePtr fake
, cur
;
13647 xmlParserErrors ret
= XML_ERR_OK
;
13650 * check all input parameters, grab the document
13652 if ((lst
== NULL
) || (node
== NULL
) || (data
== NULL
) || (datalen
< 0))
13653 return(XML_ERR_INTERNAL_ERROR
);
13654 switch (node
->type
) {
13655 case XML_ELEMENT_NODE
:
13656 case XML_ATTRIBUTE_NODE
:
13657 case XML_TEXT_NODE
:
13658 case XML_CDATA_SECTION_NODE
:
13659 case XML_ENTITY_REF_NODE
:
13661 case XML_COMMENT_NODE
:
13662 case XML_DOCUMENT_NODE
:
13663 case XML_HTML_DOCUMENT_NODE
:
13666 return(XML_ERR_INTERNAL_ERROR
);
13669 while ((node
!= NULL
) && (node
->type
!= XML_ELEMENT_NODE
) &&
13670 (node
->type
!= XML_DOCUMENT_NODE
) &&
13671 (node
->type
!= XML_HTML_DOCUMENT_NODE
))
13672 node
= node
->parent
;
13674 return(XML_ERR_INTERNAL_ERROR
);
13675 if (node
->type
== XML_ELEMENT_NODE
)
13678 doc
= (xmlDocPtr
) node
;
13680 return(XML_ERR_INTERNAL_ERROR
);
13683 * allocate a context and set-up everything not related to the
13684 * node position in the tree
13686 if (doc
->type
== XML_DOCUMENT_NODE
)
13687 ctxt
= xmlCreateMemoryParserCtxt((char *) data
, datalen
);
13688 #ifdef LIBXML_HTML_ENABLED
13689 else if (doc
->type
== XML_HTML_DOCUMENT_NODE
) {
13690 ctxt
= htmlCreateMemoryParserCtxt((char *) data
, datalen
);
13692 * When parsing in context, it makes no sense to add implied
13693 * elements like html/body/etc...
13695 options
|= HTML_PARSE_NOIMPLIED
;
13699 return(XML_ERR_INTERNAL_ERROR
);
13702 return(XML_ERR_NO_MEMORY
);
13705 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13706 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13707 * we must wait until the last moment to free the original one.
13709 if (doc
->dict
!= NULL
) {
13710 if (ctxt
->dict
!= NULL
)
13711 xmlDictFree(ctxt
->dict
);
13712 ctxt
->dict
= doc
->dict
;
13714 options
|= XML_PARSE_NODICT
;
13716 if (doc
->encoding
!= NULL
) {
13717 xmlCharEncodingHandlerPtr hdlr
;
13719 if (ctxt
->encoding
!= NULL
)
13720 xmlFree((xmlChar
*) ctxt
->encoding
);
13721 ctxt
->encoding
= xmlStrdup((const xmlChar
*) doc
->encoding
);
13723 hdlr
= xmlFindCharEncodingHandler((const char *) doc
->encoding
);
13724 if (hdlr
!= NULL
) {
13725 xmlSwitchToEncoding(ctxt
, hdlr
);
13727 return(XML_ERR_UNSUPPORTED_ENCODING
);
13731 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13732 xmlDetectSAX2(ctxt
);
13734 /* parsing in context, i.e. as within existing content */
13735 ctxt
->instate
= XML_PARSER_CONTENT
;
13737 fake
= xmlNewComment(NULL
);
13738 if (fake
== NULL
) {
13739 xmlFreeParserCtxt(ctxt
);
13740 return(XML_ERR_NO_MEMORY
);
13742 xmlAddChild(node
, fake
);
13744 if (node
->type
== XML_ELEMENT_NODE
) {
13745 nodePush(ctxt
, node
);
13747 * initialize the SAX2 namespaces stack
13750 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_NODE
)) {
13751 xmlNsPtr ns
= cur
->nsDef
;
13752 const xmlChar
*iprefix
, *ihref
;
13754 while (ns
!= NULL
) {
13756 iprefix
= xmlDictLookup(ctxt
->dict
, ns
->prefix
, -1);
13757 ihref
= xmlDictLookup(ctxt
->dict
, ns
->href
, -1);
13759 iprefix
= ns
->prefix
;
13763 if (xmlGetNamespace(ctxt
, iprefix
) == NULL
) {
13764 nsPush(ctxt
, iprefix
, ihref
);
13773 if ((ctxt
->validate
) || (ctxt
->replaceEntities
!= 0)) {
13775 * ID/IDREF registration will be done in xmlValidateElement below
13777 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13780 #ifdef LIBXML_HTML_ENABLED
13781 if (doc
->type
== XML_HTML_DOCUMENT_NODE
)
13782 __htmlParseContent(ctxt
);
13785 xmlParseContent(ctxt
);
13788 if ((RAW
== '<') && (NXT(1) == '/')) {
13789 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13790 } else if (RAW
!= 0) {
13791 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13793 if ((ctxt
->node
!= NULL
) && (ctxt
->node
!= node
)) {
13794 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13795 ctxt
->wellFormed
= 0;
13798 if (!ctxt
->wellFormed
) {
13799 if (ctxt
->errNo
== 0)
13800 ret
= XML_ERR_INTERNAL_ERROR
;
13802 ret
= (xmlParserErrors
)ctxt
->errNo
;
13808 * Return the newly created nodeset after unlinking it from
13809 * the pseudo sibling.
13822 while (cur
!= NULL
) {
13823 cur
->parent
= NULL
;
13827 xmlUnlinkNode(fake
);
13831 if (ret
!= XML_ERR_OK
) {
13832 xmlFreeNodeList(*lst
);
13836 if (doc
->dict
!= NULL
)
13838 xmlFreeParserCtxt(ctxt
);
13842 return(XML_ERR_INTERNAL_ERROR
);
13846 #ifdef LIBXML_SAX1_ENABLED
13848 * xmlParseBalancedChunkMemoryRecover:
13849 * @doc: the document the chunk pertains to
13850 * @sax: the SAX handler bloc (possibly NULL)
13851 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13852 * @depth: Used for loop detection, use 0
13853 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13854 * @lst: the return value for the set of parsed nodes
13855 * @recover: return nodes even if the data is broken (use 0)
13858 * Parse a well-balanced chunk of an XML document
13859 * called by the parser
13860 * The allowed sequence for the Well Balanced Chunk is the one defined by
13861 * the content production in the XML grammar:
13863 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13865 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13866 * the parser error code otherwise
13868 * In case recover is set to 1, the nodelist will not be empty even if
13869 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13873 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13874 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
,
13876 xmlParserCtxtPtr ctxt
;
13878 xmlSAXHandlerPtr oldsax
= NULL
;
13879 xmlNodePtr content
, newRoot
;
13884 return(XML_ERR_ENTITY_LOOP
);
13890 if (string
== NULL
)
13893 size
= xmlStrlen(string
);
13895 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13896 if (ctxt
== NULL
) return(-1);
13897 ctxt
->userData
= ctxt
;
13899 oldsax
= ctxt
->sax
;
13901 if (user_data
!= NULL
)
13902 ctxt
->userData
= user_data
;
13904 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13905 if (newDoc
== NULL
) {
13906 xmlFreeParserCtxt(ctxt
);
13909 newDoc
->properties
= XML_DOC_INTERNAL
;
13910 if ((doc
!= NULL
) && (doc
->dict
!= NULL
)) {
13911 xmlDictFree(ctxt
->dict
);
13912 ctxt
->dict
= doc
->dict
;
13913 xmlDictReference(ctxt
->dict
);
13914 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13915 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13916 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13917 ctxt
->dictNames
= 1;
13919 xmlCtxtUseOptionsInternal(ctxt
, XML_PARSE_NODICT
, NULL
);
13922 newDoc
->intSubset
= doc
->intSubset
;
13923 newDoc
->extSubset
= doc
->extSubset
;
13925 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13926 if (newRoot
== NULL
) {
13928 ctxt
->sax
= oldsax
;
13929 xmlFreeParserCtxt(ctxt
);
13930 newDoc
->intSubset
= NULL
;
13931 newDoc
->extSubset
= NULL
;
13932 xmlFreeDoc(newDoc
);
13935 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13936 nodePush(ctxt
, newRoot
);
13938 ctxt
->myDoc
= newDoc
;
13940 ctxt
->myDoc
= newDoc
;
13941 newDoc
->children
->doc
= doc
;
13942 /* Ensure that doc has XML spec namespace */
13943 xmlSearchNsByHref(doc
, (xmlNodePtr
)doc
, XML_XML_NAMESPACE
);
13944 newDoc
->oldNs
= doc
->oldNs
;
13946 ctxt
->instate
= XML_PARSER_CONTENT
;
13947 ctxt
->depth
= depth
;
13950 * Doing validity checking on chunk doesn't make sense
13952 ctxt
->validate
= 0;
13953 ctxt
->loadsubset
= 0;
13954 xmlDetectSAX2(ctxt
);
13956 if ( doc
!= NULL
){
13957 content
= doc
->children
;
13958 doc
->children
= NULL
;
13959 xmlParseContent(ctxt
);
13960 doc
->children
= content
;
13963 xmlParseContent(ctxt
);
13965 if ((RAW
== '<') && (NXT(1) == '/')) {
13966 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13967 } else if (RAW
!= 0) {
13968 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13970 if (ctxt
->node
!= newDoc
->children
) {
13971 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13974 if (!ctxt
->wellFormed
) {
13975 if (ctxt
->errNo
== 0)
13983 if ((lst
!= NULL
) && ((ret
== 0) || (recover
== 1))) {
13987 * Return the newly created nodeset after unlinking it from
13988 * they pseudo parent.
13990 cur
= newDoc
->children
->children
;
13992 while (cur
!= NULL
) {
13993 xmlSetTreeDoc(cur
, doc
);
13994 cur
->parent
= NULL
;
13997 newDoc
->children
->children
= NULL
;
14001 ctxt
->sax
= oldsax
;
14002 xmlFreeParserCtxt(ctxt
);
14003 newDoc
->intSubset
= NULL
;
14004 newDoc
->extSubset
= NULL
;
14005 newDoc
->oldNs
= NULL
;
14006 xmlFreeDoc(newDoc
);
14012 * xmlSAXParseEntity:
14013 * @sax: the SAX handler block
14014 * @filename: the filename
14016 * parse an XML external entity out of context and build a tree.
14017 * It use the given SAX function block to handle the parsing callback.
14018 * If sax is NULL, fallback to the default DOM tree building routines.
14020 * [78] extParsedEnt ::= TextDecl? content
14022 * This correspond to a "Well Balanced" chunk
14024 * Returns the resulting document tree
14028 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
14030 xmlParserCtxtPtr ctxt
;
14032 ctxt
= xmlCreateFileParserCtxt(filename
);
14033 if (ctxt
== NULL
) {
14037 if (ctxt
->sax
!= NULL
)
14038 xmlFree(ctxt
->sax
);
14040 ctxt
->userData
= NULL
;
14043 xmlParseExtParsedEnt(ctxt
);
14045 if (ctxt
->wellFormed
)
14049 xmlFreeDoc(ctxt
->myDoc
);
14050 ctxt
->myDoc
= NULL
;
14054 xmlFreeParserCtxt(ctxt
);
14061 * @filename: the filename
14063 * parse an XML external entity out of context and build a tree.
14065 * [78] extParsedEnt ::= TextDecl? content
14067 * This correspond to a "Well Balanced" chunk
14069 * Returns the resulting document tree
14073 xmlParseEntity(const char *filename
) {
14074 return(xmlSAXParseEntity(NULL
, filename
));
14076 #endif /* LIBXML_SAX1_ENABLED */
14079 * xmlCreateEntityParserCtxtInternal:
14080 * @URL: the entity URL
14081 * @ID: the entity PUBLIC ID
14082 * @base: a possible base for the target URI
14083 * @pctx: parser context used to set options on new context
14085 * Create a parser context for an external entity
14086 * Automatic support for ZLIB/Compress compressed document is provided
14087 * by default if found at compile-time.
14089 * Returns the new parser context or NULL
14091 static xmlParserCtxtPtr
14092 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
14093 const xmlChar
*base
, xmlParserCtxtPtr pctx
) {
14094 xmlParserCtxtPtr ctxt
;
14095 xmlParserInputPtr inputStream
;
14096 char *directory
= NULL
;
14099 ctxt
= xmlNewParserCtxt();
14100 if (ctxt
== NULL
) {
14104 if (pctx
!= NULL
) {
14105 ctxt
->options
= pctx
->options
;
14106 ctxt
->_private
= pctx
->_private
;
14109 uri
= xmlBuildURI(URL
, base
);
14112 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
14113 if (inputStream
== NULL
) {
14114 xmlFreeParserCtxt(ctxt
);
14118 inputPush(ctxt
, inputStream
);
14120 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
14121 directory
= xmlParserGetDirectory((char *)URL
);
14122 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
14123 ctxt
->directory
= directory
;
14125 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
14126 if (inputStream
== NULL
) {
14128 xmlFreeParserCtxt(ctxt
);
14132 inputPush(ctxt
, inputStream
);
14134 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
14135 directory
= xmlParserGetDirectory((char *)uri
);
14136 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
14137 ctxt
->directory
= directory
;
14144 * xmlCreateEntityParserCtxt:
14145 * @URL: the entity URL
14146 * @ID: the entity PUBLIC ID
14147 * @base: a possible base for the target URI
14149 * Create a parser context for an external entity
14150 * Automatic support for ZLIB/Compress compressed document is provided
14151 * by default if found at compile-time.
14153 * Returns the new parser context or NULL
14156 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
14157 const xmlChar
*base
) {
14158 return xmlCreateEntityParserCtxtInternal(URL
, ID
, base
, NULL
);
14162 /************************************************************************
14164 * Front ends when parsing from a file *
14166 ************************************************************************/
14169 * xmlCreateURLParserCtxt:
14170 * @filename: the filename or URL
14171 * @options: a combination of xmlParserOption
14173 * Create a parser context for a file or URL content.
14174 * Automatic support for ZLIB/Compress compressed document is provided
14175 * by default if found at compile-time and for file accesses
14177 * Returns the new parser context or NULL
14180 xmlCreateURLParserCtxt(const char *filename
, int options
)
14182 xmlParserCtxtPtr ctxt
;
14183 xmlParserInputPtr inputStream
;
14184 char *directory
= NULL
;
14186 ctxt
= xmlNewParserCtxt();
14187 if (ctxt
== NULL
) {
14188 xmlErrMemory(NULL
, "cannot allocate parser context");
14193 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
14194 ctxt
->linenumbers
= 1;
14196 inputStream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
14197 if (inputStream
== NULL
) {
14198 xmlFreeParserCtxt(ctxt
);
14202 inputPush(ctxt
, inputStream
);
14203 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
14204 directory
= xmlParserGetDirectory(filename
);
14205 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
14206 ctxt
->directory
= directory
;
14212 * xmlCreateFileParserCtxt:
14213 * @filename: the filename
14215 * Create a parser context for a file content.
14216 * Automatic support for ZLIB/Compress compressed document is provided
14217 * by default if found at compile-time.
14219 * Returns the new parser context or NULL
14222 xmlCreateFileParserCtxt(const char *filename
)
14224 return(xmlCreateURLParserCtxt(filename
, 0));
14227 #ifdef LIBXML_SAX1_ENABLED
14229 * xmlSAXParseFileWithData:
14230 * @sax: the SAX handler block
14231 * @filename: the filename
14232 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14234 * @data: the userdata
14236 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14237 * compressed document is provided by default if found at compile-time.
14238 * It use the given SAX function block to handle the parsing callback.
14239 * If sax is NULL, fallback to the default DOM tree building routines.
14241 * User data (void *) is stored within the parser context in the
14242 * context's _private member, so it is available nearly everywhere in libxml
14244 * Returns the resulting document tree
14248 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
14249 int recovery
, void *data
) {
14251 xmlParserCtxtPtr ctxt
;
14255 ctxt
= xmlCreateFileParserCtxt(filename
);
14256 if (ctxt
== NULL
) {
14260 if (ctxt
->sax
!= NULL
)
14261 xmlFree(ctxt
->sax
);
14264 xmlDetectSAX2(ctxt
);
14266 ctxt
->_private
= data
;
14269 if (ctxt
->directory
== NULL
)
14270 ctxt
->directory
= xmlParserGetDirectory(filename
);
14272 ctxt
->recovery
= recovery
;
14274 xmlParseDocument(ctxt
);
14276 if ((ctxt
->wellFormed
) || recovery
) {
14279 if (ctxt
->input
->buf
->compressed
> 0)
14280 ret
->compression
= 9;
14282 ret
->compression
= ctxt
->input
->buf
->compressed
;
14287 xmlFreeDoc(ctxt
->myDoc
);
14288 ctxt
->myDoc
= NULL
;
14292 xmlFreeParserCtxt(ctxt
);
14299 * @sax: the SAX handler block
14300 * @filename: the filename
14301 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14304 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14305 * compressed document is provided by default if found at compile-time.
14306 * It use the given SAX function block to handle the parsing callback.
14307 * If sax is NULL, fallback to the default DOM tree building routines.
14309 * Returns the resulting document tree
14313 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
14315 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
14320 * @cur: a pointer to an array of xmlChar
14322 * parse an XML in-memory document and build a tree.
14323 * In the case the document is not Well Formed, a attempt to build a
14324 * tree is tried anyway
14326 * Returns the resulting document tree or NULL in case of failure
14330 xmlRecoverDoc(const xmlChar
*cur
) {
14331 return(xmlSAXParseDoc(NULL
, cur
, 1));
14336 * @filename: the filename
14338 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14339 * compressed document is provided by default if found at compile-time.
14341 * Returns the resulting document tree if the file was wellformed,
14346 xmlParseFile(const char *filename
) {
14347 return(xmlSAXParseFile(NULL
, filename
, 0));
14352 * @filename: the filename
14354 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14355 * compressed document is provided by default if found at compile-time.
14356 * In the case the document is not Well Formed, it attempts to build
14359 * Returns the resulting document tree or NULL in case of failure
14363 xmlRecoverFile(const char *filename
) {
14364 return(xmlSAXParseFile(NULL
, filename
, 1));
14369 * xmlSetupParserForBuffer:
14370 * @ctxt: an XML parser context
14371 * @buffer: a xmlChar * buffer
14372 * @filename: a file name
14374 * Setup the parser context to parse a new buffer; Clears any prior
14375 * contents from the parser context. The buffer parameter must not be
14376 * NULL, but the filename parameter can be
14379 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
14380 const char* filename
)
14382 xmlParserInputPtr input
;
14384 if ((ctxt
== NULL
) || (buffer
== NULL
))
14387 input
= xmlNewInputStream(ctxt
);
14388 if (input
== NULL
) {
14389 xmlErrMemory(NULL
, "parsing new buffer: out of memory\n");
14390 xmlClearParserCtxt(ctxt
);
14394 xmlClearParserCtxt(ctxt
);
14395 if (filename
!= NULL
)
14396 input
->filename
= (char *) xmlCanonicPath((const xmlChar
*)filename
);
14397 input
->base
= buffer
;
14398 input
->cur
= buffer
;
14399 input
->end
= &buffer
[xmlStrlen(buffer
)];
14400 inputPush(ctxt
, input
);
14404 * xmlSAXUserParseFile:
14405 * @sax: a SAX handler
14406 * @user_data: The user data returned on SAX callbacks
14407 * @filename: a file name
14409 * parse an XML file and call the given SAX handler routines.
14410 * Automatic support for ZLIB/Compress compressed document is provided
14412 * Returns 0 in case of success or a error number otherwise
14415 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
14416 const char *filename
) {
14418 xmlParserCtxtPtr ctxt
;
14420 ctxt
= xmlCreateFileParserCtxt(filename
);
14421 if (ctxt
== NULL
) return -1;
14422 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14423 xmlFree(ctxt
->sax
);
14425 xmlDetectSAX2(ctxt
);
14427 if (user_data
!= NULL
)
14428 ctxt
->userData
= user_data
;
14430 xmlParseDocument(ctxt
);
14432 if (ctxt
->wellFormed
)
14435 if (ctxt
->errNo
!= 0)
14442 if (ctxt
->myDoc
!= NULL
) {
14443 xmlFreeDoc(ctxt
->myDoc
);
14444 ctxt
->myDoc
= NULL
;
14446 xmlFreeParserCtxt(ctxt
);
14450 #endif /* LIBXML_SAX1_ENABLED */
14452 /************************************************************************
14454 * Front ends when parsing from memory *
14456 ************************************************************************/
14459 * xmlCreateMemoryParserCtxt:
14460 * @buffer: a pointer to a char array
14461 * @size: the size of the array
14463 * Create a parser context for an XML in-memory document.
14465 * Returns the new parser context or NULL
14468 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
14469 xmlParserCtxtPtr ctxt
;
14470 xmlParserInputPtr input
;
14471 xmlParserInputBufferPtr buf
;
14473 if (buffer
== NULL
)
14478 ctxt
= xmlNewParserCtxt();
14482 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14483 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
14485 xmlFreeParserCtxt(ctxt
);
14489 input
= xmlNewInputStream(ctxt
);
14490 if (input
== NULL
) {
14491 xmlFreeParserInputBuffer(buf
);
14492 xmlFreeParserCtxt(ctxt
);
14496 input
->filename
= NULL
;
14498 xmlBufResetInput(input
->buf
->buffer
, input
);
14500 inputPush(ctxt
, input
);
14504 #ifdef LIBXML_SAX1_ENABLED
14506 * xmlSAXParseMemoryWithData:
14507 * @sax: the SAX handler block
14508 * @buffer: an pointer to a char array
14509 * @size: the size of the array
14510 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14512 * @data: the userdata
14514 * parse an XML in-memory block and use the given SAX function block
14515 * to handle the parsing callback. If sax is NULL, fallback to the default
14516 * DOM tree building routines.
14518 * User data (void *) is stored within the parser context in the
14519 * context's _private member, so it is available nearly everywhere in libxml
14521 * Returns the resulting document tree
14525 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
14526 int size
, int recovery
, void *data
) {
14528 xmlParserCtxtPtr ctxt
;
14532 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14533 if (ctxt
== NULL
) return(NULL
);
14535 if (ctxt
->sax
!= NULL
)
14536 xmlFree(ctxt
->sax
);
14539 xmlDetectSAX2(ctxt
);
14541 ctxt
->_private
=data
;
14544 ctxt
->recovery
= recovery
;
14546 xmlParseDocument(ctxt
);
14548 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14551 xmlFreeDoc(ctxt
->myDoc
);
14552 ctxt
->myDoc
= NULL
;
14556 xmlFreeParserCtxt(ctxt
);
14562 * xmlSAXParseMemory:
14563 * @sax: the SAX handler block
14564 * @buffer: an pointer to a char array
14565 * @size: the size of the array
14566 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14569 * parse an XML in-memory block and use the given SAX function block
14570 * to handle the parsing callback. If sax is NULL, fallback to the default
14571 * DOM tree building routines.
14573 * Returns the resulting document tree
14576 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
14577 int size
, int recovery
) {
14578 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
14583 * @buffer: an pointer to a char array
14584 * @size: the size of the array
14586 * parse an XML in-memory block and build a tree.
14588 * Returns the resulting document tree
14591 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
14592 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
14596 * xmlRecoverMemory:
14597 * @buffer: an pointer to a char array
14598 * @size: the size of the array
14600 * parse an XML in-memory block and build a tree.
14601 * In the case the document is not Well Formed, an attempt to
14602 * build a tree is tried anyway
14604 * Returns the resulting document tree or NULL in case of error
14607 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
14608 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
14612 * xmlSAXUserParseMemory:
14613 * @sax: a SAX handler
14614 * @user_data: The user data returned on SAX callbacks
14615 * @buffer: an in-memory XML document input
14616 * @size: the length of the XML document in bytes
14618 * A better SAX parsing routine.
14619 * parse an XML in-memory buffer and call the given SAX handler routines.
14621 * Returns 0 in case of success or a error number otherwise
14623 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
14624 const char *buffer
, int size
) {
14626 xmlParserCtxtPtr ctxt
;
14630 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14631 if (ctxt
== NULL
) return -1;
14632 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14633 xmlFree(ctxt
->sax
);
14635 xmlDetectSAX2(ctxt
);
14637 if (user_data
!= NULL
)
14638 ctxt
->userData
= user_data
;
14640 xmlParseDocument(ctxt
);
14642 if (ctxt
->wellFormed
)
14645 if (ctxt
->errNo
!= 0)
14652 if (ctxt
->myDoc
!= NULL
) {
14653 xmlFreeDoc(ctxt
->myDoc
);
14654 ctxt
->myDoc
= NULL
;
14656 xmlFreeParserCtxt(ctxt
);
14660 #endif /* LIBXML_SAX1_ENABLED */
14663 * xmlCreateDocParserCtxt:
14664 * @cur: a pointer to an array of xmlChar
14666 * Creates a parser context for an XML in-memory document.
14668 * Returns the new parser context or NULL
14671 xmlCreateDocParserCtxt(const xmlChar
*cur
) {
14676 len
= xmlStrlen(cur
);
14677 return(xmlCreateMemoryParserCtxt((const char *)cur
, len
));
14680 #ifdef LIBXML_SAX1_ENABLED
14683 * @sax: the SAX handler block
14684 * @cur: a pointer to an array of xmlChar
14685 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14688 * parse an XML in-memory document and build a tree.
14689 * It use the given SAX function block to handle the parsing callback.
14690 * If sax is NULL, fallback to the default DOM tree building routines.
14692 * Returns the resulting document tree
14696 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, const xmlChar
*cur
, int recovery
) {
14698 xmlParserCtxtPtr ctxt
;
14699 xmlSAXHandlerPtr oldsax
= NULL
;
14701 if (cur
== NULL
) return(NULL
);
14704 ctxt
= xmlCreateDocParserCtxt(cur
);
14705 if (ctxt
== NULL
) return(NULL
);
14707 oldsax
= ctxt
->sax
;
14709 ctxt
->userData
= NULL
;
14711 xmlDetectSAX2(ctxt
);
14713 xmlParseDocument(ctxt
);
14714 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14717 xmlFreeDoc(ctxt
->myDoc
);
14718 ctxt
->myDoc
= NULL
;
14721 ctxt
->sax
= oldsax
;
14722 xmlFreeParserCtxt(ctxt
);
14729 * @cur: a pointer to an array of xmlChar
14731 * parse an XML in-memory document and build a tree.
14733 * Returns the resulting document tree
14737 xmlParseDoc(const xmlChar
*cur
) {
14738 return(xmlSAXParseDoc(NULL
, cur
, 0));
14740 #endif /* LIBXML_SAX1_ENABLED */
14742 #ifdef LIBXML_LEGACY_ENABLED
14743 /************************************************************************
14745 * Specific function to keep track of entities references *
14746 * and used by the XSLT debugger *
14748 ************************************************************************/
14750 static xmlEntityReferenceFunc xmlEntityRefFunc
= NULL
;
14753 * xmlAddEntityReference:
14754 * @ent : A valid entity
14755 * @firstNode : A valid first node for children of entity
14756 * @lastNode : A valid last node of children entity
14758 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14761 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
14762 xmlNodePtr lastNode
)
14764 if (xmlEntityRefFunc
!= NULL
) {
14765 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
14771 * xmlSetEntityReferenceFunc:
14772 * @func: A valid function
14774 * Set the function to call call back when a xml reference has been made
14777 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func
)
14779 xmlEntityRefFunc
= func
;
14781 #endif /* LIBXML_LEGACY_ENABLED */
14783 /************************************************************************
14787 ************************************************************************/
14789 #ifdef LIBXML_XPATH_ENABLED
14790 #include <libxml/xpath.h>
14793 extern void XMLCDECL
xmlGenericErrorDefaultFunc(void *ctx
, const char *msg
, ...);
14794 static int xmlParserInitialized
= 0;
14799 * Initialization function for the XML parser.
14800 * This is not reentrant. Call once before processing in case of
14801 * use in multithreaded programs.
14805 xmlInitParser(void) {
14806 if (xmlParserInitialized
!= 0)
14809 #ifdef LIBXML_THREAD_ENABLED
14810 __xmlGlobalInitMutexLock();
14811 if (xmlParserInitialized
== 0) {
14815 if ((xmlGenericError
== xmlGenericErrorDefaultFunc
) ||
14816 (xmlGenericError
== NULL
))
14817 initGenericErrorDefaultFunc(NULL
);
14819 xmlInitializeDict();
14820 xmlInitCharEncodingHandlers();
14821 xmlDefaultSAXHandlerInit();
14822 xmlRegisterDefaultInputCallbacks();
14823 #ifdef LIBXML_OUTPUT_ENABLED
14824 xmlRegisterDefaultOutputCallbacks();
14825 #endif /* LIBXML_OUTPUT_ENABLED */
14826 #ifdef LIBXML_HTML_ENABLED
14827 htmlInitAutoClose();
14828 htmlDefaultSAXHandlerInit();
14830 #ifdef LIBXML_XPATH_ENABLED
14833 #ifdef LIBXML_CATALOG_ENABLED
14834 xmlInitializeCatalog();
14836 xmlParserInitialized
= 1;
14837 #ifdef LIBXML_THREAD_ENABLED
14839 __xmlGlobalInitMutexUnlock();
14844 * xmlCleanupParser:
14846 * This function name is somewhat misleading. It does not clean up
14847 * parser state, it cleans up memory allocated by the library itself.
14848 * It is a cleanup function for the XML library. It tries to reclaim all
14849 * related global memory allocated for the library processing.
14850 * It doesn't deallocate any document related memory. One should
14851 * call xmlCleanupParser() only when the process has finished using
14852 * the library and all XML/HTML documents built with it.
14853 * See also xmlInitParser() which has the opposite function of preparing
14854 * the library for operations.
14856 * WARNING: if your application is multithreaded or has plugin support
14857 * calling this may crash the application if another thread or
14858 * a plugin is still using libxml2. It's sometimes very hard to
14859 * guess if libxml2 is in use in the application, some libraries
14860 * or plugins may use it without notice. In case of doubt abstain
14861 * from calling this function or do it just before calling exit()
14862 * to avoid leak reports from valgrind !
14866 xmlCleanupParser(void) {
14867 if (!xmlParserInitialized
)
14870 xmlCleanupCharEncodingHandlers();
14871 #ifdef LIBXML_CATALOG_ENABLED
14872 xmlCatalogCleanup();
14875 xmlCleanupInputCallbacks();
14876 #ifdef LIBXML_OUTPUT_ENABLED
14877 xmlCleanupOutputCallbacks();
14879 #ifdef LIBXML_SCHEMAS_ENABLED
14880 xmlSchemaCleanupTypes();
14881 xmlRelaxNGCleanupTypes();
14883 xmlResetLastError();
14884 xmlCleanupGlobals();
14885 xmlCleanupThreads(); /* must be last if called not from the main thread */
14886 xmlCleanupMemory();
14887 xmlParserInitialized
= 0;
14890 /************************************************************************
14892 * New set (2.6.0) of simpler and more flexible APIs *
14894 ************************************************************************/
14900 * Free a string if it is not owned by the "dict" dictionnary in the
14903 #define DICT_FREE(str) \
14904 if ((str) && ((!dict) || \
14905 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
14906 xmlFree((char *)(str));
14910 * @ctxt: an XML parser context
14912 * Reset a parser context
14915 xmlCtxtReset(xmlParserCtxtPtr ctxt
)
14917 xmlParserInputPtr input
;
14925 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
14926 xmlFreeInputStream(input
);
14929 ctxt
->input
= NULL
;
14932 if (ctxt
->spaceTab
!= NULL
) {
14933 ctxt
->spaceTab
[0] = -1;
14934 ctxt
->space
= &ctxt
->spaceTab
[0];
14936 ctxt
->space
= NULL
;
14946 DICT_FREE(ctxt
->version
);
14947 ctxt
->version
= NULL
;
14948 DICT_FREE(ctxt
->encoding
);
14949 ctxt
->encoding
= NULL
;
14950 DICT_FREE(ctxt
->directory
);
14951 ctxt
->directory
= NULL
;
14952 DICT_FREE(ctxt
->extSubURI
);
14953 ctxt
->extSubURI
= NULL
;
14954 DICT_FREE(ctxt
->extSubSystem
);
14955 ctxt
->extSubSystem
= NULL
;
14956 if (ctxt
->myDoc
!= NULL
)
14957 xmlFreeDoc(ctxt
->myDoc
);
14958 ctxt
->myDoc
= NULL
;
14960 ctxt
->standalone
= -1;
14961 ctxt
->hasExternalSubset
= 0;
14962 ctxt
->hasPErefs
= 0;
14964 ctxt
->external
= 0;
14965 ctxt
->instate
= XML_PARSER_START
;
14968 ctxt
->wellFormed
= 1;
14969 ctxt
->nsWellFormed
= 1;
14970 ctxt
->disableSAX
= 0;
14973 ctxt
->vctxt
.userData
= ctxt
;
14974 ctxt
->vctxt
.error
= xmlParserValidityError
;
14975 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
14977 ctxt
->record_info
= 0;
14979 ctxt
->checkIndex
= 0;
14980 ctxt
->inSubset
= 0;
14981 ctxt
->errNo
= XML_ERR_OK
;
14983 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
14984 ctxt
->catalogs
= NULL
;
14985 ctxt
->nbentities
= 0;
14986 ctxt
->sizeentities
= 0;
14987 ctxt
->sizeentcopy
= 0;
14988 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
14990 if (ctxt
->attsDefault
!= NULL
) {
14991 xmlHashFree(ctxt
->attsDefault
, (xmlHashDeallocator
) xmlFree
);
14992 ctxt
->attsDefault
= NULL
;
14994 if (ctxt
->attsSpecial
!= NULL
) {
14995 xmlHashFree(ctxt
->attsSpecial
, NULL
);
14996 ctxt
->attsSpecial
= NULL
;
14999 #ifdef LIBXML_CATALOG_ENABLED
15000 if (ctxt
->catalogs
!= NULL
)
15001 xmlCatalogFreeLocal(ctxt
->catalogs
);
15003 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
15004 xmlResetError(&ctxt
->lastError
);
15008 * xmlCtxtResetPush:
15009 * @ctxt: an XML parser context
15010 * @chunk: a pointer to an array of chars
15011 * @size: number of chars in the array
15012 * @filename: an optional file name or URI
15013 * @encoding: the document encoding, or NULL
15015 * Reset a push parser context
15017 * Returns 0 in case of success and 1 in case of error
15020 xmlCtxtResetPush(xmlParserCtxtPtr ctxt
, const char *chunk
,
15021 int size
, const char *filename
, const char *encoding
)
15023 xmlParserInputPtr inputStream
;
15024 xmlParserInputBufferPtr buf
;
15025 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
15030 if ((encoding
== NULL
) && (chunk
!= NULL
) && (size
>= 4))
15031 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
15033 buf
= xmlAllocParserInputBuffer(enc
);
15037 if (ctxt
== NULL
) {
15038 xmlFreeParserInputBuffer(buf
);
15042 xmlCtxtReset(ctxt
);
15044 if (ctxt
->pushTab
== NULL
) {
15045 ctxt
->pushTab
= (void **) xmlMalloc(ctxt
->nameMax
* 3 *
15046 sizeof(xmlChar
*));
15047 if (ctxt
->pushTab
== NULL
) {
15048 xmlErrMemory(ctxt
, NULL
);
15049 xmlFreeParserInputBuffer(buf
);
15054 if (filename
== NULL
) {
15055 ctxt
->directory
= NULL
;
15057 ctxt
->directory
= xmlParserGetDirectory(filename
);
15060 inputStream
= xmlNewInputStream(ctxt
);
15061 if (inputStream
== NULL
) {
15062 xmlFreeParserInputBuffer(buf
);
15066 if (filename
== NULL
)
15067 inputStream
->filename
= NULL
;
15069 inputStream
->filename
= (char *)
15070 xmlCanonicPath((const xmlChar
*) filename
);
15071 inputStream
->buf
= buf
;
15072 xmlBufResetInput(buf
->buffer
, inputStream
);
15074 inputPush(ctxt
, inputStream
);
15076 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
15077 (ctxt
->input
->buf
!= NULL
)) {
15078 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
15079 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
15081 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
15083 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
15085 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
15089 if (encoding
!= NULL
) {
15090 xmlCharEncodingHandlerPtr hdlr
;
15092 if (ctxt
->encoding
!= NULL
)
15093 xmlFree((xmlChar
*) ctxt
->encoding
);
15094 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
15096 hdlr
= xmlFindCharEncodingHandler(encoding
);
15097 if (hdlr
!= NULL
) {
15098 xmlSwitchToEncoding(ctxt
, hdlr
);
15100 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
15101 "Unsupported encoding %s\n", BAD_CAST encoding
);
15103 } else if (enc
!= XML_CHAR_ENCODING_NONE
) {
15104 xmlSwitchEncoding(ctxt
, enc
);
15112 * xmlCtxtUseOptionsInternal:
15113 * @ctxt: an XML parser context
15114 * @options: a combination of xmlParserOption
15115 * @encoding: the user provided encoding to use
15117 * Applies the options to the parser context
15119 * Returns 0 in case of success, the set of unknown or unimplemented options
15120 * in case of error.
15123 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
, const char *encoding
)
15127 if (encoding
!= NULL
) {
15128 if (ctxt
->encoding
!= NULL
)
15129 xmlFree((xmlChar
*) ctxt
->encoding
);
15130 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
15132 if (options
& XML_PARSE_RECOVER
) {
15133 ctxt
->recovery
= 1;
15134 options
-= XML_PARSE_RECOVER
;
15135 ctxt
->options
|= XML_PARSE_RECOVER
;
15137 ctxt
->recovery
= 0;
15138 if (options
& XML_PARSE_DTDLOAD
) {
15139 ctxt
->loadsubset
= XML_DETECT_IDS
;
15140 options
-= XML_PARSE_DTDLOAD
;
15141 ctxt
->options
|= XML_PARSE_DTDLOAD
;
15143 ctxt
->loadsubset
= 0;
15144 if (options
& XML_PARSE_DTDATTR
) {
15145 ctxt
->loadsubset
|= XML_COMPLETE_ATTRS
;
15146 options
-= XML_PARSE_DTDATTR
;
15147 ctxt
->options
|= XML_PARSE_DTDATTR
;
15149 if (options
& XML_PARSE_NOENT
) {
15150 ctxt
->replaceEntities
= 1;
15151 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15152 options
-= XML_PARSE_NOENT
;
15153 ctxt
->options
|= XML_PARSE_NOENT
;
15155 ctxt
->replaceEntities
= 0;
15156 if (options
& XML_PARSE_PEDANTIC
) {
15157 ctxt
->pedantic
= 1;
15158 options
-= XML_PARSE_PEDANTIC
;
15159 ctxt
->options
|= XML_PARSE_PEDANTIC
;
15161 ctxt
->pedantic
= 0;
15162 if (options
& XML_PARSE_NOBLANKS
) {
15163 ctxt
->keepBlanks
= 0;
15164 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
15165 options
-= XML_PARSE_NOBLANKS
;
15166 ctxt
->options
|= XML_PARSE_NOBLANKS
;
15168 ctxt
->keepBlanks
= 1;
15169 if (options
& XML_PARSE_DTDVALID
) {
15170 ctxt
->validate
= 1;
15171 if (options
& XML_PARSE_NOWARNING
)
15172 ctxt
->vctxt
.warning
= NULL
;
15173 if (options
& XML_PARSE_NOERROR
)
15174 ctxt
->vctxt
.error
= NULL
;
15175 options
-= XML_PARSE_DTDVALID
;
15176 ctxt
->options
|= XML_PARSE_DTDVALID
;
15178 ctxt
->validate
= 0;
15179 if (options
& XML_PARSE_NOWARNING
) {
15180 ctxt
->sax
->warning
= NULL
;
15181 options
-= XML_PARSE_NOWARNING
;
15183 if (options
& XML_PARSE_NOERROR
) {
15184 ctxt
->sax
->error
= NULL
;
15185 ctxt
->sax
->fatalError
= NULL
;
15186 options
-= XML_PARSE_NOERROR
;
15188 #ifdef LIBXML_SAX1_ENABLED
15189 if (options
& XML_PARSE_SAX1
) {
15190 ctxt
->sax
->startElement
= xmlSAX2StartElement
;
15191 ctxt
->sax
->endElement
= xmlSAX2EndElement
;
15192 ctxt
->sax
->startElementNs
= NULL
;
15193 ctxt
->sax
->endElementNs
= NULL
;
15194 ctxt
->sax
->initialized
= 1;
15195 options
-= XML_PARSE_SAX1
;
15196 ctxt
->options
|= XML_PARSE_SAX1
;
15198 #endif /* LIBXML_SAX1_ENABLED */
15199 if (options
& XML_PARSE_NODICT
) {
15200 ctxt
->dictNames
= 0;
15201 options
-= XML_PARSE_NODICT
;
15202 ctxt
->options
|= XML_PARSE_NODICT
;
15204 ctxt
->dictNames
= 1;
15206 if (options
& XML_PARSE_NOCDATA
) {
15207 ctxt
->sax
->cdataBlock
= NULL
;
15208 options
-= XML_PARSE_NOCDATA
;
15209 ctxt
->options
|= XML_PARSE_NOCDATA
;
15211 if (options
& XML_PARSE_NSCLEAN
) {
15212 ctxt
->options
|= XML_PARSE_NSCLEAN
;
15213 options
-= XML_PARSE_NSCLEAN
;
15215 if (options
& XML_PARSE_NONET
) {
15216 ctxt
->options
|= XML_PARSE_NONET
;
15217 options
-= XML_PARSE_NONET
;
15219 if (options
& XML_PARSE_COMPACT
) {
15220 ctxt
->options
|= XML_PARSE_COMPACT
;
15221 options
-= XML_PARSE_COMPACT
;
15223 if (options
& XML_PARSE_OLD10
) {
15224 ctxt
->options
|= XML_PARSE_OLD10
;
15225 options
-= XML_PARSE_OLD10
;
15227 if (options
& XML_PARSE_NOBASEFIX
) {
15228 ctxt
->options
|= XML_PARSE_NOBASEFIX
;
15229 options
-= XML_PARSE_NOBASEFIX
;
15231 if (options
& XML_PARSE_HUGE
) {
15232 ctxt
->options
|= XML_PARSE_HUGE
;
15233 options
-= XML_PARSE_HUGE
;
15234 if (ctxt
->dict
!= NULL
)
15235 xmlDictSetLimit(ctxt
->dict
, 0);
15237 if (options
& XML_PARSE_OLDSAX
) {
15238 ctxt
->options
|= XML_PARSE_OLDSAX
;
15239 options
-= XML_PARSE_OLDSAX
;
15241 if (options
& XML_PARSE_IGNORE_ENC
) {
15242 ctxt
->options
|= XML_PARSE_IGNORE_ENC
;
15243 options
-= XML_PARSE_IGNORE_ENC
;
15245 if (options
& XML_PARSE_BIG_LINES
) {
15246 ctxt
->options
|= XML_PARSE_BIG_LINES
;
15247 options
-= XML_PARSE_BIG_LINES
;
15249 ctxt
->linenumbers
= 1;
15254 * xmlCtxtUseOptions:
15255 * @ctxt: an XML parser context
15256 * @options: a combination of xmlParserOption
15258 * Applies the options to the parser context
15260 * Returns 0 in case of success, the set of unknown or unimplemented options
15261 * in case of error.
15264 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt
, int options
)
15266 return(xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
));
15271 * @ctxt: an XML parser context
15272 * @URL: the base URL to use for the document
15273 * @encoding: the document encoding, or NULL
15274 * @options: a combination of xmlParserOption
15275 * @reuse: keep the context for reuse
15277 * Common front-end for the xmlRead functions
15279 * Returns the resulting document tree or NULL
15282 xmlDoRead(xmlParserCtxtPtr ctxt
, const char *URL
, const char *encoding
,
15283 int options
, int reuse
)
15287 xmlCtxtUseOptionsInternal(ctxt
, options
, encoding
);
15288 if (encoding
!= NULL
) {
15289 xmlCharEncodingHandlerPtr hdlr
;
15291 hdlr
= xmlFindCharEncodingHandler(encoding
);
15293 xmlSwitchToEncoding(ctxt
, hdlr
);
15295 if ((URL
!= NULL
) && (ctxt
->input
!= NULL
) &&
15296 (ctxt
->input
->filename
== NULL
))
15297 ctxt
->input
->filename
= (char *) xmlStrdup((const xmlChar
*) URL
);
15298 xmlParseDocument(ctxt
);
15299 if ((ctxt
->wellFormed
) || ctxt
->recovery
)
15303 if (ctxt
->myDoc
!= NULL
) {
15304 xmlFreeDoc(ctxt
->myDoc
);
15307 ctxt
->myDoc
= NULL
;
15309 xmlFreeParserCtxt(ctxt
);
15317 * @cur: a pointer to a zero terminated string
15318 * @URL: the base URL to use for the document
15319 * @encoding: the document encoding, or NULL
15320 * @options: a combination of xmlParserOption
15322 * parse an XML in-memory document and build a tree.
15324 * Returns the resulting document tree
15327 xmlReadDoc(const xmlChar
* cur
, const char *URL
, const char *encoding
, int options
)
15329 xmlParserCtxtPtr ctxt
;
15335 ctxt
= xmlCreateDocParserCtxt(cur
);
15338 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15343 * @filename: a file or URL
15344 * @encoding: the document encoding, or NULL
15345 * @options: a combination of xmlParserOption
15347 * parse an XML file from the filesystem or the network.
15349 * Returns the resulting document tree
15352 xmlReadFile(const char *filename
, const char *encoding
, int options
)
15354 xmlParserCtxtPtr ctxt
;
15357 ctxt
= xmlCreateURLParserCtxt(filename
, options
);
15360 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 0));
15365 * @buffer: a pointer to a char array
15366 * @size: the size of the array
15367 * @URL: the base URL to use for the document
15368 * @encoding: the document encoding, or NULL
15369 * @options: a combination of xmlParserOption
15371 * parse an XML in-memory document and build a tree.
15373 * Returns the resulting document tree
15376 xmlReadMemory(const char *buffer
, int size
, const char *URL
, const char *encoding
, int options
)
15378 xmlParserCtxtPtr ctxt
;
15381 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
15384 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15389 * @fd: an open file descriptor
15390 * @URL: the base URL to use for the document
15391 * @encoding: the document encoding, or NULL
15392 * @options: a combination of xmlParserOption
15394 * parse an XML from a file descriptor and build a tree.
15395 * NOTE that the file descriptor will not be closed when the
15396 * reader is closed or reset.
15398 * Returns the resulting document tree
15401 xmlReadFd(int fd
, const char *URL
, const char *encoding
, int options
)
15403 xmlParserCtxtPtr ctxt
;
15404 xmlParserInputBufferPtr input
;
15405 xmlParserInputPtr stream
;
15411 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15414 input
->closecallback
= NULL
;
15415 ctxt
= xmlNewParserCtxt();
15416 if (ctxt
== NULL
) {
15417 xmlFreeParserInputBuffer(input
);
15420 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15421 if (stream
== NULL
) {
15422 xmlFreeParserInputBuffer(input
);
15423 xmlFreeParserCtxt(ctxt
);
15426 inputPush(ctxt
, stream
);
15427 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15432 * @ioread: an I/O read function
15433 * @ioclose: an I/O close function
15434 * @ioctx: an I/O handler
15435 * @URL: the base URL to use for the document
15436 * @encoding: the document encoding, or NULL
15437 * @options: a combination of xmlParserOption
15439 * parse an XML document from I/O functions and source and build a tree.
15441 * Returns the resulting document tree
15444 xmlReadIO(xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
15445 void *ioctx
, const char *URL
, const char *encoding
, int options
)
15447 xmlParserCtxtPtr ctxt
;
15448 xmlParserInputBufferPtr input
;
15449 xmlParserInputPtr stream
;
15451 if (ioread
== NULL
)
15455 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15456 XML_CHAR_ENCODING_NONE
);
15457 if (input
== NULL
) {
15458 if (ioclose
!= NULL
)
15462 ctxt
= xmlNewParserCtxt();
15463 if (ctxt
== NULL
) {
15464 xmlFreeParserInputBuffer(input
);
15467 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15468 if (stream
== NULL
) {
15469 xmlFreeParserInputBuffer(input
);
15470 xmlFreeParserCtxt(ctxt
);
15473 inputPush(ctxt
, stream
);
15474 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15479 * @ctxt: an XML parser context
15480 * @cur: a pointer to a zero terminated string
15481 * @URL: the base URL to use for the document
15482 * @encoding: the document encoding, or NULL
15483 * @options: a combination of xmlParserOption
15485 * parse an XML in-memory document and build a tree.
15486 * This reuses the existing @ctxt parser context
15488 * Returns the resulting document tree
15491 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
,
15492 const char *URL
, const char *encoding
, int options
)
15494 xmlParserInputPtr stream
;
15502 xmlCtxtReset(ctxt
);
15504 stream
= xmlNewStringInputStream(ctxt
, cur
);
15505 if (stream
== NULL
) {
15508 inputPush(ctxt
, stream
);
15509 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15514 * @ctxt: an XML parser context
15515 * @filename: a file or URL
15516 * @encoding: the document encoding, or NULL
15517 * @options: a combination of xmlParserOption
15519 * parse an XML file from the filesystem or the network.
15520 * This reuses the existing @ctxt parser context
15522 * Returns the resulting document tree
15525 xmlCtxtReadFile(xmlParserCtxtPtr ctxt
, const char *filename
,
15526 const char *encoding
, int options
)
15528 xmlParserInputPtr stream
;
15530 if (filename
== NULL
)
15536 xmlCtxtReset(ctxt
);
15538 stream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
15539 if (stream
== NULL
) {
15542 inputPush(ctxt
, stream
);
15543 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 1));
15547 * xmlCtxtReadMemory:
15548 * @ctxt: an XML parser context
15549 * @buffer: a pointer to a char array
15550 * @size: the size of the array
15551 * @URL: the base URL to use for the document
15552 * @encoding: the document encoding, or NULL
15553 * @options: a combination of xmlParserOption
15555 * parse an XML in-memory document and build a tree.
15556 * This reuses the existing @ctxt parser context
15558 * Returns the resulting document tree
15561 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt
, const char *buffer
, int size
,
15562 const char *URL
, const char *encoding
, int options
)
15564 xmlParserInputBufferPtr input
;
15565 xmlParserInputPtr stream
;
15569 if (buffer
== NULL
)
15573 xmlCtxtReset(ctxt
);
15575 input
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
15576 if (input
== NULL
) {
15580 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15581 if (stream
== NULL
) {
15582 xmlFreeParserInputBuffer(input
);
15586 inputPush(ctxt
, stream
);
15587 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15592 * @ctxt: an XML parser context
15593 * @fd: an open file descriptor
15594 * @URL: the base URL to use for the document
15595 * @encoding: the document encoding, or NULL
15596 * @options: a combination of xmlParserOption
15598 * parse an XML from a file descriptor and build a tree.
15599 * This reuses the existing @ctxt parser context
15600 * NOTE that the file descriptor will not be closed when the
15601 * reader is closed or reset.
15603 * Returns the resulting document tree
15606 xmlCtxtReadFd(xmlParserCtxtPtr ctxt
, int fd
,
15607 const char *URL
, const char *encoding
, int options
)
15609 xmlParserInputBufferPtr input
;
15610 xmlParserInputPtr stream
;
15618 xmlCtxtReset(ctxt
);
15621 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15624 input
->closecallback
= NULL
;
15625 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15626 if (stream
== NULL
) {
15627 xmlFreeParserInputBuffer(input
);
15630 inputPush(ctxt
, stream
);
15631 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15636 * @ctxt: an XML parser context
15637 * @ioread: an I/O read function
15638 * @ioclose: an I/O close function
15639 * @ioctx: an I/O handler
15640 * @URL: the base URL to use for the document
15641 * @encoding: the document encoding, or NULL
15642 * @options: a combination of xmlParserOption
15644 * parse an XML document from I/O functions and source and build a tree.
15645 * This reuses the existing @ctxt parser context
15647 * Returns the resulting document tree
15650 xmlCtxtReadIO(xmlParserCtxtPtr ctxt
, xmlInputReadCallback ioread
,
15651 xmlInputCloseCallback ioclose
, void *ioctx
,
15653 const char *encoding
, int options
)
15655 xmlParserInputBufferPtr input
;
15656 xmlParserInputPtr stream
;
15658 if (ioread
== NULL
)
15664 xmlCtxtReset(ctxt
);
15666 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15667 XML_CHAR_ENCODING_NONE
);
15668 if (input
== NULL
) {
15669 if (ioclose
!= NULL
)
15673 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15674 if (stream
== NULL
) {
15675 xmlFreeParserInputBuffer(input
);
15678 inputPush(ctxt
, stream
);
15679 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15682 #define bottom_parser
15683 #include "elfgcchack.h"