2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
33 /* To avoid EBCDIC trouble when parsing on zOS */
35 #pragma convert("ISO8859-1")
42 #define XML_DIR_SEP '\\'
44 #define XML_DIR_SEP '/'
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/HTMLparser.h>
61 #include <libxml/valid.h>
62 #include <libxml/entities.h>
63 #include <libxml/xmlerror.h>
64 #include <libxml/encoding.h>
65 #include <libxml/xmlIO.h>
66 #include <libxml/uri.h>
67 #ifdef LIBXML_CATALOG_ENABLED
68 #include <libxml/catalog.h>
70 #ifdef LIBXML_SCHEMAS_ENABLED
71 #include <libxml/xmlschemastypes.h>
72 #include <libxml/relaxng.h>
74 #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75 #include <libxml/xpath.h>
78 #include "private/buf.h"
79 #include "private/dict.h"
80 #include "private/enc.h"
81 #include "private/entities.h"
82 #include "private/error.h"
83 #include "private/globals.h"
84 #include "private/html.h"
85 #include "private/io.h"
86 #include "private/memory.h"
87 #include "private/parser.h"
88 #include "private/threads.h"
89 #include "private/xpath.h"
92 const xmlChar
*prefix
;
98 static xmlParserCtxtPtr
99 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax
, void *userData
,
100 const xmlChar
*URL
, const xmlChar
*ID
, const xmlChar
*base
,
101 xmlParserCtxtPtr pctx
);
104 xmlParseElementStart(xmlParserCtxtPtr ctxt
);
107 xmlParseElementEnd(xmlParserCtxtPtr ctxt
);
109 /************************************************************************
111 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
113 ************************************************************************/
115 #define XML_PARSER_BIG_ENTITY 1000
116 #define XML_PARSER_LOT_ENTITY 5000
119 * Constants for protection against abusive entity expansion
120 * ("billion laughs").
124 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125 * of serialized output after entity expansion.
127 #define XML_PARSER_NON_LINEAR 5
130 * A certain amount is always allowed.
132 #define XML_PARSER_ALLOWED_EXPANSION 1000000
135 * Fixed cost for each entity reference. This crudely models processing time
136 * as well to protect, for example, against exponential expansion of empty
137 * or very short entities.
139 #define XML_ENT_FIXED_COST 20
144 * arbitrary depth limit for the XML documents that we allow to
145 * process. This is not a limitation of the parser but a safety
146 * boundary feature. It can be disabled with the XML_PARSE_HUGE
149 unsigned int xmlParserMaxDepth
= 256;
154 #define XML_PARSER_BIG_BUFFER_SIZE 300
155 #define XML_PARSER_BUFFER_SIZE 100
156 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
159 * XML_PARSER_CHUNK_SIZE
161 * When calling GROW that's the minimal amount of data
162 * the parser expected to have received. It is not a hard
163 * limit but an optimization when reading strings like Names
164 * It is not strictly needed as long as inputs available characters
165 * are followed by 0, which should be provided by the I/O level
167 #define XML_PARSER_CHUNK_SIZE 100
170 * List of XML prefixed PI allowed by W3C specs
173 static const char* const xmlW3CPIs
[] = {
180 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
182 const xmlChar
**str
);
184 static xmlParserErrors
185 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
186 xmlSAXHandlerPtr sax
,
187 void *user_data
, int depth
, const xmlChar
*URL
,
188 const xmlChar
*ID
, xmlNodePtr
*list
);
191 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
,
192 const char *encoding
);
193 #ifdef LIBXML_LEGACY_ENABLED
195 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
196 xmlNodePtr lastNode
);
197 #endif /* LIBXML_LEGACY_ENABLED */
199 static xmlParserErrors
200 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
201 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
204 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
);
206 /************************************************************************
208 * Some factorized error routines *
210 ************************************************************************/
213 * xmlErrAttributeDup:
214 * @ctxt: an XML parser context
215 * @prefix: the attribute prefix
216 * @localname: the attribute localname
218 * Handle a redefinition of attribute error
/*
 * Reports a redefined attribute: stores XML_ERR_ATTRIBUTE_REDEFINED in
 * ctxt->errNo and raises it through __xmlRaiseError() in the
 * XML_FROM_PARSER domain with level XML_ERR_FATAL, then clears
 * ctxt->wellFormed.
 * NOTE(review): several lines of this definition (return type, braces, the
 * statement controlled by the first condition, the test that selects one of
 * the two messages) are not visible in this listing; confirm against the
 * complete file.
 */
221 xmlErrAttributeDup(xmlParserCtxtPtr ctxt
, const xmlChar
* prefix
,
222 const xmlChar
* localname
)
/* Condition: a context exists, SAX is disabled and the state is EOF. */
224 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
225 (ctxt
->instate
== XML_PARSER_EOF
))
228 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
/* Message form without a prefix: only localname is formatted. */
231 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
232 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
233 (const char *) localname
, NULL
, NULL
, 0, 0,
234 "Attribute %s redefined\n", localname
);
/* Message form with a prefix: prefix and localname are both passed. */
236 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
237 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
238 (const char *) prefix
, (const char *) localname
,
239 NULL
, 0, 0, "Attribute %s:%s redefined\n", prefix
,
/* The document is no longer well formed; outside recovery mode SAX is disabled. */
242 ctxt
->wellFormed
= 0;
243 if (ctxt
->recovery
== 0)
244 ctxt
->disableSAX
= 1;
250 * @ctxt: an XML parser context
251 * @error: the error number
252 * @info: extra information string
254 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Maps a parser error code to a fixed English message stored in errmsg and
 * raises it through __xmlRaiseError() in the XML_FROM_PARSER domain with
 * level XML_ERR_FATAL. Two call forms are visible: one formatting only the
 * message and one formatting the message followed by a second string.
 * Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery is 0,
 * ctxt->disableSAX is set.
 * NOTE(review): the switch header, the break statements, the default label,
 * the test choosing between the two call forms and the tail of each call
 * are not visible in this listing; confirm against the complete file.
 */
257 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
)
/* Condition: a context exists, SAX is disabled and the state is EOF. */
261 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
262 (ctxt
->instate
== XML_PARSER_EOF
))
265 case XML_ERR_INVALID_HEX_CHARREF
:
266 errmsg
= "CharRef: invalid hexadecimal value";
268 case XML_ERR_INVALID_DEC_CHARREF
:
269 errmsg
= "CharRef: invalid decimal value";
271 case XML_ERR_INVALID_CHARREF
:
272 errmsg
= "CharRef: invalid value";
274 case XML_ERR_INTERNAL_ERROR
:
275 errmsg
= "internal error";
277 case XML_ERR_PEREF_AT_EOF
:
278 errmsg
= "PEReference at end of document";
280 case XML_ERR_PEREF_IN_PROLOG
:
281 errmsg
= "PEReference in prolog";
283 case XML_ERR_PEREF_IN_EPILOG
:
284 errmsg
= "PEReference in epilog";
286 case XML_ERR_PEREF_NO_NAME
:
287 errmsg
= "PEReference: no name";
289 case XML_ERR_PEREF_SEMICOL_MISSING
:
290 errmsg
= "PEReference: expecting ';'";
292 case XML_ERR_ENTITY_LOOP
:
293 errmsg
= "Detected an entity reference loop";
295 case XML_ERR_ENTITY_NOT_STARTED
:
296 errmsg
= "EntityValue: \" or ' expected";
298 case XML_ERR_ENTITY_PE_INTERNAL
:
299 errmsg
= "PEReferences forbidden in internal subset";
/* Same message text as XML_ERR_ENTITY_NOT_STARTED above. */
301 case XML_ERR_ENTITY_NOT_FINISHED
:
302 errmsg
= "EntityValue: \" or ' expected";
304 case XML_ERR_ATTRIBUTE_NOT_STARTED
:
305 errmsg
= "AttValue: \" or ' expected";
307 case XML_ERR_LT_IN_ATTRIBUTE
:
308 errmsg
= "Unescaped '<' not allowed in attributes values";
310 case XML_ERR_LITERAL_NOT_STARTED
:
311 errmsg
= "SystemLiteral \" or ' expected";
313 case XML_ERR_LITERAL_NOT_FINISHED
:
314 errmsg
= "Unfinished System or Public ID \" or ' expected";
316 case XML_ERR_MISPLACED_CDATA_END
:
317 errmsg
= "Sequence ']]>' not allowed in content";
319 case XML_ERR_URI_REQUIRED
:
320 errmsg
= "SYSTEM or PUBLIC, the URI is missing";
322 case XML_ERR_PUBID_REQUIRED
:
323 errmsg
= "PUBLIC, the Public Identifier is missing";
325 case XML_ERR_HYPHEN_IN_COMMENT
:
326 errmsg
= "Comment must not contain '--' (double-hyphen)";
328 case XML_ERR_PI_NOT_STARTED
:
329 errmsg
= "xmlParsePI : no target name";
331 case XML_ERR_RESERVED_XML_NAME
:
332 errmsg
= "Invalid PI name";
334 case XML_ERR_NOTATION_NOT_STARTED
:
335 errmsg
= "NOTATION: Name expected here";
337 case XML_ERR_NOTATION_NOT_FINISHED
:
338 errmsg
= "'>' required to close NOTATION declaration";
340 case XML_ERR_VALUE_REQUIRED
:
341 errmsg
= "Entity value required";
343 case XML_ERR_URI_FRAGMENT
:
344 errmsg
= "Fragment not allowed";
346 case XML_ERR_ATTLIST_NOT_STARTED
:
347 errmsg
= "'(' required to start ATTLIST enumeration";
349 case XML_ERR_NMTOKEN_REQUIRED
:
350 errmsg
= "NmToken expected in ATTLIST enumeration";
352 case XML_ERR_ATTLIST_NOT_FINISHED
:
353 errmsg
= "')' required to finish ATTLIST enumeration";
355 case XML_ERR_MIXED_NOT_STARTED
:
356 errmsg
= "MixedContentDecl : '|' or ')*' expected";
358 case XML_ERR_PCDATA_REQUIRED
:
359 errmsg
= "MixedContentDecl : '#PCDATA' expected";
361 case XML_ERR_ELEMCONTENT_NOT_STARTED
:
362 errmsg
= "ContentDecl : Name or '(' expected";
364 case XML_ERR_ELEMCONTENT_NOT_FINISHED
:
365 errmsg
= "ContentDecl : ',' '|' or ')' expected";
/* NOTE(review): the assignment line that precedes the next string literal is not visible in this listing. */
367 case XML_ERR_PEREF_IN_INT_SUBSET
:
369 "PEReference: forbidden within markup decl in internal subset";
371 case XML_ERR_GT_REQUIRED
:
372 errmsg
= "expected '>'";
374 case XML_ERR_CONDSEC_INVALID
:
375 errmsg
= "XML conditional section '[' expected";
377 case XML_ERR_EXT_SUBSET_NOT_FINISHED
:
378 errmsg
= "Content error in the external subset";
/* NOTE(review): the assignment line that precedes the next string literal is not visible in this listing. */
380 case XML_ERR_CONDSEC_INVALID_KEYWORD
:
382 "conditional section INCLUDE or IGNORE keyword expected";
384 case XML_ERR_CONDSEC_NOT_FINISHED
:
385 errmsg
= "XML conditional section not closed";
387 case XML_ERR_XMLDECL_NOT_STARTED
:
388 errmsg
= "Text declaration '<?xml' required";
390 case XML_ERR_XMLDECL_NOT_FINISHED
:
391 errmsg
= "parsing XML declaration: '?>' expected";
393 case XML_ERR_EXT_ENTITY_STANDALONE
:
394 errmsg
= "external parsed entities cannot be standalone";
396 case XML_ERR_ENTITYREF_SEMICOL_MISSING
:
397 errmsg
= "EntityRef: expecting ';'";
399 case XML_ERR_DOCTYPE_NOT_FINISHED
:
400 errmsg
= "DOCTYPE improperly terminated";
402 case XML_ERR_LTSLASH_REQUIRED
:
403 errmsg
= "EndTag: '</' not found";
405 case XML_ERR_EQUAL_REQUIRED
:
406 errmsg
= "expected '='";
408 case XML_ERR_STRING_NOT_CLOSED
:
409 errmsg
= "String not closed expecting \" or '";
411 case XML_ERR_STRING_NOT_STARTED
:
412 errmsg
= "String not started expecting ' or \"";
414 case XML_ERR_ENCODING_NAME
:
415 errmsg
= "Invalid XML encoding name";
417 case XML_ERR_STANDALONE_VALUE
:
418 errmsg
= "standalone accepts only 'yes' or 'no'";
420 case XML_ERR_DOCUMENT_EMPTY
:
421 errmsg
= "Document is empty";
423 case XML_ERR_DOCUMENT_END
:
424 errmsg
= "Extra content at the end of the document";
426 case XML_ERR_NOT_WELL_BALANCED
:
427 errmsg
= "chunk is not well balanced";
429 case XML_ERR_EXTRA_CONTENT
:
430 errmsg
= "extra content at the end of well balanced chunk";
432 case XML_ERR_VERSION_MISSING
:
433 errmsg
= "Malformed declaration expecting version";
435 case XML_ERR_NAME_TOO_LONG
:
436 errmsg
= "Name too long";
/* Fallback text; NOTE(review): the label it belongs to is not visible, presumably default. */
444 errmsg
= "Unregistered error message";
/* Call form that formats a single string. */
449 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
450 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s\n",
/* Call form that formats two strings separated by a colon. */
453 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
454 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s: %s\n",
/* The document is no longer well formed; outside recovery mode SAX is disabled. */
458 ctxt
->wellFormed
= 0;
459 if (ctxt
->recovery
== 0)
460 ctxt
->disableSAX
= 1;
466 * @ctxt: an XML parser context
467 * @error: the error number
468 * @msg: the error message
470 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises a fatal parser error (domain XML_FROM_PARSER, level XML_ERR_FATAL)
 * whose text is msg, passed as the argument of a plain %s format, then
 * clears ctxt->wellFormed and, when ctxt->recovery is 0, sets
 * ctxt->disableSAX.
 * NOTE(review): the rest of the parameter list, the braces and the statement
 * controlled by the first condition are not visible in this listing.
 */
472 static void LIBXML_ATTR_FORMAT(3,0)
473 xmlFatalErrMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
/* Condition: a context exists, SAX is disabled and the state is EOF. */
476 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
477 (ctxt
->instate
== XML_PARSER_EOF
))
481 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
482 XML_ERR_FATAL
, NULL
, 0, NULL
, NULL
, NULL
, 0, 0, "%s", msg
);
484 ctxt
->wellFormed
= 0;
485 if (ctxt
->recovery
== 0)
486 ctxt
->disableSAX
= 1;
492 * @ctxt: an XML parser context
493 * @error: the error number
494 * @msg: the error message
/*
 * Raises a parser warning (domain XML_FROM_PARSER, level XML_ERR_WARNING).
 * msg is used as the format and str1 and str2 are passed both as the string
 * fields of the raised error and as the arguments formatted by msg.
 * None of the visible lines modifies ctxt->wellFormed or ctxt->disableSAX.
 * NOTE(review): braces, the statement controlled by the first condition and
 * the test selecting one of the two __xmlRaiseError() calls are not visible
 * in this listing; confirm against the complete file.
 */
500 static void LIBXML_ATTR_FORMAT(3,0)
501 xmlWarningMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
502 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
504 xmlStructuredErrorFunc schannel
= NULL
;
/* Condition: a context exists, SAX is disabled and the state is EOF. */
506 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
507 (ctxt
->instate
== XML_PARSER_EOF
))
/* The structured error callback is only used when the handler carries the SAX2 magic value. */
509 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
510 (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
511 schannel
= ctxt
->sax
->serror
;
/* Call form that forwards the warning callback of the SAX handler when one is attached. */
513 __xmlRaiseError(schannel
,
514 (ctxt
->sax
) ? ctxt
->sax
->warning
: NULL
,
516 ctxt
, NULL
, XML_FROM_PARSER
, error
,
517 XML_ERR_WARNING
, NULL
, 0,
518 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
519 msg
, (const char *) str1
, (const char *) str2
);
/* Call form without a generic callback or user data. */
521 __xmlRaiseError(schannel
, NULL
, NULL
,
522 ctxt
, NULL
, XML_FROM_PARSER
, error
,
523 XML_ERR_WARNING
, NULL
, 0,
524 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
525 msg
, (const char *) str1
, (const char *) str2
);
531 * @ctxt: an XML parser context
532 * @error: the error number
533 * @msg: the error message
536 * Handle a validity error.
/*
 * Raises a validity error: domain XML_FROM_DTD, level XML_ERR_ERROR.
 * msg is used as the format and str1 and str2 are passed both as the string
 * fields of the raised error and as the arguments formatted by msg.
 * NOTE(review): braces, the statement controlled by the first condition,
 * the test selecting one of the two __xmlRaiseError() calls and any
 * statements between them are not visible in this listing.
 */
538 static void LIBXML_ATTR_FORMAT(3,0)
539 xmlValidityError(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
540 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
542 xmlStructuredErrorFunc schannel
= NULL
;
/* Condition: a context exists, SAX is disabled and the state is EOF. */
544 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
545 (ctxt
->instate
== XML_PARSER_EOF
))
/* The structured error callback is only used when the handler carries the SAX2 magic value. */
549 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
550 schannel
= ctxt
->sax
->serror
;
/* Call form that reports through the callback and user data of the validation context. */
553 __xmlRaiseError(schannel
,
554 ctxt
->vctxt
.error
, ctxt
->vctxt
.userData
,
555 ctxt
, NULL
, XML_FROM_DTD
, error
,
556 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
557 (const char *) str2
, NULL
, 0, 0,
558 msg
, (const char *) str1
, (const char *) str2
);
/* Call form without a generic callback or user data. */
561 __xmlRaiseError(schannel
, NULL
, NULL
,
562 ctxt
, NULL
, XML_FROM_DTD
, error
,
563 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
564 (const char *) str2
, NULL
, 0, 0,
565 msg
, (const char *) str1
, (const char *) str2
);
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises a fatal parser error (domain XML_FROM_PARSER, level XML_ERR_FATAL)
 * whose format msg takes one integer. val appears twice in the call: once
 * among the fixed arguments before msg and once after msg as the value to
 * format. Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery
 * is 0, ctxt->disableSAX is set.
 * NOTE(review): braces and the statement controlled by the first condition
 * are not visible in this listing.
 */
578 static void LIBXML_ATTR_FORMAT(3,0)
579 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
580 const char *msg
, int val
)
/* Condition: a context exists, SAX is disabled and the state is EOF. */
582 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
583 (ctxt
->instate
== XML_PARSER_EOF
))
587 __xmlRaiseError(NULL
, NULL
, NULL
,
588 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
589 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
591 ctxt
->wellFormed
= 0;
592 if (ctxt
->recovery
== 0)
593 ctxt
->disableSAX
= 1;
598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: a string info
603 * @val: an integer value
604 * @str2: a string info
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises a fatal parser error (domain XML_FROM_PARSER, level XML_ERR_FATAL)
 * whose format msg takes, in this order, a string, an integer and a string
 * (str1, val, str2). The same three values are also passed among the fixed
 * arguments before msg. Afterwards ctxt->wellFormed is cleared and, when
 * ctxt->recovery is 0, ctxt->disableSAX is set.
 * NOTE(review): the end of the parameter list (where str2 is declared),
 * the braces and the statement controlled by the first condition are not
 * visible in this listing.
 */
608 static void LIBXML_ATTR_FORMAT(3,0)
609 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
610 const char *msg
, const xmlChar
*str1
, int val
,
/* Condition: a context exists, SAX is disabled and the state is EOF. */
613 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
614 (ctxt
->instate
== XML_PARSER_EOF
))
618 __xmlRaiseError(NULL
, NULL
, NULL
,
619 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
620 NULL
, 0, (const char *) str1
, (const char *) str2
,
621 NULL
, val
, 0, msg
, str1
, val
, str2
);
623 ctxt
->wellFormed
= 0;
624 if (ctxt
->recovery
== 0)
625 ctxt
->disableSAX
= 1;
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises a fatal parser error (domain XML_FROM_PARSER, level XML_ERR_FATAL)
 * using msg as the format; val is passed as the first string field of the
 * raised error. Afterwards ctxt->wellFormed is cleared and, when
 * ctxt->recovery is 0, ctxt->disableSAX is set.
 * NOTE(review): the tail of the __xmlRaiseError() call (presumably val as
 * the argument formatted by msg), the braces and the statement controlled
 * by the first condition are not visible in this listing.
 */
638 static void LIBXML_ATTR_FORMAT(3,0)
639 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
640 const char *msg
, const xmlChar
* val
)
/* Condition: a context exists, SAX is disabled and the state is EOF. */
642 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
643 (ctxt
->instate
== XML_PARSER_EOF
))
647 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
648 XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
649 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
652 ctxt
->wellFormed
= 0;
653 if (ctxt
->recovery
== 0)
654 ctxt
->disableSAX
= 1;
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
665 * Handle a non fatal parser error
/*
 * Raises a non fatal parser error: domain XML_FROM_PARSER, level
 * XML_ERR_ERROR, with msg as the format and val as the first string field
 * of the raised error. None of the visible lines modifies ctxt->wellFormed
 * or ctxt->disableSAX.
 * NOTE(review): the tail of the __xmlRaiseError() call (presumably val as
 * the argument formatted by msg), the braces and the statement controlled
 * by the first condition are not visible in this listing.
 */
667 static void LIBXML_ATTR_FORMAT(3,0)
668 xmlErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
669 const char *msg
, const xmlChar
* val
)
/* Condition: a context exists, SAX is disabled and the state is EOF. */
671 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
672 (ctxt
->instate
== XML_PARSER_EOF
))
676 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
677 XML_FROM_PARSER
, error
, XML_ERR_ERROR
,
678 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
684 * @ctxt: an XML parser context
685 * @error: the error number
687 * @info1: extra information string
688 * @info2: extra information string
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises a namespace error: domain XML_FROM_NAMESPACE, level XML_ERR_ERROR.
 * info1, info2 and info3 are passed both as the three string fields of the
 * raised error and as the arguments formatted by msg. Afterwards
 * ctxt->nsWellFormed is cleared.
 * NOTE(review): the header comment calls this a fatal error, yet the level
 * passed here is XML_ERR_ERROR and ctxt->wellFormed is not touched by any
 * visible line. The parameter line declaring msg, the braces and the
 * statements around the nsWellFormed assignment are not visible in this
 * listing.
 */
692 static void LIBXML_ATTR_FORMAT(3,0)
693 xmlNsErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
695 const xmlChar
* info1
, const xmlChar
* info2
,
696 const xmlChar
* info3
)
/* Condition: a context exists, SAX is disabled and the state is EOF. */
698 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
699 (ctxt
->instate
== XML_PARSER_EOF
))
703 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
704 XML_ERR_ERROR
, NULL
, 0, (const char *) info1
,
705 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
706 info1
, info2
, info3
);
708 ctxt
->nsWellFormed
= 0;
713 * @ctxt: an XML parser context
714 * @error: the error number
716 * @info1: extra information string
717 * @info2: extra information string
719 * Handle a namespace warning error
/*
 * Raises a namespace warning: domain XML_FROM_NAMESPACE, level
 * XML_ERR_WARNING. info1, info2 and info3 are passed both as the three
 * string fields of the raised error and as the arguments formatted by msg.
 * None of the visible lines modifies the well-formedness flags of ctxt.
 * NOTE(review): the parameter line declaring msg, the braces and the
 * statement controlled by the first condition are not visible in this
 * listing.
 */
721 static void LIBXML_ATTR_FORMAT(3,0)
722 xmlNsWarn(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
724 const xmlChar
* info1
, const xmlChar
* info2
,
725 const xmlChar
* info3
)
/* Condition: a context exists, SAX is disabled and the state is EOF. */
727 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
728 (ctxt
->instate
== XML_PARSER_EOF
))
730 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
731 XML_ERR_WARNING
, NULL
, 0, (const char *) info1
,
732 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
733 info1
, info2
, info3
);
/*
 * Adds val to the counter pointed to by dst. The visible test is true
 * exactly when the sum of *dst and val would exceed ULONG_MAX; it is
 * written as a subtraction so that the check itself cannot wrap around.
 * NOTE(review): the return type and the statements controlled by the test
 * are not visible in this listing; going by the name, *dst is presumably
 * clamped to ULONG_MAX on overflow and incremented by val otherwise.
 */
737 xmlSaturatedAdd(unsigned long *dst
, unsigned long val
) {
738 if (val
> ULONG_MAX
- *dst
)
/*
 * Variant of xmlSaturatedAdd with the same visible overflow test: true
 * exactly when the sum of *dst and val would exceed ULONG_MAX.
 * NOTE(review): despite the SizeT suffix, val is declared unsigned long.
 * On a platform where size_t is wider than unsigned long, a larger argument
 * would be converted (and could lose its high bits) before this test runs;
 * confirm whether a size_t parameter was intended.
 * NOTE(review): the return type and the statements controlled by the test
 * are not visible in this listing.
 */
745 xmlSaturatedAddSizeT(unsigned long *dst
, unsigned long val
) {
746 if (val
> ULONG_MAX
- *dst
)
753 * xmlParserEntityCheck:
754 * @ctxt: parser context
755 * @extra: sum of unexpanded entity sizes
757 * Check for non-linear entity expansion behaviour.
759 * In some cases like xmlStringDecodeEntities, this function is called
760 * for each, possibly nested entity and its unexpanded content length.
762 * In other cases like xmlParseReference, it's only called for each
763 * top-level entity with its unexpanded content length plus the sum of
764 * the unexpanded content lengths (plus fixed cost) of all nested
767 * Summing the unexpanded lengths also adds the length of the reference.
768 * This is by design. Taking the length of the entity name into account
769 * discourages attacks that try to waste CPU time with abusively long
770 * entity names. See test/recurse/lol6.xml for example. Each call also
771 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
774 * Returns 1 on error, 0 on success.
/*
 * Guards against abusive entity expansion. The function accumulates the
 * cost of expanded entities in ctxt->sizeentcopy and compares it with the
 * amount of input consumed so far; when the ratio is too high it reports
 * XML_ERR_ENTITY_LOOP through xmlFatalErrMsg().
 * NOTE(review): the return type, braces, return statements and some
 * comment delimiters are not visible in this listing; confirm against the
 * complete file.
 */
777 xmlParserEntityCheck(xmlParserCtxtPtr ctxt
, unsigned long extra
)
779 unsigned long consumed
;
780 xmlParserInputPtr input
= ctxt
->input
;
781 xmlEntityPtr entity
= input
->entity
;
784 * Compute total consumed bytes so far, including input streams of
/* Start from the amount recorded in the parentConsumed field of the current input. */
787 consumed
= input
->parentConsumed
;
/*
 * The current input itself is only counted when it is not tied to an
 * entity, or when it is an external parameter entity whose
 * XML_ENT_PARSED flag is not set yet.
 */
788 if ((entity
== NULL
) ||
789 ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
790 ((entity
->flags
& XML_ENT_PARSED
) == 0))) {
791 xmlSaturatedAdd(&consumed
, input
->consumed
);
792 xmlSaturatedAddSizeT(&consumed
, input
->cur
- input
->base
);
794 xmlSaturatedAdd(&consumed
, ctxt
->sizeentities
);
797 * Add extra cost and some fixed cost.
799 xmlSaturatedAdd(&ctxt
->sizeentcopy
, extra
);
800 xmlSaturatedAdd(&ctxt
->sizeentcopy
, XML_ENT_FIXED_COST
);
803 * It's important to always use saturation arithmetic when tracking
804 * entity sizes to make the size checks reliable. If "sizeentcopy"
805 * overflows, we have to abort.
/*
 * Error condition: the accumulated cost exceeds the always-allowed
 * XML_PARSER_ALLOWED_EXPANSION and either has saturated at ULONG_MAX or,
 * divided by XML_PARSER_NON_LINEAR, is larger than the consumed input.
 */
807 if ((ctxt
->sizeentcopy
> XML_PARSER_ALLOWED_EXPANSION
) &&
808 ((ctxt
->sizeentcopy
>= ULONG_MAX
) ||
809 (ctxt
->sizeentcopy
/ XML_PARSER_NON_LINEAR
> consumed
))) {
810 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_LOOP
,
811 "Maximum entity amplification factor exceeded");
819 /************************************************************************
821 * Library wide options *
823 ************************************************************************/
827 * @feature: the feature to be examined
829 * Examines if the library has been compiled with a given feature.
831 * Returns a non-zero value if the feature exist, otherwise zero.
832 * Returns zero (0) if the feature does not exist or an
833 * unknown feature is requested, non-zero otherwise.
/*
 * Reports whether the library was built with a given feature. The visible
 * lines are case labels on XML_WITH_* values, each followed by an #ifdef on
 * a build-time macro, so the answer is fixed at compile time.
 * NOTE(review): the switch header, the return statements, the #else and
 * #endif lines and several case labels are not visible in this listing, so
 * some #ifdef lines below appear without the label they belong to.
 */
836 xmlHasFeature(xmlFeature feature
)
839 case XML_WITH_THREAD
:
840 #ifdef LIBXML_THREAD_ENABLED
846 #ifdef LIBXML_TREE_ENABLED
851 case XML_WITH_OUTPUT
:
852 #ifdef LIBXML_OUTPUT_ENABLED
858 #ifdef LIBXML_PUSH_ENABLED
863 case XML_WITH_READER
:
864 #ifdef LIBXML_READER_ENABLED
869 case XML_WITH_PATTERN
:
870 #ifdef LIBXML_PATTERN_ENABLED
875 case XML_WITH_WRITER
:
876 #ifdef LIBXML_WRITER_ENABLED
882 #ifdef LIBXML_SAX1_ENABLED
888 #ifdef LIBXML_FTP_ENABLED
894 #ifdef LIBXML_HTTP_ENABLED
900 #ifdef LIBXML_VALID_ENABLED
906 #ifdef LIBXML_HTML_ENABLED
911 case XML_WITH_LEGACY
:
912 #ifdef LIBXML_LEGACY_ENABLED
918 #ifdef LIBXML_C14N_ENABLED
923 case XML_WITH_CATALOG
:
924 #ifdef LIBXML_CATALOG_ENABLED
930 #ifdef LIBXML_XPATH_ENABLED
936 #ifdef LIBXML_XPTR_ENABLED
941 case XML_WITH_XINCLUDE
:
942 #ifdef LIBXML_XINCLUDE_ENABLED
948 #ifdef LIBXML_ICONV_ENABLED
953 case XML_WITH_ISO8859X
:
954 #ifdef LIBXML_ISO8859X_ENABLED
959 case XML_WITH_UNICODE
:
960 #ifdef LIBXML_UNICODE_ENABLED
965 case XML_WITH_REGEXP
:
966 #ifdef LIBXML_REGEXP_ENABLED
971 case XML_WITH_AUTOMATA
:
972 #ifdef LIBXML_AUTOMATA_ENABLED
978 #ifdef LIBXML_EXPR_ENABLED
983 case XML_WITH_SCHEMAS
:
984 #ifdef LIBXML_SCHEMAS_ENABLED
989 case XML_WITH_SCHEMATRON
:
990 #ifdef LIBXML_SCHEMATRON_ENABLED
995 case XML_WITH_MODULES
:
996 #ifdef LIBXML_MODULES_ENABLED
1001 case XML_WITH_DEBUG
:
1002 #ifdef LIBXML_DEBUG_ENABLED
/* Unlike the other cases, this one tests a macro without the LIBXML_ prefix. */
1007 case XML_WITH_DEBUG_MEM
:
1008 #ifdef DEBUG_MEMORY_LOCATION
1013 case XML_WITH_DEBUG_RUN
:
1016 #ifdef LIBXML_ZLIB_ENABLED
1022 #ifdef LIBXML_LZMA_ENABLED
1028 #ifdef LIBXML_ICU_ENABLED
1039 /************************************************************************
1041 * SAX2 defaulted attributes handling *
1043 ************************************************************************/
1047 * @ctxt: an XML parser context
1049 * Do the SAX2 detection and specific initialization
/*
 * Performs the SAX2 detection for a parser context and interns three
 * strings in the context dictionary: the xml prefix (length 3), the xmlns
 * prefix (length 5) and the XML namespace name (XML_XML_NAMESPACE, length
 * 36). Does nothing when ctxt is NULL. If any of the three lookups returns
 * NULL, xmlErrMemory() is called.
 * NOTE(review): the return type, the assignment of sax, the statement
 * controlled by the SAX2 test and the closing braces are not visible in
 * this listing.
 */
1052 xmlDetectSAX2(xmlParserCtxtPtr ctxt
) {
1053 xmlSAXHandlerPtr sax
;
1055 /* Avoid unused variable warning if features are disabled. */
1058 if (ctxt
== NULL
) return;
1060 #ifdef LIBXML_SAX1_ENABLED
/*
 * True when the handler carries the SAX2 magic value and either provides
 * startElementNs or endElementNs, or provides neither startElement nor
 * endElement.
 */
1061 if ((sax
) && (sax
->initialized
== XML_SAX2_MAGIC
) &&
1062 ((sax
->startElementNs
!= NULL
) ||
1063 (sax
->endElementNs
!= NULL
) ||
1064 ((sax
->startElement
== NULL
) && (sax
->endElement
== NULL
))))
1068 #endif /* LIBXML_SAX1_ENABLED */
/* Cache the dictionary copies of the well-known names in the context. */
1070 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
1071 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
1072 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
/* A NULL result from any lookup is reported as a memory error. */
1073 if ((ctxt
->str_xml
==NULL
) || (ctxt
->str_xmlns
==NULL
) ||
1074 (ctxt
->str_xml_ns
== NULL
)) {
1075 xmlErrMemory(ctxt
, NULL
);
/*
 * Per-element table of defaulted attributes. xmlAddDefAttrs() stores five
 * consecutive pointers per attribute in values, at index 5 * n + k:
 * k = 0 local name, k = 1 prefix, k = 2 start of the value, k = 3 end of
 * the value (start plus length), k = 4 an external marker or NULL.
 * nbAttrs and maxAttrs count attributes, not pointers.
 * NOTE(review): the #else and #endif lines and the closing brace are not
 * visible in this listing.
 */
1079 typedef struct _xmlDefAttrs xmlDefAttrs
;
1080 typedef xmlDefAttrs
*xmlDefAttrsPtr
;
1081 struct _xmlDefAttrs
{
1082 int nbAttrs
; /* number of defaulted attributes on that element */
1083 int maxAttrs
; /* the size of the array */
1084 #if __STDC_VERSION__ >= 199901L
1085 /* Using a C99 flexible array member avoids UBSan errors. */
1086 const xmlChar
*values
[]; /* array of localname/prefix/values/external */
/* Alternative declaration with a fixed size of five pointers, i.e. room for one attribute. */
1088 const xmlChar
*values
[5];
1093 * xmlAttrNormalizeSpace:
1094 * @src: the source string
1095 * @dst: the target string
1097 * Normalize the space in non CDATA attribute values:
1098 * If the attribute type is not CDATA, then the XML processor MUST further
1099 * process the normalized attribute value by discarding any leading and
1100 * trailing space (#x20) characters, and by replacing sequences of space
1101 * (#x20) characters by a single space (#x20) character.
1102 * Note that the size of dst need to be at least src, and if one doesn't need
1103 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104 * passing src as dst is just fine.
1106 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * Space normalization helper for non CDATA attribute values. The visible
 * lines test both pointers against NULL and contain two loops that advance
 * src over runs of space characters (0x20).
 * NOTE(review): the return type, the outer copy loop, the writes to dst
 * and the return statements are not visible in this listing, so the exact
 * copy and trimming logic cannot be confirmed from here.
 */
1110 xmlAttrNormalizeSpace(const xmlChar
*src
, xmlChar
*dst
)
/* Condition: either pointer is NULL. */
1112 if ((src
== NULL
) || (dst
== NULL
))
/* Skip a run of spaces in the source. */
1115 while (*src
== 0x20) src
++;
/* Skip another run of spaces in the source. */
1118 while (*src
== 0x20) src
++;
1132 * xmlAttrNormalizeSpace2:
1133 * @src: the source string
1135 * Normalize the space in non CDATA attribute values, a slightly more complex
1136 * front end to avoid allocation problems when running on attribute values
1137 * coming from the input.
1139 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * Front end to xmlAttrNormalizeSpace for values coming from the input.
 * Two outcomes are visible: when a reallocation is needed, the value is
 * duplicated without its leading spaces, normalized in the copy, and *len
 * is set to the length of the copy; otherwise, when only leading spaces
 * have to go, *len is reduced and the value is shifted in place within src.
 * NOTE(review): the declarations of i, cur and ret, the scanning loop that
 * sets remove_head and need_realloc, and the return statements are not
 * visible in this listing.
 */
1142 static const xmlChar
*
1143 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt
, xmlChar
*src
, int *len
)
1146 int remove_head
= 0;
1147 int need_realloc
= 0;
/* Condition: any of the three pointers is NULL. */
1150 if ((ctxt
== NULL
) || (src
== NULL
) || (len
== NULL
))
/* Loop over leading spaces. */
1157 while (*cur
== 0x20) {
/* Condition: the current character is a space or the terminator. */
1164 if ((*cur
== 0x20) || (*cur
== 0)) {
/* Duplicate the value starting after the leading spaces. */
1174 ret
= xmlStrndup(src
+ remove_head
, i
- remove_head
+ 1);
1176 xmlErrMemory(ctxt
, NULL
);
/* Normalize in place inside the private copy and report its new length. */
1179 xmlAttrNormalizeSpace(ret
, ret
);
1180 *len
= strlen((const char *)ret
);
1182 } else if (remove_head
) {
/* The count of 1 + *len also moves the byte following the value. */
1183 *len
-= remove_head
;
1184 memmove(src
, src
+ remove_head
, 1 + *len
);
1192 * @ctxt: an XML parser context
1193 * @fullname: the element fullname
1194 * @fullattr: the attribute fullname
1195 * @value: the attribute value
1197 * Add a defaulted attribute for an element
/*
 * Records a defaulted attribute for an element in ctxt->attsDefault, a hash
 * keyed by the local name and prefix of the element. Each element maps to
 * an xmlDefAttrs record holding five pointers per attribute (local name,
 * prefix, value start, value end, external marker or NULL). All names and
 * the value are interned in ctxt->dict.
 * NOTE(review): the return type, the declaration of len, the tests on the
 * results of xmlSplitQName3 and the lookups, the test selecting the
 * external marker, the error labels and the return statements are not
 * visible in this listing; confirm against the complete file.
 */
1200 xmlAddDefAttrs(xmlParserCtxtPtr ctxt
,
1201 const xmlChar
*fullname
,
1202 const xmlChar
*fullattr
,
1203 const xmlChar
*value
) {
1204 xmlDefAttrsPtr defaults
;
1206 const xmlChar
*name
;
1207 const xmlChar
*prefix
;
1210 * Allows to detect attribute redefinitions
/* Condition: the pair is already present in the special attribute table. */
1212 if (ctxt
->attsSpecial
!= NULL
) {
1213 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
/* The table of defaults is created on first use and shares the context dictionary. */
1217 if (ctxt
->attsDefault
== NULL
) {
1218 ctxt
->attsDefault
= xmlHashCreateDict(10, ctxt
->dict
);
1219 if (ctxt
->attsDefault
== NULL
)
1224 * split the element name into prefix:localname , the string found
1225 * are within the DTD and then not associated to namespace names.
1227 name
= xmlSplitQName3(fullname
, &len
);
/* Form used for a name looked up as a whole. */
1229 name
= xmlDictLookup(ctxt
->dict
, fullname
, -1);
/* Form used for a split name: the prefix is the first len bytes of fullname. */
1232 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1233 prefix
= xmlDictLookup(ctxt
->dict
, fullname
, len
);
1237 * make sure there is some storage
1239 defaults
= xmlHashLookup2(ctxt
->attsDefault
, name
, prefix
);
/* First attribute for this element: allocate room for 4 attributes of 5 pointers each. */
1240 if (defaults
== NULL
) {
1241 defaults
= (xmlDefAttrsPtr
) xmlMalloc(sizeof(xmlDefAttrs
) +
1242 (4 * 5) * sizeof(const xmlChar
*));
1243 if (defaults
== NULL
)
1245 defaults
->nbAttrs
= 0;
1246 defaults
->maxAttrs
= 4;
1247 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1248 defaults
, NULL
) < 0) {
/* Record is full: double its capacity and store the (possibly moved) record again. */
1252 } else if (defaults
->nbAttrs
>= defaults
->maxAttrs
) {
1253 xmlDefAttrsPtr temp
;
1255 temp
= (xmlDefAttrsPtr
) xmlRealloc(defaults
, sizeof(xmlDefAttrs
) +
1256 (2 * defaults
->maxAttrs
* 5) * sizeof(const xmlChar
*));
1260 defaults
->maxAttrs
*= 2;
1261 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1262 defaults
, NULL
) < 0) {
1269 * Split the element name into prefix:localname , the string found
1270 * are within the DTD and hen not associated to namespace names.
/* Same splitting as above, this time applied to the attribute name. */
1272 name
= xmlSplitQName3(fullattr
, &len
);
1274 name
= xmlDictLookup(ctxt
->dict
, fullattr
, -1);
1277 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1278 prefix
= xmlDictLookup(ctxt
->dict
, fullattr
, len
);
/* Slots 0 and 1 of the new entry: local name and prefix. */
1281 defaults
->values
[5 * defaults
->nbAttrs
] = name
;
1282 defaults
->values
[5 * defaults
->nbAttrs
+ 1] = prefix
;
1283 /* intern the string and precompute the end */
1284 len
= xmlStrlen(value
);
1285 value
= xmlDictLookup(ctxt
->dict
, value
, len
);
/* Slots 2 and 3: start of the interned value and the position len bytes after it. */
1288 defaults
->values
[5 * defaults
->nbAttrs
+ 2] = value
;
1289 defaults
->values
[5 * defaults
->nbAttrs
+ 3] = value
+ len
;
/* Slot 4: either a marker string or NULL; the selecting test is not visible here. */
1291 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = BAD_CAST
"external";
1293 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = NULL
;
1294 defaults
->nbAttrs
++;
/* NOTE(review): presumably the shared out-of-memory exit; its label is not visible. */
1299 xmlErrMemory(ctxt
, NULL
);
1304 * xmlAddSpecialAttr:
1305 * @ctxt: an XML parser context
1306 * @fullname: the element fullname
1307 * @fullattr: the attribute fullname
1308 * @type: the attribute type
1310 * Register this attribute type
/*
 * Registers the type of an attribute in ctxt->attsSpecial, a hash keyed by
 * the full element name and the full attribute name. The table is created
 * on first use and shares the context dictionary. The type is stored
 * directly in the payload pointer (cast through ptrdiff_t), so no memory is
 * allocated per entry.
 * NOTE(review): the return type, the parameter line declaring type, the
 * statements controlled by the two tests (presumably an early exit when
 * the pair is already registered), the label before xmlErrMemory() and the
 * return statements are not visible in this listing.
 */
1313 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt
,
1314 const xmlChar
*fullname
,
1315 const xmlChar
*fullattr
,
1318 if (ctxt
->attsSpecial
== NULL
) {
1319 ctxt
->attsSpecial
= xmlHashCreateDict(10, ctxt
->dict
);
1320 if (ctxt
->attsSpecial
== NULL
)
/* Condition: an entry already exists for this element and attribute pair. */
1324 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1327 xmlHashAddEntry2(ctxt
->attsSpecial
, fullname
, fullattr
,
1328 (void *) (ptrdiff_t) type
);
1332 xmlErrMemory(ctxt
, NULL
);
1337 * xmlCleanSpecialAttrCallback:
1339 * Removes CDATA attributes from the special attribute table
/*
 * Hash scanning callback used by xmlCleanSpecialAttr. payload is the
 * attribute type stored as a pointer-sized integer and data is the parser
 * context. Entries whose type equals XML_ATTRIBUTE_CDATA are removed from
 * ctxt->attsSpecial; the NULL deallocator matches the fact that the payload
 * is an integer, not an allocation.
 * NOTE(review): the return type and the closing braces are not visible in
 * this listing.
 */
1342 xmlCleanSpecialAttrCallback(void *payload
, void *data
,
1343 const xmlChar
*fullname
, const xmlChar
*fullattr
,
1344 const xmlChar
*unused ATTRIBUTE_UNUSED
) {
1345 xmlParserCtxtPtr ctxt
= (xmlParserCtxtPtr
) data
;
1347 if (((ptrdiff_t) payload
) == XML_ATTRIBUTE_CDATA
) {
1348 xmlHashRemoveEntry2(ctxt
->attsSpecial
, fullname
, fullattr
, NULL
);
1353 * xmlCleanSpecialAttr:
1354 * @ctxt: an XML parser context
1356 * Trim the list of attributes defined to remove all those of type
1357 * CDATA as they are not special. This call should be made once the DTD
1358 * has been parsed and before starting to parse the document root.
/*
 * Runs xmlCleanSpecialAttrCallback over every entry of ctxt->attsSpecial,
 * passing the context as callback data, then frees the table and resets
 * the field to NULL when no entry is left.
 * NOTE(review): the return type, the statement controlled by the NULL test
 * (presumably an early return) and the braces are not visible in this
 * listing.
 */
1361 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt
)
1363 if (ctxt
->attsSpecial
== NULL
)
1366 xmlHashScanFull(ctxt
->attsSpecial
, xmlCleanSpecialAttrCallback
, ctxt
);
/* Drop the table entirely once the scan has emptied it. */
1368 if (xmlHashSize(ctxt
->attsSpecial
) == 0) {
1369 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1370 ctxt
->attsSpecial
= NULL
;
/*
 * xmlLangSkipAlpha:
 * @p:  pointer into a zero terminated string
 *
 * Helper for xmlCheckLanguageID.
 *
 * Returns a pointer to the first byte at or after @p which is not an
 * ASCII letter ([a-z] | [A-Z]).
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *p)
{
    while (((p[0] >= 'A') && (p[0] <= 'Z')) ||
           ((p[0] >= 'a') && (p[0] <= 'z')))
        p++;
    return(p);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC reference the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur = xmlLangSkipAlpha(cur + 2);
        return(cur[0] == 0);
    }

    /* primary language subtag */
    nxt = xmlLangSkipAlpha(cur);
    if (nxt - cur >= 4) {
        /*
         * Reserved (4ALPHA) or registered (5*8ALPHA) language subtag:
         * accepted only when it is the whole tag.
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    nxt = xmlLangSkipAlpha(nxt);
    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /* nxt[0] is already known to be a digit: need two more for 3DIGIT */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1571 /************************************************************************
1573 * Parser stacks related functions and macros *
1575 ************************************************************************/
1577 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
1578 const xmlChar
** str
);
1583 * @ctxt: an XML parser context
1584 * @prefix: the namespace prefix or NULL
1585 * @URL: the namespace name
1587 * Pushes a new parser namespace on top of the ns stack
1589 * Returns -1 in case of error, -2 if the namespace should be discarded
1590 * and the index in the stack otherwise.
1593 nsPush(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
, const xmlChar
*URL
)
1595 if (ctxt
->options
& XML_PARSE_NSCLEAN
) {
1597 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-= 2) {
1598 if (ctxt
->nsTab
[i
] == prefix
) {
1600 if (ctxt
->nsTab
[i
+ 1] == URL
)
1602 /* out of scope keep it */
1607 if ((ctxt
->nsMax
== 0) || (ctxt
->nsTab
== NULL
)) {
1610 ctxt
->nsTab
= (const xmlChar
**)
1611 xmlMalloc(ctxt
->nsMax
* sizeof(xmlChar
*));
1612 if (ctxt
->nsTab
== NULL
) {
1613 xmlErrMemory(ctxt
, NULL
);
1617 } else if (ctxt
->nsNr
>= ctxt
->nsMax
) {
1618 const xmlChar
** tmp
;
1620 tmp
= (const xmlChar
**) xmlRealloc((char *) ctxt
->nsTab
,
1621 ctxt
->nsMax
* sizeof(ctxt
->nsTab
[0]));
1623 xmlErrMemory(ctxt
, NULL
);
1629 ctxt
->nsTab
[ctxt
->nsNr
++] = prefix
;
1630 ctxt
->nsTab
[ctxt
->nsNr
++] = URL
;
1631 return (ctxt
->nsNr
);
1635 * @ctxt: an XML parser context
1636 * @nr: the number to pop
1638 * Pops the top @nr parser prefix/namespace from the ns stack
1640 * Returns the number of namespaces removed
1643 nsPop(xmlParserCtxtPtr ctxt
, int nr
)
1647 if (ctxt
->nsTab
== NULL
) return(0);
1648 if (ctxt
->nsNr
< nr
) {
1649 xmlGenericError(xmlGenericErrorContext
, "Pbm popping %d NS\n", nr
);
1652 if (ctxt
->nsNr
<= 0)
1655 for (i
= 0;i
< nr
;i
++) {
1657 ctxt
->nsTab
[ctxt
->nsNr
] = NULL
;
1664 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt
, int nr
) {
1665 const xmlChar
**atts
;
1669 if (nr
+ 5 > ctxt
->maxatts
) {
1670 maxatts
= ctxt
->maxatts
== 0 ? 55 : (nr
+ 5) * 2;
1671 atts
= (const xmlChar
**) xmlMalloc(
1672 maxatts
* sizeof(const xmlChar
*));
1673 if (atts
== NULL
) goto mem_error
;
1674 attallocs
= (int *) xmlRealloc((void *) ctxt
->attallocs
,
1675 (maxatts
/ 5) * sizeof(int));
1676 if (attallocs
== NULL
) {
1680 if (ctxt
->maxatts
> 0)
1681 memcpy(atts
, ctxt
->atts
, ctxt
->maxatts
* sizeof(const xmlChar
*));
1682 xmlFree(ctxt
->atts
);
1684 ctxt
->attallocs
= attallocs
;
1685 ctxt
->maxatts
= maxatts
;
1687 return(ctxt
->maxatts
);
1689 xmlErrMemory(ctxt
, NULL
);
1695 * @ctxt: an XML parser context
1696 * @value: the parser input
1698 * Pushes a new parser input on top of the input stack
1700 * Returns -1 in case of error, the index in the stack otherwise
1703 inputPush(xmlParserCtxtPtr ctxt
, xmlParserInputPtr value
)
1705 if ((ctxt
== NULL
) || (value
== NULL
))
1707 if (ctxt
->inputNr
>= ctxt
->inputMax
) {
1708 size_t newSize
= ctxt
->inputMax
* 2;
1709 xmlParserInputPtr
*tmp
;
1711 tmp
= (xmlParserInputPtr
*) xmlRealloc(ctxt
->inputTab
,
1712 newSize
* sizeof(*tmp
));
1714 xmlErrMemory(ctxt
, NULL
);
1717 ctxt
->inputTab
= tmp
;
1718 ctxt
->inputMax
= newSize
;
1720 ctxt
->inputTab
[ctxt
->inputNr
] = value
;
1721 ctxt
->input
= value
;
1722 return (ctxt
->inputNr
++);
1726 * @ctxt: an XML parser context
1728 * Pops the top parser input from the input stack
1730 * Returns the input just removed
1733 inputPop(xmlParserCtxtPtr ctxt
)
1735 xmlParserInputPtr ret
;
1739 if (ctxt
->inputNr
<= 0)
1742 if (ctxt
->inputNr
> 0)
1743 ctxt
->input
= ctxt
->inputTab
[ctxt
->inputNr
- 1];
1746 ret
= ctxt
->inputTab
[ctxt
->inputNr
];
1747 ctxt
->inputTab
[ctxt
->inputNr
] = NULL
;
1752 * @ctxt: an XML parser context
1753 * @value: the element node
1755 * DEPRECATED: Internal function, do not use.
1757 * Pushes a new element node on top of the node stack
1759 * Returns -1 in case of error, the index in the stack otherwise
1762 nodePush(xmlParserCtxtPtr ctxt
, xmlNodePtr value
)
1764 if (ctxt
== NULL
) return(0);
1765 if (ctxt
->nodeNr
>= ctxt
->nodeMax
) {
1768 tmp
= (xmlNodePtr
*) xmlRealloc(ctxt
->nodeTab
,
1770 sizeof(ctxt
->nodeTab
[0]));
1772 xmlErrMemory(ctxt
, NULL
);
1775 ctxt
->nodeTab
= tmp
;
1778 if ((((unsigned int) ctxt
->nodeNr
) > xmlParserMaxDepth
) &&
1779 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
1780 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1783 xmlHaltParser(ctxt
);
1786 ctxt
->nodeTab
[ctxt
->nodeNr
] = value
;
1788 return (ctxt
->nodeNr
++);
1793 * @ctxt: an XML parser context
1795 * DEPRECATED: Internal function, do not use.
1797 * Pops the top element node from the node stack
1799 * Returns the node just removed
1802 nodePop(xmlParserCtxtPtr ctxt
)
1806 if (ctxt
== NULL
) return(NULL
);
1807 if (ctxt
->nodeNr
<= 0)
1810 if (ctxt
->nodeNr
> 0)
1811 ctxt
->node
= ctxt
->nodeTab
[ctxt
->nodeNr
- 1];
1814 ret
= ctxt
->nodeTab
[ctxt
->nodeNr
];
1815 ctxt
->nodeTab
[ctxt
->nodeNr
] = NULL
;
1821 * @ctxt: an XML parser context
1822 * @value: the element name
1823 * @prefix: the element prefix
1824 * @URI: the element namespace name
1825 * @line: the current line number for error messages
1826 * @nsNr: the number of namespaces pushed on the namespace table
1828 * Pushes a new element name/prefix/URL on top of the name stack
1830 * Returns -1 in case of error, the index in the stack otherwise
1833 nameNsPush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
,
1834 const xmlChar
*prefix
, const xmlChar
*URI
, int line
, int nsNr
)
1838 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1839 const xmlChar
* *tmp
;
1842 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1844 sizeof(ctxt
->nameTab
[0]));
1849 ctxt
->nameTab
= tmp
;
1850 tmp2
= (xmlStartTag
*) xmlRealloc((void * *)ctxt
->pushTab
,
1852 sizeof(ctxt
->pushTab
[0]));
1857 ctxt
->pushTab
= tmp2
;
1858 } else if (ctxt
->pushTab
== NULL
) {
1859 ctxt
->pushTab
= (xmlStartTag
*) xmlMalloc(ctxt
->nameMax
*
1860 sizeof(ctxt
->pushTab
[0]));
1861 if (ctxt
->pushTab
== NULL
)
1864 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1866 tag
= &ctxt
->pushTab
[ctxt
->nameNr
];
1867 tag
->prefix
= prefix
;
1871 return (ctxt
->nameNr
++);
1873 xmlErrMemory(ctxt
, NULL
);
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack
 *
 * Returns the name just removed
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *ret;

    if (ctxt->nameNr <= 0)
        return (NULL);
    ctxt->nameNr--;
    /* ctxt->name tracks the new top of the stack, or NULL when empty */
    if (ctxt->nameNr > 0)
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
    else
        ctxt->name = NULL;
    ret = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = NULL;
    return (ret);
}
#endif /* LIBXML_PUSH_ENABLED */
1905 * @ctxt: an XML parser context
1906 * @value: the element name
1908 * DEPRECATED: Internal function, do not use.
1910 * Pushes a new element name on top of the name stack
1912 * Returns -1 in case of error, the index in the stack otherwise
1915 namePush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
)
1917 if (ctxt
== NULL
) return (-1);
1919 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1920 const xmlChar
* *tmp
;
1921 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1923 sizeof(ctxt
->nameTab
[0]));
1927 ctxt
->nameTab
= tmp
;
1930 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1932 return (ctxt
->nameNr
++);
1934 xmlErrMemory(ctxt
, NULL
);
1940 * @ctxt: an XML parser context
1942 * DEPRECATED: Internal function, do not use.
1944 * Pops the top element name from the name stack
1946 * Returns the name just removed
1949 namePop(xmlParserCtxtPtr ctxt
)
1953 if ((ctxt
== NULL
) || (ctxt
->nameNr
<= 0))
1956 if (ctxt
->nameNr
> 0)
1957 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1960 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1961 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1965 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
1966 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
1969 ctxt
->spaceMax
*= 2;
1970 tmp
= (int *) xmlRealloc(ctxt
->spaceTab
,
1971 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
1973 xmlErrMemory(ctxt
, NULL
);
1977 ctxt
->spaceTab
= tmp
;
1979 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
1980 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
1981 return(ctxt
->spaceNr
++);
1984 static int spacePop(xmlParserCtxtPtr ctxt
) {
1986 if (ctxt
->spaceNr
<= 0) return(0);
1988 if (ctxt
->spaceNr
> 0)
1989 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
1991 ctxt
->space
= &ctxt
->spaceTab
[0];
1992 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
1993 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often need to make assumption on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
        ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK if ((ctxt->progressive == 0) &&				\
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&				\
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = v;						\
    else i += xmlCopyCharMultiByte(&b[i],v)
2109 * xmlSkipBlankChars:
2110 * @ctxt: the XML parser context
2112 * DEPRECATED: Internal function, do not use.
2114 * skip all blanks character found at that point in the input streams.
2115 * It pops up finished entities in the process if allowable at that point.
2117 * Returns the number of space chars skipped
2121 xmlSkipBlankChars(xmlParserCtxtPtr ctxt
) {
2125 * It's Okay to use CUR/NEXT here since all the blanks are on
2128 if (((ctxt
->inputNr
== 1) && (ctxt
->instate
!= XML_PARSER_DTD
)) ||
2129 (ctxt
->instate
== XML_PARSER_START
)) {
2132 * if we are in the document content, go really fast
2134 cur
= ctxt
->input
->cur
;
2135 while (IS_BLANK_CH(*cur
)) {
2137 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
2145 ctxt
->input
->cur
= cur
;
2146 xmlParserGrow(ctxt
);
2147 cur
= ctxt
->input
->cur
;
2150 ctxt
->input
->cur
= cur
;
2152 int expandPE
= ((ctxt
->external
!= 0) || (ctxt
->inputNr
!= 1));
2154 while (ctxt
->instate
!= XML_PARSER_EOF
) {
2155 if (IS_BLANK_CH(CUR
)) { /* CHECKED tstblanks.xml */
2157 } else if (CUR
== '%') {
2159 * Need to handle support of entities branching here
2161 if ((expandPE
== 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2163 xmlParsePEReference(ctxt
);
2164 } else if (CUR
== 0) {
2165 unsigned long consumed
;
2168 if (ctxt
->inputNr
<= 1)
2171 consumed
= ctxt
->input
->consumed
;
2172 xmlSaturatedAddSizeT(&consumed
,
2173 ctxt
->input
->cur
- ctxt
->input
->base
);
2176 * Add to sizeentities when parsing an external entity
2177 * for the first time.
2179 ent
= ctxt
->input
->entity
;
2180 if ((ent
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
2181 ((ent
->flags
& XML_ENT_PARSED
) == 0)) {
2182 ent
->flags
|= XML_ENT_PARSED
;
2184 xmlSaturatedAdd(&ctxt
->sizeentities
, consumed
);
2187 xmlParserEntityCheck(ctxt
, consumed
);
2195 * Also increase the counter when entering or exiting a PERef.
2196 * The spec says: "When a parameter-entity reference is recognized
2197 * in the DTD and included, its replacement text MUST be enlarged
2198 * by the attachment of one leading and one following space (#x20)
2208 /************************************************************************
2210 * Commodity functions to handle entities *
2212 ************************************************************************/
2216 * @ctxt: an XML parser context
2218 * xmlPopInput: the current input pointed by ctxt->input came to an end
2219 * pop it and return the next char.
2221 * Returns the current xmlChar in the parser context
2224 xmlPopInput(xmlParserCtxtPtr ctxt
) {
2225 xmlParserInputPtr input
;
2227 if ((ctxt
== NULL
) || (ctxt
->inputNr
<= 1)) return(0);
2228 if (xmlParserDebugEntities
)
2229 xmlGenericError(xmlGenericErrorContext
,
2230 "Popping input %d\n", ctxt
->inputNr
);
2231 if ((ctxt
->inputNr
> 1) && (ctxt
->inSubset
== 0) &&
2232 (ctxt
->instate
!= XML_PARSER_EOF
))
2233 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
2234 "Unfinished entity outside the DTD");
2235 input
= inputPop(ctxt
);
2236 if (input
->entity
!= NULL
)
2237 input
->entity
->flags
&= ~XML_ENT_EXPANDING
;
2238 xmlFreeInputStream(input
);
2239 if (*ctxt
->input
->cur
== 0)
2240 xmlParserGrow(ctxt
);
2246 * @ctxt: an XML parser context
2247 * @input: an XML parser input fragment (entity, XML fragment ...).
2249 * xmlPushInput: switch to a new input stream which is stacked on top
2250 * of the previous one(s).
2251 * Returns -1 in case of error or the index in the input stack
2254 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
2256 if (input
== NULL
) return(-1);
2258 if (xmlParserDebugEntities
) {
2259 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
2260 xmlGenericError(xmlGenericErrorContext
,
2261 "%s(%d): ", ctxt
->input
->filename
,
2263 xmlGenericError(xmlGenericErrorContext
,
2264 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
2266 if (((ctxt
->inputNr
> 40) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2267 (ctxt
->inputNr
> 100)) {
2268 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2269 while (ctxt
->inputNr
> 1)
2270 xmlFreeInputStream(inputPop(ctxt
));
2273 ret
= inputPush(ctxt
, input
);
2274 if (ctxt
->instate
== XML_PARSER_EOF
)
2282 * @ctxt: an XML parser context
2284 * DEPRECATED: Internal function, don't use.
2286 * Parse a numeric character reference. Always consumes '&'.
2288 * [66] CharRef ::= '&#' [0-9]+ ';' |
2289 * '&#x' [0-9a-fA-F]+ ';'
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
2293 * production for Char.
2295 * Returns the value parsed (as an int), 0 in case of error
2298 xmlParseCharRef(xmlParserCtxtPtr ctxt
) {
2303 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2305 if ((RAW
== '&') && (NXT(1) == '#') &&
2309 while (RAW
!= ';') { /* loop blocked by count */
2313 if (ctxt
->instate
== XML_PARSER_EOF
)
2316 if ((RAW
>= '0') && (RAW
<= '9'))
2317 val
= val
* 16 + (CUR
- '0');
2318 else if ((RAW
>= 'a') && (RAW
<= 'f') && (count
< 20))
2319 val
= val
* 16 + (CUR
- 'a') + 10;
2320 else if ((RAW
>= 'A') && (RAW
<= 'F') && (count
< 20))
2321 val
= val
* 16 + (CUR
- 'A') + 10;
2323 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2334 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2338 } else if ((RAW
== '&') && (NXT(1) == '#')) {
2341 while (RAW
!= ';') { /* loop blocked by count */
2345 if (ctxt
->instate
== XML_PARSER_EOF
)
2348 if ((RAW
>= '0') && (RAW
<= '9'))
2349 val
= val
* 10 + (CUR
- '0');
2351 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2362 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2373 * [ WFC: Legal Character ]
2374 * Characters referred to using character references must match the
2375 * production for Char.
2377 if (val
>= 0x110000) {
2378 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2379 "xmlParseCharRef: character reference out of bounds\n",
2381 } else if (IS_CHAR(val
)) {
2384 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2385 "xmlParseCharRef: invalid xmlChar value %d\n",
2392 * xmlParseStringCharRef:
2393 * @ctxt: an XML parser context
2394 * @str: a pointer to an index in the string
2396 * parse Reference declarations, variant parsing from a string rather
2397 * than an an input flow.
2399 * [66] CharRef ::= '&#' [0-9]+ ';' |
2400 * '&#x' [0-9a-fA-F]+ ';'
2402 * [ WFC: Legal Character ]
2403 * Characters referred to using character references must match the
2404 * production for Char.
2406 * Returns the value parsed (as an int), 0 in case of error, str will be
2407 * updated to the current value of the index
2410 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
2415 if ((str
== NULL
) || (*str
== NULL
)) return(0);
2418 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
2421 while (cur
!= ';') { /* Non input consuming loop */
2422 if ((cur
>= '0') && (cur
<= '9'))
2423 val
= val
* 16 + (cur
- '0');
2424 else if ((cur
>= 'a') && (cur
<= 'f'))
2425 val
= val
* 16 + (cur
- 'a') + 10;
2426 else if ((cur
>= 'A') && (cur
<= 'F'))
2427 val
= val
* 16 + (cur
- 'A') + 10;
2429 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2441 } else if ((cur
== '&') && (ptr
[1] == '#')){
2444 while (cur
!= ';') { /* Non input consuming loops */
2445 if ((cur
>= '0') && (cur
<= '9'))
2446 val
= val
* 10 + (cur
- '0');
2448 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2461 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2467 * [ WFC: Legal Character ]
2468 * Characters referred to using character references must match the
2469 * production for Char.
2471 if (val
>= 0x110000) {
2472 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2473 "xmlParseStringCharRef: character reference out of bounds\n",
2475 } else if (IS_CHAR(val
)) {
2478 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2479 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2486 * xmlParserHandlePEReference:
2487 * @ctxt: the parser context
2489 * DEPRECATED: Internal function, do not use.
2491 * [69] PEReference ::= '%' Name ';'
2493 * [ WFC: No Recursion ]
2494 * A parsed entity must not contain a recursive
2495 * reference to itself, either directly or indirectly.
2497 * [ WFC: Entity Declared ]
2498 * In a document without any DTD, a document with only an internal DTD
2499 * subset which contains no parameter entity references, or a document
2500 * with "standalone='yes'", ... ... The declaration of a parameter
2501 * entity must precede any reference to it...
2503 * [ VC: Entity Declared ]
2504 * In a document with an external subset or external parameter entities
2505 * with "standalone='no'", ... ... The declaration of a parameter entity
2506 * must precede any reference to it...
2509 * Parameter-entity references may only appear in the DTD.
2510 * NOTE: misleading but this is handled.
2512 * A PEReference may have been detected in the current input stream
2513 * the handling is done accordingly to
2514 * http://www.w3.org/TR/REC-xml#entproc
2516 * - Included in literal in entity values
2517 * - Included as Parameter Entity reference within DTDs
2520 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
2521 switch(ctxt
->instate
) {
2522 case XML_PARSER_CDATA_SECTION
:
2524 case XML_PARSER_COMMENT
:
2526 case XML_PARSER_START_TAG
:
2528 case XML_PARSER_END_TAG
:
2530 case XML_PARSER_EOF
:
2531 xmlFatalErr(ctxt
, XML_ERR_PEREF_AT_EOF
, NULL
);
2533 case XML_PARSER_PROLOG
:
2534 case XML_PARSER_START
:
2535 case XML_PARSER_MISC
:
2536 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_PROLOG
, NULL
);
2538 case XML_PARSER_ENTITY_DECL
:
2539 case XML_PARSER_CONTENT
:
2540 case XML_PARSER_ATTRIBUTE_VALUE
:
2542 case XML_PARSER_SYSTEM_LITERAL
:
2543 case XML_PARSER_PUBLIC_LITERAL
:
2544 /* we just ignore it there */
2546 case XML_PARSER_EPILOG
:
2547 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_EPILOG
, NULL
);
2549 case XML_PARSER_ENTITY_VALUE
:
2551 * NOTE: in the case of entity values, we don't do the
2552 * substitution here since we need the literal
2553 * entity value to be able to save the internal
2554 * subset of the document.
2555 * This will be handled by xmlStringDecodeEntities
2558 case XML_PARSER_DTD
:
2560 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2561 * In the internal DTD subset, parameter-entity references
2562 * can occur only where markup declarations can occur, not
2563 * within markup declarations.
2564 * In that case this is handled in xmlParseMarkupDecl
2566 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
2568 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2571 case XML_PARSER_IGNORE
:
2575 xmlParsePEReference(ctxt
);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the old one plus n; a wrap-around of that
 * computation is treated as an allocation failure.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2594 * xmlStringDecodeEntitiesInt:
2595 * @ctxt: the parser context
2596 * @str: the input string
2597 * @len: the string length
2598 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2599 * @end: an end marker xmlChar, 0 if none
2600 * @end2: an end marker xmlChar, 0 if none
2601 * @end3: an end marker xmlChar, 0 if none
2602 * @check: whether to perform entity checks
2605 xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2606 int what
, xmlChar end
, xmlChar end2
, xmlChar end3
,
2608 xmlChar
*buffer
= NULL
;
2609 size_t buffer_size
= 0;
2612 xmlChar
*current
= NULL
;
2613 xmlChar
*rep
= NULL
;
2614 const xmlChar
*last
;
2622 if (((ctxt
->depth
> 40) &&
2623 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2624 (ctxt
->depth
> 100)) {
2625 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_LOOP
,
2626 "Maximum entity nesting depth exceeded");
2631 * allocate a translation buffer.
2633 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
2634 buffer
= (xmlChar
*) xmlMallocAtomic(buffer_size
);
2635 if (buffer
== NULL
) goto mem_error
;
2638 * OK loop until we reach one of the ending char or a size limit.
2639 * we are operating on already parsed values.
2642 c
= CUR_SCHAR(str
, l
);
2645 while ((c
!= 0) && (c
!= end
) && /* non input consuming loop */
2646 (c
!= end2
) && (c
!= end3
) &&
2647 (ctxt
->instate
!= XML_PARSER_EOF
)) {
2650 if ((c
== '&') && (str
[1] == '#')) {
2651 int val
= xmlParseStringCharRef(ctxt
, &str
);
2654 COPY_BUF(0,buffer
,nbchars
,val
);
2655 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2656 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2658 } else if ((c
== '&') && (what
& XML_SUBSTITUTE_REF
)) {
2659 if (xmlParserDebugEntities
)
2660 xmlGenericError(xmlGenericErrorContext
,
2661 "String decoding Entity Reference: %.30s\n",
2663 ent
= xmlParseStringEntityRef(ctxt
, &str
);
2664 if ((ent
!= NULL
) &&
2665 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
2666 if (ent
->content
!= NULL
) {
2667 COPY_BUF(0,buffer
,nbchars
,ent
->content
[0]);
2668 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2669 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2672 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
2673 "predefined entity has no content\n");
2676 } else if ((ent
!= NULL
) && (ent
->content
!= NULL
)) {
2677 if ((check
) && (xmlParserEntityCheck(ctxt
, ent
->length
)))
2680 if (ent
->flags
& XML_ENT_EXPANDING
) {
2681 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2682 xmlHaltParser(ctxt
);
2683 ent
->content
[0] = 0;
2687 ent
->flags
|= XML_ENT_EXPANDING
;
2689 rep
= xmlStringDecodeEntitiesInt(ctxt
, ent
->content
,
2690 ent
->length
, what
, 0, 0, 0, check
);
2692 ent
->flags
&= ~XML_ENT_EXPANDING
;
2695 ent
->content
[0] = 0;
2700 while (*current
!= 0) { /* non input consuming loop */
2701 buffer
[nbchars
++] = *current
++;
2702 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2703 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2708 } else if (ent
!= NULL
) {
2709 int i
= xmlStrlen(ent
->name
);
2710 const xmlChar
*cur
= ent
->name
;
2712 buffer
[nbchars
++] = '&';
2713 if (nbchars
+ i
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2714 growBuffer(buffer
, i
+ XML_PARSER_BUFFER_SIZE
);
2717 buffer
[nbchars
++] = *cur
++;
2718 buffer
[nbchars
++] = ';';
2720 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
2721 if (xmlParserDebugEntities
)
2722 xmlGenericError(xmlGenericErrorContext
,
2723 "String decoding PE Reference: %.30s\n", str
);
2724 ent
= xmlParseStringPEReference(ctxt
, &str
);
2726 if (ent
->content
== NULL
) {
2728 * Note: external parsed entities will not be loaded,
2729 * it is not required for a non-validating parser to
2730 * complete external PEReferences coming from the
2733 if (((ctxt
->options
& XML_PARSE_NOENT
) != 0) ||
2734 ((ctxt
->options
& XML_PARSE_DTDVALID
) != 0) ||
2735 (ctxt
->validate
!= 0)) {
2736 xmlLoadEntityContent(ctxt
, ent
);
2738 xmlWarningMsg(ctxt
, XML_ERR_ENTITY_PROCESSING
,
2739 "not validating will not read content for PE entity %s\n",
2744 if ((check
) && (xmlParserEntityCheck(ctxt
, ent
->length
)))
2747 if (ent
->flags
& XML_ENT_EXPANDING
) {
2748 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2749 xmlHaltParser(ctxt
);
2750 if (ent
->content
!= NULL
)
2751 ent
->content
[0] = 0;
2755 ent
->flags
|= XML_ENT_EXPANDING
;
2757 rep
= xmlStringDecodeEntitiesInt(ctxt
, ent
->content
,
2758 ent
->length
, what
, 0, 0, 0, check
);
2760 ent
->flags
&= ~XML_ENT_EXPANDING
;
2763 if (ent
->content
!= NULL
)
2764 ent
->content
[0] = 0;
2768 while (*current
!= 0) { /* non input consuming loop */
2769 buffer
[nbchars
++] = *current
++;
2770 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2771 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2778 COPY_BUF(l
,buffer
,nbchars
,c
);
2780 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2781 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2785 c
= CUR_SCHAR(str
, l
);
2789 buffer
[nbchars
] = 0;
2793 xmlErrMemory(ctxt
, NULL
);
2803 * xmlStringLenDecodeEntities:
2804 * @ctxt: the parser context
2805 * @str: the input string
2806 * @len: the string length
2807 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2808 * @end: an end marker xmlChar, 0 if none
2809 * @end2: an end marker xmlChar, 0 if none
2810 * @end3: an end marker xmlChar, 0 if none
2812 * DEPRECATED: Internal function, don't use.
2814 * Takes an entity string content and processes it to do the adequate substitutions.
2816 * [67] Reference ::= EntityRef | CharRef
2818 * [69] PEReference ::= '%' Name ';'
2820 * Returns A newly allocated string with the substitution done. The caller
2821 * must deallocate it !
2824 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2825 int what
, xmlChar end
, xmlChar end2
,
2827 if ((ctxt
== NULL
) || (str
== NULL
) || (len
< 0))
2829 return(xmlStringDecodeEntitiesInt(ctxt
, str
, len
, what
,
2830 end
, end2
, end3
, 0));
2834 * xmlStringDecodeEntities:
2835 * @ctxt: the parser context
2836 * @str: the input string
2837 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2838 * @end: an end marker xmlChar, 0 if none
2839 * @end2: an end marker xmlChar, 0 if none
2840 * @end3: an end marker xmlChar, 0 if none
2842 * DEPRECATED: Internal function, don't use.
2844 * Takes an entity string content and processes it to do the adequate substitutions.
2846 * [67] Reference ::= EntityRef | CharRef
2848 * [69] PEReference ::= '%' Name ';'
2850 * Returns A newly allocated string with the substitution done. The caller
2851 * must deallocate it !
2854 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
2855 xmlChar end
, xmlChar end2
, xmlChar end3
) {
2856 if ((ctxt
== NULL
) || (str
== NULL
)) return(NULL
);
2857 return(xmlStringDecodeEntitiesInt(ctxt
, str
, xmlStrlen(str
), what
,
2858 end
, end2
, end3
, 0));
2861 /************************************************************************
2863 * Commodity functions, cleanup needed ? *
2865 ************************************************************************/
2869 * @ctxt: an XML parser context
2871 * @len: the size of @str
2872 * @blank_chars: we know the chars are blanks
2874 * Is this a sequence of blank chars that one can ignore ?
2876 * Returns 1 if ignorable 0 otherwise.
2879 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2882 xmlNodePtr lastChild
;
2885 * Don't spend time trying to differentiate them, the same callback is
2888 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
2892 * Check for xml:space value.
2894 if ((ctxt
->space
== NULL
) || (*(ctxt
->space
) == 1) ||
2895 (*(ctxt
->space
) == -2))
2899 * Check that the string is made of blanks
2901 if (blank_chars
== 0) {
2902 for (i
= 0;i
< len
;i
++)
2903 if (!(IS_BLANK_CH(str
[i
]))) return(0);
2907 * Look if the element is mixed content in the DTD if available
2909 if (ctxt
->node
== NULL
) return(0);
2910 if (ctxt
->myDoc
!= NULL
) {
2911 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
2912 if (ret
== 0) return(1);
2913 if (ret
== 1) return(0);
2917 * Otherwise, heuristic :-\
2919 if ((RAW
!= '<') && (RAW
!= 0xD)) return(0);
2920 if ((ctxt
->node
->children
== NULL
) &&
2921 (RAW
== '<') && (NXT(1) == '/')) return(0);
2923 lastChild
= xmlGetLastChild(ctxt
->node
);
2924 if (lastChild
== NULL
) {
2925 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
2926 (ctxt
->node
->content
!= NULL
)) return(0);
2927 } else if (xmlNodeIsText(lastChild
))
2929 else if ((ctxt
->node
->children
!= NULL
) &&
2930 (xmlNodeIsText(ctxt
->node
->children
)))
2935 /************************************************************************
2937 * Extra stuff for namespace support *
2938 * Relates to http://www.w3.org/TR/WD-xml-names *
2940 ************************************************************************/
2944 * @ctxt: an XML parser context
2945 * @name: the UTF8 encoded XML qualified name string to split
2946 * @prefix: a xmlChar **
2948 * parse an UTF8 encoded XML qualified name string
2950 * [NS 5] QName ::= (Prefix ':')? LocalPart
2952 * [NS 6] Prefix ::= NCName
2954 * [NS 7] LocalPart ::= NCName
2956 * Returns the local part, and prefix is updated
2957 * to get the Prefix if any.
2961 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
2962 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2963 xmlChar
*buffer
= NULL
;
2965 int max
= XML_MAX_NAMELEN
;
2966 xmlChar
*ret
= NULL
;
2967 const xmlChar
*cur
= name
;
2970 if (prefix
== NULL
) return(NULL
);
2973 if (cur
== NULL
) return(NULL
);
2975 #ifndef XML_XML_NAMESPACE
2976 /* xml: prefix is not really a namespace */
2977 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
2978 (cur
[2] == 'l') && (cur
[3] == ':'))
2979 return(xmlStrdup(name
));
2982 /* nasty but well-formed */
2984 return(xmlStrdup(name
));
2987 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
2993 * Okay someone managed to make a huge name, so he's ready to pay
2994 * for the processing speed.
2998 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
2999 if (buffer
== NULL
) {
3000 xmlErrMemory(ctxt
, NULL
);
3003 memcpy(buffer
, buf
, len
);
3004 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
3005 if (len
+ 10 > max
) {
3009 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3012 xmlErrMemory(ctxt
, NULL
);
3023 if ((c
== ':') && (*cur
== 0)) {
3027 return(xmlStrdup(name
));
3031 ret
= xmlStrndup(buf
, len
);
3035 max
= XML_MAX_NAMELEN
;
3043 return(xmlStrndup(BAD_CAST
"", 0));
3048 * Check that the first character is proper to start
3051 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
3052 ((c
>= 0x41) && (c
<= 0x5A)) ||
3053 (c
== '_') || (c
== ':'))) {
3055 int first
= CUR_SCHAR(cur
, l
);
3057 if (!IS_LETTER(first
) && (first
!= '_')) {
3058 xmlFatalErrMsgStr(ctxt
, XML_NS_ERR_QNAME
,
3059 "Name %s is not XML Namespace compliant\n",
3065 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
3071 * Okay someone managed to make a huge name, so he's ready to pay
3072 * for the processing speed.
3076 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
3077 if (buffer
== NULL
) {
3078 xmlErrMemory(ctxt
, NULL
);
3081 memcpy(buffer
, buf
, len
);
3082 while (c
!= 0) { /* tested bigname2.xml */
3083 if (len
+ 10 > max
) {
3087 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3089 xmlErrMemory(ctxt
, NULL
);
3102 ret
= xmlStrndup(buf
, len
);
3111 /************************************************************************
3113 * The parser itself *
3114 * Relates to http://www.w3.org/TR/REC-xml *
3116 ************************************************************************/
3118 /************************************************************************
3120 * Routines to parse Name, NCName and NmToken *
3122 ************************************************************************/
3124 static unsigned long nbParseName
= 0;
3125 static unsigned long nbParseNmToken
= 0;
3126 static unsigned long nbParseNCName
= 0;
3127 static unsigned long nbParseNCNameComplex
= 0;
3128 static unsigned long nbParseNameComplex
= 0;
3129 static unsigned long nbParseStringName
= 0;
3133 * The two following functions are related to the change of accepted
3134 * characters for Name and NmToken in the Revision 5 of XML-1.0
3135 * They correspond to the modified production [4] and the new production [4a]
3136 * changes in that revision. Also note that the macros used for the
3137 * productions Letter, Digit, CombiningChar and Extender are not needed
3139 * We still keep compatibility to pre-revision5 parsing semantic if the
3140 * new XML_PARSE_OLD10 option is given to the parser.
3143 xmlIsNameStartChar(xmlParserCtxtPtr ctxt
, int c
) {
3144 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3146 * Use the new checks of production [4] [4a] and [5] of the
3147 * Update 5 of XML-1.0
3149 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3150 (((c
>= 'a') && (c
<= 'z')) ||
3151 ((c
>= 'A') && (c
<= 'Z')) ||
3152 (c
== '_') || (c
== ':') ||
3153 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3154 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3155 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3156 ((c
>= 0x370) && (c
<= 0x37D)) ||
3157 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3158 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3159 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3160 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3161 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3162 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3163 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3164 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3167 if (IS_LETTER(c
) || (c
== '_') || (c
== ':'))
3174 xmlIsNameChar(xmlParserCtxtPtr ctxt
, int c
) {
3175 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3177 * Use the new checks of production [4] [4a] and [5] of the
3178 * Update 5 of XML-1.0
3180 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3181 (((c
>= 'a') && (c
<= 'z')) ||
3182 ((c
>= 'A') && (c
<= 'Z')) ||
3183 ((c
>= '0') && (c
<= '9')) || /* !start */
3184 (c
== '_') || (c
== ':') ||
3185 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3186 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3187 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3188 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3189 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3190 ((c
>= 0x370) && (c
<= 0x37D)) ||
3191 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3192 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3193 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3194 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3195 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3196 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3197 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3198 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3199 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3202 if ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3203 (c
== '.') || (c
== '-') ||
3204 (c
== '_') || (c
== ':') ||
3205 (IS_COMBINING(c
)) ||
3212 static xmlChar
* xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
,
3213 int *len
, int *alloc
, int normalize
);
3215 static const xmlChar
*
3216 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
3219 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3220 XML_MAX_TEXT_LENGTH
:
3221 XML_MAX_NAME_LENGTH
;
3224 nbParseNameComplex
++;
3228 * Handler for more complex cases
3231 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3233 * Use the new checks of production [4] [4a] and [5] of the
3234 * Update 5 of XML-1.0
3236 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3237 (!(((c
>= 'a') && (c
<= 'z')) ||
3238 ((c
>= 'A') && (c
<= 'Z')) ||
3239 (c
== '_') || (c
== ':') ||
3240 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3241 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3242 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3243 ((c
>= 0x370) && (c
<= 0x37D)) ||
3244 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3245 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3246 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3247 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3248 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3249 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3250 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3251 ((c
>= 0x10000) && (c
<= 0xEFFFF))))) {
3257 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3258 (((c
>= 'a') && (c
<= 'z')) ||
3259 ((c
>= 'A') && (c
<= 'Z')) ||
3260 ((c
>= '0') && (c
<= '9')) || /* !start */
3261 (c
== '_') || (c
== ':') ||
3262 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3263 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3264 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3265 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3266 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3267 ((c
>= 0x370) && (c
<= 0x37D)) ||
3268 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3269 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3270 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3271 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3272 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3273 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3274 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3275 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3276 ((c
>= 0x10000) && (c
<= 0xEFFFF))
3278 if (len
<= INT_MAX
- l
)
3284 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3285 (!IS_LETTER(c
) && (c
!= '_') &&
3293 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3294 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3295 (c
== '.') || (c
== '-') ||
3296 (c
== '_') || (c
== ':') ||
3297 (IS_COMBINING(c
)) ||
3298 (IS_EXTENDER(c
)))) {
3299 if (len
<= INT_MAX
- l
)
3305 if (ctxt
->instate
== XML_PARSER_EOF
)
3307 if (len
> maxLength
) {
3308 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3311 if (ctxt
->input
->cur
- ctxt
->input
->base
< len
) {
3313 * There were a couple of bugs where PERefs led to a change
3314 * of the buffer. Check the buffer size to avoid passing an invalid
3315 * pointer to xmlDictLookup.
3317 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
3318 "unexpected change of input buffer");
3321 if ((*ctxt
->input
->cur
== '\n') && (ctxt
->input
->cur
[-1] == '\r'))
3322 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- (len
+ 1), len
));
3323 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3328 * @ctxt: an XML parser context
3330 * DEPRECATED: Internal function, don't use.
3332 * parse an XML name.
3334 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3335 * CombiningChar | Extender
3337 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3339 * [6] Names ::= Name (#x20 Name)*
3341 * Returns the Name parsed or NULL
3345 xmlParseName(xmlParserCtxtPtr ctxt
) {
3349 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3350 XML_MAX_TEXT_LENGTH
:
3351 XML_MAX_NAME_LENGTH
;
3354 if (ctxt
->instate
== XML_PARSER_EOF
)
3362 * Accelerator for simple ASCII names
3364 in
= ctxt
->input
->cur
;
3365 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3366 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3367 (*in
== '_') || (*in
== ':')) {
3369 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3370 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3371 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3372 (*in
== '_') || (*in
== '-') ||
3373 (*in
== ':') || (*in
== '.'))
3375 if ((*in
> 0) && (*in
< 0x80)) {
3376 count
= in
- ctxt
->input
->cur
;
3377 if (count
> maxLength
) {
3378 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3381 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3382 ctxt
->input
->cur
= in
;
3383 ctxt
->input
->col
+= count
;
3385 xmlErrMemory(ctxt
, NULL
);
3389 /* accelerator for special cases */
3390 return(xmlParseNameComplex(ctxt
));
3393 static const xmlChar
*
3394 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt
) {
3397 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3398 XML_MAX_TEXT_LENGTH
:
3399 XML_MAX_NAME_LENGTH
;
3400 size_t startPosition
= 0;
3403 nbParseNCNameComplex
++;
3407 * Handler for more complex cases
3409 startPosition
= CUR_PTR
- BASE_PTR
;
3411 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3412 (!xmlIsNameStartChar(ctxt
, c
) || (c
== ':'))) {
3416 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3417 (xmlIsNameChar(ctxt
, c
) && (c
!= ':'))) {
3418 if (len
<= INT_MAX
- l
)
3423 if (ctxt
->instate
== XML_PARSER_EOF
)
3425 if (len
> maxLength
) {
3426 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3429 return(xmlDictLookup(ctxt
->dict
, (BASE_PTR
+ startPosition
), len
));
3434 * @ctxt: an XML parser context
3435 * @len: length of the string parsed
3437 * parse an XML name.
3439 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3440 * CombiningChar | Extender
3442 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3444 * Returns the Name parsed or NULL
3447 static const xmlChar
*
3448 xmlParseNCName(xmlParserCtxtPtr ctxt
) {
3449 const xmlChar
*in
, *e
;
3452 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3453 XML_MAX_TEXT_LENGTH
:
3454 XML_MAX_NAME_LENGTH
;
3461 * Accelerator for simple ASCII names
3463 in
= ctxt
->input
->cur
;
3464 e
= ctxt
->input
->end
;
3465 if ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3466 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3467 (*in
== '_')) && (in
< e
)) {
3469 while ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3470 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3471 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3472 (*in
== '_') || (*in
== '-') ||
3473 (*in
== '.')) && (in
< e
))
3477 if ((*in
> 0) && (*in
< 0x80)) {
3478 count
= in
- ctxt
->input
->cur
;
3479 if (count
> maxLength
) {
3480 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3483 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3484 ctxt
->input
->cur
= in
;
3485 ctxt
->input
->col
+= count
;
3487 xmlErrMemory(ctxt
, NULL
);
3493 return(xmlParseNCNameComplex(ctxt
));
3497 * xmlParseNameAndCompare:
3498 * @ctxt: an XML parser context
3500 * parse an XML name and compares for match
3501 * (specialized for endtag parsing)
3503 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3504 * and the name for mismatch
3507 static const xmlChar
*
3508 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
3509 register const xmlChar
*cmp
= other
;
3510 register const xmlChar
*in
;
3514 if (ctxt
->instate
== XML_PARSER_EOF
)
3517 in
= ctxt
->input
->cur
;
3518 while (*in
!= 0 && *in
== *cmp
) {
3522 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
3524 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
3525 ctxt
->input
->cur
= in
;
3526 return (const xmlChar
*) 1;
3528 /* failure (or end of input buffer), check with full function */
3529 ret
= xmlParseName (ctxt
);
3530 /* strings coming from the dictionary direct compare possible */
3532 return (const xmlChar
*) 1;
3538 * xmlParseStringName:
3539 * @ctxt: an XML parser context
3540 * @str: a pointer to the string pointer (IN/OUT)
3542 * parse an XML name.
3544 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545 * CombiningChar | Extender
3547 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3549 * [6] Names ::= Name (#x20 Name)*
3551 * Returns the Name parsed or NULL. The @str pointer
3552 * is updated to the current location in the string.
3556 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
3557 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3558 const xmlChar
*cur
= *str
;
3561 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3562 XML_MAX_TEXT_LENGTH
:
3563 XML_MAX_NAME_LENGTH
;
3566 nbParseStringName
++;
3569 c
= CUR_SCHAR(cur
, l
);
3570 if (!xmlIsNameStartChar(ctxt
, c
)) {
3574 COPY_BUF(l
,buf
,len
,c
);
3576 c
= CUR_SCHAR(cur
, l
);
3577 while (xmlIsNameChar(ctxt
, c
)) {
3578 COPY_BUF(l
,buf
,len
,c
);
3580 c
= CUR_SCHAR(cur
, l
);
3581 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
3583 * Okay someone managed to make a huge name, so he's ready to pay
3584 * for the processing speed.
3589 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
3590 if (buffer
== NULL
) {
3591 xmlErrMemory(ctxt
, NULL
);
3594 memcpy(buffer
, buf
, len
);
3595 while (xmlIsNameChar(ctxt
, c
)) {
3596 if (len
+ 10 > max
) {
3600 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3602 xmlErrMemory(ctxt
, NULL
);
3608 COPY_BUF(l
,buffer
,len
,c
);
3610 c
= CUR_SCHAR(cur
, l
);
3611 if (len
> maxLength
) {
3612 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3622 if (len
> maxLength
) {
3623 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3627 return(xmlStrndup(buf
, len
));
3632 * @ctxt: an XML parser context
3634 * DEPRECATED: Internal function, don't use.
3636 * parse an XML Nmtoken.
3638 * [7] Nmtoken ::= (NameChar)+
3640 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3642 * Returns the Nmtoken parsed or NULL
3646 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
3647 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3650 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3651 XML_MAX_TEXT_LENGTH
:
3652 XML_MAX_NAME_LENGTH
;
3660 while (xmlIsNameChar(ctxt
, c
)) {
3661 COPY_BUF(l
,buf
,len
,c
);
3664 if (len
>= XML_MAX_NAMELEN
) {
3666 * Okay someone managed to make a huge token, so he's ready to pay
3667 * for the processing speed.
3672 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
3673 if (buffer
== NULL
) {
3674 xmlErrMemory(ctxt
, NULL
);
3677 memcpy(buffer
, buf
, len
);
3678 while (xmlIsNameChar(ctxt
, c
)) {
3679 if (len
+ 10 > max
) {
3683 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3685 xmlErrMemory(ctxt
, NULL
);
3691 COPY_BUF(l
,buffer
,len
,c
);
3692 if (len
> maxLength
) {
3693 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3701 if (ctxt
->instate
== XML_PARSER_EOF
) {
3708 if (ctxt
->instate
== XML_PARSER_EOF
)
3712 if (len
> maxLength
) {
3713 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3716 return(xmlStrndup(buf
, len
));
3720 * xmlParseEntityValue:
3721 * @ctxt: an XML parser context
3722 * @orig: if non-NULL store a copy of the original entity value
3724 * DEPRECATED: Internal function, don't use.
3726 * parse a value for ENTITY declarations
3728 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3729 * "'" ([^%&'] | PEReference | Reference)* "'"
3731 * Returns the EntityValue parsed with reference substituted or NULL
3735 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
3736 xmlChar
*buf
= NULL
;
3738 int size
= XML_PARSER_BUFFER_SIZE
;
3740 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3741 XML_MAX_HUGE_LENGTH
:
3742 XML_MAX_TEXT_LENGTH
;
3744 xmlChar
*ret
= NULL
;
3745 const xmlChar
*cur
= NULL
;
3746 xmlParserInputPtr input
;
3748 if (RAW
== '"') stop
= '"';
3749 else if (RAW
== '\'') stop
= '\'';
3751 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_STARTED
, NULL
);
3754 buf
= (xmlChar
*) xmlMallocAtomic(size
);
3756 xmlErrMemory(ctxt
, NULL
);
3761 * The content of the entity definition is copied in a buffer.
3764 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
3765 input
= ctxt
->input
;
3767 if (ctxt
->instate
== XML_PARSER_EOF
)
3772 * NOTE: 4.4.5 Included in Literal
3773 * When a parameter entity reference appears in a literal entity
3774 * value, ... a single or double quote character in the replacement
3775 * text is always treated as a normal data character and will not
3776 * terminate the literal.
3777 * In practice it means we stop the loop only when back at parsing
3778 * the initial entity and the quote is found
3780 while (((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
3781 (ctxt
->input
!= input
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
3782 if (len
+ 5 >= size
) {
3786 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
3788 xmlErrMemory(ctxt
, NULL
);
3793 COPY_BUF(l
,buf
,len
,c
);
3803 if (len
> maxLength
) {
3804 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
3805 "entity value too long\n");
3810 if (ctxt
->instate
== XML_PARSER_EOF
)
3813 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
, NULL
);
3819 * Raise problem w.r.t. '&' and '%' being used in non-entities
3820 * reference constructs. Note Charref will be handled in
3821 * xmlStringDecodeEntities()
3824 while (*cur
!= 0) { /* non input consuming */
3825 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
3831 name
= xmlParseStringName(ctxt
, &cur
);
3836 if ((nameOk
== 0) || (*cur
!= ';')) {
3837 xmlFatalErrMsgInt(ctxt
, XML_ERR_ENTITY_CHAR_ERROR
,
3838 "EntityValue: '%c' forbidden except for entities references\n",
3842 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
3843 (ctxt
->inputNr
== 1)) {
3844 xmlFatalErr(ctxt
, XML_ERR_ENTITY_PE_INTERNAL
, NULL
);
3854 * Then PEReference entities are substituted.
3856 * NOTE: 4.4.7 Bypassed
3857 * When a general entity reference appears in the EntityValue in
3858 * an entity declaration, it is bypassed and left as is.
3859 * so XML_SUBSTITUTE_REF is not set here.
3862 ret
= xmlStringDecodeEntitiesInt(ctxt
, buf
, len
, XML_SUBSTITUTE_PEREF
,
3863 0, 0, 0, /* check */ 1);
3878 * xmlParseAttValueComplex:
3879 * @ctxt: an XML parser context
3880 * @len: the resulting attribute len
3881 * @normalize: whether to apply the inner normalization
3883 * parse a value for an attribute, this is the fallback function
3884 * of xmlParseAttValue() when the attribute parsing requires handling
3885 * of non-ASCII characters, or normalization compaction.
3887 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3890 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
, int *attlen
, int normalize
) {
3892 xmlChar
*buf
= NULL
;
3893 xmlChar
*rep
= NULL
;
3895 size_t buf_size
= 0;
3896 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3897 XML_MAX_HUGE_LENGTH
:
3898 XML_MAX_TEXT_LENGTH
;
3899 int c
, l
, in_space
= 0;
3900 xmlChar
*current
= NULL
;
3903 if (NXT(0) == '"') {
3904 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3907 } else if (NXT(0) == '\'') {
3909 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3912 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
3917 * allocate a translation buffer.
3919 buf_size
= XML_PARSER_BUFFER_SIZE
;
3920 buf
= (xmlChar
*) xmlMallocAtomic(buf_size
);
3921 if (buf
== NULL
) goto mem_error
;
3924 * OK loop until we reach one of the ending char or a size limit.
3927 while (((NXT(0) != limit
) && /* checked */
3928 (IS_CHAR(c
)) && (c
!= '<')) &&
3929 (ctxt
->instate
!= XML_PARSER_EOF
)) {
3932 if (NXT(1) == '#') {
3933 int val
= xmlParseCharRef(ctxt
);
3936 if (ctxt
->replaceEntities
) {
3937 if (len
+ 10 > buf_size
) {
3938 growBuffer(buf
, 10);
3943 * The reparsing will be done in xmlStringGetNodeList()
3944 * called by the attribute() function in SAX.c
3946 if (len
+ 10 > buf_size
) {
3947 growBuffer(buf
, 10);
3955 } else if (val
!= 0) {
3956 if (len
+ 10 > buf_size
) {
3957 growBuffer(buf
, 10);
3959 len
+= xmlCopyChar(0, &buf
[len
], val
);
3962 ent
= xmlParseEntityRef(ctxt
);
3963 if ((ent
!= NULL
) &&
3964 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
3965 if (len
+ 10 > buf_size
) {
3966 growBuffer(buf
, 10);
3968 if ((ctxt
->replaceEntities
== 0) &&
3969 (ent
->content
[0] == '&')) {
3976 buf
[len
++] = ent
->content
[0];
3978 } else if ((ent
!= NULL
) &&
3979 (ctxt
->replaceEntities
!= 0)) {
3980 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
3981 if (xmlParserEntityCheck(ctxt
, ent
->length
))
3985 rep
= xmlStringDecodeEntitiesInt(ctxt
, ent
->content
,
3986 ent
->length
, XML_SUBSTITUTE_REF
, 0, 0, 0,
3991 while (*current
!= 0) { /* non input consuming */
3992 if ((*current
== 0xD) || (*current
== 0xA) ||
3993 (*current
== 0x9)) {
3997 buf
[len
++] = *current
++;
3998 if (len
+ 10 > buf_size
) {
3999 growBuffer(buf
, 10);
4006 if (len
+ 10 > buf_size
) {
4007 growBuffer(buf
, 10);
4009 if (ent
->content
!= NULL
)
4010 buf
[len
++] = ent
->content
[0];
4012 } else if (ent
!= NULL
) {
4013 int i
= xmlStrlen(ent
->name
);
4014 const xmlChar
*cur
= ent
->name
;
4017 * We also check for recursion and amplification
4018 * when entities are not substituted. They're
4019 * often expanded later.
4021 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
4022 (ent
->content
!= NULL
)) {
4023 if ((ent
->flags
& XML_ENT_CHECKED
) == 0) {
4024 unsigned long oldCopy
= ctxt
->sizeentcopy
;
4026 ctxt
->sizeentcopy
= ent
->length
;
4029 rep
= xmlStringDecodeEntitiesInt(ctxt
,
4030 ent
->content
, ent
->length
,
4031 XML_SUBSTITUTE_REF
, 0, 0, 0,
4036 * If we're parsing DTD content, the entity
4037 * might reference other entities which
4038 * weren't defined yet, so the check isn't
4041 if (ctxt
->inSubset
== 0) {
4042 ent
->flags
|= XML_ENT_CHECKED
;
4043 ent
->expandedSize
= ctxt
->sizeentcopy
;
4050 ent
->content
[0] = 0;
4053 if (xmlParserEntityCheck(ctxt
, oldCopy
))
4056 if (xmlParserEntityCheck(ctxt
, ent
->expandedSize
))
4062 * Just output the reference
4065 while (len
+ i
+ 10 > buf_size
) {
4066 growBuffer(buf
, i
+ 10);
4069 buf
[len
++] = *cur
++;
4074 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
4075 if ((len
!= 0) || (!normalize
)) {
4076 if ((!normalize
) || (!in_space
)) {
4077 COPY_BUF(l
,buf
,len
,0x20);
4078 while (len
+ 10 > buf_size
) {
4079 growBuffer(buf
, 10);
4086 COPY_BUF(l
,buf
,len
,c
);
4087 if (len
+ 10 > buf_size
) {
4088 growBuffer(buf
, 10);
4095 if (len
> maxLength
) {
4096 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4097 "AttValue length too long\n");
4101 if (ctxt
->instate
== XML_PARSER_EOF
)
4104 if ((in_space
) && (normalize
)) {
4105 while ((len
> 0) && (buf
[len
- 1] == 0x20)) len
--;
4109 xmlFatalErr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
, NULL
);
4110 } else if (RAW
!= limit
) {
4111 if ((c
!= 0) && (!IS_CHAR(c
))) {
4112 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_CHAR
,
4113 "invalid character in attribute value\n");
4115 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4116 "AttValue: ' expected\n");
4121 if (attlen
!= NULL
) *attlen
= len
;
4125 xmlErrMemory(ctxt
, NULL
);
4136 * @ctxt: an XML parser context
4138 * DEPRECATED: Internal function, don't use.
4140 * parse a value for an attribute
4141 * Note: the parser won't do substitution of entities here, this
4142 * will be handled later in xmlStringGetNodeList
4144 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4145 * "'" ([^<&'] | Reference)* "'"
4147 * 3.3.3 Attribute-Value Normalization:
4148 * Before the value of an attribute is passed to the application or
4149 * checked for validity, the XML processor must normalize it as follows:
4150 * - a character reference is processed by appending the referenced
4151 * character to the attribute value
4152 * - an entity reference is processed by recursively processing the
4153 * replacement text of the entity
4154 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4155 * appending #x20 to the normalized value, except that only a single
4156 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4157 * parsed entity or the literal entity value of an internal parsed entity
4158 * - other characters are processed by appending them to the normalized value
4159 * If the declared value is not CDATA, then the XML processor must further
4160 * process the normalized attribute value by discarding any leading and
4161 * trailing space (#x20) characters, and by replacing sequences of space
4162 * (#x20) characters by a single space (#x20) character.
4163 * All attributes for which no declaration has been read should be treated
4164 * by a non-validating parser as if declared CDATA.
4166 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4171 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
4172 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
)) return(NULL
);
4173 return(xmlParseAttValueInternal(ctxt
, NULL
, NULL
, 0));
4177 * xmlParseSystemLiteral:
4178 * @ctxt: an XML parser context
4180 * DEPRECATED: Internal function, don't use.
4182 * parse an XML Literal
4184 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4186 * Returns the SystemLiteral parsed or NULL
4190 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
4191 xmlChar
*buf
= NULL
;
4193 int size
= XML_PARSER_BUFFER_SIZE
;
4195 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4196 XML_MAX_TEXT_LENGTH
:
4197 XML_MAX_NAME_LENGTH
;
4199 int state
= ctxt
->instate
;
4204 } else if (RAW
== '\'') {
4208 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4212 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4214 xmlErrMemory(ctxt
, NULL
);
4217 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
4219 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
4220 if (len
+ 5 >= size
) {
4224 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
4227 xmlErrMemory(ctxt
, NULL
);
4228 ctxt
->instate
= (xmlParserInputState
) state
;
4233 COPY_BUF(l
,buf
,len
,cur
);
4234 if (len
> maxLength
) {
4235 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "SystemLiteral");
4237 ctxt
->instate
= (xmlParserInputState
) state
;
4244 if (ctxt
->instate
== XML_PARSER_EOF
) {
4248 ctxt
->instate
= (xmlParserInputState
) state
;
4249 if (!IS_CHAR(cur
)) {
4250 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4258 * xmlParsePubidLiteral:
4259 * @ctxt: an XML parser context
4261 * DEPRECATED: Internal function, don't use.
4263 * parse an XML public literal
4265 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4267 * Returns the PubidLiteral parsed or NULL.
4271 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
4272 xmlChar
*buf
= NULL
;
4274 int size
= XML_PARSER_BUFFER_SIZE
;
4275 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4276 XML_MAX_TEXT_LENGTH
:
4277 XML_MAX_NAME_LENGTH
;
4280 xmlParserInputState oldstate
= ctxt
->instate
;
4285 } else if (RAW
== '\'') {
4289 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4292 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4294 xmlErrMemory(ctxt
, NULL
);
4297 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
4299 while ((IS_PUBIDCHAR_CH(cur
)) && (cur
!= stop
)) { /* checked */
4300 if (len
+ 1 >= size
) {
4304 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
4306 xmlErrMemory(ctxt
, NULL
);
4313 if (len
> maxLength
) {
4314 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Public ID");
4322 if (ctxt
->instate
== XML_PARSER_EOF
) {
4327 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4331 ctxt
->instate
= oldstate
;
4335 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int partial
);
4338 * used for the test in the inner loop of the char data testing
4340 static const unsigned char test_char_data
[256] = {
4341 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4342 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4343 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4344 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4345 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4346 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4347 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4348 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4349 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4350 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4351 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4352 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4353 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4354 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4355 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4356 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4357 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4358 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4359 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4360 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4361 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4362 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4364 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4367 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4368 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4369 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4370 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4371 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4372 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4376 * xmlParseCharDataInternal:
4377 * @ctxt: an XML parser context
4378 * @partial: buffer may contain partial UTF-8 sequences
4380 * Parse character data. Always makes progress if the first char isn't
4383 * The right angle bracket (>) may be represented using the string "&gt;",
4384 * and must, for compatibility, be escaped using "&gt;" or a character
4385 * reference when it appears in the string "]]>" in content, when that
4386 * string is not marking the end of a CDATA section.
4388 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4391 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt
, int partial
) {
4394 int line
= ctxt
->input
->line
;
4395 int col
= ctxt
->input
->col
;
4400 * Accelerated common case where input doesn't need to be
4401 * modified before passing it to the handler.
4403 in
= ctxt
->input
->cur
;
4406 while (*in
== 0x20) { in
++; ctxt
->input
->col
++; }
4409 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4411 } while (*in
== 0xA);
4412 goto get_more_space
;
4415 nbchar
= in
- ctxt
->input
->cur
;
4417 const xmlChar
*tmp
= ctxt
->input
->cur
;
4418 ctxt
->input
->cur
= in
;
4420 if ((ctxt
->sax
!= NULL
) &&
4421 (ctxt
->sax
->ignorableWhitespace
!=
4422 ctxt
->sax
->characters
)) {
4423 if (areBlanks(ctxt
, tmp
, nbchar
, 1)) {
4424 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4425 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4428 if (ctxt
->sax
->characters
!= NULL
)
4429 ctxt
->sax
->characters(ctxt
->userData
,
4431 if (*ctxt
->space
== -1)
4434 } else if ((ctxt
->sax
!= NULL
) &&
4435 (ctxt
->sax
->characters
!= NULL
)) {
4436 ctxt
->sax
->characters(ctxt
->userData
,
4444 ccol
= ctxt
->input
->col
;
4445 while (test_char_data
[*in
]) {
4449 ctxt
->input
->col
= ccol
;
4452 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4454 } while (*in
== 0xA);
4458 if ((in
[1] == ']') && (in
[2] == '>')) {
4459 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4460 if (ctxt
->instate
!= XML_PARSER_EOF
)
4461 ctxt
->input
->cur
= in
+ 1;
4468 nbchar
= in
- ctxt
->input
->cur
;
4470 if ((ctxt
->sax
!= NULL
) &&
4471 (ctxt
->sax
->ignorableWhitespace
!=
4472 ctxt
->sax
->characters
) &&
4473 (IS_BLANK_CH(*ctxt
->input
->cur
))) {
4474 const xmlChar
*tmp
= ctxt
->input
->cur
;
4475 ctxt
->input
->cur
= in
;
4477 if (areBlanks(ctxt
, tmp
, nbchar
, 0)) {
4478 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4479 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4482 if (ctxt
->sax
->characters
!= NULL
)
4483 ctxt
->sax
->characters(ctxt
->userData
,
4485 if (*ctxt
->space
== -1)
4488 line
= ctxt
->input
->line
;
4489 col
= ctxt
->input
->col
;
4490 } else if (ctxt
->sax
!= NULL
) {
4491 if (ctxt
->sax
->characters
!= NULL
)
4492 ctxt
->sax
->characters(ctxt
->userData
,
4493 ctxt
->input
->cur
, nbchar
);
4494 line
= ctxt
->input
->line
;
4495 col
= ctxt
->input
->col
;
4497 if (ctxt
->instate
== XML_PARSER_EOF
)
4500 ctxt
->input
->cur
= in
;
4504 ctxt
->input
->cur
= in
;
4506 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4507 continue; /* while */
4519 if (ctxt
->instate
== XML_PARSER_EOF
)
4521 in
= ctxt
->input
->cur
;
4522 } while (((*in
>= 0x20) && (*in
<= 0x7F)) ||
4523 (*in
== 0x09) || (*in
== 0x0a));
4524 ctxt
->input
->line
= line
;
4525 ctxt
->input
->col
= col
;
4526 xmlParseCharDataComplex(ctxt
, partial
);
4530 * xmlParseCharDataComplex:
4531 * @ctxt: an XML parser context
4532 * @partial: buffer may contain partial UTF-8 sequences
4534 * Always makes progress if the first char isn't '<' or '&'.
4536 * Parse a CharData section. This is the fallback function
4537 * of xmlParseCharData() when the parsing requires handling
4538 * of non-ASCII characters.
4541 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int partial
) {
4542 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
4547 while ((cur
!= '<') && /* checked */
4549 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
4550 if ((cur
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4551 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4553 COPY_BUF(l
,buf
,nbchar
,cur
);
4554 /* move current position before possible calling of ctxt->sax->characters */
4556 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
4560 * OK the segment is to be consumed as chars.
4562 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4563 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4564 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4565 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4568 if (ctxt
->sax
->characters
!= NULL
)
4569 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4570 if ((ctxt
->sax
->characters
!=
4571 ctxt
->sax
->ignorableWhitespace
) &&
4572 (*ctxt
->space
== -1))
4577 /* something really bad happened in the SAX callback */
4578 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4584 if (ctxt
->instate
== XML_PARSER_EOF
)
4589 * OK the segment is to be consumed as chars.
4591 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4592 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4593 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4594 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
4596 if (ctxt
->sax
->characters
!= NULL
)
4597 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4598 if ((ctxt
->sax
->characters
!= ctxt
->sax
->ignorableWhitespace
) &&
4599 (*ctxt
->space
== -1))
4607 * - XML_PARSER_EOF or memory error. This is checked above.
4608 * - An actual 0 character.
4610 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4612 if (ctxt
->input
->cur
< ctxt
->input
->end
) {
4613 if ((cur
== 0) && (CUR
!= 0)) {
4615 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4616 "Incomplete UTF-8 sequence starting with %02X\n", CUR
);
4619 } else if ((cur
!= '<') && (cur
!= '&')) {
4620 /* Generate the error and skip the offending character */
4621 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4622 "PCDATA invalid Char value %d\n", cur
);
4630 * @ctxt: an XML parser context
4633 * DEPRECATED: Internal function, don't use.
4636 xmlParseCharData(xmlParserCtxtPtr ctxt
, ATTRIBUTE_UNUSED
int cdata
) {
4637 xmlParseCharDataInternal(ctxt
, 0);
4641 * xmlParseExternalID:
4642 * @ctxt: an XML parser context
4643 * @publicID: a xmlChar** receiving PubidLiteral
4644 * @strict: indicate whether we should restrict parsing to only
4645 * production [75], see NOTE below
4647 * DEPRECATED: Internal function, don't use.
4649 * Parse an External ID or a Public ID
4651 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4652 * 'PUBLIC' S PubidLiteral S SystemLiteral
4654 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4655 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4657 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4659 * Returns the SystemLiteral; in the second ('PUBLIC')
4660 * case publicID receives the PubidLiteral. If strict is off,
4661 * it is possible to return NULL and have publicID set.
4665 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
4666 xmlChar
*URI
= NULL
;
4669 if (CMP6(CUR_PTR
, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4671 if (SKIP_BLANKS
== 0) {
4672 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4673 "Space required after 'SYSTEM'\n");
4675 URI
= xmlParseSystemLiteral(ctxt
);
4677 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4679 } else if (CMP6(CUR_PTR
, 'P', 'U', 'B', 'L', 'I', 'C')) {
4681 if (SKIP_BLANKS
== 0) {
4682 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4683 "Space required after 'PUBLIC'\n");
4685 *publicID
= xmlParsePubidLiteral(ctxt
);
4686 if (*publicID
== NULL
) {
4687 xmlFatalErr(ctxt
, XML_ERR_PUBID_REQUIRED
, NULL
);
4691 * We don't handle [83] so "S SystemLiteral" is required.
4693 if (SKIP_BLANKS
== 0) {
4694 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4695 "Space required after the Public Identifier\n");
4699 * We handle [83] so we return immediately, if
4700 * "S SystemLiteral" is not detected. We skip blanks if no
4701 * system literal was found, but this is harmless since we must
4702 * be at the end of a NotationDecl.
4704 if (SKIP_BLANKS
== 0) return(NULL
);
4705 if ((CUR
!= '\'') && (CUR
!= '"')) return(NULL
);
4707 URI
= xmlParseSystemLiteral(ctxt
);
4709 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4716 * xmlParseCommentComplex:
4717 * @ctxt: an XML parser context
4718 * @buf: the already parsed part of the buffer
4719 * @len: number of bytes in the buffer
4720 * @size: allocated size of the buffer
4722 * Skip an XML (SGML) comment <!-- .... -->
4723 * The spec says that "For compatibility, the string "--" (double-hyphen)
4724 * must not occur within comments. "
4725 * This is the slow routine in case the accelerator for ascii didn't work
4727 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4730 xmlParseCommentComplex(xmlParserCtxtPtr ctxt
, xmlChar
*buf
,
4731 size_t len
, size_t size
) {
4735 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4736 XML_MAX_HUGE_LENGTH
:
4737 XML_MAX_TEXT_LENGTH
;
4740 inputid
= ctxt
->input
->id
;
4744 size
= XML_PARSER_BUFFER_SIZE
;
4745 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4747 xmlErrMemory(ctxt
, NULL
);
4753 goto not_terminated
;
4755 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4756 "xmlParseComment: invalid xmlChar value %d\n",
4764 goto not_terminated
;
4766 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4767 "xmlParseComment: invalid xmlChar value %d\n",
4775 goto not_terminated
;
4776 while (IS_CHAR(cur
) && /* checked */
4778 (r
!= '-') || (q
!= '-'))) {
4779 if ((r
== '-') && (q
== '-')) {
4780 xmlFatalErr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
, NULL
);
4782 if (len
+ 5 >= size
) {
4786 new_size
= size
* 2;
4787 new_buf
= (xmlChar
*) xmlRealloc(buf
, new_size
);
4788 if (new_buf
== NULL
) {
4790 xmlErrMemory(ctxt
, NULL
);
4796 COPY_BUF(ql
,buf
,len
,q
);
4797 if (len
> maxLength
) {
4798 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4799 "Comment too big found", NULL
);
4814 if (ctxt
->instate
== XML_PARSER_EOF
) {
4819 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4820 "Comment not terminated \n<!--%.50s\n", buf
);
4821 } else if (!IS_CHAR(cur
)) {
4822 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4823 "xmlParseComment: invalid xmlChar value %d\n",
4826 if (inputid
!= ctxt
->input
->id
) {
4827 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4828 "Comment doesn't start and stop in the same"
4832 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4833 (!ctxt
->disableSAX
))
4834 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4839 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4840 "Comment not terminated\n", NULL
);
4847 * @ctxt: an XML parser context
4849 * DEPRECATED: Internal function, don't use.
4851 * Parse an XML (SGML) comment. Always consumes '<!'.
4853 * The spec says that "For compatibility, the string "--" (double-hyphen)
4854 * must not occur within comments. "
4856 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4859 xmlParseComment(xmlParserCtxtPtr ctxt
) {
4860 xmlChar
*buf
= NULL
;
4861 size_t size
= XML_PARSER_BUFFER_SIZE
;
4863 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4864 XML_MAX_HUGE_LENGTH
:
4865 XML_MAX_TEXT_LENGTH
;
4866 xmlParserInputState state
;
4873 * Check that there is a comment right here.
4875 if ((RAW
!= '<') || (NXT(1) != '!'))
4878 if ((RAW
!= '-') || (NXT(1) != '-'))
4880 state
= ctxt
->instate
;
4881 ctxt
->instate
= XML_PARSER_COMMENT
;
4882 inputid
= ctxt
->input
->id
;
4887 * Accelerated common case where input doesn't need to be
4888 * modified before passing it to the handler.
4890 in
= ctxt
->input
->cur
;
4894 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4896 } while (*in
== 0xA);
4899 ccol
= ctxt
->input
->col
;
4900 while (((*in
> '-') && (*in
<= 0x7F)) ||
4901 ((*in
>= 0x20) && (*in
< '-')) ||
4906 ctxt
->input
->col
= ccol
;
4909 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4911 } while (*in
== 0xA);
4914 nbchar
= in
- ctxt
->input
->cur
;
4916 * save current set of data
4919 if ((ctxt
->sax
!= NULL
) &&
4920 (ctxt
->sax
->comment
!= NULL
)) {
4922 if ((*in
== '-') && (in
[1] == '-'))
4925 size
= XML_PARSER_BUFFER_SIZE
+ nbchar
;
4926 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4928 xmlErrMemory(ctxt
, NULL
);
4929 ctxt
->instate
= state
;
4933 } else if (len
+ nbchar
+ 1 >= size
) {
4935 size
+= len
+ nbchar
+ XML_PARSER_BUFFER_SIZE
;
4936 new_buf
= (xmlChar
*) xmlRealloc(buf
, size
);
4937 if (new_buf
== NULL
) {
4939 xmlErrMemory(ctxt
, NULL
);
4940 ctxt
->instate
= state
;
4945 memcpy(&buf
[len
], ctxt
->input
->cur
, nbchar
);
4950 if (len
> maxLength
) {
4951 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4952 "Comment too big found", NULL
);
4956 ctxt
->input
->cur
= in
;
4959 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4964 ctxt
->input
->cur
= in
;
4966 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4973 if (ctxt
->instate
== XML_PARSER_EOF
) {
4977 in
= ctxt
->input
->cur
;
4981 if (ctxt
->input
->id
!= inputid
) {
4982 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4983 "comment doesn't start and stop in the"
4987 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4988 (!ctxt
->disableSAX
)) {
4990 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4992 ctxt
->sax
->comment(ctxt
->userData
, BAD_CAST
"");
4996 if (ctxt
->instate
!= XML_PARSER_EOF
)
4997 ctxt
->instate
= state
;
5001 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5002 "Double hyphen within comment: "
5006 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5007 "Double hyphen within comment\n", NULL
);
5008 if (ctxt
->instate
== XML_PARSER_EOF
) {
5019 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09) || (*in
== 0x0a));
5020 xmlParseCommentComplex(ctxt
, buf
, len
, size
);
5021 ctxt
->instate
= state
;
5028 * @ctxt: an XML parser context
5030 * DEPRECATED: Internal function, don't use.
5032 * parse the name of a PI
5034 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5036 * Returns the PITarget name or NULL
5040 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
5041 const xmlChar
*name
;
5043 name
= xmlParseName(ctxt
);
5044 if ((name
!= NULL
) &&
5045 ((name
[0] == 'x') || (name
[0] == 'X')) &&
5046 ((name
[1] == 'm') || (name
[1] == 'M')) &&
5047 ((name
[2] == 'l') || (name
[2] == 'L'))) {
5049 if ((name
[0] == 'x') && (name
[1] == 'm') &&
5050 (name
[2] == 'l') && (name
[3] == 0)) {
5051 xmlFatalErrMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5052 "XML declaration allowed only at the start of the document\n");
5054 } else if (name
[3] == 0) {
5055 xmlFatalErr(ctxt
, XML_ERR_RESERVED_XML_NAME
, NULL
);
5059 if (xmlW3CPIs
[i
] == NULL
) break;
5060 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
5063 xmlWarningMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5064 "xmlParsePITarget: invalid name prefix 'xml'\n",
5067 if ((name
!= NULL
) && (xmlStrchr(name
, ':') != NULL
)) {
5068 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5069 "colons are forbidden from PI names '%s'\n", name
, NULL
, NULL
);
5074 #ifdef LIBXML_CATALOG_ENABLED
5076 * xmlParseCatalogPI:
5077 * @ctxt: an XML parser context
5078 * @catalog: the PI value string
5080 * parse an XML Catalog Processing Instruction.
5082 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5084 * Occurs only if allowed by the user and if happening in the Misc
5085 * part of the document before any doctype information
5086 * This will add the given catalog to the parsing context in order
5087 * to be used if there is a resolution need further down in the document
5091 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
5092 xmlChar
*URL
= NULL
;
5093 const xmlChar
*tmp
, *base
;
5097 while (IS_BLANK_CH(*tmp
)) tmp
++;
5098 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
5101 while (IS_BLANK_CH(*tmp
)) tmp
++;
5106 while (IS_BLANK_CH(*tmp
)) tmp
++;
5108 if ((marker
!= '\'') && (marker
!= '"'))
5112 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
5115 URL
= xmlStrndup(base
, tmp
- base
);
5117 while (IS_BLANK_CH(*tmp
)) tmp
++;
5122 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
5128 xmlWarningMsg(ctxt
, XML_WAR_CATALOG_PI
,
5129 "Catalog PI syntax error: %s\n",
5138 * @ctxt: an XML parser context
5140 * DEPRECATED: Internal function, don't use.
5142 * parse an XML Processing Instruction.
5144 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5146 * The processing is transferred to SAX once parsed.
5150 xmlParsePI(xmlParserCtxtPtr ctxt
) {
5151 xmlChar
*buf
= NULL
;
5153 size_t size
= XML_PARSER_BUFFER_SIZE
;
5154 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
5155 XML_MAX_HUGE_LENGTH
:
5156 XML_MAX_TEXT_LENGTH
;
5158 const xmlChar
*target
;
5159 xmlParserInputState state
;
5161 if ((RAW
== '<') && (NXT(1) == '?')) {
5162 int inputid
= ctxt
->input
->id
;
5163 state
= ctxt
->instate
;
5164 ctxt
->instate
= XML_PARSER_PI
;
5166 * this is a Processing Instruction.
5171 * Parse the target name and check for special support like
5174 target
= xmlParsePITarget(ctxt
);
5175 if (target
!= NULL
) {
5176 if ((RAW
== '?') && (NXT(1) == '>')) {
5177 if (inputid
!= ctxt
->input
->id
) {
5178 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5179 "PI declaration doesn't start and stop in"
5180 " the same entity\n");
5187 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5188 (ctxt
->sax
->processingInstruction
!= NULL
))
5189 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5191 if (ctxt
->instate
!= XML_PARSER_EOF
)
5192 ctxt
->instate
= state
;
5195 buf
= (xmlChar
*) xmlMallocAtomic(size
);
5197 xmlErrMemory(ctxt
, NULL
);
5198 ctxt
->instate
= state
;
5201 if (SKIP_BLANKS
== 0) {
5202 xmlFatalErrMsgStr(ctxt
, XML_ERR_SPACE_REQUIRED
,
5203 "ParsePI: PI %s space expected\n", target
);
5206 while (IS_CHAR(cur
) && /* checked */
5207 ((cur
!= '?') || (NXT(1) != '>'))) {
5208 if (len
+ 5 >= size
) {
5210 size_t new_size
= size
* 2;
5211 tmp
= (xmlChar
*) xmlRealloc(buf
, new_size
);
5213 xmlErrMemory(ctxt
, NULL
);
5215 ctxt
->instate
= state
;
5221 COPY_BUF(l
,buf
,len
,cur
);
5222 if (len
> maxLength
) {
5223 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5224 "PI %s too big found", target
);
5226 ctxt
->instate
= state
;
5233 if (ctxt
->instate
== XML_PARSER_EOF
) {
5238 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5239 "ParsePI: PI %s never end ...\n", target
);
5241 if (inputid
!= ctxt
->input
->id
) {
5242 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5243 "PI declaration doesn't start and stop in"
5244 " the same entity\n");
5248 #ifdef LIBXML_CATALOG_ENABLED
5249 if (((state
== XML_PARSER_MISC
) ||
5250 (state
== XML_PARSER_START
)) &&
5251 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
5252 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
5253 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
5254 (allow
== XML_CATA_ALLOW_ALL
))
5255 xmlParseCatalogPI(ctxt
, buf
);
5263 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5264 (ctxt
->sax
->processingInstruction
!= NULL
))
5265 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5270 xmlFatalErr(ctxt
, XML_ERR_PI_NOT_STARTED
, NULL
);
5272 if (ctxt
->instate
!= XML_PARSER_EOF
)
5273 ctxt
->instate
= state
;
5278 * xmlParseNotationDecl:
5279 * @ctxt: an XML parser context
5281 * DEPRECATED: Internal function, don't use.
5283 * Parse a notation declaration. Always consumes '<!'.
5285 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5287 * Hence there are actually 3 choices:
5288 * 'PUBLIC' S PubidLiteral
5289 * 'PUBLIC' S PubidLiteral S SystemLiteral
5290 * and 'SYSTEM' S SystemLiteral
5292 * See the NOTE on xmlParseExternalID().
5296 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
5297 const xmlChar
*name
;
5301 if ((CUR
!= '<') || (NXT(1) != '!'))
5305 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5306 int inputid
= ctxt
->input
->id
;
5308 if (SKIP_BLANKS
== 0) {
5309 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5310 "Space required after '<!NOTATION'\n");
5314 name
= xmlParseName(ctxt
);
5316 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5319 if (xmlStrchr(name
, ':') != NULL
) {
5320 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5321 "colons are forbidden from notation names '%s'\n",
5324 if (SKIP_BLANKS
== 0) {
5325 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5326 "Space required after the NOTATION name'\n");
5333 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
5337 if (inputid
!= ctxt
->input
->id
) {
5338 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5339 "Notation declaration doesn't start and stop"
5340 " in the same entity\n");
5343 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5344 (ctxt
->sax
->notationDecl
!= NULL
))
5345 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
5347 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5349 if (Systemid
!= NULL
) xmlFree(Systemid
);
5350 if (Pubid
!= NULL
) xmlFree(Pubid
);
5355 * xmlParseEntityDecl:
5356 * @ctxt: an XML parser context
5358 * DEPRECATED: Internal function, don't use.
5360 * Parse an entity declaration. Always consumes '<!'.
5362 * [70] EntityDecl ::= GEDecl | PEDecl
5364 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5366 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5368 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5370 * [74] PEDef ::= EntityValue | ExternalID
5372 * [76] NDataDecl ::= S 'NDATA' S Name
5374 * [ VC: Notation Declared ]
5375 * The Name must match the declared name of a notation.
5379 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
5380 const xmlChar
*name
= NULL
;
5381 xmlChar
*value
= NULL
;
5382 xmlChar
*URI
= NULL
, *literal
= NULL
;
5383 const xmlChar
*ndata
= NULL
;
5384 int isParameter
= 0;
5385 xmlChar
*orig
= NULL
;
5387 if ((CUR
!= '<') || (NXT(1) != '!'))
5391 /* GROW; done in the caller */
5392 if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5393 int inputid
= ctxt
->input
->id
;
5395 if (SKIP_BLANKS
== 0) {
5396 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5397 "Space required after '<!ENTITY'\n");
5402 if (SKIP_BLANKS
== 0) {
5403 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5404 "Space required after '%%'\n");
5409 name
= xmlParseName(ctxt
);
5411 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5412 "xmlParseEntityDecl: no name\n");
5415 if (xmlStrchr(name
, ':') != NULL
) {
5416 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5417 "colons are forbidden from entities names '%s'\n",
5420 if (SKIP_BLANKS
== 0) {
5421 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5422 "Space required after the entity name\n");
5425 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
5427 * handle the various cases of definitions...
5430 if ((RAW
== '"') || (RAW
== '\'')) {
5431 value
= xmlParseEntityValue(ctxt
, &orig
);
5433 if ((ctxt
->sax
!= NULL
) &&
5434 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5435 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5436 XML_INTERNAL_PARAMETER_ENTITY
,
5440 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5441 if ((URI
== NULL
) && (literal
== NULL
)) {
5442 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5447 uri
= xmlParseURI((const char *) URI
);
5449 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5450 "Invalid URI: %s\n", URI
);
5452 * This really ought to be a well formedness error
5453 * but the XML Core WG decided otherwise c.f. issue
5454 * E26 of the XML erratas.
5457 if (uri
->fragment
!= NULL
) {
5459 * Okay this is foolish to block those but not
5462 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5464 if ((ctxt
->sax
!= NULL
) &&
5465 (!ctxt
->disableSAX
) &&
5466 (ctxt
->sax
->entityDecl
!= NULL
))
5467 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5468 XML_EXTERNAL_PARAMETER_ENTITY
,
5469 literal
, URI
, NULL
);
5476 if ((RAW
== '"') || (RAW
== '\'')) {
5477 value
= xmlParseEntityValue(ctxt
, &orig
);
5478 if ((ctxt
->sax
!= NULL
) &&
5479 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5480 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5481 XML_INTERNAL_GENERAL_ENTITY
,
5484 * For expat compatibility in SAX mode.
5486 if ((ctxt
->myDoc
== NULL
) ||
5487 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
5488 if (ctxt
->myDoc
== NULL
) {
5489 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5490 if (ctxt
->myDoc
== NULL
) {
5491 xmlErrMemory(ctxt
, "New Doc failed");
5494 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5496 if (ctxt
->myDoc
->intSubset
== NULL
)
5497 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5498 BAD_CAST
"fake", NULL
, NULL
);
5500 xmlSAX2EntityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
5504 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5505 if ((URI
== NULL
) && (literal
== NULL
)) {
5506 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5511 uri
= xmlParseURI((const char *)URI
);
5513 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5514 "Invalid URI: %s\n", URI
);
5516 * This really ought to be a well formedness error
5517 * but the XML Core WG decided otherwise c.f. issue
5518 * E26 of the XML erratas.
5521 if (uri
->fragment
!= NULL
) {
5523 * Okay this is foolish to block those but not
5526 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5531 if ((RAW
!= '>') && (SKIP_BLANKS
== 0)) {
5532 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5533 "Space required before 'NDATA'\n");
5535 if (CMP5(CUR_PTR
, 'N', 'D', 'A', 'T', 'A')) {
5537 if (SKIP_BLANKS
== 0) {
5538 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5539 "Space required after 'NDATA'\n");
5541 ndata
= xmlParseName(ctxt
);
5542 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5543 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
5544 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
5545 literal
, URI
, ndata
);
5547 if ((ctxt
->sax
!= NULL
) &&
5548 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5549 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5550 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5551 literal
, URI
, NULL
);
5553 * For expat compatibility in SAX mode.
5554 * assuming the entity replacement was asked for
5556 if ((ctxt
->replaceEntities
!= 0) &&
5557 ((ctxt
->myDoc
== NULL
) ||
5558 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
5559 if (ctxt
->myDoc
== NULL
) {
5560 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5561 if (ctxt
->myDoc
== NULL
) {
5562 xmlErrMemory(ctxt
, "New Doc failed");
5565 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5568 if (ctxt
->myDoc
->intSubset
== NULL
)
5569 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5570 BAD_CAST
"fake", NULL
, NULL
);
5571 xmlSAX2EntityDecl(ctxt
, name
,
5572 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5573 literal
, URI
, NULL
);
5578 if (ctxt
->instate
== XML_PARSER_EOF
)
5582 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
5583 "xmlParseEntityDecl: entity %s not terminated\n", name
);
5584 xmlHaltParser(ctxt
);
5586 if (inputid
!= ctxt
->input
->id
) {
5587 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5588 "Entity declaration doesn't start and stop in"
5589 " the same entity\n");
5595 * Ugly mechanism to save the raw entity value.
5597 xmlEntityPtr cur
= NULL
;
5600 if ((ctxt
->sax
!= NULL
) &&
5601 (ctxt
->sax
->getParameterEntity
!= NULL
))
5602 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
5604 if ((ctxt
->sax
!= NULL
) &&
5605 (ctxt
->sax
->getEntity
!= NULL
))
5606 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5607 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
5608 cur
= xmlSAX2GetEntity(ctxt
, name
);
5611 if ((cur
!= NULL
) && (cur
->orig
== NULL
)) {
5618 if (value
!= NULL
) xmlFree(value
);
5619 if (URI
!= NULL
) xmlFree(URI
);
5620 if (literal
!= NULL
) xmlFree(literal
);
5621 if (orig
!= NULL
) xmlFree(orig
);
5626 * xmlParseDefaultDecl:
5627 * @ctxt: an XML parser context
5628 * @value: Receive a possible fixed default value for the attribute
5630 * DEPRECATED: Internal function, don't use.
5632 * Parse an attribute default declaration
5634 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5636 * [ VC: Required Attribute ]
5637 * if the default declaration is the keyword #REQUIRED, then the
5638 * attribute must be specified for all elements of the type in the
5639 * attribute-list declaration.
5641 * [ VC: Attribute Default Legal ]
5642 * The declared default value must meet the lexical constraints of
5643 * the declared attribute type c.f. xmlValidateAttributeDecl()
5645 * [ VC: Fixed Attribute Default ]
5646 * if an attribute has a default value declared with the #FIXED
5647 * keyword, instances of that attribute must match the default value.
5649 * [ WFC: No < in Attribute Values ]
5650 * handled in xmlParseAttValue()
5652 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5653 * or XML_ATTRIBUTE_FIXED.
5657 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
5662 if (CMP9(CUR_PTR
, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5664 return(XML_ATTRIBUTE_REQUIRED
);
5666 if (CMP8(CUR_PTR
, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5668 return(XML_ATTRIBUTE_IMPLIED
);
5670 val
= XML_ATTRIBUTE_NONE
;
5671 if (CMP6(CUR_PTR
, '#', 'F', 'I', 'X', 'E', 'D')) {
5673 val
= XML_ATTRIBUTE_FIXED
;
5674 if (SKIP_BLANKS
== 0) {
5675 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5676 "Space required after '#FIXED'\n");
5679 ret
= xmlParseAttValue(ctxt
);
5680 ctxt
->instate
= XML_PARSER_DTD
;
5682 xmlFatalErrMsg(ctxt
, (xmlParserErrors
)ctxt
->errNo
,
5683 "Attribute default value declaration error\n");
5690 * xmlParseNotationType:
5691 * @ctxt: an XML parser context
5693 * DEPRECATED: Internal function, don't use.
5695 * parse an Notation attribute type.
5697 * Note: the leading 'NOTATION' S part has already being parsed...
5699 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5701 * [ VC: Notation Attributes ]
5702 * Values of this type must match one of the notation names included
5703 * in the declaration; all notation names in the declaration must be declared.
5705 * Returns: the notation attribute tree built while parsing
5709 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
5710 const xmlChar
*name
;
5711 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5714 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5720 name
= xmlParseName(ctxt
);
5722 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5723 "Name expected in NOTATION declaration\n");
5724 xmlFreeEnumeration(ret
);
5728 while (tmp
!= NULL
) {
5729 if (xmlStrEqual(name
, tmp
->name
)) {
5730 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5731 "standalone: attribute notation value token %s duplicated\n",
5733 if (!xmlDictOwns(ctxt
->dict
, name
))
5734 xmlFree((xmlChar
*) name
);
5740 cur
= xmlCreateEnumeration(name
);
5742 xmlFreeEnumeration(ret
);
5745 if (last
== NULL
) ret
= last
= cur
;
5752 } while (RAW
== '|');
5754 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5755 xmlFreeEnumeration(ret
);
5763 * xmlParseEnumerationType:
5764 * @ctxt: an XML parser context
5766 * DEPRECATED: Internal function, don't use.
5768 * parse an Enumeration attribute type.
5770 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5772 * [ VC: Enumeration ]
5773 * Values of this type must match one of the Nmtoken tokens in
5776 * Returns: the enumeration attribute tree built while parsing
5780 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
5782 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5785 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_STARTED
, NULL
);
5791 name
= xmlParseNmtoken(ctxt
);
5793 xmlFatalErr(ctxt
, XML_ERR_NMTOKEN_REQUIRED
, NULL
);
5797 while (tmp
!= NULL
) {
5798 if (xmlStrEqual(name
, tmp
->name
)) {
5799 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5800 "standalone: attribute enumeration value token %s duplicated\n",
5802 if (!xmlDictOwns(ctxt
->dict
, name
))
5809 cur
= xmlCreateEnumeration(name
);
5810 if (!xmlDictOwns(ctxt
->dict
, name
))
5813 xmlFreeEnumeration(ret
);
5816 if (last
== NULL
) ret
= last
= cur
;
5823 } while (RAW
== '|');
5825 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_FINISHED
, NULL
);
5833 * xmlParseEnumeratedType:
5834 * @ctxt: an XML parser context
5835 * @tree: the enumeration tree built while parsing
5837 * DEPRECATED: Internal function, don't use.
5839 * parse an Enumerated attribute type.
5841 * [57] EnumeratedType ::= NotationType | Enumeration
5843 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5846 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5850 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5851 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5853 if (SKIP_BLANKS
== 0) {
5854 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5855 "Space required after 'NOTATION'\n");
5858 *tree
= xmlParseNotationType(ctxt
);
5859 if (*tree
== NULL
) return(0);
5860 return(XML_ATTRIBUTE_NOTATION
);
5862 *tree
= xmlParseEnumerationType(ctxt
);
5863 if (*tree
== NULL
) return(0);
5864 return(XML_ATTRIBUTE_ENUMERATION
);
5868 * xmlParseAttributeType:
5869 * @ctxt: an XML parser context
5870 * @tree: the enumeration tree built while parsing
5872 * DEPRECATED: Internal function, don't use.
5874 * parse the Attribute list def for an element
5876 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5878 * [55] StringType ::= 'CDATA'
5880 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5881 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5883 * Validity constraints for attribute values syntax are checked in
5884 * xmlValidateAttributeValue()
5887 * Values of type ID must match the Name production. A name must not
5888 * appear more than once in an XML document as a value of this type;
5889 * i.e., ID values must uniquely identify the elements which bear them.
5891 * [ VC: One ID per Element Type ]
5892 * No element type may have more than one ID attribute specified.
5894 * [ VC: ID Attribute Default ]
5895 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5898 * Values of type IDREF must match the Name production, and values
5899 * of type IDREFS must match Names; each IDREF Name must match the value
5900 * of an ID attribute on some element in the XML document; i.e. IDREF
5901 * values must match the value of some ID attribute.
5903 * [ VC: Entity Name ]
5904 * Values of type ENTITY must match the Name production, values
5905 * of type ENTITIES must match Names; each Entity Name must match the
5906 * name of an unparsed entity declared in the DTD.
5908 * [ VC: Name Token ]
5909 * Values of type NMTOKEN must match the Nmtoken production; values
5910 * of type NMTOKENS must match Nmtokens.
5912 * Returns the attribute type
5915 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5916 if (CMP5(CUR_PTR
, 'C', 'D', 'A', 'T', 'A')) {
5918 return(XML_ATTRIBUTE_CDATA
);
5919 } else if (CMP6(CUR_PTR
, 'I', 'D', 'R', 'E', 'F', 'S')) {
5921 return(XML_ATTRIBUTE_IDREFS
);
5922 } else if (CMP5(CUR_PTR
, 'I', 'D', 'R', 'E', 'F')) {
5924 return(XML_ATTRIBUTE_IDREF
);
5925 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
5927 return(XML_ATTRIBUTE_ID
);
5928 } else if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5930 return(XML_ATTRIBUTE_ENTITY
);
5931 } else if (CMP8(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5933 return(XML_ATTRIBUTE_ENTITIES
);
5934 } else if (CMP8(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5936 return(XML_ATTRIBUTE_NMTOKENS
);
5937 } else if (CMP7(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5939 return(XML_ATTRIBUTE_NMTOKEN
);
5941 return(xmlParseEnumeratedType(ctxt
, tree
));
5945 * xmlParseAttributeListDecl:
5946 * @ctxt: an XML parser context
5948 * DEPRECATED: Internal function, don't use.
5950 * Parse an attribute list declaration for an element. Always consumes '<!'.
5952 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5954 * [53] AttDef ::= S Name S AttType S DefaultDecl
5958 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
5959 const xmlChar
*elemName
;
5960 const xmlChar
*attrName
;
5961 xmlEnumerationPtr tree
;
5963 if ((CUR
!= '<') || (NXT(1) != '!'))
5967 if (CMP7(CUR_PTR
, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5968 int inputid
= ctxt
->input
->id
;
5971 if (SKIP_BLANKS
== 0) {
5972 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5973 "Space required after '<!ATTLIST'\n");
5975 elemName
= xmlParseName(ctxt
);
5976 if (elemName
== NULL
) {
5977 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5978 "ATTLIST: no name for Element\n");
5983 while ((RAW
!= '>') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
5986 xmlChar
*defaultValue
= NULL
;
5990 attrName
= xmlParseName(ctxt
);
5991 if (attrName
== NULL
) {
5992 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5993 "ATTLIST: no name for Attribute\n");
5997 if (SKIP_BLANKS
== 0) {
5998 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5999 "Space required after the attribute name\n");
6003 type
= xmlParseAttributeType(ctxt
, &tree
);
6009 if (SKIP_BLANKS
== 0) {
6010 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6011 "Space required after the attribute type\n");
6013 xmlFreeEnumeration(tree
);
6017 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
6019 if (defaultValue
!= NULL
)
6020 xmlFree(defaultValue
);
6022 xmlFreeEnumeration(tree
);
6025 if ((type
!= XML_ATTRIBUTE_CDATA
) && (defaultValue
!= NULL
))
6026 xmlAttrNormalizeSpace(defaultValue
, defaultValue
);
6030 if (SKIP_BLANKS
== 0) {
6031 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6032 "Space required after the attribute default value\n");
6033 if (defaultValue
!= NULL
)
6034 xmlFree(defaultValue
);
6036 xmlFreeEnumeration(tree
);
6040 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6041 (ctxt
->sax
->attributeDecl
!= NULL
))
6042 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
6043 type
, def
, defaultValue
, tree
);
6044 else if (tree
!= NULL
)
6045 xmlFreeEnumeration(tree
);
6047 if ((ctxt
->sax2
) && (defaultValue
!= NULL
) &&
6048 (def
!= XML_ATTRIBUTE_IMPLIED
) &&
6049 (def
!= XML_ATTRIBUTE_REQUIRED
)) {
6050 xmlAddDefAttrs(ctxt
, elemName
, attrName
, defaultValue
);
6053 xmlAddSpecialAttr(ctxt
, elemName
, attrName
, type
);
6055 if (defaultValue
!= NULL
)
6056 xmlFree(defaultValue
);
6060 if (inputid
!= ctxt
->input
->id
) {
6061 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6062 "Attribute list declaration doesn't start and"
6063 " stop in the same entity\n");
6071 * xmlParseElementMixedContentDecl:
6072 * @ctxt: an XML parser context
6073 * @inputchk: the input used for the current entity, needed for boundary checks
6075 * DEPRECATED: Internal function, don't use.
6077 * parse the declaration for a Mixed Element content
6078 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6080 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6081 * '(' S? '#PCDATA' S? ')'
6083 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6085 * [ VC: No Duplicate Types ]
6086 * The same name must not appear more than once in a single
6087 * mixed-content declaration.
6089 * returns: the list of the xmlElementContentPtr describing the element choices
6091 xmlElementContentPtr
6092 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6093 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
6094 const xmlChar
*elem
= NULL
;
6097 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6101 if (ctxt
->input
->id
!= inputchk
) {
6102 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6103 "Element content declaration doesn't start and"
6104 " stop in the same entity\n");
6107 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6111 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6116 if ((RAW
== '(') || (RAW
== '|')) {
6117 ret
= cur
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6118 if (ret
== NULL
) return(NULL
);
6120 while ((RAW
== '|') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6123 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6125 xmlFreeDocElementContent(ctxt
->myDoc
, cur
);
6133 n
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6135 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6138 n
->c1
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6147 elem
= xmlParseName(ctxt
);
6149 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6150 "xmlParseElementMixedContentDecl : Name expected\n");
6151 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6157 if ((RAW
== ')') && (NXT(1) == '*')) {
6159 cur
->c2
= xmlNewDocElementContent(ctxt
->myDoc
, elem
,
6160 XML_ELEMENT_CONTENT_ELEMENT
);
6161 if (cur
->c2
!= NULL
)
6162 cur
->c2
->parent
= cur
;
6165 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6166 if (ctxt
->input
->id
!= inputchk
) {
6167 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6168 "Element content declaration doesn't start and"
6169 " stop in the same entity\n");
6173 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6174 xmlFatalErr(ctxt
, XML_ERR_MIXED_NOT_STARTED
, NULL
);
6179 xmlFatalErr(ctxt
, XML_ERR_PCDATA_REQUIRED
, NULL
);
6185 * xmlParseElementChildrenContentDeclPriv:
6186 * @ctxt: an XML parser context
6187 * @inputchk: the input used for the current entity, needed for boundary checks
6188 * @depth: the level of recursion
6190 * parse the declaration for a Mixed Element content
6191 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6194 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6196 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6198 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6200 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6202 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6203 * TODO Parameter-entity replacement text must be properly nested
6204 * with parenthesized groups. That is to say, if either of the
6205 * opening or closing parentheses in a choice, seq, or Mixed
6206 * construct is contained in the replacement text for a parameter
6207 * entity, both must be contained in the same replacement text. For
6208 * interoperability, if a parameter-entity reference appears in a
6209 * choice, seq, or Mixed construct, its replacement text should not
6210 * be empty, and neither the first nor last non-blank character of
6211 * the replacement text should be a connector (| or ,).
6213 * Returns the tree of xmlElementContentPtr describing the element
6216 static xmlElementContentPtr
6217 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt
, int inputchk
,
6219 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
6220 const xmlChar
*elem
;
6223 if (((depth
> 128) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
6225 xmlFatalErrMsgInt(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
,
6226 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6233 int inputid
= ctxt
->input
->id
;
6235 /* Recurse on first child */
6238 cur
= ret
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6245 elem
= xmlParseName(ctxt
);
6247 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6250 cur
= ret
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6252 xmlErrMemory(ctxt
, NULL
);
6257 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6259 } else if (RAW
== '*') {
6260 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6262 } else if (RAW
== '+') {
6263 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6266 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6271 while ((RAW
!= ')') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6273 * Each loop we parse one separator and one element.
6276 if (type
== 0) type
= CUR
;
6279 * Detect "Name | Name , Name" error
6281 else if (type
!= CUR
) {
6282 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6283 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6285 if ((last
!= NULL
) && (last
!= ret
))
6286 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6288 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6293 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_SEQ
);
6295 if ((last
!= NULL
) && (last
!= ret
))
6296 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6297 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6315 } else if (RAW
== '|') {
6316 if (type
== 0) type
= CUR
;
6319 * Detect "Name , Name | Name" error
6321 else if (type
!= CUR
) {
6322 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6323 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6325 if ((last
!= NULL
) && (last
!= ret
))
6326 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6328 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6333 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6335 if ((last
!= NULL
) && (last
!= ret
))
6336 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6338 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6357 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
, NULL
);
6358 if ((last
!= NULL
) && (last
!= ret
))
6359 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6361 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6368 int inputid
= ctxt
->input
->id
;
6369 /* Recurse on second child */
6372 last
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6376 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6381 elem
= xmlParseName(ctxt
);
6383 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6385 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6388 last
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6391 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6395 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6397 } else if (RAW
== '*') {
6398 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6400 } else if (RAW
== '+') {
6401 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6404 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6410 if ((cur
!= NULL
) && (last
!= NULL
)) {
6415 if (ctxt
->input
->id
!= inputchk
) {
6416 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6417 "Element content declaration doesn't start and stop in"
6418 " the same entity\n");
6423 if ((ret
->ocur
== XML_ELEMENT_CONTENT_PLUS
) ||
6424 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6425 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6427 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6430 } else if (RAW
== '*') {
6432 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6435 * Some normalization:
6436 * (a | b* | c?)* == (a | b | c)*
6438 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6439 if ((cur
->c1
!= NULL
) &&
6440 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6441 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6442 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6443 if ((cur
->c2
!= NULL
) &&
6444 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6445 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6446 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6451 } else if (RAW
== '+') {
6455 if ((ret
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6456 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6457 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6459 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6461 * Some normalization:
6462 * (a | b*)+ == (a | b)*
6463 * (a | b?)+ == (a | b)*
6465 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6466 if ((cur
->c1
!= NULL
) &&
6467 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6468 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6469 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6472 if ((cur
->c2
!= NULL
) &&
6473 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6474 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6475 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6481 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6489 * xmlParseElementChildrenContentDecl:
6490 * @ctxt: an XML parser context
6491 * @inputchk: the input used for the current entity, needed for boundary checks
6493 * DEPRECATED: Internal function, don't use.
6495 * parse the declaration for a Mixed Element content
6496 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6498 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6500 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6502 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6504 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6506 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6507 * TODO Parameter-entity replacement text must be properly nested
6508 * with parenthesized groups. That is to say, if either of the
6509 * opening or closing parentheses in a choice, seq, or Mixed
6510 * construct is contained in the replacement text for a parameter
6511 * entity, both must be contained in the same replacement text. For
6512 * interoperability, if a parameter-entity reference appears in a
6513 * choice, seq, or Mixed construct, its replacement text should not
6514 * be empty, and neither the first nor last non-blank character of
6515 * the replacement text should be a connector (| or ,).
6517 * Returns the tree of xmlElementContentPtr describing the element
6520 xmlElementContentPtr
6521 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6522 /* stub left for API/ABI compat */
6523 return(xmlParseElementChildrenContentDeclPriv(ctxt
, inputchk
, 1));
6527 * xmlParseElementContentDecl:
6528 * @ctxt: an XML parser context
6529 * @name: the name of the element being defined.
6530 * @result: the Element Content pointer will be stored here if any
6532 * DEPRECATED: Internal function, don't use.
6534 * parse the declaration for an Element content either Mixed or Children,
6535 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6537 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6539 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6543 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, const xmlChar
*name
,
6544 xmlElementContentPtr
*result
) {
6546 xmlElementContentPtr tree
= NULL
;
6547 int inputid
= ctxt
->input
->id
;
6553 xmlFatalErrMsgStr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6554 "xmlParseElementContentDecl : %s '(' expected\n", name
);
6559 if (ctxt
->instate
== XML_PARSER_EOF
)
6562 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6563 tree
= xmlParseElementMixedContentDecl(ctxt
, inputid
);
6564 res
= XML_ELEMENT_TYPE_MIXED
;
6566 tree
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
, 1);
6567 res
= XML_ELEMENT_TYPE_ELEMENT
;
6575 * xmlParseElementDecl:
6576 * @ctxt: an XML parser context
6578 * DEPRECATED: Internal function, don't use.
6580 * Parse an element declaration. Always consumes '<!'.
6582 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6584 * [ VC: Unique Element Type Declaration ]
6585 * No element type may be declared more than once
6587 * Returns the type of the element, or -1 in case of error
6590 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
6591 const xmlChar
*name
;
6593 xmlElementContentPtr content
= NULL
;
6595 if ((CUR
!= '<') || (NXT(1) != '!'))
6599 /* GROW; done in the caller */
6600 if (CMP7(CUR_PTR
, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6601 int inputid
= ctxt
->input
->id
;
6604 if (SKIP_BLANKS
== 0) {
6605 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6606 "Space required after 'ELEMENT'\n");
6609 name
= xmlParseName(ctxt
);
6611 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6612 "xmlParseElementDecl: no name for Element\n");
6615 if (SKIP_BLANKS
== 0) {
6616 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6617 "Space required after the element name\n");
6619 if (CMP5(CUR_PTR
, 'E', 'M', 'P', 'T', 'Y')) {
6622 * Element must always be empty.
6624 ret
= XML_ELEMENT_TYPE_EMPTY
;
6625 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
6629 * Element is a generic container.
6631 ret
= XML_ELEMENT_TYPE_ANY
;
6632 } else if (RAW
== '(') {
6633 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
6636 * [ WFC: PEs in Internal Subset ] error handling.
6638 if ((RAW
== '%') && (ctxt
->external
== 0) &&
6639 (ctxt
->inputNr
== 1)) {
6640 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_IN_INT_SUBSET
,
6641 "PEReference: forbidden within markup decl in internal subset\n");
6643 xmlFatalErrMsg(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6644 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6652 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
6653 if (content
!= NULL
) {
6654 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6657 if (inputid
!= ctxt
->input
->id
) {
6658 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6659 "Element declaration doesn't start and stop in"
6660 " the same entity\n");
6664 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6665 (ctxt
->sax
->elementDecl
!= NULL
)) {
6666 if (content
!= NULL
)
6667 content
->parent
= NULL
;
6668 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
6670 if ((content
!= NULL
) && (content
->parent
== NULL
)) {
6672 * this is a trick: if xmlAddElementDecl is called,
6673 * instead of copying the full tree it is plugged directly
6674 * if called from the parser. Avoid duplicating the
6675 * interfaces or change the API/ABI
6677 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6679 } else if (content
!= NULL
) {
6680 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6688 * xmlParseConditionalSections
6689 * @ctxt: an XML parser context
6691 * Parse a conditional section. Always consumes '<!['.
6693 * [61] conditionalSect ::= includeSect | ignoreSect
6694 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6695 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6696 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6697 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6701 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
6702 int *inputIds
= NULL
;
6703 size_t inputIdsSize
= 0;
6706 while (ctxt
->instate
!= XML_PARSER_EOF
) {
6707 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6708 int id
= ctxt
->input
->id
;
6713 if (CMP7(CUR_PTR
, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6717 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6718 xmlHaltParser(ctxt
);
6721 if (ctxt
->input
->id
!= id
) {
6722 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6723 "All markup of the conditional section is"
6724 " not in the same entity\n");
6728 if (inputIdsSize
<= depth
) {
6731 inputIdsSize
= (inputIdsSize
== 0 ? 4 : inputIdsSize
* 2);
6732 tmp
= (int *) xmlRealloc(inputIds
,
6733 inputIdsSize
* sizeof(int));
6735 xmlErrMemory(ctxt
, NULL
);
6740 inputIds
[depth
] = id
;
6742 } else if (CMP6(CUR_PTR
, 'I', 'G', 'N', 'O', 'R', 'E')) {
6743 size_t ignoreDepth
= 0;
6748 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6749 xmlHaltParser(ctxt
);
6752 if (ctxt
->input
->id
!= id
) {
6753 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6754 "All markup of the conditional section is"
6755 " not in the same entity\n");
6760 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6763 /* Check for integer overflow */
6764 if (ignoreDepth
== 0) {
6765 xmlErrMemory(ctxt
, NULL
);
6768 } else if ((RAW
== ']') && (NXT(1) == ']') &&
6770 if (ignoreDepth
== 0)
6780 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_NOT_FINISHED
, NULL
);
6783 if (ctxt
->input
->id
!= id
) {
6784 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6785 "All markup of the conditional section is"
6786 " not in the same entity\n");
6790 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID_KEYWORD
, NULL
);
6791 xmlHaltParser(ctxt
);
6794 } else if ((depth
> 0) &&
6795 (RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6797 if (ctxt
->input
->id
!= inputIds
[depth
]) {
6798 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6799 "All markup of the conditional section is not"
6800 " in the same entity\n");
6803 } else if ((RAW
== '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6804 xmlParseMarkupDecl(ctxt
);
6806 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6807 xmlHaltParser(ctxt
);
6824 * xmlParseMarkupDecl:
6825 * @ctxt: an XML parser context
6827 * DEPRECATED: Internal function, don't use.
6829 * Parse markup declarations. Always consumes '<!' or '<?'.
6831 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6832 * NotationDecl | PI | Comment
6834 * [ VC: Proper Declaration/PE Nesting ]
6835 * Parameter-entity replacement text must be properly nested with
6836 * markup declarations. That is to say, if either the first character
6837 * or the last character of a markup declaration (markupdecl above) is
6838 * contained in the replacement text for a parameter-entity reference,
6839 * both must be contained in the same replacement text.
6841 * [ WFC: PEs in Internal Subset ]
6842 * In the internal DTD subset, parameter-entity references can occur
6843 * only where markup declarations can occur, not within markup declarations.
6844 * (This does not apply to references that occur in external parameter
6845 * entities or to the external subset.)
6848 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
6851 if (NXT(1) == '!') {
6855 xmlParseElementDecl(ctxt
);
6856 else if (NXT(3) == 'N')
6857 xmlParseEntityDecl(ctxt
);
6862 xmlParseAttributeListDecl(ctxt
);
6865 xmlParseNotationDecl(ctxt
);
6868 xmlParseComment(ctxt
);
6871 /* there is an error but it will be detected later */
6875 } else if (NXT(1) == '?') {
6881 * detect requirement to exit there and act accordingly
6882 * and avoid having instate overridden later on
6884 if (ctxt
->instate
== XML_PARSER_EOF
)
6887 ctxt
->instate
= XML_PARSER_DTD
;
6892 * @ctxt: an XML parser context
6894 * DEPRECATED: Internal function, don't use.
6896 * parse an XML declaration header for external entities
6898 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6902 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
6904 const xmlChar
*encoding
;
6908 * We know that '<?xml' is here.
6910 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6913 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_STARTED
, NULL
);
6917 /* Avoid expansion of parameter entities when skipping blanks. */
6918 oldstate
= ctxt
->instate
;
6919 ctxt
->instate
= XML_PARSER_START
;
6921 if (SKIP_BLANKS
== 0) {
6922 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6923 "Space needed after '<?xml'\n");
6927 * We may have the VersionInfo here.
6929 version
= xmlParseVersionInfo(ctxt
);
6930 if (version
== NULL
)
6931 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
6933 if (SKIP_BLANKS
== 0) {
6934 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6935 "Space needed here\n");
6938 ctxt
->input
->version
= version
;
6941 * We must have the encoding declaration
6943 encoding
= xmlParseEncodingDecl(ctxt
);
6944 if (ctxt
->instate
== XML_PARSER_EOF
)
6946 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6948 * The XML REC instructs us to stop parsing right here
6950 ctxt
->instate
= oldstate
;
6953 if ((encoding
== NULL
) && (ctxt
->errNo
== XML_ERR_OK
)) {
6954 xmlFatalErrMsg(ctxt
, XML_ERR_MISSING_ENCODING
,
6955 "Missing encoding in text declaration\n");
6959 if ((RAW
== '?') && (NXT(1) == '>')) {
6961 } else if (RAW
== '>') {
6962 /* Deprecated old WD ... */
6963 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6968 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6969 while ((c
= CUR
) != 0) {
6976 ctxt
->instate
= oldstate
;
6980 * xmlParseExternalSubset:
6981 * @ctxt: an XML parser context
6982 * @ExternalID: the external identifier
6983 * @SystemID: the system identifier (or URL)
6985 * parse Markup declarations from an external subset
6987 * [30] extSubset ::= textDecl? extSubsetDecl
6989 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6992 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
6993 const xmlChar
*SystemID
) {
6994 xmlDetectSAX2(ctxt
);
6997 if ((ctxt
->encoding
== NULL
) &&
6998 (ctxt
->input
->end
- ctxt
->input
->cur
>= 4)) {
7000 xmlCharEncoding enc
;
7006 enc
= xmlDetectCharEncoding(start
, 4);
7007 if (enc
!= XML_CHAR_ENCODING_NONE
)
7008 xmlSwitchEncoding(ctxt
, enc
);
7011 if (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) {
7012 xmlParseTextDecl(ctxt
);
7013 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7015 * The XML REC instructs us to stop parsing right here
7017 xmlHaltParser(ctxt
);
7021 if (ctxt
->myDoc
== NULL
) {
7022 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
7023 if (ctxt
->myDoc
== NULL
) {
7024 xmlErrMemory(ctxt
, "New Doc failed");
7027 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
7029 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
7030 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
7032 ctxt
->instate
= XML_PARSER_DTD
;
7035 while ((ctxt
->instate
!= XML_PARSER_EOF
) && (RAW
!= 0)) {
7037 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7038 xmlParseConditionalSections(ctxt
);
7039 } else if ((RAW
== '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7040 xmlParseMarkupDecl(ctxt
);
7042 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7043 xmlHaltParser(ctxt
);
7051 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7057 * xmlParseReference:
7058 * @ctxt: an XML parser context
7060 * DEPRECATED: Internal function, don't use.
7062 * parse and handle entity references in content, depending on the SAX
7063 * interface, this may end-up in a call to character() if this is a
7064 * CharRef, a predefined entity, if there is no reference() callback.
7065 * or if the parser was asked to switch to that mode.
7067 * Always consumes '&'.
7069 * [67] Reference ::= EntityRef | CharRef
7072 xmlParseReference(xmlParserCtxtPtr ctxt
) {
7076 xmlNodePtr list
= NULL
;
7077 xmlParserErrors ret
= XML_ERR_OK
;
7084 * Simple case of a CharRef
7086 if (NXT(1) == '#') {
7090 int value
= xmlParseCharRef(ctxt
);
7094 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
7096 * So we are using non-UTF-8 buffers
7097 * Check that the char fits in 8 bits; if not,
7098 * generate a CharRef.
7100 if (value
<= 0xFF) {
7103 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7104 (!ctxt
->disableSAX
))
7105 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
7107 if ((hex
== 'x') || (hex
== 'X'))
7108 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
7110 snprintf((char *)out
, sizeof(out
), "#%d", value
);
7111 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7112 (!ctxt
->disableSAX
))
7113 ctxt
->sax
->reference(ctxt
->userData
, out
);
7117 * Just encode the value in UTF-8
7119 COPY_BUF(0 ,out
, i
, value
);
7121 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7122 (!ctxt
->disableSAX
))
7123 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
7129 * We are seeing an entity reference
7131 ent
= xmlParseEntityRef(ctxt
);
7132 if (ent
== NULL
) return;
7133 if (!ctxt
->wellFormed
)
7135 was_checked
= ent
->flags
& XML_ENT_PARSED
;
7137 /* special case of predefined entities */
7138 if ((ent
->name
== NULL
) ||
7139 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
7141 if (val
== NULL
) return;
7143 * inline the entity.
7145 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7146 (!ctxt
->disableSAX
))
7147 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
7152 * The first reference to the entity triggers a parsing phase
7153 * where the ent->children is filled with the result from
7155 * Note: external parsed entities will not be loaded, it is not
7156 * required for a non-validating parser, unless the parsing option
7157 * of validating, or substituting entities were given. Doing so is
7158 * far more secure as the parser will only process data coming from
7159 * the document entity by default.
7161 if (((ent
->flags
& XML_ENT_PARSED
) == 0) &&
7162 ((ent
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
) ||
7163 (ctxt
->options
& (XML_PARSE_NOENT
| XML_PARSE_DTDVALID
)))) {
7164 unsigned long oldsizeentcopy
= ctxt
->sizeentcopy
;
7167 * This is a bit hackish but this seems the best
7168 * way to make sure both SAX and DOM entity support
7172 if (ctxt
->userData
== ctxt
)
7175 user_data
= ctxt
->userData
;
7177 /* Avoid overflow as much as possible */
7178 ctxt
->sizeentcopy
= 0;
7180 if (ent
->flags
& XML_ENT_EXPANDING
) {
7181 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7182 xmlHaltParser(ctxt
);
7186 ent
->flags
|= XML_ENT_EXPANDING
;
7189 * Check that this entity is well formed
7190 * 4.3.2: An internal general parsed entity is well-formed
7191 * if its replacement text matches the production labeled
7194 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7196 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
, ent
->content
,
7200 } else if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7202 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
, ctxt
->sax
,
7203 user_data
, ctxt
->depth
, ent
->URI
,
7204 ent
->ExternalID
, &list
);
7207 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7208 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7209 "invalid entity type found\n", NULL
);
7212 ent
->flags
&= ~XML_ENT_EXPANDING
;
7213 ent
->flags
|= XML_ENT_PARSED
| XML_ENT_CHECKED
;
7214 ent
->expandedSize
= ctxt
->sizeentcopy
;
7215 if (ret
== XML_ERR_ENTITY_LOOP
) {
7216 xmlHaltParser(ctxt
);
7217 xmlFreeNodeList(list
);
7220 if (xmlParserEntityCheck(ctxt
, oldsizeentcopy
)) {
7221 xmlFreeNodeList(list
);
7225 if ((ret
== XML_ERR_OK
) && (list
!= NULL
)) {
7226 ent
->children
= list
;
7228 * Prune it directly in the generated document
7229 * except for single text nodes.
7231 if ((ctxt
->replaceEntities
== 0) ||
7232 (ctxt
->parseMode
== XML_PARSE_READER
) ||
7233 ((list
->type
== XML_TEXT_NODE
) &&
7234 (list
->next
== NULL
))) {
7236 while (list
!= NULL
) {
7237 list
->parent
= (xmlNodePtr
) ent
;
7238 if (list
->doc
!= ent
->doc
)
7239 xmlSetTreeDoc(list
, ent
->doc
);
7240 if (list
->next
== NULL
)
7247 while (list
!= NULL
) {
7248 list
->parent
= (xmlNodePtr
) ctxt
->node
;
7249 list
->doc
= ctxt
->myDoc
;
7250 if (list
->next
== NULL
)
7254 list
= ent
->children
;
7255 #ifdef LIBXML_LEGACY_ENABLED
7256 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7257 xmlAddEntityReference(ent
, list
, NULL
);
7258 #endif /* LIBXML_LEGACY_ENABLED */
7260 } else if ((ret
!= XML_ERR_OK
) &&
7261 (ret
!= XML_WAR_UNDECLARED_ENTITY
)) {
7262 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7263 "Entity '%s' failed to parse\n", ent
->name
);
7264 if (ent
->content
!= NULL
)
7265 ent
->content
[0] = 0;
7266 } else if (list
!= NULL
) {
7267 xmlFreeNodeList(list
);
7271 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7276 * Now that the entity content has been gathered
7277 * provide it to the application, this can take different forms based
7278 * on the parsing modes.
7280 if (ent
->children
== NULL
) {
7282 * Probably running in SAX mode and the callbacks don't
7283 * build the entity content. So unless we already went
7284 * through parsing for first checking, go through the entity
7285 * content to generate callbacks associated to the entity
7287 if (was_checked
!= 0) {
7290 * This is a bit hackish but this seems the best
7291 * way to make sure both SAX and DOM entity support
7294 if (ctxt
->userData
== ctxt
)
7297 user_data
= ctxt
->userData
;
7299 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7301 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
7302 ent
->content
, user_data
, NULL
);
7304 } else if (ent
->etype
==
7305 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7306 unsigned long oldsizeentities
= ctxt
->sizeentities
;
7309 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
7310 ctxt
->sax
, user_data
, ctxt
->depth
,
7311 ent
->URI
, ent
->ExternalID
, NULL
);
7314 /* Undo the change to sizeentities */
7315 ctxt
->sizeentities
= oldsizeentities
;
7317 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7318 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7319 "invalid entity type found\n", NULL
);
7321 if (ret
== XML_ERR_ENTITY_LOOP
) {
7322 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7325 if (xmlParserEntityCheck(ctxt
, 0))
7328 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7329 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7331 * Entity reference callback comes second, it's somewhat
7332 * superfluous but a compatibility to historical behaviour
7334 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7340 * We also check for amplification if entities aren't substituted.
7341 * They might be expanded later.
7343 if ((was_checked
!= 0) &&
7344 (xmlParserEntityCheck(ctxt
, ent
->expandedSize
)))
7348 * If we didn't get any children for the entity being built
7350 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7351 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7355 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7359 if (ctxt
->replaceEntities
) {
7361 * There is a problem on the handling of _private for entities
7362 * (bug 155816): Should we copy the content of the field from
7363 * the entity (possibly overwriting some value set by the user
7364 * when a copy is created), should we leave it alone, or should
7365 * we try to take care of different situations? The problem
7366 * is exacerbated by the usage of this field by the xmlReader.
7367 * To fix this bug, we look at _private on the created node
7368 * and, if it's NULL, we copy in whatever was in the entity.
7369 * If it's not NULL we leave it alone. This is somewhat of a
7370 * hack - maybe we should have further tests to determine
7373 if (ctxt
->node
!= NULL
) {
7375 * Seems we are generating the DOM content, do
7376 * a simple tree copy for all references except the first
7377 * In the first occurrence list contains the replacement.
7379 if (((list
== NULL
) && (ent
->owner
== 0)) ||
7380 (ctxt
->parseMode
== XML_PARSE_READER
)) {
7381 xmlNodePtr nw
= NULL
, cur
, firstChild
= NULL
;
7384 * when operating on a reader, the entities definitions
7385 * are always owning the entities subtree.
7386 if (ctxt->parseMode == XML_PARSE_READER)
7390 cur
= ent
->children
;
7391 while (cur
!= NULL
) {
7392 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7394 if (nw
->_private
== NULL
)
7395 nw
->_private
= cur
->_private
;
7396 if (firstChild
== NULL
){
7399 nw
= xmlAddChild(ctxt
->node
, nw
);
7401 if (cur
== ent
->last
) {
7403 * needed to detect some strange empty
7404 * node cases in the reader tests
7406 if ((ctxt
->parseMode
== XML_PARSE_READER
) &&
7408 (nw
->type
== XML_ELEMENT_NODE
) &&
7409 (nw
->children
== NULL
))
7416 #ifdef LIBXML_LEGACY_ENABLED
7417 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7418 xmlAddEntityReference(ent
, firstChild
, nw
);
7419 #endif /* LIBXML_LEGACY_ENABLED */
7420 } else if ((list
== NULL
) || (ctxt
->inputNr
> 0)) {
7421 xmlNodePtr nw
= NULL
, cur
, next
, last
,
7425 * Copy the entity child list and make it the new
7426 * entity child list. The goal is to make sure any
7427 * ID or REF referenced will be the one from the
7428 * document content and not the entity copy.
7430 cur
= ent
->children
;
7431 ent
->children
= NULL
;
7434 while (cur
!= NULL
) {
7438 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7440 if (nw
->_private
== NULL
)
7441 nw
->_private
= cur
->_private
;
7442 if (firstChild
== NULL
){
7445 xmlAddChild((xmlNodePtr
) ent
, nw
);
7447 xmlAddChild(ctxt
->node
, cur
);
7452 if (ent
->owner
== 0)
7454 #ifdef LIBXML_LEGACY_ENABLED
7455 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7456 xmlAddEntityReference(ent
, firstChild
, nw
);
7457 #endif /* LIBXML_LEGACY_ENABLED */
7459 const xmlChar
*nbktext
;
7462 * the name change is to avoid coalescing of the
7463 * node with a possible previous text one which
7464 * would make ent->children a dangling pointer
7466 nbktext
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"nbktext",
7468 if (ent
->children
->type
== XML_TEXT_NODE
)
7469 ent
->children
->name
= nbktext
;
7470 if ((ent
->last
!= ent
->children
) &&
7471 (ent
->last
->type
== XML_TEXT_NODE
))
7472 ent
->last
->name
= nbktext
;
7473 xmlAddChildList(ctxt
->node
, ent
->children
);
7477 * This is to avoid a nasty side effect, see
7478 * characters() in SAX.c
7488 * xmlParseEntityRef:
7489 * @ctxt: an XML parser context
7491 * DEPRECATED: Internal function, don't use.
7493 * Parse an entity reference. Always consumes '&'.
7495 * [68] EntityRef ::= '&' Name ';'
7497 * [ WFC: Entity Declared ]
7498 * In a document without any DTD, a document with only an internal DTD
7499 * subset which contains no parameter entity references, or a document
7500 * with "standalone='yes'", the Name given in the entity reference
7501 * must match that in an entity declaration, except that well-formed
7502 * documents need not declare any of the following entities: amp, lt,
7503 * gt, apos, quot. The declaration of a parameter entity must precede
7504 * any reference to it. Similarly, the declaration of a general entity
7505 * must precede any reference to it which appears in a default value in an
7506 * attribute-list declaration. Note that if entities are declared in the
7507 * external subset or in external parameter entities, a non-validating
7508 * processor is not obligated to read and process their declarations;
7509 * for such documents, the rule that an entity must be declared is a
7510 * well-formedness constraint only if standalone='yes'.
7512 * [ WFC: Parsed Entity ]
7513 * An entity reference must not contain the name of an unparsed entity
7515 * Returns the xmlEntityPtr if found, or NULL otherwise.
7518 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
7519 const xmlChar
*name
;
7520 xmlEntityPtr ent
= NULL
;
7523 if (ctxt
->instate
== XML_PARSER_EOF
)
7529 name
= xmlParseName(ctxt
);
7531 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7532 "xmlParseEntityRef: no name\n");
7536 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7542 * Predefined entities override any extra definition
7544 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7545 ent
= xmlGetPredefinedEntity(name
);
7551 * Ask first SAX for entity resolution, otherwise try the
7552 * entities which may have been stored in the parser context.
7554 if (ctxt
->sax
!= NULL
) {
7555 if (ctxt
->sax
->getEntity
!= NULL
)
7556 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7557 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7558 (ctxt
->options
& XML_PARSE_OLDSAX
))
7559 ent
= xmlGetPredefinedEntity(name
);
7560 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7561 (ctxt
->userData
==ctxt
)) {
7562 ent
= xmlSAX2GetEntity(ctxt
, name
);
7565 if (ctxt
->instate
== XML_PARSER_EOF
)
7568 * [ WFC: Entity Declared ]
7569 * In a document without any DTD, a document with only an
7570 * internal DTD subset which contains no parameter entity
7571 * references, or a document with "standalone='yes'", the
7572 * Name given in the entity reference must match that in an
7573 * entity declaration, except that well-formed documents
7574 * need not declare any of the following entities: amp, lt,
7576 * The declaration of a parameter entity must precede any
7578 * Similarly, the declaration of a general entity must
7579 * precede any reference to it which appears in a default
7580 * value in an attribute-list declaration. Note that if
7581 * entities are declared in the external subset or in
7582 * external parameter entities, a non-validating processor
7583 * is not obligated to read and process their declarations;
7584 * for such documents, the rule that an entity must be
7585 * declared is a well-formedness constraint only if
7589 if ((ctxt
->standalone
== 1) ||
7590 ((ctxt
->hasExternalSubset
== 0) &&
7591 (ctxt
->hasPErefs
== 0))) {
7592 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7593 "Entity '%s' not defined\n", name
);
7595 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7596 "Entity '%s' not defined\n", name
);
7597 if ((ctxt
->inSubset
== 0) &&
7598 (ctxt
->sax
!= NULL
) &&
7599 (ctxt
->sax
->reference
!= NULL
)) {
7600 ctxt
->sax
->reference(ctxt
->userData
, name
);
7607 * [ WFC: Parsed Entity ]
7608 * An entity reference must not contain the name of an
7611 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7612 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7613 "Entity reference to unparsed entity %s\n", name
);
7617 * [ WFC: No External Entity References ]
7618 * Attribute values cannot contain direct or indirect
7619 * entity references to external entities.
7621 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7622 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7623 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7624 "Attribute references external entity '%s'\n", name
);
7627 * [ WFC: No < in Attribute Values ]
7628 * The replacement text of any entity referred to directly or
7629 * indirectly in an attribute value (other than "<") must
7632 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7633 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
7634 if ((ent
->flags
& XML_ENT_CHECKED_LT
) == 0) {
7635 if ((ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<')))
7636 ent
->flags
|= XML_ENT_CONTAINS_LT
;
7637 ent
->flags
|= XML_ENT_CHECKED_LT
;
7639 if (ent
->flags
& XML_ENT_CONTAINS_LT
)
7640 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7641 "'<' in entity '%s' is not allowed in attributes "
7646 * Internal check, no parameter entities here ...
7649 switch (ent
->etype
) {
7650 case XML_INTERNAL_PARAMETER_ENTITY
:
7651 case XML_EXTERNAL_PARAMETER_ENTITY
:
7652 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7653 "Attempt to reference the parameter entity '%s'\n",
7662 * [ WFC: No Recursion ]
7663 * A parsed entity must not contain a recursive reference
7664 * to itself, either directly or indirectly.
7665 * Done somewhere else
7671 * xmlParseStringEntityRef:
7672 * @ctxt: an XML parser context
7673 * @str: a pointer to an index in the string
7675 * parse ENTITY references declarations, but this version parses it from
7678 * [68] EntityRef ::= '&' Name ';'
7680 * [ WFC: Entity Declared ]
7681 * In a document without any DTD, a document with only an internal DTD
7682 * subset which contains no parameter entity references, or a document
7683 * with "standalone='yes'", the Name given in the entity reference
7684 * must match that in an entity declaration, except that well-formed
7685 * documents need not declare any of the following entities: amp, lt,
7686 * gt, apos, quot. The declaration of a parameter entity must precede
7687 * any reference to it. Similarly, the declaration of a general entity
7688 * must precede any reference to it which appears in a default value in an
7689 * attribute-list declaration. Note that if entities are declared in the
7690 * external subset or in external parameter entities, a non-validating
7691 * processor is not obligated to read and process their declarations;
7692 * for such documents, the rule that an entity must be declared is a
7693 * well-formedness constraint only if standalone='yes'.
7695 * [ WFC: Parsed Entity ]
7696 * An entity reference must not contain the name of an unparsed entity
7698 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7699 * is updated to the current location in the string.
7702 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
7706 xmlEntityPtr ent
= NULL
;
7708 if ((str
== NULL
) || (*str
== NULL
))
7716 name
= xmlParseStringName(ctxt
, &ptr
);
7718 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7719 "xmlParseStringEntityRef: no name\n");
7724 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7733 * Predefined entities override any extra definition
7735 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7736 ent
= xmlGetPredefinedEntity(name
);
7745 * Ask first SAX for entity resolution, otherwise try the
7746 * entities which may have been stored in the parser context.
7748 if (ctxt
->sax
!= NULL
) {
7749 if (ctxt
->sax
->getEntity
!= NULL
)
7750 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7751 if ((ent
== NULL
) && (ctxt
->options
& XML_PARSE_OLDSAX
))
7752 ent
= xmlGetPredefinedEntity(name
);
7753 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
7754 ent
= xmlSAX2GetEntity(ctxt
, name
);
7757 if (ctxt
->instate
== XML_PARSER_EOF
) {
7763 * [ WFC: Entity Declared ]
7764 * In a document without any DTD, a document with only an
7765 * internal DTD subset which contains no parameter entity
7766 * references, or a document with "standalone='yes'", the
7767 * Name given in the entity reference must match that in an
7768 * entity declaration, except that well-formed documents
7769 * need not declare any of the following entities: amp, lt,
7771 * The declaration of a parameter entity must precede any
7773 * Similarly, the declaration of a general entity must
7774 * precede any reference to it which appears in a default
7775 * value in an attribute-list declaration. Note that if
7776 * entities are declared in the external subset or in
7777 * external parameter entities, a non-validating processor
7778 * is not obligated to read and process their declarations;
7779 * for such documents, the rule that an entity must be
7780 * declared is a well-formedness constraint only if
7784 if ((ctxt
->standalone
== 1) ||
7785 ((ctxt
->hasExternalSubset
== 0) &&
7786 (ctxt
->hasPErefs
== 0))) {
7787 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7788 "Entity '%s' not defined\n", name
);
7790 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7791 "Entity '%s' not defined\n",
7794 /* TODO ? check regressions ctxt->valid = 0; */
7798 * [ WFC: Parsed Entity ]
7799 * An entity reference must not contain the name of an
7802 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7803 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7804 "Entity reference to unparsed entity %s\n", name
);
7808 * [ WFC: No External Entity References ]
7809 * Attribute values cannot contain direct or indirect
7810 * entity references to external entities.
7812 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7813 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7814 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7815 "Attribute references external entity '%s'\n", name
);
7818 * [ WFC: No < in Attribute Values ]
7819 * The replacement text of any entity referred to directly or
7820 * indirectly in an attribute value (other than "<") must
7823 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7824 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
7825 if ((ent
->flags
& XML_ENT_CHECKED_LT
) == 0) {
7826 if ((ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<')))
7827 ent
->flags
|= XML_ENT_CONTAINS_LT
;
7828 ent
->flags
|= XML_ENT_CHECKED_LT
;
7830 if (ent
->flags
& XML_ENT_CONTAINS_LT
)
7831 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7832 "'<' in entity '%s' is not allowed in attributes "
7837 * Internal check, no parameter entities here ...
7840 switch (ent
->etype
) {
7841 case XML_INTERNAL_PARAMETER_ENTITY
:
7842 case XML_EXTERNAL_PARAMETER_ENTITY
:
7843 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7844 "Attempt to reference the parameter entity '%s'\n",
7853 * [ WFC: No Recursion ]
7854 * A parsed entity must not contain a recursive reference
7855 * to itself, either directly or indirectly.
7856 * Done somewhere else
7865 * xmlParsePEReference:
7866 * @ctxt: an XML parser context
7868 * DEPRECATED: Internal function, don't use.
7870 * Parse a parameter entity reference. Always consumes '%'.
7872 * The entity content is handled directly by pushing its content as
7873 * a new input stream.
7875 * [69] PEReference ::= '%' Name ';'
7877 * [ WFC: No Recursion ]
7878 * A parsed entity must not contain a recursive
7879 * reference to itself, either directly or indirectly.
7881 * [ WFC: Entity Declared ]
7882 * In a document without any DTD, a document with only an internal DTD
7883 * subset which contains no parameter entity references, or a document
7884 * with "standalone='yes'", ... ... The declaration of a parameter
7885 * entity must precede any reference to it...
7887 * [ VC: Entity Declared ]
7888 * In a document with an external subset or external parameter entities
7889 * with "standalone='no'", ... ... The declaration of a parameter entity
7890 * must precede any reference to it...
7893 * Parameter-entity references may only appear in the DTD.
7894 * NOTE: misleading but this is handled.
7897 xmlParsePEReference(xmlParserCtxtPtr ctxt
)
7899 const xmlChar
*name
;
7900 xmlEntityPtr entity
= NULL
;
7901 xmlParserInputPtr input
;
7906 name
= xmlParseName(ctxt
);
7908 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_NO_NAME
, "PEReference: no name\n");
7911 if (xmlParserDebugEntities
)
7912 xmlGenericError(xmlGenericErrorContext
,
7913 "PEReference: %s\n", name
);
7915 xmlFatalErr(ctxt
, XML_ERR_PEREF_SEMICOL_MISSING
, NULL
);
7922 * Request the entity from SAX
7924 if ((ctxt
->sax
!= NULL
) &&
7925 (ctxt
->sax
->getParameterEntity
!= NULL
))
7926 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
7927 if (ctxt
->instate
== XML_PARSER_EOF
)
7929 if (entity
== NULL
) {
7931 * [ WFC: Entity Declared ]
7932 * In a document without any DTD, a document with only an
7933 * internal DTD subset which contains no parameter entity
7934 * references, or a document with "standalone='yes'", ...
7935 * ... The declaration of a parameter entity must precede
7936 * any reference to it...
7938 if ((ctxt
->standalone
== 1) ||
7939 ((ctxt
->hasExternalSubset
== 0) &&
7940 (ctxt
->hasPErefs
== 0))) {
7941 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7942 "PEReference: %%%s; not found\n",
7946 * [ VC: Entity Declared ]
7947 * In a document with an external subset or external
7948 * parameter entities with "standalone='no'", ...
7949 * ... The declaration of a parameter entity must
7950 * precede any reference to it...
7952 if ((ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
7953 xmlValidityError(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7954 "PEReference: %%%s; not found\n",
7957 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7958 "PEReference: %%%s; not found\n",
7964 * Internal checking in case the entity quest barfed
7966 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7967 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7968 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7969 "Internal: %%%s; is not a parameter entity\n",
7973 xmlCharEncoding enc
;
7974 unsigned long parentConsumed
;
7975 xmlEntityPtr oldEnt
;
7977 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
7978 ((ctxt
->options
& XML_PARSE_NOENT
) == 0) &&
7979 ((ctxt
->options
& XML_PARSE_DTDVALID
) == 0) &&
7980 ((ctxt
->options
& XML_PARSE_DTDLOAD
) == 0) &&
7981 ((ctxt
->options
& XML_PARSE_DTDATTR
) == 0) &&
7982 (ctxt
->replaceEntities
== 0) &&
7983 (ctxt
->validate
== 0))
7986 if (entity
->flags
& XML_ENT_EXPANDING
) {
7987 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7988 xmlHaltParser(ctxt
);
7992 /* Must be computed from old input before pushing new input. */
7993 parentConsumed
= ctxt
->input
->parentConsumed
;
7994 oldEnt
= ctxt
->input
->entity
;
7995 if ((oldEnt
== NULL
) ||
7996 ((oldEnt
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
7997 ((oldEnt
->flags
& XML_ENT_PARSED
) == 0))) {
7998 xmlSaturatedAdd(&parentConsumed
, ctxt
->input
->consumed
);
7999 xmlSaturatedAddSizeT(&parentConsumed
,
8000 ctxt
->input
->cur
- ctxt
->input
->base
);
8003 input
= xmlNewEntityInputStream(ctxt
, entity
);
8004 if (xmlPushInput(ctxt
, input
) < 0) {
8005 xmlFreeInputStream(input
);
8009 entity
->flags
|= XML_ENT_EXPANDING
;
8011 input
->parentConsumed
= parentConsumed
;
8013 if (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) {
8015 * Get the 4 first bytes and decode the charset
8016 * if enc != XML_CHAR_ENCODING_NONE
8017 * plug some encoding conversion routines.
8018 * Note that, since we may have some non-UTF8
8019 * encoding (like UTF16, bug 135229), the 'length'
8020 * is not known, but we can calculate based upon
8021 * the amount of data in the buffer.
8024 if (ctxt
->instate
== XML_PARSER_EOF
)
8026 if ((ctxt
->input
->end
- ctxt
->input
->cur
)>=4) {
8031 enc
= xmlDetectCharEncoding(start
, 4);
8032 if (enc
!= XML_CHAR_ENCODING_NONE
) {
8033 xmlSwitchEncoding(ctxt
, enc
);
8037 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) &&
8038 (IS_BLANK_CH(NXT(5)))) {
8039 xmlParseTextDecl(ctxt
);
8044 ctxt
->hasPErefs
= 1;
8048 * xmlLoadEntityContent:
8049 * @ctxt: an XML parser context
8050 * @entity: an unloaded system entity
8052 * Load the original content of the given system entity from the
8053 * ExternalID/SystemID given. This is to be used for Included in Literal
8054 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
8056 * Returns 0 in case of success and -1 in case of failure
8059 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
8060 xmlParserInputPtr input
;
8064 if ((ctxt
== NULL
) || (entity
== NULL
) ||
8065 ((entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
) &&
8066 (entity
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) ||
8067 (entity
->content
!= NULL
)) {
8068 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8069 "xmlLoadEntityContent parameter error");
8073 if (xmlParserDebugEntities
)
8074 xmlGenericError(xmlGenericErrorContext
,
8075 "Reading %s entity content input\n", entity
->name
);
8077 buf
= xmlBufferCreate();
8079 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8080 "xmlLoadEntityContent parameter error");
8083 xmlBufferSetAllocationScheme(buf
, XML_BUFFER_ALLOC_DOUBLEIT
);
8085 input
= xmlNewEntityInputStream(ctxt
, entity
);
8086 if (input
== NULL
) {
8087 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8088 "xmlLoadEntityContent input error");
8094 * Push the entity as the current input, read char by char
8095 * saving to the buffer until the end of the entity or an error
8097 if (xmlPushInput(ctxt
, input
) < 0) {
8099 xmlFreeInputStream(input
);
8105 while ((ctxt
->input
== input
) && (ctxt
->input
->cur
< ctxt
->input
->end
) &&
8107 xmlBufferAdd(buf
, ctxt
->input
->cur
, l
);
8111 if (ctxt
->instate
== XML_PARSER_EOF
) {
8116 if ((ctxt
->input
== input
) && (ctxt
->input
->cur
>= ctxt
->input
->end
)) {
8117 xmlSaturatedAdd(&ctxt
->sizeentities
, ctxt
->input
->consumed
);
8119 } else if (!IS_CHAR(c
)) {
8120 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
8121 "xmlLoadEntityContent: invalid char value %d\n",
8126 entity
->content
= buf
->content
;
8127 entity
->length
= buf
->use
;
8128 buf
->content
= NULL
;
8135 * xmlParseStringPEReference:
8136 * @ctxt: an XML parser context
8137 * @str: a pointer to an index in the string
8139 * parse PEReference declarations
8141 * [69] PEReference ::= '%' Name ';'
8143 * [ WFC: No Recursion ]
8144 * A parsed entity must not contain a recursive
8145 * reference to itself, either directly or indirectly.
8147 * [ WFC: Entity Declared ]
8148 * In a document without any DTD, a document with only an internal DTD
8149 * subset which contains no parameter entity references, or a document
8150 * with "standalone='yes'", ... ... The declaration of a parameter
8151 * entity must precede any reference to it...
8153 * [ VC: Entity Declared ]
8154 * In a document with an external subset or external parameter entities
8155 * with "standalone='no'", ... ... The declaration of a parameter entity
8156 * must precede any reference to it...
8159 * Parameter-entity references may only appear in the DTD.
8160 * NOTE: misleading but this is handled.
8162 * Returns the string of the entity content.
8163 * str is updated to the current value of the index
8166 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
8170 xmlEntityPtr entity
= NULL
;
8172 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
8178 name
= xmlParseStringName(ctxt
, &ptr
);
8180 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8181 "xmlParseStringPEReference: no name\n");
8187 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
8195 * Request the entity from SAX
8197 if ((ctxt
->sax
!= NULL
) &&
8198 (ctxt
->sax
->getParameterEntity
!= NULL
))
8199 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
8200 if (ctxt
->instate
== XML_PARSER_EOF
) {
8205 if (entity
== NULL
) {
8207 * [ WFC: Entity Declared ]
8208 * In a document without any DTD, a document with only an
8209 * internal DTD subset which contains no parameter entity
8210 * references, or a document with "standalone='yes'", ...
8211 * ... The declaration of a parameter entity must precede
8212 * any reference to it...
8214 if ((ctxt
->standalone
== 1) ||
8215 ((ctxt
->hasExternalSubset
== 0) && (ctxt
->hasPErefs
== 0))) {
8216 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
8217 "PEReference: %%%s; not found\n", name
);
8220 * [ VC: Entity Declared ]
8221 * In a document with an external subset or external
8222 * parameter entities with "standalone='no'", ...
8223 * ... The declaration of a parameter entity must
8224 * precede any reference to it...
8226 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8227 "PEReference: %%%s; not found\n",
8233 * Internal checking in case the entity quest barfed
8235 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
8236 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
8237 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8238 "%%%s; is not a parameter entity\n",
8242 ctxt
->hasPErefs
= 1;
8249 * xmlParseDocTypeDecl:
8250 * @ctxt: an XML parser context
8252 * DEPRECATED: Internal function, don't use.
8254 * parse a DOCTYPE declaration
8256 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8257 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8259 * [ VC: Root Element Type ]
8260 * The Name in the document type declaration must match the element
8261 * type of the root element.
8265 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
8266 const xmlChar
*name
= NULL
;
8267 xmlChar
*ExternalID
= NULL
;
8268 xmlChar
*URI
= NULL
;
8271 * We know that '<!DOCTYPE' has been detected.
8278 * Parse the DOCTYPE name.
8280 name
= xmlParseName(ctxt
);
8282 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8283 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8285 ctxt
->intSubName
= name
;
8290 * Check for SystemID and ExternalID
8292 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
8294 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
8295 ctxt
->hasExternalSubset
= 1;
8297 ctxt
->extSubURI
= URI
;
8298 ctxt
->extSubSystem
= ExternalID
;
8303 * Create and update the internal subset.
8305 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
8306 (!ctxt
->disableSAX
))
8307 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
8308 if (ctxt
->instate
== XML_PARSER_EOF
)
8312 * Is there any internal subset declarations ?
8313 * they are handled separately in xmlParseInternalSubset()
8319 * We should be at the end of the DOCTYPE declaration.
8322 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8328 * xmlParseInternalSubset:
8329 * @ctxt: an XML parser context
8331 * parse the internal subset declaration
8333 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8337 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
8339 * Is there any DTD definition ?
8342 int baseInputNr
= ctxt
->inputNr
;
8343 ctxt
->instate
= XML_PARSER_DTD
;
8346 * Parse the succession of Markup declarations and
8348 * Subsequence (markupdecl | PEReference | S)*
8351 while (((RAW
!= ']') || (ctxt
->inputNr
> baseInputNr
)) &&
8352 (ctxt
->instate
!= XML_PARSER_EOF
)) {
8355 * Conditional sections are allowed from external entities included
8356 * by PE References in the internal subset.
8358 if ((ctxt
->inputNr
> 1) && (ctxt
->input
->filename
!= NULL
) &&
8359 (RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8360 xmlParseConditionalSections(ctxt
);
8361 } else if ((RAW
== '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8362 xmlParseMarkupDecl(ctxt
);
8363 } else if (RAW
== '%') {
8364 xmlParsePEReference(ctxt
);
8366 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8367 "xmlParseInternalSubset: error detected in"
8368 " Markup declaration\n");
8369 xmlHaltParser(ctxt
);
8383 * We should be at the end of the DOCTYPE declaration.
8386 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8392 #ifdef LIBXML_SAX1_ENABLED
8394 * xmlParseAttribute:
8395 * @ctxt: an XML parser context
8396 * @value: a xmlChar ** used to store the value of the attribute
8398 * DEPRECATED: Internal function, don't use.
8400 * parse an attribute
8402 * [41] Attribute ::= Name Eq AttValue
8404 * [ WFC: No External Entity References ]
8405 * Attribute values cannot contain direct or indirect entity references
8406 * to external entities.
8408 * [ WFC: No < in Attribute Values ]
8409 * The replacement text of any entity referred to directly or indirectly in
8410 * an attribute value (other than "<") must not contain a <.
8412 * [ VC: Attribute Value Type ]
8413 * The attribute must have been declared; the value must be of the type
8416 * [25] Eq ::= S? '=' S?
8420 * [NS 11] Attribute ::= QName Eq AttValue
8422 * Also the case QName == xmlns:??? is handled independently as a namespace
8425 * Returns the attribute name, and the value in *value.
8429 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
8430 const xmlChar
*name
;
8435 name
= xmlParseName(ctxt
);
8437 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8438 "error parsing attribute name\n");
8449 val
= xmlParseAttValue(ctxt
);
8450 ctxt
->instate
= XML_PARSER_CONTENT
;
8452 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8453 "Specification mandates value for attribute %s\n", name
);
8458 * Check that xml:lang conforms to the specification
8459 * No more registered as an error, just generate a warning now
8460 * since this was deprecated in XML second edition
8462 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
8463 if (!xmlCheckLanguageID(val
)) {
8464 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8465 "Malformed value for xml:lang : %s\n",
8471 * Check that xml:space conforms to the specification
8473 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
8474 if (xmlStrEqual(val
, BAD_CAST
"default"))
8476 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
8479 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8480 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8491 * @ctxt: an XML parser context
8493 * DEPRECATED: Internal function, don't use.
8495 * Parse a start tag. Always consumes '<'.
8497 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8499 * [ WFC: Unique Att Spec ]
8500 * No attribute name may appear more than once in the same start-tag or
8501 * empty-element tag.
8503 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8505 * [ WFC: Unique Att Spec ]
8506 * No attribute name may appear more than once in the same start-tag or
8507 * empty-element tag.
8511 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8513 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8515 * Returns the element name parsed
8519 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
8520 const xmlChar
*name
;
8521 const xmlChar
*attname
;
8523 const xmlChar
**atts
= ctxt
->atts
;
8525 int maxatts
= ctxt
->maxatts
;
8528 if (RAW
!= '<') return(NULL
);
8531 name
= xmlParseName(ctxt
);
8533 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8534 "xmlParseStartTag: invalid element name\n");
8539 * Now parse the attributes, it ends up with the ending
8546 while (((RAW
!= '>') &&
8547 ((RAW
!= '/') || (NXT(1) != '>')) &&
8548 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8549 attname
= xmlParseAttribute(ctxt
, &attvalue
);
8550 if (attname
== NULL
) {
8551 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
8552 "xmlParseStartTag: problem parsing attributes\n");
8555 if (attvalue
!= NULL
) {
8557 * [ WFC: Unique Att Spec ]
8558 * No attribute name may appear more than once in the same
8559 * start-tag or empty-element tag.
8561 for (i
= 0; i
< nbatts
;i
+= 2) {
8562 if (xmlStrEqual(atts
[i
], attname
)) {
8563 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8569 * Add the pair to atts
8572 maxatts
= 22; /* allow for 10 attrs by default */
8573 atts
= (const xmlChar
**)
8574 xmlMalloc(maxatts
* sizeof(xmlChar
*));
8576 xmlErrMemory(ctxt
, NULL
);
8577 if (attvalue
!= NULL
)
8582 ctxt
->maxatts
= maxatts
;
8583 } else if (nbatts
+ 4 > maxatts
) {
8587 n
= (const xmlChar
**) xmlRealloc((void *) atts
,
8588 maxatts
* sizeof(const xmlChar
*));
8590 xmlErrMemory(ctxt
, NULL
);
8591 if (attvalue
!= NULL
)
8597 ctxt
->maxatts
= maxatts
;
8599 atts
[nbatts
++] = attname
;
8600 atts
[nbatts
++] = attvalue
;
8601 atts
[nbatts
] = NULL
;
8602 atts
[nbatts
+ 1] = NULL
;
8604 if (attvalue
!= NULL
)
8611 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
8613 if (SKIP_BLANKS
== 0) {
8614 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8615 "attributes construct error\n");
8622 * SAX: Start of Element !
8624 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
8625 (!ctxt
->disableSAX
)) {
8627 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
8629 ctxt
->sax
->startElement(ctxt
->userData
, name
, NULL
);
8633 /* Free only the content strings */
8634 for (i
= 1;i
< nbatts
;i
+=2)
8635 if (atts
[i
] != NULL
)
8636 xmlFree((xmlChar
*) atts
[i
]);
8643 * @ctxt: an XML parser context
8644 * @line: line of the start tag
8645 * @nsNr: number of namespaces on the start tag
8647 * Parse an end tag. Always consumes '</'.
8649 * [42] ETag ::= '</' Name S? '>'
8653 * [NS 9] ETag ::= '</' QName S? '>'
8657 xmlParseEndTag1(xmlParserCtxtPtr ctxt
, int line
) {
8658 const xmlChar
*name
;
8661 if ((RAW
!= '<') || (NXT(1) != '/')) {
8662 xmlFatalErrMsg(ctxt
, XML_ERR_LTSLASH_REQUIRED
,
8663 "xmlParseEndTag: '</' not found\n");
8668 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
8671 * We should definitely be at the ending "S? '>'" part
8675 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
8676 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
8681 * [ WFC: Element Type Match ]
8682 * The Name in an element's end-tag must match the element type in the
8686 if (name
!= (xmlChar
*)1) {
8687 if (name
== NULL
) name
= BAD_CAST
"unparsable";
8688 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
8689 "Opening and ending tag mismatch: %s line %d and %s\n",
8690 ctxt
->name
, line
, name
);
8696 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
8697 (!ctxt
->disableSAX
))
8698 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
8707 * @ctxt: an XML parser context
8709 * DEPRECATED: Internal function, don't use.
8711 * parse an end of tag
8713 * [42] ETag ::= '</' Name S? '>'
8717 * [NS 9] ETag ::= '</' QName S? '>'
8721 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
8722 xmlParseEndTag1(ctxt
, 0);
8724 #endif /* LIBXML_SAX1_ENABLED */
8726 /************************************************************************
8728 * SAX 2 specific operations *
8730 ************************************************************************/
8734 * @ctxt: an XML parser context
8735 * @prefix: the prefix to lookup
8737 * Lookup the namespace name for the @prefix (which ca be NULL)
8738 * The prefix must come from the @ctxt->dict dictionary
8740 * Returns the namespace name or NULL if not bound
8742 static const xmlChar
*
8743 xmlGetNamespace(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
) {
8746 if (prefix
== ctxt
->str_xml
) return(ctxt
->str_xml_ns
);
8747 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-=2)
8748 if (ctxt
->nsTab
[i
] == prefix
) {
8749 if ((prefix
== NULL
) && (*ctxt
->nsTab
[i
+ 1] == 0))
8751 return(ctxt
->nsTab
[i
+ 1]);
8758 * @ctxt: an XML parser context
8759 * @prefix: pointer to store the prefix part
8761 * parse an XML Namespace QName
8763 * [6] QName ::= (Prefix ':')? LocalPart
8764 * [7] Prefix ::= NCName
8765 * [8] LocalPart ::= NCName
8767 * Returns the Name parsed or NULL
8770 static const xmlChar
*
8771 xmlParseQName(xmlParserCtxtPtr ctxt
, const xmlChar
**prefix
) {
8772 const xmlChar
*l
, *p
;
8775 if (ctxt
->instate
== XML_PARSER_EOF
)
8778 l
= xmlParseNCName(ctxt
);
8781 l
= xmlParseName(ctxt
);
8783 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8784 "Failed to parse QName '%s'\n", l
, NULL
, NULL
);
8794 l
= xmlParseNCName(ctxt
);
8798 if (ctxt
->instate
== XML_PARSER_EOF
)
8800 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8801 "Failed to parse QName '%s:'\n", p
, NULL
, NULL
);
8802 l
= xmlParseNmtoken(ctxt
);
8804 if (ctxt
->instate
== XML_PARSER_EOF
)
8806 tmp
= xmlBuildQName(BAD_CAST
"", p
, NULL
, 0);
8808 tmp
= xmlBuildQName(l
, p
, NULL
, 0);
8811 p
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8812 if (tmp
!= NULL
) xmlFree(tmp
);
8819 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8820 "Failed to parse QName '%s:%s:'\n", p
, l
, NULL
);
8822 tmp
= (xmlChar
*) xmlParseName(ctxt
);
8824 tmp
= xmlBuildQName(tmp
, l
, NULL
, 0);
8825 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8826 if (tmp
!= NULL
) xmlFree(tmp
);
8830 if (ctxt
->instate
== XML_PARSER_EOF
)
8832 tmp
= xmlBuildQName(BAD_CAST
"", l
, NULL
, 0);
8833 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8834 if (tmp
!= NULL
) xmlFree(tmp
);
8845 * xmlParseQNameAndCompare:
8846 * @ctxt: an XML parser context
8847 * @name: the localname
8848 * @prefix: the prefix, if any.
8850 * parse an XML name and compares for match
8851 * (specialized for endtag parsing)
8853 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8854 * and the name for mismatch
8857 static const xmlChar
*
8858 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *name
,
8859 xmlChar
const *prefix
) {
8863 const xmlChar
*prefix2
;
8865 if (prefix
== NULL
) return(xmlParseNameAndCompare(ctxt
, name
));
8868 in
= ctxt
->input
->cur
;
8871 while (*in
!= 0 && *in
== *cmp
) {
8875 if ((*cmp
== 0) && (*in
== ':')) {
8878 while (*in
!= 0 && *in
== *cmp
) {
8882 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
8884 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
8885 ctxt
->input
->cur
= in
;
8886 return((const xmlChar
*) 1);
8890 * all strings coms from the dictionary, equality can be done directly
8892 ret
= xmlParseQName (ctxt
, &prefix2
);
8893 if ((ret
== name
) && (prefix
== prefix2
))
8894 return((const xmlChar
*) 1);
8899 * xmlParseAttValueInternal:
8900 * @ctxt: an XML parser context
8901 * @len: attribute len result
8902 * @alloc: whether the attribute was reallocated as a new string
8903 * @normalize: if 1 then further non-CDATA normalization must be done
8905 * parse a value for an attribute.
8906 * NOTE: if no normalization is needed, the routine will return pointers
8907 * directly from the data buffer.
8909 * 3.3.3 Attribute-Value Normalization:
8910 * Before the value of an attribute is passed to the application or
8911 * checked for validity, the XML processor must normalize it as follows:
8912 * - a character reference is processed by appending the referenced
8913 * character to the attribute value
8914 * - an entity reference is processed by recursively processing the
8915 * replacement text of the entity
8916 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8917 * appending #x20 to the normalized value, except that only a single
8918 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8919 * parsed entity or the literal entity value of an internal parsed entity
8920 * - other characters are processed by appending them to the normalized value
8921 * If the declared value is not CDATA, then the XML processor must further
8922 * process the normalized attribute value by discarding any leading and
8923 * trailing space (#x20) characters, and by replacing sequences of space
8924 * (#x20) characters by a single space (#x20) character.
8925 * All attributes for which no declaration has been read should be treated
8926 * by a non-validating parser as if declared CDATA.
8928 * Returns the AttValue parsed or NULL. The value has to be freed by the
8929 * caller if it was copied, this can be detected by val[*len] == 0.
8932 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8933 const xmlChar *oldbase = ctxt->input->base;\
8935 if (ctxt->instate == XML_PARSER_EOF)\
8937 if (oldbase != ctxt->input->base) {\
8938 ptrdiff_t delta = ctxt->input->base - oldbase;\
8939 start = start + delta;\
8942 end = ctxt->input->end;
8945 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
, int *len
, int *alloc
,
8949 const xmlChar
*in
= NULL
, *start
, *end
, *last
;
8950 xmlChar
*ret
= NULL
;
8952 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
8953 XML_MAX_HUGE_LENGTH
:
8954 XML_MAX_TEXT_LENGTH
;
8957 in
= (xmlChar
*) CUR_PTR
;
8958 line
= ctxt
->input
->line
;
8959 col
= ctxt
->input
->col
;
8960 if (*in
!= '"' && *in
!= '\'') {
8961 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
8964 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
8967 * try to handle in this routine the most common case where no
8968 * allocation of a new string is required and where content is
8973 end
= ctxt
->input
->end
;
8976 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
8980 * Skip any leading spaces
8982 while ((in
< end
) && (*in
!= limit
) &&
8983 ((*in
== 0x20) || (*in
== 0x9) ||
8984 (*in
== 0xA) || (*in
== 0xD))) {
8993 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
8994 if ((in
- start
) > maxLength
) {
8995 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
8996 "AttValue length too long\n");
9001 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9002 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9004 if ((*in
++ == 0x20) && (*in
== 0x20)) break;
9006 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9007 if ((in
- start
) > maxLength
) {
9008 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9009 "AttValue length too long\n");
9016 * skip the trailing blanks
9018 while ((last
[-1] == 0x20) && (last
> start
)) last
--;
9019 while ((in
< end
) && (*in
!= limit
) &&
9020 ((*in
== 0x20) || (*in
== 0x9) ||
9021 (*in
== 0xA) || (*in
== 0xD))) {
9029 const xmlChar
*oldbase
= ctxt
->input
->base
;
9031 if (ctxt
->instate
== XML_PARSER_EOF
)
9033 if (oldbase
!= ctxt
->input
->base
) {
9034 ptrdiff_t delta
= ctxt
->input
->base
- oldbase
;
9035 start
= start
+ delta
;
9037 last
= last
+ delta
;
9039 end
= ctxt
->input
->end
;
9040 if ((in
- start
) > maxLength
) {
9041 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9042 "AttValue length too long\n");
9047 if ((in
- start
) > maxLength
) {
9048 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9049 "AttValue length too long\n");
9052 if (*in
!= limit
) goto need_complex
;
9054 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9055 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9059 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9060 if ((in
- start
) > maxLength
) {
9061 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9062 "AttValue length too long\n");
9068 if ((in
- start
) > maxLength
) {
9069 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9070 "AttValue length too long\n");
9073 if (*in
!= limit
) goto need_complex
;
9078 if (alloc
) *alloc
= 0;
9079 *len
= last
- start
;
9080 ret
= (xmlChar
*) start
;
9082 if (alloc
) *alloc
= 1;
9083 ret
= xmlStrndup(start
, last
- start
);
9086 ctxt
->input
->line
= line
;
9087 ctxt
->input
->col
= col
;
9090 if (alloc
) *alloc
= 1;
9091 return xmlParseAttValueComplex(ctxt
, len
, normalize
);
9095 * xmlParseAttribute2:
9096 * @ctxt: an XML parser context
9097 * @pref: the element prefix
9098 * @elem: the element name
9099 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9100 * @value: a xmlChar ** used to store the value of the attribute
9101 * @len: an int * to save the length of the attribute
9102 * @alloc: an int * to indicate if the attribute was allocated
9104 * parse an attribute in the new SAX2 framework.
9106 * Returns the attribute name, and the value in *value, .
9109 static const xmlChar
*
9110 xmlParseAttribute2(xmlParserCtxtPtr ctxt
,
9111 const xmlChar
* pref
, const xmlChar
* elem
,
9112 const xmlChar
** prefix
, xmlChar
** value
,
9113 int *len
, int *alloc
)
9115 const xmlChar
*name
;
9116 xmlChar
*val
, *internal_val
= NULL
;
9121 name
= xmlParseQName(ctxt
, prefix
);
9123 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9124 "error parsing attribute name\n");
9129 * get the type if needed
9131 if (ctxt
->attsSpecial
!= NULL
) {
9134 type
= (int) (ptrdiff_t) xmlHashQLookup2(ctxt
->attsSpecial
,
9135 pref
, elem
, *prefix
, name
);
9147 val
= xmlParseAttValueInternal(ctxt
, len
, alloc
, normalize
);
9152 * Sometimes a second normalisation pass for spaces is needed
9153 * but that only happens if charrefs or entities references
9154 * have been used in the attribute value, i.e. the attribute
9155 * value have been extracted in an allocated string already.
9158 const xmlChar
*val2
;
9160 val2
= xmlAttrNormalizeSpace2(ctxt
, val
, len
);
9161 if ((val2
!= NULL
) && (val2
!= val
)) {
9163 val
= (xmlChar
*) val2
;
9167 ctxt
->instate
= XML_PARSER_CONTENT
;
9169 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
9170 "Specification mandates value for attribute %s\n",
9175 if (*prefix
== ctxt
->str_xml
) {
9177 * Check that xml:lang conforms to the specification
9178 * No more registered as an error, just generate a warning now
9179 * since this was deprecated in XML second edition
9181 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"lang"))) {
9182 internal_val
= xmlStrndup(val
, *len
);
9183 if (!xmlCheckLanguageID(internal_val
)) {
9184 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
9185 "Malformed value for xml:lang : %s\n",
9186 internal_val
, NULL
);
9191 * Check that xml:space conforms to the specification
9193 if (xmlStrEqual(name
, BAD_CAST
"space")) {
9194 internal_val
= xmlStrndup(val
, *len
);
9195 if (xmlStrEqual(internal_val
, BAD_CAST
"default"))
9197 else if (xmlStrEqual(internal_val
, BAD_CAST
"preserve"))
9200 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
9201 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9202 internal_val
, NULL
);
9206 xmlFree(internal_val
);
9214 * xmlParseStartTag2:
9215 * @ctxt: an XML parser context
9217 * Parse a start tag. Always consumes '<'.
9219 * This routine is called when running SAX2 parsing
9221 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9223 * [ WFC: Unique Att Spec ]
9224 * No attribute name may appear more than once in the same start-tag or
9225 * empty-element tag.
9227 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9229 * [ WFC: Unique Att Spec ]
9230 * No attribute name may appear more than once in the same start-tag or
9231 * empty-element tag.
9235 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9237 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9239 * Returns the element name parsed
9242 static const xmlChar
*
9243 xmlParseStartTag2(xmlParserCtxtPtr ctxt
, const xmlChar
**pref
,
9244 const xmlChar
**URI
, int *tlen
) {
9245 const xmlChar
*localname
;
9246 const xmlChar
*prefix
;
9247 const xmlChar
*attname
;
9248 const xmlChar
*aprefix
;
9249 const xmlChar
*nsname
;
9251 const xmlChar
**atts
= ctxt
->atts
;
9252 int maxatts
= ctxt
->maxatts
;
9253 int nratts
, nbatts
, nbdef
, inputid
;
9254 int i
, j
, nbNs
, attval
;
9256 int nsNr
= ctxt
->nsNr
;
9258 if (RAW
!= '<') return(NULL
);
9261 cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
9262 inputid
= ctxt
->input
->id
;
9268 /* Forget any namespaces added during an earlier parse of this element. */
9271 localname
= xmlParseQName(ctxt
, &prefix
);
9272 if (localname
== NULL
) {
9273 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9274 "StartTag: invalid element name\n");
9277 *tlen
= ctxt
->input
->cur
- ctxt
->input
->base
- cur
;
9280 * Now parse the attributes, it ends up with the ending
9287 while (((RAW
!= '>') &&
9288 ((RAW
!= '/') || (NXT(1) != '>')) &&
9289 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
9290 int len
= -1, alloc
= 0;
9292 attname
= xmlParseAttribute2(ctxt
, prefix
, localname
,
9293 &aprefix
, &attvalue
, &len
, &alloc
);
9294 if (attname
== NULL
) {
9295 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9296 "xmlParseStartTag: problem parsing attributes\n");
9299 if (attvalue
== NULL
)
9301 if (len
< 0) len
= xmlStrlen(attvalue
);
9303 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9304 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9308 xmlErrMemory(ctxt
, "dictionary allocation failure");
9309 if ((attvalue
!= NULL
) && (alloc
!= 0))
9315 uri
= xmlParseURI((const char *) URL
);
9317 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9318 "xmlns: '%s' is not a valid URI\n",
9321 if (uri
->scheme
== NULL
) {
9322 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9323 "xmlns: URI %s is not absolute\n",
9328 if (URL
== ctxt
->str_xml_ns
) {
9329 if (attname
!= ctxt
->str_xml
) {
9330 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9331 "xml namespace URI cannot be the default namespace\n",
9338 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9339 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9340 "reuse of the xmlns namespace name is forbidden\n",
9346 * check that it's not a defined namespace
9348 for (j
= 1;j
<= nbNs
;j
++)
9349 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9352 xmlErrAttributeDup(ctxt
, NULL
, attname
);
9354 if (nsPush(ctxt
, NULL
, URL
) > 0) nbNs
++;
9356 } else if (aprefix
== ctxt
->str_xmlns
) {
9357 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9360 if (attname
== ctxt
->str_xml
) {
9361 if (URL
!= ctxt
->str_xml_ns
) {
9362 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9363 "xml namespace prefix mapped to wrong URI\n",
9367 * Do not keep a namespace definition node
9371 if (URL
== ctxt
->str_xml_ns
) {
9372 if (attname
!= ctxt
->str_xml
) {
9373 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9374 "xml namespace URI mapped to wrong prefix\n",
9379 if (attname
== ctxt
->str_xmlns
) {
9380 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9381 "redefinition of the xmlns prefix is forbidden\n",
9387 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9388 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9389 "reuse of the xmlns namespace name is forbidden\n",
9393 if ((URL
== NULL
) || (URL
[0] == 0)) {
9394 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9395 "xmlns:%s: Empty XML namespace is not allowed\n",
9396 attname
, NULL
, NULL
);
9399 uri
= xmlParseURI((const char *) URL
);
9401 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9402 "xmlns:%s: '%s' is not a valid URI\n",
9403 attname
, URL
, NULL
);
9405 if ((ctxt
->pedantic
) && (uri
->scheme
== NULL
)) {
9406 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9407 "xmlns:%s: URI %s is not absolute\n",
9408 attname
, URL
, NULL
);
9415 * check that it's not a defined namespace
9417 for (j
= 1;j
<= nbNs
;j
++)
9418 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9421 xmlErrAttributeDup(ctxt
, aprefix
, attname
);
9423 if (nsPush(ctxt
, attname
, URL
) > 0) nbNs
++;
9427 * Add the pair to atts
9429 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9430 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9433 maxatts
= ctxt
->maxatts
;
9436 ctxt
->attallocs
[nratts
++] = alloc
;
9437 atts
[nbatts
++] = attname
;
9438 atts
[nbatts
++] = aprefix
;
9440 * The namespace URI field is used temporarily to point at the
9441 * base of the current input buffer for non-alloced attributes.
9442 * When the input buffer is reallocated, all the pointers become
9443 * invalid, but they can be reconstructed later.
9446 atts
[nbatts
++] = NULL
;
9448 atts
[nbatts
++] = ctxt
->input
->base
;
9449 atts
[nbatts
++] = attvalue
;
9451 atts
[nbatts
++] = attvalue
;
9453 * tag if some deallocation is needed
9455 if (alloc
!= 0) attval
= 1;
9456 attvalue
= NULL
; /* moved into atts */
9460 if ((attvalue
!= NULL
) && (alloc
!= 0)) {
9466 if (ctxt
->instate
== XML_PARSER_EOF
)
9468 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9470 if (SKIP_BLANKS
== 0) {
9471 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9472 "attributes construct error\n");
9478 if (ctxt
->input
->id
!= inputid
) {
9479 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9480 "Unexpected change of input\n");
9485 /* Reconstruct attribute value pointers. */
9486 for (i
= 0, j
= 0; j
< nratts
; i
+= 5, j
++) {
9487 if (atts
[i
+2] != NULL
) {
9489 * Arithmetic on dangling pointers is technically undefined
9490 * behavior, but well...
9492 const xmlChar
*old
= atts
[i
+2];
9493 atts
[i
+2] = NULL
; /* Reset repurposed namespace URI */
9494 atts
[i
+3] = ctxt
->input
->base
+ (atts
[i
+3] - old
); /* value */
9495 atts
[i
+4] = ctxt
->input
->base
+ (atts
[i
+4] - old
); /* valuend */
9500 * The attributes defaulting
9502 if (ctxt
->attsDefault
!= NULL
) {
9503 xmlDefAttrsPtr defaults
;
9505 defaults
= xmlHashLookup2(ctxt
->attsDefault
, localname
, prefix
);
9506 if (defaults
!= NULL
) {
9507 for (i
= 0;i
< defaults
->nbAttrs
;i
++) {
9508 attname
= defaults
->values
[5 * i
];
9509 aprefix
= defaults
->values
[5 * i
+ 1];
9512 * special work for namespaces defaulted defs
9514 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9516 * check that it's not a defined namespace
9518 for (j
= 1;j
<= nbNs
;j
++)
9519 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9521 if (j
<= nbNs
) continue;
9523 nsname
= xmlGetNamespace(ctxt
, NULL
);
9524 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9525 if (nsPush(ctxt
, NULL
,
9526 defaults
->values
[5 * i
+ 2]) > 0)
9529 } else if (aprefix
== ctxt
->str_xmlns
) {
9531 * check that it's not a defined namespace
9533 for (j
= 1;j
<= nbNs
;j
++)
9534 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9536 if (j
<= nbNs
) continue;
9538 nsname
= xmlGetNamespace(ctxt
, attname
);
9539 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9540 if (nsPush(ctxt
, attname
,
9541 defaults
->values
[5 * i
+ 2]) > 0)
9546 * check that it's not a defined attribute
9548 for (j
= 0;j
< nbatts
;j
+=5) {
9549 if ((attname
== atts
[j
]) && (aprefix
== atts
[j
+1]))
9552 if (j
< nbatts
) continue;
9554 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9555 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9559 maxatts
= ctxt
->maxatts
;
9562 atts
[nbatts
++] = attname
;
9563 atts
[nbatts
++] = aprefix
;
9564 if (aprefix
== NULL
)
9565 atts
[nbatts
++] = NULL
;
9567 atts
[nbatts
++] = xmlGetNamespace(ctxt
, aprefix
);
9568 atts
[nbatts
++] = defaults
->values
[5 * i
+ 2];
9569 atts
[nbatts
++] = defaults
->values
[5 * i
+ 3];
9570 if ((ctxt
->standalone
== 1) &&
9571 (defaults
->values
[5 * i
+ 4] != NULL
)) {
9572 xmlValidityError(ctxt
, XML_DTD_STANDALONE_DEFAULTED
,
9573 "standalone: attribute %s on %s defaulted from external subset\n",
9574 attname
, localname
);
9583 * The attributes checkings
9585 for (i
= 0; i
< nbatts
;i
+= 5) {
9587 * The default namespace does not apply to attribute names.
9589 if (atts
[i
+ 1] != NULL
) {
9590 nsname
= xmlGetNamespace(ctxt
, atts
[i
+ 1]);
9591 if (nsname
== NULL
) {
9592 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9593 "Namespace prefix %s for %s on %s is not defined\n",
9594 atts
[i
+ 1], atts
[i
], localname
);
9596 atts
[i
+ 2] = nsname
;
9600 * [ WFC: Unique Att Spec ]
9601 * No attribute name may appear more than once in the same
9602 * start-tag or empty-element tag.
9603 * As extended by the Namespace in XML REC.
9605 for (j
= 0; j
< i
;j
+= 5) {
9606 if (atts
[i
] == atts
[j
]) {
9607 if (atts
[i
+1] == atts
[j
+1]) {
9608 xmlErrAttributeDup(ctxt
, atts
[i
+1], atts
[i
]);
9611 if ((nsname
!= NULL
) && (atts
[j
+ 2] == nsname
)) {
9612 xmlNsErr(ctxt
, XML_NS_ERR_ATTRIBUTE_REDEFINED
,
9613 "Namespaced Attribute %s in '%s' redefined\n",
9614 atts
[i
], nsname
, NULL
);
9621 nsname
= xmlGetNamespace(ctxt
, prefix
);
9622 if ((prefix
!= NULL
) && (nsname
== NULL
)) {
9623 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9624 "Namespace prefix %s on %s is not defined\n",
9625 prefix
, localname
, NULL
);
9631 * SAX: Start of Element !
9633 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElementNs
!= NULL
) &&
9634 (!ctxt
->disableSAX
)) {
9636 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9637 nsname
, nbNs
, &ctxt
->nsTab
[ctxt
->nsNr
- 2 * nbNs
],
9638 nbatts
/ 5, nbdef
, atts
);
9640 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9641 nsname
, 0, NULL
, nbatts
/ 5, nbdef
, atts
);
9646 * Free up attribute allocated strings if needed
9649 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9650 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9651 xmlFree((xmlChar
*) atts
[i
]);
9659 * @ctxt: an XML parser context
9660 * @line: line of the start tag
9661 * @nsNr: number of namespaces on the start tag
9663 * Parse an end tag. Always consumes '</'.
9665 * [42] ETag ::= '</' Name S? '>'
9669 * [NS 9] ETag ::= '</' QName S? '>'
9673 xmlParseEndTag2(xmlParserCtxtPtr ctxt
, const xmlStartTag
*tag
) {
9674 const xmlChar
*name
;
9677 if ((RAW
!= '<') || (NXT(1) != '/')) {
9678 xmlFatalErr(ctxt
, XML_ERR_LTSLASH_REQUIRED
, NULL
);
9683 if (tag
->prefix
== NULL
)
9684 name
= xmlParseNameAndCompare(ctxt
, ctxt
->name
);
9686 name
= xmlParseQNameAndCompare(ctxt
, ctxt
->name
, tag
->prefix
);
9689 * We should definitely be at the ending "S? '>'" part
9692 if (ctxt
->instate
== XML_PARSER_EOF
)
9695 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
9696 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
9701 * [ WFC: Element Type Match ]
9702 * The Name in an element's end-tag must match the element type in the
9706 if (name
!= (xmlChar
*)1) {
9707 if (name
== NULL
) name
= BAD_CAST
"unparsable";
9708 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
9709 "Opening and ending tag mismatch: %s line %d and %s\n",
9710 ctxt
->name
, tag
->line
, name
);
9716 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9717 (!ctxt
->disableSAX
))
9718 ctxt
->sax
->endElementNs(ctxt
->userData
, ctxt
->name
, tag
->prefix
,
9723 nsPop(ctxt
, tag
->nsNr
);
9728 * @ctxt: an XML parser context
9730 * DEPRECATED: Internal function, don't use.
9732 * Parse escaped pure raw content. Always consumes '<!['.
9734 * [18] CDSect ::= CDStart CData CDEnd
9736 * [19] CDStart ::= '<![CDATA['
9738 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9740 * [21] CDEnd ::= ']]>'
9743 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
9744 xmlChar
*buf
= NULL
;
9746 int size
= XML_PARSER_BUFFER_SIZE
;
9750 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
9751 XML_MAX_HUGE_LENGTH
:
9752 XML_MAX_TEXT_LENGTH
;
9754 if ((CUR
!= '<') || (NXT(1) != '!') || (NXT(2) != '['))
9758 if (!CMP6(CUR_PTR
, 'C', 'D', 'A', 'T', 'A', '['))
9762 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
9765 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9771 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9776 buf
= (xmlChar
*) xmlMallocAtomic(size
);
9778 xmlErrMemory(ctxt
, NULL
);
9781 while (IS_CHAR(cur
) &&
9782 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
9783 if (len
+ 5 >= size
) {
9786 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* 2);
9788 xmlErrMemory(ctxt
, NULL
);
9794 COPY_BUF(rl
,buf
,len
,r
);
9795 if (len
> maxLength
) {
9796 xmlFatalErrMsg(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9797 "CData section too big found\n");
9808 if (ctxt
->instate
== XML_PARSER_EOF
) {
9813 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9814 "CData section not finished\n%.50s\n", buf
);
9820 * OK the buffer is to be consumed as cdata.
9822 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
9823 if (ctxt
->sax
->cdataBlock
!= NULL
)
9824 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
9825 else if (ctxt
->sax
->characters
!= NULL
)
9826 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
9830 if (ctxt
->instate
!= XML_PARSER_EOF
)
9831 ctxt
->instate
= XML_PARSER_CONTENT
;
9836 * xmlParseContentInternal:
9837 * @ctxt: an XML parser context
9839 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9840 * unexpected EOF to the caller.
9844 xmlParseContentInternal(xmlParserCtxtPtr ctxt
) {
9845 int nameNr
= ctxt
->nameNr
;
9848 while ((RAW
!= 0) &&
9849 (ctxt
->instate
!= XML_PARSER_EOF
)) {
9850 const xmlChar
*cur
= ctxt
->input
->cur
;
9853 * First case : a Processing Instruction.
9855 if ((*cur
== '<') && (cur
[1] == '?')) {
9860 * Second case : a CDSection
9862 /* 2.6.0 test was *cur not RAW */
9863 else if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9864 xmlParseCDSect(ctxt
);
9868 * Third case : a comment
9870 else if ((*cur
== '<') && (NXT(1) == '!') &&
9871 (NXT(2) == '-') && (NXT(3) == '-')) {
9872 xmlParseComment(ctxt
);
9873 ctxt
->instate
= XML_PARSER_CONTENT
;
9877 * Fourth case : a sub-element.
9879 else if (*cur
== '<') {
9880 if (NXT(1) == '/') {
9881 if (ctxt
->nameNr
<= nameNr
)
9883 xmlParseElementEnd(ctxt
);
9885 xmlParseElementStart(ctxt
);
9890 * Fifth case : a reference. If it has not been resolved,
9891 * parsing returns its Name, create the node
9894 else if (*cur
== '&') {
9895 xmlParseReference(ctxt
);
9899 * Last case, text. Note that References are handled directly.
9902 xmlParseCharDataInternal(ctxt
, 0);
9912 * @ctxt: an XML parser context
9914 * Parse a content sequence. Stops at EOF or '</'.
9916 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9920 xmlParseContent(xmlParserCtxtPtr ctxt
) {
9921 int nameNr
= ctxt
->nameNr
;
9923 xmlParseContentInternal(ctxt
);
9925 if ((ctxt
->instate
!= XML_PARSER_EOF
) && (ctxt
->nameNr
> nameNr
)) {
9926 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9927 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9928 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9929 "Premature end of data in tag %s line %d\n",
9936 * @ctxt: an XML parser context
9938 * DEPRECATED: Internal function, don't use.
9940 * parse an XML element
9942 * [39] element ::= EmptyElemTag | STag content ETag
9944 * [ WFC: Element Type Match ]
9945 * The Name in an element's end-tag must match the element type in the
9951 xmlParseElement(xmlParserCtxtPtr ctxt
) {
9952 if (xmlParseElementStart(ctxt
) != 0)
9955 xmlParseContentInternal(ctxt
);
9956 if (ctxt
->instate
== XML_PARSER_EOF
)
9960 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9961 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9962 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9963 "Premature end of data in tag %s line %d\n",
9968 xmlParseElementEnd(ctxt
);
9972 * xmlParseElementStart:
9973 * @ctxt: an XML parser context
9975 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9976 * opening tag was parsed, 1 if an empty element was parsed.
9978 * Always consumes '<'.
9981 xmlParseElementStart(xmlParserCtxtPtr ctxt
) {
9982 const xmlChar
*name
;
9983 const xmlChar
*prefix
= NULL
;
9984 const xmlChar
*URI
= NULL
;
9985 xmlParserNodeInfo node_info
;
9988 int nsNr
= ctxt
->nsNr
;
9990 if (((unsigned int) ctxt
->nameNr
> xmlParserMaxDepth
) &&
9991 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9992 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
9993 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9995 xmlHaltParser(ctxt
);
9999 /* Capture start position */
10000 if (ctxt
->record_info
) {
10001 node_info
.begin_pos
= ctxt
->input
->consumed
+
10002 (CUR_PTR
- ctxt
->input
->base
);
10003 node_info
.begin_line
= ctxt
->input
->line
;
10006 if (ctxt
->spaceNr
== 0)
10007 spacePush(ctxt
, -1);
10008 else if (*ctxt
->space
== -2)
10009 spacePush(ctxt
, -1);
10011 spacePush(ctxt
, *ctxt
->space
);
10013 line
= ctxt
->input
->line
;
10014 #ifdef LIBXML_SAX1_ENABLED
10016 #endif /* LIBXML_SAX1_ENABLED */
10017 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
10018 #ifdef LIBXML_SAX1_ENABLED
10020 name
= xmlParseStartTag(ctxt
);
10021 #endif /* LIBXML_SAX1_ENABLED */
10022 if (ctxt
->instate
== XML_PARSER_EOF
)
10024 if (name
== NULL
) {
10028 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
10031 #ifdef LIBXML_VALID_ENABLED
10033 * [ VC: Root Element Type ]
10034 * The Name in the document type declaration must match the element
10035 * type of the root element.
10037 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
10038 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
10039 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
10040 #endif /* LIBXML_VALID_ENABLED */
10043 * Check for an Empty Element.
10045 if ((RAW
== '/') && (NXT(1) == '>')) {
10048 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
10049 (!ctxt
->disableSAX
))
10050 ctxt
->sax
->endElementNs(ctxt
->userData
, name
, prefix
, URI
);
10051 #ifdef LIBXML_SAX1_ENABLED
10053 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
10054 (!ctxt
->disableSAX
))
10055 ctxt
->sax
->endElement(ctxt
->userData
, name
);
10056 #endif /* LIBXML_SAX1_ENABLED */
10060 if (nsNr
!= ctxt
->nsNr
)
10061 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10062 if (cur
!= NULL
&& ctxt
->record_info
) {
10063 node_info
.node
= cur
;
10064 node_info
.end_pos
= ctxt
->input
->consumed
+
10065 (CUR_PTR
- ctxt
->input
->base
);
10066 node_info
.end_line
= ctxt
->input
->line
;
10067 xmlParserAddNodeInfo(ctxt
, &node_info
);
10073 if (cur
!= NULL
&& ctxt
->record_info
) {
10074 node_info
.node
= cur
;
10075 node_info
.end_pos
= 0;
10076 node_info
.end_line
= 0;
10077 xmlParserAddNodeInfo(ctxt
, &node_info
);
10080 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_GT_REQUIRED
,
10081 "Couldn't find end of Start Tag %s line %d\n",
10085 * end of parsing of this node.
10090 if (nsNr
!= ctxt
->nsNr
)
10091 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10099 * xmlParseElementEnd:
10100 * @ctxt: an XML parser context
10102 * Parse the end of an XML element. Always consumes '</'.
10105 xmlParseElementEnd(xmlParserCtxtPtr ctxt
) {
10106 xmlNodePtr cur
= ctxt
->node
;
10108 if (ctxt
->nameNr
<= 0) {
10109 if ((RAW
== '<') && (NXT(1) == '/'))
10115 * parse the end of tag: '</' should be here.
10118 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
10121 #ifdef LIBXML_SAX1_ENABLED
10123 xmlParseEndTag1(ctxt
, 0);
10124 #endif /* LIBXML_SAX1_ENABLED */
10127 * Capture end position
10129 if (cur
!= NULL
&& ctxt
->record_info
) {
10130 xmlParserNodeInfoPtr node_info
;
10132 node_info
= (xmlParserNodeInfoPtr
) xmlParserFindNodeInfo(ctxt
, cur
);
10133 if (node_info
!= NULL
) {
10134 node_info
->end_pos
= ctxt
->input
->consumed
+
10135 (CUR_PTR
- ctxt
->input
->base
);
10136 node_info
->end_line
= ctxt
->input
->line
;
10142 * xmlParseVersionNum:
10143 * @ctxt: an XML parser context
10145 * DEPRECATED: Internal function, don't use.
10147 * parse the XML version value.
10149 * [26] VersionNum ::= '1.' [0-9]+
10151 * In practice allow [0-9].[0-9]+ at that level
10153 * Returns the string giving the XML version number, or NULL
10156 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
10157 xmlChar
*buf
= NULL
;
10162 buf
= (xmlChar
*) xmlMallocAtomic(size
);
10164 xmlErrMemory(ctxt
, NULL
);
10168 if (!((cur
>= '0') && (cur
<= '9'))) {
10182 while ((cur
>= '0') && (cur
<= '9')) {
10183 if (len
+ 1 >= size
) {
10187 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
10190 xmlErrMemory(ctxt
, NULL
);
10204 * xmlParseVersionInfo:
10205 * @ctxt: an XML parser context
10207 * DEPRECATED: Internal function, don't use.
10209 * parse the XML version.
10211 * [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
10213 * [25] Eq ::= S? '=' S?
10215 * Returns the version string, e.g. "1.0"
10219 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
10220 xmlChar
*version
= NULL
;
10222 if (CMP7(CUR_PTR
, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10226 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10233 version
= xmlParseVersionNum(ctxt
);
10235 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10238 } else if (RAW
== '\''){
10240 version
= xmlParseVersionNum(ctxt
);
10242 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10246 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10254 * @ctxt: an XML parser context
10256 * DEPRECATED: Internal function, don't use.
10258 * parse the XML encoding name
10260 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10262 * Returns the encoding name value or NULL
10265 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
10266 xmlChar
*buf
= NULL
;
10269 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
10270 XML_MAX_TEXT_LENGTH
:
10271 XML_MAX_NAME_LENGTH
;
10275 if (((cur
>= 'a') && (cur
<= 'z')) ||
10276 ((cur
>= 'A') && (cur
<= 'Z'))) {
10277 buf
= (xmlChar
*) xmlMallocAtomic(size
);
10279 xmlErrMemory(ctxt
, NULL
);
10286 while (((cur
>= 'a') && (cur
<= 'z')) ||
10287 ((cur
>= 'A') && (cur
<= 'Z')) ||
10288 ((cur
>= '0') && (cur
<= '9')) ||
10289 (cur
== '.') || (cur
== '_') ||
10291 if (len
+ 1 >= size
) {
10295 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
10297 xmlErrMemory(ctxt
, NULL
);
10304 if (len
> maxLength
) {
10305 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "EncName");
10314 xmlFatalErr(ctxt
, XML_ERR_ENCODING_NAME
, NULL
);
10320 * xmlParseEncodingDecl:
10321 * @ctxt: an XML parser context
10323 * DEPRECATED: Internal function, don't use.
10325 * parse the XML encoding declaration
10327 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10329 * This sets up the conversion filters.
10331 * Returns the encoding value or NULL
10335 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
10336 xmlChar
*encoding
= NULL
;
10339 if (CMP8(CUR_PTR
, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10343 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10350 encoding
= xmlParseEncName(ctxt
);
10352 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10353 xmlFree((xmlChar
*) encoding
);
10357 } else if (RAW
== '\''){
10359 encoding
= xmlParseEncName(ctxt
);
10361 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10362 xmlFree((xmlChar
*) encoding
);
10367 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10371 * Non standard parsing, allowing the user to ignore encoding
10373 if (ctxt
->options
& XML_PARSE_IGNORE_ENC
) {
10374 xmlFree((xmlChar
*) encoding
);
10379 * UTF-16 encoding switch has already taken place at this stage,
10380 * moreover the little-endian/big-endian selection is already done
10382 if ((encoding
!= NULL
) &&
10383 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-16")) ||
10384 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF16")))) {
10386 * If no encoding was passed to the parser, the declaration says
10387 * UTF-16 and no decoder is present, i.e. the
10388 * document is apparently UTF-8 compatible, then raise an
10389 * encoding mismatch fatal error
10391 if ((ctxt
->encoding
== NULL
) &&
10392 (ctxt
->input
->buf
!= NULL
) &&
10393 (ctxt
->input
->buf
->encoder
== NULL
)) {
10394 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_ENCODING
,
10395 "Document labelled UTF-16 but has UTF-8 content\n");
10397 if (ctxt
->encoding
!= NULL
)
10398 xmlFree((xmlChar
*) ctxt
->encoding
);
10399 ctxt
->encoding
= encoding
;
10402 * UTF-8 encoding is handled natively
10404 else if ((encoding
!= NULL
) &&
10405 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-8")) ||
10406 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF8")))) {
10407 /* TODO: Check for encoding mismatch. */
10408 if (ctxt
->encoding
!= NULL
)
10409 xmlFree((xmlChar
*) ctxt
->encoding
);
10410 ctxt
->encoding
= encoding
;
10412 else if (encoding
!= NULL
) {
10413 xmlCharEncodingHandlerPtr handler
;
10415 if (ctxt
->input
->encoding
!= NULL
)
10416 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
10417 ctxt
->input
->encoding
= encoding
;
10419 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
10420 if (handler
!= NULL
) {
10421 if (xmlSwitchToEncoding(ctxt
, handler
) < 0) {
10422 /* failed to convert */
10423 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
10427 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
10428 "Unsupported encoding %s\n", encoding
);
10438 * @ctxt: an XML parser context
10440 * DEPRECATED: Internal function, don't use.
10442 * parse the XML standalone declaration
10444 * [32] SDDecl ::= S 'standalone' Eq
10445 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10447 * [ VC: Standalone Document Declaration ]
10448 * TODO The standalone document declaration must have the value "no"
10449 * if any external markup declarations contain declarations of:
10450 * - attributes with default values, if elements to which these
10451 * attributes apply appear in the document without specifications
10452 * of values for these attributes, or
10453 * - entities (other than amp, lt, gt, apos, quot), if references
10454 * to those entities appear in the document, or
10455 * - attributes with values subject to normalization, where the
10456 * attribute appears in the document with a value which will change
10457 * as a result of normalization, or
10458 * - element types with element content, if white space occurs directly
10459 * within any instance of those types.
10462 * 1 if standalone="yes"
10463 * 0 if standalone="no"
10464 * -2 if standalone attribute is missing or invalid
10465 * (A standalone value of -2 means that the XML declaration was found,
10466 * but no value was specified for the standalone attribute).
10470 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
10471 int standalone
= -2;
10474 if (CMP10(CUR_PTR
, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10478 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10479 return(standalone
);
10485 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10488 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10493 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10496 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10499 } else if (RAW
== '"'){
10501 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10504 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10509 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10512 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10516 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10519 return(standalone
);
10524 * @ctxt: an XML parser context
10526 * DEPRECATED: Internal function, don't use.
10528 * parse an XML declaration header
10530 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10534 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
10538 * This value for standalone indicates that the document has an
10539 * XML declaration but it does not have a standalone attribute.
10540 * It will be overwritten later if a standalone attribute is found.
10542 ctxt
->input
->standalone
= -2;
10545 * We know that '<?xml' is here.
10549 if (!IS_BLANK_CH(RAW
)) {
10550 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
10551 "Blank needed after '<?xml'\n");
10556 * We must have the VersionInfo here.
10558 version
= xmlParseVersionInfo(ctxt
);
10559 if (version
== NULL
) {
10560 xmlFatalErr(ctxt
, XML_ERR_VERSION_MISSING
, NULL
);
10562 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
10564 * Changed here for XML-1.0 5th edition
10566 if (ctxt
->options
& XML_PARSE_OLD10
) {
10567 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10568 "Unsupported version '%s'\n",
10571 if ((version
[0] == '1') && ((version
[1] == '.'))) {
10572 xmlWarningMsg(ctxt
, XML_WAR_UNKNOWN_VERSION
,
10573 "Unsupported version '%s'\n",
10576 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10577 "Unsupported version '%s'\n",
10582 if (ctxt
->version
!= NULL
)
10583 xmlFree((void *) ctxt
->version
);
10584 ctxt
->version
= version
;
10588 * We may have the encoding declaration
10590 if (!IS_BLANK_CH(RAW
)) {
10591 if ((RAW
== '?') && (NXT(1) == '>')) {
10595 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10597 xmlParseEncodingDecl(ctxt
);
10598 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10599 (ctxt
->instate
== XML_PARSER_EOF
)) {
10601 * The XML REC instructs us to stop parsing right here
10607 * We may have the standalone status.
10609 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK_CH(RAW
))) {
10610 if ((RAW
== '?') && (NXT(1) == '>')) {
10614 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10618 * We can grow the input buffer freely at that point
10623 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
10626 if ((RAW
== '?') && (NXT(1) == '>')) {
10628 } else if (RAW
== '>') {
10629 /* Deprecated old WD ... */
10630 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10635 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10636 while ((c
= CUR
) != 0) {
10646 * @ctxt: an XML parser context
10648 * DEPRECATED: Internal function, don't use.
10650 * parse an XML Misc* optional field.
10652 * [27] Misc ::= Comment | PI | S
10656 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
10657 while (ctxt
->instate
!= XML_PARSER_EOF
) {
10660 if ((RAW
== '<') && (NXT(1) == '?')) {
10662 } else if (CMP4(CUR_PTR
, '<', '!', '-', '-')) {
10663 xmlParseComment(ctxt
);
10671 * xmlParseDocument:
10672 * @ctxt: an XML parser context
10674 * parse an XML document (and build a tree if using the standard SAX
10677 * [1] document ::= prolog element Misc*
10679 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10681 * Returns 0, or -1 in case of error. The parser context is augmented
10682 * as a result of the parsing.
10686 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
10688 xmlCharEncoding enc
;
10692 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10698 * SAX: detecting the level.
10700 xmlDetectSAX2(ctxt
);
10703 * SAX: beginning of the document processing.
10705 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10706 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10707 if (ctxt
->instate
== XML_PARSER_EOF
)
10710 if ((ctxt
->encoding
== NULL
) &&
10711 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
10713 * Get the first 4 bytes and detect the charset;
10714 * if enc != XML_CHAR_ENCODING_NONE,
10715 * plug in some encoding conversion routines.
10721 enc
= xmlDetectCharEncoding(&start
[0], 4);
10722 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10723 xmlSwitchEncoding(ctxt
, enc
);
10729 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10734 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10737 * Note that we will switch encoding on the fly.
10739 xmlParseXMLDecl(ctxt
);
10740 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10741 (ctxt
->instate
== XML_PARSER_EOF
)) {
10743 * The XML REC instructs us to stop parsing right here
10747 ctxt
->standalone
= ctxt
->input
->standalone
;
10750 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10752 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10753 ctxt
->sax
->startDocument(ctxt
->userData
);
10754 if (ctxt
->instate
== XML_PARSER_EOF
)
10756 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->input
!= NULL
) &&
10757 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->compressed
>= 0)) {
10758 ctxt
->myDoc
->compression
= ctxt
->input
->buf
->compressed
;
10762 * The Misc part of the Prolog
10764 xmlParseMisc(ctxt
);
10767 * Then possibly doc type declaration(s) and more Misc
10768 * (doctypedecl Misc*)?
10771 if (CMP9(CUR_PTR
, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10773 ctxt
->inSubset
= 1;
10774 xmlParseDocTypeDecl(ctxt
);
10776 ctxt
->instate
= XML_PARSER_DTD
;
10777 xmlParseInternalSubset(ctxt
);
10778 if (ctxt
->instate
== XML_PARSER_EOF
)
10783 * Create and update the external subset.
10785 ctxt
->inSubset
= 2;
10786 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
10787 (!ctxt
->disableSAX
))
10788 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
10789 ctxt
->extSubSystem
, ctxt
->extSubURI
);
10790 if (ctxt
->instate
== XML_PARSER_EOF
)
10792 ctxt
->inSubset
= 0;
10794 xmlCleanSpecialAttr(ctxt
);
10796 ctxt
->instate
= XML_PARSER_PROLOG
;
10797 xmlParseMisc(ctxt
);
10801 * Time to start parsing the tree itself
10805 xmlFatalErrMsg(ctxt
, XML_ERR_DOCUMENT_EMPTY
,
10806 "Start tag expected, '<' not found\n");
10808 ctxt
->instate
= XML_PARSER_CONTENT
;
10809 xmlParseElement(ctxt
);
10810 ctxt
->instate
= XML_PARSER_EPILOG
;
10814 * The Misc part at the end
10816 xmlParseMisc(ctxt
);
10819 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
10821 ctxt
->instate
= XML_PARSER_EOF
;
10825 * SAX: end of the document processing.
10827 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10828 ctxt
->sax
->endDocument(ctxt
->userData
);
10831 * Remove locally kept entity definitions if the tree was not built
10833 if ((ctxt
->myDoc
!= NULL
) &&
10834 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
10835 xmlFreeDoc(ctxt
->myDoc
);
10836 ctxt
->myDoc
= NULL
;
10839 if ((ctxt
->wellFormed
) && (ctxt
->myDoc
!= NULL
)) {
10840 ctxt
->myDoc
->properties
|= XML_DOC_WELLFORMED
;
10842 ctxt
->myDoc
->properties
|= XML_DOC_DTDVALID
;
10843 if (ctxt
->nsWellFormed
)
10844 ctxt
->myDoc
->properties
|= XML_DOC_NSVALID
;
10845 if (ctxt
->options
& XML_PARSE_OLD10
)
10846 ctxt
->myDoc
->properties
|= XML_DOC_OLD10
;
10848 if (! ctxt
->wellFormed
) {
10856 * xmlParseExtParsedEnt:
10857 * @ctxt: an XML parser context
10859 * parse a general parsed entity
10860 * An external general parsed entity is well-formed if it matches the
10861 * production labeled extParsedEnt.
10863 * [78] extParsedEnt ::= TextDecl? content
10865 * Returns 0, or -1 in case of error. The parser context is augmented
10866 * as a result of the parsing.
10870 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
10872 xmlCharEncoding enc
;
10874 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10877 xmlDetectSAX2(ctxt
);
10882 * SAX: beginning of the document processing.
10884 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10885 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10888 * Get the first 4 bytes and detect the charset;
10889 * if enc != XML_CHAR_ENCODING_NONE,
10890 * plug in some encoding conversion routines.
10892 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
10897 enc
= xmlDetectCharEncoding(start
, 4);
10898 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10899 xmlSwitchEncoding(ctxt
, enc
);
10905 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10909 * Check for the XMLDecl in the Prolog.
10912 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10915 * Note that we will switch encoding on the fly.
10917 xmlParseXMLDecl(ctxt
);
10918 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10920 * The XML REC instructs us to stop parsing right here
10926 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10928 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10929 ctxt
->sax
->startDocument(ctxt
->userData
);
10930 if (ctxt
->instate
== XML_PARSER_EOF
)
10934 * Doing validity checking on chunk doesn't make sense
10936 ctxt
->instate
= XML_PARSER_CONTENT
;
10937 ctxt
->validate
= 0;
10938 ctxt
->loadsubset
= 0;
10941 xmlParseContent(ctxt
);
10942 if (ctxt
->instate
== XML_PARSER_EOF
)
10945 if ((RAW
== '<') && (NXT(1) == '/')) {
10946 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
10947 } else if (RAW
!= 0) {
10948 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
10952 * SAX: end of the document processing.
10954 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10955 ctxt
->sax
->endDocument(ctxt
->userData
);
10957 if (! ctxt
->wellFormed
) return(-1);
10961 #ifdef LIBXML_PUSH_ENABLED
10962 /************************************************************************
10964 * Progressive parsing interfaces *
10966 ************************************************************************/
10969 * xmlParseLookupChar:
10970 * @ctxt: an XML parser context
10973 * Check whether the input buffer contains a character.
10976 xmlParseLookupChar(xmlParserCtxtPtr ctxt
, int c
) {
10977 const xmlChar
*cur
;
10979 if (ctxt
->checkIndex
== 0) {
10980 cur
= ctxt
->input
->cur
+ 1;
10982 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
10985 if (memchr(cur
, c
, ctxt
->input
->end
- cur
) == NULL
) {
10986 size_t index
= ctxt
->input
->end
- ctxt
->input
->cur
;
10988 if (index
> LONG_MAX
) {
10989 ctxt
->checkIndex
= 0;
10992 ctxt
->checkIndex
= index
;
10995 ctxt
->checkIndex
= 0;
11001 * xmlParseLookupString:
11002 * @ctxt: an XML parser context
11003 * @startDelta: delta to apply at the start
11005 * @strLen: length of string
11007 * Check whether the input buffer contains a string.
11009 static const xmlChar
*
11010 xmlParseLookupString(xmlParserCtxtPtr ctxt
, size_t startDelta
,
11011 const char *str
, size_t strLen
) {
11012 const xmlChar
*cur
, *term
;
11014 if (ctxt
->checkIndex
== 0) {
11015 cur
= ctxt
->input
->cur
+ startDelta
;
11017 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11020 term
= BAD_CAST
strstr((const char *) cur
, str
);
11021 if (term
== NULL
) {
11022 const xmlChar
*end
= ctxt
->input
->end
;
11025 /* Rescan (strLen - 1) characters. */
11026 if ((size_t) (end
- cur
) < strLen
)
11030 index
= end
- ctxt
->input
->cur
;
11031 if (index
> LONG_MAX
) {
11032 ctxt
->checkIndex
= 0;
11033 return(ctxt
->input
->end
- strLen
);
11035 ctxt
->checkIndex
= index
;
11037 ctxt
->checkIndex
= 0;
11044 * xmlParseLookupCharData:
11045 * @ctxt: an XML parser context
11047 * Check whether the input buffer contains terminated char data.
11050 xmlParseLookupCharData(xmlParserCtxtPtr ctxt
) {
11051 const xmlChar
*cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11052 const xmlChar
*end
= ctxt
->input
->end
;
11055 while (cur
< end
) {
11056 if ((*cur
== '<') || (*cur
== '&')) {
11057 ctxt
->checkIndex
= 0;
11063 index
= cur
- ctxt
->input
->cur
;
11064 if (index
> LONG_MAX
) {
11065 ctxt
->checkIndex
= 0;
11068 ctxt
->checkIndex
= index
;
11073 * xmlParseLookupGt:
11074 * @ctxt: an XML parser context
11076 * Check whether there's enough data in the input buffer to finish parsing
11077 * a start tag. This has to take quotes into account.
11080 xmlParseLookupGt(xmlParserCtxtPtr ctxt
) {
11081 const xmlChar
*cur
;
11082 const xmlChar
*end
= ctxt
->input
->end
;
11083 int state
= ctxt
->endCheckState
;
11086 if (ctxt
->checkIndex
== 0)
11087 cur
= ctxt
->input
->cur
+ 1;
11089 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11091 while (cur
< end
) {
11095 } else if (*cur
== '\'' || *cur
== '"') {
11097 } else if (*cur
== '>') {
11098 ctxt
->checkIndex
= 0;
11099 ctxt
->endCheckState
= 0;
11105 index
= cur
- ctxt
->input
->cur
;
11106 if (index
> LONG_MAX
) {
11107 ctxt
->checkIndex
= 0;
11108 ctxt
->endCheckState
= 0;
11111 ctxt
->checkIndex
= index
;
11112 ctxt
->endCheckState
= state
;
11117 * xmlParseLookupInternalSubset:
11118 * @ctxt: an XML parser context
11120 * Check whether there's enough data in the input buffer to finish parsing
11121 * the internal subset.
11124 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt
) {
11126 * Sorry, but progressive parsing of the internal subset is not
11127 * supported. We first check that the full content of the internal
11128 * subset is available and parsing is launched only at that point.
11129 * Internal subset ends with "']' S? '>'" in an unescaped section and
11130 * not in a ']]>' sequence which are conditional sections.
11132 const xmlChar
*cur
, *start
;
11133 const xmlChar
*end
= ctxt
->input
->end
;
11134 int state
= ctxt
->endCheckState
;
11137 if (ctxt
->checkIndex
== 0) {
11138 cur
= ctxt
->input
->cur
+ 1;
11140 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11144 while (cur
< end
) {
11145 if (state
== '-') {
11146 if ((*cur
== '-') &&
11155 else if (state
== ']') {
11157 ctxt
->checkIndex
= 0;
11158 ctxt
->endCheckState
= 0;
11161 if (IS_BLANK_CH(*cur
)) {
11163 } else if (*cur
!= ']') {
11169 else if (state
== ' ') {
11171 ctxt
->checkIndex
= 0;
11172 ctxt
->endCheckState
= 0;
11175 if (!IS_BLANK_CH(*cur
)) {
11181 else if (state
!= 0) {
11182 if (*cur
== state
) {
11187 else if (*cur
== '<') {
11188 if ((cur
[1] == '!') &&
11193 /* Don't treat <!--> as comment */
11198 else if ((*cur
== '"') || (*cur
== '\'') || (*cur
== ']')) {
11206 * Rescan the three last characters to detect "<!--" and "-->"
11207 * split across chunks.
11209 if ((state
== 0) || (state
== '-')) {
11210 if (cur
- start
< 3)
11215 index
= cur
- ctxt
->input
->cur
;
11216 if (index
> LONG_MAX
) {
11217 ctxt
->checkIndex
= 0;
11218 ctxt
->endCheckState
= 0;
11221 ctxt
->checkIndex
= index
;
11222 ctxt
->endCheckState
= state
;
11227 * xmlCheckCdataPush:
11228 * @utf: pointer to the block of characters
11229 * @len: length of the block in bytes
11230 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11232 * Check that the block of characters is okay as CData content [20]
11234 * Returns the number of bytes to pass if okay, a negative index where a
11235 * UTF-8 error occurred otherwise
11238 xmlCheckCdataPush(const xmlChar
*utf
, int len
, int complete
) {
11243 if ((utf
== NULL
) || (len
<= 0))
11246 for (ix
= 0; ix
< len
;) { /* string is 0-terminated */
11248 if ((c
& 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11251 else if ((c
== 0xA) || (c
== 0xD) || (c
== 0x9))
11255 } else if ((c
& 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11256 if (ix
+ 2 > len
) return(complete
? -ix
: ix
);
11257 if ((utf
[ix
+1] & 0xc0 ) != 0x80)
11259 codepoint
= (utf
[ix
] & 0x1f) << 6;
11260 codepoint
|= utf
[ix
+1] & 0x3f;
11261 if (!xmlIsCharQ(codepoint
))
11264 } else if ((c
& 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11265 if (ix
+ 3 > len
) return(complete
? -ix
: ix
);
11266 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11267 ((utf
[ix
+2] & 0xc0) != 0x80))
11269 codepoint
= (utf
[ix
] & 0xf) << 12;
11270 codepoint
|= (utf
[ix
+1] & 0x3f) << 6;
11271 codepoint
|= utf
[ix
+2] & 0x3f;
11272 if (!xmlIsCharQ(codepoint
))
11275 } else if ((c
& 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11276 if (ix
+ 4 > len
) return(complete
? -ix
: ix
);
11277 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11278 ((utf
[ix
+2] & 0xc0) != 0x80) ||
11279 ((utf
[ix
+3] & 0xc0) != 0x80))
11281 codepoint
= (utf
[ix
] & 0x7) << 18;
11282 codepoint
|= (utf
[ix
+1] & 0x3f) << 12;
11283 codepoint
|= (utf
[ix
+2] & 0x3f) << 6;
11284 codepoint
|= utf
[ix
+3] & 0x3f;
11285 if (!xmlIsCharQ(codepoint
))
11288 } else /* unknown encoding */
11295 * xmlParseTryOrFinish:
11296 * @ctxt: an XML parser context
11297 * @terminate: last chunk indicator
11299 * Try to progress on parsing
11301 * Returns zero if no parsing was possible
11304 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
11310 if (ctxt
->input
== NULL
)
11314 switch (ctxt
->instate
) {
11315 case XML_PARSER_EOF
:
11316 xmlGenericError(xmlGenericErrorContext
,
11317 "PP: try EOF\n"); break;
11318 case XML_PARSER_START
:
11319 xmlGenericError(xmlGenericErrorContext
,
11320 "PP: try START\n"); break;
11321 case XML_PARSER_MISC
:
11322 xmlGenericError(xmlGenericErrorContext
,
11323 "PP: try MISC\n");break;
11324 case XML_PARSER_COMMENT
:
11325 xmlGenericError(xmlGenericErrorContext
,
11326 "PP: try COMMENT\n");break;
11327 case XML_PARSER_PROLOG
:
11328 xmlGenericError(xmlGenericErrorContext
,
11329 "PP: try PROLOG\n");break;
11330 case XML_PARSER_START_TAG
:
11331 xmlGenericError(xmlGenericErrorContext
,
11332 "PP: try START_TAG\n");break;
11333 case XML_PARSER_CONTENT
:
11334 xmlGenericError(xmlGenericErrorContext
,
11335 "PP: try CONTENT\n");break;
11336 case XML_PARSER_CDATA_SECTION
:
11337 xmlGenericError(xmlGenericErrorContext
,
11338 "PP: try CDATA_SECTION\n");break;
11339 case XML_PARSER_END_TAG
:
11340 xmlGenericError(xmlGenericErrorContext
,
11341 "PP: try END_TAG\n");break;
11342 case XML_PARSER_ENTITY_DECL
:
11343 xmlGenericError(xmlGenericErrorContext
,
11344 "PP: try ENTITY_DECL\n");break;
11345 case XML_PARSER_ENTITY_VALUE
:
11346 xmlGenericError(xmlGenericErrorContext
,
11347 "PP: try ENTITY_VALUE\n");break;
11348 case XML_PARSER_ATTRIBUTE_VALUE
:
11349 xmlGenericError(xmlGenericErrorContext
,
11350 "PP: try ATTRIBUTE_VALUE\n");break;
11351 case XML_PARSER_DTD
:
11352 xmlGenericError(xmlGenericErrorContext
,
11353 "PP: try DTD\n");break;
11354 case XML_PARSER_EPILOG
:
11355 xmlGenericError(xmlGenericErrorContext
,
11356 "PP: try EPILOG\n");break;
11357 case XML_PARSER_PI
:
11358 xmlGenericError(xmlGenericErrorContext
,
11359 "PP: try PI\n");break;
11360 case XML_PARSER_IGNORE
:
11361 xmlGenericError(xmlGenericErrorContext
,
11362 "PP: try IGNORE\n");break;
11366 if ((ctxt
->input
!= NULL
) &&
11367 (ctxt
->input
->cur
- ctxt
->input
->base
> 4096)) {
11368 xmlParserShrink(ctxt
);
11371 while (ctxt
->instate
!= XML_PARSER_EOF
) {
11372 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11375 if (ctxt
->input
== NULL
) break;
11376 if (ctxt
->input
->buf
!= NULL
) {
11378 * If we are operating on converted input, try to flush
11379 * remaining chars to avoid them stalling in the non-converted
11382 if ((ctxt
->input
->buf
->raw
!= NULL
) &&
11383 (xmlBufIsEmpty(ctxt
->input
->buf
->raw
) == 0)) {
11384 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
11386 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
11388 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
11389 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
11393 avail
= ctxt
->input
->end
- ctxt
->input
->cur
;
11396 switch (ctxt
->instate
) {
11397 case XML_PARSER_EOF
:
11399 * Document parsing is done !
11402 case XML_PARSER_START
:
11403 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
11405 xmlCharEncoding enc
;
11408 * Very first chars read from the document flow.
11414 * Get the 4 first bytes and decode the charset
11415 * if enc != XML_CHAR_ENCODING_NONE
11416 * plug some encoding conversion routines,
11417 * else xmlSwitchEncoding will set to (default)
11424 enc
= xmlDetectCharEncoding(start
, 4);
11426 * We need more bytes to detect EBCDIC code pages.
11427 * See xmlDetectEBCDIC.
11429 if ((enc
== XML_CHAR_ENCODING_EBCDIC
) &&
11430 (!terminate
) && (avail
< 200))
11432 xmlSwitchEncoding(ctxt
, enc
);
11438 cur
= ctxt
->input
->cur
[0];
11439 next
= ctxt
->input
->cur
[1];
11441 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11442 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11443 &xmlDefaultSAXLocator
);
11444 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11445 xmlHaltParser(ctxt
);
11447 xmlGenericError(xmlGenericErrorContext
,
11448 "PP: entering EOF\n");
11450 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11451 ctxt
->sax
->endDocument(ctxt
->userData
);
11454 if ((cur
== '<') && (next
== '?')) {
11455 /* PI or XML decl */
11456 if (avail
< 5) goto done
;
11457 if ((!terminate
) &&
11458 (!xmlParseLookupString(ctxt
, 2, "?>", 2)))
11460 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11461 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11462 &xmlDefaultSAXLocator
);
11463 if ((ctxt
->input
->cur
[2] == 'x') &&
11464 (ctxt
->input
->cur
[3] == 'm') &&
11465 (ctxt
->input
->cur
[4] == 'l') &&
11466 (IS_BLANK_CH(ctxt
->input
->cur
[5]))) {
11469 xmlGenericError(xmlGenericErrorContext
,
11470 "PP: Parsing XML Decl\n");
11472 xmlParseXMLDecl(ctxt
);
11473 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
11475 * The XML REC instructs us to stop parsing right
11478 xmlHaltParser(ctxt
);
11481 ctxt
->standalone
= ctxt
->input
->standalone
;
11482 if ((ctxt
->encoding
== NULL
) &&
11483 (ctxt
->input
->encoding
!= NULL
))
11484 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
11485 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11486 (!ctxt
->disableSAX
))
11487 ctxt
->sax
->startDocument(ctxt
->userData
);
11488 ctxt
->instate
= XML_PARSER_MISC
;
11490 xmlGenericError(xmlGenericErrorContext
,
11491 "PP: entering MISC\n");
11494 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11495 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11496 (!ctxt
->disableSAX
))
11497 ctxt
->sax
->startDocument(ctxt
->userData
);
11498 ctxt
->instate
= XML_PARSER_MISC
;
11500 xmlGenericError(xmlGenericErrorContext
,
11501 "PP: entering MISC\n");
11505 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11506 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11507 &xmlDefaultSAXLocator
);
11508 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11509 if (ctxt
->version
== NULL
) {
11510 xmlErrMemory(ctxt
, NULL
);
11513 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11514 (!ctxt
->disableSAX
))
11515 ctxt
->sax
->startDocument(ctxt
->userData
);
11516 ctxt
->instate
= XML_PARSER_MISC
;
11518 xmlGenericError(xmlGenericErrorContext
,
11519 "PP: entering MISC\n");
11523 case XML_PARSER_START_TAG
: {
11524 const xmlChar
*name
;
11525 const xmlChar
*prefix
= NULL
;
11526 const xmlChar
*URI
= NULL
;
11527 int line
= ctxt
->input
->line
;
11528 int nsNr
= ctxt
->nsNr
;
11530 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11532 cur
= ctxt
->input
->cur
[0];
11534 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11535 xmlHaltParser(ctxt
);
11536 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11537 ctxt
->sax
->endDocument(ctxt
->userData
);
11540 if ((!terminate
) && (!xmlParseLookupGt(ctxt
)))
11542 if (ctxt
->spaceNr
== 0)
11543 spacePush(ctxt
, -1);
11544 else if (*ctxt
->space
== -2)
11545 spacePush(ctxt
, -1);
11547 spacePush(ctxt
, *ctxt
->space
);
11548 #ifdef LIBXML_SAX1_ENABLED
11550 #endif /* LIBXML_SAX1_ENABLED */
11551 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
11552 #ifdef LIBXML_SAX1_ENABLED
11554 name
= xmlParseStartTag(ctxt
);
11555 #endif /* LIBXML_SAX1_ENABLED */
11556 if (ctxt
->instate
== XML_PARSER_EOF
)
11558 if (name
== NULL
) {
11560 xmlHaltParser(ctxt
);
11561 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11562 ctxt
->sax
->endDocument(ctxt
->userData
);
11565 #ifdef LIBXML_VALID_ENABLED
11567 * [ VC: Root Element Type ]
11568 * The Name in the document type declaration must match
11569 * the element type of the root element.
11571 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
11572 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
11573 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
11574 #endif /* LIBXML_VALID_ENABLED */
11577 * Check for an Empty Element.
11579 if ((RAW
== '/') && (NXT(1) == '>')) {
11583 if ((ctxt
->sax
!= NULL
) &&
11584 (ctxt
->sax
->endElementNs
!= NULL
) &&
11585 (!ctxt
->disableSAX
))
11586 ctxt
->sax
->endElementNs(ctxt
->userData
, name
,
11588 if (ctxt
->nsNr
- nsNr
> 0)
11589 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
11590 #ifdef LIBXML_SAX1_ENABLED
11592 if ((ctxt
->sax
!= NULL
) &&
11593 (ctxt
->sax
->endElement
!= NULL
) &&
11594 (!ctxt
->disableSAX
))
11595 ctxt
->sax
->endElement(ctxt
->userData
, name
);
11596 #endif /* LIBXML_SAX1_ENABLED */
11598 if (ctxt
->instate
== XML_PARSER_EOF
)
11601 if (ctxt
->nameNr
== 0) {
11602 ctxt
->instate
= XML_PARSER_EPILOG
;
11604 ctxt
->instate
= XML_PARSER_CONTENT
;
11611 xmlFatalErrMsgStr(ctxt
, XML_ERR_GT_REQUIRED
,
11612 "Couldn't find end of Start Tag %s\n",
11617 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
11619 ctxt
->instate
= XML_PARSER_CONTENT
;
11622 case XML_PARSER_CONTENT
: {
11623 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11625 cur
= ctxt
->input
->cur
[0];
11626 next
= ctxt
->input
->cur
[1];
11628 if ((cur
== '<') && (next
== '/')) {
11629 ctxt
->instate
= XML_PARSER_END_TAG
;
11631 } else if ((cur
== '<') && (next
== '?')) {
11632 if ((!terminate
) &&
11633 (!xmlParseLookupString(ctxt
, 2, "?>", 2)))
11636 ctxt
->instate
= XML_PARSER_CONTENT
;
11637 } else if ((cur
== '<') && (next
!= '!')) {
11638 ctxt
->instate
= XML_PARSER_START_TAG
;
11640 } else if ((cur
== '<') && (next
== '!') &&
11641 (ctxt
->input
->cur
[2] == '-') &&
11642 (ctxt
->input
->cur
[3] == '-')) {
11643 if ((!terminate
) &&
11644 (!xmlParseLookupString(ctxt
, 4, "-->", 3)))
11646 xmlParseComment(ctxt
);
11647 ctxt
->instate
= XML_PARSER_CONTENT
;
11648 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
11649 (ctxt
->input
->cur
[2] == '[') &&
11650 (ctxt
->input
->cur
[3] == 'C') &&
11651 (ctxt
->input
->cur
[4] == 'D') &&
11652 (ctxt
->input
->cur
[5] == 'A') &&
11653 (ctxt
->input
->cur
[6] == 'T') &&
11654 (ctxt
->input
->cur
[7] == 'A') &&
11655 (ctxt
->input
->cur
[8] == '[')) {
11657 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
11659 } else if ((cur
== '<') && (next
== '!') &&
11662 } else if (cur
== '<') {
11663 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
11664 "detected an error in element content\n");
11666 } else if (cur
== '&') {
11667 if ((!terminate
) && (!xmlParseLookupChar(ctxt
, ';')))
11669 xmlParseReference(ctxt
);
11671 /* TODO Avoid the extra copy, handle directly !!! */
11673 * Goal of the following test is:
11674 * - minimize calls to the SAX 'character' callback
11675 * when they are mergeable
11676 * - handle a problem for isBlank when we only parse
11677 * a sequence of blank chars and the next one is
11678 * not available to check against '<' presence.
11679 * - tries to homogenize the differences in SAX
11680 * callbacks between the push and pull versions
11683 if ((ctxt
->inputNr
== 1) &&
11684 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
11685 if ((!terminate
) && (!xmlParseLookupCharData(ctxt
)))
11688 ctxt
->checkIndex
= 0;
11689 xmlParseCharDataInternal(ctxt
, !terminate
);
11693 case XML_PARSER_END_TAG
:
11696 if ((!terminate
) && (!xmlParseLookupChar(ctxt
, '>')))
11699 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
11702 #ifdef LIBXML_SAX1_ENABLED
11704 xmlParseEndTag1(ctxt
, 0);
11705 #endif /* LIBXML_SAX1_ENABLED */
11706 if (ctxt
->instate
== XML_PARSER_EOF
) {
11708 } else if (ctxt
->nameNr
== 0) {
11709 ctxt
->instate
= XML_PARSER_EPILOG
;
11711 ctxt
->instate
= XML_PARSER_CONTENT
;
11714 case XML_PARSER_CDATA_SECTION
: {
11716 * Push mode requires the SAX cdataBlock callback to merge
11717 * contiguous callbacks back together.
11719 const xmlChar
*term
;
11723 * Don't call xmlParseLookupString. If 'terminate'
11724 * is set, checkIndex is invalid.
11726 term
= BAD_CAST
strstr((const char *) ctxt
->input
->cur
,
11729 term
= xmlParseLookupString(ctxt
, 0, "]]>", 3);
11732 if (term
== NULL
) {
11736 /* Unfinished CDATA section */
11737 size
= ctxt
->input
->end
- ctxt
->input
->cur
;
11739 if (avail
< XML_PARSER_BIG_BUFFER_SIZE
+ 2)
11741 ctxt
->checkIndex
= 0;
11742 /* XXX: Why don't we pass the full buffer? */
11743 size
= XML_PARSER_BIG_BUFFER_SIZE
;
11745 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, size
, 0);
11748 ctxt
->input
->cur
+= tmp
;
11749 goto encoding_error
;
11751 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
11752 if (ctxt
->sax
->cdataBlock
!= NULL
)
11753 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11754 ctxt
->input
->cur
, tmp
);
11755 else if (ctxt
->sax
->characters
!= NULL
)
11756 ctxt
->sax
->characters(ctxt
->userData
,
11757 ctxt
->input
->cur
, tmp
);
11759 if (ctxt
->instate
== XML_PARSER_EOF
)
11763 int base
= term
- CUR_PTR
;
11766 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, base
, 1);
11767 if ((tmp
< 0) || (tmp
!= base
)) {
11769 ctxt
->input
->cur
+= tmp
;
11770 goto encoding_error
;
11772 if ((ctxt
->sax
!= NULL
) && (base
== 0) &&
11773 (ctxt
->sax
->cdataBlock
!= NULL
) &&
11774 (!ctxt
->disableSAX
)) {
11776 * Special case to provide identical behaviour
11777 * between pull and push parsers on empty CDATA
11780 if ((ctxt
->input
->cur
- ctxt
->input
->base
>= 9) &&
11781 (!strncmp((const char *)&ctxt
->input
->cur
[-9],
11783 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11785 } else if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
11786 (!ctxt
->disableSAX
)) {
11787 if (ctxt
->sax
->cdataBlock
!= NULL
)
11788 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11789 ctxt
->input
->cur
, base
);
11790 else if (ctxt
->sax
->characters
!= NULL
)
11791 ctxt
->sax
->characters(ctxt
->userData
,
11792 ctxt
->input
->cur
, base
);
11794 if (ctxt
->instate
== XML_PARSER_EOF
)
11797 ctxt
->instate
= XML_PARSER_CONTENT
;
11799 xmlGenericError(xmlGenericErrorContext
,
11800 "PP: entering CONTENT\n");
11805 case XML_PARSER_MISC
:
11806 case XML_PARSER_PROLOG
:
11807 case XML_PARSER_EPILOG
:
11809 avail
= ctxt
->input
->end
- ctxt
->input
->cur
;
11812 cur
= ctxt
->input
->cur
[0];
11813 next
= ctxt
->input
->cur
[1];
11814 if ((cur
== '<') && (next
== '?')) {
11815 if ((!terminate
) &&
11816 (!xmlParseLookupString(ctxt
, 2, "?>", 2)))
11819 xmlGenericError(xmlGenericErrorContext
,
11820 "PP: Parsing PI\n");
11823 if (ctxt
->instate
== XML_PARSER_EOF
)
11825 } else if ((cur
== '<') && (next
== '!') &&
11826 (ctxt
->input
->cur
[2] == '-') &&
11827 (ctxt
->input
->cur
[3] == '-')) {
11828 if ((!terminate
) &&
11829 (!xmlParseLookupString(ctxt
, 4, "-->", 3)))
11832 xmlGenericError(xmlGenericErrorContext
,
11833 "PP: Parsing Comment\n");
11835 xmlParseComment(ctxt
);
11836 if (ctxt
->instate
== XML_PARSER_EOF
)
11838 } else if ((ctxt
->instate
== XML_PARSER_MISC
) &&
11839 (cur
== '<') && (next
== '!') &&
11840 (ctxt
->input
->cur
[2] == 'D') &&
11841 (ctxt
->input
->cur
[3] == 'O') &&
11842 (ctxt
->input
->cur
[4] == 'C') &&
11843 (ctxt
->input
->cur
[5] == 'T') &&
11844 (ctxt
->input
->cur
[6] == 'Y') &&
11845 (ctxt
->input
->cur
[7] == 'P') &&
11846 (ctxt
->input
->cur
[8] == 'E')) {
11847 if ((!terminate
) && (!xmlParseLookupGt(ctxt
)))
11850 xmlGenericError(xmlGenericErrorContext
,
11851 "PP: Parsing internal subset\n");
11853 ctxt
->inSubset
= 1;
11854 xmlParseDocTypeDecl(ctxt
);
11855 if (ctxt
->instate
== XML_PARSER_EOF
)
11858 ctxt
->instate
= XML_PARSER_DTD
;
11860 xmlGenericError(xmlGenericErrorContext
,
11861 "PP: entering DTD\n");
11865 * Create and update the external subset.
11867 ctxt
->inSubset
= 2;
11868 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11869 (ctxt
->sax
->externalSubset
!= NULL
))
11870 ctxt
->sax
->externalSubset(ctxt
->userData
,
11871 ctxt
->intSubName
, ctxt
->extSubSystem
,
11873 ctxt
->inSubset
= 0;
11874 xmlCleanSpecialAttr(ctxt
);
11875 ctxt
->instate
= XML_PARSER_PROLOG
;
11877 xmlGenericError(xmlGenericErrorContext
,
11878 "PP: entering PROLOG\n");
11881 } else if ((cur
== '<') && (next
== '!') &&
11883 (ctxt
->instate
== XML_PARSER_MISC
? 9 : 4))) {
11885 } else if (ctxt
->instate
== XML_PARSER_EPILOG
) {
11886 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11887 xmlHaltParser(ctxt
);
11889 xmlGenericError(xmlGenericErrorContext
,
11890 "PP: entering EOF\n");
11892 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11893 ctxt
->sax
->endDocument(ctxt
->userData
);
11896 ctxt
->instate
= XML_PARSER_START_TAG
;
11898 xmlGenericError(xmlGenericErrorContext
,
11899 "PP: entering START_TAG\n");
11903 case XML_PARSER_DTD
: {
11904 if ((!terminate
) && (!xmlParseLookupInternalSubset(ctxt
)))
11906 xmlParseInternalSubset(ctxt
);
11907 if (ctxt
->instate
== XML_PARSER_EOF
)
11909 ctxt
->inSubset
= 2;
11910 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11911 (ctxt
->sax
->externalSubset
!= NULL
))
11912 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
11913 ctxt
->extSubSystem
, ctxt
->extSubURI
);
11914 ctxt
->inSubset
= 0;
11915 xmlCleanSpecialAttr(ctxt
);
11916 if (ctxt
->instate
== XML_PARSER_EOF
)
11918 ctxt
->instate
= XML_PARSER_PROLOG
;
11920 xmlGenericError(xmlGenericErrorContext
,
11921 "PP: entering PROLOG\n");
11925 case XML_PARSER_COMMENT
:
11926 xmlGenericError(xmlGenericErrorContext
,
11927 "PP: internal error, state == COMMENT\n");
11928 ctxt
->instate
= XML_PARSER_CONTENT
;
11930 xmlGenericError(xmlGenericErrorContext
,
11931 "PP: entering CONTENT\n");
11934 case XML_PARSER_IGNORE
:
11935 xmlGenericError(xmlGenericErrorContext
,
11936 "PP: internal error, state == IGNORE");
11937 ctxt
->instate
= XML_PARSER_DTD
;
11939 xmlGenericError(xmlGenericErrorContext
,
11940 "PP: entering DTD\n");
11943 case XML_PARSER_PI
:
11944 xmlGenericError(xmlGenericErrorContext
,
11945 "PP: internal error, state == PI\n");
11946 ctxt
->instate
= XML_PARSER_CONTENT
;
11948 xmlGenericError(xmlGenericErrorContext
,
11949 "PP: entering CONTENT\n");
11952 case XML_PARSER_ENTITY_DECL
:
11953 xmlGenericError(xmlGenericErrorContext
,
11954 "PP: internal error, state == ENTITY_DECL\n");
11955 ctxt
->instate
= XML_PARSER_DTD
;
11957 xmlGenericError(xmlGenericErrorContext
,
11958 "PP: entering DTD\n");
11961 case XML_PARSER_ENTITY_VALUE
:
11962 xmlGenericError(xmlGenericErrorContext
,
11963 "PP: internal error, state == ENTITY_VALUE\n");
11964 ctxt
->instate
= XML_PARSER_CONTENT
;
11966 xmlGenericError(xmlGenericErrorContext
,
11967 "PP: entering DTD\n");
11970 case XML_PARSER_ATTRIBUTE_VALUE
:
11971 xmlGenericError(xmlGenericErrorContext
,
11972 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11973 ctxt
->instate
= XML_PARSER_START_TAG
;
11975 xmlGenericError(xmlGenericErrorContext
,
11976 "PP: entering START_TAG\n");
11979 case XML_PARSER_SYSTEM_LITERAL
:
11980 xmlGenericError(xmlGenericErrorContext
,
11981 "PP: internal error, state == SYSTEM_LITERAL\n");
11982 ctxt
->instate
= XML_PARSER_START_TAG
;
11984 xmlGenericError(xmlGenericErrorContext
,
11985 "PP: entering START_TAG\n");
11988 case XML_PARSER_PUBLIC_LITERAL
:
11989 xmlGenericError(xmlGenericErrorContext
,
11990 "PP: internal error, state == PUBLIC_LITERAL\n");
11991 ctxt
->instate
= XML_PARSER_START_TAG
;
11993 xmlGenericError(xmlGenericErrorContext
,
11994 "PP: entering START_TAG\n");
12001 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
12005 if (ctxt
->input
->end
- ctxt
->input
->cur
< 4) {
12006 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
12007 "Input is not proper UTF-8, indicate encoding !\n",
12012 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12013 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
12014 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
12015 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
12016 "Input is not proper UTF-8, indicate encoding !\n%s",
12017 BAD_CAST buffer
, NULL
);
12024 * @ctxt: an XML parser context
12025 * @chunk: a char array
12026 * @size: the size in byte of the chunk
12027 * @terminate: last chunk indicator
12029 * Parse a Chunk of memory
12031 * Returns zero if no error, the xmlParserErrors otherwise.
12034 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
12039 return(XML_ERR_INTERNAL_ERROR
);
12040 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12041 return(ctxt
->errNo
);
12042 if (ctxt
->instate
== XML_PARSER_EOF
)
12044 if (ctxt
->input
== NULL
)
12047 ctxt
->progressive
= 1;
12048 if (ctxt
->instate
== XML_PARSER_START
)
12049 xmlDetectSAX2(ctxt
);
12050 if ((size
> 0) && (chunk
!= NULL
) && (!terminate
) &&
12051 (chunk
[size
- 1] == '\r')) {
12056 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
12057 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
12058 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12059 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12062 res
= xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12063 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12065 ctxt
->errNo
= XML_PARSER_EOF
;
12066 xmlHaltParser(ctxt
);
12067 return (XML_PARSER_EOF
);
12070 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12073 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
12074 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
12075 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
12076 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
12077 (in
->raw
!= NULL
)) {
12079 size_t base
= xmlBufGetInputBase(in
->buffer
, ctxt
->input
);
12080 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12082 nbchars
= xmlCharEncInput(in
, terminate
);
12083 xmlBufSetInputBaseCur(in
->buffer
, ctxt
->input
, base
, current
);
12086 xmlGenericError(xmlGenericErrorContext
,
12087 "xmlParseChunk: encoder error\n");
12088 xmlHaltParser(ctxt
);
12089 return(XML_ERR_INVALID_ENCODING
);
12095 xmlParseTryOrFinish(ctxt
, terminate
);
12096 if (ctxt
->instate
== XML_PARSER_EOF
)
12097 return(ctxt
->errNo
);
12099 if ((ctxt
->input
!= NULL
) &&
12100 (((ctxt
->input
->end
- ctxt
->input
->cur
) > XML_MAX_LOOKUP_LIMIT
) ||
12101 ((ctxt
->input
->cur
- ctxt
->input
->base
) > XML_MAX_LOOKUP_LIMIT
)) &&
12102 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
12103 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
12104 xmlHaltParser(ctxt
);
12106 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12107 return(ctxt
->errNo
);
12109 if ((end_in_lf
== 1) && (ctxt
->input
!= NULL
) &&
12110 (ctxt
->input
->buf
!= NULL
)) {
12111 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
12113 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12115 xmlParserInputBufferPush(ctxt
->input
->buf
, 1, "\r");
12117 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
12122 * Check for termination
12124 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
12125 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
12126 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12128 if ((ctxt
->instate
== XML_PARSER_EPILOG
) &&
12129 (ctxt
->input
->cur
< ctxt
->input
->end
)) {
12130 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12132 if (ctxt
->instate
!= XML_PARSER_EOF
) {
12133 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
12134 ctxt
->sax
->endDocument(ctxt
->userData
);
12136 ctxt
->instate
= XML_PARSER_EOF
;
12138 if (ctxt
->wellFormed
== 0)
12139 return((xmlParserErrors
) ctxt
->errNo
);
12144 /************************************************************************
12146 * I/O front end functions to the parser *
12148 ************************************************************************/
12151 * xmlCreatePushParserCtxt:
12152 * @sax: a SAX handler
12153 * @user_data: The user data returned on SAX callbacks
12154 * @chunk: a pointer to an array of chars
12155 * @size: number of chars in the array
12156 * @filename: an optional file name or URI
12158 * Create a parser context for using the XML parser in push mode.
12159 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12160 * the encoding. The remaining characters will be parsed so they
12161 * don't need to be fed in again through xmlParseChunk.
12162 * To allow content encoding detection, @size should be >= 4
12163 * The value of @filename is used for fetching external entities
12164 * and error/warning reports.
12166 * Returns the new parser context or NULL
12170 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12171 const char *chunk
, int size
, const char *filename
) {
12172 xmlParserCtxtPtr ctxt
;
12173 xmlParserInputPtr inputStream
;
12174 xmlParserInputBufferPtr buf
;
12176 buf
= xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE
);
12177 if (buf
== NULL
) return(NULL
);
12179 ctxt
= xmlNewSAXParserCtxt(sax
, user_data
);
12180 if (ctxt
== NULL
) {
12181 xmlErrMemory(NULL
, "creating parser: out of memory\n");
12182 xmlFreeParserInputBuffer(buf
);
12185 ctxt
->dictNames
= 1;
12186 if (filename
== NULL
) {
12187 ctxt
->directory
= NULL
;
12189 ctxt
->directory
= xmlParserGetDirectory(filename
);
12192 inputStream
= xmlNewInputStream(ctxt
);
12193 if (inputStream
== NULL
) {
12194 xmlFreeParserCtxt(ctxt
);
12195 xmlFreeParserInputBuffer(buf
);
12199 if (filename
== NULL
)
12200 inputStream
->filename
= NULL
;
12202 inputStream
->filename
= (char *)
12203 xmlCanonicPath((const xmlChar
*) filename
);
12204 if (inputStream
->filename
== NULL
) {
12205 xmlFreeInputStream(inputStream
);
12206 xmlFreeParserCtxt(ctxt
);
12207 xmlFreeParserInputBuffer(buf
);
12211 inputStream
->buf
= buf
;
12212 xmlBufResetInput(inputStream
->buf
->buffer
, inputStream
);
12213 inputPush(ctxt
, inputStream
);
12216 * If the caller didn't provide an initial 'chunk' for determining
12217 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12218 * that it can be automatically determined later
12220 ctxt
->charset
= XML_CHAR_ENCODING_NONE
;
12222 if ((size
!= 0) && (chunk
!= NULL
) &&
12223 (ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
12224 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12225 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12227 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12229 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12231 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12237 #endif /* LIBXML_PUSH_ENABLED */
12241 * @ctxt: an XML parser context
12243 * Blocks further parser processing
12246 xmlStopParser(xmlParserCtxtPtr ctxt
) {
12249 xmlHaltParser(ctxt
);
12250 ctxt
->errNo
= XML_ERR_USER_STOP
;
12254 * xmlCreateIOParserCtxt:
12255 * @sax: a SAX handler
12256 * @user_data: The user data returned on SAX callbacks
12257 * @ioread: an I/O read function
12258 * @ioclose: an I/O close function
12259 * @ioctx: an I/O handler
12260 * @enc: the charset encoding if known
12262 * Create a parser context for using the XML parser with an existing
12265 * Returns the new parser context or NULL
12268 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12269 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
12270 void *ioctx
, xmlCharEncoding enc
) {
12271 xmlParserCtxtPtr ctxt
;
12272 xmlParserInputPtr inputStream
;
12273 xmlParserInputBufferPtr buf
;
12275 if (ioread
== NULL
) return(NULL
);
12277 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
12279 if (ioclose
!= NULL
)
12284 ctxt
= xmlNewSAXParserCtxt(sax
, user_data
);
12285 if (ctxt
== NULL
) {
12286 xmlFreeParserInputBuffer(buf
);
12290 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
12291 if (inputStream
== NULL
) {
12292 xmlFreeParserCtxt(ctxt
);
12295 inputPush(ctxt
, inputStream
);
12300 #ifdef LIBXML_VALID_ENABLED
12301 /************************************************************************
12303 * Front ends when parsing a DTD *
12305 ************************************************************************/
12309 * @sax: the SAX handler block or NULL
12310 * @input: an Input Buffer
12311 * @enc: the charset encoding if known
12313 * Load and parse a DTD
12315 * Returns the resulting xmlDtdPtr or NULL in case of error.
12316 * @input will be freed by the function in any case.
12320 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
12321 xmlCharEncoding enc
) {
12322 xmlDtdPtr ret
= NULL
;
12323 xmlParserCtxtPtr ctxt
;
12324 xmlParserInputPtr pinput
= NULL
;
12330 ctxt
= xmlNewSAXParserCtxt(sax
, NULL
);
12331 if (ctxt
== NULL
) {
12332 xmlFreeParserInputBuffer(input
);
12336 /* We are loading a DTD */
12337 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12339 xmlDetectSAX2(ctxt
);
12342 * generate a parser input from the I/O handler
12345 pinput
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
12346 if (pinput
== NULL
) {
12347 xmlFreeParserInputBuffer(input
);
12348 xmlFreeParserCtxt(ctxt
);
12353 * plug some encoding conversion routines here.
12355 if (xmlPushInput(ctxt
, pinput
) < 0) {
12356 xmlFreeParserCtxt(ctxt
);
12359 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12360 xmlSwitchEncoding(ctxt
, enc
);
12363 pinput
->filename
= NULL
;
12366 pinput
->base
= ctxt
->input
->cur
;
12367 pinput
->cur
= ctxt
->input
->cur
;
12368 pinput
->free
= NULL
;
12371 * let's parse that entity knowing it's an external subset.
12373 ctxt
->inSubset
= 2;
12374 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12375 if (ctxt
->myDoc
== NULL
) {
12376 xmlErrMemory(ctxt
, "New Doc failed");
12379 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12380 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12381 BAD_CAST
"none", BAD_CAST
"none");
12383 if ((enc
== XML_CHAR_ENCODING_NONE
) &&
12384 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
12386 * Get the 4 first bytes and decode the charset
12387 * if enc != XML_CHAR_ENCODING_NONE
12388 * plug some encoding conversion routines.
12394 enc
= xmlDetectCharEncoding(start
, 4);
12395 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12396 xmlSwitchEncoding(ctxt
, enc
);
12400 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
12402 if (ctxt
->myDoc
!= NULL
) {
12403 if (ctxt
->wellFormed
) {
12404 ret
= ctxt
->myDoc
->extSubset
;
12405 ctxt
->myDoc
->extSubset
= NULL
;
12410 tmp
= ret
->children
;
12411 while (tmp
!= NULL
) {
12419 xmlFreeDoc(ctxt
->myDoc
);
12420 ctxt
->myDoc
= NULL
;
12422 xmlFreeParserCtxt(ctxt
);
12429 * @sax: the SAX handler block
12430 * @ExternalID: a NAME* containing the External ID of the DTD
12431 * @SystemID: a NAME* containing the URL to the DTD
12433 * DEPRECATED: Don't use.
12435 * Load and parse an external subset.
12437 * Returns the resulting xmlDtdPtr or NULL in case of error.
12441 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
12442 const xmlChar
*SystemID
) {
12443 xmlDtdPtr ret
= NULL
;
12444 xmlParserCtxtPtr ctxt
;
12445 xmlParserInputPtr input
= NULL
;
12446 xmlCharEncoding enc
;
12447 xmlChar
* systemIdCanonic
;
12449 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
12451 ctxt
= xmlNewSAXParserCtxt(sax
, NULL
);
12452 if (ctxt
== NULL
) {
12456 /* We are loading a DTD */
12457 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12460 * Canonicalise the system ID
12462 systemIdCanonic
= xmlCanonicPath(SystemID
);
12463 if ((SystemID
!= NULL
) && (systemIdCanonic
== NULL
)) {
12464 xmlFreeParserCtxt(ctxt
);
12469 * Ask the entity resolver to load the DTD
12472 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
12473 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
,
12475 if (input
== NULL
) {
12476 xmlFreeParserCtxt(ctxt
);
12477 if (systemIdCanonic
!= NULL
)
12478 xmlFree(systemIdCanonic
);
12483 * plug some encoding conversion routines here.
12485 if (xmlPushInput(ctxt
, input
) < 0) {
12486 xmlFreeParserCtxt(ctxt
);
12487 if (systemIdCanonic
!= NULL
)
12488 xmlFree(systemIdCanonic
);
12491 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12492 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
12493 xmlSwitchEncoding(ctxt
, enc
);
12496 if (input
->filename
== NULL
)
12497 input
->filename
= (char *) systemIdCanonic
;
12499 xmlFree(systemIdCanonic
);
12502 input
->base
= ctxt
->input
->cur
;
12503 input
->cur
= ctxt
->input
->cur
;
12504 input
->free
= NULL
;
12507 * let's parse that entity knowing it's an external subset.
12509 ctxt
->inSubset
= 2;
12510 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12511 if (ctxt
->myDoc
== NULL
) {
12512 xmlErrMemory(ctxt
, "New Doc failed");
12513 xmlFreeParserCtxt(ctxt
);
12516 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12517 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12518 ExternalID
, SystemID
);
12519 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
12521 if (ctxt
->myDoc
!= NULL
) {
12522 if (ctxt
->wellFormed
) {
12523 ret
= ctxt
->myDoc
->extSubset
;
12524 ctxt
->myDoc
->extSubset
= NULL
;
12529 tmp
= ret
->children
;
12530 while (tmp
!= NULL
) {
12538 xmlFreeDoc(ctxt
->myDoc
);
12539 ctxt
->myDoc
= NULL
;
12541 xmlFreeParserCtxt(ctxt
);
12549 * @ExternalID: a NAME* containing the External ID of the DTD
12550 * @SystemID: a NAME* containing the URL to the DTD
12552 * Load and parse an external subset.
12554 * Returns the resulting xmlDtdPtr or NULL in case of error.
12558 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
12559 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
12561 #endif /* LIBXML_VALID_ENABLED */
12563 /************************************************************************
12565 * Front ends when parsing an Entity *
12567 ************************************************************************/
12570 * xmlParseCtxtExternalEntity:
12571 * @ctx: the existing parsing context
12572 * @URL: the URL for the entity to load
12573 * @ID: the System ID for the entity to load
12574 * @lst: the return value for the set of parsed nodes
12576 * Parse an external general entity within an existing parsing context
12577 * An external general parsed entity is well-formed if it matches the
12578 * production labeled extParsedEnt.
12580 * [78] extParsedEnt ::= TextDecl? content
12582 * Returns 0 if the entity is well formed, -1 in case of args problem and
12583 * the parser error code otherwise
12587 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
12588 const xmlChar
*ID
, xmlNodePtr
*lst
) {
12591 if (ctx
== NULL
) return(-1);
12593 * If the user provided their own SAX callbacks, then reuse the
12594 * userData callback field, otherwise the expected setup in a
12595 * DOM builder is to have userData == ctxt
12597 if (ctx
->userData
== ctx
)
12600 userData
= ctx
->userData
;
12601 return xmlParseExternalEntityPrivate(ctx
->myDoc
, ctx
, ctx
->sax
,
12602 userData
, ctx
->depth
+ 1,
12607 * xmlParseExternalEntityPrivate:
12608 * @doc: the document the chunk pertains to
12609 * @oldctxt: the previous parser context if available
12610 * @sax: the SAX handler block (possibly NULL)
12611 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12612 * @depth: Used for loop detection, use 0
12613 * @URL: the URL for the entity to load
12614 * @ID: the System ID for the entity to load
12615 * @list: the return value for the set of parsed nodes
12617 * Private version of xmlParseExternalEntity()
12619 * Returns 0 if the entity is well formed, -1 in case of args problem and
12620 * the parser error code otherwise
12623 static xmlParserErrors
12624 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
12625 xmlSAXHandlerPtr sax
,
12626 void *user_data
, int depth
, const xmlChar
*URL
,
12627 const xmlChar
*ID
, xmlNodePtr
*list
) {
12628 xmlParserCtxtPtr ctxt
;
12630 xmlNodePtr newRoot
;
12631 xmlParserErrors ret
= XML_ERR_OK
;
12633 xmlCharEncoding enc
;
12635 if (((depth
> 40) &&
12636 ((oldctxt
== NULL
) || (oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12638 xmlFatalErrMsg(oldctxt
, XML_ERR_ENTITY_LOOP
,
12639 "Maximum entity nesting depth exceeded");
12640 return(XML_ERR_ENTITY_LOOP
);
12645 if ((URL
== NULL
) && (ID
== NULL
))
12646 return(XML_ERR_INTERNAL_ERROR
);
12648 return(XML_ERR_INTERNAL_ERROR
);
12650 ctxt
= xmlCreateEntityParserCtxtInternal(sax
, user_data
, URL
, ID
, NULL
,
12652 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
12653 if (oldctxt
!= NULL
) {
12654 ctxt
->nbErrors
= oldctxt
->nbErrors
;
12655 ctxt
->nbWarnings
= oldctxt
->nbWarnings
;
12657 xmlDetectSAX2(ctxt
);
12659 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12660 if (newDoc
== NULL
) {
12661 xmlFreeParserCtxt(ctxt
);
12662 return(XML_ERR_INTERNAL_ERROR
);
12664 newDoc
->properties
= XML_DOC_INTERNAL
;
12666 newDoc
->intSubset
= doc
->intSubset
;
12667 newDoc
->extSubset
= doc
->extSubset
;
12669 newDoc
->dict
= doc
->dict
;
12670 xmlDictReference(newDoc
->dict
);
12672 if (doc
->URL
!= NULL
) {
12673 newDoc
->URL
= xmlStrdup(doc
->URL
);
12676 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12677 if (newRoot
== NULL
) {
12679 xmlFreeParserCtxt(ctxt
);
12680 newDoc
->intSubset
= NULL
;
12681 newDoc
->extSubset
= NULL
;
12682 xmlFreeDoc(newDoc
);
12683 return(XML_ERR_INTERNAL_ERROR
);
12685 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
12686 nodePush(ctxt
, newDoc
->children
);
12688 ctxt
->myDoc
= newDoc
;
12691 newRoot
->doc
= doc
;
12695 * Get the first 4 bytes and decode the charset
12696 * if enc != XML_CHAR_ENCODING_NONE
12697 * plug some encoding conversion routines.
12700 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12705 enc
= xmlDetectCharEncoding(start
, 4);
12706 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12707 xmlSwitchEncoding(ctxt
, enc
);
12712 * Parse a possible text declaration first
12714 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12715 xmlParseTextDecl(ctxt
);
12717 * An XML-1.0 document can't reference an entity that is not XML-1.0
12719 if ((xmlStrEqual(oldctxt
->version
, BAD_CAST
"1.0")) &&
12720 (!xmlStrEqual(ctxt
->input
->version
, BAD_CAST
"1.0"))) {
12721 xmlFatalErrMsg(ctxt
, XML_ERR_VERSION_MISMATCH
,
12722 "Version mismatch between document and entity\n");
12726 ctxt
->instate
= XML_PARSER_CONTENT
;
12727 ctxt
->depth
= depth
;
12728 if (oldctxt
!= NULL
) {
12729 ctxt
->_private
= oldctxt
->_private
;
12730 ctxt
->loadsubset
= oldctxt
->loadsubset
;
12731 ctxt
->validate
= oldctxt
->validate
;
12732 ctxt
->valid
= oldctxt
->valid
;
12733 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
12734 if (oldctxt
->validate
) {
12735 ctxt
->vctxt
.error
= oldctxt
->vctxt
.error
;
12736 ctxt
->vctxt
.warning
= oldctxt
->vctxt
.warning
;
12737 ctxt
->vctxt
.userData
= oldctxt
->vctxt
.userData
;
12738 ctxt
->vctxt
.flags
= oldctxt
->vctxt
.flags
;
12740 ctxt
->external
= oldctxt
->external
;
12741 if (ctxt
->dict
) xmlDictFree(ctxt
->dict
);
12742 ctxt
->dict
= oldctxt
->dict
;
12743 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12744 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12745 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12746 ctxt
->dictNames
= oldctxt
->dictNames
;
12747 ctxt
->attsDefault
= oldctxt
->attsDefault
;
12748 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
12749 ctxt
->linenumbers
= oldctxt
->linenumbers
;
12750 ctxt
->record_info
= oldctxt
->record_info
;
12751 ctxt
->node_seq
.maximum
= oldctxt
->node_seq
.maximum
;
12752 ctxt
->node_seq
.length
= oldctxt
->node_seq
.length
;
12753 ctxt
->node_seq
.buffer
= oldctxt
->node_seq
.buffer
;
12756 * Doing validity checking on chunk without context
12757 * doesn't make sense
12759 ctxt
->_private
= NULL
;
12760 ctxt
->validate
= 0;
12761 ctxt
->external
= 2;
12762 ctxt
->loadsubset
= 0;
12765 xmlParseContent(ctxt
);
12767 if ((RAW
== '<') && (NXT(1) == '/')) {
12768 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12769 } else if (RAW
!= 0) {
12770 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12772 if (ctxt
->node
!= newDoc
->children
) {
12773 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12776 if (!ctxt
->wellFormed
) {
12777 ret
= (xmlParserErrors
)ctxt
->errNo
;
12778 if (oldctxt
!= NULL
) {
12779 oldctxt
->errNo
= ctxt
->errNo
;
12780 oldctxt
->wellFormed
= 0;
12781 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
12784 if (list
!= NULL
) {
12788 * Return the newly created nodeset after unlinking it from
12789 * the pseudo parent.
12791 cur
= newDoc
->children
->children
;
12793 while (cur
!= NULL
) {
12794 cur
->parent
= NULL
;
12797 newDoc
->children
->children
= NULL
;
12803 * Also record the size of the entity parsed
12805 if (ctxt
->input
!= NULL
&& oldctxt
!= NULL
) {
12806 unsigned long consumed
= ctxt
->input
->consumed
;
12808 xmlSaturatedAddSizeT(&consumed
, ctxt
->input
->cur
- ctxt
->input
->base
);
12810 xmlSaturatedAdd(&oldctxt
->sizeentities
, consumed
);
12811 xmlSaturatedAdd(&oldctxt
->sizeentities
, ctxt
->sizeentities
);
12813 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, consumed
);
12814 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, ctxt
->sizeentcopy
);
12817 if (oldctxt
!= NULL
) {
12819 ctxt
->attsDefault
= NULL
;
12820 ctxt
->attsSpecial
= NULL
;
12821 oldctxt
->nbErrors
= ctxt
->nbErrors
;
12822 oldctxt
->nbWarnings
= ctxt
->nbWarnings
;
12823 oldctxt
->validate
= ctxt
->validate
;
12824 oldctxt
->valid
= ctxt
->valid
;
12825 oldctxt
->node_seq
.maximum
= ctxt
->node_seq
.maximum
;
12826 oldctxt
->node_seq
.length
= ctxt
->node_seq
.length
;
12827 oldctxt
->node_seq
.buffer
= ctxt
->node_seq
.buffer
;
12829 ctxt
->node_seq
.maximum
= 0;
12830 ctxt
->node_seq
.length
= 0;
12831 ctxt
->node_seq
.buffer
= NULL
;
12832 xmlFreeParserCtxt(ctxt
);
12833 newDoc
->intSubset
= NULL
;
12834 newDoc
->extSubset
= NULL
;
12835 xmlFreeDoc(newDoc
);
12840 #ifdef LIBXML_SAX1_ENABLED
12842 * xmlParseExternalEntity:
12843 * @doc: the document the chunk pertains to
12844 * @sax: the SAX handler block (possibly NULL)
12845 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12846 * @depth: Used for loop detection, use 0
12847 * @URL: the URL for the entity to load
12848 * @ID: the System ID for the entity to load
12849 * @lst: the return value for the set of parsed nodes
12851 * Parse an external general entity
12852 * An external general parsed entity is well-formed if it matches the
12853 * production labeled extParsedEnt.
12855 * [78] extParsedEnt ::= TextDecl? content
12857 * Returns 0 if the entity is well formed, -1 in case of args problem and
12858 * the parser error code otherwise
12862 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
12863 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
12864 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
12869 * xmlParseBalancedChunkMemory:
12870 * @doc: the document the chunk pertains to (must not be NULL)
12871 * @sax: the SAX handler block (possibly NULL)
12872 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12873 * @depth: Used for loop detection, use 0
12874 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12875 * @lst: the return value for the set of parsed nodes
12877 * Parse a well-balanced chunk of an XML document
12878 * called by the parser
12879 * The allowed sequence for the Well Balanced Chunk is the one defined by
12880 * the content production in the XML grammar:
12882 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12884 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12885 * the parser error code otherwise
12889 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
12890 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
12891 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
12892 depth
, string
, lst
, 0 );
12894 #endif /* LIBXML_SAX1_ENABLED */
12897 * xmlParseBalancedChunkMemoryInternal:
12898 * @oldctxt: the existing parsing context
12899 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12900 * @user_data: the user data field for the parser context
12901 * @lst: the return value for the set of parsed nodes
12904 * Parse a well-balanced chunk of an XML document
12905 * called by the parser
12906 * The allowed sequence for the Well Balanced Chunk is the one defined by
12907 * the content production in the XML grammar:
12909 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12911 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12912 * error code otherwise
12914 * In case recover is set to 1, the nodelist will not be empty even if
12915 * the parsed chunk is not well balanced.
12917 static xmlParserErrors
12918 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
12919 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
12920 xmlParserCtxtPtr ctxt
;
12921 xmlDocPtr newDoc
= NULL
;
12922 xmlNodePtr newRoot
;
12923 xmlSAXHandlerPtr oldsax
= NULL
;
12924 xmlNodePtr content
= NULL
;
12925 xmlNodePtr last
= NULL
;
12927 xmlParserErrors ret
= XML_ERR_OK
;
12932 if (((oldctxt
->depth
> 40) && ((oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12933 (oldctxt
->depth
> 100)) {
12934 xmlFatalErrMsg(oldctxt
, XML_ERR_ENTITY_LOOP
,
12935 "Maximum entity nesting depth exceeded");
12936 return(XML_ERR_ENTITY_LOOP
);
12942 if (string
== NULL
)
12943 return(XML_ERR_INTERNAL_ERROR
);
12945 size
= xmlStrlen(string
);
12947 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
12948 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
12949 ctxt
->nbErrors
= oldctxt
->nbErrors
;
12950 ctxt
->nbWarnings
= oldctxt
->nbWarnings
;
12951 if (user_data
!= NULL
)
12952 ctxt
->userData
= user_data
;
12954 ctxt
->userData
= ctxt
;
12955 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
12956 ctxt
->dict
= oldctxt
->dict
;
12957 ctxt
->input_id
= oldctxt
->input_id
;
12958 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12959 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12960 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12963 /* propagate namespaces down the entity */
12964 for (i
= 0;i
< oldctxt
->nsNr
;i
+= 2) {
12965 nsPush(ctxt
, oldctxt
->nsTab
[i
], oldctxt
->nsTab
[i
+1]);
12969 oldsax
= ctxt
->sax
;
12970 ctxt
->sax
= oldctxt
->sax
;
12971 xmlDetectSAX2(ctxt
);
12972 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
12973 ctxt
->options
= oldctxt
->options
;
12975 ctxt
->_private
= oldctxt
->_private
;
12976 if (oldctxt
->myDoc
== NULL
) {
12977 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12978 if (newDoc
== NULL
) {
12979 ctxt
->sax
= oldsax
;
12981 xmlFreeParserCtxt(ctxt
);
12982 return(XML_ERR_INTERNAL_ERROR
);
12984 newDoc
->properties
= XML_DOC_INTERNAL
;
12985 newDoc
->dict
= ctxt
->dict
;
12986 xmlDictReference(newDoc
->dict
);
12987 ctxt
->myDoc
= newDoc
;
12989 ctxt
->myDoc
= oldctxt
->myDoc
;
12990 content
= ctxt
->myDoc
->children
;
12991 last
= ctxt
->myDoc
->last
;
12993 newRoot
= xmlNewDocNode(ctxt
->myDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12994 if (newRoot
== NULL
) {
12995 ctxt
->sax
= oldsax
;
12997 xmlFreeParserCtxt(ctxt
);
12998 if (newDoc
!= NULL
) {
12999 xmlFreeDoc(newDoc
);
13001 return(XML_ERR_INTERNAL_ERROR
);
13003 ctxt
->myDoc
->children
= NULL
;
13004 ctxt
->myDoc
->last
= NULL
;
13005 xmlAddChild((xmlNodePtr
) ctxt
->myDoc
, newRoot
);
13006 nodePush(ctxt
, ctxt
->myDoc
->children
);
13007 ctxt
->instate
= XML_PARSER_CONTENT
;
13008 ctxt
->depth
= oldctxt
->depth
;
13010 ctxt
->validate
= 0;
13011 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13012 if ((oldctxt
->validate
) || (oldctxt
->replaceEntities
!= 0)) {
13014 * ID/IDREF registration will be done in xmlValidateElement below
13016 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13018 ctxt
->dictNames
= oldctxt
->dictNames
;
13019 ctxt
->attsDefault
= oldctxt
->attsDefault
;
13020 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
13022 xmlParseContent(ctxt
);
13023 if ((RAW
== '<') && (NXT(1) == '/')) {
13024 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13025 } else if (RAW
!= 0) {
13026 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13028 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
13029 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13032 if (!ctxt
->wellFormed
) {
13033 ret
= (xmlParserErrors
)ctxt
->errNo
;
13034 oldctxt
->errNo
= ctxt
->errNo
;
13035 oldctxt
->wellFormed
= 0;
13036 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13041 if ((lst
!= NULL
) && (ret
== XML_ERR_OK
)) {
13045 * Return the newly created nodeset after unlinking it from
13046 * the pseudo parent.
13048 cur
= ctxt
->myDoc
->children
->children
;
13050 while (cur
!= NULL
) {
13051 #ifdef LIBXML_VALID_ENABLED
13052 if ((oldctxt
->validate
) && (oldctxt
->wellFormed
) &&
13053 (oldctxt
->myDoc
) && (oldctxt
->myDoc
->intSubset
) &&
13054 (cur
->type
== XML_ELEMENT_NODE
)) {
13055 oldctxt
->valid
&= xmlValidateElement(&oldctxt
->vctxt
,
13056 oldctxt
->myDoc
, cur
);
13058 #endif /* LIBXML_VALID_ENABLED */
13059 cur
->parent
= NULL
;
13062 ctxt
->myDoc
->children
->children
= NULL
;
13064 if (ctxt
->myDoc
!= NULL
) {
13065 xmlFreeNode(ctxt
->myDoc
->children
);
13066 ctxt
->myDoc
->children
= content
;
13067 ctxt
->myDoc
->last
= last
;
13071 * Also record the size of the entity parsed
13073 if (ctxt
->input
!= NULL
&& oldctxt
!= NULL
) {
13074 unsigned long consumed
= ctxt
->input
->consumed
;
13076 xmlSaturatedAddSizeT(&consumed
, ctxt
->input
->cur
- ctxt
->input
->base
);
13078 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, consumed
);
13079 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, ctxt
->sizeentcopy
);
13082 oldctxt
->nbErrors
= ctxt
->nbErrors
;
13083 oldctxt
->nbWarnings
= ctxt
->nbWarnings
;
13084 ctxt
->sax
= oldsax
;
13086 ctxt
->attsDefault
= NULL
;
13087 ctxt
->attsSpecial
= NULL
;
13088 xmlFreeParserCtxt(ctxt
);
13089 if (newDoc
!= NULL
) {
13090 xmlFreeDoc(newDoc
);
13097 * xmlParseInNodeContext:
13098 * @node: the context node
13099 * @data: the input string
13100 * @datalen: the input string length in bytes
13101 * @options: a combination of xmlParserOption
13102 * @lst: the return value for the set of parsed nodes
13104 * Parse a well-balanced chunk of an XML document
13105 * within the context (DTD, namespaces, etc ...) of the given node.
13107 * The allowed sequence for the data is a Well Balanced Chunk defined by
13108 * the content production in the XML grammar:
13110 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13112 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13113 * error code otherwise
13116 xmlParseInNodeContext(xmlNodePtr node
, const char *data
, int datalen
,
13117 int options
, xmlNodePtr
*lst
) {
13119 xmlParserCtxtPtr ctxt
;
13120 xmlDocPtr doc
= NULL
;
13121 xmlNodePtr fake
, cur
;
13124 xmlParserErrors ret
= XML_ERR_OK
;
13127 * check all input parameters, grab the document
13129 if ((lst
== NULL
) || (node
== NULL
) || (data
== NULL
) || (datalen
< 0))
13130 return(XML_ERR_INTERNAL_ERROR
);
13131 switch (node
->type
) {
13132 case XML_ELEMENT_NODE
:
13133 case XML_ATTRIBUTE_NODE
:
13134 case XML_TEXT_NODE
:
13135 case XML_CDATA_SECTION_NODE
:
13136 case XML_ENTITY_REF_NODE
:
13138 case XML_COMMENT_NODE
:
13139 case XML_DOCUMENT_NODE
:
13140 case XML_HTML_DOCUMENT_NODE
:
13143 return(XML_ERR_INTERNAL_ERROR
);
13146 while ((node
!= NULL
) && (node
->type
!= XML_ELEMENT_NODE
) &&
13147 (node
->type
!= XML_DOCUMENT_NODE
) &&
13148 (node
->type
!= XML_HTML_DOCUMENT_NODE
))
13149 node
= node
->parent
;
13151 return(XML_ERR_INTERNAL_ERROR
);
13152 if (node
->type
== XML_ELEMENT_NODE
)
13155 doc
= (xmlDocPtr
) node
;
13157 return(XML_ERR_INTERNAL_ERROR
);
13160 * allocate a context and set-up everything not related to the
13161 * node position in the tree
13163 if (doc
->type
== XML_DOCUMENT_NODE
)
13164 ctxt
= xmlCreateMemoryParserCtxt((char *) data
, datalen
);
13165 #ifdef LIBXML_HTML_ENABLED
13166 else if (doc
->type
== XML_HTML_DOCUMENT_NODE
) {
13167 ctxt
= htmlCreateMemoryParserCtxt((char *) data
, datalen
);
13169 * When parsing in context, it makes no sense to add implied
13170 * elements like html/body/etc...
13172 options
|= HTML_PARSE_NOIMPLIED
;
13176 return(XML_ERR_INTERNAL_ERROR
);
13179 return(XML_ERR_NO_MEMORY
);
13182 * Use input doc's dict if present, else ensure XML_PARSE_NODICT is set.
13183 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13184 * we must wait until the last moment to free the original one.
13186 if (doc
->dict
!= NULL
) {
13187 if (ctxt
->dict
!= NULL
)
13188 xmlDictFree(ctxt
->dict
);
13189 ctxt
->dict
= doc
->dict
;
13191 options
|= XML_PARSE_NODICT
;
13193 if (doc
->encoding
!= NULL
) {
13194 xmlCharEncodingHandlerPtr hdlr
;
13196 if (ctxt
->encoding
!= NULL
)
13197 xmlFree((xmlChar
*) ctxt
->encoding
);
13198 ctxt
->encoding
= xmlStrdup((const xmlChar
*) doc
->encoding
);
13200 hdlr
= xmlFindCharEncodingHandler((const char *) doc
->encoding
);
13201 if (hdlr
!= NULL
) {
13202 xmlSwitchToEncoding(ctxt
, hdlr
);
13204 return(XML_ERR_UNSUPPORTED_ENCODING
);
13208 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13209 xmlDetectSAX2(ctxt
);
13211 /* parsing in context, i.e. as within existing content */
13212 ctxt
->input_id
= 2;
13213 ctxt
->instate
= XML_PARSER_CONTENT
;
13215 fake
= xmlNewDocComment(node
->doc
, NULL
);
13216 if (fake
== NULL
) {
13217 xmlFreeParserCtxt(ctxt
);
13218 return(XML_ERR_NO_MEMORY
);
13220 xmlAddChild(node
, fake
);
13222 if (node
->type
== XML_ELEMENT_NODE
) {
13223 nodePush(ctxt
, node
);
13225 * initialize the SAX2 namespaces stack
13228 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_NODE
)) {
13229 xmlNsPtr ns
= cur
->nsDef
;
13230 const xmlChar
*iprefix
, *ihref
;
13232 while (ns
!= NULL
) {
13234 iprefix
= xmlDictLookup(ctxt
->dict
, ns
->prefix
, -1);
13235 ihref
= xmlDictLookup(ctxt
->dict
, ns
->href
, -1);
13237 iprefix
= ns
->prefix
;
13241 if (xmlGetNamespace(ctxt
, iprefix
) == NULL
) {
13242 nsPush(ctxt
, iprefix
, ihref
);
13251 if ((ctxt
->validate
) || (ctxt
->replaceEntities
!= 0)) {
13253 * ID/IDREF registration will be done in xmlValidateElement below
13255 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13258 #ifdef LIBXML_HTML_ENABLED
13259 if (doc
->type
== XML_HTML_DOCUMENT_NODE
)
13260 __htmlParseContent(ctxt
);
13263 xmlParseContent(ctxt
);
13266 if ((RAW
== '<') && (NXT(1) == '/')) {
13267 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13268 } else if (RAW
!= 0) {
13269 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13271 if ((ctxt
->node
!= NULL
) && (ctxt
->node
!= node
)) {
13272 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13273 ctxt
->wellFormed
= 0;
13276 if (!ctxt
->wellFormed
) {
13277 if (ctxt
->errNo
== 0)
13278 ret
= XML_ERR_INTERNAL_ERROR
;
13280 ret
= (xmlParserErrors
)ctxt
->errNo
;
13286 * Return the newly created nodeset after unlinking it from
13287 * the pseudo sibling.
13300 while (cur
!= NULL
) {
13301 cur
->parent
= NULL
;
13305 xmlUnlinkNode(fake
);
13309 if (ret
!= XML_ERR_OK
) {
13310 xmlFreeNodeList(*lst
);
13314 if (doc
->dict
!= NULL
)
13316 xmlFreeParserCtxt(ctxt
);
13320 return(XML_ERR_INTERNAL_ERROR
);
13324 #ifdef LIBXML_SAX1_ENABLED
13326 * xmlParseBalancedChunkMemoryRecover:
13327 * @doc: the document the chunk pertains to (must not be NULL)
13328 * @sax: the SAX handler block (possibly NULL)
13329 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13330 * @depth: Used for loop detection, use 0
13331 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13332 * @lst: the return value for the set of parsed nodes
13333 * @recover: return nodes even if the data is broken (use 0)
13336 * Parse a well-balanced chunk of an XML document
13337 * called by the parser
13338 * The allowed sequence for the Well Balanced Chunk is the one defined by
13339 * the content production in the XML grammar:
13341 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13343 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13344 * the parser error code otherwise
13346 * In case recover is set to 1, the nodelist will not be empty even if
13347 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13351 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13352 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
,
13354 xmlParserCtxtPtr ctxt
;
13356 xmlSAXHandlerPtr oldsax
= NULL
;
13357 xmlNodePtr content
, newRoot
;
13362 return(XML_ERR_ENTITY_LOOP
);
13368 if (string
== NULL
)
13371 size
= xmlStrlen(string
);
13373 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13374 if (ctxt
== NULL
) return(-1);
13375 ctxt
->userData
= ctxt
;
13377 oldsax
= ctxt
->sax
;
13379 if (user_data
!= NULL
)
13380 ctxt
->userData
= user_data
;
13382 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13383 if (newDoc
== NULL
) {
13384 xmlFreeParserCtxt(ctxt
);
13387 newDoc
->properties
= XML_DOC_INTERNAL
;
13388 if ((doc
!= NULL
) && (doc
->dict
!= NULL
)) {
13389 xmlDictFree(ctxt
->dict
);
13390 ctxt
->dict
= doc
->dict
;
13391 xmlDictReference(ctxt
->dict
);
13392 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13393 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13394 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13395 ctxt
->dictNames
= 1;
13397 xmlCtxtUseOptionsInternal(ctxt
, XML_PARSE_NODICT
, NULL
);
13399 /* doc == NULL is only supported for historic reasons */
13401 newDoc
->intSubset
= doc
->intSubset
;
13402 newDoc
->extSubset
= doc
->extSubset
;
13404 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13405 if (newRoot
== NULL
) {
13407 ctxt
->sax
= oldsax
;
13408 xmlFreeParserCtxt(ctxt
);
13409 newDoc
->intSubset
= NULL
;
13410 newDoc
->extSubset
= NULL
;
13411 xmlFreeDoc(newDoc
);
13414 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13415 nodePush(ctxt
, newRoot
);
13416 /* doc == NULL is only supported for historic reasons */
13418 ctxt
->myDoc
= newDoc
;
13420 ctxt
->myDoc
= newDoc
;
13421 newDoc
->children
->doc
= doc
;
13422 /* Ensure that doc has XML spec namespace */
13423 xmlSearchNsByHref(doc
, (xmlNodePtr
)doc
, XML_XML_NAMESPACE
);
13424 newDoc
->oldNs
= doc
->oldNs
;
13426 ctxt
->instate
= XML_PARSER_CONTENT
;
13427 ctxt
->input_id
= 2;
13428 ctxt
->depth
= depth
;
13431 * Doing validity checking on chunk doesn't make sense
13433 ctxt
->validate
= 0;
13434 ctxt
->loadsubset
= 0;
13435 xmlDetectSAX2(ctxt
);
13437 if ( doc
!= NULL
){
13438 content
= doc
->children
;
13439 doc
->children
= NULL
;
13440 xmlParseContent(ctxt
);
13441 doc
->children
= content
;
13444 xmlParseContent(ctxt
);
13446 if ((RAW
== '<') && (NXT(1) == '/')) {
13447 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13448 } else if (RAW
!= 0) {
13449 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13451 if (ctxt
->node
!= newDoc
->children
) {
13452 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13455 if (!ctxt
->wellFormed
) {
13456 if (ctxt
->errNo
== 0)
13464 if ((lst
!= NULL
) && ((ret
== 0) || (recover
== 1))) {
13468 * Return the newly created nodeset after unlinking it from
13469 * the pseudo parent.
13471 cur
= newDoc
->children
->children
;
13473 while (cur
!= NULL
) {
13474 xmlSetTreeDoc(cur
, doc
);
13475 cur
->parent
= NULL
;
13478 newDoc
->children
->children
= NULL
;
13482 ctxt
->sax
= oldsax
;
13483 xmlFreeParserCtxt(ctxt
);
13484 newDoc
->intSubset
= NULL
;
13485 newDoc
->extSubset
= NULL
;
13486 /* This leaks the namespace list if doc == NULL */
13487 newDoc
->oldNs
= NULL
;
13488 xmlFreeDoc(newDoc
);
13494 * xmlSAXParseEntity:
13495 * @sax: the SAX handler block
13496 * @filename: the filename
13498 * DEPRECATED: Don't use.
13500 * parse an XML external entity out of context and build a tree.
13501 * It uses the given SAX function block to handle the parsing callbacks.
13502 * If sax is NULL, fall back to the default DOM tree building routines.
13504 * [78] extParsedEnt ::= TextDecl? content
13506 * This corresponds to a "Well Balanced" chunk
13508 * Returns the resulting document tree
13512 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
13514 xmlParserCtxtPtr ctxt
;
13516 ctxt
= xmlCreateFileParserCtxt(filename
);
13517 if (ctxt
== NULL
) {
13521 if (ctxt
->sax
!= NULL
)
13522 xmlFree(ctxt
->sax
);
13524 ctxt
->userData
= NULL
;
13527 xmlParseExtParsedEnt(ctxt
);
13529 if (ctxt
->wellFormed
)
13533 xmlFreeDoc(ctxt
->myDoc
);
13534 ctxt
->myDoc
= NULL
;
13538 xmlFreeParserCtxt(ctxt
);
13545 * @filename: the filename
13547 * parse an XML external entity out of context and build a tree.
13549 * [78] extParsedEnt ::= TextDecl? content
13551 * This corresponds to a "Well Balanced" chunk
13553 * Returns the resulting document tree
13557 xmlParseEntity(const char *filename
) {
13558 return(xmlSAXParseEntity(NULL
, filename
));
13560 #endif /* LIBXML_SAX1_ENABLED */
13563 * xmlCreateEntityParserCtxtInternal:
13564 * @URL: the entity URL
13565 * @ID: the entity PUBLIC ID
13566 * @base: a possible base for the target URI
13567 * @pctx: parser context used to set options on new context
13569 * Create a parser context for an external entity
13570 * Automatic support for ZLIB/Compress compressed document is provided
13571 * by default if found at compile-time.
13573 * Returns the new parser context or NULL
13575 static xmlParserCtxtPtr
13576 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax
, void *userData
,
13577 const xmlChar
*URL
, const xmlChar
*ID
, const xmlChar
*base
,
13578 xmlParserCtxtPtr pctx
) {
13579 xmlParserCtxtPtr ctxt
;
13580 xmlParserInputPtr inputStream
;
13581 char *directory
= NULL
;
13584 ctxt
= xmlNewSAXParserCtxt(sax
, userData
);
13585 if (ctxt
== NULL
) {
13589 if (pctx
!= NULL
) {
13590 ctxt
->options
= pctx
->options
;
13591 ctxt
->_private
= pctx
->_private
;
13592 ctxt
->input_id
= pctx
->input_id
;
13595 /* Don't read from stdin. */
13596 if (xmlStrcmp(URL
, BAD_CAST
"-") == 0)
13597 URL
= BAD_CAST
"./-";
13599 uri
= xmlBuildURI(URL
, base
);
13602 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
13603 if (inputStream
== NULL
) {
13604 xmlFreeParserCtxt(ctxt
);
13608 inputPush(ctxt
, inputStream
);
13610 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13611 directory
= xmlParserGetDirectory((char *)URL
);
13612 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13613 ctxt
->directory
= directory
;
13615 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
13616 if (inputStream
== NULL
) {
13618 xmlFreeParserCtxt(ctxt
);
13622 inputPush(ctxt
, inputStream
);
13624 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13625 directory
= xmlParserGetDirectory((char *)uri
);
13626 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13627 ctxt
->directory
= directory
;
13634 * xmlCreateEntityParserCtxt:
13635 * @URL: the entity URL
13636 * @ID: the entity PUBLIC ID
13637 * @base: a possible base for the target URI
13639 * Create a parser context for an external entity
13640 * Automatic support for ZLIB/Compress compressed document is provided
13641 * by default if found at compile-time.
13643 * Returns the new parser context or NULL
13646 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
13647 const xmlChar
*base
) {
13648 return xmlCreateEntityParserCtxtInternal(NULL
, NULL
, URL
, ID
, base
, NULL
);
13652 /************************************************************************
13654 * Front ends when parsing from a file *
13656 ************************************************************************/
13659 * xmlCreateURLParserCtxt:
13660 * @filename: the filename or URL
13661 * @options: a combination of xmlParserOption
13663 * Create a parser context for a file or URL content.
13664 * Automatic support for ZLIB/Compress compressed document is provided
13665 * by default if found at compile-time and for file accesses
13667 * Returns the new parser context or NULL
13670 xmlCreateURLParserCtxt(const char *filename
, int options
)
13672 xmlParserCtxtPtr ctxt
;
13673 xmlParserInputPtr inputStream
;
13674 char *directory
= NULL
;
13676 ctxt
= xmlNewParserCtxt();
13677 if (ctxt
== NULL
) {
13678 xmlErrMemory(NULL
, "cannot allocate parser context");
13683 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13684 ctxt
->linenumbers
= 1;
13686 inputStream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
13687 if (inputStream
== NULL
) {
13688 xmlFreeParserCtxt(ctxt
);
13692 inputPush(ctxt
, inputStream
);
13693 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13694 directory
= xmlParserGetDirectory(filename
);
13695 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13696 ctxt
->directory
= directory
;
13702 * xmlCreateFileParserCtxt:
13703 * @filename: the filename
13705 * Create a parser context for a file content.
13706 * Automatic support for ZLIB/Compress compressed document is provided
13707 * by default if found at compile-time.
13709 * Returns the new parser context or NULL
13712 xmlCreateFileParserCtxt(const char *filename
)
13714 return(xmlCreateURLParserCtxt(filename
, 0));
13717 #ifdef LIBXML_SAX1_ENABLED
13719 * xmlSAXParseFileWithData:
13720 * @sax: the SAX handler block
13721 * @filename: the filename
13722 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13724 * @data: the userdata
13726 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13728 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13729 * compressed document is provided by default if found at compile-time.
13730 * It uses the given SAX function block to handle the parsing callbacks.
13731 * If sax is NULL, fall back to the default DOM tree building routines.
13733 * User data (void *) is stored within the parser context in the
13734 * context's _private member, so it is available nearly everywhere in libxml
13736 * Returns the resulting document tree
13740 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
13741 int recovery
, void *data
) {
13743 xmlParserCtxtPtr ctxt
;
13747 ctxt
= xmlCreateFileParserCtxt(filename
);
13748 if (ctxt
== NULL
) {
13752 if (ctxt
->sax
!= NULL
)
13753 xmlFree(ctxt
->sax
);
13756 xmlDetectSAX2(ctxt
);
13758 ctxt
->_private
= data
;
13761 if (ctxt
->directory
== NULL
)
13762 ctxt
->directory
= xmlParserGetDirectory(filename
);
13764 ctxt
->recovery
= recovery
;
13766 xmlParseDocument(ctxt
);
13768 if ((ctxt
->wellFormed
) || recovery
) {
13770 if ((ret
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
13771 if (ctxt
->input
->buf
->compressed
> 0)
13772 ret
->compression
= 9;
13774 ret
->compression
= ctxt
->input
->buf
->compressed
;
13779 xmlFreeDoc(ctxt
->myDoc
);
13780 ctxt
->myDoc
= NULL
;
13784 xmlFreeParserCtxt(ctxt
);
13791 * @sax: the SAX handler block
13792 * @filename: the filename
13793 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13796 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13798 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13799 * compressed document is provided by default if found at compile-time.
13800 * It uses the given SAX function block to handle the parsing callbacks.
13801 * If sax is NULL, fall back to the default DOM tree building routines.
13803 * Returns the resulting document tree
13807 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
13809 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
13814 * @cur: a pointer to an array of xmlChar
13816 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13818 * parse an XML in-memory document and build a tree.
13819 * In the case the document is not Well Formed, an attempt to build a
13820 * tree is tried anyway
13822 * Returns the resulting document tree or NULL in case of failure
13826 xmlRecoverDoc(const xmlChar
*cur
) {
13827 return(xmlSAXParseDoc(NULL
, cur
, 1));
13832 * @filename: the filename
13834 * DEPRECATED: Use xmlReadFile.
13836 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13837 * compressed document is provided by default if found at compile-time.
13839 * Returns the resulting document tree if the file was wellformed,
13844 xmlParseFile(const char *filename
) {
13845 return(xmlSAXParseFile(NULL
, filename
, 0));
13850 * @filename: the filename
13852 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13854 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13855 * compressed document is provided by default if found at compile-time.
13856 * In the case the document is not Well Formed, it attempts to build
13859 * Returns the resulting document tree or NULL in case of failure
13863 xmlRecoverFile(const char *filename
) {
13864 return(xmlSAXParseFile(NULL
, filename
, 1));
13869 * xmlSetupParserForBuffer:
13870 * @ctxt: an XML parser context
13871 * @buffer: a xmlChar * buffer
13872 * @filename: a file name
13874 * DEPRECATED: Don't use.
13876 * Setup the parser context to parse a new buffer; Clears any prior
13877 * contents from the parser context. The buffer parameter must not be
13878 * NULL, but the filename parameter can be
13881 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
13882 const char* filename
)
13884 xmlParserInputPtr input
;
13886 if ((ctxt
== NULL
) || (buffer
== NULL
))
13889 input
= xmlNewInputStream(ctxt
);
13890 if (input
== NULL
) {
13891 xmlErrMemory(NULL
, "parsing new buffer: out of memory\n");
13892 xmlClearParserCtxt(ctxt
);
13896 xmlClearParserCtxt(ctxt
);
13897 if (filename
!= NULL
)
13898 input
->filename
= (char *) xmlCanonicPath((const xmlChar
*)filename
);
13899 input
->base
= buffer
;
13900 input
->cur
= buffer
;
13901 input
->end
= &buffer
[xmlStrlen(buffer
)];
13902 inputPush(ctxt
, input
);
13906 * xmlSAXUserParseFile:
13907 * @sax: a SAX handler
13908 * @user_data: The user data returned on SAX callbacks
13909 * @filename: a file name
13911 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13913 * parse an XML file and call the given SAX handler routines.
13914 * Automatic support for ZLIB/Compress compressed document is provided
13916 * Returns 0 in case of success or an error number otherwise
13919 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
13920 const char *filename
) {
13922 xmlParserCtxtPtr ctxt
;
13924 ctxt
= xmlCreateFileParserCtxt(filename
);
13925 if (ctxt
== NULL
) return -1;
13926 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
13927 xmlFree(ctxt
->sax
);
13929 xmlDetectSAX2(ctxt
);
13931 if (user_data
!= NULL
)
13932 ctxt
->userData
= user_data
;
13934 xmlParseDocument(ctxt
);
13936 if (ctxt
->wellFormed
)
13939 if (ctxt
->errNo
!= 0)
13946 if (ctxt
->myDoc
!= NULL
) {
13947 xmlFreeDoc(ctxt
->myDoc
);
13948 ctxt
->myDoc
= NULL
;
13950 xmlFreeParserCtxt(ctxt
);
13954 #endif /* LIBXML_SAX1_ENABLED */
13956 /************************************************************************
13958 * Front ends when parsing from memory *
13960 ************************************************************************/
13963 * xmlCreateMemoryParserCtxt:
13964 * @buffer: a pointer to a char array
13965 * @size: the size of the array
13967 * Create a parser context for an XML in-memory document.
13969 * Returns the new parser context or NULL
13972 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
13973 xmlParserCtxtPtr ctxt
;
13974 xmlParserInputPtr input
;
13975 xmlParserInputBufferPtr buf
;
13977 if (buffer
== NULL
)
13982 ctxt
= xmlNewParserCtxt();
13986 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
13988 xmlFreeParserCtxt(ctxt
);
13992 input
= xmlNewInputStream(ctxt
);
13993 if (input
== NULL
) {
13994 xmlFreeParserInputBuffer(buf
);
13995 xmlFreeParserCtxt(ctxt
);
13999 input
->filename
= NULL
;
14001 xmlBufResetInput(input
->buf
->buffer
, input
);
14003 inputPush(ctxt
, input
);
14007 #ifdef LIBXML_SAX1_ENABLED
14009 * xmlSAXParseMemoryWithData:
14010 * @sax: the SAX handler block
14011 * @buffer: a pointer to a char array
14012 * @size: the size of the array
14013 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14015 * @data: the userdata
14017 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14019 * parse an XML in-memory block and use the given SAX function block
14020 * to handle the parsing callback. If sax is NULL, fall back to the default
14021 * DOM tree building routines.
14023 * User data (void *) is stored within the parser context in the
14024 * context's _private member, so it is available nearly everywhere in libxml
14026 * Returns the resulting document tree
14030 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
14031 int size
, int recovery
, void *data
) {
14033 xmlParserCtxtPtr ctxt
;
14037 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14038 if (ctxt
== NULL
) return(NULL
);
14040 if (ctxt
->sax
!= NULL
)
14041 xmlFree(ctxt
->sax
);
14044 xmlDetectSAX2(ctxt
);
14046 ctxt
->_private
=data
;
14049 ctxt
->recovery
= recovery
;
14051 xmlParseDocument(ctxt
);
14053 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14056 xmlFreeDoc(ctxt
->myDoc
);
14057 ctxt
->myDoc
= NULL
;
14061 xmlFreeParserCtxt(ctxt
);
14067 * xmlSAXParseMemory:
14068 * @sax: the SAX handler block
14069 * @buffer: a pointer to a char array
14070 * @size: the size of the array
14071 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14074 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14076 * parse an XML in-memory block and use the given SAX function block
14077 * to handle the parsing callback. If sax is NULL, fall back to the default
14078 * DOM tree building routines.
14080 * Returns the resulting document tree
14083 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
14084 int size
, int recovery
) {
14085 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
14090 * @buffer: a pointer to a char array
14091 * @size: the size of the array
14093 * DEPRECATED: Use xmlReadMemory.
14095 * parse an XML in-memory block and build a tree.
14097 * Returns the resulting document tree
14100 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
14101 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
14105 * xmlRecoverMemory:
14106 * @buffer: a pointer to a char array
14107 * @size: the size of the array
14109 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14111 * parse an XML in-memory block and build a tree.
14112 * In the case the document is not Well Formed, an attempt to
14113 * build a tree is tried anyway
14115 * Returns the resulting document tree or NULL in case of error
14118 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
14119 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
14123 * xmlSAXUserParseMemory:
14124 * @sax: a SAX handler
14125 * @user_data: The user data returned on SAX callbacks
14126 * @buffer: an in-memory XML document input
14127 * @size: the length of the XML document in bytes
14129 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14131 * parse an XML in-memory buffer and call the given SAX handler routines.
14133 * Returns 0 in case of success or an error number otherwise
14135 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
14136 const char *buffer
, int size
) {
14138 xmlParserCtxtPtr ctxt
;
14142 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14143 if (ctxt
== NULL
) return -1;
14144 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14145 xmlFree(ctxt
->sax
);
14147 xmlDetectSAX2(ctxt
);
14149 if (user_data
!= NULL
)
14150 ctxt
->userData
= user_data
;
14152 xmlParseDocument(ctxt
);
14154 if (ctxt
->wellFormed
)
14157 if (ctxt
->errNo
!= 0)
14164 if (ctxt
->myDoc
!= NULL
) {
14165 xmlFreeDoc(ctxt
->myDoc
);
14166 ctxt
->myDoc
= NULL
;
14168 xmlFreeParserCtxt(ctxt
);
14172 #endif /* LIBXML_SAX1_ENABLED */
14175 * xmlCreateDocParserCtxt:
14176 * @cur: a pointer to an array of xmlChar
14178 * Creates a parser context for an XML in-memory document.
14180 * Returns the new parser context or NULL
14183 xmlCreateDocParserCtxt(const xmlChar
*cur
) {
14188 len
= xmlStrlen(cur
);
14189 return(xmlCreateMemoryParserCtxt((const char *)cur
, len
));
14192 #ifdef LIBXML_SAX1_ENABLED
14195 * @sax: the SAX handler block
14196 * @cur: a pointer to an array of xmlChar
14197 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14200 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14202 * parse an XML in-memory document and build a tree.
14203 * It uses the given SAX function block to handle the parsing callbacks.
14204 * If sax is NULL, fall back to the default DOM tree building routines.
14206 * Returns the resulting document tree
14210 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, const xmlChar
*cur
, int recovery
) {
14212 xmlParserCtxtPtr ctxt
;
14213 xmlSAXHandlerPtr oldsax
= NULL
;
14215 if (cur
== NULL
) return(NULL
);
14218 ctxt
= xmlCreateDocParserCtxt(cur
);
14219 if (ctxt
== NULL
) return(NULL
);
14221 oldsax
= ctxt
->sax
;
14223 ctxt
->userData
= NULL
;
14225 xmlDetectSAX2(ctxt
);
14227 xmlParseDocument(ctxt
);
14228 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14231 xmlFreeDoc(ctxt
->myDoc
);
14232 ctxt
->myDoc
= NULL
;
14235 ctxt
->sax
= oldsax
;
14236 xmlFreeParserCtxt(ctxt
);
14243 * @cur: a pointer to an array of xmlChar
14245 * DEPRECATED: Use xmlReadDoc.
14247 * parse an XML in-memory document and build a tree.
14249 * Returns the resulting document tree
14253 xmlParseDoc(const xmlChar
*cur
) {
14254 return(xmlSAXParseDoc(NULL
, cur
, 0));
14256 #endif /* LIBXML_SAX1_ENABLED */
14258 #ifdef LIBXML_LEGACY_ENABLED
14259 /************************************************************************
14261 * Specific function to keep track of entities references *
14262 * and used by the XSLT debugger *
14264 ************************************************************************/
14266 static xmlEntityReferenceFunc xmlEntityRefFunc
= NULL
;
14269 * xmlAddEntityReference:
14270 * @ent : A valid entity
14271 * @firstNode : A valid first node for children of entity
14272 * @lastNode : A valid last node of children entity
14274 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14277 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
14278 xmlNodePtr lastNode
)
14280 if (xmlEntityRefFunc
!= NULL
) {
14281 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
14287 * xmlSetEntityReferenceFunc:
14288 * @func: A valid function
14290 * Set the function to call back when an XML entity reference has been made
14293 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func
)
14295 xmlEntityRefFunc
= func
;
14297 #endif /* LIBXML_LEGACY_ENABLED */
14299 /************************************************************************
14303 ************************************************************************/
14305 static int xmlParserInitialized
= 0;
14310 * Initialization function for the XML parser.
14311 * This is not reentrant. Call once before processing in case of
14312 * use in multithreaded programs.
14316 xmlInitParser(void) {
14318 * Note that the initialization code must not make memory allocations.
14320 if (xmlParserInitialized
!= 0)
14323 #ifdef LIBXML_THREAD_ENABLED
14324 __xmlGlobalInitMutexLock();
14325 if (xmlParserInitialized
== 0) {
14327 #if defined(_WIN32) && \
14328 !defined(LIBXML_THREAD_ALLOC_ENABLED) && \
14329 (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14330 if (xmlFree
== free
)
14331 atexit(xmlCleanupParser
);
14334 xmlInitThreadsInternal();
14335 xmlInitGlobalsInternal();
14336 xmlInitMemoryInternal();
14337 __xmlInitializeDict();
14338 xmlInitEncodingInternal();
14339 xmlRegisterDefaultInputCallbacks();
14340 #ifdef LIBXML_OUTPUT_ENABLED
14341 xmlRegisterDefaultOutputCallbacks();
14342 #endif /* LIBXML_OUTPUT_ENABLED */
14343 #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14344 xmlInitXPathInternal();
14346 xmlParserInitialized
= 1;
14347 #ifdef LIBXML_THREAD_ENABLED
14349 __xmlGlobalInitMutexUnlock();
14354 * xmlCleanupParser:
14356 * This function name is somewhat misleading. It does not clean up
14357 * parser state, it cleans up memory allocated by the library itself.
14358 * It is a cleanup function for the XML library. It tries to reclaim all
14359 * related global memory allocated for the library processing.
14360 * It doesn't deallocate any document related memory. One should
14361 * call xmlCleanupParser() only when the process has finished using
14362 * the library and all XML/HTML documents built with it.
14363 * See also xmlInitParser() which has the opposite function of preparing
14364 * the library for operations.
14366 * WARNING: if your application is multithreaded or has plugin support
14367 * calling this may crash the application if another thread or
14368 * a plugin is still using libxml2. It's sometimes very hard to
14369 * guess if libxml2 is in use in the application, some libraries
14370 * or plugins may use it without notice. In case of doubt abstain
14371 * from calling this function or do it just before calling exit()
14372 * to avoid leak reports from valgrind !
14376 xmlCleanupParser(void) {
14377 if (!xmlParserInitialized
)
14380 xmlCleanupCharEncodingHandlers();
14381 #ifdef LIBXML_CATALOG_ENABLED
14382 xmlCatalogCleanup();
14384 xmlCleanupDictInternal();
14385 xmlCleanupInputCallbacks();
14386 #ifdef LIBXML_OUTPUT_ENABLED
14387 xmlCleanupOutputCallbacks();
14389 #ifdef LIBXML_SCHEMAS_ENABLED
14390 xmlSchemaCleanupTypes();
14391 xmlRelaxNGCleanupTypes();
14393 xmlCleanupGlobalsInternal();
14394 xmlCleanupThreadsInternal();
14395 xmlCleanupMemoryInternal();
14396 xmlParserInitialized
= 0;
14399 #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && \
14400 !defined(LIBXML_THREAD_ALLOC_ENABLED) && \
14401 !defined(LIBXML_STATIC) && \
14404 ATTRIBUTE_DESTRUCTOR
14405 xmlDestructor(void) {
14407 * Calling custom deallocation functions in a destructor can cause
14408 * problems, for example with Nokogiri.
14410 if (xmlFree
== free
)
14411 xmlCleanupParser();
14415 /************************************************************************
14417 * New set (2.6.0) of simpler and more flexible APIs *
14419 ************************************************************************/
14425 * Free a string if it is not owned by the "dict" dictionary in the
14428 #define DICT_FREE(str) \
14429 if ((str) && ((!dict) || \
14430 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
14431 xmlFree((char *)(str));
14435 * @ctxt: an XML parser context
14437 * Reset a parser context
14440 xmlCtxtReset(xmlParserCtxtPtr ctxt
)
14442 xmlParserInputPtr input
;
14450 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
14451 xmlFreeInputStream(input
);
14454 ctxt
->input
= NULL
;
14457 if (ctxt
->spaceTab
!= NULL
) {
14458 ctxt
->spaceTab
[0] = -1;
14459 ctxt
->space
= &ctxt
->spaceTab
[0];
14461 ctxt
->space
= NULL
;
14473 DICT_FREE(ctxt
->version
);
14474 ctxt
->version
= NULL
;
14475 DICT_FREE(ctxt
->encoding
);
14476 ctxt
->encoding
= NULL
;
14477 DICT_FREE(ctxt
->directory
);
14478 ctxt
->directory
= NULL
;
14479 DICT_FREE(ctxt
->extSubURI
);
14480 ctxt
->extSubURI
= NULL
;
14481 DICT_FREE(ctxt
->extSubSystem
);
14482 ctxt
->extSubSystem
= NULL
;
14483 if (ctxt
->myDoc
!= NULL
)
14484 xmlFreeDoc(ctxt
->myDoc
);
14485 ctxt
->myDoc
= NULL
;
14487 ctxt
->standalone
= -1;
14488 ctxt
->hasExternalSubset
= 0;
14489 ctxt
->hasPErefs
= 0;
14491 ctxt
->external
= 0;
14492 ctxt
->instate
= XML_PARSER_START
;
14495 ctxt
->wellFormed
= 1;
14496 ctxt
->nsWellFormed
= 1;
14497 ctxt
->disableSAX
= 0;
14500 ctxt
->vctxt
.userData
= ctxt
;
14501 ctxt
->vctxt
.error
= xmlParserValidityError
;
14502 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
14504 ctxt
->record_info
= 0;
14505 ctxt
->checkIndex
= 0;
14506 ctxt
->endCheckState
= 0;
14507 ctxt
->inSubset
= 0;
14508 ctxt
->errNo
= XML_ERR_OK
;
14510 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
14511 ctxt
->catalogs
= NULL
;
14512 ctxt
->sizeentities
= 0;
14513 ctxt
->sizeentcopy
= 0;
14514 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
14516 if (ctxt
->attsDefault
!= NULL
) {
14517 xmlHashFree(ctxt
->attsDefault
, xmlHashDefaultDeallocator
);
14518 ctxt
->attsDefault
= NULL
;
14520 if (ctxt
->attsSpecial
!= NULL
) {
14521 xmlHashFree(ctxt
->attsSpecial
, NULL
);
14522 ctxt
->attsSpecial
= NULL
;
14525 #ifdef LIBXML_CATALOG_ENABLED
14526 if (ctxt
->catalogs
!= NULL
)
14527 xmlCatalogFreeLocal(ctxt
->catalogs
);
14529 ctxt
->nbErrors
= 0;
14530 ctxt
->nbWarnings
= 0;
14531 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
14532 xmlResetError(&ctxt
->lastError
);
14536 * xmlCtxtResetPush:
14537 * @ctxt: an XML parser context
14538 * @chunk: a pointer to an array of chars
14539 * @size: number of chars in the array
14540 * @filename: an optional file name or URI
14541 * @encoding: the document encoding, or NULL
14543 * Reset a push parser context
14545 * Returns 0 in case of success and 1 in case of error
14548 xmlCtxtResetPush(xmlParserCtxtPtr ctxt
, const char *chunk
,
14549 int size
, const char *filename
, const char *encoding
)
14551 xmlParserInputPtr inputStream
;
14552 xmlParserInputBufferPtr buf
;
14553 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
14558 if ((encoding
== NULL
) && (chunk
!= NULL
) && (size
>= 4))
14559 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
14561 buf
= xmlAllocParserInputBuffer(enc
);
14565 if (ctxt
== NULL
) {
14566 xmlFreeParserInputBuffer(buf
);
14570 xmlCtxtReset(ctxt
);
14572 if (filename
== NULL
) {
14573 ctxt
->directory
= NULL
;
14575 ctxt
->directory
= xmlParserGetDirectory(filename
);
14578 inputStream
= xmlNewInputStream(ctxt
);
14579 if (inputStream
== NULL
) {
14580 xmlFreeParserInputBuffer(buf
);
14584 if (filename
== NULL
)
14585 inputStream
->filename
= NULL
;
14587 inputStream
->filename
= (char *)
14588 xmlCanonicPath((const xmlChar
*) filename
);
14589 inputStream
->buf
= buf
;
14590 xmlBufResetInput(buf
->buffer
, inputStream
);
14592 inputPush(ctxt
, inputStream
);
14594 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
14595 (ctxt
->input
->buf
!= NULL
)) {
14596 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
14597 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
14599 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
14601 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
14603 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
14607 if (encoding
!= NULL
) {
14608 xmlCharEncodingHandlerPtr hdlr
;
14610 if (ctxt
->encoding
!= NULL
)
14611 xmlFree((xmlChar
*) ctxt
->encoding
);
14612 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14614 hdlr
= xmlFindCharEncodingHandler(encoding
);
14615 if (hdlr
!= NULL
) {
14616 xmlSwitchToEncoding(ctxt
, hdlr
);
14618 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
14619 "Unsupported encoding %s\n", BAD_CAST encoding
);
14621 } else if (enc
!= XML_CHAR_ENCODING_NONE
) {
14622 xmlSwitchEncoding(ctxt
, enc
);
14630 * xmlCtxtUseOptionsInternal:
14631 * @ctxt: an XML parser context
14632 * @options: a combination of xmlParserOption
14633 * @encoding: the user provided encoding to use
14635 * Applies the options to the parser context
14637 * Returns 0 in case of success, the set of unknown or unimplemented options
14638 * in case of error.
14641 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
, const char *encoding
)
14645 if (encoding
!= NULL
) {
14646 if (ctxt
->encoding
!= NULL
)
14647 xmlFree((xmlChar
*) ctxt
->encoding
);
14648 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14650 if (options
& XML_PARSE_RECOVER
) {
14651 ctxt
->recovery
= 1;
14652 options
-= XML_PARSE_RECOVER
;
14653 ctxt
->options
|= XML_PARSE_RECOVER
;
14655 ctxt
->recovery
= 0;
14656 if (options
& XML_PARSE_DTDLOAD
) {
14657 ctxt
->loadsubset
= XML_DETECT_IDS
;
14658 options
-= XML_PARSE_DTDLOAD
;
14659 ctxt
->options
|= XML_PARSE_DTDLOAD
;
14661 ctxt
->loadsubset
= 0;
14662 if (options
& XML_PARSE_DTDATTR
) {
14663 ctxt
->loadsubset
|= XML_COMPLETE_ATTRS
;
14664 options
-= XML_PARSE_DTDATTR
;
14665 ctxt
->options
|= XML_PARSE_DTDATTR
;
14667 if (options
& XML_PARSE_NOENT
) {
14668 ctxt
->replaceEntities
= 1;
14669 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14670 options
-= XML_PARSE_NOENT
;
14671 ctxt
->options
|= XML_PARSE_NOENT
;
14673 ctxt
->replaceEntities
= 0;
14674 if (options
& XML_PARSE_PEDANTIC
) {
14675 ctxt
->pedantic
= 1;
14676 options
-= XML_PARSE_PEDANTIC
;
14677 ctxt
->options
|= XML_PARSE_PEDANTIC
;
14679 ctxt
->pedantic
= 0;
14680 if (options
& XML_PARSE_NOBLANKS
) {
14681 ctxt
->keepBlanks
= 0;
14682 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
14683 options
-= XML_PARSE_NOBLANKS
;
14684 ctxt
->options
|= XML_PARSE_NOBLANKS
;
14686 ctxt
->keepBlanks
= 1;
14687 if (options
& XML_PARSE_DTDVALID
) {
14688 ctxt
->validate
= 1;
14689 if (options
& XML_PARSE_NOWARNING
)
14690 ctxt
->vctxt
.warning
= NULL
;
14691 if (options
& XML_PARSE_NOERROR
)
14692 ctxt
->vctxt
.error
= NULL
;
14693 options
-= XML_PARSE_DTDVALID
;
14694 ctxt
->options
|= XML_PARSE_DTDVALID
;
14696 ctxt
->validate
= 0;
14697 if (options
& XML_PARSE_NOWARNING
) {
14698 ctxt
->sax
->warning
= NULL
;
14699 options
-= XML_PARSE_NOWARNING
;
14701 if (options
& XML_PARSE_NOERROR
) {
14702 ctxt
->sax
->error
= NULL
;
14703 ctxt
->sax
->fatalError
= NULL
;
14704 options
-= XML_PARSE_NOERROR
;
14706 #ifdef LIBXML_SAX1_ENABLED
14707 if (options
& XML_PARSE_SAX1
) {
14708 ctxt
->sax
->startElement
= xmlSAX2StartElement
;
14709 ctxt
->sax
->endElement
= xmlSAX2EndElement
;
14710 ctxt
->sax
->startElementNs
= NULL
;
14711 ctxt
->sax
->endElementNs
= NULL
;
14712 ctxt
->sax
->initialized
= 1;
14713 options
-= XML_PARSE_SAX1
;
14714 ctxt
->options
|= XML_PARSE_SAX1
;
14716 #endif /* LIBXML_SAX1_ENABLED */
14717 if (options
& XML_PARSE_NODICT
) {
14718 ctxt
->dictNames
= 0;
14719 options
-= XML_PARSE_NODICT
;
14720 ctxt
->options
|= XML_PARSE_NODICT
;
14722 ctxt
->dictNames
= 1;
14724 if (options
& XML_PARSE_NOCDATA
) {
14725 ctxt
->sax
->cdataBlock
= NULL
;
14726 options
-= XML_PARSE_NOCDATA
;
14727 ctxt
->options
|= XML_PARSE_NOCDATA
;
14729 if (options
& XML_PARSE_NSCLEAN
) {
14730 ctxt
->options
|= XML_PARSE_NSCLEAN
;
14731 options
-= XML_PARSE_NSCLEAN
;
14733 if (options
& XML_PARSE_NONET
) {
14734 ctxt
->options
|= XML_PARSE_NONET
;
14735 options
-= XML_PARSE_NONET
;
14737 if (options
& XML_PARSE_COMPACT
) {
14738 ctxt
->options
|= XML_PARSE_COMPACT
;
14739 options
-= XML_PARSE_COMPACT
;
14741 if (options
& XML_PARSE_OLD10
) {
14742 ctxt
->options
|= XML_PARSE_OLD10
;
14743 options
-= XML_PARSE_OLD10
;
14745 if (options
& XML_PARSE_NOBASEFIX
) {
14746 ctxt
->options
|= XML_PARSE_NOBASEFIX
;
14747 options
-= XML_PARSE_NOBASEFIX
;
14749 if (options
& XML_PARSE_HUGE
) {
14750 ctxt
->options
|= XML_PARSE_HUGE
;
14751 options
-= XML_PARSE_HUGE
;
14752 if (ctxt
->dict
!= NULL
)
14753 xmlDictSetLimit(ctxt
->dict
, 0);
14755 if (options
& XML_PARSE_OLDSAX
) {
14756 ctxt
->options
|= XML_PARSE_OLDSAX
;
14757 options
-= XML_PARSE_OLDSAX
;
14759 if (options
& XML_PARSE_IGNORE_ENC
) {
14760 ctxt
->options
|= XML_PARSE_IGNORE_ENC
;
14761 options
-= XML_PARSE_IGNORE_ENC
;
14763 if (options
& XML_PARSE_BIG_LINES
) {
14764 ctxt
->options
|= XML_PARSE_BIG_LINES
;
14765 options
-= XML_PARSE_BIG_LINES
;
14767 ctxt
->linenumbers
= 1;
14772 * xmlCtxtUseOptions:
14773 * @ctxt: an XML parser context
14774 * @options: a combination of xmlParserOption
14776 * Applies the options to the parser context
14778 * Returns 0 in case of success, the set of unknown or unimplemented options
14779 * in case of error.
14782 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt
, int options
)
14784 return(xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
));
14789 * @ctxt: an XML parser context
14790 * @URL: the base URL to use for the document
14791 * @encoding: the document encoding, or NULL
14792 * @options: a combination of xmlParserOption
14793 * @reuse: keep the context for reuse
14795 * Common front-end for the xmlRead functions
14797 * Returns the resulting document tree or NULL
14800 xmlDoRead(xmlParserCtxtPtr ctxt
, const char *URL
, const char *encoding
,
14801 int options
, int reuse
)
14805 xmlCtxtUseOptionsInternal(ctxt
, options
, encoding
);
14806 if (encoding
!= NULL
) {
14807 xmlCharEncodingHandlerPtr hdlr
;
14810 * TODO: We should consider setting XML_PARSE_IGNORE_ENC if the
14811 * caller provided an encoding. Otherwise, we might switch to
14812 * the encoding from the XML declaration which is likely to
14813 * break things. Also see xmlSwitchInputEncoding.
14815 hdlr
= xmlFindCharEncodingHandler(encoding
);
14817 xmlSwitchToEncoding(ctxt
, hdlr
);
14819 if ((URL
!= NULL
) && (ctxt
->input
!= NULL
) &&
14820 (ctxt
->input
->filename
== NULL
))
14821 ctxt
->input
->filename
= (char *) xmlStrdup((const xmlChar
*) URL
);
14822 xmlParseDocument(ctxt
);
14823 if ((ctxt
->wellFormed
) || ctxt
->recovery
)
14827 if (ctxt
->myDoc
!= NULL
) {
14828 xmlFreeDoc(ctxt
->myDoc
);
14831 ctxt
->myDoc
= NULL
;
14833 xmlFreeParserCtxt(ctxt
);
14841 * @cur: a pointer to a zero terminated string
14842 * @URL: the base URL to use for the document
14843 * @encoding: the document encoding, or NULL
14844 * @options: a combination of xmlParserOption
14846 * parse an XML in-memory document and build a tree.
14848 * Returns the resulting document tree
14851 xmlReadDoc(const xmlChar
* cur
, const char *URL
, const char *encoding
, int options
)
14853 xmlParserCtxtPtr ctxt
;
14859 ctxt
= xmlCreateDocParserCtxt(cur
);
14862 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14867 * @filename: a file or URL
14868 * @encoding: the document encoding, or NULL
14869 * @options: a combination of xmlParserOption
14871 * parse an XML file from the filesystem or the network.
14873 * Returns the resulting document tree
14876 xmlReadFile(const char *filename
, const char *encoding
, int options
)
14878 xmlParserCtxtPtr ctxt
;
14881 ctxt
= xmlCreateURLParserCtxt(filename
, options
);
14884 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 0));
14889 * @buffer: a pointer to a char array
14890 * @size: the size of the array
14891 * @URL: the base URL to use for the document
14892 * @encoding: the document encoding, or NULL
14893 * @options: a combination of xmlParserOption
14895 * parse an XML in-memory document and build a tree.
14897 * Returns the resulting document tree
14900 xmlReadMemory(const char *buffer
, int size
, const char *URL
, const char *encoding
, int options
)
14902 xmlParserCtxtPtr ctxt
;
14905 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14908 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14913 * @fd: an open file descriptor
14914 * @URL: the base URL to use for the document
14915 * @encoding: the document encoding, or NULL
14916 * @options: a combination of xmlParserOption
14918 * parse an XML from a file descriptor and build a tree.
14919 * NOTE that the file descriptor will not be closed by the parser;
14920 * the caller remains responsible for closing it.
14922 * Returns the resulting document tree
14925 xmlReadFd(int fd
, const char *URL
, const char *encoding
, int options
)
14927 xmlParserCtxtPtr ctxt
;
14928 xmlParserInputBufferPtr input
;
14929 xmlParserInputPtr stream
;
14935 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
14938 input
->closecallback
= NULL
;
14939 ctxt
= xmlNewParserCtxt();
14940 if (ctxt
== NULL
) {
14941 xmlFreeParserInputBuffer(input
);
14944 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14945 if (stream
== NULL
) {
14946 xmlFreeParserInputBuffer(input
);
14947 xmlFreeParserCtxt(ctxt
);
14950 inputPush(ctxt
, stream
);
14951 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14956 * @ioread: an I/O read function
14957 * @ioclose: an I/O close function
14958 * @ioctx: an I/O handler
14959 * @URL: the base URL to use for the document
14960 * @encoding: the document encoding, or NULL
14961 * @options: a combination of xmlParserOption
14963 * parse an XML document from I/O functions and source and build a tree.
14965 * Returns the resulting document tree
14968 xmlReadIO(xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
14969 void *ioctx
, const char *URL
, const char *encoding
, int options
)
14971 xmlParserCtxtPtr ctxt
;
14972 xmlParserInputBufferPtr input
;
14973 xmlParserInputPtr stream
;
14975 if (ioread
== NULL
)
14979 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
14980 XML_CHAR_ENCODING_NONE
);
14981 if (input
== NULL
) {
14982 if (ioclose
!= NULL
)
14986 ctxt
= xmlNewParserCtxt();
14987 if (ctxt
== NULL
) {
14988 xmlFreeParserInputBuffer(input
);
14991 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14992 if (stream
== NULL
) {
14993 xmlFreeParserInputBuffer(input
);
14994 xmlFreeParserCtxt(ctxt
);
14997 inputPush(ctxt
, stream
);
14998 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15003 * @ctxt: an XML parser context
15004 * @cur: a pointer to a zero terminated string
15005 * @URL: the base URL to use for the document
15006 * @encoding: the document encoding, or NULL
15007 * @options: a combination of xmlParserOption
15009 * parse an XML in-memory document and build a tree.
15010 * This reuses the existing @ctxt parser context
15012 * Returns the resulting document tree
15015 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
,
15016 const char *URL
, const char *encoding
, int options
)
15020 return (xmlCtxtReadMemory(ctxt
, (const char *) cur
, xmlStrlen(cur
), URL
,
15021 encoding
, options
));
15026 * @ctxt: an XML parser context
15027 * @filename: a file or URL
15028 * @encoding: the document encoding, or NULL
15029 * @options: a combination of xmlParserOption
15031 * parse an XML file from the filesystem or the network.
15032 * This reuses the existing @ctxt parser context
15034 * Returns the resulting document tree
15037 xmlCtxtReadFile(xmlParserCtxtPtr ctxt
, const char *filename
,
15038 const char *encoding
, int options
)
15040 xmlParserInputPtr stream
;
15042 if (filename
== NULL
)
15048 xmlCtxtReset(ctxt
);
15050 stream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
15051 if (stream
== NULL
) {
15054 inputPush(ctxt
, stream
);
15055 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 1));
15059 * xmlCtxtReadMemory:
15060 * @ctxt: an XML parser context
15061 * @buffer: a pointer to a char array
15062 * @size: the size of the array
15063 * @URL: the base URL to use for the document
15064 * @encoding: the document encoding, or NULL
15065 * @options: a combination of xmlParserOption
15067 * parse an XML in-memory document and build a tree.
15068 * This reuses the existing @ctxt parser context
15070 * Returns the resulting document tree
15073 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt
, const char *buffer
, int size
,
15074 const char *URL
, const char *encoding
, int options
)
15076 xmlParserInputBufferPtr input
;
15077 xmlParserInputPtr stream
;
15081 if (buffer
== NULL
)
15085 xmlCtxtReset(ctxt
);
15087 input
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
15088 if (input
== NULL
) {
15092 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15093 if (stream
== NULL
) {
15094 xmlFreeParserInputBuffer(input
);
15098 inputPush(ctxt
, stream
);
15099 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15104 * @ctxt: an XML parser context
15105 * @fd: an open file descriptor
15106 * @URL: the base URL to use for the document
15107 * @encoding: the document encoding, or NULL
15108 * @options: a combination of xmlParserOption
15110 * parse an XML from a file descriptor and build a tree.
15111 * This reuses the existing @ctxt parser context
15112 * NOTE that the file descriptor will not be closed when the
15113 * reader is closed or reset.
15115 * Returns the resulting document tree
15118 xmlCtxtReadFd(xmlParserCtxtPtr ctxt
, int fd
,
15119 const char *URL
, const char *encoding
, int options
)
15121 xmlParserInputBufferPtr input
;
15122 xmlParserInputPtr stream
;
15130 xmlCtxtReset(ctxt
);
15133 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15136 input
->closecallback
= NULL
;
15137 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15138 if (stream
== NULL
) {
15139 xmlFreeParserInputBuffer(input
);
15142 inputPush(ctxt
, stream
);
15143 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15148 * @ctxt: an XML parser context
15149 * @ioread: an I/O read function
15150 * @ioclose: an I/O close function
15151 * @ioctx: an I/O handler
15152 * @URL: the base URL to use for the document
15153 * @encoding: the document encoding, or NULL
15154 * @options: a combination of xmlParserOption
15156 * parse an XML document from I/O functions and source and build a tree.
15157 * This reuses the existing @ctxt parser context
15159 * Returns the resulting document tree
15162 xmlCtxtReadIO(xmlParserCtxtPtr ctxt
, xmlInputReadCallback ioread
,
15163 xmlInputCloseCallback ioclose
, void *ioctx
,
15165 const char *encoding
, int options
)
15167 xmlParserInputBufferPtr input
;
15168 xmlParserInputPtr stream
;
15170 if (ioread
== NULL
)
15176 xmlCtxtReset(ctxt
);
15178 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15179 XML_CHAR_ENCODING_NONE
);
15180 if (input
== NULL
) {
15181 if (ioclose
!= NULL
)
15185 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15186 if (stream
== NULL
) {
15187 xmlFreeParserInputBuffer(input
);
15190 inputPush(ctxt
, stream
);
15191 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));