include/mscvpdb.h: Use flexible array members for the rest of structures.
[wine.git] / libs / xml2 / parser.c
blob1145a58860d0938b691dd8915d44836602b28401
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different character ranges are actually implemented either in
21 * parserInternals.h or parserInternals.c.
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
28 * See Copyright for the status of this software.
30 * daniel@veillard.com
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
38 #define IN_LIBXML
39 #include "libxml.h"
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/HTMLparser.h>
61 #include <libxml/valid.h>
62 #include <libxml/entities.h>
63 #include <libxml/xmlerror.h>
64 #include <libxml/encoding.h>
65 #include <libxml/xmlIO.h>
66 #include <libxml/uri.h>
67 #ifdef LIBXML_CATALOG_ENABLED
68 #include <libxml/catalog.h>
69 #endif
70 #ifdef LIBXML_SCHEMAS_ENABLED
71 #include <libxml/xmlschemastypes.h>
72 #include <libxml/relaxng.h>
73 #endif
74 #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75 #include <libxml/xpath.h>
76 #endif
78 #include "private/buf.h"
79 #include "private/dict.h"
80 #include "private/enc.h"
81 #include "private/entities.h"
82 #include "private/error.h"
83 #include "private/globals.h"
84 #include "private/html.h"
85 #include "private/io.h"
86 #include "private/memory.h"
87 #include "private/parser.h"
88 #include "private/threads.h"
89 #include "private/xpath.h"
91 struct _xmlStartTag {
92 const xmlChar *prefix;
93 const xmlChar *URI;
94 int line;
95 int nsNr;
98 static xmlParserCtxtPtr
99 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100 const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101 xmlParserCtxtPtr pctx);
103 static int
104 xmlParseElementStart(xmlParserCtxtPtr ctxt);
106 static void
107 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * The constants below guard against abusive entity expansion
 * ("billion laughs").
 */

/*
 * XML_PARSER_NON_LINEAR: approximate upper bound on the amplification
 * factor of serialized output after entity expansion.
 */
#define XML_PARSER_NON_LINEAR 5

/*
 * XML_PARSER_ALLOWED_EXPANSION: amount of expansion that is always allowed.
 */
#define XML_PARSER_ALLOWED_EXPANSION 1000000

/*
 * XML_ENT_FIXED_COST: fixed cost charged for each entity reference. It is a
 * crude model of processing time and protects, for example, against
 * exponential expansion of empty or very short entities.
 */
#define XML_ENT_FIXED_COST 20

/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to process.
 * This is not a limitation of the parser but a safety boundary feature.
 * It can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;

#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * Minimal amount of data the parser expects to have received when GROW is
 * called. It is not a hard limit but an optimization for reading strings
 * such as Names. It is not strictly needed as long as the available input
 * characters are followed by a 0, which the I/O level should provide.
 */
#define XML_PARSER_CHUNK_SIZE 100
/*
 * Processing-instruction targets with an "xml" prefix that are allowed by
 * W3C specifications. The array is terminated by a NULL entry.
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
180 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182 const xmlChar **str);
184 static xmlParserErrors
185 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186 xmlSAXHandlerPtr sax,
187 void *user_data, int depth, const xmlChar *URL,
188 const xmlChar *ID, xmlNodePtr *list);
190 static int
191 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192 const char *encoding);
193 #ifdef LIBXML_LEGACY_ENABLED
194 static void
195 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196 xmlNodePtr lastNode);
197 #endif /* LIBXML_LEGACY_ENABLED */
199 static xmlParserErrors
200 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201 const xmlChar *string, void *user_data, xmlNodePtr *lst);
203 static int
204 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
206 /************************************************************************
208 * Some factorized error routines *
210 ************************************************************************/
213 * xmlErrAttributeDup:
214 * @ctxt: an XML parser context
215 * @prefix: the attribute prefix
216 * @localname: the attribute localname
218 * Handle a redefinition of attribute error
220 static void
221 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222 const xmlChar * localname)
224 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225 (ctxt->instate == XML_PARSER_EOF))
226 return;
227 if (ctxt != NULL)
228 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
230 if (prefix == NULL)
231 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233 (const char *) localname, NULL, NULL, 0, 0,
234 "Attribute %s redefined\n", localname);
235 else
236 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238 (const char *) prefix, (const char *) localname,
239 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240 localname);
241 if (ctxt != NULL) {
242 ctxt->wellFormed = 0;
243 if (ctxt->recovery == 0)
244 ctxt->disableSAX = 1;
249 * xmlFatalErr:
250 * @ctxt: an XML parser context
251 * @error: the error number
252 * @extra: extra information string
254 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
256 static void
257 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
259 const char *errmsg;
261 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
262 (ctxt->instate == XML_PARSER_EOF))
263 return;
264 switch (error) {
265 case XML_ERR_INVALID_HEX_CHARREF:
266 errmsg = "CharRef: invalid hexadecimal value";
267 break;
268 case XML_ERR_INVALID_DEC_CHARREF:
269 errmsg = "CharRef: invalid decimal value";
270 break;
271 case XML_ERR_INVALID_CHARREF:
272 errmsg = "CharRef: invalid value";
273 break;
274 case XML_ERR_INTERNAL_ERROR:
275 errmsg = "internal error";
276 break;
277 case XML_ERR_PEREF_AT_EOF:
278 errmsg = "PEReference at end of document";
279 break;
280 case XML_ERR_PEREF_IN_PROLOG:
281 errmsg = "PEReference in prolog";
282 break;
283 case XML_ERR_PEREF_IN_EPILOG:
284 errmsg = "PEReference in epilog";
285 break;
286 case XML_ERR_PEREF_NO_NAME:
287 errmsg = "PEReference: no name";
288 break;
289 case XML_ERR_PEREF_SEMICOL_MISSING:
290 errmsg = "PEReference: expecting ';'";
291 break;
292 case XML_ERR_ENTITY_LOOP:
293 errmsg = "Detected an entity reference loop";
294 break;
295 case XML_ERR_ENTITY_NOT_STARTED:
296 errmsg = "EntityValue: \" or ' expected";
297 break;
298 case XML_ERR_ENTITY_PE_INTERNAL:
299 errmsg = "PEReferences forbidden in internal subset";
300 break;
301 case XML_ERR_ENTITY_NOT_FINISHED:
302 errmsg = "EntityValue: \" or ' expected";
303 break;
304 case XML_ERR_ATTRIBUTE_NOT_STARTED:
305 errmsg = "AttValue: \" or ' expected";
306 break;
307 case XML_ERR_LT_IN_ATTRIBUTE:
308 errmsg = "Unescaped '<' not allowed in attributes values";
309 break;
310 case XML_ERR_LITERAL_NOT_STARTED:
311 errmsg = "SystemLiteral \" or ' expected";
312 break;
313 case XML_ERR_LITERAL_NOT_FINISHED:
314 errmsg = "Unfinished System or Public ID \" or ' expected";
315 break;
316 case XML_ERR_MISPLACED_CDATA_END:
317 errmsg = "Sequence ']]>' not allowed in content";
318 break;
319 case XML_ERR_URI_REQUIRED:
320 errmsg = "SYSTEM or PUBLIC, the URI is missing";
321 break;
322 case XML_ERR_PUBID_REQUIRED:
323 errmsg = "PUBLIC, the Public Identifier is missing";
324 break;
325 case XML_ERR_HYPHEN_IN_COMMENT:
326 errmsg = "Comment must not contain '--' (double-hyphen)";
327 break;
328 case XML_ERR_PI_NOT_STARTED:
329 errmsg = "xmlParsePI : no target name";
330 break;
331 case XML_ERR_RESERVED_XML_NAME:
332 errmsg = "Invalid PI name";
333 break;
334 case XML_ERR_NOTATION_NOT_STARTED:
335 errmsg = "NOTATION: Name expected here";
336 break;
337 case XML_ERR_NOTATION_NOT_FINISHED:
338 errmsg = "'>' required to close NOTATION declaration";
339 break;
340 case XML_ERR_VALUE_REQUIRED:
341 errmsg = "Entity value required";
342 break;
343 case XML_ERR_URI_FRAGMENT:
344 errmsg = "Fragment not allowed";
345 break;
346 case XML_ERR_ATTLIST_NOT_STARTED:
347 errmsg = "'(' required to start ATTLIST enumeration";
348 break;
349 case XML_ERR_NMTOKEN_REQUIRED:
350 errmsg = "NmToken expected in ATTLIST enumeration";
351 break;
352 case XML_ERR_ATTLIST_NOT_FINISHED:
353 errmsg = "')' required to finish ATTLIST enumeration";
354 break;
355 case XML_ERR_MIXED_NOT_STARTED:
356 errmsg = "MixedContentDecl : '|' or ')*' expected";
357 break;
358 case XML_ERR_PCDATA_REQUIRED:
359 errmsg = "MixedContentDecl : '#PCDATA' expected";
360 break;
361 case XML_ERR_ELEMCONTENT_NOT_STARTED:
362 errmsg = "ContentDecl : Name or '(' expected";
363 break;
364 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
365 errmsg = "ContentDecl : ',' '|' or ')' expected";
366 break;
367 case XML_ERR_PEREF_IN_INT_SUBSET:
368 errmsg =
369 "PEReference: forbidden within markup decl in internal subset";
370 break;
371 case XML_ERR_GT_REQUIRED:
372 errmsg = "expected '>'";
373 break;
374 case XML_ERR_CONDSEC_INVALID:
375 errmsg = "XML conditional section '[' expected";
376 break;
377 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
378 errmsg = "Content error in the external subset";
379 break;
380 case XML_ERR_CONDSEC_INVALID_KEYWORD:
381 errmsg =
382 "conditional section INCLUDE or IGNORE keyword expected";
383 break;
384 case XML_ERR_CONDSEC_NOT_FINISHED:
385 errmsg = "XML conditional section not closed";
386 break;
387 case XML_ERR_XMLDECL_NOT_STARTED:
388 errmsg = "Text declaration '<?xml' required";
389 break;
390 case XML_ERR_XMLDECL_NOT_FINISHED:
391 errmsg = "parsing XML declaration: '?>' expected";
392 break;
393 case XML_ERR_EXT_ENTITY_STANDALONE:
394 errmsg = "external parsed entities cannot be standalone";
395 break;
396 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
397 errmsg = "EntityRef: expecting ';'";
398 break;
399 case XML_ERR_DOCTYPE_NOT_FINISHED:
400 errmsg = "DOCTYPE improperly terminated";
401 break;
402 case XML_ERR_LTSLASH_REQUIRED:
403 errmsg = "EndTag: '</' not found";
404 break;
405 case XML_ERR_EQUAL_REQUIRED:
406 errmsg = "expected '='";
407 break;
408 case XML_ERR_STRING_NOT_CLOSED:
409 errmsg = "String not closed expecting \" or '";
410 break;
411 case XML_ERR_STRING_NOT_STARTED:
412 errmsg = "String not started expecting ' or \"";
413 break;
414 case XML_ERR_ENCODING_NAME:
415 errmsg = "Invalid XML encoding name";
416 break;
417 case XML_ERR_STANDALONE_VALUE:
418 errmsg = "standalone accepts only 'yes' or 'no'";
419 break;
420 case XML_ERR_DOCUMENT_EMPTY:
421 errmsg = "Document is empty";
422 break;
423 case XML_ERR_DOCUMENT_END:
424 errmsg = "Extra content at the end of the document";
425 break;
426 case XML_ERR_NOT_WELL_BALANCED:
427 errmsg = "chunk is not well balanced";
428 break;
429 case XML_ERR_EXTRA_CONTENT:
430 errmsg = "extra content at the end of well balanced chunk";
431 break;
432 case XML_ERR_VERSION_MISSING:
433 errmsg = "Malformed declaration expecting version";
434 break;
435 case XML_ERR_NAME_TOO_LONG:
436 errmsg = "Name too long";
437 break;
438 #if 0
439 case:
440 errmsg = "";
441 break;
442 #endif
443 default:
444 errmsg = "Unregistered error message";
446 if (ctxt != NULL)
447 ctxt->errNo = error;
448 if (info == NULL) {
449 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
450 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
451 errmsg);
452 } else {
453 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
454 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
455 errmsg, info);
457 if (ctxt != NULL) {
458 ctxt->wellFormed = 0;
459 if (ctxt->recovery == 0)
460 ctxt->disableSAX = 1;
465 * xmlFatalErrMsg:
466 * @ctxt: an XML parser context
467 * @error: the error number
468 * @msg: the error message
470 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
472 static void LIBXML_ATTR_FORMAT(3,0)
473 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474 const char *msg)
476 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477 (ctxt->instate == XML_PARSER_EOF))
478 return;
479 if (ctxt != NULL)
480 ctxt->errNo = error;
481 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
482 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
483 if (ctxt != NULL) {
484 ctxt->wellFormed = 0;
485 if (ctxt->recovery == 0)
486 ctxt->disableSAX = 1;
491 * xmlWarningMsg:
492 * @ctxt: an XML parser context
493 * @error: the error number
494 * @msg: the error message
495 * @str1: extra data
496 * @str2: extra data
498 * Handle a warning.
500 static void LIBXML_ATTR_FORMAT(3,0)
501 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
502 const char *msg, const xmlChar *str1, const xmlChar *str2)
504 xmlStructuredErrorFunc schannel = NULL;
506 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
507 (ctxt->instate == XML_PARSER_EOF))
508 return;
509 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
510 (ctxt->sax->initialized == XML_SAX2_MAGIC))
511 schannel = ctxt->sax->serror;
512 if (ctxt != NULL) {
513 __xmlRaiseError(schannel,
514 (ctxt->sax) ? ctxt->sax->warning : NULL,
515 ctxt->userData,
516 ctxt, NULL, XML_FROM_PARSER, error,
517 XML_ERR_WARNING, NULL, 0,
518 (const char *) str1, (const char *) str2, NULL, 0, 0,
519 msg, (const char *) str1, (const char *) str2);
520 } else {
521 __xmlRaiseError(schannel, NULL, NULL,
522 ctxt, NULL, XML_FROM_PARSER, error,
523 XML_ERR_WARNING, NULL, 0,
524 (const char *) str1, (const char *) str2, NULL, 0, 0,
525 msg, (const char *) str1, (const char *) str2);
530 * xmlValidityError:
531 * @ctxt: an XML parser context
532 * @error: the error number
533 * @msg: the error message
534 * @str1: extra data
536 * Handle a validity error.
538 static void LIBXML_ATTR_FORMAT(3,0)
539 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
540 const char *msg, const xmlChar *str1, const xmlChar *str2)
542 xmlStructuredErrorFunc schannel = NULL;
544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
547 if (ctxt != NULL) {
548 ctxt->errNo = error;
549 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
550 schannel = ctxt->sax->serror;
552 if (ctxt != NULL) {
553 __xmlRaiseError(schannel,
554 ctxt->vctxt.error, ctxt->vctxt.userData,
555 ctxt, NULL, XML_FROM_DTD, error,
556 XML_ERR_ERROR, NULL, 0, (const char *) str1,
557 (const char *) str2, NULL, 0, 0,
558 msg, (const char *) str1, (const char *) str2);
559 ctxt->valid = 0;
560 } else {
561 __xmlRaiseError(schannel, NULL, NULL,
562 ctxt, NULL, XML_FROM_DTD, error,
563 XML_ERR_ERROR, NULL, 0, (const char *) str1,
564 (const char *) str2, NULL, 0, 0,
565 msg, (const char *) str1, (const char *) str2);
570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
578 static void LIBXML_ATTR_FORMAT(3,0)
579 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
580 const char *msg, int val)
582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
585 if (ctxt != NULL)
586 ctxt->errNo = error;
587 __xmlRaiseError(NULL, NULL, NULL,
588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
608 static void LIBXML_ATTR_FORMAT(3,0)
609 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
616 if (ctxt != NULL)
617 ctxt->errNo = error;
618 __xmlRaiseError(NULL, NULL, NULL,
619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
638 static void LIBXML_ATTR_FORMAT(3,0)
639 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
640 const char *msg, const xmlChar * val)
642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
645 if (ctxt != NULL)
646 ctxt->errNo = error;
647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
665 * Handle a non fatal parser error
667 static void LIBXML_ATTR_FORMAT(3,0)
668 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
674 if (ctxt != NULL)
675 ctxt->errNo = error;
676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
692 static void LIBXML_ATTR_FORMAT(3,0)
693 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
701 if (ctxt != NULL)
702 ctxt->errNo = error;
703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
719 * Handle a namespace warning error
721 static void LIBXML_ATTR_FORMAT(3,0)
722 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
730 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731 XML_ERR_WARNING, NULL, 0, (const char *) info1,
732 (const char *) info2, (const char *) info3, 0, 0, msg,
733 info1, info2, info3);
/*
 * xmlSaturatedAdd:
 * @dst:  accumulator updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of wrapping
 * around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Headroom left in the accumulator before it would wrap. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
/*
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of wrapping
 * around on overflow.
 *
 * @val is taken as size_t so that a value wider than unsigned long (as is
 * possible where size_t is 64-bit and unsigned long is 32-bit) saturates
 * instead of being truncated before the range check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is carried out in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
753 * xmlParserEntityCheck:
754 * @ctxt: parser context
755 * @extra: sum of unexpanded entity sizes
757 * Check for non-linear entity expansion behaviour.
759 * In some cases like xmlStringDecodeEntities, this function is called
760 * for each, possibly nested entity and its unexpanded content length.
762 * In other cases like xmlParseReference, it's only called for each
763 * top-level entity with its unexpanded content length plus the sum of
764 * the unexpanded content lengths (plus fixed cost) of all nested
765 * entities.
767 * Summing the unexpanded lengths also adds the length of the reference.
768 * This is by design. Taking the length of the entity name into account
769 * discourages attacks that try to waste CPU time with abusively long
770 * entity names. See test/recurse/lol6.xml for example. Each call also
771 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
772 * short entities.
774 * Returns 1 on error, 0 on success.
776 static int
777 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
779 unsigned long consumed;
780 xmlParserInputPtr input = ctxt->input;
781 xmlEntityPtr entity = input->entity;
784 * Compute total consumed bytes so far, including input streams of
785 * external entities.
787 consumed = input->parentConsumed;
788 if ((entity == NULL) ||
789 ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
790 ((entity->flags & XML_ENT_PARSED) == 0))) {
791 xmlSaturatedAdd(&consumed, input->consumed);
792 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
794 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
797 * Add extra cost and some fixed cost.
799 xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
800 xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
803 * It's important to always use saturation arithmetic when tracking
804 * entity sizes to make the size checks reliable. If "sizeentcopy"
805 * overflows, we have to abort.
807 if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
808 ((ctxt->sizeentcopy >= ULONG_MAX) ||
809 (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
810 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
811 "Maximum entity amplification factor exceeded");
812 xmlHaltParser(ctxt);
813 return(1);
816 return(0);
819 /************************************************************************
821 * Library wide options *
823 ************************************************************************/
826 * xmlHasFeature:
827 * @feature: the feature to be examined
829 * Examines if the library has been compiled with a given feature.
831 * Returns a non-zero value if the feature exist, otherwise zero.
832 * Returns zero (0) if the feature does not exist or an unknown
833 * unknown feature is requested, non-zero otherwise.
836 xmlHasFeature(xmlFeature feature)
838 switch (feature) {
839 case XML_WITH_THREAD:
840 #ifdef LIBXML_THREAD_ENABLED
841 return(1);
842 #else
843 return(0);
844 #endif
845 case XML_WITH_TREE:
846 #ifdef LIBXML_TREE_ENABLED
847 return(1);
848 #else
849 return(0);
850 #endif
851 case XML_WITH_OUTPUT:
852 #ifdef LIBXML_OUTPUT_ENABLED
853 return(1);
854 #else
855 return(0);
856 #endif
857 case XML_WITH_PUSH:
858 #ifdef LIBXML_PUSH_ENABLED
859 return(1);
860 #else
861 return(0);
862 #endif
863 case XML_WITH_READER:
864 #ifdef LIBXML_READER_ENABLED
865 return(1);
866 #else
867 return(0);
868 #endif
869 case XML_WITH_PATTERN:
870 #ifdef LIBXML_PATTERN_ENABLED
871 return(1);
872 #else
873 return(0);
874 #endif
875 case XML_WITH_WRITER:
876 #ifdef LIBXML_WRITER_ENABLED
877 return(1);
878 #else
879 return(0);
880 #endif
881 case XML_WITH_SAX1:
882 #ifdef LIBXML_SAX1_ENABLED
883 return(1);
884 #else
885 return(0);
886 #endif
887 case XML_WITH_FTP:
888 #ifdef LIBXML_FTP_ENABLED
889 return(1);
890 #else
891 return(0);
892 #endif
893 case XML_WITH_HTTP:
894 #ifdef LIBXML_HTTP_ENABLED
895 return(1);
896 #else
897 return(0);
898 #endif
899 case XML_WITH_VALID:
900 #ifdef LIBXML_VALID_ENABLED
901 return(1);
902 #else
903 return(0);
904 #endif
905 case XML_WITH_HTML:
906 #ifdef LIBXML_HTML_ENABLED
907 return(1);
908 #else
909 return(0);
910 #endif
911 case XML_WITH_LEGACY:
912 #ifdef LIBXML_LEGACY_ENABLED
913 return(1);
914 #else
915 return(0);
916 #endif
917 case XML_WITH_C14N:
918 #ifdef LIBXML_C14N_ENABLED
919 return(1);
920 #else
921 return(0);
922 #endif
923 case XML_WITH_CATALOG:
924 #ifdef LIBXML_CATALOG_ENABLED
925 return(1);
926 #else
927 return(0);
928 #endif
929 case XML_WITH_XPATH:
930 #ifdef LIBXML_XPATH_ENABLED
931 return(1);
932 #else
933 return(0);
934 #endif
935 case XML_WITH_XPTR:
936 #ifdef LIBXML_XPTR_ENABLED
937 return(1);
938 #else
939 return(0);
940 #endif
941 case XML_WITH_XINCLUDE:
942 #ifdef LIBXML_XINCLUDE_ENABLED
943 return(1);
944 #else
945 return(0);
946 #endif
947 case XML_WITH_ICONV:
948 #ifdef LIBXML_ICONV_ENABLED
949 return(1);
950 #else
951 return(0);
952 #endif
953 case XML_WITH_ISO8859X:
954 #ifdef LIBXML_ISO8859X_ENABLED
955 return(1);
956 #else
957 return(0);
958 #endif
959 case XML_WITH_UNICODE:
960 #ifdef LIBXML_UNICODE_ENABLED
961 return(1);
962 #else
963 return(0);
964 #endif
965 case XML_WITH_REGEXP:
966 #ifdef LIBXML_REGEXP_ENABLED
967 return(1);
968 #else
969 return(0);
970 #endif
971 case XML_WITH_AUTOMATA:
972 #ifdef LIBXML_AUTOMATA_ENABLED
973 return(1);
974 #else
975 return(0);
976 #endif
977 case XML_WITH_EXPR:
978 #ifdef LIBXML_EXPR_ENABLED
979 return(1);
980 #else
981 return(0);
982 #endif
983 case XML_WITH_SCHEMAS:
984 #ifdef LIBXML_SCHEMAS_ENABLED
985 return(1);
986 #else
987 return(0);
988 #endif
989 case XML_WITH_SCHEMATRON:
990 #ifdef LIBXML_SCHEMATRON_ENABLED
991 return(1);
992 #else
993 return(0);
994 #endif
995 case XML_WITH_MODULES:
996 #ifdef LIBXML_MODULES_ENABLED
997 return(1);
998 #else
999 return(0);
1000 #endif
1001 case XML_WITH_DEBUG:
1002 #ifdef LIBXML_DEBUG_ENABLED
1003 return(1);
1004 #else
1005 return(0);
1006 #endif
1007 case XML_WITH_DEBUG_MEM:
1008 #ifdef DEBUG_MEMORY_LOCATION
1009 return(1);
1010 #else
1011 return(0);
1012 #endif
1013 case XML_WITH_DEBUG_RUN:
1014 return(0);
1015 case XML_WITH_ZLIB:
1016 #ifdef LIBXML_ZLIB_ENABLED
1017 return(1);
1018 #else
1019 return(0);
1020 #endif
1021 case XML_WITH_LZMA:
1022 #ifdef LIBXML_LZMA_ENABLED
1023 return(1);
1024 #else
1025 return(0);
1026 #endif
1027 case XML_WITH_ICU:
1028 #ifdef LIBXML_ICU_ENABLED
1029 return(1);
1030 #else
1031 return(0);
1032 #endif
1033 default:
1034 break;
1036 return(0);
1039 /************************************************************************
1041 * SAX2 defaulted attributes handling *
1043 ************************************************************************/
1046 * xmlDetectSAX2:
1047 * @ctxt: an XML parser context
1049 * Do the SAX2 detection and specific initialization
1051 static void
1052 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1053 xmlSAXHandlerPtr sax;
1055 /* Avoid unused variable warning if features are disabled. */
1056 (void) sax;
1058 if (ctxt == NULL) return;
1059 sax = ctxt->sax;
1060 #ifdef LIBXML_SAX1_ENABLED
1061 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1062 ((sax->startElementNs != NULL) ||
1063 (sax->endElementNs != NULL) ||
1064 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1065 ctxt->sax2 = 1;
1066 #else
1067 ctxt->sax2 = 1;
1068 #endif /* LIBXML_SAX1_ENABLED */
1070 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1071 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1072 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1073 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1074 (ctxt->str_xml_ns == NULL)) {
1075 xmlErrMemory(ctxt, NULL);
1079 typedef struct _xmlDefAttrs xmlDefAttrs;
1080 typedef xmlDefAttrs *xmlDefAttrsPtr;
1081 struct _xmlDefAttrs {
1082 int nbAttrs; /* number of defaulted attributes on that element */
1083 int maxAttrs; /* the size of the array */
1084 #if __STDC_VERSION__ >= 199901L
1085 /* Using a C99 flexible array member avoids UBSan errors. */
1086 const xmlChar *values[]; /* array of localname/prefix/values/external */
1087 #else
1088 const xmlChar *values[5];
1089 #endif
1093 * xmlAttrNormalizeSpace:
1094 * @src: the source string
1095 * @dst: the target string
1097 * Normalize the space in non CDATA attribute values:
1098 * If the attribute type is not CDATA, then the XML processor MUST further
1099 * process the normalized attribute value by discarding any leading and
1100 * trailing space (#x20) characters, and by replacing sequences of space
1101 * (#x20) characters by a single space (#x20) character.
1102 * Note that the size of dst need to be at least src, and if one doesn't need
1103 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104 * passing src as dst is just fine.
1106 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1107 * is needed.
1109 static xmlChar *
1110 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1112 if ((src == NULL) || (dst == NULL))
1113 return(NULL);
1115 while (*src == 0x20) src++;
1116 while (*src != 0) {
1117 if (*src == 0x20) {
1118 while (*src == 0x20) src++;
1119 if (*src != 0)
1120 *dst++ = 0x20;
1121 } else {
1122 *dst++ = *src++;
1125 *dst = 0;
1126 if (dst == src)
1127 return(NULL);
1128 return(dst);
1132 * xmlAttrNormalizeSpace2:
1133 * @src: the source string
1135 * Normalize the space in non CDATA attribute values, a slightly more complex
1136 * front end to avoid allocation problems when running on attribute values
1137 * coming from the input.
1139 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1140 * is needed.
1142 static const xmlChar *
1143 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1145 int i;
1146 int remove_head = 0;
1147 int need_realloc = 0;
1148 const xmlChar *cur;
1150 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1151 return(NULL);
1152 i = *len;
1153 if (i <= 0)
1154 return(NULL);
1156 cur = src;
1157 while (*cur == 0x20) {
1158 cur++;
1159 remove_head++;
1161 while (*cur != 0) {
1162 if (*cur == 0x20) {
1163 cur++;
1164 if ((*cur == 0x20) || (*cur == 0)) {
1165 need_realloc = 1;
1166 break;
1168 } else
1169 cur++;
1171 if (need_realloc) {
1172 xmlChar *ret;
1174 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1175 if (ret == NULL) {
1176 xmlErrMemory(ctxt, NULL);
1177 return(NULL);
1179 xmlAttrNormalizeSpace(ret, ret);
1180 *len = strlen((const char *)ret);
1181 return(ret);
1182 } else if (remove_head) {
1183 *len -= remove_head;
1184 memmove(src, src + remove_head, 1 + *len);
1185 return(src);
1187 return(NULL);
1191 * xmlAddDefAttrs:
1192 * @ctxt: an XML parser context
1193 * @fullname: the element fullname
1194 * @fullattr: the attribute fullname
1195 * @value: the attribute value
1197 * Add a defaulted attribute for an element
1199 static void
1200 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1201 const xmlChar *fullname,
1202 const xmlChar *fullattr,
1203 const xmlChar *value) {
1204 xmlDefAttrsPtr defaults;
1205 int len;
1206 const xmlChar *name;
1207 const xmlChar *prefix;
1210 * Allows to detect attribute redefinitions
1212 if (ctxt->attsSpecial != NULL) {
1213 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1214 return;
1217 if (ctxt->attsDefault == NULL) {
1218 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1219 if (ctxt->attsDefault == NULL)
1220 goto mem_error;
1224 * split the element name into prefix:localname , the string found
1225 * are within the DTD and then not associated to namespace names.
1227 name = xmlSplitQName3(fullname, &len);
1228 if (name == NULL) {
1229 name = xmlDictLookup(ctxt->dict, fullname, -1);
1230 prefix = NULL;
1231 } else {
1232 name = xmlDictLookup(ctxt->dict, name, -1);
1233 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1237 * make sure there is some storage
1239 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1240 if (defaults == NULL) {
1241 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1242 (4 * 5) * sizeof(const xmlChar *));
1243 if (defaults == NULL)
1244 goto mem_error;
1245 defaults->nbAttrs = 0;
1246 defaults->maxAttrs = 4;
1247 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1248 defaults, NULL) < 0) {
1249 xmlFree(defaults);
1250 goto mem_error;
1252 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1253 xmlDefAttrsPtr temp;
1255 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1256 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1257 if (temp == NULL)
1258 goto mem_error;
1259 defaults = temp;
1260 defaults->maxAttrs *= 2;
1261 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1262 defaults, NULL) < 0) {
1263 xmlFree(defaults);
1264 goto mem_error;
1269 * Split the element name into prefix:localname , the string found
1270 * are within the DTD and hen not associated to namespace names.
1272 name = xmlSplitQName3(fullattr, &len);
1273 if (name == NULL) {
1274 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1275 prefix = NULL;
1276 } else {
1277 name = xmlDictLookup(ctxt->dict, name, -1);
1278 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1281 defaults->values[5 * defaults->nbAttrs] = name;
1282 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1283 /* intern the string and precompute the end */
1284 len = xmlStrlen(value);
1285 value = xmlDictLookup(ctxt->dict, value, len);
1286 if (value == NULL)
1287 goto mem_error;
1288 defaults->values[5 * defaults->nbAttrs + 2] = value;
1289 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1290 if (ctxt->external)
1291 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1292 else
1293 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1294 defaults->nbAttrs++;
1296 return;
1298 mem_error:
1299 xmlErrMemory(ctxt, NULL);
1300 return;
1304 * xmlAddSpecialAttr:
1305 * @ctxt: an XML parser context
1306 * @fullname: the element fullname
1307 * @fullattr: the attribute fullname
1308 * @type: the attribute type
1310 * Register this attribute type
1312 static void
1313 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1314 const xmlChar *fullname,
1315 const xmlChar *fullattr,
1316 int type)
1318 if (ctxt->attsSpecial == NULL) {
1319 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1320 if (ctxt->attsSpecial == NULL)
1321 goto mem_error;
1324 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1325 return;
1327 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1328 (void *) (ptrdiff_t) type);
1329 return;
1331 mem_error:
1332 xmlErrMemory(ctxt, NULL);
1333 return;
1337 * xmlCleanSpecialAttrCallback:
1339 * Removes CDATA attributes from the special attribute table
1341 static void
1342 xmlCleanSpecialAttrCallback(void *payload, void *data,
1343 const xmlChar *fullname, const xmlChar *fullattr,
1344 const xmlChar *unused ATTRIBUTE_UNUSED) {
1345 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1347 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1348 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353 * xmlCleanSpecialAttr:
1354 * @ctxt: an XML parser context
1356 * Trim the list of attributes defined to remove all those of type
1357 * CDATA as they are not special. This call should be done when finishing
1358 * to parse the DTD and before starting to parse the document root.
1360 static void
1361 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1363 if (ctxt->attsSpecial == NULL)
1364 return;
1366 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1368 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1369 xmlHashFree(ctxt->attsSpecial, NULL);
1370 ctxt->attsSpecial = NULL;
1372 return;
1376 * xmlCheckLanguageID:
1377 * @lang: pointer to the string value
1379 * DEPRECATED: Internal function, do not use.
1381 * Checks that the value conforms to the LanguageID production:
1383 * NOTE: this is somewhat deprecated, those productions were removed from
1384 * the XML Second edition.
1386 * [33] LanguageID ::= Langcode ('-' Subcode)*
1387 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1388 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1389 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1390 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1391 * [38] Subcode ::= ([a-z] | [A-Z])+
1393 * The current REC reference the successors of RFC 1766, currently 5646
1395 * http://www.rfc-editor.org/rfc/rfc5646.txt
1396 * langtag = language
1397 * ["-" script]
1398 * ["-" region]
1399 * *("-" variant)
1400 * *("-" extension)
1401 * ["-" privateuse]
1402 * language = 2*3ALPHA ; shortest ISO 639 code
1403 * ["-" extlang] ; sometimes followed by
1404 * ; extended language subtags
1405 * / 4ALPHA ; or reserved for future use
1406 * / 5*8ALPHA ; or registered language subtag
1408 * extlang = 3ALPHA ; selected ISO 639 codes
1409 * *2("-" 3ALPHA) ; permanently reserved
1411 * script = 4ALPHA ; ISO 15924 code
1413 * region = 2ALPHA ; ISO 3166-1 code
1414 * / 3DIGIT ; UN M.49 code
1416 * variant = 5*8alphanum ; registered variants
1417 * / (DIGIT 3alphanum)
1419 * extension = singleton 1*("-" (2*8alphanum))
1421 * ; Single alphanumerics
1422 * ; "x" reserved for private use
1423 * singleton = DIGIT ; 0 - 9
1424 * / %x41-57 ; A - W
1425 * / %x59-5A ; Y - Z
1426 * / %x61-77 ; a - w
1427 * / %x79-7A ; y - z
1429 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1430 * The parser below doesn't try to cope with extension or privateuse
1431 * that could be added but that's not interoperable anyway
1433 * Returns 1 if correct 0 otherwise
1436 xmlCheckLanguageID(const xmlChar * lang)
1438 const xmlChar *cur = lang, *nxt;
1440 if (cur == NULL)
1441 return (0);
1442 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1443 ((cur[0] == 'I') && (cur[1] == '-')) ||
1444 ((cur[0] == 'x') && (cur[1] == '-')) ||
1445 ((cur[0] == 'X') && (cur[1] == '-'))) {
1447 * Still allow IANA code and user code which were coming
1448 * from the previous version of the XML-1.0 specification
1449 * it's deprecated but we should not fail
1451 cur += 2;
1452 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1453 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1454 cur++;
1455 return(cur[0] == 0);
1457 nxt = cur;
1458 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1459 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1460 nxt++;
1461 if (nxt - cur >= 4) {
1463 * Reserved
1465 if ((nxt - cur > 8) || (nxt[0] != 0))
1466 return(0);
1467 return(1);
1469 if (nxt - cur < 2)
1470 return(0);
1471 /* we got an ISO 639 code */
1472 if (nxt[0] == 0)
1473 return(1);
1474 if (nxt[0] != '-')
1475 return(0);
1477 nxt++;
1478 cur = nxt;
1479 /* now we can have extlang or script or region or variant */
1480 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1481 goto region_m49;
1483 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1484 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1485 nxt++;
1486 if (nxt - cur == 4)
1487 goto script;
1488 if (nxt - cur == 2)
1489 goto region;
1490 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1491 goto variant;
1492 if (nxt - cur != 3)
1493 return(0);
1494 /* we parsed an extlang */
1495 if (nxt[0] == 0)
1496 return(1);
1497 if (nxt[0] != '-')
1498 return(0);
1500 nxt++;
1501 cur = nxt;
1502 /* now we can have script or region or variant */
1503 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1504 goto region_m49;
1506 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1507 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1508 nxt++;
1509 if (nxt - cur == 2)
1510 goto region;
1511 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1512 goto variant;
1513 if (nxt - cur != 4)
1514 return(0);
1515 /* we parsed a script */
1516 script:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1522 nxt++;
1523 cur = nxt;
1524 /* now we can have region or variant */
1525 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526 goto region_m49;
1528 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530 nxt++;
1532 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1533 goto variant;
1534 if (nxt - cur != 2)
1535 return(0);
1536 /* we parsed a region */
1537 region:
1538 if (nxt[0] == 0)
1539 return(1);
1540 if (nxt[0] != '-')
1541 return(0);
1543 nxt++;
1544 cur = nxt;
1545 /* now we can just have a variant */
1546 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1547 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1548 nxt++;
1550 if ((nxt - cur < 5) || (nxt - cur > 8))
1551 return(0);
1553 /* we parsed a variant */
1554 variant:
1555 if (nxt[0] == 0)
1556 return(1);
1557 if (nxt[0] != '-')
1558 return(0);
1559 /* extensions and private use subtags not checked */
1560 return (1);
1562 region_m49:
1563 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1564 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1565 nxt += 3;
1566 goto region;
1568 return(0);
1571 /************************************************************************
1573 * Parser stacks related functions and macros *
1575 ************************************************************************/
1577 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1578 const xmlChar ** str);
1580 #ifdef SAX2
1582 * nsPush:
1583 * @ctxt: an XML parser context
1584 * @prefix: the namespace prefix or NULL
1585 * @URL: the namespace name
1587 * Pushes a new parser namespace on top of the ns stack
1589 * Returns -1 in case of error, -2 if the namespace should be discarded
1590 * and the index in the stack otherwise.
1592 static int
1593 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1595 if (ctxt->options & XML_PARSE_NSCLEAN) {
1596 int i;
1597 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1598 if (ctxt->nsTab[i] == prefix) {
1599 /* in scope */
1600 if (ctxt->nsTab[i + 1] == URL)
1601 return(-2);
1602 /* out of scope keep it */
1603 break;
1607 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1608 ctxt->nsMax = 10;
1609 ctxt->nsNr = 0;
1610 ctxt->nsTab = (const xmlChar **)
1611 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1612 if (ctxt->nsTab == NULL) {
1613 xmlErrMemory(ctxt, NULL);
1614 ctxt->nsMax = 0;
1615 return (-1);
1617 } else if (ctxt->nsNr >= ctxt->nsMax) {
1618 const xmlChar ** tmp;
1619 ctxt->nsMax *= 2;
1620 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1621 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1622 if (tmp == NULL) {
1623 xmlErrMemory(ctxt, NULL);
1624 ctxt->nsMax /= 2;
1625 return (-1);
1627 ctxt->nsTab = tmp;
1629 ctxt->nsTab[ctxt->nsNr++] = prefix;
1630 ctxt->nsTab[ctxt->nsNr++] = URL;
1631 return (ctxt->nsNr);
1634 * nsPop:
1635 * @ctxt: an XML parser context
1636 * @nr: the number to pop
1638 * Pops the top @nr parser prefix/namespace from the ns stack
1640 * Returns the number of namespaces removed
1642 static int
1643 nsPop(xmlParserCtxtPtr ctxt, int nr)
1645 int i;
1647 if (ctxt->nsTab == NULL) return(0);
1648 if (ctxt->nsNr < nr) {
1649 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1650 nr = ctxt->nsNr;
1652 if (ctxt->nsNr <= 0)
1653 return (0);
1655 for (i = 0;i < nr;i++) {
1656 ctxt->nsNr--;
1657 ctxt->nsTab[ctxt->nsNr] = NULL;
1659 return(nr);
1661 #endif
1663 static int
1664 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1665 const xmlChar **atts;
1666 int *attallocs;
1667 int maxatts;
1669 if (nr + 5 > ctxt->maxatts) {
1670 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1671 atts = (const xmlChar **) xmlMalloc(
1672 maxatts * sizeof(const xmlChar *));
1673 if (atts == NULL) goto mem_error;
1674 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1675 (maxatts / 5) * sizeof(int));
1676 if (attallocs == NULL) {
1677 xmlFree(atts);
1678 goto mem_error;
1680 if (ctxt->maxatts > 0)
1681 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1682 xmlFree(ctxt->atts);
1683 ctxt->atts = atts;
1684 ctxt->attallocs = attallocs;
1685 ctxt->maxatts = maxatts;
1687 return(ctxt->maxatts);
1688 mem_error:
1689 xmlErrMemory(ctxt, NULL);
1690 return(-1);
1694 * inputPush:
1695 * @ctxt: an XML parser context
1696 * @value: the parser input
1698 * Pushes a new parser input on top of the input stack
1700 * Returns -1 in case of error, the index in the stack otherwise
1703 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1705 if ((ctxt == NULL) || (value == NULL))
1706 return(-1);
1707 if (ctxt->inputNr >= ctxt->inputMax) {
1708 size_t newSize = ctxt->inputMax * 2;
1709 xmlParserInputPtr *tmp;
1711 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712 newSize * sizeof(*tmp));
1713 if (tmp == NULL) {
1714 xmlErrMemory(ctxt, NULL);
1715 return (-1);
1717 ctxt->inputTab = tmp;
1718 ctxt->inputMax = newSize;
1720 ctxt->inputTab[ctxt->inputNr] = value;
1721 ctxt->input = value;
1722 return (ctxt->inputNr++);
1725 * inputPop:
1726 * @ctxt: an XML parser context
1728 * Pops the top parser input from the input stack
1730 * Returns the input just removed
1732 xmlParserInputPtr
1733 inputPop(xmlParserCtxtPtr ctxt)
1735 xmlParserInputPtr ret;
1737 if (ctxt == NULL)
1738 return(NULL);
1739 if (ctxt->inputNr <= 0)
1740 return (NULL);
1741 ctxt->inputNr--;
1742 if (ctxt->inputNr > 0)
1743 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1744 else
1745 ctxt->input = NULL;
1746 ret = ctxt->inputTab[ctxt->inputNr];
1747 ctxt->inputTab[ctxt->inputNr] = NULL;
1748 return (ret);
1751 * nodePush:
1752 * @ctxt: an XML parser context
1753 * @value: the element node
1755 * DEPRECATED: Internal function, do not use.
1757 * Pushes a new element node on top of the node stack
1759 * Returns -1 in case of error, the index in the stack otherwise
1762 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1764 if (ctxt == NULL) return(0);
1765 if (ctxt->nodeNr >= ctxt->nodeMax) {
1766 xmlNodePtr *tmp;
1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769 ctxt->nodeMax * 2 *
1770 sizeof(ctxt->nodeTab[0]));
1771 if (tmp == NULL) {
1772 xmlErrMemory(ctxt, NULL);
1773 return (-1);
1775 ctxt->nodeTab = tmp;
1776 ctxt->nodeMax *= 2;
1778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782 xmlParserMaxDepth);
1783 xmlHaltParser(ctxt);
1784 return(-1);
1786 ctxt->nodeTab[ctxt->nodeNr] = value;
1787 ctxt->node = value;
1788 return (ctxt->nodeNr++);
1792 * nodePop:
1793 * @ctxt: an XML parser context
1795 * DEPRECATED: Internal function, do not use.
1797 * Pops the top element node from the node stack
1799 * Returns the node just removed
1801 xmlNodePtr
1802 nodePop(xmlParserCtxtPtr ctxt)
1804 xmlNodePtr ret;
1806 if (ctxt == NULL) return(NULL);
1807 if (ctxt->nodeNr <= 0)
1808 return (NULL);
1809 ctxt->nodeNr--;
1810 if (ctxt->nodeNr > 0)
1811 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1812 else
1813 ctxt->node = NULL;
1814 ret = ctxt->nodeTab[ctxt->nodeNr];
1815 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1816 return (ret);
1820 * nameNsPush:
1821 * @ctxt: an XML parser context
1822 * @value: the element name
1823 * @prefix: the element prefix
1824 * @URI: the element namespace name
1825 * @line: the current line number for error messages
1826 * @nsNr: the number of namespaces pushed on the namespace table
1828 * Pushes a new element name/prefix/URL on top of the name stack
1830 * Returns -1 in case of error, the index in the stack otherwise
1832 static int
1833 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1834 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1836 xmlStartTag *tag;
1838 if (ctxt->nameNr >= ctxt->nameMax) {
1839 const xmlChar * *tmp;
1840 xmlStartTag *tmp2;
1841 ctxt->nameMax *= 2;
1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843 ctxt->nameMax *
1844 sizeof(ctxt->nameTab[0]));
1845 if (tmp == NULL) {
1846 ctxt->nameMax /= 2;
1847 goto mem_error;
1849 ctxt->nameTab = tmp;
1850 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1851 ctxt->nameMax *
1852 sizeof(ctxt->pushTab[0]));
1853 if (tmp2 == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1857 ctxt->pushTab = tmp2;
1858 } else if (ctxt->pushTab == NULL) {
1859 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1860 sizeof(ctxt->pushTab[0]));
1861 if (ctxt->pushTab == NULL)
1862 goto mem_error;
1864 ctxt->nameTab[ctxt->nameNr] = value;
1865 ctxt->name = value;
1866 tag = &ctxt->pushTab[ctxt->nameNr];
1867 tag->prefix = prefix;
1868 tag->URI = URI;
1869 tag->line = line;
1870 tag->nsNr = nsNr;
1871 return (ctxt->nameNr++);
1872 mem_error:
1873 xmlErrMemory(ctxt, NULL);
1874 return (-1);
1876 #ifdef LIBXML_PUSH_ENABLED
1878 * nameNsPop:
1879 * @ctxt: an XML parser context
1881 * Pops the top element/prefix/URI name from the name stack
1883 * Returns the name just removed
1885 static const xmlChar *
1886 nameNsPop(xmlParserCtxtPtr ctxt)
1888 const xmlChar *ret;
1890 if (ctxt->nameNr <= 0)
1891 return (NULL);
1892 ctxt->nameNr--;
1893 if (ctxt->nameNr > 0)
1894 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1895 else
1896 ctxt->name = NULL;
1897 ret = ctxt->nameTab[ctxt->nameNr];
1898 ctxt->nameTab[ctxt->nameNr] = NULL;
1899 return (ret);
1901 #endif /* LIBXML_PUSH_ENABLED */
1904 * namePush:
1905 * @ctxt: an XML parser context
1906 * @value: the element name
1908 * DEPRECATED: Internal function, do not use.
1910 * Pushes a new element name on top of the name stack
1912 * Returns -1 in case of error, the index in the stack otherwise
1915 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1917 if (ctxt == NULL) return (-1);
1919 if (ctxt->nameNr >= ctxt->nameMax) {
1920 const xmlChar * *tmp;
1921 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1922 ctxt->nameMax * 2 *
1923 sizeof(ctxt->nameTab[0]));
1924 if (tmp == NULL) {
1925 goto mem_error;
1927 ctxt->nameTab = tmp;
1928 ctxt->nameMax *= 2;
1930 ctxt->nameTab[ctxt->nameNr] = value;
1931 ctxt->name = value;
1932 return (ctxt->nameNr++);
1933 mem_error:
1934 xmlErrMemory(ctxt, NULL);
1935 return (-1);
1939 * namePop:
1940 * @ctxt: an XML parser context
1942 * DEPRECATED: Internal function, do not use.
1944 * Pops the top element name from the name stack
1946 * Returns the name just removed
1948 const xmlChar *
1949 namePop(xmlParserCtxtPtr ctxt)
1951 const xmlChar *ret;
1953 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1954 return (NULL);
1955 ctxt->nameNr--;
1956 if (ctxt->nameNr > 0)
1957 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1958 else
1959 ctxt->name = NULL;
1960 ret = ctxt->nameTab[ctxt->nameNr];
1961 ctxt->nameTab[ctxt->nameNr] = NULL;
1962 return (ret);
1965 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1966 if (ctxt->spaceNr >= ctxt->spaceMax) {
1967 int *tmp;
1969 ctxt->spaceMax *= 2;
1970 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1971 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1972 if (tmp == NULL) {
1973 xmlErrMemory(ctxt, NULL);
1974 ctxt->spaceMax /=2;
1975 return(-1);
1977 ctxt->spaceTab = tmp;
1979 ctxt->spaceTab[ctxt->spaceNr] = val;
1980 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1981 return(ctxt->spaceNr++);
1984 static int spacePop(xmlParserCtxtPtr ctxt) {
1985 int ret;
1986 if (ctxt->spaceNr <= 0) return(0);
1987 ctxt->spaceNr--;
1988 if (ctxt->spaceNr > 0)
1989 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1990 else
1991 ctxt->space = &ctxt->spaceTab[0];
1992 ret = ctxt->spaceTab[ctxt->spaceNr];
1993 ctxt->spaceTab[ctxt->spaceNr] = -1;
1994 return(ret);
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt holding the
 * parser context to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR return the base pointer of the current input buffer.
 *   CUR     returns the current xmlChar value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   CMPn(s, c1, ..., cn) compares the first n bytes at s with c1 ... cn.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar, keeping the line and column counters right
 *           when newlines are crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW and COPY_BUF expand to a bare if statement and NEXT1 to a
 * bare block: do not use them as the body of an unbraced if/else.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/* Advance by val bytes and columns, refill when the end is reached. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Same as SKIP, one byte at a time so that newlines are accounted for. */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < val; skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++;					\
            ctxt->input->col = 1;					\
        } else {							\
            ctxt->input->col++;						\
        }								\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/* Step over one character of l bytes, no refill of the buffer. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else {								\
        ctxt->input->col++;						\
    }									\
    ctxt->input->cur += l;						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = v;						\
    else i += xmlCopyCharMultiByte(&b[i],v)
2109 * xmlSkipBlankChars:
2110 * @ctxt: the XML parser context
2112 * DEPRECATED: Internal function, do not use.
2114 * skip all blanks character found at that point in the input streams.
2115 * It pops up finished entities in the process if allowable at that point.
2117 * Returns the number of space chars skipped
2121 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2122 int res = 0;
2125 * It's Okay to use CUR/NEXT here since all the blanks are on
2126 * the ASCII range.
2128 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2129 (ctxt->instate == XML_PARSER_START)) {
2130 const xmlChar *cur;
2132 * if we are in the document content, go really fast
2134 cur = ctxt->input->cur;
2135 while (IS_BLANK_CH(*cur)) {
2136 if (*cur == '\n') {
2137 ctxt->input->line++; ctxt->input->col = 1;
2138 } else {
2139 ctxt->input->col++;
2141 cur++;
2142 if (res < INT_MAX)
2143 res++;
2144 if (*cur == 0) {
2145 ctxt->input->cur = cur;
2146 xmlParserGrow(ctxt);
2147 cur = ctxt->input->cur;
2150 ctxt->input->cur = cur;
2151 } else {
2152 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2154 while (ctxt->instate != XML_PARSER_EOF) {
2155 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2156 NEXT;
2157 } else if (CUR == '%') {
2159 * Need to handle support of entities branching here
2161 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2162 break;
2163 xmlParsePEReference(ctxt);
2164 } else if (CUR == 0) {
2165 unsigned long consumed;
2166 xmlEntityPtr ent;
2168 if (ctxt->inputNr <= 1)
2169 break;
2171 consumed = ctxt->input->consumed;
2172 xmlSaturatedAddSizeT(&consumed,
2173 ctxt->input->cur - ctxt->input->base);
2176 * Add to sizeentities when parsing an external entity
2177 * for the first time.
2179 ent = ctxt->input->entity;
2180 if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2181 ((ent->flags & XML_ENT_PARSED) == 0)) {
2182 ent->flags |= XML_ENT_PARSED;
2184 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2187 xmlParserEntityCheck(ctxt, consumed);
2189 xmlPopInput(ctxt);
2190 } else {
2191 break;
2195 * Also increase the counter when entering or exiting a PERef.
2196 * The spec says: "When a parameter-entity reference is recognized
2197 * in the DTD and included, its replacement text MUST be enlarged
2198 * by the attachment of one leading and one following space (#x20)
2199 * character."
2201 if (res < INT_MAX)
2202 res++;
2205 return(res);
2208 /************************************************************************
2210 * Commodity functions to handle entities *
2212 ************************************************************************/
2215 * xmlPopInput:
2216 * @ctxt: an XML parser context
2218 * xmlPopInput: the current input pointed by ctxt->input came to an end
2219 * pop it and return the next char.
2221 * Returns the current xmlChar in the parser context
2223 xmlChar
2224 xmlPopInput(xmlParserCtxtPtr ctxt) {
2225 xmlParserInputPtr input;
2227 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2228 if (xmlParserDebugEntities)
2229 xmlGenericError(xmlGenericErrorContext,
2230 "Popping input %d\n", ctxt->inputNr);
2231 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2232 (ctxt->instate != XML_PARSER_EOF))
2233 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2234 "Unfinished entity outside the DTD");
2235 input = inputPop(ctxt);
2236 if (input->entity != NULL)
2237 input->entity->flags &= ~XML_ENT_EXPANDING;
2238 xmlFreeInputStream(input);
2239 if (*ctxt->input->cur == 0)
2240 xmlParserGrow(ctxt);
2241 return(CUR);
2245 * xmlPushInput:
2246 * @ctxt: an XML parser context
2247 * @input: an XML parser input fragment (entity, XML fragment ...).
2249 * xmlPushInput: switch to a new input stream which is stacked on top
2250 * of the previous one(s).
2251 * Returns -1 in case of error or the index in the input stack
2254 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2255 int ret;
2256 if (input == NULL) return(-1);
2258 if (xmlParserDebugEntities) {
2259 if ((ctxt->input != NULL) && (ctxt->input->filename))
2260 xmlGenericError(xmlGenericErrorContext,
2261 "%s(%d): ", ctxt->input->filename,
2262 ctxt->input->line);
2263 xmlGenericError(xmlGenericErrorContext,
2264 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2266 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2267 (ctxt->inputNr > 100)) {
2268 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2269 while (ctxt->inputNr > 1)
2270 xmlFreeInputStream(inputPop(ctxt));
2271 return(-1);
2273 ret = inputPush(ctxt, input);
2274 if (ctxt->instate == XML_PARSER_EOF)
2275 return(-1);
2276 GROW;
2277 return(ret);
2281 * xmlParseCharRef:
2282 * @ctxt: an XML parser context
2284 * DEPRECATED: Internal function, don't use.
2286 * Parse a numeric character reference. Always consumes '&'.
2288 * [66] CharRef ::= '&#' [0-9]+ ';' |
2289 * '&#x' [0-9a-fA-F]+ ';'
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
2293 * production for Char.
2295 * Returns the value parsed (as an int), 0 in case of error
2298 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2299 int val = 0;
2300 int count = 0;
2303 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2305 if ((RAW == '&') && (NXT(1) == '#') &&
2306 (NXT(2) == 'x')) {
2307 SKIP(3);
2308 GROW;
2309 while (RAW != ';') { /* loop blocked by count */
2310 if (count++ > 20) {
2311 count = 0;
2312 GROW;
2313 if (ctxt->instate == XML_PARSER_EOF)
2314 return(0);
2316 if ((RAW >= '0') && (RAW <= '9'))
2317 val = val * 16 + (CUR - '0');
2318 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2319 val = val * 16 + (CUR - 'a') + 10;
2320 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2321 val = val * 16 + (CUR - 'A') + 10;
2322 else {
2323 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2324 val = 0;
2325 break;
2327 if (val > 0x110000)
2328 val = 0x110000;
2330 NEXT;
2331 count++;
2333 if (RAW == ';') {
2334 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2335 ctxt->input->col++;
2336 ctxt->input->cur++;
2338 } else if ((RAW == '&') && (NXT(1) == '#')) {
2339 SKIP(2);
2340 GROW;
2341 while (RAW != ';') { /* loop blocked by count */
2342 if (count++ > 20) {
2343 count = 0;
2344 GROW;
2345 if (ctxt->instate == XML_PARSER_EOF)
2346 return(0);
2348 if ((RAW >= '0') && (RAW <= '9'))
2349 val = val * 10 + (CUR - '0');
2350 else {
2351 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2352 val = 0;
2353 break;
2355 if (val > 0x110000)
2356 val = 0x110000;
2358 NEXT;
2359 count++;
2361 if (RAW == ';') {
2362 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2363 ctxt->input->col++;
2364 ctxt->input->cur++;
2366 } else {
2367 if (RAW == '&')
2368 SKIP(1);
2369 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2373 * [ WFC: Legal Character ]
2374 * Characters referred to using character references must match the
2375 * production for Char.
2377 if (val >= 0x110000) {
2378 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2379 "xmlParseCharRef: character reference out of bounds\n",
2380 val);
2381 } else if (IS_CHAR(val)) {
2382 return(val);
2383 } else {
2384 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2385 "xmlParseCharRef: invalid xmlChar value %d\n",
2386 val);
2388 return(0);
2392 * xmlParseStringCharRef:
2393 * @ctxt: an XML parser context
2394 * @str: a pointer to an index in the string
2396 * parse Reference declarations, variant parsing from a string rather
2397 * than an an input flow.
2399 * [66] CharRef ::= '&#' [0-9]+ ';' |
2400 * '&#x' [0-9a-fA-F]+ ';'
2402 * [ WFC: Legal Character ]
2403 * Characters referred to using character references must match the
2404 * production for Char.
2406 * Returns the value parsed (as an int), 0 in case of error, str will be
2407 * updated to the current value of the index
2409 static int
2410 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2411 const xmlChar *ptr;
2412 xmlChar cur;
2413 int val = 0;
2415 if ((str == NULL) || (*str == NULL)) return(0);
2416 ptr = *str;
2417 cur = *ptr;
2418 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2419 ptr += 3;
2420 cur = *ptr;
2421 while (cur != ';') { /* Non input consuming loop */
2422 if ((cur >= '0') && (cur <= '9'))
2423 val = val * 16 + (cur - '0');
2424 else if ((cur >= 'a') && (cur <= 'f'))
2425 val = val * 16 + (cur - 'a') + 10;
2426 else if ((cur >= 'A') && (cur <= 'F'))
2427 val = val * 16 + (cur - 'A') + 10;
2428 else {
2429 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2430 val = 0;
2431 break;
2433 if (val > 0x110000)
2434 val = 0x110000;
2436 ptr++;
2437 cur = *ptr;
2439 if (cur == ';')
2440 ptr++;
2441 } else if ((cur == '&') && (ptr[1] == '#')){
2442 ptr += 2;
2443 cur = *ptr;
2444 while (cur != ';') { /* Non input consuming loops */
2445 if ((cur >= '0') && (cur <= '9'))
2446 val = val * 10 + (cur - '0');
2447 else {
2448 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449 val = 0;
2450 break;
2452 if (val > 0x110000)
2453 val = 0x110000;
2455 ptr++;
2456 cur = *ptr;
2458 if (cur == ';')
2459 ptr++;
2460 } else {
2461 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2462 return(0);
2464 *str = ptr;
2467 * [ WFC: Legal Character ]
2468 * Characters referred to using character references must match the
2469 * production for Char.
2471 if (val >= 0x110000) {
2472 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2473 "xmlParseStringCharRef: character reference out of bounds\n",
2474 val);
2475 } else if (IS_CHAR(val)) {
2476 return(val);
2477 } else {
2478 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2479 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2480 val);
2482 return(0);
2486 * xmlParserHandlePEReference:
2487 * @ctxt: the parser context
2489 * DEPRECATED: Internal function, do not use.
2491 * [69] PEReference ::= '%' Name ';'
2493 * [ WFC: No Recursion ]
2494 * A parsed entity must not contain a recursive
2495 * reference to itself, either directly or indirectly.
2497 * [ WFC: Entity Declared ]
2498 * In a document without any DTD, a document with only an internal DTD
2499 * subset which contains no parameter entity references, or a document
2500 * with "standalone='yes'", ... ... The declaration of a parameter
2501 * entity must precede any reference to it...
2503 * [ VC: Entity Declared ]
2504 * In a document with an external subset or external parameter entities
2505 * with "standalone='no'", ... ... The declaration of a parameter entity
2506 * must precede any reference to it...
2508 * [ WFC: In DTD ]
2509 * Parameter-entity references may only appear in the DTD.
2510 * NOTE: misleading but this is handled.
2512 * A PEReference may have been detected in the current input stream
2513 * the handling is done accordingly to
2514 * http://www.w3.org/TR/REC-xml#entproc
2515 * i.e.
2516 * - Included in literal in entity values
2517 * - Included as Parameter Entity reference within DTDs
2519 void
2520 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2521 switch(ctxt->instate) {
2522 case XML_PARSER_CDATA_SECTION:
2523 return;
2524 case XML_PARSER_COMMENT:
2525 return;
2526 case XML_PARSER_START_TAG:
2527 return;
2528 case XML_PARSER_END_TAG:
2529 return;
2530 case XML_PARSER_EOF:
2531 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2532 return;
2533 case XML_PARSER_PROLOG:
2534 case XML_PARSER_START:
2535 case XML_PARSER_MISC:
2536 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2537 return;
2538 case XML_PARSER_ENTITY_DECL:
2539 case XML_PARSER_CONTENT:
2540 case XML_PARSER_ATTRIBUTE_VALUE:
2541 case XML_PARSER_PI:
2542 case XML_PARSER_SYSTEM_LITERAL:
2543 case XML_PARSER_PUBLIC_LITERAL:
2544 /* we just ignore it there */
2545 return;
2546 case XML_PARSER_EPILOG:
2547 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2548 return;
2549 case XML_PARSER_ENTITY_VALUE:
2551 * NOTE: in the case of entity values, we don't do the
2552 * substitution here since we need the literal
2553 * entity value to be able to save the internal
2554 * subset of the document.
2555 * This will be handled by xmlStringDecodeEntities
2557 return;
2558 case XML_PARSER_DTD:
2560 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2561 * In the internal DTD subset, parameter-entity references
2562 * can occur only where markup declarations can occur, not
2563 * within markup declarations.
2564 * In that case this is handled in xmlParseMarkupDecl
2566 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2567 return;
2568 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2569 return;
2570 break;
2571 case XML_PARSER_IGNORE:
2572 return;
2575 xmlParsePEReference(ctxt);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the old one plus @n; the macro jumps to
 * mem_error when that computation wraps around or when xmlRealloc()
 * fails (the old buffer is left untouched in both cases).
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2594 * xmlStringDecodeEntitiesInt:
2595 * @ctxt: the parser context
2596 * @str: the input string
2597 * @len: the string length
2598 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2599 * @end: an end marker xmlChar, 0 if none
2600 * @end2: an end marker xmlChar, 0 if none
2601 * @end3: an end marker xmlChar, 0 if none
2602 * @check: whether to perform entity checks
2604 static xmlChar *
2605 xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606 int what, xmlChar end, xmlChar end2, xmlChar end3,
2607 int check) {
2608 xmlChar *buffer = NULL;
2609 size_t buffer_size = 0;
2610 size_t nbchars = 0;
2612 xmlChar *current = NULL;
2613 xmlChar *rep = NULL;
2614 const xmlChar *last;
2615 xmlEntityPtr ent;
2616 int c,l;
2618 if (str == NULL)
2619 return(NULL);
2620 last = str + len;
2622 if (((ctxt->depth > 40) &&
2623 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2624 (ctxt->depth > 100)) {
2625 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2626 "Maximum entity nesting depth exceeded");
2627 return(NULL);
2631 * allocate a translation buffer.
2633 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2634 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2635 if (buffer == NULL) goto mem_error;
2638 * OK loop until we reach one of the ending char or a size limit.
2639 * we are operating on already parsed values.
2641 if (str < last)
2642 c = CUR_SCHAR(str, l);
2643 else
2644 c = 0;
2645 while ((c != 0) && (c != end) && /* non input consuming loop */
2646 (c != end2) && (c != end3) &&
2647 (ctxt->instate != XML_PARSER_EOF)) {
2649 if (c == 0) break;
2650 if ((c == '&') && (str[1] == '#')) {
2651 int val = xmlParseStringCharRef(ctxt, &str);
2652 if (val == 0)
2653 goto int_error;
2654 COPY_BUF(0,buffer,nbchars,val);
2655 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2656 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2658 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2659 if (xmlParserDebugEntities)
2660 xmlGenericError(xmlGenericErrorContext,
2661 "String decoding Entity Reference: %.30s\n",
2662 str);
2663 ent = xmlParseStringEntityRef(ctxt, &str);
2664 if ((ent != NULL) &&
2665 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2666 if (ent->content != NULL) {
2667 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2668 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2669 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2671 } else {
2672 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2673 "predefined entity has no content\n");
2674 goto int_error;
2676 } else if ((ent != NULL) && (ent->content != NULL)) {
2677 if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2678 goto int_error;
2680 if (ent->flags & XML_ENT_EXPANDING) {
2681 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2682 xmlHaltParser(ctxt);
2683 ent->content[0] = 0;
2684 goto int_error;
2687 ent->flags |= XML_ENT_EXPANDING;
2688 ctxt->depth++;
2689 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2690 ent->length, what, 0, 0, 0, check);
2691 ctxt->depth--;
2692 ent->flags &= ~XML_ENT_EXPANDING;
2694 if (rep == NULL) {
2695 ent->content[0] = 0;
2696 goto int_error;
2699 current = rep;
2700 while (*current != 0) { /* non input consuming loop */
2701 buffer[nbchars++] = *current++;
2702 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2703 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2706 xmlFree(rep);
2707 rep = NULL;
2708 } else if (ent != NULL) {
2709 int i = xmlStrlen(ent->name);
2710 const xmlChar *cur = ent->name;
2712 buffer[nbchars++] = '&';
2713 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2716 for (;i > 0;i--)
2717 buffer[nbchars++] = *cur++;
2718 buffer[nbchars++] = ';';
2720 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2721 if (xmlParserDebugEntities)
2722 xmlGenericError(xmlGenericErrorContext,
2723 "String decoding PE Reference: %.30s\n", str);
2724 ent = xmlParseStringPEReference(ctxt, &str);
2725 if (ent != NULL) {
2726 if (ent->content == NULL) {
2728 * Note: external parsed entities will not be loaded,
2729 * it is not required for a non-validating parser to
2730 * complete external PEReferences coming from the
2731 * internal subset
2733 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2734 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2735 (ctxt->validate != 0)) {
2736 xmlLoadEntityContent(ctxt, ent);
2737 } else {
2738 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2739 "not validating will not read content for PE entity %s\n",
2740 ent->name, NULL);
2744 if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2745 goto int_error;
2747 if (ent->flags & XML_ENT_EXPANDING) {
2748 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2749 xmlHaltParser(ctxt);
2750 if (ent->content != NULL)
2751 ent->content[0] = 0;
2752 goto int_error;
2755 ent->flags |= XML_ENT_EXPANDING;
2756 ctxt->depth++;
2757 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2758 ent->length, what, 0, 0, 0, check);
2759 ctxt->depth--;
2760 ent->flags &= ~XML_ENT_EXPANDING;
2762 if (rep == NULL) {
2763 if (ent->content != NULL)
2764 ent->content[0] = 0;
2765 goto int_error;
2767 current = rep;
2768 while (*current != 0) { /* non input consuming loop */
2769 buffer[nbchars++] = *current++;
2770 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2771 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2774 xmlFree(rep);
2775 rep = NULL;
2777 } else {
2778 COPY_BUF(l,buffer,nbchars,c);
2779 str += l;
2780 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2781 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2784 if (str < last)
2785 c = CUR_SCHAR(str, l);
2786 else
2787 c = 0;
2789 buffer[nbchars] = 0;
2790 return(buffer);
2792 mem_error:
2793 xmlErrMemory(ctxt, NULL);
2794 int_error:
2795 if (rep != NULL)
2796 xmlFree(rep);
2797 if (buffer != NULL)
2798 xmlFree(buffer);
2799 return(NULL);
2803 * xmlStringLenDecodeEntities:
2804 * @ctxt: the parser context
2805 * @str: the input string
2806 * @len: the string length
2807 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2808 * @end: an end marker xmlChar, 0 if none
2809 * @end2: an end marker xmlChar, 0 if none
2810 * @end3: an end marker xmlChar, 0 if none
2812 * DEPRECATED: Internal function, don't use.
2814 * Takes a entity string content and process to do the adequate substitutions.
2816 * [67] Reference ::= EntityRef | CharRef
2818 * [69] PEReference ::= '%' Name ';'
2820 * Returns A newly allocated string with the substitution done. The caller
2821 * must deallocate it !
2823 xmlChar *
2824 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2825 int what, xmlChar end, xmlChar end2,
2826 xmlChar end3) {
2827 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2828 return(NULL);
2829 return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2830 end, end2, end3, 0));
2834 * xmlStringDecodeEntities:
2835 * @ctxt: the parser context
2836 * @str: the input string
2837 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2838 * @end: an end marker xmlChar, 0 if none
2839 * @end2: an end marker xmlChar, 0 if none
2840 * @end3: an end marker xmlChar, 0 if none
2842 * DEPRECATED: Internal function, don't use.
2844 * Takes a entity string content and process to do the adequate substitutions.
2846 * [67] Reference ::= EntityRef | CharRef
2848 * [69] PEReference ::= '%' Name ';'
2850 * Returns A newly allocated string with the substitution done. The caller
2851 * must deallocate it !
2853 xmlChar *
2854 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2855 xmlChar end, xmlChar end2, xmlChar end3) {
2856 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2857 return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2858 end, end2, end3, 0));
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
2868 * areBlanks:
2869 * @ctxt: an XML parser context
2870 * @str: a xmlChar *
2871 * @len: the size of @str
2872 * @blank_chars: we know the chars are blanks
2874 * Is this a sequence of blank chars that one can ignore ?
2876 * Returns 1 if ignorable 0 otherwise.
2879 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2880 int blank_chars) {
2881 int i, ret;
2882 xmlNodePtr lastChild;
2885 * Don't spend time trying to differentiate them, the same callback is
2886 * used !
2888 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2889 return(0);
2892 * Check for xml:space value.
2894 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2895 (*(ctxt->space) == -2))
2896 return(0);
2899 * Check that the string is made of blanks
2901 if (blank_chars == 0) {
2902 for (i = 0;i < len;i++)
2903 if (!(IS_BLANK_CH(str[i]))) return(0);
2907 * Look if the element is mixed content in the DTD if available
2909 if (ctxt->node == NULL) return(0);
2910 if (ctxt->myDoc != NULL) {
2911 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2912 if (ret == 0) return(1);
2913 if (ret == 1) return(0);
2917 * Otherwise, heuristic :-\
2919 if ((RAW != '<') && (RAW != 0xD)) return(0);
2920 if ((ctxt->node->children == NULL) &&
2921 (RAW == '<') && (NXT(1) == '/')) return(0);
2923 lastChild = xmlGetLastChild(ctxt->node);
2924 if (lastChild == NULL) {
2925 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2926 (ctxt->node->content != NULL)) return(0);
2927 } else if (xmlNodeIsText(lastChild))
2928 return(0);
2929 else if ((ctxt->node->children != NULL) &&
2930 (xmlNodeIsText(ctxt->node->children)))
2931 return(0);
2932 return(1);
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/
2943 * xmlSplitQName:
2944 * @ctxt: an XML parser context
2945 * @name: an XML parser context
2946 * @prefix: a xmlChar **
2948 * parse an UTF8 encoded XML qualified name string
2950 * [NS 5] QName ::= (Prefix ':')? LocalPart
2952 * [NS 6] Prefix ::= NCName
2954 * [NS 7] LocalPart ::= NCName
2956 * Returns the local part, and prefix is updated
2957 * to get the Prefix if any.
2960 xmlChar *
2961 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2962 xmlChar buf[XML_MAX_NAMELEN + 5];
2963 xmlChar *buffer = NULL;
2964 int len = 0;
2965 int max = XML_MAX_NAMELEN;
2966 xmlChar *ret = NULL;
2967 const xmlChar *cur = name;
2968 int c;
2970 if (prefix == NULL) return(NULL);
2971 *prefix = NULL;
2973 if (cur == NULL) return(NULL);
2975 #ifndef XML_XML_NAMESPACE
2976 /* xml: prefix is not really a namespace */
2977 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2978 (cur[2] == 'l') && (cur[3] == ':'))
2979 return(xmlStrdup(name));
2980 #endif
2982 /* nasty but well=formed */
2983 if (cur[0] == ':')
2984 return(xmlStrdup(name));
2986 c = *cur++;
2987 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2988 buf[len++] = c;
2989 c = *cur++;
2991 if (len >= max) {
2993 * Okay someone managed to make a huge name, so he's ready to pay
2994 * for the processing speed.
2996 max = len * 2;
2998 buffer = (xmlChar *) xmlMallocAtomic(max);
2999 if (buffer == NULL) {
3000 xmlErrMemory(ctxt, NULL);
3001 return(NULL);
3003 memcpy(buffer, buf, len);
3004 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3005 if (len + 10 > max) {
3006 xmlChar *tmp;
3008 max *= 2;
3009 tmp = (xmlChar *) xmlRealloc(buffer, max);
3010 if (tmp == NULL) {
3011 xmlFree(buffer);
3012 xmlErrMemory(ctxt, NULL);
3013 return(NULL);
3015 buffer = tmp;
3017 buffer[len++] = c;
3018 c = *cur++;
3020 buffer[len] = 0;
3023 if ((c == ':') && (*cur == 0)) {
3024 if (buffer != NULL)
3025 xmlFree(buffer);
3026 *prefix = NULL;
3027 return(xmlStrdup(name));
3030 if (buffer == NULL)
3031 ret = xmlStrndup(buf, len);
3032 else {
3033 ret = buffer;
3034 buffer = NULL;
3035 max = XML_MAX_NAMELEN;
3039 if (c == ':') {
3040 c = *cur;
3041 *prefix = ret;
3042 if (c == 0) {
3043 return(xmlStrndup(BAD_CAST "", 0));
3045 len = 0;
3048 * Check that the first character is proper to start
3049 * a new name
3051 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3052 ((c >= 0x41) && (c <= 0x5A)) ||
3053 (c == '_') || (c == ':'))) {
3054 int l;
3055 int first = CUR_SCHAR(cur, l);
3057 if (!IS_LETTER(first) && (first != '_')) {
3058 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3059 "Name %s is not XML Namespace compliant\n",
3060 name);
3063 cur++;
3065 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3066 buf[len++] = c;
3067 c = *cur++;
3069 if (len >= max) {
3071 * Okay someone managed to make a huge name, so he's ready to pay
3072 * for the processing speed.
3074 max = len * 2;
3076 buffer = (xmlChar *) xmlMallocAtomic(max);
3077 if (buffer == NULL) {
3078 xmlErrMemory(ctxt, NULL);
3079 return(NULL);
3081 memcpy(buffer, buf, len);
3082 while (c != 0) { /* tested bigname2.xml */
3083 if (len + 10 > max) {
3084 xmlChar *tmp;
3086 max *= 2;
3087 tmp = (xmlChar *) xmlRealloc(buffer, max);
3088 if (tmp == NULL) {
3089 xmlErrMemory(ctxt, NULL);
3090 xmlFree(buffer);
3091 return(NULL);
3093 buffer = tmp;
3095 buffer[len++] = c;
3096 c = *cur++;
3098 buffer[len] = 0;
3101 if (buffer == NULL)
3102 ret = xmlStrndup(buf, len);
3103 else {
3104 ret = buffer;
3108 return(ret);
/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in DEBUG
 * builds. Each routine increments its own counter on entry.
 */
static unsigned long nbParseName = 0;           /* xmlParseName */
static unsigned long nbParseNmToken = 0;        /* presumably the NmToken parser; not visible here */
static unsigned long nbParseNCName = 0;         /* xmlParseNCName */
static unsigned long nbParseNCNameComplex = 0;  /* xmlParseNCNameComplex */
static unsigned long nbParseNameComplex = 0;    /* xmlParseNameComplex */
static unsigned long nbParseStringName = 0;     /* xmlParseStringName */
#endif
3133 * The two following functions are related to the change of accepted
3134 * characters for Name and NmToken in the Revision 5 of XML-1.0
3135 * They correspond to the modified production [4] and the new production [4a]
3136 * changes in that revision. Also note that the macros used for the
3137 * productions Letter, Digit, CombiningChar and Extender are not needed
3138 * anymore.
3139 * We still keep compatibility to pre-revision5 parsing semantic if the
3140 * new XML_PARSE_OLD10 option is given to the parser.
3142 static int
3143 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3144 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3146 * Use the new checks of production [4] [4a] amd [5] of the
3147 * Update 5 of XML-1.0
3149 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3150 (((c >= 'a') && (c <= 'z')) ||
3151 ((c >= 'A') && (c <= 'Z')) ||
3152 (c == '_') || (c == ':') ||
3153 ((c >= 0xC0) && (c <= 0xD6)) ||
3154 ((c >= 0xD8) && (c <= 0xF6)) ||
3155 ((c >= 0xF8) && (c <= 0x2FF)) ||
3156 ((c >= 0x370) && (c <= 0x37D)) ||
3157 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3158 ((c >= 0x200C) && (c <= 0x200D)) ||
3159 ((c >= 0x2070) && (c <= 0x218F)) ||
3160 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3161 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3162 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3163 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3164 ((c >= 0x10000) && (c <= 0xEFFFF))))
3165 return(1);
3166 } else {
3167 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3168 return(1);
3170 return(0);
3173 static int
3174 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3175 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3177 * Use the new checks of production [4] [4a] amd [5] of the
3178 * Update 5 of XML-1.0
3180 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3181 (((c >= 'a') && (c <= 'z')) ||
3182 ((c >= 'A') && (c <= 'Z')) ||
3183 ((c >= '0') && (c <= '9')) || /* !start */
3184 (c == '_') || (c == ':') ||
3185 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3186 ((c >= 0xC0) && (c <= 0xD6)) ||
3187 ((c >= 0xD8) && (c <= 0xF6)) ||
3188 ((c >= 0xF8) && (c <= 0x2FF)) ||
3189 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3190 ((c >= 0x370) && (c <= 0x37D)) ||
3191 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3192 ((c >= 0x200C) && (c <= 0x200D)) ||
3193 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3194 ((c >= 0x2070) && (c <= 0x218F)) ||
3195 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3196 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3197 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3198 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3199 ((c >= 0x10000) && (c <= 0xEFFFF))))
3200 return(1);
3201 } else {
3202 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3203 (c == '.') || (c == '-') ||
3204 (c == '_') || (c == ':') ||
3205 (IS_COMBINING(c)) ||
3206 (IS_EXTENDER(c)))
3207 return(1);
3209 return(0);
3212 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3213 int *len, int *alloc, int normalize);
3215 static const xmlChar *
3216 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3217 int len = 0, l;
3218 int c;
3219 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3220 XML_MAX_TEXT_LENGTH :
3221 XML_MAX_NAME_LENGTH;
3223 #ifdef DEBUG
3224 nbParseNameComplex++;
3225 #endif
3228 * Handler for more complex cases
3230 c = CUR_CHAR(l);
3231 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3233 * Use the new checks of production [4] [4a] amd [5] of the
3234 * Update 5 of XML-1.0
3236 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3237 (!(((c >= 'a') && (c <= 'z')) ||
3238 ((c >= 'A') && (c <= 'Z')) ||
3239 (c == '_') || (c == ':') ||
3240 ((c >= 0xC0) && (c <= 0xD6)) ||
3241 ((c >= 0xD8) && (c <= 0xF6)) ||
3242 ((c >= 0xF8) && (c <= 0x2FF)) ||
3243 ((c >= 0x370) && (c <= 0x37D)) ||
3244 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3245 ((c >= 0x200C) && (c <= 0x200D)) ||
3246 ((c >= 0x2070) && (c <= 0x218F)) ||
3247 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3248 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3249 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3250 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3251 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3252 return(NULL);
3254 len += l;
3255 NEXTL(l);
3256 c = CUR_CHAR(l);
3257 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3258 (((c >= 'a') && (c <= 'z')) ||
3259 ((c >= 'A') && (c <= 'Z')) ||
3260 ((c >= '0') && (c <= '9')) || /* !start */
3261 (c == '_') || (c == ':') ||
3262 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3263 ((c >= 0xC0) && (c <= 0xD6)) ||
3264 ((c >= 0xD8) && (c <= 0xF6)) ||
3265 ((c >= 0xF8) && (c <= 0x2FF)) ||
3266 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3267 ((c >= 0x370) && (c <= 0x37D)) ||
3268 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3269 ((c >= 0x200C) && (c <= 0x200D)) ||
3270 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3271 ((c >= 0x2070) && (c <= 0x218F)) ||
3272 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3273 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3274 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3275 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3276 ((c >= 0x10000) && (c <= 0xEFFFF))
3277 )) {
3278 if (len <= INT_MAX - l)
3279 len += l;
3280 NEXTL(l);
3281 c = CUR_CHAR(l);
3283 } else {
3284 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3285 (!IS_LETTER(c) && (c != '_') &&
3286 (c != ':'))) {
3287 return(NULL);
3289 len += l;
3290 NEXTL(l);
3291 c = CUR_CHAR(l);
3293 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3294 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3295 (c == '.') || (c == '-') ||
3296 (c == '_') || (c == ':') ||
3297 (IS_COMBINING(c)) ||
3298 (IS_EXTENDER(c)))) {
3299 if (len <= INT_MAX - l)
3300 len += l;
3301 NEXTL(l);
3302 c = CUR_CHAR(l);
3305 if (ctxt->instate == XML_PARSER_EOF)
3306 return(NULL);
3307 if (len > maxLength) {
3308 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309 return(NULL);
3311 if (ctxt->input->cur - ctxt->input->base < len) {
3313 * There were a couple of bugs where PERefs lead to to a change
3314 * of the buffer. Check the buffer size to avoid passing an invalid
3315 * pointer to xmlDictLookup.
3317 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318 "unexpected change of input buffer");
3319 return (NULL);
3321 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3327 * xmlParseName:
3328 * @ctxt: an XML parser context
3330 * DEPRECATED: Internal function, don't use.
3332 * parse an XML name.
3334 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3335 * CombiningChar | Extender
3337 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3339 * [6] Names ::= Name (#x20 Name)*
3341 * Returns the Name parsed or NULL
3344 const xmlChar *
3345 xmlParseName(xmlParserCtxtPtr ctxt) {
3346 const xmlChar *in;
3347 const xmlChar *ret;
3348 size_t count = 0;
3349 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3350 XML_MAX_TEXT_LENGTH :
3351 XML_MAX_NAME_LENGTH;
3353 GROW;
3354 if (ctxt->instate == XML_PARSER_EOF)
3355 return(NULL);
3357 #ifdef DEBUG
3358 nbParseName++;
3359 #endif
3362 * Accelerator for simple ASCII names
3364 in = ctxt->input->cur;
3365 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3366 ((*in >= 0x41) && (*in <= 0x5A)) ||
3367 (*in == '_') || (*in == ':')) {
3368 in++;
3369 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3370 ((*in >= 0x41) && (*in <= 0x5A)) ||
3371 ((*in >= 0x30) && (*in <= 0x39)) ||
3372 (*in == '_') || (*in == '-') ||
3373 (*in == ':') || (*in == '.'))
3374 in++;
3375 if ((*in > 0) && (*in < 0x80)) {
3376 count = in - ctxt->input->cur;
3377 if (count > maxLength) {
3378 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3379 return(NULL);
3381 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3382 ctxt->input->cur = in;
3383 ctxt->input->col += count;
3384 if (ret == NULL)
3385 xmlErrMemory(ctxt, NULL);
3386 return(ret);
3389 /* accelerator for special cases */
3390 return(xmlParseNameComplex(ctxt));
3393 static const xmlChar *
3394 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3395 int len = 0, l;
3396 int c;
3397 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3398 XML_MAX_TEXT_LENGTH :
3399 XML_MAX_NAME_LENGTH;
3400 size_t startPosition = 0;
3402 #ifdef DEBUG
3403 nbParseNCNameComplex++;
3404 #endif
3407 * Handler for more complex cases
3409 startPosition = CUR_PTR - BASE_PTR;
3410 c = CUR_CHAR(l);
3411 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3412 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3413 return(NULL);
3416 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3417 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3418 if (len <= INT_MAX - l)
3419 len += l;
3420 NEXTL(l);
3421 c = CUR_CHAR(l);
3423 if (ctxt->instate == XML_PARSER_EOF)
3424 return(NULL);
3425 if (len > maxLength) {
3426 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3427 return(NULL);
3429 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3433 * xmlParseNCName:
3434 * @ctxt: an XML parser context
3435 * @len: length of the string parsed
3437 * parse an XML name.
3439 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3440 * CombiningChar | Extender
3442 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3444 * Returns the Name parsed or NULL
3447 static const xmlChar *
3448 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3449 const xmlChar *in, *e;
3450 const xmlChar *ret;
3451 size_t count = 0;
3452 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3453 XML_MAX_TEXT_LENGTH :
3454 XML_MAX_NAME_LENGTH;
3456 #ifdef DEBUG
3457 nbParseNCName++;
3458 #endif
3461 * Accelerator for simple ASCII names
3463 in = ctxt->input->cur;
3464 e = ctxt->input->end;
3465 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3466 ((*in >= 0x41) && (*in <= 0x5A)) ||
3467 (*in == '_')) && (in < e)) {
3468 in++;
3469 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3470 ((*in >= 0x41) && (*in <= 0x5A)) ||
3471 ((*in >= 0x30) && (*in <= 0x39)) ||
3472 (*in == '_') || (*in == '-') ||
3473 (*in == '.')) && (in < e))
3474 in++;
3475 if (in >= e)
3476 goto complex;
3477 if ((*in > 0) && (*in < 0x80)) {
3478 count = in - ctxt->input->cur;
3479 if (count > maxLength) {
3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481 return(NULL);
3483 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3484 ctxt->input->cur = in;
3485 ctxt->input->col += count;
3486 if (ret == NULL) {
3487 xmlErrMemory(ctxt, NULL);
3489 return(ret);
3492 complex:
3493 return(xmlParseNCNameComplex(ctxt));
3497 * xmlParseNameAndCompare:
3498 * @ctxt: an XML parser context
3500 * parse an XML name and compares for match
3501 * (specialized for endtag parsing)
3503 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3504 * and the name for mismatch
3507 static const xmlChar *
3508 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3509 register const xmlChar *cmp = other;
3510 register const xmlChar *in;
3511 const xmlChar *ret;
3513 GROW;
3514 if (ctxt->instate == XML_PARSER_EOF)
3515 return(NULL);
3517 in = ctxt->input->cur;
3518 while (*in != 0 && *in == *cmp) {
3519 ++in;
3520 ++cmp;
3522 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3523 /* success */
3524 ctxt->input->col += in - ctxt->input->cur;
3525 ctxt->input->cur = in;
3526 return (const xmlChar*) 1;
3528 /* failure (or end of input buffer), check with full function */
3529 ret = xmlParseName (ctxt);
3530 /* strings coming from the dictionary direct compare possible */
3531 if (ret == other) {
3532 return (const xmlChar*) 1;
3534 return ret;
3538 * xmlParseStringName:
3539 * @ctxt: an XML parser context
3540 * @str: a pointer to the string pointer (IN/OUT)
3542 * parse an XML name.
3544 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545 * CombiningChar | Extender
3547 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3549 * [6] Names ::= Name (#x20 Name)*
3551 * Returns the Name parsed or NULL. The @str pointer
3552 * is updated to the current location in the string.
3555 static xmlChar *
3556 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3557 xmlChar buf[XML_MAX_NAMELEN + 5];
3558 const xmlChar *cur = *str;
3559 int len = 0, l;
3560 int c;
3561 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3562 XML_MAX_TEXT_LENGTH :
3563 XML_MAX_NAME_LENGTH;
3565 #ifdef DEBUG
3566 nbParseStringName++;
3567 #endif
3569 c = CUR_SCHAR(cur, l);
3570 if (!xmlIsNameStartChar(ctxt, c)) {
3571 return(NULL);
3574 COPY_BUF(l,buf,len,c);
3575 cur += l;
3576 c = CUR_SCHAR(cur, l);
3577 while (xmlIsNameChar(ctxt, c)) {
3578 COPY_BUF(l,buf,len,c);
3579 cur += l;
3580 c = CUR_SCHAR(cur, l);
3581 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583 * Okay someone managed to make a huge name, so he's ready to pay
3584 * for the processing speed.
3586 xmlChar *buffer;
3587 int max = len * 2;
3589 buffer = (xmlChar *) xmlMallocAtomic(max);
3590 if (buffer == NULL) {
3591 xmlErrMemory(ctxt, NULL);
3592 return(NULL);
3594 memcpy(buffer, buf, len);
3595 while (xmlIsNameChar(ctxt, c)) {
3596 if (len + 10 > max) {
3597 xmlChar *tmp;
3599 max *= 2;
3600 tmp = (xmlChar *) xmlRealloc(buffer, max);
3601 if (tmp == NULL) {
3602 xmlErrMemory(ctxt, NULL);
3603 xmlFree(buffer);
3604 return(NULL);
3606 buffer = tmp;
3608 COPY_BUF(l,buffer,len,c);
3609 cur += l;
3610 c = CUR_SCHAR(cur, l);
3611 if (len > maxLength) {
3612 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613 xmlFree(buffer);
3614 return(NULL);
3617 buffer[len] = 0;
3618 *str = cur;
3619 return(buffer);
3622 if (len > maxLength) {
3623 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3624 return(NULL);
3626 *str = cur;
3627 return(xmlStrndup(buf, len));
3631 * xmlParseNmtoken:
3632 * @ctxt: an XML parser context
3634 * DEPRECATED: Internal function, don't use.
3636 * parse an XML Nmtoken.
3638 * [7] Nmtoken ::= (NameChar)+
3640 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3642 * Returns the Nmtoken parsed or NULL
3645 xmlChar *
3646 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3647 xmlChar buf[XML_MAX_NAMELEN + 5];
3648 int len = 0, l;
3649 int c;
3650 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3651 XML_MAX_TEXT_LENGTH :
3652 XML_MAX_NAME_LENGTH;
3654 #ifdef DEBUG
3655 nbParseNmToken++;
3656 #endif
3658 c = CUR_CHAR(l);
3660 while (xmlIsNameChar(ctxt, c)) {
3661 COPY_BUF(l,buf,len,c);
3662 NEXTL(l);
3663 c = CUR_CHAR(l);
3664 if (len >= XML_MAX_NAMELEN) {
3666 * Okay someone managed to make a huge token, so he's ready to pay
3667 * for the processing speed.
3669 xmlChar *buffer;
3670 int max = len * 2;
3672 buffer = (xmlChar *) xmlMallocAtomic(max);
3673 if (buffer == NULL) {
3674 xmlErrMemory(ctxt, NULL);
3675 return(NULL);
3677 memcpy(buffer, buf, len);
3678 while (xmlIsNameChar(ctxt, c)) {
3679 if (len + 10 > max) {
3680 xmlChar *tmp;
3682 max *= 2;
3683 tmp = (xmlChar *) xmlRealloc(buffer, max);
3684 if (tmp == NULL) {
3685 xmlErrMemory(ctxt, NULL);
3686 xmlFree(buffer);
3687 return(NULL);
3689 buffer = tmp;
3691 COPY_BUF(l,buffer,len,c);
3692 if (len > maxLength) {
3693 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3694 xmlFree(buffer);
3695 return(NULL);
3697 NEXTL(l);
3698 c = CUR_CHAR(l);
3700 buffer[len] = 0;
3701 if (ctxt->instate == XML_PARSER_EOF) {
3702 xmlFree(buffer);
3703 return(NULL);
3705 return(buffer);
3708 if (ctxt->instate == XML_PARSER_EOF)
3709 return(NULL);
3710 if (len == 0)
3711 return(NULL);
3712 if (len > maxLength) {
3713 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3714 return(NULL);
3716 return(xmlStrndup(buf, len));
3720 * xmlParseEntityValue:
3721 * @ctxt: an XML parser context
3722 * @orig: if non-NULL store a copy of the original entity value
3724 * DEPRECATED: Internal function, don't use.
3726 * parse a value for ENTITY declarations
3728 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3729 * "'" ([^%&'] | PEReference | Reference)* "'"
3731 * Returns the EntityValue parsed with reference substituted or NULL
3734 xmlChar *
3735 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3736 xmlChar *buf = NULL;
3737 int len = 0;
3738 int size = XML_PARSER_BUFFER_SIZE;
3739 int c, l;
3740 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3741 XML_MAX_HUGE_LENGTH :
3742 XML_MAX_TEXT_LENGTH;
3743 xmlChar stop;
3744 xmlChar *ret = NULL;
3745 const xmlChar *cur = NULL;
3746 xmlParserInputPtr input;
3748 if (RAW == '"') stop = '"';
3749 else if (RAW == '\'') stop = '\'';
3750 else {
3751 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3752 return(NULL);
3754 buf = (xmlChar *) xmlMallocAtomic(size);
3755 if (buf == NULL) {
3756 xmlErrMemory(ctxt, NULL);
3757 return(NULL);
3761 * The content of the entity definition is copied in a buffer.
3764 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3765 input = ctxt->input;
3766 GROW;
3767 if (ctxt->instate == XML_PARSER_EOF)
3768 goto error;
3769 NEXT;
3770 c = CUR_CHAR(l);
3772 * NOTE: 4.4.5 Included in Literal
3773 * When a parameter entity reference appears in a literal entity
3774 * value, ... a single or double quote character in the replacement
3775 * text is always treated as a normal data character and will not
3776 * terminate the literal.
3777 * In practice it means we stop the loop only when back at parsing
3778 * the initial entity and the quote is found
3780 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3781 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3782 if (len + 5 >= size) {
3783 xmlChar *tmp;
3785 size *= 2;
3786 tmp = (xmlChar *) xmlRealloc(buf, size);
3787 if (tmp == NULL) {
3788 xmlErrMemory(ctxt, NULL);
3789 goto error;
3791 buf = tmp;
3793 COPY_BUF(l,buf,len,c);
3794 NEXTL(l);
3796 GROW;
3797 c = CUR_CHAR(l);
3798 if (c == 0) {
3799 GROW;
3800 c = CUR_CHAR(l);
3803 if (len > maxLength) {
3804 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3805 "entity value too long\n");
3806 goto error;
3809 buf[len] = 0;
3810 if (ctxt->instate == XML_PARSER_EOF)
3811 goto error;
3812 if (c != stop) {
3813 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3814 goto error;
3816 NEXT;
3819 * Raise problem w.r.t. '&' and '%' being used in non-entities
3820 * reference constructs. Note Charref will be handled in
3821 * xmlStringDecodeEntities()
3823 cur = buf;
3824 while (*cur != 0) { /* non input consuming */
3825 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3826 xmlChar *name;
3827 xmlChar tmp = *cur;
3828 int nameOk = 0;
3830 cur++;
3831 name = xmlParseStringName(ctxt, &cur);
3832 if (name != NULL) {
3833 nameOk = 1;
3834 xmlFree(name);
3836 if ((nameOk == 0) || (*cur != ';')) {
3837 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3838 "EntityValue: '%c' forbidden except for entities references\n",
3839 tmp);
3840 goto error;
3842 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3843 (ctxt->inputNr == 1)) {
3844 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3845 goto error;
3847 if (*cur == 0)
3848 break;
3850 cur++;
3854 * Then PEReference entities are substituted.
3856 * NOTE: 4.4.7 Bypassed
3857 * When a general entity reference appears in the EntityValue in
3858 * an entity declaration, it is bypassed and left as is.
3859 * so XML_SUBSTITUTE_REF is not set here.
3861 ++ctxt->depth;
3862 ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3863 0, 0, 0, /* check */ 1);
3864 --ctxt->depth;
3866 if (orig != NULL) {
3867 *orig = buf;
3868 buf = NULL;
3871 error:
3872 if (buf != NULL)
3873 xmlFree(buf);
3874 return(ret);
3878 * xmlParseAttValueComplex:
3879 * @ctxt: an XML parser context
3880 * @len: the resulting attribute len
3881 * @normalize: whether to apply the inner normalization
3883 * parse a value for an attribute, this is the fallback function
3884 * of xmlParseAttValue() when the attribute parsing requires handling
3885 * of non-ASCII characters, or normalization compaction.
3887 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3889 static xmlChar *
3890 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3891 xmlChar limit = 0;
3892 xmlChar *buf = NULL;
3893 xmlChar *rep = NULL;
3894 size_t len = 0;
3895 size_t buf_size = 0;
3896 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3897 XML_MAX_HUGE_LENGTH :
3898 XML_MAX_TEXT_LENGTH;
3899 int c, l, in_space = 0;
3900 xmlChar *current = NULL;
3901 xmlEntityPtr ent;
3903 if (NXT(0) == '"') {
3904 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3905 limit = '"';
3906 NEXT;
3907 } else if (NXT(0) == '\'') {
3908 limit = '\'';
3909 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3910 NEXT;
3911 } else {
3912 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3913 return(NULL);
3917 * allocate a translation buffer.
3919 buf_size = XML_PARSER_BUFFER_SIZE;
3920 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3921 if (buf == NULL) goto mem_error;
3924 * OK loop until we reach one of the ending char or a size limit.
3926 c = CUR_CHAR(l);
3927 while (((NXT(0) != limit) && /* checked */
3928 (IS_CHAR(c)) && (c != '<')) &&
3929 (ctxt->instate != XML_PARSER_EOF)) {
3930 if (c == '&') {
3931 in_space = 0;
3932 if (NXT(1) == '#') {
3933 int val = xmlParseCharRef(ctxt);
3935 if (val == '&') {
3936 if (ctxt->replaceEntities) {
3937 if (len + 10 > buf_size) {
3938 growBuffer(buf, 10);
3940 buf[len++] = '&';
3941 } else {
3943 * The reparsing will be done in xmlStringGetNodeList()
3944 * called by the attribute() function in SAX.c
3946 if (len + 10 > buf_size) {
3947 growBuffer(buf, 10);
3949 buf[len++] = '&';
3950 buf[len++] = '#';
3951 buf[len++] = '3';
3952 buf[len++] = '8';
3953 buf[len++] = ';';
3955 } else if (val != 0) {
3956 if (len + 10 > buf_size) {
3957 growBuffer(buf, 10);
3959 len += xmlCopyChar(0, &buf[len], val);
3961 } else {
3962 ent = xmlParseEntityRef(ctxt);
3963 if ((ent != NULL) &&
3964 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3965 if (len + 10 > buf_size) {
3966 growBuffer(buf, 10);
3968 if ((ctxt->replaceEntities == 0) &&
3969 (ent->content[0] == '&')) {
3970 buf[len++] = '&';
3971 buf[len++] = '#';
3972 buf[len++] = '3';
3973 buf[len++] = '8';
3974 buf[len++] = ';';
3975 } else {
3976 buf[len++] = ent->content[0];
3978 } else if ((ent != NULL) &&
3979 (ctxt->replaceEntities != 0)) {
3980 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3981 if (xmlParserEntityCheck(ctxt, ent->length))
3982 goto error;
3984 ++ctxt->depth;
3985 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3986 ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3987 /* check */ 1);
3988 --ctxt->depth;
3989 if (rep != NULL) {
3990 current = rep;
3991 while (*current != 0) { /* non input consuming */
3992 if ((*current == 0xD) || (*current == 0xA) ||
3993 (*current == 0x9)) {
3994 buf[len++] = 0x20;
3995 current++;
3996 } else
3997 buf[len++] = *current++;
3998 if (len + 10 > buf_size) {
3999 growBuffer(buf, 10);
4002 xmlFree(rep);
4003 rep = NULL;
4005 } else {
4006 if (len + 10 > buf_size) {
4007 growBuffer(buf, 10);
4009 if (ent->content != NULL)
4010 buf[len++] = ent->content[0];
4012 } else if (ent != NULL) {
4013 int i = xmlStrlen(ent->name);
4014 const xmlChar *cur = ent->name;
4017 * We also check for recursion and amplification
4018 * when entities are not substituted. They're
4019 * often expanded later.
4021 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4022 (ent->content != NULL)) {
4023 if ((ent->flags & XML_ENT_CHECKED) == 0) {
4024 unsigned long oldCopy = ctxt->sizeentcopy;
4026 ctxt->sizeentcopy = ent->length;
4028 ++ctxt->depth;
4029 rep = xmlStringDecodeEntitiesInt(ctxt,
4030 ent->content, ent->length,
4031 XML_SUBSTITUTE_REF, 0, 0, 0,
4032 /* check */ 1);
4033 --ctxt->depth;
4036 * If we're parsing DTD content, the entity
4037 * might reference other entities which
4038 * weren't defined yet, so the check isn't
4039 * reliable.
4041 if (ctxt->inSubset == 0) {
4042 ent->flags |= XML_ENT_CHECKED;
4043 ent->expandedSize = ctxt->sizeentcopy;
4046 if (rep != NULL) {
4047 xmlFree(rep);
4048 rep = NULL;
4049 } else {
4050 ent->content[0] = 0;
4053 if (xmlParserEntityCheck(ctxt, oldCopy))
4054 goto error;
4055 } else {
4056 if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4057 goto error;
4062 * Just output the reference
4064 buf[len++] = '&';
4065 while (len + i + 10 > buf_size) {
4066 growBuffer(buf, i + 10);
4068 for (;i > 0;i--)
4069 buf[len++] = *cur++;
4070 buf[len++] = ';';
4073 } else {
4074 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4075 if ((len != 0) || (!normalize)) {
4076 if ((!normalize) || (!in_space)) {
4077 COPY_BUF(l,buf,len,0x20);
4078 while (len + 10 > buf_size) {
4079 growBuffer(buf, 10);
4082 in_space = 1;
4084 } else {
4085 in_space = 0;
4086 COPY_BUF(l,buf,len,c);
4087 if (len + 10 > buf_size) {
4088 growBuffer(buf, 10);
4091 NEXTL(l);
4093 GROW;
4094 c = CUR_CHAR(l);
4095 if (len > maxLength) {
4096 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4097 "AttValue length too long\n");
4098 goto mem_error;
4101 if (ctxt->instate == XML_PARSER_EOF)
4102 goto error;
4104 if ((in_space) && (normalize)) {
4105 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4107 buf[len] = 0;
4108 if (RAW == '<') {
4109 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4110 } else if (RAW != limit) {
4111 if ((c != 0) && (!IS_CHAR(c))) {
4112 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4113 "invalid character in attribute value\n");
4114 } else {
4115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4116 "AttValue: ' expected\n");
4118 } else
4119 NEXT;
4121 if (attlen != NULL) *attlen = len;
4122 return(buf);
4124 mem_error:
4125 xmlErrMemory(ctxt, NULL);
4126 error:
4127 if (buf != NULL)
4128 xmlFree(buf);
4129 if (rep != NULL)
4130 xmlFree(rep);
4131 return(NULL);
4135 * xmlParseAttValue:
4136 * @ctxt: an XML parser context
4138 * DEPRECATED: Internal function, don't use.
4140 * parse a value for an attribute
4141 * Note: the parser won't do substitution of entities here, this
4142 * will be handled later in xmlStringGetNodeList
4144 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4145 * "'" ([^<&'] | Reference)* "'"
4147 * 3.3.3 Attribute-Value Normalization:
4148 * Before the value of an attribute is passed to the application or
4149 * checked for validity, the XML processor must normalize it as follows:
4150 * - a character reference is processed by appending the referenced
4151 * character to the attribute value
4152 * - an entity reference is processed by recursively processing the
4153 * replacement text of the entity
4154 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4155 * appending #x20 to the normalized value, except that only a single
4156 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4157 * parsed entity or the literal entity value of an internal parsed entity
4158 * - other characters are processed by appending them to the normalized value
4159 * If the declared value is not CDATA, then the XML processor must further
4160 * process the normalized attribute value by discarding any leading and
4161 * trailing space (#x20) characters, and by replacing sequences of space
4162 * (#x20) characters by a single space (#x20) character.
4163 * All attributes for which no declaration has been read should be treated
4164 * by a non-validating parser as if declared CDATA.
4166 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4170 xmlChar *
4171 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4172 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4173 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4177 * xmlParseSystemLiteral:
4178 * @ctxt: an XML parser context
4180 * DEPRECATED: Internal function, don't use.
4182 * parse an XML Literal
4184 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4186 * Returns the SystemLiteral parsed or NULL
4189 xmlChar *
4190 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4191 xmlChar *buf = NULL;
4192 int len = 0;
4193 int size = XML_PARSER_BUFFER_SIZE;
4194 int cur, l;
4195 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4196 XML_MAX_TEXT_LENGTH :
4197 XML_MAX_NAME_LENGTH;
4198 xmlChar stop;
4199 int state = ctxt->instate;
4201 if (RAW == '"') {
4202 NEXT;
4203 stop = '"';
4204 } else if (RAW == '\'') {
4205 NEXT;
4206 stop = '\'';
4207 } else {
4208 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4209 return(NULL);
4212 buf = (xmlChar *) xmlMallocAtomic(size);
4213 if (buf == NULL) {
4214 xmlErrMemory(ctxt, NULL);
4215 return(NULL);
4217 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4218 cur = CUR_CHAR(l);
4219 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4220 if (len + 5 >= size) {
4221 xmlChar *tmp;
4223 size *= 2;
4224 tmp = (xmlChar *) xmlRealloc(buf, size);
4225 if (tmp == NULL) {
4226 xmlFree(buf);
4227 xmlErrMemory(ctxt, NULL);
4228 ctxt->instate = (xmlParserInputState) state;
4229 return(NULL);
4231 buf = tmp;
4233 COPY_BUF(l,buf,len,cur);
4234 if (len > maxLength) {
4235 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4236 xmlFree(buf);
4237 ctxt->instate = (xmlParserInputState) state;
4238 return(NULL);
4240 NEXTL(l);
4241 cur = CUR_CHAR(l);
4243 buf[len] = 0;
4244 if (ctxt->instate == XML_PARSER_EOF) {
4245 xmlFree(buf);
4246 return(NULL);
4248 ctxt->instate = (xmlParserInputState) state;
4249 if (!IS_CHAR(cur)) {
4250 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4251 } else {
4252 NEXT;
4254 return(buf);
4258 * xmlParsePubidLiteral:
4259 * @ctxt: an XML parser context
4261 * DEPRECATED: Internal function, don't use.
4263 * parse an XML public literal
4265 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4267 * Returns the PubidLiteral parsed or NULL.
4270 xmlChar *
4271 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4272 xmlChar *buf = NULL;
4273 int len = 0;
4274 int size = XML_PARSER_BUFFER_SIZE;
4275 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4276 XML_MAX_TEXT_LENGTH :
4277 XML_MAX_NAME_LENGTH;
4278 xmlChar cur;
4279 xmlChar stop;
4280 xmlParserInputState oldstate = ctxt->instate;
4282 if (RAW == '"') {
4283 NEXT;
4284 stop = '"';
4285 } else if (RAW == '\'') {
4286 NEXT;
4287 stop = '\'';
4288 } else {
4289 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4290 return(NULL);
4292 buf = (xmlChar *) xmlMallocAtomic(size);
4293 if (buf == NULL) {
4294 xmlErrMemory(ctxt, NULL);
4295 return(NULL);
4297 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4298 cur = CUR;
4299 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4300 if (len + 1 >= size) {
4301 xmlChar *tmp;
4303 size *= 2;
4304 tmp = (xmlChar *) xmlRealloc(buf, size);
4305 if (tmp == NULL) {
4306 xmlErrMemory(ctxt, NULL);
4307 xmlFree(buf);
4308 return(NULL);
4310 buf = tmp;
4312 buf[len++] = cur;
4313 if (len > maxLength) {
4314 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4315 xmlFree(buf);
4316 return(NULL);
4318 NEXT;
4319 cur = CUR;
4321 buf[len] = 0;
4322 if (ctxt->instate == XML_PARSER_EOF) {
4323 xmlFree(buf);
4324 return(NULL);
4326 if (cur != stop) {
4327 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4328 } else {
4329 NEXTL(1);
4331 ctxt->instate = oldstate;
4332 return(buf);
4335 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning: an entry is non-zero (and equal to its own index) for the
 * bytes that loop may step over, i.e. TAB and 0x20-0x7F except '&', '<'
 * and ']'. Every other byte, including LF, CR and all non-ASCII bytes,
 * maps to zero and stops the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4376 * xmlParseCharDataInternal:
4377 * @ctxt: an XML parser context
4378 * @partial: buffer may contain partial UTF-8 sequences
4380 * Parse character data. Always makes progress if the first char isn't
4381 * '<' or '&'.
4383 * The right angle bracket (>) may be represented using the string "&gt;",
4384 * and must, for compatibility, be escaped using "&gt;" or a character
4385 * reference when it appears in the string "]]>" in content, when that
4386 * string is not marking the end of a CDATA section.
4388 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4390 static void
4391 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4392 const xmlChar *in;
4393 int nbchar = 0;
4394 int line = ctxt->input->line;
4395 int col = ctxt->input->col;
4396 int ccol;
4398 GROW;
4400 * Accelerated common case where input don't need to be
4401 * modified before passing it to the handler.
4403 in = ctxt->input->cur;
4404 do {
4405 get_more_space:
4406 while (*in == 0x20) { in++; ctxt->input->col++; }
4407 if (*in == 0xA) {
4408 do {
4409 ctxt->input->line++; ctxt->input->col = 1;
4410 in++;
4411 } while (*in == 0xA);
4412 goto get_more_space;
4414 if (*in == '<') {
4415 nbchar = in - ctxt->input->cur;
4416 if (nbchar > 0) {
4417 const xmlChar *tmp = ctxt->input->cur;
4418 ctxt->input->cur = in;
4420 if ((ctxt->sax != NULL) &&
4421 (ctxt->sax->ignorableWhitespace !=
4422 ctxt->sax->characters)) {
4423 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4424 if (ctxt->sax->ignorableWhitespace != NULL)
4425 ctxt->sax->ignorableWhitespace(ctxt->userData,
4426 tmp, nbchar);
4427 } else {
4428 if (ctxt->sax->characters != NULL)
4429 ctxt->sax->characters(ctxt->userData,
4430 tmp, nbchar);
4431 if (*ctxt->space == -1)
4432 *ctxt->space = -2;
4434 } else if ((ctxt->sax != NULL) &&
4435 (ctxt->sax->characters != NULL)) {
4436 ctxt->sax->characters(ctxt->userData,
4437 tmp, nbchar);
4440 return;
4443 get_more:
4444 ccol = ctxt->input->col;
4445 while (test_char_data[*in]) {
4446 in++;
4447 ccol++;
4449 ctxt->input->col = ccol;
4450 if (*in == 0xA) {
4451 do {
4452 ctxt->input->line++; ctxt->input->col = 1;
4453 in++;
4454 } while (*in == 0xA);
4455 goto get_more;
4457 if (*in == ']') {
4458 if ((in[1] == ']') && (in[2] == '>')) {
4459 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4460 if (ctxt->instate != XML_PARSER_EOF)
4461 ctxt->input->cur = in + 1;
4462 return;
4464 in++;
4465 ctxt->input->col++;
4466 goto get_more;
4468 nbchar = in - ctxt->input->cur;
4469 if (nbchar > 0) {
4470 if ((ctxt->sax != NULL) &&
4471 (ctxt->sax->ignorableWhitespace !=
4472 ctxt->sax->characters) &&
4473 (IS_BLANK_CH(*ctxt->input->cur))) {
4474 const xmlChar *tmp = ctxt->input->cur;
4475 ctxt->input->cur = in;
4477 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4478 if (ctxt->sax->ignorableWhitespace != NULL)
4479 ctxt->sax->ignorableWhitespace(ctxt->userData,
4480 tmp, nbchar);
4481 } else {
4482 if (ctxt->sax->characters != NULL)
4483 ctxt->sax->characters(ctxt->userData,
4484 tmp, nbchar);
4485 if (*ctxt->space == -1)
4486 *ctxt->space = -2;
4488 line = ctxt->input->line;
4489 col = ctxt->input->col;
4490 } else if (ctxt->sax != NULL) {
4491 if (ctxt->sax->characters != NULL)
4492 ctxt->sax->characters(ctxt->userData,
4493 ctxt->input->cur, nbchar);
4494 line = ctxt->input->line;
4495 col = ctxt->input->col;
4497 if (ctxt->instate == XML_PARSER_EOF)
4498 return;
4500 ctxt->input->cur = in;
4501 if (*in == 0xD) {
4502 in++;
4503 if (*in == 0xA) {
4504 ctxt->input->cur = in;
4505 in++;
4506 ctxt->input->line++; ctxt->input->col = 1;
4507 continue; /* while */
4509 in--;
4511 if (*in == '<') {
4512 return;
4514 if (*in == '&') {
4515 return;
4517 SHRINK;
4518 GROW;
4519 if (ctxt->instate == XML_PARSER_EOF)
4520 return;
4521 in = ctxt->input->cur;
4522 } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4523 (*in == 0x09) || (*in == 0x0a));
4524 ctxt->input->line = line;
4525 ctxt->input->col = col;
4526 xmlParseCharDataComplex(ctxt, partial);
4530 * xmlParseCharDataComplex:
4531 * @ctxt: an XML parser context
4532 * @cdata: int indicating whether we are within a CDATA section
4534 * Always makes progress if the first char isn't '<' or '&'.
4536 * parse a CharData section.this is the fallback function
4537 * of xmlParseCharData() when the parsing requires handling
4538 * of non-ASCII characters.
4540 static void
4541 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4542 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4543 int nbchar = 0;
4544 int cur, l;
4546 cur = CUR_CHAR(l);
4547 while ((cur != '<') && /* checked */
4548 (cur != '&') &&
4549 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4550 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4551 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4553 COPY_BUF(l,buf,nbchar,cur);
4554 /* move current position before possible calling of ctxt->sax->characters */
4555 NEXTL(l);
4556 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4557 buf[nbchar] = 0;
4560 * OK the segment is to be consumed as chars.
4562 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4563 if (areBlanks(ctxt, buf, nbchar, 0)) {
4564 if (ctxt->sax->ignorableWhitespace != NULL)
4565 ctxt->sax->ignorableWhitespace(ctxt->userData,
4566 buf, nbchar);
4567 } else {
4568 if (ctxt->sax->characters != NULL)
4569 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4570 if ((ctxt->sax->characters !=
4571 ctxt->sax->ignorableWhitespace) &&
4572 (*ctxt->space == -1))
4573 *ctxt->space = -2;
4576 nbchar = 0;
4577 /* something really bad happened in the SAX callback */
4578 if (ctxt->instate != XML_PARSER_CONTENT)
4579 return;
4580 SHRINK;
4582 cur = CUR_CHAR(l);
4584 if (ctxt->instate == XML_PARSER_EOF)
4585 return;
4586 if (nbchar != 0) {
4587 buf[nbchar] = 0;
4589 * OK the segment is to be consumed as chars.
4591 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4592 if (areBlanks(ctxt, buf, nbchar, 0)) {
4593 if (ctxt->sax->ignorableWhitespace != NULL)
4594 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4595 } else {
4596 if (ctxt->sax->characters != NULL)
4597 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4598 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4599 (*ctxt->space == -1))
4600 *ctxt->space = -2;
4605 * cur == 0 can mean
4607 * - XML_PARSER_EOF or memory error. This is checked above.
4608 * - An actual 0 character.
4609 * - End of buffer.
4610 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4612 if (ctxt->input->cur < ctxt->input->end) {
4613 if ((cur == 0) && (CUR != 0)) {
4614 if (partial == 0) {
4615 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4617 NEXTL(1);
4619 } else if ((cur != '<') && (cur != '&')) {
4620 /* Generate the error and skip the offending character */
4621 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4622 "PCDATA invalid Char value %d\n", cur);
4623 NEXTL(l);
4629 * xmlParseCharData:
4630 * @ctxt: an XML parser context
4631 * @cdata: unused
4633 * DEPRECATED: Internal function, don't use.
4635 void
4636 xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4637 xmlParseCharDataInternal(ctxt, 0);
4641 * xmlParseExternalID:
4642 * @ctxt: an XML parser context
4643 * @publicID: a xmlChar** receiving PubidLiteral
4644 * @strict: indicate whether we should restrict parsing to only
4645 * production [75], see NOTE below
4647 * DEPRECATED: Internal function, don't use.
4649 * Parse an External ID or a Public ID
4651 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4652 * 'PUBLIC' S PubidLiteral S SystemLiteral
4654 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4655 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4657 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4659 * Returns the function returns SystemLiteral and in the second
4660 * case publicID receives PubidLiteral, is strict is off
4661 * it is possible to return NULL and have publicID set.
4664 xmlChar *
4665 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4666 xmlChar *URI = NULL;
4668 *publicID = NULL;
4669 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4670 SKIP(6);
4671 if (SKIP_BLANKS == 0) {
4672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4673 "Space required after 'SYSTEM'\n");
4675 URI = xmlParseSystemLiteral(ctxt);
4676 if (URI == NULL) {
4677 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4679 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4680 SKIP(6);
4681 if (SKIP_BLANKS == 0) {
4682 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4683 "Space required after 'PUBLIC'\n");
4685 *publicID = xmlParsePubidLiteral(ctxt);
4686 if (*publicID == NULL) {
4687 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4689 if (strict) {
4691 * We don't handle [83] so "S SystemLiteral" is required.
4693 if (SKIP_BLANKS == 0) {
4694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4695 "Space required after the Public Identifier\n");
4697 } else {
4699 * We handle [83] so we return immediately, if
4700 * "S SystemLiteral" is not detected. We skip blanks if no
4701 * system literal was found, but this is harmless since we must
4702 * be at the end of a NotationDecl.
4704 if (SKIP_BLANKS == 0) return(NULL);
4705 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4707 URI = xmlParseSystemLiteral(ctxt);
4708 if (URI == NULL) {
4709 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4712 return(URI);
4716 * xmlParseCommentComplex:
4717 * @ctxt: an XML parser context
4718 * @buf: the already parsed part of the buffer
4719 * @len: number of bytes in the buffer
4720 * @size: allocated size of the buffer
4722 * Skip an XML (SGML) comment <!-- .... -->
4723 * The spec says that "For compatibility, the string "--" (double-hyphen)
4724 * must not occur within comments. "
4725 * This is the slow routine in case the accelerator for ascii didn't work
4727 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4729 static void
4730 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4731 size_t len, size_t size) {
4732 int q, ql;
4733 int r, rl;
4734 int cur, l;
4735 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4736 XML_MAX_HUGE_LENGTH :
4737 XML_MAX_TEXT_LENGTH;
4738 int inputid;
4740 inputid = ctxt->input->id;
4742 if (buf == NULL) {
4743 len = 0;
4744 size = XML_PARSER_BUFFER_SIZE;
4745 buf = (xmlChar *) xmlMallocAtomic(size);
4746 if (buf == NULL) {
4747 xmlErrMemory(ctxt, NULL);
4748 return;
4751 q = CUR_CHAR(ql);
4752 if (q == 0)
4753 goto not_terminated;
4754 if (!IS_CHAR(q)) {
4755 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4756 "xmlParseComment: invalid xmlChar value %d\n",
4758 xmlFree (buf);
4759 return;
4761 NEXTL(ql);
4762 r = CUR_CHAR(rl);
4763 if (r == 0)
4764 goto not_terminated;
4765 if (!IS_CHAR(r)) {
4766 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4767 "xmlParseComment: invalid xmlChar value %d\n",
4769 xmlFree (buf);
4770 return;
4772 NEXTL(rl);
4773 cur = CUR_CHAR(l);
4774 if (cur == 0)
4775 goto not_terminated;
4776 while (IS_CHAR(cur) && /* checked */
4777 ((cur != '>') ||
4778 (r != '-') || (q != '-'))) {
4779 if ((r == '-') && (q == '-')) {
4780 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4782 if (len + 5 >= size) {
4783 xmlChar *new_buf;
4784 size_t new_size;
4786 new_size = size * 2;
4787 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4788 if (new_buf == NULL) {
4789 xmlFree (buf);
4790 xmlErrMemory(ctxt, NULL);
4791 return;
4793 buf = new_buf;
4794 size = new_size;
4796 COPY_BUF(ql,buf,len,q);
4797 if (len > maxLength) {
4798 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4799 "Comment too big found", NULL);
4800 xmlFree (buf);
4801 return;
4804 q = r;
4805 ql = rl;
4806 r = cur;
4807 rl = l;
4809 NEXTL(l);
4810 cur = CUR_CHAR(l);
4813 buf[len] = 0;
4814 if (ctxt->instate == XML_PARSER_EOF) {
4815 xmlFree(buf);
4816 return;
4818 if (cur == 0) {
4819 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4820 "Comment not terminated \n<!--%.50s\n", buf);
4821 } else if (!IS_CHAR(cur)) {
4822 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4823 "xmlParseComment: invalid xmlChar value %d\n",
4824 cur);
4825 } else {
4826 if (inputid != ctxt->input->id) {
4827 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4828 "Comment doesn't start and stop in the same"
4829 " entity\n");
4831 NEXT;
4832 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4833 (!ctxt->disableSAX))
4834 ctxt->sax->comment(ctxt->userData, buf);
4836 xmlFree(buf);
4837 return;
4838 not_terminated:
4839 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4840 "Comment not terminated\n", NULL);
4841 xmlFree(buf);
4842 return;
4846 * xmlParseComment:
4847 * @ctxt: an XML parser context
4849 * DEPRECATED: Internal function, don't use.
4851 * Parse an XML (SGML) comment. Always consumes '<!'.
4853 * The spec says that "For compatibility, the string "--" (double-hyphen)
4854 * must not occur within comments. "
4856 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4858 void
4859 xmlParseComment(xmlParserCtxtPtr ctxt) {
4860 xmlChar *buf = NULL;
4861 size_t size = XML_PARSER_BUFFER_SIZE;
4862 size_t len = 0;
4863 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4864 XML_MAX_HUGE_LENGTH :
4865 XML_MAX_TEXT_LENGTH;
4866 xmlParserInputState state;
4867 const xmlChar *in;
4868 size_t nbchar = 0;
4869 int ccol;
4870 int inputid;
4873 * Check that there is a comment right here.
4875 if ((RAW != '<') || (NXT(1) != '!'))
4876 return;
4877 SKIP(2);
4878 if ((RAW != '-') || (NXT(1) != '-'))
4879 return;
4880 state = ctxt->instate;
4881 ctxt->instate = XML_PARSER_COMMENT;
4882 inputid = ctxt->input->id;
4883 SKIP(2);
4884 GROW;
4887 * Accelerated common case where input don't need to be
4888 * modified before passing it to the handler.
4890 in = ctxt->input->cur;
4891 do {
4892 if (*in == 0xA) {
4893 do {
4894 ctxt->input->line++; ctxt->input->col = 1;
4895 in++;
4896 } while (*in == 0xA);
4898 get_more:
4899 ccol = ctxt->input->col;
4900 while (((*in > '-') && (*in <= 0x7F)) ||
4901 ((*in >= 0x20) && (*in < '-')) ||
4902 (*in == 0x09)) {
4903 in++;
4904 ccol++;
4906 ctxt->input->col = ccol;
4907 if (*in == 0xA) {
4908 do {
4909 ctxt->input->line++; ctxt->input->col = 1;
4910 in++;
4911 } while (*in == 0xA);
4912 goto get_more;
4914 nbchar = in - ctxt->input->cur;
4916 * save current set of data
4918 if (nbchar > 0) {
4919 if ((ctxt->sax != NULL) &&
4920 (ctxt->sax->comment != NULL)) {
4921 if (buf == NULL) {
4922 if ((*in == '-') && (in[1] == '-'))
4923 size = nbchar + 1;
4924 else
4925 size = XML_PARSER_BUFFER_SIZE + nbchar;
4926 buf = (xmlChar *) xmlMallocAtomic(size);
4927 if (buf == NULL) {
4928 xmlErrMemory(ctxt, NULL);
4929 ctxt->instate = state;
4930 return;
4932 len = 0;
4933 } else if (len + nbchar + 1 >= size) {
4934 xmlChar *new_buf;
4935 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4936 new_buf = (xmlChar *) xmlRealloc(buf, size);
4937 if (new_buf == NULL) {
4938 xmlFree (buf);
4939 xmlErrMemory(ctxt, NULL);
4940 ctxt->instate = state;
4941 return;
4943 buf = new_buf;
4945 memcpy(&buf[len], ctxt->input->cur, nbchar);
4946 len += nbchar;
4947 buf[len] = 0;
4950 if (len > maxLength) {
4951 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4952 "Comment too big found", NULL);
4953 xmlFree (buf);
4954 return;
4956 ctxt->input->cur = in;
4957 if (*in == 0xA) {
4958 in++;
4959 ctxt->input->line++; ctxt->input->col = 1;
4961 if (*in == 0xD) {
4962 in++;
4963 if (*in == 0xA) {
4964 ctxt->input->cur = in;
4965 in++;
4966 ctxt->input->line++; ctxt->input->col = 1;
4967 goto get_more;
4969 in--;
4971 SHRINK;
4972 GROW;
4973 if (ctxt->instate == XML_PARSER_EOF) {
4974 xmlFree(buf);
4975 return;
4977 in = ctxt->input->cur;
4978 if (*in == '-') {
4979 if (in[1] == '-') {
4980 if (in[2] == '>') {
4981 if (ctxt->input->id != inputid) {
4982 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4983 "comment doesn't start and stop in the"
4984 " same entity\n");
4986 SKIP(3);
4987 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4988 (!ctxt->disableSAX)) {
4989 if (buf != NULL)
4990 ctxt->sax->comment(ctxt->userData, buf);
4991 else
4992 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4994 if (buf != NULL)
4995 xmlFree(buf);
4996 if (ctxt->instate != XML_PARSER_EOF)
4997 ctxt->instate = state;
4998 return;
5000 if (buf != NULL) {
5001 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5002 "Double hyphen within comment: "
5003 "<!--%.50s\n",
5004 buf);
5005 } else
5006 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5007 "Double hyphen within comment\n", NULL);
5008 if (ctxt->instate == XML_PARSER_EOF) {
5009 xmlFree(buf);
5010 return;
5012 in++;
5013 ctxt->input->col++;
5015 in++;
5016 ctxt->input->col++;
5017 goto get_more;
5019 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5020 xmlParseCommentComplex(ctxt, buf, len, size);
5021 ctxt->instate = state;
5022 return;
5027 * xmlParsePITarget:
5028 * @ctxt: an XML parser context
5030 * DEPRECATED: Internal function, don't use.
5032 * parse the name of a PI
5034 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5036 * Returns the PITarget name or NULL
5039 const xmlChar *
5040 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5041 const xmlChar *name;
5043 name = xmlParseName(ctxt);
5044 if ((name != NULL) &&
5045 ((name[0] == 'x') || (name[0] == 'X')) &&
5046 ((name[1] == 'm') || (name[1] == 'M')) &&
5047 ((name[2] == 'l') || (name[2] == 'L'))) {
5048 int i;
5049 if ((name[0] == 'x') && (name[1] == 'm') &&
5050 (name[2] == 'l') && (name[3] == 0)) {
5051 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5052 "XML declaration allowed only at the start of the document\n");
5053 return(name);
5054 } else if (name[3] == 0) {
5055 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5056 return(name);
5058 for (i = 0;;i++) {
5059 if (xmlW3CPIs[i] == NULL) break;
5060 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5061 return(name);
5063 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5064 "xmlParsePITarget: invalid name prefix 'xml'\n",
5065 NULL, NULL);
5067 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5068 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5069 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5071 return(name);
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is registered in the parsing context through
 * xmlCatalogAddLocal() so that it can be used if a resolution is needed
 * further down in the document.
 *
 * A malformed value produces an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing: in that case the PI is ignored
 * without any message.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* Pseudo-attribute name. */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* Quoted value, delimited by matching single or double quotes. */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5137 * xmlParsePI:
5138 * @ctxt: an XML parser context
5140 * DEPRECATED: Internal function, don't use.
5142 * parse an XML Processing Instruction.
5144 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5146 * The processing is transferred to SAX once parsed.
5149 void
5150 xmlParsePI(xmlParserCtxtPtr ctxt) {
5151 xmlChar *buf = NULL;
5152 size_t len = 0;
5153 size_t size = XML_PARSER_BUFFER_SIZE;
5154 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5155 XML_MAX_HUGE_LENGTH :
5156 XML_MAX_TEXT_LENGTH;
5157 int cur, l;
5158 const xmlChar *target;
5159 xmlParserInputState state;
5161 if ((RAW == '<') && (NXT(1) == '?')) {
5162 int inputid = ctxt->input->id;
5163 state = ctxt->instate;
5164 ctxt->instate = XML_PARSER_PI;
5166 * this is a Processing Instruction.
5168 SKIP(2);
5171 * Parse the target name and check for special support like
5172 * namespace.
5174 target = xmlParsePITarget(ctxt);
5175 if (target != NULL) {
5176 if ((RAW == '?') && (NXT(1) == '>')) {
5177 if (inputid != ctxt->input->id) {
5178 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5179 "PI declaration doesn't start and stop in"
5180 " the same entity\n");
5182 SKIP(2);
5185 * SAX: PI detected.
5187 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5188 (ctxt->sax->processingInstruction != NULL))
5189 ctxt->sax->processingInstruction(ctxt->userData,
5190 target, NULL);
5191 if (ctxt->instate != XML_PARSER_EOF)
5192 ctxt->instate = state;
5193 return;
5195 buf = (xmlChar *) xmlMallocAtomic(size);
5196 if (buf == NULL) {
5197 xmlErrMemory(ctxt, NULL);
5198 ctxt->instate = state;
5199 return;
5201 if (SKIP_BLANKS == 0) {
5202 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5203 "ParsePI: PI %s space expected\n", target);
5205 cur = CUR_CHAR(l);
5206 while (IS_CHAR(cur) && /* checked */
5207 ((cur != '?') || (NXT(1) != '>'))) {
5208 if (len + 5 >= size) {
5209 xmlChar *tmp;
5210 size_t new_size = size * 2;
5211 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5212 if (tmp == NULL) {
5213 xmlErrMemory(ctxt, NULL);
5214 xmlFree(buf);
5215 ctxt->instate = state;
5216 return;
5218 buf = tmp;
5219 size = new_size;
5221 COPY_BUF(l,buf,len,cur);
5222 if (len > maxLength) {
5223 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5224 "PI %s too big found", target);
5225 xmlFree(buf);
5226 ctxt->instate = state;
5227 return;
5229 NEXTL(l);
5230 cur = CUR_CHAR(l);
5232 buf[len] = 0;
5233 if (ctxt->instate == XML_PARSER_EOF) {
5234 xmlFree(buf);
5235 return;
5237 if (cur != '?') {
5238 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5239 "ParsePI: PI %s never end ...\n", target);
5240 } else {
5241 if (inputid != ctxt->input->id) {
5242 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5243 "PI declaration doesn't start and stop in"
5244 " the same entity\n");
5246 SKIP(2);
5248 #ifdef LIBXML_CATALOG_ENABLED
5249 if (((state == XML_PARSER_MISC) ||
5250 (state == XML_PARSER_START)) &&
5251 (xmlStrEqual(target, XML_CATALOG_PI))) {
5252 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5253 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5254 (allow == XML_CATA_ALLOW_ALL))
5255 xmlParseCatalogPI(ctxt, buf);
5257 #endif
5261 * SAX: PI detected.
5263 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5264 (ctxt->sax->processingInstruction != NULL))
5265 ctxt->sax->processingInstruction(ctxt->userData,
5266 target, buf);
5268 xmlFree(buf);
5269 } else {
5270 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5272 if (ctxt->instate != XML_PARSER_EOF)
5273 ctxt->instate = state;
5278 * xmlParseNotationDecl:
5279 * @ctxt: an XML parser context
5281 * DEPRECATED: Internal function, don't use.
5283 * Parse a notation declaration. Always consumes '<!'.
5285 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5287 * Hence there is actually 3 choices:
5288 * 'PUBLIC' S PubidLiteral
5289 * 'PUBLIC' S PubidLiteral S SystemLiteral
5290 * and 'SYSTEM' S SystemLiteral
5292 * See the NOTE on xmlParseExternalID().
5295 void
5296 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5297 const xmlChar *name;
5298 xmlChar *Pubid;
5299 xmlChar *Systemid;
5301 if ((CUR != '<') || (NXT(1) != '!'))
5302 return;
5303 SKIP(2);
5305 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5306 int inputid = ctxt->input->id;
5307 SKIP(8);
5308 if (SKIP_BLANKS == 0) {
5309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5310 "Space required after '<!NOTATION'\n");
5311 return;
5314 name = xmlParseName(ctxt);
5315 if (name == NULL) {
5316 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5317 return;
5319 if (xmlStrchr(name, ':') != NULL) {
5320 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5321 "colons are forbidden from notation names '%s'\n",
5322 name, NULL, NULL);
5324 if (SKIP_BLANKS == 0) {
5325 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5326 "Space required after the NOTATION name'\n");
5327 return;
5331 * Parse the IDs.
5333 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5334 SKIP_BLANKS;
5336 if (RAW == '>') {
5337 if (inputid != ctxt->input->id) {
5338 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339 "Notation declaration doesn't start and stop"
5340 " in the same entity\n");
5342 NEXT;
5343 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5344 (ctxt->sax->notationDecl != NULL))
5345 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5346 } else {
5347 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5349 if (Systemid != NULL) xmlFree(Systemid);
5350 if (Pubid != NULL) xmlFree(Pubid);
5355 * xmlParseEntityDecl:
5356 * @ctxt: an XML parser context
5358 * DEPRECATED: Internal function, don't use.
5360 * Parse an entity declaration. Always consumes '<!'.
5362 * [70] EntityDecl ::= GEDecl | PEDecl
5364 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5366 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5368 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5370 * [74] PEDef ::= EntityValue | ExternalID
5372 * [76] NDataDecl ::= S 'NDATA' S Name
5374 * [ VC: Notation Declared ]
5375 * The Name must match the declared name of a notation.
5378 void
5379 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5380 const xmlChar *name = NULL;
5381 xmlChar *value = NULL;
5382 xmlChar *URI = NULL, *literal = NULL;
5383 const xmlChar *ndata = NULL;
5384 int isParameter = 0;
5385 xmlChar *orig = NULL;
5387 if ((CUR != '<') || (NXT(1) != '!'))
5388 return;
5389 SKIP(2);
5391 /* GROW; done in the caller */
5392 if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5393 int inputid = ctxt->input->id;
5394 SKIP(6);
5395 if (SKIP_BLANKS == 0) {
5396 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5397 "Space required after '<!ENTITY'\n");
5400 if (RAW == '%') {
5401 NEXT;
5402 if (SKIP_BLANKS == 0) {
5403 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5404 "Space required after '%%'\n");
5406 isParameter = 1;
5409 name = xmlParseName(ctxt);
5410 if (name == NULL) {
5411 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5412 "xmlParseEntityDecl: no name\n");
5413 return;
5415 if (xmlStrchr(name, ':') != NULL) {
5416 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5417 "colons are forbidden from entities names '%s'\n",
5418 name, NULL, NULL);
5420 if (SKIP_BLANKS == 0) {
5421 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5422 "Space required after the entity name\n");
5425 ctxt->instate = XML_PARSER_ENTITY_DECL;
5427 * handle the various case of definitions...
5429 if (isParameter) {
5430 if ((RAW == '"') || (RAW == '\'')) {
5431 value = xmlParseEntityValue(ctxt, &orig);
5432 if (value) {
5433 if ((ctxt->sax != NULL) &&
5434 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5435 ctxt->sax->entityDecl(ctxt->userData, name,
5436 XML_INTERNAL_PARAMETER_ENTITY,
5437 NULL, NULL, value);
5439 } else {
5440 URI = xmlParseExternalID(ctxt, &literal, 1);
5441 if ((URI == NULL) && (literal == NULL)) {
5442 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5444 if (URI) {
5445 xmlURIPtr uri;
5447 uri = xmlParseURI((const char *) URI);
5448 if (uri == NULL) {
5449 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5450 "Invalid URI: %s\n", URI);
5452 * This really ought to be a well formedness error
5453 * but the XML Core WG decided otherwise c.f. issue
5454 * E26 of the XML erratas.
5456 } else {
5457 if (uri->fragment != NULL) {
5459 * Okay this is foolish to block those but not
5460 * invalid URIs.
5462 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5463 } else {
5464 if ((ctxt->sax != NULL) &&
5465 (!ctxt->disableSAX) &&
5466 (ctxt->sax->entityDecl != NULL))
5467 ctxt->sax->entityDecl(ctxt->userData, name,
5468 XML_EXTERNAL_PARAMETER_ENTITY,
5469 literal, URI, NULL);
5471 xmlFreeURI(uri);
5475 } else {
5476 if ((RAW == '"') || (RAW == '\'')) {
5477 value = xmlParseEntityValue(ctxt, &orig);
5478 if ((ctxt->sax != NULL) &&
5479 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5480 ctxt->sax->entityDecl(ctxt->userData, name,
5481 XML_INTERNAL_GENERAL_ENTITY,
5482 NULL, NULL, value);
5484 * For expat compatibility in SAX mode.
5486 if ((ctxt->myDoc == NULL) ||
5487 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5488 if (ctxt->myDoc == NULL) {
5489 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5490 if (ctxt->myDoc == NULL) {
5491 xmlErrMemory(ctxt, "New Doc failed");
5492 goto done;
5494 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5496 if (ctxt->myDoc->intSubset == NULL)
5497 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5498 BAD_CAST "fake", NULL, NULL);
5500 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5501 NULL, NULL, value);
5503 } else {
5504 URI = xmlParseExternalID(ctxt, &literal, 1);
5505 if ((URI == NULL) && (literal == NULL)) {
5506 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5508 if (URI) {
5509 xmlURIPtr uri;
5511 uri = xmlParseURI((const char *)URI);
5512 if (uri == NULL) {
5513 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5514 "Invalid URI: %s\n", URI);
5516 * This really ought to be a well formedness error
5517 * but the XML Core WG decided otherwise c.f. issue
5518 * E26 of the XML erratas.
5520 } else {
5521 if (uri->fragment != NULL) {
5523 * Okay this is foolish to block those but not
5524 * invalid URIs.
5526 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5528 xmlFreeURI(uri);
5531 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5532 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5533 "Space required before 'NDATA'\n");
5535 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5536 SKIP(5);
5537 if (SKIP_BLANKS == 0) {
5538 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5539 "Space required after 'NDATA'\n");
5541 ndata = xmlParseName(ctxt);
5542 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5543 (ctxt->sax->unparsedEntityDecl != NULL))
5544 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5545 literal, URI, ndata);
5546 } else {
5547 if ((ctxt->sax != NULL) &&
5548 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5549 ctxt->sax->entityDecl(ctxt->userData, name,
5550 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5551 literal, URI, NULL);
5553 * For expat compatibility in SAX mode.
5554 * assuming the entity replacement was asked for
5556 if ((ctxt->replaceEntities != 0) &&
5557 ((ctxt->myDoc == NULL) ||
5558 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5559 if (ctxt->myDoc == NULL) {
5560 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5561 if (ctxt->myDoc == NULL) {
5562 xmlErrMemory(ctxt, "New Doc failed");
5563 goto done;
5565 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5568 if (ctxt->myDoc->intSubset == NULL)
5569 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5570 BAD_CAST "fake", NULL, NULL);
5571 xmlSAX2EntityDecl(ctxt, name,
5572 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5573 literal, URI, NULL);
5578 if (ctxt->instate == XML_PARSER_EOF)
5579 goto done;
5580 SKIP_BLANKS;
5581 if (RAW != '>') {
5582 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5583 "xmlParseEntityDecl: entity %s not terminated\n", name);
5584 xmlHaltParser(ctxt);
5585 } else {
5586 if (inputid != ctxt->input->id) {
5587 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5588 "Entity declaration doesn't start and stop in"
5589 " the same entity\n");
5591 NEXT;
5593 if (orig != NULL) {
5595 * Ugly mechanism to save the raw entity value.
5597 xmlEntityPtr cur = NULL;
5599 if (isParameter) {
5600 if ((ctxt->sax != NULL) &&
5601 (ctxt->sax->getParameterEntity != NULL))
5602 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5603 } else {
5604 if ((ctxt->sax != NULL) &&
5605 (ctxt->sax->getEntity != NULL))
5606 cur = ctxt->sax->getEntity(ctxt->userData, name);
5607 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5608 cur = xmlSAX2GetEntity(ctxt, name);
5611 if ((cur != NULL) && (cur->orig == NULL)) {
5612 cur->orig = orig;
5613 orig = NULL;
5617 done:
5618 if (value != NULL) xmlFree(value);
5619 if (URI != NULL) xmlFree(URI);
5620 if (literal != NULL) xmlFree(literal);
5621 if (orig != NULL) xmlFree(orig);
5626 * xmlParseDefaultDecl:
5627 * @ctxt: an XML parser context
5628 * @value: Receive a possible fixed default value for the attribute
5630 * DEPRECATED: Internal function, don't use.
5632 * Parse an attribute default declaration
5634 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5636 * [ VC: Required Attribute ]
5637 * if the default declaration is the keyword #REQUIRED, then the
5638 * attribute must be specified for all elements of the type in the
5639 * attribute-list declaration.
5641 * [ VC: Attribute Default Legal ]
5642 * The declared default value must meet the lexical constraints of
5643 * the declared attribute type c.f. xmlValidateAttributeDecl()
5645 * [ VC: Fixed Attribute Default ]
5646 * if an attribute has a default value declared with the #FIXED
5647 * keyword, instances of that attribute must match the default value.
5649 * [ WFC: No < in Attribute Values ]
5650 * handled in xmlParseAttValue()
5652 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5653 * or XML_ATTRIBUTE_FIXED.
5657 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5658 int val;
5659 xmlChar *ret;
5661 *value = NULL;
5662 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5663 SKIP(9);
5664 return(XML_ATTRIBUTE_REQUIRED);
5666 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5667 SKIP(8);
5668 return(XML_ATTRIBUTE_IMPLIED);
5670 val = XML_ATTRIBUTE_NONE;
5671 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5672 SKIP(6);
5673 val = XML_ATTRIBUTE_FIXED;
5674 if (SKIP_BLANKS == 0) {
5675 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5676 "Space required after '#FIXED'\n");
5679 ret = xmlParseAttValue(ctxt);
5680 ctxt->instate = XML_PARSER_DTD;
5681 if (ret == NULL) {
5682 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5683 "Attribute default value declaration error\n");
5684 } else
5685 *value = ret;
5686 return(val);
5690 * xmlParseNotationType:
5691 * @ctxt: an XML parser context
5693 * DEPRECATED: Internal function, don't use.
5695 * parse an Notation attribute type.
5697 * Note: the leading 'NOTATION' S part has already being parsed...
5699 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5701 * [ VC: Notation Attributes ]
5702 * Values of this type must match one of the notation names included
5703 * in the declaration; all notation names in the declaration must be declared.
5705 * Returns: the notation attribute tree built while parsing
5708 xmlEnumerationPtr
5709 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5710 const xmlChar *name;
5711 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5713 if (RAW != '(') {
5714 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5715 return(NULL);
5717 do {
5718 NEXT;
5719 SKIP_BLANKS;
5720 name = xmlParseName(ctxt);
5721 if (name == NULL) {
5722 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5723 "Name expected in NOTATION declaration\n");
5724 xmlFreeEnumeration(ret);
5725 return(NULL);
5727 tmp = ret;
5728 while (tmp != NULL) {
5729 if (xmlStrEqual(name, tmp->name)) {
5730 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5731 "standalone: attribute notation value token %s duplicated\n",
5732 name, NULL);
5733 if (!xmlDictOwns(ctxt->dict, name))
5734 xmlFree((xmlChar *) name);
5735 break;
5737 tmp = tmp->next;
5739 if (tmp == NULL) {
5740 cur = xmlCreateEnumeration(name);
5741 if (cur == NULL) {
5742 xmlFreeEnumeration(ret);
5743 return(NULL);
5745 if (last == NULL) ret = last = cur;
5746 else {
5747 last->next = cur;
5748 last = cur;
5751 SKIP_BLANKS;
5752 } while (RAW == '|');
5753 if (RAW != ')') {
5754 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5755 xmlFreeEnumeration(ret);
5756 return(NULL);
5758 NEXT;
5759 return(ret);
5763 * xmlParseEnumerationType:
5764 * @ctxt: an XML parser context
5766 * DEPRECATED: Internal function, don't use.
5768 * parse an Enumeration attribute type.
5770 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5772 * [ VC: Enumeration ]
5773 * Values of this type must match one of the Nmtoken tokens in
5774 * the declaration
5776 * Returns: the enumeration attribute tree built while parsing
5779 xmlEnumerationPtr
5780 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5781 xmlChar *name;
5782 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5784 if (RAW != '(') {
5785 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5786 return(NULL);
5788 do {
5789 NEXT;
5790 SKIP_BLANKS;
5791 name = xmlParseNmtoken(ctxt);
5792 if (name == NULL) {
5793 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5794 return(ret);
5796 tmp = ret;
5797 while (tmp != NULL) {
5798 if (xmlStrEqual(name, tmp->name)) {
5799 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5800 "standalone: attribute enumeration value token %s duplicated\n",
5801 name, NULL);
5802 if (!xmlDictOwns(ctxt->dict, name))
5803 xmlFree(name);
5804 break;
5806 tmp = tmp->next;
5808 if (tmp == NULL) {
5809 cur = xmlCreateEnumeration(name);
5810 if (!xmlDictOwns(ctxt->dict, name))
5811 xmlFree(name);
5812 if (cur == NULL) {
5813 xmlFreeEnumeration(ret);
5814 return(NULL);
5816 if (last == NULL) ret = last = cur;
5817 else {
5818 last->next = cur;
5819 last = cur;
5822 SKIP_BLANKS;
5823 } while (RAW == '|');
5824 if (RAW != ')') {
5825 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5826 return(ret);
5828 NEXT;
5829 return(ret);
5833 * xmlParseEnumeratedType:
5834 * @ctxt: an XML parser context
5835 * @tree: the enumeration tree built while parsing
5837 * DEPRECATED: Internal function, don't use.
5839 * parse an Enumerated attribute type.
5841 * [57] EnumeratedType ::= NotationType | Enumeration
5843 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5846 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5850 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5851 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5852 SKIP(8);
5853 if (SKIP_BLANKS == 0) {
5854 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5855 "Space required after 'NOTATION'\n");
5856 return(0);
5858 *tree = xmlParseNotationType(ctxt);
5859 if (*tree == NULL) return(0);
5860 return(XML_ATTRIBUTE_NOTATION);
5862 *tree = xmlParseEnumerationType(ctxt);
5863 if (*tree == NULL) return(0);
5864 return(XML_ATTRIBUTE_ENUMERATION);
5868 * xmlParseAttributeType:
5869 * @ctxt: an XML parser context
5870 * @tree: the enumeration tree built while parsing
5872 * DEPRECATED: Internal function, don't use.
5874 * parse the Attribute list def for an element
5876 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5878 * [55] StringType ::= 'CDATA'
5880 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5881 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5883 * Validity constraints for attribute values syntax are checked in
5884 * xmlValidateAttributeValue()
5886 * [ VC: ID ]
5887 * Values of type ID must match the Name production. A name must not
5888 * appear more than once in an XML document as a value of this type;
5889 * i.e., ID values must uniquely identify the elements which bear them.
5891 * [ VC: One ID per Element Type ]
5892 * No element type may have more than one ID attribute specified.
5894 * [ VC: ID Attribute Default ]
5895 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5897 * [ VC: IDREF ]
5898 * Values of type IDREF must match the Name production, and values
5899 * of type IDREFS must match Names; each IDREF Name must match the value
5900 * of an ID attribute on some element in the XML document; i.e. IDREF
5901 * values must match the value of some ID attribute.
5903 * [ VC: Entity Name ]
5904 * Values of type ENTITY must match the Name production, values
5905 * of type ENTITIES must match Names; each Entity Name must match the
5906 * name of an unparsed entity declared in the DTD.
5908 * [ VC: Name Token ]
5909 * Values of type NMTOKEN must match the Nmtoken production; values
5910 * of type NMTOKENS must match Nmtokens.
5912 * Returns the attribute type
5915 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5916 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5917 SKIP(5);
5918 return(XML_ATTRIBUTE_CDATA);
5919 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5920 SKIP(6);
5921 return(XML_ATTRIBUTE_IDREFS);
5922 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5923 SKIP(5);
5924 return(XML_ATTRIBUTE_IDREF);
5925 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5926 SKIP(2);
5927 return(XML_ATTRIBUTE_ID);
5928 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5929 SKIP(6);
5930 return(XML_ATTRIBUTE_ENTITY);
5931 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5932 SKIP(8);
5933 return(XML_ATTRIBUTE_ENTITIES);
5934 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5935 SKIP(8);
5936 return(XML_ATTRIBUTE_NMTOKENS);
5937 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5938 SKIP(7);
5939 return(XML_ATTRIBUTE_NMTOKEN);
5941 return(xmlParseEnumeratedType(ctxt, tree));
5945 * xmlParseAttributeListDecl:
5946 * @ctxt: an XML parser context
5948 * DEPRECATED: Internal function, don't use.
5950 * Parse an attribute list declaration for an element. Always consumes '<!'.
5952 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5954 * [53] AttDef ::= S Name S AttType S DefaultDecl
5957 void
5958 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5959 const xmlChar *elemName;
5960 const xmlChar *attrName;
5961 xmlEnumerationPtr tree;
5963 if ((CUR != '<') || (NXT(1) != '!'))
5964 return;
5965 SKIP(2);
5967 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5968 int inputid = ctxt->input->id;
5970 SKIP(7);
5971 if (SKIP_BLANKS == 0) {
5972 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5973 "Space required after '<!ATTLIST'\n");
5975 elemName = xmlParseName(ctxt);
5976 if (elemName == NULL) {
5977 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5978 "ATTLIST: no name for Element\n");
5979 return;
5981 SKIP_BLANKS;
5982 GROW;
5983 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5984 int type;
5985 int def;
5986 xmlChar *defaultValue = NULL;
5988 GROW;
5989 tree = NULL;
5990 attrName = xmlParseName(ctxt);
5991 if (attrName == NULL) {
5992 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5993 "ATTLIST: no name for Attribute\n");
5994 break;
5996 GROW;
5997 if (SKIP_BLANKS == 0) {
5998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5999 "Space required after the attribute name\n");
6000 break;
6003 type = xmlParseAttributeType(ctxt, &tree);
6004 if (type <= 0) {
6005 break;
6008 GROW;
6009 if (SKIP_BLANKS == 0) {
6010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6011 "Space required after the attribute type\n");
6012 if (tree != NULL)
6013 xmlFreeEnumeration(tree);
6014 break;
6017 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6018 if (def <= 0) {
6019 if (defaultValue != NULL)
6020 xmlFree(defaultValue);
6021 if (tree != NULL)
6022 xmlFreeEnumeration(tree);
6023 break;
6025 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6026 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6028 GROW;
6029 if (RAW != '>') {
6030 if (SKIP_BLANKS == 0) {
6031 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6032 "Space required after the attribute default value\n");
6033 if (defaultValue != NULL)
6034 xmlFree(defaultValue);
6035 if (tree != NULL)
6036 xmlFreeEnumeration(tree);
6037 break;
6040 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6041 (ctxt->sax->attributeDecl != NULL))
6042 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6043 type, def, defaultValue, tree);
6044 else if (tree != NULL)
6045 xmlFreeEnumeration(tree);
6047 if ((ctxt->sax2) && (defaultValue != NULL) &&
6048 (def != XML_ATTRIBUTE_IMPLIED) &&
6049 (def != XML_ATTRIBUTE_REQUIRED)) {
6050 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6052 if (ctxt->sax2) {
6053 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6055 if (defaultValue != NULL)
6056 xmlFree(defaultValue);
6057 GROW;
6059 if (RAW == '>') {
6060 if (inputid != ctxt->input->id) {
6061 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6062 "Attribute list declaration doesn't start and"
6063 " stop in the same entity\n");
6065 NEXT;
6071 * xmlParseElementMixedContentDecl:
6072 * @ctxt: an XML parser context
6073 * @inputchk: the input used for the current entity, needed for boundary checks
6075 * DEPRECATED: Internal function, don't use.
6077 * parse the declaration for a Mixed Element content
6078 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6080 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6081 * '(' S? '#PCDATA' S? ')'
6083 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6085 * [ VC: No Duplicate Types ]
6086 * The same name must not appear more than once in a single
6087 * mixed-content declaration.
6089 * returns: the list of the xmlElementContentPtr describing the element choices
6091 xmlElementContentPtr
6092 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6093 xmlElementContentPtr ret = NULL, cur = NULL, n;
6094 const xmlChar *elem = NULL;
6096 GROW;
6097 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6098 SKIP(7);
6099 SKIP_BLANKS;
6100 if (RAW == ')') {
6101 if (ctxt->input->id != inputchk) {
6102 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103 "Element content declaration doesn't start and"
6104 " stop in the same entity\n");
6106 NEXT;
6107 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6108 if (ret == NULL)
6109 return(NULL);
6110 if (RAW == '*') {
6111 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6112 NEXT;
6114 return(ret);
6116 if ((RAW == '(') || (RAW == '|')) {
6117 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6118 if (ret == NULL) return(NULL);
6120 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6121 NEXT;
6122 if (elem == NULL) {
6123 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6124 if (ret == NULL) {
6125 xmlFreeDocElementContent(ctxt->myDoc, cur);
6126 return(NULL);
6128 ret->c1 = cur;
6129 if (cur != NULL)
6130 cur->parent = ret;
6131 cur = ret;
6132 } else {
6133 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6134 if (n == NULL) {
6135 xmlFreeDocElementContent(ctxt->myDoc, ret);
6136 return(NULL);
6138 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6139 if (n->c1 != NULL)
6140 n->c1->parent = n;
6141 cur->c2 = n;
6142 if (n != NULL)
6143 n->parent = cur;
6144 cur = n;
6146 SKIP_BLANKS;
6147 elem = xmlParseName(ctxt);
6148 if (elem == NULL) {
6149 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6150 "xmlParseElementMixedContentDecl : Name expected\n");
6151 xmlFreeDocElementContent(ctxt->myDoc, ret);
6152 return(NULL);
6154 SKIP_BLANKS;
6155 GROW;
6157 if ((RAW == ')') && (NXT(1) == '*')) {
6158 if (elem != NULL) {
6159 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6160 XML_ELEMENT_CONTENT_ELEMENT);
6161 if (cur->c2 != NULL)
6162 cur->c2->parent = cur;
6164 if (ret != NULL)
6165 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6166 if (ctxt->input->id != inputchk) {
6167 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6168 "Element content declaration doesn't start and"
6169 " stop in the same entity\n");
6171 SKIP(2);
6172 } else {
6173 xmlFreeDocElementContent(ctxt->myDoc, ret);
6174 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6175 return(NULL);
6178 } else {
6179 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6181 return(ret);
6185 * xmlParseElementChildrenContentDeclPriv:
6186 * @ctxt: an XML parser context
6187 * @inputchk: the input used for the current entity, needed for boundary checks
6188 * @depth: the level of recursion
6190 * parse the declaration for a children Element content
6191 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6194 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6196 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6198 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6200 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6202 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6203 * TODO Parameter-entity replacement text must be properly nested
6204 * with parenthesized groups. That is to say, if either of the
6205 * opening or closing parentheses in a choice, seq, or Mixed
6206 * construct is contained in the replacement text for a parameter
6207 * entity, both must be contained in the same replacement text. For
6208 * interoperability, if a parameter-entity reference appears in a
6209 * choice, seq, or Mixed construct, its replacement text should not
6210 * be empty, and neither the first nor last non-blank character of
6211 * the replacement text should be a connector (| or ,).
6213 * Returns the tree of xmlElementContentPtr describing the element
6214 * hierarchy.
/*
 * Recursive worker for the children content model. It parses one
 * parenthesised group: a first content particle, then any number of
 * "separator + particle" pairs up to the closing ')', then an optional
 * '?', '*' or '+' applied to the whole group. It returns the root of the
 * resulting content tree, or NULL after an error (the partial tree is freed).
 * inputchk is compared with the current input id once the group is finished;
 * depth is the nesting level, incremented on each recursive call.
 */
6216 static xmlElementContentPtr
6217 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6218 int depth) {
6219 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6220 const xmlChar *elem;
6221 xmlChar type = 0;
/* Bound the recursion: more than 128 levels without XML_PARSE_HUGE, or more than 2048 in any case, is rejected. */
6223 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6224 (depth > 2048)) {
6225 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6226 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6227 depth);
6228 return(NULL);
6230 SKIP_BLANKS;
6231 GROW;
/* First content particle: either a nested group (recursion) or a Name with an optional occurrence suffix. */
6232 if (RAW == '(') {
6233 int inputid = ctxt->input->id;
6235 /* Recurse on first child */
6236 NEXT;
6237 SKIP_BLANKS;
6238 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6239 depth + 1);
6240 if (cur == NULL)
6241 return(NULL);
6242 SKIP_BLANKS;
6243 GROW;
6244 } else {
6245 elem = xmlParseName(ctxt);
6246 if (elem == NULL) {
6247 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6248 return(NULL);
6250 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6251 if (cur == NULL) {
6252 xmlErrMemory(ctxt, NULL);
6253 return(NULL);
6255 GROW;
6256 if (RAW == '?') {
6257 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6258 NEXT;
6259 } else if (RAW == '*') {
6260 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6261 NEXT;
6262 } else if (RAW == '+') {
6263 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6264 NEXT;
6265 } else {
6266 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6268 GROW;
6270 SKIP_BLANKS;
/*
 * Inside the loop: ret is the root of the tree, cur the most recently
 * created operator node, and last the particle parsed by the previous
 * iteration that is not linked into the tree yet. type holds the first
 * separator seen in this group (',' or '|'); a different one is an error.
 */
6271 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6273 * Each loop we parse one separator and one element.
6275 if (RAW == ',') {
6276 if (type == 0) type = CUR;
6279 * Detect "Name | Name , Name" error
6281 else if (type != CUR) {
6282 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6283 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6284 type);
6285 if ((last != NULL) && (last != ret))
6286 xmlFreeDocElementContent(ctxt->myDoc, last);
6287 if (ret != NULL)
6288 xmlFreeDocElementContent(ctxt->myDoc, ret);
6289 return(NULL);
6291 NEXT;
6293 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6294 if (op == NULL) {
6295 if ((last != NULL) && (last != ret))
6296 xmlFreeDocElementContent(ctxt->myDoc, last);
6297 xmlFreeDocElementContent(ctxt->myDoc, ret);
6298 return(NULL);
/* First separator: op becomes the new root with the first particle as c1. Later separators: op is chained as c2 of the previous operator and adopts the pending particle as its c1. */
6300 if (last == NULL) {
6301 op->c1 = ret;
6302 if (ret != NULL)
6303 ret->parent = op;
6304 ret = cur = op;
6305 } else {
6306 cur->c2 = op;
6307 if (op != NULL)
6308 op->parent = cur;
6309 op->c1 = last;
6310 if (last != NULL)
6311 last->parent = op;
6312 cur =op;
6313 last = NULL;
6315 } else if (RAW == '|') {
6316 if (type == 0) type = CUR;
6319 * Detect "Name , Name | Name" error
6321 else if (type != CUR) {
6322 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6323 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6324 type);
6325 if ((last != NULL) && (last != ret))
6326 xmlFreeDocElementContent(ctxt->myDoc, last);
6327 if (ret != NULL)
6328 xmlFreeDocElementContent(ctxt->myDoc, ret);
6329 return(NULL);
6331 NEXT;
6333 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6334 if (op == NULL) {
6335 if ((last != NULL) && (last != ret))
6336 xmlFreeDocElementContent(ctxt->myDoc, last);
6337 if (ret != NULL)
6338 xmlFreeDocElementContent(ctxt->myDoc, ret);
6339 return(NULL);
/* Same linking as in the ',' branch, with an OR operator node. */
6341 if (last == NULL) {
6342 op->c1 = ret;
6343 if (ret != NULL)
6344 ret->parent = op;
6345 ret = cur = op;
6346 } else {
6347 cur->c2 = op;
6348 if (op != NULL)
6349 op->parent = cur;
6350 op->c1 = last;
6351 if (last != NULL)
6352 last->parent = op;
6353 cur =op;
6354 last = NULL;
6356 } else {
6357 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6358 if ((last != NULL) && (last != ret))
6359 xmlFreeDocElementContent(ctxt->myDoc, last);
6360 if (ret != NULL)
6361 xmlFreeDocElementContent(ctxt->myDoc, ret);
6362 return(NULL);
6364 GROW;
6365 SKIP_BLANKS;
6366 GROW;
/* Particle following the separator; it stays in last until the next separator or the end of the group. */
6367 if (RAW == '(') {
6368 int inputid = ctxt->input->id;
6369 /* Recurse on second child */
6370 NEXT;
6371 SKIP_BLANKS;
6372 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6373 depth + 1);
6374 if (last == NULL) {
6375 if (ret != NULL)
6376 xmlFreeDocElementContent(ctxt->myDoc, ret);
6377 return(NULL);
6379 SKIP_BLANKS;
6380 } else {
6381 elem = xmlParseName(ctxt);
6382 if (elem == NULL) {
6383 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6384 if (ret != NULL)
6385 xmlFreeDocElementContent(ctxt->myDoc, ret);
6386 return(NULL);
6388 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6389 if (last == NULL) {
6390 if (ret != NULL)
6391 xmlFreeDocElementContent(ctxt->myDoc, ret);
6392 return(NULL);
6394 if (RAW == '?') {
6395 last->ocur = XML_ELEMENT_CONTENT_OPT;
6396 NEXT;
6397 } else if (RAW == '*') {
6398 last->ocur = XML_ELEMENT_CONTENT_MULT;
6399 NEXT;
6400 } else if (RAW == '+') {
6401 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6402 NEXT;
6403 } else {
6404 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6407 SKIP_BLANKS;
6408 GROW;
/* Attach the pending particle as right operand of the last operator node. */
6410 if ((cur != NULL) && (last != NULL)) {
6411 cur->c2 = last;
6412 if (last != NULL)
6413 last->parent = cur;
6415 if (ctxt->input->id != inputchk) {
6416 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6417 "Element content declaration doesn't start and stop in"
6418 " the same entity\n");
/* Step over the character that ended the loop (')' unless the parser reached XML_PARSER_EOF), then apply the group's occurrence suffix. */
6420 NEXT;
6421 if (RAW == '?') {
6422 if (ret != NULL) {
6423 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6424 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6425 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6426 else
6427 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6429 NEXT;
6430 } else if (RAW == '*') {
6431 if (ret != NULL) {
6432 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6433 cur = ret;
6435 * Some normalization:
6436 * (a | b* | c?)* == (a | b | c)*
6438 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6439 if ((cur->c1 != NULL) &&
6440 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6441 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6442 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6443 if ((cur->c2 != NULL) &&
6444 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6445 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6446 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6447 cur = cur->c2;
6450 NEXT;
6451 } else if (RAW == '+') {
6452 if (ret != NULL) {
6453 int found = 0;
6455 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6456 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6457 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6458 else
6459 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
/* NOTE(review): unlike the '*' branch, cur is not reset to ret before the walk below, so it starts at the last operator node created by the loop rather than at the root - confirm this is intended. */
6461 * Some normalization:
6462 * (a | b*)+ == (a | b)*
6463 * (a | b?)+ == (a | b)*
6465 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6466 if ((cur->c1 != NULL) &&
6467 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6468 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6469 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6470 found = 1;
6472 if ((cur->c2 != NULL) &&
6473 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6474 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6475 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6476 found = 1;
6478 cur = cur->c2;
6480 if (found)
6481 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483 NEXT;
6485 return(ret);
6489 * xmlParseElementChildrenContentDecl:
6490 * @ctxt: an XML parser context
6491 * @inputchk: the input used for the current entity, needed for boundary checks
6493 * DEPRECATED: Internal function, don't use.
6495 * parse the declaration for a Mixed Element content
6496 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6498 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6500 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6502 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6504 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6506 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6507 * TODO Parameter-entity replacement text must be properly nested
6508 * with parenthesized groups. That is to say, if either of the
6509 * opening or closing parentheses in a choice, seq, or Mixed
6510 * construct is contained in the replacement text for a parameter
6511 * entity, both must be contained in the same replacement text. For
6512 * interoperability, if a parameter-entity reference appears in a
6513 * choice, seq, or Mixed construct, its replacement text should not
6514 * be empty, and neither the first nor last non-blank character of
6515 * the replacement text should be a connector (| or ,).
6517 * Returns the tree of xmlElementContentPtr describing the element
6518 * hierarchy.
6520 xmlElementContentPtr
6521 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6522 /* stub left for API/ABI compat */
6523 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6527 * xmlParseElementContentDecl:
6528 * @ctxt: an XML parser context
6529 * @name: the name of the element being defined.
6530 * @result: the Element Content pointer will be stored here if any
6532 * DEPRECATED: Internal function, don't use.
6534 * parse the declaration for an Element content either Mixed or Children,
6535 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6537 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6539 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6543 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6544 xmlElementContentPtr *result) {
6546 xmlElementContentPtr tree = NULL;
6547 int inputid = ctxt->input->id;
6548 int res;
6550 *result = NULL;
6552 if (RAW != '(') {
6553 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6554 "xmlParseElementContentDecl : %s '(' expected\n", name);
6555 return(-1);
6557 NEXT;
6558 GROW;
6559 if (ctxt->instate == XML_PARSER_EOF)
6560 return(-1);
6561 SKIP_BLANKS;
6562 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6563 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6564 res = XML_ELEMENT_TYPE_MIXED;
6565 } else {
6566 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6567 res = XML_ELEMENT_TYPE_ELEMENT;
6569 SKIP_BLANKS;
6570 *result = tree;
6571 return(res);
6575 * xmlParseElementDecl:
6576 * @ctxt: an XML parser context
6578 * DEPRECATED: Internal function, don't use.
6580 * Parse an element declaration. Always consumes '<!'.
6582 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6584 * [ VC: Unique Element Type Declaration ]
6585 * No element type may be declared more than once
6587 * Returns the type of the element, or -1 in case of error
6590 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6591 const xmlChar *name;
6592 int ret = -1;
6593 xmlElementContentPtr content = NULL;
6595 if ((CUR != '<') || (NXT(1) != '!'))
6596 return(ret);
6597 SKIP(2);
6599 /* GROW; done in the caller */
6600 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6601 int inputid = ctxt->input->id;
6603 SKIP(7);
6604 if (SKIP_BLANKS == 0) {
6605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6606 "Space required after 'ELEMENT'\n");
6607 return(-1);
6609 name = xmlParseName(ctxt);
6610 if (name == NULL) {
6611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612 "xmlParseElementDecl: no name for Element\n");
6613 return(-1);
6615 if (SKIP_BLANKS == 0) {
6616 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6617 "Space required after the element name\n");
6619 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6620 SKIP(5);
6622 * Element must always be empty.
6624 ret = XML_ELEMENT_TYPE_EMPTY;
6625 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6626 (NXT(2) == 'Y')) {
6627 SKIP(3);
6629 * Element is a generic container.
6631 ret = XML_ELEMENT_TYPE_ANY;
6632 } else if (RAW == '(') {
6633 ret = xmlParseElementContentDecl(ctxt, name, &content);
6634 } else {
6636 * [ WFC: PEs in Internal Subset ] error handling.
6638 if ((RAW == '%') && (ctxt->external == 0) &&
6639 (ctxt->inputNr == 1)) {
6640 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6641 "PEReference: forbidden within markup decl in internal subset\n");
6642 } else {
6643 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6644 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6646 return(-1);
6649 SKIP_BLANKS;
6651 if (RAW != '>') {
6652 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6653 if (content != NULL) {
6654 xmlFreeDocElementContent(ctxt->myDoc, content);
6656 } else {
6657 if (inputid != ctxt->input->id) {
6658 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6659 "Element declaration doesn't start and stop in"
6660 " the same entity\n");
6663 NEXT;
6664 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6665 (ctxt->sax->elementDecl != NULL)) {
6666 if (content != NULL)
6667 content->parent = NULL;
6668 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6669 content);
6670 if ((content != NULL) && (content->parent == NULL)) {
6672 * this is a trick: if xmlAddElementDecl is called,
6673 * instead of copying the full tree it is plugged directly
6674 * if called from the parser. Avoid duplicating the
6675 * interfaces or change the API/ABI
6677 xmlFreeDocElementContent(ctxt->myDoc, content);
6679 } else if (content != NULL) {
6680 xmlFreeDocElementContent(ctxt->myDoc, content);
6684 return(ret);
6688 * xmlParseConditionalSections
6689 * @ctxt: an XML parser context
6691 * Parse a conditional section. Always consumes '<!['.
6693 * [61] conditionalSect ::= includeSect | ignoreSect
6694 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6695 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6696 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6697 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * Parses conditional sections iteratively, including INCLUDE sections
 * nested inside each other. inputIds is a heap-allocated stack holding, for
 * every INCLUDE section currently open, the id of the input in which its
 * '<![' was read; depth is the number of open INCLUDE sections. The loop
 * ends when depth drops back to 0, when the parser reaches XML_PARSER_EOF,
 * or through the error label.
 */
6700 static void
6701 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6702 int *inputIds = NULL;
6703 size_t inputIdsSize = 0;
6704 size_t depth = 0;
6706 while (ctxt->instate != XML_PARSER_EOF) {
6707 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6708 int id = ctxt->input->id;
6710 SKIP(3);
6711 SKIP_BLANKS;
6713 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6714 SKIP(7);
6715 SKIP_BLANKS;
6716 if (RAW != '[') {
6717 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6718 xmlHaltParser(ctxt);
6719 goto error;
6721 if (ctxt->input->id != id) {
6722 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6723 "All markup of the conditional section is"
6724 " not in the same entity\n");
6726 NEXT;
/* Push the opening input id; the stack starts at 4 entries and doubles whenever it is full. */
6728 if (inputIdsSize <= depth) {
6729 int *tmp;
6731 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6732 tmp = (int *) xmlRealloc(inputIds,
6733 inputIdsSize * sizeof(int));
6734 if (tmp == NULL) {
6735 xmlErrMemory(ctxt, NULL);
6736 goto error;
6738 inputIds = tmp;
6740 inputIds[depth] = id;
6741 depth++;
6742 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
/* ignoreDepth counts the '<![' openers met inside the ignored text that still wait for their ']]>'. */
6743 size_t ignoreDepth = 0;
6745 SKIP(6);
6746 SKIP_BLANKS;
6747 if (RAW != '[') {
6748 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6749 xmlHaltParser(ctxt);
6750 goto error;
6752 if (ctxt->input->id != id) {
6753 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6754 "All markup of the conditional section is"
6755 " not in the same entity\n");
6757 NEXT;
/* Skip the ignored content one character at a time, tracking only nested '<![' and ']]>'; stop on the ']]>' seen while ignoreDepth is 0, which is left unconsumed. */
6759 while (RAW != 0) {
6760 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6761 SKIP(3);
6762 ignoreDepth++;
6763 /* Check for integer overflow */
6764 if (ignoreDepth == 0) {
6765 xmlErrMemory(ctxt, NULL);
6766 goto error;
6768 } else if ((RAW == ']') && (NXT(1) == ']') &&
6769 (NXT(2) == '>')) {
6770 if (ignoreDepth == 0)
6771 break;
6772 SKIP(3);
6773 ignoreDepth--;
6774 } else {
6775 NEXT;
/* A zero byte here means the input ended before the section was closed. */
6779 if (RAW == 0) {
6780 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6781 goto error;
6783 if (ctxt->input->id != id) {
6784 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6785 "All markup of the conditional section is"
6786 " not in the same entity\n");
6788 SKIP(3);
6789 } else {
6790 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6791 xmlHaltParser(ctxt);
6792 goto error;
/* ']]>' closes the innermost open INCLUDE section: pop its id and compare it with the current input. */
6794 } else if ((depth > 0) &&
6795 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6796 depth--;
6797 if (ctxt->input->id != inputIds[depth]) {
6798 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6799 "All markup of the conditional section is not"
6800 " in the same entity\n");
6802 SKIP(3);
6803 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6804 xmlParseMarkupDecl(ctxt);
6805 } else {
6806 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6807 xmlHaltParser(ctxt);
6808 goto error;
/* Every open INCLUDE section has been closed (or only an IGNORE section was parsed): done. */
6811 if (depth == 0)
6812 break;
6814 SKIP_BLANKS;
6815 SHRINK;
6816 GROW;
/* Reached on normal termination as well as on errors: releases the id stack. */
6819 error:
6820 xmlFree(inputIds);
6824 * xmlParseMarkupDecl:
6825 * @ctxt: an XML parser context
6827 * DEPRECATED: Internal function, don't use.
6829 * Parse markup declarations. Always consumes '<!' or '<?'.
6831 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6832 * NotationDecl | PI | Comment
6834 * [ VC: Proper Declaration/PE Nesting ]
6835 * Parameter-entity replacement text must be properly nested with
6836 * markup declarations. That is to say, if either the first character
6837 * or the last character of a markup declaration (markupdecl above) is
6838 * contained in the replacement text for a parameter-entity reference,
6839 * both must be contained in the same replacement text.
6841 * [ WFC: PEs in Internal Subset ]
6842 * In the internal DTD subset, parameter-entity references can occur
6843 * only where markup declarations can occur, not within markup declarations.
6844 * (This does not apply to references that occur in external parameter
6845 * entities or to the external subset.)
6847 void
6848 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6849 GROW;
6850 if (CUR == '<') {
6851 if (NXT(1) == '!') {
6852 switch (NXT(2)) {
6853 case 'E':
6854 if (NXT(3) == 'L')
6855 xmlParseElementDecl(ctxt);
6856 else if (NXT(3) == 'N')
6857 xmlParseEntityDecl(ctxt);
6858 else
6859 SKIP(2);
6860 break;
6861 case 'A':
6862 xmlParseAttributeListDecl(ctxt);
6863 break;
6864 case 'N':
6865 xmlParseNotationDecl(ctxt);
6866 break;
6867 case '-':
6868 xmlParseComment(ctxt);
6869 break;
6870 default:
6871 /* there is an error but it will be detected later */
6872 SKIP(2);
6873 break;
6875 } else if (NXT(1) == '?') {
6876 xmlParsePI(ctxt);
6881 * detect requirement to exit there and act accordingly
6882 * and avoid having instate overridden later on
6884 if (ctxt->instate == XML_PARSER_EOF)
6885 return;
6887 ctxt->instate = XML_PARSER_DTD;
6891 * xmlParseTextDecl:
6892 * @ctxt: an XML parser context
6894 * DEPRECATED: Internal function, don't use.
6896 * parse an XML declaration header for external entities
6898 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6901 void
6902 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6903 xmlChar *version;
6904 const xmlChar *encoding;
6905 int oldstate;
6908 * We know that '<?xml' is here.
6910 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6911 SKIP(5);
6912 } else {
6913 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6914 return;
6917 /* Avoid expansion of parameter entities when skipping blanks. */
6918 oldstate = ctxt->instate;
6919 ctxt->instate = XML_PARSER_START;
6921 if (SKIP_BLANKS == 0) {
6922 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6923 "Space needed after '<?xml'\n");
6927 * We may have the VersionInfo here.
6929 version = xmlParseVersionInfo(ctxt);
6930 if (version == NULL)
6931 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6932 else {
6933 if (SKIP_BLANKS == 0) {
6934 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6935 "Space needed here\n");
6938 ctxt->input->version = version;
6941 * We must have the encoding declaration
6943 encoding = xmlParseEncodingDecl(ctxt);
6944 if (ctxt->instate == XML_PARSER_EOF)
6945 return;
6946 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6948 * The XML REC instructs us to stop parsing right here
6950 ctxt->instate = oldstate;
6951 return;
6953 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6954 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6955 "Missing encoding in text declaration\n");
6958 SKIP_BLANKS;
6959 if ((RAW == '?') && (NXT(1) == '>')) {
6960 SKIP(2);
6961 } else if (RAW == '>') {
6962 /* Deprecated old WD ... */
6963 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6964 NEXT;
6965 } else {
6966 int c;
6968 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6969 while ((c = CUR) != 0) {
6970 NEXT;
6971 if (c == '>')
6972 break;
6976 ctxt->instate = oldstate;
6980 * xmlParseExternalSubset:
6981 * @ctxt: an XML parser context
6982 * @ExternalID: the external identifier
6983 * @SystemID: the system identifier (or URL)
6985 * parse Markup declarations from an external subset
6987 * [30] extSubset ::= textDecl? extSubsetDecl
6989 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6991 void
6992 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6993 const xmlChar *SystemID) {
6994 xmlDetectSAX2(ctxt);
6995 GROW;
6997 if ((ctxt->encoding == NULL) &&
6998 (ctxt->input->end - ctxt->input->cur >= 4)) {
6999 xmlChar start[4];
7000 xmlCharEncoding enc;
7002 start[0] = RAW;
7003 start[1] = NXT(1);
7004 start[2] = NXT(2);
7005 start[3] = NXT(3);
7006 enc = xmlDetectCharEncoding(start, 4);
7007 if (enc != XML_CHAR_ENCODING_NONE)
7008 xmlSwitchEncoding(ctxt, enc);
7011 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7012 xmlParseTextDecl(ctxt);
7013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7015 * The XML REC instructs us to stop parsing right here
7017 xmlHaltParser(ctxt);
7018 return;
7021 if (ctxt->myDoc == NULL) {
7022 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7023 if (ctxt->myDoc == NULL) {
7024 xmlErrMemory(ctxt, "New Doc failed");
7025 return;
7027 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7029 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7030 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7032 ctxt->instate = XML_PARSER_DTD;
7033 ctxt->external = 1;
7034 SKIP_BLANKS;
7035 while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7036 GROW;
7037 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7038 xmlParseConditionalSections(ctxt);
7039 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7040 xmlParseMarkupDecl(ctxt);
7041 } else {
7042 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7043 xmlHaltParser(ctxt);
7044 return;
7046 SKIP_BLANKS;
7047 SHRINK;
7050 if (RAW != 0) {
7051 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7057 * xmlParseReference:
7058 * @ctxt: an XML parser context
7060 * DEPRECATED: Internal function, don't use.
7062 * Parse and handle entity references in content. Depending on the SAX
7063 * interface, this may end up in a call to characters() if this is a
7064 * CharRef or a predefined entity, if there is no reference() callback,
7065 * or if the parser was asked to switch to that mode.
7067 * Always consumes '&'.
7069 * [67] Reference ::= EntityRef | CharRef
/*
 * xmlParseReference: handle one reference ("&#...;" or "&name;") found in
 * content and deliver it to the application.
 *
 * Outline of what the code below does:
 *  - returns at once unless the current character is '&';
 *  - for "&#": the value from xmlParseCharRef() is sent to the characters()
 *    callback; when the context charset is not UTF-8 and the value does not
 *    fit in 8 bits it is sent to reference() as "#x<HEX>" / "#<DEC>" text;
 *  - otherwise the entity is looked up with xmlParseEntityRef(); nothing
 *    more happens if that returns NULL or the document is no longer
 *    well-formed;
 *  - predefined entities have their content sent to characters();
 *  - an entity whose XML_ENT_PARSED flag is clear is parsed once (internal
 *    content or external resource) and the resulting node list is stored in
 *    ent->children;
 *  - finally the entity is reported: through reference() when
 *    ctxt->replaceEntities is 0, or by attaching its nodes (or copies of
 *    them) under ctxt->node when ctxt->replaceEntities is set.
 *
 * Every SAX callback is only invoked when ctxt->sax and the callback are
 * non-NULL and ctxt->disableSAX is not set.
 */
7071 void
7072 xmlParseReference(xmlParserCtxtPtr ctxt) {
7073 xmlEntityPtr ent;
7074 xmlChar *val;
7075 int was_checked;
7076 xmlNodePtr list = NULL;
7077 xmlParserErrors ret = XML_ERR_OK;
7080 if (RAW != '&')
7081 return;
7084 * Simple case of a CharRef
7086 if (NXT(1) == '#') {
7087 int i = 0;
7088 xmlChar out[16];
/* Read before xmlParseCharRef() runs; used below to choose "#x%X" or "#%d". */
7089 int hex = NXT(2);
7090 int value = xmlParseCharRef(ctxt);
7092 if (value == 0)
7093 return;
7094 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7096 * So we are using non-UTF-8 buffers
7097 * Check that the char fit on 8bits, if not
7098 * generate a CharRef.
7100 if (value <= 0xFF) {
7101 out[0] = value;
7102 out[1] = 0;
7103 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7104 (!ctxt->disableSAX))
7105 ctxt->sax->characters(ctxt->userData, out, 1);
7106 } else {
7107 if ((hex == 'x') || (hex == 'X'))
7108 snprintf((char *)out, sizeof(out), "#x%X", value);
7109 else
7110 snprintf((char *)out, sizeof(out), "#%d", value);
7111 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7112 (!ctxt->disableSAX))
7113 ctxt->sax->reference(ctxt->userData, out);
7115 } else {
7117 * Just encode the value in UTF-8
7119 COPY_BUF(0 ,out, i, value);
7120 out[i] = 0;
7121 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7122 (!ctxt->disableSAX))
7123 ctxt->sax->characters(ctxt->userData, out, i);
7125 return;
7129 * We are seeing an entity reference
7131 ent = xmlParseEntityRef(ctxt);
7132 if (ent == NULL) return;
7133 if (!ctxt->wellFormed)
7134 return;
/* Snapshot of the PARSED flag, taken before any first-time parsing below. */
7135 was_checked = ent->flags & XML_ENT_PARSED;
7137 /* special case of predefined entities */
7138 if ((ent->name == NULL) ||
7139 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7140 val = ent->content;
7141 if (val == NULL) return;
7143 * inline the entity.
7145 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7146 (!ctxt->disableSAX))
7147 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7148 return;
7152 * The first reference to the entity trigger a parsing phase
7153 * where the ent->children is filled with the result from
7154 * the parsing.
7155 * Note: external parsed entities will not be loaded, it is not
7156 * required for a non-validating parser, unless the parsing option
7157 * of validating, or substituting entities were given. Doing so is
7158 * far more secure as the parser will only process data coming from
7159 * the document entity by default.
7161 if (((ent->flags & XML_ENT_PARSED) == 0) &&
7162 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7163 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7164 unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7167 * This is a bit hackish but this seems the best
7168 * way to make sure both SAX and DOM entity support
7169 * behaves okay.
7171 void *user_data;
7172 if (ctxt->userData == ctxt)
7173 user_data = NULL;
7174 else
7175 user_data = ctxt->userData;
7177 /* Avoid overflow as much as possible */
7178 ctxt->sizeentcopy = 0;
/* An entity that is already being expanded is a reference loop: fail and halt. */
7180 if (ent->flags & XML_ENT_EXPANDING) {
7181 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7182 xmlHaltParser(ctxt);
7183 return;
7186 ent->flags |= XML_ENT_EXPANDING;
7189 * Check that this entity is well formed
7190 * 4.3.2: An internal general parsed entity is well-formed
7191 * if its replacement text matches the production labeled
7192 * content.
7194 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7195 ctxt->depth++;
7196 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7197 user_data, &list);
7198 ctxt->depth--;
7200 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7201 ctxt->depth++;
7202 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7203 user_data, ctxt->depth, ent->URI,
7204 ent->ExternalID, &list);
7205 ctxt->depth--;
7206 } else {
7207 ret = XML_ERR_ENTITY_PE_INTERNAL;
7208 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7209 "invalid entity type found\n", NULL);
/*
 * Parsing attempt finished: clear the EXPANDING marker, flag the entity as
 * parsed and checked, and record the copy-size counter as its expanded size.
 */
7212 ent->flags &= ~XML_ENT_EXPANDING;
7213 ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7214 ent->expandedSize = ctxt->sizeentcopy;
7215 if (ret == XML_ERR_ENTITY_LOOP) {
7216 xmlHaltParser(ctxt);
7217 xmlFreeNodeList(list);
7218 return;
7220 if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7221 xmlFreeNodeList(list);
7222 return;
7225 if ((ret == XML_ERR_OK) && (list != NULL)) {
7226 ent->children = list;
7228 * Prune it directly in the generated document
7229 * except for single text nodes.
7231 if ((ctxt->replaceEntities == 0) ||
7232 (ctxt->parseMode == XML_PARSE_READER) ||
7233 ((list->type == XML_TEXT_NODE) &&
7234 (list->next == NULL))) {
7235 ent->owner = 1;
7236 while (list != NULL) {
7237 list->parent = (xmlNodePtr) ent;
7238 if (list->doc != ent->doc)
7239 xmlSetTreeDoc(list, ent->doc);
7240 if (list->next == NULL)
7241 ent->last = list;
7242 list = list->next;
7244 list = NULL;
7245 } else {
7246 ent->owner = 0;
7247 while (list != NULL) {
7248 list->parent = (xmlNodePtr) ctxt->node;
7249 list->doc = ctxt->myDoc;
7250 if (list->next == NULL)
7251 ent->last = list;
7252 list = list->next;
7254 list = ent->children;
7255 #ifdef LIBXML_LEGACY_ENABLED
7256 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7257 xmlAddEntityReference(ent, list, NULL);
7258 #endif /* LIBXML_LEGACY_ENABLED */
7260 } else if ((ret != XML_ERR_OK) &&
7261 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7262 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7263 "Entity '%s' failed to parse\n", ent->name);
/* Truncate the stored content to an empty string after a failed parse. */
7264 if (ent->content != NULL)
7265 ent->content[0] = 0;
7266 } else if (list != NULL) {
7267 xmlFreeNodeList(list);
7268 list = NULL;
7271 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7272 was_checked = 0;
7276 * Now that the entity content has been gathered
7277 * provide it to the application, this can take different forms based
7278 * on the parsing modes.
7280 if (ent->children == NULL) {
7282 * Probably running in SAX mode and the callbacks don't
7283 * build the entity content. So unless we already went
7284 * though parsing for first checking go though the entity
7285 * content to generate callbacks associated to the entity
7287 if (was_checked != 0) {
7288 void *user_data;
7290 * This is a bit hackish but this seems the best
7291 * way to make sure both SAX and DOM entity support
7292 * behaves okay.
7294 if (ctxt->userData == ctxt)
7295 user_data = NULL;
7296 else
7297 user_data = ctxt->userData;
/* Re-parse with a NULL list argument: no node list is requested back here. */
7299 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7300 ctxt->depth++;
7301 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7302 ent->content, user_data, NULL);
7303 ctxt->depth--;
7304 } else if (ent->etype ==
7305 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7306 unsigned long oldsizeentities = ctxt->sizeentities;
7308 ctxt->depth++;
7309 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7310 ctxt->sax, user_data, ctxt->depth,
7311 ent->URI, ent->ExternalID, NULL);
7312 ctxt->depth--;
7314 /* Undo the change to sizeentities */
7315 ctxt->sizeentities = oldsizeentities;
7316 } else {
7317 ret = XML_ERR_ENTITY_PE_INTERNAL;
7318 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7319 "invalid entity type found\n", NULL);
7321 if (ret == XML_ERR_ENTITY_LOOP) {
7322 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7323 return;
7325 if (xmlParserEntityCheck(ctxt, 0))
7326 return;
7328 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7329 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7331 * Entity reference callback comes second, it's somewhat
7332 * superfluous but a compatibility to historical behaviour
7334 ctxt->sax->reference(ctxt->userData, ent->name);
7336 return;
7340 * We also check for amplification if entities aren't substituted.
7341 * They might be expanded later.
7343 if ((was_checked != 0) &&
7344 (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7345 return;
7348 * If we didn't get any children for the entity being built
7350 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7351 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7353 * Create a node.
7355 ctxt->sax->reference(ctxt->userData, ent->name);
7356 return;
7359 if (ctxt->replaceEntities) {
7361 * There is a problem on the handling of _private for entities
7362 * (bug 155816): Should we copy the content of the field from
7363 * the entity (possibly overwriting some value set by the user
7364 * when a copy is created), should we leave it alone, or should
7365 * we try to take care of different situations? The problem
7366 * is exacerbated by the usage of this field by the xmlReader.
7367 * To fix this bug, we look at _private on the created node
7368 * and, if it's NULL, we copy in whatever was in the entity.
7369 * If it's not NULL we leave it alone. This is somewhat of a
7370 * hack - maybe we should have further tests to determine
7371 * what to do.
7373 if (ctxt->node != NULL) {
7375 * Seems we are generating the DOM content, do
7376 * a simple tree copy for all references except the first
7377 * In the first occurrence list contains the replacement.
7379 if (((list == NULL) && (ent->owner == 0)) ||
7380 (ctxt->parseMode == XML_PARSE_READER)) {
7381 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7384 * when operating on a reader, the entities definitions
7385 * are always owning the entities subtree.
/*
 * NOTE(review): the two lines below carry no leading '*' yet directly follow
 * comment text, and two delimiter-only lines are missing after them; they
 * look like disabled code kept inside the comment above - confirm against
 * the pristine file before treating them as live statements.
 */
7386 if (ctxt->parseMode == XML_PARSE_READER)
7387 ent->owner = 1;
7390 cur = ent->children;
7391 while (cur != NULL) {
7392 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7393 if (nw != NULL) {
7394 if (nw->_private == NULL)
7395 nw->_private = cur->_private;
7396 if (firstChild == NULL){
7397 firstChild = nw;
7399 nw = xmlAddChild(ctxt->node, nw);
7401 if (cur == ent->last) {
7403 * needed to detect some strange empty
7404 * node cases in the reader tests
7406 if ((ctxt->parseMode == XML_PARSE_READER) &&
7407 (nw != NULL) &&
7408 (nw->type == XML_ELEMENT_NODE) &&
7409 (nw->children == NULL))
7410 nw->extra = 1;
7412 break;
7414 cur = cur->next;
7416 #ifdef LIBXML_LEGACY_ENABLED
7417 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7418 xmlAddEntityReference(ent, firstChild, nw);
7419 #endif /* LIBXML_LEGACY_ENABLED */
7420 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7421 xmlNodePtr nw = NULL, cur, next, last,
7422 firstChild = NULL;
7425 * Copy the entity child list and make it the new
7426 * entity child list. The goal is to make sure any
7427 * ID or REF referenced will be the one from the
7428 * document content and not the entity copy.
7430 cur = ent->children;
7431 ent->children = NULL;
7432 last = ent->last;
7433 ent->last = NULL;
7434 while (cur != NULL) {
7435 next = cur->next;
7436 cur->next = NULL;
7437 cur->parent = NULL;
7438 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7439 if (nw != NULL) {
7440 if (nw->_private == NULL)
7441 nw->_private = cur->_private;
/*
 * firstChild records the original node (cur): the original is what gets
 * attached to ctxt->node below, while the copy (nw) goes to the entity.
 */
7442 if (firstChild == NULL){
7443 firstChild = cur;
7445 xmlAddChild((xmlNodePtr) ent, nw);
7447 xmlAddChild(ctxt->node, cur);
7448 if (cur == last)
7449 break;
7450 cur = next;
7452 if (ent->owner == 0)
7453 ent->owner = 1;
7454 #ifdef LIBXML_LEGACY_ENABLED
7455 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7456 xmlAddEntityReference(ent, firstChild, nw);
7457 #endif /* LIBXML_LEGACY_ENABLED */
7458 } else {
7459 const xmlChar *nbktext;
7462 * the name change is to avoid coalescing of the
7463 * node with a possible previous text one which
7464 * would make ent->children a dangling pointer
7466 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7467 -1);
7468 if (ent->children->type == XML_TEXT_NODE)
7469 ent->children->name = nbktext;
7470 if ((ent->last != ent->children) &&
7471 (ent->last->type == XML_TEXT_NODE))
7472 ent->last->name = nbktext;
7473 xmlAddChildList(ctxt->node, ent->children);
7477 * This is to avoid a nasty side effect, see
7478 * characters() in SAX.c
7480 ctxt->nodemem = 0;
7481 ctxt->nodelen = 0;
7482 return;
7488 * xmlParseEntityRef:
7489 * @ctxt: an XML parser context
7491 * DEPRECATED: Internal function, don't use.
7493 * Parse an entity reference. Always consumes '&'.
7495 * [68] EntityRef ::= '&' Name ';'
7497 * [ WFC: Entity Declared ]
7498 * In a document without any DTD, a document with only an internal DTD
7499 * subset which contains no parameter entity references, or a document
7500 * with "standalone='yes'", the Name given in the entity reference
7501 * must match that in an entity declaration, except that well-formed
7502 * documents need not declare any of the following entities: amp, lt,
7503 * gt, apos, quot. The declaration of a parameter entity must precede
7504 * any reference to it. Similarly, the declaration of a general entity
7505 * must precede any reference to it which appears in a default value in an
7506 * attribute-list declaration. Note that if entities are declared in the
7507 * external subset or in external parameter entities, a non-validating
7508 * processor is not obligated to read and process their declarations;
7509 * for such documents, the rule that an entity must be declared is a
7510 * well-formedness constraint only if standalone='yes'.
7512 * [ WFC: Parsed Entity ]
7513 * An entity reference must not contain the name of an unparsed entity
7515 * Returns the xmlEntityPtr if found, or NULL otherwise.
7517 xmlEntityPtr
7518 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7519 const xmlChar *name;
7520 xmlEntityPtr ent = NULL;
7522 GROW;
7523 if (ctxt->instate == XML_PARSER_EOF)
7524 return(NULL);
7526 if (RAW != '&')
7527 return(NULL);
7528 NEXT;
7529 name = xmlParseName(ctxt);
7530 if (name == NULL) {
7531 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7532 "xmlParseEntityRef: no name\n");
7533 return(NULL);
7535 if (RAW != ';') {
7536 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7537 return(NULL);
7539 NEXT;
7542 * Predefined entities override any extra definition
7544 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7545 ent = xmlGetPredefinedEntity(name);
7546 if (ent != NULL)
7547 return(ent);
7551 * Ask first SAX for entity resolution, otherwise try the
7552 * entities which may have stored in the parser context.
7554 if (ctxt->sax != NULL) {
7555 if (ctxt->sax->getEntity != NULL)
7556 ent = ctxt->sax->getEntity(ctxt->userData, name);
7557 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7558 (ctxt->options & XML_PARSE_OLDSAX))
7559 ent = xmlGetPredefinedEntity(name);
7560 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7561 (ctxt->userData==ctxt)) {
7562 ent = xmlSAX2GetEntity(ctxt, name);
7565 if (ctxt->instate == XML_PARSER_EOF)
7566 return(NULL);
7568 * [ WFC: Entity Declared ]
7569 * In a document without any DTD, a document with only an
7570 * internal DTD subset which contains no parameter entity
7571 * references, or a document with "standalone='yes'", the
7572 * Name given in the entity reference must match that in an
7573 * entity declaration, except that well-formed documents
7574 * need not declare any of the following entities: amp, lt,
7575 * gt, apos, quot.
7576 * The declaration of a parameter entity must precede any
7577 * reference to it.
7578 * Similarly, the declaration of a general entity must
7579 * precede any reference to it which appears in a default
7580 * value in an attribute-list declaration. Note that if
7581 * entities are declared in the external subset or in
7582 * external parameter entities, a non-validating processor
7583 * is not obligated to read and process their declarations;
7584 * for such documents, the rule that an entity must be
7585 * declared is a well-formedness constraint only if
7586 * standalone='yes'.
7588 if (ent == NULL) {
7589 if ((ctxt->standalone == 1) ||
7590 ((ctxt->hasExternalSubset == 0) &&
7591 (ctxt->hasPErefs == 0))) {
7592 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7593 "Entity '%s' not defined\n", name);
7594 } else {
7595 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7596 "Entity '%s' not defined\n", name);
7597 if ((ctxt->inSubset == 0) &&
7598 (ctxt->sax != NULL) &&
7599 (ctxt->sax->reference != NULL)) {
7600 ctxt->sax->reference(ctxt->userData, name);
7603 ctxt->valid = 0;
7607 * [ WFC: Parsed Entity ]
7608 * An entity reference must not contain the name of an
7609 * unparsed entity
7611 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7612 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7613 "Entity reference to unparsed entity %s\n", name);
7617 * [ WFC: No External Entity References ]
7618 * Attribute values cannot contain direct or indirect
7619 * entity references to external entities.
7621 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7622 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7623 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7624 "Attribute references external entity '%s'\n", name);
7627 * [ WFC: No < in Attribute Values ]
7628 * The replacement text of any entity referred to directly or
7629 * indirectly in an attribute value (other than "&lt;") must
7630 * not contain a <.
7632 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7633 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7634 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7635 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7636 ent->flags |= XML_ENT_CONTAINS_LT;
7637 ent->flags |= XML_ENT_CHECKED_LT;
7639 if (ent->flags & XML_ENT_CONTAINS_LT)
7640 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7641 "'<' in entity '%s' is not allowed in attributes "
7642 "values\n", name);
7646 * Internal check, no parameter entities here ...
7648 else {
7649 switch (ent->etype) {
7650 case XML_INTERNAL_PARAMETER_ENTITY:
7651 case XML_EXTERNAL_PARAMETER_ENTITY:
7652 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7653 "Attempt to reference the parameter entity '%s'\n",
7654 name);
7655 break;
7656 default:
7657 break;
7662 * [ WFC: No Recursion ]
7663 * A parsed entity must not contain a recursive reference
7664 * to itself, either directly or indirectly.
7665 * Done somewhere else
7667 return(ent);
7671 * xmlParseStringEntityRef:
7672 * @ctxt: an XML parser context
7673 * @str: a pointer to an index in the string
7675 * Parse an entity reference, but this version parses it from
7676 * a string value.
7678 * [68] EntityRef ::= '&' Name ';'
7680 * [ WFC: Entity Declared ]
7681 * In a document without any DTD, a document with only an internal DTD
7682 * subset which contains no parameter entity references, or a document
7683 * with "standalone='yes'", the Name given in the entity reference
7684 * must match that in an entity declaration, except that well-formed
7685 * documents need not declare any of the following entities: amp, lt,
7686 * gt, apos, quot. The declaration of a parameter entity must precede
7687 * any reference to it. Similarly, the declaration of a general entity
7688 * must precede any reference to it which appears in a default value in an
7689 * attribute-list declaration. Note that if entities are declared in the
7690 * external subset or in external parameter entities, a non-validating
7691 * processor is not obligated to read and process their declarations;
7692 * for such documents, the rule that an entity must be declared is a
7693 * well-formedness constraint only if standalone='yes'.
7695 * [ WFC: Parsed Entity ]
7696 * An entity reference must not contain the name of an unparsed entity
7698 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7699 * is updated to the current location in the string.
7701 static xmlEntityPtr
7702 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7703 xmlChar *name;
7704 const xmlChar *ptr;
7705 xmlChar cur;
7706 xmlEntityPtr ent = NULL;
7708 if ((str == NULL) || (*str == NULL))
7709 return(NULL);
7710 ptr = *str;
7711 cur = *ptr;
7712 if (cur != '&')
7713 return(NULL);
7715 ptr++;
7716 name = xmlParseStringName(ctxt, &ptr);
7717 if (name == NULL) {
7718 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7719 "xmlParseStringEntityRef: no name\n");
7720 *str = ptr;
7721 return(NULL);
7723 if (*ptr != ';') {
7724 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7725 xmlFree(name);
7726 *str = ptr;
7727 return(NULL);
7729 ptr++;
7733 * Predefined entities override any extra definition
7735 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7736 ent = xmlGetPredefinedEntity(name);
7737 if (ent != NULL) {
7738 xmlFree(name);
7739 *str = ptr;
7740 return(ent);
7745 * Ask first SAX for entity resolution, otherwise try the
7746 * entities which may have stored in the parser context.
7748 if (ctxt->sax != NULL) {
7749 if (ctxt->sax->getEntity != NULL)
7750 ent = ctxt->sax->getEntity(ctxt->userData, name);
7751 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7752 ent = xmlGetPredefinedEntity(name);
7753 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7754 ent = xmlSAX2GetEntity(ctxt, name);
7757 if (ctxt->instate == XML_PARSER_EOF) {
7758 xmlFree(name);
7759 return(NULL);
7763 * [ WFC: Entity Declared ]
7764 * In a document without any DTD, a document with only an
7765 * internal DTD subset which contains no parameter entity
7766 * references, or a document with "standalone='yes'", the
7767 * Name given in the entity reference must match that in an
7768 * entity declaration, except that well-formed documents
7769 * need not declare any of the following entities: amp, lt,
7770 * gt, apos, quot.
7771 * The declaration of a parameter entity must precede any
7772 * reference to it.
7773 * Similarly, the declaration of a general entity must
7774 * precede any reference to it which appears in a default
7775 * value in an attribute-list declaration. Note that if
7776 * entities are declared in the external subset or in
7777 * external parameter entities, a non-validating processor
7778 * is not obligated to read and process their declarations;
7779 * for such documents, the rule that an entity must be
7780 * declared is a well-formedness constraint only if
7781 * standalone='yes'.
7783 if (ent == NULL) {
7784 if ((ctxt->standalone == 1) ||
7785 ((ctxt->hasExternalSubset == 0) &&
7786 (ctxt->hasPErefs == 0))) {
7787 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7788 "Entity '%s' not defined\n", name);
7789 } else {
7790 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7791 "Entity '%s' not defined\n",
7792 name);
7794 /* TODO ? check regressions ctxt->valid = 0; */
7798 * [ WFC: Parsed Entity ]
7799 * An entity reference must not contain the name of an
7800 * unparsed entity
7802 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7803 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7804 "Entity reference to unparsed entity %s\n", name);
7808 * [ WFC: No External Entity References ]
7809 * Attribute values cannot contain direct or indirect
7810 * entity references to external entities.
7812 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7813 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7814 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7815 "Attribute references external entity '%s'\n", name);
7818 * [ WFC: No < in Attribute Values ]
7819 * The replacement text of any entity referred to directly or
7820 * indirectly in an attribute value (other than "&lt;") must
7821 * not contain a <.
7823 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7824 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7825 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7826 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7827 ent->flags |= XML_ENT_CONTAINS_LT;
7828 ent->flags |= XML_ENT_CHECKED_LT;
7830 if (ent->flags & XML_ENT_CONTAINS_LT)
7831 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7832 "'<' in entity '%s' is not allowed in attributes "
7833 "values\n", name);
7837 * Internal check, no parameter entities here ...
7839 else {
7840 switch (ent->etype) {
7841 case XML_INTERNAL_PARAMETER_ENTITY:
7842 case XML_EXTERNAL_PARAMETER_ENTITY:
7843 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7844 "Attempt to reference the parameter entity '%s'\n",
7845 name);
7846 break;
7847 default:
7848 break;
7853 * [ WFC: No Recursion ]
7854 * A parsed entity must not contain a recursive reference
7855 * to itself, either directly or indirectly.
7856 * Done somewhere else
7859 xmlFree(name);
7860 *str = ptr;
7861 return(ent);
7865 * xmlParsePEReference:
7866 * @ctxt: an XML parser context
7868 * DEPRECATED: Internal function, don't use.
7870 * Parse a parameter entity reference. Always consumes '%'.
7872 * The entity content is handled directly by pushing its content as
7873 * a new input stream.
7875 * [69] PEReference ::= '%' Name ';'
7877 * [ WFC: No Recursion ]
7878 * A parsed entity must not contain a recursive
7879 * reference to itself, either directly or indirectly.
7881 * [ WFC: Entity Declared ]
7882 * In a document without any DTD, a document with only an internal DTD
7883 * subset which contains no parameter entity references, or a document
7884 * with "standalone='yes'", ... ... The declaration of a parameter
7885 * entity must precede any reference to it...
7887 * [ VC: Entity Declared ]
7888 * In a document with an external subset or external parameter entities
7889 * with "standalone='no'", ... ... The declaration of a parameter entity
7890 * must precede any reference to it...
7892 * [ WFC: In DTD ]
7893 * Parameter-entity references may only appear in the DTD.
7894 * NOTE: misleading but this is handled.
7896 void
7897 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7899 const xmlChar *name;
7900 xmlEntityPtr entity = NULL;
7901 xmlParserInputPtr input;
7903 if (RAW != '%')
7904 return;
7905 NEXT;
7906 name = xmlParseName(ctxt);
7907 if (name == NULL) {
7908 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7909 return;
7911 if (xmlParserDebugEntities)
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PEReference: %s\n", name);
7914 if (RAW != ';') {
7915 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7916 return;
7919 NEXT;
7922 * Request the entity from SAX
7924 if ((ctxt->sax != NULL) &&
7925 (ctxt->sax->getParameterEntity != NULL))
7926 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7927 if (ctxt->instate == XML_PARSER_EOF)
7928 return;
7929 if (entity == NULL) {
7931 * [ WFC: Entity Declared ]
7932 * In a document without any DTD, a document with only an
7933 * internal DTD subset which contains no parameter entity
7934 * references, or a document with "standalone='yes'", ...
7935 * ... The declaration of a parameter entity must precede
7936 * any reference to it...
7938 if ((ctxt->standalone == 1) ||
7939 ((ctxt->hasExternalSubset == 0) &&
7940 (ctxt->hasPErefs == 0))) {
7941 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7942 "PEReference: %%%s; not found\n",
7943 name);
7944 } else {
7946 * [ VC: Entity Declared ]
7947 * In a document with an external subset or external
7948 * parameter entities with "standalone='no'", ...
7949 * ... The declaration of a parameter entity must
7950 * precede any reference to it...
7952 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7953 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7954 "PEReference: %%%s; not found\n",
7955 name, NULL);
7956 } else
7957 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7958 "PEReference: %%%s; not found\n",
7959 name, NULL);
7960 ctxt->valid = 0;
7962 } else {
7964 * Internal checking in case the entity quest barfed
7966 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7967 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7968 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7969 "Internal: %%%s; is not a parameter entity\n",
7970 name, NULL);
7971 } else {
7972 xmlChar start[4];
7973 xmlCharEncoding enc;
7974 unsigned long parentConsumed;
7975 xmlEntityPtr oldEnt;
7977 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7978 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7979 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7980 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7981 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7982 (ctxt->replaceEntities == 0) &&
7983 (ctxt->validate == 0))
7984 return;
7986 if (entity->flags & XML_ENT_EXPANDING) {
7987 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7988 xmlHaltParser(ctxt);
7989 return;
7992 /* Must be computed from old input before pushing new input. */
7993 parentConsumed = ctxt->input->parentConsumed;
7994 oldEnt = ctxt->input->entity;
7995 if ((oldEnt == NULL) ||
7996 ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7997 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7998 xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7999 xmlSaturatedAddSizeT(&parentConsumed,
8000 ctxt->input->cur - ctxt->input->base);
8003 input = xmlNewEntityInputStream(ctxt, entity);
8004 if (xmlPushInput(ctxt, input) < 0) {
8005 xmlFreeInputStream(input);
8006 return;
8009 entity->flags |= XML_ENT_EXPANDING;
8011 input->parentConsumed = parentConsumed;
8013 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8015 * Get the 4 first bytes and decode the charset
8016 * if enc != XML_CHAR_ENCODING_NONE
8017 * plug some encoding conversion routines.
8018 * Note that, since we may have some non-UTF8
8019 * encoding (like UTF16, bug 135229), the 'length'
8020 * is not known, but we can calculate based upon
8021 * the amount of data in the buffer.
8023 GROW
8024 if (ctxt->instate == XML_PARSER_EOF)
8025 return;
8026 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8027 start[0] = RAW;
8028 start[1] = NXT(1);
8029 start[2] = NXT(2);
8030 start[3] = NXT(3);
8031 enc = xmlDetectCharEncoding(start, 4);
8032 if (enc != XML_CHAR_ENCODING_NONE) {
8033 xmlSwitchEncoding(ctxt, enc);
8037 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8038 (IS_BLANK_CH(NXT(5)))) {
8039 xmlParseTextDecl(ctxt);
8044 ctxt->hasPErefs = 1;
8048 * xmlLoadEntityContent:
8049 * @ctxt: an XML parser context
8050 * @entity: an unloaded system entity
8052 * Load the original content of the given system entity from the
8053 * ExternalID/SystemID given. This is to be used for Included in Literal
8054 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8056 * Returns 0 in case of success and -1 in case of failure
8058 static int
8059 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8060 xmlParserInputPtr input;
8061 xmlBufferPtr buf;
8062 int l, c;
8064 if ((ctxt == NULL) || (entity == NULL) ||
8065 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8066 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8067 (entity->content != NULL)) {
8068 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8069 "xmlLoadEntityContent parameter error");
8070 return(-1);
8073 if (xmlParserDebugEntities)
8074 xmlGenericError(xmlGenericErrorContext,
8075 "Reading %s entity content input\n", entity->name);
8077 buf = xmlBufferCreate();
8078 if (buf == NULL) {
8079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080 "xmlLoadEntityContent parameter error");
8081 return(-1);
8083 xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8085 input = xmlNewEntityInputStream(ctxt, entity);
8086 if (input == NULL) {
8087 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8088 "xmlLoadEntityContent input error");
8089 xmlBufferFree(buf);
8090 return(-1);
8094 * Push the entity as the current input, read char by char
8095 * saving to the buffer until the end of the entity or an error
8097 if (xmlPushInput(ctxt, input) < 0) {
8098 xmlBufferFree(buf);
8099 xmlFreeInputStream(input);
8100 return(-1);
8103 GROW;
8104 c = CUR_CHAR(l);
8105 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8106 (IS_CHAR(c))) {
8107 xmlBufferAdd(buf, ctxt->input->cur, l);
8108 NEXTL(l);
8109 c = CUR_CHAR(l);
8111 if (ctxt->instate == XML_PARSER_EOF) {
8112 xmlBufferFree(buf);
8113 return(-1);
8116 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8117 xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8118 xmlPopInput(ctxt);
8119 } else if (!IS_CHAR(c)) {
8120 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8121 "xmlLoadEntityContent: invalid char value %d\n",
8123 xmlBufferFree(buf);
8124 return(-1);
8126 entity->content = buf->content;
8127 entity->length = buf->use;
8128 buf->content = NULL;
8129 xmlBufferFree(buf);
8131 return(0);
8135 * xmlParseStringPEReference:
8136 * @ctxt: an XML parser context
8137 * @str: a pointer to an index in the string
8139 * parse PEReference declarations
8141 * [69] PEReference ::= '%' Name ';'
8143 * [ WFC: No Recursion ]
8144 * A parsed entity must not contain a recursive
8145 * reference to itself, either directly or indirectly.
8147 * [ WFC: Entity Declared ]
8148 * In a document without any DTD, a document with only an internal DTD
8149 * subset which contains no parameter entity references, or a document
8150 * with "standalone='yes'", ... ... The declaration of a parameter
8151 * entity must precede any reference to it...
8153 * [ VC: Entity Declared ]
8154 * In a document with an external subset or external parameter entities
8155 * with "standalone='no'", ... ... The declaration of a parameter entity
8156 * must precede any reference to it...
8158 * [ WFC: In DTD ]
8159 * Parameter-entity references may only appear in the DTD.
8160 * NOTE: misleading but this is handled.
8162 * Returns the xmlEntityPtr if found, or NULL otherwise.
8163 * str is updated to the current value of the index
8165 static xmlEntityPtr
8166 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8167 const xmlChar *ptr;
8168 xmlChar cur;
8169 xmlChar *name;
8170 xmlEntityPtr entity = NULL;
8172 if ((str == NULL) || (*str == NULL)) return(NULL);
8173 ptr = *str;
8174 cur = *ptr;
8175 if (cur != '%')
8176 return(NULL);
8177 ptr++;
8178 name = xmlParseStringName(ctxt, &ptr);
8179 if (name == NULL) {
8180 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8181 "xmlParseStringPEReference: no name\n");
8182 *str = ptr;
8183 return(NULL);
8185 cur = *ptr;
8186 if (cur != ';') {
8187 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8188 xmlFree(name);
8189 *str = ptr;
8190 return(NULL);
8192 ptr++;
8195 * Request the entity from SAX
8197 if ((ctxt->sax != NULL) &&
8198 (ctxt->sax->getParameterEntity != NULL))
8199 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8200 if (ctxt->instate == XML_PARSER_EOF) {
8201 xmlFree(name);
8202 *str = ptr;
8203 return(NULL);
8205 if (entity == NULL) {
8207 * [ WFC: Entity Declared ]
8208 * In a document without any DTD, a document with only an
8209 * internal DTD subset which contains no parameter entity
8210 * references, or a document with "standalone='yes'", ...
8211 * ... The declaration of a parameter entity must precede
8212 * any reference to it...
8214 if ((ctxt->standalone == 1) ||
8215 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8216 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8217 "PEReference: %%%s; not found\n", name);
8218 } else {
8220 * [ VC: Entity Declared ]
8221 * In a document with an external subset or external
8222 * parameter entities with "standalone='no'", ...
8223 * ... The declaration of a parameter entity must
8224 * precede any reference to it...
8226 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8227 "PEReference: %%%s; not found\n",
8228 name, NULL);
8229 ctxt->valid = 0;
8231 } else {
8233 * Internal checking in case the entity quest barfed
8235 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8236 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8237 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8238 "%%%s; is not a parameter entity\n",
8239 name, NULL);
8242 ctxt->hasPErefs = 1;
8243 xmlFree(name);
8244 *str = ptr;
8245 return(entity);
8249 * xmlParseDocTypeDecl:
8250 * @ctxt: an XML parser context
8252 * DEPRECATED: Internal function, don't use.
8254 * parse a DOCTYPE declaration
8256 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8257 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8259 * [ VC: Root Element Type ]
8260 * The Name in the document type declaration must match the element
8261 * type of the root element.
8264 void
8265 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8266 const xmlChar *name = NULL;
8267 xmlChar *ExternalID = NULL;
8268 xmlChar *URI = NULL;
8271 * We know that '<!DOCTYPE' has been detected.
8273 SKIP(9);
8275 SKIP_BLANKS;
8278 * Parse the DOCTYPE name.
8280 name = xmlParseName(ctxt);
8281 if (name == NULL) {
8282 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8283 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8285 ctxt->intSubName = name;
8287 SKIP_BLANKS;
8290 * Check for SystemID and ExternalID
8292 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8294 if ((URI != NULL) || (ExternalID != NULL)) {
8295 ctxt->hasExternalSubset = 1;
8297 ctxt->extSubURI = URI;
8298 ctxt->extSubSystem = ExternalID;
8300 SKIP_BLANKS;
8303 * Create and update the internal subset.
8305 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8306 (!ctxt->disableSAX))
8307 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8308 if (ctxt->instate == XML_PARSER_EOF)
8309 return;
8312 * Is there any internal subset declarations ?
8313 * they are handled separately in xmlParseInternalSubset()
8315 if (RAW == '[')
8316 return;
8319 * We should be at the end of the DOCTYPE declaration.
8321 if (RAW != '>') {
8322 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8324 NEXT;
8328 * xmlParseInternalSubset:
8329 * @ctxt: an XML parser context
8331 * parse the internal subset declaration
8333 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8336 static void
8337 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8339 * Is there any DTD definition ?
8341 if (RAW == '[') {
8342 int baseInputNr = ctxt->inputNr;
8343 ctxt->instate = XML_PARSER_DTD;
8344 NEXT;
8346 * Parse the succession of Markup declarations and
8347 * PEReferences.
8348 * Subsequence (markupdecl | PEReference | S)*
8350 SKIP_BLANKS;
8351 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8352 (ctxt->instate != XML_PARSER_EOF)) {
8355 * Conditional sections are allowed from external entities included
8356 * by PE References in the internal subset.
8358 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8359 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8360 xmlParseConditionalSections(ctxt);
8361 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8362 xmlParseMarkupDecl(ctxt);
8363 } else if (RAW == '%') {
8364 xmlParsePEReference(ctxt);
8365 } else {
8366 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8367 "xmlParseInternalSubset: error detected in"
8368 " Markup declaration\n");
8369 xmlHaltParser(ctxt);
8370 return;
8372 SKIP_BLANKS;
8373 SHRINK;
8374 GROW;
8376 if (RAW == ']') {
8377 NEXT;
8378 SKIP_BLANKS;
8383 * We should be at the end of the DOCTYPE declaration.
8385 if (RAW != '>') {
8386 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8387 return;
8389 NEXT;
8392 #ifdef LIBXML_SAX1_ENABLED
8394 * xmlParseAttribute:
8395 * @ctxt: an XML parser context
8396 * @value: a xmlChar ** used to store the value of the attribute
8398 * DEPRECATED: Internal function, don't use.
8400 * parse an attribute
8402 * [41] Attribute ::= Name Eq AttValue
8404 * [ WFC: No External Entity References ]
8405 * Attribute values cannot contain direct or indirect entity references
8406 * to external entities.
8408 * [ WFC: No < in Attribute Values ]
8409 * The replacement text of any entity referred to directly or indirectly in
8410 * an attribute value (other than "&lt;") must not contain a <.
8412 * [ VC: Attribute Value Type ]
8413 * The attribute must have been declared; the value must be of the type
8414 * declared for it.
8416 * [25] Eq ::= S? '=' S?
8418 * With namespace:
8420 * [NS 11] Attribute ::= QName Eq AttValue
8422 * Also the case QName == xmlns:??? is handled independently as a namespace
8423 * definition.
8425 * Returns the attribute name, and the value in *value.
8428 const xmlChar *
8429 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8430 const xmlChar *name;
8431 xmlChar *val;
8433 *value = NULL;
8434 GROW;
8435 name = xmlParseName(ctxt);
8436 if (name == NULL) {
8437 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8438 "error parsing attribute name\n");
8439 return(NULL);
8443 * read the value
8445 SKIP_BLANKS;
8446 if (RAW == '=') {
8447 NEXT;
8448 SKIP_BLANKS;
8449 val = xmlParseAttValue(ctxt);
8450 ctxt->instate = XML_PARSER_CONTENT;
8451 } else {
8452 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8453 "Specification mandates value for attribute %s\n", name);
8454 return(name);
8458 * Check that xml:lang conforms to the specification
8459 * No more registered as an error, just generate a warning now
8460 * since this was deprecated in XML second edition
8462 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8463 if (!xmlCheckLanguageID(val)) {
8464 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8465 "Malformed value for xml:lang : %s\n",
8466 val, NULL);
8471 * Check that xml:space conforms to the specification
8473 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8474 if (xmlStrEqual(val, BAD_CAST "default"))
8475 *(ctxt->space) = 0;
8476 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8477 *(ctxt->space) = 1;
8478 else {
8479 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8480 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8481 val, NULL);
8485 *value = val;
8486 return(name);
8490 * xmlParseStartTag:
8491 * @ctxt: an XML parser context
8493 * DEPRECATED: Internal function, don't use.
8495 * Parse a start tag. Always consumes '<'.
8497 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8499 * [ WFC: Unique Att Spec ]
8500 * No attribute name may appear more than once in the same start-tag or
8501 * empty-element tag.
8503 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8505 * [ WFC: Unique Att Spec ]
8506 * No attribute name may appear more than once in the same start-tag or
8507 * empty-element tag.
8509 * With namespace:
8511 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8513 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8515 * Returns the element name parsed
8518 const xmlChar *
8519 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8520 const xmlChar *name;
8521 const xmlChar *attname;
8522 xmlChar *attvalue;
8523 const xmlChar **atts = ctxt->atts;
8524 int nbatts = 0;
8525 int maxatts = ctxt->maxatts;
8526 int i;
8528 if (RAW != '<') return(NULL);
8529 NEXT1;
8531 name = xmlParseName(ctxt);
8532 if (name == NULL) {
8533 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8534 "xmlParseStartTag: invalid element name\n");
8535 return(NULL);
8539 * Now parse the attributes, it ends up with the ending
8541 * (S Attribute)* S?
8543 SKIP_BLANKS;
8544 GROW;
8546 while (((RAW != '>') &&
8547 ((RAW != '/') || (NXT(1) != '>')) &&
8548 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8549 attname = xmlParseAttribute(ctxt, &attvalue);
8550 if (attname == NULL) {
8551 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8552 "xmlParseStartTag: problem parsing attributes\n");
8553 break;
8555 if (attvalue != NULL) {
8557 * [ WFC: Unique Att Spec ]
8558 * No attribute name may appear more than once in the same
8559 * start-tag or empty-element tag.
8561 for (i = 0; i < nbatts;i += 2) {
8562 if (xmlStrEqual(atts[i], attname)) {
8563 xmlErrAttributeDup(ctxt, NULL, attname);
8564 xmlFree(attvalue);
8565 goto failed;
8569 * Add the pair to atts
8571 if (atts == NULL) {
8572 maxatts = 22; /* allow for 10 attrs by default */
8573 atts = (const xmlChar **)
8574 xmlMalloc(maxatts * sizeof(xmlChar *));
8575 if (atts == NULL) {
8576 xmlErrMemory(ctxt, NULL);
8577 if (attvalue != NULL)
8578 xmlFree(attvalue);
8579 goto failed;
8581 ctxt->atts = atts;
8582 ctxt->maxatts = maxatts;
8583 } else if (nbatts + 4 > maxatts) {
8584 const xmlChar **n;
8586 maxatts *= 2;
8587 n = (const xmlChar **) xmlRealloc((void *) atts,
8588 maxatts * sizeof(const xmlChar *));
8589 if (n == NULL) {
8590 xmlErrMemory(ctxt, NULL);
8591 if (attvalue != NULL)
8592 xmlFree(attvalue);
8593 goto failed;
8595 atts = n;
8596 ctxt->atts = atts;
8597 ctxt->maxatts = maxatts;
8599 atts[nbatts++] = attname;
8600 atts[nbatts++] = attvalue;
8601 atts[nbatts] = NULL;
8602 atts[nbatts + 1] = NULL;
8603 } else {
8604 if (attvalue != NULL)
8605 xmlFree(attvalue);
8608 failed:
8610 GROW
8611 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8612 break;
8613 if (SKIP_BLANKS == 0) {
8614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8615 "attributes construct error\n");
8617 SHRINK;
8618 GROW;
8622 * SAX: Start of Element !
8624 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8625 (!ctxt->disableSAX)) {
8626 if (nbatts > 0)
8627 ctxt->sax->startElement(ctxt->userData, name, atts);
8628 else
8629 ctxt->sax->startElement(ctxt->userData, name, NULL);
8632 if (atts != NULL) {
8633 /* Free only the content strings */
8634 for (i = 1;i < nbatts;i+=2)
8635 if (atts[i] != NULL)
8636 xmlFree((xmlChar *) atts[i]);
8638 return(name);
8642 * xmlParseEndTag1:
8643 * @ctxt: an XML parser context
8644 * @line: line of the start tag
8645 * @nsNr: number of namespaces on the start tag
8647 * Parse an end tag. Always consumes '</'.
8649 * [42] ETag ::= '</' Name S? '>'
8651 * With namespace
8653 * [NS 9] ETag ::= '</' QName S? '>'
8656 static void
8657 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8658 const xmlChar *name;
8660 GROW;
8661 if ((RAW != '<') || (NXT(1) != '/')) {
8662 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8663 "xmlParseEndTag: '</' not found\n");
8664 return;
8666 SKIP(2);
8668 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8671 * We should definitely be at the ending "S? '>'" part
8673 GROW;
8674 SKIP_BLANKS;
8675 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8676 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8677 } else
8678 NEXT1;
8681 * [ WFC: Element Type Match ]
8682 * The Name in an element's end-tag must match the element type in the
8683 * start-tag.
8686 if (name != (xmlChar*)1) {
8687 if (name == NULL) name = BAD_CAST "unparsable";
8688 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8689 "Opening and ending tag mismatch: %s line %d and %s\n",
8690 ctxt->name, line, name);
8694 * SAX: End of Tag
8696 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8697 (!ctxt->disableSAX))
8698 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8700 namePop(ctxt);
8701 spacePop(ctxt);
8702 return;
8706 * xmlParseEndTag:
8707 * @ctxt: an XML parser context
8709 * DEPRECATED: Internal function, don't use.
8711 * parse an end of tag
8713 * [42] ETag ::= '</' Name S? '>'
8715 * With namespace
8717 * [NS 9] ETag ::= '</' QName S? '>'
8720 void
8721 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8722 xmlParseEndTag1(ctxt, 0);
8724 #endif /* LIBXML_SAX1_ENABLED */
8726 /************************************************************************
8728 * SAX 2 specific operations *
8730 ************************************************************************/
8733 * xmlGetNamespace:
8734 * @ctxt: an XML parser context
8735 * @prefix: the prefix to lookup
8737 * Lookup the namespace name for the @prefix (which ca be NULL)
8738 * The prefix must come from the @ctxt->dict dictionary
8740 * Returns the namespace name or NULL if not bound
8742 static const xmlChar *
8743 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8744 int i;
8746 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8747 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8748 if (ctxt->nsTab[i] == prefix) {
8749 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8750 return(NULL);
8751 return(ctxt->nsTab[i + 1]);
8753 return(NULL);
8757 * xmlParseQName:
8758 * @ctxt: an XML parser context
8759 * @prefix: pointer to store the prefix part
8761 * parse an XML Namespace QName
8763 * [6] QName ::= (Prefix ':')? LocalPart
8764 * [7] Prefix ::= NCName
8765 * [8] LocalPart ::= NCName
8767 * Returns the Name parsed or NULL
8770 static const xmlChar *
8771 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8772 const xmlChar *l, *p;
8774 GROW;
8775 if (ctxt->instate == XML_PARSER_EOF)
8776 return(NULL);
8778 l = xmlParseNCName(ctxt);
8779 if (l == NULL) {
8780 if (CUR == ':') {
8781 l = xmlParseName(ctxt);
8782 if (l != NULL) {
8783 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8784 "Failed to parse QName '%s'\n", l, NULL, NULL);
8785 *prefix = NULL;
8786 return(l);
8789 return(NULL);
8791 if (CUR == ':') {
8792 NEXT;
8793 p = l;
8794 l = xmlParseNCName(ctxt);
8795 if (l == NULL) {
8796 xmlChar *tmp;
8798 if (ctxt->instate == XML_PARSER_EOF)
8799 return(NULL);
8800 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8801 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8802 l = xmlParseNmtoken(ctxt);
8803 if (l == NULL) {
8804 if (ctxt->instate == XML_PARSER_EOF)
8805 return(NULL);
8806 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8807 } else {
8808 tmp = xmlBuildQName(l, p, NULL, 0);
8809 xmlFree((char *)l);
8811 p = xmlDictLookup(ctxt->dict, tmp, -1);
8812 if (tmp != NULL) xmlFree(tmp);
8813 *prefix = NULL;
8814 return(p);
8816 if (CUR == ':') {
8817 xmlChar *tmp;
8819 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8820 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8821 NEXT;
8822 tmp = (xmlChar *) xmlParseName(ctxt);
8823 if (tmp != NULL) {
8824 tmp = xmlBuildQName(tmp, l, NULL, 0);
8825 l = xmlDictLookup(ctxt->dict, tmp, -1);
8826 if (tmp != NULL) xmlFree(tmp);
8827 *prefix = p;
8828 return(l);
8830 if (ctxt->instate == XML_PARSER_EOF)
8831 return(NULL);
8832 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8833 l = xmlDictLookup(ctxt->dict, tmp, -1);
8834 if (tmp != NULL) xmlFree(tmp);
8835 *prefix = p;
8836 return(l);
8838 *prefix = p;
8839 } else
8840 *prefix = NULL;
8841 return(l);
8845 * xmlParseQNameAndCompare:
8846 * @ctxt: an XML parser context
8847 * @name: the localname
8848 * @prefix: the prefix, if any.
8850 * parse an XML name and compares for match
8851 * (specialized for endtag parsing)
8853 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8854 * and the name for mismatch
8857 static const xmlChar *
8858 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8859 xmlChar const *prefix) {
8860 const xmlChar *cmp;
8861 const xmlChar *in;
8862 const xmlChar *ret;
8863 const xmlChar *prefix2;
8865 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8867 GROW;
8868 in = ctxt->input->cur;
8870 cmp = prefix;
8871 while (*in != 0 && *in == *cmp) {
8872 ++in;
8873 ++cmp;
8875 if ((*cmp == 0) && (*in == ':')) {
8876 in++;
8877 cmp = name;
8878 while (*in != 0 && *in == *cmp) {
8879 ++in;
8880 ++cmp;
8882 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8883 /* success */
8884 ctxt->input->col += in - ctxt->input->cur;
8885 ctxt->input->cur = in;
8886 return((const xmlChar*) 1);
8890 * all strings coms from the dictionary, equality can be done directly
8892 ret = xmlParseQName (ctxt, &prefix2);
8893 if ((ret == name) && (prefix == prefix2))
8894 return((const xmlChar*) 1);
8895 return ret;
8899 * xmlParseAttValueInternal:
8900 * @ctxt: an XML parser context
8901 * @len: attribute len result
8902 * @alloc: whether the attribute was reallocated as a new string
8903 * @normalize: if 1 then further non-CDATA normalization must be done
8905 * parse a value for an attribute.
8906 * NOTE: if no normalization is needed, the routine will return pointers
8907 * directly from the data buffer.
8909 * 3.3.3 Attribute-Value Normalization:
8910 * Before the value of an attribute is passed to the application or
8911 * checked for validity, the XML processor must normalize it as follows:
8912 * - a character reference is processed by appending the referenced
8913 * character to the attribute value
8914 * - an entity reference is processed by recursively processing the
8915 * replacement text of the entity
8916 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8917 * appending #x20 to the normalized value, except that only a single
8918 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8919 * parsed entity or the literal entity value of an internal parsed entity
8920 * - other characters are processed by appending them to the normalized value
8921 * If the declared value is not CDATA, then the XML processor must further
8922 * process the normalized attribute value by discarding any leading and
8923 * trailing space (#x20) characters, and by replacing sequences of space
8924 * (#x20) characters by a single space (#x20) character.
8925 * All attributes for which no declaration has been read should be treated
8926 * by a non-validating parser as if declared CDATA.
8928 * Returns the AttValue parsed or NULL. The value has to be freed by the
8929 * caller if it was copied, this can be detected by val[*len] == 0.
8932 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8933 const xmlChar *oldbase = ctxt->input->base;\
8934 GROW;\
8935 if (ctxt->instate == XML_PARSER_EOF)\
8936 return(NULL);\
8937 if (oldbase != ctxt->input->base) {\
8938 ptrdiff_t delta = ctxt->input->base - oldbase;\
8939 start = start + delta;\
8940 in = in + delta;\
8942 end = ctxt->input->end;
8944 static xmlChar *
8945 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8946 int normalize)
8948 xmlChar limit = 0;
8949 const xmlChar *in = NULL, *start, *end, *last;
8950 xmlChar *ret = NULL;
8951 int line, col;
8952 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8953 XML_MAX_HUGE_LENGTH :
8954 XML_MAX_TEXT_LENGTH;
8956 GROW;
8957 in = (xmlChar *) CUR_PTR;
8958 line = ctxt->input->line;
8959 col = ctxt->input->col;
8960 if (*in != '"' && *in != '\'') {
8961 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8962 return (NULL);
8964 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8967 * try to handle in this routine the most common case where no
8968 * allocation of a new string is required and where content is
8969 * pure ASCII.
8971 limit = *in++;
8972 col++;
8973 end = ctxt->input->end;
8974 start = in;
8975 if (in >= end) {
8976 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8978 if (normalize) {
8980 * Skip any leading spaces
8982 while ((in < end) && (*in != limit) &&
8983 ((*in == 0x20) || (*in == 0x9) ||
8984 (*in == 0xA) || (*in == 0xD))) {
8985 if (*in == 0xA) {
8986 line++; col = 1;
8987 } else {
8988 col++;
8990 in++;
8991 start = in;
8992 if (in >= end) {
8993 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8994 if ((in - start) > maxLength) {
8995 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8996 "AttValue length too long\n");
8997 return(NULL);
9001 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9002 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9003 col++;
9004 if ((*in++ == 0x20) && (*in == 0x20)) break;
9005 if (in >= end) {
9006 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9007 if ((in - start) > maxLength) {
9008 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9009 "AttValue length too long\n");
9010 return(NULL);
9014 last = in;
9016 * skip the trailing blanks
9018 while ((last[-1] == 0x20) && (last > start)) last--;
9019 while ((in < end) && (*in != limit) &&
9020 ((*in == 0x20) || (*in == 0x9) ||
9021 (*in == 0xA) || (*in == 0xD))) {
9022 if (*in == 0xA) {
9023 line++, col = 1;
9024 } else {
9025 col++;
9027 in++;
9028 if (in >= end) {
9029 const xmlChar *oldbase = ctxt->input->base;
9030 GROW;
9031 if (ctxt->instate == XML_PARSER_EOF)
9032 return(NULL);
9033 if (oldbase != ctxt->input->base) {
9034 ptrdiff_t delta = ctxt->input->base - oldbase;
9035 start = start + delta;
9036 in = in + delta;
9037 last = last + delta;
9039 end = ctxt->input->end;
9040 if ((in - start) > maxLength) {
9041 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9042 "AttValue length too long\n");
9043 return(NULL);
9047 if ((in - start) > maxLength) {
9048 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9049 "AttValue length too long\n");
9050 return(NULL);
9052 if (*in != limit) goto need_complex;
9053 } else {
9054 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9055 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9056 in++;
9057 col++;
9058 if (in >= end) {
9059 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9060 if ((in - start) > maxLength) {
9061 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9062 "AttValue length too long\n");
9063 return(NULL);
9067 last = in;
9068 if ((in - start) > maxLength) {
9069 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9070 "AttValue length too long\n");
9071 return(NULL);
9073 if (*in != limit) goto need_complex;
9075 in++;
9076 col++;
9077 if (len != NULL) {
9078 if (alloc) *alloc = 0;
9079 *len = last - start;
9080 ret = (xmlChar *) start;
9081 } else {
9082 if (alloc) *alloc = 1;
9083 ret = xmlStrndup(start, last - start);
9085 CUR_PTR = in;
9086 ctxt->input->line = line;
9087 ctxt->input->col = col;
9088 return ret;
9089 need_complex:
9090 if (alloc) *alloc = 1;
9091 return xmlParseAttValueComplex(ctxt, len, normalize);
9095 * xmlParseAttribute2:
9096 * @ctxt: an XML parser context
9097 * @pref: the element prefix
9098 * @elem: the element name
9099 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9100 * @value: a xmlChar ** used to store the value of the attribute
9101 * @len: an int * to save the length of the attribute
9102 * @alloc: an int * to indicate if the attribute was allocated
9104 * parse an attribute in the new SAX2 framework.
9106 * Returns the attribute name, and the value in *value, .
9109 static const xmlChar *
9110 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9111 const xmlChar * pref, const xmlChar * elem,
9112 const xmlChar ** prefix, xmlChar ** value,
9113 int *len, int *alloc)
9115 const xmlChar *name;
9116 xmlChar *val, *internal_val = NULL;
9117 int normalize = 0;
9119 *value = NULL;
9120 GROW;
9121 name = xmlParseQName(ctxt, prefix);
9122 if (name == NULL) {
9123 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9124 "error parsing attribute name\n");
9125 return (NULL);
9129 * get the type if needed
9131 if (ctxt->attsSpecial != NULL) {
9132 int type;
9134 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9135 pref, elem, *prefix, name);
9136 if (type != 0)
9137 normalize = 1;
9141 * read the value
9143 SKIP_BLANKS;
9144 if (RAW == '=') {
9145 NEXT;
9146 SKIP_BLANKS;
9147 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9148 if (val == NULL)
9149 return (NULL);
9150 if (normalize) {
9152 * Sometimes a second normalisation pass for spaces is needed
9153 * but that only happens if charrefs or entities references
9154 * have been used in the attribute value, i.e. the attribute
9155 * value have been extracted in an allocated string already.
9157 if (*alloc) {
9158 const xmlChar *val2;
9160 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9161 if ((val2 != NULL) && (val2 != val)) {
9162 xmlFree(val);
9163 val = (xmlChar *) val2;
9167 ctxt->instate = XML_PARSER_CONTENT;
9168 } else {
9169 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9170 "Specification mandates value for attribute %s\n",
9171 name);
9172 return (name);
9175 if (*prefix == ctxt->str_xml) {
9177 * Check that xml:lang conforms to the specification
9178 * No more registered as an error, just generate a warning now
9179 * since this was deprecated in XML second edition
9181 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9182 internal_val = xmlStrndup(val, *len);
9183 if (!xmlCheckLanguageID(internal_val)) {
9184 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9185 "Malformed value for xml:lang : %s\n",
9186 internal_val, NULL);
9191 * Check that xml:space conforms to the specification
9193 if (xmlStrEqual(name, BAD_CAST "space")) {
9194 internal_val = xmlStrndup(val, *len);
9195 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9196 *(ctxt->space) = 0;
9197 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9198 *(ctxt->space) = 1;
9199 else {
9200 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9201 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9202 internal_val, NULL);
9205 if (internal_val) {
9206 xmlFree(internal_val);
9210 *value = val;
9211 return (name);
9214 * xmlParseStartTag2:
9215 * @ctxt: an XML parser context
9217 * Parse a start tag. Always consumes '<'.
9219 * This routine is called when running SAX2 parsing
9221 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9223 * [ WFC: Unique Att Spec ]
9224 * No attribute name may appear more than once in the same start-tag or
9225 * empty-element tag.
9227 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9229 * [ WFC: Unique Att Spec ]
9230 * No attribute name may appear more than once in the same start-tag or
9231 * empty-element tag.
9233 * With namespace:
9235 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9237 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9239 * Returns the element name parsed
9242 static const xmlChar *
9243 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9244 const xmlChar **URI, int *tlen) {
9245 const xmlChar *localname;
9246 const xmlChar *prefix;
9247 const xmlChar *attname;
9248 const xmlChar *aprefix;
9249 const xmlChar *nsname;
9250 xmlChar *attvalue;
9251 const xmlChar **atts = ctxt->atts;
9252 int maxatts = ctxt->maxatts;
9253 int nratts, nbatts, nbdef, inputid;
9254 int i, j, nbNs, attval;
9255 size_t cur;
9256 int nsNr = ctxt->nsNr;
9258 if (RAW != '<') return(NULL);
9259 NEXT1;
9261 cur = ctxt->input->cur - ctxt->input->base;
9262 inputid = ctxt->input->id;
9263 nbatts = 0;
9264 nratts = 0;
9265 nbdef = 0;
9266 nbNs = 0;
9267 attval = 0;
9268 /* Forget any namespaces added during an earlier parse of this element. */
9269 ctxt->nsNr = nsNr;
9271 localname = xmlParseQName(ctxt, &prefix);
9272 if (localname == NULL) {
9273 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9274 "StartTag: invalid element name\n");
9275 return(NULL);
9277 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9280 * Now parse the attributes, it ends up with the ending
9282 * (S Attribute)* S?
9284 SKIP_BLANKS;
9285 GROW;
9287 while (((RAW != '>') &&
9288 ((RAW != '/') || (NXT(1) != '>')) &&
9289 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9290 int len = -1, alloc = 0;
9292 attname = xmlParseAttribute2(ctxt, prefix, localname,
9293 &aprefix, &attvalue, &len, &alloc);
9294 if (attname == NULL) {
9295 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9296 "xmlParseStartTag: problem parsing attributes\n");
9297 break;
9299 if (attvalue == NULL)
9300 goto next_attr;
9301 if (len < 0) len = xmlStrlen(attvalue);
9303 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9304 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9305 xmlURIPtr uri;
9307 if (URL == NULL) {
9308 xmlErrMemory(ctxt, "dictionary allocation failure");
9309 if ((attvalue != NULL) && (alloc != 0))
9310 xmlFree(attvalue);
9311 localname = NULL;
9312 goto done;
9314 if (*URL != 0) {
9315 uri = xmlParseURI((const char *) URL);
9316 if (uri == NULL) {
9317 xmlNsErr(ctxt, XML_WAR_NS_URI,
9318 "xmlns: '%s' is not a valid URI\n",
9319 URL, NULL, NULL);
9320 } else {
9321 if (uri->scheme == NULL) {
9322 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9323 "xmlns: URI %s is not absolute\n",
9324 URL, NULL, NULL);
9326 xmlFreeURI(uri);
9328 if (URL == ctxt->str_xml_ns) {
9329 if (attname != ctxt->str_xml) {
9330 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9331 "xml namespace URI cannot be the default namespace\n",
9332 NULL, NULL, NULL);
9334 goto next_attr;
9336 if ((len == 29) &&
9337 (xmlStrEqual(URL,
9338 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9339 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9340 "reuse of the xmlns namespace name is forbidden\n",
9341 NULL, NULL, NULL);
9342 goto next_attr;
9346 * check that it's not a defined namespace
9348 for (j = 1;j <= nbNs;j++)
9349 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9350 break;
9351 if (j <= nbNs)
9352 xmlErrAttributeDup(ctxt, NULL, attname);
9353 else
9354 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9356 } else if (aprefix == ctxt->str_xmlns) {
9357 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9358 xmlURIPtr uri;
9360 if (attname == ctxt->str_xml) {
9361 if (URL != ctxt->str_xml_ns) {
9362 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9363 "xml namespace prefix mapped to wrong URI\n",
9364 NULL, NULL, NULL);
9367 * Do not keep a namespace definition node
9369 goto next_attr;
9371 if (URL == ctxt->str_xml_ns) {
9372 if (attname != ctxt->str_xml) {
9373 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374 "xml namespace URI mapped to wrong prefix\n",
9375 NULL, NULL, NULL);
9377 goto next_attr;
9379 if (attname == ctxt->str_xmlns) {
9380 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9381 "redefinition of the xmlns prefix is forbidden\n",
9382 NULL, NULL, NULL);
9383 goto next_attr;
9385 if ((len == 29) &&
9386 (xmlStrEqual(URL,
9387 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9388 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9389 "reuse of the xmlns namespace name is forbidden\n",
9390 NULL, NULL, NULL);
9391 goto next_attr;
9393 if ((URL == NULL) || (URL[0] == 0)) {
9394 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9395 "xmlns:%s: Empty XML namespace is not allowed\n",
9396 attname, NULL, NULL);
9397 goto next_attr;
9398 } else {
9399 uri = xmlParseURI((const char *) URL);
9400 if (uri == NULL) {
9401 xmlNsErr(ctxt, XML_WAR_NS_URI,
9402 "xmlns:%s: '%s' is not a valid URI\n",
9403 attname, URL, NULL);
9404 } else {
9405 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9406 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9407 "xmlns:%s: URI %s is not absolute\n",
9408 attname, URL, NULL);
9410 xmlFreeURI(uri);
9415 * check that it's not a defined namespace
9417 for (j = 1;j <= nbNs;j++)
9418 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9419 break;
9420 if (j <= nbNs)
9421 xmlErrAttributeDup(ctxt, aprefix, attname);
9422 else
9423 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9425 } else {
9427 * Add the pair to atts
9429 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9430 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9431 goto next_attr;
9433 maxatts = ctxt->maxatts;
9434 atts = ctxt->atts;
9436 ctxt->attallocs[nratts++] = alloc;
9437 atts[nbatts++] = attname;
9438 atts[nbatts++] = aprefix;
9440 * The namespace URI field is used temporarily to point at the
9441 * base of the current input buffer for non-alloced attributes.
9442 * When the input buffer is reallocated, all the pointers become
9443 * invalid, but they can be reconstructed later.
9445 if (alloc)
9446 atts[nbatts++] = NULL;
9447 else
9448 atts[nbatts++] = ctxt->input->base;
9449 atts[nbatts++] = attvalue;
9450 attvalue += len;
9451 atts[nbatts++] = attvalue;
9453 * tag if some deallocation is needed
9455 if (alloc != 0) attval = 1;
9456 attvalue = NULL; /* moved into atts */
9459 next_attr:
9460 if ((attvalue != NULL) && (alloc != 0)) {
9461 xmlFree(attvalue);
9462 attvalue = NULL;
9465 GROW
9466 if (ctxt->instate == XML_PARSER_EOF)
9467 break;
9468 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9469 break;
9470 if (SKIP_BLANKS == 0) {
9471 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9472 "attributes construct error\n");
9473 break;
9475 GROW;
9478 if (ctxt->input->id != inputid) {
9479 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9480 "Unexpected change of input\n");
9481 localname = NULL;
9482 goto done;
9485 /* Reconstruct attribute value pointers. */
9486 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9487 if (atts[i+2] != NULL) {
9489 * Arithmetic on dangling pointers is technically undefined
9490 * behavior, but well...
9492 const xmlChar *old = atts[i+2];
9493 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9494 atts[i+3] = ctxt->input->base + (atts[i+3] - old); /* value */
9495 atts[i+4] = ctxt->input->base + (atts[i+4] - old); /* valuend */
9500 * The attributes defaulting
9502 if (ctxt->attsDefault != NULL) {
9503 xmlDefAttrsPtr defaults;
9505 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9506 if (defaults != NULL) {
9507 for (i = 0;i < defaults->nbAttrs;i++) {
9508 attname = defaults->values[5 * i];
9509 aprefix = defaults->values[5 * i + 1];
9512 * special work for namespaces defaulted defs
9514 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9516 * check that it's not a defined namespace
9518 for (j = 1;j <= nbNs;j++)
9519 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9520 break;
9521 if (j <= nbNs) continue;
9523 nsname = xmlGetNamespace(ctxt, NULL);
9524 if (nsname != defaults->values[5 * i + 2]) {
9525 if (nsPush(ctxt, NULL,
9526 defaults->values[5 * i + 2]) > 0)
9527 nbNs++;
9529 } else if (aprefix == ctxt->str_xmlns) {
9531 * check that it's not a defined namespace
9533 for (j = 1;j <= nbNs;j++)
9534 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9535 break;
9536 if (j <= nbNs) continue;
9538 nsname = xmlGetNamespace(ctxt, attname);
9539 if (nsname != defaults->values[5 * i + 2]) {
9540 if (nsPush(ctxt, attname,
9541 defaults->values[5 * i + 2]) > 0)
9542 nbNs++;
9544 } else {
9546 * check that it's not a defined attribute
9548 for (j = 0;j < nbatts;j+=5) {
9549 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9550 break;
9552 if (j < nbatts) continue;
9554 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9555 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9556 localname = NULL;
9557 goto done;
9559 maxatts = ctxt->maxatts;
9560 atts = ctxt->atts;
9562 atts[nbatts++] = attname;
9563 atts[nbatts++] = aprefix;
9564 if (aprefix == NULL)
9565 atts[nbatts++] = NULL;
9566 else
9567 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9568 atts[nbatts++] = defaults->values[5 * i + 2];
9569 atts[nbatts++] = defaults->values[5 * i + 3];
9570 if ((ctxt->standalone == 1) &&
9571 (defaults->values[5 * i + 4] != NULL)) {
9572 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9573 "standalone: attribute %s on %s defaulted from external subset\n",
9574 attname, localname);
9576 nbdef++;
9583 * The attributes checkings
9585 for (i = 0; i < nbatts;i += 5) {
9587 * The default namespace does not apply to attribute names.
9589 if (atts[i + 1] != NULL) {
9590 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9591 if (nsname == NULL) {
9592 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9593 "Namespace prefix %s for %s on %s is not defined\n",
9594 atts[i + 1], atts[i], localname);
9596 atts[i + 2] = nsname;
9597 } else
9598 nsname = NULL;
9600 * [ WFC: Unique Att Spec ]
9601 * No attribute name may appear more than once in the same
9602 * start-tag or empty-element tag.
9603 * As extended by the Namespace in XML REC.
9605 for (j = 0; j < i;j += 5) {
9606 if (atts[i] == atts[j]) {
9607 if (atts[i+1] == atts[j+1]) {
9608 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9609 break;
9611 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9612 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9613 "Namespaced Attribute %s in '%s' redefined\n",
9614 atts[i], nsname, NULL);
9615 break;
9621 nsname = xmlGetNamespace(ctxt, prefix);
9622 if ((prefix != NULL) && (nsname == NULL)) {
9623 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9624 "Namespace prefix %s on %s is not defined\n",
9625 prefix, localname, NULL);
9627 *pref = prefix;
9628 *URI = nsname;
9631 * SAX: Start of Element !
9633 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9634 (!ctxt->disableSAX)) {
9635 if (nbNs > 0)
9636 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9637 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9638 nbatts / 5, nbdef, atts);
9639 else
9640 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9641 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9644 done:
9646 * Free up attribute allocated strings if needed
9648 if (attval != 0) {
9649 for (i = 3,j = 0; j < nratts;i += 5,j++)
9650 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9651 xmlFree((xmlChar *) atts[i]);
9654 return(localname);
9658 * xmlParseEndTag2:
9659 * @ctxt: an XML parser context
9660 * @line: line of the start tag
9661 * @nsNr: number of namespaces on the start tag
9663 * Parse an end tag. Always consumes '</'.
9665 * [42] ETag ::= '</' Name S? '>'
9667 * With namespace
9669 * [NS 9] ETag ::= '</' QName S? '>'
9672 static void
9673 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9674 const xmlChar *name;
9676 GROW;
9677 if ((RAW != '<') || (NXT(1) != '/')) {
9678 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9679 return;
9681 SKIP(2);
9683 if (tag->prefix == NULL)
9684 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9685 else
9686 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9689 * We should definitely be at the ending "S? '>'" part
9691 GROW;
9692 if (ctxt->instate == XML_PARSER_EOF)
9693 return;
9694 SKIP_BLANKS;
9695 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9696 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9697 } else
9698 NEXT1;
9701 * [ WFC: Element Type Match ]
9702 * The Name in an element's end-tag must match the element type in the
9703 * start-tag.
9706 if (name != (xmlChar*)1) {
9707 if (name == NULL) name = BAD_CAST "unparsable";
9708 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9709 "Opening and ending tag mismatch: %s line %d and %s\n",
9710 ctxt->name, tag->line, name);
9714 * SAX: End of Tag
9716 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9717 (!ctxt->disableSAX))
9718 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9719 tag->URI);
9721 spacePop(ctxt);
9722 if (tag->nsNr != 0)
9723 nsPop(ctxt, tag->nsNr);
9727 * xmlParseCDSect:
9728 * @ctxt: an XML parser context
9730 * DEPRECATED: Internal function, don't use.
9732 * Parse escaped pure raw content. Always consumes '<!['.
9734 * [18] CDSect ::= CDStart CData CDEnd
9736 * [19] CDStart ::= '<![CDATA['
9738 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9740 * [21] CDEnd ::= ']]>'
9742 void
9743 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9744 xmlChar *buf = NULL;
9745 int len = 0;
9746 int size = XML_PARSER_BUFFER_SIZE;
9747 int r, rl;
9748 int s, sl;
9749 int cur, l;
9750 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9751 XML_MAX_HUGE_LENGTH :
9752 XML_MAX_TEXT_LENGTH;
9754 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9755 return;
9756 SKIP(3);
9758 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9759 return;
9760 SKIP(6);
9762 ctxt->instate = XML_PARSER_CDATA_SECTION;
9763 r = CUR_CHAR(rl);
9764 if (!IS_CHAR(r)) {
9765 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9766 goto out;
9768 NEXTL(rl);
9769 s = CUR_CHAR(sl);
9770 if (!IS_CHAR(s)) {
9771 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9772 goto out;
9774 NEXTL(sl);
9775 cur = CUR_CHAR(l);
9776 buf = (xmlChar *) xmlMallocAtomic(size);
9777 if (buf == NULL) {
9778 xmlErrMemory(ctxt, NULL);
9779 goto out;
9781 while (IS_CHAR(cur) &&
9782 ((r != ']') || (s != ']') || (cur != '>'))) {
9783 if (len + 5 >= size) {
9784 xmlChar *tmp;
9786 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9787 if (tmp == NULL) {
9788 xmlErrMemory(ctxt, NULL);
9789 goto out;
9791 buf = tmp;
9792 size *= 2;
9794 COPY_BUF(rl,buf,len,r);
9795 if (len > maxLength) {
9796 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9797 "CData section too big found\n");
9798 goto out;
9800 r = s;
9801 rl = sl;
9802 s = cur;
9803 sl = l;
9804 NEXTL(l);
9805 cur = CUR_CHAR(l);
9807 buf[len] = 0;
9808 if (ctxt->instate == XML_PARSER_EOF) {
9809 xmlFree(buf);
9810 return;
9812 if (cur != '>') {
9813 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9814 "CData section not finished\n%.50s\n", buf);
9815 goto out;
9817 NEXTL(l);
9820 * OK the buffer is to be consumed as cdata.
9822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9823 if (ctxt->sax->cdataBlock != NULL)
9824 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9825 else if (ctxt->sax->characters != NULL)
9826 ctxt->sax->characters(ctxt->userData, buf, len);
9829 out:
9830 if (ctxt->instate != XML_PARSER_EOF)
9831 ctxt->instate = XML_PARSER_CONTENT;
9832 xmlFree(buf);
9836 * xmlParseContentInternal:
9837 * @ctxt: an XML parser context
9839 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9840 * unexpected EOF to the caller.
9843 static void
9844 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9845 int nameNr = ctxt->nameNr;
9847 GROW;
9848 while ((RAW != 0) &&
9849 (ctxt->instate != XML_PARSER_EOF)) {
9850 const xmlChar *cur = ctxt->input->cur;
9853 * First case : a Processing Instruction.
9855 if ((*cur == '<') && (cur[1] == '?')) {
9856 xmlParsePI(ctxt);
9860 * Second case : a CDSection
9862 /* 2.6.0 test was *cur not RAW */
9863 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9864 xmlParseCDSect(ctxt);
9868 * Third case : a comment
9870 else if ((*cur == '<') && (NXT(1) == '!') &&
9871 (NXT(2) == '-') && (NXT(3) == '-')) {
9872 xmlParseComment(ctxt);
9873 ctxt->instate = XML_PARSER_CONTENT;
9877 * Fourth case : a sub-element.
9879 else if (*cur == '<') {
9880 if (NXT(1) == '/') {
9881 if (ctxt->nameNr <= nameNr)
9882 break;
9883 xmlParseElementEnd(ctxt);
9884 } else {
9885 xmlParseElementStart(ctxt);
9890 * Fifth case : a reference. If if has not been resolved,
9891 * parsing returns it's Name, create the node
9894 else if (*cur == '&') {
9895 xmlParseReference(ctxt);
9899 * Last case, text. Note that References are handled directly.
9901 else {
9902 xmlParseCharDataInternal(ctxt, 0);
9905 SHRINK;
9906 GROW;
9911 * xmlParseContent:
9912 * @ctxt: an XML parser context
9914 * Parse a content sequence. Stops at EOF or '</'.
9916 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9919 void
9920 xmlParseContent(xmlParserCtxtPtr ctxt) {
9921 int nameNr = ctxt->nameNr;
9923 xmlParseContentInternal(ctxt);
9925 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9926 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9927 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9928 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9929 "Premature end of data in tag %s line %d\n",
9930 name, line, NULL);
9935 * xmlParseElement:
9936 * @ctxt: an XML parser context
9938 * DEPRECATED: Internal function, don't use.
9940 * parse an XML element
9942 * [39] element ::= EmptyElemTag | STag content ETag
9944 * [ WFC: Element Type Match ]
9945 * The Name in an element's end-tag must match the element type in the
9946 * start-tag.
9950 void
9951 xmlParseElement(xmlParserCtxtPtr ctxt) {
9952 if (xmlParseElementStart(ctxt) != 0)
9953 return;
9955 xmlParseContentInternal(ctxt);
9956 if (ctxt->instate == XML_PARSER_EOF)
9957 return;
9959 if (CUR == 0) {
9960 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9961 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9962 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9963 "Premature end of data in tag %s line %d\n",
9964 name, line, NULL);
9965 return;
9968 xmlParseElementEnd(ctxt);
9972 * xmlParseElementStart:
9973 * @ctxt: an XML parser context
9975 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9976 * opening tag was parsed, 1 if an empty element was parsed.
9978 * Always consumes '<'.
9980 static int
9981 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9982 const xmlChar *name;
9983 const xmlChar *prefix = NULL;
9984 const xmlChar *URI = NULL;
9985 xmlParserNodeInfo node_info;
9986 int line, tlen = 0;
9987 xmlNodePtr cur;
9988 int nsNr = ctxt->nsNr;
9990 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9991 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9992 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9993 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9994 xmlParserMaxDepth);
9995 xmlHaltParser(ctxt);
9996 return(-1);
9999 /* Capture start position */
10000 if (ctxt->record_info) {
10001 node_info.begin_pos = ctxt->input->consumed +
10002 (CUR_PTR - ctxt->input->base);
10003 node_info.begin_line = ctxt->input->line;
10006 if (ctxt->spaceNr == 0)
10007 spacePush(ctxt, -1);
10008 else if (*ctxt->space == -2)
10009 spacePush(ctxt, -1);
10010 else
10011 spacePush(ctxt, *ctxt->space);
10013 line = ctxt->input->line;
10014 #ifdef LIBXML_SAX1_ENABLED
10015 if (ctxt->sax2)
10016 #endif /* LIBXML_SAX1_ENABLED */
10017 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10018 #ifdef LIBXML_SAX1_ENABLED
10019 else
10020 name = xmlParseStartTag(ctxt);
10021 #endif /* LIBXML_SAX1_ENABLED */
10022 if (ctxt->instate == XML_PARSER_EOF)
10023 return(-1);
10024 if (name == NULL) {
10025 spacePop(ctxt);
10026 return(-1);
10028 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10029 cur = ctxt->node;
10031 #ifdef LIBXML_VALID_ENABLED
10033 * [ VC: Root Element Type ]
10034 * The Name in the document type declaration must match the element
10035 * type of the root element.
10037 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10038 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10039 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10040 #endif /* LIBXML_VALID_ENABLED */
10043 * Check for an Empty Element.
10045 if ((RAW == '/') && (NXT(1) == '>')) {
10046 SKIP(2);
10047 if (ctxt->sax2) {
10048 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10049 (!ctxt->disableSAX))
10050 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10051 #ifdef LIBXML_SAX1_ENABLED
10052 } else {
10053 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10054 (!ctxt->disableSAX))
10055 ctxt->sax->endElement(ctxt->userData, name);
10056 #endif /* LIBXML_SAX1_ENABLED */
10058 namePop(ctxt);
10059 spacePop(ctxt);
10060 if (nsNr != ctxt->nsNr)
10061 nsPop(ctxt, ctxt->nsNr - nsNr);
10062 if (cur != NULL && ctxt->record_info) {
10063 node_info.node = cur;
10064 node_info.end_pos = ctxt->input->consumed +
10065 (CUR_PTR - ctxt->input->base);
10066 node_info.end_line = ctxt->input->line;
10067 xmlParserAddNodeInfo(ctxt, &node_info);
10069 return(1);
10071 if (RAW == '>') {
10072 NEXT1;
10073 if (cur != NULL && ctxt->record_info) {
10074 node_info.node = cur;
10075 node_info.end_pos = 0;
10076 node_info.end_line = 0;
10077 xmlParserAddNodeInfo(ctxt, &node_info);
10079 } else {
10080 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10081 "Couldn't find end of Start Tag %s line %d\n",
10082 name, line, NULL);
10085 * end of parsing of this node.
10087 nodePop(ctxt);
10088 namePop(ctxt);
10089 spacePop(ctxt);
10090 if (nsNr != ctxt->nsNr)
10091 nsPop(ctxt, ctxt->nsNr - nsNr);
10092 return(-1);
10095 return(0);
10099 * xmlParseElementEnd:
10100 * @ctxt: an XML parser context
10102 * Parse the end of an XML element. Always consumes '</'.
10104 static void
10105 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10106 xmlNodePtr cur = ctxt->node;
10108 if (ctxt->nameNr <= 0) {
10109 if ((RAW == '<') && (NXT(1) == '/'))
10110 SKIP(2);
10111 return;
10115 * parse the end of tag: '</' should be here.
10117 if (ctxt->sax2) {
10118 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10119 namePop(ctxt);
10121 #ifdef LIBXML_SAX1_ENABLED
10122 else
10123 xmlParseEndTag1(ctxt, 0);
10124 #endif /* LIBXML_SAX1_ENABLED */
10127 * Capture end position
10129 if (cur != NULL && ctxt->record_info) {
10130 xmlParserNodeInfoPtr node_info;
10132 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10133 if (node_info != NULL) {
10134 node_info->end_pos = ctxt->input->consumed +
10135 (CUR_PTR - ctxt->input->base);
10136 node_info->end_line = ctxt->input->line;
10142 * xmlParseVersionNum:
10143 * @ctxt: an XML parser context
10145 * DEPRECATED: Internal function, don't use.
10147 * parse the XML version value.
10149 * [26] VersionNum ::= '1.' [0-9]+
10151 * In practice allow [0-9].[0-9]+ at that level
10153 * Returns the string giving the XML version number, or NULL
10155 xmlChar *
10156 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10157 xmlChar *buf = NULL;
10158 int len = 0;
10159 int size = 10;
10160 xmlChar cur;
10162 buf = (xmlChar *) xmlMallocAtomic(size);
10163 if (buf == NULL) {
10164 xmlErrMemory(ctxt, NULL);
10165 return(NULL);
10167 cur = CUR;
10168 if (!((cur >= '0') && (cur <= '9'))) {
10169 xmlFree(buf);
10170 return(NULL);
10172 buf[len++] = cur;
10173 NEXT;
10174 cur=CUR;
10175 if (cur != '.') {
10176 xmlFree(buf);
10177 return(NULL);
10179 buf[len++] = cur;
10180 NEXT;
10181 cur=CUR;
10182 while ((cur >= '0') && (cur <= '9')) {
10183 if (len + 1 >= size) {
10184 xmlChar *tmp;
10186 size *= 2;
10187 tmp = (xmlChar *) xmlRealloc(buf, size);
10188 if (tmp == NULL) {
10189 xmlFree(buf);
10190 xmlErrMemory(ctxt, NULL);
10191 return(NULL);
10193 buf = tmp;
10195 buf[len++] = cur;
10196 NEXT;
10197 cur=CUR;
10199 buf[len] = 0;
10200 return(buf);
10204 * xmlParseVersionInfo:
10205 * @ctxt: an XML parser context
10207 * DEPRECATED: Internal function, don't use.
10209 * parse the XML version.
10211 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10213 * [25] Eq ::= S? '=' S?
10215 * Returns the version string, e.g. "1.0"
10218 xmlChar *
10219 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10220 xmlChar *version = NULL;
10222 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10223 SKIP(7);
10224 SKIP_BLANKS;
10225 if (RAW != '=') {
10226 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10227 return(NULL);
10229 NEXT;
10230 SKIP_BLANKS;
10231 if (RAW == '"') {
10232 NEXT;
10233 version = xmlParseVersionNum(ctxt);
10234 if (RAW != '"') {
10235 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10236 } else
10237 NEXT;
10238 } else if (RAW == '\''){
10239 NEXT;
10240 version = xmlParseVersionNum(ctxt);
10241 if (RAW != '\'') {
10242 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10243 } else
10244 NEXT;
10245 } else {
10246 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10249 return(version);
10253 * xmlParseEncName:
10254 * @ctxt: an XML parser context
10256 * DEPRECATED: Internal function, don't use.
10258 * parse the XML encoding name
10260 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10262 * Returns the encoding name value or NULL
10264 xmlChar *
10265 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10266 xmlChar *buf = NULL;
10267 int len = 0;
10268 int size = 10;
10269 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10270 XML_MAX_TEXT_LENGTH :
10271 XML_MAX_NAME_LENGTH;
10272 xmlChar cur;
10274 cur = CUR;
10275 if (((cur >= 'a') && (cur <= 'z')) ||
10276 ((cur >= 'A') && (cur <= 'Z'))) {
10277 buf = (xmlChar *) xmlMallocAtomic(size);
10278 if (buf == NULL) {
10279 xmlErrMemory(ctxt, NULL);
10280 return(NULL);
10283 buf[len++] = cur;
10284 NEXT;
10285 cur = CUR;
10286 while (((cur >= 'a') && (cur <= 'z')) ||
10287 ((cur >= 'A') && (cur <= 'Z')) ||
10288 ((cur >= '0') && (cur <= '9')) ||
10289 (cur == '.') || (cur == '_') ||
10290 (cur == '-')) {
10291 if (len + 1 >= size) {
10292 xmlChar *tmp;
10294 size *= 2;
10295 tmp = (xmlChar *) xmlRealloc(buf, size);
10296 if (tmp == NULL) {
10297 xmlErrMemory(ctxt, NULL);
10298 xmlFree(buf);
10299 return(NULL);
10301 buf = tmp;
10303 buf[len++] = cur;
10304 if (len > maxLength) {
10305 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10306 xmlFree(buf);
10307 return(NULL);
10309 NEXT;
10310 cur = CUR;
10312 buf[len] = 0;
10313 } else {
10314 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10316 return(buf);
10320 * xmlParseEncodingDecl:
10321 * @ctxt: an XML parser context
10323 * DEPRECATED: Internal function, don't use.
10325 * parse the XML encoding declaration
10327 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10329 * this setups the conversion filters.
10331 * Returns the encoding value or NULL
10334 const xmlChar *
10335 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10336 xmlChar *encoding = NULL;
10338 SKIP_BLANKS;
10339 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10340 SKIP(8);
10341 SKIP_BLANKS;
10342 if (RAW != '=') {
10343 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10344 return(NULL);
10346 NEXT;
10347 SKIP_BLANKS;
10348 if (RAW == '"') {
10349 NEXT;
10350 encoding = xmlParseEncName(ctxt);
10351 if (RAW != '"') {
10352 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353 xmlFree((xmlChar *) encoding);
10354 return(NULL);
10355 } else
10356 NEXT;
10357 } else if (RAW == '\''){
10358 NEXT;
10359 encoding = xmlParseEncName(ctxt);
10360 if (RAW != '\'') {
10361 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10362 xmlFree((xmlChar *) encoding);
10363 return(NULL);
10364 } else
10365 NEXT;
10366 } else {
10367 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10371 * Non standard parsing, allowing the user to ignore encoding
10373 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10374 xmlFree((xmlChar *) encoding);
10375 return(NULL);
10379 * UTF-16 encoding switch has already taken place at this stage,
10380 * more over the little-endian/big-endian selection is already done
10382 if ((encoding != NULL) &&
10383 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10384 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10386 * If no encoding was passed to the parser, that we are
10387 * using UTF-16 and no decoder is present i.e. the
10388 * document is apparently UTF-8 compatible, then raise an
10389 * encoding mismatch fatal error
10391 if ((ctxt->encoding == NULL) &&
10392 (ctxt->input->buf != NULL) &&
10393 (ctxt->input->buf->encoder == NULL)) {
10394 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10395 "Document labelled UTF-16 but has UTF-8 content\n");
10397 if (ctxt->encoding != NULL)
10398 xmlFree((xmlChar *) ctxt->encoding);
10399 ctxt->encoding = encoding;
10402 * UTF-8 encoding is handled natively
10404 else if ((encoding != NULL) &&
10405 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10406 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10407 /* TODO: Check for encoding mismatch. */
10408 if (ctxt->encoding != NULL)
10409 xmlFree((xmlChar *) ctxt->encoding);
10410 ctxt->encoding = encoding;
10412 else if (encoding != NULL) {
10413 xmlCharEncodingHandlerPtr handler;
10415 if (ctxt->input->encoding != NULL)
10416 xmlFree((xmlChar *) ctxt->input->encoding);
10417 ctxt->input->encoding = encoding;
10419 handler = xmlFindCharEncodingHandler((const char *) encoding);
10420 if (handler != NULL) {
10421 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10422 /* failed to convert */
10423 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10424 return(NULL);
10426 } else {
10427 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10428 "Unsupported encoding %s\n", encoding);
10429 return(NULL);
10433 return(encoding);
10437 * xmlParseSDDecl:
10438 * @ctxt: an XML parser context
10440 * DEPRECATED: Internal function, don't use.
10442 * parse the XML standalone declaration
10444 * [32] SDDecl ::= S 'standalone' Eq
10445 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10447 * [ VC: Standalone Document Declaration ]
10448 * TODO The standalone document declaration must have the value "no"
10449 * if any external markup declarations contain declarations of:
10450 * - attributes with default values, if elements to which these
10451 * attributes apply appear in the document without specifications
10452 * of values for these attributes, or
10453 * - entities (other than amp, lt, gt, apos, quot), if references
10454 * to those entities appear in the document, or
10455 * - attributes with values subject to normalization, where the
10456 * attribute appears in the document with a value which will change
10457 * as a result of normalization, or
10458 * - element types with element content, if white space occurs directly
10459 * within any instance of those types.
10461 * Returns:
10462 * 1 if standalone="yes"
10463 * 0 if standalone="no"
10464 * -2 if standalone attribute is missing or invalid
10465 * (A standalone value of -2 means that the XML declaration was found,
10466 * but no value was specified for the standalone attribute).
10470 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10471 int standalone = -2;
10473 SKIP_BLANKS;
10474 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10475 SKIP(10);
10476 SKIP_BLANKS;
10477 if (RAW != '=') {
10478 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10479 return(standalone);
10481 NEXT;
10482 SKIP_BLANKS;
10483 if (RAW == '\''){
10484 NEXT;
10485 if ((RAW == 'n') && (NXT(1) == 'o')) {
10486 standalone = 0;
10487 SKIP(2);
10488 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10489 (NXT(2) == 's')) {
10490 standalone = 1;
10491 SKIP(3);
10492 } else {
10493 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10495 if (RAW != '\'') {
10496 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10497 } else
10498 NEXT;
10499 } else if (RAW == '"'){
10500 NEXT;
10501 if ((RAW == 'n') && (NXT(1) == 'o')) {
10502 standalone = 0;
10503 SKIP(2);
10504 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10505 (NXT(2) == 's')) {
10506 standalone = 1;
10507 SKIP(3);
10508 } else {
10509 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10511 if (RAW != '"') {
10512 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10513 } else
10514 NEXT;
10515 } else {
10516 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10519 return(standalone);
10523 * xmlParseXMLDecl:
10524 * @ctxt: an XML parser context
10526 * DEPRECATED: Internal function, don't use.
10528 * parse an XML declaration header
10530 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10533 void
10534 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10535 xmlChar *version;
10538 * This value for standalone indicates that the document has an
10539 * XML declaration but it does not have a standalone attribute.
10540 * It will be overwritten later if a standalone attribute is found.
10542 ctxt->input->standalone = -2;
10545 * We know that '<?xml' is here.
10547 SKIP(5);
10549 if (!IS_BLANK_CH(RAW)) {
10550 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10551 "Blank needed after '<?xml'\n");
10553 SKIP_BLANKS;
10556 * We must have the VersionInfo here.
10558 version = xmlParseVersionInfo(ctxt);
10559 if (version == NULL) {
10560 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10561 } else {
10562 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10564 * Changed here for XML-1.0 5th edition
10566 if (ctxt->options & XML_PARSE_OLD10) {
10567 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10568 "Unsupported version '%s'\n",
10569 version);
10570 } else {
10571 if ((version[0] == '1') && ((version[1] == '.'))) {
10572 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10573 "Unsupported version '%s'\n",
10574 version, NULL);
10575 } else {
10576 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10577 "Unsupported version '%s'\n",
10578 version);
10582 if (ctxt->version != NULL)
10583 xmlFree((void *) ctxt->version);
10584 ctxt->version = version;
10588 * We may have the encoding declaration
10590 if (!IS_BLANK_CH(RAW)) {
10591 if ((RAW == '?') && (NXT(1) == '>')) {
10592 SKIP(2);
10593 return;
10595 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10597 xmlParseEncodingDecl(ctxt);
10598 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10599 (ctxt->instate == XML_PARSER_EOF)) {
10601 * The XML REC instructs us to stop parsing right here
10603 return;
10607 * We may have the standalone status.
10609 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10610 if ((RAW == '?') && (NXT(1) == '>')) {
10611 SKIP(2);
10612 return;
10614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10618 * We can grow the input buffer freely at that point
10620 GROW;
10622 SKIP_BLANKS;
10623 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10625 SKIP_BLANKS;
10626 if ((RAW == '?') && (NXT(1) == '>')) {
10627 SKIP(2);
10628 } else if (RAW == '>') {
10629 /* Deprecated old WD ... */
10630 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10631 NEXT;
10632 } else {
10633 int c;
10635 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10636 while ((c = CUR) != 0) {
10637 NEXT;
10638 if (c == '>')
10639 break;
10645 * xmlParseMisc:
10646 * @ctxt: an XML parser context
10648 * DEPRECATED: Internal function, don't use.
10650 * parse an XML Misc* optional field.
10652 * [27] Misc ::= Comment | PI | S
10655 void
10656 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10657 while (ctxt->instate != XML_PARSER_EOF) {
10658 SKIP_BLANKS;
10659 GROW;
10660 if ((RAW == '<') && (NXT(1) == '?')) {
10661 xmlParsePI(ctxt);
10662 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10663 xmlParseComment(ctxt);
10664 } else {
10665 break;
10671 * xmlParseDocument:
10672 * @ctxt: an XML parser context
10674 * parse an XML document (and build a tree if using the standard SAX
10675 * interface).
10677 * [1] document ::= prolog element Misc*
10679 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10681 * Returns 0, -1 in case of error. the parser context is augmented
10682 * as a result of the parsing.
10686 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10687 xmlChar start[4];
10688 xmlCharEncoding enc;
10690 xmlInitParser();
10692 if ((ctxt == NULL) || (ctxt->input == NULL))
10693 return(-1);
10695 GROW;
10698 * SAX: detecting the level.
10700 xmlDetectSAX2(ctxt);
10703 * SAX: beginning of the document processing.
10705 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10706 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10707 if (ctxt->instate == XML_PARSER_EOF)
10708 return(-1);
10710 if ((ctxt->encoding == NULL) &&
10711 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10713 * Get the 4 first bytes and decode the charset
10714 * if enc != XML_CHAR_ENCODING_NONE
10715 * plug some encoding conversion routines.
10717 start[0] = RAW;
10718 start[1] = NXT(1);
10719 start[2] = NXT(2);
10720 start[3] = NXT(3);
10721 enc = xmlDetectCharEncoding(&start[0], 4);
10722 if (enc != XML_CHAR_ENCODING_NONE) {
10723 xmlSwitchEncoding(ctxt, enc);
10728 if (CUR == 0) {
10729 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10730 return(-1);
10733 GROW;
10734 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10737 * Note that we will switch encoding on the fly.
10739 xmlParseXMLDecl(ctxt);
10740 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10741 (ctxt->instate == XML_PARSER_EOF)) {
10743 * The XML REC instructs us to stop parsing right here
10745 return(-1);
10747 ctxt->standalone = ctxt->input->standalone;
10748 SKIP_BLANKS;
10749 } else {
10750 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10752 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10753 ctxt->sax->startDocument(ctxt->userData);
10754 if (ctxt->instate == XML_PARSER_EOF)
10755 return(-1);
10756 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10757 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10758 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10762 * The Misc part of the Prolog
10764 xmlParseMisc(ctxt);
10767 * Then possibly doc type declaration(s) and more Misc
10768 * (doctypedecl Misc*)?
10770 GROW;
10771 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10773 ctxt->inSubset = 1;
10774 xmlParseDocTypeDecl(ctxt);
10775 if (RAW == '[') {
10776 ctxt->instate = XML_PARSER_DTD;
10777 xmlParseInternalSubset(ctxt);
10778 if (ctxt->instate == XML_PARSER_EOF)
10779 return(-1);
10783 * Create and update the external subset.
10785 ctxt->inSubset = 2;
10786 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10787 (!ctxt->disableSAX))
10788 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10789 ctxt->extSubSystem, ctxt->extSubURI);
10790 if (ctxt->instate == XML_PARSER_EOF)
10791 return(-1);
10792 ctxt->inSubset = 0;
10794 xmlCleanSpecialAttr(ctxt);
10796 ctxt->instate = XML_PARSER_PROLOG;
10797 xmlParseMisc(ctxt);
10801 * Time to start parsing the tree itself
10803 GROW;
10804 if (RAW != '<') {
10805 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10806 "Start tag expected, '<' not found\n");
10807 } else {
10808 ctxt->instate = XML_PARSER_CONTENT;
10809 xmlParseElement(ctxt);
10810 ctxt->instate = XML_PARSER_EPILOG;
10814 * The Misc part at the end
10816 xmlParseMisc(ctxt);
10818 if (RAW != 0) {
10819 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10821 ctxt->instate = XML_PARSER_EOF;
10825 * SAX: end of the document processing.
10827 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10828 ctxt->sax->endDocument(ctxt->userData);
10831 * Remove locally kept entity definitions if the tree was not built
10833 if ((ctxt->myDoc != NULL) &&
10834 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10835 xmlFreeDoc(ctxt->myDoc);
10836 ctxt->myDoc = NULL;
10839 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10840 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10841 if (ctxt->valid)
10842 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10843 if (ctxt->nsWellFormed)
10844 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10845 if (ctxt->options & XML_PARSE_OLD10)
10846 ctxt->myDoc->properties |= XML_DOC_OLD10;
10848 if (! ctxt->wellFormed) {
10849 ctxt->valid = 0;
10850 return(-1);
10852 return(0);
10856 * xmlParseExtParsedEnt:
10857 * @ctxt: an XML parser context
10859 * parse a general parsed entity
10860 * An external general parsed entity is well-formed if it matches the
10861 * production labeled extParsedEnt.
10863 * [78] extParsedEnt ::= TextDecl? content
10865 * Returns 0, -1 in case of error. the parser context is augmented
10866 * as a result of the parsing.
10870 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10871 xmlChar start[4];
10872 xmlCharEncoding enc;
10874 if ((ctxt == NULL) || (ctxt->input == NULL))
10875 return(-1);
10877 xmlDetectSAX2(ctxt);
10879 GROW;
10882 * SAX: beginning of the document processing.
10884 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10885 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10888 * Get the 4 first bytes and decode the charset
10889 * if enc != XML_CHAR_ENCODING_NONE
10890 * plug some encoding conversion routines.
10892 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10893 start[0] = RAW;
10894 start[1] = NXT(1);
10895 start[2] = NXT(2);
10896 start[3] = NXT(3);
10897 enc = xmlDetectCharEncoding(start, 4);
10898 if (enc != XML_CHAR_ENCODING_NONE) {
10899 xmlSwitchEncoding(ctxt, enc);
10904 if (CUR == 0) {
10905 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10909 * Check for the XMLDecl in the Prolog.
10911 GROW;
10912 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10915 * Note that we will switch encoding on the fly.
10917 xmlParseXMLDecl(ctxt);
10918 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10920 * The XML REC instructs us to stop parsing right here
10922 return(-1);
10924 SKIP_BLANKS;
10925 } else {
10926 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10928 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10929 ctxt->sax->startDocument(ctxt->userData);
10930 if (ctxt->instate == XML_PARSER_EOF)
10931 return(-1);
10934 * Doing validity checking on chunk doesn't make sense
10936 ctxt->instate = XML_PARSER_CONTENT;
10937 ctxt->validate = 0;
10938 ctxt->loadsubset = 0;
10939 ctxt->depth = 0;
10941 xmlParseContent(ctxt);
10942 if (ctxt->instate == XML_PARSER_EOF)
10943 return(-1);
10945 if ((RAW == '<') && (NXT(1) == '/')) {
10946 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10947 } else if (RAW != 0) {
10948 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10952 * SAX: end of the document processing.
10954 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10955 ctxt->sax->endDocument(ctxt->userData);
10957 if (! ctxt->wellFormed) return(-1);
10958 return(0);
10961 #ifdef LIBXML_PUSH_ENABLED
10962 /************************************************************************
10964 * Progressive parsing interfaces *
10966 ************************************************************************/
10969 * xmlParseLookupChar:
10970 * @ctxt: an XML parser context
10971 * @c: character
10973 * Check whether the input buffer contains a character.
10975 static int
10976 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10977 const xmlChar *cur;
10979 if (ctxt->checkIndex == 0) {
10980 cur = ctxt->input->cur + 1;
10981 } else {
10982 cur = ctxt->input->cur + ctxt->checkIndex;
10985 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10986 size_t index = ctxt->input->end - ctxt->input->cur;
10988 if (index > LONG_MAX) {
10989 ctxt->checkIndex = 0;
10990 return(1);
10992 ctxt->checkIndex = index;
10993 return(0);
10994 } else {
10995 ctxt->checkIndex = 0;
10996 return(1);
11001 * xmlParseLookupString:
11002 * @ctxt: an XML parser context
11003 * @startDelta: delta to apply at the start
11004 * @str: string
11005 * @strLen: length of string
11007 * Check whether the input buffer contains a string.
11009 static const xmlChar *
11010 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11011 const char *str, size_t strLen) {
11012 const xmlChar *cur, *term;
11014 if (ctxt->checkIndex == 0) {
11015 cur = ctxt->input->cur + startDelta;
11016 } else {
11017 cur = ctxt->input->cur + ctxt->checkIndex;
11020 term = BAD_CAST strstr((const char *) cur, str);
11021 if (term == NULL) {
11022 const xmlChar *end = ctxt->input->end;
11023 size_t index;
11025 /* Rescan (strLen - 1) characters. */
11026 if ((size_t) (end - cur) < strLen)
11027 end = cur;
11028 else
11029 end -= strLen - 1;
11030 index = end - ctxt->input->cur;
11031 if (index > LONG_MAX) {
11032 ctxt->checkIndex = 0;
11033 return(ctxt->input->end - strLen);
11035 ctxt->checkIndex = index;
11036 } else {
11037 ctxt->checkIndex = 0;
11040 return(term);
11044 * xmlParseLookupCharData:
11045 * @ctxt: an XML parser context
11047 * Check whether the input buffer contains terminated char data.
11049 static int
11050 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11051 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11052 const xmlChar *end = ctxt->input->end;
11053 size_t index;
11055 while (cur < end) {
11056 if ((*cur == '<') || (*cur == '&')) {
11057 ctxt->checkIndex = 0;
11058 return(1);
11060 cur++;
11063 index = cur - ctxt->input->cur;
11064 if (index > LONG_MAX) {
11065 ctxt->checkIndex = 0;
11066 return(1);
11068 ctxt->checkIndex = index;
11069 return(0);
11073 * xmlParseLookupGt:
11074 * @ctxt: an XML parser context
11076 * Check whether there's enough data in the input buffer to finish parsing
11077 * a start tag. This has to take quotes into account.
11079 static int
11080 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11081 const xmlChar *cur;
11082 const xmlChar *end = ctxt->input->end;
11083 int state = ctxt->endCheckState;
11084 size_t index;
11086 if (ctxt->checkIndex == 0)
11087 cur = ctxt->input->cur + 1;
11088 else
11089 cur = ctxt->input->cur + ctxt->checkIndex;
11091 while (cur < end) {
11092 if (state) {
11093 if (*cur == state)
11094 state = 0;
11095 } else if (*cur == '\'' || *cur == '"') {
11096 state = *cur;
11097 } else if (*cur == '>') {
11098 ctxt->checkIndex = 0;
11099 ctxt->endCheckState = 0;
11100 return(1);
11102 cur++;
11105 index = cur - ctxt->input->cur;
11106 if (index > LONG_MAX) {
11107 ctxt->checkIndex = 0;
11108 ctxt->endCheckState = 0;
11109 return(1);
11111 ctxt->checkIndex = index;
11112 ctxt->endCheckState = state;
11113 return(0);
11117 * xmlParseLookupInternalSubset:
11118 * @ctxt: an XML parser context
11120 * Check whether there's enough data in the input buffer to finish parsing
11121 * the internal subset.
11123 static int
11124 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11126 * Sorry, but progressive parsing of the internal subset is not
11127 * supported. We first check that the full content of the internal
11128 * subset is available and parsing is launched only at that point.
11129 * Internal subset ends with "']' S? '>'" in an unescaped section and
11130 * not in a ']]>' sequence which are conditional sections.
11132 const xmlChar *cur, *start;
11133 const xmlChar *end = ctxt->input->end;
11134 int state = ctxt->endCheckState;
11135 size_t index;
11137 if (ctxt->checkIndex == 0) {
11138 cur = ctxt->input->cur + 1;
11139 } else {
11140 cur = ctxt->input->cur + ctxt->checkIndex;
11142 start = cur;
11144 while (cur < end) {
11145 if (state == '-') {
11146 if ((*cur == '-') &&
11147 (cur[1] == '-') &&
11148 (cur[2] == '>')) {
11149 state = 0;
11150 cur += 3;
11151 start = cur;
11152 continue;
11155 else if (state == ']') {
11156 if (*cur == '>') {
11157 ctxt->checkIndex = 0;
11158 ctxt->endCheckState = 0;
11159 return(1);
11161 if (IS_BLANK_CH(*cur)) {
11162 state = ' ';
11163 } else if (*cur != ']') {
11164 state = 0;
11165 start = cur;
11166 continue;
11169 else if (state == ' ') {
11170 if (*cur == '>') {
11171 ctxt->checkIndex = 0;
11172 ctxt->endCheckState = 0;
11173 return(1);
11175 if (!IS_BLANK_CH(*cur)) {
11176 state = 0;
11177 start = cur;
11178 continue;
11181 else if (state != 0) {
11182 if (*cur == state) {
11183 state = 0;
11184 start = cur + 1;
11187 else if (*cur == '<') {
11188 if ((cur[1] == '!') &&
11189 (cur[2] == '-') &&
11190 (cur[3] == '-')) {
11191 state = '-';
11192 cur += 4;
11193 /* Don't treat <!--> as comment */
11194 start = cur;
11195 continue;
11198 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11199 state = *cur;
11202 cur++;
11206 * Rescan the three last characters to detect "<!--" and "-->"
11207 * split across chunks.
11209 if ((state == 0) || (state == '-')) {
11210 if (cur - start < 3)
11211 cur = start;
11212 else
11213 cur -= 3;
11215 index = cur - ctxt->input->cur;
11216 if (index > LONG_MAX) {
11217 ctxt->checkIndex = 0;
11218 ctxt->endCheckState = 0;
11219 return(1);
11221 ctxt->checkIndex = index;
11222 ctxt->endCheckState = state;
11223 return(0);
11227 * xmlCheckCdataPush:
11228 * @cur: pointer to the block of characters
11229 * @len: length of the block in bytes
11230 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11232 * Check that the block of characters is okay as SCdata content [20]
11234 * Returns the number of bytes to pass if okay, a negative index where an
11235 * UTF-8 error occurred otherwise
11237 static int
11238 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11239 int ix;
11240 unsigned char c;
11241 int codepoint;
11243 if ((utf == NULL) || (len <= 0))
11244 return(0);
11246 for (ix = 0; ix < len;) { /* string is 0-terminated */
11247 c = utf[ix];
11248 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11249 if (c >= 0x20)
11250 ix++;
11251 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11252 ix++;
11253 else
11254 return(-ix);
11255 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11256 if (ix + 2 > len) return(complete ? -ix : ix);
11257 if ((utf[ix+1] & 0xc0 ) != 0x80)
11258 return(-ix);
11259 codepoint = (utf[ix] & 0x1f) << 6;
11260 codepoint |= utf[ix+1] & 0x3f;
11261 if (!xmlIsCharQ(codepoint))
11262 return(-ix);
11263 ix += 2;
11264 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11265 if (ix + 3 > len) return(complete ? -ix : ix);
11266 if (((utf[ix+1] & 0xc0) != 0x80) ||
11267 ((utf[ix+2] & 0xc0) != 0x80))
11268 return(-ix);
11269 codepoint = (utf[ix] & 0xf) << 12;
11270 codepoint |= (utf[ix+1] & 0x3f) << 6;
11271 codepoint |= utf[ix+2] & 0x3f;
11272 if (!xmlIsCharQ(codepoint))
11273 return(-ix);
11274 ix += 3;
11275 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11276 if (ix + 4 > len) return(complete ? -ix : ix);
11277 if (((utf[ix+1] & 0xc0) != 0x80) ||
11278 ((utf[ix+2] & 0xc0) != 0x80) ||
11279 ((utf[ix+3] & 0xc0) != 0x80))
11280 return(-ix);
11281 codepoint = (utf[ix] & 0x7) << 18;
11282 codepoint |= (utf[ix+1] & 0x3f) << 12;
11283 codepoint |= (utf[ix+2] & 0x3f) << 6;
11284 codepoint |= utf[ix+3] & 0x3f;
11285 if (!xmlIsCharQ(codepoint))
11286 return(-ix);
11287 ix += 4;
11288 } else /* unknown encoding */
11289 return(-ix);
11291 return(ix);
11295 * xmlParseTryOrFinish:
11296 * @ctxt: an XML parser context
11297 * @terminate: last chunk indicator
11299 * Try to progress on parsing
11301 * Returns zero if no parsing was possible
11303 static int
11304 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11305 int ret = 0;
11306 int tlen;
11307 size_t avail;
11308 xmlChar cur, next;
11310 if (ctxt->input == NULL)
11311 return(0);
11313 #ifdef DEBUG_PUSH
11314 switch (ctxt->instate) {
11315 case XML_PARSER_EOF:
11316 xmlGenericError(xmlGenericErrorContext,
11317 "PP: try EOF\n"); break;
11318 case XML_PARSER_START:
11319 xmlGenericError(xmlGenericErrorContext,
11320 "PP: try START\n"); break;
11321 case XML_PARSER_MISC:
11322 xmlGenericError(xmlGenericErrorContext,
11323 "PP: try MISC\n");break;
11324 case XML_PARSER_COMMENT:
11325 xmlGenericError(xmlGenericErrorContext,
11326 "PP: try COMMENT\n");break;
11327 case XML_PARSER_PROLOG:
11328 xmlGenericError(xmlGenericErrorContext,
11329 "PP: try PROLOG\n");break;
11330 case XML_PARSER_START_TAG:
11331 xmlGenericError(xmlGenericErrorContext,
11332 "PP: try START_TAG\n");break;
11333 case XML_PARSER_CONTENT:
11334 xmlGenericError(xmlGenericErrorContext,
11335 "PP: try CONTENT\n");break;
11336 case XML_PARSER_CDATA_SECTION:
11337 xmlGenericError(xmlGenericErrorContext,
11338 "PP: try CDATA_SECTION\n");break;
11339 case XML_PARSER_END_TAG:
11340 xmlGenericError(xmlGenericErrorContext,
11341 "PP: try END_TAG\n");break;
11342 case XML_PARSER_ENTITY_DECL:
11343 xmlGenericError(xmlGenericErrorContext,
11344 "PP: try ENTITY_DECL\n");break;
11345 case XML_PARSER_ENTITY_VALUE:
11346 xmlGenericError(xmlGenericErrorContext,
11347 "PP: try ENTITY_VALUE\n");break;
11348 case XML_PARSER_ATTRIBUTE_VALUE:
11349 xmlGenericError(xmlGenericErrorContext,
11350 "PP: try ATTRIBUTE_VALUE\n");break;
11351 case XML_PARSER_DTD:
11352 xmlGenericError(xmlGenericErrorContext,
11353 "PP: try DTD\n");break;
11354 case XML_PARSER_EPILOG:
11355 xmlGenericError(xmlGenericErrorContext,
11356 "PP: try EPILOG\n");break;
11357 case XML_PARSER_PI:
11358 xmlGenericError(xmlGenericErrorContext,
11359 "PP: try PI\n");break;
11360 case XML_PARSER_IGNORE:
11361 xmlGenericError(xmlGenericErrorContext,
11362 "PP: try IGNORE\n");break;
11364 #endif
11366 if ((ctxt->input != NULL) &&
11367 (ctxt->input->cur - ctxt->input->base > 4096)) {
11368 xmlParserShrink(ctxt);
11371 while (ctxt->instate != XML_PARSER_EOF) {
11372 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11373 return(0);
11375 if (ctxt->input == NULL) break;
11376 if (ctxt->input->buf != NULL) {
11378 * If we are operating on converted input, try to flush
11379 * remaining chars to avoid them stalling in the non-converted
11380 * buffer.
11382 if ((ctxt->input->buf->raw != NULL) &&
11383 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11384 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11385 ctxt->input);
11386 size_t current = ctxt->input->cur - ctxt->input->base;
11388 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11389 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11390 base, current);
11393 avail = ctxt->input->end - ctxt->input->cur;
11394 if (avail < 1)
11395 goto done;
11396 switch (ctxt->instate) {
11397 case XML_PARSER_EOF:
11399 * Document parsing is done !
11401 goto done;
11402 case XML_PARSER_START:
11403 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11404 xmlChar start[4];
11405 xmlCharEncoding enc;
11408 * Very first chars read from the document flow.
11410 if (avail < 4)
11411 goto done;
11414 * Get the 4 first bytes and decode the charset
11415 * if enc != XML_CHAR_ENCODING_NONE
11416 * plug some encoding conversion routines,
11417 * else xmlSwitchEncoding will set to (default)
11418 * UTF8.
11420 start[0] = RAW;
11421 start[1] = NXT(1);
11422 start[2] = NXT(2);
11423 start[3] = NXT(3);
11424 enc = xmlDetectCharEncoding(start, 4);
11426 * We need more bytes to detect EBCDIC code pages.
11427 * See xmlDetectEBCDIC.
11429 if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11430 (!terminate) && (avail < 200))
11431 goto done;
11432 xmlSwitchEncoding(ctxt, enc);
11433 break;
11436 if (avail < 2)
11437 goto done;
11438 cur = ctxt->input->cur[0];
11439 next = ctxt->input->cur[1];
11440 if (cur == 0) {
11441 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11442 ctxt->sax->setDocumentLocator(ctxt->userData,
11443 &xmlDefaultSAXLocator);
11444 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11445 xmlHaltParser(ctxt);
11446 #ifdef DEBUG_PUSH
11447 xmlGenericError(xmlGenericErrorContext,
11448 "PP: entering EOF\n");
11449 #endif
11450 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11451 ctxt->sax->endDocument(ctxt->userData);
11452 goto done;
11454 if ((cur == '<') && (next == '?')) {
11455 /* PI or XML decl */
11456 if (avail < 5) goto done;
11457 if ((!terminate) &&
11458 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11459 goto done;
11460 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11461 ctxt->sax->setDocumentLocator(ctxt->userData,
11462 &xmlDefaultSAXLocator);
11463 if ((ctxt->input->cur[2] == 'x') &&
11464 (ctxt->input->cur[3] == 'm') &&
11465 (ctxt->input->cur[4] == 'l') &&
11466 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11467 ret += 5;
11468 #ifdef DEBUG_PUSH
11469 xmlGenericError(xmlGenericErrorContext,
11470 "PP: Parsing XML Decl\n");
11471 #endif
11472 xmlParseXMLDecl(ctxt);
11473 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11475 * The XML REC instructs us to stop parsing right
11476 * here
11478 xmlHaltParser(ctxt);
11479 return(0);
11481 ctxt->standalone = ctxt->input->standalone;
11482 if ((ctxt->encoding == NULL) &&
11483 (ctxt->input->encoding != NULL))
11484 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11485 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11486 (!ctxt->disableSAX))
11487 ctxt->sax->startDocument(ctxt->userData);
11488 ctxt->instate = XML_PARSER_MISC;
11489 #ifdef DEBUG_PUSH
11490 xmlGenericError(xmlGenericErrorContext,
11491 "PP: entering MISC\n");
11492 #endif
11493 } else {
11494 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11495 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11496 (!ctxt->disableSAX))
11497 ctxt->sax->startDocument(ctxt->userData);
11498 ctxt->instate = XML_PARSER_MISC;
11499 #ifdef DEBUG_PUSH
11500 xmlGenericError(xmlGenericErrorContext,
11501 "PP: entering MISC\n");
11502 #endif
11504 } else {
11505 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11506 ctxt->sax->setDocumentLocator(ctxt->userData,
11507 &xmlDefaultSAXLocator);
11508 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11509 if (ctxt->version == NULL) {
11510 xmlErrMemory(ctxt, NULL);
11511 break;
11513 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11514 (!ctxt->disableSAX))
11515 ctxt->sax->startDocument(ctxt->userData);
11516 ctxt->instate = XML_PARSER_MISC;
11517 #ifdef DEBUG_PUSH
11518 xmlGenericError(xmlGenericErrorContext,
11519 "PP: entering MISC\n");
11520 #endif
11522 break;
11523 case XML_PARSER_START_TAG: {
11524 const xmlChar *name;
11525 const xmlChar *prefix = NULL;
11526 const xmlChar *URI = NULL;
11527 int line = ctxt->input->line;
11528 int nsNr = ctxt->nsNr;
11530 if ((avail < 2) && (ctxt->inputNr == 1))
11531 goto done;
11532 cur = ctxt->input->cur[0];
11533 if (cur != '<') {
11534 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11535 xmlHaltParser(ctxt);
11536 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11537 ctxt->sax->endDocument(ctxt->userData);
11538 goto done;
11540 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11541 goto done;
11542 if (ctxt->spaceNr == 0)
11543 spacePush(ctxt, -1);
11544 else if (*ctxt->space == -2)
11545 spacePush(ctxt, -1);
11546 else
11547 spacePush(ctxt, *ctxt->space);
11548 #ifdef LIBXML_SAX1_ENABLED
11549 if (ctxt->sax2)
11550 #endif /* LIBXML_SAX1_ENABLED */
11551 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11552 #ifdef LIBXML_SAX1_ENABLED
11553 else
11554 name = xmlParseStartTag(ctxt);
11555 #endif /* LIBXML_SAX1_ENABLED */
11556 if (ctxt->instate == XML_PARSER_EOF)
11557 goto done;
11558 if (name == NULL) {
11559 spacePop(ctxt);
11560 xmlHaltParser(ctxt);
11561 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11562 ctxt->sax->endDocument(ctxt->userData);
11563 goto done;
11565 #ifdef LIBXML_VALID_ENABLED
11567 * [ VC: Root Element Type ]
11568 * The Name in the document type declaration must match
11569 * the element type of the root element.
11571 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11572 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11573 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11574 #endif /* LIBXML_VALID_ENABLED */
11577 * Check for an Empty Element.
11579 if ((RAW == '/') && (NXT(1) == '>')) {
11580 SKIP(2);
11582 if (ctxt->sax2) {
11583 if ((ctxt->sax != NULL) &&
11584 (ctxt->sax->endElementNs != NULL) &&
11585 (!ctxt->disableSAX))
11586 ctxt->sax->endElementNs(ctxt->userData, name,
11587 prefix, URI);
11588 if (ctxt->nsNr - nsNr > 0)
11589 nsPop(ctxt, ctxt->nsNr - nsNr);
11590 #ifdef LIBXML_SAX1_ENABLED
11591 } else {
11592 if ((ctxt->sax != NULL) &&
11593 (ctxt->sax->endElement != NULL) &&
11594 (!ctxt->disableSAX))
11595 ctxt->sax->endElement(ctxt->userData, name);
11596 #endif /* LIBXML_SAX1_ENABLED */
11598 if (ctxt->instate == XML_PARSER_EOF)
11599 goto done;
11600 spacePop(ctxt);
11601 if (ctxt->nameNr == 0) {
11602 ctxt->instate = XML_PARSER_EPILOG;
11603 } else {
11604 ctxt->instate = XML_PARSER_CONTENT;
11606 break;
11608 if (RAW == '>') {
11609 NEXT;
11610 } else {
11611 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11612 "Couldn't find end of Start Tag %s\n",
11613 name);
11614 nodePop(ctxt);
11615 spacePop(ctxt);
11617 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11619 ctxt->instate = XML_PARSER_CONTENT;
11620 break;
11622 case XML_PARSER_CONTENT: {
11623 if ((avail < 2) && (ctxt->inputNr == 1))
11624 goto done;
11625 cur = ctxt->input->cur[0];
11626 next = ctxt->input->cur[1];
11628 if ((cur == '<') && (next == '/')) {
11629 ctxt->instate = XML_PARSER_END_TAG;
11630 break;
11631 } else if ((cur == '<') && (next == '?')) {
11632 if ((!terminate) &&
11633 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11634 goto done;
11635 xmlParsePI(ctxt);
11636 ctxt->instate = XML_PARSER_CONTENT;
11637 } else if ((cur == '<') && (next != '!')) {
11638 ctxt->instate = XML_PARSER_START_TAG;
11639 break;
11640 } else if ((cur == '<') && (next == '!') &&
11641 (ctxt->input->cur[2] == '-') &&
11642 (ctxt->input->cur[3] == '-')) {
11643 if ((!terminate) &&
11644 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11645 goto done;
11646 xmlParseComment(ctxt);
11647 ctxt->instate = XML_PARSER_CONTENT;
11648 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11649 (ctxt->input->cur[2] == '[') &&
11650 (ctxt->input->cur[3] == 'C') &&
11651 (ctxt->input->cur[4] == 'D') &&
11652 (ctxt->input->cur[5] == 'A') &&
11653 (ctxt->input->cur[6] == 'T') &&
11654 (ctxt->input->cur[7] == 'A') &&
11655 (ctxt->input->cur[8] == '[')) {
11656 SKIP(9);
11657 ctxt->instate = XML_PARSER_CDATA_SECTION;
11658 break;
11659 } else if ((cur == '<') && (next == '!') &&
11660 (avail < 9)) {
11661 goto done;
11662 } else if (cur == '<') {
11663 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11664 "detected an error in element content\n");
11665 SKIP(1);
11666 } else if (cur == '&') {
11667 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11668 goto done;
11669 xmlParseReference(ctxt);
11670 } else {
11671 /* TODO Avoid the extra copy, handle directly !!! */
11673 * Goal of the following test is:
11674 * - minimize calls to the SAX 'character' callback
11675 * when they are mergeable
11676 * - handle an problem for isBlank when we only parse
11677 * a sequence of blank chars and the next one is
11678 * not available to check against '<' presence.
11679 * - tries to homogenize the differences in SAX
11680 * callbacks between the push and pull versions
11681 * of the parser.
11683 if ((ctxt->inputNr == 1) &&
11684 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11685 if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11686 goto done;
11688 ctxt->checkIndex = 0;
11689 xmlParseCharDataInternal(ctxt, !terminate);
11691 break;
11693 case XML_PARSER_END_TAG:
11694 if (avail < 2)
11695 goto done;
11696 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11697 goto done;
11698 if (ctxt->sax2) {
11699 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11700 nameNsPop(ctxt);
11702 #ifdef LIBXML_SAX1_ENABLED
11703 else
11704 xmlParseEndTag1(ctxt, 0);
11705 #endif /* LIBXML_SAX1_ENABLED */
11706 if (ctxt->instate == XML_PARSER_EOF) {
11707 /* Nothing */
11708 } else if (ctxt->nameNr == 0) {
11709 ctxt->instate = XML_PARSER_EPILOG;
11710 } else {
11711 ctxt->instate = XML_PARSER_CONTENT;
11713 break;
11714 case XML_PARSER_CDATA_SECTION: {
11716 * The Push mode need to have the SAX callback for
11717 * cdataBlock merge back contiguous callbacks.
11719 const xmlChar *term;
11721 if (terminate) {
11723 * Don't call xmlParseLookupString. If 'terminate'
11724 * is set, checkIndex is invalid.
11726 term = BAD_CAST strstr((const char *) ctxt->input->cur,
11727 "]]>");
11728 } else {
11729 term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11732 if (term == NULL) {
11733 int tmp, size;
11735 if (terminate) {
11736 /* Unfinished CDATA section */
11737 size = ctxt->input->end - ctxt->input->cur;
11738 } else {
11739 if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11740 goto done;
11741 ctxt->checkIndex = 0;
11742 /* XXX: Why don't we pass the full buffer? */
11743 size = XML_PARSER_BIG_BUFFER_SIZE;
11745 tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11746 if (tmp <= 0) {
11747 tmp = -tmp;
11748 ctxt->input->cur += tmp;
11749 goto encoding_error;
11751 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11752 if (ctxt->sax->cdataBlock != NULL)
11753 ctxt->sax->cdataBlock(ctxt->userData,
11754 ctxt->input->cur, tmp);
11755 else if (ctxt->sax->characters != NULL)
11756 ctxt->sax->characters(ctxt->userData,
11757 ctxt->input->cur, tmp);
11759 if (ctxt->instate == XML_PARSER_EOF)
11760 goto done;
11761 SKIPL(tmp);
11762 } else {
11763 int base = term - CUR_PTR;
11764 int tmp;
11766 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11767 if ((tmp < 0) || (tmp != base)) {
11768 tmp = -tmp;
11769 ctxt->input->cur += tmp;
11770 goto encoding_error;
11772 if ((ctxt->sax != NULL) && (base == 0) &&
11773 (ctxt->sax->cdataBlock != NULL) &&
11774 (!ctxt->disableSAX)) {
11776 * Special case to provide identical behaviour
11777 * between pull and push parsers on enpty CDATA
11778 * sections
11780 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11781 (!strncmp((const char *)&ctxt->input->cur[-9],
11782 "<![CDATA[", 9)))
11783 ctxt->sax->cdataBlock(ctxt->userData,
11784 BAD_CAST "", 0);
11785 } else if ((ctxt->sax != NULL) && (base > 0) &&
11786 (!ctxt->disableSAX)) {
11787 if (ctxt->sax->cdataBlock != NULL)
11788 ctxt->sax->cdataBlock(ctxt->userData,
11789 ctxt->input->cur, base);
11790 else if (ctxt->sax->characters != NULL)
11791 ctxt->sax->characters(ctxt->userData,
11792 ctxt->input->cur, base);
11794 if (ctxt->instate == XML_PARSER_EOF)
11795 goto done;
11796 SKIPL(base + 3);
11797 ctxt->instate = XML_PARSER_CONTENT;
11798 #ifdef DEBUG_PUSH
11799 xmlGenericError(xmlGenericErrorContext,
11800 "PP: entering CONTENT\n");
11801 #endif
11803 break;
11805 case XML_PARSER_MISC:
11806 case XML_PARSER_PROLOG:
11807 case XML_PARSER_EPILOG:
11808 SKIP_BLANKS;
11809 avail = ctxt->input->end - ctxt->input->cur;
11810 if (avail < 2)
11811 goto done;
11812 cur = ctxt->input->cur[0];
11813 next = ctxt->input->cur[1];
11814 if ((cur == '<') && (next == '?')) {
11815 if ((!terminate) &&
11816 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11817 goto done;
11818 #ifdef DEBUG_PUSH
11819 xmlGenericError(xmlGenericErrorContext,
11820 "PP: Parsing PI\n");
11821 #endif
11822 xmlParsePI(ctxt);
11823 if (ctxt->instate == XML_PARSER_EOF)
11824 goto done;
11825 } else if ((cur == '<') && (next == '!') &&
11826 (ctxt->input->cur[2] == '-') &&
11827 (ctxt->input->cur[3] == '-')) {
11828 if ((!terminate) &&
11829 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11830 goto done;
11831 #ifdef DEBUG_PUSH
11832 xmlGenericError(xmlGenericErrorContext,
11833 "PP: Parsing Comment\n");
11834 #endif
11835 xmlParseComment(ctxt);
11836 if (ctxt->instate == XML_PARSER_EOF)
11837 goto done;
11838 } else if ((ctxt->instate == XML_PARSER_MISC) &&
11839 (cur == '<') && (next == '!') &&
11840 (ctxt->input->cur[2] == 'D') &&
11841 (ctxt->input->cur[3] == 'O') &&
11842 (ctxt->input->cur[4] == 'C') &&
11843 (ctxt->input->cur[5] == 'T') &&
11844 (ctxt->input->cur[6] == 'Y') &&
11845 (ctxt->input->cur[7] == 'P') &&
11846 (ctxt->input->cur[8] == 'E')) {
11847 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11848 goto done;
11849 #ifdef DEBUG_PUSH
11850 xmlGenericError(xmlGenericErrorContext,
11851 "PP: Parsing internal subset\n");
11852 #endif
11853 ctxt->inSubset = 1;
11854 xmlParseDocTypeDecl(ctxt);
11855 if (ctxt->instate == XML_PARSER_EOF)
11856 goto done;
11857 if (RAW == '[') {
11858 ctxt->instate = XML_PARSER_DTD;
11859 #ifdef DEBUG_PUSH
11860 xmlGenericError(xmlGenericErrorContext,
11861 "PP: entering DTD\n");
11862 #endif
11863 } else {
11865 * Create and update the external subset.
11867 ctxt->inSubset = 2;
11868 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11869 (ctxt->sax->externalSubset != NULL))
11870 ctxt->sax->externalSubset(ctxt->userData,
11871 ctxt->intSubName, ctxt->extSubSystem,
11872 ctxt->extSubURI);
11873 ctxt->inSubset = 0;
11874 xmlCleanSpecialAttr(ctxt);
11875 ctxt->instate = XML_PARSER_PROLOG;
11876 #ifdef DEBUG_PUSH
11877 xmlGenericError(xmlGenericErrorContext,
11878 "PP: entering PROLOG\n");
11879 #endif
11881 } else if ((cur == '<') && (next == '!') &&
11882 (avail <
11883 (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11884 goto done;
11885 } else if (ctxt->instate == XML_PARSER_EPILOG) {
11886 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11887 xmlHaltParser(ctxt);
11888 #ifdef DEBUG_PUSH
11889 xmlGenericError(xmlGenericErrorContext,
11890 "PP: entering EOF\n");
11891 #endif
11892 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11893 ctxt->sax->endDocument(ctxt->userData);
11894 goto done;
11895 } else {
11896 ctxt->instate = XML_PARSER_START_TAG;
11897 #ifdef DEBUG_PUSH
11898 xmlGenericError(xmlGenericErrorContext,
11899 "PP: entering START_TAG\n");
11900 #endif
11902 break;
11903 case XML_PARSER_DTD: {
11904 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11905 goto done;
11906 xmlParseInternalSubset(ctxt);
11907 if (ctxt->instate == XML_PARSER_EOF)
11908 goto done;
11909 ctxt->inSubset = 2;
11910 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11911 (ctxt->sax->externalSubset != NULL))
11912 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11913 ctxt->extSubSystem, ctxt->extSubURI);
11914 ctxt->inSubset = 0;
11915 xmlCleanSpecialAttr(ctxt);
11916 if (ctxt->instate == XML_PARSER_EOF)
11917 goto done;
11918 ctxt->instate = XML_PARSER_PROLOG;
11919 #ifdef DEBUG_PUSH
11920 xmlGenericError(xmlGenericErrorContext,
11921 "PP: entering PROLOG\n");
11922 #endif
11923 break;
11925 case XML_PARSER_COMMENT:
11926 xmlGenericError(xmlGenericErrorContext,
11927 "PP: internal error, state == COMMENT\n");
11928 ctxt->instate = XML_PARSER_CONTENT;
11929 #ifdef DEBUG_PUSH
11930 xmlGenericError(xmlGenericErrorContext,
11931 "PP: entering CONTENT\n");
11932 #endif
11933 break;
11934 case XML_PARSER_IGNORE:
11935 xmlGenericError(xmlGenericErrorContext,
11936 "PP: internal error, state == IGNORE");
11937 ctxt->instate = XML_PARSER_DTD;
11938 #ifdef DEBUG_PUSH
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: entering DTD\n");
11941 #endif
11942 break;
11943 case XML_PARSER_PI:
11944 xmlGenericError(xmlGenericErrorContext,
11945 "PP: internal error, state == PI\n");
11946 ctxt->instate = XML_PARSER_CONTENT;
11947 #ifdef DEBUG_PUSH
11948 xmlGenericError(xmlGenericErrorContext,
11949 "PP: entering CONTENT\n");
11950 #endif
11951 break;
11952 case XML_PARSER_ENTITY_DECL:
11953 xmlGenericError(xmlGenericErrorContext,
11954 "PP: internal error, state == ENTITY_DECL\n");
11955 ctxt->instate = XML_PARSER_DTD;
11956 #ifdef DEBUG_PUSH
11957 xmlGenericError(xmlGenericErrorContext,
11958 "PP: entering DTD\n");
11959 #endif
11960 break;
11961 case XML_PARSER_ENTITY_VALUE:
11962 xmlGenericError(xmlGenericErrorContext,
11963 "PP: internal error, state == ENTITY_VALUE\n");
11964 ctxt->instate = XML_PARSER_CONTENT;
11965 #ifdef DEBUG_PUSH
11966 xmlGenericError(xmlGenericErrorContext,
11967 "PP: entering DTD\n");
11968 #endif
11969 break;
11970 case XML_PARSER_ATTRIBUTE_VALUE:
11971 xmlGenericError(xmlGenericErrorContext,
11972 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11973 ctxt->instate = XML_PARSER_START_TAG;
11974 #ifdef DEBUG_PUSH
11975 xmlGenericError(xmlGenericErrorContext,
11976 "PP: entering START_TAG\n");
11977 #endif
11978 break;
11979 case XML_PARSER_SYSTEM_LITERAL:
11980 xmlGenericError(xmlGenericErrorContext,
11981 "PP: internal error, state == SYSTEM_LITERAL\n");
11982 ctxt->instate = XML_PARSER_START_TAG;
11983 #ifdef DEBUG_PUSH
11984 xmlGenericError(xmlGenericErrorContext,
11985 "PP: entering START_TAG\n");
11986 #endif
11987 break;
11988 case XML_PARSER_PUBLIC_LITERAL:
11989 xmlGenericError(xmlGenericErrorContext,
11990 "PP: internal error, state == PUBLIC_LITERAL\n");
11991 ctxt->instate = XML_PARSER_START_TAG;
11992 #ifdef DEBUG_PUSH
11993 xmlGenericError(xmlGenericErrorContext,
11994 "PP: entering START_TAG\n");
11995 #endif
11996 break;
11999 done:
12000 #ifdef DEBUG_PUSH
12001 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12002 #endif
12003 return(ret);
12004 encoding_error:
12005 if (ctxt->input->end - ctxt->input->cur < 4) {
12006 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12007 "Input is not proper UTF-8, indicate encoding !\n",
12008 NULL, NULL);
12009 } else {
12010 char buffer[150];
12012 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12013 ctxt->input->cur[0], ctxt->input->cur[1],
12014 ctxt->input->cur[2], ctxt->input->cur[3]);
12015 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12016 "Input is not proper UTF-8, indicate encoding !\n%s",
12017 BAD_CAST buffer, NULL);
12019 return(0);
12023 * xmlParseChunk:
12024 * @ctxt: an XML parser context
12025 * @chunk: an char array
12026 * @size: the size in byte of the chunk
12027 * @terminate: last chunk indicator
12029 * Parse a Chunk of memory
12031 * Returns zero if no error, the xmlParserErrors otherwise.
12034 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12035 int terminate) {
12036 int end_in_lf = 0;
12038 if (ctxt == NULL)
12039 return(XML_ERR_INTERNAL_ERROR);
12040 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12041 return(ctxt->errNo);
12042 if (ctxt->instate == XML_PARSER_EOF)
12043 return(-1);
12044 if (ctxt->input == NULL)
12045 return(-1);
12047 ctxt->progressive = 1;
12048 if (ctxt->instate == XML_PARSER_START)
12049 xmlDetectSAX2(ctxt);
12050 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12051 (chunk[size - 1] == '\r')) {
12052 end_in_lf = 1;
12053 size--;
12056 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12057 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12058 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12059 size_t cur = ctxt->input->cur - ctxt->input->base;
12060 int res;
12062 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12063 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12064 if (res < 0) {
12065 ctxt->errNo = XML_PARSER_EOF;
12066 xmlHaltParser(ctxt);
12067 return (XML_PARSER_EOF);
12069 #ifdef DEBUG_PUSH
12070 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12071 #endif
12073 } else if (ctxt->instate != XML_PARSER_EOF) {
12074 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12075 xmlParserInputBufferPtr in = ctxt->input->buf;
12076 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12077 (in->raw != NULL)) {
12078 int nbchars;
12079 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12080 size_t current = ctxt->input->cur - ctxt->input->base;
12082 nbchars = xmlCharEncInput(in, terminate);
12083 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12084 if (nbchars < 0) {
12085 /* TODO 2.6.0 */
12086 xmlGenericError(xmlGenericErrorContext,
12087 "xmlParseChunk: encoder error\n");
12088 xmlHaltParser(ctxt);
12089 return(XML_ERR_INVALID_ENCODING);
12095 xmlParseTryOrFinish(ctxt, terminate);
12096 if (ctxt->instate == XML_PARSER_EOF)
12097 return(ctxt->errNo);
12099 if ((ctxt->input != NULL) &&
12100 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12101 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12102 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12103 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12104 xmlHaltParser(ctxt);
12106 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12107 return(ctxt->errNo);
12109 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12110 (ctxt->input->buf != NULL)) {
12111 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12112 ctxt->input);
12113 size_t current = ctxt->input->cur - ctxt->input->base;
12115 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12117 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12118 base, current);
12120 if (terminate) {
12122 * Check for termination
12124 if ((ctxt->instate != XML_PARSER_EOF) &&
12125 (ctxt->instate != XML_PARSER_EPILOG)) {
12126 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12128 if ((ctxt->instate == XML_PARSER_EPILOG) &&
12129 (ctxt->input->cur < ctxt->input->end)) {
12130 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12132 if (ctxt->instate != XML_PARSER_EOF) {
12133 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12134 ctxt->sax->endDocument(ctxt->userData);
12136 ctxt->instate = XML_PARSER_EOF;
12138 if (ctxt->wellFormed == 0)
12139 return((xmlParserErrors) ctxt->errNo);
12140 else
12141 return(0);
12144 /************************************************************************
12146 * I/O front end functions to the parser *
12148 ************************************************************************/
12151 * xmlCreatePushParserCtxt:
12152 * @sax: a SAX handler
12153 * @user_data: The user data returned on SAX callbacks
12154 * @chunk: a pointer to an array of chars
12155 * @size: number of chars in the array
12156 * @filename: an optional file name or URI
12158 * Create a parser context for using the XML parser in push mode.
12159 * If @buffer and @size are non-NULL, the data is used to detect
12160 * the encoding. The remaining characters will be parsed so they
12161 * don't need to be fed in again through xmlParseChunk.
12162 * To allow content encoding detection, @size should be >= 4
12163 * The value of @filename is used for fetching external entities
12164 * and error/warning reports.
12166 * Returns the new parser context or NULL
12169 xmlParserCtxtPtr
12170 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12171 const char *chunk, int size, const char *filename) {
12172 xmlParserCtxtPtr ctxt;
12173 xmlParserInputPtr inputStream;
12174 xmlParserInputBufferPtr buf;
12176 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12177 if (buf == NULL) return(NULL);
12179 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12180 if (ctxt == NULL) {
12181 xmlErrMemory(NULL, "creating parser: out of memory\n");
12182 xmlFreeParserInputBuffer(buf);
12183 return(NULL);
12185 ctxt->dictNames = 1;
12186 if (filename == NULL) {
12187 ctxt->directory = NULL;
12188 } else {
12189 ctxt->directory = xmlParserGetDirectory(filename);
12192 inputStream = xmlNewInputStream(ctxt);
12193 if (inputStream == NULL) {
12194 xmlFreeParserCtxt(ctxt);
12195 xmlFreeParserInputBuffer(buf);
12196 return(NULL);
12199 if (filename == NULL)
12200 inputStream->filename = NULL;
12201 else {
12202 inputStream->filename = (char *)
12203 xmlCanonicPath((const xmlChar *) filename);
12204 if (inputStream->filename == NULL) {
12205 xmlFreeInputStream(inputStream);
12206 xmlFreeParserCtxt(ctxt);
12207 xmlFreeParserInputBuffer(buf);
12208 return(NULL);
12211 inputStream->buf = buf;
12212 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12213 inputPush(ctxt, inputStream);
12216 * If the caller didn't provide an initial 'chunk' for determining
12217 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12218 * that it can be automatically determined later
12220 ctxt->charset = XML_CHAR_ENCODING_NONE;
12222 if ((size != 0) && (chunk != NULL) &&
12223 (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12224 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12225 size_t cur = ctxt->input->cur - ctxt->input->base;
12227 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12229 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12230 #ifdef DEBUG_PUSH
12231 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12232 #endif
12235 return(ctxt);
12237 #endif /* LIBXML_PUSH_ENABLED */
12240 * xmlStopParser:
12241 * @ctxt: an XML parser context
12243 * Blocks further parser processing
12245 void
12246 xmlStopParser(xmlParserCtxtPtr ctxt) {
12247 if (ctxt == NULL)
12248 return;
12249 xmlHaltParser(ctxt);
12250 ctxt->errNo = XML_ERR_USER_STOP;
12254 * xmlCreateIOParserCtxt:
12255 * @sax: a SAX handler
12256 * @user_data: The user data returned on SAX callbacks
12257 * @ioread: an I/O read function
12258 * @ioclose: an I/O close function
12259 * @ioctx: an I/O handler
12260 * @enc: the charset encoding if known
12262 * Create a parser context for using the XML parser with an existing
12263 * I/O stream
12265 * Returns the new parser context or NULL
12267 xmlParserCtxtPtr
12268 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12269 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12270 void *ioctx, xmlCharEncoding enc) {
12271 xmlParserCtxtPtr ctxt;
12272 xmlParserInputPtr inputStream;
12273 xmlParserInputBufferPtr buf;
12275 if (ioread == NULL) return(NULL);
12277 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12278 if (buf == NULL) {
12279 if (ioclose != NULL)
12280 ioclose(ioctx);
12281 return (NULL);
12284 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12285 if (ctxt == NULL) {
12286 xmlFreeParserInputBuffer(buf);
12287 return(NULL);
12290 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12291 if (inputStream == NULL) {
12292 xmlFreeParserCtxt(ctxt);
12293 return(NULL);
12295 inputPush(ctxt, inputStream);
12297 return(ctxt);
12300 #ifdef LIBXML_VALID_ENABLED
12301 /************************************************************************
12303 * Front ends when parsing a DTD *
12305 ************************************************************************/
12308 * xmlIOParseDTD:
12309 * @sax: the SAX handler block or NULL
12310 * @input: an Input Buffer
12311 * @enc: the charset encoding if known
12313 * Load and parse a DTD
12315 * Returns the resulting xmlDtdPtr or NULL in case of error.
12316 * @input will be freed by the function in any case.
12319 xmlDtdPtr
12320 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12321 xmlCharEncoding enc) {
12322 xmlDtdPtr ret = NULL;
12323 xmlParserCtxtPtr ctxt;
12324 xmlParserInputPtr pinput = NULL;
12325 xmlChar start[4];
12327 if (input == NULL)
12328 return(NULL);
12330 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12331 if (ctxt == NULL) {
12332 xmlFreeParserInputBuffer(input);
12333 return(NULL);
12336 /* We are loading a DTD */
12337 ctxt->options |= XML_PARSE_DTDLOAD;
12339 xmlDetectSAX2(ctxt);
12342 * generate a parser input from the I/O handler
12345 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12346 if (pinput == NULL) {
12347 xmlFreeParserInputBuffer(input);
12348 xmlFreeParserCtxt(ctxt);
12349 return(NULL);
12353 * plug some encoding conversion routines here.
12355 if (xmlPushInput(ctxt, pinput) < 0) {
12356 xmlFreeParserCtxt(ctxt);
12357 return(NULL);
12359 if (enc != XML_CHAR_ENCODING_NONE) {
12360 xmlSwitchEncoding(ctxt, enc);
12363 pinput->filename = NULL;
12364 pinput->line = 1;
12365 pinput->col = 1;
12366 pinput->base = ctxt->input->cur;
12367 pinput->cur = ctxt->input->cur;
12368 pinput->free = NULL;
12371 * let's parse that entity knowing it's an external subset.
12373 ctxt->inSubset = 2;
12374 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12375 if (ctxt->myDoc == NULL) {
12376 xmlErrMemory(ctxt, "New Doc failed");
12377 return(NULL);
12379 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12380 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12381 BAD_CAST "none", BAD_CAST "none");
12383 if ((enc == XML_CHAR_ENCODING_NONE) &&
12384 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12386 * Get the 4 first bytes and decode the charset
12387 * if enc != XML_CHAR_ENCODING_NONE
12388 * plug some encoding conversion routines.
12390 start[0] = RAW;
12391 start[1] = NXT(1);
12392 start[2] = NXT(2);
12393 start[3] = NXT(3);
12394 enc = xmlDetectCharEncoding(start, 4);
12395 if (enc != XML_CHAR_ENCODING_NONE) {
12396 xmlSwitchEncoding(ctxt, enc);
12400 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12402 if (ctxt->myDoc != NULL) {
12403 if (ctxt->wellFormed) {
12404 ret = ctxt->myDoc->extSubset;
12405 ctxt->myDoc->extSubset = NULL;
12406 if (ret != NULL) {
12407 xmlNodePtr tmp;
12409 ret->doc = NULL;
12410 tmp = ret->children;
12411 while (tmp != NULL) {
12412 tmp->doc = NULL;
12413 tmp = tmp->next;
12416 } else {
12417 ret = NULL;
12419 xmlFreeDoc(ctxt->myDoc);
12420 ctxt->myDoc = NULL;
12422 xmlFreeParserCtxt(ctxt);
12424 return(ret);
12428 * xmlSAXParseDTD:
12429 * @sax: the SAX handler block
12430 * @ExternalID: a NAME* containing the External ID of the DTD
12431 * @SystemID: a NAME* containing the URL to the DTD
12433 * DEPRECATED: Don't use.
12435 * Load and parse an external subset.
12437 * Returns the resulting xmlDtdPtr or NULL in case of error.
12440 xmlDtdPtr
12441 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12442 const xmlChar *SystemID) {
12443 xmlDtdPtr ret = NULL;
12444 xmlParserCtxtPtr ctxt;
12445 xmlParserInputPtr input = NULL;
12446 xmlCharEncoding enc;
12447 xmlChar* systemIdCanonic;
12449 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12451 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12452 if (ctxt == NULL) {
12453 return(NULL);
12456 /* We are loading a DTD */
12457 ctxt->options |= XML_PARSE_DTDLOAD;
12460 * Canonicalise the system ID
12462 systemIdCanonic = xmlCanonicPath(SystemID);
12463 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12464 xmlFreeParserCtxt(ctxt);
12465 return(NULL);
12469 * Ask the Entity resolver to load the damn thing
12472 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12473 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12474 systemIdCanonic);
12475 if (input == NULL) {
12476 xmlFreeParserCtxt(ctxt);
12477 if (systemIdCanonic != NULL)
12478 xmlFree(systemIdCanonic);
12479 return(NULL);
12483 * plug some encoding conversion routines here.
12485 if (xmlPushInput(ctxt, input) < 0) {
12486 xmlFreeParserCtxt(ctxt);
12487 if (systemIdCanonic != NULL)
12488 xmlFree(systemIdCanonic);
12489 return(NULL);
12491 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12492 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12493 xmlSwitchEncoding(ctxt, enc);
12496 if (input->filename == NULL)
12497 input->filename = (char *) systemIdCanonic;
12498 else
12499 xmlFree(systemIdCanonic);
12500 input->line = 1;
12501 input->col = 1;
12502 input->base = ctxt->input->cur;
12503 input->cur = ctxt->input->cur;
12504 input->free = NULL;
12507 * let's parse that entity knowing it's an external subset.
12509 ctxt->inSubset = 2;
12510 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12511 if (ctxt->myDoc == NULL) {
12512 xmlErrMemory(ctxt, "New Doc failed");
12513 xmlFreeParserCtxt(ctxt);
12514 return(NULL);
12516 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12517 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12518 ExternalID, SystemID);
12519 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12521 if (ctxt->myDoc != NULL) {
12522 if (ctxt->wellFormed) {
12523 ret = ctxt->myDoc->extSubset;
12524 ctxt->myDoc->extSubset = NULL;
12525 if (ret != NULL) {
12526 xmlNodePtr tmp;
12528 ret->doc = NULL;
12529 tmp = ret->children;
12530 while (tmp != NULL) {
12531 tmp->doc = NULL;
12532 tmp = tmp->next;
12535 } else {
12536 ret = NULL;
12538 xmlFreeDoc(ctxt->myDoc);
12539 ctxt->myDoc = NULL;
12541 xmlFreeParserCtxt(ctxt);
12543 return(ret);
12548 * xmlParseDTD:
12549 * @ExternalID: a NAME* containing the External ID of the DTD
12550 * @SystemID: a NAME* containing the URL to the DTD
12552 * Load and parse an external subset.
12554 * Returns the resulting xmlDtdPtr or NULL in case of error.
12557 xmlDtdPtr
12558 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12559 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12561 #endif /* LIBXML_VALID_ENABLED */
12563 /************************************************************************
12565 * Front ends when parsing an Entity *
12567 ************************************************************************/
12570 * xmlParseCtxtExternalEntity:
12571 * @ctx: the existing parsing context
12572 * @URL: the URL for the entity to load
12573 * @ID: the System ID for the entity to load
12574 * @lst: the return value for the set of parsed nodes
12576 * Parse an external general entity within an existing parsing context
12577 * An external general parsed entity is well-formed if it matches the
12578 * production labeled extParsedEnt.
12580 * [78] extParsedEnt ::= TextDecl? content
12582 * Returns 0 if the entity is well formed, -1 in case of args problem and
12583 * the parser error code otherwise
12587 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12588 const xmlChar *ID, xmlNodePtr *lst) {
12589 void *userData;
12591 if (ctx == NULL) return(-1);
12593 * If the user provided their own SAX callbacks, then reuse the
12594 * userData callback field, otherwise the expected setup in a
12595 * DOM builder is to have userData == ctxt
12597 if (ctx->userData == ctx)
12598 userData = NULL;
12599 else
12600 userData = ctx->userData;
12601 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12602 userData, ctx->depth + 1,
12603 URL, ID, lst);
12607 * xmlParseExternalEntityPrivate:
12608 * @doc: the document the chunk pertains to
12609 * @oldctxt: the previous parser context if available
12610 * @sax: the SAX handler block (possibly NULL)
12611 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12612 * @depth: Used for loop detection, use 0
12613 * @URL: the URL for the entity to load
12614 * @ID: the System ID for the entity to load
12615 * @list: the return value for the set of parsed nodes
12617 * Private version of xmlParseExternalEntity()
12619 * Returns 0 if the entity is well formed, -1 in case of args problem and
12620 * the parser error code otherwise
12623 static xmlParserErrors
12624 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12625 xmlSAXHandlerPtr sax,
12626 void *user_data, int depth, const xmlChar *URL,
12627 const xmlChar *ID, xmlNodePtr *list) {
12628 xmlParserCtxtPtr ctxt;
12629 xmlDocPtr newDoc;
12630 xmlNodePtr newRoot;
12631 xmlParserErrors ret = XML_ERR_OK;
12632 xmlChar start[4];
12633 xmlCharEncoding enc;
12635 if (((depth > 40) &&
12636 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12637 (depth > 100)) {
12638 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12639 "Maximum entity nesting depth exceeded");
12640 return(XML_ERR_ENTITY_LOOP);
12643 if (list != NULL)
12644 *list = NULL;
12645 if ((URL == NULL) && (ID == NULL))
12646 return(XML_ERR_INTERNAL_ERROR);
12647 if (doc == NULL)
12648 return(XML_ERR_INTERNAL_ERROR);
12650 ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12651 oldctxt);
12652 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12653 if (oldctxt != NULL) {
12654 ctxt->nbErrors = oldctxt->nbErrors;
12655 ctxt->nbWarnings = oldctxt->nbWarnings;
12657 xmlDetectSAX2(ctxt);
12659 newDoc = xmlNewDoc(BAD_CAST "1.0");
12660 if (newDoc == NULL) {
12661 xmlFreeParserCtxt(ctxt);
12662 return(XML_ERR_INTERNAL_ERROR);
12664 newDoc->properties = XML_DOC_INTERNAL;
12665 if (doc) {
12666 newDoc->intSubset = doc->intSubset;
12667 newDoc->extSubset = doc->extSubset;
12668 if (doc->dict) {
12669 newDoc->dict = doc->dict;
12670 xmlDictReference(newDoc->dict);
12672 if (doc->URL != NULL) {
12673 newDoc->URL = xmlStrdup(doc->URL);
12676 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12677 if (newRoot == NULL) {
12678 if (sax != NULL)
12679 xmlFreeParserCtxt(ctxt);
12680 newDoc->intSubset = NULL;
12681 newDoc->extSubset = NULL;
12682 xmlFreeDoc(newDoc);
12683 return(XML_ERR_INTERNAL_ERROR);
12685 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12686 nodePush(ctxt, newDoc->children);
12687 if (doc == NULL) {
12688 ctxt->myDoc = newDoc;
12689 } else {
12690 ctxt->myDoc = doc;
12691 newRoot->doc = doc;
12695 * Get the 4 first bytes and decode the charset
12696 * if enc != XML_CHAR_ENCODING_NONE
12697 * plug some encoding conversion routines.
12699 GROW;
12700 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12701 start[0] = RAW;
12702 start[1] = NXT(1);
12703 start[2] = NXT(2);
12704 start[3] = NXT(3);
12705 enc = xmlDetectCharEncoding(start, 4);
12706 if (enc != XML_CHAR_ENCODING_NONE) {
12707 xmlSwitchEncoding(ctxt, enc);
12712 * Parse a possible text declaration first
12714 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12715 xmlParseTextDecl(ctxt);
12717 * An XML-1.0 document can't reference an entity not XML-1.0
12719 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12720 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12721 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12722 "Version mismatch between document and entity\n");
12726 ctxt->instate = XML_PARSER_CONTENT;
12727 ctxt->depth = depth;
12728 if (oldctxt != NULL) {
12729 ctxt->_private = oldctxt->_private;
12730 ctxt->loadsubset = oldctxt->loadsubset;
12731 ctxt->validate = oldctxt->validate;
12732 ctxt->valid = oldctxt->valid;
12733 ctxt->replaceEntities = oldctxt->replaceEntities;
12734 if (oldctxt->validate) {
12735 ctxt->vctxt.error = oldctxt->vctxt.error;
12736 ctxt->vctxt.warning = oldctxt->vctxt.warning;
12737 ctxt->vctxt.userData = oldctxt->vctxt.userData;
12738 ctxt->vctxt.flags = oldctxt->vctxt.flags;
12740 ctxt->external = oldctxt->external;
12741 if (ctxt->dict) xmlDictFree(ctxt->dict);
12742 ctxt->dict = oldctxt->dict;
12743 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12744 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12745 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12746 ctxt->dictNames = oldctxt->dictNames;
12747 ctxt->attsDefault = oldctxt->attsDefault;
12748 ctxt->attsSpecial = oldctxt->attsSpecial;
12749 ctxt->linenumbers = oldctxt->linenumbers;
12750 ctxt->record_info = oldctxt->record_info;
12751 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12752 ctxt->node_seq.length = oldctxt->node_seq.length;
12753 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12754 } else {
12756 * Doing validity checking on chunk without context
12757 * doesn't make sense
12759 ctxt->_private = NULL;
12760 ctxt->validate = 0;
12761 ctxt->external = 2;
12762 ctxt->loadsubset = 0;
12765 xmlParseContent(ctxt);
12767 if ((RAW == '<') && (NXT(1) == '/')) {
12768 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12769 } else if (RAW != 0) {
12770 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12772 if (ctxt->node != newDoc->children) {
12773 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12776 if (!ctxt->wellFormed) {
12777 ret = (xmlParserErrors)ctxt->errNo;
12778 if (oldctxt != NULL) {
12779 oldctxt->errNo = ctxt->errNo;
12780 oldctxt->wellFormed = 0;
12781 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12783 } else {
12784 if (list != NULL) {
12785 xmlNodePtr cur;
12788 * Return the newly created nodeset after unlinking it from
12789 * they pseudo parent.
12791 cur = newDoc->children->children;
12792 *list = cur;
12793 while (cur != NULL) {
12794 cur->parent = NULL;
12795 cur = cur->next;
12797 newDoc->children->children = NULL;
12799 ret = XML_ERR_OK;
12803 * Also record the size of the entity parsed
12805 if (ctxt->input != NULL && oldctxt != NULL) {
12806 unsigned long consumed = ctxt->input->consumed;
12808 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12810 xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12811 xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12813 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12814 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12817 if (oldctxt != NULL) {
12818 ctxt->dict = NULL;
12819 ctxt->attsDefault = NULL;
12820 ctxt->attsSpecial = NULL;
12821 oldctxt->nbErrors = ctxt->nbErrors;
12822 oldctxt->nbWarnings = ctxt->nbWarnings;
12823 oldctxt->validate = ctxt->validate;
12824 oldctxt->valid = ctxt->valid;
12825 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12826 oldctxt->node_seq.length = ctxt->node_seq.length;
12827 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12829 ctxt->node_seq.maximum = 0;
12830 ctxt->node_seq.length = 0;
12831 ctxt->node_seq.buffer = NULL;
12832 xmlFreeParserCtxt(ctxt);
12833 newDoc->intSubset = NULL;
12834 newDoc->extSubset = NULL;
12835 xmlFreeDoc(newDoc);
12837 return(ret);
12840 #ifdef LIBXML_SAX1_ENABLED
12842 * xmlParseExternalEntity:
12843 * @doc: the document the chunk pertains to
12844 * @sax: the SAX handler block (possibly NULL)
12845 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12846 * @depth: Used for loop detection, use 0
12847 * @URL: the URL for the entity to load
12848 * @ID: the System ID for the entity to load
12849 * @lst: the return value for the set of parsed nodes
12851 * Parse an external general entity
12852 * An external general parsed entity is well-formed if it matches the
12853 * production labeled extParsedEnt.
12855 * [78] extParsedEnt ::= TextDecl? content
12857 * Returns 0 if the entity is well formed, -1 in case of args problem and
12858 * the parser error code otherwise
12862 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12863 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12864 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12865 ID, lst));
12869 * xmlParseBalancedChunkMemory:
12870 * @doc: the document the chunk pertains to (must not be NULL)
12871 * @sax: the SAX handler block (possibly NULL)
12872 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12873 * @depth: Used for loop detection, use 0
12874 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12875 * @lst: the return value for the set of parsed nodes
12877 * Parse a well-balanced chunk of an XML document
12878 * called by the parser
12879 * The allowed sequence for the Well Balanced Chunk is the one defined by
12880 * the content production in the XML grammar:
12882 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12884 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12885 * the parser error code otherwise
12889 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12890 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12891 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12892 depth, string, lst, 0 );
12894 #endif /* LIBXML_SAX1_ENABLED */
12897 * xmlParseBalancedChunkMemoryInternal:
12898 * @oldctxt: the existing parsing context
12899 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12900 * @user_data: the user data field for the parser context
12901 * @lst: the return value for the set of parsed nodes
12904 * Parse a well-balanced chunk of an XML document
12905 * called by the parser
12906 * The allowed sequence for the Well Balanced Chunk is the one defined by
12907 * the content production in the XML grammar:
12909 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12911 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12912 * error code otherwise
12914 * In case recover is set to 1, the nodelist will not be empty even if
12915 * the parsed chunk is not well balanced.
12917 static xmlParserErrors
12918 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12919 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12920 xmlParserCtxtPtr ctxt;
12921 xmlDocPtr newDoc = NULL;
12922 xmlNodePtr newRoot;
12923 xmlSAXHandlerPtr oldsax = NULL;
12924 xmlNodePtr content = NULL;
12925 xmlNodePtr last = NULL;
12926 int size;
12927 xmlParserErrors ret = XML_ERR_OK;
12928 #ifdef SAX2
12929 int i;
12930 #endif
12932 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12933 (oldctxt->depth > 100)) {
12934 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12935 "Maximum entity nesting depth exceeded");
12936 return(XML_ERR_ENTITY_LOOP);
12940 if (lst != NULL)
12941 *lst = NULL;
12942 if (string == NULL)
12943 return(XML_ERR_INTERNAL_ERROR);
12945 size = xmlStrlen(string);
12947 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12948 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12949 ctxt->nbErrors = oldctxt->nbErrors;
12950 ctxt->nbWarnings = oldctxt->nbWarnings;
12951 if (user_data != NULL)
12952 ctxt->userData = user_data;
12953 else
12954 ctxt->userData = ctxt;
12955 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12956 ctxt->dict = oldctxt->dict;
12957 ctxt->input_id = oldctxt->input_id;
12958 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12959 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12960 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12962 #ifdef SAX2
12963 /* propagate namespaces down the entity */
12964 for (i = 0;i < oldctxt->nsNr;i += 2) {
12965 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12967 #endif
12969 oldsax = ctxt->sax;
12970 ctxt->sax = oldctxt->sax;
12971 xmlDetectSAX2(ctxt);
12972 ctxt->replaceEntities = oldctxt->replaceEntities;
12973 ctxt->options = oldctxt->options;
12975 ctxt->_private = oldctxt->_private;
12976 if (oldctxt->myDoc == NULL) {
12977 newDoc = xmlNewDoc(BAD_CAST "1.0");
12978 if (newDoc == NULL) {
12979 ctxt->sax = oldsax;
12980 ctxt->dict = NULL;
12981 xmlFreeParserCtxt(ctxt);
12982 return(XML_ERR_INTERNAL_ERROR);
12984 newDoc->properties = XML_DOC_INTERNAL;
12985 newDoc->dict = ctxt->dict;
12986 xmlDictReference(newDoc->dict);
12987 ctxt->myDoc = newDoc;
12988 } else {
12989 ctxt->myDoc = oldctxt->myDoc;
12990 content = ctxt->myDoc->children;
12991 last = ctxt->myDoc->last;
12993 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12994 if (newRoot == NULL) {
12995 ctxt->sax = oldsax;
12996 ctxt->dict = NULL;
12997 xmlFreeParserCtxt(ctxt);
12998 if (newDoc != NULL) {
12999 xmlFreeDoc(newDoc);
13001 return(XML_ERR_INTERNAL_ERROR);
13003 ctxt->myDoc->children = NULL;
13004 ctxt->myDoc->last = NULL;
13005 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13006 nodePush(ctxt, ctxt->myDoc->children);
13007 ctxt->instate = XML_PARSER_CONTENT;
13008 ctxt->depth = oldctxt->depth;
13010 ctxt->validate = 0;
13011 ctxt->loadsubset = oldctxt->loadsubset;
13012 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13014 * ID/IDREF registration will be done in xmlValidateElement below
13016 ctxt->loadsubset |= XML_SKIP_IDS;
13018 ctxt->dictNames = oldctxt->dictNames;
13019 ctxt->attsDefault = oldctxt->attsDefault;
13020 ctxt->attsSpecial = oldctxt->attsSpecial;
13022 xmlParseContent(ctxt);
13023 if ((RAW == '<') && (NXT(1) == '/')) {
13024 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13025 } else if (RAW != 0) {
13026 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13028 if (ctxt->node != ctxt->myDoc->children) {
13029 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13032 if (!ctxt->wellFormed) {
13033 ret = (xmlParserErrors)ctxt->errNo;
13034 oldctxt->errNo = ctxt->errNo;
13035 oldctxt->wellFormed = 0;
13036 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13037 } else {
13038 ret = XML_ERR_OK;
13041 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13042 xmlNodePtr cur;
13045 * Return the newly created nodeset after unlinking it from
13046 * they pseudo parent.
13048 cur = ctxt->myDoc->children->children;
13049 *lst = cur;
13050 while (cur != NULL) {
13051 #ifdef LIBXML_VALID_ENABLED
13052 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13053 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13054 (cur->type == XML_ELEMENT_NODE)) {
13055 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13056 oldctxt->myDoc, cur);
13058 #endif /* LIBXML_VALID_ENABLED */
13059 cur->parent = NULL;
13060 cur = cur->next;
13062 ctxt->myDoc->children->children = NULL;
13064 if (ctxt->myDoc != NULL) {
13065 xmlFreeNode(ctxt->myDoc->children);
13066 ctxt->myDoc->children = content;
13067 ctxt->myDoc->last = last;
13071 * Also record the size of the entity parsed
13073 if (ctxt->input != NULL && oldctxt != NULL) {
13074 unsigned long consumed = ctxt->input->consumed;
13076 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13078 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13079 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13082 oldctxt->nbErrors = ctxt->nbErrors;
13083 oldctxt->nbWarnings = ctxt->nbWarnings;
13084 ctxt->sax = oldsax;
13085 ctxt->dict = NULL;
13086 ctxt->attsDefault = NULL;
13087 ctxt->attsSpecial = NULL;
13088 xmlFreeParserCtxt(ctxt);
13089 if (newDoc != NULL) {
13090 xmlFreeDoc(newDoc);
13093 return(ret);
13097 * xmlParseInNodeContext:
13098 * @node: the context node
13099 * @data: the input string
13100 * @datalen: the input string length in bytes
13101 * @options: a combination of xmlParserOption
13102 * @lst: the return value for the set of parsed nodes
13104 * Parse a well-balanced chunk of an XML document
13105 * within the context (DTD, namespaces, etc ...) of the given node.
13107 * The allowed sequence for the data is a Well Balanced Chunk defined by
13108 * the content production in the XML grammar:
13110 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13112 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13113 * error code otherwise
13115 xmlParserErrors
13116 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13117 int options, xmlNodePtr *lst) {
13118 #ifdef SAX2
13119 xmlParserCtxtPtr ctxt;
13120 xmlDocPtr doc = NULL;
13121 xmlNodePtr fake, cur;
13122 int nsnr = 0;
13124 xmlParserErrors ret = XML_ERR_OK;
13127 * check all input parameters, grab the document
13129 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13130 return(XML_ERR_INTERNAL_ERROR);
13131 switch (node->type) {
13132 case XML_ELEMENT_NODE:
13133 case XML_ATTRIBUTE_NODE:
13134 case XML_TEXT_NODE:
13135 case XML_CDATA_SECTION_NODE:
13136 case XML_ENTITY_REF_NODE:
13137 case XML_PI_NODE:
13138 case XML_COMMENT_NODE:
13139 case XML_DOCUMENT_NODE:
13140 case XML_HTML_DOCUMENT_NODE:
13141 break;
13142 default:
13143 return(XML_ERR_INTERNAL_ERROR);
13146 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13147 (node->type != XML_DOCUMENT_NODE) &&
13148 (node->type != XML_HTML_DOCUMENT_NODE))
13149 node = node->parent;
13150 if (node == NULL)
13151 return(XML_ERR_INTERNAL_ERROR);
13152 if (node->type == XML_ELEMENT_NODE)
13153 doc = node->doc;
13154 else
13155 doc = (xmlDocPtr) node;
13156 if (doc == NULL)
13157 return(XML_ERR_INTERNAL_ERROR);
13160 * allocate a context and set-up everything not related to the
13161 * node position in the tree
13163 if (doc->type == XML_DOCUMENT_NODE)
13164 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13165 #ifdef LIBXML_HTML_ENABLED
13166 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13167 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13169 * When parsing in context, it makes no sense to add implied
13170 * elements like html/body/etc...
13172 options |= HTML_PARSE_NOIMPLIED;
13174 #endif
13175 else
13176 return(XML_ERR_INTERNAL_ERROR);
13178 if (ctxt == NULL)
13179 return(XML_ERR_NO_MEMORY);
13182 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13183 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13184 * we must wait until the last moment to free the original one.
13186 if (doc->dict != NULL) {
13187 if (ctxt->dict != NULL)
13188 xmlDictFree(ctxt->dict);
13189 ctxt->dict = doc->dict;
13190 } else
13191 options |= XML_PARSE_NODICT;
13193 if (doc->encoding != NULL) {
13194 xmlCharEncodingHandlerPtr hdlr;
13196 if (ctxt->encoding != NULL)
13197 xmlFree((xmlChar *) ctxt->encoding);
13198 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13200 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13201 if (hdlr != NULL) {
13202 xmlSwitchToEncoding(ctxt, hdlr);
13203 } else {
13204 return(XML_ERR_UNSUPPORTED_ENCODING);
13208 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13209 xmlDetectSAX2(ctxt);
13210 ctxt->myDoc = doc;
13211 /* parsing in context, i.e. as within existing content */
13212 ctxt->input_id = 2;
13213 ctxt->instate = XML_PARSER_CONTENT;
13215 fake = xmlNewDocComment(node->doc, NULL);
13216 if (fake == NULL) {
13217 xmlFreeParserCtxt(ctxt);
13218 return(XML_ERR_NO_MEMORY);
13220 xmlAddChild(node, fake);
13222 if (node->type == XML_ELEMENT_NODE) {
13223 nodePush(ctxt, node);
13225 * initialize the SAX2 namespaces stack
13227 cur = node;
13228 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13229 xmlNsPtr ns = cur->nsDef;
13230 const xmlChar *iprefix, *ihref;
13232 while (ns != NULL) {
13233 if (ctxt->dict) {
13234 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13235 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13236 } else {
13237 iprefix = ns->prefix;
13238 ihref = ns->href;
13241 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13242 nsPush(ctxt, iprefix, ihref);
13243 nsnr++;
13245 ns = ns->next;
13247 cur = cur->parent;
13251 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13253 * ID/IDREF registration will be done in xmlValidateElement below
13255 ctxt->loadsubset |= XML_SKIP_IDS;
13258 #ifdef LIBXML_HTML_ENABLED
13259 if (doc->type == XML_HTML_DOCUMENT_NODE)
13260 __htmlParseContent(ctxt);
13261 else
13262 #endif
13263 xmlParseContent(ctxt);
13265 nsPop(ctxt, nsnr);
13266 if ((RAW == '<') && (NXT(1) == '/')) {
13267 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13268 } else if (RAW != 0) {
13269 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13271 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13272 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13273 ctxt->wellFormed = 0;
13276 if (!ctxt->wellFormed) {
13277 if (ctxt->errNo == 0)
13278 ret = XML_ERR_INTERNAL_ERROR;
13279 else
13280 ret = (xmlParserErrors)ctxt->errNo;
13281 } else {
13282 ret = XML_ERR_OK;
13286 * Return the newly created nodeset after unlinking it from
13287 * the pseudo sibling.
13290 cur = fake->next;
13291 fake->next = NULL;
13292 node->last = fake;
13294 if (cur != NULL) {
13295 cur->prev = NULL;
13298 *lst = cur;
13300 while (cur != NULL) {
13301 cur->parent = NULL;
13302 cur = cur->next;
13305 xmlUnlinkNode(fake);
13306 xmlFreeNode(fake);
13309 if (ret != XML_ERR_OK) {
13310 xmlFreeNodeList(*lst);
13311 *lst = NULL;
13314 if (doc->dict != NULL)
13315 ctxt->dict = NULL;
13316 xmlFreeParserCtxt(ctxt);
13318 return(ret);
13319 #else /* !SAX2 */
13320 return(XML_ERR_INTERNAL_ERROR);
13321 #endif
13324 #ifdef LIBXML_SAX1_ENABLED
13326 * xmlParseBalancedChunkMemoryRecover:
13327 * @doc: the document the chunk pertains to (must not be NULL)
13328 * @sax: the SAX handler block (possibly NULL)
13329 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13330 * @depth: Used for loop detection, use 0
13331 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13332 * @lst: the return value for the set of parsed nodes
13333 * @recover: return nodes even if the data is broken (use 0)
13336 * Parse a well-balanced chunk of an XML document
13337 * called by the parser
13338 * The allowed sequence for the Well Balanced Chunk is the one defined by
13339 * the content production in the XML grammar:
13341 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13343 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13344 * the parser error code otherwise
13346 * In case recover is set to 1, the nodelist will not be empty even if
13347 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13348 * some extent.
13351 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13352 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13353 int recover) {
13354 xmlParserCtxtPtr ctxt;
13355 xmlDocPtr newDoc;
13356 xmlSAXHandlerPtr oldsax = NULL;
13357 xmlNodePtr content, newRoot;
13358 int size;
13359 int ret = 0;
13361 if (depth > 40) {
13362 return(XML_ERR_ENTITY_LOOP);
13366 if (lst != NULL)
13367 *lst = NULL;
13368 if (string == NULL)
13369 return(-1);
13371 size = xmlStrlen(string);
13373 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13374 if (ctxt == NULL) return(-1);
13375 ctxt->userData = ctxt;
13376 if (sax != NULL) {
13377 oldsax = ctxt->sax;
13378 ctxt->sax = sax;
13379 if (user_data != NULL)
13380 ctxt->userData = user_data;
13382 newDoc = xmlNewDoc(BAD_CAST "1.0");
13383 if (newDoc == NULL) {
13384 xmlFreeParserCtxt(ctxt);
13385 return(-1);
13387 newDoc->properties = XML_DOC_INTERNAL;
13388 if ((doc != NULL) && (doc->dict != NULL)) {
13389 xmlDictFree(ctxt->dict);
13390 ctxt->dict = doc->dict;
13391 xmlDictReference(ctxt->dict);
13392 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13393 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13394 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13395 ctxt->dictNames = 1;
13396 } else {
13397 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13399 /* doc == NULL is only supported for historic reasons */
13400 if (doc != NULL) {
13401 newDoc->intSubset = doc->intSubset;
13402 newDoc->extSubset = doc->extSubset;
13404 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13405 if (newRoot == NULL) {
13406 if (sax != NULL)
13407 ctxt->sax = oldsax;
13408 xmlFreeParserCtxt(ctxt);
13409 newDoc->intSubset = NULL;
13410 newDoc->extSubset = NULL;
13411 xmlFreeDoc(newDoc);
13412 return(-1);
13414 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13415 nodePush(ctxt, newRoot);
13416 /* doc == NULL is only supported for historic reasons */
13417 if (doc == NULL) {
13418 ctxt->myDoc = newDoc;
13419 } else {
13420 ctxt->myDoc = newDoc;
13421 newDoc->children->doc = doc;
13422 /* Ensure that doc has XML spec namespace */
13423 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13424 newDoc->oldNs = doc->oldNs;
13426 ctxt->instate = XML_PARSER_CONTENT;
13427 ctxt->input_id = 2;
13428 ctxt->depth = depth;
13431 * Doing validity checking on chunk doesn't make sense
13433 ctxt->validate = 0;
13434 ctxt->loadsubset = 0;
13435 xmlDetectSAX2(ctxt);
13437 if ( doc != NULL ){
13438 content = doc->children;
13439 doc->children = NULL;
13440 xmlParseContent(ctxt);
13441 doc->children = content;
13443 else {
13444 xmlParseContent(ctxt);
13446 if ((RAW == '<') && (NXT(1) == '/')) {
13447 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13448 } else if (RAW != 0) {
13449 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13451 if (ctxt->node != newDoc->children) {
13452 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13455 if (!ctxt->wellFormed) {
13456 if (ctxt->errNo == 0)
13457 ret = 1;
13458 else
13459 ret = ctxt->errNo;
13460 } else {
13461 ret = 0;
13464 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13465 xmlNodePtr cur;
13468 * Return the newly created nodeset after unlinking it from
13469 * they pseudo parent.
13471 cur = newDoc->children->children;
13472 *lst = cur;
13473 while (cur != NULL) {
13474 xmlSetTreeDoc(cur, doc);
13475 cur->parent = NULL;
13476 cur = cur->next;
13478 newDoc->children->children = NULL;
13481 if (sax != NULL)
13482 ctxt->sax = oldsax;
13483 xmlFreeParserCtxt(ctxt);
13484 newDoc->intSubset = NULL;
13485 newDoc->extSubset = NULL;
13486 /* This leaks the namespace list if doc == NULL */
13487 newDoc->oldNs = NULL;
13488 xmlFreeDoc(newDoc);
13490 return(ret);
13494 * xmlSAXParseEntity:
13495 * @sax: the SAX handler block
13496 * @filename: the filename
13498 * DEPRECATED: Don't use.
13500 * parse an XML external entity out of context and build a tree.
13501 * It use the given SAX function block to handle the parsing callback.
13502 * If sax is NULL, fallback to the default DOM tree building routines.
13504 * [78] extParsedEnt ::= TextDecl? content
13506 * This correspond to a "Well Balanced" chunk
13508 * Returns the resulting document tree
13511 xmlDocPtr
13512 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13513 xmlDocPtr ret;
13514 xmlParserCtxtPtr ctxt;
13516 ctxt = xmlCreateFileParserCtxt(filename);
13517 if (ctxt == NULL) {
13518 return(NULL);
13520 if (sax != NULL) {
13521 if (ctxt->sax != NULL)
13522 xmlFree(ctxt->sax);
13523 ctxt->sax = sax;
13524 ctxt->userData = NULL;
13527 xmlParseExtParsedEnt(ctxt);
13529 if (ctxt->wellFormed)
13530 ret = ctxt->myDoc;
13531 else {
13532 ret = NULL;
13533 xmlFreeDoc(ctxt->myDoc);
13534 ctxt->myDoc = NULL;
13536 if (sax != NULL)
13537 ctxt->sax = NULL;
13538 xmlFreeParserCtxt(ctxt);
13540 return(ret);
13544 * xmlParseEntity:
13545 * @filename: the filename
13547 * parse an XML external entity out of context and build a tree.
13549 * [78] extParsedEnt ::= TextDecl? content
13551 * This correspond to a "Well Balanced" chunk
13553 * Returns the resulting document tree
13556 xmlDocPtr
13557 xmlParseEntity(const char *filename) {
13558 return(xmlSAXParseEntity(NULL, filename));
13560 #endif /* LIBXML_SAX1_ENABLED */
13563 * xmlCreateEntityParserCtxtInternal:
13564 * @URL: the entity URL
13565 * @ID: the entity PUBLIC ID
13566 * @base: a possible base for the target URI
13567 * @pctx: parser context used to set options on new context
13569 * Create a parser context for an external entity
13570 * Automatic support for ZLIB/Compress compressed document is provided
13571 * by default if found at compile-time.
13573 * Returns the new parser context or NULL
13575 static xmlParserCtxtPtr
13576 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13577 const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13578 xmlParserCtxtPtr pctx) {
13579 xmlParserCtxtPtr ctxt;
13580 xmlParserInputPtr inputStream;
13581 char *directory = NULL;
13582 xmlChar *uri;
13584 ctxt = xmlNewSAXParserCtxt(sax, userData);
13585 if (ctxt == NULL) {
13586 return(NULL);
13589 if (pctx != NULL) {
13590 ctxt->options = pctx->options;
13591 ctxt->_private = pctx->_private;
13592 ctxt->input_id = pctx->input_id;
13595 /* Don't read from stdin. */
13596 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13597 URL = BAD_CAST "./-";
13599 uri = xmlBuildURI(URL, base);
13601 if (uri == NULL) {
13602 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13603 if (inputStream == NULL) {
13604 xmlFreeParserCtxt(ctxt);
13605 return(NULL);
13608 inputPush(ctxt, inputStream);
13610 if ((ctxt->directory == NULL) && (directory == NULL))
13611 directory = xmlParserGetDirectory((char *)URL);
13612 if ((ctxt->directory == NULL) && (directory != NULL))
13613 ctxt->directory = directory;
13614 } else {
13615 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13616 if (inputStream == NULL) {
13617 xmlFree(uri);
13618 xmlFreeParserCtxt(ctxt);
13619 return(NULL);
13622 inputPush(ctxt, inputStream);
13624 if ((ctxt->directory == NULL) && (directory == NULL))
13625 directory = xmlParserGetDirectory((char *)uri);
13626 if ((ctxt->directory == NULL) && (directory != NULL))
13627 ctxt->directory = directory;
13628 xmlFree(uri);
13630 return(ctxt);
13634 * xmlCreateEntityParserCtxt:
13635 * @URL: the entity URL
13636 * @ID: the entity PUBLIC ID
13637 * @base: a possible base for the target URI
13639 * Create a parser context for an external entity
13640 * Automatic support for ZLIB/Compress compressed document is provided
13641 * by default if found at compile-time.
13643 * Returns the new parser context or NULL
13645 xmlParserCtxtPtr
13646 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13647 const xmlChar *base) {
13648 return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13652 /************************************************************************
13654 * Front ends when parsing from a file *
13656 ************************************************************************/
13659 * xmlCreateURLParserCtxt:
13660 * @filename: the filename or URL
13661 * @options: a combination of xmlParserOption
13663 * Create a parser context for a file or URL content.
13664 * Automatic support for ZLIB/Compress compressed document is provided
13665 * by default if found at compile-time and for file accesses
13667 * Returns the new parser context or NULL
13669 xmlParserCtxtPtr
13670 xmlCreateURLParserCtxt(const char *filename, int options)
13672 xmlParserCtxtPtr ctxt;
13673 xmlParserInputPtr inputStream;
13674 char *directory = NULL;
13676 ctxt = xmlNewParserCtxt();
13677 if (ctxt == NULL) {
13678 xmlErrMemory(NULL, "cannot allocate parser context");
13679 return(NULL);
13682 if (options)
13683 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13684 ctxt->linenumbers = 1;
13686 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13687 if (inputStream == NULL) {
13688 xmlFreeParserCtxt(ctxt);
13689 return(NULL);
13692 inputPush(ctxt, inputStream);
13693 if ((ctxt->directory == NULL) && (directory == NULL))
13694 directory = xmlParserGetDirectory(filename);
13695 if ((ctxt->directory == NULL) && (directory != NULL))
13696 ctxt->directory = directory;
13698 return(ctxt);
13702 * xmlCreateFileParserCtxt:
13703 * @filename: the filename
13705 * Create a parser context for a file content.
13706 * Automatic support for ZLIB/Compress compressed document is provided
13707 * by default if found at compile-time.
13709 * Returns the new parser context or NULL
13711 xmlParserCtxtPtr
13712 xmlCreateFileParserCtxt(const char *filename)
13714 return(xmlCreateURLParserCtxt(filename, 0));
13717 #ifdef LIBXML_SAX1_ENABLED
13719 * xmlSAXParseFileWithData:
13720 * @sax: the SAX handler block
13721 * @filename: the filename
13722 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13723 * documents
13724 * @data: the userdata
13726 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13728 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13729 * compressed document is provided by default if found at compile-time.
13730 * It use the given SAX function block to handle the parsing callback.
13731 * If sax is NULL, fallback to the default DOM tree building routines.
13733 * User data (void *) is stored within the parser context in the
13734 * context's _private member, so it is available nearly everywhere in libxml
13736 * Returns the resulting document tree
13739 xmlDocPtr
13740 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13741 int recovery, void *data) {
13742 xmlDocPtr ret;
13743 xmlParserCtxtPtr ctxt;
13745 xmlInitParser();
13747 ctxt = xmlCreateFileParserCtxt(filename);
13748 if (ctxt == NULL) {
13749 return(NULL);
13751 if (sax != NULL) {
13752 if (ctxt->sax != NULL)
13753 xmlFree(ctxt->sax);
13754 ctxt->sax = sax;
13756 xmlDetectSAX2(ctxt);
13757 if (data!=NULL) {
13758 ctxt->_private = data;
13761 if (ctxt->directory == NULL)
13762 ctxt->directory = xmlParserGetDirectory(filename);
13764 ctxt->recovery = recovery;
13766 xmlParseDocument(ctxt);
13768 if ((ctxt->wellFormed) || recovery) {
13769 ret = ctxt->myDoc;
13770 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13771 if (ctxt->input->buf->compressed > 0)
13772 ret->compression = 9;
13773 else
13774 ret->compression = ctxt->input->buf->compressed;
13777 else {
13778 ret = NULL;
13779 xmlFreeDoc(ctxt->myDoc);
13780 ctxt->myDoc = NULL;
13782 if (sax != NULL)
13783 ctxt->sax = NULL;
13784 xmlFreeParserCtxt(ctxt);
13786 return(ret);
13790 * xmlSAXParseFile:
13791 * @sax: the SAX handler block
13792 * @filename: the filename
13793 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13794 * documents
13796 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13798 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13799 * compressed document is provided by default if found at compile-time.
13800 * It use the given SAX function block to handle the parsing callback.
13801 * If sax is NULL, fallback to the default DOM tree building routines.
13803 * Returns the resulting document tree
13806 xmlDocPtr
13807 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13808 int recovery) {
13809 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13813 * xmlRecoverDoc:
13814 * @cur: a pointer to an array of xmlChar
13816 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13818 * parse an XML in-memory document and build a tree.
13819 * In the case the document is not Well Formed, a attempt to build a
13820 * tree is tried anyway
13822 * Returns the resulting document tree or NULL in case of failure
13825 xmlDocPtr
13826 xmlRecoverDoc(const xmlChar *cur) {
13827 return(xmlSAXParseDoc(NULL, cur, 1));
13831 * xmlParseFile:
13832 * @filename: the filename
13834 * DEPRECATED: Use xmlReadFile.
13836 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13837 * compressed document is provided by default if found at compile-time.
13839 * Returns the resulting document tree if the file was wellformed,
13840 * NULL otherwise.
13843 xmlDocPtr
13844 xmlParseFile(const char *filename) {
13845 return(xmlSAXParseFile(NULL, filename, 0));
13849 * xmlRecoverFile:
13850 * @filename: the filename
13852 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13854 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13855 * compressed document is provided by default if found at compile-time.
13856 * In the case the document is not Well Formed, it attempts to build
13857 * a tree anyway
13859 * Returns the resulting document tree or NULL in case of failure
13862 xmlDocPtr
13863 xmlRecoverFile(const char *filename) {
13864 return(xmlSAXParseFile(NULL, filename, 1));
13869 * xmlSetupParserForBuffer:
13870 * @ctxt: an XML parser context
13871 * @buffer: a xmlChar * buffer
13872 * @filename: a file name
13874 * DEPRECATED: Don't use.
13876 * Setup the parser context to parse a new buffer; Clears any prior
13877 * contents from the parser context. The buffer parameter must not be
13878 * NULL, but the filename parameter can be
13880 void
13881 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13882 const char* filename)
13884 xmlParserInputPtr input;
13886 if ((ctxt == NULL) || (buffer == NULL))
13887 return;
13889 input = xmlNewInputStream(ctxt);
13890 if (input == NULL) {
13891 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13892 xmlClearParserCtxt(ctxt);
13893 return;
13896 xmlClearParserCtxt(ctxt);
13897 if (filename != NULL)
13898 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13899 input->base = buffer;
13900 input->cur = buffer;
13901 input->end = &buffer[xmlStrlen(buffer)];
13902 inputPush(ctxt, input);
13906 * xmlSAXUserParseFile:
13907 * @sax: a SAX handler
13908 * @user_data: The user data returned on SAX callbacks
13909 * @filename: a file name
13911 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13913 * parse an XML file and call the given SAX handler routines.
13914 * Automatic support for ZLIB/Compress compressed document is provided
13916 * Returns 0 in case of success or a error number otherwise
13919 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13920 const char *filename) {
13921 int ret = 0;
13922 xmlParserCtxtPtr ctxt;
13924 ctxt = xmlCreateFileParserCtxt(filename);
13925 if (ctxt == NULL) return -1;
13926 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13927 xmlFree(ctxt->sax);
13928 ctxt->sax = sax;
13929 xmlDetectSAX2(ctxt);
13931 if (user_data != NULL)
13932 ctxt->userData = user_data;
13934 xmlParseDocument(ctxt);
13936 if (ctxt->wellFormed)
13937 ret = 0;
13938 else {
13939 if (ctxt->errNo != 0)
13940 ret = ctxt->errNo;
13941 else
13942 ret = -1;
13944 if (sax != NULL)
13945 ctxt->sax = NULL;
13946 if (ctxt->myDoc != NULL) {
13947 xmlFreeDoc(ctxt->myDoc);
13948 ctxt->myDoc = NULL;
13950 xmlFreeParserCtxt(ctxt);
13952 return ret;
13954 #endif /* LIBXML_SAX1_ENABLED */
13956 /************************************************************************
13958 * Front ends when parsing from memory *
13960 ************************************************************************/
13963 * xmlCreateMemoryParserCtxt:
13964 * @buffer: a pointer to a char array
13965 * @size: the size of the array
13967 * Create a parser context for an XML in-memory document.
13969 * Returns the new parser context or NULL
13971 xmlParserCtxtPtr
13972 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13973 xmlParserCtxtPtr ctxt;
13974 xmlParserInputPtr input;
13975 xmlParserInputBufferPtr buf;
13977 if (buffer == NULL)
13978 return(NULL);
13979 if (size <= 0)
13980 return(NULL);
13982 ctxt = xmlNewParserCtxt();
13983 if (ctxt == NULL)
13984 return(NULL);
13986 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13987 if (buf == NULL) {
13988 xmlFreeParserCtxt(ctxt);
13989 return(NULL);
13992 input = xmlNewInputStream(ctxt);
13993 if (input == NULL) {
13994 xmlFreeParserInputBuffer(buf);
13995 xmlFreeParserCtxt(ctxt);
13996 return(NULL);
13999 input->filename = NULL;
14000 input->buf = buf;
14001 xmlBufResetInput(input->buf->buffer, input);
14003 inputPush(ctxt, input);
14004 return(ctxt);
14007 #ifdef LIBXML_SAX1_ENABLED
14009 * xmlSAXParseMemoryWithData:
14010 * @sax: the SAX handler block
14011 * @buffer: an pointer to a char array
14012 * @size: the size of the array
14013 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14014 * documents
14015 * @data: the userdata
14017 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14019 * parse an XML in-memory block and use the given SAX function block
14020 * to handle the parsing callback. If sax is NULL, fallback to the default
14021 * DOM tree building routines.
14023 * User data (void *) is stored within the parser context in the
14024 * context's _private member, so it is available nearly everywhere in libxml
14026 * Returns the resulting document tree
14029 xmlDocPtr
14030 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14031 int size, int recovery, void *data) {
14032 xmlDocPtr ret;
14033 xmlParserCtxtPtr ctxt;
14035 xmlInitParser();
14037 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14038 if (ctxt == NULL) return(NULL);
14039 if (sax != NULL) {
14040 if (ctxt->sax != NULL)
14041 xmlFree(ctxt->sax);
14042 ctxt->sax = sax;
14044 xmlDetectSAX2(ctxt);
14045 if (data!=NULL) {
14046 ctxt->_private=data;
14049 ctxt->recovery = recovery;
14051 xmlParseDocument(ctxt);
14053 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14054 else {
14055 ret = NULL;
14056 xmlFreeDoc(ctxt->myDoc);
14057 ctxt->myDoc = NULL;
14059 if (sax != NULL)
14060 ctxt->sax = NULL;
14061 xmlFreeParserCtxt(ctxt);
14063 return(ret);
14067 * xmlSAXParseMemory:
14068 * @sax: the SAX handler block
14069 * @buffer: an pointer to a char array
14070 * @size: the size of the array
14071 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14072 * documents
14074 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14076 * parse an XML in-memory block and use the given SAX function block
14077 * to handle the parsing callback. If sax is NULL, fallback to the default
14078 * DOM tree building routines.
14080 * Returns the resulting document tree
14082 xmlDocPtr
14083 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14084 int size, int recovery) {
14085 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14089 * xmlParseMemory:
14090 * @buffer: an pointer to a char array
14091 * @size: the size of the array
14093 * DEPRECATED: Use xmlReadMemory.
14095 * parse an XML in-memory block and build a tree.
14097 * Returns the resulting document tree
14100 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14101 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14105 * xmlRecoverMemory:
14106 * @buffer: an pointer to a char array
14107 * @size: the size of the array
14109 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14111 * parse an XML in-memory block and build a tree.
14112 * In the case the document is not Well Formed, an attempt to
14113 * build a tree is tried anyway
14115 * Returns the resulting document tree or NULL in case of error
14118 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14119 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14123 * xmlSAXUserParseMemory:
14124 * @sax: a SAX handler
14125 * @user_data: The user data returned on SAX callbacks
14126 * @buffer: an in-memory XML document input
14127 * @size: the length of the XML document in bytes
14129 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14131 * parse an XML in-memory buffer and call the given SAX handler routines.
14133 * Returns 0 in case of success or a error number otherwise
14135 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14136 const char *buffer, int size) {
14137 int ret = 0;
14138 xmlParserCtxtPtr ctxt;
14140 xmlInitParser();
14142 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14143 if (ctxt == NULL) return -1;
14144 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14145 xmlFree(ctxt->sax);
14146 ctxt->sax = sax;
14147 xmlDetectSAX2(ctxt);
14149 if (user_data != NULL)
14150 ctxt->userData = user_data;
14152 xmlParseDocument(ctxt);
14154 if (ctxt->wellFormed)
14155 ret = 0;
14156 else {
14157 if (ctxt->errNo != 0)
14158 ret = ctxt->errNo;
14159 else
14160 ret = -1;
14162 if (sax != NULL)
14163 ctxt->sax = NULL;
14164 if (ctxt->myDoc != NULL) {
14165 xmlFreeDoc(ctxt->myDoc);
14166 ctxt->myDoc = NULL;
14168 xmlFreeParserCtxt(ctxt);
14170 return ret;
14172 #endif /* LIBXML_SAX1_ENABLED */
14175 * xmlCreateDocParserCtxt:
14176 * @cur: a pointer to an array of xmlChar
14178 * Creates a parser context for an XML in-memory document.
14180 * Returns the new parser context or NULL
14182 xmlParserCtxtPtr
14183 xmlCreateDocParserCtxt(const xmlChar *cur) {
14184 int len;
14186 if (cur == NULL)
14187 return(NULL);
14188 len = xmlStrlen(cur);
14189 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14192 #ifdef LIBXML_SAX1_ENABLED
14194 * xmlSAXParseDoc:
14195 * @sax: the SAX handler block
14196 * @cur: a pointer to an array of xmlChar
14197 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14198 * documents
14200 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14202 * parse an XML in-memory document and build a tree.
14203 * It use the given SAX function block to handle the parsing callback.
14204 * If sax is NULL, fallback to the default DOM tree building routines.
14206 * Returns the resulting document tree
14209 xmlDocPtr
14210 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14211 xmlDocPtr ret;
14212 xmlParserCtxtPtr ctxt;
14213 xmlSAXHandlerPtr oldsax = NULL;
14215 if (cur == NULL) return(NULL);
14218 ctxt = xmlCreateDocParserCtxt(cur);
14219 if (ctxt == NULL) return(NULL);
14220 if (sax != NULL) {
14221 oldsax = ctxt->sax;
14222 ctxt->sax = sax;
14223 ctxt->userData = NULL;
14225 xmlDetectSAX2(ctxt);
14227 xmlParseDocument(ctxt);
14228 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14229 else {
14230 ret = NULL;
14231 xmlFreeDoc(ctxt->myDoc);
14232 ctxt->myDoc = NULL;
14234 if (sax != NULL)
14235 ctxt->sax = oldsax;
14236 xmlFreeParserCtxt(ctxt);
14238 return(ret);
14242 * xmlParseDoc:
14243 * @cur: a pointer to an array of xmlChar
14245 * DEPRECATED: Use xmlReadDoc.
14247 * parse an XML in-memory document and build a tree.
14249 * Returns the resulting document tree
14252 xmlDocPtr
14253 xmlParseDoc(const xmlChar *cur) {
14254 return(xmlSAXParseDoc(NULL, cur, 0));
14256 #endif /* LIBXML_SAX1_ENABLED */
14258 #ifdef LIBXML_LEGACY_ENABLED
14259 /************************************************************************
14261 * Specific function to keep track of entities references *
14262 * and used by the XSLT debugger *
14264 ************************************************************************/
14266 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14269 * xmlAddEntityReference:
14270 * @ent : A valid entity
14271 * @firstNode : A valid first node for children of entity
14272 * @lastNode : A valid last node of children entity
14274 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14276 static void
14277 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14278 xmlNodePtr lastNode)
14280 if (xmlEntityRefFunc != NULL) {
14281 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14287 * xmlSetEntityReferenceFunc:
14288 * @func: A valid function
14290 * Set the function to call call back when a xml reference has been made
14292 void
14293 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14295 xmlEntityRefFunc = func;
14297 #endif /* LIBXML_LEGACY_ENABLED */
14299 /************************************************************************
14301 * Miscellaneous *
14303 ************************************************************************/
/* Non-zero once xmlInitParser() has completed; cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /* Re-test the flag once the global init mutex is held. */
    __xmlGlobalInitMutexLock();
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && \
    !defined(LIBXML_THREAD_ALLOC_ENABLED) && \
    (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only register the exit-time cleanup with the stock deallocator. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }

    __xmlGlobalInitMutexUnlock();
#endif
}
14354 * xmlCleanupParser:
14356 * This function name is somewhat misleading. It does not clean up
14357 * parser state, it cleans up memory allocated by the library itself.
14358 * It is a cleanup function for the XML library. It tries to reclaim all
14359 * related global memory allocated for the library processing.
14360 * It doesn't deallocate any document related memory. One should
14361 * call xmlCleanupParser() only when the process has finished using
14362 * the library and all XML/HTML documents built with it.
14363 * See also xmlInitParser() which has the opposite function of preparing
14364 * the library for operations.
14366 * WARNING: if your application is multithreaded or has plugin support
14367 * calling this may crash the application if another thread or
14368 * a plugin is still using libxml2. It's sometimes very hard to
14369 * guess if libxml2 is in use in the application, some libraries
14370 * or plugins may use it without notice. In case of doubt abstain
14371 * from calling this function or do it just before calling exit()
14372 * to avoid leak reports from valgrind !
14375 void
14376 xmlCleanupParser(void) {
14377 if (!xmlParserInitialized)
14378 return;
14380 xmlCleanupCharEncodingHandlers();
14381 #ifdef LIBXML_CATALOG_ENABLED
14382 xmlCatalogCleanup();
14383 #endif
14384 xmlCleanupDictInternal();
14385 xmlCleanupInputCallbacks();
14386 #ifdef LIBXML_OUTPUT_ENABLED
14387 xmlCleanupOutputCallbacks();
14388 #endif
14389 #ifdef LIBXML_SCHEMAS_ENABLED
14390 xmlSchemaCleanupTypes();
14391 xmlRelaxNGCleanupTypes();
14392 #endif
14393 xmlCleanupGlobalsInternal();
14394 xmlCleanupThreadsInternal();
14395 xmlCleanupMemoryInternal();
14396 xmlParserInitialized = 0;
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && \
    !defined(LIBXML_THREAD_ALLOC_ENABLED) && \
    !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook: runs the library cleanup, but only while the stock
 * free() is still installed as xmlFree.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;
    xmlCleanupParser();
}
#endif
14415 /************************************************************************
14417 * New set (2.6.0) of simpler and more flexible APIs *
14419 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement: it can be used as the body of an if/else without the
 * inner "if" capturing a following "else". Callers supply the trailing
 * semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14434 * xmlCtxtReset:
14435 * @ctxt: an XML parser context
14437 * Reset a parser context
14439 void
14440 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14442 xmlParserInputPtr input;
14443 xmlDictPtr dict;
14445 if (ctxt == NULL)
14446 return;
14448 dict = ctxt->dict;
14450 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14451 xmlFreeInputStream(input);
14453 ctxt->inputNr = 0;
14454 ctxt->input = NULL;
14456 ctxt->spaceNr = 0;
14457 if (ctxt->spaceTab != NULL) {
14458 ctxt->spaceTab[0] = -1;
14459 ctxt->space = &ctxt->spaceTab[0];
14460 } else {
14461 ctxt->space = NULL;
14465 ctxt->nodeNr = 0;
14466 ctxt->node = NULL;
14468 ctxt->nameNr = 0;
14469 ctxt->name = NULL;
14471 ctxt->nsNr = 0;
14473 DICT_FREE(ctxt->version);
14474 ctxt->version = NULL;
14475 DICT_FREE(ctxt->encoding);
14476 ctxt->encoding = NULL;
14477 DICT_FREE(ctxt->directory);
14478 ctxt->directory = NULL;
14479 DICT_FREE(ctxt->extSubURI);
14480 ctxt->extSubURI = NULL;
14481 DICT_FREE(ctxt->extSubSystem);
14482 ctxt->extSubSystem = NULL;
14483 if (ctxt->myDoc != NULL)
14484 xmlFreeDoc(ctxt->myDoc);
14485 ctxt->myDoc = NULL;
14487 ctxt->standalone = -1;
14488 ctxt->hasExternalSubset = 0;
14489 ctxt->hasPErefs = 0;
14490 ctxt->html = 0;
14491 ctxt->external = 0;
14492 ctxt->instate = XML_PARSER_START;
14493 ctxt->token = 0;
14495 ctxt->wellFormed = 1;
14496 ctxt->nsWellFormed = 1;
14497 ctxt->disableSAX = 0;
14498 ctxt->valid = 1;
14499 #if 0
14500 ctxt->vctxt.userData = ctxt;
14501 ctxt->vctxt.error = xmlParserValidityError;
14502 ctxt->vctxt.warning = xmlParserValidityWarning;
14503 #endif
14504 ctxt->record_info = 0;
14505 ctxt->checkIndex = 0;
14506 ctxt->endCheckState = 0;
14507 ctxt->inSubset = 0;
14508 ctxt->errNo = XML_ERR_OK;
14509 ctxt->depth = 0;
14510 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14511 ctxt->catalogs = NULL;
14512 ctxt->sizeentities = 0;
14513 ctxt->sizeentcopy = 0;
14514 xmlInitNodeInfoSeq(&ctxt->node_seq);
14516 if (ctxt->attsDefault != NULL) {
14517 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14518 ctxt->attsDefault = NULL;
14520 if (ctxt->attsSpecial != NULL) {
14521 xmlHashFree(ctxt->attsSpecial, NULL);
14522 ctxt->attsSpecial = NULL;
14525 #ifdef LIBXML_CATALOG_ENABLED
14526 if (ctxt->catalogs != NULL)
14527 xmlCatalogFreeLocal(ctxt->catalogs);
14528 #endif
14529 ctxt->nbErrors = 0;
14530 ctxt->nbWarnings = 0;
14531 if (ctxt->lastError.code != XML_ERR_OK)
14532 xmlResetError(&ctxt->lastError);
14536 * xmlCtxtResetPush:
14537 * @ctxt: an XML parser context
14538 * @chunk: a pointer to an array of chars
14539 * @size: number of chars in the array
14540 * @filename: an optional file name or URI
14541 * @encoding: the document encoding, or NULL
14543 * Reset a push parser context
14545 * Returns 0 in case of success and 1 in case of error
14548 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14549 int size, const char *filename, const char *encoding)
14551 xmlParserInputPtr inputStream;
14552 xmlParserInputBufferPtr buf;
14553 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14555 if (ctxt == NULL)
14556 return(1);
14558 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14559 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14561 buf = xmlAllocParserInputBuffer(enc);
14562 if (buf == NULL)
14563 return(1);
14565 if (ctxt == NULL) {
14566 xmlFreeParserInputBuffer(buf);
14567 return(1);
14570 xmlCtxtReset(ctxt);
14572 if (filename == NULL) {
14573 ctxt->directory = NULL;
14574 } else {
14575 ctxt->directory = xmlParserGetDirectory(filename);
14578 inputStream = xmlNewInputStream(ctxt);
14579 if (inputStream == NULL) {
14580 xmlFreeParserInputBuffer(buf);
14581 return(1);
14584 if (filename == NULL)
14585 inputStream->filename = NULL;
14586 else
14587 inputStream->filename = (char *)
14588 xmlCanonicPath((const xmlChar *) filename);
14589 inputStream->buf = buf;
14590 xmlBufResetInput(buf->buffer, inputStream);
14592 inputPush(ctxt, inputStream);
14594 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14595 (ctxt->input->buf != NULL)) {
14596 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14597 size_t cur = ctxt->input->cur - ctxt->input->base;
14599 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14601 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14602 #ifdef DEBUG_PUSH
14603 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14604 #endif
14607 if (encoding != NULL) {
14608 xmlCharEncodingHandlerPtr hdlr;
14610 if (ctxt->encoding != NULL)
14611 xmlFree((xmlChar *) ctxt->encoding);
14612 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14614 hdlr = xmlFindCharEncodingHandler(encoding);
14615 if (hdlr != NULL) {
14616 xmlSwitchToEncoding(ctxt, hdlr);
14617 } else {
14618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14619 "Unsupported encoding %s\n", BAD_CAST encoding);
14621 } else if (enc != XML_CHAR_ENCODING_NONE) {
14622 xmlSwitchEncoding(ctxt, enc);
14625 return(0);
14630 * xmlCtxtUseOptionsInternal:
14631 * @ctxt: an XML parser context
14632 * @options: a combination of xmlParserOption
14633 * @encoding: the user provided encoding to use
14635 * Applies the options to the parser context
14637 * Returns 0 in case of success, the set of unknown or unimplemented options
14638 * in case of error.
14640 static int
14641 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14643 if (ctxt == NULL)
14644 return(-1);
14645 if (encoding != NULL) {
14646 if (ctxt->encoding != NULL)
14647 xmlFree((xmlChar *) ctxt->encoding);
14648 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14650 if (options & XML_PARSE_RECOVER) {
14651 ctxt->recovery = 1;
14652 options -= XML_PARSE_RECOVER;
14653 ctxt->options |= XML_PARSE_RECOVER;
14654 } else
14655 ctxt->recovery = 0;
14656 if (options & XML_PARSE_DTDLOAD) {
14657 ctxt->loadsubset = XML_DETECT_IDS;
14658 options -= XML_PARSE_DTDLOAD;
14659 ctxt->options |= XML_PARSE_DTDLOAD;
14660 } else
14661 ctxt->loadsubset = 0;
14662 if (options & XML_PARSE_DTDATTR) {
14663 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14664 options -= XML_PARSE_DTDATTR;
14665 ctxt->options |= XML_PARSE_DTDATTR;
14667 if (options & XML_PARSE_NOENT) {
14668 ctxt->replaceEntities = 1;
14669 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14670 options -= XML_PARSE_NOENT;
14671 ctxt->options |= XML_PARSE_NOENT;
14672 } else
14673 ctxt->replaceEntities = 0;
14674 if (options & XML_PARSE_PEDANTIC) {
14675 ctxt->pedantic = 1;
14676 options -= XML_PARSE_PEDANTIC;
14677 ctxt->options |= XML_PARSE_PEDANTIC;
14678 } else
14679 ctxt->pedantic = 0;
14680 if (options & XML_PARSE_NOBLANKS) {
14681 ctxt->keepBlanks = 0;
14682 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14683 options -= XML_PARSE_NOBLANKS;
14684 ctxt->options |= XML_PARSE_NOBLANKS;
14685 } else
14686 ctxt->keepBlanks = 1;
14687 if (options & XML_PARSE_DTDVALID) {
14688 ctxt->validate = 1;
14689 if (options & XML_PARSE_NOWARNING)
14690 ctxt->vctxt.warning = NULL;
14691 if (options & XML_PARSE_NOERROR)
14692 ctxt->vctxt.error = NULL;
14693 options -= XML_PARSE_DTDVALID;
14694 ctxt->options |= XML_PARSE_DTDVALID;
14695 } else
14696 ctxt->validate = 0;
14697 if (options & XML_PARSE_NOWARNING) {
14698 ctxt->sax->warning = NULL;
14699 options -= XML_PARSE_NOWARNING;
14701 if (options & XML_PARSE_NOERROR) {
14702 ctxt->sax->error = NULL;
14703 ctxt->sax->fatalError = NULL;
14704 options -= XML_PARSE_NOERROR;
14706 #ifdef LIBXML_SAX1_ENABLED
14707 if (options & XML_PARSE_SAX1) {
14708 ctxt->sax->startElement = xmlSAX2StartElement;
14709 ctxt->sax->endElement = xmlSAX2EndElement;
14710 ctxt->sax->startElementNs = NULL;
14711 ctxt->sax->endElementNs = NULL;
14712 ctxt->sax->initialized = 1;
14713 options -= XML_PARSE_SAX1;
14714 ctxt->options |= XML_PARSE_SAX1;
14716 #endif /* LIBXML_SAX1_ENABLED */
14717 if (options & XML_PARSE_NODICT) {
14718 ctxt->dictNames = 0;
14719 options -= XML_PARSE_NODICT;
14720 ctxt->options |= XML_PARSE_NODICT;
14721 } else {
14722 ctxt->dictNames = 1;
14724 if (options & XML_PARSE_NOCDATA) {
14725 ctxt->sax->cdataBlock = NULL;
14726 options -= XML_PARSE_NOCDATA;
14727 ctxt->options |= XML_PARSE_NOCDATA;
14729 if (options & XML_PARSE_NSCLEAN) {
14730 ctxt->options |= XML_PARSE_NSCLEAN;
14731 options -= XML_PARSE_NSCLEAN;
14733 if (options & XML_PARSE_NONET) {
14734 ctxt->options |= XML_PARSE_NONET;
14735 options -= XML_PARSE_NONET;
14737 if (options & XML_PARSE_COMPACT) {
14738 ctxt->options |= XML_PARSE_COMPACT;
14739 options -= XML_PARSE_COMPACT;
14741 if (options & XML_PARSE_OLD10) {
14742 ctxt->options |= XML_PARSE_OLD10;
14743 options -= XML_PARSE_OLD10;
14745 if (options & XML_PARSE_NOBASEFIX) {
14746 ctxt->options |= XML_PARSE_NOBASEFIX;
14747 options -= XML_PARSE_NOBASEFIX;
14749 if (options & XML_PARSE_HUGE) {
14750 ctxt->options |= XML_PARSE_HUGE;
14751 options -= XML_PARSE_HUGE;
14752 if (ctxt->dict != NULL)
14753 xmlDictSetLimit(ctxt->dict, 0);
14755 if (options & XML_PARSE_OLDSAX) {
14756 ctxt->options |= XML_PARSE_OLDSAX;
14757 options -= XML_PARSE_OLDSAX;
14759 if (options & XML_PARSE_IGNORE_ENC) {
14760 ctxt->options |= XML_PARSE_IGNORE_ENC;
14761 options -= XML_PARSE_IGNORE_ENC;
14763 if (options & XML_PARSE_BIG_LINES) {
14764 ctxt->options |= XML_PARSE_BIG_LINES;
14765 options -= XML_PARSE_BIG_LINES;
14767 ctxt->linenumbers = 1;
14768 return (options);
14772 * xmlCtxtUseOptions:
14773 * @ctxt: an XML parser context
14774 * @options: a combination of xmlParserOption
14776 * Applies the options to the parser context
14778 * Returns 0 in case of success, the set of unknown or unimplemented options
14779 * in case of error.
14782 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14784 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14788 * xmlDoRead:
14789 * @ctxt: an XML parser context
14790 * @URL: the base URL to use for the document
14791 * @encoding: the document encoding, or NULL
14792 * @options: a combination of xmlParserOption
14793 * @reuse: keep the context for reuse
14795 * Common front-end for the xmlRead functions
14797 * Returns the resulting document tree or NULL
14799 static xmlDocPtr
14800 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14801 int options, int reuse)
14803 xmlDocPtr ret;
14805 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14806 if (encoding != NULL) {
14807 xmlCharEncodingHandlerPtr hdlr;
14810 * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14811 * caller provided an encoding. Otherwise, we might switch to
14812 * the encoding from the XML declaration which is likely to
14813 * break things. Also see xmlSwitchInputEncoding.
14815 hdlr = xmlFindCharEncodingHandler(encoding);
14816 if (hdlr != NULL)
14817 xmlSwitchToEncoding(ctxt, hdlr);
14819 if ((URL != NULL) && (ctxt->input != NULL) &&
14820 (ctxt->input->filename == NULL))
14821 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14822 xmlParseDocument(ctxt);
14823 if ((ctxt->wellFormed) || ctxt->recovery)
14824 ret = ctxt->myDoc;
14825 else {
14826 ret = NULL;
14827 if (ctxt->myDoc != NULL) {
14828 xmlFreeDoc(ctxt->myDoc);
14831 ctxt->myDoc = NULL;
14832 if (!reuse) {
14833 xmlFreeParserCtxt(ctxt);
14836 return (ret);
14840 * xmlReadDoc:
14841 * @cur: a pointer to a zero terminated string
14842 * @URL: the base URL to use for the document
14843 * @encoding: the document encoding, or NULL
14844 * @options: a combination of xmlParserOption
14846 * parse an XML in-memory document and build a tree.
14848 * Returns the resulting document tree
14850 xmlDocPtr
14851 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14853 xmlParserCtxtPtr ctxt;
14855 if (cur == NULL)
14856 return (NULL);
14857 xmlInitParser();
14859 ctxt = xmlCreateDocParserCtxt(cur);
14860 if (ctxt == NULL)
14861 return (NULL);
14862 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14866 * xmlReadFile:
14867 * @filename: a file or URL
14868 * @encoding: the document encoding, or NULL
14869 * @options: a combination of xmlParserOption
14871 * parse an XML file from the filesystem or the network.
14873 * Returns the resulting document tree
14875 xmlDocPtr
14876 xmlReadFile(const char *filename, const char *encoding, int options)
14878 xmlParserCtxtPtr ctxt;
14880 xmlInitParser();
14881 ctxt = xmlCreateURLParserCtxt(filename, options);
14882 if (ctxt == NULL)
14883 return (NULL);
14884 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14888 * xmlReadMemory:
14889 * @buffer: a pointer to a char array
14890 * @size: the size of the array
14891 * @URL: the base URL to use for the document
14892 * @encoding: the document encoding, or NULL
14893 * @options: a combination of xmlParserOption
14895 * parse an XML in-memory document and build a tree.
14897 * Returns the resulting document tree
14899 xmlDocPtr
14900 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14902 xmlParserCtxtPtr ctxt;
14904 xmlInitParser();
14905 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14906 if (ctxt == NULL)
14907 return (NULL);
14908 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14912 * xmlReadFd:
14913 * @fd: an open file descriptor
14914 * @URL: the base URL to use for the document
14915 * @encoding: the document encoding, or NULL
14916 * @options: a combination of xmlParserOption
14918 * parse an XML from a file descriptor and build a tree.
14919 * NOTE that the file descriptor will not be closed when the
14920 * reader is closed or reset.
14922 * Returns the resulting document tree
14924 xmlDocPtr
14925 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14927 xmlParserCtxtPtr ctxt;
14928 xmlParserInputBufferPtr input;
14929 xmlParserInputPtr stream;
14931 if (fd < 0)
14932 return (NULL);
14933 xmlInitParser();
14935 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14936 if (input == NULL)
14937 return (NULL);
14938 input->closecallback = NULL;
14939 ctxt = xmlNewParserCtxt();
14940 if (ctxt == NULL) {
14941 xmlFreeParserInputBuffer(input);
14942 return (NULL);
14944 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14945 if (stream == NULL) {
14946 xmlFreeParserInputBuffer(input);
14947 xmlFreeParserCtxt(ctxt);
14948 return (NULL);
14950 inputPush(ctxt, stream);
14951 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14955 * xmlReadIO:
14956 * @ioread: an I/O read function
14957 * @ioclose: an I/O close function
14958 * @ioctx: an I/O handler
14959 * @URL: the base URL to use for the document
14960 * @encoding: the document encoding, or NULL
14961 * @options: a combination of xmlParserOption
14963 * parse an XML document from I/O functions and source and build a tree.
14965 * Returns the resulting document tree
14967 xmlDocPtr
14968 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14969 void *ioctx, const char *URL, const char *encoding, int options)
14971 xmlParserCtxtPtr ctxt;
14972 xmlParserInputBufferPtr input;
14973 xmlParserInputPtr stream;
14975 if (ioread == NULL)
14976 return (NULL);
14977 xmlInitParser();
14979 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14980 XML_CHAR_ENCODING_NONE);
14981 if (input == NULL) {
14982 if (ioclose != NULL)
14983 ioclose(ioctx);
14984 return (NULL);
14986 ctxt = xmlNewParserCtxt();
14987 if (ctxt == NULL) {
14988 xmlFreeParserInputBuffer(input);
14989 return (NULL);
14991 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14992 if (stream == NULL) {
14993 xmlFreeParserInputBuffer(input);
14994 xmlFreeParserCtxt(ctxt);
14995 return (NULL);
14997 inputPush(ctxt, stream);
14998 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15002 * xmlCtxtReadDoc:
15003 * @ctxt: an XML parser context
15004 * @cur: a pointer to a zero terminated string
15005 * @URL: the base URL to use for the document
15006 * @encoding: the document encoding, or NULL
15007 * @options: a combination of xmlParserOption
15009 * parse an XML in-memory document and build a tree.
15010 * This reuses the existing @ctxt parser context
15012 * Returns the resulting document tree
15014 xmlDocPtr
15015 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15016 const char *URL, const char *encoding, int options)
15018 if (cur == NULL)
15019 return (NULL);
15020 return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15021 encoding, options));
15025 * xmlCtxtReadFile:
15026 * @ctxt: an XML parser context
15027 * @filename: a file or URL
15028 * @encoding: the document encoding, or NULL
15029 * @options: a combination of xmlParserOption
15031 * parse an XML file from the filesystem or the network.
15032 * This reuses the existing @ctxt parser context
15034 * Returns the resulting document tree
15036 xmlDocPtr
15037 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15038 const char *encoding, int options)
15040 xmlParserInputPtr stream;
15042 if (filename == NULL)
15043 return (NULL);
15044 if (ctxt == NULL)
15045 return (NULL);
15046 xmlInitParser();
15048 xmlCtxtReset(ctxt);
15050 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15051 if (stream == NULL) {
15052 return (NULL);
15054 inputPush(ctxt, stream);
15055 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15059 * xmlCtxtReadMemory:
15060 * @ctxt: an XML parser context
15061 * @buffer: a pointer to a char array
15062 * @size: the size of the array
15063 * @URL: the base URL to use for the document
15064 * @encoding: the document encoding, or NULL
15065 * @options: a combination of xmlParserOption
15067 * parse an XML in-memory document and build a tree.
15068 * This reuses the existing @ctxt parser context
15070 * Returns the resulting document tree
15072 xmlDocPtr
15073 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15074 const char *URL, const char *encoding, int options)
15076 xmlParserInputBufferPtr input;
15077 xmlParserInputPtr stream;
15079 if (ctxt == NULL)
15080 return (NULL);
15081 if (buffer == NULL)
15082 return (NULL);
15083 xmlInitParser();
15085 xmlCtxtReset(ctxt);
15087 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15088 if (input == NULL) {
15089 return(NULL);
15092 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15093 if (stream == NULL) {
15094 xmlFreeParserInputBuffer(input);
15095 return(NULL);
15098 inputPush(ctxt, stream);
15099 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15103 * xmlCtxtReadFd:
15104 * @ctxt: an XML parser context
15105 * @fd: an open file descriptor
15106 * @URL: the base URL to use for the document
15107 * @encoding: the document encoding, or NULL
15108 * @options: a combination of xmlParserOption
15110 * parse an XML from a file descriptor and build a tree.
15111 * This reuses the existing @ctxt parser context
15112 * NOTE that the file descriptor will not be closed when the
15113 * reader is closed or reset.
15115 * Returns the resulting document tree
15117 xmlDocPtr
15118 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15119 const char *URL, const char *encoding, int options)
15121 xmlParserInputBufferPtr input;
15122 xmlParserInputPtr stream;
15124 if (fd < 0)
15125 return (NULL);
15126 if (ctxt == NULL)
15127 return (NULL);
15128 xmlInitParser();
15130 xmlCtxtReset(ctxt);
15133 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15134 if (input == NULL)
15135 return (NULL);
15136 input->closecallback = NULL;
15137 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15138 if (stream == NULL) {
15139 xmlFreeParserInputBuffer(input);
15140 return (NULL);
15142 inputPush(ctxt, stream);
15143 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15147 * xmlCtxtReadIO:
15148 * @ctxt: an XML parser context
15149 * @ioread: an I/O read function
15150 * @ioclose: an I/O close function
15151 * @ioctx: an I/O handler
15152 * @URL: the base URL to use for the document
15153 * @encoding: the document encoding, or NULL
15154 * @options: a combination of xmlParserOption
15156 * parse an XML document from I/O functions and source and build a tree.
15157 * This reuses the existing @ctxt parser context
15159 * Returns the resulting document tree
15161 xmlDocPtr
15162 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15163 xmlInputCloseCallback ioclose, void *ioctx,
15164 const char *URL,
15165 const char *encoding, int options)
15167 xmlParserInputBufferPtr input;
15168 xmlParserInputPtr stream;
15170 if (ioread == NULL)
15171 return (NULL);
15172 if (ctxt == NULL)
15173 return (NULL);
15174 xmlInitParser();
15176 xmlCtxtReset(ctxt);
15178 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15179 XML_CHAR_ENCODING_NONE);
15180 if (input == NULL) {
15181 if (ioclose != NULL)
15182 ioclose(ioctx);
15183 return (NULL);
15185 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186 if (stream == NULL) {
15187 xmlFreeParserInputBuffer(input);
15188 return (NULL);
15190 inputPush(ctxt, stream);
15191 return (xmlDoRead(ctxt, URL, encoding, options, 1));