Finish refactoring of DomCodeToUsLayoutKeyboardCode().
[chromium-blink-merge.git] / third_party / libxml / src / parser.c
blob1d9396786ba7eca5d8e985d18e2679398243acd4
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
28 * See Copyright for the status of this software.
30 * daniel@veillard.com
33 #define IN_LIBXML
34 #include "libxml.h"
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
87 #include "buf.h"
88 #include "enc.h"
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
97 /************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
101 ************************************************************************/
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
115 * xmlParserEntityCheck
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
123 static int
124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125 xmlEntityPtr ent, size_t replacement)
127 size_t consumed = 0;
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
135 * This may look absurd but is needed to detect
136 * entities problems
138 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
139 (ent->content != NULL) && (ent->checked == 0)) {
140 unsigned long oldnbent = ctxt->nbentities;
141 xmlChar *rep;
143 ent->checked = 1;
145 rep = xmlStringDecodeEntities(ctxt, ent->content,
146 XML_SUBSTITUTE_REF, 0, 0, 0);
148 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
149 if (rep != NULL) {
150 if (xmlStrchr(rep, '<'))
151 ent->checked |= 1;
152 xmlFree(rep);
153 rep = NULL;
156 if (replacement != 0) {
157 if (replacement < XML_MAX_TEXT_LENGTH)
158 return(0);
161 * If the volume of entity copy reaches 10 times the
162 * amount of parsed data and over the large text threshold
163 * then that's very likely to be an abuse.
165 if (ctxt->input != NULL) {
166 consumed = ctxt->input->consumed +
167 (ctxt->input->cur - ctxt->input->base);
169 consumed += ctxt->sizeentities;
171 if (replacement < XML_PARSER_NON_LINEAR * consumed)
172 return(0);
173 } else if (size != 0) {
175 * Do the check based on the replacement size of the entity
177 if (size < XML_PARSER_BIG_ENTITY)
178 return(0);
181 * A limit on the amount of text data reasonably used
183 if (ctxt->input != NULL) {
184 consumed = ctxt->input->consumed +
185 (ctxt->input->cur - ctxt->input->base);
187 consumed += ctxt->sizeentities;
189 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
190 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
191 return (0);
192 } else if (ent != NULL) {
194 * use the number of parsed entities in the replacement
196 size = ent->checked / 2;
199 * The amount of data parsed counting entities size only once
201 if (ctxt->input != NULL) {
202 consumed = ctxt->input->consumed +
203 (ctxt->input->cur - ctxt->input->base);
205 consumed += ctxt->sizeentities;
208 * Check the density of entities for the amount of data
209 * knowing an entity reference will take at least 3 bytes
211 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
212 return (0);
213 } else {
215 * strange we got no data for checking
217 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
218 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
219 (ctxt->nbentities <= 10000))
220 return (0);
222 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
223 return (1);
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry.
 */
static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
265 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
266 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
267 const xmlChar **str);
269 static xmlParserErrors
270 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
271 xmlSAXHandlerPtr sax,
272 void *user_data, int depth, const xmlChar *URL,
273 const xmlChar *ID, xmlNodePtr *list);
275 static int
276 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
277 const char *encoding);
278 #ifdef LIBXML_LEGACY_ENABLED
279 static void
280 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
281 xmlNodePtr lastNode);
282 #endif /* LIBXML_LEGACY_ENABLED */
284 static xmlParserErrors
285 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
286 const xmlChar *string, void *user_data, xmlNodePtr *lst);
288 static int
289 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
291 /************************************************************************
293 * Some factorized error routines *
295 ************************************************************************/
298 * xmlErrAttributeDup:
299 * @ctxt: an XML parser context
300 * @prefix: the attribute prefix
301 * @localname: the attribute localname
303 * Handle a redefinition of attribute error
305 static void
306 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
307 const xmlChar * localname)
309 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
310 (ctxt->instate == XML_PARSER_EOF))
311 return;
312 if (ctxt != NULL)
313 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
315 if (prefix == NULL)
316 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
317 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
318 (const char *) localname, NULL, NULL, 0, 0,
319 "Attribute %s redefined\n", localname);
320 else
321 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
322 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
323 (const char *) prefix, (const char *) localname,
324 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
325 localname);
326 if (ctxt != NULL) {
327 ctxt->wellFormed = 0;
328 if (ctxt->recovery == 0)
329 ctxt->disableSAX = 1;
334 * xmlFatalErr:
335 * @ctxt: an XML parser context
336 * @error: the error number
337 * @extra: extra information string
339 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
341 static void
342 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
344 const char *errmsg;
345 char errstr[129] = "";
347 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
348 (ctxt->instate == XML_PARSER_EOF))
349 return;
350 switch (error) {
351 case XML_ERR_INVALID_HEX_CHARREF:
352 errmsg = "CharRef: invalid hexadecimal value";
353 break;
354 case XML_ERR_INVALID_DEC_CHARREF:
355 errmsg = "CharRef: invalid decimal value";
356 break;
357 case XML_ERR_INVALID_CHARREF:
358 errmsg = "CharRef: invalid value";
359 break;
360 case XML_ERR_INTERNAL_ERROR:
361 errmsg = "internal error";
362 break;
363 case XML_ERR_PEREF_AT_EOF:
364 errmsg = "PEReference at end of document";
365 break;
366 case XML_ERR_PEREF_IN_PROLOG:
367 errmsg = "PEReference in prolog";
368 break;
369 case XML_ERR_PEREF_IN_EPILOG:
370 errmsg = "PEReference in epilog";
371 break;
372 case XML_ERR_PEREF_NO_NAME:
373 errmsg = "PEReference: no name";
374 break;
375 case XML_ERR_PEREF_SEMICOL_MISSING:
376 errmsg = "PEReference: expecting ';'";
377 break;
378 case XML_ERR_ENTITY_LOOP:
379 errmsg = "Detected an entity reference loop";
380 break;
381 case XML_ERR_ENTITY_NOT_STARTED:
382 errmsg = "EntityValue: \" or ' expected";
383 break;
384 case XML_ERR_ENTITY_PE_INTERNAL:
385 errmsg = "PEReferences forbidden in internal subset";
386 break;
387 case XML_ERR_ENTITY_NOT_FINISHED:
388 errmsg = "EntityValue: \" or ' expected";
389 break;
390 case XML_ERR_ATTRIBUTE_NOT_STARTED:
391 errmsg = "AttValue: \" or ' expected";
392 break;
393 case XML_ERR_LT_IN_ATTRIBUTE:
394 errmsg = "Unescaped '<' not allowed in attributes values";
395 break;
396 case XML_ERR_LITERAL_NOT_STARTED:
397 errmsg = "SystemLiteral \" or ' expected";
398 break;
399 case XML_ERR_LITERAL_NOT_FINISHED:
400 errmsg = "Unfinished System or Public ID \" or ' expected";
401 break;
402 case XML_ERR_MISPLACED_CDATA_END:
403 errmsg = "Sequence ']]>' not allowed in content";
404 break;
405 case XML_ERR_URI_REQUIRED:
406 errmsg = "SYSTEM or PUBLIC, the URI is missing";
407 break;
408 case XML_ERR_PUBID_REQUIRED:
409 errmsg = "PUBLIC, the Public Identifier is missing";
410 break;
411 case XML_ERR_HYPHEN_IN_COMMENT:
412 errmsg = "Comment must not contain '--' (double-hyphen)";
413 break;
414 case XML_ERR_PI_NOT_STARTED:
415 errmsg = "xmlParsePI : no target name";
416 break;
417 case XML_ERR_RESERVED_XML_NAME:
418 errmsg = "Invalid PI name";
419 break;
420 case XML_ERR_NOTATION_NOT_STARTED:
421 errmsg = "NOTATION: Name expected here";
422 break;
423 case XML_ERR_NOTATION_NOT_FINISHED:
424 errmsg = "'>' required to close NOTATION declaration";
425 break;
426 case XML_ERR_VALUE_REQUIRED:
427 errmsg = "Entity value required";
428 break;
429 case XML_ERR_URI_FRAGMENT:
430 errmsg = "Fragment not allowed";
431 break;
432 case XML_ERR_ATTLIST_NOT_STARTED:
433 errmsg = "'(' required to start ATTLIST enumeration";
434 break;
435 case XML_ERR_NMTOKEN_REQUIRED:
436 errmsg = "NmToken expected in ATTLIST enumeration";
437 break;
438 case XML_ERR_ATTLIST_NOT_FINISHED:
439 errmsg = "')' required to finish ATTLIST enumeration";
440 break;
441 case XML_ERR_MIXED_NOT_STARTED:
442 errmsg = "MixedContentDecl : '|' or ')*' expected";
443 break;
444 case XML_ERR_PCDATA_REQUIRED:
445 errmsg = "MixedContentDecl : '#PCDATA' expected";
446 break;
447 case XML_ERR_ELEMCONTENT_NOT_STARTED:
448 errmsg = "ContentDecl : Name or '(' expected";
449 break;
450 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
451 errmsg = "ContentDecl : ',' '|' or ')' expected";
452 break;
453 case XML_ERR_PEREF_IN_INT_SUBSET:
454 errmsg =
455 "PEReference: forbidden within markup decl in internal subset";
456 break;
457 case XML_ERR_GT_REQUIRED:
458 errmsg = "expected '>'";
459 break;
460 case XML_ERR_CONDSEC_INVALID:
461 errmsg = "XML conditional section '[' expected";
462 break;
463 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
464 errmsg = "Content error in the external subset";
465 break;
466 case XML_ERR_CONDSEC_INVALID_KEYWORD:
467 errmsg =
468 "conditional section INCLUDE or IGNORE keyword expected";
469 break;
470 case XML_ERR_CONDSEC_NOT_FINISHED:
471 errmsg = "XML conditional section not closed";
472 break;
473 case XML_ERR_XMLDECL_NOT_STARTED:
474 errmsg = "Text declaration '<?xml' required";
475 break;
476 case XML_ERR_XMLDECL_NOT_FINISHED:
477 errmsg = "parsing XML declaration: '?>' expected";
478 break;
479 case XML_ERR_EXT_ENTITY_STANDALONE:
480 errmsg = "external parsed entities cannot be standalone";
481 break;
482 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
483 errmsg = "EntityRef: expecting ';'";
484 break;
485 case XML_ERR_DOCTYPE_NOT_FINISHED:
486 errmsg = "DOCTYPE improperly terminated";
487 break;
488 case XML_ERR_LTSLASH_REQUIRED:
489 errmsg = "EndTag: '</' not found";
490 break;
491 case XML_ERR_EQUAL_REQUIRED:
492 errmsg = "expected '='";
493 break;
494 case XML_ERR_STRING_NOT_CLOSED:
495 errmsg = "String not closed expecting \" or '";
496 break;
497 case XML_ERR_STRING_NOT_STARTED:
498 errmsg = "String not started expecting ' or \"";
499 break;
500 case XML_ERR_ENCODING_NAME:
501 errmsg = "Invalid XML encoding name";
502 break;
503 case XML_ERR_STANDALONE_VALUE:
504 errmsg = "standalone accepts only 'yes' or 'no'";
505 break;
506 case XML_ERR_DOCUMENT_EMPTY:
507 errmsg = "Document is empty";
508 break;
509 case XML_ERR_DOCUMENT_END:
510 errmsg = "Extra content at the end of the document";
511 break;
512 case XML_ERR_NOT_WELL_BALANCED:
513 errmsg = "chunk is not well balanced";
514 break;
515 case XML_ERR_EXTRA_CONTENT:
516 errmsg = "extra content at the end of well balanced chunk";
517 break;
518 case XML_ERR_VERSION_MISSING:
519 errmsg = "Malformed declaration expecting version";
520 break;
521 case XML_ERR_NAME_TOO_LONG:
522 errmsg = "Name too long use XML_PARSE_HUGE option";
523 break;
524 #if 0
525 case:
526 errmsg = "";
527 break;
528 #endif
529 default:
530 errmsg = "Unregistered error message";
532 if (info == NULL)
533 snprintf(errstr, 128, "%s\n", errmsg);
534 else
535 snprintf(errstr, 128, "%s: %%s\n", errmsg);
536 if (ctxt != NULL)
537 ctxt->errNo = error;
538 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
539 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
540 info);
541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
549 * xmlFatalErrMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
554 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
556 static void
557 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
558 const char *msg)
560 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
561 (ctxt->instate == XML_PARSER_EOF))
562 return;
563 if (ctxt != NULL)
564 ctxt->errNo = error;
565 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
566 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
567 if (ctxt != NULL) {
568 ctxt->wellFormed = 0;
569 if (ctxt->recovery == 0)
570 ctxt->disableSAX = 1;
575 * xmlWarningMsg:
576 * @ctxt: an XML parser context
577 * @error: the error number
578 * @msg: the error message
579 * @str1: extra data
580 * @str2: extra data
582 * Handle a warning.
584 static void
585 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
586 const char *msg, const xmlChar *str1, const xmlChar *str2)
588 xmlStructuredErrorFunc schannel = NULL;
590 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
591 (ctxt->instate == XML_PARSER_EOF))
592 return;
593 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
594 (ctxt->sax->initialized == XML_SAX2_MAGIC))
595 schannel = ctxt->sax->serror;
596 if (ctxt != NULL) {
597 __xmlRaiseError(schannel,
598 (ctxt->sax) ? ctxt->sax->warning : NULL,
599 ctxt->userData,
600 ctxt, NULL, XML_FROM_PARSER, error,
601 XML_ERR_WARNING, NULL, 0,
602 (const char *) str1, (const char *) str2, NULL, 0, 0,
603 msg, (const char *) str1, (const char *) str2);
604 } else {
605 __xmlRaiseError(schannel, NULL, NULL,
606 ctxt, NULL, XML_FROM_PARSER, error,
607 XML_ERR_WARNING, NULL, 0,
608 (const char *) str1, (const char *) str2, NULL, 0, 0,
609 msg, (const char *) str1, (const char *) str2);
614 * xmlValidityError:
615 * @ctxt: an XML parser context
616 * @error: the error number
617 * @msg: the error message
618 * @str1: extra data
620 * Handle a validity error.
622 static void
623 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
624 const char *msg, const xmlChar *str1, const xmlChar *str2)
626 xmlStructuredErrorFunc schannel = NULL;
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if (ctxt != NULL) {
632 ctxt->errNo = error;
633 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
634 schannel = ctxt->sax->serror;
636 if (ctxt != NULL) {
637 __xmlRaiseError(schannel,
638 ctxt->vctxt.error, ctxt->vctxt.userData,
639 ctxt, NULL, XML_FROM_DTD, error,
640 XML_ERR_ERROR, NULL, 0, (const char *) str1,
641 (const char *) str2, NULL, 0, 0,
642 msg, (const char *) str1, (const char *) str2);
643 ctxt->valid = 0;
644 } else {
645 __xmlRaiseError(schannel, NULL, NULL,
646 ctxt, NULL, XML_FROM_DTD, error,
647 XML_ERR_ERROR, NULL, 0, (const char *) str1,
648 (const char *) str2, NULL, 0, 0,
649 msg, (const char *) str1, (const char *) str2);
654 * xmlFatalErrMsgInt:
655 * @ctxt: an XML parser context
656 * @error: the error number
657 * @msg: the error message
658 * @val: an integer value
660 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
662 static void
663 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
664 const char *msg, int val)
666 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
667 (ctxt->instate == XML_PARSER_EOF))
668 return;
669 if (ctxt != NULL)
670 ctxt->errNo = error;
671 __xmlRaiseError(NULL, NULL, NULL,
672 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
673 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
674 if (ctxt != NULL) {
675 ctxt->wellFormed = 0;
676 if (ctxt->recovery == 0)
677 ctxt->disableSAX = 1;
682 * xmlFatalErrMsgStrIntStr:
683 * @ctxt: an XML parser context
684 * @error: the error number
685 * @msg: the error message
686 * @str1: an string info
687 * @val: an integer value
688 * @str2: an string info
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
692 static void
693 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg, const xmlChar *str1, int val,
695 const xmlChar *str2)
697 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
698 (ctxt->instate == XML_PARSER_EOF))
699 return;
700 if (ctxt != NULL)
701 ctxt->errNo = error;
702 __xmlRaiseError(NULL, NULL, NULL,
703 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
704 NULL, 0, (const char *) str1, (const char *) str2,
705 NULL, val, 0, msg, str1, val, str2);
706 if (ctxt != NULL) {
707 ctxt->wellFormed = 0;
708 if (ctxt->recovery == 0)
709 ctxt->disableSAX = 1;
714 * xmlFatalErrMsgStr:
715 * @ctxt: an XML parser context
716 * @error: the error number
717 * @msg: the error message
718 * @val: a string value
720 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
722 static void
723 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
724 const char *msg, const xmlChar * val)
726 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
727 (ctxt->instate == XML_PARSER_EOF))
728 return;
729 if (ctxt != NULL)
730 ctxt->errNo = error;
731 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
732 XML_FROM_PARSER, error, XML_ERR_FATAL,
733 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
734 val);
735 if (ctxt != NULL) {
736 ctxt->wellFormed = 0;
737 if (ctxt->recovery == 0)
738 ctxt->disableSAX = 1;
743 * xmlErrMsgStr:
744 * @ctxt: an XML parser context
745 * @error: the error number
746 * @msg: the error message
747 * @val: a string value
749 * Handle a non fatal parser error
751 static void
752 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753 const char *msg, const xmlChar * val)
755 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
756 (ctxt->instate == XML_PARSER_EOF))
757 return;
758 if (ctxt != NULL)
759 ctxt->errNo = error;
760 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
761 XML_FROM_PARSER, error, XML_ERR_ERROR,
762 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
763 val);
767 * xmlNsErr:
768 * @ctxt: an XML parser context
769 * @error: the error number
770 * @msg: the message
771 * @info1: extra information string
772 * @info2: extra information string
774 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
776 static void
777 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
778 const char *msg,
779 const xmlChar * info1, const xmlChar * info2,
780 const xmlChar * info3)
782 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
783 (ctxt->instate == XML_PARSER_EOF))
784 return;
785 if (ctxt != NULL)
786 ctxt->errNo = error;
787 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
788 XML_ERR_ERROR, NULL, 0, (const char *) info1,
789 (const char *) info2, (const char *) info3, 0, 0, msg,
790 info1, info2, info3);
791 if (ctxt != NULL)
792 ctxt->nsWellFormed = 0;
796 * xmlNsWarn
797 * @ctxt: an XML parser context
798 * @error: the error number
799 * @msg: the message
800 * @info1: extra information string
801 * @info2: extra information string
803 * Handle a namespace warning error
805 static void
806 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
807 const char *msg,
808 const xmlChar * info1, const xmlChar * info2,
809 const xmlChar * info3)
811 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
812 (ctxt->instate == XML_PARSER_EOF))
813 return;
814 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
815 XML_ERR_WARNING, NULL, 0, (const char *) info1,
816 (const char *) info2, (const char *) info3, 0, 0, msg,
817 info1, info2, info3);
820 /************************************************************************
822 * Library wide options *
824 ************************************************************************/
827 * xmlHasFeature:
828 * @feature: the feature to be examined
830 * Examines if the library has been compiled with a given feature.
832 * Returns a non-zero value if the feature exist, otherwise zero.
833 * Returns zero (0) if the feature does not exist or an unknown
834 * unknown feature is requested, non-zero otherwise.
837 xmlHasFeature(xmlFeature feature)
839 switch (feature) {
840 case XML_WITH_THREAD:
841 #ifdef LIBXML_THREAD_ENABLED
842 return(1);
843 #else
844 return(0);
845 #endif
846 case XML_WITH_TREE:
847 #ifdef LIBXML_TREE_ENABLED
848 return(1);
849 #else
850 return(0);
851 #endif
852 case XML_WITH_OUTPUT:
853 #ifdef LIBXML_OUTPUT_ENABLED
854 return(1);
855 #else
856 return(0);
857 #endif
858 case XML_WITH_PUSH:
859 #ifdef LIBXML_PUSH_ENABLED
860 return(1);
861 #else
862 return(0);
863 #endif
864 case XML_WITH_READER:
865 #ifdef LIBXML_READER_ENABLED
866 return(1);
867 #else
868 return(0);
869 #endif
870 case XML_WITH_PATTERN:
871 #ifdef LIBXML_PATTERN_ENABLED
872 return(1);
873 #else
874 return(0);
875 #endif
876 case XML_WITH_WRITER:
877 #ifdef LIBXML_WRITER_ENABLED
878 return(1);
879 #else
880 return(0);
881 #endif
882 case XML_WITH_SAX1:
883 #ifdef LIBXML_SAX1_ENABLED
884 return(1);
885 #else
886 return(0);
887 #endif
888 case XML_WITH_FTP:
889 #ifdef LIBXML_FTP_ENABLED
890 return(1);
891 #else
892 return(0);
893 #endif
894 case XML_WITH_HTTP:
895 #ifdef LIBXML_HTTP_ENABLED
896 return(1);
897 #else
898 return(0);
899 #endif
900 case XML_WITH_VALID:
901 #ifdef LIBXML_VALID_ENABLED
902 return(1);
903 #else
904 return(0);
905 #endif
906 case XML_WITH_HTML:
907 #ifdef LIBXML_HTML_ENABLED
908 return(1);
909 #else
910 return(0);
911 #endif
912 case XML_WITH_LEGACY:
913 #ifdef LIBXML_LEGACY_ENABLED
914 return(1);
915 #else
916 return(0);
917 #endif
918 case XML_WITH_C14N:
919 #ifdef LIBXML_C14N_ENABLED
920 return(1);
921 #else
922 return(0);
923 #endif
924 case XML_WITH_CATALOG:
925 #ifdef LIBXML_CATALOG_ENABLED
926 return(1);
927 #else
928 return(0);
929 #endif
930 case XML_WITH_XPATH:
931 #ifdef LIBXML_XPATH_ENABLED
932 return(1);
933 #else
934 return(0);
935 #endif
936 case XML_WITH_XPTR:
937 #ifdef LIBXML_XPTR_ENABLED
938 return(1);
939 #else
940 return(0);
941 #endif
942 case XML_WITH_XINCLUDE:
943 #ifdef LIBXML_XINCLUDE_ENABLED
944 return(1);
945 #else
946 return(0);
947 #endif
948 case XML_WITH_ICONV:
949 #ifdef LIBXML_ICONV_ENABLED
950 return(1);
951 #else
952 return(0);
953 #endif
954 case XML_WITH_ISO8859X:
955 #ifdef LIBXML_ISO8859X_ENABLED
956 return(1);
957 #else
958 return(0);
959 #endif
960 case XML_WITH_UNICODE:
961 #ifdef LIBXML_UNICODE_ENABLED
962 return(1);
963 #else
964 return(0);
965 #endif
966 case XML_WITH_REGEXP:
967 #ifdef LIBXML_REGEXP_ENABLED
968 return(1);
969 #else
970 return(0);
971 #endif
972 case XML_WITH_AUTOMATA:
973 #ifdef LIBXML_AUTOMATA_ENABLED
974 return(1);
975 #else
976 return(0);
977 #endif
978 case XML_WITH_EXPR:
979 #ifdef LIBXML_EXPR_ENABLED
980 return(1);
981 #else
982 return(0);
983 #endif
984 case XML_WITH_SCHEMAS:
985 #ifdef LIBXML_SCHEMAS_ENABLED
986 return(1);
987 #else
988 return(0);
989 #endif
990 case XML_WITH_SCHEMATRON:
991 #ifdef LIBXML_SCHEMATRON_ENABLED
992 return(1);
993 #else
994 return(0);
995 #endif
996 case XML_WITH_MODULES:
997 #ifdef LIBXML_MODULES_ENABLED
998 return(1);
999 #else
1000 return(0);
1001 #endif
1002 case XML_WITH_DEBUG:
1003 #ifdef LIBXML_DEBUG_ENABLED
1004 return(1);
1005 #else
1006 return(0);
1007 #endif
1008 case XML_WITH_DEBUG_MEM:
1009 #ifdef DEBUG_MEMORY_LOCATION
1010 return(1);
1011 #else
1012 return(0);
1013 #endif
1014 case XML_WITH_DEBUG_RUN:
1015 #ifdef LIBXML_DEBUG_RUNTIME
1016 return(1);
1017 #else
1018 return(0);
1019 #endif
1020 case XML_WITH_ZLIB:
1021 #ifdef LIBXML_ZLIB_ENABLED
1022 return(1);
1023 #else
1024 return(0);
1025 #endif
1026 case XML_WITH_LZMA:
1027 #ifdef LIBXML_LZMA_ENABLED
1028 return(1);
1029 #else
1030 return(0);
1031 #endif
1032 case XML_WITH_ICU:
1033 #ifdef LIBXML_ICU_ENABLED
1034 return(1);
1035 #else
1036 return(0);
1037 #endif
1038 default:
1039 break;
1041 return(0);
1044 /************************************************************************
1046 * SAX2 defaulted attributes handling *
1048 ************************************************************************/
1051 * xmlDetectSAX2:
1052 * @ctxt: an XML parser context
1054 * Do the SAX2 detection and specific intialization
1056 static void
1057 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1058 if (ctxt == NULL) return;
1059 #ifdef LIBXML_SAX1_ENABLED
1060 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1061 ((ctxt->sax->startElementNs != NULL) ||
1062 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1063 #else
1064 ctxt->sax2 = 1;
1065 #endif /* LIBXML_SAX1_ENABLED */
1067 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1068 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1069 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1070 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1071 (ctxt->str_xml_ns == NULL)) {
1072 xmlErrMemory(ctxt, NULL);
1076 typedef struct _xmlDefAttrs xmlDefAttrs;
1077 typedef xmlDefAttrs *xmlDefAttrsPtr;
1078 struct _xmlDefAttrs {
1079 int nbAttrs; /* number of defaulted attributes on that element */
1080 int maxAttrs; /* the size of the array */
1081 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1085 * xmlAttrNormalizeSpace:
1086 * @src: the source string
1087 * @dst: the target string
1089 * Normalize the space in non CDATA attribute values:
1090 * If the attribute type is not CDATA, then the XML processor MUST further
1091 * process the normalized attribute value by discarding any leading and
1092 * trailing space (#x20) characters, and by replacing sequences of space
1093 * (#x20) characters by a single space (#x20) character.
1094 * Note that the size of dst need to be at least src, and if one doesn't need
1095 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1096 * passing src as dst is just fine.
1098 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1099 * is needed.
1101 static xmlChar *
1102 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1104 if ((src == NULL) || (dst == NULL))
1105 return(NULL);
1107 while (*src == 0x20) src++;
1108 while (*src != 0) {
1109 if (*src == 0x20) {
1110 while (*src == 0x20) src++;
1111 if (*src != 0)
1112 *dst++ = 0x20;
1113 } else {
1114 *dst++ = *src++;
1117 *dst = 0;
1118 if (dst == src)
1119 return(NULL);
1120 return(dst);
1124 * xmlAttrNormalizeSpace2:
1125 * @src: the source string
1127 * Normalize the space in non CDATA attribute values, a slightly more complex
1128 * front end to avoid allocation problems when running on attribute values
1129 * coming from the input.
1131 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1132 * is needed.
1134 static const xmlChar *
1135 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1137 int i;
1138 int remove_head = 0;
1139 int need_realloc = 0;
1140 const xmlChar *cur;
1142 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1143 return(NULL);
1144 i = *len;
1145 if (i <= 0)
1146 return(NULL);
1148 cur = src;
1149 while (*cur == 0x20) {
1150 cur++;
1151 remove_head++;
1153 while (*cur != 0) {
1154 if (*cur == 0x20) {
1155 cur++;
1156 if ((*cur == 0x20) || (*cur == 0)) {
1157 need_realloc = 1;
1158 break;
1160 } else
1161 cur++;
1163 if (need_realloc) {
1164 xmlChar *ret;
1166 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1167 if (ret == NULL) {
1168 xmlErrMemory(ctxt, NULL);
1169 return(NULL);
1171 xmlAttrNormalizeSpace(ret, ret);
1172 *len = (int) strlen((const char *)ret);
1173 return(ret);
1174 } else if (remove_head) {
1175 *len -= remove_head;
1176 memmove(src, src + remove_head, 1 + *len);
1177 return(src);
1179 return(NULL);
1183 * xmlAddDefAttrs:
1184 * @ctxt: an XML parser context
1185 * @fullname: the element fullname
1186 * @fullattr: the attribute fullname
1187 * @value: the attribute value
1189 * Add a defaulted attribute for an element
1191 static void
1192 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1193 const xmlChar *fullname,
1194 const xmlChar *fullattr,
1195 const xmlChar *value) {
1196 xmlDefAttrsPtr defaults;
1197 int len;
1198 const xmlChar *name;
1199 const xmlChar *prefix;
1202 * Allows to detect attribute redefinitions
1204 if (ctxt->attsSpecial != NULL) {
1205 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1206 return;
1209 if (ctxt->attsDefault == NULL) {
1210 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1211 if (ctxt->attsDefault == NULL)
1212 goto mem_error;
1216 * split the element name into prefix:localname , the string found
1217 * are within the DTD and then not associated to namespace names.
1219 name = xmlSplitQName3(fullname, &len);
1220 if (name == NULL) {
1221 name = xmlDictLookup(ctxt->dict, fullname, -1);
1222 prefix = NULL;
1223 } else {
1224 name = xmlDictLookup(ctxt->dict, name, -1);
1225 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1229 * make sure there is some storage
1231 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1232 if (defaults == NULL) {
1233 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1234 (4 * 5) * sizeof(const xmlChar *));
1235 if (defaults == NULL)
1236 goto mem_error;
1237 defaults->nbAttrs = 0;
1238 defaults->maxAttrs = 4;
1239 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1240 defaults, NULL) < 0) {
1241 xmlFree(defaults);
1242 goto mem_error;
1244 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1245 xmlDefAttrsPtr temp;
1247 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1248 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1249 if (temp == NULL)
1250 goto mem_error;
1251 defaults = temp;
1252 defaults->maxAttrs *= 2;
1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1261 * Split the element name into prefix:localname , the string found
1262 * are within the DTD and hen not associated to namespace names.
1264 name = xmlSplitQName3(fullattr, &len);
1265 if (name == NULL) {
1266 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1267 prefix = NULL;
1268 } else {
1269 name = xmlDictLookup(ctxt->dict, name, -1);
1270 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1273 defaults->values[5 * defaults->nbAttrs] = name;
1274 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1275 /* intern the string and precompute the end */
1276 len = xmlStrlen(value);
1277 value = xmlDictLookup(ctxt->dict, value, len);
1278 defaults->values[5 * defaults->nbAttrs + 2] = value;
1279 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1280 if (ctxt->external)
1281 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1282 else
1283 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1284 defaults->nbAttrs++;
1286 return;
1288 mem_error:
1289 xmlErrMemory(ctxt, NULL);
1290 return;
1294 * xmlAddSpecialAttr:
1295 * @ctxt: an XML parser context
1296 * @fullname: the element fullname
1297 * @fullattr: the attribute fullname
1298 * @type: the attribute type
1300 * Register this attribute type
1302 static void
1303 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1304 const xmlChar *fullname,
1305 const xmlChar *fullattr,
1306 int type)
1308 if (ctxt->attsSpecial == NULL) {
1309 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1310 if (ctxt->attsSpecial == NULL)
1311 goto mem_error;
1314 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1315 return;
1317 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1318 (void *) (long) type);
1319 return;
1321 mem_error:
1322 xmlErrMemory(ctxt, NULL);
1323 return;
1327 * xmlCleanSpecialAttrCallback:
1329 * Removes CDATA attributes from the special attribute table
1331 static void
1332 xmlCleanSpecialAttrCallback(void *payload, void *data,
1333 const xmlChar *fullname, const xmlChar *fullattr,
1334 const xmlChar *unused ATTRIBUTE_UNUSED) {
1335 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1338 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1343 * xmlCleanSpecialAttr:
1344 * @ctxt: an XML parser context
1346 * Trim the list of attributes defined to remove all those of type
1347 * CDATA as they are not special. This call should be done when finishing
1348 * to parse the DTD and before starting to parse the document root.
1350 static void
1351 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353 if (ctxt->attsSpecial == NULL)
1354 return;
1356 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1359 xmlHashFree(ctxt->attsSpecial, NULL);
1360 ctxt->attsSpecial = NULL;
1362 return;
1366 * xmlCheckLanguageID:
1367 * @lang: pointer to the string value
1369 * Checks that the value conforms to the LanguageID production:
1371 * NOTE: this is somewhat deprecated, those productions were removed from
1372 * the XML Second edition.
1374 * [33] LanguageID ::= Langcode ('-' Subcode)*
1375 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1376 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1377 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1378 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1379 * [38] Subcode ::= ([a-z] | [A-Z])+
1381 * The current REC reference the sucessors of RFC 1766, currently 5646
1383 * http://www.rfc-editor.org/rfc/rfc5646.txt
1384 * langtag = language
1385 * ["-" script]
1386 * ["-" region]
1387 * *("-" variant)
1388 * *("-" extension)
1389 * ["-" privateuse]
1390 * language = 2*3ALPHA ; shortest ISO 639 code
1391 * ["-" extlang] ; sometimes followed by
1392 * ; extended language subtags
1393 * / 4ALPHA ; or reserved for future use
1394 * / 5*8ALPHA ; or registered language subtag
1396 * extlang = 3ALPHA ; selected ISO 639 codes
1397 * *2("-" 3ALPHA) ; permanently reserved
1399 * script = 4ALPHA ; ISO 15924 code
1401 * region = 2ALPHA ; ISO 3166-1 code
1402 * / 3DIGIT ; UN M.49 code
1404 * variant = 5*8alphanum ; registered variants
1405 * / (DIGIT 3alphanum)
1407 * extension = singleton 1*("-" (2*8alphanum))
1409 * ; Single alphanumerics
1410 * ; "x" reserved for private use
1411 * singleton = DIGIT ; 0 - 9
1412 * / %x41-57 ; A - W
1413 * / %x59-5A ; Y - Z
1414 * / %x61-77 ; a - w
1415 * / %x79-7A ; y - z
1417 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1418 * The parser below doesn't try to cope with extension or privateuse
1419 * that could be added but that's not interoperable anyway
1421 * Returns 1 if correct 0 otherwise
1424 xmlCheckLanguageID(const xmlChar * lang)
1426 const xmlChar *cur = lang, *nxt;
1428 if (cur == NULL)
1429 return (0);
1430 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1431 ((cur[0] == 'I') && (cur[1] == '-')) ||
1432 ((cur[0] == 'x') && (cur[1] == '-')) ||
1433 ((cur[0] == 'X') && (cur[1] == '-'))) {
1435 * Still allow IANA code and user code which were coming
1436 * from the previous version of the XML-1.0 specification
1437 * it's deprecated but we should not fail
1439 cur += 2;
1440 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1441 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1442 cur++;
1443 return(cur[0] == 0);
1445 nxt = cur;
1446 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1447 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1448 nxt++;
1449 if (nxt - cur >= 4) {
1451 * Reserved
1453 if ((nxt - cur > 8) || (nxt[0] != 0))
1454 return(0);
1455 return(1);
1457 if (nxt - cur < 2)
1458 return(0);
1459 /* we got an ISO 639 code */
1460 if (nxt[0] == 0)
1461 return(1);
1462 if (nxt[0] != '-')
1463 return(0);
1465 nxt++;
1466 cur = nxt;
1467 /* now we can have extlang or script or region or variant */
1468 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1469 goto region_m49;
1471 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1472 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473 nxt++;
1474 if (nxt - cur == 4)
1475 goto script;
1476 if (nxt - cur == 2)
1477 goto region;
1478 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1479 goto variant;
1480 if (nxt - cur != 3)
1481 return(0);
1482 /* we parsed an extlang */
1483 if (nxt[0] == 0)
1484 return(1);
1485 if (nxt[0] != '-')
1486 return(0);
1488 nxt++;
1489 cur = nxt;
1490 /* now we can have script or region or variant */
1491 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1492 goto region_m49;
1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496 nxt++;
1497 if (nxt - cur == 2)
1498 goto region;
1499 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1500 goto variant;
1501 if (nxt - cur != 4)
1502 return(0);
1503 /* we parsed a script */
1504 script:
1505 if (nxt[0] == 0)
1506 return(1);
1507 if (nxt[0] != '-')
1508 return(0);
1510 nxt++;
1511 cur = nxt;
1512 /* now we can have region or variant */
1513 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1514 goto region_m49;
1516 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1517 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1518 nxt++;
1520 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1521 goto variant;
1522 if (nxt - cur != 2)
1523 return(0);
1524 /* we parsed a region */
1525 region:
1526 if (nxt[0] == 0)
1527 return(1);
1528 if (nxt[0] != '-')
1529 return(0);
1531 nxt++;
1532 cur = nxt;
1533 /* now we can just have a variant */
1534 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1535 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1536 nxt++;
1538 if ((nxt - cur < 5) || (nxt - cur > 8))
1539 return(0);
1541 /* we parsed a variant */
1542 variant:
1543 if (nxt[0] == 0)
1544 return(1);
1545 if (nxt[0] != '-')
1546 return(0);
1547 /* extensions and private use subtags not checked */
1548 return (1);
1550 region_m49:
1551 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1552 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1553 nxt += 3;
1554 goto region;
1556 return(0);
1559 /************************************************************************
1561 * Parser stacks related functions and macros *
1563 ************************************************************************/
1565 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1566 const xmlChar ** str);
1568 #ifdef SAX2
1570 * nsPush:
1571 * @ctxt: an XML parser context
1572 * @prefix: the namespace prefix or NULL
1573 * @URL: the namespace name
1575 * Pushes a new parser namespace on top of the ns stack
1577 * Returns -1 in case of error, -2 if the namespace should be discarded
1578 * and the index in the stack otherwise.
1580 static int
1581 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583 if (ctxt->options & XML_PARSE_NSCLEAN) {
1584 int i;
1585 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1586 if (ctxt->nsTab[i] == prefix) {
1587 /* in scope */
1588 if (ctxt->nsTab[i + 1] == URL)
1589 return(-2);
1590 /* out of scope keep it */
1591 break;
1595 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1596 ctxt->nsMax = 10;
1597 ctxt->nsNr = 0;
1598 ctxt->nsTab = (const xmlChar **)
1599 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1600 if (ctxt->nsTab == NULL) {
1601 xmlErrMemory(ctxt, NULL);
1602 ctxt->nsMax = 0;
1603 return (-1);
1605 } else if (ctxt->nsNr >= ctxt->nsMax) {
1606 const xmlChar ** tmp;
1607 ctxt->nsMax *= 2;
1608 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1609 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1610 if (tmp == NULL) {
1611 xmlErrMemory(ctxt, NULL);
1612 ctxt->nsMax /= 2;
1613 return (-1);
1615 ctxt->nsTab = tmp;
1617 ctxt->nsTab[ctxt->nsNr++] = prefix;
1618 ctxt->nsTab[ctxt->nsNr++] = URL;
1619 return (ctxt->nsNr);
1622 * nsPop:
1623 * @ctxt: an XML parser context
1624 * @nr: the number to pop
1626 * Pops the top @nr parser prefix/namespace from the ns stack
1628 * Returns the number of namespaces removed
1630 static int
1631 nsPop(xmlParserCtxtPtr ctxt, int nr)
1633 int i;
1635 if (ctxt->nsTab == NULL) return(0);
1636 if (ctxt->nsNr < nr) {
1637 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1638 nr = ctxt->nsNr;
1640 if (ctxt->nsNr <= 0)
1641 return (0);
1643 for (i = 0;i < nr;i++) {
1644 ctxt->nsNr--;
1645 ctxt->nsTab[ctxt->nsNr] = NULL;
1647 return(nr);
1649 #endif
1651 static int
1652 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1653 const xmlChar **atts;
1654 int *attallocs;
1655 int maxatts;
1657 if (ctxt->atts == NULL) {
1658 maxatts = 55; /* allow for 10 attrs by default */
1659 atts = (const xmlChar **)
1660 xmlMalloc(maxatts * sizeof(xmlChar *));
1661 if (atts == NULL) goto mem_error;
1662 ctxt->atts = atts;
1663 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1664 if (attallocs == NULL) goto mem_error;
1665 ctxt->attallocs = attallocs;
1666 ctxt->maxatts = maxatts;
1667 } else if (nr + 5 > ctxt->maxatts) {
1668 maxatts = (nr + 5) * 2;
1669 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1670 maxatts * sizeof(const xmlChar *));
1671 if (atts == NULL) goto mem_error;
1672 ctxt->atts = atts;
1673 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1674 (maxatts / 5) * sizeof(int));
1675 if (attallocs == NULL) goto mem_error;
1676 ctxt->attallocs = attallocs;
1677 ctxt->maxatts = maxatts;
1679 return(ctxt->maxatts);
1680 mem_error:
1681 xmlErrMemory(ctxt, NULL);
1682 return(-1);
1686 * inputPush:
1687 * @ctxt: an XML parser context
1688 * @value: the parser input
1690 * Pushes a new parser input on top of the input stack
1692 * Returns -1 in case of error, the index in the stack otherwise
1695 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1697 if ((ctxt == NULL) || (value == NULL))
1698 return(-1);
1699 if (ctxt->inputNr >= ctxt->inputMax) {
1700 ctxt->inputMax *= 2;
1701 ctxt->inputTab =
1702 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1703 ctxt->inputMax *
1704 sizeof(ctxt->inputTab[0]));
1705 if (ctxt->inputTab == NULL) {
1706 xmlErrMemory(ctxt, NULL);
1707 xmlFreeInputStream(value);
1708 ctxt->inputMax /= 2;
1709 value = NULL;
1710 return (-1);
1713 ctxt->inputTab[ctxt->inputNr] = value;
1714 ctxt->input = value;
1715 return (ctxt->inputNr++);
1718 * inputPop:
1719 * @ctxt: an XML parser context
1721 * Pops the top parser input from the input stack
1723 * Returns the input just removed
1725 xmlParserInputPtr
1726 inputPop(xmlParserCtxtPtr ctxt)
1728 xmlParserInputPtr ret;
1730 if (ctxt == NULL)
1731 return(NULL);
1732 if (ctxt->inputNr <= 0)
1733 return (NULL);
1734 ctxt->inputNr--;
1735 if (ctxt->inputNr > 0)
1736 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1737 else
1738 ctxt->input = NULL;
1739 ret = ctxt->inputTab[ctxt->inputNr];
1740 ctxt->inputTab[ctxt->inputNr] = NULL;
1741 return (ret);
1744 * nodePush:
1745 * @ctxt: an XML parser context
1746 * @value: the element node
1748 * Pushes a new element node on top of the node stack
1750 * Returns -1 in case of error, the index in the stack otherwise
1753 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1755 if (ctxt == NULL) return(0);
1756 if (ctxt->nodeNr >= ctxt->nodeMax) {
1757 xmlNodePtr *tmp;
1759 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1760 ctxt->nodeMax * 2 *
1761 sizeof(ctxt->nodeTab[0]));
1762 if (tmp == NULL) {
1763 xmlErrMemory(ctxt, NULL);
1764 return (-1);
1766 ctxt->nodeTab = tmp;
1767 ctxt->nodeMax *= 2;
1769 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1770 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1771 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1772 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1773 xmlParserMaxDepth);
1774 ctxt->instate = XML_PARSER_EOF;
1775 return(-1);
1777 ctxt->nodeTab[ctxt->nodeNr] = value;
1778 ctxt->node = value;
1779 return (ctxt->nodeNr++);
1783 * nodePop:
1784 * @ctxt: an XML parser context
1786 * Pops the top element node from the node stack
1788 * Returns the node just removed
1790 xmlNodePtr
1791 nodePop(xmlParserCtxtPtr ctxt)
1793 xmlNodePtr ret;
1795 if (ctxt == NULL) return(NULL);
1796 if (ctxt->nodeNr <= 0)
1797 return (NULL);
1798 ctxt->nodeNr--;
1799 if (ctxt->nodeNr > 0)
1800 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1801 else
1802 ctxt->node = NULL;
1803 ret = ctxt->nodeTab[ctxt->nodeNr];
1804 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1805 return (ret);
1808 #ifdef LIBXML_PUSH_ENABLED
1810 * nameNsPush:
1811 * @ctxt: an XML parser context
1812 * @value: the element name
1813 * @prefix: the element prefix
1814 * @URI: the element namespace name
1816 * Pushes a new element name/prefix/URL on top of the name stack
1818 * Returns -1 in case of error, the index in the stack otherwise
1820 static int
1821 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1822 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1824 if (ctxt->nameNr >= ctxt->nameMax) {
1825 const xmlChar * *tmp;
1826 void **tmp2;
1827 ctxt->nameMax *= 2;
1828 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1829 ctxt->nameMax *
1830 sizeof(ctxt->nameTab[0]));
1831 if (tmp == NULL) {
1832 ctxt->nameMax /= 2;
1833 goto mem_error;
1835 ctxt->nameTab = tmp;
1836 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1837 ctxt->nameMax * 3 *
1838 sizeof(ctxt->pushTab[0]));
1839 if (tmp2 == NULL) {
1840 ctxt->nameMax /= 2;
1841 goto mem_error;
1843 ctxt->pushTab = tmp2;
1845 ctxt->nameTab[ctxt->nameNr] = value;
1846 ctxt->name = value;
1847 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1848 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1849 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1850 return (ctxt->nameNr++);
1851 mem_error:
1852 xmlErrMemory(ctxt, NULL);
1853 return (-1);
1856 * nameNsPop:
1857 * @ctxt: an XML parser context
1859 * Pops the top element/prefix/URI name from the name stack
1861 * Returns the name just removed
1863 static const xmlChar *
1864 nameNsPop(xmlParserCtxtPtr ctxt)
1866 const xmlChar *ret;
1868 if (ctxt->nameNr <= 0)
1869 return (NULL);
1870 ctxt->nameNr--;
1871 if (ctxt->nameNr > 0)
1872 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1873 else
1874 ctxt->name = NULL;
1875 ret = ctxt->nameTab[ctxt->nameNr];
1876 ctxt->nameTab[ctxt->nameNr] = NULL;
1877 return (ret);
1879 #endif /* LIBXML_PUSH_ENABLED */
1882 * namePush:
1883 * @ctxt: an XML parser context
1884 * @value: the element name
1886 * Pushes a new element name on top of the name stack
1888 * Returns -1 in case of error, the index in the stack otherwise
1891 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1893 if (ctxt == NULL) return (-1);
1895 if (ctxt->nameNr >= ctxt->nameMax) {
1896 const xmlChar * *tmp;
1897 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1898 ctxt->nameMax * 2 *
1899 sizeof(ctxt->nameTab[0]));
1900 if (tmp == NULL) {
1901 goto mem_error;
1903 ctxt->nameTab = tmp;
1904 ctxt->nameMax *= 2;
1906 ctxt->nameTab[ctxt->nameNr] = value;
1907 ctxt->name = value;
1908 return (ctxt->nameNr++);
1909 mem_error:
1910 xmlErrMemory(ctxt, NULL);
1911 return (-1);
1914 * namePop:
1915 * @ctxt: an XML parser context
1917 * Pops the top element name from the name stack
1919 * Returns the name just removed
1921 const xmlChar *
1922 namePop(xmlParserCtxtPtr ctxt)
1924 const xmlChar *ret;
1926 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1927 return (NULL);
1928 ctxt->nameNr--;
1929 if (ctxt->nameNr > 0)
1930 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1931 else
1932 ctxt->name = NULL;
1933 ret = ctxt->nameTab[ctxt->nameNr];
1934 ctxt->nameTab[ctxt->nameNr] = NULL;
1935 return (ret);
1938 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1939 if (ctxt->spaceNr >= ctxt->spaceMax) {
1940 int *tmp;
1942 ctxt->spaceMax *= 2;
1943 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1944 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1945 if (tmp == NULL) {
1946 xmlErrMemory(ctxt, NULL);
1947 ctxt->spaceMax /=2;
1948 return(-1);
1950 ctxt->spaceTab = tmp;
1952 ctxt->spaceTab[ctxt->spaceNr] = val;
1953 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1954 return(ctxt->spaceNr++);
1957 static int spacePop(xmlParserCtxtPtr ctxt) {
1958 int ret;
1959 if (ctxt->spaceNr <= 0) return(0);
1960 ctxt->spaceNr--;
1961 if (ctxt->spaceNr > 0)
1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1963 else
1964 ctxt->space = &ctxt->spaceTab[0];
1965 ret = ctxt->spaceTab[ctxt->spaceNr];
1966 ctxt->spaceTab[ctxt->spaceNr] = -1;
1967 return(ret);
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of them expect a variable named ctxt in scope.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1 ... cn): true when the n bytes starting at s are c1 ... cn.
 * The arguments are parenthesized so that expressions can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by val bytes assumed to contain no newline (only col is bumped) */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/* Same as SKIP but keeps line/col right when newlines are skipped */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * NOTE: expands to a bare if statement followed by a semicolon; do not use
 * it as the body of an if that has an else branch.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2056 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2057 xmlParserInputShrink(ctxt->input);
2058 if ((*ctxt->input->cur == 0) &&
2059 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2060 xmlPopInput(ctxt);
2063 #define GROW if ((ctxt->progressive == 0) && \
2064 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2065 xmlGROW (ctxt);
2067 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2068 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2069 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2071 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2072 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2073 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2074 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2075 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2076 ctxt->instate = XML_PARSER_EOF;
2078 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2079 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2080 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2081 xmlPopInput(ctxt);
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Skip one non-newline ASCII character. Expands to a brace block, not to a
 * do/while statement: beware of if/else around it.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* Skip the current character, which is l xmlChars long */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an int lvalue: it receives the length of the character */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v, encoded on l xmlChars, to buffer b at index i and
 * advance i. Expands to an if/else statement without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2112 * xmlSkipBlankChars:
2113 * @ctxt: the XML parser context
2115 * skip all blanks character found at that point in the input streams.
2116 * It pops up finished entities in the process if allowable at that point.
2118 * Returns the number of space chars skipped
2122 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2123 int res = 0;
2126 * It's Okay to use CUR/NEXT here since all the blanks are on
2127 * the ASCII range.
2129 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2130 const xmlChar *cur;
2132 * if we are in the document content, go really fast
2134 cur = ctxt->input->cur;
2135 while (IS_BLANK_CH(*cur)) {
2136 if (*cur == '\n') {
2137 ctxt->input->line++; ctxt->input->col = 1;
2138 } else {
2139 ctxt->input->col++;
2141 cur++;
2142 res++;
2143 if (*cur == 0) {
2144 ctxt->input->cur = cur;
2145 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2146 cur = ctxt->input->cur;
2149 ctxt->input->cur = cur;
2150 } else {
2151 int cur;
2152 do {
2153 cur = CUR;
2154 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2155 NEXT;
2156 cur = CUR;
2157 res++;
2159 while ((cur == 0) && (ctxt->inputNr > 1) &&
2160 (ctxt->instate != XML_PARSER_COMMENT)) {
2161 xmlPopInput(ctxt);
2162 cur = CUR;
2165 * Need to handle support of entities branching here
2167 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2168 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2170 return(res);
2173 /************************************************************************
2175 * Commodity functions to handle entities *
2177 ************************************************************************/
2180 * xmlPopInput:
2181 * @ctxt: an XML parser context
2183 * xmlPopInput: the current input pointed by ctxt->input came to an end
2184 * pop it and return the next char.
2186 * Returns the current xmlChar in the parser context
2188 xmlChar
2189 xmlPopInput(xmlParserCtxtPtr ctxt) {
2190 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2191 if (xmlParserDebugEntities)
2192 xmlGenericError(xmlGenericErrorContext,
2193 "Popping input %d\n", ctxt->inputNr);
2194 xmlFreeInputStream(inputPop(ctxt));
2195 if ((*ctxt->input->cur == 0) &&
2196 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2197 return(xmlPopInput(ctxt));
2198 return(CUR);
2202 * xmlPushInput:
2203 * @ctxt: an XML parser context
2204 * @input: an XML parser input fragment (entity, XML fragment ...).
2206 * xmlPushInput: switch to a new input stream which is stacked on top
2207 * of the previous one(s).
2208 * Returns -1 in case of error or the index in the input stack
2211 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2212 int ret;
2213 if (input == NULL) return(-1);
2215 if (xmlParserDebugEntities) {
2216 if ((ctxt->input != NULL) && (ctxt->input->filename))
2217 xmlGenericError(xmlGenericErrorContext,
2218 "%s(%d): ", ctxt->input->filename,
2219 ctxt->input->line);
2220 xmlGenericError(xmlGenericErrorContext,
2221 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2223 ret = inputPush(ctxt, input);
2224 if (ctxt->instate == XML_PARSER_EOF)
2225 return(-1);
2226 GROW;
2227 return(ret);
2231 * xmlParseCharRef:
2232 * @ctxt: an XML parser context
2234 * parse Reference declarations
2236 * [66] CharRef ::= '&#' [0-9]+ ';' |
2237 * '&#x' [0-9a-fA-F]+ ';'
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2243 * Returns the value parsed (as an int), 0 in case of error
2246 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2247 unsigned int val = 0;
2248 int count = 0;
2249 unsigned int outofrange = 0;
2252 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2254 if ((RAW == '&') && (NXT(1) == '#') &&
2255 (NXT(2) == 'x')) {
2256 SKIP(3);
2257 GROW;
2258 while (RAW != ';') { /* loop blocked by count */
2259 if (count++ > 20) {
2260 count = 0;
2261 GROW;
2262 if (ctxt->instate == XML_PARSER_EOF)
2263 return(0);
2265 if ((RAW >= '0') && (RAW <= '9'))
2266 val = val * 16 + (CUR - '0');
2267 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2268 val = val * 16 + (CUR - 'a') + 10;
2269 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2270 val = val * 16 + (CUR - 'A') + 10;
2271 else {
2272 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2273 val = 0;
2274 break;
2276 if (val > 0x10FFFF)
2277 outofrange = val;
2279 NEXT;
2280 count++;
2282 if (RAW == ';') {
2283 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2284 ctxt->input->col++;
2285 ctxt->nbChars ++;
2286 ctxt->input->cur++;
2288 } else if ((RAW == '&') && (NXT(1) == '#')) {
2289 SKIP(2);
2290 GROW;
2291 while (RAW != ';') { /* loop blocked by count */
2292 if (count++ > 20) {
2293 count = 0;
2294 GROW;
2295 if (ctxt->instate == XML_PARSER_EOF)
2296 return(0);
2298 if ((RAW >= '0') && (RAW <= '9'))
2299 val = val * 10 + (CUR - '0');
2300 else {
2301 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2302 val = 0;
2303 break;
2305 if (val > 0x10FFFF)
2306 outofrange = val;
2308 NEXT;
2309 count++;
2311 if (RAW == ';') {
2312 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2313 ctxt->input->col++;
2314 ctxt->nbChars ++;
2315 ctxt->input->cur++;
2317 } else {
2318 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2322 * [ WFC: Legal Character ]
2323 * Characters referred to using character references must match the
2324 * production for Char.
2326 if ((IS_CHAR(val) && (outofrange == 0))) {
2327 return(val);
2328 } else {
2329 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2330 "xmlParseCharRef: invalid xmlChar value %d\n",
2331 val);
2333 return(0);
2337 * xmlParseStringCharRef:
2338 * @ctxt: an XML parser context
2339 * @str: a pointer to an index in the string
2341 * parse Reference declarations, variant parsing from a string rather
2342 * than an an input flow.
2344 * [66] CharRef ::= '&#' [0-9]+ ';' |
2345 * '&#x' [0-9a-fA-F]+ ';'
2347 * [ WFC: Legal Character ]
2348 * Characters referred to using character references must match the
2349 * production for Char.
2351 * Returns the value parsed (as an int), 0 in case of error, str will be
2352 * updated to the current value of the index
2354 static int
2355 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2356 const xmlChar *ptr;
2357 xmlChar cur;
2358 unsigned int val = 0;
2359 unsigned int outofrange = 0;
2361 if ((str == NULL) || (*str == NULL)) return(0);
2362 ptr = *str;
2363 cur = *ptr;
2364 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2365 ptr += 3;
2366 cur = *ptr;
2367 while (cur != ';') { /* Non input consuming loop */
2368 if ((cur >= '0') && (cur <= '9'))
2369 val = val * 16 + (cur - '0');
2370 else if ((cur >= 'a') && (cur <= 'f'))
2371 val = val * 16 + (cur - 'a') + 10;
2372 else if ((cur >= 'A') && (cur <= 'F'))
2373 val = val * 16 + (cur - 'A') + 10;
2374 else {
2375 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2376 val = 0;
2377 break;
2379 if (val > 0x10FFFF)
2380 outofrange = val;
2382 ptr++;
2383 cur = *ptr;
2385 if (cur == ';')
2386 ptr++;
2387 } else if ((cur == '&') && (ptr[1] == '#')){
2388 ptr += 2;
2389 cur = *ptr;
2390 while (cur != ';') { /* Non input consuming loops */
2391 if ((cur >= '0') && (cur <= '9'))
2392 val = val * 10 + (cur - '0');
2393 else {
2394 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2395 val = 0;
2396 break;
2398 if (val > 0x10FFFF)
2399 outofrange = val;
2401 ptr++;
2402 cur = *ptr;
2404 if (cur == ';')
2405 ptr++;
2406 } else {
2407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408 return(0);
2410 *str = ptr;
2413 * [ WFC: Legal Character ]
2414 * Characters referred to using character references must match the
2415 * production for Char.
2417 if ((IS_CHAR(val) && (outofrange == 0))) {
2418 return(val);
2419 } else {
2420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2422 val);
2424 return(0);
2428 * xmlNewBlanksWrapperInputStream:
2429 * @ctxt: an XML parser context
2430 * @entity: an Entity pointer
2432 * Create a new input stream for wrapping
2433 * blanks around a PEReference
2435 * Returns the new input stream or NULL
2438 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2440 static xmlParserInputPtr
2441 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2442 xmlParserInputPtr input;
2443 xmlChar *buffer;
2444 size_t length;
2445 if (entity == NULL) {
2446 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2447 "xmlNewBlanksWrapperInputStream entity\n");
2448 return(NULL);
2450 if (xmlParserDebugEntities)
2451 xmlGenericError(xmlGenericErrorContext,
2452 "new blanks wrapper for entity: %s\n", entity->name);
2453 input = xmlNewInputStream(ctxt);
2454 if (input == NULL) {
2455 return(NULL);
2457 length = xmlStrlen(entity->name) + 5;
2458 buffer = xmlMallocAtomic(length);
2459 if (buffer == NULL) {
2460 xmlErrMemory(ctxt, NULL);
2461 xmlFree(input);
2462 return(NULL);
2464 buffer [0] = ' ';
2465 buffer [1] = '%';
2466 buffer [length-3] = ';';
2467 buffer [length-2] = ' ';
2468 buffer [length-1] = 0;
2469 memcpy(buffer + 2, entity->name, length - 5);
2470 input->free = deallocblankswrapper;
2471 input->base = buffer;
2472 input->cur = buffer;
2473 input->length = length;
2474 input->end = &buffer[length];
2475 return(input);
2479 * xmlParserHandlePEReference:
2480 * @ctxt: the parser context
2482 * [69] PEReference ::= '%' Name ';'
2484 * [ WFC: No Recursion ]
2485 * A parsed entity must not contain a recursive
2486 * reference to itself, either directly or indirectly.
2488 * [ WFC: Entity Declared ]
2489 * In a document without any DTD, a document with only an internal DTD
2490 * subset which contains no parameter entity references, or a document
2491 * with "standalone='yes'", ... ... The declaration of a parameter
2492 * entity must precede any reference to it...
2494 * [ VC: Entity Declared ]
2495 * In a document with an external subset or external parameter entities
2496 * with "standalone='no'", ... ... The declaration of a parameter entity
2497 * must precede any reference to it...
2499 * [ WFC: In DTD ]
2500 * Parameter-entity references may only appear in the DTD.
2501 * NOTE: misleading but this is handled.
2503 * A PEReference may have been detected in the current input stream
2504 * the handling is done accordingly to
2505 * http://www.w3.org/TR/REC-xml#entproc
2506 * i.e.
2507 * - Included in literal in entity values
2508 * - Included as Parameter Entity reference within DTDs
2510 void
2511 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2512 const xmlChar *name;
2513 xmlEntityPtr entity = NULL;
2514 xmlParserInputPtr input;
2516 if (RAW != '%') return;
2517 switch(ctxt->instate) {
2518 case XML_PARSER_CDATA_SECTION:
2519 return;
2520 case XML_PARSER_COMMENT:
2521 return;
2522 case XML_PARSER_START_TAG:
2523 return;
2524 case XML_PARSER_END_TAG:
2525 return;
2526 case XML_PARSER_EOF:
2527 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2528 return;
2529 case XML_PARSER_PROLOG:
2530 case XML_PARSER_START:
2531 case XML_PARSER_MISC:
2532 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2533 return;
2534 case XML_PARSER_ENTITY_DECL:
2535 case XML_PARSER_CONTENT:
2536 case XML_PARSER_ATTRIBUTE_VALUE:
2537 case XML_PARSER_PI:
2538 case XML_PARSER_SYSTEM_LITERAL:
2539 case XML_PARSER_PUBLIC_LITERAL:
2540 /* we just ignore it there */
2541 return;
2542 case XML_PARSER_EPILOG:
2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2544 return;
2545 case XML_PARSER_ENTITY_VALUE:
2547 * NOTE: in the case of entity values, we don't do the
2548 * substitution here since we need the literal
2549 * entity value to be able to save the internal
2550 * subset of the document.
2551 * This will be handled by xmlStringDecodeEntities
2553 return;
2554 case XML_PARSER_DTD:
2556 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2557 * In the internal DTD subset, parameter-entity references
2558 * can occur only where markup declarations can occur, not
2559 * within markup declarations.
2560 * In that case this is handled in xmlParseMarkupDecl
2562 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2563 return;
2564 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2565 return;
2566 break;
2567 case XML_PARSER_IGNORE:
2568 return;
2571 NEXT;
2572 name = xmlParseName(ctxt);
2573 if (xmlParserDebugEntities)
2574 xmlGenericError(xmlGenericErrorContext,
2575 "PEReference: %s\n", name);
2576 if (name == NULL) {
2577 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2578 } else {
2579 if (RAW == ';') {
2580 NEXT;
2581 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2582 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2583 if (ctxt->instate == XML_PARSER_EOF)
2584 return;
2585 if (entity == NULL) {
2588 * [ WFC: Entity Declared ]
2589 * In a document without any DTD, a document with only an
2590 * internal DTD subset which contains no parameter entity
2591 * references, or a document with "standalone='yes'", ...
2592 * ... The declaration of a parameter entity must precede
2593 * any reference to it...
2595 if ((ctxt->standalone == 1) ||
2596 ((ctxt->hasExternalSubset == 0) &&
2597 (ctxt->hasPErefs == 0))) {
2598 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2599 "PEReference: %%%s; not found\n", name);
2600 } else {
2602 * [ VC: Entity Declared ]
2603 * In a document with an external subset or external
2604 * parameter entities with "standalone='no'", ...
2605 * ... The declaration of a parameter entity must precede
2606 * any reference to it...
2608 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2609 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2610 "PEReference: %%%s; not found\n",
2611 name, NULL);
2612 } else
2613 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2614 "PEReference: %%%s; not found\n",
2615 name, NULL);
2616 ctxt->valid = 0;
2618 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2619 } else if (ctxt->input->free != deallocblankswrapper) {
2620 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2621 if (xmlPushInput(ctxt, input) < 0)
2622 return;
2623 } else {
2624 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2625 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2626 xmlChar start[4];
2627 xmlCharEncoding enc;
2630 * Note: external parameter entities will not be loaded, it
2631 * is not required for a non-validating parser, unless the
2632 * option of validating, or substituting entities were
2633 * given. Doing so is far more secure as the parser will
2634 * only process data coming from the document entity by
2635 * default.
2637 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2638 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2639 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2640 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2641 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2642 (ctxt->replaceEntities == 0) &&
2643 (ctxt->validate == 0))
2644 return;
2647 * handle the extra spaces added before and after
2648 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2649 * this is done independently.
2651 input = xmlNewEntityInputStream(ctxt, entity);
2652 if (xmlPushInput(ctxt, input) < 0)
2653 return;
2656 * Get the 4 first bytes and decode the charset
2657 * if enc != XML_CHAR_ENCODING_NONE
2658 * plug some encoding conversion routines.
2659 * Note that, since we may have some non-UTF8
2660 * encoding (like UTF16, bug 135229), the 'length'
2661 * is not known, but we can calculate based upon
2662 * the amount of data in the buffer.
2664 GROW
2665 if (ctxt->instate == XML_PARSER_EOF)
2666 return;
2667 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2668 start[0] = RAW;
2669 start[1] = NXT(1);
2670 start[2] = NXT(2);
2671 start[3] = NXT(3);
2672 enc = xmlDetectCharEncoding(start, 4);
2673 if (enc != XML_CHAR_ENCODING_NONE) {
2674 xmlSwitchEncoding(ctxt, enc);
2678 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2679 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2680 (IS_BLANK_CH(NXT(5)))) {
2681 xmlParseTextDecl(ctxt);
2683 } else {
2684 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2685 "PEReference: %s is not a parameter entity\n",
2686 name);
2689 } else {
2690 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t variable in scope holding the
 * current capacity of @buffer; it is updated on success.
 * mem_error: is expected to be a label in the calling function handling
 * memory allocation failures; it is also jumped to when the new size
 * computation wraps around. In both cases @buffer is left untouched.
 *
 * @n is parenthesized in the expansion so that any expression, including
 * low-precedence ones like a conditional, can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2711 * xmlStringLenDecodeEntities:
2712 * @ctxt: the parser context
2713 * @str: the input string
2714 * @len: the string length
2715 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2716 * @end: an end marker xmlChar, 0 if none
2717 * @end2: an end marker xmlChar, 0 if none
2718 * @end3: an end marker xmlChar, 0 if none
2720 * Takes a entity string content and process to do the adequate substitutions.
2722 * [67] Reference ::= EntityRef | CharRef
2724 * [69] PEReference ::= '%' Name ';'
2726 * Returns A newly allocated string with the substitution done. The caller
2727 * must deallocate it !
2729 xmlChar *
2730 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2731 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2732 xmlChar *buffer = NULL;
2733 size_t buffer_size = 0;
2734 size_t nbchars = 0;
2736 xmlChar *current = NULL;
2737 xmlChar *rep = NULL;
2738 const xmlChar *last;
2739 xmlEntityPtr ent;
2740 int c,l;
2742 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2743 return(NULL);
2744 last = str + len;
2746 if (((ctxt->depth > 40) &&
2747 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2748 (ctxt->depth > 1024)) {
2749 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2750 return(NULL);
2754 * allocate a translation buffer.
2756 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2757 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2758 if (buffer == NULL) goto mem_error;
2761 * OK loop until we reach one of the ending char or a size limit.
2762 * we are operating on already parsed values.
2764 if (str < last)
2765 c = CUR_SCHAR(str, l);
2766 else
2767 c = 0;
2768 while ((c != 0) && (c != end) && /* non input consuming loop */
2769 (c != end2) && (c != end3)) {
2771 if (c == 0) break;
2772 if ((c == '&') && (str[1] == '#')) {
2773 int val = xmlParseStringCharRef(ctxt, &str);
2774 if (val != 0) {
2775 COPY_BUF(0,buffer,nbchars,val);
2777 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2780 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2781 if (xmlParserDebugEntities)
2782 xmlGenericError(xmlGenericErrorContext,
2783 "String decoding Entity Reference: %.30s\n",
2784 str);
2785 ent = xmlParseStringEntityRef(ctxt, &str);
2786 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2787 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2788 goto int_error;
2789 xmlParserEntityCheck(ctxt, 0, ent, 0);
2790 if (ent != NULL)
2791 ctxt->nbentities += ent->checked / 2;
2792 if ((ent != NULL) &&
2793 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2794 if (ent->content != NULL) {
2795 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2799 } else {
2800 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2801 "predefined entity has no content\n");
2803 } else if ((ent != NULL) && (ent->content != NULL)) {
2804 ctxt->depth++;
2805 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2806 0, 0, 0);
2807 ctxt->depth--;
2809 if (rep != NULL) {
2810 current = rep;
2811 while (*current != 0) { /* non input consuming loop */
2812 buffer[nbchars++] = *current++;
2813 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2814 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2815 goto int_error;
2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2819 xmlFree(rep);
2820 rep = NULL;
2822 } else if (ent != NULL) {
2823 int i = xmlStrlen(ent->name);
2824 const xmlChar *cur = ent->name;
2826 buffer[nbchars++] = '&';
2827 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2828 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2830 for (;i > 0;i--)
2831 buffer[nbchars++] = *cur++;
2832 buffer[nbchars++] = ';';
2834 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2835 if (xmlParserDebugEntities)
2836 xmlGenericError(xmlGenericErrorContext,
2837 "String decoding PE Reference: %.30s\n", str);
2838 ent = xmlParseStringPEReference(ctxt, &str);
2839 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2840 goto int_error;
2841 xmlParserEntityCheck(ctxt, 0, ent, 0);
2842 if (ent != NULL)
2843 ctxt->nbentities += ent->checked / 2;
2844 if (ent != NULL) {
2845 if (ent->content == NULL) {
2846 xmlLoadEntityContent(ctxt, ent);
2848 ctxt->depth++;
2849 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2850 0, 0, 0);
2851 ctxt->depth--;
2852 if (rep != NULL) {
2853 current = rep;
2854 while (*current != 0) { /* non input consuming loop */
2855 buffer[nbchars++] = *current++;
2856 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2857 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2858 goto int_error;
2859 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2862 xmlFree(rep);
2863 rep = NULL;
2866 } else {
2867 COPY_BUF(l,buffer,nbchars,c);
2868 str += l;
2869 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2870 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2873 if (str < last)
2874 c = CUR_SCHAR(str, l);
2875 else
2876 c = 0;
2878 buffer[nbchars] = 0;
2879 return(buffer);
2881 mem_error:
2882 xmlErrMemory(ctxt, NULL);
2883 int_error:
2884 if (rep != NULL)
2885 xmlFree(rep);
2886 if (buffer != NULL)
2887 xmlFree(buffer);
2888 return(NULL);
2892 * xmlStringDecodeEntities:
2893 * @ctxt: the parser context
2894 * @str: the input string
2895 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2896 * @end: an end marker xmlChar, 0 if none
2897 * @end2: an end marker xmlChar, 0 if none
2898 * @end3: an end marker xmlChar, 0 if none
2900 * Takes a entity string content and process to do the adequate substitutions.
2902 * [67] Reference ::= EntityRef | CharRef
2904 * [69] PEReference ::= '%' Name ';'
2906 * Returns A newly allocated string with the substitution done. The caller
2907 * must deallocate it !
2909 xmlChar *
2910 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2911 xmlChar end, xmlChar end2, xmlChar end3) {
2912 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2913 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2914 end, end2, end3));
2917 /************************************************************************
2919 * Commodity functions, cleanup needed ? *
2921 ************************************************************************/
2924 * areBlanks:
2925 * @ctxt: an XML parser context
2926 * @str: a xmlChar *
2927 * @len: the size of @str
2928 * @blank_chars: we know the chars are blanks
2930 * Is this a sequence of blank chars that one can ignore ?
2932 * Returns 1 if ignorable 0 otherwise.
2935 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2936 int blank_chars) {
2937 int i, ret;
2938 xmlNodePtr lastChild;
2941 * Don't spend time trying to differentiate them, the same callback is
2942 * used !
2944 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2945 return(0);
2948 * Check for xml:space value.
2950 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2951 (*(ctxt->space) == -2))
2952 return(0);
2955 * Check that the string is made of blanks
2957 if (blank_chars == 0) {
2958 for (i = 0;i < len;i++)
2959 if (!(IS_BLANK_CH(str[i]))) return(0);
2963 * Look if the element is mixed content in the DTD if available
2965 if (ctxt->node == NULL) return(0);
2966 if (ctxt->myDoc != NULL) {
2967 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2968 if (ret == 0) return(1);
2969 if (ret == 1) return(0);
2973 * Otherwise, heuristic :-\
2975 if ((RAW != '<') && (RAW != 0xD)) return(0);
2976 if ((ctxt->node->children == NULL) &&
2977 (RAW == '<') && (NXT(1) == '/')) return(0);
2979 lastChild = xmlGetLastChild(ctxt->node);
2980 if (lastChild == NULL) {
2981 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2982 (ctxt->node->content != NULL)) return(0);
2983 } else if (xmlNodeIsText(lastChild))
2984 return(0);
2985 else if ((ctxt->node->children != NULL) &&
2986 (xmlNodeIsText(ctxt->node->children)))
2987 return(0);
2988 return(1);
2991 /************************************************************************
2993 * Extra stuff for namespace support *
2994 * Relates to http://www.w3.org/TR/WD-xml-names *
2996 ************************************************************************/
2999 * xmlSplitQName:
3000 * @ctxt: an XML parser context
3001 * @name: an XML parser context
3002 * @prefix: a xmlChar **
3004 * parse an UTF8 encoded XML qualified name string
3006 * [NS 5] QName ::= (Prefix ':')? LocalPart
3008 * [NS 6] Prefix ::= NCName
3010 * [NS 7] LocalPart ::= NCName
3012 * Returns the local part, and prefix is updated
3013 * to get the Prefix if any.
3016 xmlChar *
3017 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3018 xmlChar buf[XML_MAX_NAMELEN + 5];
3019 xmlChar *buffer = NULL;
3020 int len = 0;
3021 int max = XML_MAX_NAMELEN;
3022 xmlChar *ret = NULL;
3023 const xmlChar *cur = name;
3024 int c;
3026 if (prefix == NULL) return(NULL);
3027 *prefix = NULL;
3029 if (cur == NULL) return(NULL);
3031 #ifndef XML_XML_NAMESPACE
3032 /* xml: prefix is not really a namespace */
3033 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3034 (cur[2] == 'l') && (cur[3] == ':'))
3035 return(xmlStrdup(name));
3036 #endif
3038 /* nasty but well=formed */
3039 if (cur[0] == ':')
3040 return(xmlStrdup(name));
3042 c = *cur++;
3043 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3044 buf[len++] = c;
3045 c = *cur++;
3047 if (len >= max) {
3049 * Okay someone managed to make a huge name, so he's ready to pay
3050 * for the processing speed.
3052 max = len * 2;
3054 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3055 if (buffer == NULL) {
3056 xmlErrMemory(ctxt, NULL);
3057 return(NULL);
3059 memcpy(buffer, buf, len);
3060 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3061 if (len + 10 > max) {
3062 xmlChar *tmp;
3064 max *= 2;
3065 tmp = (xmlChar *) xmlRealloc(buffer,
3066 max * sizeof(xmlChar));
3067 if (tmp == NULL) {
3068 xmlFree(buffer);
3069 xmlErrMemory(ctxt, NULL);
3070 return(NULL);
3072 buffer = tmp;
3074 buffer[len++] = c;
3075 c = *cur++;
3077 buffer[len] = 0;
3080 if ((c == ':') && (*cur == 0)) {
3081 if (buffer != NULL)
3082 xmlFree(buffer);
3083 *prefix = NULL;
3084 return(xmlStrdup(name));
3087 if (buffer == NULL)
3088 ret = xmlStrndup(buf, len);
3089 else {
3090 ret = buffer;
3091 buffer = NULL;
3092 max = XML_MAX_NAMELEN;
3096 if (c == ':') {
3097 c = *cur;
3098 *prefix = ret;
3099 if (c == 0) {
3100 return(xmlStrndup(BAD_CAST "", 0));
3102 len = 0;
3105 * Check that the first character is proper to start
3106 * a new name
3108 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3109 ((c >= 0x41) && (c <= 0x5A)) ||
3110 (c == '_') || (c == ':'))) {
3111 int l;
3112 int first = CUR_SCHAR(cur, l);
3114 if (!IS_LETTER(first) && (first != '_')) {
3115 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3116 "Name %s is not XML Namespace compliant\n",
3117 name);
3120 cur++;
3122 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3123 buf[len++] = c;
3124 c = *cur++;
3126 if (len >= max) {
3128 * Okay someone managed to make a huge name, so he's ready to pay
3129 * for the processing speed.
3131 max = len * 2;
3133 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3134 if (buffer == NULL) {
3135 xmlErrMemory(ctxt, NULL);
3136 return(NULL);
3138 memcpy(buffer, buf, len);
3139 while (c != 0) { /* tested bigname2.xml */
3140 if (len + 10 > max) {
3141 xmlChar *tmp;
3143 max *= 2;
3144 tmp = (xmlChar *) xmlRealloc(buffer,
3145 max * sizeof(xmlChar));
3146 if (tmp == NULL) {
3147 xmlErrMemory(ctxt, NULL);
3148 xmlFree(buffer);
3149 return(NULL);
3151 buffer = tmp;
3153 buffer[len++] = c;
3154 c = *cur++;
3156 buffer[len] = 0;
3159 if (buffer == NULL)
3160 ret = xmlStrndup(buf, len);
3161 else {
3162 ret = buffer;
3166 return(ret);
3169 /************************************************************************
3171 * The parser itself *
3172 * Relates to http://www.w3.org/TR/REC-xml *
3174 ************************************************************************/
3176 /************************************************************************
3178 * Routines to parse Name, NCName and NmToken *
3180 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled into
 * DEBUG builds. File-scope statics start out at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3191 * The two following functions are related to the change of accepted
3192 * characters for Name and NmToken in the Revision 5 of XML-1.0
3193 * They correspond to the modified production [4] and the new production [4a]
3194 * changes in that revision. Also note that the macros used for the
3195 * productions Letter, Digit, CombiningChar and Extender are not needed
3196 * anymore.
3197 * We still keep compatibility to pre-revision5 parsing semantic if the
3198 * new XML_PARSE_OLD10 option is given to the parser.
3200 static int
3201 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3202 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3204 * Use the new checks of production [4] [4a] amd [5] of the
3205 * Update 5 of XML-1.0
3207 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 (c == '_') || (c == ':') ||
3211 ((c >= 0xC0) && (c <= 0xD6)) ||
3212 ((c >= 0xD8) && (c <= 0xF6)) ||
3213 ((c >= 0xF8) && (c <= 0x2FF)) ||
3214 ((c >= 0x370) && (c <= 0x37D)) ||
3215 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3216 ((c >= 0x200C) && (c <= 0x200D)) ||
3217 ((c >= 0x2070) && (c <= 0x218F)) ||
3218 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3219 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3220 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3221 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3222 ((c >= 0x10000) && (c <= 0xEFFFF))))
3223 return(1);
3224 } else {
3225 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3226 return(1);
3228 return(0);
3231 static int
3232 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3233 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3235 * Use the new checks of production [4] [4a] amd [5] of the
3236 * Update 5 of XML-1.0
3238 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3239 (((c >= 'a') && (c <= 'z')) ||
3240 ((c >= 'A') && (c <= 'Z')) ||
3241 ((c >= '0') && (c <= '9')) || /* !start */
3242 (c == '_') || (c == ':') ||
3243 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3244 ((c >= 0xC0) && (c <= 0xD6)) ||
3245 ((c >= 0xD8) && (c <= 0xF6)) ||
3246 ((c >= 0xF8) && (c <= 0x2FF)) ||
3247 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3248 ((c >= 0x370) && (c <= 0x37D)) ||
3249 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3250 ((c >= 0x200C) && (c <= 0x200D)) ||
3251 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3252 ((c >= 0x2070) && (c <= 0x218F)) ||
3253 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3254 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3255 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3256 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3257 ((c >= 0x10000) && (c <= 0xEFFFF))))
3258 return(1);
3259 } else {
3260 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3261 (c == '.') || (c == '-') ||
3262 (c == '_') || (c == ':') ||
3263 (IS_COMBINING(c)) ||
3264 (IS_EXTENDER(c)))
3265 return(1);
3267 return(0);
3270 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3271 int *len, int *alloc, int normalize);
3273 static const xmlChar *
3274 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3275 int len = 0, l;
3276 int c;
3277 int count = 0;
3279 #ifdef DEBUG
3280 nbParseNameComplex++;
3281 #endif
3284 * Handler for more complex cases
3286 GROW;
3287 if (ctxt->instate == XML_PARSER_EOF)
3288 return(NULL);
3289 c = CUR_CHAR(l);
3290 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3292 * Use the new checks of production [4] [4a] amd [5] of the
3293 * Update 5 of XML-1.0
3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 (!(((c >= 'a') && (c <= 'z')) ||
3297 ((c >= 'A') && (c <= 'Z')) ||
3298 (c == '_') || (c == ':') ||
3299 ((c >= 0xC0) && (c <= 0xD6)) ||
3300 ((c >= 0xD8) && (c <= 0xF6)) ||
3301 ((c >= 0xF8) && (c <= 0x2FF)) ||
3302 ((c >= 0x370) && (c <= 0x37D)) ||
3303 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3304 ((c >= 0x200C) && (c <= 0x200D)) ||
3305 ((c >= 0x2070) && (c <= 0x218F)) ||
3306 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3307 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3308 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3309 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3310 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3311 return(NULL);
3313 len += l;
3314 NEXTL(l);
3315 c = CUR_CHAR(l);
3316 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3317 (((c >= 'a') && (c <= 'z')) ||
3318 ((c >= 'A') && (c <= 'Z')) ||
3319 ((c >= '0') && (c <= '9')) || /* !start */
3320 (c == '_') || (c == ':') ||
3321 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3322 ((c >= 0xC0) && (c <= 0xD6)) ||
3323 ((c >= 0xD8) && (c <= 0xF6)) ||
3324 ((c >= 0xF8) && (c <= 0x2FF)) ||
3325 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3326 ((c >= 0x370) && (c <= 0x37D)) ||
3327 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3328 ((c >= 0x200C) && (c <= 0x200D)) ||
3329 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3330 ((c >= 0x2070) && (c <= 0x218F)) ||
3331 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3332 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3333 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3334 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3335 ((c >= 0x10000) && (c <= 0xEFFFF))
3336 )) {
3337 if (count++ > XML_PARSER_CHUNK_SIZE) {
3338 count = 0;
3339 GROW;
3340 if (ctxt->instate == XML_PARSER_EOF)
3341 return(NULL);
3343 len += l;
3344 NEXTL(l);
3345 c = CUR_CHAR(l);
3347 } else {
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!IS_LETTER(c) && (c != '_') &&
3350 (c != ':'))) {
3351 return(NULL);
3353 len += l;
3354 NEXTL(l);
3355 c = CUR_CHAR(l);
3357 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3358 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3359 (c == '.') || (c == '-') ||
3360 (c == '_') || (c == ':') ||
3361 (IS_COMBINING(c)) ||
3362 (IS_EXTENDER(c)))) {
3363 if (count++ > XML_PARSER_CHUNK_SIZE) {
3364 count = 0;
3365 GROW;
3366 if (ctxt->instate == XML_PARSER_EOF)
3367 return(NULL);
3369 len += l;
3370 NEXTL(l);
3371 c = CUR_CHAR(l);
3372 if (c == 0) {
3373 count = 0;
3374 GROW;
3375 if (ctxt->instate == XML_PARSER_EOF)
3376 return(NULL);
3377 c = CUR_CHAR(l);
3381 if ((len > XML_MAX_NAME_LENGTH) &&
3382 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3383 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384 return(NULL);
3386 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3387 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3388 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3392 * xmlParseName:
3393 * @ctxt: an XML parser context
3395 * parse an XML name.
3397 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3398 * CombiningChar | Extender
3400 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3402 * [6] Names ::= Name (#x20 Name)*
3404 * Returns the Name parsed or NULL
3407 const xmlChar *
3408 xmlParseName(xmlParserCtxtPtr ctxt) {
3409 const xmlChar *in;
3410 const xmlChar *ret;
3411 int count = 0;
3413 GROW;
3415 #ifdef DEBUG
3416 nbParseName++;
3417 #endif
3420 * Accelerator for simple ASCII names
3422 in = ctxt->input->cur;
3423 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3424 ((*in >= 0x41) && (*in <= 0x5A)) ||
3425 (*in == '_') || (*in == ':')) {
3426 in++;
3427 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3428 ((*in >= 0x41) && (*in <= 0x5A)) ||
3429 ((*in >= 0x30) && (*in <= 0x39)) ||
3430 (*in == '_') || (*in == '-') ||
3431 (*in == ':') || (*in == '.'))
3432 in++;
3433 if ((*in > 0) && (*in < 0x80)) {
3434 count = in - ctxt->input->cur;
3435 if ((count > XML_MAX_NAME_LENGTH) &&
3436 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3437 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3438 return(NULL);
3440 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3441 ctxt->input->cur = in;
3442 ctxt->nbChars += count;
3443 ctxt->input->col += count;
3444 if (ret == NULL)
3445 xmlErrMemory(ctxt, NULL);
3446 return(ret);
3449 /* accelerator for special cases */
3450 return(xmlParseNameComplex(ctxt));
3453 static const xmlChar *
3454 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3455 int len = 0, l;
3456 int c;
3457 int count = 0;
3458 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3460 #ifdef DEBUG
3461 nbParseNCNameComplex++;
3462 #endif
3465 * Handler for more complex cases
3467 GROW;
3468 end = ctxt->input->cur;
3469 c = CUR_CHAR(l);
3470 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3471 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3472 return(NULL);
3475 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3476 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3477 if (count++ > XML_PARSER_CHUNK_SIZE) {
3478 if ((len > XML_MAX_NAME_LENGTH) &&
3479 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481 return(NULL);
3483 count = 0;
3484 GROW;
3485 if (ctxt->instate == XML_PARSER_EOF)
3486 return(NULL);
3488 len += l;
3489 NEXTL(l);
3490 end = ctxt->input->cur;
3491 c = CUR_CHAR(l);
3492 if (c == 0) {
3493 count = 0;
3494 GROW;
3495 if (ctxt->instate == XML_PARSER_EOF)
3496 return(NULL);
3497 end = ctxt->input->cur;
3498 c = CUR_CHAR(l);
3501 if ((len > XML_MAX_NAME_LENGTH) &&
3502 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3503 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504 return(NULL);
3506 return(xmlDictLookup(ctxt->dict, end - len, len));
3510 * xmlParseNCName:
3511 * @ctxt: an XML parser context
3512 * @len: length of the string parsed
3514 * parse an XML name.
3516 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3517 * CombiningChar | Extender
3519 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3521 * Returns the Name parsed or NULL
3524 static const xmlChar *
3525 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3526 const xmlChar *in;
3527 const xmlChar *ret;
3528 int count = 0;
3530 #ifdef DEBUG
3531 nbParseNCName++;
3532 #endif
3535 * Accelerator for simple ASCII names
3537 in = ctxt->input->cur;
3538 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3539 ((*in >= 0x41) && (*in <= 0x5A)) ||
3540 (*in == '_')) {
3541 in++;
3542 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3543 ((*in >= 0x41) && (*in <= 0x5A)) ||
3544 ((*in >= 0x30) && (*in <= 0x39)) ||
3545 (*in == '_') || (*in == '-') ||
3546 (*in == '.'))
3547 in++;
3548 if ((*in > 0) && (*in < 0x80)) {
3549 count = in - ctxt->input->cur;
3550 if ((count > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 return(NULL);
3555 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3556 ctxt->input->cur = in;
3557 ctxt->nbChars += count;
3558 ctxt->input->col += count;
3559 if (ret == NULL) {
3560 xmlErrMemory(ctxt, NULL);
3562 return(ret);
3565 return(xmlParseNCNameComplex(ctxt));
3569 * xmlParseNameAndCompare:
3570 * @ctxt: an XML parser context
3572 * parse an XML name and compares for match
3573 * (specialized for endtag parsing)
3575 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3576 * and the name for mismatch
3579 static const xmlChar *
3580 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3581 register const xmlChar *cmp = other;
3582 register const xmlChar *in;
3583 const xmlChar *ret;
3585 GROW;
3586 if (ctxt->instate == XML_PARSER_EOF)
3587 return(NULL);
3589 in = ctxt->input->cur;
3590 while (*in != 0 && *in == *cmp) {
3591 ++in;
3592 ++cmp;
3593 ctxt->input->col++;
3595 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3596 /* success */
3597 ctxt->input->cur = in;
3598 return (const xmlChar*) 1;
3600 /* failure (or end of input buffer), check with full function */
3601 ret = xmlParseName (ctxt);
3602 /* strings coming from the dictionnary direct compare possible */
3603 if (ret == other) {
3604 return (const xmlChar*) 1;
3606 return ret;
3610 * xmlParseStringName:
3611 * @ctxt: an XML parser context
3612 * @str: a pointer to the string pointer (IN/OUT)
3614 * parse an XML name.
3616 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3617 * CombiningChar | Extender
3619 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3621 * [6] Names ::= Name (#x20 Name)*
3623 * Returns the Name parsed or NULL. The @str pointer
3624 * is updated to the current location in the string.
3627 static xmlChar *
3628 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3629 xmlChar buf[XML_MAX_NAMELEN + 5];
3630 const xmlChar *cur = *str;
3631 int len = 0, l;
3632 int c;
3634 #ifdef DEBUG
3635 nbParseStringName++;
3636 #endif
3638 c = CUR_SCHAR(cur, l);
3639 if (!xmlIsNameStartChar(ctxt, c)) {
3640 return(NULL);
3643 COPY_BUF(l,buf,len,c);
3644 cur += l;
3645 c = CUR_SCHAR(cur, l);
3646 while (xmlIsNameChar(ctxt, c)) {
3647 COPY_BUF(l,buf,len,c);
3648 cur += l;
3649 c = CUR_SCHAR(cur, l);
3650 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3652 * Okay someone managed to make a huge name, so he's ready to pay
3653 * for the processing speed.
3655 xmlChar *buffer;
3656 int max = len * 2;
3658 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3659 if (buffer == NULL) {
3660 xmlErrMemory(ctxt, NULL);
3661 return(NULL);
3663 memcpy(buffer, buf, len);
3664 while (xmlIsNameChar(ctxt, c)) {
3665 if (len + 10 > max) {
3666 xmlChar *tmp;
3668 if ((len > XML_MAX_NAME_LENGTH) &&
3669 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3670 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3671 xmlFree(buffer);
3672 return(NULL);
3674 max *= 2;
3675 tmp = (xmlChar *) xmlRealloc(buffer,
3676 max * sizeof(xmlChar));
3677 if (tmp == NULL) {
3678 xmlErrMemory(ctxt, NULL);
3679 xmlFree(buffer);
3680 return(NULL);
3682 buffer = tmp;
3684 COPY_BUF(l,buffer,len,c);
3685 cur += l;
3686 c = CUR_SCHAR(cur, l);
3688 buffer[len] = 0;
3689 *str = cur;
3690 return(buffer);
3693 if ((len > XML_MAX_NAME_LENGTH) &&
3694 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3695 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3696 return(NULL);
3698 *str = cur;
3699 return(xmlStrndup(buf, len));
3703 * xmlParseNmtoken:
3704 * @ctxt: an XML parser context
3706 * parse an XML Nmtoken.
3708 * [7] Nmtoken ::= (NameChar)+
3710 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3712 * Returns the Nmtoken parsed or NULL
3715 xmlChar *
3716 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3717 xmlChar buf[XML_MAX_NAMELEN + 5];
3718 int len = 0, l;
3719 int c;
3720 int count = 0;
3722 #ifdef DEBUG
3723 nbParseNmToken++;
3724 #endif
3726 GROW;
3727 if (ctxt->instate == XML_PARSER_EOF)
3728 return(NULL);
3729 c = CUR_CHAR(l);
3731 while (xmlIsNameChar(ctxt, c)) {
3732 if (count++ > XML_PARSER_CHUNK_SIZE) {
3733 count = 0;
3734 GROW;
3736 COPY_BUF(l,buf,len,c);
3737 NEXTL(l);
3738 c = CUR_CHAR(l);
3739 if (c == 0) {
3740 count = 0;
3741 GROW;
3742 if (ctxt->instate == XML_PARSER_EOF)
3743 return(NULL);
3744 c = CUR_CHAR(l);
3746 if (len >= XML_MAX_NAMELEN) {
3748 * Okay someone managed to make a huge token, so he's ready to pay
3749 * for the processing speed.
3751 xmlChar *buffer;
3752 int max = len * 2;
3754 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3755 if (buffer == NULL) {
3756 xmlErrMemory(ctxt, NULL);
3757 return(NULL);
3759 memcpy(buffer, buf, len);
3760 while (xmlIsNameChar(ctxt, c)) {
3761 if (count++ > XML_PARSER_CHUNK_SIZE) {
3762 count = 0;
3763 GROW;
3764 if (ctxt->instate == XML_PARSER_EOF) {
3765 xmlFree(buffer);
3766 return(NULL);
3769 if (len + 10 > max) {
3770 xmlChar *tmp;
3772 if ((max > XML_MAX_NAME_LENGTH) &&
3773 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3774 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3775 xmlFree(buffer);
3776 return(NULL);
3778 max *= 2;
3779 tmp = (xmlChar *) xmlRealloc(buffer,
3780 max * sizeof(xmlChar));
3781 if (tmp == NULL) {
3782 xmlErrMemory(ctxt, NULL);
3783 xmlFree(buffer);
3784 return(NULL);
3786 buffer = tmp;
3788 COPY_BUF(l,buffer,len,c);
3789 NEXTL(l);
3790 c = CUR_CHAR(l);
3792 buffer[len] = 0;
3793 return(buffer);
3796 if (len == 0)
3797 return(NULL);
3798 if ((len > XML_MAX_NAME_LENGTH) &&
3799 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3801 return(NULL);
3803 return(xmlStrndup(buf, len));
3807 * xmlParseEntityValue:
3808 * @ctxt: an XML parser context
3809 * @orig: if non-NULL store a copy of the original entity value
3811 * parse a value for ENTITY declarations
3813 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3814 * "'" ([^%&'] | PEReference | Reference)* "'"
3816 * Returns the EntityValue parsed with reference substituted or NULL
3819 xmlChar *
3820 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3821 xmlChar *buf = NULL;
3822 int len = 0;
3823 int size = XML_PARSER_BUFFER_SIZE;
3824 int c, l;
3825 xmlChar stop;
3826 xmlChar *ret = NULL;
3827 const xmlChar *cur = NULL;
3828 xmlParserInputPtr input;
3830 if (RAW == '"') stop = '"';
3831 else if (RAW == '\'') stop = '\'';
3832 else {
3833 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3834 return(NULL);
3836 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3837 if (buf == NULL) {
3838 xmlErrMemory(ctxt, NULL);
3839 return(NULL);
3843 * The content of the entity definition is copied in a buffer.
3846 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3847 input = ctxt->input;
3848 GROW;
3849 if (ctxt->instate == XML_PARSER_EOF) {
3850 xmlFree(buf);
3851 return(NULL);
3853 NEXT;
3854 c = CUR_CHAR(l);
3856 * NOTE: 4.4.5 Included in Literal
3857 * When a parameter entity reference appears in a literal entity
3858 * value, ... a single or double quote character in the replacement
3859 * text is always treated as a normal data character and will not
3860 * terminate the literal.
3861 * In practice it means we stop the loop only when back at parsing
3862 * the initial entity and the quote is found
3864 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3865 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3866 if (len + 5 >= size) {
3867 xmlChar *tmp;
3869 size *= 2;
3870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3871 if (tmp == NULL) {
3872 xmlErrMemory(ctxt, NULL);
3873 xmlFree(buf);
3874 return(NULL);
3876 buf = tmp;
3878 COPY_BUF(l,buf,len,c);
3879 NEXTL(l);
3881 * Pop-up of finished entities.
3883 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3884 xmlPopInput(ctxt);
3886 GROW;
3887 c = CUR_CHAR(l);
3888 if (c == 0) {
3889 GROW;
3890 c = CUR_CHAR(l);
3893 buf[len] = 0;
3894 if (ctxt->instate == XML_PARSER_EOF) {
3895 xmlFree(buf);
3896 return(NULL);
3900 * Raise problem w.r.t. '&' and '%' being used in non-entities
3901 * reference constructs. Note Charref will be handled in
3902 * xmlStringDecodeEntities()
3904 cur = buf;
3905 while (*cur != 0) { /* non input consuming */
3906 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3907 xmlChar *name;
3908 xmlChar tmp = *cur;
3910 cur++;
3911 name = xmlParseStringName(ctxt, &cur);
3912 if ((name == NULL) || (*cur != ';')) {
3913 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3914 "EntityValue: '%c' forbidden except for entities references\n",
3915 tmp);
3917 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3918 (ctxt->inputNr == 1)) {
3919 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3921 if (name != NULL)
3922 xmlFree(name);
3923 if (*cur == 0)
3924 break;
3926 cur++;
3930 * Then PEReference entities are substituted.
3932 if (c != stop) {
3933 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3934 xmlFree(buf);
3935 } else {
3936 NEXT;
3938 * NOTE: 4.4.7 Bypassed
3939 * When a general entity reference appears in the EntityValue in
3940 * an entity declaration, it is bypassed and left as is.
3941 * so XML_SUBSTITUTE_REF is not set here.
3943 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3944 0, 0, 0);
3945 if (orig != NULL)
3946 *orig = buf;
3947 else
3948 xmlFree(buf);
3951 return(ret);
3955 * xmlParseAttValueComplex:
3956 * @ctxt: an XML parser context
3957 * @len: the resulting attribute len
3958 * @normalize: wether to apply the inner normalization
3960 * parse a value for an attribute, this is the fallback function
3961 * of xmlParseAttValue() when the attribute parsing requires handling
3962 * of non-ASCII characters, or normalization compaction.
3964 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3966 static xmlChar *
3967 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3968 xmlChar limit = 0;
3969 xmlChar *buf = NULL;
3970 xmlChar *rep = NULL;
3971 size_t len = 0;
3972 size_t buf_size = 0;
3973 int c, l, in_space = 0;
3974 xmlChar *current = NULL;
3975 xmlEntityPtr ent;
3977 if (NXT(0) == '"') {
3978 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3979 limit = '"';
3980 NEXT;
3981 } else if (NXT(0) == '\'') {
3982 limit = '\'';
3983 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3984 NEXT;
3985 } else {
3986 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3987 return(NULL);
3991 * allocate a translation buffer.
3993 buf_size = XML_PARSER_BUFFER_SIZE;
3994 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3995 if (buf == NULL) goto mem_error;
3998 * OK loop until we reach one of the ending char or a size limit.
4000 c = CUR_CHAR(l);
4001 while (((NXT(0) != limit) && /* checked */
4002 (IS_CHAR(c)) && (c != '<')) &&
4003 (ctxt->instate != XML_PARSER_EOF)) {
4005 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4006 * special option is given
4008 if ((len > XML_MAX_TEXT_LENGTH) &&
4009 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4010 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4011 "AttValue length too long\n");
4012 goto mem_error;
4014 if (c == 0) break;
4015 if (c == '&') {
4016 in_space = 0;
4017 if (NXT(1) == '#') {
4018 int val = xmlParseCharRef(ctxt);
4020 if (val == '&') {
4021 if (ctxt->replaceEntities) {
4022 if (len + 10 > buf_size) {
4023 growBuffer(buf, 10);
4025 buf[len++] = '&';
4026 } else {
4028 * The reparsing will be done in xmlStringGetNodeList()
4029 * called by the attribute() function in SAX.c
4031 if (len + 10 > buf_size) {
4032 growBuffer(buf, 10);
4034 buf[len++] = '&';
4035 buf[len++] = '#';
4036 buf[len++] = '3';
4037 buf[len++] = '8';
4038 buf[len++] = ';';
4040 } else if (val != 0) {
4041 if (len + 10 > buf_size) {
4042 growBuffer(buf, 10);
4044 len += xmlCopyChar(0, &buf[len], val);
4046 } else {
4047 ent = xmlParseEntityRef(ctxt);
4048 ctxt->nbentities++;
4049 if (ent != NULL)
4050 ctxt->nbentities += ent->owner;
4051 if ((ent != NULL) &&
4052 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4053 if (len + 10 > buf_size) {
4054 growBuffer(buf, 10);
4056 if ((ctxt->replaceEntities == 0) &&
4057 (ent->content[0] == '&')) {
4058 buf[len++] = '&';
4059 buf[len++] = '#';
4060 buf[len++] = '3';
4061 buf[len++] = '8';
4062 buf[len++] = ';';
4063 } else {
4064 buf[len++] = ent->content[0];
4066 } else if ((ent != NULL) &&
4067 (ctxt->replaceEntities != 0)) {
4068 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4069 rep = xmlStringDecodeEntities(ctxt, ent->content,
4070 XML_SUBSTITUTE_REF,
4071 0, 0, 0);
4072 if (rep != NULL) {
4073 current = rep;
4074 while (*current != 0) { /* non input consuming */
4075 if ((*current == 0xD) || (*current == 0xA) ||
4076 (*current == 0x9)) {
4077 buf[len++] = 0x20;
4078 current++;
4079 } else
4080 buf[len++] = *current++;
4081 if (len + 10 > buf_size) {
4082 growBuffer(buf, 10);
4085 xmlFree(rep);
4086 rep = NULL;
4088 } else {
4089 if (len + 10 > buf_size) {
4090 growBuffer(buf, 10);
4092 if (ent->content != NULL)
4093 buf[len++] = ent->content[0];
4095 } else if (ent != NULL) {
4096 int i = xmlStrlen(ent->name);
4097 const xmlChar *cur = ent->name;
4100 * This may look absurd but is needed to detect
4101 * entities problems
4103 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4104 (ent->content != NULL) && (ent->checked == 0)) {
4105 unsigned long oldnbent = ctxt->nbentities;
4107 rep = xmlStringDecodeEntities(ctxt, ent->content,
4108 XML_SUBSTITUTE_REF, 0, 0, 0);
4110 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4111 if (rep != NULL) {
4112 if (xmlStrchr(rep, '<'))
4113 ent->checked |= 1;
4114 xmlFree(rep);
4115 rep = NULL;
4120 * Just output the reference
4122 buf[len++] = '&';
4123 while (len + i + 10 > buf_size) {
4124 growBuffer(buf, i + 10);
4126 for (;i > 0;i--)
4127 buf[len++] = *cur++;
4128 buf[len++] = ';';
4131 } else {
4132 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133 if ((len != 0) || (!normalize)) {
4134 if ((!normalize) || (!in_space)) {
4135 COPY_BUF(l,buf,len,0x20);
4136 while (len + 10 > buf_size) {
4137 growBuffer(buf, 10);
4140 in_space = 1;
4142 } else {
4143 in_space = 0;
4144 COPY_BUF(l,buf,len,c);
4145 if (len + 10 > buf_size) {
4146 growBuffer(buf, 10);
4149 NEXTL(l);
4151 GROW;
4152 c = CUR_CHAR(l);
4154 if (ctxt->instate == XML_PARSER_EOF)
4155 goto error;
4157 if ((in_space) && (normalize)) {
4158 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4160 buf[len] = 0;
4161 if (RAW == '<') {
4162 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4163 } else if (RAW != limit) {
4164 if ((c != 0) && (!IS_CHAR(c))) {
4165 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4166 "invalid character in attribute value\n");
4167 } else {
4168 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4169 "AttValue: ' expected\n");
4171 } else
4172 NEXT;
4175 * There we potentially risk an overflow, don't allow attribute value of
4176 * length more than INT_MAX it is a very reasonnable assumption !
4178 if (len >= INT_MAX) {
4179 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4180 "AttValue length too long\n");
4181 goto mem_error;
4184 if (attlen != NULL) *attlen = (int) len;
4185 return(buf);
4187 mem_error:
4188 xmlErrMemory(ctxt, NULL);
4189 error:
4190 if (buf != NULL)
4191 xmlFree(buf);
4192 if (rep != NULL)
4193 xmlFree(rep);
4194 return(NULL);
4198 * xmlParseAttValue:
4199 * @ctxt: an XML parser context
4201 * parse a value for an attribute
4202 * Note: the parser won't do substitution of entities here, this
4203 * will be handled later in xmlStringGetNodeList
4205 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4206 * "'" ([^<&'] | Reference)* "'"
4208 * 3.3.3 Attribute-Value Normalization:
4209 * Before the value of an attribute is passed to the application or
4210 * checked for validity, the XML processor must normalize it as follows:
4211 * - a character reference is processed by appending the referenced
4212 * character to the attribute value
4213 * - an entity reference is processed by recursively processing the
4214 * replacement text of the entity
4215 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4216 * appending #x20 to the normalized value, except that only a single
4217 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4218 * parsed entity or the literal entity value of an internal parsed entity
4219 * - other characters are processed by appending them to the normalized value
4220 * If the declared value is not CDATA, then the XML processor must further
4221 * process the normalized attribute value by discarding any leading and
4222 * trailing space (#x20) characters, and by replacing sequences of space
4223 * (#x20) characters by a single space (#x20) character.
4224 * All attributes for which no declaration has been read should be treated
4225 * by a non-validating parser as if declared CDATA.
4227 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4231 xmlChar *
4232 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4233 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4234 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4238 * xmlParseSystemLiteral:
4239 * @ctxt: an XML parser context
4241 * parse an XML Literal
4243 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4245 * Returns the SystemLiteral parsed or NULL
4248 xmlChar *
4249 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4250 xmlChar *buf = NULL;
4251 int len = 0;
4252 int size = XML_PARSER_BUFFER_SIZE;
4253 int cur, l;
4254 xmlChar stop;
4255 int state = ctxt->instate;
4256 int count = 0;
4258 SHRINK;
4259 if (RAW == '"') {
4260 NEXT;
4261 stop = '"';
4262 } else if (RAW == '\'') {
4263 NEXT;
4264 stop = '\'';
4265 } else {
4266 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4267 return(NULL);
4270 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4271 if (buf == NULL) {
4272 xmlErrMemory(ctxt, NULL);
4273 return(NULL);
4275 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4276 cur = CUR_CHAR(l);
4277 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4278 if (len + 5 >= size) {
4279 xmlChar *tmp;
4281 if ((size > XML_MAX_NAME_LENGTH) &&
4282 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4283 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4284 xmlFree(buf);
4285 ctxt->instate = (xmlParserInputState) state;
4286 return(NULL);
4288 size *= 2;
4289 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4290 if (tmp == NULL) {
4291 xmlFree(buf);
4292 xmlErrMemory(ctxt, NULL);
4293 ctxt->instate = (xmlParserInputState) state;
4294 return(NULL);
4296 buf = tmp;
4298 count++;
4299 if (count > 50) {
4300 GROW;
4301 count = 0;
4302 if (ctxt->instate == XML_PARSER_EOF) {
4303 xmlFree(buf);
4304 return(NULL);
4307 COPY_BUF(l,buf,len,cur);
4308 NEXTL(l);
4309 cur = CUR_CHAR(l);
4310 if (cur == 0) {
4311 GROW;
4312 SHRINK;
4313 cur = CUR_CHAR(l);
4316 buf[len] = 0;
4317 ctxt->instate = (xmlParserInputState) state;
4318 if (!IS_CHAR(cur)) {
4319 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4320 } else {
4321 NEXT;
4323 return(buf);
4327 * xmlParsePubidLiteral:
4328 * @ctxt: an XML parser context
4330 * parse an XML public literal
4332 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4334 * Returns the PubidLiteral parsed or NULL.
4337 xmlChar *
4338 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4339 xmlChar *buf = NULL;
4340 int len = 0;
4341 int size = XML_PARSER_BUFFER_SIZE;
4342 xmlChar cur;
4343 xmlChar stop;
4344 int count = 0;
4345 xmlParserInputState oldstate = ctxt->instate;
4347 SHRINK;
4348 if (RAW == '"') {
4349 NEXT;
4350 stop = '"';
4351 } else if (RAW == '\'') {
4352 NEXT;
4353 stop = '\'';
4354 } else {
4355 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4356 return(NULL);
4358 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4359 if (buf == NULL) {
4360 xmlErrMemory(ctxt, NULL);
4361 return(NULL);
4363 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4364 cur = CUR;
4365 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4366 if (len + 1 >= size) {
4367 xmlChar *tmp;
4369 if ((size > XML_MAX_NAME_LENGTH) &&
4370 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4371 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4372 xmlFree(buf);
4373 return(NULL);
4375 size *= 2;
4376 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4377 if (tmp == NULL) {
4378 xmlErrMemory(ctxt, NULL);
4379 xmlFree(buf);
4380 return(NULL);
4382 buf = tmp;
4384 buf[len++] = cur;
4385 count++;
4386 if (count > 50) {
4387 GROW;
4388 count = 0;
4389 if (ctxt->instate == XML_PARSER_EOF) {
4390 xmlFree(buf);
4391 return(NULL);
4394 NEXT;
4395 cur = CUR;
4396 if (cur == 0) {
4397 GROW;
4398 SHRINK;
4399 cur = CUR;
4402 buf[len] = 0;
4403 if (cur != stop) {
4404 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4405 } else {
4406 NEXT;
4408 ctxt->instate = oldstate;
4409 return(buf);
4412 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
/*
 * used for the test in the inner loop of the char data testing
 *
 * An entry is non-zero (equal to its own index) for the bytes that the
 * inner loop may step over, and zero for the bytes that stop it:
 * control characters other than 0x9, '&', '<', ']' and every byte
 * from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 passes, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to the implicit zero initialization */
};
4453 * xmlParseCharData:
4454 * @ctxt: an XML parser context
4455 * @cdata: int indicating whether we are within a CDATA section
4457 * parse a CharData section.
4458 * if we are within a CDATA section ']]>' marks an end of section.
4460 * The right angle bracket (>) may be represented using the string "&gt;",
4461 * and must, for compatibility, be escaped using "&gt;" or a character
4462 * reference when it appears in the string "]]>" in content, when that
4463 * string is not marking the end of a CDATA section.
4465 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4468 void
4469 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4470 const xmlChar *in;
4471 int nbchar = 0;
4472 int line = ctxt->input->line;
4473 int col = ctxt->input->col;
4474 int ccol;
4476 SHRINK;
4477 GROW;
4479 * Accelerated common case where input don't need to be
4480 * modified before passing it to the handler.
4482 if (!cdata) {
4483 in = ctxt->input->cur;
4484 do {
4485 get_more_space:
4486 while (*in == 0x20) { in++; ctxt->input->col++; }
4487 if (*in == 0xA) {
4488 do {
4489 ctxt->input->line++; ctxt->input->col = 1;
4490 in++;
4491 } while (*in == 0xA);
4492 goto get_more_space;
4494 if (*in == '<') {
4495 nbchar = in - ctxt->input->cur;
4496 if (nbchar > 0) {
4497 const xmlChar *tmp = ctxt->input->cur;
4498 ctxt->input->cur = in;
4500 if ((ctxt->sax != NULL) &&
4501 (ctxt->sax->ignorableWhitespace !=
4502 ctxt->sax->characters)) {
4503 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4504 if (ctxt->sax->ignorableWhitespace != NULL)
4505 ctxt->sax->ignorableWhitespace(ctxt->userData,
4506 tmp, nbchar);
4507 } else {
4508 if (ctxt->sax->characters != NULL)
4509 ctxt->sax->characters(ctxt->userData,
4510 tmp, nbchar);
4511 if (*ctxt->space == -1)
4512 *ctxt->space = -2;
4514 } else if ((ctxt->sax != NULL) &&
4515 (ctxt->sax->characters != NULL)) {
4516 ctxt->sax->characters(ctxt->userData,
4517 tmp, nbchar);
4520 return;
4523 get_more:
4524 ccol = ctxt->input->col;
4525 while (test_char_data[*in]) {
4526 in++;
4527 ccol++;
4529 ctxt->input->col = ccol;
4530 if (*in == 0xA) {
4531 do {
4532 ctxt->input->line++; ctxt->input->col = 1;
4533 in++;
4534 } while (*in == 0xA);
4535 goto get_more;
4537 if (*in == ']') {
4538 if ((in[1] == ']') && (in[2] == '>')) {
4539 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4540 ctxt->input->cur = in;
4541 return;
4543 in++;
4544 ctxt->input->col++;
4545 goto get_more;
4547 nbchar = in - ctxt->input->cur;
4548 if (nbchar > 0) {
4549 if ((ctxt->sax != NULL) &&
4550 (ctxt->sax->ignorableWhitespace !=
4551 ctxt->sax->characters) &&
4552 (IS_BLANK_CH(*ctxt->input->cur))) {
4553 const xmlChar *tmp = ctxt->input->cur;
4554 ctxt->input->cur = in;
4556 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4557 if (ctxt->sax->ignorableWhitespace != NULL)
4558 ctxt->sax->ignorableWhitespace(ctxt->userData,
4559 tmp, nbchar);
4560 } else {
4561 if (ctxt->sax->characters != NULL)
4562 ctxt->sax->characters(ctxt->userData,
4563 tmp, nbchar);
4564 if (*ctxt->space == -1)
4565 *ctxt->space = -2;
4567 line = ctxt->input->line;
4568 col = ctxt->input->col;
4569 } else if (ctxt->sax != NULL) {
4570 if (ctxt->sax->characters != NULL)
4571 ctxt->sax->characters(ctxt->userData,
4572 ctxt->input->cur, nbchar);
4573 line = ctxt->input->line;
4574 col = ctxt->input->col;
4576 /* something really bad happened in the SAX callback */
4577 if (ctxt->instate != XML_PARSER_CONTENT)
4578 return;
4580 ctxt->input->cur = in;
4581 if (*in == 0xD) {
4582 in++;
4583 if (*in == 0xA) {
4584 ctxt->input->cur = in;
4585 in++;
4586 ctxt->input->line++; ctxt->input->col = 1;
4587 continue; /* while */
4589 in--;
4591 if (*in == '<') {
4592 return;
4594 if (*in == '&') {
4595 return;
4597 SHRINK;
4598 GROW;
4599 if (ctxt->instate == XML_PARSER_EOF)
4600 return;
4601 in = ctxt->input->cur;
4602 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4603 nbchar = 0;
4605 ctxt->input->line = line;
4606 ctxt->input->col = col;
4607 xmlParseCharDataComplex(ctxt, cdata);
4611 * xmlParseCharDataComplex:
4612 * @ctxt: an XML parser context
4613 * @cdata: int indicating whether we are within a CDATA section
4615 * parse a CharData section.this is the fallback function
4616 * of xmlParseCharData() when the parsing requires handling
4617 * of non-ASCII characters.
4619 static void
4620 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4622 int nbchar = 0;
4623 int cur, l;
4624 int count = 0;
4626 SHRINK;
4627 GROW;
4628 cur = CUR_CHAR(l);
4629 while ((cur != '<') && /* checked */
4630 (cur != '&') &&
4631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4632 if ((cur == ']') && (NXT(1) == ']') &&
4633 (NXT(2) == '>')) {
4634 if (cdata) break;
4635 else {
4636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4639 COPY_BUF(l,buf,nbchar,cur);
4640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4641 buf[nbchar] = 0;
4644 * OK the segment is to be consumed as chars.
4646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4647 if (areBlanks(ctxt, buf, nbchar, 0)) {
4648 if (ctxt->sax->ignorableWhitespace != NULL)
4649 ctxt->sax->ignorableWhitespace(ctxt->userData,
4650 buf, nbchar);
4651 } else {
4652 if (ctxt->sax->characters != NULL)
4653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4654 if ((ctxt->sax->characters !=
4655 ctxt->sax->ignorableWhitespace) &&
4656 (*ctxt->space == -1))
4657 *ctxt->space = -2;
4660 nbchar = 0;
4661 /* something really bad happened in the SAX callback */
4662 if (ctxt->instate != XML_PARSER_CONTENT)
4663 return;
4665 count++;
4666 if (count > 50) {
4667 GROW;
4668 count = 0;
4669 if (ctxt->instate == XML_PARSER_EOF)
4670 return;
4672 NEXTL(l);
4673 cur = CUR_CHAR(l);
4675 if (nbchar != 0) {
4676 buf[nbchar] = 0;
4678 * OK the segment is to be consumed as chars.
4680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4681 if (areBlanks(ctxt, buf, nbchar, 0)) {
4682 if (ctxt->sax->ignorableWhitespace != NULL)
4683 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4684 } else {
4685 if (ctxt->sax->characters != NULL)
4686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4687 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4688 (*ctxt->space == -1))
4689 *ctxt->space = -2;
4693 if ((cur != 0) && (!IS_CHAR(cur))) {
4694 /* Generate the error and skip the offending character */
4695 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4696 "PCDATA invalid Char value %d\n",
4697 cur);
4698 NEXTL(l);
4703 * xmlParseExternalID:
4704 * @ctxt: an XML parser context
4705 * @publicID: a xmlChar** receiving PubidLiteral
4706 * @strict: indicate whether we should restrict parsing to only
4707 * production [75], see NOTE below
4709 * Parse an External ID or a Public ID
4711 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4712 * 'PUBLIC' S PubidLiteral S SystemLiteral
4714 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4715 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4717 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4719 * Returns the function returns SystemLiteral and in the second
4720 * case publicID receives PubidLiteral, is strict is off
4721 * it is possible to return NULL and have publicID set.
4724 xmlChar *
4725 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4726 xmlChar *URI = NULL;
4728 SHRINK;
4730 *publicID = NULL;
4731 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4732 SKIP(6);
4733 if (!IS_BLANK_CH(CUR)) {
4734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735 "Space required after 'SYSTEM'\n");
4737 SKIP_BLANKS;
4738 URI = xmlParseSystemLiteral(ctxt);
4739 if (URI == NULL) {
4740 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4742 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4743 SKIP(6);
4744 if (!IS_BLANK_CH(CUR)) {
4745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4746 "Space required after 'PUBLIC'\n");
4748 SKIP_BLANKS;
4749 *publicID = xmlParsePubidLiteral(ctxt);
4750 if (*publicID == NULL) {
4751 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4753 if (strict) {
4755 * We don't handle [83] so "S SystemLiteral" is required.
4757 if (!IS_BLANK_CH(CUR)) {
4758 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4759 "Space required after the Public Identifier\n");
4761 } else {
4763 * We handle [83] so we return immediately, if
4764 * "S SystemLiteral" is not detected. From a purely parsing
4765 * point of view that's a nice mess.
4767 const xmlChar *ptr;
4768 GROW;
4770 ptr = CUR_PTR;
4771 if (!IS_BLANK_CH(*ptr)) return(NULL);
4773 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4774 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4776 SKIP_BLANKS;
4777 URI = xmlParseSystemLiteral(ctxt);
4778 if (URI == NULL) {
4779 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4782 return(URI);
4786 * xmlParseCommentComplex:
4787 * @ctxt: an XML parser context
4788 * @buf: the already parsed part of the buffer
4789 * @len: number of bytes filled in the buffer
4790 * @size: allocated size of the buffer
4792 * Skip an XML (SGML) comment <!-- .... -->
4793 * The spec says that "For compatibility, the string "--" (double-hyphen)
4794 * must not occur within comments. "
4795 * This is the slow routine in case the accelerator for ascii didn't work
4797 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Slow path of comment parsing: reads the comment character by character
 * with CUR_CHAR/NEXTL and accumulates it in @buf. If @buf is NULL a fresh
 * buffer is allocated here. The buffer is released with xmlFree() on every
 * exit path of this function, so the caller must not free it afterwards.
 * On success the comment text is handed to ctxt->sax->comment.
 */
4799 static void
4800 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801 size_t len, size_t size) {
/* q, r and cur form a sliding window over three consecutive characters
 * (oldest first); ql, rl and l are their lengths as reported by CUR_CHAR. */
4802 int q, ql;
4803 int r, rl;
4804 int cur, l;
4805 size_t count = 0;
4806 int inputid;
/* Remember the input the comment started in, to detect entity crossing. */
4808 inputid = ctxt->input->id;
/* No buffer handed over by the caller: start with an empty one. */
4810 if (buf == NULL) {
4811 len = 0;
4812 size = XML_PARSER_BUFFER_SIZE;
4813 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4814 if (buf == NULL) {
4815 xmlErrMemory(ctxt, NULL);
4816 return;
4819 GROW; /* Assure there's enough input data */
/* Prime the window: first character. */
4820 q = CUR_CHAR(ql);
4821 if (q == 0)
4822 goto not_terminated;
4823 if (!IS_CHAR(q)) {
/* NOTE(review): the argument line of this call is not present in this extract. */
4824 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4825 "xmlParseComment: invalid xmlChar value %d\n",
4827 xmlFree (buf);
4828 return;
4830 NEXTL(ql);
/* Prime the window: second character. */
4831 r = CUR_CHAR(rl);
4832 if (r == 0)
4833 goto not_terminated;
4834 if (!IS_CHAR(r)) {
/* NOTE(review): the argument line of this call is not present in this extract. */
4835 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4836 "xmlParseComment: invalid xmlChar value %d\n",
4838 xmlFree (buf);
4839 return;
4841 NEXTL(rl);
/* Prime the window: third character. */
4842 cur = CUR_CHAR(l);
4843 if (cur == 0)
4844 goto not_terminated;
/* Loop until the window holds "-->" or cur is not a valid Char. */
4845 while (IS_CHAR(cur) && /* checked */
4846 ((cur != '>') ||
4847 (r != '-') || (q != '-'))) {
/* "--" not followed by '>' is reported, but parsing goes on. */
4848 if ((r == '-') && (q == '-')) {
4849 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
/* Size limit, only enforced when XML_PARSE_HUGE is not set. */
4851 if ((len > XML_MAX_TEXT_LENGTH) &&
4852 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4853 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854 "Comment too big found", NULL);
4855 xmlFree (buf);
4856 return;
/* Double the buffer when fewer than 5 spare bytes remain. */
4858 if (len + 5 >= size) {
4859 xmlChar *new_buf;
4860 size_t new_size;
4862 new_size = size * 2;
4863 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4864 if (new_buf == NULL) {
4865 xmlFree (buf);
4866 xmlErrMemory(ctxt, NULL);
4867 return;
4869 buf = new_buf;
4870 size = new_size;
/* Append the oldest character of the window, then shift the window. */
4872 COPY_BUF(ql,buf,len,q);
4873 q = r;
4874 ql = rl;
4875 r = cur;
4876 rl = l;
/* Every 50 characters, ask for more input and stop if the parser hit EOF. */
4878 count++;
4879 if (count > 50) {
4880 GROW;
4881 count = 0;
4882 if (ctxt->instate == XML_PARSER_EOF) {
4883 xmlFree(buf);
4884 return;
4887 NEXTL(l);
4888 cur = CUR_CHAR(l);
/* A zero here may only mean the window is exhausted: refill and retry. */
4889 if (cur == 0) {
4890 SHRINK;
4891 GROW;
4892 cur = CUR_CHAR(l);
/* The trailing "--" still held in q/r is never copied into buf. */
4895 buf[len] = 0;
4896 if (cur == 0) {
4897 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4898 "Comment not terminated \n<!--%.50s\n", buf);
4899 } else if (!IS_CHAR(cur)) {
4900 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4901 "xmlParseComment: invalid xmlChar value %d\n",
4902 cur);
4903 } else {
/* Properly terminated: check the entity boundary, consume '>', notify SAX. */
4904 if (inputid != ctxt->input->id) {
4905 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4906 "Comment doesn't start and stop in the same entity\n");
4908 NEXT;
4909 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4910 (!ctxt->disableSAX))
4911 ctxt->sax->comment(ctxt->userData, buf);
4913 xmlFree(buf);
4914 return;
/* Input ended while the three-character window was still being primed. */
4915 not_terminated:
4916 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4917 "Comment not terminated\n", NULL);
4918 xmlFree(buf);
4919 return;
4923 * xmlParseComment:
4924 * @ctxt: an XML parser context
4926 * Skip an XML (SGML) comment <!-- .... -->
4927 * The spec says that "For compatibility, the string "--" (double-hyphen)
4928 * must not occur within comments. "
4930 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4932 void
4933 xmlParseComment(xmlParserCtxtPtr ctxt) {
4934 xmlChar *buf = NULL;
4935 size_t size = XML_PARSER_BUFFER_SIZE;
4936 size_t len = 0;
4937 xmlParserInputState state;
4938 const xmlChar *in;
4939 size_t nbchar = 0;
4940 int ccol;
4941 int inputid;
4944 * Check that there is a comment right here.
4946 if ((RAW != '<') || (NXT(1) != '!') ||
4947 (NXT(2) != '-') || (NXT(3) != '-')) return;
4948 state = ctxt->instate;
4949 ctxt->instate = XML_PARSER_COMMENT;
4950 inputid = ctxt->input->id;
4951 SKIP(4);
4952 SHRINK;
4953 GROW;
4956 * Accelerated common case where input don't need to be
4957 * modified before passing it to the handler.
4959 in = ctxt->input->cur;
4960 do {
4961 if (*in == 0xA) {
4962 do {
4963 ctxt->input->line++; ctxt->input->col = 1;
4964 in++;
4965 } while (*in == 0xA);
4967 get_more:
4968 ccol = ctxt->input->col;
4969 while (((*in > '-') && (*in <= 0x7F)) ||
4970 ((*in >= 0x20) && (*in < '-')) ||
4971 (*in == 0x09)) {
4972 in++;
4973 ccol++;
4975 ctxt->input->col = ccol;
4976 if (*in == 0xA) {
4977 do {
4978 ctxt->input->line++; ctxt->input->col = 1;
4979 in++;
4980 } while (*in == 0xA);
4981 goto get_more;
4983 nbchar = in - ctxt->input->cur;
4985 * save current set of data
4987 if (nbchar > 0) {
4988 if ((ctxt->sax != NULL) &&
4989 (ctxt->sax->comment != NULL)) {
4990 if (buf == NULL) {
4991 if ((*in == '-') && (in[1] == '-'))
4992 size = nbchar + 1;
4993 else
4994 size = XML_PARSER_BUFFER_SIZE + nbchar;
4995 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4996 if (buf == NULL) {
4997 xmlErrMemory(ctxt, NULL);
4998 ctxt->instate = state;
4999 return;
5001 len = 0;
5002 } else if (len + nbchar + 1 >= size) {
5003 xmlChar *new_buf;
5004 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5005 new_buf = (xmlChar *) xmlRealloc(buf,
5006 size * sizeof(xmlChar));
5007 if (new_buf == NULL) {
5008 xmlFree (buf);
5009 xmlErrMemory(ctxt, NULL);
5010 ctxt->instate = state;
5011 return;
5013 buf = new_buf;
5015 memcpy(&buf[len], ctxt->input->cur, nbchar);
5016 len += nbchar;
5017 buf[len] = 0;
5020 if ((len > XML_MAX_TEXT_LENGTH) &&
5021 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5022 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5023 "Comment too big found", NULL);
5024 xmlFree (buf);
5025 return;
5027 ctxt->input->cur = in;
5028 if (*in == 0xA) {
5029 in++;
5030 ctxt->input->line++; ctxt->input->col = 1;
5032 if (*in == 0xD) {
5033 in++;
5034 if (*in == 0xA) {
5035 ctxt->input->cur = in;
5036 in++;
5037 ctxt->input->line++; ctxt->input->col = 1;
5038 continue; /* while */
5040 in--;
5042 SHRINK;
5043 GROW;
5044 if (ctxt->instate == XML_PARSER_EOF) {
5045 xmlFree(buf);
5046 return;
5048 in = ctxt->input->cur;
5049 if (*in == '-') {
5050 if (in[1] == '-') {
5051 if (in[2] == '>') {
5052 if (ctxt->input->id != inputid) {
5053 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5054 "comment doesn't start and stop in the same entity\n");
5056 SKIP(3);
5057 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5058 (!ctxt->disableSAX)) {
5059 if (buf != NULL)
5060 ctxt->sax->comment(ctxt->userData, buf);
5061 else
5062 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5064 if (buf != NULL)
5065 xmlFree(buf);
5066 if (ctxt->instate != XML_PARSER_EOF)
5067 ctxt->instate = state;
5068 return;
5070 if (buf != NULL) {
5071 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5072 "Double hyphen within comment: "
5073 "<!--%.50s\n",
5074 buf);
5075 } else
5076 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5077 "Double hyphen within comment\n", NULL);
5078 in++;
5079 ctxt->input->col++;
5081 in++;
5082 ctxt->input->col++;
5083 goto get_more;
5085 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5086 xmlParseCommentComplex(ctxt, buf, len, size);
5087 ctxt->instate = state;
5088 return;
5093 * xmlParsePITarget:
5094 * @ctxt: an XML parser context
5096 * parse the name of a PI
5098 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5100 * Returns the PITarget name or NULL
5103 const xmlChar *
5104 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5105 const xmlChar *name;
5107 name = xmlParseName(ctxt);
5108 if ((name != NULL) &&
5109 ((name[0] == 'x') || (name[0] == 'X')) &&
5110 ((name[1] == 'm') || (name[1] == 'M')) &&
5111 ((name[2] == 'l') || (name[2] == 'L'))) {
5112 int i;
5113 if ((name[0] == 'x') && (name[1] == 'm') &&
5114 (name[2] == 'l') && (name[3] == 0)) {
5115 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5116 "XML declaration allowed only at the start of the document\n");
5117 return(name);
5118 } else if (name[3] == 0) {
5119 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5120 return(name);
5122 for (i = 0;;i++) {
5123 if (xmlW3CPIs[i] == NULL) break;
5124 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5125 return(name);
5127 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5128 "xmlParsePITarget: invalid name prefix 'xml'\n",
5129 NULL, NULL);
5131 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5132 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5133 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5135 return(name);
5138 #ifdef LIBXML_CATALOG_ENABLED
5140 * xmlParseCatalogPI:
5141 * @ctxt: an XML parser context
5142 * @catalog: the PI value string
5144 * parse an XML Catalog Processing Instruction.
5146 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5148 * Occurs only if allowed by the user and if happening in the Misc
5149 * part of the document before any doctype informations
5150 * This will add the given catalog to the parsing context in order
5151 * to be used if there is a resolution need further down in the document
5154 static void
5155 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5156 xmlChar *URL = NULL;
5157 const xmlChar *tmp, *base;
5158 xmlChar marker;
5160 tmp = catalog;
5161 while (IS_BLANK_CH(*tmp)) tmp++;
5162 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5163 goto error;
5164 tmp += 7;
5165 while (IS_BLANK_CH(*tmp)) tmp++;
5166 if (*tmp != '=') {
5167 return;
5169 tmp++;
5170 while (IS_BLANK_CH(*tmp)) tmp++;
5171 marker = *tmp;
5172 if ((marker != '\'') && (marker != '"'))
5173 goto error;
5174 tmp++;
5175 base = tmp;
5176 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5177 if (*tmp == 0)
5178 goto error;
5179 URL = xmlStrndup(base, tmp - base);
5180 tmp++;
5181 while (IS_BLANK_CH(*tmp)) tmp++;
5182 if (*tmp != 0)
5183 goto error;
5185 if (URL != NULL) {
5186 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5187 xmlFree(URL);
5189 return;
5191 error:
5192 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5193 "Catalog PI syntax error: %s\n",
5194 catalog, NULL);
5195 if (URL != NULL)
5196 xmlFree(URL);
5198 #endif
5201 * xmlParsePI:
5202 * @ctxt: an XML parser context
5204 * parse an XML Processing Instruction.
5206 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5208 * The processing is transfered to SAX once parsed.
5211 void
5212 xmlParsePI(xmlParserCtxtPtr ctxt) {
5213 xmlChar *buf = NULL;
5214 size_t len = 0;
5215 size_t size = XML_PARSER_BUFFER_SIZE;
5216 int cur, l;
5217 const xmlChar *target;
5218 xmlParserInputState state;
5219 int count = 0;
5221 if ((RAW == '<') && (NXT(1) == '?')) {
5222 xmlParserInputPtr input = ctxt->input;
5223 state = ctxt->instate;
5224 ctxt->instate = XML_PARSER_PI;
5226 * this is a Processing Instruction.
5228 SKIP(2);
5229 SHRINK;
5232 * Parse the target name and check for special support like
5233 * namespace.
5235 target = xmlParsePITarget(ctxt);
5236 if (target != NULL) {
5237 if ((RAW == '?') && (NXT(1) == '>')) {
5238 if (input != ctxt->input) {
5239 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5240 "PI declaration doesn't start and stop in the same entity\n");
5242 SKIP(2);
5245 * SAX: PI detected.
5247 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5248 (ctxt->sax->processingInstruction != NULL))
5249 ctxt->sax->processingInstruction(ctxt->userData,
5250 target, NULL);
5251 if (ctxt->instate != XML_PARSER_EOF)
5252 ctxt->instate = state;
5253 return;
5255 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5256 if (buf == NULL) {
5257 xmlErrMemory(ctxt, NULL);
5258 ctxt->instate = state;
5259 return;
5261 cur = CUR;
5262 if (!IS_BLANK(cur)) {
5263 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5264 "ParsePI: PI %s space expected\n", target);
5266 SKIP_BLANKS;
5267 cur = CUR_CHAR(l);
5268 while (IS_CHAR(cur) && /* checked */
5269 ((cur != '?') || (NXT(1) != '>'))) {
5270 if (len + 5 >= size) {
5271 xmlChar *tmp;
5272 size_t new_size = size * 2;
5273 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5274 if (tmp == NULL) {
5275 xmlErrMemory(ctxt, NULL);
5276 xmlFree(buf);
5277 ctxt->instate = state;
5278 return;
5280 buf = tmp;
5281 size = new_size;
5283 count++;
5284 if (count > 50) {
5285 GROW;
5286 if (ctxt->instate == XML_PARSER_EOF) {
5287 xmlFree(buf);
5288 return;
5290 count = 0;
5291 if ((len > XML_MAX_TEXT_LENGTH) &&
5292 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5293 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5294 "PI %s too big found", target);
5295 xmlFree(buf);
5296 ctxt->instate = state;
5297 return;
5300 COPY_BUF(l,buf,len,cur);
5301 NEXTL(l);
5302 cur = CUR_CHAR(l);
5303 if (cur == 0) {
5304 SHRINK;
5305 GROW;
5306 cur = CUR_CHAR(l);
5309 if ((len > XML_MAX_TEXT_LENGTH) &&
5310 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5311 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5312 "PI %s too big found", target);
5313 xmlFree(buf);
5314 ctxt->instate = state;
5315 return;
5317 buf[len] = 0;
5318 if (cur != '?') {
5319 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320 "ParsePI: PI %s never end ...\n", target);
5321 } else {
5322 if (input != ctxt->input) {
5323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5324 "PI declaration doesn't start and stop in the same entity\n");
5326 SKIP(2);
5328 #ifdef LIBXML_CATALOG_ENABLED
5329 if (((state == XML_PARSER_MISC) ||
5330 (state == XML_PARSER_START)) &&
5331 (xmlStrEqual(target, XML_CATALOG_PI))) {
5332 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5333 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5334 (allow == XML_CATA_ALLOW_ALL))
5335 xmlParseCatalogPI(ctxt, buf);
5337 #endif
5341 * SAX: PI detected.
5343 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5344 (ctxt->sax->processingInstruction != NULL))
5345 ctxt->sax->processingInstruction(ctxt->userData,
5346 target, buf);
5348 xmlFree(buf);
5349 } else {
5350 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5352 if (ctxt->instate != XML_PARSER_EOF)
5353 ctxt->instate = state;
5358 * xmlParseNotationDecl:
5359 * @ctxt: an XML parser context
5361 * parse a notation declaration
5363 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5365 * Hence there is actually 3 choices:
5366 * 'PUBLIC' S PubidLiteral
5367 * 'PUBLIC' S PubidLiteral S SystemLiteral
5368 * and 'SYSTEM' S SystemLiteral
5370 * See the NOTE on xmlParseExternalID().
5373 void
5374 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5375 const xmlChar *name;
5376 xmlChar *Pubid;
5377 xmlChar *Systemid;
5379 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5380 xmlParserInputPtr input = ctxt->input;
5381 SHRINK;
5382 SKIP(10);
5383 if (!IS_BLANK_CH(CUR)) {
5384 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385 "Space required after '<!NOTATION'\n");
5386 return;
5388 SKIP_BLANKS;
5390 name = xmlParseName(ctxt);
5391 if (name == NULL) {
5392 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5393 return;
5395 if (!IS_BLANK_CH(CUR)) {
5396 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5397 "Space required after the NOTATION name'\n");
5398 return;
5400 if (xmlStrchr(name, ':') != NULL) {
5401 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5402 "colons are forbidden from notation names '%s'\n",
5403 name, NULL, NULL);
5405 SKIP_BLANKS;
5408 * Parse the IDs.
5410 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5411 SKIP_BLANKS;
5413 if (RAW == '>') {
5414 if (input != ctxt->input) {
5415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416 "Notation declaration doesn't start and stop in the same entity\n");
5418 NEXT;
5419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5420 (ctxt->sax->notationDecl != NULL))
5421 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5422 } else {
5423 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5425 if (Systemid != NULL) xmlFree(Systemid);
5426 if (Pubid != NULL) xmlFree(Pubid);
5431 * xmlParseEntityDecl:
5432 * @ctxt: an XML parser context
5434 * parse <!ENTITY declarations
5436 * [70] EntityDecl ::= GEDecl | PEDecl
5438 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5440 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5442 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5444 * [74] PEDef ::= EntityValue | ExternalID
5446 * [76] NDataDecl ::= S 'NDATA' S Name
5448 * [ VC: Notation Declared ]
5449 * The Name must match the declared name of a notation.
5452 void
5453 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5454 const xmlChar *name = NULL;
5455 xmlChar *value = NULL;
5456 xmlChar *URI = NULL, *literal = NULL;
5457 const xmlChar *ndata = NULL;
5458 int isParameter = 0;
5459 xmlChar *orig = NULL;
5460 int skipped;
5462 /* GROW; done in the caller */
5463 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5464 xmlParserInputPtr input = ctxt->input;
5465 SHRINK;
5466 SKIP(8);
5467 skipped = SKIP_BLANKS;
5468 if (skipped == 0) {
5469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5470 "Space required after '<!ENTITY'\n");
5473 if (RAW == '%') {
5474 NEXT;
5475 skipped = SKIP_BLANKS;
5476 if (skipped == 0) {
5477 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478 "Space required after '%'\n");
5480 isParameter = 1;
5483 name = xmlParseName(ctxt);
5484 if (name == NULL) {
5485 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5486 "xmlParseEntityDecl: no name\n");
5487 return;
5489 if (xmlStrchr(name, ':') != NULL) {
5490 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5491 "colons are forbidden from entities names '%s'\n",
5492 name, NULL, NULL);
5494 skipped = SKIP_BLANKS;
5495 if (skipped == 0) {
5496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5497 "Space required after the entity name\n");
5500 ctxt->instate = XML_PARSER_ENTITY_DECL;
5502 * handle the various case of definitions...
5504 if (isParameter) {
5505 if ((RAW == '"') || (RAW == '\'')) {
5506 value = xmlParseEntityValue(ctxt, &orig);
5507 if (value) {
5508 if ((ctxt->sax != NULL) &&
5509 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5510 ctxt->sax->entityDecl(ctxt->userData, name,
5511 XML_INTERNAL_PARAMETER_ENTITY,
5512 NULL, NULL, value);
5514 } else {
5515 URI = xmlParseExternalID(ctxt, &literal, 1);
5516 if ((URI == NULL) && (literal == NULL)) {
5517 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5519 if (URI) {
5520 xmlURIPtr uri;
5522 uri = xmlParseURI((const char *) URI);
5523 if (uri == NULL) {
5524 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5525 "Invalid URI: %s\n", URI);
5527 * This really ought to be a well formedness error
5528 * but the XML Core WG decided otherwise c.f. issue
5529 * E26 of the XML erratas.
5531 } else {
5532 if (uri->fragment != NULL) {
5534 * Okay this is foolish to block those but not
5535 * invalid URIs.
5537 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5538 } else {
5539 if ((ctxt->sax != NULL) &&
5540 (!ctxt->disableSAX) &&
5541 (ctxt->sax->entityDecl != NULL))
5542 ctxt->sax->entityDecl(ctxt->userData, name,
5543 XML_EXTERNAL_PARAMETER_ENTITY,
5544 literal, URI, NULL);
5546 xmlFreeURI(uri);
5550 } else {
5551 if ((RAW == '"') || (RAW == '\'')) {
5552 value = xmlParseEntityValue(ctxt, &orig);
5553 if ((ctxt->sax != NULL) &&
5554 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555 ctxt->sax->entityDecl(ctxt->userData, name,
5556 XML_INTERNAL_GENERAL_ENTITY,
5557 NULL, NULL, value);
5559 * For expat compatibility in SAX mode.
5561 if ((ctxt->myDoc == NULL) ||
5562 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5563 if (ctxt->myDoc == NULL) {
5564 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5565 if (ctxt->myDoc == NULL) {
5566 xmlErrMemory(ctxt, "New Doc failed");
5567 return;
5569 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5571 if (ctxt->myDoc->intSubset == NULL)
5572 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5573 BAD_CAST "fake", NULL, NULL);
5575 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5576 NULL, NULL, value);
5578 } else {
5579 URI = xmlParseExternalID(ctxt, &literal, 1);
5580 if ((URI == NULL) && (literal == NULL)) {
5581 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5583 if (URI) {
5584 xmlURIPtr uri;
5586 uri = xmlParseURI((const char *)URI);
5587 if (uri == NULL) {
5588 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5589 "Invalid URI: %s\n", URI);
5591 * This really ought to be a well formedness error
5592 * but the XML Core WG decided otherwise c.f. issue
5593 * E26 of the XML erratas.
5595 } else {
5596 if (uri->fragment != NULL) {
5598 * Okay this is foolish to block those but not
5599 * invalid URIs.
5601 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5603 xmlFreeURI(uri);
5606 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5608 "Space required before 'NDATA'\n");
5610 SKIP_BLANKS;
5611 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5612 SKIP(5);
5613 if (!IS_BLANK_CH(CUR)) {
5614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5615 "Space required after 'NDATA'\n");
5617 SKIP_BLANKS;
5618 ndata = xmlParseName(ctxt);
5619 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5620 (ctxt->sax->unparsedEntityDecl != NULL))
5621 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5622 literal, URI, ndata);
5623 } else {
5624 if ((ctxt->sax != NULL) &&
5625 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5626 ctxt->sax->entityDecl(ctxt->userData, name,
5627 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5628 literal, URI, NULL);
5630 * For expat compatibility in SAX mode.
5631 * assuming the entity repalcement was asked for
5633 if ((ctxt->replaceEntities != 0) &&
5634 ((ctxt->myDoc == NULL) ||
5635 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5636 if (ctxt->myDoc == NULL) {
5637 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5638 if (ctxt->myDoc == NULL) {
5639 xmlErrMemory(ctxt, "New Doc failed");
5640 return;
5642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5645 if (ctxt->myDoc->intSubset == NULL)
5646 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5647 BAD_CAST "fake", NULL, NULL);
5648 xmlSAX2EntityDecl(ctxt, name,
5649 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5650 literal, URI, NULL);
5655 if (ctxt->instate == XML_PARSER_EOF)
5656 return;
5657 SKIP_BLANKS;
5658 if (RAW != '>') {
5659 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5660 "xmlParseEntityDecl: entity %s not terminated\n", name);
5661 } else {
5662 if (input != ctxt->input) {
5663 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5664 "Entity declaration doesn't start and stop in the same entity\n");
5666 NEXT;
5668 if (orig != NULL) {
5670 * Ugly mechanism to save the raw entity value.
5672 xmlEntityPtr cur = NULL;
5674 if (isParameter) {
5675 if ((ctxt->sax != NULL) &&
5676 (ctxt->sax->getParameterEntity != NULL))
5677 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5678 } else {
5679 if ((ctxt->sax != NULL) &&
5680 (ctxt->sax->getEntity != NULL))
5681 cur = ctxt->sax->getEntity(ctxt->userData, name);
5682 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5683 cur = xmlSAX2GetEntity(ctxt, name);
5686 if (cur != NULL) {
5687 if (cur->orig != NULL)
5688 xmlFree(orig);
5689 else
5690 cur->orig = orig;
5691 } else
5692 xmlFree(orig);
5694 if (value != NULL) xmlFree(value);
5695 if (URI != NULL) xmlFree(URI);
5696 if (literal != NULL) xmlFree(literal);
5701 * xmlParseDefaultDecl:
5702 * @ctxt: an XML parser context
5703 * @value: Receive a possible fixed default value for the attribute
5705 * Parse an attribute default declaration
5707 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5709 * [ VC: Required Attribute ]
5710 * if the default declaration is the keyword #REQUIRED, then the
5711 * attribute must be specified for all elements of the type in the
5712 * attribute-list declaration.
5714 * [ VC: Attribute Default Legal ]
5715 * The declared default value must meet the lexical constraints of
5716 * the declared attribute type c.f. xmlValidateAttributeDecl()
5718 * [ VC: Fixed Attribute Default ]
5719 * if an attribute has a default value declared with the #FIXED
5720 * keyword, instances of that attribute must match the default value.
5722 * [ WFC: No < in Attribute Values ]
5723 * handled in xmlParseAttValue()
5725 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5726 * or XML_ATTRIBUTE_FIXED.
5730 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5731 int val;
5732 xmlChar *ret;
5734 *value = NULL;
5735 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5736 SKIP(9);
5737 return(XML_ATTRIBUTE_REQUIRED);
5739 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5740 SKIP(8);
5741 return(XML_ATTRIBUTE_IMPLIED);
5743 val = XML_ATTRIBUTE_NONE;
5744 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5745 SKIP(6);
5746 val = XML_ATTRIBUTE_FIXED;
5747 if (!IS_BLANK_CH(CUR)) {
5748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5749 "Space required after '#FIXED'\n");
5751 SKIP_BLANKS;
5753 ret = xmlParseAttValue(ctxt);
5754 ctxt->instate = XML_PARSER_DTD;
5755 if (ret == NULL) {
5756 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5757 "Attribute default value declaration error\n");
5758 } else
5759 *value = ret;
5760 return(val);
5764 * xmlParseNotationType:
5765 * @ctxt: an XML parser context
5767 * parse an Notation attribute type.
5769 * Note: the leading 'NOTATION' S part has already being parsed...
5771 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5773 * [ VC: Notation Attributes ]
5774 * Values of this type must match one of the notation names included
5775 * in the declaration; all notation names in the declaration must be declared.
5777 * Returns: the notation attribute tree built while parsing
5780 xmlEnumerationPtr
5781 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5782 const xmlChar *name;
5783 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5785 if (RAW != '(') {
5786 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5787 return(NULL);
5789 SHRINK;
5790 do {
5791 NEXT;
5792 SKIP_BLANKS;
5793 name = xmlParseName(ctxt);
5794 if (name == NULL) {
5795 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5796 "Name expected in NOTATION declaration\n");
5797 xmlFreeEnumeration(ret);
5798 return(NULL);
5800 tmp = ret;
5801 while (tmp != NULL) {
5802 if (xmlStrEqual(name, tmp->name)) {
5803 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5804 "standalone: attribute notation value token %s duplicated\n",
5805 name, NULL);
5806 if (!xmlDictOwns(ctxt->dict, name))
5807 xmlFree((xmlChar *) name);
5808 break;
5810 tmp = tmp->next;
5812 if (tmp == NULL) {
5813 cur = xmlCreateEnumeration(name);
5814 if (cur == NULL) {
5815 xmlFreeEnumeration(ret);
5816 return(NULL);
5818 if (last == NULL) ret = last = cur;
5819 else {
5820 last->next = cur;
5821 last = cur;
5824 SKIP_BLANKS;
5825 } while (RAW == '|');
5826 if (RAW != ')') {
5827 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5828 xmlFreeEnumeration(ret);
5829 return(NULL);
5831 NEXT;
5832 return(ret);
5836 * xmlParseEnumerationType:
5837 * @ctxt: an XML parser context
5839 * parse an Enumeration attribute type.
5841 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5843 * [ VC: Enumeration ]
5844 * Values of this type must match one of the Nmtoken tokens in
5845 * the declaration
5847 * Returns: the enumeration attribute tree built while parsing
5850 xmlEnumerationPtr
5851 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5852 xmlChar *name;
5853 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5855 if (RAW != '(') {
5856 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5857 return(NULL);
5859 SHRINK;
5860 do {
5861 NEXT;
5862 SKIP_BLANKS;
5863 name = xmlParseNmtoken(ctxt);
5864 if (name == NULL) {
5865 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5866 return(ret);
5868 tmp = ret;
5869 while (tmp != NULL) {
5870 if (xmlStrEqual(name, tmp->name)) {
5871 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5872 "standalone: attribute enumeration value token %s duplicated\n",
5873 name, NULL);
5874 if (!xmlDictOwns(ctxt->dict, name))
5875 xmlFree(name);
5876 break;
5878 tmp = tmp->next;
5880 if (tmp == NULL) {
5881 cur = xmlCreateEnumeration(name);
5882 if (!xmlDictOwns(ctxt->dict, name))
5883 xmlFree(name);
5884 if (cur == NULL) {
5885 xmlFreeEnumeration(ret);
5886 return(NULL);
5888 if (last == NULL) ret = last = cur;
5889 else {
5890 last->next = cur;
5891 last = cur;
5894 SKIP_BLANKS;
5895 } while (RAW == '|');
5896 if (RAW != ')') {
5897 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5898 return(ret);
5900 NEXT;
5901 return(ret);
5905 * xmlParseEnumeratedType:
5906 * @ctxt: an XML parser context
5907 * @tree: the enumeration tree built while parsing
5909 * parse an Enumerated attribute type.
5911 * [57] EnumeratedType ::= NotationType | Enumeration
5913 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5916 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5920 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5921 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5922 SKIP(8);
5923 if (!IS_BLANK_CH(CUR)) {
5924 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5925 "Space required after 'NOTATION'\n");
5926 return(0);
5928 SKIP_BLANKS;
5929 *tree = xmlParseNotationType(ctxt);
5930 if (*tree == NULL) return(0);
5931 return(XML_ATTRIBUTE_NOTATION);
5933 *tree = xmlParseEnumerationType(ctxt);
5934 if (*tree == NULL) return(0);
5935 return(XML_ATTRIBUTE_ENUMERATION);
5939 * xmlParseAttributeType:
5940 * @ctxt: an XML parser context
5941 * @tree: the enumeration tree built while parsing
5943 * parse the Attribute list def for an element
5945 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5947 * [55] StringType ::= 'CDATA'
5949 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5950 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5952 * Validity constraints for attribute values syntax are checked in
5953 * xmlValidateAttributeValue()
5955 * [ VC: ID ]
5956 * Values of type ID must match the Name production. A name must not
5957 * appear more than once in an XML document as a value of this type;
5958 * i.e., ID values must uniquely identify the elements which bear them.
5960 * [ VC: One ID per Element Type ]
5961 * No element type may have more than one ID attribute specified.
5963 * [ VC: ID Attribute Default ]
5964 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5966 * [ VC: IDREF ]
5967 * Values of type IDREF must match the Name production, and values
5968 * of type IDREFS must match Names; each IDREF Name must match the value
5969 * of an ID attribute on some element in the XML document; i.e. IDREF
5970 * values must match the value of some ID attribute.
5972 * [ VC: Entity Name ]
5973 * Values of type ENTITY must match the Name production, values
5974 * of type ENTITIES must match Names; each Entity Name must match the
5975 * name of an unparsed entity declared in the DTD.
5977 * [ VC: Name Token ]
5978 * Values of type NMTOKEN must match the Nmtoken production; values
5979 * of type NMTOKENS must match Nmtokens.
5981 * Returns the attribute type
5984 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5985 SHRINK;
5986 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5987 SKIP(5);
5988 return(XML_ATTRIBUTE_CDATA);
5989 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5990 SKIP(6);
5991 return(XML_ATTRIBUTE_IDREFS);
5992 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5993 SKIP(5);
5994 return(XML_ATTRIBUTE_IDREF);
5995 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5996 SKIP(2);
5997 return(XML_ATTRIBUTE_ID);
5998 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5999 SKIP(6);
6000 return(XML_ATTRIBUTE_ENTITY);
6001 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6002 SKIP(8);
6003 return(XML_ATTRIBUTE_ENTITIES);
6004 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6005 SKIP(8);
6006 return(XML_ATTRIBUTE_NMTOKENS);
6007 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6008 SKIP(7);
6009 return(XML_ATTRIBUTE_NMTOKEN);
6011 return(xmlParseEnumeratedType(ctxt, tree));
6015 * xmlParseAttributeListDecl:
6016 * @ctxt: an XML parser context
6018 * : parse the Attribute list def for an element
6020 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6022 * [53] AttDef ::= S Name S AttType S DefaultDecl
6025 void
6026 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6027 const xmlChar *elemName;
6028 const xmlChar *attrName;
6029 xmlEnumerationPtr tree;
6031 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6032 xmlParserInputPtr input = ctxt->input;
6034 SKIP(9);
6035 if (!IS_BLANK_CH(CUR)) {
6036 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6037 "Space required after '<!ATTLIST'\n");
6039 SKIP_BLANKS;
6040 elemName = xmlParseName(ctxt);
6041 if (elemName == NULL) {
6042 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6043 "ATTLIST: no name for Element\n");
6044 return;
6046 SKIP_BLANKS;
6047 GROW;
6048 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6049 const xmlChar *check = CUR_PTR;
6050 int type;
6051 int def;
6052 xmlChar *defaultValue = NULL;
6054 GROW;
6055 tree = NULL;
6056 attrName = xmlParseName(ctxt);
6057 if (attrName == NULL) {
6058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6059 "ATTLIST: no name for Attribute\n");
6060 break;
6062 GROW;
6063 if (!IS_BLANK_CH(CUR)) {
6064 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6065 "Space required after the attribute name\n");
6066 break;
6068 SKIP_BLANKS;
6070 type = xmlParseAttributeType(ctxt, &tree);
6071 if (type <= 0) {
6072 break;
6075 GROW;
6076 if (!IS_BLANK_CH(CUR)) {
6077 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6078 "Space required after the attribute type\n");
6079 if (tree != NULL)
6080 xmlFreeEnumeration(tree);
6081 break;
6083 SKIP_BLANKS;
6085 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6086 if (def <= 0) {
6087 if (defaultValue != NULL)
6088 xmlFree(defaultValue);
6089 if (tree != NULL)
6090 xmlFreeEnumeration(tree);
6091 break;
6093 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6094 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6096 GROW;
6097 if (RAW != '>') {
6098 if (!IS_BLANK_CH(CUR)) {
6099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100 "Space required after the attribute default value\n");
6101 if (defaultValue != NULL)
6102 xmlFree(defaultValue);
6103 if (tree != NULL)
6104 xmlFreeEnumeration(tree);
6105 break;
6107 SKIP_BLANKS;
6109 if (check == CUR_PTR) {
6110 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6111 "in xmlParseAttributeListDecl\n");
6112 if (defaultValue != NULL)
6113 xmlFree(defaultValue);
6114 if (tree != NULL)
6115 xmlFreeEnumeration(tree);
6116 break;
6118 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6119 (ctxt->sax->attributeDecl != NULL))
6120 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6121 type, def, defaultValue, tree);
6122 else if (tree != NULL)
6123 xmlFreeEnumeration(tree);
6125 if ((ctxt->sax2) && (defaultValue != NULL) &&
6126 (def != XML_ATTRIBUTE_IMPLIED) &&
6127 (def != XML_ATTRIBUTE_REQUIRED)) {
6128 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6130 if (ctxt->sax2) {
6131 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6133 if (defaultValue != NULL)
6134 xmlFree(defaultValue);
6135 GROW;
6137 if (RAW == '>') {
6138 if (input != ctxt->input) {
6139 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6140 "Attribute list declaration doesn't start and stop in the same entity\n",
6141 NULL, NULL);
6143 NEXT;
6149 * xmlParseElementMixedContentDecl:
6150 * @ctxt: an XML parser context
6151 * @inputchk: the input used for the current entity, needed for boundary checks
6153 * parse the declaration for a Mixed Element content
6154 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6156 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6157 * '(' S? '#PCDATA' S? ')'
6159 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6161 * [ VC: No Duplicate Types ]
6162 * The same name must not appear more than once in a single
6163 * mixed-content declaration.
6165 * returns: the list of the xmlElementContentPtr describing the element choices
6167 xmlElementContentPtr
6168 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6169 xmlElementContentPtr ret = NULL, cur = NULL, n;
6170 const xmlChar *elem = NULL;
6172 GROW;
6173 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6174 SKIP(7);
6175 SKIP_BLANKS;
6176 SHRINK;
6177 if (RAW == ')') {
6178 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6179 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6180 "Element content declaration doesn't start and stop in the same entity\n",
6181 NULL, NULL);
6183 NEXT;
6184 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6185 if (ret == NULL)
6186 return(NULL);
6187 if (RAW == '*') {
6188 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6189 NEXT;
6191 return(ret);
6193 if ((RAW == '(') || (RAW == '|')) {
6194 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6195 if (ret == NULL) return(NULL);
6197 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6198 NEXT;
6199 if (elem == NULL) {
6200 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6201 if (ret == NULL) return(NULL);
6202 ret->c1 = cur;
6203 if (cur != NULL)
6204 cur->parent = ret;
6205 cur = ret;
6206 } else {
6207 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6208 if (n == NULL) return(NULL);
6209 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6210 if (n->c1 != NULL)
6211 n->c1->parent = n;
6212 cur->c2 = n;
6213 if (n != NULL)
6214 n->parent = cur;
6215 cur = n;
6217 SKIP_BLANKS;
6218 elem = xmlParseName(ctxt);
6219 if (elem == NULL) {
6220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6221 "xmlParseElementMixedContentDecl : Name expected\n");
6222 xmlFreeDocElementContent(ctxt->myDoc, cur);
6223 return(NULL);
6225 SKIP_BLANKS;
6226 GROW;
6228 if ((RAW == ')') && (NXT(1) == '*')) {
6229 if (elem != NULL) {
6230 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6231 XML_ELEMENT_CONTENT_ELEMENT);
6232 if (cur->c2 != NULL)
6233 cur->c2->parent = cur;
6235 if (ret != NULL)
6236 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6237 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6238 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6239 "Element content declaration doesn't start and stop in the same entity\n",
6240 NULL, NULL);
6242 SKIP(2);
6243 } else {
6244 xmlFreeDocElementContent(ctxt->myDoc, ret);
6245 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6246 return(NULL);
6249 } else {
6250 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6252 return(ret);
6256 * xmlParseElementChildrenContentDeclPriv:
6257 * @ctxt: an XML parser context
6258 * @inputchk: the input used for the current entity, needed for boundary checks
6259 * @depth: the level of recursion
6261 * parse the declaration for a Mixed Element content
6262 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6265 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6267 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6269 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6271 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6273 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6274 * TODO Parameter-entity replacement text must be properly nested
6275 * with parenthesized groups. That is to say, if either of the
6276 * opening or closing parentheses in a choice, seq, or Mixed
6277 * construct is contained in the replacement text for a parameter
6278 * entity, both must be contained in the same replacement text. For
6279 * interoperability, if a parameter-entity reference appears in a
6280 * choice, seq, or Mixed construct, its replacement text should not
6281 * be empty, and neither the first nor last non-blank character of
6282 * the replacement text should be a connector (| or ,).
6284 * Returns the tree of xmlElementContentPtr describing the element
6285 * hierarchy.
6287 static xmlElementContentPtr
6288 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6289 int depth) {
6290 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6291 const xmlChar *elem;
6292 xmlChar type = 0;
6294 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6295 (depth > 2048)) {
6296 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6297 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6298 depth);
6299 return(NULL);
6301 SKIP_BLANKS;
6302 GROW;
6303 if (RAW == '(') {
6304 int inputid = ctxt->input->id;
6306 /* Recurse on first child */
6307 NEXT;
6308 SKIP_BLANKS;
6309 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6310 depth + 1);
6311 SKIP_BLANKS;
6312 GROW;
6313 } else {
6314 elem = xmlParseName(ctxt);
6315 if (elem == NULL) {
6316 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6317 return(NULL);
6319 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6320 if (cur == NULL) {
6321 xmlErrMemory(ctxt, NULL);
6322 return(NULL);
6324 GROW;
6325 if (RAW == '?') {
6326 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6327 NEXT;
6328 } else if (RAW == '*') {
6329 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6330 NEXT;
6331 } else if (RAW == '+') {
6332 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6333 NEXT;
6334 } else {
6335 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6337 GROW;
6339 SKIP_BLANKS;
6340 SHRINK;
6341 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6343 * Each loop we parse one separator and one element.
6345 if (RAW == ',') {
6346 if (type == 0) type = CUR;
6349 * Detect "Name | Name , Name" error
6351 else if (type != CUR) {
6352 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6353 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6354 type);
6355 if ((last != NULL) && (last != ret))
6356 xmlFreeDocElementContent(ctxt->myDoc, last);
6357 if (ret != NULL)
6358 xmlFreeDocElementContent(ctxt->myDoc, ret);
6359 return(NULL);
6361 NEXT;
6363 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6364 if (op == NULL) {
6365 if ((last != NULL) && (last != ret))
6366 xmlFreeDocElementContent(ctxt->myDoc, last);
6367 xmlFreeDocElementContent(ctxt->myDoc, ret);
6368 return(NULL);
6370 if (last == NULL) {
6371 op->c1 = ret;
6372 if (ret != NULL)
6373 ret->parent = op;
6374 ret = cur = op;
6375 } else {
6376 cur->c2 = op;
6377 if (op != NULL)
6378 op->parent = cur;
6379 op->c1 = last;
6380 if (last != NULL)
6381 last->parent = op;
6382 cur =op;
6383 last = NULL;
6385 } else if (RAW == '|') {
6386 if (type == 0) type = CUR;
6389 * Detect "Name , Name | Name" error
6391 else if (type != CUR) {
6392 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6393 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6394 type);
6395 if ((last != NULL) && (last != ret))
6396 xmlFreeDocElementContent(ctxt->myDoc, last);
6397 if (ret != NULL)
6398 xmlFreeDocElementContent(ctxt->myDoc, ret);
6399 return(NULL);
6401 NEXT;
6403 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6404 if (op == NULL) {
6405 if ((last != NULL) && (last != ret))
6406 xmlFreeDocElementContent(ctxt->myDoc, last);
6407 if (ret != NULL)
6408 xmlFreeDocElementContent(ctxt->myDoc, ret);
6409 return(NULL);
6411 if (last == NULL) {
6412 op->c1 = ret;
6413 if (ret != NULL)
6414 ret->parent = op;
6415 ret = cur = op;
6416 } else {
6417 cur->c2 = op;
6418 if (op != NULL)
6419 op->parent = cur;
6420 op->c1 = last;
6421 if (last != NULL)
6422 last->parent = op;
6423 cur =op;
6424 last = NULL;
6426 } else {
6427 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6428 if ((last != NULL) && (last != ret))
6429 xmlFreeDocElementContent(ctxt->myDoc, last);
6430 if (ret != NULL)
6431 xmlFreeDocElementContent(ctxt->myDoc, ret);
6432 return(NULL);
6434 GROW;
6435 SKIP_BLANKS;
6436 GROW;
6437 if (RAW == '(') {
6438 int inputid = ctxt->input->id;
6439 /* Recurse on second child */
6440 NEXT;
6441 SKIP_BLANKS;
6442 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6443 depth + 1);
6444 SKIP_BLANKS;
6445 } else {
6446 elem = xmlParseName(ctxt);
6447 if (elem == NULL) {
6448 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6449 if (ret != NULL)
6450 xmlFreeDocElementContent(ctxt->myDoc, ret);
6451 return(NULL);
6453 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6454 if (last == NULL) {
6455 if (ret != NULL)
6456 xmlFreeDocElementContent(ctxt->myDoc, ret);
6457 return(NULL);
6459 if (RAW == '?') {
6460 last->ocur = XML_ELEMENT_CONTENT_OPT;
6461 NEXT;
6462 } else if (RAW == '*') {
6463 last->ocur = XML_ELEMENT_CONTENT_MULT;
6464 NEXT;
6465 } else if (RAW == '+') {
6466 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6467 NEXT;
6468 } else {
6469 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6472 SKIP_BLANKS;
6473 GROW;
6475 if ((cur != NULL) && (last != NULL)) {
6476 cur->c2 = last;
6477 if (last != NULL)
6478 last->parent = cur;
6480 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6481 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6482 "Element content declaration doesn't start and stop in the same entity\n",
6483 NULL, NULL);
6485 NEXT;
6486 if (RAW == '?') {
6487 if (ret != NULL) {
6488 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6489 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6490 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6491 else
6492 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6494 NEXT;
6495 } else if (RAW == '*') {
6496 if (ret != NULL) {
6497 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6498 cur = ret;
6500 * Some normalization:
6501 * (a | b* | c?)* == (a | b | c)*
6503 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6504 if ((cur->c1 != NULL) &&
6505 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6506 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6507 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6508 if ((cur->c2 != NULL) &&
6509 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6510 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6511 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6512 cur = cur->c2;
6515 NEXT;
6516 } else if (RAW == '+') {
6517 if (ret != NULL) {
6518 int found = 0;
6520 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6521 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6522 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6523 else
6524 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6526 * Some normalization:
6527 * (a | b*)+ == (a | b)*
6528 * (a | b?)+ == (a | b)*
6530 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6531 if ((cur->c1 != NULL) &&
6532 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6533 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6534 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6535 found = 1;
6537 if ((cur->c2 != NULL) &&
6538 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6539 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6540 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6541 found = 1;
6543 cur = cur->c2;
6545 if (found)
6546 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6548 NEXT;
6550 return(ret);
6554 * xmlParseElementChildrenContentDecl:
6555 * @ctxt: an XML parser context
6556 * @inputchk: the input used for the current entity, needed for boundary checks
6558 * parse the declaration for a Mixed Element content
6559 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6561 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6563 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6565 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6567 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6569 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6570 * TODO Parameter-entity replacement text must be properly nested
6571 * with parenthesized groups. That is to say, if either of the
6572 * opening or closing parentheses in a choice, seq, or Mixed
6573 * construct is contained in the replacement text for a parameter
6574 * entity, both must be contained in the same replacement text. For
6575 * interoperability, if a parameter-entity reference appears in a
6576 * choice, seq, or Mixed construct, its replacement text should not
6577 * be empty, and neither the first nor last non-blank character of
6578 * the replacement text should be a connector (| or ,).
6580 * Returns the tree of xmlElementContentPtr describing the element
6581 * hierarchy.
6583 xmlElementContentPtr
6584 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6585 /* stub left for API/ABI compat */
6586 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6590 * xmlParseElementContentDecl:
6591 * @ctxt: an XML parser context
6592 * @name: the name of the element being defined.
6593 * @result: the Element Content pointer will be stored here if any
6595 * parse the declaration for an Element content either Mixed or Children,
6596 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6598 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6600 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6604 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6605 xmlElementContentPtr *result) {
6607 xmlElementContentPtr tree = NULL;
6608 int inputid = ctxt->input->id;
6609 int res;
6611 *result = NULL;
6613 if (RAW != '(') {
6614 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6615 "xmlParseElementContentDecl : %s '(' expected\n", name);
6616 return(-1);
6618 NEXT;
6619 GROW;
6620 if (ctxt->instate == XML_PARSER_EOF)
6621 return(-1);
6622 SKIP_BLANKS;
6623 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6624 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6625 res = XML_ELEMENT_TYPE_MIXED;
6626 } else {
6627 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6628 res = XML_ELEMENT_TYPE_ELEMENT;
6630 SKIP_BLANKS;
6631 *result = tree;
6632 return(res);
6636 * xmlParseElementDecl:
6637 * @ctxt: an XML parser context
6639 * parse an Element declaration.
6641 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6643 * [ VC: Unique Element Type Declaration ]
6644 * No element type may be declared more than once
6646 * Returns the type of the element, or -1 in case of error
6649 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6650 const xmlChar *name;
6651 int ret = -1;
6652 xmlElementContentPtr content = NULL;
6654 /* GROW; done in the caller */
6655 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6656 xmlParserInputPtr input = ctxt->input;
6658 SKIP(9);
6659 if (!IS_BLANK_CH(CUR)) {
6660 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6661 "Space required after 'ELEMENT'\n");
6663 SKIP_BLANKS;
6664 name = xmlParseName(ctxt);
6665 if (name == NULL) {
6666 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6667 "xmlParseElementDecl: no name for Element\n");
6668 return(-1);
6670 while ((RAW == 0) && (ctxt->inputNr > 1))
6671 xmlPopInput(ctxt);
6672 if (!IS_BLANK_CH(CUR)) {
6673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6674 "Space required after the element name\n");
6676 SKIP_BLANKS;
6677 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6678 SKIP(5);
6680 * Element must always be empty.
6682 ret = XML_ELEMENT_TYPE_EMPTY;
6683 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6684 (NXT(2) == 'Y')) {
6685 SKIP(3);
6687 * Element is a generic container.
6689 ret = XML_ELEMENT_TYPE_ANY;
6690 } else if (RAW == '(') {
6691 ret = xmlParseElementContentDecl(ctxt, name, &content);
6692 } else {
6694 * [ WFC: PEs in Internal Subset ] error handling.
6696 if ((RAW == '%') && (ctxt->external == 0) &&
6697 (ctxt->inputNr == 1)) {
6698 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6699 "PEReference: forbidden within markup decl in internal subset\n");
6700 } else {
6701 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6702 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6704 return(-1);
6707 SKIP_BLANKS;
6709 * Pop-up of finished entities.
6711 while ((RAW == 0) && (ctxt->inputNr > 1))
6712 xmlPopInput(ctxt);
6713 SKIP_BLANKS;
6715 if (RAW != '>') {
6716 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6717 if (content != NULL) {
6718 xmlFreeDocElementContent(ctxt->myDoc, content);
6720 } else {
6721 if (input != ctxt->input) {
6722 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6723 "Element declaration doesn't start and stop in the same entity\n");
6726 NEXT;
6727 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6728 (ctxt->sax->elementDecl != NULL)) {
6729 if (content != NULL)
6730 content->parent = NULL;
6731 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6732 content);
6733 if ((content != NULL) && (content->parent == NULL)) {
6735 * this is a trick: if xmlAddElementDecl is called,
6736 * instead of copying the full tree it is plugged directly
6737 * if called from the parser. Avoid duplicating the
6738 * interfaces or change the API/ABI
6740 xmlFreeDocElementContent(ctxt->myDoc, content);
6742 } else if (content != NULL) {
6743 xmlFreeDocElementContent(ctxt->myDoc, content);
6747 return(ret);
6751 * xmlParseConditionalSections
6752 * @ctxt: an XML parser context
6754 * [61] conditionalSect ::= includeSect | ignoreSect
6755 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6756 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6757 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6758 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6761 static void
6762 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6763 int id = ctxt->input->id;
6765 SKIP(3);
6766 SKIP_BLANKS;
6767 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6768 SKIP(7);
6769 SKIP_BLANKS;
6770 if (RAW != '[') {
6771 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6772 } else {
6773 if (ctxt->input->id != id) {
6774 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6775 "All markup of the conditional section is not in the same entity\n",
6776 NULL, NULL);
6778 NEXT;
6780 if (xmlParserDebugEntities) {
6781 if ((ctxt->input != NULL) && (ctxt->input->filename))
6782 xmlGenericError(xmlGenericErrorContext,
6783 "%s(%d): ", ctxt->input->filename,
6784 ctxt->input->line);
6785 xmlGenericError(xmlGenericErrorContext,
6786 "Entering INCLUDE Conditional Section\n");
6789 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6790 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6791 const xmlChar *check = CUR_PTR;
6792 unsigned int cons = ctxt->input->consumed;
6794 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6795 xmlParseConditionalSections(ctxt);
6796 } else if (IS_BLANK_CH(CUR)) {
6797 NEXT;
6798 } else if (RAW == '%') {
6799 xmlParsePEReference(ctxt);
6800 } else
6801 xmlParseMarkupDecl(ctxt);
6804 * Pop-up of finished entities.
6806 while ((RAW == 0) && (ctxt->inputNr > 1))
6807 xmlPopInput(ctxt);
6809 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6810 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6811 break;
6814 if (xmlParserDebugEntities) {
6815 if ((ctxt->input != NULL) && (ctxt->input->filename))
6816 xmlGenericError(xmlGenericErrorContext,
6817 "%s(%d): ", ctxt->input->filename,
6818 ctxt->input->line);
6819 xmlGenericError(xmlGenericErrorContext,
6820 "Leaving INCLUDE Conditional Section\n");
6823 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6824 int state;
6825 xmlParserInputState instate;
6826 int depth = 0;
6828 SKIP(6);
6829 SKIP_BLANKS;
6830 if (RAW != '[') {
6831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6832 } else {
6833 if (ctxt->input->id != id) {
6834 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6835 "All markup of the conditional section is not in the same entity\n",
6836 NULL, NULL);
6838 NEXT;
6840 if (xmlParserDebugEntities) {
6841 if ((ctxt->input != NULL) && (ctxt->input->filename))
6842 xmlGenericError(xmlGenericErrorContext,
6843 "%s(%d): ", ctxt->input->filename,
6844 ctxt->input->line);
6845 xmlGenericError(xmlGenericErrorContext,
6846 "Entering IGNORE Conditional Section\n");
6850 * Parse up to the end of the conditional section
6851 * But disable SAX event generating DTD building in the meantime
6853 state = ctxt->disableSAX;
6854 instate = ctxt->instate;
6855 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6856 ctxt->instate = XML_PARSER_IGNORE;
6858 while (((depth >= 0) && (RAW != 0)) &&
6859 (ctxt->instate != XML_PARSER_EOF)) {
6860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6861 depth++;
6862 SKIP(3);
6863 continue;
6865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6866 if (--depth >= 0) SKIP(3);
6867 continue;
6869 NEXT;
6870 continue;
6873 ctxt->disableSAX = state;
6874 ctxt->instate = instate;
6876 if (xmlParserDebugEntities) {
6877 if ((ctxt->input != NULL) && (ctxt->input->filename))
6878 xmlGenericError(xmlGenericErrorContext,
6879 "%s(%d): ", ctxt->input->filename,
6880 ctxt->input->line);
6881 xmlGenericError(xmlGenericErrorContext,
6882 "Leaving IGNORE Conditional Section\n");
6885 } else {
6886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6889 if (RAW == 0)
6890 SHRINK;
6892 if (RAW == 0) {
6893 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6894 } else {
6895 if (ctxt->input->id != id) {
6896 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6897 "All markup of the conditional section is not in the same entity\n",
6898 NULL, NULL);
6900 SKIP(3);
6905 * xmlParseMarkupDecl:
6906 * @ctxt: an XML parser context
6908 * parse Markup declarations
6910 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6911 * NotationDecl | PI | Comment
6913 * [ VC: Proper Declaration/PE Nesting ]
6914 * Parameter-entity replacement text must be properly nested with
6915 * markup declarations. That is to say, if either the first character
6916 * or the last character of a markup declaration (markupdecl above) is
6917 * contained in the replacement text for a parameter-entity reference,
6918 * both must be contained in the same replacement text.
6920 * [ WFC: PEs in Internal Subset ]
6921 * In the internal DTD subset, parameter-entity references can occur
6922 * only where markup declarations can occur, not within markup declarations.
6923 * (This does not apply to references that occur in external parameter
6924 * entities or to the external subset.)
6926 void
6927 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6928 GROW;
6929 if (CUR == '<') {
6930 if (NXT(1) == '!') {
6931 switch (NXT(2)) {
6932 case 'E':
6933 if (NXT(3) == 'L')
6934 xmlParseElementDecl(ctxt);
6935 else if (NXT(3) == 'N')
6936 xmlParseEntityDecl(ctxt);
6937 break;
6938 case 'A':
6939 xmlParseAttributeListDecl(ctxt);
6940 break;
6941 case 'N':
6942 xmlParseNotationDecl(ctxt);
6943 break;
6944 case '-':
6945 xmlParseComment(ctxt);
6946 break;
6947 default:
6948 /* there is an error but it will be detected later */
6949 break;
6951 } else if (NXT(1) == '?') {
6952 xmlParsePI(ctxt);
6956 * This is only for internal subset. On external entities,
6957 * the replacement is done before parsing stage
6959 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6960 xmlParsePEReference(ctxt);
6963 * Conditional sections are allowed from entities included
6964 * by PE References in the internal subset.
6966 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6967 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6968 xmlParseConditionalSections(ctxt);
6972 ctxt->instate = XML_PARSER_DTD;
6976 * xmlParseTextDecl:
6977 * @ctxt: an XML parser context
6979 * parse an XML declaration header for external entities
6981 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6984 void
6985 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6986 xmlChar *version;
6987 const xmlChar *encoding;
6990 * We know that '<?xml' is here.
6992 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6993 SKIP(5);
6994 } else {
6995 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6996 return;
6999 if (!IS_BLANK_CH(CUR)) {
7000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001 "Space needed after '<?xml'\n");
7003 SKIP_BLANKS;
7006 * We may have the VersionInfo here.
7008 version = xmlParseVersionInfo(ctxt);
7009 if (version == NULL)
7010 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7011 else {
7012 if (!IS_BLANK_CH(CUR)) {
7013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7014 "Space needed here\n");
7017 ctxt->input->version = version;
7020 * We must have the encoding declaration
7022 encoding = xmlParseEncodingDecl(ctxt);
7023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7025 * The XML REC instructs us to stop parsing right here
7027 return;
7029 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7030 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7031 "Missing encoding in text declaration\n");
7034 SKIP_BLANKS;
7035 if ((RAW == '?') && (NXT(1) == '>')) {
7036 SKIP(2);
7037 } else if (RAW == '>') {
7038 /* Deprecated old WD ... */
7039 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7040 NEXT;
7041 } else {
7042 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7043 MOVETO_ENDTAG(CUR_PTR);
7044 NEXT;
7049 * xmlParseExternalSubset:
7050 * @ctxt: an XML parser context
7051 * @ExternalID: the external identifier
7052 * @SystemID: the system identifier (or URL)
7054 * parse Markup declarations from an external subset
7056 * [30] extSubset ::= textDecl? extSubsetDecl
7058 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7060 void
7061 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7062 const xmlChar *SystemID) {
7063 xmlDetectSAX2(ctxt);
7064 GROW;
7066 if ((ctxt->encoding == NULL) &&
7067 (ctxt->input->end - ctxt->input->cur >= 4)) {
7068 xmlChar start[4];
7069 xmlCharEncoding enc;
7071 start[0] = RAW;
7072 start[1] = NXT(1);
7073 start[2] = NXT(2);
7074 start[3] = NXT(3);
7075 enc = xmlDetectCharEncoding(start, 4);
7076 if (enc != XML_CHAR_ENCODING_NONE)
7077 xmlSwitchEncoding(ctxt, enc);
7080 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7081 xmlParseTextDecl(ctxt);
7082 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7084 * The XML REC instructs us to stop parsing right here
7086 ctxt->instate = XML_PARSER_EOF;
7087 return;
7090 if (ctxt->myDoc == NULL) {
7091 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7092 if (ctxt->myDoc == NULL) {
7093 xmlErrMemory(ctxt, "New Doc failed");
7094 return;
7096 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7098 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7099 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7101 ctxt->instate = XML_PARSER_DTD;
7102 ctxt->external = 1;
7103 while (((RAW == '<') && (NXT(1) == '?')) ||
7104 ((RAW == '<') && (NXT(1) == '!')) ||
7105 (RAW == '%') || IS_BLANK_CH(CUR)) {
7106 const xmlChar *check = CUR_PTR;
7107 unsigned int cons = ctxt->input->consumed;
7109 GROW;
7110 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7111 xmlParseConditionalSections(ctxt);
7112 } else if (IS_BLANK_CH(CUR)) {
7113 NEXT;
7114 } else if (RAW == '%') {
7115 xmlParsePEReference(ctxt);
7116 } else
7117 xmlParseMarkupDecl(ctxt);
7120 * Pop-up of finished entities.
7122 while ((RAW == 0) && (ctxt->inputNr > 1))
7123 xmlPopInput(ctxt);
7125 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7126 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7127 break;
7131 if (RAW != 0) {
7132 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * Parse and handle an entity reference in content. Depending on the SAX
 * interface, this may end up in a call to characters() when the reference
 * is a CharRef or a predefined entity, when there is no reference()
 * callback, or when the parser was asked to switch to that mode.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
/*
 * Handles one reference at the current input position. Returns at once
 * unless the current character is '&'. A "&#" prefix is handled as a
 * character reference; anything else is resolved with xmlParseEntityRef()
 * and then either reported through SAX callbacks or attached to ctxt->node.
 * NOTE(review): this listing has lost its brace-only and comment-delimiter
 * lines, so block nesting has to be inferred from the statements and from
 * the gaps in the embedded line numbers.
 */
7148 void
7149 xmlParseReference(xmlParserCtxtPtr ctxt) {
7150 xmlEntityPtr ent;
7151 xmlChar *val;
7152 int was_checked;
7153 xmlNodePtr list = NULL;
7154 xmlParserErrors ret = XML_ERR_OK;
7157 if (RAW != '&')
7158 return;
7161 * Simple case of a CharRef
7163 if (NXT(1) == '#') {
7164 int i = 0;
7165 xmlChar out[10];
/* The character after "&#" is saved before parsing; it is used below to pick the "#x%X" or "#%d" format. */
7166 int hex = NXT(2);
7167 int value = xmlParseCharRef(ctxt);
7169 if (value == 0)
7170 return;
7171 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7173 * So we are using non-UTF-8 buffers
7174 * Check that the char fit on 8bits, if not
7175 * generate a CharRef.
7177 if (value <= 0xFF) {
7178 out[0] = value;
7179 out[1] = 0;
7180 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7181 (!ctxt->disableSAX))
7182 ctxt->sax->characters(ctxt->userData, out, 1);
7183 } else {
7184 if ((hex == 'x') || (hex == 'X'))
7185 snprintf((char *)out, sizeof(out), "#x%X", value);
7186 else
7187 snprintf((char *)out, sizeof(out), "#%d", value);
7188 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7189 (!ctxt->disableSAX))
7190 ctxt->sax->reference(ctxt->userData, out);
7192 } else {
7194 * Just encode the value in UTF-8
7196 COPY_BUF(0 ,out, i, value);
7197 out[i] = 0;
7198 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7199 (!ctxt->disableSAX))
7200 ctxt->sax->characters(ctxt->userData, out, i);
7202 return;
7206 * We are seeing an entity reference
7208 ent = xmlParseEntityRef(ctxt);
7209 if (ent == NULL) return;
7210 if (!ctxt->wellFormed)
7211 return;
/* Remember the 'checked' value from before this call; it is consulted further down when ent->children is still NULL. */
7212 was_checked = ent->checked;
7214 /* special case of predefined entities */
7215 if ((ent->name == NULL) ||
7216 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7217 val = ent->content;
7218 if (val == NULL) return;
7220 * inline the entity.
7222 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223 (!ctxt->disableSAX))
7224 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7225 return;
7229 * The first reference to the entity trigger a parsing phase
7230 * where the ent->children is filled with the result from
7231 * the parsing.
7232 * Note: external parsed entities will not be loaded, it is not
7233 * required for a non-validating parser, unless the parsing option
7234 * of validating, or substituting entities were given. Doing so is
7235 * far more secure as the parser will only process data coming from
7236 * the document entity by default.
7238 if ((ent->checked == 0) &&
7239 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7240 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
/* Baseline for counting how many entity references the nested parse adds to ctxt->nbentities. */
7241 unsigned long oldnbent = ctxt->nbentities;
7244 * This is a bit hackish but this seems the best
7245 * way to make sure both SAX and DOM entity support
7246 * behaves okay.
7248 void *user_data;
7249 if (ctxt->userData == ctxt)
7250 user_data = NULL;
7251 else
7252 user_data = ctxt->userData;
7255 * Check that this entity is well formed
7256 * 4.3.2: An internal general parsed entity is well-formed
7257 * if its replacement text matches the production labeled
7258 * content.
7260 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7261 ctxt->depth++;
7262 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7263 user_data, &list);
7264 ctxt->depth--;
7266 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7267 ctxt->depth++;
7268 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7269 user_data, ctxt->depth, ent->URI,
7270 ent->ExternalID, &list);
7271 ctxt->depth--;
7272 } else {
7273 ret = XML_ERR_ENTITY_PE_INTERNAL;
7274 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7275 "invalid entity type found\n", NULL);
7279 * Store the number of entities needing parsing for this entity
7280 * content and do checkings
/* checked = 2 * (references counted during the nested parse + 1); bit 0 is set separately when the content contains '<'. */
7282 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7283 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7284 ent->checked |= 1;
7285 if (ret == XML_ERR_ENTITY_LOOP) {
7286 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7287 xmlFreeNodeList(list);
7288 return;
7290 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7291 xmlFreeNodeList(list);
7292 return;
7295 if ((ret == XML_ERR_OK) && (list != NULL)) {
7296 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7297 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7298 (ent->children == NULL)) {
7299 ent->children = list;
7300 if (ctxt->replaceEntities) {
7302 * Prune it directly in the generated document
7303 * except for single text nodes.
7305 if (((list->type == XML_TEXT_NODE) &&
7306 (list->next == NULL)) ||
7307 (ctxt->parseMode == XML_PARSE_READER)) {
7308 list->parent = (xmlNodePtr) ent;
7309 list = NULL;
7310 ent->owner = 1;
7311 } else {
7312 ent->owner = 0;
7313 while (list != NULL) {
7314 list->parent = (xmlNodePtr) ctxt->node;
7315 list->doc = ctxt->myDoc;
7316 if (list->next == NULL)
7317 ent->last = list;
7318 list = list->next;
/* The walk above left list == NULL; point it back at the head of the parsed nodes. */
7320 list = ent->children;
7321 #ifdef LIBXML_LEGACY_ENABLED
7322 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7323 xmlAddEntityReference(ent, list, NULL);
7324 #endif /* LIBXML_LEGACY_ENABLED */
7326 } else {
7327 ent->owner = 1;
7328 while (list != NULL) {
7329 list->parent = (xmlNodePtr) ent;
7330 xmlSetTreeDoc(list, ent->doc);
7331 if (list->next == NULL)
7332 ent->last = list;
7333 list = list->next;
7336 } else {
7337 xmlFreeNodeList(list);
7338 list = NULL;
7340 } else if ((ret != XML_ERR_OK) &&
7341 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7342 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7343 "Entity '%s' failed to parse\n", ent->name);
7344 xmlParserEntityCheck(ctxt, 0, ent, 0);
7345 } else if (list != NULL) {
7346 xmlFreeNodeList(list);
7347 list = NULL;
7349 if (ent->checked == 0)
7350 ent->checked = 2;
7351 } else if (ent->checked != 1) {
/* Entity not parsed in this call: add its recorded count (checked / 2) to the context total. */
7352 ctxt->nbentities += ent->checked / 2;
7356 * Now that the entity content has been gathered
7357 * provide it to the application, this can take different forms based
7358 * on the parsing modes.
7360 if (ent->children == NULL) {
7362 * Probably running in SAX mode and the callbacks don't
7363 * build the entity content. So unless we already went
7364 * though parsing for first checking go though the entity
7365 * content to generate callbacks associated to the entity
7367 if (was_checked != 0) {
7368 void *user_data;
7370 * This is a bit hackish but this seems the best
7371 * way to make sure both SAX and DOM entity support
7372 * behaves okay.
7374 if (ctxt->userData == ctxt)
7375 user_data = NULL;
7376 else
7377 user_data = ctxt->userData;
7379 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7380 ctxt->depth++;
7381 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7382 ent->content, user_data, NULL);
7383 ctxt->depth--;
7384 } else if (ent->etype ==
7385 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7386 ctxt->depth++;
7387 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7388 ctxt->sax, user_data, ctxt->depth,
7389 ent->URI, ent->ExternalID, NULL);
7390 ctxt->depth--;
7391 } else {
7392 ret = XML_ERR_ENTITY_PE_INTERNAL;
7393 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7394 "invalid entity type found\n", NULL);
7396 if (ret == XML_ERR_ENTITY_LOOP) {
7397 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7398 return;
7401 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7402 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7404 * Entity reference callback comes second, it's somewhat
7405 * superfluous but a compatibility to historical behaviour
7407 ctxt->sax->reference(ctxt->userData, ent->name);
7409 return;
7413 * If we didn't get any children for the entity being built
7415 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7416 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7418 * Create a node.
7420 ctxt->sax->reference(ctxt->userData, ent->name);
7421 return;
7424 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7426 * There is a problem on the handling of _private for entities
7427 * (bug 155816): Should we copy the content of the field from
7428 * the entity (possibly overwriting some value set by the user
7429 * when a copy is created), should we leave it alone, or should
7430 * we try to take care of different situations? The problem
7431 * is exacerbated by the usage of this field by the xmlReader.
7432 * To fix this bug, we look at _private on the created node
7433 * and, if it's NULL, we copy in whatever was in the entity.
7434 * If it's not NULL we leave it alone. This is somewhat of a
7435 * hack - maybe we should have further tests to determine
7436 * what to do.
7438 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7440 * Seems we are generating the DOM content, do
7441 * a simple tree copy for all references except the first
7442 * In the first occurrence list contains the replacement.
7444 if (((list == NULL) && (ent->owner == 0)) ||
7445 (ctxt->parseMode == XML_PARSE_READER)) {
7446 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7449 * We are copying here, make sure there is no abuse
7451 ctxt->sizeentcopy += ent->length + 5;
7452 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7453 return;
/* NOTE(review): numbers 7455 and 7460 are missing around the next four lines, which suggests a dropped comment opener and closer; presumably the two statements below are commented out in the real file. Confirm before treating them as live code. */
7456 * when operating on a reader, the entities definitions
7457 * are always owning the entities subtree.
7458 if (ctxt->parseMode == XML_PARSE_READER)
7459 ent->owner = 1;
/* Copy each entity child into the document under ctxt->node; the entity keeps its original nodes. */
7462 cur = ent->children;
7463 while (cur != NULL) {
7464 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7465 if (nw != NULL) {
7466 if (nw->_private == NULL)
7467 nw->_private = cur->_private;
7468 if (firstChild == NULL){
7469 firstChild = nw;
7471 nw = xmlAddChild(ctxt->node, nw);
7473 if (cur == ent->last) {
7475 * needed to detect some strange empty
7476 * node cases in the reader tests
7478 if ((ctxt->parseMode == XML_PARSE_READER) &&
7479 (nw != NULL) &&
7480 (nw->type == XML_ELEMENT_NODE) &&
7481 (nw->children == NULL))
7482 nw->extra = 1;
7484 break;
7486 cur = cur->next;
7488 #ifdef LIBXML_LEGACY_ENABLED
7489 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7490 xmlAddEntityReference(ent, firstChild, nw);
7491 #endif /* LIBXML_LEGACY_ENABLED */
7492 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7493 xmlNodePtr nw = NULL, cur, next, last,
7494 firstChild = NULL;
7497 * We are copying here, make sure there is no abuse
7499 ctxt->sizeentcopy += ent->length + 5;
7500 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7501 return;
7504 * Copy the entity child list and make it the new
7505 * entity child list. The goal is to make sure any
7506 * ID or REF referenced will be the one from the
7507 * document content and not the entity copy.
/* The child list is detached from the entity first; inside the loop the copy (nw) is added back under the entity and the original (cur) is added under ctxt->node. */
7509 cur = ent->children;
7510 ent->children = NULL;
7511 last = ent->last;
7512 ent->last = NULL;
7513 while (cur != NULL) {
7514 next = cur->next;
7515 cur->next = NULL;
7516 cur->parent = NULL;
7517 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7518 if (nw != NULL) {
7519 if (nw->_private == NULL)
7520 nw->_private = cur->_private;
7521 if (firstChild == NULL){
7522 firstChild = cur;
7524 xmlAddChild((xmlNodePtr) ent, nw);
7525 xmlAddChild(ctxt->node, cur);
7527 if (cur == last)
7528 break;
7529 cur = next;
7531 if (ent->owner == 0)
7532 ent->owner = 1;
7533 #ifdef LIBXML_LEGACY_ENABLED
7534 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7535 xmlAddEntityReference(ent, firstChild, nw);
7536 #endif /* LIBXML_LEGACY_ENABLED */
7537 } else {
7538 const xmlChar *nbktext;
7541 * the name change is to avoid coalescing of the
7542 * node with a possible previous text one which
7543 * would make ent->children a dangling pointer
7545 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7546 -1);
7547 if (ent->children->type == XML_TEXT_NODE)
7548 ent->children->name = nbktext;
7549 if ((ent->last != ent->children) &&
7550 (ent->last->type == XML_TEXT_NODE))
7551 ent->last->name = nbktext;
7552 xmlAddChildList(ctxt->node, ent->children);
7556 * This is to avoid a nasty side effect, see
7557 * characters() in SAX.c
7559 ctxt->nodemem = 0;
7560 ctxt->nodelen = 0;
7561 return;
7567 * xmlParseEntityRef:
7568 * @ctxt: an XML parser context
7570 * parse ENTITY references declarations
7572 * [68] EntityRef ::= '&' Name ';'
7574 * [ WFC: Entity Declared ]
7575 * In a document without any DTD, a document with only an internal DTD
7576 * subset which contains no parameter entity references, or a document
7577 * with "standalone='yes'", the Name given in the entity reference
7578 * must match that in an entity declaration, except that well-formed
7579 * documents need not declare any of the following entities: amp, lt,
7580 * gt, apos, quot. The declaration of a parameter entity must precede
7581 * any reference to it. Similarly, the declaration of a general entity
7582 * must precede any reference to it which appears in a default value in an
7583 * attribute-list declaration. Note that if entities are declared in the
7584 * external subset or in external parameter entities, a non-validating
7585 * processor is not obligated to read and process their declarations;
7586 * for such documents, the rule that an entity must be declared is a
7587 * well-formedness constraint only if standalone='yes'.
7589 * [ WFC: Parsed Entity ]
7590 * An entity reference must not contain the name of an unparsed entity
7592 * Returns the xmlEntityPtr if found, or NULL otherwise.
7594 xmlEntityPtr
7595 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7596 const xmlChar *name;
7597 xmlEntityPtr ent = NULL;
7599 GROW;
7600 if (ctxt->instate == XML_PARSER_EOF)
7601 return(NULL);
7603 if (RAW != '&')
7604 return(NULL);
7605 NEXT;
7606 name = xmlParseName(ctxt);
7607 if (name == NULL) {
7608 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7609 "xmlParseEntityRef: no name\n");
7610 return(NULL);
7612 if (RAW != ';') {
7613 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7614 return(NULL);
7616 NEXT;
7619 * Predefined entities override any extra definition
7621 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7622 ent = xmlGetPredefinedEntity(name);
7623 if (ent != NULL)
7624 return(ent);
7628 * Increase the number of entity references parsed
7630 ctxt->nbentities++;
7633 * Ask first SAX for entity resolution, otherwise try the
7634 * entities which may have stored in the parser context.
7636 if (ctxt->sax != NULL) {
7637 if (ctxt->sax->getEntity != NULL)
7638 ent = ctxt->sax->getEntity(ctxt->userData, name);
7639 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7640 (ctxt->options & XML_PARSE_OLDSAX))
7641 ent = xmlGetPredefinedEntity(name);
7642 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7643 (ctxt->userData==ctxt)) {
7644 ent = xmlSAX2GetEntity(ctxt, name);
7647 if (ctxt->instate == XML_PARSER_EOF)
7648 return(NULL);
7650 * [ WFC: Entity Declared ]
7651 * In a document without any DTD, a document with only an
7652 * internal DTD subset which contains no parameter entity
7653 * references, or a document with "standalone='yes'", the
7654 * Name given in the entity reference must match that in an
7655 * entity declaration, except that well-formed documents
7656 * need not declare any of the following entities: amp, lt,
7657 * gt, apos, quot.
7658 * The declaration of a parameter entity must precede any
7659 * reference to it.
7660 * Similarly, the declaration of a general entity must
7661 * precede any reference to it which appears in a default
7662 * value in an attribute-list declaration. Note that if
7663 * entities are declared in the external subset or in
7664 * external parameter entities, a non-validating processor
7665 * is not obligated to read and process their declarations;
7666 * for such documents, the rule that an entity must be
7667 * declared is a well-formedness constraint only if
7668 * standalone='yes'.
7670 if (ent == NULL) {
7671 if ((ctxt->standalone == 1) ||
7672 ((ctxt->hasExternalSubset == 0) &&
7673 (ctxt->hasPErefs == 0))) {
7674 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7675 "Entity '%s' not defined\n", name);
7676 } else {
7677 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7678 "Entity '%s' not defined\n", name);
7679 if ((ctxt->inSubset == 0) &&
7680 (ctxt->sax != NULL) &&
7681 (ctxt->sax->reference != NULL)) {
7682 ctxt->sax->reference(ctxt->userData, name);
7685 xmlParserEntityCheck(ctxt, 0, ent, 0);
7686 ctxt->valid = 0;
7690 * [ WFC: Parsed Entity ]
7691 * An entity reference must not contain the name of an
7692 * unparsed entity
7694 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7695 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7696 "Entity reference to unparsed entity %s\n", name);
7700 * [ WFC: No External Entity References ]
7701 * Attribute values cannot contain direct or indirect
7702 * entity references to external entities.
7704 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7705 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7706 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7707 "Attribute references external entity '%s'\n", name);
7710 * [ WFC: No < in Attribute Values ]
7711 * The replacement text of any entity referred to directly or
7712 * indirectly in an attribute value (other than "&lt;") must
7713 * not contain a <.
7715 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7716 (ent != NULL) &&
7717 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7718 if (((ent->checked & 1) || (ent->checked == 0)) &&
7719 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7720 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7721 "'<' in entity '%s' is not allowed in attributes values\n", name);
7726 * Internal check, no parameter entities here ...
7728 else {
7729 switch (ent->etype) {
7730 case XML_INTERNAL_PARAMETER_ENTITY:
7731 case XML_EXTERNAL_PARAMETER_ENTITY:
7732 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7733 "Attempt to reference the parameter entity '%s'\n",
7734 name);
7735 break;
7736 default:
7737 break;
7742 * [ WFC: No Recursion ]
7743 * A parsed entity must not contain a recursive reference
7744 * to itself, either directly or indirectly.
7745 * Done somewhere else
7747 return(ent);
7751 * xmlParseStringEntityRef:
7752 * @ctxt: an XML parser context
7753 * @str: a pointer to an index in the string
7755 * parse ENTITY references declarations, but this version parses it from
7756 * a string value.
7758 * [68] EntityRef ::= '&' Name ';'
7760 * [ WFC: Entity Declared ]
7761 * In a document without any DTD, a document with only an internal DTD
7762 * subset which contains no parameter entity references, or a document
7763 * with "standalone='yes'", the Name given in the entity reference
7764 * must match that in an entity declaration, except that well-formed
7765 * documents need not declare any of the following entities: amp, lt,
7766 * gt, apos, quot. The declaration of a parameter entity must precede
7767 * any reference to it. Similarly, the declaration of a general entity
7768 * must precede any reference to it which appears in a default value in an
7769 * attribute-list declaration. Note that if entities are declared in the
7770 * external subset or in external parameter entities, a non-validating
7771 * processor is not obligated to read and process their declarations;
7772 * for such documents, the rule that an entity must be declared is a
7773 * well-formedness constraint only if standalone='yes'.
7775 * [ WFC: Parsed Entity ]
7776 * An entity reference must not contain the name of an unparsed entity
7778 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7779 * is updated to the current location in the string.
7781 static xmlEntityPtr
7782 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7783 xmlChar *name;
7784 const xmlChar *ptr;
7785 xmlChar cur;
7786 xmlEntityPtr ent = NULL;
7788 if ((str == NULL) || (*str == NULL))
7789 return(NULL);
7790 ptr = *str;
7791 cur = *ptr;
7792 if (cur != '&')
7793 return(NULL);
7795 ptr++;
7796 name = xmlParseStringName(ctxt, &ptr);
7797 if (name == NULL) {
7798 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7799 "xmlParseStringEntityRef: no name\n");
7800 *str = ptr;
7801 return(NULL);
7803 if (*ptr != ';') {
7804 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7805 xmlFree(name);
7806 *str = ptr;
7807 return(NULL);
7809 ptr++;
7813 * Predefined entities override any extra definition
7815 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7816 ent = xmlGetPredefinedEntity(name);
7817 if (ent != NULL) {
7818 xmlFree(name);
7819 *str = ptr;
7820 return(ent);
7825 * Increate the number of entity references parsed
7827 ctxt->nbentities++;
7830 * Ask first SAX for entity resolution, otherwise try the
7831 * entities which may have stored in the parser context.
7833 if (ctxt->sax != NULL) {
7834 if (ctxt->sax->getEntity != NULL)
7835 ent = ctxt->sax->getEntity(ctxt->userData, name);
7836 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7837 ent = xmlGetPredefinedEntity(name);
7838 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7839 ent = xmlSAX2GetEntity(ctxt, name);
7842 if (ctxt->instate == XML_PARSER_EOF) {
7843 xmlFree(name);
7844 return(NULL);
7848 * [ WFC: Entity Declared ]
7849 * In a document without any DTD, a document with only an
7850 * internal DTD subset which contains no parameter entity
7851 * references, or a document with "standalone='yes'", the
7852 * Name given in the entity reference must match that in an
7853 * entity declaration, except that well-formed documents
7854 * need not declare any of the following entities: amp, lt,
7855 * gt, apos, quot.
7856 * The declaration of a parameter entity must precede any
7857 * reference to it.
7858 * Similarly, the declaration of a general entity must
7859 * precede any reference to it which appears in a default
7860 * value in an attribute-list declaration. Note that if
7861 * entities are declared in the external subset or in
7862 * external parameter entities, a non-validating processor
7863 * is not obligated to read and process their declarations;
7864 * for such documents, the rule that an entity must be
7865 * declared is a well-formedness constraint only if
7866 * standalone='yes'.
7868 if (ent == NULL) {
7869 if ((ctxt->standalone == 1) ||
7870 ((ctxt->hasExternalSubset == 0) &&
7871 (ctxt->hasPErefs == 0))) {
7872 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7873 "Entity '%s' not defined\n", name);
7874 } else {
7875 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7876 "Entity '%s' not defined\n",
7877 name);
7879 xmlParserEntityCheck(ctxt, 0, ent, 0);
7880 /* TODO ? check regressions ctxt->valid = 0; */
7884 * [ WFC: Parsed Entity ]
7885 * An entity reference must not contain the name of an
7886 * unparsed entity
7888 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7889 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7890 "Entity reference to unparsed entity %s\n", name);
7894 * [ WFC: No External Entity References ]
7895 * Attribute values cannot contain direct or indirect
7896 * entity references to external entities.
7898 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7899 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7900 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7901 "Attribute references external entity '%s'\n", name);
7904 * [ WFC: No < in Attribute Values ]
7905 * The replacement text of any entity referred to directly or
7906 * indirectly in an attribute value (other than "&lt;") must
7907 * not contain a <.
7909 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7910 (ent != NULL) && (ent->content != NULL) &&
7911 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7912 (xmlStrchr(ent->content, '<'))) {
7913 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7914 "'<' in entity '%s' is not allowed in attributes values\n",
7915 name);
7919 * Internal check, no parameter entities here ...
7921 else {
7922 switch (ent->etype) {
7923 case XML_INTERNAL_PARAMETER_ENTITY:
7924 case XML_EXTERNAL_PARAMETER_ENTITY:
7925 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7926 "Attempt to reference the parameter entity '%s'\n",
7927 name);
7928 break;
7929 default:
7930 break;
7935 * [ WFC: No Recursion ]
7936 * A parsed entity must not contain a recursive reference
7937 * to itself, either directly or indirectly.
7938 * Done somewhere else
7941 xmlFree(name);
7942 *str = ptr;
7943 return(ent);
7947 * xmlParsePEReference:
7948 * @ctxt: an XML parser context
7950 * parse PEReference declarations
7951 * The entity content is handled directly by pushing it's content as
7952 * a new input stream.
7954 * [69] PEReference ::= '%' Name ';'
7956 * [ WFC: No Recursion ]
7957 * A parsed entity must not contain a recursive
7958 * reference to itself, either directly or indirectly.
7960 * [ WFC: Entity Declared ]
7961 * In a document without any DTD, a document with only an internal DTD
7962 * subset which contains no parameter entity references, or a document
7963 * with "standalone='yes'", ... ... The declaration of a parameter
7964 * entity must precede any reference to it...
7966 * [ VC: Entity Declared ]
7967 * In a document with an external subset or external parameter entities
7968 * with "standalone='no'", ... ... The declaration of a parameter entity
7969 * must precede any reference to it...
7971 * [ WFC: In DTD ]
7972 * Parameter-entity references may only appear in the DTD.
7973 * NOTE: misleading but this is handled.
7975 void
7976 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7978 const xmlChar *name;
7979 xmlEntityPtr entity = NULL;
7980 xmlParserInputPtr input;
7982 if (RAW != '%')
7983 return;
7984 NEXT;
7985 name = xmlParseName(ctxt);
7986 if (name == NULL) {
7987 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7988 "xmlParsePEReference: no name\n");
7989 return;
7991 if (RAW != ';') {
7992 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7993 return;
7996 NEXT;
7999 * Increate the number of entity references parsed
8001 ctxt->nbentities++;
8004 * Request the entity from SAX
8006 if ((ctxt->sax != NULL) &&
8007 (ctxt->sax->getParameterEntity != NULL))
8008 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8009 if (ctxt->instate == XML_PARSER_EOF)
8010 return;
8011 if (entity == NULL) {
8013 * [ WFC: Entity Declared ]
8014 * In a document without any DTD, a document with only an
8015 * internal DTD subset which contains no parameter entity
8016 * references, or a document with "standalone='yes'", ...
8017 * ... The declaration of a parameter entity must precede
8018 * any reference to it...
8020 if ((ctxt->standalone == 1) ||
8021 ((ctxt->hasExternalSubset == 0) &&
8022 (ctxt->hasPErefs == 0))) {
8023 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8024 "PEReference: %%%s; not found\n",
8025 name);
8026 } else {
8028 * [ VC: Entity Declared ]
8029 * In a document with an external subset or external
8030 * parameter entities with "standalone='no'", ...
8031 * ... The declaration of a parameter entity must
8032 * precede any reference to it...
8034 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8035 "PEReference: %%%s; not found\n",
8036 name, NULL);
8037 ctxt->valid = 0;
8039 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8040 } else {
8042 * Internal checking in case the entity quest barfed
8044 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8045 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8046 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8047 "Internal: %%%s; is not a parameter entity\n",
8048 name, NULL);
8049 } else if (ctxt->input->free != deallocblankswrapper) {
8050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8051 if (xmlPushInput(ctxt, input) < 0)
8052 return;
8053 } else {
8055 * TODO !!!
8056 * handle the extra spaces added before and after
8057 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8059 input = xmlNewEntityInputStream(ctxt, entity);
8060 if (xmlPushInput(ctxt, input) < 0)
8061 return;
8062 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8063 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8064 (IS_BLANK_CH(NXT(5)))) {
8065 xmlParseTextDecl(ctxt);
8066 if (ctxt->errNo ==
8067 XML_ERR_UNSUPPORTED_ENCODING) {
8069 * The XML REC instructs us to stop parsing
8070 * right here
8072 ctxt->instate = XML_PARSER_EOF;
8073 return;
8078 ctxt->hasPErefs = 1;
8082 * xmlLoadEntityContent:
8083 * @ctxt: an XML parser context
8084 * @entity: an unloaded system entity
8086 * Load the original content of the given system entity from the
8087 * ExternalID/SystemID given. This is to be used for Included in Literal
8088 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8090 * Returns 0 in case of success and -1 in case of failure
8092 static int
8093 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8094 xmlParserInputPtr input;
8095 xmlBufferPtr buf;
8096 int l, c;
8097 int count = 0;
8099 if ((ctxt == NULL) || (entity == NULL) ||
8100 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8101 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8102 (entity->content != NULL)) {
8103 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8104 "xmlLoadEntityContent parameter error");
8105 return(-1);
8108 if (xmlParserDebugEntities)
8109 xmlGenericError(xmlGenericErrorContext,
8110 "Reading %s entity content input\n", entity->name);
8112 buf = xmlBufferCreate();
8113 if (buf == NULL) {
8114 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8115 "xmlLoadEntityContent parameter error");
8116 return(-1);
8119 input = xmlNewEntityInputStream(ctxt, entity);
8120 if (input == NULL) {
8121 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8122 "xmlLoadEntityContent input error");
8123 xmlBufferFree(buf);
8124 return(-1);
8128 * Push the entity as the current input, read char by char
8129 * saving to the buffer until the end of the entity or an error
8131 if (xmlPushInput(ctxt, input) < 0) {
8132 xmlBufferFree(buf);
8133 return(-1);
8136 GROW;
8137 c = CUR_CHAR(l);
8138 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8139 (IS_CHAR(c))) {
8140 xmlBufferAdd(buf, ctxt->input->cur, l);
8141 if (count++ > XML_PARSER_CHUNK_SIZE) {
8142 count = 0;
8143 GROW;
8144 if (ctxt->instate == XML_PARSER_EOF) {
8145 xmlBufferFree(buf);
8146 return(-1);
8149 NEXTL(l);
8150 c = CUR_CHAR(l);
8151 if (c == 0) {
8152 count = 0;
8153 GROW;
8154 if (ctxt->instate == XML_PARSER_EOF) {
8155 xmlBufferFree(buf);
8156 return(-1);
8158 c = CUR_CHAR(l);
8162 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8163 xmlPopInput(ctxt);
8164 } else if (!IS_CHAR(c)) {
8165 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8166 "xmlLoadEntityContent: invalid char value %d\n",
8168 xmlBufferFree(buf);
8169 return(-1);
8171 entity->content = buf->content;
8172 buf->content = NULL;
8173 xmlBufferFree(buf);
8175 return(0);
/**
 * xmlParseStringPEReference:
 * @ctxt:  an XML parser context
 * @str:  a pointer to an index in the string
 *
 * Parse a PEReference from a string.
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * Returns an xmlEntityPtr (not the string of the entity content); NULL is
 * returned on error.
 * str is updated to the current value of the index
 */
8209 static xmlEntityPtr
8210 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8211 const xmlChar *ptr;
8212 xmlChar cur;
8213 xmlChar *name;
8214 xmlEntityPtr entity = NULL;
8216 if ((str == NULL) || (*str == NULL)) return(NULL);
8217 ptr = *str;
8218 cur = *ptr;
8219 if (cur != '%')
8220 return(NULL);
8221 ptr++;
8222 name = xmlParseStringName(ctxt, &ptr);
8223 if (name == NULL) {
8224 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8225 "xmlParseStringPEReference: no name\n");
8226 *str = ptr;
8227 return(NULL);
8229 cur = *ptr;
8230 if (cur != ';') {
8231 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8232 xmlFree(name);
8233 *str = ptr;
8234 return(NULL);
8236 ptr++;
8239 * Increate the number of entity references parsed
8241 ctxt->nbentities++;
8244 * Request the entity from SAX
8246 if ((ctxt->sax != NULL) &&
8247 (ctxt->sax->getParameterEntity != NULL))
8248 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8249 if (ctxt->instate == XML_PARSER_EOF) {
8250 xmlFree(name);
8251 return(NULL);
8253 if (entity == NULL) {
8255 * [ WFC: Entity Declared ]
8256 * In a document without any DTD, a document with only an
8257 * internal DTD subset which contains no parameter entity
8258 * references, or a document with "standalone='yes'", ...
8259 * ... The declaration of a parameter entity must precede
8260 * any reference to it...
8262 if ((ctxt->standalone == 1) ||
8263 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8264 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8265 "PEReference: %%%s; not found\n", name);
8266 } else {
8268 * [ VC: Entity Declared ]
8269 * In a document with an external subset or external
8270 * parameter entities with "standalone='no'", ...
8271 * ... The declaration of a parameter entity must
8272 * precede any reference to it...
8274 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8275 "PEReference: %%%s; not found\n",
8276 name, NULL);
8277 ctxt->valid = 0;
8279 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8280 } else {
8282 * Internal checking in case the entity quest barfed
8284 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8285 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8286 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8287 "%%%s; is not a parameter entity\n",
8288 name, NULL);
8291 ctxt->hasPErefs = 1;
8292 xmlFree(name);
8293 *str = ptr;
8294 return(entity);
8298 * xmlParseDocTypeDecl:
8299 * @ctxt: an XML parser context
8301 * parse a DOCTYPE declaration
8303 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8304 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8306 * [ VC: Root Element Type ]
8307 * The Name in the document type declaration must match the element
8308 * type of the root element.
8311 void
8312 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8313 const xmlChar *name = NULL;
8314 xmlChar *ExternalID = NULL;
8315 xmlChar *URI = NULL;
8318 * We know that '<!DOCTYPE' has been detected.
8320 SKIP(9);
8322 SKIP_BLANKS;
8325 * Parse the DOCTYPE name.
8327 name = xmlParseName(ctxt);
8328 if (name == NULL) {
8329 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8330 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8332 ctxt->intSubName = name;
8334 SKIP_BLANKS;
8337 * Check for SystemID and ExternalID
8339 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8341 if ((URI != NULL) || (ExternalID != NULL)) {
8342 ctxt->hasExternalSubset = 1;
8344 ctxt->extSubURI = URI;
8345 ctxt->extSubSystem = ExternalID;
8347 SKIP_BLANKS;
8350 * Create and update the internal subset.
8352 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8353 (!ctxt->disableSAX))
8354 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8355 if (ctxt->instate == XML_PARSER_EOF)
8356 return;
8359 * Is there any internal subset declarations ?
8360 * they are handled separately in xmlParseInternalSubset()
8362 if (RAW == '[')
8363 return;
8366 * We should be at the end of the DOCTYPE declaration.
8368 if (RAW != '>') {
8369 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8371 NEXT;
8375 * xmlParseInternalSubset:
8376 * @ctxt: an XML parser context
8378 * parse the internal subset declaration
8380 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8383 static void
8384 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8386 * Is there any DTD definition ?
8388 if (RAW == '[') {
8389 ctxt->instate = XML_PARSER_DTD;
8390 NEXT;
8392 * Parse the succession of Markup declarations and
8393 * PEReferences.
8394 * Subsequence (markupdecl | PEReference | S)*
8396 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8397 const xmlChar *check = CUR_PTR;
8398 unsigned int cons = ctxt->input->consumed;
8400 SKIP_BLANKS;
8401 xmlParseMarkupDecl(ctxt);
8402 xmlParsePEReference(ctxt);
8405 * Pop-up of finished entities.
8407 while ((RAW == 0) && (ctxt->inputNr > 1))
8408 xmlPopInput(ctxt);
8410 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8411 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8412 "xmlParseInternalSubset: error detected in Markup declaration\n");
8413 break;
8416 if (RAW == ']') {
8417 NEXT;
8418 SKIP_BLANKS;
8423 * We should be at the end of the DOCTYPE declaration.
8425 if (RAW != '>') {
8426 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8428 NEXT;
8431 #ifdef LIBXML_SAX1_ENABLED
8433 * xmlParseAttribute:
8434 * @ctxt: an XML parser context
8435 * @value: a xmlChar ** used to store the value of the attribute
8437 * parse an attribute
8439 * [41] Attribute ::= Name Eq AttValue
8441 * [ WFC: No External Entity References ]
8442 * Attribute values cannot contain direct or indirect entity references
8443 * to external entities.
8445 * [ WFC: No < in Attribute Values ]
8446 * The replacement text of any entity referred to directly or indirectly in
8447 * an attribute value (other than "&lt;") must not contain a <.
8449 * [ VC: Attribute Value Type ]
8450 * The attribute must have been declared; the value must be of the type
8451 * declared for it.
8453 * [25] Eq ::= S? '=' S?
8455 * With namespace:
8457 * [NS 11] Attribute ::= QName Eq AttValue
8459 * Also the case QName == xmlns:??? is handled independently as a namespace
8460 * definition.
8462 * Returns the attribute name, and the value in *value.
8465 const xmlChar *
8466 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8467 const xmlChar *name;
8468 xmlChar *val;
8470 *value = NULL;
8471 GROW;
8472 name = xmlParseName(ctxt);
8473 if (name == NULL) {
8474 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8475 "error parsing attribute name\n");
8476 return(NULL);
8480 * read the value
8482 SKIP_BLANKS;
8483 if (RAW == '=') {
8484 NEXT;
8485 SKIP_BLANKS;
8486 val = xmlParseAttValue(ctxt);
8487 ctxt->instate = XML_PARSER_CONTENT;
8488 } else {
8489 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8490 "Specification mandate value for attribute %s\n", name);
8491 return(NULL);
8495 * Check that xml:lang conforms to the specification
8496 * No more registered as an error, just generate a warning now
8497 * since this was deprecated in XML second edition
8499 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8500 if (!xmlCheckLanguageID(val)) {
8501 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8502 "Malformed value for xml:lang : %s\n",
8503 val, NULL);
8508 * Check that xml:space conforms to the specification
8510 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8511 if (xmlStrEqual(val, BAD_CAST "default"))
8512 *(ctxt->space) = 0;
8513 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8514 *(ctxt->space) = 1;
8515 else {
8516 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8517 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8518 val, NULL);
8522 *value = val;
8523 return(name);
8527 * xmlParseStartTag:
8528 * @ctxt: an XML parser context
8530 * parse a start of tag either for rule element or
8531 * EmptyElement. In both case we don't parse the tag closing chars.
8533 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8535 * [ WFC: Unique Att Spec ]
8536 * No attribute name may appear more than once in the same start-tag or
8537 * empty-element tag.
8539 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8541 * [ WFC: Unique Att Spec ]
8542 * No attribute name may appear more than once in the same start-tag or
8543 * empty-element tag.
8545 * With namespace:
8547 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8549 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8551 * Returns the element name parsed
8554 const xmlChar *
8555 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8556 const xmlChar *name;
8557 const xmlChar *attname;
8558 xmlChar *attvalue;
8559 const xmlChar **atts = ctxt->atts;
8560 int nbatts = 0;
8561 int maxatts = ctxt->maxatts;
8562 int i;
8564 if (RAW != '<') return(NULL);
8565 NEXT1;
8567 name = xmlParseName(ctxt);
8568 if (name == NULL) {
8569 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8570 "xmlParseStartTag: invalid element name\n");
8571 return(NULL);
8575 * Now parse the attributes, it ends up with the ending
8577 * (S Attribute)* S?
8579 SKIP_BLANKS;
8580 GROW;
8582 while (((RAW != '>') &&
8583 ((RAW != '/') || (NXT(1) != '>')) &&
8584 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8585 const xmlChar *q = CUR_PTR;
8586 unsigned int cons = ctxt->input->consumed;
8588 attname = xmlParseAttribute(ctxt, &attvalue);
8589 if ((attname != NULL) && (attvalue != NULL)) {
8591 * [ WFC: Unique Att Spec ]
8592 * No attribute name may appear more than once in the same
8593 * start-tag or empty-element tag.
8595 for (i = 0; i < nbatts;i += 2) {
8596 if (xmlStrEqual(atts[i], attname)) {
8597 xmlErrAttributeDup(ctxt, NULL, attname);
8598 xmlFree(attvalue);
8599 goto failed;
8603 * Add the pair to atts
8605 if (atts == NULL) {
8606 maxatts = 22; /* allow for 10 attrs by default */
8607 atts = (const xmlChar **)
8608 xmlMalloc(maxatts * sizeof(xmlChar *));
8609 if (atts == NULL) {
8610 xmlErrMemory(ctxt, NULL);
8611 if (attvalue != NULL)
8612 xmlFree(attvalue);
8613 goto failed;
8615 ctxt->atts = atts;
8616 ctxt->maxatts = maxatts;
8617 } else if (nbatts + 4 > maxatts) {
8618 const xmlChar **n;
8620 maxatts *= 2;
8621 n = (const xmlChar **) xmlRealloc((void *) atts,
8622 maxatts * sizeof(const xmlChar *));
8623 if (n == NULL) {
8624 xmlErrMemory(ctxt, NULL);
8625 if (attvalue != NULL)
8626 xmlFree(attvalue);
8627 goto failed;
8629 atts = n;
8630 ctxt->atts = atts;
8631 ctxt->maxatts = maxatts;
8633 atts[nbatts++] = attname;
8634 atts[nbatts++] = attvalue;
8635 atts[nbatts] = NULL;
8636 atts[nbatts + 1] = NULL;
8637 } else {
8638 if (attvalue != NULL)
8639 xmlFree(attvalue);
8642 failed:
8644 GROW
8645 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8646 break;
8647 if (!IS_BLANK_CH(RAW)) {
8648 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8649 "attributes construct error\n");
8651 SKIP_BLANKS;
8652 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8653 (attname == NULL) && (attvalue == NULL)) {
8654 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8655 "xmlParseStartTag: problem parsing attributes\n");
8656 break;
8658 SHRINK;
8659 GROW;
8663 * SAX: Start of Element !
8665 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8666 (!ctxt->disableSAX)) {
8667 if (nbatts > 0)
8668 ctxt->sax->startElement(ctxt->userData, name, atts);
8669 else
8670 ctxt->sax->startElement(ctxt->userData, name, NULL);
8673 if (atts != NULL) {
8674 /* Free only the content strings */
8675 for (i = 1;i < nbatts;i+=2)
8676 if (atts[i] != NULL)
8677 xmlFree((xmlChar *) atts[i]);
8679 return(name);
8683 * xmlParseEndTag1:
8684 * @ctxt: an XML parser context
8685 * @line: line of the start tag
8686 * @nsNr: number of namespaces on the start tag
8688 * parse an end of tag
8690 * [42] ETag ::= '</' Name S? '>'
8692 * With namespace
8694 * [NS 9] ETag ::= '</' QName S? '>'
8697 static void
8698 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8699 const xmlChar *name;
8701 GROW;
8702 if ((RAW != '<') || (NXT(1) != '/')) {
8703 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8704 "xmlParseEndTag: '</' not found\n");
8705 return;
8707 SKIP(2);
8709 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8712 * We should definitely be at the ending "S? '>'" part
8714 GROW;
8715 SKIP_BLANKS;
8716 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8717 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8718 } else
8719 NEXT1;
8722 * [ WFC: Element Type Match ]
8723 * The Name in an element's end-tag must match the element type in the
8724 * start-tag.
8727 if (name != (xmlChar*)1) {
8728 if (name == NULL) name = BAD_CAST "unparseable";
8729 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8730 "Opening and ending tag mismatch: %s line %d and %s\n",
8731 ctxt->name, line, name);
8735 * SAX: End of Tag
8737 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8738 (!ctxt->disableSAX))
8739 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8741 namePop(ctxt);
8742 spacePop(ctxt);
8743 return;
8747 * xmlParseEndTag:
8748 * @ctxt: an XML parser context
8750 * parse an end of tag
8752 * [42] ETag ::= '</' Name S? '>'
8754 * With namespace
8756 * [NS 9] ETag ::= '</' QName S? '>'
8759 void
8760 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8761 xmlParseEndTag1(ctxt, 0);
8763 #endif /* LIBXML_SAX1_ENABLED */
8765 /************************************************************************
8767 * SAX 2 specific operations *
8769 ************************************************************************/
8772 * xmlGetNamespace:
8773 * @ctxt: an XML parser context
8774 * @prefix: the prefix to lookup
8776 * Lookup the namespace name for the @prefix (which ca be NULL)
8777 * The prefix must come from the @ctxt->dict dictionnary
8779 * Returns the namespace name or NULL if not bound
8781 static const xmlChar *
8782 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8783 int i;
8785 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8786 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8787 if (ctxt->nsTab[i] == prefix) {
8788 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8789 return(NULL);
8790 return(ctxt->nsTab[i + 1]);
8792 return(NULL);
8796 * xmlParseQName:
8797 * @ctxt: an XML parser context
8798 * @prefix: pointer to store the prefix part
8800 * parse an XML Namespace QName
8802 * [6] QName ::= (Prefix ':')? LocalPart
8803 * [7] Prefix ::= NCName
8804 * [8] LocalPart ::= NCName
8806 * Returns the Name parsed or NULL
8809 static const xmlChar *
8810 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8811 const xmlChar *l, *p;
8813 GROW;
8815 l = xmlParseNCName(ctxt);
8816 if (l == NULL) {
8817 if (CUR == ':') {
8818 l = xmlParseName(ctxt);
8819 if (l != NULL) {
8820 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8821 "Failed to parse QName '%s'\n", l, NULL, NULL);
8822 *prefix = NULL;
8823 return(l);
8826 return(NULL);
8828 if (CUR == ':') {
8829 NEXT;
8830 p = l;
8831 l = xmlParseNCName(ctxt);
8832 if (l == NULL) {
8833 xmlChar *tmp;
8835 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8836 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8837 l = xmlParseNmtoken(ctxt);
8838 if (l == NULL)
8839 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8840 else {
8841 tmp = xmlBuildQName(l, p, NULL, 0);
8842 xmlFree((char *)l);
8844 p = xmlDictLookup(ctxt->dict, tmp, -1);
8845 if (tmp != NULL) xmlFree(tmp);
8846 *prefix = NULL;
8847 return(p);
8849 if (CUR == ':') {
8850 xmlChar *tmp;
8852 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8853 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8854 NEXT;
8855 tmp = (xmlChar *) xmlParseName(ctxt);
8856 if (tmp != NULL) {
8857 tmp = xmlBuildQName(tmp, l, NULL, 0);
8858 l = xmlDictLookup(ctxt->dict, tmp, -1);
8859 if (tmp != NULL) xmlFree(tmp);
8860 *prefix = p;
8861 return(l);
8863 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8864 l = xmlDictLookup(ctxt->dict, tmp, -1);
8865 if (tmp != NULL) xmlFree(tmp);
8866 *prefix = p;
8867 return(l);
8869 *prefix = p;
8870 } else
8871 *prefix = NULL;
8872 return(l);
8876 * xmlParseQNameAndCompare:
8877 * @ctxt: an XML parser context
8878 * @name: the localname
8879 * @prefix: the prefix, if any.
8881 * parse an XML name and compares for match
8882 * (specialized for endtag parsing)
8884 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8885 * and the name for mismatch
8888 static const xmlChar *
8889 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8890 xmlChar const *prefix) {
8891 const xmlChar *cmp;
8892 const xmlChar *in;
8893 const xmlChar *ret;
8894 const xmlChar *prefix2;
8896 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8898 GROW;
8899 in = ctxt->input->cur;
8901 cmp = prefix;
8902 while (*in != 0 && *in == *cmp) {
8903 ++in;
8904 ++cmp;
8906 if ((*cmp == 0) && (*in == ':')) {
8907 in++;
8908 cmp = name;
8909 while (*in != 0 && *in == *cmp) {
8910 ++in;
8911 ++cmp;
8913 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8914 /* success */
8915 ctxt->input->cur = in;
8916 return((const xmlChar*) 1);
8920 * all strings coms from the dictionary, equality can be done directly
8922 ret = xmlParseQName (ctxt, &prefix2);
8923 if ((ret == name) && (prefix == prefix2))
8924 return((const xmlChar*) 1);
8925 return ret;
8929 * xmlParseAttValueInternal:
8930 * @ctxt: an XML parser context
8931 * @len: attribute len result
8932 * @alloc: whether the attribute was reallocated as a new string
8933 * @normalize: if 1 then further non-CDATA normalization must be done
8935 * parse a value for an attribute.
8936 * NOTE: if no normalization is needed, the routine will return pointers
8937 * directly from the data buffer.
8939 * 3.3.3 Attribute-Value Normalization:
8940 * Before the value of an attribute is passed to the application or
8941 * checked for validity, the XML processor must normalize it as follows:
8942 * - a character reference is processed by appending the referenced
8943 * character to the attribute value
8944 * - an entity reference is processed by recursively processing the
8945 * replacement text of the entity
8946 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8947 * appending #x20 to the normalized value, except that only a single
8948 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8949 * parsed entity or the literal entity value of an internal parsed entity
8950 * - other characters are processed by appending them to the normalized value
8951 * If the declared value is not CDATA, then the XML processor must further
8952 * process the normalized attribute value by discarding any leading and
8953 * trailing space (#x20) characters, and by replacing sequences of space
8954 * (#x20) characters by a single space (#x20) character.
8955 * All attributes for which no declaration has been read should be treated
8956 * by a non-validating parser as if declared CDATA.
8958 * Returns the AttValue parsed or NULL. The value has to be freed by the
8959 * caller if it was copied, this can be detected by val[*len] == 0.
8962 static xmlChar *
8963 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8964 int normalize)
8966 xmlChar limit = 0;
8967 const xmlChar *in = NULL, *start, *end, *last;
8968 xmlChar *ret = NULL;
8969 int line, col;
8971 GROW;
8972 in = (xmlChar *) CUR_PTR;
8973 line = ctxt->input->line;
8974 col = ctxt->input->col;
8975 if (*in != '"' && *in != '\'') {
8976 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8977 return (NULL);
8979 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8982 * try to handle in this routine the most common case where no
8983 * allocation of a new string is required and where content is
8984 * pure ASCII.
8986 limit = *in++;
8987 col++;
8988 end = ctxt->input->end;
8989 start = in;
8990 if (in >= end) {
8991 const xmlChar *oldbase = ctxt->input->base;
8992 GROW;
8993 if (oldbase != ctxt->input->base) {
8994 long delta = ctxt->input->base - oldbase;
8995 start = start + delta;
8996 in = in + delta;
8998 end = ctxt->input->end;
9000 if (normalize) {
9002 * Skip any leading spaces
9004 while ((in < end) && (*in != limit) &&
9005 ((*in == 0x20) || (*in == 0x9) ||
9006 (*in == 0xA) || (*in == 0xD))) {
9007 if (*in == 0xA) {
9008 line++; col = 1;
9009 } else {
9010 col++;
9012 in++;
9013 start = in;
9014 if (in >= end) {
9015 const xmlChar *oldbase = ctxt->input->base;
9016 GROW;
9017 if (ctxt->instate == XML_PARSER_EOF)
9018 return(NULL);
9019 if (oldbase != ctxt->input->base) {
9020 long delta = ctxt->input->base - oldbase;
9021 start = start + delta;
9022 in = in + delta;
9024 end = ctxt->input->end;
9025 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9026 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9027 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9028 "AttValue length too long\n");
9029 return(NULL);
9033 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9034 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9035 col++;
9036 if ((*in++ == 0x20) && (*in == 0x20)) break;
9037 if (in >= end) {
9038 const xmlChar *oldbase = ctxt->input->base;
9039 GROW;
9040 if (ctxt->instate == XML_PARSER_EOF)
9041 return(NULL);
9042 if (oldbase != ctxt->input->base) {
9043 long delta = ctxt->input->base - oldbase;
9044 start = start + delta;
9045 in = in + delta;
9047 end = ctxt->input->end;
9048 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9049 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9050 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9051 "AttValue length too long\n");
9052 return(NULL);
9056 last = in;
9058 * skip the trailing blanks
9060 while ((last[-1] == 0x20) && (last > start)) last--;
9061 while ((in < end) && (*in != limit) &&
9062 ((*in == 0x20) || (*in == 0x9) ||
9063 (*in == 0xA) || (*in == 0xD))) {
9064 if (*in == 0xA) {
9065 line++, col = 1;
9066 } else {
9067 col++;
9069 in++;
9070 if (in >= end) {
9071 const xmlChar *oldbase = ctxt->input->base;
9072 GROW;
9073 if (ctxt->instate == XML_PARSER_EOF)
9074 return(NULL);
9075 if (oldbase != ctxt->input->base) {
9076 long delta = ctxt->input->base - oldbase;
9077 start = start + delta;
9078 in = in + delta;
9079 last = last + delta;
9081 end = ctxt->input->end;
9082 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9083 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9084 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9085 "AttValue length too long\n");
9086 return(NULL);
9090 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9091 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9092 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9093 "AttValue length too long\n");
9094 return(NULL);
9096 if (*in != limit) goto need_complex;
9097 } else {
9098 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9099 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9100 in++;
9101 col++;
9102 if (in >= end) {
9103 const xmlChar *oldbase = ctxt->input->base;
9104 GROW;
9105 if (ctxt->instate == XML_PARSER_EOF)
9106 return(NULL);
9107 if (oldbase != ctxt->input->base) {
9108 long delta = ctxt->input->base - oldbase;
9109 start = start + delta;
9110 in = in + delta;
9112 end = ctxt->input->end;
9113 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9114 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9116 "AttValue length too long\n");
9117 return(NULL);
9121 last = in;
9122 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9123 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9124 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9125 "AttValue length too long\n");
9126 return(NULL);
9128 if (*in != limit) goto need_complex;
9130 in++;
9131 col++;
9132 if (len != NULL) {
9133 *len = last - start;
9134 ret = (xmlChar *) start;
9135 } else {
9136 if (alloc) *alloc = 1;
9137 ret = xmlStrndup(start, last - start);
9139 CUR_PTR = in;
9140 ctxt->input->line = line;
9141 ctxt->input->col = col;
9142 if (alloc) *alloc = 0;
9143 return ret;
9144 need_complex:
9145 if (alloc) *alloc = 1;
9146 return xmlParseAttValueComplex(ctxt, len, normalize);
9150 * xmlParseAttribute2:
9151 * @ctxt: an XML parser context
9152 * @pref: the element prefix
9153 * @elem: the element name
9154 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9155 * @value: a xmlChar ** used to store the value of the attribute
9156 * @len: an int * to save the length of the attribute
9157 * @alloc: an int * to indicate if the attribute was allocated
9159 * parse an attribute in the new SAX2 framework.
9161 * Returns the attribute name, and the value in *value, .
9164 static const xmlChar *
9165 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9166 const xmlChar * pref, const xmlChar * elem,
9167 const xmlChar ** prefix, xmlChar ** value,
9168 int *len, int *alloc)
9170 const xmlChar *name;
9171 xmlChar *val, *internal_val = NULL;
9172 int normalize = 0;
9174 *value = NULL;
9175 GROW;
9176 name = xmlParseQName(ctxt, prefix);
9177 if (name == NULL) {
9178 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9179 "error parsing attribute name\n");
9180 return (NULL);
9184 * get the type if needed
9186 if (ctxt->attsSpecial != NULL) {
9187 int type;
9189 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9190 pref, elem, *prefix, name);
9191 if (type != 0)
9192 normalize = 1;
9196 * read the value
9198 SKIP_BLANKS;
9199 if (RAW == '=') {
9200 NEXT;
9201 SKIP_BLANKS;
9202 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9203 if (normalize) {
9205 * Sometimes a second normalisation pass for spaces is needed
9206 * but that only happens if charrefs or entities refernces
9207 * have been used in the attribute value, i.e. the attribute
9208 * value have been extracted in an allocated string already.
9210 if (*alloc) {
9211 const xmlChar *val2;
9213 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9214 if ((val2 != NULL) && (val2 != val)) {
9215 xmlFree(val);
9216 val = (xmlChar *) val2;
9220 ctxt->instate = XML_PARSER_CONTENT;
9221 } else {
9222 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9223 "Specification mandate value for attribute %s\n",
9224 name);
9225 return (NULL);
9228 if (*prefix == ctxt->str_xml) {
9230 * Check that xml:lang conforms to the specification
9231 * No more registered as an error, just generate a warning now
9232 * since this was deprecated in XML second edition
9234 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9235 internal_val = xmlStrndup(val, *len);
9236 if (!xmlCheckLanguageID(internal_val)) {
9237 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9238 "Malformed value for xml:lang : %s\n",
9239 internal_val, NULL);
9244 * Check that xml:space conforms to the specification
9246 if (xmlStrEqual(name, BAD_CAST "space")) {
9247 internal_val = xmlStrndup(val, *len);
9248 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9249 *(ctxt->space) = 0;
9250 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9251 *(ctxt->space) = 1;
9252 else {
9253 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9254 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9255 internal_val, NULL);
9258 if (internal_val) {
9259 xmlFree(internal_val);
9263 *value = val;
9264 return (name);
9267 * xmlParseStartTag2:
9268 * @ctxt: an XML parser context
9270 * parse a start of tag either for rule element or
9271 * EmptyElement. In both case we don't parse the tag closing chars.
9272 * This routine is called when running SAX2 parsing
9274 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9276 * [ WFC: Unique Att Spec ]
9277 * No attribute name may appear more than once in the same start-tag or
9278 * empty-element tag.
9280 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9282 * [ WFC: Unique Att Spec ]
9283 * No attribute name may appear more than once in the same start-tag or
9284 * empty-element tag.
9286 * With namespace:
9288 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9290 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9292 * Returns the element name parsed
9295 static const xmlChar *
9296 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9297 const xmlChar **URI, int *tlen) {
9298 const xmlChar *localname;
9299 const xmlChar *prefix;
9300 const xmlChar *attname;
9301 const xmlChar *aprefix;
9302 const xmlChar *nsname;
9303 xmlChar *attvalue;
9304 const xmlChar **atts = ctxt->atts;
9305 int maxatts = ctxt->maxatts;
9306 int nratts, nbatts, nbdef;
9307 int i, j, nbNs, attval, oldline, oldcol;
9308 const xmlChar *base;
9309 unsigned long cur;
9310 int nsNr = ctxt->nsNr;
9312 if (RAW != '<') return(NULL);
9313 NEXT1;
9316 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9317 * point since the attribute values may be stored as pointers to
9318 * the buffer and calling SHRINK would destroy them !
9319 * The Shrinking is only possible once the full set of attribute
9320 * callbacks have been done.
9322 reparse:
9323 SHRINK;
9324 base = ctxt->input->base;
9325 cur = ctxt->input->cur - ctxt->input->base;
9326 oldline = ctxt->input->line;
9327 oldcol = ctxt->input->col;
9328 nbatts = 0;
9329 nratts = 0;
9330 nbdef = 0;
9331 nbNs = 0;
9332 attval = 0;
9333 /* Forget any namespaces added during an earlier parse of this element. */
9334 ctxt->nsNr = nsNr;
9336 localname = xmlParseQName(ctxt, &prefix);
9337 if (localname == NULL) {
9338 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9339 "StartTag: invalid element name\n");
9340 return(NULL);
9342 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9345 * Now parse the attributes, it ends up with the ending
9347 * (S Attribute)* S?
9349 SKIP_BLANKS;
9350 GROW;
9351 if (ctxt->input->base != base) goto base_changed;
9353 while (((RAW != '>') &&
9354 ((RAW != '/') || (NXT(1) != '>')) &&
9355 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9356 const xmlChar *q = CUR_PTR;
9357 unsigned int cons = ctxt->input->consumed;
9358 int len = -1, alloc = 0;
9360 attname = xmlParseAttribute2(ctxt, prefix, localname,
9361 &aprefix, &attvalue, &len, &alloc);
9362 if (ctxt->input->base != base) {
9363 if ((attvalue != NULL) && (alloc != 0))
9364 xmlFree(attvalue);
9365 attvalue = NULL;
9366 goto base_changed;
9368 if ((attname != NULL) && (attvalue != NULL)) {
9369 if (len < 0) len = xmlStrlen(attvalue);
9370 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9371 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9372 xmlURIPtr uri;
9374 if (URL == NULL) {
9375 xmlErrMemory(ctxt, "dictionary allocation failure");
9376 if ((attvalue != NULL) && (alloc != 0))
9377 xmlFree(attvalue);
9378 return(NULL);
9380 if (*URL != 0) {
9381 uri = xmlParseURI((const char *) URL);
9382 if (uri == NULL) {
9383 xmlNsErr(ctxt, XML_WAR_NS_URI,
9384 "xmlns: '%s' is not a valid URI\n",
9385 URL, NULL, NULL);
9386 } else {
9387 if (uri->scheme == NULL) {
9388 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9389 "xmlns: URI %s is not absolute\n",
9390 URL, NULL, NULL);
9392 xmlFreeURI(uri);
9394 if (URL == ctxt->str_xml_ns) {
9395 if (attname != ctxt->str_xml) {
9396 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9397 "xml namespace URI cannot be the default namespace\n",
9398 NULL, NULL, NULL);
9400 goto skip_default_ns;
9402 if ((len == 29) &&
9403 (xmlStrEqual(URL,
9404 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9405 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406 "reuse of the xmlns namespace name is forbidden\n",
9407 NULL, NULL, NULL);
9408 goto skip_default_ns;
9412 * check that it's not a defined namespace
9414 for (j = 1;j <= nbNs;j++)
9415 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9416 break;
9417 if (j <= nbNs)
9418 xmlErrAttributeDup(ctxt, NULL, attname);
9419 else
9420 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9421 skip_default_ns:
9422 if (alloc != 0) xmlFree(attvalue);
9423 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9424 break;
9425 if (!IS_BLANK_CH(RAW)) {
9426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9427 "attributes construct error\n");
9428 break;
9430 SKIP_BLANKS;
9431 continue;
9433 if (aprefix == ctxt->str_xmlns) {
9434 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9435 xmlURIPtr uri;
9437 if (attname == ctxt->str_xml) {
9438 if (URL != ctxt->str_xml_ns) {
9439 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9440 "xml namespace prefix mapped to wrong URI\n",
9441 NULL, NULL, NULL);
9444 * Do not keep a namespace definition node
9446 goto skip_ns;
9448 if (URL == ctxt->str_xml_ns) {
9449 if (attname != ctxt->str_xml) {
9450 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9451 "xml namespace URI mapped to wrong prefix\n",
9452 NULL, NULL, NULL);
9454 goto skip_ns;
9456 if (attname == ctxt->str_xmlns) {
9457 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9458 "redefinition of the xmlns prefix is forbidden\n",
9459 NULL, NULL, NULL);
9460 goto skip_ns;
9462 if ((len == 29) &&
9463 (xmlStrEqual(URL,
9464 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9465 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9466 "reuse of the xmlns namespace name is forbidden\n",
9467 NULL, NULL, NULL);
9468 goto skip_ns;
9470 if ((URL == NULL) || (URL[0] == 0)) {
9471 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9472 "xmlns:%s: Empty XML namespace is not allowed\n",
9473 attname, NULL, NULL);
9474 goto skip_ns;
9475 } else {
9476 uri = xmlParseURI((const char *) URL);
9477 if (uri == NULL) {
9478 xmlNsErr(ctxt, XML_WAR_NS_URI,
9479 "xmlns:%s: '%s' is not a valid URI\n",
9480 attname, URL, NULL);
9481 } else {
9482 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9483 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9484 "xmlns:%s: URI %s is not absolute\n",
9485 attname, URL, NULL);
9487 xmlFreeURI(uri);
9492 * check that it's not a defined namespace
9494 for (j = 1;j <= nbNs;j++)
9495 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9496 break;
9497 if (j <= nbNs)
9498 xmlErrAttributeDup(ctxt, aprefix, attname);
9499 else
9500 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9501 skip_ns:
9502 if (alloc != 0) xmlFree(attvalue);
9503 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9504 break;
9505 if (!IS_BLANK_CH(RAW)) {
9506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9507 "attributes construct error\n");
9508 break;
9510 SKIP_BLANKS;
9511 if (ctxt->input->base != base) goto base_changed;
9512 continue;
9516 * Add the pair to atts
9518 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9519 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9520 if (attvalue[len] == 0)
9521 xmlFree(attvalue);
9522 goto failed;
9524 maxatts = ctxt->maxatts;
9525 atts = ctxt->atts;
9527 ctxt->attallocs[nratts++] = alloc;
9528 atts[nbatts++] = attname;
9529 atts[nbatts++] = aprefix;
9530 atts[nbatts++] = NULL; /* the URI will be fetched later */
9531 atts[nbatts++] = attvalue;
9532 attvalue += len;
9533 atts[nbatts++] = attvalue;
9535 * tag if some deallocation is needed
9537 if (alloc != 0) attval = 1;
9538 } else {
9539 if ((attvalue != NULL) && (attvalue[len] == 0))
9540 xmlFree(attvalue);
9543 failed:
9545 GROW
9546 if (ctxt->instate == XML_PARSER_EOF)
9547 break;
9548 if (ctxt->input->base != base) goto base_changed;
9549 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9550 break;
9551 if (!IS_BLANK_CH(RAW)) {
9552 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9553 "attributes construct error\n");
9554 break;
9556 SKIP_BLANKS;
9557 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9558 (attname == NULL) && (attvalue == NULL)) {
9559 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9560 "xmlParseStartTag: problem parsing attributes\n");
9561 break;
9563 GROW;
9564 if (ctxt->input->base != base) goto base_changed;
9568 * The attributes defaulting
9570 if (ctxt->attsDefault != NULL) {
9571 xmlDefAttrsPtr defaults;
9573 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9574 if (defaults != NULL) {
9575 for (i = 0;i < defaults->nbAttrs;i++) {
9576 attname = defaults->values[5 * i];
9577 aprefix = defaults->values[5 * i + 1];
9580 * special work for namespaces defaulted defs
9582 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9584 * check that it's not a defined namespace
9586 for (j = 1;j <= nbNs;j++)
9587 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9588 break;
9589 if (j <= nbNs) continue;
9591 nsname = xmlGetNamespace(ctxt, NULL);
9592 if (nsname != defaults->values[5 * i + 2]) {
9593 if (nsPush(ctxt, NULL,
9594 defaults->values[5 * i + 2]) > 0)
9595 nbNs++;
9597 } else if (aprefix == ctxt->str_xmlns) {
9599 * check that it's not a defined namespace
9601 for (j = 1;j <= nbNs;j++)
9602 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9603 break;
9604 if (j <= nbNs) continue;
9606 nsname = xmlGetNamespace(ctxt, attname);
9607 if (nsname != defaults->values[2]) {
9608 if (nsPush(ctxt, attname,
9609 defaults->values[5 * i + 2]) > 0)
9610 nbNs++;
9612 } else {
9614 * check that it's not a defined attribute
9616 for (j = 0;j < nbatts;j+=5) {
9617 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9618 break;
9620 if (j < nbatts) continue;
9622 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9623 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9624 return(NULL);
9626 maxatts = ctxt->maxatts;
9627 atts = ctxt->atts;
9629 atts[nbatts++] = attname;
9630 atts[nbatts++] = aprefix;
9631 if (aprefix == NULL)
9632 atts[nbatts++] = NULL;
9633 else
9634 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9635 atts[nbatts++] = defaults->values[5 * i + 2];
9636 atts[nbatts++] = defaults->values[5 * i + 3];
9637 if ((ctxt->standalone == 1) &&
9638 (defaults->values[5 * i + 4] != NULL)) {
9639 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9640 "standalone: attribute %s on %s defaulted from external subset\n",
9641 attname, localname);
9643 nbdef++;
9650 * The attributes checkings
9652 for (i = 0; i < nbatts;i += 5) {
9654 * The default namespace does not apply to attribute names.
9656 if (atts[i + 1] != NULL) {
9657 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9658 if (nsname == NULL) {
9659 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9660 "Namespace prefix %s for %s on %s is not defined\n",
9661 atts[i + 1], atts[i], localname);
9663 atts[i + 2] = nsname;
9664 } else
9665 nsname = NULL;
9667 * [ WFC: Unique Att Spec ]
9668 * No attribute name may appear more than once in the same
9669 * start-tag or empty-element tag.
9670 * As extended by the Namespace in XML REC.
9672 for (j = 0; j < i;j += 5) {
9673 if (atts[i] == atts[j]) {
9674 if (atts[i+1] == atts[j+1]) {
9675 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9676 break;
9678 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9679 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9680 "Namespaced Attribute %s in '%s' redefined\n",
9681 atts[i], nsname, NULL);
9682 break;
9688 nsname = xmlGetNamespace(ctxt, prefix);
9689 if ((prefix != NULL) && (nsname == NULL)) {
9690 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9691 "Namespace prefix %s on %s is not defined\n",
9692 prefix, localname, NULL);
9694 *pref = prefix;
9695 *URI = nsname;
9698 * SAX: Start of Element !
9700 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9701 (!ctxt->disableSAX)) {
9702 if (nbNs > 0)
9703 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9704 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9705 nbatts / 5, nbdef, atts);
9706 else
9707 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9708 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9712 * Free up attribute allocated strings if needed
9714 if (attval != 0) {
9715 for (i = 3,j = 0; j < nratts;i += 5,j++)
9716 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9717 xmlFree((xmlChar *) atts[i]);
9720 return(localname);
9722 base_changed:
9724 * the attribute strings are valid iif the base didn't changed
9726 if (attval != 0) {
9727 for (i = 3,j = 0; j < nratts;i += 5,j++)
9728 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9729 xmlFree((xmlChar *) atts[i]);
9731 ctxt->input->cur = ctxt->input->base + cur;
9732 ctxt->input->line = oldline;
9733 ctxt->input->col = oldcol;
9734 if (ctxt->wellFormed == 1) {
9735 goto reparse;
9737 return(NULL);
9741 * xmlParseEndTag2:
9742 * @ctxt: an XML parser context
9743 * @line: line of the start tag
9744 * @nsNr: number of namespaces on the start tag
9746 * parse an end of tag
9748 * [42] ETag ::= '</' Name S? '>'
9750 * With namespace
9752 * [NS 9] ETag ::= '</' QName S? '>'
9755 static void
9756 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9757 const xmlChar *URI, int line, int nsNr, int tlen) {
9758 const xmlChar *name;
9760 GROW;
9761 if ((RAW != '<') || (NXT(1) != '/')) {
9762 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9763 return;
9765 SKIP(2);
9767 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9768 if (ctxt->input->cur[tlen] == '>') {
9769 ctxt->input->cur += tlen + 1;
9770 ctxt->input->col += tlen + 1;
9771 goto done;
9773 ctxt->input->cur += tlen;
9774 ctxt->input->col += tlen;
9775 name = (xmlChar*)1;
9776 } else {
9777 if (prefix == NULL)
9778 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9779 else
9780 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9784 * We should definitely be at the ending "S? '>'" part
9786 GROW;
9787 if (ctxt->instate == XML_PARSER_EOF)
9788 return;
9789 SKIP_BLANKS;
9790 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9791 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9792 } else
9793 NEXT1;
9796 * [ WFC: Element Type Match ]
9797 * The Name in an element's end-tag must match the element type in the
9798 * start-tag.
9801 if (name != (xmlChar*)1) {
9802 if (name == NULL) name = BAD_CAST "unparseable";
9803 if ((line == 0) && (ctxt->node != NULL))
9804 line = ctxt->node->line;
9805 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9806 "Opening and ending tag mismatch: %s line %d and %s\n",
9807 ctxt->name, line, name);
9811 * SAX: End of Tag
9813 done:
9814 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9815 (!ctxt->disableSAX))
9816 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9818 spacePop(ctxt);
9819 if (nsNr != 0)
9820 nsPop(ctxt, nsNr);
9821 return;
9825 * xmlParseCDSect:
9826 * @ctxt: an XML parser context
9828 * Parse escaped pure raw content.
9830 * [18] CDSect ::= CDStart CData CDEnd
9832 * [19] CDStart ::= '<![CDATA['
9834 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9836 * [21] CDEnd ::= ']]>'
9838 void
9839 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840 xmlChar *buf = NULL;
9841 int len = 0;
9842 int size = XML_PARSER_BUFFER_SIZE;
9843 int r, rl;
9844 int s, sl;
9845 int cur, l;
9846 int count = 0;
9848 /* Check 2.6.0 was NXT(0) not RAW */
9849 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9850 SKIP(9);
9851 } else
9852 return;
9854 ctxt->instate = XML_PARSER_CDATA_SECTION;
9855 r = CUR_CHAR(rl);
9856 if (!IS_CHAR(r)) {
9857 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9858 ctxt->instate = XML_PARSER_CONTENT;
9859 return;
9861 NEXTL(rl);
9862 s = CUR_CHAR(sl);
9863 if (!IS_CHAR(s)) {
9864 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9865 ctxt->instate = XML_PARSER_CONTENT;
9866 return;
9868 NEXTL(sl);
9869 cur = CUR_CHAR(l);
9870 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9871 if (buf == NULL) {
9872 xmlErrMemory(ctxt, NULL);
9873 return;
9875 while (IS_CHAR(cur) &&
9876 ((r != ']') || (s != ']') || (cur != '>'))) {
9877 if (len + 5 >= size) {
9878 xmlChar *tmp;
9880 if ((size > XML_MAX_TEXT_LENGTH) &&
9881 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9882 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9883 "CData section too big found", NULL);
9884 xmlFree (buf);
9885 return;
9887 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9888 if (tmp == NULL) {
9889 xmlFree(buf);
9890 xmlErrMemory(ctxt, NULL);
9891 return;
9893 buf = tmp;
9894 size *= 2;
9896 COPY_BUF(rl,buf,len,r);
9897 r = s;
9898 rl = sl;
9899 s = cur;
9900 sl = l;
9901 count++;
9902 if (count > 50) {
9903 GROW;
9904 if (ctxt->instate == XML_PARSER_EOF) {
9905 xmlFree(buf);
9906 return;
9908 count = 0;
9910 NEXTL(l);
9911 cur = CUR_CHAR(l);
9913 buf[len] = 0;
9914 ctxt->instate = XML_PARSER_CONTENT;
9915 if (cur != '>') {
9916 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9917 "CData section not finished\n%.50s\n", buf);
9918 xmlFree(buf);
9919 return;
9921 NEXTL(l);
9924 * OK the buffer is to be consumed as cdata.
9926 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9927 if (ctxt->sax->cdataBlock != NULL)
9928 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9929 else if (ctxt->sax->characters != NULL)
9930 ctxt->sax->characters(ctxt->userData, buf, len);
9932 xmlFree(buf);
9936 * xmlParseContent:
9937 * @ctxt: an XML parser context
9939 * Parse a content:
9941 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9944 void
9945 xmlParseContent(xmlParserCtxtPtr ctxt) {
9946 GROW;
9947 while ((RAW != 0) &&
9948 ((RAW != '<') || (NXT(1) != '/')) &&
9949 (ctxt->instate != XML_PARSER_EOF)) {
9950 const xmlChar *test = CUR_PTR;
9951 unsigned int cons = ctxt->input->consumed;
9952 const xmlChar *cur = ctxt->input->cur;
9955 * First case : a Processing Instruction.
9957 if ((*cur == '<') && (cur[1] == '?')) {
9958 xmlParsePI(ctxt);
9962 * Second case : a CDSection
9964 /* 2.6.0 test was *cur not RAW */
9965 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9966 xmlParseCDSect(ctxt);
9970 * Third case : a comment
9972 else if ((*cur == '<') && (NXT(1) == '!') &&
9973 (NXT(2) == '-') && (NXT(3) == '-')) {
9974 xmlParseComment(ctxt);
9975 ctxt->instate = XML_PARSER_CONTENT;
9979 * Fourth case : a sub-element.
9981 else if (*cur == '<') {
9982 xmlParseElement(ctxt);
9986 * Fifth case : a reference. If if has not been resolved,
9987 * parsing returns it's Name, create the node
9990 else if (*cur == '&') {
9991 xmlParseReference(ctxt);
9995 * Last case, text. Note that References are handled directly.
9997 else {
9998 xmlParseCharData(ctxt, 0);
10001 GROW;
10003 * Pop-up of finished entities.
10005 while ((RAW == 0) && (ctxt->inputNr > 1))
10006 xmlPopInput(ctxt);
10007 SHRINK;
10009 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10010 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10011 "detected an error in element content\n");
10012 ctxt->instate = XML_PARSER_EOF;
10013 break;
10019 * xmlParseElement:
10020 * @ctxt: an XML parser context
10022 * parse an XML element, this is highly recursive
10024 * [39] element ::= EmptyElemTag | STag content ETag
10026 * [ WFC: Element Type Match ]
10027 * The Name in an element's end-tag must match the element type in the
10028 * start-tag.
10032 void
10033 xmlParseElement(xmlParserCtxtPtr ctxt) {
10034 const xmlChar *name;
10035 const xmlChar *prefix = NULL;
10036 const xmlChar *URI = NULL;
10037 xmlParserNodeInfo node_info;
10038 int line, tlen = 0;
10039 xmlNodePtr ret;
10040 int nsNr = ctxt->nsNr;
10042 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10043 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10044 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10045 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10046 xmlParserMaxDepth);
10047 ctxt->instate = XML_PARSER_EOF;
10048 return;
10051 /* Capture start position */
10052 if (ctxt->record_info) {
10053 node_info.begin_pos = ctxt->input->consumed +
10054 (CUR_PTR - ctxt->input->base);
10055 node_info.begin_line = ctxt->input->line;
10058 if (ctxt->spaceNr == 0)
10059 spacePush(ctxt, -1);
10060 else if (*ctxt->space == -2)
10061 spacePush(ctxt, -1);
10062 else
10063 spacePush(ctxt, *ctxt->space);
10065 line = ctxt->input->line;
10066 #ifdef LIBXML_SAX1_ENABLED
10067 if (ctxt->sax2)
10068 #endif /* LIBXML_SAX1_ENABLED */
10069 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10070 #ifdef LIBXML_SAX1_ENABLED
10071 else
10072 name = xmlParseStartTag(ctxt);
10073 #endif /* LIBXML_SAX1_ENABLED */
10074 if (ctxt->instate == XML_PARSER_EOF)
10075 return;
10076 if (name == NULL) {
10077 spacePop(ctxt);
10078 return;
10080 namePush(ctxt, name);
10081 ret = ctxt->node;
10083 #ifdef LIBXML_VALID_ENABLED
10085 * [ VC: Root Element Type ]
10086 * The Name in the document type declaration must match the element
10087 * type of the root element.
10089 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10090 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10091 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10092 #endif /* LIBXML_VALID_ENABLED */
10095 * Check for an Empty Element.
10097 if ((RAW == '/') && (NXT(1) == '>')) {
10098 SKIP(2);
10099 if (ctxt->sax2) {
10100 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10101 (!ctxt->disableSAX))
10102 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10103 #ifdef LIBXML_SAX1_ENABLED
10104 } else {
10105 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10106 (!ctxt->disableSAX))
10107 ctxt->sax->endElement(ctxt->userData, name);
10108 #endif /* LIBXML_SAX1_ENABLED */
10110 namePop(ctxt);
10111 spacePop(ctxt);
10112 if (nsNr != ctxt->nsNr)
10113 nsPop(ctxt, ctxt->nsNr - nsNr);
10114 if ( ret != NULL && ctxt->record_info ) {
10115 node_info.end_pos = ctxt->input->consumed +
10116 (CUR_PTR - ctxt->input->base);
10117 node_info.end_line = ctxt->input->line;
10118 node_info.node = ret;
10119 xmlParserAddNodeInfo(ctxt, &node_info);
10121 return;
10123 if (RAW == '>') {
10124 NEXT1;
10125 } else {
10126 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10127 "Couldn't find end of Start Tag %s line %d\n",
10128 name, line, NULL);
10131 * end of parsing of this node.
10133 nodePop(ctxt);
10134 namePop(ctxt);
10135 spacePop(ctxt);
10136 if (nsNr != ctxt->nsNr)
10137 nsPop(ctxt, ctxt->nsNr - nsNr);
10140 * Capture end position and add node
10142 if ( ret != NULL && ctxt->record_info ) {
10143 node_info.end_pos = ctxt->input->consumed +
10144 (CUR_PTR - ctxt->input->base);
10145 node_info.end_line = ctxt->input->line;
10146 node_info.node = ret;
10147 xmlParserAddNodeInfo(ctxt, &node_info);
10149 return;
10153 * Parse the content of the element:
10155 xmlParseContent(ctxt);
10156 if (ctxt->instate == XML_PARSER_EOF)
10157 return;
10158 if (!IS_BYTE_CHAR(RAW)) {
10159 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10160 "Premature end of data in tag %s line %d\n",
10161 name, line, NULL);
10164 * end of parsing of this node.
10166 nodePop(ctxt);
10167 namePop(ctxt);
10168 spacePop(ctxt);
10169 if (nsNr != ctxt->nsNr)
10170 nsPop(ctxt, ctxt->nsNr - nsNr);
10171 return;
10175 * parse the end of tag: '</' should be here.
10177 if (ctxt->sax2) {
10178 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10179 namePop(ctxt);
10181 #ifdef LIBXML_SAX1_ENABLED
10182 else
10183 xmlParseEndTag1(ctxt, line);
10184 #endif /* LIBXML_SAX1_ENABLED */
10187 * Capture end position and add node
10189 if ( ret != NULL && ctxt->record_info ) {
10190 node_info.end_pos = ctxt->input->consumed +
10191 (CUR_PTR - ctxt->input->base);
10192 node_info.end_line = ctxt->input->line;
10193 node_info.node = ret;
10194 xmlParserAddNodeInfo(ctxt, &node_info);
10199 * xmlParseVersionNum:
10200 * @ctxt: an XML parser context
10202 * parse the XML version value.
10204 * [26] VersionNum ::= '1.' [0-9]+
10206 * In practice allow [0-9].[0-9]+ at that level
10208 * Returns the string giving the XML version number, or NULL
10210 xmlChar *
10211 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10212 xmlChar *buf = NULL;
10213 int len = 0;
10214 int size = 10;
10215 xmlChar cur;
10217 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10218 if (buf == NULL) {
10219 xmlErrMemory(ctxt, NULL);
10220 return(NULL);
10222 cur = CUR;
10223 if (!((cur >= '0') && (cur <= '9'))) {
10224 xmlFree(buf);
10225 return(NULL);
10227 buf[len++] = cur;
10228 NEXT;
10229 cur=CUR;
10230 if (cur != '.') {
10231 xmlFree(buf);
10232 return(NULL);
10234 buf[len++] = cur;
10235 NEXT;
10236 cur=CUR;
10237 while ((cur >= '0') && (cur <= '9')) {
10238 if (len + 1 >= size) {
10239 xmlChar *tmp;
10241 size *= 2;
10242 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10243 if (tmp == NULL) {
10244 xmlFree(buf);
10245 xmlErrMemory(ctxt, NULL);
10246 return(NULL);
10248 buf = tmp;
10250 buf[len++] = cur;
10251 NEXT;
10252 cur=CUR;
10254 buf[len] = 0;
10255 return(buf);
10259 * xmlParseVersionInfo:
10260 * @ctxt: an XML parser context
10262 * parse the XML version.
10264 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10266 * [25] Eq ::= S? '=' S?
10268 * Returns the version string, e.g. "1.0"
10271 xmlChar *
10272 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10273 xmlChar *version = NULL;
10275 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10276 SKIP(7);
10277 SKIP_BLANKS;
10278 if (RAW != '=') {
10279 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10280 return(NULL);
10282 NEXT;
10283 SKIP_BLANKS;
10284 if (RAW == '"') {
10285 NEXT;
10286 version = xmlParseVersionNum(ctxt);
10287 if (RAW != '"') {
10288 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10289 } else
10290 NEXT;
10291 } else if (RAW == '\''){
10292 NEXT;
10293 version = xmlParseVersionNum(ctxt);
10294 if (RAW != '\'') {
10295 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10296 } else
10297 NEXT;
10298 } else {
10299 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10302 return(version);
10306 * xmlParseEncName:
10307 * @ctxt: an XML parser context
10309 * parse the XML encoding name
10311 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10313 * Returns the encoding name value or NULL
10315 xmlChar *
10316 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10317 xmlChar *buf = NULL;
10318 int len = 0;
10319 int size = 10;
10320 xmlChar cur;
10322 cur = CUR;
10323 if (((cur >= 'a') && (cur <= 'z')) ||
10324 ((cur >= 'A') && (cur <= 'Z'))) {
10325 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10326 if (buf == NULL) {
10327 xmlErrMemory(ctxt, NULL);
10328 return(NULL);
10331 buf[len++] = cur;
10332 NEXT;
10333 cur = CUR;
10334 while (((cur >= 'a') && (cur <= 'z')) ||
10335 ((cur >= 'A') && (cur <= 'Z')) ||
10336 ((cur >= '0') && (cur <= '9')) ||
10337 (cur == '.') || (cur == '_') ||
10338 (cur == '-')) {
10339 if (len + 1 >= size) {
10340 xmlChar *tmp;
10342 size *= 2;
10343 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10344 if (tmp == NULL) {
10345 xmlErrMemory(ctxt, NULL);
10346 xmlFree(buf);
10347 return(NULL);
10349 buf = tmp;
10351 buf[len++] = cur;
10352 NEXT;
10353 cur = CUR;
10354 if (cur == 0) {
10355 SHRINK;
10356 GROW;
10357 cur = CUR;
10360 buf[len] = 0;
10361 } else {
10362 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10364 return(buf);
10368 * xmlParseEncodingDecl:
10369 * @ctxt: an XML parser context
10371 * parse the XML encoding declaration
10373 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10375 * this setups the conversion filters.
10377 * Returns the encoding value or NULL
10380 const xmlChar *
10381 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10382 xmlChar *encoding = NULL;
10384 SKIP_BLANKS;
10385 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10386 SKIP(8);
10387 SKIP_BLANKS;
10388 if (RAW != '=') {
10389 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10390 return(NULL);
10392 NEXT;
10393 SKIP_BLANKS;
10394 if (RAW == '"') {
10395 NEXT;
10396 encoding = xmlParseEncName(ctxt);
10397 if (RAW != '"') {
10398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10399 } else
10400 NEXT;
10401 } else if (RAW == '\''){
10402 NEXT;
10403 encoding = xmlParseEncName(ctxt);
10404 if (RAW != '\'') {
10405 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10406 } else
10407 NEXT;
10408 } else {
10409 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10413 * Non standard parsing, allowing the user to ignore encoding
10415 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10416 xmlFree((xmlChar *) encoding);
10417 return(NULL);
10421 * UTF-16 encoding stwich has already taken place at this stage,
10422 * more over the little-endian/big-endian selection is already done
10424 if ((encoding != NULL) &&
10425 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10426 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10428 * If no encoding was passed to the parser, that we are
10429 * using UTF-16 and no decoder is present i.e. the
10430 * document is apparently UTF-8 compatible, then raise an
10431 * encoding mismatch fatal error
10433 if ((ctxt->encoding == NULL) &&
10434 (ctxt->input->buf != NULL) &&
10435 (ctxt->input->buf->encoder == NULL)) {
10436 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10437 "Document labelled UTF-16 but has UTF-8 content\n");
10439 if (ctxt->encoding != NULL)
10440 xmlFree((xmlChar *) ctxt->encoding);
10441 ctxt->encoding = encoding;
10444 * UTF-8 encoding is handled natively
10446 else if ((encoding != NULL) &&
10447 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10448 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10449 if (ctxt->encoding != NULL)
10450 xmlFree((xmlChar *) ctxt->encoding);
10451 ctxt->encoding = encoding;
10453 else if (encoding != NULL) {
10454 xmlCharEncodingHandlerPtr handler;
10456 if (ctxt->input->encoding != NULL)
10457 xmlFree((xmlChar *) ctxt->input->encoding);
10458 ctxt->input->encoding = encoding;
10460 handler = xmlFindCharEncodingHandler((const char *) encoding);
10461 if (handler != NULL) {
10462 xmlSwitchToEncoding(ctxt, handler);
10463 } else {
10464 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10465 "Unsupported encoding %s\n", encoding);
10466 return(NULL);
10470 return(encoding);
10474 * xmlParseSDDecl:
10475 * @ctxt: an XML parser context
10477 * parse the XML standalone declaration
10479 * [32] SDDecl ::= S 'standalone' Eq
10480 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10482 * [ VC: Standalone Document Declaration ]
10483 * TODO The standalone document declaration must have the value "no"
10484 * if any external markup declarations contain declarations of:
10485 * - attributes with default values, if elements to which these
10486 * attributes apply appear in the document without specifications
10487 * of values for these attributes, or
10488 * - entities (other than amp, lt, gt, apos, quot), if references
10489 * to those entities appear in the document, or
10490 * - attributes with values subject to normalization, where the
10491 * attribute appears in the document with a value which will change
10492 * as a result of normalization, or
10493 * - element types with element content, if white space occurs directly
10494 * within any instance of those types.
10496 * Returns:
10497 * 1 if standalone="yes"
10498 * 0 if standalone="no"
10499 * -2 if standalone attribute is missing or invalid
10500 * (A standalone value of -2 means that the XML declaration was found,
10501 * but no value was specified for the standalone attribute).
10505 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10506 int standalone = -2;
10508 SKIP_BLANKS;
10509 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10510 SKIP(10);
10511 SKIP_BLANKS;
10512 if (RAW != '=') {
10513 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10514 return(standalone);
10516 NEXT;
10517 SKIP_BLANKS;
10518 if (RAW == '\''){
10519 NEXT;
10520 if ((RAW == 'n') && (NXT(1) == 'o')) {
10521 standalone = 0;
10522 SKIP(2);
10523 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10524 (NXT(2) == 's')) {
10525 standalone = 1;
10526 SKIP(3);
10527 } else {
10528 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10530 if (RAW != '\'') {
10531 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10532 } else
10533 NEXT;
10534 } else if (RAW == '"'){
10535 NEXT;
10536 if ((RAW == 'n') && (NXT(1) == 'o')) {
10537 standalone = 0;
10538 SKIP(2);
10539 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10540 (NXT(2) == 's')) {
10541 standalone = 1;
10542 SKIP(3);
10543 } else {
10544 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10546 if (RAW != '"') {
10547 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10548 } else
10549 NEXT;
10550 } else {
10551 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10554 return(standalone);
10558 * xmlParseXMLDecl:
10559 * @ctxt: an XML parser context
10561 * parse an XML declaration header
10563 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10566 void
10567 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10568 xmlChar *version;
10571 * This value for standalone indicates that the document has an
10572 * XML declaration but it does not have a standalone attribute.
10573 * It will be overwritten later if a standalone attribute is found.
10575 ctxt->input->standalone = -2;
10578 * We know that '<?xml' is here.
10580 SKIP(5);
10582 if (!IS_BLANK_CH(RAW)) {
10583 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10584 "Blank needed after '<?xml'\n");
10586 SKIP_BLANKS;
10589 * We must have the VersionInfo here.
10591 version = xmlParseVersionInfo(ctxt);
10592 if (version == NULL) {
10593 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10594 } else {
10595 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10597 * Changed here for XML-1.0 5th edition
10599 if (ctxt->options & XML_PARSE_OLD10) {
10600 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10601 "Unsupported version '%s'\n",
10602 version);
10603 } else {
10604 if ((version[0] == '1') && ((version[1] == '.'))) {
10605 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10606 "Unsupported version '%s'\n",
10607 version, NULL);
10608 } else {
10609 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10610 "Unsupported version '%s'\n",
10611 version);
10615 if (ctxt->version != NULL)
10616 xmlFree((void *) ctxt->version);
10617 ctxt->version = version;
10621 * We may have the encoding declaration
10623 if (!IS_BLANK_CH(RAW)) {
10624 if ((RAW == '?') && (NXT(1) == '>')) {
10625 SKIP(2);
10626 return;
10628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10630 xmlParseEncodingDecl(ctxt);
10631 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10633 * The XML REC instructs us to stop parsing right here
10635 return;
10639 * We may have the standalone status.
10641 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10642 if ((RAW == '?') && (NXT(1) == '>')) {
10643 SKIP(2);
10644 return;
10646 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10650 * We can grow the input buffer freely at that point
10652 GROW;
10654 SKIP_BLANKS;
10655 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10657 SKIP_BLANKS;
10658 if ((RAW == '?') && (NXT(1) == '>')) {
10659 SKIP(2);
10660 } else if (RAW == '>') {
10661 /* Deprecated old WD ... */
10662 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10663 NEXT;
10664 } else {
10665 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10666 MOVETO_ENDTAG(CUR_PTR);
10667 NEXT;
10672 * xmlParseMisc:
10673 * @ctxt: an XML parser context
10675 * parse an XML Misc* optional field.
10677 * [27] Misc ::= Comment | PI | S
10680 void
10681 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10682 while ((ctxt->instate != XML_PARSER_EOF) &&
10683 (((RAW == '<') && (NXT(1) == '?')) ||
10684 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10685 IS_BLANK_CH(CUR))) {
10686 if ((RAW == '<') && (NXT(1) == '?')) {
10687 xmlParsePI(ctxt);
10688 } else if (IS_BLANK_CH(CUR)) {
10689 NEXT;
10690 } else
10691 xmlParseComment(ctxt);
10696 * xmlParseDocument:
10697 * @ctxt: an XML parser context
10699 * parse an XML document (and build a tree if using the standard SAX
10700 * interface).
10702 * [1] document ::= prolog element Misc*
10704 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10706 * Returns 0, -1 in case of error. the parser context is augmented
10707 * as a result of the parsing.
10711 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10712 xmlChar start[4];
10713 xmlCharEncoding enc;
10715 xmlInitParser();
10717 if ((ctxt == NULL) || (ctxt->input == NULL))
10718 return(-1);
10720 GROW;
10723 * SAX: detecting the level.
10725 xmlDetectSAX2(ctxt);
10728 * SAX: beginning of the document processing.
10730 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10731 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10732 if (ctxt->instate == XML_PARSER_EOF)
10733 return(-1);
10735 if ((ctxt->encoding == NULL) &&
10736 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10738 * Get the 4 first bytes and decode the charset
10739 * if enc != XML_CHAR_ENCODING_NONE
10740 * plug some encoding conversion routines.
10742 start[0] = RAW;
10743 start[1] = NXT(1);
10744 start[2] = NXT(2);
10745 start[3] = NXT(3);
10746 enc = xmlDetectCharEncoding(&start[0], 4);
10747 if (enc != XML_CHAR_ENCODING_NONE) {
10748 xmlSwitchEncoding(ctxt, enc);
10753 if (CUR == 0) {
10754 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10758 * Check for the XMLDecl in the Prolog.
10759 * do not GROW here to avoid the detected encoder to decode more
10760 * than just the first line, unless the amount of data is really
10761 * too small to hold "<?xml version="1.0" encoding="foo"
10763 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10764 GROW;
10766 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10769 * Note that we will switch encoding on the fly.
10771 xmlParseXMLDecl(ctxt);
10772 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10774 * The XML REC instructs us to stop parsing right here
10776 return(-1);
10778 ctxt->standalone = ctxt->input->standalone;
10779 SKIP_BLANKS;
10780 } else {
10781 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10783 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10784 ctxt->sax->startDocument(ctxt->userData);
10785 if (ctxt->instate == XML_PARSER_EOF)
10786 return(-1);
10787 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10788 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10789 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10793 * The Misc part of the Prolog
10795 GROW;
10796 xmlParseMisc(ctxt);
10799 * Then possibly doc type declaration(s) and more Misc
10800 * (doctypedecl Misc*)?
10802 GROW;
10803 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10805 ctxt->inSubset = 1;
10806 xmlParseDocTypeDecl(ctxt);
10807 if (RAW == '[') {
10808 ctxt->instate = XML_PARSER_DTD;
10809 xmlParseInternalSubset(ctxt);
10810 if (ctxt->instate == XML_PARSER_EOF)
10811 return(-1);
10815 * Create and update the external subset.
10817 ctxt->inSubset = 2;
10818 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10819 (!ctxt->disableSAX))
10820 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10821 ctxt->extSubSystem, ctxt->extSubURI);
10822 if (ctxt->instate == XML_PARSER_EOF)
10823 return(-1);
10824 ctxt->inSubset = 0;
10826 xmlCleanSpecialAttr(ctxt);
10828 ctxt->instate = XML_PARSER_PROLOG;
10829 xmlParseMisc(ctxt);
10833 * Time to start parsing the tree itself
10835 GROW;
10836 if (RAW != '<') {
10837 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10838 "Start tag expected, '<' not found\n");
10839 } else {
10840 ctxt->instate = XML_PARSER_CONTENT;
10841 xmlParseElement(ctxt);
10842 ctxt->instate = XML_PARSER_EPILOG;
10846 * The Misc part at the end
10848 xmlParseMisc(ctxt);
10850 if (RAW != 0) {
10851 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10853 ctxt->instate = XML_PARSER_EOF;
10857 * SAX: end of the document processing.
10859 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10860 ctxt->sax->endDocument(ctxt->userData);
10863 * Remove locally kept entity definitions if the tree was not built
10865 if ((ctxt->myDoc != NULL) &&
10866 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10867 xmlFreeDoc(ctxt->myDoc);
10868 ctxt->myDoc = NULL;
10871 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10872 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10873 if (ctxt->valid)
10874 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10875 if (ctxt->nsWellFormed)
10876 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10877 if (ctxt->options & XML_PARSE_OLD10)
10878 ctxt->myDoc->properties |= XML_DOC_OLD10;
10880 if (! ctxt->wellFormed) {
10881 ctxt->valid = 0;
10882 return(-1);
10884 return(0);
10888 * xmlParseExtParsedEnt:
10889 * @ctxt: an XML parser context
10891 * parse a general parsed entity
10892 * An external general parsed entity is well-formed if it matches the
10893 * production labeled extParsedEnt.
10895 * [78] extParsedEnt ::= TextDecl? content
10897 * Returns 0, -1 in case of error. the parser context is augmented
10898 * as a result of the parsing.
10902 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10903 xmlChar start[4];
10904 xmlCharEncoding enc;
10906 if ((ctxt == NULL) || (ctxt->input == NULL))
10907 return(-1);
10909 xmlDefaultSAXHandlerInit();
10911 xmlDetectSAX2(ctxt);
10913 GROW;
10916 * SAX: beginning of the document processing.
10918 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10919 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10922 * Get the 4 first bytes and decode the charset
10923 * if enc != XML_CHAR_ENCODING_NONE
10924 * plug some encoding conversion routines.
10926 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10927 start[0] = RAW;
10928 start[1] = NXT(1);
10929 start[2] = NXT(2);
10930 start[3] = NXT(3);
10931 enc = xmlDetectCharEncoding(start, 4);
10932 if (enc != XML_CHAR_ENCODING_NONE) {
10933 xmlSwitchEncoding(ctxt, enc);
10938 if (CUR == 0) {
10939 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10943 * Check for the XMLDecl in the Prolog.
10945 GROW;
10946 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10949 * Note that we will switch encoding on the fly.
10951 xmlParseXMLDecl(ctxt);
10952 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10954 * The XML REC instructs us to stop parsing right here
10956 return(-1);
10958 SKIP_BLANKS;
10959 } else {
10960 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10962 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10963 ctxt->sax->startDocument(ctxt->userData);
10964 if (ctxt->instate == XML_PARSER_EOF)
10965 return(-1);
10968 * Doing validity checking on chunk doesn't make sense
10970 ctxt->instate = XML_PARSER_CONTENT;
10971 ctxt->validate = 0;
10972 ctxt->loadsubset = 0;
10973 ctxt->depth = 0;
10975 xmlParseContent(ctxt);
10976 if (ctxt->instate == XML_PARSER_EOF)
10977 return(-1);
10979 if ((RAW == '<') && (NXT(1) == '/')) {
10980 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10981 } else if (RAW != 0) {
10982 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10986 * SAX: end of the document processing.
10988 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10989 ctxt->sax->endDocument(ctxt->userData);
10991 if (! ctxt->wellFormed) return(-1);
10992 return(0);
10995 #ifdef LIBXML_PUSH_ENABLED
10996 /************************************************************************
10998 * Progressive parsing interfaces *
11000 ************************************************************************/
11003 * xmlParseLookupSequence:
11004 * @ctxt: an XML parser context
11005 * @first: the first char to lookup
11006 * @next: the next char to lookup or zero
11007 * @third: the next char to lookup or zero
11009 * Try to find if a sequence (first, next, third) or just (first next) or
11010 * (first) is available in the input stream.
11011 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11012 * to avoid rescanning sequences of bytes, it DOES change the state of the
11013 * parser, do not use liberally.
11015 * Returns the index to the current parsing point if the full sequence
11016 * is available, -1 otherwise.
11018 static int
11019 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11020 xmlChar next, xmlChar third) {
11021 int base, len;
11022 xmlParserInputPtr in;
11023 const xmlChar *buf;
11025 in = ctxt->input;
11026 if (in == NULL) return(-1);
11027 base = in->cur - in->base;
11028 if (base < 0) return(-1);
11029 if (ctxt->checkIndex > base)
11030 base = ctxt->checkIndex;
11031 if (in->buf == NULL) {
11032 buf = in->base;
11033 len = in->length;
11034 } else {
11035 buf = xmlBufContent(in->buf->buffer);
11036 len = xmlBufUse(in->buf->buffer);
11038 /* take into account the sequence length */
11039 if (third) len -= 2;
11040 else if (next) len --;
11041 for (;base < len;base++) {
11042 if (buf[base] == first) {
11043 if (third != 0) {
11044 if ((buf[base + 1] != next) ||
11045 (buf[base + 2] != third)) continue;
11046 } else if (next != 0) {
11047 if (buf[base + 1] != next) continue;
11049 ctxt->checkIndex = 0;
11050 #ifdef DEBUG_PUSH
11051 if (next == 0)
11052 xmlGenericError(xmlGenericErrorContext,
11053 "PP: lookup '%c' found at %d\n",
11054 first, base);
11055 else if (third == 0)
11056 xmlGenericError(xmlGenericErrorContext,
11057 "PP: lookup '%c%c' found at %d\n",
11058 first, next, base);
11059 else
11060 xmlGenericError(xmlGenericErrorContext,
11061 "PP: lookup '%c%c%c' found at %d\n",
11062 first, next, third, base);
11063 #endif
11064 return(base - (in->cur - in->base));
11067 ctxt->checkIndex = base;
11068 #ifdef DEBUG_PUSH
11069 if (next == 0)
11070 xmlGenericError(xmlGenericErrorContext,
11071 "PP: lookup '%c' failed\n", first);
11072 else if (third == 0)
11073 xmlGenericError(xmlGenericErrorContext,
11074 "PP: lookup '%c%c' failed\n", first, next);
11075 else
11076 xmlGenericError(xmlGenericErrorContext,
11077 "PP: lookup '%c%c%c' failed\n", first, next, third);
11078 #endif
11079 return(-1);
11083 * xmlParseGetLasts:
11084 * @ctxt: an XML parser context
11085 * @lastlt: pointer to store the last '<' from the input
11086 * @lastgt: pointer to store the last '>' from the input
11088 * Lookup the last < and > in the current chunk
11090 static void
11091 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11092 const xmlChar **lastgt) {
11093 const xmlChar *tmp;
11095 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11096 xmlGenericError(xmlGenericErrorContext,
11097 "Internal error: xmlParseGetLasts\n");
11098 return;
11100 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11101 tmp = ctxt->input->end;
11102 tmp--;
11103 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11104 if (tmp < ctxt->input->base) {
11105 *lastlt = NULL;
11106 *lastgt = NULL;
11107 } else {
11108 *lastlt = tmp;
11109 tmp++;
11110 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11111 if (*tmp == '\'') {
11112 tmp++;
11113 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11114 if (tmp < ctxt->input->end) tmp++;
11115 } else if (*tmp == '"') {
11116 tmp++;
11117 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11118 if (tmp < ctxt->input->end) tmp++;
11119 } else
11120 tmp++;
11122 if (tmp < ctxt->input->end)
11123 *lastgt = tmp;
11124 else {
11125 tmp = *lastlt;
11126 tmp--;
11127 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11128 if (tmp >= ctxt->input->base)
11129 *lastgt = tmp;
11130 else
11131 *lastgt = NULL;
11134 } else {
11135 *lastlt = NULL;
11136 *lastgt = NULL;
11140 * xmlCheckCdataPush:
11141 * @cur: pointer to the bock of characters
11142 * @len: length of the block in bytes
11144 * Check that the block of characters is okay as SCdata content [20]
11146 * Returns the number of bytes to pass if okay, a negative index where an
11147 * UTF-8 error occured otherwise
11149 static int
11150 xmlCheckCdataPush(const xmlChar *utf, int len) {
11151 int ix;
11152 unsigned char c;
11153 int codepoint;
11155 if ((utf == NULL) || (len <= 0))
11156 return(0);
11158 for (ix = 0; ix < len;) { /* string is 0-terminated */
11159 c = utf[ix];
11160 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11161 if (c >= 0x20)
11162 ix++;
11163 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11164 ix++;
11165 else
11166 return(-ix);
11167 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11168 if (ix + 2 > len) return(ix);
11169 if ((utf[ix+1] & 0xc0 ) != 0x80)
11170 return(-ix);
11171 codepoint = (utf[ix] & 0x1f) << 6;
11172 codepoint |= utf[ix+1] & 0x3f;
11173 if (!xmlIsCharQ(codepoint))
11174 return(-ix);
11175 ix += 2;
11176 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11177 if (ix + 3 > len) return(ix);
11178 if (((utf[ix+1] & 0xc0) != 0x80) ||
11179 ((utf[ix+2] & 0xc0) != 0x80))
11180 return(-ix);
11181 codepoint = (utf[ix] & 0xf) << 12;
11182 codepoint |= (utf[ix+1] & 0x3f) << 6;
11183 codepoint |= utf[ix+2] & 0x3f;
11184 if (!xmlIsCharQ(codepoint))
11185 return(-ix);
11186 ix += 3;
11187 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11188 if (ix + 4 > len) return(ix);
11189 if (((utf[ix+1] & 0xc0) != 0x80) ||
11190 ((utf[ix+2] & 0xc0) != 0x80) ||
11191 ((utf[ix+3] & 0xc0) != 0x80))
11192 return(-ix);
11193 codepoint = (utf[ix] & 0x7) << 18;
11194 codepoint |= (utf[ix+1] & 0x3f) << 12;
11195 codepoint |= (utf[ix+2] & 0x3f) << 6;
11196 codepoint |= utf[ix+3] & 0x3f;
11197 if (!xmlIsCharQ(codepoint))
11198 return(-ix);
11199 ix += 4;
11200 } else /* unknown encoding */
11201 return(-ix);
11203 return(ix);
11207 * xmlParseTryOrFinish:
11208 * @ctxt: an XML parser context
11209 * @terminate: last chunk indicator
11211 * Try to progress on parsing
11213 * Returns zero if no parsing was possible
11215 static int
11216 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11217 int ret = 0;
11218 int avail, tlen;
11219 xmlChar cur, next;
11220 const xmlChar *lastlt, *lastgt;
11222 if (ctxt->input == NULL)
11223 return(0);
11225 #ifdef DEBUG_PUSH
11226 switch (ctxt->instate) {
11227 case XML_PARSER_EOF:
11228 xmlGenericError(xmlGenericErrorContext,
11229 "PP: try EOF\n"); break;
11230 case XML_PARSER_START:
11231 xmlGenericError(xmlGenericErrorContext,
11232 "PP: try START\n"); break;
11233 case XML_PARSER_MISC:
11234 xmlGenericError(xmlGenericErrorContext,
11235 "PP: try MISC\n");break;
11236 case XML_PARSER_COMMENT:
11237 xmlGenericError(xmlGenericErrorContext,
11238 "PP: try COMMENT\n");break;
11239 case XML_PARSER_PROLOG:
11240 xmlGenericError(xmlGenericErrorContext,
11241 "PP: try PROLOG\n");break;
11242 case XML_PARSER_START_TAG:
11243 xmlGenericError(xmlGenericErrorContext,
11244 "PP: try START_TAG\n");break;
11245 case XML_PARSER_CONTENT:
11246 xmlGenericError(xmlGenericErrorContext,
11247 "PP: try CONTENT\n");break;
11248 case XML_PARSER_CDATA_SECTION:
11249 xmlGenericError(xmlGenericErrorContext,
11250 "PP: try CDATA_SECTION\n");break;
11251 case XML_PARSER_END_TAG:
11252 xmlGenericError(xmlGenericErrorContext,
11253 "PP: try END_TAG\n");break;
11254 case XML_PARSER_ENTITY_DECL:
11255 xmlGenericError(xmlGenericErrorContext,
11256 "PP: try ENTITY_DECL\n");break;
11257 case XML_PARSER_ENTITY_VALUE:
11258 xmlGenericError(xmlGenericErrorContext,
11259 "PP: try ENTITY_VALUE\n");break;
11260 case XML_PARSER_ATTRIBUTE_VALUE:
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: try ATTRIBUTE_VALUE\n");break;
11263 case XML_PARSER_DTD:
11264 xmlGenericError(xmlGenericErrorContext,
11265 "PP: try DTD\n");break;
11266 case XML_PARSER_EPILOG:
11267 xmlGenericError(xmlGenericErrorContext,
11268 "PP: try EPILOG\n");break;
11269 case XML_PARSER_PI:
11270 xmlGenericError(xmlGenericErrorContext,
11271 "PP: try PI\n");break;
11272 case XML_PARSER_IGNORE:
11273 xmlGenericError(xmlGenericErrorContext,
11274 "PP: try IGNORE\n");break;
11276 #endif
11278 if ((ctxt->input != NULL) &&
11279 (ctxt->input->cur - ctxt->input->base > 4096)) {
11280 xmlSHRINK(ctxt);
11281 ctxt->checkIndex = 0;
11283 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11285 while (ctxt->instate != XML_PARSER_EOF) {
11286 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11287 return(0);
11291 * Pop-up of finished entities.
11293 while ((RAW == 0) && (ctxt->inputNr > 1))
11294 xmlPopInput(ctxt);
11296 if (ctxt->input == NULL) break;
11297 if (ctxt->input->buf == NULL)
11298 avail = ctxt->input->length -
11299 (ctxt->input->cur - ctxt->input->base);
11300 else {
11302 * If we are operating on converted input, try to flush
11303 * remainng chars to avoid them stalling in the non-converted
11304 * buffer. But do not do this in document start where
11305 * encoding="..." may not have been read and we work on a
11306 * guessed encoding.
11308 if ((ctxt->instate != XML_PARSER_START) &&
11309 (ctxt->input->buf->raw != NULL) &&
11310 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11311 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11312 ctxt->input);
11313 size_t current = ctxt->input->cur - ctxt->input->base;
11315 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11316 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11317 base, current);
11319 avail = xmlBufUse(ctxt->input->buf->buffer) -
11320 (ctxt->input->cur - ctxt->input->base);
11322 if (avail < 1)
11323 goto done;
11324 switch (ctxt->instate) {
11325 case XML_PARSER_EOF:
11327 * Document parsing is done !
11329 goto done;
11330 case XML_PARSER_START:
11331 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11332 xmlChar start[4];
11333 xmlCharEncoding enc;
11336 * Very first chars read from the document flow.
11338 if (avail < 4)
11339 goto done;
11342 * Get the 4 first bytes and decode the charset
11343 * if enc != XML_CHAR_ENCODING_NONE
11344 * plug some encoding conversion routines,
11345 * else xmlSwitchEncoding will set to (default)
11346 * UTF8.
11348 start[0] = RAW;
11349 start[1] = NXT(1);
11350 start[2] = NXT(2);
11351 start[3] = NXT(3);
11352 enc = xmlDetectCharEncoding(start, 4);
11353 xmlSwitchEncoding(ctxt, enc);
11354 break;
11357 if (avail < 2)
11358 goto done;
11359 cur = ctxt->input->cur[0];
11360 next = ctxt->input->cur[1];
11361 if (cur == 0) {
11362 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11363 ctxt->sax->setDocumentLocator(ctxt->userData,
11364 &xmlDefaultSAXLocator);
11365 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11366 ctxt->instate = XML_PARSER_EOF;
11367 #ifdef DEBUG_PUSH
11368 xmlGenericError(xmlGenericErrorContext,
11369 "PP: entering EOF\n");
11370 #endif
11371 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11372 ctxt->sax->endDocument(ctxt->userData);
11373 goto done;
11375 if ((cur == '<') && (next == '?')) {
11376 /* PI or XML decl */
11377 if (avail < 5) return(ret);
11378 if ((!terminate) &&
11379 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11380 return(ret);
11381 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11382 ctxt->sax->setDocumentLocator(ctxt->userData,
11383 &xmlDefaultSAXLocator);
11384 if ((ctxt->input->cur[2] == 'x') &&
11385 (ctxt->input->cur[3] == 'm') &&
11386 (ctxt->input->cur[4] == 'l') &&
11387 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11388 ret += 5;
11389 #ifdef DEBUG_PUSH
11390 xmlGenericError(xmlGenericErrorContext,
11391 "PP: Parsing XML Decl\n");
11392 #endif
11393 xmlParseXMLDecl(ctxt);
11394 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11396 * The XML REC instructs us to stop parsing right
11397 * here
11399 ctxt->instate = XML_PARSER_EOF;
11400 return(0);
11402 ctxt->standalone = ctxt->input->standalone;
11403 if ((ctxt->encoding == NULL) &&
11404 (ctxt->input->encoding != NULL))
11405 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11406 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11407 (!ctxt->disableSAX))
11408 ctxt->sax->startDocument(ctxt->userData);
11409 ctxt->instate = XML_PARSER_MISC;
11410 #ifdef DEBUG_PUSH
11411 xmlGenericError(xmlGenericErrorContext,
11412 "PP: entering MISC\n");
11413 #endif
11414 } else {
11415 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11416 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11417 (!ctxt->disableSAX))
11418 ctxt->sax->startDocument(ctxt->userData);
11419 ctxt->instate = XML_PARSER_MISC;
11420 #ifdef DEBUG_PUSH
11421 xmlGenericError(xmlGenericErrorContext,
11422 "PP: entering MISC\n");
11423 #endif
11425 } else {
11426 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11427 ctxt->sax->setDocumentLocator(ctxt->userData,
11428 &xmlDefaultSAXLocator);
11429 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11430 if (ctxt->version == NULL) {
11431 xmlErrMemory(ctxt, NULL);
11432 break;
11434 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11435 (!ctxt->disableSAX))
11436 ctxt->sax->startDocument(ctxt->userData);
11437 ctxt->instate = XML_PARSER_MISC;
11438 #ifdef DEBUG_PUSH
11439 xmlGenericError(xmlGenericErrorContext,
11440 "PP: entering MISC\n");
11441 #endif
11443 break;
11444 case XML_PARSER_START_TAG: {
11445 const xmlChar *name;
11446 const xmlChar *prefix = NULL;
11447 const xmlChar *URI = NULL;
11448 int nsNr = ctxt->nsNr;
11450 if ((avail < 2) && (ctxt->inputNr == 1))
11451 goto done;
11452 cur = ctxt->input->cur[0];
11453 if (cur != '<') {
11454 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11455 ctxt->instate = XML_PARSER_EOF;
11456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11457 ctxt->sax->endDocument(ctxt->userData);
11458 goto done;
11460 if (!terminate) {
11461 if (ctxt->progressive) {
11462 /* > can be found unescaped in attribute values */
11463 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11464 goto done;
11465 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11466 goto done;
11469 if (ctxt->spaceNr == 0)
11470 spacePush(ctxt, -1);
11471 else if (*ctxt->space == -2)
11472 spacePush(ctxt, -1);
11473 else
11474 spacePush(ctxt, *ctxt->space);
11475 #ifdef LIBXML_SAX1_ENABLED
11476 if (ctxt->sax2)
11477 #endif /* LIBXML_SAX1_ENABLED */
11478 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11479 #ifdef LIBXML_SAX1_ENABLED
11480 else
11481 name = xmlParseStartTag(ctxt);
11482 #endif /* LIBXML_SAX1_ENABLED */
11483 if (ctxt->instate == XML_PARSER_EOF)
11484 goto done;
11485 if (name == NULL) {
11486 spacePop(ctxt);
11487 ctxt->instate = XML_PARSER_EOF;
11488 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11489 ctxt->sax->endDocument(ctxt->userData);
11490 goto done;
11492 #ifdef LIBXML_VALID_ENABLED
11494 * [ VC: Root Element Type ]
11495 * The Name in the document type declaration must match
11496 * the element type of the root element.
11498 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11499 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11500 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11501 #endif /* LIBXML_VALID_ENABLED */
11504 * Check for an Empty Element.
11506 if ((RAW == '/') && (NXT(1) == '>')) {
11507 SKIP(2);
11509 if (ctxt->sax2) {
11510 if ((ctxt->sax != NULL) &&
11511 (ctxt->sax->endElementNs != NULL) &&
11512 (!ctxt->disableSAX))
11513 ctxt->sax->endElementNs(ctxt->userData, name,
11514 prefix, URI);
11515 if (ctxt->nsNr - nsNr > 0)
11516 nsPop(ctxt, ctxt->nsNr - nsNr);
11517 #ifdef LIBXML_SAX1_ENABLED
11518 } else {
11519 if ((ctxt->sax != NULL) &&
11520 (ctxt->sax->endElement != NULL) &&
11521 (!ctxt->disableSAX))
11522 ctxt->sax->endElement(ctxt->userData, name);
11523 #endif /* LIBXML_SAX1_ENABLED */
11525 if (ctxt->instate == XML_PARSER_EOF)
11526 goto done;
11527 spacePop(ctxt);
11528 if (ctxt->nameNr == 0) {
11529 ctxt->instate = XML_PARSER_EPILOG;
11530 } else {
11531 ctxt->instate = XML_PARSER_CONTENT;
11533 ctxt->progressive = 1;
11534 break;
11536 if (RAW == '>') {
11537 NEXT;
11538 } else {
11539 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11540 "Couldn't find end of Start Tag %s\n",
11541 name);
11542 nodePop(ctxt);
11543 spacePop(ctxt);
11545 if (ctxt->sax2)
11546 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11547 #ifdef LIBXML_SAX1_ENABLED
11548 else
11549 namePush(ctxt, name);
11550 #endif /* LIBXML_SAX1_ENABLED */
11552 ctxt->instate = XML_PARSER_CONTENT;
11553 ctxt->progressive = 1;
11554 break;
11556 case XML_PARSER_CONTENT: {
11557 const xmlChar *test;
11558 unsigned int cons;
11559 if ((avail < 2) && (ctxt->inputNr == 1))
11560 goto done;
11561 cur = ctxt->input->cur[0];
11562 next = ctxt->input->cur[1];
11564 test = CUR_PTR;
11565 cons = ctxt->input->consumed;
11566 if ((cur == '<') && (next == '/')) {
11567 ctxt->instate = XML_PARSER_END_TAG;
11568 break;
11569 } else if ((cur == '<') && (next == '?')) {
11570 if ((!terminate) &&
11571 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11572 ctxt->progressive = XML_PARSER_PI;
11573 goto done;
11575 xmlParsePI(ctxt);
11576 ctxt->instate = XML_PARSER_CONTENT;
11577 ctxt->progressive = 1;
11578 } else if ((cur == '<') && (next != '!')) {
11579 ctxt->instate = XML_PARSER_START_TAG;
11580 break;
11581 } else if ((cur == '<') && (next == '!') &&
11582 (ctxt->input->cur[2] == '-') &&
11583 (ctxt->input->cur[3] == '-')) {
11584 int term;
11586 if (avail < 4)
11587 goto done;
11588 ctxt->input->cur += 4;
11589 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11590 ctxt->input->cur -= 4;
11591 if ((!terminate) && (term < 0)) {
11592 ctxt->progressive = XML_PARSER_COMMENT;
11593 goto done;
11595 xmlParseComment(ctxt);
11596 ctxt->instate = XML_PARSER_CONTENT;
11597 ctxt->progressive = 1;
11598 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11599 (ctxt->input->cur[2] == '[') &&
11600 (ctxt->input->cur[3] == 'C') &&
11601 (ctxt->input->cur[4] == 'D') &&
11602 (ctxt->input->cur[5] == 'A') &&
11603 (ctxt->input->cur[6] == 'T') &&
11604 (ctxt->input->cur[7] == 'A') &&
11605 (ctxt->input->cur[8] == '[')) {
11606 SKIP(9);
11607 ctxt->instate = XML_PARSER_CDATA_SECTION;
11608 break;
11609 } else if ((cur == '<') && (next == '!') &&
11610 (avail < 9)) {
11611 goto done;
11612 } else if (cur == '&') {
11613 if ((!terminate) &&
11614 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11615 goto done;
11616 xmlParseReference(ctxt);
11617 } else {
11618 /* TODO Avoid the extra copy, handle directly !!! */
11620 * Goal of the following test is:
11621 * - minimize calls to the SAX 'character' callback
11622 * when they are mergeable
11623 * - handle an problem for isBlank when we only parse
11624 * a sequence of blank chars and the next one is
11625 * not available to check against '<' presence.
11626 * - tries to homogenize the differences in SAX
11627 * callbacks between the push and pull versions
11628 * of the parser.
11630 if ((ctxt->inputNr == 1) &&
11631 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11632 if (!terminate) {
11633 if (ctxt->progressive) {
11634 if ((lastlt == NULL) ||
11635 (ctxt->input->cur > lastlt))
11636 goto done;
11637 } else if (xmlParseLookupSequence(ctxt,
11638 '<', 0, 0) < 0) {
11639 goto done;
11643 ctxt->checkIndex = 0;
11644 xmlParseCharData(ctxt, 0);
11647 * Pop-up of finished entities.
11649 while ((RAW == 0) && (ctxt->inputNr > 1))
11650 xmlPopInput(ctxt);
11651 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11652 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11653 "detected an error in element content\n");
11654 ctxt->instate = XML_PARSER_EOF;
11655 break;
11657 break;
11659 case XML_PARSER_END_TAG:
11660 if (avail < 2)
11661 goto done;
11662 if (!terminate) {
11663 if (ctxt->progressive) {
11664 /* > can be found unescaped in attribute values */
11665 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11666 goto done;
11667 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11668 goto done;
11671 if (ctxt->sax2) {
11672 xmlParseEndTag2(ctxt,
11673 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11674 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11675 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11676 nameNsPop(ctxt);
11678 #ifdef LIBXML_SAX1_ENABLED
11679 else
11680 xmlParseEndTag1(ctxt, 0);
11681 #endif /* LIBXML_SAX1_ENABLED */
11682 if (ctxt->instate == XML_PARSER_EOF) {
11683 /* Nothing */
11684 } else if (ctxt->nameNr == 0) {
11685 ctxt->instate = XML_PARSER_EPILOG;
11686 } else {
11687 ctxt->instate = XML_PARSER_CONTENT;
11689 break;
11690 case XML_PARSER_CDATA_SECTION: {
11692 * The Push mode need to have the SAX callback for
11693 * cdataBlock merge back contiguous callbacks.
11695 int base;
11697 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11698 if (base < 0) {
11699 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11700 int tmp;
11702 tmp = xmlCheckCdataPush(ctxt->input->cur,
11703 XML_PARSER_BIG_BUFFER_SIZE);
11704 if (tmp < 0) {
11705 tmp = -tmp;
11706 ctxt->input->cur += tmp;
11707 goto encoding_error;
11709 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11710 if (ctxt->sax->cdataBlock != NULL)
11711 ctxt->sax->cdataBlock(ctxt->userData,
11712 ctxt->input->cur, tmp);
11713 else if (ctxt->sax->characters != NULL)
11714 ctxt->sax->characters(ctxt->userData,
11715 ctxt->input->cur, tmp);
11717 if (ctxt->instate == XML_PARSER_EOF)
11718 goto done;
11719 SKIPL(tmp);
11720 ctxt->checkIndex = 0;
11722 goto done;
11723 } else {
11724 int tmp;
11726 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11727 if ((tmp < 0) || (tmp != base)) {
11728 tmp = -tmp;
11729 ctxt->input->cur += tmp;
11730 goto encoding_error;
11732 if ((ctxt->sax != NULL) && (base == 0) &&
11733 (ctxt->sax->cdataBlock != NULL) &&
11734 (!ctxt->disableSAX)) {
11736 * Special case to provide identical behaviour
11737 * between pull and push parsers on enpty CDATA
11738 * sections
11740 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11741 (!strncmp((const char *)&ctxt->input->cur[-9],
11742 "<![CDATA[", 9)))
11743 ctxt->sax->cdataBlock(ctxt->userData,
11744 BAD_CAST "", 0);
11745 } else if ((ctxt->sax != NULL) && (base > 0) &&
11746 (!ctxt->disableSAX)) {
11747 if (ctxt->sax->cdataBlock != NULL)
11748 ctxt->sax->cdataBlock(ctxt->userData,
11749 ctxt->input->cur, base);
11750 else if (ctxt->sax->characters != NULL)
11751 ctxt->sax->characters(ctxt->userData,
11752 ctxt->input->cur, base);
11754 if (ctxt->instate == XML_PARSER_EOF)
11755 goto done;
11756 SKIPL(base + 3);
11757 ctxt->checkIndex = 0;
11758 ctxt->instate = XML_PARSER_CONTENT;
11759 #ifdef DEBUG_PUSH
11760 xmlGenericError(xmlGenericErrorContext,
11761 "PP: entering CONTENT\n");
11762 #endif
11764 break;
11766 case XML_PARSER_MISC:
11767 SKIP_BLANKS;
11768 if (ctxt->input->buf == NULL)
11769 avail = ctxt->input->length -
11770 (ctxt->input->cur - ctxt->input->base);
11771 else
11772 avail = xmlBufUse(ctxt->input->buf->buffer) -
11773 (ctxt->input->cur - ctxt->input->base);
11774 if (avail < 2)
11775 goto done;
11776 cur = ctxt->input->cur[0];
11777 next = ctxt->input->cur[1];
11778 if ((cur == '<') && (next == '?')) {
11779 if ((!terminate) &&
11780 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11781 ctxt->progressive = XML_PARSER_PI;
11782 goto done;
11784 #ifdef DEBUG_PUSH
11785 xmlGenericError(xmlGenericErrorContext,
11786 "PP: Parsing PI\n");
11787 #endif
11788 xmlParsePI(ctxt);
11789 if (ctxt->instate == XML_PARSER_EOF)
11790 goto done;
11791 ctxt->instate = XML_PARSER_MISC;
11792 ctxt->progressive = 1;
11793 ctxt->checkIndex = 0;
11794 } else if ((cur == '<') && (next == '!') &&
11795 (ctxt->input->cur[2] == '-') &&
11796 (ctxt->input->cur[3] == '-')) {
11797 if ((!terminate) &&
11798 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11799 ctxt->progressive = XML_PARSER_COMMENT;
11800 goto done;
11802 #ifdef DEBUG_PUSH
11803 xmlGenericError(xmlGenericErrorContext,
11804 "PP: Parsing Comment\n");
11805 #endif
11806 xmlParseComment(ctxt);
11807 if (ctxt->instate == XML_PARSER_EOF)
11808 goto done;
11809 ctxt->instate = XML_PARSER_MISC;
11810 ctxt->progressive = 1;
11811 ctxt->checkIndex = 0;
11812 } else if ((cur == '<') && (next == '!') &&
11813 (ctxt->input->cur[2] == 'D') &&
11814 (ctxt->input->cur[3] == 'O') &&
11815 (ctxt->input->cur[4] == 'C') &&
11816 (ctxt->input->cur[5] == 'T') &&
11817 (ctxt->input->cur[6] == 'Y') &&
11818 (ctxt->input->cur[7] == 'P') &&
11819 (ctxt->input->cur[8] == 'E')) {
11820 if ((!terminate) &&
11821 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11822 ctxt->progressive = XML_PARSER_DTD;
11823 goto done;
11825 #ifdef DEBUG_PUSH
11826 xmlGenericError(xmlGenericErrorContext,
11827 "PP: Parsing internal subset\n");
11828 #endif
11829 ctxt->inSubset = 1;
11830 ctxt->progressive = 0;
11831 ctxt->checkIndex = 0;
11832 xmlParseDocTypeDecl(ctxt);
11833 if (ctxt->instate == XML_PARSER_EOF)
11834 goto done;
11835 if (RAW == '[') {
11836 ctxt->instate = XML_PARSER_DTD;
11837 #ifdef DEBUG_PUSH
11838 xmlGenericError(xmlGenericErrorContext,
11839 "PP: entering DTD\n");
11840 #endif
11841 } else {
11843 * Create and update the external subset.
11845 ctxt->inSubset = 2;
11846 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11847 (ctxt->sax->externalSubset != NULL))
11848 ctxt->sax->externalSubset(ctxt->userData,
11849 ctxt->intSubName, ctxt->extSubSystem,
11850 ctxt->extSubURI);
11851 ctxt->inSubset = 0;
11852 xmlCleanSpecialAttr(ctxt);
11853 ctxt->instate = XML_PARSER_PROLOG;
11854 #ifdef DEBUG_PUSH
11855 xmlGenericError(xmlGenericErrorContext,
11856 "PP: entering PROLOG\n");
11857 #endif
11859 } else if ((cur == '<') && (next == '!') &&
11860 (avail < 9)) {
11861 goto done;
11862 } else {
11863 ctxt->instate = XML_PARSER_START_TAG;
11864 ctxt->progressive = XML_PARSER_START_TAG;
11865 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11866 #ifdef DEBUG_PUSH
11867 xmlGenericError(xmlGenericErrorContext,
11868 "PP: entering START_TAG\n");
11869 #endif
11871 break;
11872 case XML_PARSER_PROLOG:
11873 SKIP_BLANKS;
11874 if (ctxt->input->buf == NULL)
11875 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11876 else
11877 avail = xmlBufUse(ctxt->input->buf->buffer) -
11878 (ctxt->input->cur - ctxt->input->base);
11879 if (avail < 2)
11880 goto done;
11881 cur = ctxt->input->cur[0];
11882 next = ctxt->input->cur[1];
11883 if ((cur == '<') && (next == '?')) {
11884 if ((!terminate) &&
11885 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11886 ctxt->progressive = XML_PARSER_PI;
11887 goto done;
11889 #ifdef DEBUG_PUSH
11890 xmlGenericError(xmlGenericErrorContext,
11891 "PP: Parsing PI\n");
11892 #endif
11893 xmlParsePI(ctxt);
11894 if (ctxt->instate == XML_PARSER_EOF)
11895 goto done;
11896 ctxt->instate = XML_PARSER_PROLOG;
11897 ctxt->progressive = 1;
11898 } else if ((cur == '<') && (next == '!') &&
11899 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11900 if ((!terminate) &&
11901 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11902 ctxt->progressive = XML_PARSER_COMMENT;
11903 goto done;
11905 #ifdef DEBUG_PUSH
11906 xmlGenericError(xmlGenericErrorContext,
11907 "PP: Parsing Comment\n");
11908 #endif
11909 xmlParseComment(ctxt);
11910 if (ctxt->instate == XML_PARSER_EOF)
11911 goto done;
11912 ctxt->instate = XML_PARSER_PROLOG;
11913 ctxt->progressive = 1;
11914 } else if ((cur == '<') && (next == '!') &&
11915 (avail < 4)) {
11916 goto done;
11917 } else {
11918 ctxt->instate = XML_PARSER_START_TAG;
11919 if (ctxt->progressive == 0)
11920 ctxt->progressive = XML_PARSER_START_TAG;
11921 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11922 #ifdef DEBUG_PUSH
11923 xmlGenericError(xmlGenericErrorContext,
11924 "PP: entering START_TAG\n");
11925 #endif
11927 break;
11928 case XML_PARSER_EPILOG:
11929 SKIP_BLANKS;
11930 if (ctxt->input->buf == NULL)
11931 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11932 else
11933 avail = xmlBufUse(ctxt->input->buf->buffer) -
11934 (ctxt->input->cur - ctxt->input->base);
11935 if (avail < 2)
11936 goto done;
11937 cur = ctxt->input->cur[0];
11938 next = ctxt->input->cur[1];
11939 if ((cur == '<') && (next == '?')) {
11940 if ((!terminate) &&
11941 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11942 ctxt->progressive = XML_PARSER_PI;
11943 goto done;
11945 #ifdef DEBUG_PUSH
11946 xmlGenericError(xmlGenericErrorContext,
11947 "PP: Parsing PI\n");
11948 #endif
11949 xmlParsePI(ctxt);
11950 if (ctxt->instate == XML_PARSER_EOF)
11951 goto done;
11952 ctxt->instate = XML_PARSER_EPILOG;
11953 ctxt->progressive = 1;
11954 } else if ((cur == '<') && (next == '!') &&
11955 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11956 if ((!terminate) &&
11957 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11958 ctxt->progressive = XML_PARSER_COMMENT;
11959 goto done;
11961 #ifdef DEBUG_PUSH
11962 xmlGenericError(xmlGenericErrorContext,
11963 "PP: Parsing Comment\n");
11964 #endif
11965 xmlParseComment(ctxt);
11966 if (ctxt->instate == XML_PARSER_EOF)
11967 goto done;
11968 ctxt->instate = XML_PARSER_EPILOG;
11969 ctxt->progressive = 1;
11970 } else if ((cur == '<') && (next == '!') &&
11971 (avail < 4)) {
11972 goto done;
11973 } else {
11974 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11975 ctxt->instate = XML_PARSER_EOF;
11976 #ifdef DEBUG_PUSH
11977 xmlGenericError(xmlGenericErrorContext,
11978 "PP: entering EOF\n");
11979 #endif
11980 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11981 ctxt->sax->endDocument(ctxt->userData);
11982 goto done;
11984 break;
11985 case XML_PARSER_DTD: {
11987 * Sorry but progressive parsing of the internal subset
11988 * is not expected to be supported. We first check that
11989 * the full content of the internal subset is available and
11990 * the parsing is launched only at that point.
11991 * Internal subset ends up with "']' S? '>'" in an unescaped
11992 * section and not in a ']]>' sequence which are conditional
11993 * sections (whoever argued to keep that crap in XML deserve
11994 * a place in hell !).
11996 int base, i;
11997 xmlChar *buf;
11998 xmlChar quote = 0;
11999 size_t use;
12001 base = ctxt->input->cur - ctxt->input->base;
12002 if (base < 0) return(0);
12003 if (ctxt->checkIndex > base)
12004 base = ctxt->checkIndex;
12005 buf = xmlBufContent(ctxt->input->buf->buffer);
12006 use = xmlBufUse(ctxt->input->buf->buffer);
12007 for (;(unsigned int) base < use; base++) {
12008 if (quote != 0) {
12009 if (buf[base] == quote)
12010 quote = 0;
12011 continue;
12013 if ((quote == 0) && (buf[base] == '<')) {
12014 int found = 0;
12015 /* special handling of comments */
12016 if (((unsigned int) base + 4 < use) &&
12017 (buf[base + 1] == '!') &&
12018 (buf[base + 2] == '-') &&
12019 (buf[base + 3] == '-')) {
12020 for (;(unsigned int) base + 3 < use; base++) {
12021 if ((buf[base] == '-') &&
12022 (buf[base + 1] == '-') &&
12023 (buf[base + 2] == '>')) {
12024 found = 1;
12025 base += 2;
12026 break;
12029 if (!found) {
12030 #if 0
12031 fprintf(stderr, "unfinished comment\n");
12032 #endif
12033 break; /* for */
12035 continue;
12038 if (buf[base] == '"') {
12039 quote = '"';
12040 continue;
12042 if (buf[base] == '\'') {
12043 quote = '\'';
12044 continue;
12046 if (buf[base] == ']') {
12047 #if 0
12048 fprintf(stderr, "%c%c%c%c: ", buf[base],
12049 buf[base + 1], buf[base + 2], buf[base + 3]);
12050 #endif
12051 if ((unsigned int) base +1 >= use)
12052 break;
12053 if (buf[base + 1] == ']') {
12054 /* conditional crap, skip both ']' ! */
12055 base++;
12056 continue;
12058 for (i = 1; (unsigned int) base + i < use; i++) {
12059 if (buf[base + i] == '>') {
12060 #if 0
12061 fprintf(stderr, "found\n");
12062 #endif
12063 goto found_end_int_subset;
12065 if (!IS_BLANK_CH(buf[base + i])) {
12066 #if 0
12067 fprintf(stderr, "not found\n");
12068 #endif
12069 goto not_end_of_int_subset;
12072 #if 0
12073 fprintf(stderr, "end of stream\n");
12074 #endif
12075 break;
12078 not_end_of_int_subset:
12079 continue; /* for */
12082 * We didn't found the end of the Internal subset
12084 if (quote == 0)
12085 ctxt->checkIndex = base;
12086 else
12087 ctxt->checkIndex = 0;
12088 #ifdef DEBUG_PUSH
12089 if (next == 0)
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: lookup of int subset end filed\n");
12092 #endif
12093 goto done;
12095 found_end_int_subset:
12096 ctxt->checkIndex = 0;
12097 xmlParseInternalSubset(ctxt);
12098 if (ctxt->instate == XML_PARSER_EOF)
12099 goto done;
12100 ctxt->inSubset = 2;
12101 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12102 (ctxt->sax->externalSubset != NULL))
12103 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12104 ctxt->extSubSystem, ctxt->extSubURI);
12105 ctxt->inSubset = 0;
12106 xmlCleanSpecialAttr(ctxt);
12107 if (ctxt->instate == XML_PARSER_EOF)
12108 goto done;
12109 ctxt->instate = XML_PARSER_PROLOG;
12110 ctxt->checkIndex = 0;
12111 #ifdef DEBUG_PUSH
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: entering PROLOG\n");
12114 #endif
12115 break;
12117 case XML_PARSER_COMMENT:
12118 xmlGenericError(xmlGenericErrorContext,
12119 "PP: internal error, state == COMMENT\n");
12120 ctxt->instate = XML_PARSER_CONTENT;
12121 #ifdef DEBUG_PUSH
12122 xmlGenericError(xmlGenericErrorContext,
12123 "PP: entering CONTENT\n");
12124 #endif
12125 break;
12126 case XML_PARSER_IGNORE:
12127 xmlGenericError(xmlGenericErrorContext,
12128 "PP: internal error, state == IGNORE");
12129 ctxt->instate = XML_PARSER_DTD;
12130 #ifdef DEBUG_PUSH
12131 xmlGenericError(xmlGenericErrorContext,
12132 "PP: entering DTD\n");
12133 #endif
12134 break;
12135 case XML_PARSER_PI:
12136 xmlGenericError(xmlGenericErrorContext,
12137 "PP: internal error, state == PI\n");
12138 ctxt->instate = XML_PARSER_CONTENT;
12139 #ifdef DEBUG_PUSH
12140 xmlGenericError(xmlGenericErrorContext,
12141 "PP: entering CONTENT\n");
12142 #endif
12143 break;
12144 case XML_PARSER_ENTITY_DECL:
12145 xmlGenericError(xmlGenericErrorContext,
12146 "PP: internal error, state == ENTITY_DECL\n");
12147 ctxt->instate = XML_PARSER_DTD;
12148 #ifdef DEBUG_PUSH
12149 xmlGenericError(xmlGenericErrorContext,
12150 "PP: entering DTD\n");
12151 #endif
12152 break;
12153 case XML_PARSER_ENTITY_VALUE:
12154 xmlGenericError(xmlGenericErrorContext,
12155 "PP: internal error, state == ENTITY_VALUE\n");
12156 ctxt->instate = XML_PARSER_CONTENT;
12157 #ifdef DEBUG_PUSH
12158 xmlGenericError(xmlGenericErrorContext,
12159 "PP: entering DTD\n");
12160 #endif
12161 break;
12162 case XML_PARSER_ATTRIBUTE_VALUE:
12163 xmlGenericError(xmlGenericErrorContext,
12164 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12165 ctxt->instate = XML_PARSER_START_TAG;
12166 #ifdef DEBUG_PUSH
12167 xmlGenericError(xmlGenericErrorContext,
12168 "PP: entering START_TAG\n");
12169 #endif
12170 break;
12171 case XML_PARSER_SYSTEM_LITERAL:
12172 xmlGenericError(xmlGenericErrorContext,
12173 "PP: internal error, state == SYSTEM_LITERAL\n");
12174 ctxt->instate = XML_PARSER_START_TAG;
12175 #ifdef DEBUG_PUSH
12176 xmlGenericError(xmlGenericErrorContext,
12177 "PP: entering START_TAG\n");
12178 #endif
12179 break;
12180 case XML_PARSER_PUBLIC_LITERAL:
12181 xmlGenericError(xmlGenericErrorContext,
12182 "PP: internal error, state == PUBLIC_LITERAL\n");
12183 ctxt->instate = XML_PARSER_START_TAG;
12184 #ifdef DEBUG_PUSH
12185 xmlGenericError(xmlGenericErrorContext,
12186 "PP: entering START_TAG\n");
12187 #endif
12188 break;
12191 done:
12192 #ifdef DEBUG_PUSH
12193 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12194 #endif
12195 return(ret);
12196 encoding_error:
12198 char buffer[150];
12200 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12201 ctxt->input->cur[0], ctxt->input->cur[1],
12202 ctxt->input->cur[2], ctxt->input->cur[3]);
12203 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12204 "Input is not proper UTF-8, indicate encoding !\n%s",
12205 BAD_CAST buffer, NULL);
12207 return(0);
12211 * xmlParseCheckTransition:
12212 * @ctxt: an XML parser context
12213 * @chunk: a char array
12214 * @size: the size in byte of the chunk
12216 * Check depending on the current parser state if the chunk given must be
12217 * processed immediately or one need more data to advance on parsing.
12219 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12221 static int
12222 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12223 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12224 return(-1);
12225 if (ctxt->instate == XML_PARSER_START_TAG) {
12226 if (memchr(chunk, '>', size) != NULL)
12227 return(1);
12228 return(0);
12230 if (ctxt->progressive == XML_PARSER_COMMENT) {
12231 if (memchr(chunk, '>', size) != NULL)
12232 return(1);
12233 return(0);
12235 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12236 if (memchr(chunk, '>', size) != NULL)
12237 return(1);
12238 return(0);
12240 if (ctxt->progressive == XML_PARSER_PI) {
12241 if (memchr(chunk, '>', size) != NULL)
12242 return(1);
12243 return(0);
12245 if (ctxt->instate == XML_PARSER_END_TAG) {
12246 if (memchr(chunk, '>', size) != NULL)
12247 return(1);
12248 return(0);
12250 if ((ctxt->progressive == XML_PARSER_DTD) ||
12251 (ctxt->instate == XML_PARSER_DTD)) {
12252 if (memchr(chunk, '>', size) != NULL)
12253 return(1);
12254 return(0);
12256 return(1);
12260 * xmlParseChunk:
12261 * @ctxt: an XML parser context
12262 * @chunk: an char array
12263 * @size: the size in byte of the chunk
12264 * @terminate: last chunk indicator
12266 * Parse a Chunk of memory
12268 * Returns zero if no error, the xmlParserErrors otherwise.
12271 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12272 int terminate) {
12273 int end_in_lf = 0;
12274 int remain = 0;
12275 size_t old_avail = 0;
12276 size_t avail = 0;
12278 if (ctxt == NULL)
12279 return(XML_ERR_INTERNAL_ERROR);
12280 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12281 return(ctxt->errNo);
12282 if (ctxt->instate == XML_PARSER_EOF)
12283 return(-1);
12284 if (ctxt->instate == XML_PARSER_START)
12285 xmlDetectSAX2(ctxt);
12286 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12287 (chunk[size - 1] == '\r')) {
12288 end_in_lf = 1;
12289 size--;
12292 xmldecl_done:
12294 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12295 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12296 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12297 size_t cur = ctxt->input->cur - ctxt->input->base;
12298 int res;
12300 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12302 * Specific handling if we autodetected an encoding, we should not
12303 * push more than the first line ... which depend on the encoding
12304 * And only push the rest once the final encoding was detected
12306 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12307 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12308 unsigned int len = 45;
12310 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12311 BAD_CAST "UTF-16")) ||
12312 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12313 BAD_CAST "UTF16")))
12314 len = 90;
12315 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12316 BAD_CAST "UCS-4")) ||
12317 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12318 BAD_CAST "UCS4")))
12319 len = 180;
12321 if (ctxt->input->buf->rawconsumed < len)
12322 len -= ctxt->input->buf->rawconsumed;
12325 * Change size for reading the initial declaration only
12326 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12327 * will blindly copy extra bytes from memory.
12329 if ((unsigned int) size > len) {
12330 remain = size - len;
12331 size = len;
12332 } else {
12333 remain = 0;
12336 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12337 if (res < 0) {
12338 ctxt->errNo = XML_PARSER_EOF;
12339 ctxt->disableSAX = 1;
12340 return (XML_PARSER_EOF);
12342 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12343 #ifdef DEBUG_PUSH
12344 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12345 #endif
12347 } else if (ctxt->instate != XML_PARSER_EOF) {
12348 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12349 xmlParserInputBufferPtr in = ctxt->input->buf;
12350 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12351 (in->raw != NULL)) {
12352 int nbchars;
12353 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12354 size_t current = ctxt->input->cur - ctxt->input->base;
12356 nbchars = xmlCharEncInput(in, terminate);
12357 if (nbchars < 0) {
12358 /* TODO 2.6.0 */
12359 xmlGenericError(xmlGenericErrorContext,
12360 "xmlParseChunk: encoder error\n");
12361 return(XML_ERR_INVALID_ENCODING);
12363 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12367 if (remain != 0) {
12368 xmlParseTryOrFinish(ctxt, 0);
12369 } else {
12370 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12371 avail = xmlBufUse(ctxt->input->buf->buffer);
12373 * Depending on the current state it may not be such
12374 * a good idea to try parsing if there is nothing in the chunk
12375 * which would be worth doing a parser state transition and we
12376 * need to wait for more data
12378 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12379 (old_avail == 0) || (avail == 0) ||
12380 (xmlParseCheckTransition(ctxt,
12381 (const char *)&ctxt->input->base[old_avail],
12382 avail - old_avail)))
12383 xmlParseTryOrFinish(ctxt, terminate);
12385 if (ctxt->instate == XML_PARSER_EOF)
12386 return(ctxt->errNo);
12388 if ((ctxt->input != NULL) &&
12389 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12390 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12391 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12392 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12393 ctxt->instate = XML_PARSER_EOF;
12395 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12396 return(ctxt->errNo);
12398 if (remain != 0) {
12399 chunk += size;
12400 size = remain;
12401 remain = 0;
12402 goto xmldecl_done;
12404 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12405 (ctxt->input->buf != NULL)) {
12406 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12407 ctxt->input);
12408 size_t current = ctxt->input->cur - ctxt->input->base;
12410 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12412 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12413 base, current);
12415 if (terminate) {
12417 * Check for termination
12419 int cur_avail = 0;
12421 if (ctxt->input != NULL) {
12422 if (ctxt->input->buf == NULL)
12423 cur_avail = ctxt->input->length -
12424 (ctxt->input->cur - ctxt->input->base);
12425 else
12426 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12427 (ctxt->input->cur - ctxt->input->base);
12430 if ((ctxt->instate != XML_PARSER_EOF) &&
12431 (ctxt->instate != XML_PARSER_EPILOG)) {
12432 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12434 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12435 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12437 if (ctxt->instate != XML_PARSER_EOF) {
12438 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12439 ctxt->sax->endDocument(ctxt->userData);
12441 ctxt->instate = XML_PARSER_EOF;
12443 if (ctxt->wellFormed == 0)
12444 return((xmlParserErrors) ctxt->errNo);
12445 else
12446 return(0);
12449 /************************************************************************
12451 * I/O front end functions to the parser *
12453 ************************************************************************/
12456 * xmlCreatePushParserCtxt:
12457 * @sax: a SAX handler
12458 * @user_data: The user data returned on SAX callbacks
12459 * @chunk: a pointer to an array of chars
12460 * @size: number of chars in the array
12461 * @filename: an optional file name or URI
12463 * Create a parser context for using the XML parser in push mode.
12464 * If @buffer and @size are non-NULL, the data is used to detect
12465 * the encoding. The remaining characters will be parsed so they
12466 * don't need to be fed in again through xmlParseChunk.
12467 * To allow content encoding detection, @size should be >= 4
12468 * The value of @filename is used for fetching external entities
12469 * and error/warning reports.
12471 * Returns the new parser context or NULL
12474 xmlParserCtxtPtr
12475 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12476 const char *chunk, int size, const char *filename) {
12477 xmlParserCtxtPtr ctxt;
12478 xmlParserInputPtr inputStream;
12479 xmlParserInputBufferPtr buf;
12480 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12483 * plug some encoding conversion routines
12485 if ((chunk != NULL) && (size >= 4))
12486 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12488 buf = xmlAllocParserInputBuffer(enc);
12489 if (buf == NULL) return(NULL);
12491 ctxt = xmlNewParserCtxt();
12492 if (ctxt == NULL) {
12493 xmlErrMemory(NULL, "creating parser: out of memory\n");
12494 xmlFreeParserInputBuffer(buf);
12495 return(NULL);
12497 ctxt->dictNames = 1;
12498 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12499 if (ctxt->pushTab == NULL) {
12500 xmlErrMemory(ctxt, NULL);
12501 xmlFreeParserInputBuffer(buf);
12502 xmlFreeParserCtxt(ctxt);
12503 return(NULL);
12505 if (sax != NULL) {
12506 #ifdef LIBXML_SAX1_ENABLED
12507 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12508 #endif /* LIBXML_SAX1_ENABLED */
12509 xmlFree(ctxt->sax);
12510 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12511 if (ctxt->sax == NULL) {
12512 xmlErrMemory(ctxt, NULL);
12513 xmlFreeParserInputBuffer(buf);
12514 xmlFreeParserCtxt(ctxt);
12515 return(NULL);
12517 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12518 if (sax->initialized == XML_SAX2_MAGIC)
12519 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12520 else
12521 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12522 if (user_data != NULL)
12523 ctxt->userData = user_data;
12525 if (filename == NULL) {
12526 ctxt->directory = NULL;
12527 } else {
12528 ctxt->directory = xmlParserGetDirectory(filename);
12531 inputStream = xmlNewInputStream(ctxt);
12532 if (inputStream == NULL) {
12533 xmlFreeParserCtxt(ctxt);
12534 xmlFreeParserInputBuffer(buf);
12535 return(NULL);
12538 if (filename == NULL)
12539 inputStream->filename = NULL;
12540 else {
12541 inputStream->filename = (char *)
12542 xmlCanonicPath((const xmlChar *) filename);
12543 if (inputStream->filename == NULL) {
12544 xmlFreeParserCtxt(ctxt);
12545 xmlFreeParserInputBuffer(buf);
12546 return(NULL);
12549 inputStream->buf = buf;
12550 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12551 inputPush(ctxt, inputStream);
12554 * If the caller didn't provide an initial 'chunk' for determining
12555 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12556 * that it can be automatically determined later
12558 if ((size == 0) || (chunk == NULL)) {
12559 ctxt->charset = XML_CHAR_ENCODING_NONE;
12560 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12561 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12562 size_t cur = ctxt->input->cur - ctxt->input->base;
12564 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12566 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12567 #ifdef DEBUG_PUSH
12568 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12569 #endif
12572 if (enc != XML_CHAR_ENCODING_NONE) {
12573 xmlSwitchEncoding(ctxt, enc);
12576 return(ctxt);
12578 #endif /* LIBXML_PUSH_ENABLED */
12581 * xmlStopParser:
12582 * @ctxt: an XML parser context
12584 * Blocks further parser processing
12586 void
12587 xmlStopParser(xmlParserCtxtPtr ctxt) {
12588 if (ctxt == NULL)
12589 return;
12590 ctxt->instate = XML_PARSER_EOF;
12591 ctxt->errNo = XML_ERR_USER_STOP;
12592 ctxt->disableSAX = 1;
12593 if (ctxt->input != NULL) {
12594 ctxt->input->cur = BAD_CAST"";
12595 ctxt->input->base = ctxt->input->cur;
12600 * xmlCreateIOParserCtxt:
12601 * @sax: a SAX handler
12602 * @user_data: The user data returned on SAX callbacks
12603 * @ioread: an I/O read function
12604 * @ioclose: an I/O close function
12605 * @ioctx: an I/O handler
12606 * @enc: the charset encoding if known
12608 * Create a parser context for using the XML parser with an existing
12609 * I/O stream
12611 * Returns the new parser context or NULL
12613 xmlParserCtxtPtr
12614 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12615 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12616 void *ioctx, xmlCharEncoding enc) {
12617 xmlParserCtxtPtr ctxt;
12618 xmlParserInputPtr inputStream;
12619 xmlParserInputBufferPtr buf;
12621 if (ioread == NULL) return(NULL);
12623 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12624 if (buf == NULL) {
12625 if (ioclose != NULL)
12626 ioclose(ioctx);
12627 return (NULL);
12630 ctxt = xmlNewParserCtxt();
12631 if (ctxt == NULL) {
12632 xmlFreeParserInputBuffer(buf);
12633 return(NULL);
12635 if (sax != NULL) {
12636 #ifdef LIBXML_SAX1_ENABLED
12637 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12638 #endif /* LIBXML_SAX1_ENABLED */
12639 xmlFree(ctxt->sax);
12640 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12641 if (ctxt->sax == NULL) {
12642 xmlErrMemory(ctxt, NULL);
12643 xmlFreeParserCtxt(ctxt);
12644 return(NULL);
12646 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12647 if (sax->initialized == XML_SAX2_MAGIC)
12648 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12649 else
12650 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12651 if (user_data != NULL)
12652 ctxt->userData = user_data;
12655 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12656 if (inputStream == NULL) {
12657 xmlFreeParserCtxt(ctxt);
12658 return(NULL);
12660 inputPush(ctxt, inputStream);
12662 return(ctxt);
12665 #ifdef LIBXML_VALID_ENABLED
12666 /************************************************************************
12668 * Front ends when parsing a DTD *
12670 ************************************************************************/
12673 * xmlIOParseDTD:
12674 * @sax: the SAX handler block or NULL
12675 * @input: an Input Buffer
12676 * @enc: the charset encoding if known
12678 * Load and parse a DTD
12680 * Returns the resulting xmlDtdPtr or NULL in case of error.
12681 * @input will be freed by the function in any case.
12684 xmlDtdPtr
12685 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12686 xmlCharEncoding enc) {
12687 xmlDtdPtr ret = NULL;
12688 xmlParserCtxtPtr ctxt;
12689 xmlParserInputPtr pinput = NULL;
12690 xmlChar start[4];
12692 if (input == NULL)
12693 return(NULL);
12695 ctxt = xmlNewParserCtxt();
12696 if (ctxt == NULL) {
12697 xmlFreeParserInputBuffer(input);
12698 return(NULL);
12701 /* We are loading a DTD */
12702 ctxt->options |= XML_PARSE_DTDLOAD;
12705 * Set-up the SAX context
12707 if (sax != NULL) {
12708 if (ctxt->sax != NULL)
12709 xmlFree(ctxt->sax);
12710 ctxt->sax = sax;
12711 ctxt->userData = ctxt;
12713 xmlDetectSAX2(ctxt);
12716 * generate a parser input from the I/O handler
12719 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12720 if (pinput == NULL) {
12721 if (sax != NULL) ctxt->sax = NULL;
12722 xmlFreeParserInputBuffer(input);
12723 xmlFreeParserCtxt(ctxt);
12724 return(NULL);
12728 * plug some encoding conversion routines here.
12730 if (xmlPushInput(ctxt, pinput) < 0) {
12731 if (sax != NULL) ctxt->sax = NULL;
12732 xmlFreeParserCtxt(ctxt);
12733 return(NULL);
12735 if (enc != XML_CHAR_ENCODING_NONE) {
12736 xmlSwitchEncoding(ctxt, enc);
12739 pinput->filename = NULL;
12740 pinput->line = 1;
12741 pinput->col = 1;
12742 pinput->base = ctxt->input->cur;
12743 pinput->cur = ctxt->input->cur;
12744 pinput->free = NULL;
12747 * let's parse that entity knowing it's an external subset.
12749 ctxt->inSubset = 2;
12750 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12751 if (ctxt->myDoc == NULL) {
12752 xmlErrMemory(ctxt, "New Doc failed");
12753 return(NULL);
12755 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12756 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12757 BAD_CAST "none", BAD_CAST "none");
12759 if ((enc == XML_CHAR_ENCODING_NONE) &&
12760 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12762 * Get the 4 first bytes and decode the charset
12763 * if enc != XML_CHAR_ENCODING_NONE
12764 * plug some encoding conversion routines.
12766 start[0] = RAW;
12767 start[1] = NXT(1);
12768 start[2] = NXT(2);
12769 start[3] = NXT(3);
12770 enc = xmlDetectCharEncoding(start, 4);
12771 if (enc != XML_CHAR_ENCODING_NONE) {
12772 xmlSwitchEncoding(ctxt, enc);
12776 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12778 if (ctxt->myDoc != NULL) {
12779 if (ctxt->wellFormed) {
12780 ret = ctxt->myDoc->extSubset;
12781 ctxt->myDoc->extSubset = NULL;
12782 if (ret != NULL) {
12783 xmlNodePtr tmp;
12785 ret->doc = NULL;
12786 tmp = ret->children;
12787 while (tmp != NULL) {
12788 tmp->doc = NULL;
12789 tmp = tmp->next;
12792 } else {
12793 ret = NULL;
12795 xmlFreeDoc(ctxt->myDoc);
12796 ctxt->myDoc = NULL;
12798 if (sax != NULL) ctxt->sax = NULL;
12799 xmlFreeParserCtxt(ctxt);
12801 return(ret);
12805 * xmlSAXParseDTD:
12806 * @sax: the SAX handler block
12807 * @ExternalID: a NAME* containing the External ID of the DTD
12808 * @SystemID: a NAME* containing the URL to the DTD
12810 * Load and parse an external subset.
12812 * Returns the resulting xmlDtdPtr or NULL in case of error.
12815 xmlDtdPtr
12816 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12817 const xmlChar *SystemID) {
12818 xmlDtdPtr ret = NULL;
12819 xmlParserCtxtPtr ctxt;
12820 xmlParserInputPtr input = NULL;
12821 xmlCharEncoding enc;
12822 xmlChar* systemIdCanonic;
12824 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12826 ctxt = xmlNewParserCtxt();
12827 if (ctxt == NULL) {
12828 return(NULL);
12831 /* We are loading a DTD */
12832 ctxt->options |= XML_PARSE_DTDLOAD;
12835 * Set-up the SAX context
12837 if (sax != NULL) {
12838 if (ctxt->sax != NULL)
12839 xmlFree(ctxt->sax);
12840 ctxt->sax = sax;
12841 ctxt->userData = ctxt;
12845 * Canonicalise the system ID
12847 systemIdCanonic = xmlCanonicPath(SystemID);
12848 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12849 xmlFreeParserCtxt(ctxt);
12850 return(NULL);
12854 * Ask the Entity resolver to load the damn thing
12857 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12858 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12859 systemIdCanonic);
12860 if (input == NULL) {
12861 if (sax != NULL) ctxt->sax = NULL;
12862 xmlFreeParserCtxt(ctxt);
12863 if (systemIdCanonic != NULL)
12864 xmlFree(systemIdCanonic);
12865 return(NULL);
12869 * plug some encoding conversion routines here.
12871 if (xmlPushInput(ctxt, input) < 0) {
12872 if (sax != NULL) ctxt->sax = NULL;
12873 xmlFreeParserCtxt(ctxt);
12874 if (systemIdCanonic != NULL)
12875 xmlFree(systemIdCanonic);
12876 return(NULL);
12878 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12879 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12880 xmlSwitchEncoding(ctxt, enc);
12883 if (input->filename == NULL)
12884 input->filename = (char *) systemIdCanonic;
12885 else
12886 xmlFree(systemIdCanonic);
12887 input->line = 1;
12888 input->col = 1;
12889 input->base = ctxt->input->cur;
12890 input->cur = ctxt->input->cur;
12891 input->free = NULL;
12894 * let's parse that entity knowing it's an external subset.
12896 ctxt->inSubset = 2;
12897 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12898 if (ctxt->myDoc == NULL) {
12899 xmlErrMemory(ctxt, "New Doc failed");
12900 if (sax != NULL) ctxt->sax = NULL;
12901 xmlFreeParserCtxt(ctxt);
12902 return(NULL);
12904 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12905 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12906 ExternalID, SystemID);
12907 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12909 if (ctxt->myDoc != NULL) {
12910 if (ctxt->wellFormed) {
12911 ret = ctxt->myDoc->extSubset;
12912 ctxt->myDoc->extSubset = NULL;
12913 if (ret != NULL) {
12914 xmlNodePtr tmp;
12916 ret->doc = NULL;
12917 tmp = ret->children;
12918 while (tmp != NULL) {
12919 tmp->doc = NULL;
12920 tmp = tmp->next;
12923 } else {
12924 ret = NULL;
12926 xmlFreeDoc(ctxt->myDoc);
12927 ctxt->myDoc = NULL;
12929 if (sax != NULL) ctxt->sax = NULL;
12930 xmlFreeParserCtxt(ctxt);
12932 return(ret);
12937 * xmlParseDTD:
12938 * @ExternalID: a NAME* containing the External ID of the DTD
12939 * @SystemID: a NAME* containing the URL to the DTD
12941 * Load and parse an external subset.
12943 * Returns the resulting xmlDtdPtr or NULL in case of error.
12946 xmlDtdPtr
12947 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12948 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12950 #endif /* LIBXML_VALID_ENABLED */
/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
12959 * xmlParseCtxtExternalEntity:
12960 * @ctx: the existing parsing context
12961 * @URL: the URL for the entity to load
12962 * @ID: the System ID for the entity to load
12963 * @lst: the return value for the set of parsed nodes
12965 * Parse an external general entity within an existing parsing context
12966 * An external general parsed entity is well-formed if it matches the
12967 * production labeled extParsedEnt.
12969 * [78] extParsedEnt ::= TextDecl? content
12971 * Returns 0 if the entity is well formed, -1 in case of args problem and
12972 * the parser error code otherwise
12976 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12977 const xmlChar *ID, xmlNodePtr *lst) {
12978 xmlParserCtxtPtr ctxt;
12979 xmlDocPtr newDoc;
12980 xmlNodePtr newRoot;
12981 xmlSAXHandlerPtr oldsax = NULL;
12982 int ret = 0;
12983 xmlChar start[4];
12984 xmlCharEncoding enc;
12986 if (ctx == NULL) return(-1);
12988 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12989 (ctx->depth > 1024)) {
12990 return(XML_ERR_ENTITY_LOOP);
12993 if (lst != NULL)
12994 *lst = NULL;
12995 if ((URL == NULL) && (ID == NULL))
12996 return(-1);
12997 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12998 return(-1);
13000 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13001 if (ctxt == NULL) {
13002 return(-1);
13005 oldsax = ctxt->sax;
13006 ctxt->sax = ctx->sax;
13007 xmlDetectSAX2(ctxt);
13008 newDoc = xmlNewDoc(BAD_CAST "1.0");
13009 if (newDoc == NULL) {
13010 xmlFreeParserCtxt(ctxt);
13011 return(-1);
13013 newDoc->properties = XML_DOC_INTERNAL;
13014 if (ctx->myDoc->dict) {
13015 newDoc->dict = ctx->myDoc->dict;
13016 xmlDictReference(newDoc->dict);
13018 if (ctx->myDoc != NULL) {
13019 newDoc->intSubset = ctx->myDoc->intSubset;
13020 newDoc->extSubset = ctx->myDoc->extSubset;
13022 if (ctx->myDoc->URL != NULL) {
13023 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13025 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13026 if (newRoot == NULL) {
13027 ctxt->sax = oldsax;
13028 xmlFreeParserCtxt(ctxt);
13029 newDoc->intSubset = NULL;
13030 newDoc->extSubset = NULL;
13031 xmlFreeDoc(newDoc);
13032 return(-1);
13034 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13035 nodePush(ctxt, newDoc->children);
13036 if (ctx->myDoc == NULL) {
13037 ctxt->myDoc = newDoc;
13038 } else {
13039 ctxt->myDoc = ctx->myDoc;
13040 newDoc->children->doc = ctx->myDoc;
13044 * Get the 4 first bytes and decode the charset
13045 * if enc != XML_CHAR_ENCODING_NONE
13046 * plug some encoding conversion routines.
13048 GROW
13049 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13050 start[0] = RAW;
13051 start[1] = NXT(1);
13052 start[2] = NXT(2);
13053 start[3] = NXT(3);
13054 enc = xmlDetectCharEncoding(start, 4);
13055 if (enc != XML_CHAR_ENCODING_NONE) {
13056 xmlSwitchEncoding(ctxt, enc);
13061 * Parse a possible text declaration first
13063 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13064 xmlParseTextDecl(ctxt);
13066 * An XML-1.0 document can't reference an entity not XML-1.0
13068 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13069 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13070 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13071 "Version mismatch between document and entity\n");
13076 * If the user provided its own SAX callbacks then reuse the
13077 * useData callback field, otherwise the expected setup in a
13078 * DOM builder is to have userData == ctxt
13080 if (ctx->userData == ctx)
13081 ctxt->userData = ctxt;
13082 else
13083 ctxt->userData = ctx->userData;
13086 * Doing validity checking on chunk doesn't make sense
13088 ctxt->instate = XML_PARSER_CONTENT;
13089 ctxt->validate = ctx->validate;
13090 ctxt->valid = ctx->valid;
13091 ctxt->loadsubset = ctx->loadsubset;
13092 ctxt->depth = ctx->depth + 1;
13093 ctxt->replaceEntities = ctx->replaceEntities;
13094 if (ctxt->validate) {
13095 ctxt->vctxt.error = ctx->vctxt.error;
13096 ctxt->vctxt.warning = ctx->vctxt.warning;
13097 } else {
13098 ctxt->vctxt.error = NULL;
13099 ctxt->vctxt.warning = NULL;
13101 ctxt->vctxt.nodeTab = NULL;
13102 ctxt->vctxt.nodeNr = 0;
13103 ctxt->vctxt.nodeMax = 0;
13104 ctxt->vctxt.node = NULL;
13105 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13106 ctxt->dict = ctx->dict;
13107 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13108 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13109 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13110 ctxt->dictNames = ctx->dictNames;
13111 ctxt->attsDefault = ctx->attsDefault;
13112 ctxt->attsSpecial = ctx->attsSpecial;
13113 ctxt->linenumbers = ctx->linenumbers;
13115 xmlParseContent(ctxt);
13117 ctx->validate = ctxt->validate;
13118 ctx->valid = ctxt->valid;
13119 if ((RAW == '<') && (NXT(1) == '/')) {
13120 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13121 } else if (RAW != 0) {
13122 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13124 if (ctxt->node != newDoc->children) {
13125 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13128 if (!ctxt->wellFormed) {
13129 if (ctxt->errNo == 0)
13130 ret = 1;
13131 else
13132 ret = ctxt->errNo;
13133 } else {
13134 if (lst != NULL) {
13135 xmlNodePtr cur;
13138 * Return the newly created nodeset after unlinking it from
13139 * they pseudo parent.
13141 cur = newDoc->children->children;
13142 *lst = cur;
13143 while (cur != NULL) {
13144 cur->parent = NULL;
13145 cur = cur->next;
13147 newDoc->children->children = NULL;
13149 ret = 0;
13151 ctxt->sax = oldsax;
13152 ctxt->dict = NULL;
13153 ctxt->attsDefault = NULL;
13154 ctxt->attsSpecial = NULL;
13155 xmlFreeParserCtxt(ctxt);
13156 newDoc->intSubset = NULL;
13157 newDoc->extSubset = NULL;
13158 xmlFreeDoc(newDoc);
13160 return(ret);
13164 * xmlParseExternalEntityPrivate:
13165 * @doc: the document the chunk pertains to
13166 * @oldctxt: the previous parser context if available
13167 * @sax: the SAX handler bloc (possibly NULL)
13168 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13169 * @depth: Used for loop detection, use 0
13170 * @URL: the URL for the entity to load
13171 * @ID: the System ID for the entity to load
13172 * @list: the return value for the set of parsed nodes
13174 * Private version of xmlParseExternalEntity()
13176 * Returns 0 if the entity is well formed, -1 in case of args problem and
13177 * the parser error code otherwise
13180 static xmlParserErrors
13181 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13182 xmlSAXHandlerPtr sax,
13183 void *user_data, int depth, const xmlChar *URL,
13184 const xmlChar *ID, xmlNodePtr *list) {
13185 xmlParserCtxtPtr ctxt;
13186 xmlDocPtr newDoc;
13187 xmlNodePtr newRoot;
13188 xmlSAXHandlerPtr oldsax = NULL;
13189 xmlParserErrors ret = XML_ERR_OK;
13190 xmlChar start[4];
13191 xmlCharEncoding enc;
13193 if (((depth > 40) &&
13194 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13195 (depth > 1024)) {
13196 return(XML_ERR_ENTITY_LOOP);
13199 if (list != NULL)
13200 *list = NULL;
13201 if ((URL == NULL) && (ID == NULL))
13202 return(XML_ERR_INTERNAL_ERROR);
13203 if (doc == NULL)
13204 return(XML_ERR_INTERNAL_ERROR);
13207 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13208 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13209 ctxt->userData = ctxt;
13210 if (oldctxt != NULL) {
13211 ctxt->_private = oldctxt->_private;
13212 ctxt->loadsubset = oldctxt->loadsubset;
13213 ctxt->validate = oldctxt->validate;
13214 ctxt->external = oldctxt->external;
13215 ctxt->record_info = oldctxt->record_info;
13216 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13217 ctxt->node_seq.length = oldctxt->node_seq.length;
13218 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13219 } else {
13221 * Doing validity checking on chunk without context
13222 * doesn't make sense
13224 ctxt->_private = NULL;
13225 ctxt->validate = 0;
13226 ctxt->external = 2;
13227 ctxt->loadsubset = 0;
13229 if (sax != NULL) {
13230 oldsax = ctxt->sax;
13231 ctxt->sax = sax;
13232 if (user_data != NULL)
13233 ctxt->userData = user_data;
13235 xmlDetectSAX2(ctxt);
13236 newDoc = xmlNewDoc(BAD_CAST "1.0");
13237 if (newDoc == NULL) {
13238 ctxt->node_seq.maximum = 0;
13239 ctxt->node_seq.length = 0;
13240 ctxt->node_seq.buffer = NULL;
13241 xmlFreeParserCtxt(ctxt);
13242 return(XML_ERR_INTERNAL_ERROR);
13244 newDoc->properties = XML_DOC_INTERNAL;
13245 newDoc->intSubset = doc->intSubset;
13246 newDoc->extSubset = doc->extSubset;
13247 newDoc->dict = doc->dict;
13248 xmlDictReference(newDoc->dict);
13250 if (doc->URL != NULL) {
13251 newDoc->URL = xmlStrdup(doc->URL);
13253 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13254 if (newRoot == NULL) {
13255 if (sax != NULL)
13256 ctxt->sax = oldsax;
13257 ctxt->node_seq.maximum = 0;
13258 ctxt->node_seq.length = 0;
13259 ctxt->node_seq.buffer = NULL;
13260 xmlFreeParserCtxt(ctxt);
13261 newDoc->intSubset = NULL;
13262 newDoc->extSubset = NULL;
13263 xmlFreeDoc(newDoc);
13264 return(XML_ERR_INTERNAL_ERROR);
13266 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13267 nodePush(ctxt, newDoc->children);
13268 ctxt->myDoc = doc;
13269 newRoot->doc = doc;
13272 * Get the 4 first bytes and decode the charset
13273 * if enc != XML_CHAR_ENCODING_NONE
13274 * plug some encoding conversion routines.
13276 GROW;
13277 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13278 start[0] = RAW;
13279 start[1] = NXT(1);
13280 start[2] = NXT(2);
13281 start[3] = NXT(3);
13282 enc = xmlDetectCharEncoding(start, 4);
13283 if (enc != XML_CHAR_ENCODING_NONE) {
13284 xmlSwitchEncoding(ctxt, enc);
13289 * Parse a possible text declaration first
13291 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13292 xmlParseTextDecl(ctxt);
13295 ctxt->instate = XML_PARSER_CONTENT;
13296 ctxt->depth = depth;
13298 xmlParseContent(ctxt);
13300 if ((RAW == '<') && (NXT(1) == '/')) {
13301 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13302 } else if (RAW != 0) {
13303 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13305 if (ctxt->node != newDoc->children) {
13306 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13309 if (!ctxt->wellFormed) {
13310 if (ctxt->errNo == 0)
13311 ret = XML_ERR_INTERNAL_ERROR;
13312 else
13313 ret = (xmlParserErrors)ctxt->errNo;
13314 } else {
13315 if (list != NULL) {
13316 xmlNodePtr cur;
13319 * Return the newly created nodeset after unlinking it from
13320 * they pseudo parent.
13322 cur = newDoc->children->children;
13323 *list = cur;
13324 while (cur != NULL) {
13325 cur->parent = NULL;
13326 cur = cur->next;
13328 newDoc->children->children = NULL;
13330 ret = XML_ERR_OK;
13334 * Record in the parent context the number of entities replacement
13335 * done when parsing that reference.
13337 if (oldctxt != NULL)
13338 oldctxt->nbentities += ctxt->nbentities;
13341 * Also record the size of the entity parsed
13343 if (ctxt->input != NULL) {
13344 oldctxt->sizeentities += ctxt->input->consumed;
13345 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13348 * And record the last error if any
13350 if (ctxt->lastError.code != XML_ERR_OK)
13351 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13353 if (sax != NULL)
13354 ctxt->sax = oldsax;
13355 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13356 oldctxt->node_seq.length = ctxt->node_seq.length;
13357 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13358 ctxt->node_seq.maximum = 0;
13359 ctxt->node_seq.length = 0;
13360 ctxt->node_seq.buffer = NULL;
13361 xmlFreeParserCtxt(ctxt);
13362 newDoc->intSubset = NULL;
13363 newDoc->extSubset = NULL;
13364 xmlFreeDoc(newDoc);
13366 return(ret);
13369 #ifdef LIBXML_SAX1_ENABLED
13371 * xmlParseExternalEntity:
13372 * @doc: the document the chunk pertains to
13373 * @sax: the SAX handler bloc (possibly NULL)
13374 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13375 * @depth: Used for loop detection, use 0
13376 * @URL: the URL for the entity to load
13377 * @ID: the System ID for the entity to load
13378 * @lst: the return value for the set of parsed nodes
13380 * Parse an external general entity
13381 * An external general parsed entity is well-formed if it matches the
13382 * production labeled extParsedEnt.
13384 * [78] extParsedEnt ::= TextDecl? content
13386 * Returns 0 if the entity is well formed, -1 in case of args problem and
13387 * the parser error code otherwise
13391 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13392 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13393 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13394 ID, lst));
13398 * xmlParseBalancedChunkMemory:
13399 * @doc: the document the chunk pertains to
13400 * @sax: the SAX handler bloc (possibly NULL)
13401 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13402 * @depth: Used for loop detection, use 0
13403 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13404 * @lst: the return value for the set of parsed nodes
13406 * Parse a well-balanced chunk of an XML document
13407 * called by the parser
13408 * The allowed sequence for the Well Balanced Chunk is the one defined by
13409 * the content production in the XML grammar:
13411 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13413 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13414 * the parser error code otherwise
13418 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13419 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13420 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13421 depth, string, lst, 0 );
13423 #endif /* LIBXML_SAX1_ENABLED */
13426 * xmlParseBalancedChunkMemoryInternal:
13427 * @oldctxt: the existing parsing context
13428 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13429 * @user_data: the user data field for the parser context
13430 * @lst: the return value for the set of parsed nodes
13433 * Parse a well-balanced chunk of an XML document
13434 * called by the parser
13435 * The allowed sequence for the Well Balanced Chunk is the one defined by
13436 * the content production in the XML grammar:
13438 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13440 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13441 * error code otherwise
13443 * In case recover is set to 1, the nodelist will not be empty even if
13444 * the parsed chunk is not well balanced.
13446 static xmlParserErrors
13447 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13448 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13449 xmlParserCtxtPtr ctxt;
13450 xmlDocPtr newDoc = NULL;
13451 xmlNodePtr newRoot;
13452 xmlSAXHandlerPtr oldsax = NULL;
13453 xmlNodePtr content = NULL;
13454 xmlNodePtr last = NULL;
13455 int size;
13456 xmlParserErrors ret = XML_ERR_OK;
13457 #ifdef SAX2
13458 int i;
13459 #endif
13461 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13462 (oldctxt->depth > 1024)) {
13463 return(XML_ERR_ENTITY_LOOP);
13467 if (lst != NULL)
13468 *lst = NULL;
13469 if (string == NULL)
13470 return(XML_ERR_INTERNAL_ERROR);
13472 size = xmlStrlen(string);
13474 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13475 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13476 if (user_data != NULL)
13477 ctxt->userData = user_data;
13478 else
13479 ctxt->userData = ctxt;
13480 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13481 ctxt->dict = oldctxt->dict;
13482 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13483 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13484 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13486 #ifdef SAX2
13487 /* propagate namespaces down the entity */
13488 for (i = 0;i < oldctxt->nsNr;i += 2) {
13489 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13491 #endif
13493 oldsax = ctxt->sax;
13494 ctxt->sax = oldctxt->sax;
13495 xmlDetectSAX2(ctxt);
13496 ctxt->replaceEntities = oldctxt->replaceEntities;
13497 ctxt->options = oldctxt->options;
13499 ctxt->_private = oldctxt->_private;
13500 if (oldctxt->myDoc == NULL) {
13501 newDoc = xmlNewDoc(BAD_CAST "1.0");
13502 if (newDoc == NULL) {
13503 ctxt->sax = oldsax;
13504 ctxt->dict = NULL;
13505 xmlFreeParserCtxt(ctxt);
13506 return(XML_ERR_INTERNAL_ERROR);
13508 newDoc->properties = XML_DOC_INTERNAL;
13509 newDoc->dict = ctxt->dict;
13510 xmlDictReference(newDoc->dict);
13511 ctxt->myDoc = newDoc;
13512 } else {
13513 ctxt->myDoc = oldctxt->myDoc;
13514 content = ctxt->myDoc->children;
13515 last = ctxt->myDoc->last;
13517 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13518 if (newRoot == NULL) {
13519 ctxt->sax = oldsax;
13520 ctxt->dict = NULL;
13521 xmlFreeParserCtxt(ctxt);
13522 if (newDoc != NULL) {
13523 xmlFreeDoc(newDoc);
13525 return(XML_ERR_INTERNAL_ERROR);
13527 ctxt->myDoc->children = NULL;
13528 ctxt->myDoc->last = NULL;
13529 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13530 nodePush(ctxt, ctxt->myDoc->children);
13531 ctxt->instate = XML_PARSER_CONTENT;
13532 ctxt->depth = oldctxt->depth + 1;
13534 ctxt->validate = 0;
13535 ctxt->loadsubset = oldctxt->loadsubset;
13536 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13538 * ID/IDREF registration will be done in xmlValidateElement below
13540 ctxt->loadsubset |= XML_SKIP_IDS;
13542 ctxt->dictNames = oldctxt->dictNames;
13543 ctxt->attsDefault = oldctxt->attsDefault;
13544 ctxt->attsSpecial = oldctxt->attsSpecial;
13546 xmlParseContent(ctxt);
13547 if ((RAW == '<') && (NXT(1) == '/')) {
13548 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13549 } else if (RAW != 0) {
13550 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13552 if (ctxt->node != ctxt->myDoc->children) {
13553 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13556 if (!ctxt->wellFormed) {
13557 if (ctxt->errNo == 0)
13558 ret = XML_ERR_INTERNAL_ERROR;
13559 else
13560 ret = (xmlParserErrors)ctxt->errNo;
13561 } else {
13562 ret = XML_ERR_OK;
13565 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13566 xmlNodePtr cur;
13569 * Return the newly created nodeset after unlinking it from
13570 * they pseudo parent.
13572 cur = ctxt->myDoc->children->children;
13573 *lst = cur;
13574 while (cur != NULL) {
13575 #ifdef LIBXML_VALID_ENABLED
13576 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13577 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13578 (cur->type == XML_ELEMENT_NODE)) {
13579 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13580 oldctxt->myDoc, cur);
13582 #endif /* LIBXML_VALID_ENABLED */
13583 cur->parent = NULL;
13584 cur = cur->next;
13586 ctxt->myDoc->children->children = NULL;
13588 if (ctxt->myDoc != NULL) {
13589 xmlFreeNode(ctxt->myDoc->children);
13590 ctxt->myDoc->children = content;
13591 ctxt->myDoc->last = last;
13595 * Record in the parent context the number of entities replacement
13596 * done when parsing that reference.
13598 if (oldctxt != NULL)
13599 oldctxt->nbentities += ctxt->nbentities;
13602 * Also record the last error if any
13604 if (ctxt->lastError.code != XML_ERR_OK)
13605 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13607 ctxt->sax = oldsax;
13608 ctxt->dict = NULL;
13609 ctxt->attsDefault = NULL;
13610 ctxt->attsSpecial = NULL;
13611 xmlFreeParserCtxt(ctxt);
13612 if (newDoc != NULL) {
13613 xmlFreeDoc(newDoc);
13616 return(ret);
13620 * xmlParseInNodeContext:
13621 * @node: the context node
13622 * @data: the input string
13623 * @datalen: the input string length in bytes
13624 * @options: a combination of xmlParserOption
13625 * @lst: the return value for the set of parsed nodes
13627 * Parse a well-balanced chunk of an XML document
13628 * within the context (DTD, namespaces, etc ...) of the given node.
13630 * The allowed sequence for the data is a Well Balanced Chunk defined by
13631 * the content production in the XML grammar:
13633 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13635 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13636 * error code otherwise
13638 xmlParserErrors
13639 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13640 int options, xmlNodePtr *lst) {
13641 #ifdef SAX2
13642 xmlParserCtxtPtr ctxt;
13643 xmlDocPtr doc = NULL;
13644 xmlNodePtr fake, cur;
13645 int nsnr = 0;
13647 xmlParserErrors ret = XML_ERR_OK;
13650 * check all input parameters, grab the document
13652 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13653 return(XML_ERR_INTERNAL_ERROR);
13654 switch (node->type) {
13655 case XML_ELEMENT_NODE:
13656 case XML_ATTRIBUTE_NODE:
13657 case XML_TEXT_NODE:
13658 case XML_CDATA_SECTION_NODE:
13659 case XML_ENTITY_REF_NODE:
13660 case XML_PI_NODE:
13661 case XML_COMMENT_NODE:
13662 case XML_DOCUMENT_NODE:
13663 case XML_HTML_DOCUMENT_NODE:
13664 break;
13665 default:
13666 return(XML_ERR_INTERNAL_ERROR);
13669 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13670 (node->type != XML_DOCUMENT_NODE) &&
13671 (node->type != XML_HTML_DOCUMENT_NODE))
13672 node = node->parent;
13673 if (node == NULL)
13674 return(XML_ERR_INTERNAL_ERROR);
13675 if (node->type == XML_ELEMENT_NODE)
13676 doc = node->doc;
13677 else
13678 doc = (xmlDocPtr) node;
13679 if (doc == NULL)
13680 return(XML_ERR_INTERNAL_ERROR);
13683 * allocate a context and set-up everything not related to the
13684 * node position in the tree
13686 if (doc->type == XML_DOCUMENT_NODE)
13687 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13688 #ifdef LIBXML_HTML_ENABLED
13689 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13690 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13692 * When parsing in context, it makes no sense to add implied
13693 * elements like html/body/etc...
13695 options |= HTML_PARSE_NOIMPLIED;
13697 #endif
13698 else
13699 return(XML_ERR_INTERNAL_ERROR);
13701 if (ctxt == NULL)
13702 return(XML_ERR_NO_MEMORY);
13705 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13706 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13707 * we must wait until the last moment to free the original one.
13709 if (doc->dict != NULL) {
13710 if (ctxt->dict != NULL)
13711 xmlDictFree(ctxt->dict);
13712 ctxt->dict = doc->dict;
13713 } else
13714 options |= XML_PARSE_NODICT;
13716 if (doc->encoding != NULL) {
13717 xmlCharEncodingHandlerPtr hdlr;
13719 if (ctxt->encoding != NULL)
13720 xmlFree((xmlChar *) ctxt->encoding);
13721 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13723 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13724 if (hdlr != NULL) {
13725 xmlSwitchToEncoding(ctxt, hdlr);
13726 } else {
13727 return(XML_ERR_UNSUPPORTED_ENCODING);
13731 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13732 xmlDetectSAX2(ctxt);
13733 ctxt->myDoc = doc;
13734 /* parsing in context, i.e. as within existing content */
13735 ctxt->instate = XML_PARSER_CONTENT;
13737 fake = xmlNewComment(NULL);
13738 if (fake == NULL) {
13739 xmlFreeParserCtxt(ctxt);
13740 return(XML_ERR_NO_MEMORY);
13742 xmlAddChild(node, fake);
13744 if (node->type == XML_ELEMENT_NODE) {
13745 nodePush(ctxt, node);
13747 * initialize the SAX2 namespaces stack
13749 cur = node;
13750 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13751 xmlNsPtr ns = cur->nsDef;
13752 const xmlChar *iprefix, *ihref;
13754 while (ns != NULL) {
13755 if (ctxt->dict) {
13756 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13757 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13758 } else {
13759 iprefix = ns->prefix;
13760 ihref = ns->href;
13763 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13764 nsPush(ctxt, iprefix, ihref);
13765 nsnr++;
13767 ns = ns->next;
13769 cur = cur->parent;
13773 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13775 * ID/IDREF registration will be done in xmlValidateElement below
13777 ctxt->loadsubset |= XML_SKIP_IDS;
13780 #ifdef LIBXML_HTML_ENABLED
13781 if (doc->type == XML_HTML_DOCUMENT_NODE)
13782 __htmlParseContent(ctxt);
13783 else
13784 #endif
13785 xmlParseContent(ctxt);
13787 nsPop(ctxt, nsnr);
13788 if ((RAW == '<') && (NXT(1) == '/')) {
13789 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13790 } else if (RAW != 0) {
13791 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13793 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13794 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13795 ctxt->wellFormed = 0;
13798 if (!ctxt->wellFormed) {
13799 if (ctxt->errNo == 0)
13800 ret = XML_ERR_INTERNAL_ERROR;
13801 else
13802 ret = (xmlParserErrors)ctxt->errNo;
13803 } else {
13804 ret = XML_ERR_OK;
13808 * Return the newly created nodeset after unlinking it from
13809 * the pseudo sibling.
13812 cur = fake->next;
13813 fake->next = NULL;
13814 node->last = fake;
13816 if (cur != NULL) {
13817 cur->prev = NULL;
13820 *lst = cur;
13822 while (cur != NULL) {
13823 cur->parent = NULL;
13824 cur = cur->next;
13827 xmlUnlinkNode(fake);
13828 xmlFreeNode(fake);
13831 if (ret != XML_ERR_OK) {
13832 xmlFreeNodeList(*lst);
13833 *lst = NULL;
13836 if (doc->dict != NULL)
13837 ctxt->dict = NULL;
13838 xmlFreeParserCtxt(ctxt);
13840 return(ret);
13841 #else /* !SAX2 */
13842 return(XML_ERR_INTERNAL_ERROR);
13843 #endif
13846 #ifdef LIBXML_SAX1_ENABLED
13848 * xmlParseBalancedChunkMemoryRecover:
13849 * @doc: the document the chunk pertains to
13850 * @sax: the SAX handler bloc (possibly NULL)
13851 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13852 * @depth: Used for loop detection, use 0
13853 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13854 * @lst: the return value for the set of parsed nodes
13855 * @recover: return nodes even if the data is broken (use 0)
13858 * Parse a well-balanced chunk of an XML document
13859 * called by the parser
13860 * The allowed sequence for the Well Balanced Chunk is the one defined by
13861 * the content production in the XML grammar:
13863 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13865 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13866 * the parser error code otherwise
13868 * In case recover is set to 1, the nodelist will not be empty even if
13869 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13870 * some extent.
13873 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13874 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13875 int recover) {
13876 xmlParserCtxtPtr ctxt;
13877 xmlDocPtr newDoc;
13878 xmlSAXHandlerPtr oldsax = NULL;
13879 xmlNodePtr content, newRoot;
13880 int size;
13881 int ret = 0;
13883 if (depth > 40) {
13884 return(XML_ERR_ENTITY_LOOP);
13888 if (lst != NULL)
13889 *lst = NULL;
13890 if (string == NULL)
13891 return(-1);
13893 size = xmlStrlen(string);
13895 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13896 if (ctxt == NULL) return(-1);
13897 ctxt->userData = ctxt;
13898 if (sax != NULL) {
13899 oldsax = ctxt->sax;
13900 ctxt->sax = sax;
13901 if (user_data != NULL)
13902 ctxt->userData = user_data;
13904 newDoc = xmlNewDoc(BAD_CAST "1.0");
13905 if (newDoc == NULL) {
13906 xmlFreeParserCtxt(ctxt);
13907 return(-1);
13909 newDoc->properties = XML_DOC_INTERNAL;
13910 if ((doc != NULL) && (doc->dict != NULL)) {
13911 xmlDictFree(ctxt->dict);
13912 ctxt->dict = doc->dict;
13913 xmlDictReference(ctxt->dict);
13914 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13915 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13916 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13917 ctxt->dictNames = 1;
13918 } else {
13919 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13921 if (doc != NULL) {
13922 newDoc->intSubset = doc->intSubset;
13923 newDoc->extSubset = doc->extSubset;
13925 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13926 if (newRoot == NULL) {
13927 if (sax != NULL)
13928 ctxt->sax = oldsax;
13929 xmlFreeParserCtxt(ctxt);
13930 newDoc->intSubset = NULL;
13931 newDoc->extSubset = NULL;
13932 xmlFreeDoc(newDoc);
13933 return(-1);
13935 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13936 nodePush(ctxt, newRoot);
13937 if (doc == NULL) {
13938 ctxt->myDoc = newDoc;
13939 } else {
13940 ctxt->myDoc = newDoc;
13941 newDoc->children->doc = doc;
13942 /* Ensure that doc has XML spec namespace */
13943 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13944 newDoc->oldNs = doc->oldNs;
13946 ctxt->instate = XML_PARSER_CONTENT;
13947 ctxt->depth = depth;
13950 * Doing validity checking on chunk doesn't make sense
13952 ctxt->validate = 0;
13953 ctxt->loadsubset = 0;
13954 xmlDetectSAX2(ctxt);
13956 if ( doc != NULL ){
13957 content = doc->children;
13958 doc->children = NULL;
13959 xmlParseContent(ctxt);
13960 doc->children = content;
13962 else {
13963 xmlParseContent(ctxt);
13965 if ((RAW == '<') && (NXT(1) == '/')) {
13966 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13967 } else if (RAW != 0) {
13968 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13970 if (ctxt->node != newDoc->children) {
13971 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13974 if (!ctxt->wellFormed) {
13975 if (ctxt->errNo == 0)
13976 ret = 1;
13977 else
13978 ret = ctxt->errNo;
13979 } else {
13980 ret = 0;
13983 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13984 xmlNodePtr cur;
13987 * Return the newly created nodeset after unlinking it from
13988 * they pseudo parent.
13990 cur = newDoc->children->children;
13991 *lst = cur;
13992 while (cur != NULL) {
13993 xmlSetTreeDoc(cur, doc);
13994 cur->parent = NULL;
13995 cur = cur->next;
13997 newDoc->children->children = NULL;
14000 if (sax != NULL)
14001 ctxt->sax = oldsax;
14002 xmlFreeParserCtxt(ctxt);
14003 newDoc->intSubset = NULL;
14004 newDoc->extSubset = NULL;
14005 newDoc->oldNs = NULL;
14006 xmlFreeDoc(newDoc);
14008 return(ret);
14012 * xmlSAXParseEntity:
14013 * @sax: the SAX handler block
14014 * @filename: the filename
14016 * parse an XML external entity out of context and build a tree.
14017 * It use the given SAX function block to handle the parsing callback.
14018 * If sax is NULL, fallback to the default DOM tree building routines.
14020 * [78] extParsedEnt ::= TextDecl? content
14022 * This correspond to a "Well Balanced" chunk
14024 * Returns the resulting document tree
14027 xmlDocPtr
14028 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14029 xmlDocPtr ret;
14030 xmlParserCtxtPtr ctxt;
14032 ctxt = xmlCreateFileParserCtxt(filename);
14033 if (ctxt == NULL) {
14034 return(NULL);
14036 if (sax != NULL) {
14037 if (ctxt->sax != NULL)
14038 xmlFree(ctxt->sax);
14039 ctxt->sax = sax;
14040 ctxt->userData = NULL;
14043 xmlParseExtParsedEnt(ctxt);
14045 if (ctxt->wellFormed)
14046 ret = ctxt->myDoc;
14047 else {
14048 ret = NULL;
14049 xmlFreeDoc(ctxt->myDoc);
14050 ctxt->myDoc = NULL;
14052 if (sax != NULL)
14053 ctxt->sax = NULL;
14054 xmlFreeParserCtxt(ctxt);
14056 return(ret);
14060 * xmlParseEntity:
14061 * @filename: the filename
14063 * parse an XML external entity out of context and build a tree.
14065 * [78] extParsedEnt ::= TextDecl? content
14067 * This correspond to a "Well Balanced" chunk
14069 * Returns the resulting document tree
14072 xmlDocPtr
14073 xmlParseEntity(const char *filename) {
14074 return(xmlSAXParseEntity(NULL, filename));
14076 #endif /* LIBXML_SAX1_ENABLED */
14079 * xmlCreateEntityParserCtxtInternal:
14080 * @URL: the entity URL
14081 * @ID: the entity PUBLIC ID
14082 * @base: a possible base for the target URI
14083 * @pctx: parser context used to set options on new context
14085 * Create a parser context for an external entity
14086 * Automatic support for ZLIB/Compress compressed document is provided
14087 * by default if found at compile-time.
14089 * Returns the new parser context or NULL
14091 static xmlParserCtxtPtr
14092 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14093 const xmlChar *base, xmlParserCtxtPtr pctx) {
14094 xmlParserCtxtPtr ctxt;
14095 xmlParserInputPtr inputStream;
14096 char *directory = NULL;
14097 xmlChar *uri;
14099 ctxt = xmlNewParserCtxt();
14100 if (ctxt == NULL) {
14101 return(NULL);
14104 if (pctx != NULL) {
14105 ctxt->options = pctx->options;
14106 ctxt->_private = pctx->_private;
14109 uri = xmlBuildURI(URL, base);
14111 if (uri == NULL) {
14112 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14113 if (inputStream == NULL) {
14114 xmlFreeParserCtxt(ctxt);
14115 return(NULL);
14118 inputPush(ctxt, inputStream);
14120 if ((ctxt->directory == NULL) && (directory == NULL))
14121 directory = xmlParserGetDirectory((char *)URL);
14122 if ((ctxt->directory == NULL) && (directory != NULL))
14123 ctxt->directory = directory;
14124 } else {
14125 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14126 if (inputStream == NULL) {
14127 xmlFree(uri);
14128 xmlFreeParserCtxt(ctxt);
14129 return(NULL);
14132 inputPush(ctxt, inputStream);
14134 if ((ctxt->directory == NULL) && (directory == NULL))
14135 directory = xmlParserGetDirectory((char *)uri);
14136 if ((ctxt->directory == NULL) && (directory != NULL))
14137 ctxt->directory = directory;
14138 xmlFree(uri);
14140 return(ctxt);
14144 * xmlCreateEntityParserCtxt:
14145 * @URL: the entity URL
14146 * @ID: the entity PUBLIC ID
14147 * @base: a possible base for the target URI
14149 * Create a parser context for an external entity
14150 * Automatic support for ZLIB/Compress compressed document is provided
14151 * by default if found at compile-time.
14153 * Returns the new parser context or NULL
14155 xmlParserCtxtPtr
14156 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14157 const xmlChar *base) {
14158 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14162 /************************************************************************
14164 * Front ends when parsing from a file *
14166 ************************************************************************/
14169 * xmlCreateURLParserCtxt:
14170 * @filename: the filename or URL
14171 * @options: a combination of xmlParserOption
14173 * Create a parser context for a file or URL content.
14174 * Automatic support for ZLIB/Compress compressed document is provided
14175 * by default if found at compile-time and for file accesses
14177 * Returns the new parser context or NULL
14179 xmlParserCtxtPtr
14180 xmlCreateURLParserCtxt(const char *filename, int options)
14182 xmlParserCtxtPtr ctxt;
14183 xmlParserInputPtr inputStream;
14184 char *directory = NULL;
14186 ctxt = xmlNewParserCtxt();
14187 if (ctxt == NULL) {
14188 xmlErrMemory(NULL, "cannot allocate parser context");
14189 return(NULL);
14192 if (options)
14193 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14194 ctxt->linenumbers = 1;
14196 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14197 if (inputStream == NULL) {
14198 xmlFreeParserCtxt(ctxt);
14199 return(NULL);
14202 inputPush(ctxt, inputStream);
14203 if ((ctxt->directory == NULL) && (directory == NULL))
14204 directory = xmlParserGetDirectory(filename);
14205 if ((ctxt->directory == NULL) && (directory != NULL))
14206 ctxt->directory = directory;
14208 return(ctxt);
14212 * xmlCreateFileParserCtxt:
14213 * @filename: the filename
14215 * Create a parser context for a file content.
14216 * Automatic support for ZLIB/Compress compressed document is provided
14217 * by default if found at compile-time.
14219 * Returns the new parser context or NULL
14221 xmlParserCtxtPtr
14222 xmlCreateFileParserCtxt(const char *filename)
14224 return(xmlCreateURLParserCtxt(filename, 0));
14227 #ifdef LIBXML_SAX1_ENABLED
14229 * xmlSAXParseFileWithData:
14230 * @sax: the SAX handler block
14231 * @filename: the filename
14232 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14233 * documents
14234 * @data: the userdata
14236 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14237 * compressed document is provided by default if found at compile-time.
14238 * It use the given SAX function block to handle the parsing callback.
14239 * If sax is NULL, fallback to the default DOM tree building routines.
14241 * User data (void *) is stored within the parser context in the
14242 * context's _private member, so it is available nearly everywhere in libxml
14244 * Returns the resulting document tree
14247 xmlDocPtr
14248 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14249 int recovery, void *data) {
14250 xmlDocPtr ret;
14251 xmlParserCtxtPtr ctxt;
14253 xmlInitParser();
14255 ctxt = xmlCreateFileParserCtxt(filename);
14256 if (ctxt == NULL) {
14257 return(NULL);
14259 if (sax != NULL) {
14260 if (ctxt->sax != NULL)
14261 xmlFree(ctxt->sax);
14262 ctxt->sax = sax;
14264 xmlDetectSAX2(ctxt);
14265 if (data!=NULL) {
14266 ctxt->_private = data;
14269 if (ctxt->directory == NULL)
14270 ctxt->directory = xmlParserGetDirectory(filename);
14272 ctxt->recovery = recovery;
14274 xmlParseDocument(ctxt);
14276 if ((ctxt->wellFormed) || recovery) {
14277 ret = ctxt->myDoc;
14278 if (ret != NULL) {
14279 if (ctxt->input->buf->compressed > 0)
14280 ret->compression = 9;
14281 else
14282 ret->compression = ctxt->input->buf->compressed;
14285 else {
14286 ret = NULL;
14287 xmlFreeDoc(ctxt->myDoc);
14288 ctxt->myDoc = NULL;
14290 if (sax != NULL)
14291 ctxt->sax = NULL;
14292 xmlFreeParserCtxt(ctxt);
14294 return(ret);
14298 * xmlSAXParseFile:
14299 * @sax: the SAX handler block
14300 * @filename: the filename
14301 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14302 * documents
14304 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14305 * compressed document is provided by default if found at compile-time.
14306 * It use the given SAX function block to handle the parsing callback.
14307 * If sax is NULL, fallback to the default DOM tree building routines.
14309 * Returns the resulting document tree
14312 xmlDocPtr
14313 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14314 int recovery) {
14315 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14319 * xmlRecoverDoc:
14320 * @cur: a pointer to an array of xmlChar
14322 * parse an XML in-memory document and build a tree.
14323 * In the case the document is not Well Formed, a attempt to build a
14324 * tree is tried anyway
14326 * Returns the resulting document tree or NULL in case of failure
14329 xmlDocPtr
14330 xmlRecoverDoc(const xmlChar *cur) {
14331 return(xmlSAXParseDoc(NULL, cur, 1));
14335 * xmlParseFile:
14336 * @filename: the filename
14338 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14339 * compressed document is provided by default if found at compile-time.
14341 * Returns the resulting document tree if the file was wellformed,
14342 * NULL otherwise.
14345 xmlDocPtr
14346 xmlParseFile(const char *filename) {
14347 return(xmlSAXParseFile(NULL, filename, 0));
14351 * xmlRecoverFile:
14352 * @filename: the filename
14354 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14355 * compressed document is provided by default if found at compile-time.
14356 * In the case the document is not Well Formed, it attempts to build
14357 * a tree anyway
14359 * Returns the resulting document tree or NULL in case of failure
14362 xmlDocPtr
14363 xmlRecoverFile(const char *filename) {
14364 return(xmlSAXParseFile(NULL, filename, 1));
14369 * xmlSetupParserForBuffer:
14370 * @ctxt: an XML parser context
14371 * @buffer: a xmlChar * buffer
14372 * @filename: a file name
14374 * Setup the parser context to parse a new buffer; Clears any prior
14375 * contents from the parser context. The buffer parameter must not be
14376 * NULL, but the filename parameter can be
14378 void
14379 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14380 const char* filename)
14382 xmlParserInputPtr input;
14384 if ((ctxt == NULL) || (buffer == NULL))
14385 return;
14387 input = xmlNewInputStream(ctxt);
14388 if (input == NULL) {
14389 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14390 xmlClearParserCtxt(ctxt);
14391 return;
14394 xmlClearParserCtxt(ctxt);
14395 if (filename != NULL)
14396 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14397 input->base = buffer;
14398 input->cur = buffer;
14399 input->end = &buffer[xmlStrlen(buffer)];
14400 inputPush(ctxt, input);
14404 * xmlSAXUserParseFile:
14405 * @sax: a SAX handler
14406 * @user_data: The user data returned on SAX callbacks
14407 * @filename: a file name
14409 * parse an XML file and call the given SAX handler routines.
14410 * Automatic support for ZLIB/Compress compressed document is provided
14412 * Returns 0 in case of success or a error number otherwise
14415 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14416 const char *filename) {
14417 int ret = 0;
14418 xmlParserCtxtPtr ctxt;
14420 ctxt = xmlCreateFileParserCtxt(filename);
14421 if (ctxt == NULL) return -1;
14422 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14423 xmlFree(ctxt->sax);
14424 ctxt->sax = sax;
14425 xmlDetectSAX2(ctxt);
14427 if (user_data != NULL)
14428 ctxt->userData = user_data;
14430 xmlParseDocument(ctxt);
14432 if (ctxt->wellFormed)
14433 ret = 0;
14434 else {
14435 if (ctxt->errNo != 0)
14436 ret = ctxt->errNo;
14437 else
14438 ret = -1;
14440 if (sax != NULL)
14441 ctxt->sax = NULL;
14442 if (ctxt->myDoc != NULL) {
14443 xmlFreeDoc(ctxt->myDoc);
14444 ctxt->myDoc = NULL;
14446 xmlFreeParserCtxt(ctxt);
14448 return ret;
14450 #endif /* LIBXML_SAX1_ENABLED */
14452 /************************************************************************
14454 * Front ends when parsing from memory *
14456 ************************************************************************/
14459 * xmlCreateMemoryParserCtxt:
14460 * @buffer: a pointer to a char array
14461 * @size: the size of the array
14463 * Create a parser context for an XML in-memory document.
14465 * Returns the new parser context or NULL
14467 xmlParserCtxtPtr
14468 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14469 xmlParserCtxtPtr ctxt;
14470 xmlParserInputPtr input;
14471 xmlParserInputBufferPtr buf;
14473 if (buffer == NULL)
14474 return(NULL);
14475 if (size <= 0)
14476 return(NULL);
14478 ctxt = xmlNewParserCtxt();
14479 if (ctxt == NULL)
14480 return(NULL);
14482 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14483 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14484 if (buf == NULL) {
14485 xmlFreeParserCtxt(ctxt);
14486 return(NULL);
14489 input = xmlNewInputStream(ctxt);
14490 if (input == NULL) {
14491 xmlFreeParserInputBuffer(buf);
14492 xmlFreeParserCtxt(ctxt);
14493 return(NULL);
14496 input->filename = NULL;
14497 input->buf = buf;
14498 xmlBufResetInput(input->buf->buffer, input);
14500 inputPush(ctxt, input);
14501 return(ctxt);
14504 #ifdef LIBXML_SAX1_ENABLED
14506 * xmlSAXParseMemoryWithData:
14507 * @sax: the SAX handler block
14508 * @buffer: an pointer to a char array
14509 * @size: the size of the array
14510 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14511 * documents
14512 * @data: the userdata
14514 * parse an XML in-memory block and use the given SAX function block
14515 * to handle the parsing callback. If sax is NULL, fallback to the default
14516 * DOM tree building routines.
14518 * User data (void *) is stored within the parser context in the
14519 * context's _private member, so it is available nearly everywhere in libxml
14521 * Returns the resulting document tree
14524 xmlDocPtr
14525 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14526 int size, int recovery, void *data) {
14527 xmlDocPtr ret;
14528 xmlParserCtxtPtr ctxt;
14530 xmlInitParser();
14532 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14533 if (ctxt == NULL) return(NULL);
14534 if (sax != NULL) {
14535 if (ctxt->sax != NULL)
14536 xmlFree(ctxt->sax);
14537 ctxt->sax = sax;
14539 xmlDetectSAX2(ctxt);
14540 if (data!=NULL) {
14541 ctxt->_private=data;
14544 ctxt->recovery = recovery;
14546 xmlParseDocument(ctxt);
14548 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14549 else {
14550 ret = NULL;
14551 xmlFreeDoc(ctxt->myDoc);
14552 ctxt->myDoc = NULL;
14554 if (sax != NULL)
14555 ctxt->sax = NULL;
14556 xmlFreeParserCtxt(ctxt);
14558 return(ret);
14562 * xmlSAXParseMemory:
14563 * @sax: the SAX handler block
14564 * @buffer: an pointer to a char array
14565 * @size: the size of the array
14566 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14567 * documents
14569 * parse an XML in-memory block and use the given SAX function block
14570 * to handle the parsing callback. If sax is NULL, fallback to the default
14571 * DOM tree building routines.
14573 * Returns the resulting document tree
14575 xmlDocPtr
14576 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14577 int size, int recovery) {
14578 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14582 * xmlParseMemory:
14583 * @buffer: an pointer to a char array
14584 * @size: the size of the array
14586 * parse an XML in-memory block and build a tree.
14588 * Returns the resulting document tree
14591 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14592 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14596 * xmlRecoverMemory:
14597 * @buffer: an pointer to a char array
14598 * @size: the size of the array
14600 * parse an XML in-memory block and build a tree.
14601 * In the case the document is not Well Formed, an attempt to
14602 * build a tree is tried anyway
14604 * Returns the resulting document tree or NULL in case of error
14607 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14608 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14612 * xmlSAXUserParseMemory:
14613 * @sax: a SAX handler
14614 * @user_data: The user data returned on SAX callbacks
14615 * @buffer: an in-memory XML document input
14616 * @size: the length of the XML document in bytes
14618 * A better SAX parsing routine.
14619 * parse an XML in-memory buffer and call the given SAX handler routines.
14621 * Returns 0 in case of success or a error number otherwise
14623 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14624 const char *buffer, int size) {
14625 int ret = 0;
14626 xmlParserCtxtPtr ctxt;
14628 xmlInitParser();
14630 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14631 if (ctxt == NULL) return -1;
14632 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14633 xmlFree(ctxt->sax);
14634 ctxt->sax = sax;
14635 xmlDetectSAX2(ctxt);
14637 if (user_data != NULL)
14638 ctxt->userData = user_data;
14640 xmlParseDocument(ctxt);
14642 if (ctxt->wellFormed)
14643 ret = 0;
14644 else {
14645 if (ctxt->errNo != 0)
14646 ret = ctxt->errNo;
14647 else
14648 ret = -1;
14650 if (sax != NULL)
14651 ctxt->sax = NULL;
14652 if (ctxt->myDoc != NULL) {
14653 xmlFreeDoc(ctxt->myDoc);
14654 ctxt->myDoc = NULL;
14656 xmlFreeParserCtxt(ctxt);
14658 return ret;
14660 #endif /* LIBXML_SAX1_ENABLED */
14663 * xmlCreateDocParserCtxt:
14664 * @cur: a pointer to an array of xmlChar
14666 * Creates a parser context for an XML in-memory document.
14668 * Returns the new parser context or NULL
14670 xmlParserCtxtPtr
14671 xmlCreateDocParserCtxt(const xmlChar *cur) {
14672 int len;
14674 if (cur == NULL)
14675 return(NULL);
14676 len = xmlStrlen(cur);
14677 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14680 #ifdef LIBXML_SAX1_ENABLED
14682 * xmlSAXParseDoc:
14683 * @sax: the SAX handler block
14684 * @cur: a pointer to an array of xmlChar
14685 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14686 * documents
14688 * parse an XML in-memory document and build a tree.
14689 * It use the given SAX function block to handle the parsing callback.
14690 * If sax is NULL, fallback to the default DOM tree building routines.
14692 * Returns the resulting document tree
14695 xmlDocPtr
14696 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14697 xmlDocPtr ret;
14698 xmlParserCtxtPtr ctxt;
14699 xmlSAXHandlerPtr oldsax = NULL;
14701 if (cur == NULL) return(NULL);
14704 ctxt = xmlCreateDocParserCtxt(cur);
14705 if (ctxt == NULL) return(NULL);
14706 if (sax != NULL) {
14707 oldsax = ctxt->sax;
14708 ctxt->sax = sax;
14709 ctxt->userData = NULL;
14711 xmlDetectSAX2(ctxt);
14713 xmlParseDocument(ctxt);
14714 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14715 else {
14716 ret = NULL;
14717 xmlFreeDoc(ctxt->myDoc);
14718 ctxt->myDoc = NULL;
14720 if (sax != NULL)
14721 ctxt->sax = oldsax;
14722 xmlFreeParserCtxt(ctxt);
14724 return(ret);
14728 * xmlParseDoc:
14729 * @cur: a pointer to an array of xmlChar
14731 * parse an XML in-memory document and build a tree.
14733 * Returns the resulting document tree
14736 xmlDocPtr
14737 xmlParseDoc(const xmlChar *cur) {
14738 return(xmlSAXParseDoc(NULL, cur, 0));
14740 #endif /* LIBXML_SAX1_ENABLED */
14742 #ifdef LIBXML_LEGACY_ENABLED
14743 /************************************************************************
14745 * Specific function to keep track of entities references *
14746 * and used by the XSLT debugger *
14748 ************************************************************************/
14750 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14753 * xmlAddEntityReference:
14754 * @ent : A valid entity
14755 * @firstNode : A valid first node for children of entity
14756 * @lastNode : A valid last node of children entity
14758 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14760 static void
14761 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14762 xmlNodePtr lastNode)
14764 if (xmlEntityRefFunc != NULL) {
14765 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14771 * xmlSetEntityReferenceFunc:
14772 * @func: A valid function
14774 * Set the function to call call back when a xml reference has been made
14776 void
14777 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14779 xmlEntityRefFunc = func;
14781 #endif /* LIBXML_LEGACY_ENABLED */
14783 /************************************************************************
14785 * Miscellaneous *
14787 ************************************************************************/
14789 #ifdef LIBXML_XPATH_ENABLED
14790 #include <libxml/xpath.h>
14791 #endif
14793 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14794 static int xmlParserInitialized = 0;
14797 * xmlInitParser:
14799 * Initialization function for the XML parser.
14800 * This is not reentrant. Call once before processing in case of
14801 * use in multithreaded programs.
14804 void
14805 xmlInitParser(void) {
14806 if (xmlParserInitialized != 0)
14807 return;
14809 #ifdef LIBXML_THREAD_ENABLED
14810 __xmlGlobalInitMutexLock();
14811 if (xmlParserInitialized == 0) {
14812 #endif
14813 xmlInitThreads();
14814 xmlInitGlobals();
14815 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14816 (xmlGenericError == NULL))
14817 initGenericErrorDefaultFunc(NULL);
14818 xmlInitMemory();
14819 xmlInitializeDict();
14820 xmlInitCharEncodingHandlers();
14821 xmlDefaultSAXHandlerInit();
14822 xmlRegisterDefaultInputCallbacks();
14823 #ifdef LIBXML_OUTPUT_ENABLED
14824 xmlRegisterDefaultOutputCallbacks();
14825 #endif /* LIBXML_OUTPUT_ENABLED */
14826 #ifdef LIBXML_HTML_ENABLED
14827 htmlInitAutoClose();
14828 htmlDefaultSAXHandlerInit();
14829 #endif
14830 #ifdef LIBXML_XPATH_ENABLED
14831 xmlXPathInit();
14832 #endif
14833 #ifdef LIBXML_CATALOG_ENABLED
14834 xmlInitializeCatalog();
14835 #endif
14836 xmlParserInitialized = 1;
14837 #ifdef LIBXML_THREAD_ENABLED
14839 __xmlGlobalInitMutexUnlock();
14840 #endif
14844 * xmlCleanupParser:
14846 * This function name is somewhat misleading. It does not clean up
14847 * parser state, it cleans up memory allocated by the library itself.
14848 * It is a cleanup function for the XML library. It tries to reclaim all
14849 * related global memory allocated for the library processing.
14850 * It doesn't deallocate any document related memory. One should
14851 * call xmlCleanupParser() only when the process has finished using
14852 * the library and all XML/HTML documents built with it.
14853 * See also xmlInitParser() which has the opposite function of preparing
14854 * the library for operations.
14856 * WARNING: if your application is multithreaded or has plugin support
14857 * calling this may crash the application if another thread or
14858 * a plugin is still using libxml2. It's sometimes very hard to
14859 * guess if libxml2 is in use in the application, some libraries
14860 * or plugins may use it without notice. In case of doubt abstain
14861 * from calling this function or do it just before calling exit()
14862 * to avoid leak reports from valgrind !
14865 void
14866 xmlCleanupParser(void) {
14867 if (!xmlParserInitialized)
14868 return;
14870 xmlCleanupCharEncodingHandlers();
14871 #ifdef LIBXML_CATALOG_ENABLED
14872 xmlCatalogCleanup();
14873 #endif
14874 xmlDictCleanup();
14875 xmlCleanupInputCallbacks();
14876 #ifdef LIBXML_OUTPUT_ENABLED
14877 xmlCleanupOutputCallbacks();
14878 #endif
14879 #ifdef LIBXML_SCHEMAS_ENABLED
14880 xmlSchemaCleanupTypes();
14881 xmlRelaxNGCleanupTypes();
14882 #endif
14883 xmlResetLastError();
14884 xmlCleanupGlobals();
14885 xmlCleanupThreads(); /* must be last if called not from the main thread */
14886 xmlCleanupMemory();
14887 xmlParserInitialized = 0;
14890 /************************************************************************
14892 * New set (2.6.0) of simpler and more flexible APIs *
14894 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside unbraced if/else); the invocation must
 * be terminated with a semicolon.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
14909 * xmlCtxtReset:
14910 * @ctxt: an XML parser context
14912 * Reset a parser context
14914 void
14915 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14917 xmlParserInputPtr input;
14918 xmlDictPtr dict;
14920 if (ctxt == NULL)
14921 return;
14923 dict = ctxt->dict;
14925 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14926 xmlFreeInputStream(input);
14928 ctxt->inputNr = 0;
14929 ctxt->input = NULL;
14931 ctxt->spaceNr = 0;
14932 if (ctxt->spaceTab != NULL) {
14933 ctxt->spaceTab[0] = -1;
14934 ctxt->space = &ctxt->spaceTab[0];
14935 } else {
14936 ctxt->space = NULL;
14940 ctxt->nodeNr = 0;
14941 ctxt->node = NULL;
14943 ctxt->nameNr = 0;
14944 ctxt->name = NULL;
14946 DICT_FREE(ctxt->version);
14947 ctxt->version = NULL;
14948 DICT_FREE(ctxt->encoding);
14949 ctxt->encoding = NULL;
14950 DICT_FREE(ctxt->directory);
14951 ctxt->directory = NULL;
14952 DICT_FREE(ctxt->extSubURI);
14953 ctxt->extSubURI = NULL;
14954 DICT_FREE(ctxt->extSubSystem);
14955 ctxt->extSubSystem = NULL;
14956 if (ctxt->myDoc != NULL)
14957 xmlFreeDoc(ctxt->myDoc);
14958 ctxt->myDoc = NULL;
14960 ctxt->standalone = -1;
14961 ctxt->hasExternalSubset = 0;
14962 ctxt->hasPErefs = 0;
14963 ctxt->html = 0;
14964 ctxt->external = 0;
14965 ctxt->instate = XML_PARSER_START;
14966 ctxt->token = 0;
14968 ctxt->wellFormed = 1;
14969 ctxt->nsWellFormed = 1;
14970 ctxt->disableSAX = 0;
14971 ctxt->valid = 1;
14972 #if 0
14973 ctxt->vctxt.userData = ctxt;
14974 ctxt->vctxt.error = xmlParserValidityError;
14975 ctxt->vctxt.warning = xmlParserValidityWarning;
14976 #endif
14977 ctxt->record_info = 0;
14978 ctxt->nbChars = 0;
14979 ctxt->checkIndex = 0;
14980 ctxt->inSubset = 0;
14981 ctxt->errNo = XML_ERR_OK;
14982 ctxt->depth = 0;
14983 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14984 ctxt->catalogs = NULL;
14985 ctxt->nbentities = 0;
14986 ctxt->sizeentities = 0;
14987 ctxt->sizeentcopy = 0;
14988 xmlInitNodeInfoSeq(&ctxt->node_seq);
14990 if (ctxt->attsDefault != NULL) {
14991 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14992 ctxt->attsDefault = NULL;
14994 if (ctxt->attsSpecial != NULL) {
14995 xmlHashFree(ctxt->attsSpecial, NULL);
14996 ctxt->attsSpecial = NULL;
14999 #ifdef LIBXML_CATALOG_ENABLED
15000 if (ctxt->catalogs != NULL)
15001 xmlCatalogFreeLocal(ctxt->catalogs);
15002 #endif
15003 if (ctxt->lastError.code != XML_ERR_OK)
15004 xmlResetError(&ctxt->lastError);
15008 * xmlCtxtResetPush:
15009 * @ctxt: an XML parser context
15010 * @chunk: a pointer to an array of chars
15011 * @size: number of chars in the array
15012 * @filename: an optional file name or URI
15013 * @encoding: the document encoding, or NULL
15015 * Reset a push parser context
15017 * Returns 0 in case of success and 1 in case of error
15020 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15021 int size, const char *filename, const char *encoding)
15023 xmlParserInputPtr inputStream;
15024 xmlParserInputBufferPtr buf;
15025 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15027 if (ctxt == NULL)
15028 return(1);
15030 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15031 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15033 buf = xmlAllocParserInputBuffer(enc);
15034 if (buf == NULL)
15035 return(1);
15037 if (ctxt == NULL) {
15038 xmlFreeParserInputBuffer(buf);
15039 return(1);
15042 xmlCtxtReset(ctxt);
15044 if (ctxt->pushTab == NULL) {
15045 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15046 sizeof(xmlChar *));
15047 if (ctxt->pushTab == NULL) {
15048 xmlErrMemory(ctxt, NULL);
15049 xmlFreeParserInputBuffer(buf);
15050 return(1);
15054 if (filename == NULL) {
15055 ctxt->directory = NULL;
15056 } else {
15057 ctxt->directory = xmlParserGetDirectory(filename);
15060 inputStream = xmlNewInputStream(ctxt);
15061 if (inputStream == NULL) {
15062 xmlFreeParserInputBuffer(buf);
15063 return(1);
15066 if (filename == NULL)
15067 inputStream->filename = NULL;
15068 else
15069 inputStream->filename = (char *)
15070 xmlCanonicPath((const xmlChar *) filename);
15071 inputStream->buf = buf;
15072 xmlBufResetInput(buf->buffer, inputStream);
15074 inputPush(ctxt, inputStream);
15076 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15077 (ctxt->input->buf != NULL)) {
15078 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15079 size_t cur = ctxt->input->cur - ctxt->input->base;
15081 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15083 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15084 #ifdef DEBUG_PUSH
15085 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15086 #endif
15089 if (encoding != NULL) {
15090 xmlCharEncodingHandlerPtr hdlr;
15092 if (ctxt->encoding != NULL)
15093 xmlFree((xmlChar *) ctxt->encoding);
15094 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15096 hdlr = xmlFindCharEncodingHandler(encoding);
15097 if (hdlr != NULL) {
15098 xmlSwitchToEncoding(ctxt, hdlr);
15099 } else {
15100 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15101 "Unsupported encoding %s\n", BAD_CAST encoding);
15103 } else if (enc != XML_CHAR_ENCODING_NONE) {
15104 xmlSwitchEncoding(ctxt, enc);
15107 return(0);
15112 * xmlCtxtUseOptionsInternal:
15113 * @ctxt: an XML parser context
15114 * @options: a combination of xmlParserOption
15115 * @encoding: the user provided encoding to use
15117 * Applies the options to the parser context
15119 * Returns 0 in case of success, the set of unknown or unimplemented options
15120 * in case of error.
15122 static int
15123 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15125 if (ctxt == NULL)
15126 return(-1);
15127 if (encoding != NULL) {
15128 if (ctxt->encoding != NULL)
15129 xmlFree((xmlChar *) ctxt->encoding);
15130 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15132 if (options & XML_PARSE_RECOVER) {
15133 ctxt->recovery = 1;
15134 options -= XML_PARSE_RECOVER;
15135 ctxt->options |= XML_PARSE_RECOVER;
15136 } else
15137 ctxt->recovery = 0;
15138 if (options & XML_PARSE_DTDLOAD) {
15139 ctxt->loadsubset = XML_DETECT_IDS;
15140 options -= XML_PARSE_DTDLOAD;
15141 ctxt->options |= XML_PARSE_DTDLOAD;
15142 } else
15143 ctxt->loadsubset = 0;
15144 if (options & XML_PARSE_DTDATTR) {
15145 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15146 options -= XML_PARSE_DTDATTR;
15147 ctxt->options |= XML_PARSE_DTDATTR;
15149 if (options & XML_PARSE_NOENT) {
15150 ctxt->replaceEntities = 1;
15151 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15152 options -= XML_PARSE_NOENT;
15153 ctxt->options |= XML_PARSE_NOENT;
15154 } else
15155 ctxt->replaceEntities = 0;
15156 if (options & XML_PARSE_PEDANTIC) {
15157 ctxt->pedantic = 1;
15158 options -= XML_PARSE_PEDANTIC;
15159 ctxt->options |= XML_PARSE_PEDANTIC;
15160 } else
15161 ctxt->pedantic = 0;
15162 if (options & XML_PARSE_NOBLANKS) {
15163 ctxt->keepBlanks = 0;
15164 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15165 options -= XML_PARSE_NOBLANKS;
15166 ctxt->options |= XML_PARSE_NOBLANKS;
15167 } else
15168 ctxt->keepBlanks = 1;
15169 if (options & XML_PARSE_DTDVALID) {
15170 ctxt->validate = 1;
15171 if (options & XML_PARSE_NOWARNING)
15172 ctxt->vctxt.warning = NULL;
15173 if (options & XML_PARSE_NOERROR)
15174 ctxt->vctxt.error = NULL;
15175 options -= XML_PARSE_DTDVALID;
15176 ctxt->options |= XML_PARSE_DTDVALID;
15177 } else
15178 ctxt->validate = 0;
15179 if (options & XML_PARSE_NOWARNING) {
15180 ctxt->sax->warning = NULL;
15181 options -= XML_PARSE_NOWARNING;
15183 if (options & XML_PARSE_NOERROR) {
15184 ctxt->sax->error = NULL;
15185 ctxt->sax->fatalError = NULL;
15186 options -= XML_PARSE_NOERROR;
15188 #ifdef LIBXML_SAX1_ENABLED
15189 if (options & XML_PARSE_SAX1) {
15190 ctxt->sax->startElement = xmlSAX2StartElement;
15191 ctxt->sax->endElement = xmlSAX2EndElement;
15192 ctxt->sax->startElementNs = NULL;
15193 ctxt->sax->endElementNs = NULL;
15194 ctxt->sax->initialized = 1;
15195 options -= XML_PARSE_SAX1;
15196 ctxt->options |= XML_PARSE_SAX1;
15198 #endif /* LIBXML_SAX1_ENABLED */
15199 if (options & XML_PARSE_NODICT) {
15200 ctxt->dictNames = 0;
15201 options -= XML_PARSE_NODICT;
15202 ctxt->options |= XML_PARSE_NODICT;
15203 } else {
15204 ctxt->dictNames = 1;
15206 if (options & XML_PARSE_NOCDATA) {
15207 ctxt->sax->cdataBlock = NULL;
15208 options -= XML_PARSE_NOCDATA;
15209 ctxt->options |= XML_PARSE_NOCDATA;
15211 if (options & XML_PARSE_NSCLEAN) {
15212 ctxt->options |= XML_PARSE_NSCLEAN;
15213 options -= XML_PARSE_NSCLEAN;
15215 if (options & XML_PARSE_NONET) {
15216 ctxt->options |= XML_PARSE_NONET;
15217 options -= XML_PARSE_NONET;
15219 if (options & XML_PARSE_COMPACT) {
15220 ctxt->options |= XML_PARSE_COMPACT;
15221 options -= XML_PARSE_COMPACT;
15223 if (options & XML_PARSE_OLD10) {
15224 ctxt->options |= XML_PARSE_OLD10;
15225 options -= XML_PARSE_OLD10;
15227 if (options & XML_PARSE_NOBASEFIX) {
15228 ctxt->options |= XML_PARSE_NOBASEFIX;
15229 options -= XML_PARSE_NOBASEFIX;
15231 if (options & XML_PARSE_HUGE) {
15232 ctxt->options |= XML_PARSE_HUGE;
15233 options -= XML_PARSE_HUGE;
15234 if (ctxt->dict != NULL)
15235 xmlDictSetLimit(ctxt->dict, 0);
15237 if (options & XML_PARSE_OLDSAX) {
15238 ctxt->options |= XML_PARSE_OLDSAX;
15239 options -= XML_PARSE_OLDSAX;
15241 if (options & XML_PARSE_IGNORE_ENC) {
15242 ctxt->options |= XML_PARSE_IGNORE_ENC;
15243 options -= XML_PARSE_IGNORE_ENC;
15245 if (options & XML_PARSE_BIG_LINES) {
15246 ctxt->options |= XML_PARSE_BIG_LINES;
15247 options -= XML_PARSE_BIG_LINES;
15249 ctxt->linenumbers = 1;
15250 return (options);
15254 * xmlCtxtUseOptions:
15255 * @ctxt: an XML parser context
15256 * @options: a combination of xmlParserOption
15258 * Applies the options to the parser context
15260 * Returns 0 in case of success, the set of unknown or unimplemented options
15261 * in case of error.
15264 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15266 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15270 * xmlDoRead:
15271 * @ctxt: an XML parser context
15272 * @URL: the base URL to use for the document
15273 * @encoding: the document encoding, or NULL
15274 * @options: a combination of xmlParserOption
15275 * @reuse: keep the context for reuse
15277 * Common front-end for the xmlRead functions
15279 * Returns the resulting document tree or NULL
15281 static xmlDocPtr
15282 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15283 int options, int reuse)
15285 xmlDocPtr ret;
15287 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15288 if (encoding != NULL) {
15289 xmlCharEncodingHandlerPtr hdlr;
15291 hdlr = xmlFindCharEncodingHandler(encoding);
15292 if (hdlr != NULL)
15293 xmlSwitchToEncoding(ctxt, hdlr);
15295 if ((URL != NULL) && (ctxt->input != NULL) &&
15296 (ctxt->input->filename == NULL))
15297 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15298 xmlParseDocument(ctxt);
15299 if ((ctxt->wellFormed) || ctxt->recovery)
15300 ret = ctxt->myDoc;
15301 else {
15302 ret = NULL;
15303 if (ctxt->myDoc != NULL) {
15304 xmlFreeDoc(ctxt->myDoc);
15307 ctxt->myDoc = NULL;
15308 if (!reuse) {
15309 xmlFreeParserCtxt(ctxt);
15312 return (ret);
15316 * xmlReadDoc:
15317 * @cur: a pointer to a zero terminated string
15318 * @URL: the base URL to use for the document
15319 * @encoding: the document encoding, or NULL
15320 * @options: a combination of xmlParserOption
15322 * parse an XML in-memory document and build a tree.
15324 * Returns the resulting document tree
15326 xmlDocPtr
15327 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15329 xmlParserCtxtPtr ctxt;
15331 if (cur == NULL)
15332 return (NULL);
15333 xmlInitParser();
15335 ctxt = xmlCreateDocParserCtxt(cur);
15336 if (ctxt == NULL)
15337 return (NULL);
15338 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15342 * xmlReadFile:
15343 * @filename: a file or URL
15344 * @encoding: the document encoding, or NULL
15345 * @options: a combination of xmlParserOption
15347 * parse an XML file from the filesystem or the network.
15349 * Returns the resulting document tree
15351 xmlDocPtr
15352 xmlReadFile(const char *filename, const char *encoding, int options)
15354 xmlParserCtxtPtr ctxt;
15356 xmlInitParser();
15357 ctxt = xmlCreateURLParserCtxt(filename, options);
15358 if (ctxt == NULL)
15359 return (NULL);
15360 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15364 * xmlReadMemory:
15365 * @buffer: a pointer to a char array
15366 * @size: the size of the array
15367 * @URL: the base URL to use for the document
15368 * @encoding: the document encoding, or NULL
15369 * @options: a combination of xmlParserOption
15371 * parse an XML in-memory document and build a tree.
15373 * Returns the resulting document tree
15375 xmlDocPtr
15376 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15378 xmlParserCtxtPtr ctxt;
15380 xmlInitParser();
15381 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15382 if (ctxt == NULL)
15383 return (NULL);
15384 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15388 * xmlReadFd:
15389 * @fd: an open file descriptor
15390 * @URL: the base URL to use for the document
15391 * @encoding: the document encoding, or NULL
15392 * @options: a combination of xmlParserOption
15394 * parse an XML from a file descriptor and build a tree.
15395 * NOTE that the file descriptor will not be closed when the
15396 * reader is closed or reset.
15398 * Returns the resulting document tree
15400 xmlDocPtr
15401 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15403 xmlParserCtxtPtr ctxt;
15404 xmlParserInputBufferPtr input;
15405 xmlParserInputPtr stream;
15407 if (fd < 0)
15408 return (NULL);
15409 xmlInitParser();
15411 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15412 if (input == NULL)
15413 return (NULL);
15414 input->closecallback = NULL;
15415 ctxt = xmlNewParserCtxt();
15416 if (ctxt == NULL) {
15417 xmlFreeParserInputBuffer(input);
15418 return (NULL);
15420 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15421 if (stream == NULL) {
15422 xmlFreeParserInputBuffer(input);
15423 xmlFreeParserCtxt(ctxt);
15424 return (NULL);
15426 inputPush(ctxt, stream);
15427 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15431 * xmlReadIO:
15432 * @ioread: an I/O read function
15433 * @ioclose: an I/O close function
15434 * @ioctx: an I/O handler
15435 * @URL: the base URL to use for the document
15436 * @encoding: the document encoding, or NULL
15437 * @options: a combination of xmlParserOption
15439 * parse an XML document from I/O functions and source and build a tree.
15441 * Returns the resulting document tree
15443 xmlDocPtr
15444 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15445 void *ioctx, const char *URL, const char *encoding, int options)
15447 xmlParserCtxtPtr ctxt;
15448 xmlParserInputBufferPtr input;
15449 xmlParserInputPtr stream;
15451 if (ioread == NULL)
15452 return (NULL);
15453 xmlInitParser();
15455 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15456 XML_CHAR_ENCODING_NONE);
15457 if (input == NULL) {
15458 if (ioclose != NULL)
15459 ioclose(ioctx);
15460 return (NULL);
15462 ctxt = xmlNewParserCtxt();
15463 if (ctxt == NULL) {
15464 xmlFreeParserInputBuffer(input);
15465 return (NULL);
15467 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15468 if (stream == NULL) {
15469 xmlFreeParserInputBuffer(input);
15470 xmlFreeParserCtxt(ctxt);
15471 return (NULL);
15473 inputPush(ctxt, stream);
15474 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15478 * xmlCtxtReadDoc:
15479 * @ctxt: an XML parser context
15480 * @cur: a pointer to a zero terminated string
15481 * @URL: the base URL to use for the document
15482 * @encoding: the document encoding, or NULL
15483 * @options: a combination of xmlParserOption
15485 * parse an XML in-memory document and build a tree.
15486 * This reuses the existing @ctxt parser context
15488 * Returns the resulting document tree
15490 xmlDocPtr
15491 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15492 const char *URL, const char *encoding, int options)
15494 xmlParserInputPtr stream;
15496 if (cur == NULL)
15497 return (NULL);
15498 if (ctxt == NULL)
15499 return (NULL);
15500 xmlInitParser();
15502 xmlCtxtReset(ctxt);
15504 stream = xmlNewStringInputStream(ctxt, cur);
15505 if (stream == NULL) {
15506 return (NULL);
15508 inputPush(ctxt, stream);
15509 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15513 * xmlCtxtReadFile:
15514 * @ctxt: an XML parser context
15515 * @filename: a file or URL
15516 * @encoding: the document encoding, or NULL
15517 * @options: a combination of xmlParserOption
15519 * parse an XML file from the filesystem or the network.
15520 * This reuses the existing @ctxt parser context
15522 * Returns the resulting document tree
15524 xmlDocPtr
15525 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15526 const char *encoding, int options)
15528 xmlParserInputPtr stream;
15530 if (filename == NULL)
15531 return (NULL);
15532 if (ctxt == NULL)
15533 return (NULL);
15534 xmlInitParser();
15536 xmlCtxtReset(ctxt);
15538 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15539 if (stream == NULL) {
15540 return (NULL);
15542 inputPush(ctxt, stream);
15543 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15547 * xmlCtxtReadMemory:
15548 * @ctxt: an XML parser context
15549 * @buffer: a pointer to a char array
15550 * @size: the size of the array
15551 * @URL: the base URL to use for the document
15552 * @encoding: the document encoding, or NULL
15553 * @options: a combination of xmlParserOption
15555 * parse an XML in-memory document and build a tree.
15556 * This reuses the existing @ctxt parser context
15558 * Returns the resulting document tree
15560 xmlDocPtr
15561 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15562 const char *URL, const char *encoding, int options)
15564 xmlParserInputBufferPtr input;
15565 xmlParserInputPtr stream;
15567 if (ctxt == NULL)
15568 return (NULL);
15569 if (buffer == NULL)
15570 return (NULL);
15571 xmlInitParser();
15573 xmlCtxtReset(ctxt);
15575 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15576 if (input == NULL) {
15577 return(NULL);
15580 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15581 if (stream == NULL) {
15582 xmlFreeParserInputBuffer(input);
15583 return(NULL);
15586 inputPush(ctxt, stream);
15587 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15591 * xmlCtxtReadFd:
15592 * @ctxt: an XML parser context
15593 * @fd: an open file descriptor
15594 * @URL: the base URL to use for the document
15595 * @encoding: the document encoding, or NULL
15596 * @options: a combination of xmlParserOption
15598 * parse an XML from a file descriptor and build a tree.
15599 * This reuses the existing @ctxt parser context
15600 * NOTE that the file descriptor will not be closed when the
15601 * reader is closed or reset.
15603 * Returns the resulting document tree
15605 xmlDocPtr
15606 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15607 const char *URL, const char *encoding, int options)
15609 xmlParserInputBufferPtr input;
15610 xmlParserInputPtr stream;
15612 if (fd < 0)
15613 return (NULL);
15614 if (ctxt == NULL)
15615 return (NULL);
15616 xmlInitParser();
15618 xmlCtxtReset(ctxt);
15621 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15622 if (input == NULL)
15623 return (NULL);
15624 input->closecallback = NULL;
15625 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15626 if (stream == NULL) {
15627 xmlFreeParserInputBuffer(input);
15628 return (NULL);
15630 inputPush(ctxt, stream);
15631 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15635 * xmlCtxtReadIO:
15636 * @ctxt: an XML parser context
15637 * @ioread: an I/O read function
15638 * @ioclose: an I/O close function
15639 * @ioctx: an I/O handler
15640 * @URL: the base URL to use for the document
15641 * @encoding: the document encoding, or NULL
15642 * @options: a combination of xmlParserOption
15644 * parse an XML document from I/O functions and source and build a tree.
15645 * This reuses the existing @ctxt parser context
15647 * Returns the resulting document tree
15649 xmlDocPtr
15650 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15651 xmlInputCloseCallback ioclose, void *ioctx,
15652 const char *URL,
15653 const char *encoding, int options)
15655 xmlParserInputBufferPtr input;
15656 xmlParserInputPtr stream;
15658 if (ioread == NULL)
15659 return (NULL);
15660 if (ctxt == NULL)
15661 return (NULL);
15662 xmlInitParser();
15664 xmlCtxtReset(ctxt);
15666 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15667 XML_CHAR_ENCODING_NONE);
15668 if (input == NULL) {
15669 if (ioclose != NULL)
15670 ioclose(ioctx);
15671 return (NULL);
15673 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15674 if (stream == NULL) {
15675 xmlFreeParserInputBuffer(input);
15676 return (NULL);
15678 inputPush(ctxt, stream);
15679 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15682 #define bottom_parser
15683 #include "elfgcchack.h"