Revert of Update the extension whitelist for application host change. (patchset ...
[chromium-blink-merge.git] / third_party / libxml / src / parser.c
blob6ee55378f3fc4c125d923422f142980787004a3e
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
28 * See Copyright for the status of this software.
30 * daniel@veillard.com
33 #define IN_LIBXML
34 #include "libxml.h"
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
83 static void
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
/************************************************************************
 *                                                                      *
 *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 *                                                                      *
 ************************************************************************/

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
108 * xmlParserEntityCheck
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
116 static int
117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
120 unsigned long consumed = 0;
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
128 * Do the check based on the replacement size of the entity
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
134 * A limit on the amount of text data reasonably used
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
140 consumed += ctxt->sizeentities;
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
147 * use the number of parsed entities in the replacement
149 size = ent->checked;
152 * The amount of data parsed counting entities size only once
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
158 consumed += ctxt->sizeentities;
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
168 * strange we got no data for checking just return
170 return (0);
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;

#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL-terminated list of the XML-prefixed PI targets allowed by
 * the W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
211 void *user_data, int depth, const xmlChar *URL,
212 const xmlChar *ID, xmlNodePtr *list);
214 static int
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
217 #ifdef LIBXML_LEGACY_ENABLED
218 static void
219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
221 #endif /* LIBXML_LEGACY_ENABLED */
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
227 static int
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
230 /************************************************************************
232 * Some factorized error routines *
234 ************************************************************************/
237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
242 * Handle a redefinition of attribute error
244 static void
245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
254 if (prefix == NULL)
255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
259 else
260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
280 static void
281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
283 const char *errmsg;
285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
292 case XML_ERR_INVALID_DEC_CHARREF:
293 errmsg = "CharRef: invalid decimal value\n";
294 break;
295 case XML_ERR_INVALID_CHARREF:
296 errmsg = "CharRef: invalid value\n";
297 break;
298 case XML_ERR_INTERNAL_ERROR:
299 errmsg = "internal error";
300 break;
301 case XML_ERR_PEREF_AT_EOF:
302 errmsg = "PEReference at end of document\n";
303 break;
304 case XML_ERR_PEREF_IN_PROLOG:
305 errmsg = "PEReference in prolog\n";
306 break;
307 case XML_ERR_PEREF_IN_EPILOG:
308 errmsg = "PEReference in epilog\n";
309 break;
310 case XML_ERR_PEREF_NO_NAME:
311 errmsg = "PEReference: no name\n";
312 break;
313 case XML_ERR_PEREF_SEMICOL_MISSING:
314 errmsg = "PEReference: expecting ';'\n";
315 break;
316 case XML_ERR_ENTITY_LOOP:
317 errmsg = "Detected an entity reference loop\n";
318 break;
319 case XML_ERR_ENTITY_NOT_STARTED:
320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
322 case XML_ERR_ENTITY_PE_INTERNAL:
323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
325 case XML_ERR_ENTITY_NOT_FINISHED:
326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
329 errmsg = "AttValue: \" or ' expected\n";
330 break;
331 case XML_ERR_LT_IN_ATTRIBUTE:
332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
334 case XML_ERR_LITERAL_NOT_STARTED:
335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
337 case XML_ERR_LITERAL_NOT_FINISHED:
338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
340 case XML_ERR_MISPLACED_CDATA_END:
341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
343 case XML_ERR_URI_REQUIRED:
344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
346 case XML_ERR_PUBID_REQUIRED:
347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
349 case XML_ERR_HYPHEN_IN_COMMENT:
350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
352 case XML_ERR_PI_NOT_STARTED:
353 errmsg = "xmlParsePI : no target name\n";
354 break;
355 case XML_ERR_RESERVED_XML_NAME:
356 errmsg = "Invalid PI name\n";
357 break;
358 case XML_ERR_NOTATION_NOT_STARTED:
359 errmsg = "NOTATION: Name expected here\n";
360 break;
361 case XML_ERR_NOTATION_NOT_FINISHED:
362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
364 case XML_ERR_VALUE_REQUIRED:
365 errmsg = "Entity value required\n";
366 break;
367 case XML_ERR_URI_FRAGMENT:
368 errmsg = "Fragment not allowed";
369 break;
370 case XML_ERR_ATTLIST_NOT_STARTED:
371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
373 case XML_ERR_NMTOKEN_REQUIRED:
374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
376 case XML_ERR_ATTLIST_NOT_FINISHED:
377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
379 case XML_ERR_MIXED_NOT_STARTED:
380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
382 case XML_ERR_PCDATA_REQUIRED:
383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
391 case XML_ERR_PEREF_IN_INT_SUBSET:
392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
395 case XML_ERR_GT_REQUIRED:
396 errmsg = "expected '>'\n";
397 break;
398 case XML_ERR_CONDSEC_INVALID:
399 errmsg = "XML conditional section '[' expected\n";
400 break;
401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
408 case XML_ERR_CONDSEC_NOT_FINISHED:
409 errmsg = "XML conditional section not closed\n";
410 break;
411 case XML_ERR_XMLDECL_NOT_STARTED:
412 errmsg = "Text declaration '<?xml' required\n";
413 break;
414 case XML_ERR_XMLDECL_NOT_FINISHED:
415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
417 case XML_ERR_EXT_ENTITY_STANDALONE:
418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
421 errmsg = "EntityRef: expecting ';'\n";
422 break;
423 case XML_ERR_DOCTYPE_NOT_FINISHED:
424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
426 case XML_ERR_LTSLASH_REQUIRED:
427 errmsg = "EndTag: '</' not found\n";
428 break;
429 case XML_ERR_EQUAL_REQUIRED:
430 errmsg = "expected '='\n";
431 break;
432 case XML_ERR_STRING_NOT_CLOSED:
433 errmsg = "String not closed expecting \" or '\n";
434 break;
435 case XML_ERR_STRING_NOT_STARTED:
436 errmsg = "String not started expecting ' or \"\n";
437 break;
438 case XML_ERR_ENCODING_NAME:
439 errmsg = "Invalid XML encoding name\n";
440 break;
441 case XML_ERR_STANDALONE_VALUE:
442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
444 case XML_ERR_DOCUMENT_EMPTY:
445 errmsg = "Document is empty\n";
446 break;
447 case XML_ERR_DOCUMENT_END:
448 errmsg = "Extra content at the end of the document\n";
449 break;
450 case XML_ERR_NOT_WELL_BALANCED:
451 errmsg = "chunk is not well balanced\n";
452 break;
453 case XML_ERR_EXTRA_CONTENT:
454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
456 case XML_ERR_VERSION_MISSING:
457 errmsg = "Malformed declaration expecting version\n";
458 break;
459 #if 0
460 case:
461 errmsg = "\n";
462 break;
463 #endif
464 default:
465 errmsg = "Unregistered error message\n";
467 if (ctxt != NULL)
468 ctxt->errNo = error;
469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 static void
488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
494 if (ctxt != NULL)
495 ctxt->errNo = error;
496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
513 * Handle a warning.
515 static void
516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
519 xmlStructuredErrorFunc schannel = NULL;
521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
526 schannel = ctxt->sax->serror;
527 if (ctxt != NULL) {
528 __xmlRaiseError(schannel,
529 (ctxt->sax) ? ctxt->sax->warning : NULL,
530 ctxt->userData,
531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
535 } else {
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
545 * xmlValidityError:
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
549 * @str1: extra data
551 * Handle a validity error.
553 static void
554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
555 const char *msg, const xmlChar *str1, const xmlChar *str2)
557 xmlStructuredErrorFunc schannel = NULL;
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
561 return;
562 if (ctxt != NULL) {
563 ctxt->errNo = error;
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
567 if (ctxt != NULL) {
568 __xmlRaiseError(schannel,
569 ctxt->vctxt.error, ctxt->vctxt.userData,
570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
574 ctxt->valid = 0;
575 } else {
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
585 * xmlFatalErrMsgInt:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 static void
594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, int val)
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL,
603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
623 static void
624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
626 const xmlChar *str2)
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if (ctxt != NULL)
632 ctxt->errNo = error;
633 __xmlRaiseError(NULL, NULL, NULL,
634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
637 if (ctxt != NULL) {
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
645 * xmlFatalErrMsgStr:
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
653 static void
654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
655 const char *msg, const xmlChar * val)
657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
659 return;
660 if (ctxt != NULL)
661 ctxt->errNo = error;
662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665 val);
666 if (ctxt != NULL) {
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
674 * xmlErrMsgStr:
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
680 * Handle a non fatal parser error
682 static void
683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
689 if (ctxt != NULL)
690 ctxt->errNo = error;
691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694 val);
698 * xmlNsErr:
699 * @ctxt: an XML parser context
700 * @error: the error number
701 * @msg: the message
702 * @info1: extra information string
703 * @info2: extra information string
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
707 static void
708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg,
710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
716 if (ctxt != NULL)
717 ctxt->errNo = error;
718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
722 if (ctxt != NULL)
723 ctxt->nsWellFormed = 0;
727 * xmlNsWarn
728 * @ctxt: an XML parser context
729 * @error: the error number
730 * @msg: the message
731 * @info1: extra information string
732 * @info2: extra information string
734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
736 static void
737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738 const char *msg,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
744 return;
745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
751 /************************************************************************
753 * Library wide options *
755 ************************************************************************/
758 * xmlHasFeature:
759 * @feature: the feature to be examined
761 * Examines if the library has been compiled with a given feature.
763 * Returns a non-zero value if the feature exist, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an unknown
765 * unknown feature is requested, non-zero otherwise.
768 xmlHasFeature(xmlFeature feature)
770 switch (feature) {
771 case XML_WITH_THREAD:
772 #ifdef LIBXML_THREAD_ENABLED
773 return(1);
774 #else
775 return(0);
776 #endif
777 case XML_WITH_TREE:
778 #ifdef LIBXML_TREE_ENABLED
779 return(1);
780 #else
781 return(0);
782 #endif
783 case XML_WITH_OUTPUT:
784 #ifdef LIBXML_OUTPUT_ENABLED
785 return(1);
786 #else
787 return(0);
788 #endif
789 case XML_WITH_PUSH:
790 #ifdef LIBXML_PUSH_ENABLED
791 return(1);
792 #else
793 return(0);
794 #endif
795 case XML_WITH_READER:
796 #ifdef LIBXML_READER_ENABLED
797 return(1);
798 #else
799 return(0);
800 #endif
801 case XML_WITH_PATTERN:
802 #ifdef LIBXML_PATTERN_ENABLED
803 return(1);
804 #else
805 return(0);
806 #endif
807 case XML_WITH_WRITER:
808 #ifdef LIBXML_WRITER_ENABLED
809 return(1);
810 #else
811 return(0);
812 #endif
813 case XML_WITH_SAX1:
814 #ifdef LIBXML_SAX1_ENABLED
815 return(1);
816 #else
817 return(0);
818 #endif
819 case XML_WITH_FTP:
820 #ifdef LIBXML_FTP_ENABLED
821 return(1);
822 #else
823 return(0);
824 #endif
825 case XML_WITH_HTTP:
826 #ifdef LIBXML_HTTP_ENABLED
827 return(1);
828 #else
829 return(0);
830 #endif
831 case XML_WITH_VALID:
832 #ifdef LIBXML_VALID_ENABLED
833 return(1);
834 #else
835 return(0);
836 #endif
837 case XML_WITH_HTML:
838 #ifdef LIBXML_HTML_ENABLED
839 return(1);
840 #else
841 return(0);
842 #endif
843 case XML_WITH_LEGACY:
844 #ifdef LIBXML_LEGACY_ENABLED
845 return(1);
846 #else
847 return(0);
848 #endif
849 case XML_WITH_C14N:
850 #ifdef LIBXML_C14N_ENABLED
851 return(1);
852 #else
853 return(0);
854 #endif
855 case XML_WITH_CATALOG:
856 #ifdef LIBXML_CATALOG_ENABLED
857 return(1);
858 #else
859 return(0);
860 #endif
861 case XML_WITH_XPATH:
862 #ifdef LIBXML_XPATH_ENABLED
863 return(1);
864 #else
865 return(0);
866 #endif
867 case XML_WITH_XPTR:
868 #ifdef LIBXML_XPTR_ENABLED
869 return(1);
870 #else
871 return(0);
872 #endif
873 case XML_WITH_XINCLUDE:
874 #ifdef LIBXML_XINCLUDE_ENABLED
875 return(1);
876 #else
877 return(0);
878 #endif
879 case XML_WITH_ICONV:
880 #ifdef LIBXML_ICONV_ENABLED
881 return(1);
882 #else
883 return(0);
884 #endif
885 case XML_WITH_ISO8859X:
886 #ifdef LIBXML_ISO8859X_ENABLED
887 return(1);
888 #else
889 return(0);
890 #endif
891 case XML_WITH_UNICODE:
892 #ifdef LIBXML_UNICODE_ENABLED
893 return(1);
894 #else
895 return(0);
896 #endif
897 case XML_WITH_REGEXP:
898 #ifdef LIBXML_REGEXP_ENABLED
899 return(1);
900 #else
901 return(0);
902 #endif
903 case XML_WITH_AUTOMATA:
904 #ifdef LIBXML_AUTOMATA_ENABLED
905 return(1);
906 #else
907 return(0);
908 #endif
909 case XML_WITH_EXPR:
910 #ifdef LIBXML_EXPR_ENABLED
911 return(1);
912 #else
913 return(0);
914 #endif
915 case XML_WITH_SCHEMAS:
916 #ifdef LIBXML_SCHEMAS_ENABLED
917 return(1);
918 #else
919 return(0);
920 #endif
921 case XML_WITH_SCHEMATRON:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
923 return(1);
924 #else
925 return(0);
926 #endif
927 case XML_WITH_MODULES:
928 #ifdef LIBXML_MODULES_ENABLED
929 return(1);
930 #else
931 return(0);
932 #endif
933 case XML_WITH_DEBUG:
934 #ifdef LIBXML_DEBUG_ENABLED
935 return(1);
936 #else
937 return(0);
938 #endif
939 case XML_WITH_DEBUG_MEM:
940 #ifdef DEBUG_MEMORY_LOCATION
941 return(1);
942 #else
943 return(0);
944 #endif
945 case XML_WITH_DEBUG_RUN:
946 #ifdef LIBXML_DEBUG_RUNTIME
947 return(1);
948 #else
949 return(0);
950 #endif
951 case XML_WITH_ZLIB:
952 #ifdef LIBXML_ZLIB_ENABLED
953 return(1);
954 #else
955 return(0);
956 #endif
957 case XML_WITH_ICU:
958 #ifdef LIBXML_ICU_ENABLED
959 return(1);
960 #else
961 return(0);
962 #endif
963 default:
964 break;
966 return(0);
969 /************************************************************************
971 * SAX2 defaulted attributes handling *
973 ************************************************************************/
976 * xmlDetectSAX2:
977 * @ctxt: an XML parser context
979 * Do the SAX2 detection and specific intialization
981 static void
982 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
983 if (ctxt == NULL) return;
984 #ifdef LIBXML_SAX1_ENABLED
985 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
986 ((ctxt->sax->startElementNs != NULL) ||
987 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
988 #else
989 ctxt->sax2 = 1;
990 #endif /* LIBXML_SAX1_ENABLED */
992 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
993 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
994 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
995 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
996 (ctxt->str_xml_ns == NULL)) {
997 xmlErrMemory(ctxt, NULL);
1001 typedef struct _xmlDefAttrs xmlDefAttrs;
1002 typedef xmlDefAttrs *xmlDefAttrsPtr;
1003 struct _xmlDefAttrs {
1004 int nbAttrs; /* number of defaulted attributes on that element */
1005 int maxAttrs; /* the size of the array */
1006 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1010 * xmlAttrNormalizeSpace:
1011 * @src: the source string
1012 * @dst: the target string
1014 * Normalize the space in non CDATA attribute values:
1015 * If the attribute type is not CDATA, then the XML processor MUST further
1016 * process the normalized attribute value by discarding any leading and
1017 * trailing space (#x20) characters, and by replacing sequences of space
1018 * (#x20) characters by a single space (#x20) character.
1019 * Note that the size of dst need to be at least src, and if one doesn't need
1020 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021 * passing src as dst is just fine.
1023 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1024 * is needed.
1026 static xmlChar *
1027 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1029 if ((src == NULL) || (dst == NULL))
1030 return(NULL);
1032 while (*src == 0x20) src++;
1033 while (*src != 0) {
1034 if (*src == 0x20) {
1035 while (*src == 0x20) src++;
1036 if (*src != 0)
1037 *dst++ = 0x20;
1038 } else {
1039 *dst++ = *src++;
1042 *dst = 0;
1043 if (dst == src)
1044 return(NULL);
1045 return(dst);
1049 * xmlAttrNormalizeSpace2:
1050 * @src: the source string
1052 * Normalize the space in non CDATA attribute values, a slightly more complex
1053 * front end to avoid allocation problems when running on attribute values
1054 * coming from the input.
1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057 * is needed.
1059 static const xmlChar *
1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1062 int i;
1063 int remove_head = 0;
1064 int need_realloc = 0;
1065 const xmlChar *cur;
1067 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1068 return(NULL);
1069 i = *len;
1070 if (i <= 0)
1071 return(NULL);
1073 cur = src;
1074 while (*cur == 0x20) {
1075 cur++;
1076 remove_head++;
1078 while (*cur != 0) {
1079 if (*cur == 0x20) {
1080 cur++;
1081 if ((*cur == 0x20) || (*cur == 0)) {
1082 need_realloc = 1;
1083 break;
1085 } else
1086 cur++;
1088 if (need_realloc) {
1089 xmlChar *ret;
1091 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1092 if (ret == NULL) {
1093 xmlErrMemory(ctxt, NULL);
1094 return(NULL);
1096 xmlAttrNormalizeSpace(ret, ret);
1097 *len = (int) strlen((const char *)ret);
1098 return(ret);
1099 } else if (remove_head) {
1100 *len -= remove_head;
1101 memmove(src, src + remove_head, 1 + *len);
1102 return(src);
1104 return(NULL);
1108 * xmlAddDefAttrs:
1109 * @ctxt: an XML parser context
1110 * @fullname: the element fullname
1111 * @fullattr: the attribute fullname
1112 * @value: the attribute value
1114 * Add a defaulted attribute for an element
1116 static void
1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118 const xmlChar *fullname,
1119 const xmlChar *fullattr,
1120 const xmlChar *value) {
1121 xmlDefAttrsPtr defaults;
1122 int len;
1123 const xmlChar *name;
1124 const xmlChar *prefix;
1127 * Allows to detect attribute redefinitions
1129 if (ctxt->attsSpecial != NULL) {
1130 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1131 return;
1134 if (ctxt->attsDefault == NULL) {
1135 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1136 if (ctxt->attsDefault == NULL)
1137 goto mem_error;
1141 * split the element name into prefix:localname , the string found
1142 * are within the DTD and then not associated to namespace names.
1144 name = xmlSplitQName3(fullname, &len);
1145 if (name == NULL) {
1146 name = xmlDictLookup(ctxt->dict, fullname, -1);
1147 prefix = NULL;
1148 } else {
1149 name = xmlDictLookup(ctxt->dict, name, -1);
1150 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1154 * make sure there is some storage
1156 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157 if (defaults == NULL) {
1158 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1159 (4 * 5) * sizeof(const xmlChar *));
1160 if (defaults == NULL)
1161 goto mem_error;
1162 defaults->nbAttrs = 0;
1163 defaults->maxAttrs = 4;
1164 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165 defaults, NULL) < 0) {
1166 xmlFree(defaults);
1167 goto mem_error;
1169 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1170 xmlDefAttrsPtr temp;
1172 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1173 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1174 if (temp == NULL)
1175 goto mem_error;
1176 defaults = temp;
1177 defaults->maxAttrs *= 2;
1178 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 defaults, NULL) < 0) {
1180 xmlFree(defaults);
1181 goto mem_error;
1186 * Split the element name into prefix:localname , the string found
1187 * are within the DTD and hen not associated to namespace names.
1189 name = xmlSplitQName3(fullattr, &len);
1190 if (name == NULL) {
1191 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1192 prefix = NULL;
1193 } else {
1194 name = xmlDictLookup(ctxt->dict, name, -1);
1195 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1198 defaults->values[5 * defaults->nbAttrs] = name;
1199 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1200 /* intern the string and precompute the end */
1201 len = xmlStrlen(value);
1202 value = xmlDictLookup(ctxt->dict, value, len);
1203 defaults->values[5 * defaults->nbAttrs + 2] = value;
1204 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1205 if (ctxt->external)
1206 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1207 else
1208 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1209 defaults->nbAttrs++;
1211 return;
1213 mem_error:
1214 xmlErrMemory(ctxt, NULL);
1215 return;
1219 * xmlAddSpecialAttr:
1220 * @ctxt: an XML parser context
1221 * @fullname: the element fullname
1222 * @fullattr: the attribute fullname
1223 * @type: the attribute type
1225 * Register this attribute type
1227 static void
1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229 const xmlChar *fullname,
1230 const xmlChar *fullattr,
1231 int type)
1233 if (ctxt->attsSpecial == NULL) {
1234 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1235 if (ctxt->attsSpecial == NULL)
1236 goto mem_error;
1239 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1240 return;
1242 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243 (void *) (long) type);
1244 return;
1246 mem_error:
1247 xmlErrMemory(ctxt, NULL);
1248 return;
1252 * xmlCleanSpecialAttrCallback:
1254 * Removes CDATA attributes from the special attribute table
1256 static void
1257 xmlCleanSpecialAttrCallback(void *payload, void *data,
1258 const xmlChar *fullname, const xmlChar *fullattr,
1259 const xmlChar *unused ATTRIBUTE_UNUSED) {
1260 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1262 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1263 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1268 * xmlCleanSpecialAttr:
1269 * @ctxt: an XML parser context
1271 * Trim the list of attributes defined to remove all those of type
1272 * CDATA as they are not special. This call should be done when finishing
1273 * to parse the DTD and before starting to parse the document root.
1275 static void
1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1278 if (ctxt->attsSpecial == NULL)
1279 return;
1281 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1283 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284 xmlHashFree(ctxt->attsSpecial, NULL);
1285 ctxt->attsSpecial = NULL;
1287 return;
1291 * xmlCheckLanguageID:
1292 * @lang: pointer to the string value
1294 * Checks that the value conforms to the LanguageID production:
1296 * NOTE: this is somewhat deprecated, those productions were removed from
1297 * the XML Second edition.
1299 * [33] LanguageID ::= Langcode ('-' Subcode)*
1300 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1301 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304 * [38] Subcode ::= ([a-z] | [A-Z])+
1306 * Returns 1 if correct 0 otherwise
1309 xmlCheckLanguageID(const xmlChar * lang)
1311 const xmlChar *cur = lang;
1313 if (cur == NULL)
1314 return (0);
1315 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1316 ((cur[0] == 'I') && (cur[1] == '-'))) {
1318 * IANA code
1320 cur += 2;
1321 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1322 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1323 cur++;
1324 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1325 ((cur[0] == 'X') && (cur[1] == '-'))) {
1327 * User code
1329 cur += 2;
1330 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1331 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1332 cur++;
1333 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1334 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1336 * ISO639
1338 cur++;
1339 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1340 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1341 cur++;
1342 else
1343 return (0);
1344 } else
1345 return (0);
1346 while (cur[0] != 0) { /* non input consuming */
1347 if (cur[0] != '-')
1348 return (0);
1349 cur++;
1350 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1351 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1352 cur++;
1353 else
1354 return (0);
1355 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1356 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1357 cur++;
1359 return (1);
/************************************************************************
 *									*
 *		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/
1368 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1369 const xmlChar ** str);
1371 #ifdef SAX2
1373 * nsPush:
1374 * @ctxt: an XML parser context
1375 * @prefix: the namespace prefix or NULL
1376 * @URL: the namespace name
1378 * Pushes a new parser namespace on top of the ns stack
1380 * Returns -1 in case of error, -2 if the namespace should be discarded
1381 * and the index in the stack otherwise.
1383 static int
1384 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1386 if (ctxt->options & XML_PARSE_NSCLEAN) {
1387 int i;
1388 for (i = 0;i < ctxt->nsNr;i += 2) {
1389 if (ctxt->nsTab[i] == prefix) {
1390 /* in scope */
1391 if (ctxt->nsTab[i + 1] == URL)
1392 return(-2);
1393 /* out of scope keep it */
1394 break;
1398 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1399 ctxt->nsMax = 10;
1400 ctxt->nsNr = 0;
1401 ctxt->nsTab = (const xmlChar **)
1402 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1403 if (ctxt->nsTab == NULL) {
1404 xmlErrMemory(ctxt, NULL);
1405 ctxt->nsMax = 0;
1406 return (-1);
1408 } else if (ctxt->nsNr >= ctxt->nsMax) {
1409 const xmlChar ** tmp;
1410 ctxt->nsMax *= 2;
1411 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1412 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1413 if (tmp == NULL) {
1414 xmlErrMemory(ctxt, NULL);
1415 ctxt->nsMax /= 2;
1416 return (-1);
1418 ctxt->nsTab = tmp;
1420 ctxt->nsTab[ctxt->nsNr++] = prefix;
1421 ctxt->nsTab[ctxt->nsNr++] = URL;
1422 return (ctxt->nsNr);
1425 * nsPop:
1426 * @ctxt: an XML parser context
1427 * @nr: the number to pop
1429 * Pops the top @nr parser prefix/namespace from the ns stack
1431 * Returns the number of namespaces removed
1433 static int
1434 nsPop(xmlParserCtxtPtr ctxt, int nr)
1436 int i;
1438 if (ctxt->nsTab == NULL) return(0);
1439 if (ctxt->nsNr < nr) {
1440 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1441 nr = ctxt->nsNr;
1443 if (ctxt->nsNr <= 0)
1444 return (0);
1446 for (i = 0;i < nr;i++) {
1447 ctxt->nsNr--;
1448 ctxt->nsTab[ctxt->nsNr] = NULL;
1450 return(nr);
1452 #endif
1454 static int
1455 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1456 const xmlChar **atts;
1457 int *attallocs;
1458 int maxatts;
1460 if (ctxt->atts == NULL) {
1461 maxatts = 55; /* allow for 10 attrs by default */
1462 atts = (const xmlChar **)
1463 xmlMalloc(maxatts * sizeof(xmlChar *));
1464 if (atts == NULL) goto mem_error;
1465 ctxt->atts = atts;
1466 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1467 if (attallocs == NULL) goto mem_error;
1468 ctxt->attallocs = attallocs;
1469 ctxt->maxatts = maxatts;
1470 } else if (nr + 5 > ctxt->maxatts) {
1471 maxatts = (nr + 5) * 2;
1472 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1473 maxatts * sizeof(const xmlChar *));
1474 if (atts == NULL) goto mem_error;
1475 ctxt->atts = atts;
1476 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1477 (maxatts / 5) * sizeof(int));
1478 if (attallocs == NULL) goto mem_error;
1479 ctxt->attallocs = attallocs;
1480 ctxt->maxatts = maxatts;
1482 return(ctxt->maxatts);
1483 mem_error:
1484 xmlErrMemory(ctxt, NULL);
1485 return(-1);
1489 * inputPush:
1490 * @ctxt: an XML parser context
1491 * @value: the parser input
1493 * Pushes a new parser input on top of the input stack
1495 * Returns -1 in case of error, the index in the stack otherwise
1498 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1500 if ((ctxt == NULL) || (value == NULL))
1501 return(-1);
1502 if (ctxt->inputNr >= ctxt->inputMax) {
1503 ctxt->inputMax *= 2;
1504 ctxt->inputTab =
1505 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1506 ctxt->inputMax *
1507 sizeof(ctxt->inputTab[0]));
1508 if (ctxt->inputTab == NULL) {
1509 xmlErrMemory(ctxt, NULL);
1510 xmlFreeInputStream(value);
1511 ctxt->inputMax /= 2;
1512 value = NULL;
1513 return (-1);
1516 ctxt->inputTab[ctxt->inputNr] = value;
1517 ctxt->input = value;
1518 return (ctxt->inputNr++);
1521 * inputPop:
1522 * @ctxt: an XML parser context
1524 * Pops the top parser input from the input stack
1526 * Returns the input just removed
1528 xmlParserInputPtr
1529 inputPop(xmlParserCtxtPtr ctxt)
1531 xmlParserInputPtr ret;
1533 if (ctxt == NULL)
1534 return(NULL);
1535 if (ctxt->inputNr <= 0)
1536 return (NULL);
1537 ctxt->inputNr--;
1538 if (ctxt->inputNr > 0)
1539 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1540 else
1541 ctxt->input = NULL;
1542 ret = ctxt->inputTab[ctxt->inputNr];
1543 ctxt->inputTab[ctxt->inputNr] = NULL;
1544 return (ret);
1547 * nodePush:
1548 * @ctxt: an XML parser context
1549 * @value: the element node
1551 * Pushes a new element node on top of the node stack
1553 * Returns -1 in case of error, the index in the stack otherwise
1556 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1558 if (ctxt == NULL) return(0);
1559 if (ctxt->nodeNr >= ctxt->nodeMax) {
1560 xmlNodePtr *tmp;
1562 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1563 ctxt->nodeMax * 2 *
1564 sizeof(ctxt->nodeTab[0]));
1565 if (tmp == NULL) {
1566 xmlErrMemory(ctxt, NULL);
1567 return (-1);
1569 ctxt->nodeTab = tmp;
1570 ctxt->nodeMax *= 2;
1572 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1573 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1574 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1575 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1576 xmlParserMaxDepth);
1577 ctxt->instate = XML_PARSER_EOF;
1578 return(-1);
1580 ctxt->nodeTab[ctxt->nodeNr] = value;
1581 ctxt->node = value;
1582 return (ctxt->nodeNr++);
1586 * nodePop:
1587 * @ctxt: an XML parser context
1589 * Pops the top element node from the node stack
1591 * Returns the node just removed
1593 xmlNodePtr
1594 nodePop(xmlParserCtxtPtr ctxt)
1596 xmlNodePtr ret;
1598 if (ctxt == NULL) return(NULL);
1599 if (ctxt->nodeNr <= 0)
1600 return (NULL);
1601 ctxt->nodeNr--;
1602 if (ctxt->nodeNr > 0)
1603 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1604 else
1605 ctxt->node = NULL;
1606 ret = ctxt->nodeTab[ctxt->nodeNr];
1607 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1608 return (ret);
1611 #ifdef LIBXML_PUSH_ENABLED
1613 * nameNsPush:
1614 * @ctxt: an XML parser context
1615 * @value: the element name
1616 * @prefix: the element prefix
1617 * @URI: the element namespace name
1619 * Pushes a new element name/prefix/URL on top of the name stack
1621 * Returns -1 in case of error, the index in the stack otherwise
1623 static int
1624 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1625 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1627 if (ctxt->nameNr >= ctxt->nameMax) {
1628 const xmlChar * *tmp;
1629 void **tmp2;
1630 ctxt->nameMax *= 2;
1631 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1632 ctxt->nameMax *
1633 sizeof(ctxt->nameTab[0]));
1634 if (tmp == NULL) {
1635 ctxt->nameMax /= 2;
1636 goto mem_error;
1638 ctxt->nameTab = tmp;
1639 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1640 ctxt->nameMax * 3 *
1641 sizeof(ctxt->pushTab[0]));
1642 if (tmp2 == NULL) {
1643 ctxt->nameMax /= 2;
1644 goto mem_error;
1646 ctxt->pushTab = tmp2;
1648 ctxt->nameTab[ctxt->nameNr] = value;
1649 ctxt->name = value;
1650 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1651 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1652 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1653 return (ctxt->nameNr++);
1654 mem_error:
1655 xmlErrMemory(ctxt, NULL);
1656 return (-1);
1659 * nameNsPop:
1660 * @ctxt: an XML parser context
1662 * Pops the top element/prefix/URI name from the name stack
1664 * Returns the name just removed
1666 static const xmlChar *
1667 nameNsPop(xmlParserCtxtPtr ctxt)
1669 const xmlChar *ret;
1671 if (ctxt->nameNr <= 0)
1672 return (NULL);
1673 ctxt->nameNr--;
1674 if (ctxt->nameNr > 0)
1675 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1676 else
1677 ctxt->name = NULL;
1678 ret = ctxt->nameTab[ctxt->nameNr];
1679 ctxt->nameTab[ctxt->nameNr] = NULL;
1680 return (ret);
1682 #endif /* LIBXML_PUSH_ENABLED */
1685 * namePush:
1686 * @ctxt: an XML parser context
1687 * @value: the element name
1689 * Pushes a new element name on top of the name stack
1691 * Returns -1 in case of error, the index in the stack otherwise
1694 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1696 if (ctxt == NULL) return (-1);
1698 if (ctxt->nameNr >= ctxt->nameMax) {
1699 const xmlChar * *tmp;
1700 ctxt->nameMax *= 2;
1701 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1702 ctxt->nameMax *
1703 sizeof(ctxt->nameTab[0]));
1704 if (tmp == NULL) {
1705 ctxt->nameMax /= 2;
1706 goto mem_error;
1708 ctxt->nameTab = tmp;
1710 ctxt->nameTab[ctxt->nameNr] = value;
1711 ctxt->name = value;
1712 return (ctxt->nameNr++);
1713 mem_error:
1714 xmlErrMemory(ctxt, NULL);
1715 return (-1);
1718 * namePop:
1719 * @ctxt: an XML parser context
1721 * Pops the top element name from the name stack
1723 * Returns the name just removed
1725 const xmlChar *
1726 namePop(xmlParserCtxtPtr ctxt)
1728 const xmlChar *ret;
1730 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1731 return (NULL);
1732 ctxt->nameNr--;
1733 if (ctxt->nameNr > 0)
1734 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1735 else
1736 ctxt->name = NULL;
1737 ret = ctxt->nameTab[ctxt->nameNr];
1738 ctxt->nameTab[ctxt->nameNr] = NULL;
1739 return (ret);
1742 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1743 if (ctxt->spaceNr >= ctxt->spaceMax) {
1744 int *tmp;
1746 ctxt->spaceMax *= 2;
1747 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1748 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1749 if (tmp == NULL) {
1750 xmlErrMemory(ctxt, NULL);
1751 ctxt->spaceMax /=2;
1752 return(-1);
1754 ctxt->spaceTab = tmp;
1756 ctxt->spaceTab[ctxt->spaceNr] = val;
1757 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1758 return(ctxt->spaceNr++);
1761 static int spacePop(xmlParserCtxtPtr ctxt) {
1762 int ret;
1763 if (ctxt->spaceNr <= 0) return(0);
1764 ctxt->spaceNr--;
1765 if (ctxt->spaceNr > 0)
1766 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1767 else
1768 ctxt->space = &ctxt->spaceTab[0];
1769 ret = ctxt->spaceTab[ctxt->spaceNr];
1770 ctxt->spaceTab[ctxt->spaceNr] = -1;
1771 return(ret);
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them except the CMPn family expect a variable
 * named ctxt (the parser context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   CMPn(s, c1 .. cn) true when the n bytes at s equal c1 .. cn.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one at a time, keeping line and column counts
 *           up to date across newlines.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/* the pointer argument is parenthesized so expressions such as p + 1 work */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1855 #define SHRINK if ((ctxt->progressive == 0) && \
1856 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1857 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1858 xmlSHRINK (ctxt);
1860 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1861 xmlParserInputShrink(ctxt->input);
1862 if ((*ctxt->input->cur == 0) &&
1863 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1864 xmlPopInput(ctxt);
1867 #define GROW if ((ctxt->progressive == 0) && \
1868 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1869 xmlGROW (ctxt);
1871 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1872 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1873 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1874 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1875 xmlPopInput(ctxt);
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one byte, updating the column and character counts,
 * and read more input when the cursor lands on a 0 byte.
 * Note: expands to a brace block, not a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by l bytes, counting them as one column (or one new line when
 * the current byte is '\n'), then handle a PE reference if the cursor
 * lands on '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the input; its byte length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v (l bytes long) to buffer b at index i and advance i.
 * Note: expands to a bare if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
1906 * xmlSkipBlankChars:
1907 * @ctxt: the XML parser context
1909 * skip all blanks character found at that point in the input streams.
1910 * It pops up finished entities in the process if allowable at that point.
1912 * Returns the number of space chars skipped
1916 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1917 int res = 0;
1920 * It's Okay to use CUR/NEXT here since all the blanks are on
1921 * the ASCII range.
1923 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1924 const xmlChar *cur;
1926 * if we are in the document content, go really fast
1928 cur = ctxt->input->cur;
1929 while (IS_BLANK_CH(*cur)) {
1930 if (*cur == '\n') {
1931 ctxt->input->line++; ctxt->input->col = 1;
1933 cur++;
1934 res++;
1935 if (*cur == 0) {
1936 ctxt->input->cur = cur;
1937 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1938 cur = ctxt->input->cur;
1941 ctxt->input->cur = cur;
1942 } else {
1943 int cur;
1944 do {
1945 cur = CUR;
1946 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1947 NEXT;
1948 cur = CUR;
1949 res++;
1951 while ((cur == 0) && (ctxt->inputNr > 1) &&
1952 (ctxt->instate != XML_PARSER_COMMENT)) {
1953 xmlPopInput(ctxt);
1954 cur = CUR;
1957 * Need to handle support of entities branching here
1959 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1960 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1962 return(res);
/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/
1972 * xmlPopInput:
1973 * @ctxt: an XML parser context
1975 * xmlPopInput: the current input pointed by ctxt->input came to an end
1976 * pop it and return the next char.
1978 * Returns the current xmlChar in the parser context
1980 xmlChar
1981 xmlPopInput(xmlParserCtxtPtr ctxt) {
1982 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1983 if (xmlParserDebugEntities)
1984 xmlGenericError(xmlGenericErrorContext,
1985 "Popping input %d\n", ctxt->inputNr);
1986 xmlFreeInputStream(inputPop(ctxt));
1987 if ((*ctxt->input->cur == 0) &&
1988 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1989 return(xmlPopInput(ctxt));
1990 return(CUR);
1994 * xmlPushInput:
1995 * @ctxt: an XML parser context
1996 * @input: an XML parser input fragment (entity, XML fragment ...).
1998 * xmlPushInput: switch to a new input stream which is stacked on top
1999 * of the previous one(s).
2000 * Returns -1 in case of error or the index in the input stack
2003 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2004 int ret;
2005 if (input == NULL) return(-1);
2007 if (xmlParserDebugEntities) {
2008 if ((ctxt->input != NULL) && (ctxt->input->filename))
2009 xmlGenericError(xmlGenericErrorContext,
2010 "%s(%d): ", ctxt->input->filename,
2011 ctxt->input->line);
2012 xmlGenericError(xmlGenericErrorContext,
2013 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2015 ret = inputPush(ctxt, input);
2016 if (ctxt->instate == XML_PARSER_EOF)
2017 return(-1);
2018 GROW;
2019 return(ret);
2023 * xmlParseCharRef:
2024 * @ctxt: an XML parser context
2026 * parse Reference declarations
2028 * [66] CharRef ::= '&#' [0-9]+ ';' |
2029 * '&#x' [0-9a-fA-F]+ ';'
2031 * [ WFC: Legal Character ]
2032 * Characters referred to using character references must match the
2033 * production for Char.
2035 * Returns the value parsed (as an int), 0 in case of error
2038 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2039 unsigned int val = 0;
2040 int count = 0;
2041 unsigned int outofrange = 0;
2044 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2046 if ((RAW == '&') && (NXT(1) == '#') &&
2047 (NXT(2) == 'x')) {
2048 SKIP(3);
2049 GROW;
2050 while (RAW != ';') { /* loop blocked by count */
2051 if (count++ > 20) {
2052 count = 0;
2053 GROW;
2054 if (ctxt->instate == XML_PARSER_EOF)
2055 return(0);
2057 if ((RAW >= '0') && (RAW <= '9'))
2058 val = val * 16 + (CUR - '0');
2059 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2060 val = val * 16 + (CUR - 'a') + 10;
2061 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2062 val = val * 16 + (CUR - 'A') + 10;
2063 else {
2064 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2065 val = 0;
2066 break;
2068 if (val > 0x10FFFF)
2069 outofrange = val;
2071 NEXT;
2072 count++;
2074 if (RAW == ';') {
2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2076 ctxt->input->col++;
2077 ctxt->nbChars ++;
2078 ctxt->input->cur++;
2080 } else if ((RAW == '&') && (NXT(1) == '#')) {
2081 SKIP(2);
2082 GROW;
2083 while (RAW != ';') { /* loop blocked by count */
2084 if (count++ > 20) {
2085 count = 0;
2086 GROW;
2087 if (ctxt->instate == XML_PARSER_EOF)
2088 return(0);
2090 if ((RAW >= '0') && (RAW <= '9'))
2091 val = val * 10 + (CUR - '0');
2092 else {
2093 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2094 val = 0;
2095 break;
2097 if (val > 0x10FFFF)
2098 outofrange = val;
2100 NEXT;
2101 count++;
2103 if (RAW == ';') {
2104 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2105 ctxt->input->col++;
2106 ctxt->nbChars ++;
2107 ctxt->input->cur++;
2109 } else {
2110 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2114 * [ WFC: Legal Character ]
2115 * Characters referred to using character references must match the
2116 * production for Char.
2118 if ((IS_CHAR(val) && (outofrange == 0))) {
2119 return(val);
2120 } else {
2121 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2122 "xmlParseCharRef: invalid xmlChar value %d\n",
2123 val);
2125 return(0);
2129 * xmlParseStringCharRef:
2130 * @ctxt: an XML parser context
2131 * @str: a pointer to an index in the string
2133 * parse Reference declarations, variant parsing from a string rather
2134 * than an an input flow.
2136 * [66] CharRef ::= '&#' [0-9]+ ';' |
2137 * '&#x' [0-9a-fA-F]+ ';'
2139 * [ WFC: Legal Character ]
2140 * Characters referred to using character references must match the
2141 * production for Char.
2143 * Returns the value parsed (as an int), 0 in case of error, str will be
2144 * updated to the current value of the index
2146 static int
2147 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2148 const xmlChar *ptr;
2149 xmlChar cur;
2150 unsigned int val = 0;
2151 unsigned int outofrange = 0;
2153 if ((str == NULL) || (*str == NULL)) return(0);
2154 ptr = *str;
2155 cur = *ptr;
2156 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2157 ptr += 3;
2158 cur = *ptr;
2159 while (cur != ';') { /* Non input consuming loop */
2160 if ((cur >= '0') && (cur <= '9'))
2161 val = val * 16 + (cur - '0');
2162 else if ((cur >= 'a') && (cur <= 'f'))
2163 val = val * 16 + (cur - 'a') + 10;
2164 else if ((cur >= 'A') && (cur <= 'F'))
2165 val = val * 16 + (cur - 'A') + 10;
2166 else {
2167 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2168 val = 0;
2169 break;
2171 if (val > 0x10FFFF)
2172 outofrange = val;
2174 ptr++;
2175 cur = *ptr;
2177 if (cur == ';')
2178 ptr++;
2179 } else if ((cur == '&') && (ptr[1] == '#')){
2180 ptr += 2;
2181 cur = *ptr;
2182 while (cur != ';') { /* Non input consuming loops */
2183 if ((cur >= '0') && (cur <= '9'))
2184 val = val * 10 + (cur - '0');
2185 else {
2186 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2187 val = 0;
2188 break;
2190 if (val > 0x10FFFF)
2191 outofrange = val;
2193 ptr++;
2194 cur = *ptr;
2196 if (cur == ';')
2197 ptr++;
2198 } else {
2199 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2200 return(0);
2202 *str = ptr;
2205 * [ WFC: Legal Character ]
2206 * Characters referred to using character references must match the
2207 * production for Char.
2209 if ((IS_CHAR(val) && (outofrange == 0))) {
2210 return(val);
2211 } else {
2212 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2213 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2214 val);
2216 return(0);
2220 * xmlNewBlanksWrapperInputStream:
2221 * @ctxt: an XML parser context
2222 * @entity: an Entity pointer
2224 * Create a new input stream for wrapping
2225 * blanks around a PEReference
2227 * Returns the new input stream or NULL
2230 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2232 static xmlParserInputPtr
2233 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2234 xmlParserInputPtr input;
2235 xmlChar *buffer;
2236 size_t length;
2237 if (entity == NULL) {
2238 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2239 "xmlNewBlanksWrapperInputStream entity\n");
2240 return(NULL);
2242 if (xmlParserDebugEntities)
2243 xmlGenericError(xmlGenericErrorContext,
2244 "new blanks wrapper for entity: %s\n", entity->name);
2245 input = xmlNewInputStream(ctxt);
2246 if (input == NULL) {
2247 return(NULL);
2249 length = xmlStrlen(entity->name) + 5;
2250 buffer = xmlMallocAtomic(length);
2251 if (buffer == NULL) {
2252 xmlErrMemory(ctxt, NULL);
2253 xmlFree(input);
2254 return(NULL);
2256 buffer [0] = ' ';
2257 buffer [1] = '%';
2258 buffer [length-3] = ';';
2259 buffer [length-2] = ' ';
2260 buffer [length-1] = 0;
2261 memcpy(buffer + 2, entity->name, length - 5);
2262 input->free = deallocblankswrapper;
2263 input->base = buffer;
2264 input->cur = buffer;
2265 input->length = length;
2266 input->end = &buffer[length];
2267 return(input);
2271 * xmlParserHandlePEReference:
2272 * @ctxt: the parser context
2274 * [69] PEReference ::= '%' Name ';'
2276 * [ WFC: No Recursion ]
2277 * A parsed entity must not contain a recursive
2278 * reference to itself, either directly or indirectly.
2280 * [ WFC: Entity Declared ]
2281 * In a document without any DTD, a document with only an internal DTD
2282 * subset which contains no parameter entity references, or a document
2283 * with "standalone='yes'", ... ... The declaration of a parameter
2284 * entity must precede any reference to it...
2286 * [ VC: Entity Declared ]
2287 * In a document with an external subset or external parameter entities
2288 * with "standalone='no'", ... ... The declaration of a parameter entity
2289 * must precede any reference to it...
2291 * [ WFC: In DTD ]
2292 * Parameter-entity references may only appear in the DTD.
2293 * NOTE: misleading but this is handled.
2295 * A PEReference may have been detected in the current input stream
2296 * the handling is done accordingly to
2297 * http://www.w3.org/TR/REC-xml#entproc
2298 * i.e.
2299 * - Included in literal in entity values
2300 * - Included as Parameter Entity reference within DTDs
2302 void
2303 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2304 const xmlChar *name;
2305 xmlEntityPtr entity = NULL;
2306 xmlParserInputPtr input;
2308 if (RAW != '%') return;
2309 switch(ctxt->instate) {
2310 case XML_PARSER_CDATA_SECTION:
2311 return;
2312 case XML_PARSER_COMMENT:
2313 return;
2314 case XML_PARSER_START_TAG:
2315 return;
2316 case XML_PARSER_END_TAG:
2317 return;
2318 case XML_PARSER_EOF:
2319 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2320 return;
2321 case XML_PARSER_PROLOG:
2322 case XML_PARSER_START:
2323 case XML_PARSER_MISC:
2324 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2325 return;
2326 case XML_PARSER_ENTITY_DECL:
2327 case XML_PARSER_CONTENT:
2328 case XML_PARSER_ATTRIBUTE_VALUE:
2329 case XML_PARSER_PI:
2330 case XML_PARSER_SYSTEM_LITERAL:
2331 case XML_PARSER_PUBLIC_LITERAL:
2332 /* we just ignore it there */
2333 return;
2334 case XML_PARSER_EPILOG:
2335 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2336 return;
2337 case XML_PARSER_ENTITY_VALUE:
2339 * NOTE: in the case of entity values, we don't do the
2340 * substitution here since we need the literal
2341 * entity value to be able to save the internal
2342 * subset of the document.
2343 * This will be handled by xmlStringDecodeEntities
2345 return;
2346 case XML_PARSER_DTD:
2348 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2349 * In the internal DTD subset, parameter-entity references
2350 * can occur only where markup declarations can occur, not
2351 * within markup declarations.
2352 * In that case this is handled in xmlParseMarkupDecl
2354 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2355 return;
2356 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2357 return;
2358 break;
2359 case XML_PARSER_IGNORE:
2360 return;
2363 NEXT;
2364 name = xmlParseName(ctxt);
2365 if (xmlParserDebugEntities)
2366 xmlGenericError(xmlGenericErrorContext,
2367 "PEReference: %s\n", name);
2368 if (name == NULL) {
2369 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2370 } else {
2371 if (RAW == ';') {
2372 NEXT;
2373 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2374 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2375 if (ctxt->instate == XML_PARSER_EOF)
2376 return;
2377 if (entity == NULL) {
2380 * [ WFC: Entity Declared ]
2381 * In a document without any DTD, a document with only an
2382 * internal DTD subset which contains no parameter entity
2383 * references, or a document with "standalone='yes'", ...
2384 * ... The declaration of a parameter entity must precede
2385 * any reference to it...
2387 if ((ctxt->standalone == 1) ||
2388 ((ctxt->hasExternalSubset == 0) &&
2389 (ctxt->hasPErefs == 0))) {
2390 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2391 "PEReference: %%%s; not found\n", name);
2392 } else {
2394 * [ VC: Entity Declared ]
2395 * In a document with an external subset or external
2396 * parameter entities with "standalone='no'", ...
2397 * ... The declaration of a parameter entity must precede
2398 * any reference to it...
2400 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2401 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2402 "PEReference: %%%s; not found\n",
2403 name, NULL);
2404 } else
2405 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2406 "PEReference: %%%s; not found\n",
2407 name, NULL);
2408 ctxt->valid = 0;
2410 } else if (ctxt->input->free != deallocblankswrapper) {
2411 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2412 if (xmlPushInput(ctxt, input) < 0)
2413 return;
2414 } else {
2415 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2416 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2417 xmlChar start[4];
2418 xmlCharEncoding enc;
2421 * handle the extra spaces added before and after
2422 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2423 * this is done independently.
2425 input = xmlNewEntityInputStream(ctxt, entity);
2426 if (xmlPushInput(ctxt, input) < 0)
2427 return;
2430 * Get the 4 first bytes and decode the charset
2431 * if enc != XML_CHAR_ENCODING_NONE
2432 * plug some encoding conversion routines.
2433 * Note that, since we may have some non-UTF8
2434 * encoding (like UTF16, bug 135229), the 'length'
2435 * is not known, but we can calculate based upon
2436 * the amount of data in the buffer.
2438 GROW
2439 if (ctxt->instate == XML_PARSER_EOF)
2440 return;
2441 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2442 start[0] = RAW;
2443 start[1] = NXT(1);
2444 start[2] = NXT(2);
2445 start[3] = NXT(3);
2446 enc = xmlDetectCharEncoding(start, 4);
2447 if (enc != XML_CHAR_ENCODING_NONE) {
2448 xmlSwitchEncoding(ctxt, enc);
2452 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2453 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2454 (IS_BLANK_CH(NXT(5)))) {
2455 xmlParseTextDecl(ctxt);
2457 } else {
2458 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2459 "PEReference: %s is not a parameter entity\n",
2460 name);
2463 } else {
2464 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
/*
 * Macro used to grow the current buffer.
 *
 * Expects an int variable named <buffer>_size next to @buffer and a
 * "mem_error" label in the enclosing function, which is jumped to when
 * the reallocation fails. The new size is twice the old one plus @n.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *grown;							\
    buffer##_size = (buffer##_size * 2) + (n);				\
    grown = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL)							\
	goto mem_error;							\
    buffer = grown;							\
}
2483 * xmlStringLenDecodeEntities:
2484 * @ctxt: the parser context
2485 * @str: the input string
2486 * @len: the string length
2487 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2488 * @end: an end marker xmlChar, 0 if none
2489 * @end2: an end marker xmlChar, 0 if none
2490 * @end3: an end marker xmlChar, 0 if none
2492 * Takes a entity string content and process to do the adequate substitutions.
2494 * [67] Reference ::= EntityRef | CharRef
2496 * [69] PEReference ::= '%' Name ';'
2498 * Returns A newly allocated string with the substitution done. The caller
2499 * must deallocate it !
2501 xmlChar *
2502 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2503 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2504 xmlChar *buffer = NULL;
2505 int buffer_size = 0;
2507 xmlChar *current = NULL;
2508 xmlChar *rep = NULL;
2509 const xmlChar *last;
2510 xmlEntityPtr ent;
2511 int c,l;
2512 int nbchars = 0;
2514 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2515 return(NULL);
2516 last = str + len;
2518 if (((ctxt->depth > 40) &&
2519 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2520 (ctxt->depth > 1024)) {
2521 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2522 return(NULL);
2526 * allocate a translation buffer.
2528 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2529 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2530 if (buffer == NULL) goto mem_error;
2533 * OK loop until we reach one of the ending char or a size limit.
2534 * we are operating on already parsed values.
2536 if (str < last)
2537 c = CUR_SCHAR(str, l);
2538 else
2539 c = 0;
2540 while ((c != 0) && (c != end) && /* non input consuming loop */
2541 (c != end2) && (c != end3)) {
2543 if (c == 0) break;
2544 if ((c == '&') && (str[1] == '#')) {
2545 int val = xmlParseStringCharRef(ctxt, &str);
2546 if (val != 0) {
2547 COPY_BUF(0,buffer,nbchars,val);
2549 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2550 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2552 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2553 if (xmlParserDebugEntities)
2554 xmlGenericError(xmlGenericErrorContext,
2555 "String decoding Entity Reference: %.30s\n",
2556 str);
2557 ent = xmlParseStringEntityRef(ctxt, &str);
2558 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2559 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2560 goto int_error;
2561 if (ent != NULL)
2562 ctxt->nbentities += ent->checked;
2563 if ((ent != NULL) &&
2564 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2565 if (ent->content != NULL) {
2566 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2567 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2568 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2570 } else {
2571 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2572 "predefined entity has no content\n");
2574 } else if ((ent != NULL) && (ent->content != NULL)) {
2575 ctxt->depth++;
2576 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2577 0, 0, 0);
2578 ctxt->depth--;
2580 if (rep != NULL) {
2581 current = rep;
2582 while (*current != 0) { /* non input consuming loop */
2583 buffer[nbchars++] = *current++;
2584 if (nbchars >
2585 buffer_size - XML_PARSER_BUFFER_SIZE) {
2586 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2587 goto int_error;
2588 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2591 xmlFree(rep);
2592 rep = NULL;
2594 } else if (ent != NULL) {
2595 int i = xmlStrlen(ent->name);
2596 const xmlChar *cur = ent->name;
2598 buffer[nbchars++] = '&';
2599 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2600 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2602 for (;i > 0;i--)
2603 buffer[nbchars++] = *cur++;
2604 buffer[nbchars++] = ';';
2606 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2607 if (xmlParserDebugEntities)
2608 xmlGenericError(xmlGenericErrorContext,
2609 "String decoding PE Reference: %.30s\n", str);
2610 ent = xmlParseStringPEReference(ctxt, &str);
2611 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2612 goto int_error;
2613 if (ent != NULL)
2614 ctxt->nbentities += ent->checked;
2615 if (ent != NULL) {
2616 if (ent->content == NULL) {
2617 xmlLoadEntityContent(ctxt, ent);
2619 ctxt->depth++;
2620 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2621 0, 0, 0);
2622 ctxt->depth--;
2623 if (rep != NULL) {
2624 current = rep;
2625 while (*current != 0) { /* non input consuming loop */
2626 buffer[nbchars++] = *current++;
2627 if (nbchars >
2628 buffer_size - XML_PARSER_BUFFER_SIZE) {
2629 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2630 goto int_error;
2631 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2634 xmlFree(rep);
2635 rep = NULL;
2638 } else {
2639 COPY_BUF(l,buffer,nbchars,c);
2640 str += l;
2641 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2642 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2645 if (str < last)
2646 c = CUR_SCHAR(str, l);
2647 else
2648 c = 0;
2650 buffer[nbchars] = 0;
2651 return(buffer);
2653 mem_error:
2654 xmlErrMemory(ctxt, NULL);
2655 int_error:
2656 if (rep != NULL)
2657 xmlFree(rep);
2658 if (buffer != NULL)
2659 xmlFree(buffer);
2660 return(NULL);
2664 * xmlStringDecodeEntities:
2665 * @ctxt: the parser context
2666 * @str: the input string
2667 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2668 * @end: an end marker xmlChar, 0 if none
2669 * @end2: an end marker xmlChar, 0 if none
2670 * @end3: an end marker xmlChar, 0 if none
2672 * Takes a entity string content and process to do the adequate substitutions.
2674 * [67] Reference ::= EntityRef | CharRef
2676 * [69] PEReference ::= '%' Name ';'
2678 * Returns A newly allocated string with the substitution done. The caller
2679 * must deallocate it !
2681 xmlChar *
2682 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2683 xmlChar end, xmlChar end2, xmlChar end3) {
2684 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2685 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2686 end, end2, end3));
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
2696 * areBlanks:
2697 * @ctxt: an XML parser context
2698 * @str: a xmlChar *
2699 * @len: the size of @str
2700 * @blank_chars: we know the chars are blanks
2702 * Is this a sequence of blank chars that one can ignore ?
2704 * Returns 1 if ignorable 0 otherwise.
2707 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2708 int blank_chars) {
2709 int i, ret;
2710 xmlNodePtr lastChild;
2713 * Don't spend time trying to differentiate them, the same callback is
2714 * used !
2716 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2717 return(0);
2720 * Check for xml:space value.
2722 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2723 (*(ctxt->space) == -2))
2724 return(0);
2727 * Check that the string is made of blanks
2729 if (blank_chars == 0) {
2730 for (i = 0;i < len;i++)
2731 if (!(IS_BLANK_CH(str[i]))) return(0);
2735 * Look if the element is mixed content in the DTD if available
2737 if (ctxt->node == NULL) return(0);
2738 if (ctxt->myDoc != NULL) {
2739 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2740 if (ret == 0) return(1);
2741 if (ret == 1) return(0);
2745 * Otherwise, heuristic :-\
2747 if ((RAW != '<') && (RAW != 0xD)) return(0);
2748 if ((ctxt->node->children == NULL) &&
2749 (RAW == '<') && (NXT(1) == '/')) return(0);
2751 lastChild = xmlGetLastChild(ctxt->node);
2752 if (lastChild == NULL) {
2753 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2754 (ctxt->node->content != NULL)) return(0);
2755 } else if (xmlNodeIsText(lastChild))
2756 return(0);
2757 else if ((ctxt->node->children != NULL) &&
2758 (xmlNodeIsText(ctxt->node->children)))
2759 return(0);
2760 return(1);
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/
2771 * xmlSplitQName:
2772 * @ctxt: an XML parser context
2773 * @name: an XML parser context
2774 * @prefix: a xmlChar **
2776 * parse an UTF8 encoded XML qualified name string
2778 * [NS 5] QName ::= (Prefix ':')? LocalPart
2780 * [NS 6] Prefix ::= NCName
2782 * [NS 7] LocalPart ::= NCName
2784 * Returns the local part, and prefix is updated
2785 * to get the Prefix if any.
2788 xmlChar *
2789 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2790 xmlChar buf[XML_MAX_NAMELEN + 5];
2791 xmlChar *buffer = NULL;
2792 int len = 0;
2793 int max = XML_MAX_NAMELEN;
2794 xmlChar *ret = NULL;
2795 const xmlChar *cur = name;
2796 int c;
2798 if (prefix == NULL) return(NULL);
2799 *prefix = NULL;
2801 if (cur == NULL) return(NULL);
2803 #ifndef XML_XML_NAMESPACE
2804 /* xml: prefix is not really a namespace */
2805 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2806 (cur[2] == 'l') && (cur[3] == ':'))
2807 return(xmlStrdup(name));
2808 #endif
2810 /* nasty but well=formed */
2811 if (cur[0] == ':')
2812 return(xmlStrdup(name));
2814 c = *cur++;
2815 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2816 buf[len++] = c;
2817 c = *cur++;
2819 if (len >= max) {
2821 * Okay someone managed to make a huge name, so he's ready to pay
2822 * for the processing speed.
2824 max = len * 2;
2826 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2827 if (buffer == NULL) {
2828 xmlErrMemory(ctxt, NULL);
2829 return(NULL);
2831 memcpy(buffer, buf, len);
2832 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2833 if (len + 10 > max) {
2834 xmlChar *tmp;
2836 max *= 2;
2837 tmp = (xmlChar *) xmlRealloc(buffer,
2838 max * sizeof(xmlChar));
2839 if (tmp == NULL) {
2840 xmlFree(buffer);
2841 xmlErrMemory(ctxt, NULL);
2842 return(NULL);
2844 buffer = tmp;
2846 buffer[len++] = c;
2847 c = *cur++;
2849 buffer[len] = 0;
2852 if ((c == ':') && (*cur == 0)) {
2853 if (buffer != NULL)
2854 xmlFree(buffer);
2855 *prefix = NULL;
2856 return(xmlStrdup(name));
2859 if (buffer == NULL)
2860 ret = xmlStrndup(buf, len);
2861 else {
2862 ret = buffer;
2863 buffer = NULL;
2864 max = XML_MAX_NAMELEN;
2868 if (c == ':') {
2869 c = *cur;
2870 *prefix = ret;
2871 if (c == 0) {
2872 return(xmlStrndup(BAD_CAST "", 0));
2874 len = 0;
2877 * Check that the first character is proper to start
2878 * a new name
2880 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2881 ((c >= 0x41) && (c <= 0x5A)) ||
2882 (c == '_') || (c == ':'))) {
2883 int l;
2884 int first = CUR_SCHAR(cur, l);
2886 if (!IS_LETTER(first) && (first != '_')) {
2887 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2888 "Name %s is not XML Namespace compliant\n",
2889 name);
2892 cur++;
2894 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2895 buf[len++] = c;
2896 c = *cur++;
2898 if (len >= max) {
2900 * Okay someone managed to make a huge name, so he's ready to pay
2901 * for the processing speed.
2903 max = len * 2;
2905 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2906 if (buffer == NULL) {
2907 xmlErrMemory(ctxt, NULL);
2908 return(NULL);
2910 memcpy(buffer, buf, len);
2911 while (c != 0) { /* tested bigname2.xml */
2912 if (len + 10 > max) {
2913 xmlChar *tmp;
2915 max *= 2;
2916 tmp = (xmlChar *) xmlRealloc(buffer,
2917 max * sizeof(xmlChar));
2918 if (tmp == NULL) {
2919 xmlErrMemory(ctxt, NULL);
2920 xmlFree(buffer);
2921 return(NULL);
2923 buffer = tmp;
2925 buffer[len++] = c;
2926 c = *cur++;
2928 buffer[len] = 0;
2931 if (buffer == NULL)
2932 ret = xmlStrndup(buf, len);
2933 else {
2934 ret = buffer;
2938 return(ret);
/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
#ifdef DEBUG
/* Per-routine call counters, only compiled in DEBUG builds. */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif /* DEBUG */
2963 * The two following functions are related to the change of accepted
2964 * characters for Name and NmToken in the Revision 5 of XML-1.0
2965 * They correspond to the modified production [4] and the new production [4a]
2966 * changes in that revision. Also note that the macros used for the
2967 * productions Letter, Digit, CombiningChar and Extender are not needed
2968 * anymore.
2969 * We still keep compatibility to pre-revision5 parsing semantic if the
2970 * new XML_PARSE_OLD10 option is given to the parser.
2972 static int
2973 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2974 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2976 * Use the new checks of production [4] [4a] amd [5] of the
2977 * Update 5 of XML-1.0
2979 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2980 (((c >= 'a') && (c <= 'z')) ||
2981 ((c >= 'A') && (c <= 'Z')) ||
2982 (c == '_') || (c == ':') ||
2983 ((c >= 0xC0) && (c <= 0xD6)) ||
2984 ((c >= 0xD8) && (c <= 0xF6)) ||
2985 ((c >= 0xF8) && (c <= 0x2FF)) ||
2986 ((c >= 0x370) && (c <= 0x37D)) ||
2987 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2988 ((c >= 0x200C) && (c <= 0x200D)) ||
2989 ((c >= 0x2070) && (c <= 0x218F)) ||
2990 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2991 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2992 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2993 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2994 ((c >= 0x10000) && (c <= 0xEFFFF))))
2995 return(1);
2996 } else {
2997 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2998 return(1);
3000 return(0);
3003 static int
3004 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3005 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3007 * Use the new checks of production [4] [4a] amd [5] of the
3008 * Update 5 of XML-1.0
3010 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3011 (((c >= 'a') && (c <= 'z')) ||
3012 ((c >= 'A') && (c <= 'Z')) ||
3013 ((c >= '0') && (c <= '9')) || /* !start */
3014 (c == '_') || (c == ':') ||
3015 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3016 ((c >= 0xC0) && (c <= 0xD6)) ||
3017 ((c >= 0xD8) && (c <= 0xF6)) ||
3018 ((c >= 0xF8) && (c <= 0x2FF)) ||
3019 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3020 ((c >= 0x370) && (c <= 0x37D)) ||
3021 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3022 ((c >= 0x200C) && (c <= 0x200D)) ||
3023 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3024 ((c >= 0x2070) && (c <= 0x218F)) ||
3025 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3026 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3027 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3028 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3029 ((c >= 0x10000) && (c <= 0xEFFFF))))
3030 return(1);
3031 } else {
3032 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3033 (c == '.') || (c == '-') ||
3034 (c == '_') || (c == ':') ||
3035 (IS_COMBINING(c)) ||
3036 (IS_EXTENDER(c)))
3037 return(1);
3039 return(0);
3042 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3043 int *len, int *alloc, int normalize);
3045 static const xmlChar *
3046 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3047 int len = 0, l;
3048 int c;
3049 int count = 0;
3051 #ifdef DEBUG
3052 nbParseNameComplex++;
3053 #endif
3056 * Handler for more complex cases
3058 GROW;
3059 if (ctxt->instate == XML_PARSER_EOF)
3060 return(NULL);
3061 c = CUR_CHAR(l);
3062 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3064 * Use the new checks of production [4] [4a] amd [5] of the
3065 * Update 5 of XML-1.0
3067 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3068 (!(((c >= 'a') && (c <= 'z')) ||
3069 ((c >= 'A') && (c <= 'Z')) ||
3070 (c == '_') || (c == ':') ||
3071 ((c >= 0xC0) && (c <= 0xD6)) ||
3072 ((c >= 0xD8) && (c <= 0xF6)) ||
3073 ((c >= 0xF8) && (c <= 0x2FF)) ||
3074 ((c >= 0x370) && (c <= 0x37D)) ||
3075 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3076 ((c >= 0x200C) && (c <= 0x200D)) ||
3077 ((c >= 0x2070) && (c <= 0x218F)) ||
3078 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3079 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3080 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3081 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3082 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3083 return(NULL);
3085 len += l;
3086 NEXTL(l);
3087 c = CUR_CHAR(l);
3088 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3089 (((c >= 'a') && (c <= 'z')) ||
3090 ((c >= 'A') && (c <= 'Z')) ||
3091 ((c >= '0') && (c <= '9')) || /* !start */
3092 (c == '_') || (c == ':') ||
3093 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3094 ((c >= 0xC0) && (c <= 0xD6)) ||
3095 ((c >= 0xD8) && (c <= 0xF6)) ||
3096 ((c >= 0xF8) && (c <= 0x2FF)) ||
3097 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3098 ((c >= 0x370) && (c <= 0x37D)) ||
3099 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100 ((c >= 0x200C) && (c <= 0x200D)) ||
3101 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3102 ((c >= 0x2070) && (c <= 0x218F)) ||
3103 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3104 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3105 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3106 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3107 ((c >= 0x10000) && (c <= 0xEFFFF))
3108 )) {
3109 if (count++ > 100) {
3110 count = 0;
3111 GROW;
3112 if (ctxt->instate == XML_PARSER_EOF)
3113 return(NULL);
3115 len += l;
3116 NEXTL(l);
3117 c = CUR_CHAR(l);
3119 } else {
3120 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3121 (!IS_LETTER(c) && (c != '_') &&
3122 (c != ':'))) {
3123 return(NULL);
3125 len += l;
3126 NEXTL(l);
3127 c = CUR_CHAR(l);
3129 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3130 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131 (c == '.') || (c == '-') ||
3132 (c == '_') || (c == ':') ||
3133 (IS_COMBINING(c)) ||
3134 (IS_EXTENDER(c)))) {
3135 if (count++ > 100) {
3136 count = 0;
3137 GROW;
3138 if (ctxt->instate == XML_PARSER_EOF)
3139 return(NULL);
3141 len += l;
3142 NEXTL(l);
3143 c = CUR_CHAR(l);
3146 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3147 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3148 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3152 * xmlParseName:
3153 * @ctxt: an XML parser context
3155 * parse an XML name.
3157 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3158 * CombiningChar | Extender
3160 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3162 * [6] Names ::= Name (#x20 Name)*
3164 * Returns the Name parsed or NULL
3167 const xmlChar *
3168 xmlParseName(xmlParserCtxtPtr ctxt) {
3169 const xmlChar *in;
3170 const xmlChar *ret;
3171 int count = 0;
3173 GROW;
3175 #ifdef DEBUG
3176 nbParseName++;
3177 #endif
3180 * Accelerator for simple ASCII names
3182 in = ctxt->input->cur;
3183 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3184 ((*in >= 0x41) && (*in <= 0x5A)) ||
3185 (*in == '_') || (*in == ':')) {
3186 in++;
3187 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3188 ((*in >= 0x41) && (*in <= 0x5A)) ||
3189 ((*in >= 0x30) && (*in <= 0x39)) ||
3190 (*in == '_') || (*in == '-') ||
3191 (*in == ':') || (*in == '.'))
3192 in++;
3193 if ((*in > 0) && (*in < 0x80)) {
3194 count = in - ctxt->input->cur;
3195 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3196 ctxt->input->cur = in;
3197 ctxt->nbChars += count;
3198 ctxt->input->col += count;
3199 if (ret == NULL)
3200 xmlErrMemory(ctxt, NULL);
3201 return(ret);
3204 /* accelerator for special cases */
3205 return(xmlParseNameComplex(ctxt));
3208 static const xmlChar *
3209 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3210 int len = 0, l;
3211 int c;
3212 int count = 0;
3214 #ifdef DEBUG
3215 nbParseNCNameComplex++;
3216 #endif
3219 * Handler for more complex cases
3221 GROW;
3222 c = CUR_CHAR(l);
3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3225 return(NULL);
3228 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3229 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3230 if (count++ > 100) {
3231 count = 0;
3232 GROW;
3233 if (ctxt->instate == XML_PARSER_EOF)
3234 return(NULL);
3236 len += l;
3237 NEXTL(l);
3238 c = CUR_CHAR(l);
3240 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3244 * xmlParseNCName:
3245 * @ctxt: an XML parser context
3246 * @len: lenght of the string parsed
3248 * parse an XML name.
3250 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3251 * CombiningChar | Extender
3253 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3255 * Returns the Name parsed or NULL
3258 static const xmlChar *
3259 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3260 const xmlChar *in;
3261 const xmlChar *ret;
3262 int count = 0;
3264 #ifdef DEBUG
3265 nbParseNCName++;
3266 #endif
3269 * Accelerator for simple ASCII names
3271 in = ctxt->input->cur;
3272 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3273 ((*in >= 0x41) && (*in <= 0x5A)) ||
3274 (*in == '_')) {
3275 in++;
3276 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3277 ((*in >= 0x41) && (*in <= 0x5A)) ||
3278 ((*in >= 0x30) && (*in <= 0x39)) ||
3279 (*in == '_') || (*in == '-') ||
3280 (*in == '.'))
3281 in++;
3282 if ((*in > 0) && (*in < 0x80)) {
3283 count = in - ctxt->input->cur;
3284 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3285 ctxt->input->cur = in;
3286 ctxt->nbChars += count;
3287 ctxt->input->col += count;
3288 if (ret == NULL) {
3289 xmlErrMemory(ctxt, NULL);
3291 return(ret);
3294 return(xmlParseNCNameComplex(ctxt));
3298 * xmlParseNameAndCompare:
3299 * @ctxt: an XML parser context
3301 * parse an XML name and compares for match
3302 * (specialized for endtag parsing)
3304 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3305 * and the name for mismatch
3308 static const xmlChar *
3309 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3310 register const xmlChar *cmp = other;
3311 register const xmlChar *in;
3312 const xmlChar *ret;
3314 GROW;
3315 if (ctxt->instate == XML_PARSER_EOF)
3316 return(NULL);
3318 in = ctxt->input->cur;
3319 while (*in != 0 && *in == *cmp) {
3320 ++in;
3321 ++cmp;
3322 ctxt->input->col++;
3324 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3325 /* success */
3326 ctxt->input->cur = in;
3327 return (const xmlChar*) 1;
3329 /* failure (or end of input buffer), check with full function */
3330 ret = xmlParseName (ctxt);
3331 /* strings coming from the dictionnary direct compare possible */
3332 if (ret == other) {
3333 return (const xmlChar*) 1;
3335 return ret;
3339 * xmlParseStringName:
3340 * @ctxt: an XML parser context
3341 * @str: a pointer to the string pointer (IN/OUT)
3343 * parse an XML name.
3345 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3346 * CombiningChar | Extender
3348 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3350 * [6] Names ::= Name (#x20 Name)*
3352 * Returns the Name parsed or NULL. The @str pointer
3353 * is updated to the current location in the string.
3356 static xmlChar *
3357 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3358 xmlChar buf[XML_MAX_NAMELEN + 5];
3359 const xmlChar *cur = *str;
3360 int len = 0, l;
3361 int c;
3363 #ifdef DEBUG
3364 nbParseStringName++;
3365 #endif
3367 c = CUR_SCHAR(cur, l);
3368 if (!xmlIsNameStartChar(ctxt, c)) {
3369 return(NULL);
3372 COPY_BUF(l,buf,len,c);
3373 cur += l;
3374 c = CUR_SCHAR(cur, l);
3375 while (xmlIsNameChar(ctxt, c)) {
3376 COPY_BUF(l,buf,len,c);
3377 cur += l;
3378 c = CUR_SCHAR(cur, l);
3379 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3381 * Okay someone managed to make a huge name, so he's ready to pay
3382 * for the processing speed.
3384 xmlChar *buffer;
3385 int max = len * 2;
3387 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3388 if (buffer == NULL) {
3389 xmlErrMemory(ctxt, NULL);
3390 return(NULL);
3392 memcpy(buffer, buf, len);
3393 while (xmlIsNameChar(ctxt, c)) {
3394 if (len + 10 > max) {
3395 xmlChar *tmp;
3396 max *= 2;
3397 tmp = (xmlChar *) xmlRealloc(buffer,
3398 max * sizeof(xmlChar));
3399 if (tmp == NULL) {
3400 xmlErrMemory(ctxt, NULL);
3401 xmlFree(buffer);
3402 return(NULL);
3404 buffer = tmp;
3406 COPY_BUF(l,buffer,len,c);
3407 cur += l;
3408 c = CUR_SCHAR(cur, l);
3410 buffer[len] = 0;
3411 *str = cur;
3412 return(buffer);
3415 *str = cur;
3416 return(xmlStrndup(buf, len));
3420 * xmlParseNmtoken:
3421 * @ctxt: an XML parser context
3423 * parse an XML Nmtoken.
3425 * [7] Nmtoken ::= (NameChar)+
3427 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3429 * Returns the Nmtoken parsed or NULL
3432 xmlChar *
3433 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3434 xmlChar buf[XML_MAX_NAMELEN + 5];
3435 int len = 0, l;
3436 int c;
3437 int count = 0;
3439 #ifdef DEBUG
3440 nbParseNmToken++;
3441 #endif
3443 GROW;
3444 if (ctxt->instate == XML_PARSER_EOF)
3445 return(NULL);
3446 c = CUR_CHAR(l);
3448 while (xmlIsNameChar(ctxt, c)) {
3449 if (count++ > 100) {
3450 count = 0;
3451 GROW;
3453 COPY_BUF(l,buf,len,c);
3454 NEXTL(l);
3455 c = CUR_CHAR(l);
3456 if (len >= XML_MAX_NAMELEN) {
3458 * Okay someone managed to make a huge token, so he's ready to pay
3459 * for the processing speed.
3461 xmlChar *buffer;
3462 int max = len * 2;
3464 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3465 if (buffer == NULL) {
3466 xmlErrMemory(ctxt, NULL);
3467 return(NULL);
3469 memcpy(buffer, buf, len);
3470 while (xmlIsNameChar(ctxt, c)) {
3471 if (count++ > 100) {
3472 count = 0;
3473 GROW;
3474 if (ctxt->instate == XML_PARSER_EOF) {
3475 xmlFree(buffer);
3476 return(NULL);
3479 if (len + 10 > max) {
3480 xmlChar *tmp;
3482 max *= 2;
3483 tmp = (xmlChar *) xmlRealloc(buffer,
3484 max * sizeof(xmlChar));
3485 if (tmp == NULL) {
3486 xmlErrMemory(ctxt, NULL);
3487 xmlFree(buffer);
3488 return(NULL);
3490 buffer = tmp;
3492 COPY_BUF(l,buffer,len,c);
3493 NEXTL(l);
3494 c = CUR_CHAR(l);
3496 buffer[len] = 0;
3497 return(buffer);
3500 if (len == 0)
3501 return(NULL);
3502 return(xmlStrndup(buf, len));
3506 * xmlParseEntityValue:
3507 * @ctxt: an XML parser context
3508 * @orig: if non-NULL store a copy of the original entity value
3510 * parse a value for ENTITY declarations
3512 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3513 * "'" ([^%&'] | PEReference | Reference)* "'"
3515 * Returns the EntityValue parsed with reference substituted or NULL
3518 xmlChar *
3519 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3520 xmlChar *buf = NULL;
3521 int len = 0;
3522 int size = XML_PARSER_BUFFER_SIZE;
3523 int c, l;
3524 xmlChar stop;
3525 xmlChar *ret = NULL;
3526 const xmlChar *cur = NULL;
3527 xmlParserInputPtr input;
3529 if (RAW == '"') stop = '"';
3530 else if (RAW == '\'') stop = '\'';
3531 else {
3532 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3533 return(NULL);
3535 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3536 if (buf == NULL) {
3537 xmlErrMemory(ctxt, NULL);
3538 return(NULL);
3542 * The content of the entity definition is copied in a buffer.
3545 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3546 input = ctxt->input;
3547 GROW;
3548 if (ctxt->instate == XML_PARSER_EOF) {
3549 xmlFree(buf);
3550 return(NULL);
3552 NEXT;
3553 c = CUR_CHAR(l);
3555 * NOTE: 4.4.5 Included in Literal
3556 * When a parameter entity reference appears in a literal entity
3557 * value, ... a single or double quote character in the replacement
3558 * text is always treated as a normal data character and will not
3559 * terminate the literal.
3560 * In practice it means we stop the loop only when back at parsing
3561 * the initial entity and the quote is found
3563 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3564 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3565 if (len + 5 >= size) {
3566 xmlChar *tmp;
3568 size *= 2;
3569 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3570 if (tmp == NULL) {
3571 xmlErrMemory(ctxt, NULL);
3572 xmlFree(buf);
3573 return(NULL);
3575 buf = tmp;
3577 COPY_BUF(l,buf,len,c);
3578 NEXTL(l);
3580 * Pop-up of finished entities.
3582 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3583 xmlPopInput(ctxt);
3585 GROW;
3586 c = CUR_CHAR(l);
3587 if (c == 0) {
3588 GROW;
3589 c = CUR_CHAR(l);
3592 buf[len] = 0;
3593 if (ctxt->instate == XML_PARSER_EOF) {
3594 xmlFree(buf);
3595 return(NULL);
3599 * Raise problem w.r.t. '&' and '%' being used in non-entities
3600 * reference constructs. Note Charref will be handled in
3601 * xmlStringDecodeEntities()
3603 cur = buf;
3604 while (*cur != 0) { /* non input consuming */
3605 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3606 xmlChar *name;
3607 xmlChar tmp = *cur;
3609 cur++;
3610 name = xmlParseStringName(ctxt, &cur);
3611 if ((name == NULL) || (*cur != ';')) {
3612 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3613 "EntityValue: '%c' forbidden except for entities references\n",
3614 tmp);
3616 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3617 (ctxt->inputNr == 1)) {
3618 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3620 if (name != NULL)
3621 xmlFree(name);
3622 if (*cur == 0)
3623 break;
3625 cur++;
3629 * Then PEReference entities are substituted.
3631 if (c != stop) {
3632 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3633 xmlFree(buf);
3634 } else {
3635 NEXT;
3637 * NOTE: 4.4.7 Bypassed
3638 * When a general entity reference appears in the EntityValue in
3639 * an entity declaration, it is bypassed and left as is.
3640 * so XML_SUBSTITUTE_REF is not set here.
3642 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3643 0, 0, 0);
3644 if (orig != NULL)
3645 *orig = buf;
3646 else
3647 xmlFree(buf);
3650 return(ret);
3654 * xmlParseAttValueComplex:
3655 * @ctxt: an XML parser context
3656 * @attlen: the resulting attribute len
3657 * @normalize: whether to apply the inner normalization
3659 * parse a value for an attribute, this is the fallback function
3660 * of xmlParseAttValue() when the attribute parsing requires handling
3661 * of non-ASCII characters, or normalization compaction.
3663 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * xmlParseAttValueComplex: slow-path parser for a quoted attribute value.
 *
 * ctxt:      parser context; the current input must be positioned on the
 *            opening quote (' or ").
 * attlen:    if non-NULL, receives the length in bytes of the result.
 * normalize: when non-zero, leading whitespace is dropped, runs of
 *            whitespace are collapsed to a single 0x20 and trailing
 *            0x20 are stripped; when zero every whitespace character
 *            is simply replaced by 0x20.
 *
 * Returns a newly allocated, NUL-terminated buffer.  NULL is returned when
 * no opening quote is found, when an allocation fails (mem_error) or when
 * the parser state becomes XML_PARSER_EOF.  A '<' in the value, an invalid
 * character or a missing closing quote are reported as fatal errors, but
 * the buffer collected so far is still returned.
 */
3665 static xmlChar *
3666 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3667 xmlChar limit = 0;
3668 xmlChar *buf = NULL;
3669 xmlChar *rep = NULL;
3670 int len = 0;
3671 int buf_size = 0;
3672 int c, l, in_space = 0;
3673 xmlChar *current = NULL;
3674 xmlEntityPtr ent;
/* Remember which quote opened the value: only that quote can close it. */
3676 if (NXT(0) == '"') {
3677 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3678 limit = '"';
3679 NEXT;
3680 } else if (NXT(0) == '\'') {
3681 limit = '\'';
3682 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3683 NEXT;
3684 } else {
3685 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3686 return(NULL);
3690 * allocate a translation buffer.
3692 buf_size = XML_PARSER_BUFFER_SIZE;
3693 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3694 if (buf == NULL) goto mem_error;
3697 * OK loop until we reach one of the ending char or a size limit.
3699 c = CUR_CHAR(l);
3700 while (((NXT(0) != limit) && /* checked */
3701 (IS_CHAR(c)) && (c != '<')) &&
3702 (ctxt->instate != XML_PARSER_EOF)) {
3703 if (c == 0) break;
/* '&' starts either a character reference (&#...;) or an entity reference. */
3704 if (c == '&') {
3705 in_space = 0;
3706 if (NXT(1) == '#') {
3707 int val = xmlParseCharRef(ctxt);
/* A char ref yielding '&' is kept escaped as "&#38;" unless entities are replaced. */
3709 if (val == '&') {
3710 if (ctxt->replaceEntities) {
3711 if (len > buf_size - 10) {
3712 growBuffer(buf, 10);
3714 buf[len++] = '&';
3715 } else {
3717 * The reparsing will be done in xmlStringGetNodeList()
3718 * called by the attribute() function in SAX.c
3720 if (len > buf_size - 10) {
3721 growBuffer(buf, 10);
3723 buf[len++] = '&';
3724 buf[len++] = '#';
3725 buf[len++] = '3';
3726 buf[len++] = '8';
3727 buf[len++] = ';';
/* Any other non-zero char ref is appended as its encoded character. */
3729 } else if (val != 0) {
3730 if (len > buf_size - 10) {
3731 growBuffer(buf, 10);
3733 len += xmlCopyChar(0, &buf[len], val);
3735 } else {
3736 ent = xmlParseEntityRef(ctxt);
/*
 * Entity accounting: one for this reference plus ent->owner.
 * NOTE(review): ent->owner looks like it is reused as a per-entity
 * counter here -- confirm against the entity handling code.
 */
3737 ctxt->nbentities++;
3738 if (ent != NULL)
3739 ctxt->nbentities += ent->owner;
/* Case 1: predefined entity; "&" stays escaped as "&#38;" when not replacing. */
3740 if ((ent != NULL) &&
3741 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3742 if (len > buf_size - 10) {
3743 growBuffer(buf, 10);
3745 if ((ctxt->replaceEntities == 0) &&
3746 (ent->content[0] == '&')) {
3747 buf[len++] = '&';
3748 buf[len++] = '#';
3749 buf[len++] = '3';
3750 buf[len++] = '8';
3751 buf[len++] = ';';
3752 } else {
3753 buf[len++] = ent->content[0];
/* Case 2: entity replacement enabled; splice in the decoded replacement text. */
3755 } else if ((ent != NULL) &&
3756 (ctxt->replaceEntities != 0)) {
3757 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3758 rep = xmlStringDecodeEntities(ctxt, ent->content,
3759 XML_SUBSTITUTE_REF,
3760 0, 0, 0);
3761 if (rep != NULL) {
3762 current = rep;
/* CR, LF and TAB coming from the replacement text become 0x20. */
3763 while (*current != 0) { /* non input consuming */
3764 if ((*current == 0xD) || (*current == 0xA) ||
3765 (*current == 0x9)) {
3766 buf[len++] = 0x20;
3767 current++;
3768 } else
3769 buf[len++] = *current++;
/* Growth is checked after each byte; the 10-byte slack keeps the write above in bounds. */
3770 if (len > buf_size - 10) {
3771 growBuffer(buf, 10);
3774 xmlFree(rep);
3775 rep = NULL;
/* Predefined entities were already handled by case 1, so this branch is not expected to run. */
3777 } else {
3778 if (len > buf_size - 10) {
3779 growBuffer(buf, 10);
3781 if (ent->content != NULL)
3782 buf[len++] = ent->content[0];
/* Case 3: entity known but not replaced; the reference is copied through as "&name;". */
3784 } else if (ent != NULL) {
3785 int i = xmlStrlen(ent->name);
3786 const xmlChar *cur = ent->name;
3789 * This may look absurd but is needed to detect
3790 * entities problems
/* The decoded text is thrown away; decoding is done only for its error reporting. */
3792 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3793 (ent->content != NULL)) {
3794 rep = xmlStringDecodeEntities(ctxt, ent->content,
3795 XML_SUBSTITUTE_REF, 0, 0, 0);
3796 if (rep != NULL) {
3797 xmlFree(rep);
3798 rep = NULL;
3803 * Just output the reference
3805 buf[len++] = '&';
/* Grow until the whole name (i bytes) plus the usual slack fits. */
3806 while (len > buf_size - i - 10) {
3807 growBuffer(buf, i + 10);
3809 for (;i > 0;i--)
3810 buf[len++] = *cur++;
3811 buf[len++] = ';';
3814 } else {
/* Literal whitespace: always stored as 0x20, subject to the normalize rules. */
3815 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3816 if ((len != 0) || (!normalize)) {
3817 if ((!normalize) || (!in_space)) {
3818 COPY_BUF(l,buf,len,0x20);
3819 while (len > buf_size - 10) {
3820 growBuffer(buf, 10);
3823 in_space = 1;
3825 } else {
/* Ordinary character: copied as is. */
3826 in_space = 0;
3827 COPY_BUF(l,buf,len,c);
3828 if (len > buf_size - 10) {
3829 growBuffer(buf, 10);
3832 NEXTL(l);
3834 GROW;
3835 c = CUR_CHAR(l);
3837 if (ctxt->instate == XML_PARSER_EOF)
3838 goto error;
/* When normalizing, strip trailing spaces left by the loop above. */
3840 if ((in_space) && (normalize)) {
3841 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3843 buf[len] = 0;
/* Report why the loop stopped; on the closing quote, consume it. */
3844 if (RAW == '<') {
3845 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3846 } else if (RAW != limit) {
3847 if ((c != 0) && (!IS_CHAR(c))) {
3848 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3849 "invalid character in attribute value\n");
3850 } else {
3851 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3852 "AttValue: ' expected\n");
3854 } else
3855 NEXT;
3856 if (attlen != NULL) *attlen = len;
3857 return(buf);
/* mem_error: allocation failure; falls through into the common cleanup. */
3859 mem_error:
3860 xmlErrMemory(ctxt, NULL);
/* error: release the output buffer and any pending replacement text. */
3861 error:
3862 if (buf != NULL)
3863 xmlFree(buf);
3864 if (rep != NULL)
3865 xmlFree(rep);
3866 return(NULL);
3870 * xmlParseAttValue:
3871 * @ctxt: an XML parser context
3873 * parse a value for an attribute
3874 * Note: the parser won't do substitution of entities here, this
3875 * will be handled later in xmlStringGetNodeList
3877 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3878 * "'" ([^<&'] | Reference)* "'"
3880 * 3.3.3 Attribute-Value Normalization:
3881 * Before the value of an attribute is passed to the application or
3882 * checked for validity, the XML processor must normalize it as follows:
3883 * - a character reference is processed by appending the referenced
3884 * character to the attribute value
3885 * - an entity reference is processed by recursively processing the
3886 * replacement text of the entity
3887 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3888 * appending #x20 to the normalized value, except that only a single
3889 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3890 * parsed entity or the literal entity value of an internal parsed entity
3891 * - other characters are processed by appending them to the normalized value
3892 * If the declared value is not CDATA, then the XML processor must further
3893 * process the normalized attribute value by discarding any leading and
3894 * trailing space (#x20) characters, and by replacing sequences of space
3895 * (#x20) characters by a single space (#x20) character.
3896 * All attributes for which no declaration has been read should be treated
3897 * by a non-validating parser as if declared CDATA.
3899 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3903 xmlChar *
3904 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3905 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3906 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3910 * xmlParseSystemLiteral:
3911 * @ctxt: an XML parser context
3913 * parse an XML Literal
3915 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3917 * Returns the SystemLiteral parsed or NULL
3920 xmlChar *
3921 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3922 xmlChar *buf = NULL;
3923 int len = 0;
3924 int size = XML_PARSER_BUFFER_SIZE;
3925 int cur, l;
3926 xmlChar stop;
3927 int state = ctxt->instate;
3928 int count = 0;
3930 SHRINK;
3931 if (RAW == '"') {
3932 NEXT;
3933 stop = '"';
3934 } else if (RAW == '\'') {
3935 NEXT;
3936 stop = '\'';
3937 } else {
3938 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3939 return(NULL);
3942 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3943 if (buf == NULL) {
3944 xmlErrMemory(ctxt, NULL);
3945 return(NULL);
3947 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3948 cur = CUR_CHAR(l);
3949 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3950 if (len + 5 >= size) {
3951 xmlChar *tmp;
3953 size *= 2;
3954 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3955 if (tmp == NULL) {
3956 xmlFree(buf);
3957 xmlErrMemory(ctxt, NULL);
3958 ctxt->instate = (xmlParserInputState) state;
3959 return(NULL);
3961 buf = tmp;
3963 count++;
3964 if (count > 50) {
3965 GROW;
3966 count = 0;
3967 if (ctxt->instate == XML_PARSER_EOF) {
3968 xmlFree(buf);
3969 return(NULL);
3972 COPY_BUF(l,buf,len,cur);
3973 NEXTL(l);
3974 cur = CUR_CHAR(l);
3975 if (cur == 0) {
3976 GROW;
3977 SHRINK;
3978 cur = CUR_CHAR(l);
3981 buf[len] = 0;
3982 ctxt->instate = (xmlParserInputState) state;
3983 if (!IS_CHAR(cur)) {
3984 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3985 } else {
3986 NEXT;
3988 return(buf);
3992 * xmlParsePubidLiteral:
3993 * @ctxt: an XML parser context
3995 * parse an XML public literal
3997 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3999 * Returns the PubidLiteral parsed or NULL.
4002 xmlChar *
4003 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4004 xmlChar *buf = NULL;
4005 int len = 0;
4006 int size = XML_PARSER_BUFFER_SIZE;
4007 xmlChar cur;
4008 xmlChar stop;
4009 int count = 0;
4010 xmlParserInputState oldstate = ctxt->instate;
4012 SHRINK;
4013 if (RAW == '"') {
4014 NEXT;
4015 stop = '"';
4016 } else if (RAW == '\'') {
4017 NEXT;
4018 stop = '\'';
4019 } else {
4020 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4021 return(NULL);
4023 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4024 if (buf == NULL) {
4025 xmlErrMemory(ctxt, NULL);
4026 return(NULL);
4028 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4029 cur = CUR;
4030 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4031 if (len + 1 >= size) {
4032 xmlChar *tmp;
4034 size *= 2;
4035 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4036 if (tmp == NULL) {
4037 xmlErrMemory(ctxt, NULL);
4038 xmlFree(buf);
4039 return(NULL);
4041 buf = tmp;
4043 buf[len++] = cur;
4044 count++;
4045 if (count > 50) {
4046 GROW;
4047 count = 0;
4048 if (ctxt->instate == XML_PARSER_EOF) {
4049 xmlFree(buf);
4050 return(NULL);
4053 NEXT;
4054 cur = CUR;
4055 if (cur == 0) {
4056 GROW;
4057 SHRINK;
4058 cur = CUR;
4061 buf[len] = 0;
4062 if (cur != stop) {
4063 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4064 } else {
4065 NEXT;
4067 ctxt->instate = oldstate;
4068 return(buf);
/* Forward declaration: xmlParseCharData() below calls this as its fallback before it is defined. */
4071 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4074 * used for the test in the inner loop of the char data testing
/*
 * Lookup table indexed by byte value, used by the fast scanning loop of
 * xmlParseCharData() ("while (test_char_data[*in])").
 * A non-zero entry (the byte's own value) means the byte can be skipped
 * over as plain character data; a zero entry stops the scan.
 * Zero entries are: all control bytes below 0x20 except TAB (0x09), so
 * LF (0x0A) and CR (0x0D) stop the scan; '&' (0x26); '<' (0x3C);
 * ']' (0x5D); and every byte from 0x80 up (non-ASCII).
 */
4076 static const unsigned char test_char_data[256] = {
4077 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4078 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4079 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4080 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4081 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4082 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4083 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4084 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4085 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4086 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4087 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4088 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4089 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4090 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4091 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4092 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4093 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4094 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4095 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4096 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4097 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4098 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4099 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4100 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4101 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4102 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4103 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4104 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4105 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4106 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4107 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4108 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4112 * xmlParseCharData:
4113 * @ctxt: an XML parser context
4114 * @cdata: int indicating whether we are within a CDATA section
4116 * parse a CharData section.
4117 * if we are within a CDATA section ']]>' marks an end of section.
4119 * The right angle bracket (>) may be represented using the string "&gt;",
4120 * and must, for compatibility, be escaped using "&gt;" or a character
4121 * reference when it appears in the string "]]>" in content, when that
4122 * string is not marking the end of a CDATA section.
4124 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
/*
 * xmlParseCharData: parse a run of character data and pass it to SAX.
 *
 * ctxt:  parser context.
 * cdata: non-zero when called from inside a CDATA section; the fast path
 *        below is then skipped entirely.
 *
 * Fast path (cdata == 0): the input buffer is scanned in place, without
 * copying, for as long as it holds spaces, line feeds and the bytes marked
 * non-zero in test_char_data.  Each scanned chunk is handed directly to
 * the SAX characters() or ignorableWhitespace() callback.  The function
 * returns on '<', '&', a misplaced "]]>", EOF, or when a callback changed
 * the parser state.  Anything else (for instance a non-ASCII byte) falls
 * through to xmlParseCharDataComplex(), after line/col are reset to the
 * position saved at the last delivery.
 */
4127 void
4128 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4129 const xmlChar *in;
4130 int nbchar = 0;
/* line/col: input position to restore before handing over to the slow path. */
4131 int line = ctxt->input->line;
4132 int col = ctxt->input->col;
4133 int ccol;
4135 SHRINK;
4136 GROW;
4138 * Accelerated common case where input don't need to be
4139 * modified before passing it to the handler.
4141 if (!cdata) {
4142 in = ctxt->input->cur;
4143 do {
/* Phase 1: skip leading spaces and line feeds, keeping line/col up to date. */
4144 get_more_space:
4145 while (*in == 0x20) { in++; ctxt->input->col++; }
4146 if (*in == 0xA) {
4147 do {
4148 ctxt->input->line++; ctxt->input->col = 1;
4149 in++;
4150 } while (*in == 0xA);
4151 goto get_more_space;
/* Only spaces/LF were seen before the next '<': deliver that chunk and return. */
4153 if (*in == '<') {
4154 nbchar = in - ctxt->input->cur;
4155 if (nbchar > 0) {
4156 const xmlChar *tmp = ctxt->input->cur;
4157 ctxt->input->cur = in;
/* With distinct handlers, areBlanks() decides which callback gets the chunk. */
4159 if ((ctxt->sax != NULL) &&
4160 (ctxt->sax->ignorableWhitespace !=
4161 ctxt->sax->characters)) {
4162 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4163 if (ctxt->sax->ignorableWhitespace != NULL)
4164 ctxt->sax->ignorableWhitespace(ctxt->userData,
4165 tmp, nbchar);
4166 } else {
4167 if (ctxt->sax->characters != NULL)
4168 ctxt->sax->characters(ctxt->userData,
4169 tmp, nbchar);
4170 if (*ctxt->space == -1)
4171 *ctxt->space = -2;
4173 } else if ((ctxt->sax != NULL) &&
4174 (ctxt->sax->characters != NULL)) {
4175 ctxt->sax->characters(ctxt->userData,
4176 tmp, nbchar);
4179 return;
/* Phase 2: run over every byte the lookup table marks as plain data. */
4182 get_more:
4183 ccol = ctxt->input->col;
4184 while (test_char_data[*in]) {
4185 in++;
4186 ccol++;
4188 ctxt->input->col = ccol;
4189 if (*in == 0xA) {
4190 do {
4191 ctxt->input->line++; ctxt->input->col = 1;
4192 in++;
4193 } while (*in == 0xA);
4194 goto get_more;
/* A lone ']' is ordinary text; "]]>" outside a CDATA section is an error. */
4196 if (*in == ']') {
4197 if ((in[1] == ']') && (in[2] == '>')) {
4198 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4199 ctxt->input->cur = in;
4200 return;
4202 in++;
4203 ctxt->input->col++;
4204 goto get_more;
/* Deliver everything scanned since input->cur. */
4206 nbchar = in - ctxt->input->cur;
4207 if (nbchar > 0) {
4208 if ((ctxt->sax != NULL) &&
4209 (ctxt->sax->ignorableWhitespace !=
4210 ctxt->sax->characters) &&
4211 (IS_BLANK_CH(*ctxt->input->cur))) {
4212 const xmlChar *tmp = ctxt->input->cur;
4213 ctxt->input->cur = in;
4215 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4216 if (ctxt->sax->ignorableWhitespace != NULL)
4217 ctxt->sax->ignorableWhitespace(ctxt->userData,
4218 tmp, nbchar);
4219 } else {
4220 if (ctxt->sax->characters != NULL)
4221 ctxt->sax->characters(ctxt->userData,
4222 tmp, nbchar);
4223 if (*ctxt->space == -1)
4224 *ctxt->space = -2;
/* Remember the position reached by the data delivered so far. */
4226 line = ctxt->input->line;
4227 col = ctxt->input->col;
4228 } else if (ctxt->sax != NULL) {
4229 if (ctxt->sax->characters != NULL)
4230 ctxt->sax->characters(ctxt->userData,
4231 ctxt->input->cur, nbchar);
4232 line = ctxt->input->line;
4233 col = ctxt->input->col;
4235 /* something really bad happened in the SAX callback */
4236 if (ctxt->instate != XML_PARSER_CONTENT)
4237 return;
4239 ctxt->input->cur = in;
/*
 * CR LF pair: input->cur is moved onto the LF, so the CR is dropped and
 * the LF becomes the first byte of the next chunk.  A lone CR is left in
 * place (in is stepped back onto it).
 */
4240 if (*in == 0xD) {
4241 in++;
4242 if (*in == 0xA) {
4243 ctxt->input->cur = in;
4244 in++;
4245 ctxt->input->line++; ctxt->input->col = 1;
4246 continue; /* while */
4248 in--;
/* Markup or a reference starts here: character data is over. */
4250 if (*in == '<') {
4251 return;
4253 if (*in == '&') {
4254 return;
/* Refill the input buffer and resume from the (possibly moved) cursor. */
4256 SHRINK;
4257 GROW;
4258 if (ctxt->instate == XML_PARSER_EOF)
4259 return;
4260 in = ctxt->input->cur;
/* Stay on the fast path only while the next byte is 0x20..0x7F or TAB. */
4261 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4262 nbchar = 0;
/* Slow path: scanning ahead may have advanced line/col past input->cur, so reset them first. */
4264 ctxt->input->line = line;
4265 ctxt->input->col = col;
4266 xmlParseCharDataComplex(ctxt, cdata);
4270 * xmlParseCharDataComplex:
4271 * @ctxt: an XML parser context
4272 * @cdata: int indicating whether we are within a CDATA section
4274 * parse a CharData section.this is the fallback function
4275 * of xmlParseCharData() when the parsing requires handling
4276 * of non-ASCII characters.
4278 static void
4279 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4280 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4281 int nbchar = 0;
4282 int cur, l;
4283 int count = 0;
4285 SHRINK;
4286 GROW;
4287 cur = CUR_CHAR(l);
4288 while ((cur != '<') && /* checked */
4289 (cur != '&') &&
4290 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4291 if ((cur == ']') && (NXT(1) == ']') &&
4292 (NXT(2) == '>')) {
4293 if (cdata) break;
4294 else {
4295 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4298 COPY_BUF(l,buf,nbchar,cur);
4299 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4300 buf[nbchar] = 0;
4303 * OK the segment is to be consumed as chars.
4305 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4306 if (areBlanks(ctxt, buf, nbchar, 0)) {
4307 if (ctxt->sax->ignorableWhitespace != NULL)
4308 ctxt->sax->ignorableWhitespace(ctxt->userData,
4309 buf, nbchar);
4310 } else {
4311 if (ctxt->sax->characters != NULL)
4312 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4313 if ((ctxt->sax->characters !=
4314 ctxt->sax->ignorableWhitespace) &&
4315 (*ctxt->space == -1))
4316 *ctxt->space = -2;
4319 nbchar = 0;
4320 /* something really bad happened in the SAX callback */
4321 if (ctxt->instate != XML_PARSER_CONTENT)
4322 return;
4324 count++;
4325 if (count > 50) {
4326 GROW;
4327 count = 0;
4328 if (ctxt->instate == XML_PARSER_EOF)
4329 return;
4331 NEXTL(l);
4332 cur = CUR_CHAR(l);
4334 if (nbchar != 0) {
4335 buf[nbchar] = 0;
4337 * OK the segment is to be consumed as chars.
4339 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4340 if (areBlanks(ctxt, buf, nbchar, 0)) {
4341 if (ctxt->sax->ignorableWhitespace != NULL)
4342 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4343 } else {
4344 if (ctxt->sax->characters != NULL)
4345 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4346 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4347 (*ctxt->space == -1))
4348 *ctxt->space = -2;
4352 if ((cur != 0) && (!IS_CHAR(cur))) {
4353 /* Generate the error and skip the offending character */
4354 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4355 "PCDATA invalid Char value %d\n",
4356 cur);
4357 NEXTL(l);
4362 * xmlParseExternalID:
4363 * @ctxt: an XML parser context
4364 * @publicID: a xmlChar** receiving PubidLiteral
4365 * @strict: indicate whether we should restrict parsing to only
4366 * production [75], see NOTE below
4368 * Parse an External ID or a Public ID
4370 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4371 * 'PUBLIC' S PubidLiteral S SystemLiteral
4373 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4374 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4376 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4378 * Returns the function returns SystemLiteral and in the second
4379 * case publicID receives PubidLiteral, is strict is off
4380 * it is possible to return NULL and have publicID set.
4383 xmlChar *
4384 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4385 xmlChar *URI = NULL;
4387 SHRINK;
4389 *publicID = NULL;
4390 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4391 SKIP(6);
4392 if (!IS_BLANK_CH(CUR)) {
4393 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4394 "Space required after 'SYSTEM'\n");
4396 SKIP_BLANKS;
4397 URI = xmlParseSystemLiteral(ctxt);
4398 if (URI == NULL) {
4399 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4401 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4402 SKIP(6);
4403 if (!IS_BLANK_CH(CUR)) {
4404 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4405 "Space required after 'PUBLIC'\n");
4407 SKIP_BLANKS;
4408 *publicID = xmlParsePubidLiteral(ctxt);
4409 if (*publicID == NULL) {
4410 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4412 if (strict) {
4414 * We don't handle [83] so "S SystemLiteral" is required.
4416 if (!IS_BLANK_CH(CUR)) {
4417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4418 "Space required after the Public Identifier\n");
4420 } else {
4422 * We handle [83] so we return immediately, if
4423 * "S SystemLiteral" is not detected. From a purely parsing
4424 * point of view that's a nice mess.
4426 const xmlChar *ptr;
4427 GROW;
4429 ptr = CUR_PTR;
4430 if (!IS_BLANK_CH(*ptr)) return(NULL);
4432 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4433 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4435 SKIP_BLANKS;
4436 URI = xmlParseSystemLiteral(ctxt);
4437 if (URI == NULL) {
4438 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4441 return(URI);
4445 * xmlParseCommentComplex:
4446 * @ctxt: an XML parser context
4447 * @buf: the already parsed part of the buffer
4448 * @len: number of bytes filled in the buffer
4449 * @size: allocated size of the buffer
4451 * Skip an XML (SGML) comment <!-- .... -->
4452 * The spec says that "For compatibility, the string "--" (double-hyphen)
4453 * must not occur within comments. "
4454 * This is the slow routine in case the accelerator for ascii didn't work
4456 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * xmlParseCommentComplex: slow-path comment parser.
 *
 * ctxt: parser context, positioned inside a comment (after "<!--" and
 *       after whatever the caller already consumed).
 * buf:  comment text collected so far by the caller, or NULL; when NULL a
 *       buffer is allocated here and len/size are reset.
 * len:  number of bytes already used in buf.
 * size: allocated size of buf.
 *
 * The function frees buf on every exit path, so the caller must not use
 * it afterwards.  On success the comment text is passed to the SAX
 * comment() callback.
 *
 * q, r and cur form a sliding window over three consecutive characters:
 * each iteration appends q to buf and shifts the window by one.  The loop
 * ends when the window holds "-->", so the two closing hyphens are never
 * copied into buf.
 */
4458 static void
4459 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4460 int q, ql;
4461 int r, rl;
4462 int cur, l;
4463 int count = 0;
4464 int inputid;
/* Remembered to check that the comment ends in the input it started in. */
4466 inputid = ctxt->input->id;
4468 if (buf == NULL) {
4469 len = 0;
4470 size = XML_PARSER_BUFFER_SIZE;
4471 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4472 if (buf == NULL) {
4473 xmlErrMemory(ctxt, NULL);
4474 return;
4477 GROW; /* Assure there's enough input data */
/* Prime the window: read q, then r, then cur; a 0 character means the input ended. */
4478 q = CUR_CHAR(ql);
4479 if (q == 0)
4480 goto not_terminated;
4481 if (!IS_CHAR(q)) {
4482 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4483 "xmlParseComment: invalid xmlChar value %d\n",
4485 xmlFree (buf);
4486 return;
4488 NEXTL(ql);
4489 r = CUR_CHAR(rl);
4490 if (r == 0)
4491 goto not_terminated;
4492 if (!IS_CHAR(r)) {
4493 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4494 "xmlParseComment: invalid xmlChar value %d\n",
4496 xmlFree (buf);
4497 return;
4499 NEXTL(rl);
4500 cur = CUR_CHAR(l);
4501 if (cur == 0)
4502 goto not_terminated;
/* Loop until the window (q, r, cur) is exactly "-->". */
4503 while (IS_CHAR(cur) && /* checked */
4504 ((cur != '>') ||
4505 (r != '-') || (q != '-'))) {
/* "--" not followed by '>' is reported, but parsing goes on. */
4506 if ((r == '-') && (q == '-')) {
4507 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
/* Keep room for one multi-byte character plus the terminator. */
4509 if (len + 5 >= size) {
4510 xmlChar *new_buf;
4511 size *= 2;
4512 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4513 if (new_buf == NULL) {
4514 xmlFree (buf);
4515 xmlErrMemory(ctxt, NULL);
4516 return;
4518 buf = new_buf;
/* Emit the oldest character and slide the window by one. */
4520 COPY_BUF(ql,buf,len,q);
4521 q = r;
4522 ql = rl;
4523 r = cur;
4524 rl = l;
/* Refill the input only every 50 characters or so. */
4526 count++;
4527 if (count > 50) {
4528 GROW;
4529 count = 0;
4530 if (ctxt->instate == XML_PARSER_EOF) {
4531 xmlFree(buf);
4532 return;
4535 NEXTL(l);
4536 cur = CUR_CHAR(l);
/* A 0 may only mean the current chunk is exhausted: refill and retry. */
4537 if (cur == 0) {
4538 SHRINK;
4539 GROW;
4540 cur = CUR_CHAR(l);
4543 buf[len] = 0;
/* Three ways out of the loop: end of input, invalid character, or "-->". */
4544 if (cur == 0) {
4545 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4546 "Comment not terminated \n<!--%.50s\n", buf);
4547 } else if (!IS_CHAR(cur)) {
4548 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4549 "xmlParseComment: invalid xmlChar value %d\n",
4550 cur);
4551 } else {
4552 if (inputid != ctxt->input->id) {
4553 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4554 "Comment doesn't start and stop in the same entity\n");
/* Consume the closing '>' and report the comment to the application. */
4556 NEXT;
4557 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4558 (!ctxt->disableSAX))
4559 ctxt->sax->comment(ctxt->userData, buf);
4561 xmlFree(buf);
4562 return;
/* not_terminated: the input ended while the three-character window was being primed. */
4563 not_terminated:
4564 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4565 "Comment not terminated\n", NULL);
4566 xmlFree(buf);
4567 return;
4571 * xmlParseComment:
4572 * @ctxt: an XML parser context
4574 * Skip an XML (SGML) comment <!-- .... -->
4575 * The spec says that "For compatibility, the string "--" (double-hyphen)
4576 * must not occur within comments. "
4578 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * xmlParseComment: parse an XML comment "<!-- ... -->" and report it
 * through the SAX comment() callback.
 *
 * ctxt: parser context; nothing happens unless the input is positioned
 *       on "<!--".
 *
 * The parser state is XML_PARSER_COMMENT while the comment is read and is
 * restored on return, except on the EOF exit and, for the normal exit,
 * when the state has become XML_PARSER_EOF in the meantime.
 *
 * Fast path: the input buffer is scanned in place for runs of TAB and of
 * bytes 0x20..0x7F other than '-'; each run is appended to a heap buffer,
 * but only when a comment() handler is installed.  As soon as a byte
 * outside that range shows up, the work is handed to
 * xmlParseCommentComplex(), which also takes over (and frees) the buffer.
 */
4580 void
4581 xmlParseComment(xmlParserCtxtPtr ctxt) {
4582 xmlChar *buf = NULL;
4583 int size = XML_PARSER_BUFFER_SIZE;
4584 int len = 0;
4585 xmlParserInputState state;
4586 const xmlChar *in;
4587 int nbchar = 0, ccol;
4588 int inputid;
4591 * Check that there is a comment right here.
4593 if ((RAW != '<') || (NXT(1) != '!') ||
4594 (NXT(2) != '-') || (NXT(3) != '-')) return;
4595 state = ctxt->instate;
4596 ctxt->instate = XML_PARSER_COMMENT;
/* Remembered to check that the comment ends in the input it started in. */
4597 inputid = ctxt->input->id;
4598 SKIP(4);
4599 SHRINK;
4600 GROW;
4603 * Accelerated common case where input don't need to be
4604 * modified before passing it to the handler.
4606 in = ctxt->input->cur;
4607 do {
4608 if (*in == 0xA) {
4609 do {
4610 ctxt->input->line++; ctxt->input->col = 1;
4611 in++;
4612 } while (*in == 0xA);
/* Scan a run of TAB and of bytes 0x20..0x7F other than '-'. */
4614 get_more:
4615 ccol = ctxt->input->col;
4616 while (((*in > '-') && (*in <= 0x7F)) ||
4617 ((*in >= 0x20) && (*in < '-')) ||
4618 (*in == 0x09)) {
4619 in++;
4620 ccol++;
4622 ctxt->input->col = ccol;
4623 if (*in == 0xA) {
4624 do {
4625 ctxt->input->line++; ctxt->input->col = 1;
4626 in++;
4627 } while (*in == 0xA);
4628 goto get_more;
4630 nbchar = in - ctxt->input->cur;
4632 * save current set of data
4634 if (nbchar > 0) {
4635 if ((ctxt->sax != NULL) &&
4636 (ctxt->sax->comment != NULL)) {
/* First chunk: allocate just enough if "--" follows, otherwise leave spare room. */
4637 if (buf == NULL) {
4638 if ((*in == '-') && (in[1] == '-'))
4639 size = nbchar + 1;
4640 else
4641 size = XML_PARSER_BUFFER_SIZE + nbchar;
4642 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4643 if (buf == NULL) {
4644 xmlErrMemory(ctxt, NULL);
4645 ctxt->instate = state;
4646 return;
4648 len = 0;
4649 } else if (len + nbchar + 1 >= size) {
4650 xmlChar *new_buf;
4651 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4652 new_buf = (xmlChar *) xmlRealloc(buf,
4653 size * sizeof(xmlChar));
4654 if (new_buf == NULL) {
4655 xmlFree (buf);
4656 xmlErrMemory(ctxt, NULL);
4657 ctxt->instate = state;
4658 return;
4660 buf = new_buf;
4662 memcpy(&buf[len], ctxt->input->cur, nbchar);
4663 len += nbchar;
4664 buf[len] = 0;
/* Commit the scan position. */
4667 ctxt->input->cur = in;
4668 if (*in == 0xA) {
4669 in++;
4670 ctxt->input->line++; ctxt->input->col = 1;
/* CR LF pair: input->cur is moved onto the LF so the CR is dropped; a lone CR is left in place. */
4672 if (*in == 0xD) {
4673 in++;
4674 if (*in == 0xA) {
4675 ctxt->input->cur = in;
4676 in++;
4677 ctxt->input->line++; ctxt->input->col = 1;
4678 continue; /* while */
4680 in--;
4682 SHRINK;
4683 GROW;
/* NOTE(review): buf can still be NULL here and is freed unguarded, unlike the other exits -- confirm xmlFree(NULL) is safe. */
4684 if (ctxt->instate == XML_PARSER_EOF) {
4685 xmlFree(buf);
4686 return;
4688 in = ctxt->input->cur;
/* A '-' stopped the scan: look for the "-->" terminator. */
4689 if (*in == '-') {
4690 if (in[1] == '-') {
4691 if (in[2] == '>') {
4692 if (ctxt->input->id != inputid) {
4693 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4694 "comment doesn't start and stop in the same entity\n");
4696 SKIP(3);
/* Report the comment; an empty string stands in when no text was collected. */
4697 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4698 (!ctxt->disableSAX)) {
4699 if (buf != NULL)
4700 ctxt->sax->comment(ctxt->userData, buf);
4701 else
4702 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4704 if (buf != NULL)
4705 xmlFree(buf);
4706 if (ctxt->instate != XML_PARSER_EOF)
4707 ctxt->instate = state;
4708 return;
/* "--" not followed by '>': reported as an error, then scanning resumes after the hyphens. */
4710 if (buf != NULL)
4711 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4712 "Comment not terminated \n<!--%.50s\n",
4713 buf);
4714 else
4715 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4716 "Comment not terminated \n", NULL);
4717 in++;
4718 ctxt->input->col++;
4720 in++;
4721 ctxt->input->col++;
4722 goto get_more;
/* Stay on the fast path only while the next byte is 0x20..0x7F or TAB. */
4724 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* Slow path: xmlParseCommentComplex() continues with, and then frees, the buffer built so far. */
4725 xmlParseCommentComplex(ctxt, buf, len, size);
4726 ctxt->instate = state;
4727 return;
4732 * xmlParsePITarget:
4733 * @ctxt: an XML parser context
4735 * parse the name of a PI
4737 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4739 * Returns the PITarget name or NULL
4742 const xmlChar *
4743 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4744 const xmlChar *name;
4746 name = xmlParseName(ctxt);
4747 if ((name != NULL) &&
4748 ((name[0] == 'x') || (name[0] == 'X')) &&
4749 ((name[1] == 'm') || (name[1] == 'M')) &&
4750 ((name[2] == 'l') || (name[2] == 'L'))) {
4751 int i;
4752 if ((name[0] == 'x') && (name[1] == 'm') &&
4753 (name[2] == 'l') && (name[3] == 0)) {
4754 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4755 "XML declaration allowed only at the start of the document\n");
4756 return(name);
4757 } else if (name[3] == 0) {
4758 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4759 return(name);
4761 for (i = 0;;i++) {
4762 if (xmlW3CPIs[i] == NULL) break;
4763 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4764 return(name);
4766 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4767 "xmlParsePITarget: invalid name prefix 'xml'\n",
4768 NULL, NULL);
4770 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4771 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4772 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4774 return(name);
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected value is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL, optional blanks, end of
 * string. Anything else raises an XML_WAR_CATALOG_PI warning, except a
 * missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        /* Not a "catalog=" pseudo-attribute: give up without a warning. */
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;

    /* The URL must be quoted; remember which quote opened it. */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4840 * xmlParsePI:
4841 * @ctxt: an XML parser context
4843 * parse an XML Processing Instruction.
4845 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4847 * The processing is transfered to SAX once parsed.
4850 void
4851 xmlParsePI(xmlParserCtxtPtr ctxt) {
4852 xmlChar *buf = NULL;
4853 int len = 0;
4854 int size = XML_PARSER_BUFFER_SIZE;
4855 int cur, l;
4856 const xmlChar *target;
4857 xmlParserInputState state;
4858 int count = 0;
4860 if ((RAW == '<') && (NXT(1) == '?')) {
4861 xmlParserInputPtr input = ctxt->input;
4862 state = ctxt->instate;
4863 ctxt->instate = XML_PARSER_PI;
4865 * this is a Processing Instruction.
4867 SKIP(2);
4868 SHRINK;
4871 * Parse the target name and check for special support like
4872 * namespace.
4874 target = xmlParsePITarget(ctxt);
4875 if (target != NULL) {
4876 if ((RAW == '?') && (NXT(1) == '>')) {
4877 if (input != ctxt->input) {
4878 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4879 "PI declaration doesn't start and stop in the same entity\n");
4881 SKIP(2);
4884 * SAX: PI detected.
4886 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4887 (ctxt->sax->processingInstruction != NULL))
4888 ctxt->sax->processingInstruction(ctxt->userData,
4889 target, NULL);
4890 if (ctxt->instate != XML_PARSER_EOF)
4891 ctxt->instate = state;
4892 return;
4894 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4895 if (buf == NULL) {
4896 xmlErrMemory(ctxt, NULL);
4897 ctxt->instate = state;
4898 return;
4900 cur = CUR;
4901 if (!IS_BLANK(cur)) {
4902 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4903 "ParsePI: PI %s space expected\n", target);
4905 SKIP_BLANKS;
4906 cur = CUR_CHAR(l);
4907 while (IS_CHAR(cur) && /* checked */
4908 ((cur != '?') || (NXT(1) != '>'))) {
4909 if (len + 5 >= size) {
4910 xmlChar *tmp;
4912 size *= 2;
4913 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4914 if (tmp == NULL) {
4915 xmlErrMemory(ctxt, NULL);
4916 xmlFree(buf);
4917 ctxt->instate = state;
4918 return;
4920 buf = tmp;
4922 count++;
4923 if (count > 50) {
4924 GROW;
4925 if (ctxt->instate == XML_PARSER_EOF) {
4926 xmlFree(buf);
4927 return;
4929 count = 0;
4931 COPY_BUF(l,buf,len,cur);
4932 NEXTL(l);
4933 cur = CUR_CHAR(l);
4934 if (cur == 0) {
4935 SHRINK;
4936 GROW;
4937 cur = CUR_CHAR(l);
4940 buf[len] = 0;
4941 if (cur != '?') {
4942 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4943 "ParsePI: PI %s never end ...\n", target);
4944 } else {
4945 if (input != ctxt->input) {
4946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4947 "PI declaration doesn't start and stop in the same entity\n");
4949 SKIP(2);
4951 #ifdef LIBXML_CATALOG_ENABLED
4952 if (((state == XML_PARSER_MISC) ||
4953 (state == XML_PARSER_START)) &&
4954 (xmlStrEqual(target, XML_CATALOG_PI))) {
4955 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4956 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4957 (allow == XML_CATA_ALLOW_ALL))
4958 xmlParseCatalogPI(ctxt, buf);
4960 #endif
4964 * SAX: PI detected.
4966 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4967 (ctxt->sax->processingInstruction != NULL))
4968 ctxt->sax->processingInstruction(ctxt->userData,
4969 target, buf);
4971 xmlFree(buf);
4972 } else {
4973 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4975 if (ctxt->instate != XML_PARSER_EOF)
4976 ctxt->instate = state;
4981 * xmlParseNotationDecl:
4982 * @ctxt: an XML parser context
4984 * parse a notation declaration
4986 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4988 * Hence there is actually 3 choices:
4989 * 'PUBLIC' S PubidLiteral
4990 * 'PUBLIC' S PubidLiteral S SystemLiteral
4991 * and 'SYSTEM' S SystemLiteral
4993 * See the NOTE on xmlParseExternalID().
4996 void
4997 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4998 const xmlChar *name;
4999 xmlChar *Pubid;
5000 xmlChar *Systemid;
5002 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5003 xmlParserInputPtr input = ctxt->input;
5004 SHRINK;
5005 SKIP(10);
5006 if (!IS_BLANK_CH(CUR)) {
5007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5008 "Space required after '<!NOTATION'\n");
5009 return;
5011 SKIP_BLANKS;
5013 name = xmlParseName(ctxt);
5014 if (name == NULL) {
5015 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5016 return;
5018 if (!IS_BLANK_CH(CUR)) {
5019 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5020 "Space required after the NOTATION name'\n");
5021 return;
5023 if (xmlStrchr(name, ':') != NULL) {
5024 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5025 "colon are forbidden from notation names '%s'\n",
5026 name, NULL, NULL);
5028 SKIP_BLANKS;
5031 * Parse the IDs.
5033 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5034 SKIP_BLANKS;
5036 if (RAW == '>') {
5037 if (input != ctxt->input) {
5038 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5039 "Notation declaration doesn't start and stop in the same entity\n");
5041 NEXT;
5042 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5043 (ctxt->sax->notationDecl != NULL))
5044 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5045 } else {
5046 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5048 if (Systemid != NULL) xmlFree(Systemid);
5049 if (Pubid != NULL) xmlFree(Pubid);
5054 * xmlParseEntityDecl:
5055 * @ctxt: an XML parser context
5057 * parse <!ENTITY declarations
5059 * [70] EntityDecl ::= GEDecl | PEDecl
5061 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5063 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5065 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5067 * [74] PEDef ::= EntityValue | ExternalID
5069 * [76] NDataDecl ::= S 'NDATA' S Name
5071 * [ VC: Notation Declared ]
5072 * The Name must match the declared name of a notation.
5075 void
5076 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5077 const xmlChar *name = NULL;
5078 xmlChar *value = NULL;
5079 xmlChar *URI = NULL, *literal = NULL;
5080 const xmlChar *ndata = NULL;
5081 int isParameter = 0;
5082 xmlChar *orig = NULL;
5083 int skipped;
5085 /* GROW; done in the caller */
5086 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5087 xmlParserInputPtr input = ctxt->input;
5088 SHRINK;
5089 SKIP(8);
5090 skipped = SKIP_BLANKS;
5091 if (skipped == 0) {
5092 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5093 "Space required after '<!ENTITY'\n");
5096 if (RAW == '%') {
5097 NEXT;
5098 skipped = SKIP_BLANKS;
5099 if (skipped == 0) {
5100 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5101 "Space required after '%'\n");
5103 isParameter = 1;
5106 name = xmlParseName(ctxt);
5107 if (name == NULL) {
5108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5109 "xmlParseEntityDecl: no name\n");
5110 return;
5112 if (xmlStrchr(name, ':') != NULL) {
5113 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5114 "colon are forbidden from entities names '%s'\n",
5115 name, NULL, NULL);
5117 skipped = SKIP_BLANKS;
5118 if (skipped == 0) {
5119 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5120 "Space required after the entity name\n");
5123 ctxt->instate = XML_PARSER_ENTITY_DECL;
5125 * handle the various case of definitions...
5127 if (isParameter) {
5128 if ((RAW == '"') || (RAW == '\'')) {
5129 value = xmlParseEntityValue(ctxt, &orig);
5130 if (value) {
5131 if ((ctxt->sax != NULL) &&
5132 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5133 ctxt->sax->entityDecl(ctxt->userData, name,
5134 XML_INTERNAL_PARAMETER_ENTITY,
5135 NULL, NULL, value);
5137 } else {
5138 URI = xmlParseExternalID(ctxt, &literal, 1);
5139 if ((URI == NULL) && (literal == NULL)) {
5140 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5142 if (URI) {
5143 xmlURIPtr uri;
5145 uri = xmlParseURI((const char *) URI);
5146 if (uri == NULL) {
5147 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5148 "Invalid URI: %s\n", URI);
5150 * This really ought to be a well formedness error
5151 * but the XML Core WG decided otherwise c.f. issue
5152 * E26 of the XML erratas.
5154 } else {
5155 if (uri->fragment != NULL) {
5157 * Okay this is foolish to block those but not
5158 * invalid URIs.
5160 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5161 } else {
5162 if ((ctxt->sax != NULL) &&
5163 (!ctxt->disableSAX) &&
5164 (ctxt->sax->entityDecl != NULL))
5165 ctxt->sax->entityDecl(ctxt->userData, name,
5166 XML_EXTERNAL_PARAMETER_ENTITY,
5167 literal, URI, NULL);
5169 xmlFreeURI(uri);
5173 } else {
5174 if ((RAW == '"') || (RAW == '\'')) {
5175 value = xmlParseEntityValue(ctxt, &orig);
5176 if ((ctxt->sax != NULL) &&
5177 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5178 ctxt->sax->entityDecl(ctxt->userData, name,
5179 XML_INTERNAL_GENERAL_ENTITY,
5180 NULL, NULL, value);
5182 * For expat compatibility in SAX mode.
5184 if ((ctxt->myDoc == NULL) ||
5185 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5186 if (ctxt->myDoc == NULL) {
5187 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5188 if (ctxt->myDoc == NULL) {
5189 xmlErrMemory(ctxt, "New Doc failed");
5190 return;
5192 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5194 if (ctxt->myDoc->intSubset == NULL)
5195 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5196 BAD_CAST "fake", NULL, NULL);
5198 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5199 NULL, NULL, value);
5201 } else {
5202 URI = xmlParseExternalID(ctxt, &literal, 1);
5203 if ((URI == NULL) && (literal == NULL)) {
5204 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5206 if (URI) {
5207 xmlURIPtr uri;
5209 uri = xmlParseURI((const char *)URI);
5210 if (uri == NULL) {
5211 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5212 "Invalid URI: %s\n", URI);
5214 * This really ought to be a well formedness error
5215 * but the XML Core WG decided otherwise c.f. issue
5216 * E26 of the XML erratas.
5218 } else {
5219 if (uri->fragment != NULL) {
5221 * Okay this is foolish to block those but not
5222 * invalid URIs.
5224 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5226 xmlFreeURI(uri);
5229 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5230 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5231 "Space required before 'NDATA'\n");
5233 SKIP_BLANKS;
5234 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5235 SKIP(5);
5236 if (!IS_BLANK_CH(CUR)) {
5237 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5238 "Space required after 'NDATA'\n");
5240 SKIP_BLANKS;
5241 ndata = xmlParseName(ctxt);
5242 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5243 (ctxt->sax->unparsedEntityDecl != NULL))
5244 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5245 literal, URI, ndata);
5246 } else {
5247 if ((ctxt->sax != NULL) &&
5248 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5249 ctxt->sax->entityDecl(ctxt->userData, name,
5250 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5251 literal, URI, NULL);
5253 * For expat compatibility in SAX mode.
5254 * assuming the entity repalcement was asked for
5256 if ((ctxt->replaceEntities != 0) &&
5257 ((ctxt->myDoc == NULL) ||
5258 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5259 if (ctxt->myDoc == NULL) {
5260 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5261 if (ctxt->myDoc == NULL) {
5262 xmlErrMemory(ctxt, "New Doc failed");
5263 return;
5265 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5268 if (ctxt->myDoc->intSubset == NULL)
5269 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5270 BAD_CAST "fake", NULL, NULL);
5271 xmlSAX2EntityDecl(ctxt, name,
5272 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5273 literal, URI, NULL);
5278 if (ctxt->instate == XML_PARSER_EOF)
5279 return;
5280 SKIP_BLANKS;
5281 if (RAW != '>') {
5282 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5283 "xmlParseEntityDecl: entity %s not terminated\n", name);
5284 } else {
5285 if (input != ctxt->input) {
5286 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5287 "Entity declaration doesn't start and stop in the same entity\n");
5289 NEXT;
5291 if (orig != NULL) {
5293 * Ugly mechanism to save the raw entity value.
5295 xmlEntityPtr cur = NULL;
5297 if (isParameter) {
5298 if ((ctxt->sax != NULL) &&
5299 (ctxt->sax->getParameterEntity != NULL))
5300 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5301 } else {
5302 if ((ctxt->sax != NULL) &&
5303 (ctxt->sax->getEntity != NULL))
5304 cur = ctxt->sax->getEntity(ctxt->userData, name);
5305 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5306 cur = xmlSAX2GetEntity(ctxt, name);
5309 if (cur != NULL) {
5310 if (cur->orig != NULL)
5311 xmlFree(orig);
5312 else
5313 cur->orig = orig;
5314 } else
5315 xmlFree(orig);
5317 if (value != NULL) xmlFree(value);
5318 if (URI != NULL) xmlFree(URI);
5319 if (literal != NULL) xmlFree(literal);
5324 * xmlParseDefaultDecl:
5325 * @ctxt: an XML parser context
5326 * @value: Receive a possible fixed default value for the attribute
5328 * Parse an attribute default declaration
5330 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5332 * [ VC: Required Attribute ]
5333 * if the default declaration is the keyword #REQUIRED, then the
5334 * attribute must be specified for all elements of the type in the
5335 * attribute-list declaration.
5337 * [ VC: Attribute Default Legal ]
5338 * The declared default value must meet the lexical constraints of
5339 * the declared attribute type c.f. xmlValidateAttributeDecl()
5341 * [ VC: Fixed Attribute Default ]
5342 * if an attribute has a default value declared with the #FIXED
5343 * keyword, instances of that attribute must match the default value.
5345 * [ WFC: No < in Attribute Values ]
5346 * handled in xmlParseAttValue()
5348 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5349 * or XML_ATTRIBUTE_FIXED.
5353 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5354 int val;
5355 xmlChar *ret;
5357 *value = NULL;
5358 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5359 SKIP(9);
5360 return(XML_ATTRIBUTE_REQUIRED);
5362 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5363 SKIP(8);
5364 return(XML_ATTRIBUTE_IMPLIED);
5366 val = XML_ATTRIBUTE_NONE;
5367 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5368 SKIP(6);
5369 val = XML_ATTRIBUTE_FIXED;
5370 if (!IS_BLANK_CH(CUR)) {
5371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 "Space required after '#FIXED'\n");
5374 SKIP_BLANKS;
5376 ret = xmlParseAttValue(ctxt);
5377 ctxt->instate = XML_PARSER_DTD;
5378 if (ret == NULL) {
5379 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5380 "Attribute default value declaration error\n");
5381 } else
5382 *value = ret;
5383 return(val);
5387 * xmlParseNotationType:
5388 * @ctxt: an XML parser context
5390 * parse an Notation attribute type.
5392 * Note: the leading 'NOTATION' S part has already being parsed...
5394 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5396 * [ VC: Notation Attributes ]
5397 * Values of this type must match one of the notation names included
5398 * in the declaration; all notation names in the declaration must be declared.
5400 * Returns: the notation attribute tree built while parsing
5403 xmlEnumerationPtr
5404 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5405 const xmlChar *name;
5406 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5408 if (RAW != '(') {
5409 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5410 return(NULL);
5412 SHRINK;
5413 do {
5414 NEXT;
5415 SKIP_BLANKS;
5416 name = xmlParseName(ctxt);
5417 if (name == NULL) {
5418 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5419 "Name expected in NOTATION declaration\n");
5420 xmlFreeEnumeration(ret);
5421 return(NULL);
5423 tmp = ret;
5424 while (tmp != NULL) {
5425 if (xmlStrEqual(name, tmp->name)) {
5426 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5427 "standalone: attribute notation value token %s duplicated\n",
5428 name, NULL);
5429 if (!xmlDictOwns(ctxt->dict, name))
5430 xmlFree((xmlChar *) name);
5431 break;
5433 tmp = tmp->next;
5435 if (tmp == NULL) {
5436 cur = xmlCreateEnumeration(name);
5437 if (cur == NULL) {
5438 xmlFreeEnumeration(ret);
5439 return(NULL);
5441 if (last == NULL) ret = last = cur;
5442 else {
5443 last->next = cur;
5444 last = cur;
5447 SKIP_BLANKS;
5448 } while (RAW == '|');
5449 if (RAW != ')') {
5450 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5451 xmlFreeEnumeration(ret);
5452 return(NULL);
5454 NEXT;
5455 return(ret);
5459 * xmlParseEnumerationType:
5460 * @ctxt: an XML parser context
5462 * parse an Enumeration attribute type.
5464 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5466 * [ VC: Enumeration ]
5467 * Values of this type must match one of the Nmtoken tokens in
5468 * the declaration
5470 * Returns: the enumeration attribute tree built while parsing
5473 xmlEnumerationPtr
5474 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5475 xmlChar *name;
5476 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5478 if (RAW != '(') {
5479 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5480 return(NULL);
5482 SHRINK;
5483 do {
5484 NEXT;
5485 SKIP_BLANKS;
5486 name = xmlParseNmtoken(ctxt);
5487 if (name == NULL) {
5488 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5489 return(ret);
5491 tmp = ret;
5492 while (tmp != NULL) {
5493 if (xmlStrEqual(name, tmp->name)) {
5494 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5495 "standalone: attribute enumeration value token %s duplicated\n",
5496 name, NULL);
5497 if (!xmlDictOwns(ctxt->dict, name))
5498 xmlFree(name);
5499 break;
5501 tmp = tmp->next;
5503 if (tmp == NULL) {
5504 cur = xmlCreateEnumeration(name);
5505 if (!xmlDictOwns(ctxt->dict, name))
5506 xmlFree(name);
5507 if (cur == NULL) {
5508 xmlFreeEnumeration(ret);
5509 return(NULL);
5511 if (last == NULL) ret = last = cur;
5512 else {
5513 last->next = cur;
5514 last = cur;
5517 SKIP_BLANKS;
5518 } while (RAW == '|');
5519 if (RAW != ')') {
5520 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5521 return(ret);
5523 NEXT;
5524 return(ret);
5528 * xmlParseEnumeratedType:
5529 * @ctxt: an XML parser context
5530 * @tree: the enumeration tree built while parsing
5532 * parse an Enumerated attribute type.
5534 * [57] EnumeratedType ::= NotationType | Enumeration
5536 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5539 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5543 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5544 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5545 SKIP(8);
5546 if (!IS_BLANK_CH(CUR)) {
5547 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5548 "Space required after 'NOTATION'\n");
5549 return(0);
5551 SKIP_BLANKS;
5552 *tree = xmlParseNotationType(ctxt);
5553 if (*tree == NULL) return(0);
5554 return(XML_ATTRIBUTE_NOTATION);
5556 *tree = xmlParseEnumerationType(ctxt);
5557 if (*tree == NULL) return(0);
5558 return(XML_ATTRIBUTE_ENUMERATION);
5562 * xmlParseAttributeType:
5563 * @ctxt: an XML parser context
5564 * @tree: the enumeration tree built while parsing
5566 * parse the Attribute list def for an element
5568 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5570 * [55] StringType ::= 'CDATA'
5572 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5573 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5575 * Validity constraints for attribute values syntax are checked in
5576 * xmlValidateAttributeValue()
5578 * [ VC: ID ]
5579 * Values of type ID must match the Name production. A name must not
5580 * appear more than once in an XML document as a value of this type;
5581 * i.e., ID values must uniquely identify the elements which bear them.
5583 * [ VC: One ID per Element Type ]
5584 * No element type may have more than one ID attribute specified.
5586 * [ VC: ID Attribute Default ]
5587 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5589 * [ VC: IDREF ]
5590 * Values of type IDREF must match the Name production, and values
5591 * of type IDREFS must match Names; each IDREF Name must match the value
5592 * of an ID attribute on some element in the XML document; i.e. IDREF
5593 * values must match the value of some ID attribute.
5595 * [ VC: Entity Name ]
5596 * Values of type ENTITY must match the Name production, values
5597 * of type ENTITIES must match Names; each Entity Name must match the
5598 * name of an unparsed entity declared in the DTD.
5600 * [ VC: Name Token ]
5601 * Values of type NMTOKEN must match the Nmtoken production; values
5602 * of type NMTOKENS must match Nmtokens.
5604 * Returns the attribute type
5606 int
5607 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5608 SHRINK;
5609 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5610 SKIP(5);
5611 return(XML_ATTRIBUTE_CDATA);
5612 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5613 SKIP(6);
5614 return(XML_ATTRIBUTE_IDREFS);
5615 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5616 SKIP(5);
5617 return(XML_ATTRIBUTE_IDREF);
5618 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5619 SKIP(2);
5620 return(XML_ATTRIBUTE_ID);
5621 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5622 SKIP(6);
5623 return(XML_ATTRIBUTE_ENTITY);
5624 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5625 SKIP(8);
5626 return(XML_ATTRIBUTE_ENTITIES);
5627 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5628 SKIP(8);
5629 return(XML_ATTRIBUTE_NMTOKENS);
5630 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5631 SKIP(7);
5632 return(XML_ATTRIBUTE_NMTOKEN);
5634 return(xmlParseEnumeratedType(ctxt, tree));
5638 * xmlParseAttributeListDecl:
5639 * @ctxt: an XML parser context
5641 * : parse the Attribute list def for an element
5643 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5645 * [53] AttDef ::= S Name S AttType S DefaultDecl
5648 void
5649 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5650 const xmlChar *elemName;
5651 const xmlChar *attrName;
5652 xmlEnumerationPtr tree;
5654 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5655 xmlParserInputPtr input = ctxt->input;
5657 SKIP(9);
5658 if (!IS_BLANK_CH(CUR)) {
5659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5660 "Space required after '<!ATTLIST'\n");
5662 SKIP_BLANKS;
5663 elemName = xmlParseName(ctxt);
5664 if (elemName == NULL) {
5665 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5666 "ATTLIST: no name for Element\n");
5667 return;
5669 SKIP_BLANKS;
5670 GROW;
5671 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5672 const xmlChar *check = CUR_PTR;
5673 int type;
5674 int def;
5675 xmlChar *defaultValue = NULL;
5677 GROW;
5678 tree = NULL;
5679 attrName = xmlParseName(ctxt);
5680 if (attrName == NULL) {
5681 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5682 "ATTLIST: no name for Attribute\n");
5683 break;
5685 GROW;
5686 if (!IS_BLANK_CH(CUR)) {
5687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5688 "Space required after the attribute name\n");
5689 break;
5691 SKIP_BLANKS;
5693 type = xmlParseAttributeType(ctxt, &tree);
5694 if (type <= 0) {
5695 break;
5698 GROW;
5699 if (!IS_BLANK_CH(CUR)) {
5700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 "Space required after the attribute type\n");
5702 if (tree != NULL)
5703 xmlFreeEnumeration(tree);
5704 break;
5706 SKIP_BLANKS;
5708 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5709 if (def <= 0) {
5710 if (defaultValue != NULL)
5711 xmlFree(defaultValue);
5712 if (tree != NULL)
5713 xmlFreeEnumeration(tree);
5714 break;
5716 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5717 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5719 GROW;
5720 if (RAW != '>') {
5721 if (!IS_BLANK_CH(CUR)) {
5722 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723 "Space required after the attribute default value\n");
5724 if (defaultValue != NULL)
5725 xmlFree(defaultValue);
5726 if (tree != NULL)
5727 xmlFreeEnumeration(tree);
5728 break;
5730 SKIP_BLANKS;
5732 if (check == CUR_PTR) {
5733 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5734 "in xmlParseAttributeListDecl\n");
5735 if (defaultValue != NULL)
5736 xmlFree(defaultValue);
5737 if (tree != NULL)
5738 xmlFreeEnumeration(tree);
5739 break;
5741 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5742 (ctxt->sax->attributeDecl != NULL))
5743 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5744 type, def, defaultValue, tree);
5745 else if (tree != NULL)
5746 xmlFreeEnumeration(tree);
5748 if ((ctxt->sax2) && (defaultValue != NULL) &&
5749 (def != XML_ATTRIBUTE_IMPLIED) &&
5750 (def != XML_ATTRIBUTE_REQUIRED)) {
5751 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5753 if (ctxt->sax2) {
5754 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5756 if (defaultValue != NULL)
5757 xmlFree(defaultValue);
5758 GROW;
5760 if (RAW == '>') {
5761 if (input != ctxt->input) {
5762 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5763 "Attribute list declaration doesn't start and stop in the same entity\n",
5764 NULL, NULL);
5766 NEXT;
5772 * xmlParseElementMixedContentDecl:
5773 * @ctxt: an XML parser context
5774 * @inputchk: the input used for the current entity, needed for boundary checks
5776 * parse the declaration for a Mixed Element content
5777 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5779 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5780 * '(' S? '#PCDATA' S? ')'
5782 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5784 * [ VC: No Duplicate Types ]
5785 * The same name must not appear more than once in a single
5786 * mixed-content declaration.
5788 * returns: the list of the xmlElementContentPtr describing the element choices
5790 xmlElementContentPtr
5791 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5792 xmlElementContentPtr ret = NULL, cur = NULL, n;
5793 const xmlChar *elem = NULL;
5795 GROW;
5796 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5797 SKIP(7);
5798 SKIP_BLANKS;
5799 SHRINK;
5800 if (RAW == ')') {
5801 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5802 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5803 "Element content declaration doesn't start and stop in the same entity\n",
5804 NULL, NULL);
5806 NEXT;
5807 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5808 if (ret == NULL)
5809 return(NULL);
5810 if (RAW == '*') {
5811 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5812 NEXT;
5814 return(ret);
5816 if ((RAW == '(') || (RAW == '|')) {
5817 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5818 if (ret == NULL) return(NULL);
5820 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
5821 NEXT;
5822 if (elem == NULL) {
5823 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5824 if (ret == NULL) return(NULL);
5825 ret->c1 = cur;
5826 if (cur != NULL)
5827 cur->parent = ret;
5828 cur = ret;
5829 } else {
5830 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5831 if (n == NULL) return(NULL);
5832 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5833 if (n->c1 != NULL)
5834 n->c1->parent = n;
5835 cur->c2 = n;
5836 if (n != NULL)
5837 n->parent = cur;
5838 cur = n;
5840 SKIP_BLANKS;
5841 elem = xmlParseName(ctxt);
5842 if (elem == NULL) {
5843 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5844 "xmlParseElementMixedContentDecl : Name expected\n");
5845 xmlFreeDocElementContent(ctxt->myDoc, cur);
5846 return(NULL);
5848 SKIP_BLANKS;
5849 GROW;
5851 if ((RAW == ')') && (NXT(1) == '*')) {
5852 if (elem != NULL) {
5853 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5854 XML_ELEMENT_CONTENT_ELEMENT);
5855 if (cur->c2 != NULL)
5856 cur->c2->parent = cur;
5858 if (ret != NULL)
5859 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5860 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5861 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5862 "Element content declaration doesn't start and stop in the same entity\n",
5863 NULL, NULL);
5865 SKIP(2);
5866 } else {
5867 xmlFreeDocElementContent(ctxt->myDoc, ret);
5868 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5869 return(NULL);
5872 } else {
5873 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5875 return(ret);
5879 * xmlParseElementChildrenContentDeclPriv:
5880 * @ctxt: an XML parser context
5881 * @inputchk: the input used for the current entity, needed for boundary checks
5882 * @depth: the level of recursion
5884 * parse the declaration for an element children content (choice or seq)
5885 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5888 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5890 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5892 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5894 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5896 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5897 * TODO Parameter-entity replacement text must be properly nested
5898 * with parenthesized groups. That is to say, if either of the
5899 * opening or closing parentheses in a choice, seq, or Mixed
5900 * construct is contained in the replacement text for a parameter
5901 * entity, both must be contained in the same replacement text. For
5902 * interoperability, if a parameter-entity reference appears in a
5903 * choice, seq, or Mixed construct, its replacement text should not
5904 * be empty, and neither the first nor last non-blank character of
5905 * the replacement text should be a connector (| or ,).
5907 * Returns the tree of xmlElementContentPtr describing the element
5908 * hierarchy.
5910 static xmlElementContentPtr
5911 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5912 int depth) {
5913 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5914 const xmlChar *elem;
5915 xmlChar type = 0;
5917 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5918 (depth > 2048)) {
5919 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5920 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5921 depth);
5922 return(NULL);
5924 SKIP_BLANKS;
5925 GROW;
5926 if (RAW == '(') {
5927 int inputid = ctxt->input->id;
5929 /* Recurse on first child */
5930 NEXT;
5931 SKIP_BLANKS;
5932 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5933 depth + 1);
5934 SKIP_BLANKS;
5935 GROW;
5936 } else {
5937 elem = xmlParseName(ctxt);
5938 if (elem == NULL) {
5939 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5940 return(NULL);
5942 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5943 if (cur == NULL) {
5944 xmlErrMemory(ctxt, NULL);
5945 return(NULL);
5947 GROW;
5948 if (RAW == '?') {
5949 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5950 NEXT;
5951 } else if (RAW == '*') {
5952 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5953 NEXT;
5954 } else if (RAW == '+') {
5955 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5956 NEXT;
5957 } else {
5958 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5960 GROW;
5962 SKIP_BLANKS;
5963 SHRINK;
5964 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
5966 * Each loop we parse one separator and one element.
5968 if (RAW == ',') {
5969 if (type == 0) type = CUR;
5972 * Detect "Name | Name , Name" error
5974 else if (type != CUR) {
5975 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5976 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5977 type);
5978 if ((last != NULL) && (last != ret))
5979 xmlFreeDocElementContent(ctxt->myDoc, last);
5980 if (ret != NULL)
5981 xmlFreeDocElementContent(ctxt->myDoc, ret);
5982 return(NULL);
5984 NEXT;
5986 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5987 if (op == NULL) {
5988 if ((last != NULL) && (last != ret))
5989 xmlFreeDocElementContent(ctxt->myDoc, last);
5990 xmlFreeDocElementContent(ctxt->myDoc, ret);
5991 return(NULL);
5993 if (last == NULL) {
5994 op->c1 = ret;
5995 if (ret != NULL)
5996 ret->parent = op;
5997 ret = cur = op;
5998 } else {
5999 cur->c2 = op;
6000 if (op != NULL)
6001 op->parent = cur;
6002 op->c1 = last;
6003 if (last != NULL)
6004 last->parent = op;
6005 cur =op;
6006 last = NULL;
6008 } else if (RAW == '|') {
6009 if (type == 0) type = CUR;
6012 * Detect "Name , Name | Name" error
6014 else if (type != CUR) {
6015 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6016 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6017 type);
6018 if ((last != NULL) && (last != ret))
6019 xmlFreeDocElementContent(ctxt->myDoc, last);
6020 if (ret != NULL)
6021 xmlFreeDocElementContent(ctxt->myDoc, ret);
6022 return(NULL);
6024 NEXT;
6026 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6027 if (op == NULL) {
6028 if ((last != NULL) && (last != ret))
6029 xmlFreeDocElementContent(ctxt->myDoc, last);
6030 if (ret != NULL)
6031 xmlFreeDocElementContent(ctxt->myDoc, ret);
6032 return(NULL);
6034 if (last == NULL) {
6035 op->c1 = ret;
6036 if (ret != NULL)
6037 ret->parent = op;
6038 ret = cur = op;
6039 } else {
6040 cur->c2 = op;
6041 if (op != NULL)
6042 op->parent = cur;
6043 op->c1 = last;
6044 if (last != NULL)
6045 last->parent = op;
6046 cur =op;
6047 last = NULL;
6049 } else {
6050 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6051 if ((last != NULL) && (last != ret))
6052 xmlFreeDocElementContent(ctxt->myDoc, last);
6053 if (ret != NULL)
6054 xmlFreeDocElementContent(ctxt->myDoc, ret);
6055 return(NULL);
6057 GROW;
6058 SKIP_BLANKS;
6059 GROW;
6060 if (RAW == '(') {
6061 int inputid = ctxt->input->id;
6062 /* Recurse on second child */
6063 NEXT;
6064 SKIP_BLANKS;
6065 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6066 depth + 1);
6067 SKIP_BLANKS;
6068 } else {
6069 elem = xmlParseName(ctxt);
6070 if (elem == NULL) {
6071 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6072 if (ret != NULL)
6073 xmlFreeDocElementContent(ctxt->myDoc, ret);
6074 return(NULL);
6076 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6077 if (last == NULL) {
6078 if (ret != NULL)
6079 xmlFreeDocElementContent(ctxt->myDoc, ret);
6080 return(NULL);
6082 if (RAW == '?') {
6083 last->ocur = XML_ELEMENT_CONTENT_OPT;
6084 NEXT;
6085 } else if (RAW == '*') {
6086 last->ocur = XML_ELEMENT_CONTENT_MULT;
6087 NEXT;
6088 } else if (RAW == '+') {
6089 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6090 NEXT;
6091 } else {
6092 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6095 SKIP_BLANKS;
6096 GROW;
6098 if ((cur != NULL) && (last != NULL)) {
6099 cur->c2 = last;
6100 if (last != NULL)
6101 last->parent = cur;
6103 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6104 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6105 "Element content declaration doesn't start and stop in the same entity\n",
6106 NULL, NULL);
6108 NEXT;
6109 if (RAW == '?') {
6110 if (ret != NULL) {
6111 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6112 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6113 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6114 else
6115 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6117 NEXT;
6118 } else if (RAW == '*') {
6119 if (ret != NULL) {
6120 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6121 cur = ret;
6123 * Some normalization:
6124 * (a | b* | c?)* == (a | b | c)*
6126 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6127 if ((cur->c1 != NULL) &&
6128 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6129 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6130 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6131 if ((cur->c2 != NULL) &&
6132 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6133 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6134 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6135 cur = cur->c2;
6138 NEXT;
6139 } else if (RAW == '+') {
6140 if (ret != NULL) {
6141 int found = 0;
6143 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6144 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6145 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6146 else
6147 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6149 * Some normalization:
6150 * (a | b*)+ == (a | b)*
6151 * (a | b?)+ == (a | b)*
6153 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6154 if ((cur->c1 != NULL) &&
6155 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6156 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6157 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6158 found = 1;
6160 if ((cur->c2 != NULL) &&
6161 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6162 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6163 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6164 found = 1;
6166 cur = cur->c2;
6168 if (found)
6169 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6171 NEXT;
6173 return(ret);
6177 * xmlParseElementChildrenContentDecl:
6178 * @ctxt: an XML parser context
6179 * @inputchk: the input used for the current entity, needed for boundary checks
6181 * parse the declaration for a Mixed Element content
6182 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6184 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6186 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6188 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6190 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6192 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6193 * TODO Parameter-entity replacement text must be properly nested
6194 * with parenthesized groups. That is to say, if either of the
6195 * opening or closing parentheses in a choice, seq, or Mixed
6196 * construct is contained in the replacement text for a parameter
6197 * entity, both must be contained in the same replacement text. For
6198 * interoperability, if a parameter-entity reference appears in a
6199 * choice, seq, or Mixed construct, its replacement text should not
6200 * be empty, and neither the first nor last non-blank character of
6201 * the replacement text should be a connector (| or ,).
6203 * Returns the tree of xmlElementContentPtr describing the element
6204 * hierarchy.
6206 xmlElementContentPtr
6207 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6208 /* stub left for API/ABI compat */
6209 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6213 * xmlParseElementContentDecl:
6214 * @ctxt: an XML parser context
6215 * @name: the name of the element being defined.
6216 * @result: the Element Content pointer will be stored here if any
6218 * parse the declaration for an Element content either Mixed or Children,
6219 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6221 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6223 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6227 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6228 xmlElementContentPtr *result) {
6230 xmlElementContentPtr tree = NULL;
6231 int inputid = ctxt->input->id;
6232 int res;
6234 *result = NULL;
6236 if (RAW != '(') {
6237 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6238 "xmlParseElementContentDecl : %s '(' expected\n", name);
6239 return(-1);
6241 NEXT;
6242 GROW;
6243 if (ctxt->instate == XML_PARSER_EOF)
6244 return(-1);
6245 SKIP_BLANKS;
6246 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6247 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6248 res = XML_ELEMENT_TYPE_MIXED;
6249 } else {
6250 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6251 res = XML_ELEMENT_TYPE_ELEMENT;
6253 SKIP_BLANKS;
6254 *result = tree;
6255 return(res);
6259 * xmlParseElementDecl:
6260 * @ctxt: an XML parser context
6262 * parse an Element declaration.
6264 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6266 * [ VC: Unique Element Type Declaration ]
6267 * No element type may be declared more than once
6269 * Returns the type of the element, or -1 in case of error
6272 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6273 const xmlChar *name;
6274 int ret = -1;
6275 xmlElementContentPtr content = NULL;
6277 /* GROW; done in the caller */
6278 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6279 xmlParserInputPtr input = ctxt->input;
6281 SKIP(9);
6282 if (!IS_BLANK_CH(CUR)) {
6283 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6284 "Space required after 'ELEMENT'\n");
6286 SKIP_BLANKS;
6287 name = xmlParseName(ctxt);
6288 if (name == NULL) {
6289 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6290 "xmlParseElementDecl: no name for Element\n");
6291 return(-1);
6293 while ((RAW == 0) && (ctxt->inputNr > 1))
6294 xmlPopInput(ctxt);
6295 if (!IS_BLANK_CH(CUR)) {
6296 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6297 "Space required after the element name\n");
6299 SKIP_BLANKS;
6300 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6301 SKIP(5);
6303 * Element must always be empty.
6305 ret = XML_ELEMENT_TYPE_EMPTY;
6306 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6307 (NXT(2) == 'Y')) {
6308 SKIP(3);
6310 * Element is a generic container.
6312 ret = XML_ELEMENT_TYPE_ANY;
6313 } else if (RAW == '(') {
6314 ret = xmlParseElementContentDecl(ctxt, name, &content);
6315 } else {
6317 * [ WFC: PEs in Internal Subset ] error handling.
6319 if ((RAW == '%') && (ctxt->external == 0) &&
6320 (ctxt->inputNr == 1)) {
6321 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6322 "PEReference: forbidden within markup decl in internal subset\n");
6323 } else {
6324 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6325 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6327 return(-1);
6330 SKIP_BLANKS;
6332 * Pop-up of finished entities.
6334 while ((RAW == 0) && (ctxt->inputNr > 1))
6335 xmlPopInput(ctxt);
6336 SKIP_BLANKS;
6338 if (RAW != '>') {
6339 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6340 if (content != NULL) {
6341 xmlFreeDocElementContent(ctxt->myDoc, content);
6343 } else {
6344 if (input != ctxt->input) {
6345 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346 "Element declaration doesn't start and stop in the same entity\n");
6349 NEXT;
6350 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6351 (ctxt->sax->elementDecl != NULL)) {
6352 if (content != NULL)
6353 content->parent = NULL;
6354 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6355 content);
6356 if ((content != NULL) && (content->parent == NULL)) {
6358 * this is a trick: if xmlAddElementDecl is called,
6359 * instead of copying the full tree it is plugged directly
6360 * if called from the parser. Avoid duplicating the
6361 * interfaces or change the API/ABI
6363 xmlFreeDocElementContent(ctxt->myDoc, content);
6365 } else if (content != NULL) {
6366 xmlFreeDocElementContent(ctxt->myDoc, content);
6370 return(ret);
6374 * xmlParseConditionalSections
6375 * @ctxt: an XML parser context
6377 * [61] conditionalSect ::= includeSect | ignoreSect
6378 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6379 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6380 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6381 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6384 static void
6385 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6386 int id = ctxt->input->id;
6388 SKIP(3);
6389 SKIP_BLANKS;
6390 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6391 SKIP(7);
6392 SKIP_BLANKS;
6393 if (RAW != '[') {
6394 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6395 } else {
6396 if (ctxt->input->id != id) {
6397 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398 "All markup of the conditional section is not in the same entity\n",
6399 NULL, NULL);
6401 NEXT;
6403 if (xmlParserDebugEntities) {
6404 if ((ctxt->input != NULL) && (ctxt->input->filename))
6405 xmlGenericError(xmlGenericErrorContext,
6406 "%s(%d): ", ctxt->input->filename,
6407 ctxt->input->line);
6408 xmlGenericError(xmlGenericErrorContext,
6409 "Entering INCLUDE Conditional Section\n");
6412 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6413 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6414 const xmlChar *check = CUR_PTR;
6415 unsigned int cons = ctxt->input->consumed;
6417 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6418 xmlParseConditionalSections(ctxt);
6419 } else if (IS_BLANK_CH(CUR)) {
6420 NEXT;
6421 } else if (RAW == '%') {
6422 xmlParsePEReference(ctxt);
6423 } else
6424 xmlParseMarkupDecl(ctxt);
6427 * Pop-up of finished entities.
6429 while ((RAW == 0) && (ctxt->inputNr > 1))
6430 xmlPopInput(ctxt);
6432 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6433 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6434 break;
6437 if (xmlParserDebugEntities) {
6438 if ((ctxt->input != NULL) && (ctxt->input->filename))
6439 xmlGenericError(xmlGenericErrorContext,
6440 "%s(%d): ", ctxt->input->filename,
6441 ctxt->input->line);
6442 xmlGenericError(xmlGenericErrorContext,
6443 "Leaving INCLUDE Conditional Section\n");
6446 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6447 int state;
6448 xmlParserInputState instate;
6449 int depth = 0;
6451 SKIP(6);
6452 SKIP_BLANKS;
6453 if (RAW != '[') {
6454 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6455 } else {
6456 if (ctxt->input->id != id) {
6457 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6458 "All markup of the conditional section is not in the same entity\n",
6459 NULL, NULL);
6461 NEXT;
6463 if (xmlParserDebugEntities) {
6464 if ((ctxt->input != NULL) && (ctxt->input->filename))
6465 xmlGenericError(xmlGenericErrorContext,
6466 "%s(%d): ", ctxt->input->filename,
6467 ctxt->input->line);
6468 xmlGenericError(xmlGenericErrorContext,
6469 "Entering IGNORE Conditional Section\n");
6473 * Parse up to the end of the conditional section
6474 * But disable SAX event generating DTD building in the meantime
6476 state = ctxt->disableSAX;
6477 instate = ctxt->instate;
6478 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6479 ctxt->instate = XML_PARSER_IGNORE;
6481 while (((depth >= 0) && (RAW != 0)) &&
6482 (ctxt->instate != XML_PARSER_EOF)) {
6483 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6484 depth++;
6485 SKIP(3);
6486 continue;
6488 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6489 if (--depth >= 0) SKIP(3);
6490 continue;
6492 NEXT;
6493 continue;
6496 ctxt->disableSAX = state;
6497 ctxt->instate = instate;
6499 if (xmlParserDebugEntities) {
6500 if ((ctxt->input != NULL) && (ctxt->input->filename))
6501 xmlGenericError(xmlGenericErrorContext,
6502 "%s(%d): ", ctxt->input->filename,
6503 ctxt->input->line);
6504 xmlGenericError(xmlGenericErrorContext,
6505 "Leaving IGNORE Conditional Section\n");
6508 } else {
6509 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6512 if (RAW == 0)
6513 SHRINK;
6515 if (RAW == 0) {
6516 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6517 } else {
6518 if (ctxt->input->id != id) {
6519 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520 "All markup of the conditional section is not in the same entity\n",
6521 NULL, NULL);
6523 SKIP(3);
6528 * xmlParseMarkupDecl:
6529 * @ctxt: an XML parser context
6531 * parse Markup declarations
6533 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6534 * NotationDecl | PI | Comment
6536 * [ VC: Proper Declaration/PE Nesting ]
6537 * Parameter-entity replacement text must be properly nested with
6538 * markup declarations. That is to say, if either the first character
6539 * or the last character of a markup declaration (markupdecl above) is
6540 * contained in the replacement text for a parameter-entity reference,
6541 * both must be contained in the same replacement text.
6543 * [ WFC: PEs in Internal Subset ]
6544 * In the internal DTD subset, parameter-entity references can occur
6545 * only where markup declarations can occur, not within markup declarations.
6546 * (This does not apply to references that occur in external parameter
6547 * entities or to the external subset.)
6549 void
6550 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6551 GROW;
6552 if (CUR == '<') {
6553 if (NXT(1) == '!') {
6554 switch (NXT(2)) {
6555 case 'E':
6556 if (NXT(3) == 'L')
6557 xmlParseElementDecl(ctxt);
6558 else if (NXT(3) == 'N')
6559 xmlParseEntityDecl(ctxt);
6560 break;
6561 case 'A':
6562 xmlParseAttributeListDecl(ctxt);
6563 break;
6564 case 'N':
6565 xmlParseNotationDecl(ctxt);
6566 break;
6567 case '-':
6568 xmlParseComment(ctxt);
6569 break;
6570 default:
6571 /* there is an error but it will be detected later */
6572 break;
6574 } else if (NXT(1) == '?') {
6575 xmlParsePI(ctxt);
6579 * This is only for internal subset. On external entities,
6580 * the replacement is done before parsing stage
6582 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6583 xmlParsePEReference(ctxt);
6586 * Conditional sections are allowed from entities included
6587 * by PE References in the internal subset.
6589 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6590 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6591 xmlParseConditionalSections(ctxt);
6595 ctxt->instate = XML_PARSER_DTD;
6599 * xmlParseTextDecl:
6600 * @ctxt: an XML parser context
6602 * parse an XML declaration header for external entities
6604 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6607 void
6608 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6609 xmlChar *version;
6610 const xmlChar *encoding;
6613 * We know that '<?xml' is here.
6615 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6616 SKIP(5);
6617 } else {
6618 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6619 return;
6622 if (!IS_BLANK_CH(CUR)) {
6623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6624 "Space needed after '<?xml'\n");
6626 SKIP_BLANKS;
6629 * We may have the VersionInfo here.
6631 version = xmlParseVersionInfo(ctxt);
6632 if (version == NULL)
6633 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6634 else {
6635 if (!IS_BLANK_CH(CUR)) {
6636 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6637 "Space needed here\n");
6640 ctxt->input->version = version;
6643 * We must have the encoding declaration
6645 encoding = xmlParseEncodingDecl(ctxt);
6646 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6648 * The XML REC instructs us to stop parsing right here
6650 return;
6652 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6653 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6654 "Missing encoding in text declaration\n");
6657 SKIP_BLANKS;
6658 if ((RAW == '?') && (NXT(1) == '>')) {
6659 SKIP(2);
6660 } else if (RAW == '>') {
6661 /* Deprecated old WD ... */
6662 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6663 NEXT;
6664 } else {
6665 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6666 MOVETO_ENDTAG(CUR_PTR);
6667 NEXT;
6672 * xmlParseExternalSubset:
6673 * @ctxt: an XML parser context
6674 * @ExternalID: the external identifier
6675 * @SystemID: the system identifier (or URL)
6677 * parse Markup declarations from an external subset
6679 * [30] extSubset ::= textDecl? extSubsetDecl
6681 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6683 void
6684 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6685 const xmlChar *SystemID) {
6686 xmlDetectSAX2(ctxt);
6687 GROW;
6689 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6690 (ctxt->input->end - ctxt->input->cur >= 4)) {
6691 xmlChar start[4];
6692 xmlCharEncoding enc;
6694 start[0] = RAW;
6695 start[1] = NXT(1);
6696 start[2] = NXT(2);
6697 start[3] = NXT(3);
6698 enc = xmlDetectCharEncoding(start, 4);
6699 if (enc != XML_CHAR_ENCODING_NONE)
6700 xmlSwitchEncoding(ctxt, enc);
6703 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6704 xmlParseTextDecl(ctxt);
6705 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6707 * The XML REC instructs us to stop parsing right here
6709 ctxt->instate = XML_PARSER_EOF;
6710 return;
6713 if (ctxt->myDoc == NULL) {
6714 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6715 if (ctxt->myDoc == NULL) {
6716 xmlErrMemory(ctxt, "New Doc failed");
6717 return;
6719 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6721 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6722 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6724 ctxt->instate = XML_PARSER_DTD;
6725 ctxt->external = 1;
6726 while (((RAW == '<') && (NXT(1) == '?')) ||
6727 ((RAW == '<') && (NXT(1) == '!')) ||
6728 (RAW == '%') || IS_BLANK_CH(CUR)) {
6729 const xmlChar *check = CUR_PTR;
6730 unsigned int cons = ctxt->input->consumed;
6732 GROW;
6733 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6734 xmlParseConditionalSections(ctxt);
6735 } else if (IS_BLANK_CH(CUR)) {
6736 NEXT;
6737 } else if (RAW == '%') {
6738 xmlParsePEReference(ctxt);
6739 } else
6740 xmlParseMarkupDecl(ctxt);
6743 * Pop-up of finished entities.
6745 while ((RAW == 0) && (ctxt->inputNr > 1))
6746 xmlPopInput(ctxt);
6748 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6749 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6750 break;
6754 if (RAW != 0) {
6755 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
/*
 * Review summary for xmlParseReference (text below kept as extracted):
 * - returns at once unless the current character is '&';
 * - "&#..." is handled as a character reference and delivered through the
 *   SAX characters() or reference() callback;
 * - otherwise the entity is looked up with xmlParseEntityRef; on its first
 *   use (ent->checked == 0) its content is parsed and may be stored in
 *   ent->children, then the content is handed to the application either
 *   via SAX callbacks or by adding nodes under ctxt->node.
 */
6761 * xmlParseReference:
6762 * @ctxt: an XML parser context
6764 * parse and handle entity references in content, depending on the SAX
6765 * interface, this may end-up in a call to character() if this is a
6766 * CharRef, a predefined entity, if there is no reference() callback.
6767 * or if the parser was asked to switch to that mode.
6769 * [67] Reference ::= EntityRef | CharRef
6771 void
6772 xmlParseReference(xmlParserCtxtPtr ctxt) {
6773 xmlEntityPtr ent;
6774 xmlChar *val;
6775 int was_checked;
6776 xmlNodePtr list = NULL;
6777 xmlParserErrors ret = XML_ERR_OK;
6780 if (RAW != '&')
6781 return;
6784 * Simple case of a CharRef
6786 if (NXT(1) == '#') {
6787 int i = 0;
6788 xmlChar out[10];
/* Character following "&#", read before xmlParseCharRef is called; it is
 * compared with 'x'/'X' below to pick the textual form of the reference. */
6789 int hex = NXT(2);
6790 int value = xmlParseCharRef(ctxt);
/* A zero value is treated as "nothing to deliver". */
6792 if (value == 0)
6793 return;
6794 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6796 * So we are using non-UTF-8 buffers
6797 * Check that the char fit on 8bits, if not
6798 * generate a CharRef.
6800 if (value <= 0xFF) {
6801 out[0] = value;
6802 out[1] = 0;
6803 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6804 (!ctxt->disableSAX))
6805 ctxt->sax->characters(ctxt->userData, out, 1);
6806 } else {
/* Rebuild the reference text ("#x..." or "#...") into out[], bounded by
 * sizeof(out), and pass it to the reference() callback. */
6807 if ((hex == 'x') || (hex == 'X'))
6808 snprintf((char *)out, sizeof(out), "#x%X", value);
6809 else
6810 snprintf((char *)out, sizeof(out), "#%d", value);
6811 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6812 (!ctxt->disableSAX))
6813 ctxt->sax->reference(ctxt->userData, out);
6815 } else {
6817 * Just encode the value in UTF-8
6819 COPY_BUF(0 ,out, i, value);
6820 out[i] = 0;
6821 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6822 (!ctxt->disableSAX))
6823 ctxt->sax->characters(ctxt->userData, out, i);
6825 return;
6829 * We are seeing an entity reference
6831 ent = xmlParseEntityRef(ctxt);
6832 if (ent == NULL) return;
/* Nothing more is done with the entity once the document has been flagged
 * as not well-formed. */
6833 if (!ctxt->wellFormed)
6834 return;
/* Snapshot taken before the first-reference block below can change
 * ent->checked; it is tested later to decide whether to re-parse. */
6835 was_checked = ent->checked;
6837 /* special case of predefined entities */
6838 if ((ent->name == NULL) ||
6839 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6840 val = ent->content;
6841 if (val == NULL) return;
6843 * inline the entity.
6845 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6846 (!ctxt->disableSAX))
6847 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6848 return;
6852 * The first reference to the entity trigger a parsing phase
6853 * where the ent->children is filled with the result from
6854 * the parsing.
6856 if (ent->checked == 0) {
/* Counter value before parsing, used to measure how many entities the
 * content of this one references. */
6857 unsigned long oldnbent = ctxt->nbentities;
6860 * This is a bit hackish but this seems the best
6861 * way to make sure both SAX and DOM entity support
6862 * behaves okay.
6864 void *user_data;
6865 if (ctxt->userData == ctxt)
6866 user_data = NULL;
6867 else
6868 user_data = ctxt->userData;
6871 * Check that this entity is well formed
6872 * 4.3.2: An internal general parsed entity is well-formed
6873 * if its replacement text matches the production labeled
6874 * content.
/* ctxt->depth is raised only for the duration of the nested parse. */
6876 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6877 ctxt->depth++;
6878 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6879 user_data, &list);
6880 ctxt->depth--;
6882 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6883 ctxt->depth++;
6884 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6885 user_data, ctxt->depth, ent->URI,
6886 ent->ExternalID, &list);
6887 ctxt->depth--;
6888 } else {
6889 ret = XML_ERR_ENTITY_PE_INTERNAL;
6890 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6891 "invalid entity type found\n", NULL);
6895 * Store the number of entities needing parsing for this entity
6896 * content and do checkings
6898 ent->checked = ctxt->nbentities - oldnbent;
/* On both early exits below the freshly parsed node list is released,
 * since it has not been attached anywhere yet. */
6899 if (ret == XML_ERR_ENTITY_LOOP) {
6900 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6901 xmlFreeNodeList(list);
6902 return;
6904 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6905 xmlFreeNodeList(list);
6906 return;
6909 if ((ret == XML_ERR_OK) && (list != NULL)) {
6910 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6911 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6912 (ent->children == NULL)) {
6913 ent->children = list;
6914 if (ctxt->replaceEntities) {
6916 * Prune it directly in the generated document
6917 * except for single text nodes.
/* ent->owner records which side holds the nodes: 1 when they are parented
 * to the entity, 0 when they are re-parented to ctxt->node below. */
6919 if (((list->type == XML_TEXT_NODE) &&
6920 (list->next == NULL)) ||
6921 (ctxt->parseMode == XML_PARSE_READER)) {
6922 list->parent = (xmlNodePtr) ent;
6923 list = NULL;
6924 ent->owner = 1;
6925 } else {
6926 ent->owner = 0;
6927 while (list != NULL) {
6928 list->parent = (xmlNodePtr) ctxt->node;
6929 list->doc = ctxt->myDoc;
6930 if (list->next == NULL)
6931 ent->last = list;
6932 list = list->next;
6934 list = ent->children;
6935 #ifdef LIBXML_LEGACY_ENABLED
6936 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6937 xmlAddEntityReference(ent, list, NULL);
6938 #endif /* LIBXML_LEGACY_ENABLED */
6940 } else {
6941 ent->owner = 1;
6942 while (list != NULL) {
6943 list->parent = (xmlNodePtr) ent;
6944 if (list->next == NULL)
6945 ent->last = list;
6946 list = list->next;
6949 } else {
6950 xmlFreeNodeList(list);
6951 list = NULL;
6953 } else if ((ret != XML_ERR_OK) &&
6954 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6955 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6956 "Entity '%s' failed to parse\n", ent->name);
6957 } else if (list != NULL) {
6958 xmlFreeNodeList(list);
6959 list = NULL;
/* Zero means "not checked yet" in the test above, so a parse that counted
 * no nested entities is recorded as 1. */
6961 if (ent->checked == 0)
6962 ent->checked = 1;
/* Entity already checked earlier: add its stored count back to the
 * running total (skipped when the stored value is 1). */
6963 } else if (ent->checked != 1) {
6964 ctxt->nbentities += ent->checked;
6968 * Now that the entity content has been gathered
6969 * provide it to the application, this can take different forms based
6970 * on the parsing modes.
6972 if (ent->children == NULL) {
6974 * Probably running in SAX mode and the callbacks don't
6975 * build the entity content. So unless we already went
6976 * though parsing for first checking go though the entity
6977 * content to generate callbacks associated to the entity
/* was_checked holds ent->checked as it was before the first-reference
 * block above ran during this call. */
6979 if (was_checked != 0) {
6980 void *user_data;
6982 * This is a bit hackish but this seems the best
6983 * way to make sure both SAX and DOM entity support
6984 * behaves okay.
6986 if (ctxt->userData == ctxt)
6987 user_data = NULL;
6988 else
6989 user_data = ctxt->userData;
/* Same parse calls as in the first-reference block, but with a NULL list
 * argument: no node list is collected here. */
6991 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6992 ctxt->depth++;
6993 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6994 ent->content, user_data, NULL);
6995 ctxt->depth--;
6996 } else if (ent->etype ==
6997 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6998 ctxt->depth++;
6999 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7000 ctxt->sax, user_data, ctxt->depth,
7001 ent->URI, ent->ExternalID, NULL);
7002 ctxt->depth--;
7003 } else {
7004 ret = XML_ERR_ENTITY_PE_INTERNAL;
7005 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7006 "invalid entity type found\n", NULL);
7008 if (ret == XML_ERR_ENTITY_LOOP) {
7009 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7010 return;
7013 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7014 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7016 * Entity reference callback comes second, it's somewhat
7017 * superfluous but a compatibility to historical behaviour
7019 ctxt->sax->reference(ctxt->userData, ent->name);
7021 return;
7025 * If we didn't get any children for the entity being built
7027 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7028 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7030 * Create a node.
7032 ctxt->sax->reference(ctxt->userData, ent->name);
7033 return;
7036 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7038 * There is a problem on the handling of _private for entities
7039 * (bug 155816): Should we copy the content of the field from
7040 * the entity (possibly overwriting some value set by the user
7041 * when a copy is created), should we leave it alone, or should
7042 * we try to take care of different situations? The problem
7043 * is exacerbated by the usage of this field by the xmlReader.
7044 * To fix this bug, we look at _private on the created node
7045 * and, if it's NULL, we copy in whatever was in the entity.
7046 * If it's not NULL we leave it alone. This is somewhat of a
7047 * hack - maybe we should have further tests to determine
7048 * what to do.
7050 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7052 * Seems we are generating the DOM content, do
7053 * a simple tree copy for all references except the first
7054 * In the first occurrence list contains the replacement.
7055 * progressive == 2 means we are operating on the Reader
7056 * and since nodes are discarded we must copy all the time.
7058 if (((list == NULL) && (ent->owner == 0)) ||
7059 (ctxt->parseMode == XML_PARSE_READER)) {
7060 xmlNodePtr nw = NULL, cur, firstChild = NULL;
/* NOTE(review): the two statements at 7065-7066 directly follow comment
 * text and the comment delimiters are not visible in this extract; confirm
 * against the pristine source whether they are live code or part of that
 * comment. */
7063 * when operating on a reader, the entities definitions
7064 * are always owning the entities subtree.
7065 if (ctxt->parseMode == XML_PARSE_READER)
7066 ent->owner = 1;
/* Copy each entity child into the document and append the copy under
 * ctxt->node, stopping after ent->last. */
7069 cur = ent->children;
7070 while (cur != NULL) {
7071 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7072 if (nw != NULL) {
7073 if (nw->_private == NULL)
7074 nw->_private = cur->_private;
7075 if (firstChild == NULL){
7076 firstChild = nw;
7078 nw = xmlAddChild(ctxt->node, nw);
7080 if (cur == ent->last) {
7082 * needed to detect some strange empty
7083 * node cases in the reader tests
7085 if ((ctxt->parseMode == XML_PARSE_READER) &&
7086 (nw != NULL) &&
7087 (nw->type == XML_ELEMENT_NODE) &&
7088 (nw->children == NULL))
7089 nw->extra = 1;
7091 break;
7093 cur = cur->next;
7095 #ifdef LIBXML_LEGACY_ENABLED
7096 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7097 xmlAddEntityReference(ent, firstChild, nw);
7098 #endif /* LIBXML_LEGACY_ENABLED */
7099 } else if (list == NULL) {
7100 xmlNodePtr nw = NULL, cur, next, last,
7101 firstChild = NULL;
7103 * Copy the entity child list and make it the new
7104 * entity child list. The goal is to make sure any
7105 * ID or REF referenced will be the one from the
7106 * document content and not the entity copy.
/* The entity's list is detached first; inside the loop each original node
 * is unlinked, its copy is added to the entity and the original is added
 * under ctxt->node. */
7108 cur = ent->children;
7109 ent->children = NULL;
7110 last = ent->last;
7111 ent->last = NULL;
7112 while (cur != NULL) {
7113 next = cur->next;
7114 cur->next = NULL;
7115 cur->parent = NULL;
7116 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7117 if (nw != NULL) {
7118 if (nw->_private == NULL)
7119 nw->_private = cur->_private;
7120 if (firstChild == NULL){
7121 firstChild = cur;
7123 xmlAddChild((xmlNodePtr) ent, nw);
7124 xmlAddChild(ctxt->node, cur);
7126 if (cur == last)
7127 break;
7128 cur = next;
7130 if (ent->owner == 0)
7131 ent->owner = 1;
7132 #ifdef LIBXML_LEGACY_ENABLED
7133 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7134 xmlAddEntityReference(ent, firstChild, nw);
7135 #endif /* LIBXML_LEGACY_ENABLED */
7136 } else {
7137 const xmlChar *nbktext;
7140 * the name change is to avoid coalescing of the
7141 * node with a possible previous text one which
7142 * would make ent->children a dangling pointer
7144 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7145 -1);
7146 if (ent->children->type == XML_TEXT_NODE)
7147 ent->children->name = nbktext;
7148 if ((ent->last != ent->children) &&
7149 (ent->last->type == XML_TEXT_NODE))
7150 ent->last->name = nbktext;
7151 xmlAddChildList(ctxt->node, ent->children);
7155 * This is to avoid a nasty side effect, see
7156 * characters() in SAX.c
7158 ctxt->nodemem = 0;
7159 ctxt->nodelen = 0;
7160 return;
7166 * xmlParseEntityRef:
7167 * @ctxt: an XML parser context
7169 * parse ENTITY references declarations
7171 * [68] EntityRef ::= '&' Name ';'
7173 * [ WFC: Entity Declared ]
7174 * In a document without any DTD, a document with only an internal DTD
7175 * subset which contains no parameter entity references, or a document
7176 * with "standalone='yes'", the Name given in the entity reference
7177 * must match that in an entity declaration, except that well-formed
7178 * documents need not declare any of the following entities: amp, lt,
7179 * gt, apos, quot. The declaration of a parameter entity must precede
7180 * any reference to it. Similarly, the declaration of a general entity
7181 * must precede any reference to it which appears in a default value in an
7182 * attribute-list declaration. Note that if entities are declared in the
7183 * external subset or in external parameter entities, a non-validating
7184 * processor is not obligated to read and process their declarations;
7185 * for such documents, the rule that an entity must be declared is a
7186 * well-formedness constraint only if standalone='yes'.
7188 * [ WFC: Parsed Entity ]
7189 * An entity reference must not contain the name of an unparsed entity
7191 * Returns the xmlEntityPtr if found, or NULL otherwise.
7193 xmlEntityPtr
7194 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7195 const xmlChar *name;
7196 xmlEntityPtr ent = NULL;
7198 GROW;
7199 if (ctxt->instate == XML_PARSER_EOF)
7200 return(NULL);
7202 if (RAW != '&')
7203 return(NULL);
7204 NEXT;
7205 name = xmlParseName(ctxt);
7206 if (name == NULL) {
7207 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7208 "xmlParseEntityRef: no name\n");
7209 return(NULL);
7211 if (RAW != ';') {
7212 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7213 return(NULL);
7215 NEXT;
7218 * Predefined entites override any extra definition
7220 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7221 ent = xmlGetPredefinedEntity(name);
7222 if (ent != NULL)
7223 return(ent);
7227 * Increate the number of entity references parsed
7229 ctxt->nbentities++;
7232 * Ask first SAX for entity resolution, otherwise try the
7233 * entities which may have stored in the parser context.
7235 if (ctxt->sax != NULL) {
7236 if (ctxt->sax->getEntity != NULL)
7237 ent = ctxt->sax->getEntity(ctxt->userData, name);
7238 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7239 (ctxt->options & XML_PARSE_OLDSAX))
7240 ent = xmlGetPredefinedEntity(name);
7241 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7242 (ctxt->userData==ctxt)) {
7243 ent = xmlSAX2GetEntity(ctxt, name);
7246 if (ctxt->instate == XML_PARSER_EOF)
7247 return(NULL);
7249 * [ WFC: Entity Declared ]
7250 * In a document without any DTD, a document with only an
7251 * internal DTD subset which contains no parameter entity
7252 * references, or a document with "standalone='yes'", the
7253 * Name given in the entity reference must match that in an
7254 * entity declaration, except that well-formed documents
7255 * need not declare any of the following entities: amp, lt,
7256 * gt, apos, quot.
7257 * The declaration of a parameter entity must precede any
7258 * reference to it.
7259 * Similarly, the declaration of a general entity must
7260 * precede any reference to it which appears in a default
7261 * value in an attribute-list declaration. Note that if
7262 * entities are declared in the external subset or in
7263 * external parameter entities, a non-validating processor
7264 * is not obligated to read and process their declarations;
7265 * for such documents, the rule that an entity must be
7266 * declared is a well-formedness constraint only if
7267 * standalone='yes'.
7269 if (ent == NULL) {
7270 if ((ctxt->standalone == 1) ||
7271 ((ctxt->hasExternalSubset == 0) &&
7272 (ctxt->hasPErefs == 0))) {
7273 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7274 "Entity '%s' not defined\n", name);
7275 } else {
7276 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7277 "Entity '%s' not defined\n", name);
7278 if ((ctxt->inSubset == 0) &&
7279 (ctxt->sax != NULL) &&
7280 (ctxt->sax->reference != NULL)) {
7281 ctxt->sax->reference(ctxt->userData, name);
7284 ctxt->valid = 0;
7288 * [ WFC: Parsed Entity ]
7289 * An entity reference must not contain the name of an
7290 * unparsed entity
7292 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7293 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7294 "Entity reference to unparsed entity %s\n", name);
7298 * [ WFC: No External Entity References ]
7299 * Attribute values cannot contain direct or indirect
7300 * entity references to external entities.
7302 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7303 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7304 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7305 "Attribute references external entity '%s'\n", name);
7308 * [ WFC: No < in Attribute Values ]
7309 * The replacement text of any entity referred to directly or
7310 * indirectly in an attribute value (other than "&lt;") must
7311 * not contain a <.
7313 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7314 (ent != NULL) && (ent->content != NULL) &&
7315 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7316 (xmlStrchr(ent->content, '<'))) {
7317 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7318 "'<' in entity '%s' is not allowed in attributes values\n", name);
7322 * Internal check, no parameter entities here ...
7324 else {
7325 switch (ent->etype) {
7326 case XML_INTERNAL_PARAMETER_ENTITY:
7327 case XML_EXTERNAL_PARAMETER_ENTITY:
7328 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7329 "Attempt to reference the parameter entity '%s'\n",
7330 name);
7331 break;
7332 default:
7333 break;
7338 * [ WFC: No Recursion ]
7339 * A parsed entity must not contain a recursive reference
7340 * to itself, either directly or indirectly.
7341 * Done somewhere else
7343 return(ent);
7347 * xmlParseStringEntityRef:
7348 * @ctxt: an XML parser context
7349 * @str: a pointer to an index in the string
7351 * parse ENTITY references declarations, but this version parses it from
7352 * a string value.
7354 * [68] EntityRef ::= '&' Name ';'
7356 * [ WFC: Entity Declared ]
7357 * In a document without any DTD, a document with only an internal DTD
7358 * subset which contains no parameter entity references, or a document
7359 * with "standalone='yes'", the Name given in the entity reference
7360 * must match that in an entity declaration, except that well-formed
7361 * documents need not declare any of the following entities: amp, lt,
7362 * gt, apos, quot. The declaration of a parameter entity must precede
7363 * any reference to it. Similarly, the declaration of a general entity
7364 * must precede any reference to it which appears in a default value in an
7365 * attribute-list declaration. Note that if entities are declared in the
7366 * external subset or in external parameter entities, a non-validating
7367 * processor is not obligated to read and process their declarations;
7368 * for such documents, the rule that an entity must be declared is a
7369 * well-formedness constraint only if standalone='yes'.
7371 * [ WFC: Parsed Entity ]
7372 * An entity reference must not contain the name of an unparsed entity
7374 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7375 * is updated to the current location in the string.
7377 static xmlEntityPtr
7378 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7379 xmlChar *name;
7380 const xmlChar *ptr;
7381 xmlChar cur;
7382 xmlEntityPtr ent = NULL;
7384 if ((str == NULL) || (*str == NULL))
7385 return(NULL);
7386 ptr = *str;
7387 cur = *ptr;
7388 if (cur != '&')
7389 return(NULL);
7391 ptr++;
7392 name = xmlParseStringName(ctxt, &ptr);
7393 if (name == NULL) {
7394 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7395 "xmlParseStringEntityRef: no name\n");
7396 *str = ptr;
7397 return(NULL);
7399 if (*ptr != ';') {
7400 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7401 xmlFree(name);
7402 *str = ptr;
7403 return(NULL);
7405 ptr++;
7409 * Predefined entites override any extra definition
7411 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7412 ent = xmlGetPredefinedEntity(name);
7413 if (ent != NULL) {
7414 xmlFree(name);
7415 *str = ptr;
7416 return(ent);
7421 * Increate the number of entity references parsed
7423 ctxt->nbentities++;
7426 * Ask first SAX for entity resolution, otherwise try the
7427 * entities which may have stored in the parser context.
7429 if (ctxt->sax != NULL) {
7430 if (ctxt->sax->getEntity != NULL)
7431 ent = ctxt->sax->getEntity(ctxt->userData, name);
7432 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7433 ent = xmlGetPredefinedEntity(name);
7434 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7435 ent = xmlSAX2GetEntity(ctxt, name);
7438 if (ctxt->instate == XML_PARSER_EOF) {
7439 xmlFree(name);
7440 return(NULL);
7444 * [ WFC: Entity Declared ]
7445 * In a document without any DTD, a document with only an
7446 * internal DTD subset which contains no parameter entity
7447 * references, or a document with "standalone='yes'", the
7448 * Name given in the entity reference must match that in an
7449 * entity declaration, except that well-formed documents
7450 * need not declare any of the following entities: amp, lt,
7451 * gt, apos, quot.
7452 * The declaration of a parameter entity must precede any
7453 * reference to it.
7454 * Similarly, the declaration of a general entity must
7455 * precede any reference to it which appears in a default
7456 * value in an attribute-list declaration. Note that if
7457 * entities are declared in the external subset or in
7458 * external parameter entities, a non-validating processor
7459 * is not obligated to read and process their declarations;
7460 * for such documents, the rule that an entity must be
7461 * declared is a well-formedness constraint only if
7462 * standalone='yes'.
7464 if (ent == NULL) {
7465 if ((ctxt->standalone == 1) ||
7466 ((ctxt->hasExternalSubset == 0) &&
7467 (ctxt->hasPErefs == 0))) {
7468 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7469 "Entity '%s' not defined\n", name);
7470 } else {
7471 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7472 "Entity '%s' not defined\n",
7473 name);
7475 /* TODO ? check regressions ctxt->valid = 0; */
7479 * [ WFC: Parsed Entity ]
7480 * An entity reference must not contain the name of an
7481 * unparsed entity
7483 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7484 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7485 "Entity reference to unparsed entity %s\n", name);
7489 * [ WFC: No External Entity References ]
7490 * Attribute values cannot contain direct or indirect
7491 * entity references to external entities.
7493 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7494 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7495 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7496 "Attribute references external entity '%s'\n", name);
7499 * [ WFC: No < in Attribute Values ]
7500 * The replacement text of any entity referred to directly or
7501 * indirectly in an attribute value (other than "&lt;") must
7502 * not contain a <.
7504 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7505 (ent != NULL) && (ent->content != NULL) &&
7506 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7507 (xmlStrchr(ent->content, '<'))) {
7508 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7509 "'<' in entity '%s' is not allowed in attributes values\n",
7510 name);
7514 * Internal check, no parameter entities here ...
7516 else {
7517 switch (ent->etype) {
7518 case XML_INTERNAL_PARAMETER_ENTITY:
7519 case XML_EXTERNAL_PARAMETER_ENTITY:
7520 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7521 "Attempt to reference the parameter entity '%s'\n",
7522 name);
7523 break;
7524 default:
7525 break;
7530 * [ WFC: No Recursion ]
7531 * A parsed entity must not contain a recursive reference
7532 * to itself, either directly or indirectly.
7533 * Done somewhere else
7536 xmlFree(name);
7537 *str = ptr;
7538 return(ent);
7542 * xmlParsePEReference:
7543 * @ctxt: an XML parser context
7545 * parse PEReference declarations
7546 * The entity content is handled directly by pushing it's content as
7547 * a new input stream.
7549 * [69] PEReference ::= '%' Name ';'
7551 * [ WFC: No Recursion ]
7552 * A parsed entity must not contain a recursive
7553 * reference to itself, either directly or indirectly.
7555 * [ WFC: Entity Declared ]
7556 * In a document without any DTD, a document with only an internal DTD
7557 * subset which contains no parameter entity references, or a document
7558 * with "standalone='yes'", ... ... The declaration of a parameter
7559 * entity must precede any reference to it...
7561 * [ VC: Entity Declared ]
7562 * In a document with an external subset or external parameter entities
7563 * with "standalone='no'", ... ... The declaration of a parameter entity
7564 * must precede any reference to it...
7566 * [ WFC: In DTD ]
7567 * Parameter-entity references may only appear in the DTD.
7568 * NOTE: misleading but this is handled.
7570 void
7571 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7573 const xmlChar *name;
7574 xmlEntityPtr entity = NULL;
7575 xmlParserInputPtr input;
7577 if (RAW != '%')
7578 return;
7579 NEXT;
7580 name = xmlParseName(ctxt);
7581 if (name == NULL) {
7582 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7583 "xmlParsePEReference: no name\n");
7584 return;
7586 if (RAW != ';') {
7587 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7588 return;
7591 NEXT;
7594 * Increate the number of entity references parsed
7596 ctxt->nbentities++;
7599 * Request the entity from SAX
7601 if ((ctxt->sax != NULL) &&
7602 (ctxt->sax->getParameterEntity != NULL))
7603 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7604 if (ctxt->instate == XML_PARSER_EOF)
7605 return;
7606 if (entity == NULL) {
7608 * [ WFC: Entity Declared ]
7609 * In a document without any DTD, a document with only an
7610 * internal DTD subset which contains no parameter entity
7611 * references, or a document with "standalone='yes'", ...
7612 * ... The declaration of a parameter entity must precede
7613 * any reference to it...
7615 if ((ctxt->standalone == 1) ||
7616 ((ctxt->hasExternalSubset == 0) &&
7617 (ctxt->hasPErefs == 0))) {
7618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7619 "PEReference: %%%s; not found\n",
7620 name);
7621 } else {
7623 * [ VC: Entity Declared ]
7624 * In a document with an external subset or external
7625 * parameter entities with "standalone='no'", ...
7626 * ... The declaration of a parameter entity must
7627 * precede any reference to it...
7629 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7630 "PEReference: %%%s; not found\n",
7631 name, NULL);
7632 ctxt->valid = 0;
7634 } else {
7636 * Internal checking in case the entity quest barfed
7638 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7639 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7640 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7641 "Internal: %%%s; is not a parameter entity\n",
7642 name, NULL);
7643 } else if (ctxt->input->free != deallocblankswrapper) {
7644 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7645 if (xmlPushInput(ctxt, input) < 0)
7646 return;
7647 } else {
7649 * TODO !!!
7650 * handle the extra spaces added before and after
7651 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7653 input = xmlNewEntityInputStream(ctxt, entity);
7654 if (xmlPushInput(ctxt, input) < 0)
7655 return;
7656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7657 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7658 (IS_BLANK_CH(NXT(5)))) {
7659 xmlParseTextDecl(ctxt);
7660 if (ctxt->errNo ==
7661 XML_ERR_UNSUPPORTED_ENCODING) {
7663 * The XML REC instructs us to stop parsing
7664 * right here
7666 ctxt->instate = XML_PARSER_EOF;
7667 return;
7672 ctxt->hasPErefs = 1;
7676 * xmlLoadEntityContent:
7677 * @ctxt: an XML parser context
7678 * @entity: an unloaded system entity
7680 * Load the original content of the given system entity from the
7681 * ExternalID/SystemID given. This is to be used for Included in Literal
7682 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7684 * Returns 0 in case of success and -1 in case of failure
7686 static int
7687 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7688 xmlParserInputPtr input;
7689 xmlBufferPtr buf;
7690 int l, c;
7691 int count = 0;
7693 if ((ctxt == NULL) || (entity == NULL) ||
7694 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7695 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7696 (entity->content != NULL)) {
7697 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7698 "xmlLoadEntityContent parameter error");
7699 return(-1);
7702 if (xmlParserDebugEntities)
7703 xmlGenericError(xmlGenericErrorContext,
7704 "Reading %s entity content input\n", entity->name);
7706 buf = xmlBufferCreate();
7707 if (buf == NULL) {
7708 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7709 "xmlLoadEntityContent parameter error");
7710 return(-1);
7713 input = xmlNewEntityInputStream(ctxt, entity);
7714 if (input == NULL) {
7715 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7716 "xmlLoadEntityContent input error");
7717 xmlBufferFree(buf);
7718 return(-1);
7722 * Push the entity as the current input, read char by char
7723 * saving to the buffer until the end of the entity or an error
7725 if (xmlPushInput(ctxt, input) < 0) {
7726 xmlBufferFree(buf);
7727 return(-1);
7730 GROW;
7731 c = CUR_CHAR(l);
7732 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7733 (IS_CHAR(c))) {
7734 xmlBufferAdd(buf, ctxt->input->cur, l);
7735 if (count++ > 100) {
7736 count = 0;
7737 GROW;
7738 if (ctxt->instate == XML_PARSER_EOF) {
7739 xmlBufferFree(buf);
7740 return(-1);
7743 NEXTL(l);
7744 c = CUR_CHAR(l);
7747 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7748 xmlPopInput(ctxt);
7749 } else if (!IS_CHAR(c)) {
7750 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7751 "xmlLoadEntityContent: invalid char value %d\n",
7753 xmlBufferFree(buf);
7754 return(-1);
7756 entity->content = buf->content;
7757 buf->content = NULL;
7758 xmlBufferFree(buf);
7760 return(0);
7764 * xmlParseStringPEReference:
7765 * @ctxt: an XML parser context
7766 * @str: a pointer to an index in the string
7768 * parse PEReference declarations
7770 * [69] PEReference ::= '%' Name ';'
7772 * [ WFC: No Recursion ]
7773 * A parsed entity must not contain a recursive
7774 * reference to itself, either directly or indirectly.
7776 * [ WFC: Entity Declared ]
7777 * In a document without any DTD, a document with only an internal DTD
7778 * subset which contains no parameter entity references, or a document
7779 * with "standalone='yes'", ... ... The declaration of a parameter
7780 * entity must precede any reference to it...
7782 * [ VC: Entity Declared ]
7783 * In a document with an external subset or external parameter entities
7784 * with "standalone='no'", ... ... The declaration of a parameter entity
7785 * must precede any reference to it...
7787 * [ WFC: In DTD ]
7788 * Parameter-entity references may only appear in the DTD.
7789 * NOTE: misleading but this is handled.
7791 * Returns the string of the entity content.
7792 * str is updated to the current value of the index
7794 static xmlEntityPtr
7795 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7796 const xmlChar *ptr;
7797 xmlChar cur;
7798 xmlChar *name;
7799 xmlEntityPtr entity = NULL;
7801 if ((str == NULL) || (*str == NULL)) return(NULL);
7802 ptr = *str;
7803 cur = *ptr;
7804 if (cur != '%')
7805 return(NULL);
7806 ptr++;
7807 name = xmlParseStringName(ctxt, &ptr);
7808 if (name == NULL) {
7809 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7810 "xmlParseStringPEReference: no name\n");
7811 *str = ptr;
7812 return(NULL);
7814 cur = *ptr;
7815 if (cur != ';') {
7816 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7817 xmlFree(name);
7818 *str = ptr;
7819 return(NULL);
7821 ptr++;
7824 * Increate the number of entity references parsed
7826 ctxt->nbentities++;
7829 * Request the entity from SAX
7831 if ((ctxt->sax != NULL) &&
7832 (ctxt->sax->getParameterEntity != NULL))
7833 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7834 if (ctxt->instate == XML_PARSER_EOF) {
7835 xmlFree(name);
7836 return(NULL);
7838 if (entity == NULL) {
7840 * [ WFC: Entity Declared ]
7841 * In a document without any DTD, a document with only an
7842 * internal DTD subset which contains no parameter entity
7843 * references, or a document with "standalone='yes'", ...
7844 * ... The declaration of a parameter entity must precede
7845 * any reference to it...
7847 if ((ctxt->standalone == 1) ||
7848 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7849 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7850 "PEReference: %%%s; not found\n", name);
7851 } else {
7853 * [ VC: Entity Declared ]
7854 * In a document with an external subset or external
7855 * parameter entities with "standalone='no'", ...
7856 * ... The declaration of a parameter entity must
7857 * precede any reference to it...
7859 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7860 "PEReference: %%%s; not found\n",
7861 name, NULL);
7862 ctxt->valid = 0;
7864 } else {
7866 * Internal checking in case the entity quest barfed
7868 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7869 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7870 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7871 "%%%s; is not a parameter entity\n",
7872 name, NULL);
7875 ctxt->hasPErefs = 1;
7876 xmlFree(name);
7877 *str = ptr;
7878 return(entity);
7882 * xmlParseDocTypeDecl:
7883 * @ctxt: an XML parser context
7885 * parse a DOCTYPE declaration
7887 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7888 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7890 * [ VC: Root Element Type ]
7891 * The Name in the document type declaration must match the element
7892 * type of the root element.
7895 void
7896 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7897 const xmlChar *name = NULL;
7898 xmlChar *ExternalID = NULL;
7899 xmlChar *URI = NULL;
7902 * We know that '<!DOCTYPE' has been detected.
7904 SKIP(9);
7906 SKIP_BLANKS;
7909 * Parse the DOCTYPE name.
7911 name = xmlParseName(ctxt);
7912 if (name == NULL) {
7913 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7914 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7916 ctxt->intSubName = name;
7918 SKIP_BLANKS;
7921 * Check for SystemID and ExternalID
7923 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7925 if ((URI != NULL) || (ExternalID != NULL)) {
7926 ctxt->hasExternalSubset = 1;
7928 ctxt->extSubURI = URI;
7929 ctxt->extSubSystem = ExternalID;
7931 SKIP_BLANKS;
7934 * Create and update the internal subset.
7936 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7937 (!ctxt->disableSAX))
7938 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7939 if (ctxt->instate == XML_PARSER_EOF)
7940 return;
7943 * Is there any internal subset declarations ?
7944 * they are handled separately in xmlParseInternalSubset()
7946 if (RAW == '[')
7947 return;
7950 * We should be at the end of the DOCTYPE declaration.
7952 if (RAW != '>') {
7953 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7955 NEXT;
7959 * xmlParseInternalSubset:
7960 * @ctxt: an XML parser context
7962 * parse the internal subset declaration
7964 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7967 static void
7968 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7970 * Is there any DTD definition ?
7972 if (RAW == '[') {
7973 ctxt->instate = XML_PARSER_DTD;
7974 NEXT;
7976 * Parse the succession of Markup declarations and
7977 * PEReferences.
7978 * Subsequence (markupdecl | PEReference | S)*
7980 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
7981 const xmlChar *check = CUR_PTR;
7982 unsigned int cons = ctxt->input->consumed;
7984 SKIP_BLANKS;
7985 xmlParseMarkupDecl(ctxt);
7986 xmlParsePEReference(ctxt);
7989 * Pop-up of finished entities.
7991 while ((RAW == 0) && (ctxt->inputNr > 1))
7992 xmlPopInput(ctxt);
7994 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7995 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 "xmlParseInternalSubset: error detected in Markup declaration\n");
7997 break;
8000 if (RAW == ']') {
8001 NEXT;
8002 SKIP_BLANKS;
8007 * We should be at the end of the DOCTYPE declaration.
8009 if (RAW != '>') {
8010 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8012 NEXT;
8015 #ifdef LIBXML_SAX1_ENABLED
8017 * xmlParseAttribute:
8018 * @ctxt: an XML parser context
8019 * @value: a xmlChar ** used to store the value of the attribute
8021 * parse an attribute
8023 * [41] Attribute ::= Name Eq AttValue
8025 * [ WFC: No External Entity References ]
8026 * Attribute values cannot contain direct or indirect entity references
8027 * to external entities.
8029 * [ WFC: No < in Attribute Values ]
8030 * The replacement text of any entity referred to directly or indirectly in
8031 * an attribute value (other than "&lt;") must not contain a <.
8033 * [ VC: Attribute Value Type ]
8034 * The attribute must have been declared; the value must be of the type
8035 * declared for it.
8037 * [25] Eq ::= S? '=' S?
8039 * With namespace:
8041 * [NS 11] Attribute ::= QName Eq AttValue
8043 * Also the case QName == xmlns:??? is handled independently as a namespace
8044 * definition.
8046 * Returns the attribute name, and the value in *value.
8049 const xmlChar *
8050 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8051 const xmlChar *name;
8052 xmlChar *val;
8054 *value = NULL;
8055 GROW;
8056 name = xmlParseName(ctxt);
8057 if (name == NULL) {
8058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8059 "error parsing attribute name\n");
8060 return(NULL);
8064 * read the value
8066 SKIP_BLANKS;
8067 if (RAW == '=') {
8068 NEXT;
8069 SKIP_BLANKS;
8070 val = xmlParseAttValue(ctxt);
8071 ctxt->instate = XML_PARSER_CONTENT;
8072 } else {
8073 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8074 "Specification mandate value for attribute %s\n", name);
8075 return(NULL);
8079 * Check that xml:lang conforms to the specification
8080 * No more registered as an error, just generate a warning now
8081 * since this was deprecated in XML second edition
8083 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8084 if (!xmlCheckLanguageID(val)) {
8085 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8086 "Malformed value for xml:lang : %s\n",
8087 val, NULL);
8092 * Check that xml:space conforms to the specification
8094 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8095 if (xmlStrEqual(val, BAD_CAST "default"))
8096 *(ctxt->space) = 0;
8097 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8098 *(ctxt->space) = 1;
8099 else {
8100 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8101 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8102 val, NULL);
8106 *value = val;
8107 return(name);
8111 * xmlParseStartTag:
8112 * @ctxt: an XML parser context
8114 * parse a start of tag either for rule element or
8115 * EmptyElement. In both case we don't parse the tag closing chars.
8117 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8119 * [ WFC: Unique Att Spec ]
8120 * No attribute name may appear more than once in the same start-tag or
8121 * empty-element tag.
8123 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8125 * [ WFC: Unique Att Spec ]
8126 * No attribute name may appear more than once in the same start-tag or
8127 * empty-element tag.
8129 * With namespace:
8131 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8133 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8135 * Returns the element name parsed
8138 const xmlChar *
8139 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8140 const xmlChar *name;
8141 const xmlChar *attname;
8142 xmlChar *attvalue;
8143 const xmlChar **atts = ctxt->atts;
8144 int nbatts = 0;
8145 int maxatts = ctxt->maxatts;
8146 int i;
8148 if (RAW != '<') return(NULL);
8149 NEXT1;
8151 name = xmlParseName(ctxt);
8152 if (name == NULL) {
8153 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8154 "xmlParseStartTag: invalid element name\n");
8155 return(NULL);
8159 * Now parse the attributes, it ends up with the ending
8161 * (S Attribute)* S?
8163 SKIP_BLANKS;
8164 GROW;
8166 while (((RAW != '>') &&
8167 ((RAW != '/') || (NXT(1) != '>')) &&
8168 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8169 const xmlChar *q = CUR_PTR;
8170 unsigned int cons = ctxt->input->consumed;
8172 attname = xmlParseAttribute(ctxt, &attvalue);
8173 if ((attname != NULL) && (attvalue != NULL)) {
8175 * [ WFC: Unique Att Spec ]
8176 * No attribute name may appear more than once in the same
8177 * start-tag or empty-element tag.
8179 for (i = 0; i < nbatts;i += 2) {
8180 if (xmlStrEqual(atts[i], attname)) {
8181 xmlErrAttributeDup(ctxt, NULL, attname);
8182 xmlFree(attvalue);
8183 goto failed;
8187 * Add the pair to atts
8189 if (atts == NULL) {
8190 maxatts = 22; /* allow for 10 attrs by default */
8191 atts = (const xmlChar **)
8192 xmlMalloc(maxatts * sizeof(xmlChar *));
8193 if (atts == NULL) {
8194 xmlErrMemory(ctxt, NULL);
8195 if (attvalue != NULL)
8196 xmlFree(attvalue);
8197 goto failed;
8199 ctxt->atts = atts;
8200 ctxt->maxatts = maxatts;
8201 } else if (nbatts + 4 > maxatts) {
8202 const xmlChar **n;
8204 maxatts *= 2;
8205 n = (const xmlChar **) xmlRealloc((void *) atts,
8206 maxatts * sizeof(const xmlChar *));
8207 if (n == NULL) {
8208 xmlErrMemory(ctxt, NULL);
8209 if (attvalue != NULL)
8210 xmlFree(attvalue);
8211 goto failed;
8213 atts = n;
8214 ctxt->atts = atts;
8215 ctxt->maxatts = maxatts;
8217 atts[nbatts++] = attname;
8218 atts[nbatts++] = attvalue;
8219 atts[nbatts] = NULL;
8220 atts[nbatts + 1] = NULL;
8221 } else {
8222 if (attvalue != NULL)
8223 xmlFree(attvalue);
8226 failed:
8228 GROW
8229 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8230 break;
8231 if (!IS_BLANK_CH(RAW)) {
8232 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8233 "attributes construct error\n");
8235 SKIP_BLANKS;
8236 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8237 (attname == NULL) && (attvalue == NULL)) {
8238 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8239 "xmlParseStartTag: problem parsing attributes\n");
8240 break;
8242 SHRINK;
8243 GROW;
8247 * SAX: Start of Element !
8249 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8250 (!ctxt->disableSAX)) {
8251 if (nbatts > 0)
8252 ctxt->sax->startElement(ctxt->userData, name, atts);
8253 else
8254 ctxt->sax->startElement(ctxt->userData, name, NULL);
8257 if (atts != NULL) {
8258 /* Free only the content strings */
8259 for (i = 1;i < nbatts;i+=2)
8260 if (atts[i] != NULL)
8261 xmlFree((xmlChar *) atts[i]);
8263 return(name);
8267 * xmlParseEndTag1:
8268 * @ctxt: an XML parser context
8269 * @line: line of the start tag
8270 * @nsNr: number of namespaces on the start tag
8272 * parse an end of tag
8274 * [42] ETag ::= '</' Name S? '>'
8276 * With namespace
8278 * [NS 9] ETag ::= '</' QName S? '>'
8281 static void
8282 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8283 const xmlChar *name;
8285 GROW;
8286 if ((RAW != '<') || (NXT(1) != '/')) {
8287 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8288 "xmlParseEndTag: '</' not found\n");
8289 return;
8291 SKIP(2);
8293 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8296 * We should definitely be at the ending "S? '>'" part
8298 GROW;
8299 SKIP_BLANKS;
8300 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8301 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8302 } else
8303 NEXT1;
8306 * [ WFC: Element Type Match ]
8307 * The Name in an element's end-tag must match the element type in the
8308 * start-tag.
8311 if (name != (xmlChar*)1) {
8312 if (name == NULL) name = BAD_CAST "unparseable";
8313 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8314 "Opening and ending tag mismatch: %s line %d and %s\n",
8315 ctxt->name, line, name);
8319 * SAX: End of Tag
8321 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8322 (!ctxt->disableSAX))
8323 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8325 namePop(ctxt);
8326 spacePop(ctxt);
8327 return;
8331 * xmlParseEndTag:
8332 * @ctxt: an XML parser context
8334 * parse an end of tag
8336 * [42] ETag ::= '</' Name S? '>'
8338 * With namespace
8340 * [NS 9] ETag ::= '</' QName S? '>'
8343 void
8344 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8345 xmlParseEndTag1(ctxt, 0);
8347 #endif /* LIBXML_SAX1_ENABLED */
8349 /************************************************************************
8351 * SAX 2 specific operations *
8353 ************************************************************************/
8356 * xmlGetNamespace:
8357 * @ctxt: an XML parser context
8358 * @prefix: the prefix to lookup
8360 * Lookup the namespace name for the @prefix (which ca be NULL)
8361 * The prefix must come from the @ctxt->dict dictionnary
8363 * Returns the namespace name or NULL if not bound
8365 static const xmlChar *
8366 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8367 int i;
8369 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8370 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8371 if (ctxt->nsTab[i] == prefix) {
8372 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8373 return(NULL);
8374 return(ctxt->nsTab[i + 1]);
8376 return(NULL);
8380 * xmlParseQName:
8381 * @ctxt: an XML parser context
8382 * @prefix: pointer to store the prefix part
8384 * parse an XML Namespace QName
8386 * [6] QName ::= (Prefix ':')? LocalPart
8387 * [7] Prefix ::= NCName
8388 * [8] LocalPart ::= NCName
8390 * Returns the Name parsed or NULL
8393 static const xmlChar *
8394 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8395 const xmlChar *l, *p;
8397 GROW;
8399 l = xmlParseNCName(ctxt);
8400 if (l == NULL) {
8401 if (CUR == ':') {
8402 l = xmlParseName(ctxt);
8403 if (l != NULL) {
8404 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8405 "Failed to parse QName '%s'\n", l, NULL, NULL);
8406 *prefix = NULL;
8407 return(l);
8410 return(NULL);
8412 if (CUR == ':') {
8413 NEXT;
8414 p = l;
8415 l = xmlParseNCName(ctxt);
8416 if (l == NULL) {
8417 xmlChar *tmp;
8419 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8420 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8421 l = xmlParseNmtoken(ctxt);
8422 if (l == NULL)
8423 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8424 else {
8425 tmp = xmlBuildQName(l, p, NULL, 0);
8426 xmlFree((char *)l);
8428 p = xmlDictLookup(ctxt->dict, tmp, -1);
8429 if (tmp != NULL) xmlFree(tmp);
8430 *prefix = NULL;
8431 return(p);
8433 if (CUR == ':') {
8434 xmlChar *tmp;
8436 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8437 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8438 NEXT;
8439 tmp = (xmlChar *) xmlParseName(ctxt);
8440 if (tmp != NULL) {
8441 tmp = xmlBuildQName(tmp, l, NULL, 0);
8442 l = xmlDictLookup(ctxt->dict, tmp, -1);
8443 if (tmp != NULL) xmlFree(tmp);
8444 *prefix = p;
8445 return(l);
8447 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8448 l = xmlDictLookup(ctxt->dict, tmp, -1);
8449 if (tmp != NULL) xmlFree(tmp);
8450 *prefix = p;
8451 return(l);
8453 *prefix = p;
8454 } else
8455 *prefix = NULL;
8456 return(l);
8460 * xmlParseQNameAndCompare:
8461 * @ctxt: an XML parser context
8462 * @name: the localname
8463 * @prefix: the prefix, if any.
8465 * parse an XML name and compares for match
8466 * (specialized for endtag parsing)
8468 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8469 * and the name for mismatch
8472 static const xmlChar *
8473 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8474 xmlChar const *prefix) {
8475 const xmlChar *cmp;
8476 const xmlChar *in;
8477 const xmlChar *ret;
8478 const xmlChar *prefix2;
8480 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8482 GROW;
8483 in = ctxt->input->cur;
8485 cmp = prefix;
8486 while (*in != 0 && *in == *cmp) {
8487 ++in;
8488 ++cmp;
8490 if ((*cmp == 0) && (*in == ':')) {
8491 in++;
8492 cmp = name;
8493 while (*in != 0 && *in == *cmp) {
8494 ++in;
8495 ++cmp;
8497 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8498 /* success */
8499 ctxt->input->cur = in;
8500 return((const xmlChar*) 1);
8504 * all strings coms from the dictionary, equality can be done directly
8506 ret = xmlParseQName (ctxt, &prefix2);
8507 if ((ret == name) && (prefix == prefix2))
8508 return((const xmlChar*) 1);
8509 return ret;
8513 * xmlParseAttValueInternal:
8514 * @ctxt: an XML parser context
8515 * @len: attribute len result
8516 * @alloc: whether the attribute was reallocated as a new string
8517 * @normalize: if 1 then further non-CDATA normalization must be done
8519 * parse a value for an attribute.
8520 * NOTE: if no normalization is needed, the routine will return pointers
8521 * directly from the data buffer.
8523 * 3.3.3 Attribute-Value Normalization:
8524 * Before the value of an attribute is passed to the application or
8525 * checked for validity, the XML processor must normalize it as follows:
8526 * - a character reference is processed by appending the referenced
8527 * character to the attribute value
8528 * - an entity reference is processed by recursively processing the
8529 * replacement text of the entity
8530 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8531 * appending #x20 to the normalized value, except that only a single
8532 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8533 * parsed entity or the literal entity value of an internal parsed entity
8534 * - other characters are processed by appending them to the normalized value
8535 * If the declared value is not CDATA, then the XML processor must further
8536 * process the normalized attribute value by discarding any leading and
8537 * trailing space (#x20) characters, and by replacing sequences of space
8538 * (#x20) characters by a single space (#x20) character.
8539 * All attributes for which no declaration has been read should be treated
8540 * by a non-validating parser as if declared CDATA.
8542 * Returns the AttValue parsed or NULL. The value has to be freed by the
8543 * caller if it was copied, this can be detected by val[*len] == 0.
8546 static xmlChar *
8547 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8548 int normalize)
8550 xmlChar limit = 0;
8551 const xmlChar *in = NULL, *start, *end, *last;
8552 xmlChar *ret = NULL;
8554 GROW;
8555 in = (xmlChar *) CUR_PTR;
8556 if (*in != '"' && *in != '\'') {
8557 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8558 return (NULL);
8560 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8563 * try to handle in this routine the most common case where no
8564 * allocation of a new string is required and where content is
8565 * pure ASCII.
8567 limit = *in++;
8568 end = ctxt->input->end;
8569 start = in;
8570 if (in >= end) {
8571 const xmlChar *oldbase = ctxt->input->base;
8572 GROW;
8573 if (oldbase != ctxt->input->base) {
8574 long delta = ctxt->input->base - oldbase;
8575 start = start + delta;
8576 in = in + delta;
8578 end = ctxt->input->end;
8580 if (normalize) {
8582 * Skip any leading spaces
8584 while ((in < end) && (*in != limit) &&
8585 ((*in == 0x20) || (*in == 0x9) ||
8586 (*in == 0xA) || (*in == 0xD))) {
8587 in++;
8588 start = in;
8589 if (in >= end) {
8590 const xmlChar *oldbase = ctxt->input->base;
8591 GROW;
8592 if (ctxt->instate == XML_PARSER_EOF)
8593 return(NULL);
8594 if (oldbase != ctxt->input->base) {
8595 long delta = ctxt->input->base - oldbase;
8596 start = start + delta;
8597 in = in + delta;
8599 end = ctxt->input->end;
8602 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8603 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8604 if ((*in++ == 0x20) && (*in == 0x20)) break;
8605 if (in >= end) {
8606 const xmlChar *oldbase = ctxt->input->base;
8607 GROW;
8608 if (ctxt->instate == XML_PARSER_EOF)
8609 return(NULL);
8610 if (oldbase != ctxt->input->base) {
8611 long delta = ctxt->input->base - oldbase;
8612 start = start + delta;
8613 in = in + delta;
8615 end = ctxt->input->end;
8618 last = in;
8620 * skip the trailing blanks
8622 while ((last[-1] == 0x20) && (last > start)) last--;
8623 while ((in < end) && (*in != limit) &&
8624 ((*in == 0x20) || (*in == 0x9) ||
8625 (*in == 0xA) || (*in == 0xD))) {
8626 in++;
8627 if (in >= end) {
8628 const xmlChar *oldbase = ctxt->input->base;
8629 GROW;
8630 if (ctxt->instate == XML_PARSER_EOF)
8631 return(NULL);
8632 if (oldbase != ctxt->input->base) {
8633 long delta = ctxt->input->base - oldbase;
8634 start = start + delta;
8635 in = in + delta;
8636 last = last + delta;
8638 end = ctxt->input->end;
8641 if (*in != limit) goto need_complex;
8642 } else {
8643 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8644 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8645 in++;
8646 if (in >= end) {
8647 const xmlChar *oldbase = ctxt->input->base;
8648 GROW;
8649 if (ctxt->instate == XML_PARSER_EOF)
8650 return(NULL);
8651 if (oldbase != ctxt->input->base) {
8652 long delta = ctxt->input->base - oldbase;
8653 start = start + delta;
8654 in = in + delta;
8656 end = ctxt->input->end;
8659 last = in;
8660 if (*in != limit) goto need_complex;
8662 in++;
8663 if (len != NULL) {
8664 *len = last - start;
8665 ret = (xmlChar *) start;
8666 } else {
8667 if (alloc) *alloc = 1;
8668 ret = xmlStrndup(start, last - start);
8670 CUR_PTR = in;
8671 if (alloc) *alloc = 0;
8672 return ret;
8673 need_complex:
8674 if (alloc) *alloc = 1;
8675 return xmlParseAttValueComplex(ctxt, len, normalize);
8679 * xmlParseAttribute2:
8680 * @ctxt: an XML parser context
8681 * @pref: the element prefix
8682 * @elem: the element name
8683 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8684 * @value: a xmlChar ** used to store the value of the attribute
8685 * @len: an int * to save the length of the attribute
8686 * @alloc: an int * to indicate if the attribute was allocated
8688 * parse an attribute in the new SAX2 framework.
8690 * Returns the attribute name, and the value in *value, .
8693 static const xmlChar *
8694 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8695 const xmlChar * pref, const xmlChar * elem,
8696 const xmlChar ** prefix, xmlChar ** value,
8697 int *len, int *alloc)
8699 const xmlChar *name;
8700 xmlChar *val, *internal_val = NULL;
8701 int normalize = 0;
8703 *value = NULL;
8704 GROW;
8705 name = xmlParseQName(ctxt, prefix);
8706 if (name == NULL) {
8707 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8708 "error parsing attribute name\n");
8709 return (NULL);
8713 * get the type if needed
8715 if (ctxt->attsSpecial != NULL) {
8716 int type;
8718 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8719 pref, elem, *prefix, name);
8720 if (type != 0)
8721 normalize = 1;
8725 * read the value
8727 SKIP_BLANKS;
8728 if (RAW == '=') {
8729 NEXT;
8730 SKIP_BLANKS;
8731 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8732 if (normalize) {
8734 * Sometimes a second normalisation pass for spaces is needed
8735 * but that only happens if charrefs or entities refernces
8736 * have been used in the attribute value, i.e. the attribute
8737 * value have been extracted in an allocated string already.
8739 if (*alloc) {
8740 const xmlChar *val2;
8742 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8743 if ((val2 != NULL) && (val2 != val)) {
8744 xmlFree(val);
8745 val = (xmlChar *) val2;
8749 ctxt->instate = XML_PARSER_CONTENT;
8750 } else {
8751 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8752 "Specification mandate value for attribute %s\n",
8753 name);
8754 return (NULL);
8757 if (*prefix == ctxt->str_xml) {
8759 * Check that xml:lang conforms to the specification
8760 * No more registered as an error, just generate a warning now
8761 * since this was deprecated in XML second edition
8763 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8764 internal_val = xmlStrndup(val, *len);
8765 if (!xmlCheckLanguageID(internal_val)) {
8766 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8767 "Malformed value for xml:lang : %s\n",
8768 internal_val, NULL);
8773 * Check that xml:space conforms to the specification
8775 if (xmlStrEqual(name, BAD_CAST "space")) {
8776 internal_val = xmlStrndup(val, *len);
8777 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8778 *(ctxt->space) = 0;
8779 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8780 *(ctxt->space) = 1;
8781 else {
8782 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8783 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8784 internal_val, NULL);
8787 if (internal_val) {
8788 xmlFree(internal_val);
8792 *value = val;
8793 return (name);
8796 * xmlParseStartTag2:
8797 * @ctxt: an XML parser context
8799 * parse a start of tag either for rule element or
8800 * EmptyElement. In both case we don't parse the tag closing chars.
8801 * This routine is called when running SAX2 parsing
8803 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8805 * [ WFC: Unique Att Spec ]
8806 * No attribute name may appear more than once in the same start-tag or
8807 * empty-element tag.
8809 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8811 * [ WFC: Unique Att Spec ]
8812 * No attribute name may appear more than once in the same start-tag or
8813 * empty-element tag.
8815 * With namespace:
8817 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8819 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8821 * Returns the element name parsed
8824 static const xmlChar *
8825 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8826 const xmlChar **URI, int *tlen) {
8827 const xmlChar *localname;
8828 const xmlChar *prefix;
8829 const xmlChar *attname;
8830 const xmlChar *aprefix;
8831 const xmlChar *nsname;
8832 xmlChar *attvalue;
8833 const xmlChar **atts = ctxt->atts;
8834 int maxatts = ctxt->maxatts;
8835 int nratts, nbatts, nbdef;
8836 int i, j, nbNs, attval, oldline, oldcol;
8837 const xmlChar *base;
8838 unsigned long cur;
8839 int nsNr = ctxt->nsNr;
8841 if (RAW != '<') return(NULL);
8842 NEXT1;
8845 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8846 * point since the attribute values may be stored as pointers to
8847 * the buffer and calling SHRINK would destroy them !
8848 * The Shrinking is only possible once the full set of attribute
8849 * callbacks have been done.
8851 reparse:
8852 SHRINK;
8853 base = ctxt->input->base;
8854 cur = ctxt->input->cur - ctxt->input->base;
8855 oldline = ctxt->input->line;
8856 oldcol = ctxt->input->col;
8857 nbatts = 0;
8858 nratts = 0;
8859 nbdef = 0;
8860 nbNs = 0;
8861 attval = 0;
8862 /* Forget any namespaces added during an earlier parse of this element. */
8863 ctxt->nsNr = nsNr;
8865 localname = xmlParseQName(ctxt, &prefix);
8866 if (localname == NULL) {
8867 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8868 "StartTag: invalid element name\n");
8869 return(NULL);
8871 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8874 * Now parse the attributes, it ends up with the ending
8876 * (S Attribute)* S?
8878 SKIP_BLANKS;
8879 GROW;
8880 if (ctxt->input->base != base) goto base_changed;
8882 while (((RAW != '>') &&
8883 ((RAW != '/') || (NXT(1) != '>')) &&
8884 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8885 const xmlChar *q = CUR_PTR;
8886 unsigned int cons = ctxt->input->consumed;
8887 int len = -1, alloc = 0;
8889 attname = xmlParseAttribute2(ctxt, prefix, localname,
8890 &aprefix, &attvalue, &len, &alloc);
8891 if (ctxt->input->base != base) {
8892 if ((attvalue != NULL) && (alloc != 0))
8893 xmlFree(attvalue);
8894 attvalue = NULL;
8895 goto base_changed;
8897 if ((attname != NULL) && (attvalue != NULL)) {
8898 if (len < 0) len = xmlStrlen(attvalue);
8899 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8900 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8901 xmlURIPtr uri;
8903 if (*URL != 0) {
8904 uri = xmlParseURI((const char *) URL);
8905 if (uri == NULL) {
8906 xmlNsErr(ctxt, XML_WAR_NS_URI,
8907 "xmlns: '%s' is not a valid URI\n",
8908 URL, NULL, NULL);
8909 } else {
8910 if (uri->scheme == NULL) {
8911 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8912 "xmlns: URI %s is not absolute\n",
8913 URL, NULL, NULL);
8915 xmlFreeURI(uri);
8917 if (URL == ctxt->str_xml_ns) {
8918 if (attname != ctxt->str_xml) {
8919 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8920 "xml namespace URI cannot be the default namespace\n",
8921 NULL, NULL, NULL);
8923 goto skip_default_ns;
8925 if ((len == 29) &&
8926 (xmlStrEqual(URL,
8927 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8928 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8929 "reuse of the xmlns namespace name is forbidden\n",
8930 NULL, NULL, NULL);
8931 goto skip_default_ns;
8935 * check that it's not a defined namespace
8937 for (j = 1;j <= nbNs;j++)
8938 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8939 break;
8940 if (j <= nbNs)
8941 xmlErrAttributeDup(ctxt, NULL, attname);
8942 else
8943 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8944 skip_default_ns:
8945 if (alloc != 0) xmlFree(attvalue);
8946 SKIP_BLANKS;
8947 continue;
8949 if (aprefix == ctxt->str_xmlns) {
8950 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8951 xmlURIPtr uri;
8953 if (attname == ctxt->str_xml) {
8954 if (URL != ctxt->str_xml_ns) {
8955 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8956 "xml namespace prefix mapped to wrong URI\n",
8957 NULL, NULL, NULL);
8960 * Do not keep a namespace definition node
8962 goto skip_ns;
8964 if (URL == ctxt->str_xml_ns) {
8965 if (attname != ctxt->str_xml) {
8966 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8967 "xml namespace URI mapped to wrong prefix\n",
8968 NULL, NULL, NULL);
8970 goto skip_ns;
8972 if (attname == ctxt->str_xmlns) {
8973 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8974 "redefinition of the xmlns prefix is forbidden\n",
8975 NULL, NULL, NULL);
8976 goto skip_ns;
8978 if ((len == 29) &&
8979 (xmlStrEqual(URL,
8980 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8981 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8982 "reuse of the xmlns namespace name is forbidden\n",
8983 NULL, NULL, NULL);
8984 goto skip_ns;
8986 if ((URL == NULL) || (URL[0] == 0)) {
8987 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8988 "xmlns:%s: Empty XML namespace is not allowed\n",
8989 attname, NULL, NULL);
8990 goto skip_ns;
8991 } else {
8992 uri = xmlParseURI((const char *) URL);
8993 if (uri == NULL) {
8994 xmlNsErr(ctxt, XML_WAR_NS_URI,
8995 "xmlns:%s: '%s' is not a valid URI\n",
8996 attname, URL, NULL);
8997 } else {
8998 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8999 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9000 "xmlns:%s: URI %s is not absolute\n",
9001 attname, URL, NULL);
9003 xmlFreeURI(uri);
9008 * check that it's not a defined namespace
9010 for (j = 1;j <= nbNs;j++)
9011 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9012 break;
9013 if (j <= nbNs)
9014 xmlErrAttributeDup(ctxt, aprefix, attname);
9015 else
9016 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9017 skip_ns:
9018 if (alloc != 0) xmlFree(attvalue);
9019 SKIP_BLANKS;
9020 if (ctxt->input->base != base) goto base_changed;
9021 continue;
9025 * Add the pair to atts
9027 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9028 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9029 if (attvalue[len] == 0)
9030 xmlFree(attvalue);
9031 goto failed;
9033 maxatts = ctxt->maxatts;
9034 atts = ctxt->atts;
9036 ctxt->attallocs[nratts++] = alloc;
9037 atts[nbatts++] = attname;
9038 atts[nbatts++] = aprefix;
9039 atts[nbatts++] = NULL; /* the URI will be fetched later */
9040 atts[nbatts++] = attvalue;
9041 attvalue += len;
9042 atts[nbatts++] = attvalue;
9044 * tag if some deallocation is needed
9046 if (alloc != 0) attval = 1;
9047 } else {
9048 if ((attvalue != NULL) && (attvalue[len] == 0))
9049 xmlFree(attvalue);
9052 failed:
9054 GROW
9055 if (ctxt->instate == XML_PARSER_EOF)
9056 break;
9057 if (ctxt->input->base != base) goto base_changed;
9058 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9059 break;
9060 if (!IS_BLANK_CH(RAW)) {
9061 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9062 "attributes construct error\n");
9063 break;
9065 SKIP_BLANKS;
9066 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9067 (attname == NULL) && (attvalue == NULL)) {
9068 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9069 "xmlParseStartTag: problem parsing attributes\n");
9070 break;
9072 GROW;
9073 if (ctxt->input->base != base) goto base_changed;
9077 * The attributes defaulting
9079 if (ctxt->attsDefault != NULL) {
9080 xmlDefAttrsPtr defaults;
9082 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9083 if (defaults != NULL) {
9084 for (i = 0;i < defaults->nbAttrs;i++) {
9085 attname = defaults->values[5 * i];
9086 aprefix = defaults->values[5 * i + 1];
9089 * special work for namespaces defaulted defs
9091 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9093 * check that it's not a defined namespace
9095 for (j = 1;j <= nbNs;j++)
9096 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9097 break;
9098 if (j <= nbNs) continue;
9100 nsname = xmlGetNamespace(ctxt, NULL);
9101 if (nsname != defaults->values[5 * i + 2]) {
9102 if (nsPush(ctxt, NULL,
9103 defaults->values[5 * i + 2]) > 0)
9104 nbNs++;
9106 } else if (aprefix == ctxt->str_xmlns) {
9108 * check that it's not a defined namespace
9110 for (j = 1;j <= nbNs;j++)
9111 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9112 break;
9113 if (j <= nbNs) continue;
9115 nsname = xmlGetNamespace(ctxt, attname);
9116 if (nsname != defaults->values[2]) {
9117 if (nsPush(ctxt, attname,
9118 defaults->values[5 * i + 2]) > 0)
9119 nbNs++;
9121 } else {
9123 * check that it's not a defined attribute
9125 for (j = 0;j < nbatts;j+=5) {
9126 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9127 break;
9129 if (j < nbatts) continue;
9131 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9132 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9133 return(NULL);
9135 maxatts = ctxt->maxatts;
9136 atts = ctxt->atts;
9138 atts[nbatts++] = attname;
9139 atts[nbatts++] = aprefix;
9140 if (aprefix == NULL)
9141 atts[nbatts++] = NULL;
9142 else
9143 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9144 atts[nbatts++] = defaults->values[5 * i + 2];
9145 atts[nbatts++] = defaults->values[5 * i + 3];
9146 if ((ctxt->standalone == 1) &&
9147 (defaults->values[5 * i + 4] != NULL)) {
9148 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9149 "standalone: attribute %s on %s defaulted from external subset\n",
9150 attname, localname);
9152 nbdef++;
9159 * The attributes checkings
9161 for (i = 0; i < nbatts;i += 5) {
9163 * The default namespace does not apply to attribute names.
9165 if (atts[i + 1] != NULL) {
9166 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9167 if (nsname == NULL) {
9168 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9169 "Namespace prefix %s for %s on %s is not defined\n",
9170 atts[i + 1], atts[i], localname);
9172 atts[i + 2] = nsname;
9173 } else
9174 nsname = NULL;
9176 * [ WFC: Unique Att Spec ]
9177 * No attribute name may appear more than once in the same
9178 * start-tag or empty-element tag.
9179 * As extended by the Namespace in XML REC.
9181 for (j = 0; j < i;j += 5) {
9182 if (atts[i] == atts[j]) {
9183 if (atts[i+1] == atts[j+1]) {
9184 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9185 break;
9187 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9188 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9189 "Namespaced Attribute %s in '%s' redefined\n",
9190 atts[i], nsname, NULL);
9191 break;
9197 nsname = xmlGetNamespace(ctxt, prefix);
9198 if ((prefix != NULL) && (nsname == NULL)) {
9199 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9200 "Namespace prefix %s on %s is not defined\n",
9201 prefix, localname, NULL);
9203 *pref = prefix;
9204 *URI = nsname;
9207 * SAX: Start of Element !
9209 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9210 (!ctxt->disableSAX)) {
9211 if (nbNs > 0)
9212 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9213 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9214 nbatts / 5, nbdef, atts);
9215 else
9216 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9217 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9221 * Free up attribute allocated strings if needed
9223 if (attval != 0) {
9224 for (i = 3,j = 0; j < nratts;i += 5,j++)
9225 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9226 xmlFree((xmlChar *) atts[i]);
9229 return(localname);
9231 base_changed:
9233 * the attribute strings are valid iif the base didn't changed
9235 if (attval != 0) {
9236 for (i = 3,j = 0; j < nratts;i += 5,j++)
9237 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9238 xmlFree((xmlChar *) atts[i]);
9240 ctxt->input->cur = ctxt->input->base + cur;
9241 ctxt->input->line = oldline;
9242 ctxt->input->col = oldcol;
9243 if (ctxt->wellFormed == 1) {
9244 goto reparse;
9246 return(NULL);
9250 * xmlParseEndTag2:
9251 * @ctxt: an XML parser context
9252 * @line: line of the start tag
9253 * @nsNr: number of namespaces on the start tag
9255 * parse an end of tag
9257 * [42] ETag ::= '</' Name S? '>'
9259 * With namespace
9261 * [NS 9] ETag ::= '</' QName S? '>'
9264 static void
9265 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9266 const xmlChar *URI, int line, int nsNr, int tlen) {
9267 const xmlChar *name;
9269 GROW;
9270 if ((RAW != '<') || (NXT(1) != '/')) {
9271 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9272 return;
9274 SKIP(2);
9276 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9277 if (ctxt->input->cur[tlen] == '>') {
9278 ctxt->input->cur += tlen + 1;
9279 goto done;
9281 ctxt->input->cur += tlen;
9282 name = (xmlChar*)1;
9283 } else {
9284 if (prefix == NULL)
9285 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9286 else
9287 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9291 * We should definitely be at the ending "S? '>'" part
9293 GROW;
9294 if (ctxt->instate == XML_PARSER_EOF)
9295 return;
9296 SKIP_BLANKS;
9297 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9298 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9299 } else
9300 NEXT1;
9303 * [ WFC: Element Type Match ]
9304 * The Name in an element's end-tag must match the element type in the
9305 * start-tag.
9308 if (name != (xmlChar*)1) {
9309 if (name == NULL) name = BAD_CAST "unparseable";
9310 if ((line == 0) && (ctxt->node != NULL))
9311 line = ctxt->node->line;
9312 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9313 "Opening and ending tag mismatch: %s line %d and %s\n",
9314 ctxt->name, line, name);
9318 * SAX: End of Tag
9320 done:
9321 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9322 (!ctxt->disableSAX))
9323 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9325 spacePop(ctxt);
9326 if (nsNr != 0)
9327 nsPop(ctxt, nsNr);
9328 return;
9332 * xmlParseCDSect:
9333 * @ctxt: an XML parser context
9335 * Parse escaped pure raw content.
9337 * [18] CDSect ::= CDStart CData CDEnd
9339 * [19] CDStart ::= '<![CDATA['
9341 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9343 * [21] CDEnd ::= ']]>'
9345 void
9346 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9347 xmlChar *buf = NULL;
9348 int len = 0;
9349 int size = XML_PARSER_BUFFER_SIZE;
9350 int r, rl;
9351 int s, sl;
9352 int cur, l;
9353 int count = 0;
9355 /* Check 2.6.0 was NXT(0) not RAW */
9356 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9357 SKIP(9);
9358 } else
9359 return;
9361 ctxt->instate = XML_PARSER_CDATA_SECTION;
9362 r = CUR_CHAR(rl);
9363 if (!IS_CHAR(r)) {
9364 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9365 ctxt->instate = XML_PARSER_CONTENT;
9366 return;
9368 NEXTL(rl);
9369 s = CUR_CHAR(sl);
9370 if (!IS_CHAR(s)) {
9371 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9372 ctxt->instate = XML_PARSER_CONTENT;
9373 return;
9375 NEXTL(sl);
9376 cur = CUR_CHAR(l);
9377 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9378 if (buf == NULL) {
9379 xmlErrMemory(ctxt, NULL);
9380 return;
9382 while (IS_CHAR(cur) &&
9383 ((r != ']') || (s != ']') || (cur != '>'))) {
9384 if (len + 5 >= size) {
9385 xmlChar *tmp;
9387 size *= 2;
9388 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9389 if (tmp == NULL) {
9390 xmlFree(buf);
9391 xmlErrMemory(ctxt, NULL);
9392 return;
9394 buf = tmp;
9396 COPY_BUF(rl,buf,len,r);
9397 r = s;
9398 rl = sl;
9399 s = cur;
9400 sl = l;
9401 count++;
9402 if (count > 50) {
9403 GROW;
9404 if (ctxt->instate == XML_PARSER_EOF) {
9405 xmlFree(buf);
9406 return;
9408 count = 0;
9410 NEXTL(l);
9411 cur = CUR_CHAR(l);
9413 buf[len] = 0;
9414 ctxt->instate = XML_PARSER_CONTENT;
9415 if (cur != '>') {
9416 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9417 "CData section not finished\n%.50s\n", buf);
9418 xmlFree(buf);
9419 return;
9421 NEXTL(l);
9424 * OK the buffer is to be consumed as cdata.
9426 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9427 if (ctxt->sax->cdataBlock != NULL)
9428 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9429 else if (ctxt->sax->characters != NULL)
9430 ctxt->sax->characters(ctxt->userData, buf, len);
9432 xmlFree(buf);
9436 * xmlParseContent:
9437 * @ctxt: an XML parser context
9439 * Parse a content:
9441 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9444 void
9445 xmlParseContent(xmlParserCtxtPtr ctxt) {
9446 GROW;
9447 while ((RAW != 0) &&
9448 ((RAW != '<') || (NXT(1) != '/')) &&
9449 (ctxt->instate != XML_PARSER_EOF)) {
9450 const xmlChar *test = CUR_PTR;
9451 unsigned int cons = ctxt->input->consumed;
9452 const xmlChar *cur = ctxt->input->cur;
9455 * First case : a Processing Instruction.
9457 if ((*cur == '<') && (cur[1] == '?')) {
9458 xmlParsePI(ctxt);
9462 * Second case : a CDSection
9464 /* 2.6.0 test was *cur not RAW */
9465 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9466 xmlParseCDSect(ctxt);
9470 * Third case : a comment
9472 else if ((*cur == '<') && (NXT(1) == '!') &&
9473 (NXT(2) == '-') && (NXT(3) == '-')) {
9474 xmlParseComment(ctxt);
9475 ctxt->instate = XML_PARSER_CONTENT;
9479 * Fourth case : a sub-element.
9481 else if (*cur == '<') {
9482 xmlParseElement(ctxt);
9486 * Fifth case : a reference. If if has not been resolved,
9487 * parsing returns it's Name, create the node
9490 else if (*cur == '&') {
9491 xmlParseReference(ctxt);
9495 * Last case, text. Note that References are handled directly.
9497 else {
9498 xmlParseCharData(ctxt, 0);
9501 GROW;
9503 * Pop-up of finished entities.
9505 while ((RAW == 0) && (ctxt->inputNr > 1))
9506 xmlPopInput(ctxt);
9507 SHRINK;
9509 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9510 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9511 "detected an error in element content\n");
9512 ctxt->instate = XML_PARSER_EOF;
9513 break;
9519 * xmlParseElement:
9520 * @ctxt: an XML parser context
9522 * parse an XML element, this is highly recursive
9524 * [39] element ::= EmptyElemTag | STag content ETag
9526 * [ WFC: Element Type Match ]
9527 * The Name in an element's end-tag must match the element type in the
9528 * start-tag.
9532 void
9533 xmlParseElement(xmlParserCtxtPtr ctxt) {
9534 const xmlChar *name;
9535 const xmlChar *prefix = NULL;
9536 const xmlChar *URI = NULL;
9537 xmlParserNodeInfo node_info;
9538 int line, tlen;
9539 xmlNodePtr ret;
9540 int nsNr = ctxt->nsNr;
9542 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9543 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9544 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9545 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9546 xmlParserMaxDepth);
9547 ctxt->instate = XML_PARSER_EOF;
9548 return;
9551 /* Capture start position */
9552 if (ctxt->record_info) {
9553 node_info.begin_pos = ctxt->input->consumed +
9554 (CUR_PTR - ctxt->input->base);
9555 node_info.begin_line = ctxt->input->line;
9558 if (ctxt->spaceNr == 0)
9559 spacePush(ctxt, -1);
9560 else if (*ctxt->space == -2)
9561 spacePush(ctxt, -1);
9562 else
9563 spacePush(ctxt, *ctxt->space);
9565 line = ctxt->input->line;
9566 #ifdef LIBXML_SAX1_ENABLED
9567 if (ctxt->sax2)
9568 #endif /* LIBXML_SAX1_ENABLED */
9569 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9570 #ifdef LIBXML_SAX1_ENABLED
9571 else
9572 name = xmlParseStartTag(ctxt);
9573 #endif /* LIBXML_SAX1_ENABLED */
9574 if (ctxt->instate == XML_PARSER_EOF)
9575 return;
9576 if (name == NULL) {
9577 spacePop(ctxt);
9578 return;
9580 namePush(ctxt, name);
9581 ret = ctxt->node;
9583 #ifdef LIBXML_VALID_ENABLED
9585 * [ VC: Root Element Type ]
9586 * The Name in the document type declaration must match the element
9587 * type of the root element.
9589 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9590 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9591 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9592 #endif /* LIBXML_VALID_ENABLED */
9595 * Check for an Empty Element.
9597 if ((RAW == '/') && (NXT(1) == '>')) {
9598 SKIP(2);
9599 if (ctxt->sax2) {
9600 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9601 (!ctxt->disableSAX))
9602 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9603 #ifdef LIBXML_SAX1_ENABLED
9604 } else {
9605 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9606 (!ctxt->disableSAX))
9607 ctxt->sax->endElement(ctxt->userData, name);
9608 #endif /* LIBXML_SAX1_ENABLED */
9610 namePop(ctxt);
9611 spacePop(ctxt);
9612 if (nsNr != ctxt->nsNr)
9613 nsPop(ctxt, ctxt->nsNr - nsNr);
9614 if ( ret != NULL && ctxt->record_info ) {
9615 node_info.end_pos = ctxt->input->consumed +
9616 (CUR_PTR - ctxt->input->base);
9617 node_info.end_line = ctxt->input->line;
9618 node_info.node = ret;
9619 xmlParserAddNodeInfo(ctxt, &node_info);
9621 return;
9623 if (RAW == '>') {
9624 NEXT1;
9625 } else {
9626 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9627 "Couldn't find end of Start Tag %s line %d\n",
9628 name, line, NULL);
9631 * end of parsing of this node.
9633 nodePop(ctxt);
9634 namePop(ctxt);
9635 spacePop(ctxt);
9636 if (nsNr != ctxt->nsNr)
9637 nsPop(ctxt, ctxt->nsNr - nsNr);
9640 * Capture end position and add node
9642 if ( ret != NULL && ctxt->record_info ) {
9643 node_info.end_pos = ctxt->input->consumed +
9644 (CUR_PTR - ctxt->input->base);
9645 node_info.end_line = ctxt->input->line;
9646 node_info.node = ret;
9647 xmlParserAddNodeInfo(ctxt, &node_info);
9649 return;
9653 * Parse the content of the element:
9655 xmlParseContent(ctxt);
9656 if (ctxt->instate == XML_PARSER_EOF)
9657 return;
9658 if (!IS_BYTE_CHAR(RAW)) {
9659 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9660 "Premature end of data in tag %s line %d\n",
9661 name, line, NULL);
9664 * end of parsing of this node.
9666 nodePop(ctxt);
9667 namePop(ctxt);
9668 spacePop(ctxt);
9669 if (nsNr != ctxt->nsNr)
9670 nsPop(ctxt, ctxt->nsNr - nsNr);
9671 return;
9675 * parse the end of tag: '</' should be here.
9677 if (ctxt->sax2) {
9678 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9679 namePop(ctxt);
9681 #ifdef LIBXML_SAX1_ENABLED
9682 else
9683 xmlParseEndTag1(ctxt, line);
9684 #endif /* LIBXML_SAX1_ENABLED */
9687 * Capture end position and add node
9689 if ( ret != NULL && ctxt->record_info ) {
9690 node_info.end_pos = ctxt->input->consumed +
9691 (CUR_PTR - ctxt->input->base);
9692 node_info.end_line = ctxt->input->line;
9693 node_info.node = ret;
9694 xmlParserAddNodeInfo(ctxt, &node_info);
9699 * xmlParseVersionNum:
9700 * @ctxt: an XML parser context
9702 * parse the XML version value.
9704 * [26] VersionNum ::= '1.' [0-9]+
9706 * In practice allow [0-9].[0-9]+ at that level
9708 * Returns the string giving the XML version number, or NULL
9710 xmlChar *
9711 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9712 xmlChar *buf = NULL;
9713 int len = 0;
9714 int size = 10;
9715 xmlChar cur;
9717 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9718 if (buf == NULL) {
9719 xmlErrMemory(ctxt, NULL);
9720 return(NULL);
9722 cur = CUR;
9723 if (!((cur >= '0') && (cur <= '9'))) {
9724 xmlFree(buf);
9725 return(NULL);
9727 buf[len++] = cur;
9728 NEXT;
9729 cur=CUR;
9730 if (cur != '.') {
9731 xmlFree(buf);
9732 return(NULL);
9734 buf[len++] = cur;
9735 NEXT;
9736 cur=CUR;
9737 while ((cur >= '0') && (cur <= '9')) {
9738 if (len + 1 >= size) {
9739 xmlChar *tmp;
9741 size *= 2;
9742 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9743 if (tmp == NULL) {
9744 xmlFree(buf);
9745 xmlErrMemory(ctxt, NULL);
9746 return(NULL);
9748 buf = tmp;
9750 buf[len++] = cur;
9751 NEXT;
9752 cur=CUR;
9754 buf[len] = 0;
9755 return(buf);
9759 * xmlParseVersionInfo:
9760 * @ctxt: an XML parser context
9762 * parse the XML version.
9764 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9766 * [25] Eq ::= S? '=' S?
9768 * Returns the version string, e.g. "1.0"
9771 xmlChar *
9772 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9773 xmlChar *version = NULL;
9775 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9776 SKIP(7);
9777 SKIP_BLANKS;
9778 if (RAW != '=') {
9779 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9780 return(NULL);
9782 NEXT;
9783 SKIP_BLANKS;
9784 if (RAW == '"') {
9785 NEXT;
9786 version = xmlParseVersionNum(ctxt);
9787 if (RAW != '"') {
9788 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9789 } else
9790 NEXT;
9791 } else if (RAW == '\''){
9792 NEXT;
9793 version = xmlParseVersionNum(ctxt);
9794 if (RAW != '\'') {
9795 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9796 } else
9797 NEXT;
9798 } else {
9799 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9802 return(version);
9806 * xmlParseEncName:
9807 * @ctxt: an XML parser context
9809 * parse the XML encoding name
9811 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9813 * Returns the encoding name value or NULL
9815 xmlChar *
9816 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9817 xmlChar *buf = NULL;
9818 int len = 0;
9819 int size = 10;
9820 xmlChar cur;
9822 cur = CUR;
9823 if (((cur >= 'a') && (cur <= 'z')) ||
9824 ((cur >= 'A') && (cur <= 'Z'))) {
9825 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9826 if (buf == NULL) {
9827 xmlErrMemory(ctxt, NULL);
9828 return(NULL);
9831 buf[len++] = cur;
9832 NEXT;
9833 cur = CUR;
9834 while (((cur >= 'a') && (cur <= 'z')) ||
9835 ((cur >= 'A') && (cur <= 'Z')) ||
9836 ((cur >= '0') && (cur <= '9')) ||
9837 (cur == '.') || (cur == '_') ||
9838 (cur == '-')) {
9839 if (len + 1 >= size) {
9840 xmlChar *tmp;
9842 size *= 2;
9843 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9844 if (tmp == NULL) {
9845 xmlErrMemory(ctxt, NULL);
9846 xmlFree(buf);
9847 return(NULL);
9849 buf = tmp;
9851 buf[len++] = cur;
9852 NEXT;
9853 cur = CUR;
9854 if (cur == 0) {
9855 SHRINK;
9856 GROW;
9857 cur = CUR;
9860 buf[len] = 0;
9861 } else {
9862 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9864 return(buf);
9868 * xmlParseEncodingDecl:
9869 * @ctxt: an XML parser context
9871 * parse the XML encoding declaration
9873 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9875 * this setups the conversion filters.
9877 * Returns the encoding value or NULL
9880 const xmlChar *
9881 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9882 xmlChar *encoding = NULL;
9884 SKIP_BLANKS;
9885 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9886 SKIP(8);
9887 SKIP_BLANKS;
9888 if (RAW != '=') {
9889 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9890 return(NULL);
9892 NEXT;
9893 SKIP_BLANKS;
9894 if (RAW == '"') {
9895 NEXT;
9896 encoding = xmlParseEncName(ctxt);
9897 if (RAW != '"') {
9898 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9899 } else
9900 NEXT;
9901 } else if (RAW == '\''){
9902 NEXT;
9903 encoding = xmlParseEncName(ctxt);
9904 if (RAW != '\'') {
9905 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9906 } else
9907 NEXT;
9908 } else {
9909 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9912 * UTF-16 encoding stwich has already taken place at this stage,
9913 * more over the little-endian/big-endian selection is already done
9915 if ((encoding != NULL) &&
9916 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9917 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9919 * If no encoding was passed to the parser, that we are
9920 * using UTF-16 and no decoder is present i.e. the
9921 * document is apparently UTF-8 compatible, then raise an
9922 * encoding mismatch fatal error
9924 if ((ctxt->encoding == NULL) &&
9925 (ctxt->input->buf != NULL) &&
9926 (ctxt->input->buf->encoder == NULL)) {
9927 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9928 "Document labelled UTF-16 but has UTF-8 content\n");
9930 if (ctxt->encoding != NULL)
9931 xmlFree((xmlChar *) ctxt->encoding);
9932 ctxt->encoding = encoding;
9935 * UTF-8 encoding is handled natively
9937 else if ((encoding != NULL) &&
9938 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9939 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9940 if (ctxt->encoding != NULL)
9941 xmlFree((xmlChar *) ctxt->encoding);
9942 ctxt->encoding = encoding;
9944 else if (encoding != NULL) {
9945 xmlCharEncodingHandlerPtr handler;
9947 if (ctxt->input->encoding != NULL)
9948 xmlFree((xmlChar *) ctxt->input->encoding);
9949 ctxt->input->encoding = encoding;
9951 handler = xmlFindCharEncodingHandler((const char *) encoding);
9952 if (handler != NULL) {
9953 xmlSwitchToEncoding(ctxt, handler);
9954 } else {
9955 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9956 "Unsupported encoding %s\n", encoding);
9957 return(NULL);
9961 return(encoding);
9965 * xmlParseSDDecl:
9966 * @ctxt: an XML parser context
9968 * parse the XML standalone declaration
9970 * [32] SDDecl ::= S 'standalone' Eq
9971 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9973 * [ VC: Standalone Document Declaration ]
9974 * TODO The standalone document declaration must have the value "no"
9975 * if any external markup declarations contain declarations of:
9976 * - attributes with default values, if elements to which these
9977 * attributes apply appear in the document without specifications
9978 * of values for these attributes, or
9979 * - entities (other than amp, lt, gt, apos, quot), if references
9980 * to those entities appear in the document, or
9981 * - attributes with values subject to normalization, where the
9982 * attribute appears in the document with a value which will change
9983 * as a result of normalization, or
9984 * - element types with element content, if white space occurs directly
9985 * within any instance of those types.
9987 * Returns:
9988 * 1 if standalone="yes"
9989 * 0 if standalone="no"
9990 * -2 if standalone attribute is missing or invalid
9991 * (A standalone value of -2 means that the XML declaration was found,
9992 * but no value was specified for the standalone attribute).
9996 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9997 int standalone = -2;
9999 SKIP_BLANKS;
10000 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10001 SKIP(10);
10002 SKIP_BLANKS;
10003 if (RAW != '=') {
10004 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10005 return(standalone);
10007 NEXT;
10008 SKIP_BLANKS;
10009 if (RAW == '\''){
10010 NEXT;
10011 if ((RAW == 'n') && (NXT(1) == 'o')) {
10012 standalone = 0;
10013 SKIP(2);
10014 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10015 (NXT(2) == 's')) {
10016 standalone = 1;
10017 SKIP(3);
10018 } else {
10019 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10021 if (RAW != '\'') {
10022 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10023 } else
10024 NEXT;
10025 } else if (RAW == '"'){
10026 NEXT;
10027 if ((RAW == 'n') && (NXT(1) == 'o')) {
10028 standalone = 0;
10029 SKIP(2);
10030 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10031 (NXT(2) == 's')) {
10032 standalone = 1;
10033 SKIP(3);
10034 } else {
10035 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10037 if (RAW != '"') {
10038 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10039 } else
10040 NEXT;
10041 } else {
10042 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10045 return(standalone);
10049 * xmlParseXMLDecl:
10050 * @ctxt: an XML parser context
10052 * parse an XML declaration header
10054 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10057 void
10058 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10059 xmlChar *version;
10062 * This value for standalone indicates that the document has an
10063 * XML declaration but it does not have a standalone attribute.
10064 * It will be overwritten later if a standalone attribute is found.
10066 ctxt->input->standalone = -2;
10069 * We know that '<?xml' is here.
10071 SKIP(5);
10073 if (!IS_BLANK_CH(RAW)) {
10074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10075 "Blank needed after '<?xml'\n");
10077 SKIP_BLANKS;
10080 * We must have the VersionInfo here.
10082 version = xmlParseVersionInfo(ctxt);
10083 if (version == NULL) {
10084 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10085 } else {
10086 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10088 * Changed here for XML-1.0 5th edition
10090 if (ctxt->options & XML_PARSE_OLD10) {
10091 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10092 "Unsupported version '%s'\n",
10093 version);
10094 } else {
10095 if ((version[0] == '1') && ((version[1] == '.'))) {
10096 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10097 "Unsupported version '%s'\n",
10098 version, NULL);
10099 } else {
10100 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10101 "Unsupported version '%s'\n",
10102 version);
10106 if (ctxt->version != NULL)
10107 xmlFree((void *) ctxt->version);
10108 ctxt->version = version;
10112 * We may have the encoding declaration
10114 if (!IS_BLANK_CH(RAW)) {
10115 if ((RAW == '?') && (NXT(1) == '>')) {
10116 SKIP(2);
10117 return;
10119 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10121 xmlParseEncodingDecl(ctxt);
10122 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10124 * The XML REC instructs us to stop parsing right here
10126 return;
10130 * We may have the standalone status.
10132 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10133 if ((RAW == '?') && (NXT(1) == '>')) {
10134 SKIP(2);
10135 return;
10137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10141 * We can grow the input buffer freely at that point
10143 GROW;
10145 SKIP_BLANKS;
10146 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10148 SKIP_BLANKS;
10149 if ((RAW == '?') && (NXT(1) == '>')) {
10150 SKIP(2);
10151 } else if (RAW == '>') {
10152 /* Deprecated old WD ... */
10153 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10154 NEXT;
10155 } else {
10156 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10157 MOVETO_ENDTAG(CUR_PTR);
10158 NEXT;
10163 * xmlParseMisc:
10164 * @ctxt: an XML parser context
10166 * parse an XML Misc* optional field.
10168 * [27] Misc ::= Comment | PI | S
10171 void
10172 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10173 while ((ctxt->instate != XML_PARSER_EOF) &&
10174 (((RAW == '<') && (NXT(1) == '?')) ||
10175 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10176 IS_BLANK_CH(CUR))) {
10177 if ((RAW == '<') && (NXT(1) == '?')) {
10178 xmlParsePI(ctxt);
10179 } else if (IS_BLANK_CH(CUR)) {
10180 NEXT;
10181 } else
10182 xmlParseComment(ctxt);
10187 * xmlParseDocument:
10188 * @ctxt: an XML parser context
10190 * parse an XML document (and build a tree if using the standard SAX
10191 * interface).
10193 * [1] document ::= prolog element Misc*
10195 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10197 * Returns 0, -1 in case of error. the parser context is augmented
10198 * as a result of the parsing.
10202 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10203 xmlChar start[4];
10204 xmlCharEncoding enc;
10206 xmlInitParser();
10208 if ((ctxt == NULL) || (ctxt->input == NULL))
10209 return(-1);
10211 GROW;
10214 * SAX: detecting the level.
10216 xmlDetectSAX2(ctxt);
10219 * SAX: beginning of the document processing.
10221 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10222 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10223 if (ctxt->instate == XML_PARSER_EOF)
10224 return(-1);
10226 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10227 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10229 * Get the 4 first bytes and decode the charset
10230 * if enc != XML_CHAR_ENCODING_NONE
10231 * plug some encoding conversion routines.
10233 start[0] = RAW;
10234 start[1] = NXT(1);
10235 start[2] = NXT(2);
10236 start[3] = NXT(3);
10237 enc = xmlDetectCharEncoding(&start[0], 4);
10238 if (enc != XML_CHAR_ENCODING_NONE) {
10239 xmlSwitchEncoding(ctxt, enc);
10244 if (CUR == 0) {
10245 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10249 * Check for the XMLDecl in the Prolog.
10250 * do not GROW here to avoid the detected encoder to decode more
10251 * than just the first line, unless the amount of data is really
10252 * too small to hold "<?xml version="1.0" encoding="foo"
10254 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10255 GROW;
10257 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10260 * Note that we will switch encoding on the fly.
10262 xmlParseXMLDecl(ctxt);
10263 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10265 * The XML REC instructs us to stop parsing right here
10267 return(-1);
10269 ctxt->standalone = ctxt->input->standalone;
10270 SKIP_BLANKS;
10271 } else {
10272 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10274 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10275 ctxt->sax->startDocument(ctxt->userData);
10276 if (ctxt->instate == XML_PARSER_EOF)
10277 return(-1);
10280 * The Misc part of the Prolog
10282 GROW;
10283 xmlParseMisc(ctxt);
10286 * Then possibly doc type declaration(s) and more Misc
10287 * (doctypedecl Misc*)?
10289 GROW;
10290 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10292 ctxt->inSubset = 1;
10293 xmlParseDocTypeDecl(ctxt);
10294 if (RAW == '[') {
10295 ctxt->instate = XML_PARSER_DTD;
10296 xmlParseInternalSubset(ctxt);
10297 if (ctxt->instate == XML_PARSER_EOF)
10298 return(-1);
10302 * Create and update the external subset.
10304 ctxt->inSubset = 2;
10305 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10306 (!ctxt->disableSAX))
10307 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10308 ctxt->extSubSystem, ctxt->extSubURI);
10309 if (ctxt->instate == XML_PARSER_EOF)
10310 return(-1);
10311 ctxt->inSubset = 0;
10313 xmlCleanSpecialAttr(ctxt);
10315 ctxt->instate = XML_PARSER_PROLOG;
10316 xmlParseMisc(ctxt);
10320 * Time to start parsing the tree itself
10322 GROW;
10323 if (RAW != '<') {
10324 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10325 "Start tag expected, '<' not found\n");
10326 } else {
10327 ctxt->instate = XML_PARSER_CONTENT;
10328 xmlParseElement(ctxt);
10329 ctxt->instate = XML_PARSER_EPILOG;
10333 * The Misc part at the end
10335 xmlParseMisc(ctxt);
10337 if (RAW != 0) {
10338 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10340 ctxt->instate = XML_PARSER_EOF;
10344 * SAX: end of the document processing.
10346 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10347 ctxt->sax->endDocument(ctxt->userData);
10350 * Remove locally kept entity definitions if the tree was not built
10352 if ((ctxt->myDoc != NULL) &&
10353 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10354 xmlFreeDoc(ctxt->myDoc);
10355 ctxt->myDoc = NULL;
10358 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10359 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10360 if (ctxt->valid)
10361 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10362 if (ctxt->nsWellFormed)
10363 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10364 if (ctxt->options & XML_PARSE_OLD10)
10365 ctxt->myDoc->properties |= XML_DOC_OLD10;
10367 if (! ctxt->wellFormed) {
10368 ctxt->valid = 0;
10369 return(-1);
10371 return(0);
10375 * xmlParseExtParsedEnt:
10376 * @ctxt: an XML parser context
10378 * parse a general parsed entity
10379 * An external general parsed entity is well-formed if it matches the
10380 * production labeled extParsedEnt.
10382 * [78] extParsedEnt ::= TextDecl? content
10384 * Returns 0, -1 in case of error. the parser context is augmented
10385 * as a result of the parsing.
10389 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10390 xmlChar start[4];
10391 xmlCharEncoding enc;
10393 if ((ctxt == NULL) || (ctxt->input == NULL))
10394 return(-1);
10396 xmlDefaultSAXHandlerInit();
10398 xmlDetectSAX2(ctxt);
10400 GROW;
10403 * SAX: beginning of the document processing.
10405 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10406 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10409 * Get the 4 first bytes and decode the charset
10410 * if enc != XML_CHAR_ENCODING_NONE
10411 * plug some encoding conversion routines.
10413 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10414 start[0] = RAW;
10415 start[1] = NXT(1);
10416 start[2] = NXT(2);
10417 start[3] = NXT(3);
10418 enc = xmlDetectCharEncoding(start, 4);
10419 if (enc != XML_CHAR_ENCODING_NONE) {
10420 xmlSwitchEncoding(ctxt, enc);
10425 if (CUR == 0) {
10426 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10430 * Check for the XMLDecl in the Prolog.
10432 GROW;
10433 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10436 * Note that we will switch encoding on the fly.
10438 xmlParseXMLDecl(ctxt);
10439 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10441 * The XML REC instructs us to stop parsing right here
10443 return(-1);
10445 SKIP_BLANKS;
10446 } else {
10447 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10449 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10450 ctxt->sax->startDocument(ctxt->userData);
10451 if (ctxt->instate == XML_PARSER_EOF)
10452 return(-1);
10455 * Doing validity checking on chunk doesn't make sense
10457 ctxt->instate = XML_PARSER_CONTENT;
10458 ctxt->validate = 0;
10459 ctxt->loadsubset = 0;
10460 ctxt->depth = 0;
10462 xmlParseContent(ctxt);
10463 if (ctxt->instate == XML_PARSER_EOF)
10464 return(-1);
10466 if ((RAW == '<') && (NXT(1) == '/')) {
10467 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10468 } else if (RAW != 0) {
10469 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10473 * SAX: end of the document processing.
10475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10476 ctxt->sax->endDocument(ctxt->userData);
10478 if (! ctxt->wellFormed) return(-1);
10479 return(0);
10482 #ifdef LIBXML_PUSH_ENABLED
10483 /************************************************************************
10485 * Progressive parsing interfaces *
10487 ************************************************************************/
10490 * xmlParseLookupSequence:
10491 * @ctxt: an XML parser context
10492 * @first: the first char to lookup
10493 * @next: the next char to lookup or zero
10494 * @third: the next char to lookup or zero
10496 * Try to find if a sequence (first, next, third) or just (first next) or
10497 * (first) is available in the input stream.
10498 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10499 * to avoid rescanning sequences of bytes, it DOES change the state of the
10500 * parser, do not use liberally.
10502 * Returns the index to the current parsing point if the full sequence
10503 * is available, -1 otherwise.
10505 static int
10506 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10507 xmlChar next, xmlChar third) {
10508 int base, len;
10509 xmlParserInputPtr in;
10510 const xmlChar *buf;
10512 in = ctxt->input;
10513 if (in == NULL) return(-1);
10514 base = in->cur - in->base;
10515 if (base < 0) return(-1);
10516 if (ctxt->checkIndex > base)
10517 base = ctxt->checkIndex;
10518 if (in->buf == NULL) {
10519 buf = in->base;
10520 len = in->length;
10521 } else {
10522 buf = in->buf->buffer->content;
10523 len = in->buf->buffer->use;
10525 /* take into account the sequence length */
10526 if (third) len -= 2;
10527 else if (next) len --;
10528 for (;base < len;base++) {
10529 if (buf[base] == first) {
10530 if (third != 0) {
10531 if ((buf[base + 1] != next) ||
10532 (buf[base + 2] != third)) continue;
10533 } else if (next != 0) {
10534 if (buf[base + 1] != next) continue;
10536 ctxt->checkIndex = 0;
10537 #ifdef DEBUG_PUSH
10538 if (next == 0)
10539 xmlGenericError(xmlGenericErrorContext,
10540 "PP: lookup '%c' found at %d\n",
10541 first, base);
10542 else if (third == 0)
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: lookup '%c%c' found at %d\n",
10545 first, next, base);
10546 else
10547 xmlGenericError(xmlGenericErrorContext,
10548 "PP: lookup '%c%c%c' found at %d\n",
10549 first, next, third, base);
10550 #endif
10551 return(base - (in->cur - in->base));
10554 ctxt->checkIndex = base;
10555 #ifdef DEBUG_PUSH
10556 if (next == 0)
10557 xmlGenericError(xmlGenericErrorContext,
10558 "PP: lookup '%c' failed\n", first);
10559 else if (third == 0)
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: lookup '%c%c' failed\n", first, next);
10562 else
10563 xmlGenericError(xmlGenericErrorContext,
10564 "PP: lookup '%c%c%c' failed\n", first, next, third);
10565 #endif
10566 return(-1);
10570 * xmlParseGetLasts:
10571 * @ctxt: an XML parser context
10572 * @lastlt: pointer to store the last '<' from the input
10573 * @lastgt: pointer to store the last '>' from the input
10575 * Lookup the last < and > in the current chunk
10577 static void
10578 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10579 const xmlChar **lastgt) {
10580 const xmlChar *tmp;
10582 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10583 xmlGenericError(xmlGenericErrorContext,
10584 "Internal error: xmlParseGetLasts\n");
10585 return;
10587 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10588 tmp = ctxt->input->end;
10589 tmp--;
10590 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10591 if (tmp < ctxt->input->base) {
10592 *lastlt = NULL;
10593 *lastgt = NULL;
10594 } else {
10595 *lastlt = tmp;
10596 tmp++;
10597 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10598 if (*tmp == '\'') {
10599 tmp++;
10600 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10601 if (tmp < ctxt->input->end) tmp++;
10602 } else if (*tmp == '"') {
10603 tmp++;
10604 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10605 if (tmp < ctxt->input->end) tmp++;
10606 } else
10607 tmp++;
10609 if (tmp < ctxt->input->end)
10610 *lastgt = tmp;
10611 else {
10612 tmp = *lastlt;
10613 tmp--;
10614 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10615 if (tmp >= ctxt->input->base)
10616 *lastgt = tmp;
10617 else
10618 *lastgt = NULL;
10621 } else {
10622 *lastlt = NULL;
10623 *lastgt = NULL;
10627 * xmlCheckCdataPush:
10628 * @cur: pointer to the bock of characters
10629 * @len: length of the block in bytes
10631 * Check that the block of characters is okay as SCdata content [20]
10633 * Returns the number of bytes to pass if okay, a negative index where an
10634 * UTF-8 error occured otherwise
10636 static int
10637 xmlCheckCdataPush(const xmlChar *utf, int len) {
10638 int ix;
10639 unsigned char c;
10640 int codepoint;
10642 if ((utf == NULL) || (len <= 0))
10643 return(0);
10645 for (ix = 0; ix < len;) { /* string is 0-terminated */
10646 c = utf[ix];
10647 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10648 if (c >= 0x20)
10649 ix++;
10650 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10651 ix++;
10652 else
10653 return(-ix);
10654 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10655 if (ix + 2 > len) return(ix);
10656 if ((utf[ix+1] & 0xc0 ) != 0x80)
10657 return(-ix);
10658 codepoint = (utf[ix] & 0x1f) << 6;
10659 codepoint |= utf[ix+1] & 0x3f;
10660 if (!xmlIsCharQ(codepoint))
10661 return(-ix);
10662 ix += 2;
10663 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10664 if (ix + 3 > len) return(ix);
10665 if (((utf[ix+1] & 0xc0) != 0x80) ||
10666 ((utf[ix+2] & 0xc0) != 0x80))
10667 return(-ix);
10668 codepoint = (utf[ix] & 0xf) << 12;
10669 codepoint |= (utf[ix+1] & 0x3f) << 6;
10670 codepoint |= utf[ix+2] & 0x3f;
10671 if (!xmlIsCharQ(codepoint))
10672 return(-ix);
10673 ix += 3;
10674 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10675 if (ix + 4 > len) return(ix);
10676 if (((utf[ix+1] & 0xc0) != 0x80) ||
10677 ((utf[ix+2] & 0xc0) != 0x80) ||
10678 ((utf[ix+3] & 0xc0) != 0x80))
10679 return(-ix);
10680 codepoint = (utf[ix] & 0x7) << 18;
10681 codepoint |= (utf[ix+1] & 0x3f) << 12;
10682 codepoint |= (utf[ix+2] & 0x3f) << 6;
10683 codepoint |= utf[ix+3] & 0x3f;
10684 if (!xmlIsCharQ(codepoint))
10685 return(-ix);
10686 ix += 4;
10687 } else /* unknown encoding */
10688 return(-ix);
10690 return(ix);
/*
 * xmlParseTryOrFinish drives the push parser: it loops while
 * ctxt->instate is not XML_PARSER_EOF, works out how many bytes lie
 * after the cursor (avail) and dispatches on ctxt->instate.  Each state
 * either consumes input and selects the next state, or jumps to "done"
 * when the construct it needs is not completely buffered yet and
 * @terminate is not set.
 *
 * The value returned through "done" is ret, which starts at 0 and is
 * only ever changed by the "ret += 5" of the XML declaration branch;
 * several early exits return 0 directly.
 *
 * NOTE(review): in this listing every line carries a leading blob line
 * number and lines that held only a brace or a comment delimiter appear
 * to be missing - confirm against the pristine parser.c before building.
 */
10694 * xmlParseTryOrFinish:
10695 * @ctxt: an XML parser context
10696 * @terminate: last chunk indicator
10698 * Try to progress on parsing
10700 * Returns zero if no parsing was possible
10702 static int
10703 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10704 int ret = 0;
10705 int avail, tlen;
10706 xmlChar cur, next;
10707 const xmlChar *lastlt, *lastgt;
10709 if (ctxt->input == NULL)
10710 return(0);
/* Debug builds only: trace the state the parser is in on entry. */
10712 #ifdef DEBUG_PUSH
10713 switch (ctxt->instate) {
10714 case XML_PARSER_EOF:
10715 xmlGenericError(xmlGenericErrorContext,
10716 "PP: try EOF\n"); break;
10717 case XML_PARSER_START:
10718 xmlGenericError(xmlGenericErrorContext,
10719 "PP: try START\n"); break;
10720 case XML_PARSER_MISC:
10721 xmlGenericError(xmlGenericErrorContext,
10722 "PP: try MISC\n");break;
10723 case XML_PARSER_COMMENT:
10724 xmlGenericError(xmlGenericErrorContext,
10725 "PP: try COMMENT\n");break;
10726 case XML_PARSER_PROLOG:
10727 xmlGenericError(xmlGenericErrorContext,
10728 "PP: try PROLOG\n");break;
10729 case XML_PARSER_START_TAG:
10730 xmlGenericError(xmlGenericErrorContext,
10731 "PP: try START_TAG\n");break;
10732 case XML_PARSER_CONTENT:
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: try CONTENT\n");break;
10735 case XML_PARSER_CDATA_SECTION:
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: try CDATA_SECTION\n");break;
10738 case XML_PARSER_END_TAG:
10739 xmlGenericError(xmlGenericErrorContext,
10740 "PP: try END_TAG\n");break;
10741 case XML_PARSER_ENTITY_DECL:
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: try ENTITY_DECL\n");break;
10744 case XML_PARSER_ENTITY_VALUE:
10745 xmlGenericError(xmlGenericErrorContext,
10746 "PP: try ENTITY_VALUE\n");break;
10747 case XML_PARSER_ATTRIBUTE_VALUE:
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: try ATTRIBUTE_VALUE\n");break;
10750 case XML_PARSER_DTD:
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: try DTD\n");break;
10753 case XML_PARSER_EPILOG:
10754 xmlGenericError(xmlGenericErrorContext,
10755 "PP: try EPILOG\n");break;
10756 case XML_PARSER_PI:
10757 xmlGenericError(xmlGenericErrorContext,
10758 "PP: try PI\n");break;
10759 case XML_PARSER_IGNORE:
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: try IGNORE\n");break;
10763 #endif
/*
 * Once more than 4096 bytes sit between input->base and the cursor,
 * shrink the input and reset checkIndex.
 */
10765 if ((ctxt->input != NULL) &&
10766 (ctxt->input->cur - ctxt->input->base > 4096)) {
10767 xmlSHRINK(ctxt);
10768 ctxt->checkIndex = 0;
/*
 * lastlt/lastgt are filled in by xmlParseGetLasts; the START_TAG,
 * CONTENT and END_TAG states compare the cursor against them when
 * ctxt->progressive is set.
 */
10770 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10772 while (ctxt->instate != XML_PARSER_EOF) {
/* Stop as soon as an error is recorded and SAX has been disabled. */
10773 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10774 return(0);
10778 * Pop-up of finished entities.
10780 while ((RAW == 0) && (ctxt->inputNr > 1))
10781 xmlPopInput(ctxt);
10783 if (ctxt->input == NULL) break;
/*
 * avail = number of bytes after the cursor: taken from input->length
 * when the input has no buffer, from buffer->use otherwise.
 */
10784 if (ctxt->input->buf == NULL)
10785 avail = ctxt->input->length -
10786 (ctxt->input->cur - ctxt->input->base);
10787 else {
10789 * If we are operating on converted input, try to flush
10790 * remainng chars to avoid them stalling in the non-converted
10791 * buffer.
10793 if ((ctxt->input->buf->raw != NULL) &&
10794 (ctxt->input->buf->raw->use > 0)) {
10795 int base = ctxt->input->base -
10796 ctxt->input->buf->buffer->content;
10797 int current = ctxt->input->cur - ctxt->input->base;
/*
 * The zero-length push is bracketed by saving base/cur as offsets and
 * rebuilding base, cur and end from buffer->content afterwards.
 */
10799 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10800 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10801 ctxt->input->cur = ctxt->input->base + current;
10802 ctxt->input->end =
10803 &ctxt->input->buf->buffer->content[
10804 ctxt->input->buf->buffer->use];
10806 avail = ctxt->input->buf->buffer->use -
10807 (ctxt->input->cur - ctxt->input->base);
/* Nothing after the cursor: leave through the common exit. */
10809 if (avail < 1)
10810 goto done;
10811 switch (ctxt->instate) {
10812 case XML_PARSER_EOF:
10814 * Document parsing is done !
10816 goto done;
/*
 * START: while charset is still XML_CHAR_ENCODING_NONE, detect the
 * encoding from the first four bytes and switch to it; on a later pass
 * handle an optional XML declaration and move on to MISC.
 */
10817 case XML_PARSER_START:
10818 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10819 xmlChar start[4];
10820 xmlCharEncoding enc;
10823 * Very first chars read from the document flow.
10825 if (avail < 4)
10826 goto done;
10829 * Get the 4 first bytes and decode the charset
10830 * if enc != XML_CHAR_ENCODING_NONE
10831 * plug some encoding conversion routines,
10832 * else xmlSwitchEncoding will set to (default)
10833 * UTF8.
10835 start[0] = RAW;
10836 start[1] = NXT(1);
10837 start[2] = NXT(2);
10838 start[3] = NXT(3);
10839 enc = xmlDetectCharEncoding(start, 4);
10840 xmlSwitchEncoding(ctxt, enc);
10841 break;
10844 if (avail < 2)
10845 goto done;
10846 cur = ctxt->input->cur[0];
10847 next = ctxt->input->cur[1];
/* A NUL first byte is reported as an empty document and ends parsing. */
10848 if (cur == 0) {
10849 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10850 ctxt->sax->setDocumentLocator(ctxt->userData,
10851 &xmlDefaultSAXLocator);
10852 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10853 ctxt->instate = XML_PARSER_EOF;
10854 #ifdef DEBUG_PUSH
10855 xmlGenericError(xmlGenericErrorContext,
10856 "PP: entering EOF\n");
10857 #endif
10858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859 ctxt->sax->endDocument(ctxt->userData);
10860 goto done;
10862 if ((cur == '<') && (next == '?')) {
10863 /* PI or XML decl */
10864 if (avail < 5) return(ret);
10865 if ((!terminate) &&
10866 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10867 return(ret);
10868 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10869 ctxt->sax->setDocumentLocator(ctxt->userData,
10870 &xmlDefaultSAXLocator);
/*
 * "<?xml" followed by a blank is parsed as the XML declaration; any
 * other "<?" is not consumed here, the state simply becomes MISC.
 * NOTE(review): cur[5] is read although only avail >= 5 was checked -
 * confirm the buffer always has a readable byte after its data.
 */
10871 if ((ctxt->input->cur[2] == 'x') &&
10872 (ctxt->input->cur[3] == 'm') &&
10873 (ctxt->input->cur[4] == 'l') &&
10874 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10875 ret += 5;
10876 #ifdef DEBUG_PUSH
10877 xmlGenericError(xmlGenericErrorContext,
10878 "PP: Parsing XML Decl\n");
10879 #endif
10880 xmlParseXMLDecl(ctxt);
10881 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10883 * The XML REC instructs us to stop parsing right
10884 * here
10886 ctxt->instate = XML_PARSER_EOF;
10887 return(0);
10889 ctxt->standalone = ctxt->input->standalone;
10890 if ((ctxt->encoding == NULL) &&
10891 (ctxt->input->encoding != NULL))
10892 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10893 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10894 (!ctxt->disableSAX))
10895 ctxt->sax->startDocument(ctxt->userData);
10896 ctxt->instate = XML_PARSER_MISC;
10897 #ifdef DEBUG_PUSH
10898 xmlGenericError(xmlGenericErrorContext,
10899 "PP: entering MISC\n");
10900 #endif
10901 } else {
/*
 * NOTE(review): unlike the no-"<?" branch further down, the result of
 * xmlCharStrdup is not checked for NULL here - confirm whether that is
 * intentional.
 */
10902 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10903 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10904 (!ctxt->disableSAX))
10905 ctxt->sax->startDocument(ctxt->userData);
10906 ctxt->instate = XML_PARSER_MISC;
10907 #ifdef DEBUG_PUSH
10908 xmlGenericError(xmlGenericErrorContext,
10909 "PP: entering MISC\n");
10910 #endif
10912 } else {
10913 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10914 ctxt->sax->setDocumentLocator(ctxt->userData,
10915 &xmlDefaultSAXLocator);
10916 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917 if (ctxt->version == NULL) {
10918 xmlErrMemory(ctxt, NULL);
10919 break;
10921 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10922 (!ctxt->disableSAX))
10923 ctxt->sax->startDocument(ctxt->userData);
10924 ctxt->instate = XML_PARSER_MISC;
10925 #ifdef DEBUG_PUSH
10926 xmlGenericError(xmlGenericErrorContext,
10927 "PP: entering MISC\n");
10928 #endif
10930 break;
/*
 * START_TAG: parse one start tag through the SAX2 or SAX1 routine.  An
 * empty element ("/>") gets its end callback immediately and leads to
 * EPILOG or CONTENT; otherwise the name is pushed and the state becomes
 * CONTENT.
 */
10931 case XML_PARSER_START_TAG: {
10932 const xmlChar *name;
10933 const xmlChar *prefix = NULL;
10934 const xmlChar *URI = NULL;
10935 int nsNr = ctxt->nsNr;
10937 if ((avail < 2) && (ctxt->inputNr == 1))
10938 goto done;
10939 cur = ctxt->input->cur[0];
10940 if (cur != '<') {
10941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10942 ctxt->instate = XML_PARSER_EOF;
10943 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10944 ctxt->sax->endDocument(ctxt->userData);
10945 goto done;
/*
 * Unless terminating, bail out when no '>' is available beyond the
 * cursor: lastgt is consulted in progressive mode, a lookup otherwise.
 */
10947 if (!terminate) {
10948 if (ctxt->progressive) {
10949 /* > can be found unescaped in attribute values */
10950 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10951 goto done;
10952 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10953 goto done;
/*
 * Push a space-stack entry for the element: -1 when the stack is empty
 * or its top is -2, otherwise a copy of the current top.
 */
10956 if (ctxt->spaceNr == 0)
10957 spacePush(ctxt, -1);
10958 else if (*ctxt->space == -2)
10959 spacePush(ctxt, -1);
10960 else
10961 spacePush(ctxt, *ctxt->space);
10962 #ifdef LIBXML_SAX1_ENABLED
10963 if (ctxt->sax2)
10964 #endif /* LIBXML_SAX1_ENABLED */
10965 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10966 #ifdef LIBXML_SAX1_ENABLED
10967 else
10968 name = xmlParseStartTag(ctxt);
10969 #endif /* LIBXML_SAX1_ENABLED */
10970 if (ctxt->instate == XML_PARSER_EOF)
10971 goto done;
/* No name could be parsed: undo the space push and end the document. */
10972 if (name == NULL) {
10973 spacePop(ctxt);
10974 ctxt->instate = XML_PARSER_EOF;
10975 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10976 ctxt->sax->endDocument(ctxt->userData);
10977 goto done;
10979 #ifdef LIBXML_VALID_ENABLED
10981 * [ VC: Root Element Type ]
10982 * The Name in the document type declaration must match
10983 * the element type of the root element.
10985 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10986 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10987 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10988 #endif /* LIBXML_VALID_ENABLED */
10991 * Check for an Empty Element.
10993 if ((RAW == '/') && (NXT(1) == '>')) {
10994 SKIP(2);
10996 if (ctxt->sax2) {
10997 if ((ctxt->sax != NULL) &&
10998 (ctxt->sax->endElementNs != NULL) &&
10999 (!ctxt->disableSAX))
11000 ctxt->sax->endElementNs(ctxt->userData, name,
11001 prefix, URI);
11002 if (ctxt->nsNr - nsNr > 0)
11003 nsPop(ctxt, ctxt->nsNr - nsNr);
11004 #ifdef LIBXML_SAX1_ENABLED
11005 } else {
11006 if ((ctxt->sax != NULL) &&
11007 (ctxt->sax->endElement != NULL) &&
11008 (!ctxt->disableSAX))
11009 ctxt->sax->endElement(ctxt->userData, name);
11010 #endif /* LIBXML_SAX1_ENABLED */
11012 if (ctxt->instate == XML_PARSER_EOF)
11013 goto done;
11014 spacePop(ctxt);
11015 if (ctxt->nameNr == 0) {
11016 ctxt->instate = XML_PARSER_EPILOG;
11017 } else {
11018 ctxt->instate = XML_PARSER_CONTENT;
11020 break;
11022 if (RAW == '>') {
11023 NEXT;
11024 } else {
11025 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11026 "Couldn't find end of Start Tag %s\n",
11027 name);
11028 nodePop(ctxt);
11029 spacePop(ctxt);
/*
 * The name is pushed on both paths above; the SAX2 variant also records
 * how many namespaces this element added (ctxt->nsNr - nsNr).
 */
11031 if (ctxt->sax2)
11032 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11033 #ifdef LIBXML_SAX1_ENABLED
11034 else
11035 namePush(ctxt, name);
11036 #endif /* LIBXML_SAX1_ENABLED */
11038 ctxt->instate = XML_PARSER_CONTENT;
11039 break;
/*
 * CONTENT: dispatch on the first bytes after the cursor: end tag, PI,
 * start tag, comment, CDATA section, reference or character data.
 * test/cons remember the position so that a pass which consumed nothing
 * can be detected at the end of the case.
 * NOTE(review): the comment and CDATA tests read cur[2]..cur[8] before
 * the "avail < 4" / "avail < 9" checks - confirm the over-read is safe.
 */
11041 case XML_PARSER_CONTENT: {
11042 const xmlChar *test;
11043 unsigned int cons;
11044 if ((avail < 2) && (ctxt->inputNr == 1))
11045 goto done;
11046 cur = ctxt->input->cur[0];
11047 next = ctxt->input->cur[1];
11049 test = CUR_PTR;
11050 cons = ctxt->input->consumed;
11051 if ((cur == '<') && (next == '/')) {
11052 ctxt->instate = XML_PARSER_END_TAG;
11053 break;
11054 } else if ((cur == '<') && (next == '?')) {
11055 if ((!terminate) &&
11056 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11057 goto done;
11058 xmlParsePI(ctxt);
11059 } else if ((cur == '<') && (next != '!')) {
11060 ctxt->instate = XML_PARSER_START_TAG;
11061 break;
11062 } else if ((cur == '<') && (next == '!') &&
11063 (ctxt->input->cur[2] == '-') &&
11064 (ctxt->input->cur[3] == '-')) {
11065 int term;
11067 if (avail < 4)
11068 goto done;
/*
 * The cursor is moved past "<!--" only for the duration of the lookup
 * and restored right after it.
 */
11069 ctxt->input->cur += 4;
11070 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11071 ctxt->input->cur -= 4;
11072 if ((!terminate) && (term < 0))
11073 goto done;
11074 xmlParseComment(ctxt);
11075 ctxt->instate = XML_PARSER_CONTENT;
11076 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11077 (ctxt->input->cur[2] == '[') &&
11078 (ctxt->input->cur[3] == 'C') &&
11079 (ctxt->input->cur[4] == 'D') &&
11080 (ctxt->input->cur[5] == 'A') &&
11081 (ctxt->input->cur[6] == 'T') &&
11082 (ctxt->input->cur[7] == 'A') &&
11083 (ctxt->input->cur[8] == '[')) {
11084 SKIP(9);
11085 ctxt->instate = XML_PARSER_CDATA_SECTION;
11086 break;
11087 } else if ((cur == '<') && (next == '!') &&
11088 (avail < 9)) {
11089 goto done;
11090 } else if (cur == '&') {
11091 if ((!terminate) &&
11092 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11093 goto done;
11094 xmlParseReference(ctxt);
11095 } else {
11096 /* TODO Avoid the extra copy, handle directly !!! */
11098 * Goal of the following test is:
11099 * - minimize calls to the SAX 'character' callback
11100 * when they are mergeable
11101 * - handle an problem for isBlank when we only parse
11102 * a sequence of blank chars and the next one is
11103 * not available to check against '<' presence.
11104 * - tries to homogenize the differences in SAX
11105 * callbacks between the push and pull versions
11106 * of the parser.
11108 if ((ctxt->inputNr == 1) &&
11109 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11110 if (!terminate) {
11111 if (ctxt->progressive) {
11112 if ((lastlt == NULL) ||
11113 (ctxt->input->cur > lastlt))
11114 goto done;
11115 } else if (xmlParseLookupSequence(ctxt,
11116 '<', 0, 0) < 0) {
11117 goto done;
11121 ctxt->checkIndex = 0;
11122 xmlParseCharData(ctxt, 0);
11125 * Pop-up of finished entities.
11127 while ((RAW == 0) && (ctxt->inputNr > 1))
11128 xmlPopInput(ctxt);
/*
 * Neither the consumed counter nor the cursor moved during this pass:
 * report an internal error and stop parsing.
 */
11129 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11130 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11131 "detected an error in element content\n");
11132 ctxt->instate = XML_PARSER_EOF;
11133 break;
11135 break;
/*
 * END_TAG: parse the end tag, then continue with EPILOG when the name
 * stack is empty or with CONTENT otherwise (an EOF state set by the
 * parse routine is left untouched).
 */
11137 case XML_PARSER_END_TAG:
11138 if (avail < 2)
11139 goto done;
11140 if (!terminate) {
11141 if (ctxt->progressive) {
11142 /* > can be found unescaped in attribute values */
11143 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11144 goto done;
11145 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11146 goto done;
/*
 * SAX2: the three pushTab slots of the innermost element
 * (index nameNr * 3 - 3 .. nameNr * 3 - 1) are handed to
 * xmlParseEndTag2 before the name is popped.
 */
11149 if (ctxt->sax2) {
11150 xmlParseEndTag2(ctxt,
11151 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11152 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11153 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11154 nameNsPop(ctxt);
11156 #ifdef LIBXML_SAX1_ENABLED
11157 else
11158 xmlParseEndTag1(ctxt, 0);
11159 #endif /* LIBXML_SAX1_ENABLED */
11160 if (ctxt->instate == XML_PARSER_EOF) {
11161 /* Nothing */
11162 } else if (ctxt->nameNr == 0) {
11163 ctxt->instate = XML_PARSER_EPILOG;
11164 } else {
11165 ctxt->instate = XML_PARSER_CONTENT;
11167 break;
/*
 * CDATA_SECTION: look for "]]>".  When it is not buffered yet, a piece
 * is delivered only once at least XML_PARSER_BIG_BUFFER_SIZE + 2 bytes
 * are available, and the function then returns through "done".  When it
 * is found, the content up to it is delivered, the terminator is
 * skipped and the state goes back to CONTENT.
 */
11168 case XML_PARSER_CDATA_SECTION: {
11170 * The Push mode need to have the SAX callback for
11171 * cdataBlock merge back contiguous callbacks.
11173 int base;
11175 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11176 if (base < 0) {
11177 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11178 int tmp;
11180 tmp = xmlCheckCdataPush(ctxt->input->cur,
11181 XML_PARSER_BIG_BUFFER_SIZE);
/*
 * A negative result is treated as an encoding problem: its magnitude
 * advances the cursor before the error is reported.
 */
11182 if (tmp < 0) {
11183 tmp = -tmp;
11184 ctxt->input->cur += tmp;
11185 goto encoding_error;
11187 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11188 if (ctxt->sax->cdataBlock != NULL)
11189 ctxt->sax->cdataBlock(ctxt->userData,
11190 ctxt->input->cur, tmp);
11191 else if (ctxt->sax->characters != NULL)
11192 ctxt->sax->characters(ctxt->userData,
11193 ctxt->input->cur, tmp);
11195 if (ctxt->instate == XML_PARSER_EOF)
11196 goto done;
11197 SKIPL(tmp);
11198 ctxt->checkIndex = 0;
11200 goto done;
11201 } else {
11202 int tmp;
11204 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11205 if ((tmp < 0) || (tmp != base)) {
11206 tmp = -tmp;
11207 ctxt->input->cur += tmp;
11208 goto encoding_error;
11210 if ((ctxt->sax != NULL) && (base == 0) &&
11211 (ctxt->sax->cdataBlock != NULL) &&
11212 (!ctxt->disableSAX)) {
11214 * Special case to provide identical behaviour
11215 * between pull and push parsers on enpty CDATA
11216 * sections
11218 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11219 (!strncmp((const char *)&ctxt->input->cur[-9],
11220 "<![CDATA[", 9)))
11221 ctxt->sax->cdataBlock(ctxt->userData,
11222 BAD_CAST "", 0);
11223 } else if ((ctxt->sax != NULL) && (base > 0) &&
11224 (!ctxt->disableSAX)) {
11225 if (ctxt->sax->cdataBlock != NULL)
11226 ctxt->sax->cdataBlock(ctxt->userData,
11227 ctxt->input->cur, base);
11228 else if (ctxt->sax->characters != NULL)
11229 ctxt->sax->characters(ctxt->userData,
11230 ctxt->input->cur, base);
11232 if (ctxt->instate == XML_PARSER_EOF)
11233 goto done;
/* base content bytes plus the three bytes of "]]>". */
11234 SKIPL(base + 3);
11235 ctxt->checkIndex = 0;
11236 ctxt->instate = XML_PARSER_CONTENT;
11237 #ifdef DEBUG_PUSH
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: entering CONTENT\n");
11240 #endif
11242 break;
/*
 * MISC: skip blanks, recompute avail, then handle a PI, a comment or a
 * DOCTYPE declaration.  Anything else switches to START_TAG and turns
 * progressive mode on.
 */
11244 case XML_PARSER_MISC:
11245 SKIP_BLANKS;
11246 if (ctxt->input->buf == NULL)
11247 avail = ctxt->input->length -
11248 (ctxt->input->cur - ctxt->input->base);
11249 else
11250 avail = ctxt->input->buf->buffer->use -
11251 (ctxt->input->cur - ctxt->input->base);
11252 if (avail < 2)
11253 goto done;
11254 cur = ctxt->input->cur[0];
11255 next = ctxt->input->cur[1];
11256 if ((cur == '<') && (next == '?')) {
11257 if ((!terminate) &&
11258 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11259 goto done;
11260 #ifdef DEBUG_PUSH
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: Parsing PI\n");
11263 #endif
11264 xmlParsePI(ctxt);
11265 if (ctxt->instate == XML_PARSER_EOF)
11266 goto done;
11267 ctxt->checkIndex = 0;
11268 } else if ((cur == '<') && (next == '!') &&
11269 (ctxt->input->cur[2] == '-') &&
11270 (ctxt->input->cur[3] == '-')) {
11271 if ((!terminate) &&
11272 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11273 goto done;
11274 #ifdef DEBUG_PUSH
11275 xmlGenericError(xmlGenericErrorContext,
11276 "PP: Parsing Comment\n");
11277 #endif
11278 xmlParseComment(ctxt);
11279 if (ctxt->instate == XML_PARSER_EOF)
11280 goto done;
11281 ctxt->instate = XML_PARSER_MISC;
11282 ctxt->checkIndex = 0;
11283 } else if ((cur == '<') && (next == '!') &&
11284 (ctxt->input->cur[2] == 'D') &&
11285 (ctxt->input->cur[3] == 'O') &&
11286 (ctxt->input->cur[4] == 'C') &&
11287 (ctxt->input->cur[5] == 'T') &&
11288 (ctxt->input->cur[6] == 'Y') &&
11289 (ctxt->input->cur[7] == 'P') &&
11290 (ctxt->input->cur[8] == 'E')) {
11291 if ((!terminate) &&
11292 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11293 goto done;
11294 #ifdef DEBUG_PUSH
11295 xmlGenericError(xmlGenericErrorContext,
11296 "PP: Parsing internal subset\n");
11297 #endif
11298 ctxt->inSubset = 1;
11299 xmlParseDocTypeDecl(ctxt);
11300 if (ctxt->instate == XML_PARSER_EOF)
11301 goto done;
/*
 * A '[' after the declaration opens an internal subset, handled by the
 * DTD state; otherwise the external subset callback runs immediately
 * and the state becomes PROLOG.
 */
11302 if (RAW == '[') {
11303 ctxt->instate = XML_PARSER_DTD;
11304 #ifdef DEBUG_PUSH
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: entering DTD\n");
11307 #endif
11308 } else {
11310 * Create and update the external subset.
11312 ctxt->inSubset = 2;
11313 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11314 (ctxt->sax->externalSubset != NULL))
11315 ctxt->sax->externalSubset(ctxt->userData,
11316 ctxt->intSubName, ctxt->extSubSystem,
11317 ctxt->extSubURI);
11318 ctxt->inSubset = 0;
11319 xmlCleanSpecialAttr(ctxt);
11320 ctxt->instate = XML_PARSER_PROLOG;
11321 #ifdef DEBUG_PUSH
11322 xmlGenericError(xmlGenericErrorContext,
11323 "PP: entering PROLOG\n");
11324 #endif
11326 } else if ((cur == '<') && (next == '!') &&
11327 (avail < 9)) {
11328 goto done;
11329 } else {
11330 ctxt->instate = XML_PARSER_START_TAG;
11331 ctxt->progressive = 1;
11332 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11333 #ifdef DEBUG_PUSH
11334 xmlGenericError(xmlGenericErrorContext,
11335 "PP: entering START_TAG\n");
11336 #endif
11338 break;
/*
 * PROLOG: like MISC but without DOCTYPE handling: PIs and comments are
 * parsed, anything else switches to START_TAG.
 */
11339 case XML_PARSER_PROLOG:
11340 SKIP_BLANKS;
11341 if (ctxt->input->buf == NULL)
11342 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11343 else
11344 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11345 if (avail < 2)
11346 goto done;
11347 cur = ctxt->input->cur[0];
11348 next = ctxt->input->cur[1];
11349 if ((cur == '<') && (next == '?')) {
11350 if ((!terminate) &&
11351 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11352 goto done;
11353 #ifdef DEBUG_PUSH
11354 xmlGenericError(xmlGenericErrorContext,
11355 "PP: Parsing PI\n");
11356 #endif
11357 xmlParsePI(ctxt);
11358 if (ctxt->instate == XML_PARSER_EOF)
11359 goto done;
11360 } else if ((cur == '<') && (next == '!') &&
11361 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11362 if ((!terminate) &&
11363 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11364 goto done;
11365 #ifdef DEBUG_PUSH
11366 xmlGenericError(xmlGenericErrorContext,
11367 "PP: Parsing Comment\n");
11368 #endif
11369 xmlParseComment(ctxt);
11370 if (ctxt->instate == XML_PARSER_EOF)
11371 goto done;
11372 ctxt->instate = XML_PARSER_PROLOG;
11373 } else if ((cur == '<') && (next == '!') &&
11374 (avail < 4)) {
11375 goto done;
11376 } else {
11377 ctxt->instate = XML_PARSER_START_TAG;
11378 if (ctxt->progressive == 0)
11379 ctxt->progressive = 1;
11380 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11381 #ifdef DEBUG_PUSH
11382 xmlGenericError(xmlGenericErrorContext,
11383 "PP: entering START_TAG\n");
11384 #endif
11386 break;
/*
 * EPILOG: only PIs and comments are accepted; any other content raises
 * XML_ERR_DOCUMENT_END, ends the document and leaves through "done".
 */
11387 case XML_PARSER_EPILOG:
11388 SKIP_BLANKS;
11389 if (ctxt->input->buf == NULL)
11390 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11391 else
11392 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11393 if (avail < 2)
11394 goto done;
11395 cur = ctxt->input->cur[0];
11396 next = ctxt->input->cur[1];
11397 if ((cur == '<') && (next == '?')) {
11398 if ((!terminate) &&
11399 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11400 goto done;
11401 #ifdef DEBUG_PUSH
11402 xmlGenericError(xmlGenericErrorContext,
11403 "PP: Parsing PI\n");
11404 #endif
11405 xmlParsePI(ctxt);
11406 if (ctxt->instate == XML_PARSER_EOF)
11407 goto done;
11408 ctxt->instate = XML_PARSER_EPILOG;
11409 } else if ((cur == '<') && (next == '!') &&
11410 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11411 if ((!terminate) &&
11412 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11413 goto done;
11414 #ifdef DEBUG_PUSH
11415 xmlGenericError(xmlGenericErrorContext,
11416 "PP: Parsing Comment\n");
11417 #endif
11418 xmlParseComment(ctxt);
11419 if (ctxt->instate == XML_PARSER_EOF)
11420 goto done;
11421 ctxt->instate = XML_PARSER_EPILOG;
11422 } else if ((cur == '<') && (next == '!') &&
11423 (avail < 4)) {
11424 goto done;
11425 } else {
11426 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11427 ctxt->instate = XML_PARSER_EOF;
11428 #ifdef DEBUG_PUSH
11429 xmlGenericError(xmlGenericErrorContext,
11430 "PP: entering EOF\n");
11431 #endif
11432 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11433 ctxt->sax->endDocument(ctxt->userData);
11434 goto done;
11436 break;
/*
 * DTD: scan the buffered bytes for the end of the internal subset and
 * only then parse it in one go.  The scan starts at the cursor offset
 * or at checkIndex, whichever is larger.
 * NOTE(review): input->buf is dereferenced here without the NULL test
 * used when avail is computed - confirm this state is only reached with
 * a buffered input.
 */
11437 case XML_PARSER_DTD: {
11439 * Sorry but progressive parsing of the internal subset
11440 * is not expected to be supported. We first check that
11441 * the full content of the internal subset is available and
11442 * the parsing is launched only at that point.
11443 * Internal subset ends up with "']' S? '>'" in an unescaped
11444 * section and not in a ']]>' sequence which are conditional
11445 * sections (whoever argued to keep that crap in XML deserve
11446 * a place in hell !).
11448 int base, i;
11449 xmlChar *buf;
11450 xmlChar quote = 0;
11452 base = ctxt->input->cur - ctxt->input->base;
11453 if (base < 0) return(0);
11454 if (ctxt->checkIndex > base)
11455 base = ctxt->checkIndex;
11456 buf = ctxt->input->buf->buffer->content;
11457 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11458 base++) {
/* Inside a quoted literal only the matching quote character matters. */
11459 if (quote != 0) {
11460 if (buf[base] == quote)
11461 quote = 0;
11462 continue;
11464 if ((quote == 0) && (buf[base] == '<')) {
11465 int found = 0;
11466 /* special handling of comments */
11467 if (((unsigned int) base + 4 <
11468 ctxt->input->buf->buffer->use) &&
11469 (buf[base + 1] == '!') &&
11470 (buf[base + 2] == '-') &&
11471 (buf[base + 3] == '-')) {
11472 for (;(unsigned int) base + 3 <
11473 ctxt->input->buf->buffer->use; base++) {
11474 if ((buf[base] == '-') &&
11475 (buf[base + 1] == '-') &&
11476 (buf[base + 2] == '>')) {
11477 found = 1;
11478 base += 2;
11479 break;
11482 if (!found) {
11483 #if 0
11484 fprintf(stderr, "unfinished comment\n");
11485 #endif
11486 break; /* for */
11488 continue;
11491 if (buf[base] == '"') {
11492 quote = '"';
11493 continue;
11495 if (buf[base] == '\'') {
11496 quote = '\'';
11497 continue;
11499 if (buf[base] == ']') {
11500 #if 0
11501 fprintf(stderr, "%c%c%c%c: ", buf[base],
11502 buf[base + 1], buf[base + 2], buf[base + 3]);
11503 #endif
11504 if ((unsigned int) base +1 >=
11505 ctxt->input->buf->buffer->use)
11506 break;
11507 if (buf[base + 1] == ']') {
11508 /* conditional crap, skip both ']' ! */
11509 base++;
11510 continue;
/*
 * After a single ']' only blanks may precede the closing '>'; any other
 * byte means this ']' does not end the internal subset.
 */
11512 for (i = 1;
11513 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11514 i++) {
11515 if (buf[base + i] == '>') {
11516 #if 0
11517 fprintf(stderr, "found\n");
11518 #endif
11519 goto found_end_int_subset;
11521 if (!IS_BLANK_CH(buf[base + i])) {
11522 #if 0
11523 fprintf(stderr, "not found\n");
11524 #endif
11525 goto not_end_of_int_subset;
11528 #if 0
11529 fprintf(stderr, "end of stream\n");
11530 #endif
11531 break;
11534 not_end_of_int_subset:
11535 continue; /* for */
11538 * We didn't found the end of the Internal subset
11540 #ifdef DEBUG_PUSH
11541 if (next == 0)
11542 xmlGenericError(xmlGenericErrorContext,
11543 "PP: lookup of int subset end filed\n");
11544 #endif
11545 goto done;
11547 found_end_int_subset:
11548 xmlParseInternalSubset(ctxt);
11549 if (ctxt->instate == XML_PARSER_EOF)
11550 goto done;
11551 ctxt->inSubset = 2;
11552 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11553 (ctxt->sax->externalSubset != NULL))
11554 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11555 ctxt->extSubSystem, ctxt->extSubURI);
11556 ctxt->inSubset = 0;
11557 xmlCleanSpecialAttr(ctxt);
11558 if (ctxt->instate == XML_PARSER_EOF)
11559 goto done;
11560 ctxt->instate = XML_PARSER_PROLOG;
11561 ctxt->checkIndex = 0;
11562 #ifdef DEBUG_PUSH
11563 xmlGenericError(xmlGenericErrorContext,
11564 "PP: entering PROLOG\n");
11565 #endif
11566 break;
/*
 * The remaining states are not expected in this function: each one logs
 * an internal error and falls back to another state.
 */
11568 case XML_PARSER_COMMENT:
11569 xmlGenericError(xmlGenericErrorContext,
11570 "PP: internal error, state == COMMENT\n");
11571 ctxt->instate = XML_PARSER_CONTENT;
11572 #ifdef DEBUG_PUSH
11573 xmlGenericError(xmlGenericErrorContext,
11574 "PP: entering CONTENT\n");
11575 #endif
11576 break;
/*
 * NOTE(review): unlike its siblings, the message below has no trailing
 * newline - confirm whether that is intended.
 */
11577 case XML_PARSER_IGNORE:
11578 xmlGenericError(xmlGenericErrorContext,
11579 "PP: internal error, state == IGNORE");
11580 ctxt->instate = XML_PARSER_DTD;
11581 #ifdef DEBUG_PUSH
11582 xmlGenericError(xmlGenericErrorContext,
11583 "PP: entering DTD\n");
11584 #endif
11585 break;
11586 case XML_PARSER_PI:
11587 xmlGenericError(xmlGenericErrorContext,
11588 "PP: internal error, state == PI\n");
11589 ctxt->instate = XML_PARSER_CONTENT;
11590 #ifdef DEBUG_PUSH
11591 xmlGenericError(xmlGenericErrorContext,
11592 "PP: entering CONTENT\n");
11593 #endif
11594 break;
11595 case XML_PARSER_ENTITY_DECL:
11596 xmlGenericError(xmlGenericErrorContext,
11597 "PP: internal error, state == ENTITY_DECL\n");
11598 ctxt->instate = XML_PARSER_DTD;
11599 #ifdef DEBUG_PUSH
11600 xmlGenericError(xmlGenericErrorContext,
11601 "PP: entering DTD\n");
11602 #endif
11603 break;
/*
 * NOTE(review): the state is set to CONTENT while the debug trace says
 * "entering DTD" - confirm which of the two is intended.
 */
11604 case XML_PARSER_ENTITY_VALUE:
11605 xmlGenericError(xmlGenericErrorContext,
11606 "PP: internal error, state == ENTITY_VALUE\n");
11607 ctxt->instate = XML_PARSER_CONTENT;
11608 #ifdef DEBUG_PUSH
11609 xmlGenericError(xmlGenericErrorContext,
11610 "PP: entering DTD\n");
11611 #endif
11612 break;
11613 case XML_PARSER_ATTRIBUTE_VALUE:
11614 xmlGenericError(xmlGenericErrorContext,
11615 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11616 ctxt->instate = XML_PARSER_START_TAG;
11617 #ifdef DEBUG_PUSH
11618 xmlGenericError(xmlGenericErrorContext,
11619 "PP: entering START_TAG\n");
11620 #endif
11621 break;
11622 case XML_PARSER_SYSTEM_LITERAL:
11623 xmlGenericError(xmlGenericErrorContext,
11624 "PP: internal error, state == SYSTEM_LITERAL\n");
11625 ctxt->instate = XML_PARSER_START_TAG;
11626 #ifdef DEBUG_PUSH
11627 xmlGenericError(xmlGenericErrorContext,
11628 "PP: entering START_TAG\n");
11629 #endif
11630 break;
11631 case XML_PARSER_PUBLIC_LITERAL:
11632 xmlGenericError(xmlGenericErrorContext,
11633 "PP: internal error, state == PUBLIC_LITERAL\n");
11634 ctxt->instate = XML_PARSER_START_TAG;
11635 #ifdef DEBUG_PUSH
11636 xmlGenericError(xmlGenericErrorContext,
11637 "PP: entering START_TAG\n");
11638 #endif
11639 break;
/* Common exit: hand back ret. */
11642 done:
11643 #ifdef DEBUG_PUSH
11644 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11645 #endif
11646 return(ret);
/*
 * Reached from the CDATA checks: format the four bytes at the cursor
 * into the error message, report XML_ERR_INVALID_CHAR and return 0.
 * NOTE(review): cur[1]..cur[3] are read without a visible bounds check
 * - confirm the buffer guarantees they are readable.
 */
11647 encoding_error:
11649 char buffer[150];
11651 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11652 ctxt->input->cur[0], ctxt->input->cur[1],
11653 ctxt->input->cur[2], ctxt->input->cur[3]);
11654 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11655 "Input is not proper UTF-8, indicate encoding !\n%s",
11656 BAD_CAST buffer, NULL);
11658 return(0);
11662 * xmlParseChunk:
11663 * @ctxt: an XML parser context
11664 * @chunk: an char array
11665 * @size: the size in byte of the chunk
11666 * @terminate: last chunk indicator
11668 * Parse a Chunk of memory
11670 * Returns zero if no error, the xmlParserErrors otherwise.
11673 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11674 int terminate) {
11675 int end_in_lf = 0;
11676 int remain = 0;
11678 if (ctxt == NULL)
11679 return(XML_ERR_INTERNAL_ERROR);
11680 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11681 return(ctxt->errNo);
11682 if (ctxt->instate == XML_PARSER_EOF)
11683 return(-1);
11684 if (ctxt->instate == XML_PARSER_START)
11685 xmlDetectSAX2(ctxt);
11686 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11687 (chunk[size - 1] == '\r')) {
11688 end_in_lf = 1;
11689 size--;
11692 xmldecl_done:
11694 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11695 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11696 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11697 int cur = ctxt->input->cur - ctxt->input->base;
11698 int res;
11701 * Specific handling if we autodetected an encoding, we should not
11702 * push more than the first line ... which depend on the encoding
11703 * And only push the rest once the final encoding was detected
11705 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11706 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11707 unsigned int len = 45;
11709 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11710 BAD_CAST "UTF-16")) ||
11711 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11712 BAD_CAST "UTF16")))
11713 len = 90;
11714 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11715 BAD_CAST "UCS-4")) ||
11716 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11717 BAD_CAST "UCS4")))
11718 len = 180;
11720 if (ctxt->input->buf->rawconsumed < len)
11721 len -= ctxt->input->buf->rawconsumed;
11724 * Change size for reading the initial declaration only
11725 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11726 * will blindly copy extra bytes from memory.
11728 if (size > len) {
11729 remain = size - len;
11730 size = len;
11731 } else {
11732 remain = 0;
11735 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11736 if (res < 0) {
11737 ctxt->errNo = XML_PARSER_EOF;
11738 ctxt->disableSAX = 1;
11739 return (XML_PARSER_EOF);
11741 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11742 ctxt->input->cur = ctxt->input->base + cur;
11743 ctxt->input->end =
11744 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11745 #ifdef DEBUG_PUSH
11746 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11747 #endif
11749 } else if (ctxt->instate != XML_PARSER_EOF) {
11750 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11751 xmlParserInputBufferPtr in = ctxt->input->buf;
11752 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11753 (in->raw != NULL)) {
11754 int nbchars;
11756 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11757 if (nbchars < 0) {
11758 /* TODO 2.6.0 */
11759 xmlGenericError(xmlGenericErrorContext,
11760 "xmlParseChunk: encoder error\n");
11761 return(XML_ERR_INVALID_ENCODING);
11766 if (remain != 0)
11767 xmlParseTryOrFinish(ctxt, 0);
11768 else
11769 xmlParseTryOrFinish(ctxt, terminate);
11770 if (ctxt->instate == XML_PARSER_EOF)
11771 return(ctxt->errNo);
11772 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11773 return(ctxt->errNo);
11775 if (remain != 0) {
11776 chunk += size;
11777 size = remain;
11778 remain = 0;
11779 goto xmldecl_done;
11781 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11782 (ctxt->input->buf != NULL)) {
11783 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11785 if (terminate) {
11787 * Check for termination
11789 int avail = 0;
11791 if (ctxt->input != NULL) {
11792 if (ctxt->input->buf == NULL)
11793 avail = ctxt->input->length -
11794 (ctxt->input->cur - ctxt->input->base);
11795 else
11796 avail = ctxt->input->buf->buffer->use -
11797 (ctxt->input->cur - ctxt->input->base);
11800 if ((ctxt->instate != XML_PARSER_EOF) &&
11801 (ctxt->instate != XML_PARSER_EPILOG)) {
11802 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11804 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11805 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11807 if (ctxt->instate != XML_PARSER_EOF) {
11808 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11809 ctxt->sax->endDocument(ctxt->userData);
11811 ctxt->instate = XML_PARSER_EOF;
11813 return((xmlParserErrors) ctxt->errNo);
11816 /************************************************************************
11818 * I/O front end functions to the parser *
11820 ************************************************************************/
11823 * xmlCreatePushParserCtxt:
11824 * @sax: a SAX handler
11825 * @user_data: The user data returned on SAX callbacks
11826 * @chunk: a pointer to an array of chars
11827 * @size: number of chars in the array
11828 * @filename: an optional file name or URI
11830 * Create a parser context for using the XML parser in push mode.
11831 * If @buffer and @size are non-NULL, the data is used to detect
11832 * the encoding. The remaining characters will be parsed so they
11833 * don't need to be fed in again through xmlParseChunk.
11834 * To allow content encoding detection, @size should be >= 4
11835 * The value of @filename is used for fetching external entities
11836 * and error/warning reports.
11838 * Returns the new parser context or NULL
11841 xmlParserCtxtPtr
11842 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11843 const char *chunk, int size, const char *filename) {
11844 xmlParserCtxtPtr ctxt;
11845 xmlParserInputPtr inputStream;
11846 xmlParserInputBufferPtr buf;
11847 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11850 * plug some encoding conversion routines
11852 if ((chunk != NULL) && (size >= 4))
11853 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11855 buf = xmlAllocParserInputBuffer(enc);
11856 if (buf == NULL) return(NULL);
11858 ctxt = xmlNewParserCtxt();
11859 if (ctxt == NULL) {
11860 xmlErrMemory(NULL, "creating parser: out of memory\n");
11861 xmlFreeParserInputBuffer(buf);
11862 return(NULL);
11864 ctxt->dictNames = 1;
11865 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11866 if (ctxt->pushTab == NULL) {
11867 xmlErrMemory(ctxt, NULL);
11868 xmlFreeParserInputBuffer(buf);
11869 xmlFreeParserCtxt(ctxt);
11870 return(NULL);
11872 if (sax != NULL) {
11873 #ifdef LIBXML_SAX1_ENABLED
11874 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11875 #endif /* LIBXML_SAX1_ENABLED */
11876 xmlFree(ctxt->sax);
11877 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11878 if (ctxt->sax == NULL) {
11879 xmlErrMemory(ctxt, NULL);
11880 xmlFreeParserInputBuffer(buf);
11881 xmlFreeParserCtxt(ctxt);
11882 return(NULL);
11884 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11885 if (sax->initialized == XML_SAX2_MAGIC)
11886 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11887 else
11888 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11889 if (user_data != NULL)
11890 ctxt->userData = user_data;
11892 if (filename == NULL) {
11893 ctxt->directory = NULL;
11894 } else {
11895 ctxt->directory = xmlParserGetDirectory(filename);
11898 inputStream = xmlNewInputStream(ctxt);
11899 if (inputStream == NULL) {
11900 xmlFreeParserCtxt(ctxt);
11901 xmlFreeParserInputBuffer(buf);
11902 return(NULL);
11905 if (filename == NULL)
11906 inputStream->filename = NULL;
11907 else {
11908 inputStream->filename = (char *)
11909 xmlCanonicPath((const xmlChar *) filename);
11910 if (inputStream->filename == NULL) {
11911 xmlFreeParserCtxt(ctxt);
11912 xmlFreeParserInputBuffer(buf);
11913 return(NULL);
11916 inputStream->buf = buf;
11917 inputStream->base = inputStream->buf->buffer->content;
11918 inputStream->cur = inputStream->buf->buffer->content;
11919 inputStream->end =
11920 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11922 inputPush(ctxt, inputStream);
11925 * If the caller didn't provide an initial 'chunk' for determining
11926 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11927 * that it can be automatically determined later
11929 if ((size == 0) || (chunk == NULL)) {
11930 ctxt->charset = XML_CHAR_ENCODING_NONE;
11931 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11932 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11933 int cur = ctxt->input->cur - ctxt->input->base;
11935 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11937 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11938 ctxt->input->cur = ctxt->input->base + cur;
11939 ctxt->input->end =
11940 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11941 #ifdef DEBUG_PUSH
11942 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11943 #endif
11946 if (enc != XML_CHAR_ENCODING_NONE) {
11947 xmlSwitchEncoding(ctxt, enc);
11950 return(ctxt);
11952 #endif /* LIBXML_PUSH_ENABLED */
11955 * xmlStopParser:
11956 * @ctxt: an XML parser context
11958 * Blocks further parser processing
11960 void
11961 xmlStopParser(xmlParserCtxtPtr ctxt) {
11962 if (ctxt == NULL)
11963 return;
11964 ctxt->instate = XML_PARSER_EOF;
11965 ctxt->errNo = XML_ERR_USER_STOP;
11966 ctxt->disableSAX = 1;
11967 if (ctxt->input != NULL) {
11968 ctxt->input->cur = BAD_CAST"";
11969 ctxt->input->base = ctxt->input->cur;
11974 * xmlCreateIOParserCtxt:
11975 * @sax: a SAX handler
11976 * @user_data: The user data returned on SAX callbacks
11977 * @ioread: an I/O read function
11978 * @ioclose: an I/O close function
11979 * @ioctx: an I/O handler
11980 * @enc: the charset encoding if known
11982 * Create a parser context for using the XML parser with an existing
11983 * I/O stream
11985 * Returns the new parser context or NULL
11987 xmlParserCtxtPtr
11988 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11989 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11990 void *ioctx, xmlCharEncoding enc) {
11991 xmlParserCtxtPtr ctxt;
11992 xmlParserInputPtr inputStream;
11993 xmlParserInputBufferPtr buf;
11995 if (ioread == NULL) return(NULL);
11997 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11998 if (buf == NULL) return(NULL);
12000 ctxt = xmlNewParserCtxt();
12001 if (ctxt == NULL) {
12002 xmlFreeParserInputBuffer(buf);
12003 return(NULL);
12005 if (sax != NULL) {
12006 #ifdef LIBXML_SAX1_ENABLED
12007 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12008 #endif /* LIBXML_SAX1_ENABLED */
12009 xmlFree(ctxt->sax);
12010 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12011 if (ctxt->sax == NULL) {
12012 xmlErrMemory(ctxt, NULL);
12013 xmlFreeParserCtxt(ctxt);
12014 return(NULL);
12016 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12017 if (sax->initialized == XML_SAX2_MAGIC)
12018 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12019 else
12020 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12021 if (user_data != NULL)
12022 ctxt->userData = user_data;
12025 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12026 if (inputStream == NULL) {
12027 xmlFreeParserCtxt(ctxt);
12028 return(NULL);
12030 inputPush(ctxt, inputStream);
12032 return(ctxt);
12035 #ifdef LIBXML_VALID_ENABLED
12036 /************************************************************************
12038 * Front ends when parsing a DTD *
12040 ************************************************************************/
12043 * xmlIOParseDTD:
12044 * @sax: the SAX handler block or NULL
12045 * @input: an Input Buffer
12046 * @enc: the charset encoding if known
12048 * Load and parse a DTD
12050 * Returns the resulting xmlDtdPtr or NULL in case of error.
12051 * @input will be freed by the function in any case.
12054 xmlDtdPtr
12055 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12056 xmlCharEncoding enc) {
12057 xmlDtdPtr ret = NULL;
12058 xmlParserCtxtPtr ctxt;
12059 xmlParserInputPtr pinput = NULL;
12060 xmlChar start[4];
12062 if (input == NULL)
12063 return(NULL);
12065 ctxt = xmlNewParserCtxt();
12066 if (ctxt == NULL) {
12067 xmlFreeParserInputBuffer(input);
12068 return(NULL);
12072 * Set-up the SAX context
12074 if (sax != NULL) {
12075 if (ctxt->sax != NULL)
12076 xmlFree(ctxt->sax);
12077 ctxt->sax = sax;
12078 ctxt->userData = ctxt;
12080 xmlDetectSAX2(ctxt);
12083 * generate a parser input from the I/O handler
12086 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12087 if (pinput == NULL) {
12088 if (sax != NULL) ctxt->sax = NULL;
12089 xmlFreeParserInputBuffer(input);
12090 xmlFreeParserCtxt(ctxt);
12091 return(NULL);
12095 * plug some encoding conversion routines here.
12097 if (xmlPushInput(ctxt, pinput) < 0) {
12098 if (sax != NULL) ctxt->sax = NULL;
12099 xmlFreeParserCtxt(ctxt);
12100 return(NULL);
12102 if (enc != XML_CHAR_ENCODING_NONE) {
12103 xmlSwitchEncoding(ctxt, enc);
12106 pinput->filename = NULL;
12107 pinput->line = 1;
12108 pinput->col = 1;
12109 pinput->base = ctxt->input->cur;
12110 pinput->cur = ctxt->input->cur;
12111 pinput->free = NULL;
12114 * let's parse that entity knowing it's an external subset.
12116 ctxt->inSubset = 2;
12117 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12118 if (ctxt->myDoc == NULL) {
12119 xmlErrMemory(ctxt, "New Doc failed");
12120 return(NULL);
12122 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12123 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12124 BAD_CAST "none", BAD_CAST "none");
12126 if ((enc == XML_CHAR_ENCODING_NONE) &&
12127 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12129 * Get the 4 first bytes and decode the charset
12130 * if enc != XML_CHAR_ENCODING_NONE
12131 * plug some encoding conversion routines.
12133 start[0] = RAW;
12134 start[1] = NXT(1);
12135 start[2] = NXT(2);
12136 start[3] = NXT(3);
12137 enc = xmlDetectCharEncoding(start, 4);
12138 if (enc != XML_CHAR_ENCODING_NONE) {
12139 xmlSwitchEncoding(ctxt, enc);
12143 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12145 if (ctxt->myDoc != NULL) {
12146 if (ctxt->wellFormed) {
12147 ret = ctxt->myDoc->extSubset;
12148 ctxt->myDoc->extSubset = NULL;
12149 if (ret != NULL) {
12150 xmlNodePtr tmp;
12152 ret->doc = NULL;
12153 tmp = ret->children;
12154 while (tmp != NULL) {
12155 tmp->doc = NULL;
12156 tmp = tmp->next;
12159 } else {
12160 ret = NULL;
12162 xmlFreeDoc(ctxt->myDoc);
12163 ctxt->myDoc = NULL;
12165 if (sax != NULL) ctxt->sax = NULL;
12166 xmlFreeParserCtxt(ctxt);
12168 return(ret);
12172 * xmlSAXParseDTD:
12173 * @sax: the SAX handler block
12174 * @ExternalID: a NAME* containing the External ID of the DTD
12175 * @SystemID: a NAME* containing the URL to the DTD
12177 * Load and parse an external subset.
12179 * Returns the resulting xmlDtdPtr or NULL in case of error.
12182 xmlDtdPtr
12183 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12184 const xmlChar *SystemID) {
12185 xmlDtdPtr ret = NULL;
12186 xmlParserCtxtPtr ctxt;
12187 xmlParserInputPtr input = NULL;
12188 xmlCharEncoding enc;
12189 xmlChar* systemIdCanonic;
12191 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12193 ctxt = xmlNewParserCtxt();
12194 if (ctxt == NULL) {
12195 return(NULL);
12199 * Set-up the SAX context
12201 if (sax != NULL) {
12202 if (ctxt->sax != NULL)
12203 xmlFree(ctxt->sax);
12204 ctxt->sax = sax;
12205 ctxt->userData = ctxt;
12209 * Canonicalise the system ID
12211 systemIdCanonic = xmlCanonicPath(SystemID);
12212 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12213 xmlFreeParserCtxt(ctxt);
12214 return(NULL);
12218 * Ask the Entity resolver to load the damn thing
12221 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12222 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12223 systemIdCanonic);
12224 if (input == NULL) {
12225 if (sax != NULL) ctxt->sax = NULL;
12226 xmlFreeParserCtxt(ctxt);
12227 if (systemIdCanonic != NULL)
12228 xmlFree(systemIdCanonic);
12229 return(NULL);
12233 * plug some encoding conversion routines here.
12235 if (xmlPushInput(ctxt, input) < 0) {
12236 if (sax != NULL) ctxt->sax = NULL;
12237 xmlFreeParserCtxt(ctxt);
12238 if (systemIdCanonic != NULL)
12239 xmlFree(systemIdCanonic);
12240 return(NULL);
12242 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12243 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12244 xmlSwitchEncoding(ctxt, enc);
12247 if (input->filename == NULL)
12248 input->filename = (char *) systemIdCanonic;
12249 else
12250 xmlFree(systemIdCanonic);
12251 input->line = 1;
12252 input->col = 1;
12253 input->base = ctxt->input->cur;
12254 input->cur = ctxt->input->cur;
12255 input->free = NULL;
12258 * let's parse that entity knowing it's an external subset.
12260 ctxt->inSubset = 2;
12261 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12262 if (ctxt->myDoc == NULL) {
12263 xmlErrMemory(ctxt, "New Doc failed");
12264 if (sax != NULL) ctxt->sax = NULL;
12265 xmlFreeParserCtxt(ctxt);
12266 return(NULL);
12268 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12269 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12270 ExternalID, SystemID);
12271 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12273 if (ctxt->myDoc != NULL) {
12274 if (ctxt->wellFormed) {
12275 ret = ctxt->myDoc->extSubset;
12276 ctxt->myDoc->extSubset = NULL;
12277 if (ret != NULL) {
12278 xmlNodePtr tmp;
12280 ret->doc = NULL;
12281 tmp = ret->children;
12282 while (tmp != NULL) {
12283 tmp->doc = NULL;
12284 tmp = tmp->next;
12287 } else {
12288 ret = NULL;
12290 xmlFreeDoc(ctxt->myDoc);
12291 ctxt->myDoc = NULL;
12293 if (sax != NULL) ctxt->sax = NULL;
12294 xmlFreeParserCtxt(ctxt);
12296 return(ret);
12301 * xmlParseDTD:
12302 * @ExternalID: a NAME* containing the External ID of the DTD
12303 * @SystemID: a NAME* containing the URL to the DTD
12305 * Load and parse an external subset.
12307 * Returns the resulting xmlDtdPtr or NULL in case of error.
12310 xmlDtdPtr
12311 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12312 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12314 #endif /* LIBXML_VALID_ENABLED */
12316 /************************************************************************
12318 * Front ends when parsing an Entity *
12320 ************************************************************************/
12323 * xmlParseCtxtExternalEntity:
12324 * @ctx: the existing parsing context
12325 * @URL: the URL for the entity to load
12326 * @ID: the System ID for the entity to load
12327 * @lst: the return value for the set of parsed nodes
12329 * Parse an external general entity within an existing parsing context
12330 * An external general parsed entity is well-formed if it matches the
12331 * production labeled extParsedEnt.
12333 * [78] extParsedEnt ::= TextDecl? content
12335 * Returns 0 if the entity is well formed, -1 in case of args problem and
12336 * the parser error code otherwise
12340 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12341 const xmlChar *ID, xmlNodePtr *lst) {
12342 xmlParserCtxtPtr ctxt;
12343 xmlDocPtr newDoc;
12344 xmlNodePtr newRoot;
12345 xmlSAXHandlerPtr oldsax = NULL;
12346 int ret = 0;
12347 xmlChar start[4];
12348 xmlCharEncoding enc;
12350 if (ctx == NULL) return(-1);
12352 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12353 (ctx->depth > 1024)) {
12354 return(XML_ERR_ENTITY_LOOP);
12357 if (lst != NULL)
12358 *lst = NULL;
12359 if ((URL == NULL) && (ID == NULL))
12360 return(-1);
12361 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12362 return(-1);
12364 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12365 if (ctxt == NULL) {
12366 return(-1);
12369 oldsax = ctxt->sax;
12370 ctxt->sax = ctx->sax;
12371 xmlDetectSAX2(ctxt);
12372 newDoc = xmlNewDoc(BAD_CAST "1.0");
12373 if (newDoc == NULL) {
12374 xmlFreeParserCtxt(ctxt);
12375 return(-1);
12377 newDoc->properties = XML_DOC_INTERNAL;
12378 if (ctx->myDoc->dict) {
12379 newDoc->dict = ctx->myDoc->dict;
12380 xmlDictReference(newDoc->dict);
12382 if (ctx->myDoc != NULL) {
12383 newDoc->intSubset = ctx->myDoc->intSubset;
12384 newDoc->extSubset = ctx->myDoc->extSubset;
12386 if (ctx->myDoc->URL != NULL) {
12387 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12389 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12390 if (newRoot == NULL) {
12391 ctxt->sax = oldsax;
12392 xmlFreeParserCtxt(ctxt);
12393 newDoc->intSubset = NULL;
12394 newDoc->extSubset = NULL;
12395 xmlFreeDoc(newDoc);
12396 return(-1);
12398 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12399 nodePush(ctxt, newDoc->children);
12400 if (ctx->myDoc == NULL) {
12401 ctxt->myDoc = newDoc;
12402 } else {
12403 ctxt->myDoc = ctx->myDoc;
12404 newDoc->children->doc = ctx->myDoc;
12408 * Get the 4 first bytes and decode the charset
12409 * if enc != XML_CHAR_ENCODING_NONE
12410 * plug some encoding conversion routines.
12412 GROW
12413 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12414 start[0] = RAW;
12415 start[1] = NXT(1);
12416 start[2] = NXT(2);
12417 start[3] = NXT(3);
12418 enc = xmlDetectCharEncoding(start, 4);
12419 if (enc != XML_CHAR_ENCODING_NONE) {
12420 xmlSwitchEncoding(ctxt, enc);
12425 * Parse a possible text declaration first
12427 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12428 xmlParseTextDecl(ctxt);
12430 * An XML-1.0 document can't reference an entity not XML-1.0
12432 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12433 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12434 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12435 "Version mismatch between document and entity\n");
12440 * Doing validity checking on chunk doesn't make sense
12442 ctxt->instate = XML_PARSER_CONTENT;
12443 ctxt->validate = ctx->validate;
12444 ctxt->valid = ctx->valid;
12445 ctxt->loadsubset = ctx->loadsubset;
12446 ctxt->depth = ctx->depth + 1;
12447 ctxt->replaceEntities = ctx->replaceEntities;
12448 if (ctxt->validate) {
12449 ctxt->vctxt.error = ctx->vctxt.error;
12450 ctxt->vctxt.warning = ctx->vctxt.warning;
12451 } else {
12452 ctxt->vctxt.error = NULL;
12453 ctxt->vctxt.warning = NULL;
12455 ctxt->vctxt.nodeTab = NULL;
12456 ctxt->vctxt.nodeNr = 0;
12457 ctxt->vctxt.nodeMax = 0;
12458 ctxt->vctxt.node = NULL;
12459 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12460 ctxt->dict = ctx->dict;
12461 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12462 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12463 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12464 ctxt->dictNames = ctx->dictNames;
12465 ctxt->attsDefault = ctx->attsDefault;
12466 ctxt->attsSpecial = ctx->attsSpecial;
12467 ctxt->linenumbers = ctx->linenumbers;
12469 xmlParseContent(ctxt);
12471 ctx->validate = ctxt->validate;
12472 ctx->valid = ctxt->valid;
12473 if ((RAW == '<') && (NXT(1) == '/')) {
12474 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12475 } else if (RAW != 0) {
12476 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12478 if (ctxt->node != newDoc->children) {
12479 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12482 if (!ctxt->wellFormed) {
12483 if (ctxt->errNo == 0)
12484 ret = 1;
12485 else
12486 ret = ctxt->errNo;
12487 } else {
12488 if (lst != NULL) {
12489 xmlNodePtr cur;
12492 * Return the newly created nodeset after unlinking it from
12493 * they pseudo parent.
12495 cur = newDoc->children->children;
12496 *lst = cur;
12497 while (cur != NULL) {
12498 cur->parent = NULL;
12499 cur = cur->next;
12501 newDoc->children->children = NULL;
12503 ret = 0;
12505 ctxt->sax = oldsax;
12506 ctxt->dict = NULL;
12507 ctxt->attsDefault = NULL;
12508 ctxt->attsSpecial = NULL;
12509 xmlFreeParserCtxt(ctxt);
12510 newDoc->intSubset = NULL;
12511 newDoc->extSubset = NULL;
12512 xmlFreeDoc(newDoc);
12514 return(ret);
12518 * xmlParseExternalEntityPrivate:
12519 * @doc: the document the chunk pertains to
12520 * @oldctxt: the previous parser context if available
12521 * @sax: the SAX handler bloc (possibly NULL)
12522 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12523 * @depth: Used for loop detection, use 0
12524 * @URL: the URL for the entity to load
12525 * @ID: the System ID for the entity to load
12526 * @list: the return value for the set of parsed nodes
12528 * Private version of xmlParseExternalEntity()
12530 * Returns 0 if the entity is well formed, -1 in case of args problem and
12531 * the parser error code otherwise
12534 static xmlParserErrors
12535 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12536 xmlSAXHandlerPtr sax,
12537 void *user_data, int depth, const xmlChar *URL,
12538 const xmlChar *ID, xmlNodePtr *list) {
12539 xmlParserCtxtPtr ctxt;
12540 xmlDocPtr newDoc;
12541 xmlNodePtr newRoot;
12542 xmlSAXHandlerPtr oldsax = NULL;
12543 xmlParserErrors ret = XML_ERR_OK;
12544 xmlChar start[4];
12545 xmlCharEncoding enc;
12547 if (((depth > 40) &&
12548 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12549 (depth > 1024)) {
12550 return(XML_ERR_ENTITY_LOOP);
12553 if (list != NULL)
12554 *list = NULL;
12555 if ((URL == NULL) && (ID == NULL))
12556 return(XML_ERR_INTERNAL_ERROR);
12557 if (doc == NULL)
12558 return(XML_ERR_INTERNAL_ERROR);
12561 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12562 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12563 ctxt->userData = ctxt;
12564 if (oldctxt != NULL) {
12565 ctxt->_private = oldctxt->_private;
12566 ctxt->loadsubset = oldctxt->loadsubset;
12567 ctxt->validate = oldctxt->validate;
12568 ctxt->external = oldctxt->external;
12569 ctxt->record_info = oldctxt->record_info;
12570 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12571 ctxt->node_seq.length = oldctxt->node_seq.length;
12572 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12573 } else {
12575 * Doing validity checking on chunk without context
12576 * doesn't make sense
12578 ctxt->_private = NULL;
12579 ctxt->validate = 0;
12580 ctxt->external = 2;
12581 ctxt->loadsubset = 0;
12583 if (sax != NULL) {
12584 oldsax = ctxt->sax;
12585 ctxt->sax = sax;
12586 if (user_data != NULL)
12587 ctxt->userData = user_data;
12589 xmlDetectSAX2(ctxt);
12590 newDoc = xmlNewDoc(BAD_CAST "1.0");
12591 if (newDoc == NULL) {
12592 ctxt->node_seq.maximum = 0;
12593 ctxt->node_seq.length = 0;
12594 ctxt->node_seq.buffer = NULL;
12595 xmlFreeParserCtxt(ctxt);
12596 return(XML_ERR_INTERNAL_ERROR);
12598 newDoc->properties = XML_DOC_INTERNAL;
12599 newDoc->intSubset = doc->intSubset;
12600 newDoc->extSubset = doc->extSubset;
12601 newDoc->dict = doc->dict;
12602 xmlDictReference(newDoc->dict);
12604 if (doc->URL != NULL) {
12605 newDoc->URL = xmlStrdup(doc->URL);
12607 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12608 if (newRoot == NULL) {
12609 if (sax != NULL)
12610 ctxt->sax = oldsax;
12611 ctxt->node_seq.maximum = 0;
12612 ctxt->node_seq.length = 0;
12613 ctxt->node_seq.buffer = NULL;
12614 xmlFreeParserCtxt(ctxt);
12615 newDoc->intSubset = NULL;
12616 newDoc->extSubset = NULL;
12617 xmlFreeDoc(newDoc);
12618 return(XML_ERR_INTERNAL_ERROR);
12620 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12621 nodePush(ctxt, newDoc->children);
12622 ctxt->myDoc = doc;
12623 newRoot->doc = doc;
12626 * Get the 4 first bytes and decode the charset
12627 * if enc != XML_CHAR_ENCODING_NONE
12628 * plug some encoding conversion routines.
12630 GROW;
12631 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12632 start[0] = RAW;
12633 start[1] = NXT(1);
12634 start[2] = NXT(2);
12635 start[3] = NXT(3);
12636 enc = xmlDetectCharEncoding(start, 4);
12637 if (enc != XML_CHAR_ENCODING_NONE) {
12638 xmlSwitchEncoding(ctxt, enc);
12643 * Parse a possible text declaration first
12645 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12646 xmlParseTextDecl(ctxt);
12649 ctxt->instate = XML_PARSER_CONTENT;
12650 ctxt->depth = depth;
12652 xmlParseContent(ctxt);
12654 if ((RAW == '<') && (NXT(1) == '/')) {
12655 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12656 } else if (RAW != 0) {
12657 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12659 if (ctxt->node != newDoc->children) {
12660 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12663 if (!ctxt->wellFormed) {
12664 if (ctxt->errNo == 0)
12665 ret = XML_ERR_INTERNAL_ERROR;
12666 else
12667 ret = (xmlParserErrors)ctxt->errNo;
12668 } else {
12669 if (list != NULL) {
12670 xmlNodePtr cur;
12673 * Return the newly created nodeset after unlinking it from
12674 * they pseudo parent.
12676 cur = newDoc->children->children;
12677 *list = cur;
12678 while (cur != NULL) {
12679 cur->parent = NULL;
12680 cur = cur->next;
12682 newDoc->children->children = NULL;
12684 ret = XML_ERR_OK;
12688 * Record in the parent context the number of entities replacement
12689 * done when parsing that reference.
12691 if (oldctxt != NULL)
12692 oldctxt->nbentities += ctxt->nbentities;
12695 * Also record the size of the entity parsed
12697 if (ctxt->input != NULL) {
12698 oldctxt->sizeentities += ctxt->input->consumed;
12699 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12702 * And record the last error if any
12704 if (ctxt->lastError.code != XML_ERR_OK)
12705 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12707 if (sax != NULL)
12708 ctxt->sax = oldsax;
12709 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12710 oldctxt->node_seq.length = ctxt->node_seq.length;
12711 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12712 ctxt->node_seq.maximum = 0;
12713 ctxt->node_seq.length = 0;
12714 ctxt->node_seq.buffer = NULL;
12715 xmlFreeParserCtxt(ctxt);
12716 newDoc->intSubset = NULL;
12717 newDoc->extSubset = NULL;
12718 xmlFreeDoc(newDoc);
12720 return(ret);
12723 #ifdef LIBXML_SAX1_ENABLED
12725 * xmlParseExternalEntity:
12726 * @doc: the document the chunk pertains to
12727 * @sax: the SAX handler bloc (possibly NULL)
12728 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12729 * @depth: Used for loop detection, use 0
12730 * @URL: the URL for the entity to load
12731 * @ID: the System ID for the entity to load
12732 * @lst: the return value for the set of parsed nodes
12734 * Parse an external general entity
12735 * An external general parsed entity is well-formed if it matches the
12736 * production labeled extParsedEnt.
12738 * [78] extParsedEnt ::= TextDecl? content
12740 * Returns 0 if the entity is well formed, -1 in case of args problem and
12741 * the parser error code otherwise
12745 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12746 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12747 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12748 ID, lst));
12752 * xmlParseBalancedChunkMemory:
12753 * @doc: the document the chunk pertains to
12754 * @sax: the SAX handler bloc (possibly NULL)
12755 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12756 * @depth: Used for loop detection, use 0
12757 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12758 * @lst: the return value for the set of parsed nodes
12760 * Parse a well-balanced chunk of an XML document
12761 * called by the parser
12762 * The allowed sequence for the Well Balanced Chunk is the one defined by
12763 * the content production in the XML grammar:
12765 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12767 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12768 * the parser error code otherwise
12772 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12773 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12774 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12775 depth, string, lst, 0 );
12777 #endif /* LIBXML_SAX1_ENABLED */
12780 * xmlParseBalancedChunkMemoryInternal:
12781 * @oldctxt: the existing parsing context
12782 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12783 * @user_data: the user data field for the parser context
12784 * @lst: the return value for the set of parsed nodes
12787 * Parse a well-balanced chunk of an XML document
12788 * called by the parser
12789 * The allowed sequence for the Well Balanced Chunk is the one defined by
12790 * the content production in the XML grammar:
12792 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12794 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12795 * error code otherwise
12797 * In case recover is set to 1, the nodelist will not be empty even if
12798 * the parsed chunk is not well balanced.
12800 static xmlParserErrors
12801 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12802 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12803 xmlParserCtxtPtr ctxt;
12804 xmlDocPtr newDoc = NULL;
12805 xmlNodePtr newRoot;
12806 xmlSAXHandlerPtr oldsax = NULL;
12807 xmlNodePtr content = NULL;
12808 xmlNodePtr last = NULL;
12809 int size;
12810 xmlParserErrors ret = XML_ERR_OK;
12811 #ifdef SAX2
12812 int i;
12813 #endif
12815 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12816 (oldctxt->depth > 1024)) {
12817 return(XML_ERR_ENTITY_LOOP);
12821 if (lst != NULL)
12822 *lst = NULL;
12823 if (string == NULL)
12824 return(XML_ERR_INTERNAL_ERROR);
12826 size = xmlStrlen(string);
12828 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12829 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12830 if (user_data != NULL)
12831 ctxt->userData = user_data;
12832 else
12833 ctxt->userData = ctxt;
12834 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12835 ctxt->dict = oldctxt->dict;
12836 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12837 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12838 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12840 #ifdef SAX2
12841 /* propagate namespaces down the entity */
12842 for (i = 0;i < oldctxt->nsNr;i += 2) {
12843 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12845 #endif
12847 oldsax = ctxt->sax;
12848 ctxt->sax = oldctxt->sax;
12849 xmlDetectSAX2(ctxt);
12850 ctxt->replaceEntities = oldctxt->replaceEntities;
12851 ctxt->options = oldctxt->options;
12853 ctxt->_private = oldctxt->_private;
12854 if (oldctxt->myDoc == NULL) {
12855 newDoc = xmlNewDoc(BAD_CAST "1.0");
12856 if (newDoc == NULL) {
12857 ctxt->sax = oldsax;
12858 ctxt->dict = NULL;
12859 xmlFreeParserCtxt(ctxt);
12860 return(XML_ERR_INTERNAL_ERROR);
12862 newDoc->properties = XML_DOC_INTERNAL;
12863 newDoc->dict = ctxt->dict;
12864 xmlDictReference(newDoc->dict);
12865 ctxt->myDoc = newDoc;
12866 } else {
12867 ctxt->myDoc = oldctxt->myDoc;
12868 content = ctxt->myDoc->children;
12869 last = ctxt->myDoc->last;
12871 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12872 if (newRoot == NULL) {
12873 ctxt->sax = oldsax;
12874 ctxt->dict = NULL;
12875 xmlFreeParserCtxt(ctxt);
12876 if (newDoc != NULL) {
12877 xmlFreeDoc(newDoc);
12879 return(XML_ERR_INTERNAL_ERROR);
12881 ctxt->myDoc->children = NULL;
12882 ctxt->myDoc->last = NULL;
12883 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12884 nodePush(ctxt, ctxt->myDoc->children);
12885 ctxt->instate = XML_PARSER_CONTENT;
12886 ctxt->depth = oldctxt->depth + 1;
12888 ctxt->validate = 0;
12889 ctxt->loadsubset = oldctxt->loadsubset;
12890 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12892 * ID/IDREF registration will be done in xmlValidateElement below
12894 ctxt->loadsubset |= XML_SKIP_IDS;
12896 ctxt->dictNames = oldctxt->dictNames;
12897 ctxt->attsDefault = oldctxt->attsDefault;
12898 ctxt->attsSpecial = oldctxt->attsSpecial;
12900 xmlParseContent(ctxt);
12901 if ((RAW == '<') && (NXT(1) == '/')) {
12902 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12903 } else if (RAW != 0) {
12904 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12906 if (ctxt->node != ctxt->myDoc->children) {
12907 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12910 if (!ctxt->wellFormed) {
12911 if (ctxt->errNo == 0)
12912 ret = XML_ERR_INTERNAL_ERROR;
12913 else
12914 ret = (xmlParserErrors)ctxt->errNo;
12915 } else {
12916 ret = XML_ERR_OK;
12919 if ((lst != NULL) && (ret == XML_ERR_OK)) {
12920 xmlNodePtr cur;
12923 * Return the newly created nodeset after unlinking it from
12924 * they pseudo parent.
12926 cur = ctxt->myDoc->children->children;
12927 *lst = cur;
12928 while (cur != NULL) {
12929 #ifdef LIBXML_VALID_ENABLED
12930 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12931 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12932 (cur->type == XML_ELEMENT_NODE)) {
12933 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12934 oldctxt->myDoc, cur);
12936 #endif /* LIBXML_VALID_ENABLED */
12937 cur->parent = NULL;
12938 cur = cur->next;
12940 ctxt->myDoc->children->children = NULL;
12942 if (ctxt->myDoc != NULL) {
12943 xmlFreeNode(ctxt->myDoc->children);
12944 ctxt->myDoc->children = content;
12945 ctxt->myDoc->last = last;
12949 * Record in the parent context the number of entities replacement
12950 * done when parsing that reference.
12952 if (oldctxt != NULL)
12953 oldctxt->nbentities += ctxt->nbentities;
12956 * Also record the last error if any
12958 if (ctxt->lastError.code != XML_ERR_OK)
12959 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12961 ctxt->sax = oldsax;
12962 ctxt->dict = NULL;
12963 ctxt->attsDefault = NULL;
12964 ctxt->attsSpecial = NULL;
12965 xmlFreeParserCtxt(ctxt);
12966 if (newDoc != NULL) {
12967 xmlFreeDoc(newDoc);
12970 return(ret);
12974 * xmlParseInNodeContext:
12975 * @node: the context node
12976 * @data: the input string
12977 * @datalen: the input string length in bytes
12978 * @options: a combination of xmlParserOption
12979 * @lst: the return value for the set of parsed nodes
12981 * Parse a well-balanced chunk of an XML document
12982 * within the context (DTD, namespaces, etc ...) of the given node.
12984 * The allowed sequence for the data is a Well Balanced Chunk defined by
12985 * the content production in the XML grammar:
12987 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12989 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12990 * error code otherwise
12992 xmlParserErrors
12993 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12994 int options, xmlNodePtr *lst) {
12995 #ifdef SAX2
12996 xmlParserCtxtPtr ctxt;
12997 xmlDocPtr doc = NULL;
12998 xmlNodePtr fake, cur;
12999 int nsnr = 0;
13001 xmlParserErrors ret = XML_ERR_OK;
13004 * check all input parameters, grab the document
13006 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13007 return(XML_ERR_INTERNAL_ERROR);
13008 switch (node->type) {
13009 case XML_ELEMENT_NODE:
13010 case XML_ATTRIBUTE_NODE:
13011 case XML_TEXT_NODE:
13012 case XML_CDATA_SECTION_NODE:
13013 case XML_ENTITY_REF_NODE:
13014 case XML_PI_NODE:
13015 case XML_COMMENT_NODE:
13016 case XML_DOCUMENT_NODE:
13017 case XML_HTML_DOCUMENT_NODE:
13018 break;
13019 default:
13020 return(XML_ERR_INTERNAL_ERROR);
13023 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13024 (node->type != XML_DOCUMENT_NODE) &&
13025 (node->type != XML_HTML_DOCUMENT_NODE))
13026 node = node->parent;
13027 if (node == NULL)
13028 return(XML_ERR_INTERNAL_ERROR);
13029 if (node->type == XML_ELEMENT_NODE)
13030 doc = node->doc;
13031 else
13032 doc = (xmlDocPtr) node;
13033 if (doc == NULL)
13034 return(XML_ERR_INTERNAL_ERROR);
13037 * allocate a context and set-up everything not related to the
13038 * node position in the tree
13040 if (doc->type == XML_DOCUMENT_NODE)
13041 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13042 #ifdef LIBXML_HTML_ENABLED
13043 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13044 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13046 * When parsing in context, it makes no sense to add implied
13047 * elements like html/body/etc...
13049 options |= HTML_PARSE_NOIMPLIED;
13051 #endif
13052 else
13053 return(XML_ERR_INTERNAL_ERROR);
13055 if (ctxt == NULL)
13056 return(XML_ERR_NO_MEMORY);
13059 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13060 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13061 * we must wait until the last moment to free the original one.
13063 if (doc->dict != NULL) {
13064 if (ctxt->dict != NULL)
13065 xmlDictFree(ctxt->dict);
13066 ctxt->dict = doc->dict;
13067 } else
13068 options |= XML_PARSE_NODICT;
13070 if (doc->encoding != NULL) {
13071 xmlCharEncodingHandlerPtr hdlr;
13073 if (ctxt->encoding != NULL)
13074 xmlFree((xmlChar *) ctxt->encoding);
13075 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13077 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13078 if (hdlr != NULL) {
13079 xmlSwitchToEncoding(ctxt, hdlr);
13080 } else {
13081 return(XML_ERR_UNSUPPORTED_ENCODING);
13085 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13086 xmlDetectSAX2(ctxt);
13087 ctxt->myDoc = doc;
13089 fake = xmlNewComment(NULL);
13090 if (fake == NULL) {
13091 xmlFreeParserCtxt(ctxt);
13092 return(XML_ERR_NO_MEMORY);
13094 xmlAddChild(node, fake);
13096 if (node->type == XML_ELEMENT_NODE) {
13097 nodePush(ctxt, node);
13099 * initialize the SAX2 namespaces stack
13101 cur = node;
13102 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13103 xmlNsPtr ns = cur->nsDef;
13104 const xmlChar *iprefix, *ihref;
13106 while (ns != NULL) {
13107 if (ctxt->dict) {
13108 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13109 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13110 } else {
13111 iprefix = ns->prefix;
13112 ihref = ns->href;
13115 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13116 nsPush(ctxt, iprefix, ihref);
13117 nsnr++;
13119 ns = ns->next;
13121 cur = cur->parent;
13123 ctxt->instate = XML_PARSER_CONTENT;
13126 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13128 * ID/IDREF registration will be done in xmlValidateElement below
13130 ctxt->loadsubset |= XML_SKIP_IDS;
13133 #ifdef LIBXML_HTML_ENABLED
13134 if (doc->type == XML_HTML_DOCUMENT_NODE)
13135 __htmlParseContent(ctxt);
13136 else
13137 #endif
13138 xmlParseContent(ctxt);
13140 nsPop(ctxt, nsnr);
13141 if ((RAW == '<') && (NXT(1) == '/')) {
13142 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13143 } else if (RAW != 0) {
13144 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13146 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13147 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13148 ctxt->wellFormed = 0;
13151 if (!ctxt->wellFormed) {
13152 if (ctxt->errNo == 0)
13153 ret = XML_ERR_INTERNAL_ERROR;
13154 else
13155 ret = (xmlParserErrors)ctxt->errNo;
13156 } else {
13157 ret = XML_ERR_OK;
13161 * Return the newly created nodeset after unlinking it from
13162 * the pseudo sibling.
13165 cur = fake->next;
13166 fake->next = NULL;
13167 node->last = fake;
13169 if (cur != NULL) {
13170 cur->prev = NULL;
13173 *lst = cur;
13175 while (cur != NULL) {
13176 cur->parent = NULL;
13177 cur = cur->next;
13180 xmlUnlinkNode(fake);
13181 xmlFreeNode(fake);
13184 if (ret != XML_ERR_OK) {
13185 xmlFreeNodeList(*lst);
13186 *lst = NULL;
13189 if (doc->dict != NULL)
13190 ctxt->dict = NULL;
13191 xmlFreeParserCtxt(ctxt);
13193 return(ret);
13194 #else /* !SAX2 */
13195 return(XML_ERR_INTERNAL_ERROR);
13196 #endif
13199 #ifdef LIBXML_SAX1_ENABLED
13201 * xmlParseBalancedChunkMemoryRecover:
13202 * @doc: the document the chunk pertains to
13203 * @sax: the SAX handler bloc (possibly NULL)
13204 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13205 * @depth: Used for loop detection, use 0
13206 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13207 * @lst: the return value for the set of parsed nodes
13208 * @recover: return nodes even if the data is broken (use 0)
13211 * Parse a well-balanced chunk of an XML document
13212 * called by the parser
13213 * The allowed sequence for the Well Balanced Chunk is the one defined by
13214 * the content production in the XML grammar:
13216 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13218 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13219 * the parser error code otherwise
13221 * In case recover is set to 1, the nodelist will not be empty even if
13222 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13223 * some extent.
13226 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13227 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13228 int recover) {
13229 xmlParserCtxtPtr ctxt;
13230 xmlDocPtr newDoc;
13231 xmlSAXHandlerPtr oldsax = NULL;
13232 xmlNodePtr content, newRoot;
13233 int size;
13234 int ret = 0;
13236 if (depth > 40) {
13237 return(XML_ERR_ENTITY_LOOP);
13241 if (lst != NULL)
13242 *lst = NULL;
13243 if (string == NULL)
13244 return(-1);
13246 size = xmlStrlen(string);
13248 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13249 if (ctxt == NULL) return(-1);
13250 ctxt->userData = ctxt;
13251 if (sax != NULL) {
13252 oldsax = ctxt->sax;
13253 ctxt->sax = sax;
13254 if (user_data != NULL)
13255 ctxt->userData = user_data;
13257 newDoc = xmlNewDoc(BAD_CAST "1.0");
13258 if (newDoc == NULL) {
13259 xmlFreeParserCtxt(ctxt);
13260 return(-1);
13262 newDoc->properties = XML_DOC_INTERNAL;
13263 if ((doc != NULL) && (doc->dict != NULL)) {
13264 xmlDictFree(ctxt->dict);
13265 ctxt->dict = doc->dict;
13266 xmlDictReference(ctxt->dict);
13267 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13268 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13269 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13270 ctxt->dictNames = 1;
13271 } else {
13272 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13274 if (doc != NULL) {
13275 newDoc->intSubset = doc->intSubset;
13276 newDoc->extSubset = doc->extSubset;
13278 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13279 if (newRoot == NULL) {
13280 if (sax != NULL)
13281 ctxt->sax = oldsax;
13282 xmlFreeParserCtxt(ctxt);
13283 newDoc->intSubset = NULL;
13284 newDoc->extSubset = NULL;
13285 xmlFreeDoc(newDoc);
13286 return(-1);
13288 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13289 nodePush(ctxt, newRoot);
13290 if (doc == NULL) {
13291 ctxt->myDoc = newDoc;
13292 } else {
13293 ctxt->myDoc = newDoc;
13294 newDoc->children->doc = doc;
13295 /* Ensure that doc has XML spec namespace */
13296 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13297 newDoc->oldNs = doc->oldNs;
13299 ctxt->instate = XML_PARSER_CONTENT;
13300 ctxt->depth = depth;
13303 * Doing validity checking on chunk doesn't make sense
13305 ctxt->validate = 0;
13306 ctxt->loadsubset = 0;
13307 xmlDetectSAX2(ctxt);
13309 if ( doc != NULL ){
13310 content = doc->children;
13311 doc->children = NULL;
13312 xmlParseContent(ctxt);
13313 doc->children = content;
13315 else {
13316 xmlParseContent(ctxt);
13318 if ((RAW == '<') && (NXT(1) == '/')) {
13319 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13320 } else if (RAW != 0) {
13321 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13323 if (ctxt->node != newDoc->children) {
13324 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13327 if (!ctxt->wellFormed) {
13328 if (ctxt->errNo == 0)
13329 ret = 1;
13330 else
13331 ret = ctxt->errNo;
13332 } else {
13333 ret = 0;
13336 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13337 xmlNodePtr cur;
13340 * Return the newly created nodeset after unlinking it from
13341 * they pseudo parent.
13343 cur = newDoc->children->children;
13344 *lst = cur;
13345 while (cur != NULL) {
13346 xmlSetTreeDoc(cur, doc);
13347 cur->parent = NULL;
13348 cur = cur->next;
13350 newDoc->children->children = NULL;
13353 if (sax != NULL)
13354 ctxt->sax = oldsax;
13355 xmlFreeParserCtxt(ctxt);
13356 newDoc->intSubset = NULL;
13357 newDoc->extSubset = NULL;
13358 newDoc->oldNs = NULL;
13359 xmlFreeDoc(newDoc);
13361 return(ret);
13365 * xmlSAXParseEntity:
13366 * @sax: the SAX handler block
13367 * @filename: the filename
13369 * parse an XML external entity out of context and build a tree.
13370 * It use the given SAX function block to handle the parsing callback.
13371 * If sax is NULL, fallback to the default DOM tree building routines.
13373 * [78] extParsedEnt ::= TextDecl? content
13375 * This correspond to a "Well Balanced" chunk
13377 * Returns the resulting document tree
13380 xmlDocPtr
13381 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13382 xmlDocPtr ret;
13383 xmlParserCtxtPtr ctxt;
13385 ctxt = xmlCreateFileParserCtxt(filename);
13386 if (ctxt == NULL) {
13387 return(NULL);
13389 if (sax != NULL) {
13390 if (ctxt->sax != NULL)
13391 xmlFree(ctxt->sax);
13392 ctxt->sax = sax;
13393 ctxt->userData = NULL;
13396 xmlParseExtParsedEnt(ctxt);
13398 if (ctxt->wellFormed)
13399 ret = ctxt->myDoc;
13400 else {
13401 ret = NULL;
13402 xmlFreeDoc(ctxt->myDoc);
13403 ctxt->myDoc = NULL;
13405 if (sax != NULL)
13406 ctxt->sax = NULL;
13407 xmlFreeParserCtxt(ctxt);
13409 return(ret);
13413 * xmlParseEntity:
13414 * @filename: the filename
13416 * parse an XML external entity out of context and build a tree.
13418 * [78] extParsedEnt ::= TextDecl? content
13420 * This correspond to a "Well Balanced" chunk
13422 * Returns the resulting document tree
13425 xmlDocPtr
13426 xmlParseEntity(const char *filename) {
13427 return(xmlSAXParseEntity(NULL, filename));
13429 #endif /* LIBXML_SAX1_ENABLED */
13432 * xmlCreateEntityParserCtxtInternal:
13433 * @URL: the entity URL
13434 * @ID: the entity PUBLIC ID
13435 * @base: a possible base for the target URI
13436 * @pctx: parser context used to set options on new context
13438 * Create a parser context for an external entity
13439 * Automatic support for ZLIB/Compress compressed document is provided
13440 * by default if found at compile-time.
13442 * Returns the new parser context or NULL
13444 static xmlParserCtxtPtr
13445 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13446 const xmlChar *base, xmlParserCtxtPtr pctx) {
13447 xmlParserCtxtPtr ctxt;
13448 xmlParserInputPtr inputStream;
13449 char *directory = NULL;
13450 xmlChar *uri;
13452 ctxt = xmlNewParserCtxt();
13453 if (ctxt == NULL) {
13454 return(NULL);
13457 if (pctx != NULL) {
13458 ctxt->options = pctx->options;
13459 ctxt->_private = pctx->_private;
13462 uri = xmlBuildURI(URL, base);
13464 if (uri == NULL) {
13465 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13466 if (inputStream == NULL) {
13467 xmlFreeParserCtxt(ctxt);
13468 return(NULL);
13471 inputPush(ctxt, inputStream);
13473 if ((ctxt->directory == NULL) && (directory == NULL))
13474 directory = xmlParserGetDirectory((char *)URL);
13475 if ((ctxt->directory == NULL) && (directory != NULL))
13476 ctxt->directory = directory;
13477 } else {
13478 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13479 if (inputStream == NULL) {
13480 xmlFree(uri);
13481 xmlFreeParserCtxt(ctxt);
13482 return(NULL);
13485 inputPush(ctxt, inputStream);
13487 if ((ctxt->directory == NULL) && (directory == NULL))
13488 directory = xmlParserGetDirectory((char *)uri);
13489 if ((ctxt->directory == NULL) && (directory != NULL))
13490 ctxt->directory = directory;
13491 xmlFree(uri);
13493 return(ctxt);
13497 * xmlCreateEntityParserCtxt:
13498 * @URL: the entity URL
13499 * @ID: the entity PUBLIC ID
13500 * @base: a possible base for the target URI
13502 * Create a parser context for an external entity
13503 * Automatic support for ZLIB/Compress compressed document is provided
13504 * by default if found at compile-time.
13506 * Returns the new parser context or NULL
13508 xmlParserCtxtPtr
13509 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13510 const xmlChar *base) {
13511 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13515 /************************************************************************
13517 * Front ends when parsing from a file *
13519 ************************************************************************/
13522 * xmlCreateURLParserCtxt:
13523 * @filename: the filename or URL
13524 * @options: a combination of xmlParserOption
13526 * Create a parser context for a file or URL content.
13527 * Automatic support for ZLIB/Compress compressed document is provided
13528 * by default if found at compile-time and for file accesses
13530 * Returns the new parser context or NULL
13532 xmlParserCtxtPtr
13533 xmlCreateURLParserCtxt(const char *filename, int options)
13535 xmlParserCtxtPtr ctxt;
13536 xmlParserInputPtr inputStream;
13537 char *directory = NULL;
13539 ctxt = xmlNewParserCtxt();
13540 if (ctxt == NULL) {
13541 xmlErrMemory(NULL, "cannot allocate parser context");
13542 return(NULL);
13545 if (options)
13546 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13547 ctxt->linenumbers = 1;
13549 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13550 if (inputStream == NULL) {
13551 xmlFreeParserCtxt(ctxt);
13552 return(NULL);
13555 inputPush(ctxt, inputStream);
13556 if ((ctxt->directory == NULL) && (directory == NULL))
13557 directory = xmlParserGetDirectory(filename);
13558 if ((ctxt->directory == NULL) && (directory != NULL))
13559 ctxt->directory = directory;
13561 return(ctxt);
13565 * xmlCreateFileParserCtxt:
13566 * @filename: the filename
13568 * Create a parser context for a file content.
13569 * Automatic support for ZLIB/Compress compressed document is provided
13570 * by default if found at compile-time.
13572 * Returns the new parser context or NULL
13574 xmlParserCtxtPtr
13575 xmlCreateFileParserCtxt(const char *filename)
13577 return(xmlCreateURLParserCtxt(filename, 0));
13580 #ifdef LIBXML_SAX1_ENABLED
13582 * xmlSAXParseFileWithData:
13583 * @sax: the SAX handler block
13584 * @filename: the filename
13585 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13586 * documents
13587 * @data: the userdata
13589 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13590 * compressed document is provided by default if found at compile-time.
13591 * It use the given SAX function block to handle the parsing callback.
13592 * If sax is NULL, fallback to the default DOM tree building routines.
13594 * User data (void *) is stored within the parser context in the
13595 * context's _private member, so it is available nearly everywhere in libxml
13597 * Returns the resulting document tree
13600 xmlDocPtr
13601 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13602 int recovery, void *data) {
13603 xmlDocPtr ret;
13604 xmlParserCtxtPtr ctxt;
13606 xmlInitParser();
13608 ctxt = xmlCreateFileParserCtxt(filename);
13609 if (ctxt == NULL) {
13610 return(NULL);
13612 if (sax != NULL) {
13613 if (ctxt->sax != NULL)
13614 xmlFree(ctxt->sax);
13615 ctxt->sax = sax;
13617 xmlDetectSAX2(ctxt);
13618 if (data!=NULL) {
13619 ctxt->_private = data;
13622 if (ctxt->directory == NULL)
13623 ctxt->directory = xmlParserGetDirectory(filename);
13625 ctxt->recovery = recovery;
13627 xmlParseDocument(ctxt);
13629 if ((ctxt->wellFormed) || recovery) {
13630 ret = ctxt->myDoc;
13631 if (ret != NULL) {
13632 if (ctxt->input->buf->compressed > 0)
13633 ret->compression = 9;
13634 else
13635 ret->compression = ctxt->input->buf->compressed;
13638 else {
13639 ret = NULL;
13640 xmlFreeDoc(ctxt->myDoc);
13641 ctxt->myDoc = NULL;
13643 if (sax != NULL)
13644 ctxt->sax = NULL;
13645 xmlFreeParserCtxt(ctxt);
13647 return(ret);
13651 * xmlSAXParseFile:
13652 * @sax: the SAX handler block
13653 * @filename: the filename
13654 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13655 * documents
13657 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13658 * compressed document is provided by default if found at compile-time.
13659 * It use the given SAX function block to handle the parsing callback.
13660 * If sax is NULL, fallback to the default DOM tree building routines.
13662 * Returns the resulting document tree
13665 xmlDocPtr
13666 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13667 int recovery) {
13668 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13672 * xmlRecoverDoc:
13673 * @cur: a pointer to an array of xmlChar
13675 * parse an XML in-memory document and build a tree.
13676 * In the case the document is not Well Formed, a attempt to build a
13677 * tree is tried anyway
13679 * Returns the resulting document tree or NULL in case of failure
13682 xmlDocPtr
13683 xmlRecoverDoc(const xmlChar *cur) {
13684 return(xmlSAXParseDoc(NULL, cur, 1));
13688 * xmlParseFile:
13689 * @filename: the filename
13691 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13692 * compressed document is provided by default if found at compile-time.
13694 * Returns the resulting document tree if the file was wellformed,
13695 * NULL otherwise.
13698 xmlDocPtr
13699 xmlParseFile(const char *filename) {
13700 return(xmlSAXParseFile(NULL, filename, 0));
13704 * xmlRecoverFile:
13705 * @filename: the filename
13707 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13708 * compressed document is provided by default if found at compile-time.
13709 * In the case the document is not Well Formed, it attempts to build
13710 * a tree anyway
13712 * Returns the resulting document tree or NULL in case of failure
13715 xmlDocPtr
13716 xmlRecoverFile(const char *filename) {
13717 return(xmlSAXParseFile(NULL, filename, 1));
13722 * xmlSetupParserForBuffer:
13723 * @ctxt: an XML parser context
13724 * @buffer: a xmlChar * buffer
13725 * @filename: a file name
13727 * Setup the parser context to parse a new buffer; Clears any prior
13728 * contents from the parser context. The buffer parameter must not be
13729 * NULL, but the filename parameter can be
13731 void
13732 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13733 const char* filename)
13735 xmlParserInputPtr input;
13737 if ((ctxt == NULL) || (buffer == NULL))
13738 return;
13740 input = xmlNewInputStream(ctxt);
13741 if (input == NULL) {
13742 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13743 xmlClearParserCtxt(ctxt);
13744 return;
13747 xmlClearParserCtxt(ctxt);
13748 if (filename != NULL)
13749 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13750 input->base = buffer;
13751 input->cur = buffer;
13752 input->end = &buffer[xmlStrlen(buffer)];
13753 inputPush(ctxt, input);
13757 * xmlSAXUserParseFile:
13758 * @sax: a SAX handler
13759 * @user_data: The user data returned on SAX callbacks
13760 * @filename: a file name
13762 * parse an XML file and call the given SAX handler routines.
13763 * Automatic support for ZLIB/Compress compressed document is provided
13765 * Returns 0 in case of success or a error number otherwise
13768 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13769 const char *filename) {
13770 int ret = 0;
13771 xmlParserCtxtPtr ctxt;
13773 ctxt = xmlCreateFileParserCtxt(filename);
13774 if (ctxt == NULL) return -1;
13775 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13776 xmlFree(ctxt->sax);
13777 ctxt->sax = sax;
13778 xmlDetectSAX2(ctxt);
13780 if (user_data != NULL)
13781 ctxt->userData = user_data;
13783 xmlParseDocument(ctxt);
13785 if (ctxt->wellFormed)
13786 ret = 0;
13787 else {
13788 if (ctxt->errNo != 0)
13789 ret = ctxt->errNo;
13790 else
13791 ret = -1;
13793 if (sax != NULL)
13794 ctxt->sax = NULL;
13795 if (ctxt->myDoc != NULL) {
13796 xmlFreeDoc(ctxt->myDoc);
13797 ctxt->myDoc = NULL;
13799 xmlFreeParserCtxt(ctxt);
13801 return ret;
13803 #endif /* LIBXML_SAX1_ENABLED */
13805 /************************************************************************
13807 * Front ends when parsing from memory *
13809 ************************************************************************/
13812 * xmlCreateMemoryParserCtxt:
13813 * @buffer: a pointer to a char array
13814 * @size: the size of the array
13816 * Create a parser context for an XML in-memory document.
13818 * Returns the new parser context or NULL
13820 xmlParserCtxtPtr
13821 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13822 xmlParserCtxtPtr ctxt;
13823 xmlParserInputPtr input;
13824 xmlParserInputBufferPtr buf;
13826 if (buffer == NULL)
13827 return(NULL);
13828 if (size <= 0)
13829 return(NULL);
13831 ctxt = xmlNewParserCtxt();
13832 if (ctxt == NULL)
13833 return(NULL);
13835 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13836 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13837 if (buf == NULL) {
13838 xmlFreeParserCtxt(ctxt);
13839 return(NULL);
13842 input = xmlNewInputStream(ctxt);
13843 if (input == NULL) {
13844 xmlFreeParserInputBuffer(buf);
13845 xmlFreeParserCtxt(ctxt);
13846 return(NULL);
13849 input->filename = NULL;
13850 input->buf = buf;
13851 input->base = input->buf->buffer->content;
13852 input->cur = input->buf->buffer->content;
13853 input->end = &input->buf->buffer->content[input->buf->buffer->use];
13855 inputPush(ctxt, input);
13856 return(ctxt);
13859 #ifdef LIBXML_SAX1_ENABLED
13861 * xmlSAXParseMemoryWithData:
13862 * @sax: the SAX handler block
13863 * @buffer: an pointer to a char array
13864 * @size: the size of the array
13865 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13866 * documents
13867 * @data: the userdata
13869 * parse an XML in-memory block and use the given SAX function block
13870 * to handle the parsing callback. If sax is NULL, fallback to the default
13871 * DOM tree building routines.
13873 * User data (void *) is stored within the parser context in the
13874 * context's _private member, so it is available nearly everywhere in libxml
13876 * Returns the resulting document tree
13879 xmlDocPtr
13880 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13881 int size, int recovery, void *data) {
13882 xmlDocPtr ret;
13883 xmlParserCtxtPtr ctxt;
13885 xmlInitParser();
13887 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13888 if (ctxt == NULL) return(NULL);
13889 if (sax != NULL) {
13890 if (ctxt->sax != NULL)
13891 xmlFree(ctxt->sax);
13892 ctxt->sax = sax;
13894 xmlDetectSAX2(ctxt);
13895 if (data!=NULL) {
13896 ctxt->_private=data;
13899 ctxt->recovery = recovery;
13901 xmlParseDocument(ctxt);
13903 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13904 else {
13905 ret = NULL;
13906 xmlFreeDoc(ctxt->myDoc);
13907 ctxt->myDoc = NULL;
13909 if (sax != NULL)
13910 ctxt->sax = NULL;
13911 xmlFreeParserCtxt(ctxt);
13913 return(ret);
13917 * xmlSAXParseMemory:
13918 * @sax: the SAX handler block
13919 * @buffer: an pointer to a char array
13920 * @size: the size of the array
13921 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13922 * documents
13924 * parse an XML in-memory block and use the given SAX function block
13925 * to handle the parsing callback. If sax is NULL, fallback to the default
13926 * DOM tree building routines.
13928 * Returns the resulting document tree
13930 xmlDocPtr
13931 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13932 int size, int recovery) {
13933 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13937 * xmlParseMemory:
13938 * @buffer: an pointer to a char array
13939 * @size: the size of the array
13941 * parse an XML in-memory block and build a tree.
13943 * Returns the resulting document tree
13946 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13947 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13951 * xmlRecoverMemory:
13952 * @buffer: an pointer to a char array
13953 * @size: the size of the array
13955 * parse an XML in-memory block and build a tree.
13956 * In the case the document is not Well Formed, an attempt to
13957 * build a tree is tried anyway
13959 * Returns the resulting document tree or NULL in case of error
13962 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13963 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13967 * xmlSAXUserParseMemory:
13968 * @sax: a SAX handler
13969 * @user_data: The user data returned on SAX callbacks
13970 * @buffer: an in-memory XML document input
13971 * @size: the length of the XML document in bytes
13973 * A better SAX parsing routine.
13974 * parse an XML in-memory buffer and call the given SAX handler routines.
13976 * Returns 0 in case of success or a error number otherwise
13978 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13979 const char *buffer, int size) {
13980 int ret = 0;
13981 xmlParserCtxtPtr ctxt;
13983 xmlInitParser();
13985 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13986 if (ctxt == NULL) return -1;
13987 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13988 xmlFree(ctxt->sax);
13989 ctxt->sax = sax;
13990 xmlDetectSAX2(ctxt);
13992 if (user_data != NULL)
13993 ctxt->userData = user_data;
13995 xmlParseDocument(ctxt);
13997 if (ctxt->wellFormed)
13998 ret = 0;
13999 else {
14000 if (ctxt->errNo != 0)
14001 ret = ctxt->errNo;
14002 else
14003 ret = -1;
14005 if (sax != NULL)
14006 ctxt->sax = NULL;
14007 if (ctxt->myDoc != NULL) {
14008 xmlFreeDoc(ctxt->myDoc);
14009 ctxt->myDoc = NULL;
14011 xmlFreeParserCtxt(ctxt);
14013 return ret;
14015 #endif /* LIBXML_SAX1_ENABLED */
14018 * xmlCreateDocParserCtxt:
14019 * @cur: a pointer to an array of xmlChar
14021 * Creates a parser context for an XML in-memory document.
14023 * Returns the new parser context or NULL
14025 xmlParserCtxtPtr
14026 xmlCreateDocParserCtxt(const xmlChar *cur) {
14027 int len;
14029 if (cur == NULL)
14030 return(NULL);
14031 len = xmlStrlen(cur);
14032 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14035 #ifdef LIBXML_SAX1_ENABLED
14037 * xmlSAXParseDoc:
14038 * @sax: the SAX handler block
14039 * @cur: a pointer to an array of xmlChar
14040 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14041 * documents
14043 * parse an XML in-memory document and build a tree.
14044 * It use the given SAX function block to handle the parsing callback.
14045 * If sax is NULL, fallback to the default DOM tree building routines.
14047 * Returns the resulting document tree
14050 xmlDocPtr
14051 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14052 xmlDocPtr ret;
14053 xmlParserCtxtPtr ctxt;
14054 xmlSAXHandlerPtr oldsax = NULL;
14056 if (cur == NULL) return(NULL);
14059 ctxt = xmlCreateDocParserCtxt(cur);
14060 if (ctxt == NULL) return(NULL);
14061 if (sax != NULL) {
14062 oldsax = ctxt->sax;
14063 ctxt->sax = sax;
14064 ctxt->userData = NULL;
14066 xmlDetectSAX2(ctxt);
14068 xmlParseDocument(ctxt);
14069 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14070 else {
14071 ret = NULL;
14072 xmlFreeDoc(ctxt->myDoc);
14073 ctxt->myDoc = NULL;
14075 if (sax != NULL)
14076 ctxt->sax = oldsax;
14077 xmlFreeParserCtxt(ctxt);
14079 return(ret);
14083 * xmlParseDoc:
14084 * @cur: a pointer to an array of xmlChar
14086 * parse an XML in-memory document and build a tree.
14088 * Returns the resulting document tree
14091 xmlDocPtr
14092 xmlParseDoc(const xmlChar *cur) {
14093 return(xmlSAXParseDoc(NULL, cur, 0));
14095 #endif /* LIBXML_SAX1_ENABLED */
14097 #ifdef LIBXML_LEGACY_ENABLED
14098 /************************************************************************
14100 * Specific function to keep track of entities references *
14101 * and used by the XSLT debugger *
14103 ************************************************************************/
14105 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14108 * xmlAddEntityReference:
14109 * @ent : A valid entity
14110 * @firstNode : A valid first node for children of entity
14111 * @lastNode : A valid last node of children entity
14113 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14115 static void
14116 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14117 xmlNodePtr lastNode)
14119 if (xmlEntityRefFunc != NULL) {
14120 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14126 * xmlSetEntityReferenceFunc:
14127 * @func: A valid function
14129 * Set the function to call call back when a xml reference has been made
14131 void
14132 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14134 xmlEntityRefFunc = func;
14136 #endif /* LIBXML_LEGACY_ENABLED */
14138 /************************************************************************
14140 * Miscellaneous *
14142 ************************************************************************/
14144 #ifdef LIBXML_XPATH_ENABLED
14145 #include <libxml/xpath.h>
14146 #endif
14148 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14149 static int xmlParserInitialized = 0;
14152 * xmlInitParser:
14154 * Initialization function for the XML parser.
14155 * This is not reentrant. Call once before processing in case of
14156 * use in multithreaded programs.
14159 void
14160 xmlInitParser(void) {
14161 if (xmlParserInitialized != 0)
14162 return;
14164 #ifdef LIBXML_THREAD_ENABLED
14165 __xmlGlobalInitMutexLock();
14166 if (xmlParserInitialized == 0) {
14167 #endif
14168 xmlInitGlobals();
14169 xmlInitThreads();
14170 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14171 (xmlGenericError == NULL))
14172 initGenericErrorDefaultFunc(NULL);
14173 xmlInitMemory();
14174 xmlInitCharEncodingHandlers();
14175 xmlDefaultSAXHandlerInit();
14176 xmlRegisterDefaultInputCallbacks();
14177 #ifdef LIBXML_OUTPUT_ENABLED
14178 xmlRegisterDefaultOutputCallbacks();
14179 #endif /* LIBXML_OUTPUT_ENABLED */
14180 #ifdef LIBXML_HTML_ENABLED
14181 htmlInitAutoClose();
14182 htmlDefaultSAXHandlerInit();
14183 #endif
14184 #ifdef LIBXML_XPATH_ENABLED
14185 xmlXPathInit();
14186 #endif
14187 xmlParserInitialized = 1;
14188 #ifdef LIBXML_THREAD_ENABLED
14190 __xmlGlobalInitMutexUnlock();
14191 #endif
14195 * xmlCleanupParser:
14197 * This function name is somewhat misleading. It does not clean up
14198 * parser state, it cleans up memory allocated by the library itself.
14199 * It is a cleanup function for the XML library. It tries to reclaim all
14200 * related global memory allocated for the library processing.
14201 * It doesn't deallocate any document related memory. One should
14202 * call xmlCleanupParser() only when the process has finished using
14203 * the library and all XML/HTML documents built with it.
14204 * See also xmlInitParser() which has the opposite function of preparing
14205 * the library for operations.
14207 * WARNING: if your application is multithreaded or has plugin support
14208 * calling this may crash the application if another thread or
14209 * a plugin is still using libxml2. It's sometimes very hard to
14210 * guess if libxml2 is in use in the application, some libraries
14211 * or plugins may use it without notice. In case of doubt abstain
14212 * from calling this function or do it just before calling exit()
14213 * to avoid leak reports from valgrind !
14216 void
14217 xmlCleanupParser(void) {
14218 if (!xmlParserInitialized)
14219 return;
14221 xmlCleanupCharEncodingHandlers();
14222 #ifdef LIBXML_CATALOG_ENABLED
14223 xmlCatalogCleanup();
14224 #endif
14225 xmlDictCleanup();
14226 xmlCleanupInputCallbacks();
14227 #ifdef LIBXML_OUTPUT_ENABLED
14228 xmlCleanupOutputCallbacks();
14229 #endif
14230 #ifdef LIBXML_SCHEMAS_ENABLED
14231 xmlSchemaCleanupTypes();
14232 xmlRelaxNGCleanupTypes();
14233 #endif
14234 xmlCleanupGlobals();
14235 xmlResetLastError();
14236 xmlCleanupThreads(); /* must be last if called not from the main thread */
14237 xmlCleanupMemory();
14238 xmlParserInitialized = 0;
14241 /************************************************************************
14243 * New set (2.6.0) of simpler and more flexible APIs *
14245 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The macro relies on a variable named "dict" being visible where it is
 * expanded. It is wrapped in do { } while (0) so that "DICT_FREE(x);"
 * behaves as exactly one statement, including inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14260 * xmlCtxtReset:
14261 * @ctxt: an XML parser context
14263 * Reset a parser context
14265 void
14266 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14268 xmlParserInputPtr input;
14269 xmlDictPtr dict;
14271 if (ctxt == NULL)
14272 return;
14274 dict = ctxt->dict;
14276 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14277 xmlFreeInputStream(input);
14279 ctxt->inputNr = 0;
14280 ctxt->input = NULL;
14282 ctxt->spaceNr = 0;
14283 if (ctxt->spaceTab != NULL) {
14284 ctxt->spaceTab[0] = -1;
14285 ctxt->space = &ctxt->spaceTab[0];
14286 } else {
14287 ctxt->space = NULL;
14291 ctxt->nodeNr = 0;
14292 ctxt->node = NULL;
14294 ctxt->nameNr = 0;
14295 ctxt->name = NULL;
14297 DICT_FREE(ctxt->version);
14298 ctxt->version = NULL;
14299 DICT_FREE(ctxt->encoding);
14300 ctxt->encoding = NULL;
14301 DICT_FREE(ctxt->directory);
14302 ctxt->directory = NULL;
14303 DICT_FREE(ctxt->extSubURI);
14304 ctxt->extSubURI = NULL;
14305 DICT_FREE(ctxt->extSubSystem);
14306 ctxt->extSubSystem = NULL;
14307 if (ctxt->myDoc != NULL)
14308 xmlFreeDoc(ctxt->myDoc);
14309 ctxt->myDoc = NULL;
14311 ctxt->standalone = -1;
14312 ctxt->hasExternalSubset = 0;
14313 ctxt->hasPErefs = 0;
14314 ctxt->html = 0;
14315 ctxt->external = 0;
14316 ctxt->instate = XML_PARSER_START;
14317 ctxt->token = 0;
14319 ctxt->wellFormed = 1;
14320 ctxt->nsWellFormed = 1;
14321 ctxt->disableSAX = 0;
14322 ctxt->valid = 1;
14323 #if 0
14324 ctxt->vctxt.userData = ctxt;
14325 ctxt->vctxt.error = xmlParserValidityError;
14326 ctxt->vctxt.warning = xmlParserValidityWarning;
14327 #endif
14328 ctxt->record_info = 0;
14329 ctxt->nbChars = 0;
14330 ctxt->checkIndex = 0;
14331 ctxt->inSubset = 0;
14332 ctxt->errNo = XML_ERR_OK;
14333 ctxt->depth = 0;
14334 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14335 ctxt->catalogs = NULL;
14336 ctxt->nbentities = 0;
14337 ctxt->sizeentities = 0;
14338 xmlInitNodeInfoSeq(&ctxt->node_seq);
14340 if (ctxt->attsDefault != NULL) {
14341 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14342 ctxt->attsDefault = NULL;
14344 if (ctxt->attsSpecial != NULL) {
14345 xmlHashFree(ctxt->attsSpecial, NULL);
14346 ctxt->attsSpecial = NULL;
14349 #ifdef LIBXML_CATALOG_ENABLED
14350 if (ctxt->catalogs != NULL)
14351 xmlCatalogFreeLocal(ctxt->catalogs);
14352 #endif
14353 if (ctxt->lastError.code != XML_ERR_OK)
14354 xmlResetError(&ctxt->lastError);
14358 * xmlCtxtResetPush:
14359 * @ctxt: an XML parser context
14360 * @chunk: a pointer to an array of chars
14361 * @size: number of chars in the array
14362 * @filename: an optional file name or URI
14363 * @encoding: the document encoding, or NULL
14365 * Reset a push parser context
14367 * Returns 0 in case of success and 1 in case of error
14370 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14371 int size, const char *filename, const char *encoding)
14373 xmlParserInputPtr inputStream;
14374 xmlParserInputBufferPtr buf;
14375 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14377 if (ctxt == NULL)
14378 return(1);
14380 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14381 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14383 buf = xmlAllocParserInputBuffer(enc);
14384 if (buf == NULL)
14385 return(1);
14387 if (ctxt == NULL) {
14388 xmlFreeParserInputBuffer(buf);
14389 return(1);
14392 xmlCtxtReset(ctxt);
14394 if (ctxt->pushTab == NULL) {
14395 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14396 sizeof(xmlChar *));
14397 if (ctxt->pushTab == NULL) {
14398 xmlErrMemory(ctxt, NULL);
14399 xmlFreeParserInputBuffer(buf);
14400 return(1);
14404 if (filename == NULL) {
14405 ctxt->directory = NULL;
14406 } else {
14407 ctxt->directory = xmlParserGetDirectory(filename);
14410 inputStream = xmlNewInputStream(ctxt);
14411 if (inputStream == NULL) {
14412 xmlFreeParserInputBuffer(buf);
14413 return(1);
14416 if (filename == NULL)
14417 inputStream->filename = NULL;
14418 else
14419 inputStream->filename = (char *)
14420 xmlCanonicPath((const xmlChar *) filename);
14421 inputStream->buf = buf;
14422 inputStream->base = inputStream->buf->buffer->content;
14423 inputStream->cur = inputStream->buf->buffer->content;
14424 inputStream->end =
14425 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14427 inputPush(ctxt, inputStream);
14429 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14430 (ctxt->input->buf != NULL)) {
14431 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14432 int cur = ctxt->input->cur - ctxt->input->base;
14434 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14436 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14437 ctxt->input->cur = ctxt->input->base + cur;
14438 ctxt->input->end =
14439 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14440 use];
14441 #ifdef DEBUG_PUSH
14442 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14443 #endif
14446 if (encoding != NULL) {
14447 xmlCharEncodingHandlerPtr hdlr;
14449 if (ctxt->encoding != NULL)
14450 xmlFree((xmlChar *) ctxt->encoding);
14451 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14453 hdlr = xmlFindCharEncodingHandler(encoding);
14454 if (hdlr != NULL) {
14455 xmlSwitchToEncoding(ctxt, hdlr);
14456 } else {
14457 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14458 "Unsupported encoding %s\n", BAD_CAST encoding);
14460 } else if (enc != XML_CHAR_ENCODING_NONE) {
14461 xmlSwitchEncoding(ctxt, enc);
14464 return(0);
14469 * xmlCtxtUseOptionsInternal:
14470 * @ctxt: an XML parser context
14471 * @options: a combination of xmlParserOption
14472 * @encoding: the user provided encoding to use
14474 * Applies the options to the parser context
14476 * Returns 0 in case of success, the set of unknown or unimplemented options
14477 * in case of error.
14479 static int
14480 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14482 if (ctxt == NULL)
14483 return(-1);
14484 if (encoding != NULL) {
14485 if (ctxt->encoding != NULL)
14486 xmlFree((xmlChar *) ctxt->encoding);
14487 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14489 if (options & XML_PARSE_RECOVER) {
14490 ctxt->recovery = 1;
14491 options -= XML_PARSE_RECOVER;
14492 ctxt->options |= XML_PARSE_RECOVER;
14493 } else
14494 ctxt->recovery = 0;
14495 if (options & XML_PARSE_DTDLOAD) {
14496 ctxt->loadsubset = XML_DETECT_IDS;
14497 options -= XML_PARSE_DTDLOAD;
14498 ctxt->options |= XML_PARSE_DTDLOAD;
14499 } else
14500 ctxt->loadsubset = 0;
14501 if (options & XML_PARSE_DTDATTR) {
14502 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14503 options -= XML_PARSE_DTDATTR;
14504 ctxt->options |= XML_PARSE_DTDATTR;
14506 if (options & XML_PARSE_NOENT) {
14507 ctxt->replaceEntities = 1;
14508 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14509 options -= XML_PARSE_NOENT;
14510 ctxt->options |= XML_PARSE_NOENT;
14511 } else
14512 ctxt->replaceEntities = 0;
14513 if (options & XML_PARSE_PEDANTIC) {
14514 ctxt->pedantic = 1;
14515 options -= XML_PARSE_PEDANTIC;
14516 ctxt->options |= XML_PARSE_PEDANTIC;
14517 } else
14518 ctxt->pedantic = 0;
14519 if (options & XML_PARSE_NOBLANKS) {
14520 ctxt->keepBlanks = 0;
14521 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14522 options -= XML_PARSE_NOBLANKS;
14523 ctxt->options |= XML_PARSE_NOBLANKS;
14524 } else
14525 ctxt->keepBlanks = 1;
14526 if (options & XML_PARSE_DTDVALID) {
14527 ctxt->validate = 1;
14528 if (options & XML_PARSE_NOWARNING)
14529 ctxt->vctxt.warning = NULL;
14530 if (options & XML_PARSE_NOERROR)
14531 ctxt->vctxt.error = NULL;
14532 options -= XML_PARSE_DTDVALID;
14533 ctxt->options |= XML_PARSE_DTDVALID;
14534 } else
14535 ctxt->validate = 0;
14536 if (options & XML_PARSE_NOWARNING) {
14537 ctxt->sax->warning = NULL;
14538 options -= XML_PARSE_NOWARNING;
14540 if (options & XML_PARSE_NOERROR) {
14541 ctxt->sax->error = NULL;
14542 ctxt->sax->fatalError = NULL;
14543 options -= XML_PARSE_NOERROR;
14545 #ifdef LIBXML_SAX1_ENABLED
14546 if (options & XML_PARSE_SAX1) {
14547 ctxt->sax->startElement = xmlSAX2StartElement;
14548 ctxt->sax->endElement = xmlSAX2EndElement;
14549 ctxt->sax->startElementNs = NULL;
14550 ctxt->sax->endElementNs = NULL;
14551 ctxt->sax->initialized = 1;
14552 options -= XML_PARSE_SAX1;
14553 ctxt->options |= XML_PARSE_SAX1;
14555 #endif /* LIBXML_SAX1_ENABLED */
14556 if (options & XML_PARSE_NODICT) {
14557 ctxt->dictNames = 0;
14558 options -= XML_PARSE_NODICT;
14559 ctxt->options |= XML_PARSE_NODICT;
14560 } else {
14561 ctxt->dictNames = 1;
14563 if (options & XML_PARSE_NOCDATA) {
14564 ctxt->sax->cdataBlock = NULL;
14565 options -= XML_PARSE_NOCDATA;
14566 ctxt->options |= XML_PARSE_NOCDATA;
14568 if (options & XML_PARSE_NSCLEAN) {
14569 ctxt->options |= XML_PARSE_NSCLEAN;
14570 options -= XML_PARSE_NSCLEAN;
14572 if (options & XML_PARSE_NONET) {
14573 ctxt->options |= XML_PARSE_NONET;
14574 options -= XML_PARSE_NONET;
14576 if (options & XML_PARSE_COMPACT) {
14577 ctxt->options |= XML_PARSE_COMPACT;
14578 options -= XML_PARSE_COMPACT;
14580 if (options & XML_PARSE_OLD10) {
14581 ctxt->options |= XML_PARSE_OLD10;
14582 options -= XML_PARSE_OLD10;
14584 if (options & XML_PARSE_NOBASEFIX) {
14585 ctxt->options |= XML_PARSE_NOBASEFIX;
14586 options -= XML_PARSE_NOBASEFIX;
14588 if (options & XML_PARSE_HUGE) {
14589 ctxt->options |= XML_PARSE_HUGE;
14590 options -= XML_PARSE_HUGE;
14592 if (options & XML_PARSE_OLDSAX) {
14593 ctxt->options |= XML_PARSE_OLDSAX;
14594 options -= XML_PARSE_OLDSAX;
14596 ctxt->linenumbers = 1;
14597 return (options);
14601 * xmlCtxtUseOptions:
14602 * @ctxt: an XML parser context
14603 * @options: a combination of xmlParserOption
14605 * Applies the options to the parser context
14607 * Returns 0 in case of success, the set of unknown or unimplemented options
14608 * in case of error.
14611 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14613 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14617 * xmlDoRead:
14618 * @ctxt: an XML parser context
14619 * @URL: the base URL to use for the document
14620 * @encoding: the document encoding, or NULL
14621 * @options: a combination of xmlParserOption
14622 * @reuse: keep the context for reuse
14624 * Common front-end for the xmlRead functions
14626 * Returns the resulting document tree or NULL
14628 static xmlDocPtr
14629 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14630 int options, int reuse)
14632 xmlDocPtr ret;
14634 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14635 if (encoding != NULL) {
14636 xmlCharEncodingHandlerPtr hdlr;
14638 hdlr = xmlFindCharEncodingHandler(encoding);
14639 if (hdlr != NULL)
14640 xmlSwitchToEncoding(ctxt, hdlr);
14642 if ((URL != NULL) && (ctxt->input != NULL) &&
14643 (ctxt->input->filename == NULL))
14644 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14645 xmlParseDocument(ctxt);
14646 if ((ctxt->wellFormed) || ctxt->recovery)
14647 ret = ctxt->myDoc;
14648 else {
14649 ret = NULL;
14650 if (ctxt->myDoc != NULL) {
14651 xmlFreeDoc(ctxt->myDoc);
14654 ctxt->myDoc = NULL;
14655 if (!reuse) {
14656 xmlFreeParserCtxt(ctxt);
14659 return (ret);
14663 * xmlReadDoc:
14664 * @cur: a pointer to a zero terminated string
14665 * @URL: the base URL to use for the document
14666 * @encoding: the document encoding, or NULL
14667 * @options: a combination of xmlParserOption
14669 * parse an XML in-memory document and build a tree.
14671 * Returns the resulting document tree
14673 xmlDocPtr
14674 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14676 xmlParserCtxtPtr ctxt;
14678 if (cur == NULL)
14679 return (NULL);
14681 ctxt = xmlCreateDocParserCtxt(cur);
14682 if (ctxt == NULL)
14683 return (NULL);
14684 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14688 * xmlReadFile:
14689 * @filename: a file or URL
14690 * @encoding: the document encoding, or NULL
14691 * @options: a combination of xmlParserOption
14693 * parse an XML file from the filesystem or the network.
14695 * Returns the resulting document tree
14697 xmlDocPtr
14698 xmlReadFile(const char *filename, const char *encoding, int options)
14700 xmlParserCtxtPtr ctxt;
14702 ctxt = xmlCreateURLParserCtxt(filename, options);
14703 if (ctxt == NULL)
14704 return (NULL);
14705 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14709 * xmlReadMemory:
14710 * @buffer: a pointer to a char array
14711 * @size: the size of the array
14712 * @URL: the base URL to use for the document
14713 * @encoding: the document encoding, or NULL
14714 * @options: a combination of xmlParserOption
14716 * parse an XML in-memory document and build a tree.
14718 * Returns the resulting document tree
14720 xmlDocPtr
14721 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14723 xmlParserCtxtPtr ctxt;
14725 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14726 if (ctxt == NULL)
14727 return (NULL);
14728 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14732 * xmlReadFd:
14733 * @fd: an open file descriptor
14734 * @URL: the base URL to use for the document
14735 * @encoding: the document encoding, or NULL
14736 * @options: a combination of xmlParserOption
14738 * parse an XML from a file descriptor and build a tree.
14739 * NOTE that the file descriptor will not be closed when the
14740 * reader is closed or reset.
14742 * Returns the resulting document tree
14744 xmlDocPtr
14745 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14747 xmlParserCtxtPtr ctxt;
14748 xmlParserInputBufferPtr input;
14749 xmlParserInputPtr stream;
14751 if (fd < 0)
14752 return (NULL);
14754 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14755 if (input == NULL)
14756 return (NULL);
14757 input->closecallback = NULL;
14758 ctxt = xmlNewParserCtxt();
14759 if (ctxt == NULL) {
14760 xmlFreeParserInputBuffer(input);
14761 return (NULL);
14763 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14764 if (stream == NULL) {
14765 xmlFreeParserInputBuffer(input);
14766 xmlFreeParserCtxt(ctxt);
14767 return (NULL);
14769 inputPush(ctxt, stream);
14770 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14774 * xmlReadIO:
14775 * @ioread: an I/O read function
14776 * @ioclose: an I/O close function
14777 * @ioctx: an I/O handler
14778 * @URL: the base URL to use for the document
14779 * @encoding: the document encoding, or NULL
14780 * @options: a combination of xmlParserOption
14782 * parse an XML document from I/O functions and source and build a tree.
14784 * Returns the resulting document tree
14786 xmlDocPtr
14787 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14788 void *ioctx, const char *URL, const char *encoding, int options)
14790 xmlParserCtxtPtr ctxt;
14791 xmlParserInputBufferPtr input;
14792 xmlParserInputPtr stream;
14794 if (ioread == NULL)
14795 return (NULL);
14797 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14798 XML_CHAR_ENCODING_NONE);
14799 if (input == NULL)
14800 return (NULL);
14801 ctxt = xmlNewParserCtxt();
14802 if (ctxt == NULL) {
14803 xmlFreeParserInputBuffer(input);
14804 return (NULL);
14806 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14807 if (stream == NULL) {
14808 xmlFreeParserInputBuffer(input);
14809 xmlFreeParserCtxt(ctxt);
14810 return (NULL);
14812 inputPush(ctxt, stream);
14813 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14817 * xmlCtxtReadDoc:
14818 * @ctxt: an XML parser context
14819 * @cur: a pointer to a zero terminated string
14820 * @URL: the base URL to use for the document
14821 * @encoding: the document encoding, or NULL
14822 * @options: a combination of xmlParserOption
14824 * parse an XML in-memory document and build a tree.
14825 * This reuses the existing @ctxt parser context
14827 * Returns the resulting document tree
14829 xmlDocPtr
14830 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14831 const char *URL, const char *encoding, int options)
14833 xmlParserInputPtr stream;
14835 if (cur == NULL)
14836 return (NULL);
14837 if (ctxt == NULL)
14838 return (NULL);
14840 xmlCtxtReset(ctxt);
14842 stream = xmlNewStringInputStream(ctxt, cur);
14843 if (stream == NULL) {
14844 return (NULL);
14846 inputPush(ctxt, stream);
14847 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14851 * xmlCtxtReadFile:
14852 * @ctxt: an XML parser context
14853 * @filename: a file or URL
14854 * @encoding: the document encoding, or NULL
14855 * @options: a combination of xmlParserOption
14857 * parse an XML file from the filesystem or the network.
14858 * This reuses the existing @ctxt parser context
14860 * Returns the resulting document tree
14862 xmlDocPtr
14863 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14864 const char *encoding, int options)
14866 xmlParserInputPtr stream;
14868 if (filename == NULL)
14869 return (NULL);
14870 if (ctxt == NULL)
14871 return (NULL);
14873 xmlCtxtReset(ctxt);
14875 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14876 if (stream == NULL) {
14877 return (NULL);
14879 inputPush(ctxt, stream);
14880 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14884 * xmlCtxtReadMemory:
14885 * @ctxt: an XML parser context
14886 * @buffer: a pointer to a char array
14887 * @size: the size of the array
14888 * @URL: the base URL to use for the document
14889 * @encoding: the document encoding, or NULL
14890 * @options: a combination of xmlParserOption
14892 * parse an XML in-memory document and build a tree.
14893 * This reuses the existing @ctxt parser context
14895 * Returns the resulting document tree
14897 xmlDocPtr
14898 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14899 const char *URL, const char *encoding, int options)
14901 xmlParserInputBufferPtr input;
14902 xmlParserInputPtr stream;
14904 if (ctxt == NULL)
14905 return (NULL);
14906 if (buffer == NULL)
14907 return (NULL);
14909 xmlCtxtReset(ctxt);
14911 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14912 if (input == NULL) {
14913 return(NULL);
14916 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14917 if (stream == NULL) {
14918 xmlFreeParserInputBuffer(input);
14919 return(NULL);
14922 inputPush(ctxt, stream);
14923 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14927 * xmlCtxtReadFd:
14928 * @ctxt: an XML parser context
14929 * @fd: an open file descriptor
14930 * @URL: the base URL to use for the document
14931 * @encoding: the document encoding, or NULL
14932 * @options: a combination of xmlParserOption
14934 * parse an XML from a file descriptor and build a tree.
14935 * This reuses the existing @ctxt parser context
14936 * NOTE that the file descriptor will not be closed when the
14937 * reader is closed or reset.
14939 * Returns the resulting document tree
14941 xmlDocPtr
14942 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14943 const char *URL, const char *encoding, int options)
14945 xmlParserInputBufferPtr input;
14946 xmlParserInputPtr stream;
14948 if (fd < 0)
14949 return (NULL);
14950 if (ctxt == NULL)
14951 return (NULL);
14953 xmlCtxtReset(ctxt);
14956 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14957 if (input == NULL)
14958 return (NULL);
14959 input->closecallback = NULL;
14960 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14961 if (stream == NULL) {
14962 xmlFreeParserInputBuffer(input);
14963 return (NULL);
14965 inputPush(ctxt, stream);
14966 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14970 * xmlCtxtReadIO:
14971 * @ctxt: an XML parser context
14972 * @ioread: an I/O read function
14973 * @ioclose: an I/O close function
14974 * @ioctx: an I/O handler
14975 * @URL: the base URL to use for the document
14976 * @encoding: the document encoding, or NULL
14977 * @options: a combination of xmlParserOption
14979 * parse an XML document from I/O functions and source and build a tree.
14980 * This reuses the existing @ctxt parser context
14982 * Returns the resulting document tree
14984 xmlDocPtr
14985 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14986 xmlInputCloseCallback ioclose, void *ioctx,
14987 const char *URL,
14988 const char *encoding, int options)
14990 xmlParserInputBufferPtr input;
14991 xmlParserInputPtr stream;
14993 if (ioread == NULL)
14994 return (NULL);
14995 if (ctxt == NULL)
14996 return (NULL);
14998 xmlCtxtReset(ctxt);
15000 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15001 XML_CHAR_ENCODING_NONE);
15002 if (input == NULL)
15003 return (NULL);
15004 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15005 if (stream == NULL) {
15006 xmlFreeParserInputBuffer(input);
15007 return (NULL);
15009 inputPush(ctxt, stream);
15010 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15013 #define bottom_parser
15014 #include "elfgcchack.h"