2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
5 * See Copyright for the status of this software.
13 #if defined(WIN32) && !defined (__CYGWIN__)
14 #define XML_DIR_SEP '\\'
16 #define XML_DIR_SEP '/'
26 #ifdef HAVE_SYS_STAT_H
39 #include <libxml/xmlmemory.h>
40 #include <libxml/tree.h>
41 #include <libxml/parser.h>
42 #include <libxml/parserInternals.h>
43 #include <libxml/valid.h>
44 #include <libxml/entities.h>
45 #include <libxml/xmlerror.h>
46 #include <libxml/encoding.h>
47 #include <libxml/valid.h>
48 #include <libxml/xmlIO.h>
49 #include <libxml/uri.h>
50 #include <libxml/SAX.h>
51 #ifdef LIBXML_CATALOG_ENABLED
52 #include <libxml/catalog.h>
54 #include <libxml/globals.h>
56 void xmlUpgradeOldNs(xmlDocPtr doc
);
59 * Various global defaults for parsing
64 * @version: the include version number
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
70 xmlCheckVersion(int version
) {
71 int myversion
= (int) LIBXML_VERSION
;
75 if ((myversion
/ 10000) != (version
/ 10000)) {
76 xmlGenericError(xmlGenericErrorContext
,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version
/ 10000), (myversion
/ 10000));
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version
/ 10000), (myversion
/ 10000));
83 if ((myversion
/ 100) < (version
/ 100)) {
84 xmlGenericError(xmlGenericErrorContext
,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version
/ 100), (myversion
/ 100));
91 static const char *xmlFeaturesList
[] = {
96 "fetch external entities",
97 "substitute entities",
107 "SAX function internalSubset",
108 "SAX function isStandalone",
109 "SAX function hasInternalSubset",
110 "SAX function hasExternalSubset",
111 "SAX function resolveEntity",
112 "SAX function getEntity",
113 "SAX function entityDecl",
114 "SAX function notationDecl",
115 "SAX function attributeDecl",
116 "SAX function elementDecl",
117 "SAX function unparsedEntityDecl",
118 "SAX function setDocumentLocator",
119 "SAX function startDocument",
120 "SAX function endDocument",
121 "SAX function startElement",
122 "SAX function endElement",
123 "SAX function reference",
124 "SAX function characters",
125 "SAX function ignorableWhitespace",
126 "SAX function processingInstruction",
127 "SAX function comment",
128 "SAX function warning",
129 "SAX function error",
130 "SAX function fatalError",
131 "SAX function getParameterEntity",
132 "SAX function cdataBlock",
133 "SAX function externalSubset",
137 * xmlGetFeaturesList:
138 * @len: the length of the feature name array (input/output)
139 * @result: an array of strings to be filled with the feature names.
141 * Copy at most *@len feature names into the @result array
143 * Returns -1 in case of error, or the total number of features,
144 * len is updated with the number of strings copied,
145 * strings must not be deallocated
148 xmlGetFeaturesList(int *len
, const char **result
) {
151 ret
= sizeof(xmlFeaturesList
)/sizeof(xmlFeaturesList
[0]);
152 if ((len
== NULL
) || (result
== NULL
))
154 if ((*len
< 0) || (*len
>= 1000))
158 for (i
= 0;i
< *len
;i
++)
159 result
[i
] = xmlFeaturesList
[i
];
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
169 * Read the current value of one feature of this parser instance
171 * Returns -1 in case of error, 0 otherwise
174 xmlGetFeature(xmlParserCtxtPtr ctxt
, const char *name
, void *result
) {
175 if ((ctxt
== NULL
) || (name
== NULL
) || (result
== NULL
))
178 if (!strcmp(name
, "validate")) {
179 *((int *) result
) = ctxt
->validate
;
180 } else if (!strcmp(name
, "keep blanks")) {
181 *((int *) result
) = ctxt
->keepBlanks
;
182 } else if (!strcmp(name
, "disable SAX")) {
183 *((int *) result
) = ctxt
->disableSAX
;
184 } else if (!strcmp(name
, "fetch external entities")) {
185 *((int *) result
) = ctxt
->loadsubset
;
186 } else if (!strcmp(name
, "substitute entities")) {
187 *((int *) result
) = ctxt
->replaceEntities
;
188 } else if (!strcmp(name
, "gather line info")) {
189 *((int *) result
) = ctxt
->record_info
;
190 } else if (!strcmp(name
, "user data")) {
191 *((void **)result
) = ctxt
->userData
;
192 } else if (!strcmp(name
, "is html")) {
193 *((int *) result
) = ctxt
->html
;
194 } else if (!strcmp(name
, "is standalone")) {
195 *((int *) result
) = ctxt
->standalone
;
196 } else if (!strcmp(name
, "document")) {
197 *((xmlDocPtr
*) result
) = ctxt
->myDoc
;
198 } else if (!strcmp(name
, "is well formed")) {
199 *((int *) result
) = ctxt
->wellFormed
;
200 } else if (!strcmp(name
, "is valid")) {
201 *((int *) result
) = ctxt
->valid
;
202 } else if (!strcmp(name
, "SAX block")) {
203 *((xmlSAXHandlerPtr
*) result
) = ctxt
->sax
;
204 } else if (!strcmp(name
, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc
*) result
) = ctxt
->sax
->internalSubset
;
206 } else if (!strcmp(name
, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc
*) result
) = ctxt
->sax
->isStandalone
;
208 } else if (!strcmp(name
, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc
*) result
) = ctxt
->sax
->hasInternalSubset
;
210 } else if (!strcmp(name
, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc
*) result
) = ctxt
->sax
->hasExternalSubset
;
212 } else if (!strcmp(name
, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc
*) result
) = ctxt
->sax
->resolveEntity
;
214 } else if (!strcmp(name
, "SAX function getEntity")) {
215 *((getEntitySAXFunc
*) result
) = ctxt
->sax
->getEntity
;
216 } else if (!strcmp(name
, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc
*) result
) = ctxt
->sax
->entityDecl
;
218 } else if (!strcmp(name
, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc
*) result
) = ctxt
->sax
->notationDecl
;
220 } else if (!strcmp(name
, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc
*) result
) = ctxt
->sax
->attributeDecl
;
222 } else if (!strcmp(name
, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc
*) result
) = ctxt
->sax
->elementDecl
;
224 } else if (!strcmp(name
, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc
*) result
) = ctxt
->sax
->unparsedEntityDecl
;
226 } else if (!strcmp(name
, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc
*) result
) = ctxt
->sax
->setDocumentLocator
;
228 } else if (!strcmp(name
, "SAX function startDocument")) {
229 *((startDocumentSAXFunc
*) result
) = ctxt
->sax
->startDocument
;
230 } else if (!strcmp(name
, "SAX function endDocument")) {
231 *((endDocumentSAXFunc
*) result
) = ctxt
->sax
->endDocument
;
232 } else if (!strcmp(name
, "SAX function startElement")) {
233 *((startElementSAXFunc
*) result
) = ctxt
->sax
->startElement
;
234 } else if (!strcmp(name
, "SAX function endElement")) {
235 *((endElementSAXFunc
*) result
) = ctxt
->sax
->endElement
;
236 } else if (!strcmp(name
, "SAX function reference")) {
237 *((referenceSAXFunc
*) result
) = ctxt
->sax
->reference
;
238 } else if (!strcmp(name
, "SAX function characters")) {
239 *((charactersSAXFunc
*) result
) = ctxt
->sax
->characters
;
240 } else if (!strcmp(name
, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc
*) result
) = ctxt
->sax
->ignorableWhitespace
;
242 } else if (!strcmp(name
, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc
*) result
) = ctxt
->sax
->processingInstruction
;
244 } else if (!strcmp(name
, "SAX function comment")) {
245 *((commentSAXFunc
*) result
) = ctxt
->sax
->comment
;
246 } else if (!strcmp(name
, "SAX function warning")) {
247 *((warningSAXFunc
*) result
) = ctxt
->sax
->warning
;
248 } else if (!strcmp(name
, "SAX function error")) {
249 *((errorSAXFunc
*) result
) = ctxt
->sax
->error
;
250 } else if (!strcmp(name
, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc
*) result
) = ctxt
->sax
->fatalError
;
252 } else if (!strcmp(name
, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc
*) result
) = ctxt
->sax
->getParameterEntity
;
254 } else if (!strcmp(name
, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc
*) result
) = ctxt
->sax
->cdataBlock
;
256 } else if (!strcmp(name
, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc
*) result
) = ctxt
->sax
->externalSubset
;
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
270 * Change the current value of one feature of this parser instance
272 * Returns -1 in case of error, 0 otherwise
275 xmlSetFeature(xmlParserCtxtPtr ctxt
, const char *name
, void *value
) {
276 if ((ctxt
== NULL
) || (name
== NULL
) || (value
== NULL
))
279 if (!strcmp(name
, "validate")) {
280 int newvalidate
= *((int *) value
);
281 if ((!ctxt
->validate
) && (newvalidate
!= 0)) {
282 if (ctxt
->vctxt
.warning
== NULL
)
283 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
284 if (ctxt
->vctxt
.error
== NULL
)
285 ctxt
->vctxt
.error
= xmlParserValidityError
;
286 ctxt
->vctxt
.nodeMax
= 0;
288 ctxt
->validate
= newvalidate
;
289 } else if (!strcmp(name
, "keep blanks")) {
290 ctxt
->keepBlanks
= *((int *) value
);
291 } else if (!strcmp(name
, "disable SAX")) {
292 ctxt
->disableSAX
= *((int *) value
);
293 } else if (!strcmp(name
, "fetch external entities")) {
294 ctxt
->loadsubset
= *((int *) value
);
295 } else if (!strcmp(name
, "substitute entities")) {
296 ctxt
->replaceEntities
= *((int *) value
);
297 } else if (!strcmp(name
, "gather line info")) {
298 ctxt
->record_info
= *((int *) value
);
299 } else if (!strcmp(name
, "user data")) {
300 ctxt
->userData
= *((void **)value
);
301 } else if (!strcmp(name
, "is html")) {
302 ctxt
->html
= *((int *) value
);
303 } else if (!strcmp(name
, "is standalone")) {
304 ctxt
->standalone
= *((int *) value
);
305 } else if (!strcmp(name
, "document")) {
306 ctxt
->myDoc
= *((xmlDocPtr
*) value
);
307 } else if (!strcmp(name
, "is well formed")) {
308 ctxt
->wellFormed
= *((int *) value
);
309 } else if (!strcmp(name
, "is valid")) {
310 ctxt
->valid
= *((int *) value
);
311 } else if (!strcmp(name
, "SAX block")) {
312 ctxt
->sax
= *((xmlSAXHandlerPtr
*) value
);
313 } else if (!strcmp(name
, "SAX function internalSubset")) {
314 ctxt
->sax
->internalSubset
= *((internalSubsetSAXFunc
*) value
);
315 } else if (!strcmp(name
, "SAX function isStandalone")) {
316 ctxt
->sax
->isStandalone
= *((isStandaloneSAXFunc
*) value
);
317 } else if (!strcmp(name
, "SAX function hasInternalSubset")) {
318 ctxt
->sax
->hasInternalSubset
= *((hasInternalSubsetSAXFunc
*) value
);
319 } else if (!strcmp(name
, "SAX function hasExternalSubset")) {
320 ctxt
->sax
->hasExternalSubset
= *((hasExternalSubsetSAXFunc
*) value
);
321 } else if (!strcmp(name
, "SAX function resolveEntity")) {
322 ctxt
->sax
->resolveEntity
= *((resolveEntitySAXFunc
*) value
);
323 } else if (!strcmp(name
, "SAX function getEntity")) {
324 ctxt
->sax
->getEntity
= *((getEntitySAXFunc
*) value
);
325 } else if (!strcmp(name
, "SAX function entityDecl")) {
326 ctxt
->sax
->entityDecl
= *((entityDeclSAXFunc
*) value
);
327 } else if (!strcmp(name
, "SAX function notationDecl")) {
328 ctxt
->sax
->notationDecl
= *((notationDeclSAXFunc
*) value
);
329 } else if (!strcmp(name
, "SAX function attributeDecl")) {
330 ctxt
->sax
->attributeDecl
= *((attributeDeclSAXFunc
*) value
);
331 } else if (!strcmp(name
, "SAX function elementDecl")) {
332 ctxt
->sax
->elementDecl
= *((elementDeclSAXFunc
*) value
);
333 } else if (!strcmp(name
, "SAX function unparsedEntityDecl")) {
334 ctxt
->sax
->unparsedEntityDecl
= *((unparsedEntityDeclSAXFunc
*) value
);
335 } else if (!strcmp(name
, "SAX function setDocumentLocator")) {
336 ctxt
->sax
->setDocumentLocator
= *((setDocumentLocatorSAXFunc
*) value
);
337 } else if (!strcmp(name
, "SAX function startDocument")) {
338 ctxt
->sax
->startDocument
= *((startDocumentSAXFunc
*) value
);
339 } else if (!strcmp(name
, "SAX function endDocument")) {
340 ctxt
->sax
->endDocument
= *((endDocumentSAXFunc
*) value
);
341 } else if (!strcmp(name
, "SAX function startElement")) {
342 ctxt
->sax
->startElement
= *((startElementSAXFunc
*) value
);
343 } else if (!strcmp(name
, "SAX function endElement")) {
344 ctxt
->sax
->endElement
= *((endElementSAXFunc
*) value
);
345 } else if (!strcmp(name
, "SAX function reference")) {
346 ctxt
->sax
->reference
= *((referenceSAXFunc
*) value
);
347 } else if (!strcmp(name
, "SAX function characters")) {
348 ctxt
->sax
->characters
= *((charactersSAXFunc
*) value
);
349 } else if (!strcmp(name
, "SAX function ignorableWhitespace")) {
350 ctxt
->sax
->ignorableWhitespace
= *((ignorableWhitespaceSAXFunc
*) value
);
351 } else if (!strcmp(name
, "SAX function processingInstruction")) {
352 ctxt
->sax
->processingInstruction
= *((processingInstructionSAXFunc
*) value
);
353 } else if (!strcmp(name
, "SAX function comment")) {
354 ctxt
->sax
->comment
= *((commentSAXFunc
*) value
);
355 } else if (!strcmp(name
, "SAX function warning")) {
356 ctxt
->sax
->warning
= *((warningSAXFunc
*) value
);
357 } else if (!strcmp(name
, "SAX function error")) {
358 ctxt
->sax
->error
= *((errorSAXFunc
*) value
);
359 } else if (!strcmp(name
, "SAX function fatalError")) {
360 ctxt
->sax
->fatalError
= *((fatalErrorSAXFunc
*) value
);
361 } else if (!strcmp(name
, "SAX function getParameterEntity")) {
362 ctxt
->sax
->getParameterEntity
= *((getParameterEntitySAXFunc
*) value
);
363 } else if (!strcmp(name
, "SAX function cdataBlock")) {
364 ctxt
->sax
->cdataBlock
= *((cdataBlockSAXFunc
*) value
);
365 } else if (!strcmp(name
, "SAX function externalSubset")) {
366 ctxt
->sax
->externalSubset
= *((externalSubsetSAXFunc
*) value
);
373 /************************************************************************
375 * Some functions to avoid too large macros *
377 ************************************************************************/
381 * @c: a Unicode character (int)
383 * Check whether the character is allowed by the production
384 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
385 * | [#x10000-#x10FFFF]
386 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
387 * Also available as a macro IS_CHAR()
389 * Returns 0 if not, non-zero otherwise
394 ((c
) == 0x09) || ((c
) == 0x0A) || ((c
) == 0x0D) ||
395 (((c
) >= 0x20) && ((c
) <= 0xD7FF)) ||
396 (((c
) >= 0xE000) && ((c
) <= 0xFFFD)) ||
397 (((c
) >= 0x10000) && ((c
) <= 0x10FFFF)));
402 * @c: a Unicode character (int)
404 * Check whether the character is allowed by the production
405 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
406 * Also available as a macro IS_BLANK()
408 * Returns 0 if not, non-zero otherwise
412 return(((c
) == 0x20) || ((c
) == 0x09) || ((c
) == 0xA) || ((c
) == 0x0D));
417 * @c: a Unicode character (int)
419 * Check whether the character is allowed by the production
420 * [85] BaseChar ::= ... long list see REC ...
422 * VI is your friend !
423 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
425 * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
427 * Returns 0 if not, non-zero otherwise
429 static int xmlBaseArray
[] = {
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
434 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
435 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
436 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
437 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
442 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
443 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
444 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
445 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
449 xmlIsBaseChar(int c
) {
451 (((c
) < 0x0100) ? xmlBaseArray
[c
] :
453 (((c
) >= 0x0100) && ((c
) <= 0x0131)) ||
454 (((c
) >= 0x0134) && ((c
) <= 0x013E)) ||
455 (((c
) >= 0x0141) && ((c
) <= 0x0148)) ||
456 (((c
) >= 0x014A) && ((c
) <= 0x017E)) ||
457 (((c
) >= 0x0180) && ((c
) <= 0x01C3)) ||
458 (((c
) >= 0x01CD) && ((c
) <= 0x01F0)) ||
459 (((c
) >= 0x01F4) && ((c
) <= 0x01F5)) ||
460 (((c
) >= 0x01FA) && ((c
) <= 0x0217)) ||
461 (((c
) >= 0x0250) && ((c
) <= 0x02A8)) ||
462 (((c
) >= 0x02BB) && ((c
) <= 0x02C1)) ||
464 (((c
) >= 0x0388) && ((c
) <= 0x038A)) ||
466 (((c
) >= 0x038E) && ((c
) <= 0x03A1)) ||
467 (((c
) >= 0x03A3) && ((c
) <= 0x03CE)) ||
468 (((c
) >= 0x03D0) && ((c
) <= 0x03D6)) ||
473 (((c
) >= 0x03E2) && ((c
) <= 0x03F3)) ||
474 (((c
) >= 0x0401) && ((c
) <= 0x040C)) ||
475 (((c
) >= 0x040E) && ((c
) <= 0x044F)) ||
476 (((c
) >= 0x0451) && ((c
) <= 0x045C)) ||
477 (((c
) >= 0x045E) && ((c
) <= 0x0481)) ||
478 (((c
) >= 0x0490) && ((c
) <= 0x04C4)) ||
479 (((c
) >= 0x04C7) && ((c
) <= 0x04C8)) ||
480 (((c
) >= 0x04CB) && ((c
) <= 0x04CC)) ||
481 (((c
) >= 0x04D0) && ((c
) <= 0x04EB)) ||
482 (((c
) >= 0x04EE) && ((c
) <= 0x04F5)) ||
483 (((c
) >= 0x04F8) && ((c
) <= 0x04F9)) ||
484 (((c
) >= 0x0531) && ((c
) <= 0x0556)) ||
486 (((c
) >= 0x0561) && ((c
) <= 0x0586)) ||
487 (((c
) >= 0x05D0) && ((c
) <= 0x05EA)) ||
488 (((c
) >= 0x05F0) && ((c
) <= 0x05F2)) ||
489 (((c
) >= 0x0621) && ((c
) <= 0x063A)) ||
490 (((c
) >= 0x0641) && ((c
) <= 0x064A)) ||
491 (((c
) >= 0x0671) && ((c
) <= 0x06B7)) ||
492 (((c
) >= 0x06BA) && ((c
) <= 0x06BE)) ||
493 (((c
) >= 0x06C0) && ((c
) <= 0x06CE)) ||
494 (((c
) >= 0x06D0) && ((c
) <= 0x06D3)) ||
496 (((c
) >= 0x06E5) && ((c
) <= 0x06E6)) ||
497 (((c
) >= 0x905) && ( /* accelerator */
498 (((c
) >= 0x0905) && ((c
) <= 0x0939)) ||
500 (((c
) >= 0x0958) && ((c
) <= 0x0961)) ||
501 (((c
) >= 0x0985) && ((c
) <= 0x098C)) ||
502 (((c
) >= 0x098F) && ((c
) <= 0x0990)) ||
503 (((c
) >= 0x0993) && ((c
) <= 0x09A8)) ||
504 (((c
) >= 0x09AA) && ((c
) <= 0x09B0)) ||
506 (((c
) >= 0x09B6) && ((c
) <= 0x09B9)) ||
507 (((c
) >= 0x09DC) && ((c
) <= 0x09DD)) ||
508 (((c
) >= 0x09DF) && ((c
) <= 0x09E1)) ||
509 (((c
) >= 0x09F0) && ((c
) <= 0x09F1)) ||
510 (((c
) >= 0x0A05) && ((c
) <= 0x0A0A)) ||
511 (((c
) >= 0x0A0F) && ((c
) <= 0x0A10)) ||
512 (((c
) >= 0x0A13) && ((c
) <= 0x0A28)) ||
513 (((c
) >= 0x0A2A) && ((c
) <= 0x0A30)) ||
514 (((c
) >= 0x0A32) && ((c
) <= 0x0A33)) ||
515 (((c
) >= 0x0A35) && ((c
) <= 0x0A36)) ||
516 (((c
) >= 0x0A38) && ((c
) <= 0x0A39)) ||
517 (((c
) >= 0x0A59) && ((c
) <= 0x0A5C)) ||
519 (((c
) >= 0x0A72) && ((c
) <= 0x0A74)) ||
520 (((c
) >= 0x0A85) && ((c
) <= 0x0A8B)) ||
522 (((c
) >= 0x0A8F) && ((c
) <= 0x0A91)) ||
523 (((c
) >= 0x0A93) && ((c
) <= 0x0AA8)) ||
524 (((c
) >= 0x0AAA) && ((c
) <= 0x0AB0)) ||
525 (((c
) >= 0x0AB2) && ((c
) <= 0x0AB3)) ||
526 (((c
) >= 0x0AB5) && ((c
) <= 0x0AB9)) ||
529 (((c
) >= 0x0B05) && ((c
) <= 0x0B0C)) ||
530 (((c
) >= 0x0B0F) && ((c
) <= 0x0B10)) ||
531 (((c
) >= 0x0B13) && ((c
) <= 0x0B28)) ||
532 (((c
) >= 0x0B2A) && ((c
) <= 0x0B30)) ||
533 (((c
) >= 0x0B32) && ((c
) <= 0x0B33)) ||
534 (((c
) >= 0x0B36) && ((c
) <= 0x0B39)) ||
536 (((c
) >= 0x0B5C) && ((c
) <= 0x0B5D)) ||
537 (((c
) >= 0x0B5F) && ((c
) <= 0x0B61)) ||
538 (((c
) >= 0x0B85) && ((c
) <= 0x0B8A)) ||
539 (((c
) >= 0x0B8E) && ((c
) <= 0x0B90)) ||
540 (((c
) >= 0x0B92) && ((c
) <= 0x0B95)) ||
541 (((c
) >= 0x0B99) && ((c
) <= 0x0B9A)) ||
543 (((c
) >= 0x0B9E) && ((c
) <= 0x0B9F)) ||
544 (((c
) >= 0x0BA3) && ((c
) <= 0x0BA4)) ||
545 (((c
) >= 0x0BA8) && ((c
) <= 0x0BAA)) ||
546 (((c
) >= 0x0BAE) && ((c
) <= 0x0BB5)) ||
547 (((c
) >= 0x0BB7) && ((c
) <= 0x0BB9)) ||
548 (((c
) >= 0x0C05) && ((c
) <= 0x0C0C)) ||
549 (((c
) >= 0x0C0E) && ((c
) <= 0x0C10)) ||
550 (((c
) >= 0x0C12) && ((c
) <= 0x0C28)) ||
551 (((c
) >= 0x0C2A) && ((c
) <= 0x0C33)) ||
552 (((c
) >= 0x0C35) && ((c
) <= 0x0C39)) ||
553 (((c
) >= 0x0C60) && ((c
) <= 0x0C61)) ||
554 (((c
) >= 0x0C85) && ((c
) <= 0x0C8C)) ||
555 (((c
) >= 0x0C8E) && ((c
) <= 0x0C90)) ||
556 (((c
) >= 0x0C92) && ((c
) <= 0x0CA8)) ||
557 (((c
) >= 0x0CAA) && ((c
) <= 0x0CB3)) ||
558 (((c
) >= 0x0CB5) && ((c
) <= 0x0CB9)) ||
560 (((c
) >= 0x0CE0) && ((c
) <= 0x0CE1)) ||
561 (((c
) >= 0x0D05) && ((c
) <= 0x0D0C)) ||
562 (((c
) >= 0x0D0E) && ((c
) <= 0x0D10)) ||
563 (((c
) >= 0x0D12) && ((c
) <= 0x0D28)) ||
564 (((c
) >= 0x0D2A) && ((c
) <= 0x0D39)) ||
565 (((c
) >= 0x0D60) && ((c
) <= 0x0D61)) ||
566 (((c
) >= 0x0E01) && ((c
) <= 0x0E2E)) ||
568 (((c
) >= 0x0E32) && ((c
) <= 0x0E33)) ||
569 (((c
) >= 0x0E40) && ((c
) <= 0x0E45)) ||
570 (((c
) >= 0x0E81) && ((c
) <= 0x0E82)) ||
572 (((c
) >= 0x0E87) && ((c
) <= 0x0E88)) ||
575 (((c
) >= 0x0E94) && ((c
) <= 0x0E97)) ||
576 (((c
) >= 0x0E99) && ((c
) <= 0x0E9F)) ||
577 (((c
) >= 0x0EA1) && ((c
) <= 0x0EA3)) ||
580 (((c
) >= 0x0EAA) && ((c
) <= 0x0EAB)) ||
581 (((c
) >= 0x0EAD) && ((c
) <= 0x0EAE)) ||
583 (((c
) >= 0x0EB2) && ((c
) <= 0x0EB3)) ||
585 (((c
) >= 0x0EC0) && ((c
) <= 0x0EC4)) ||
586 (((c
) >= 0x0F40) && ((c
) <= 0x0F47)) ||
587 (((c
) >= 0x0F49) && ((c
) <= 0x0F69)) ||
588 (((c
) >= 0x10A0) && ( /* accelerator */
589 (((c
) >= 0x10A0) && ((c
) <= 0x10C5)) ||
590 (((c
) >= 0x10D0) && ((c
) <= 0x10F6)) ||
592 (((c
) >= 0x1102) && ((c
) <= 0x1103)) ||
593 (((c
) >= 0x1105) && ((c
) <= 0x1107)) ||
595 (((c
) >= 0x110B) && ((c
) <= 0x110C)) ||
596 (((c
) >= 0x110E) && ((c
) <= 0x1112)) ||
603 (((c
) >= 0x1154) && ((c
) <= 0x1155)) ||
605 (((c
) >= 0x115F) && ((c
) <= 0x1161)) ||
610 (((c
) >= 0x116D) && ((c
) <= 0x116E)) ||
611 (((c
) >= 0x1172) && ((c
) <= 0x1173)) ||
616 (((c
) >= 0x11AE) && ((c
) <= 0x11AF)) ||
617 (((c
) >= 0x11B7) && ((c
) <= 0x11B8)) ||
619 (((c
) >= 0x11BC) && ((c
) <= 0x11C2)) ||
623 (((c
) >= 0x1E00) && ((c
) <= 0x1E9B)) ||
624 (((c
) >= 0x1EA0) && ((c
) <= 0x1EF9)) ||
625 (((c
) >= 0x1F00) && ((c
) <= 0x1F15)) ||
626 (((c
) >= 0x1F18) && ((c
) <= 0x1F1D)) ||
627 (((c
) >= 0x1F20) && ((c
) <= 0x1F45)) ||
628 (((c
) >= 0x1F48) && ((c
) <= 0x1F4D)) ||
629 (((c
) >= 0x1F50) && ((c
) <= 0x1F57)) ||
633 (((c
) >= 0x1F5F) && ((c
) <= 0x1F7D)) ||
634 (((c
) >= 0x1F80) && ((c
) <= 0x1FB4)) ||
635 (((c
) >= 0x1FB6) && ((c
) <= 0x1FBC)) ||
637 (((c
) >= 0x1FC2) && ((c
) <= 0x1FC4)) ||
638 (((c
) >= 0x1FC6) && ((c
) <= 0x1FCC)) ||
639 (((c
) >= 0x1FD0) && ((c
) <= 0x1FD3)) ||
640 (((c
) >= 0x1FD6) && ((c
) <= 0x1FDB)) ||
641 (((c
) >= 0x1FE0) && ((c
) <= 0x1FEC)) ||
642 (((c
) >= 0x1FF2) && ((c
) <= 0x1FF4)) ||
643 (((c
) >= 0x1FF6) && ((c
) <= 0x1FFC)) ||
645 (((c
) >= 0x212A) && ((c
) <= 0x212B)) ||
647 (((c
) >= 0x2180) && ((c
) <= 0x2182)) ||
648 (((c
) >= 0x3041) && ((c
) <= 0x3094)) ||
649 (((c
) >= 0x30A1) && ((c
) <= 0x30FA)) ||
650 (((c
) >= 0x3105) && ((c
) <= 0x312C)) ||
651 (((c
) >= 0xAC00) && ((c
) <= 0xD7A3))) /* accelerators */ ))))));
656 * @c: a Unicode character (int)
658 * Check whether the character is allowed by the production
659 * [88] Digit ::= ... long list see REC ...
661 * Returns 0 if not, non-zero otherwise
666 (((c
) >= 0x0030) && ((c
) <= 0x0039)) ||
667 (((c
) >= 0x660) && ( /* accelerator */
668 (((c
) >= 0x0660) && ((c
) <= 0x0669)) ||
669 (((c
) >= 0x06F0) && ((c
) <= 0x06F9)) ||
670 (((c
) >= 0x0966) && ((c
) <= 0x096F)) ||
671 (((c
) >= 0x09E6) && ((c
) <= 0x09EF)) ||
672 (((c
) >= 0x0A66) && ((c
) <= 0x0A6F)) ||
673 (((c
) >= 0x0AE6) && ((c
) <= 0x0AEF)) ||
674 (((c
) >= 0x0B66) && ((c
) <= 0x0B6F)) ||
675 (((c
) >= 0x0BE7) && ((c
) <= 0x0BEF)) ||
676 (((c
) >= 0x0C66) && ((c
) <= 0x0C6F)) ||
677 (((c
) >= 0x0CE6) && ((c
) <= 0x0CEF)) ||
678 (((c
) >= 0x0D66) && ((c
) <= 0x0D6F)) ||
679 (((c
) >= 0x0E50) && ((c
) <= 0x0E59)) ||
680 (((c
) >= 0x0ED0) && ((c
) <= 0x0ED9)) ||
681 (((c
) >= 0x0F20) && ((c
) <= 0x0F29))) /* accelerator */ ));
686 * @c: a Unicode character (int)
688 * Check whether the character is allowed by the production
689 * [87] CombiningChar ::= ... long list see REC ...
691 * Returns 0 if not, non-zero otherwise
694 xmlIsCombining(int c
) {
696 (((c
) >= 0x300) && ( /* accelerator */
697 (((c
) >= 0x0300) && ((c
) <= 0x0345)) ||
698 (((c
) >= 0x0360) && ((c
) <= 0x0361)) ||
699 (((c
) >= 0x0483) && ((c
) <= 0x0486)) ||
700 (((c
) >= 0x0591) && ((c
) <= 0x05A1)) ||
701 (((c
) >= 0x05A3) && ((c
) <= 0x05B9)) ||
702 (((c
) >= 0x05BB) && ((c
) <= 0x05BD)) ||
704 (((c
) >= 0x05C1) && ((c
) <= 0x05C2)) ||
706 (((c
) >= 0x064B) && ((c
) <= 0x0652)) ||
708 (((c
) >= 0x06D6) && ((c
) <= 0x06DC)) ||
709 (((c
) >= 0x06DD) && ((c
) <= 0x06DF)) ||
710 (((c
) >= 0x06E0) && ((c
) <= 0x06E4)) ||
711 (((c
) >= 0x06E7) && ((c
) <= 0x06E8)) ||
712 (((c
) >= 0x06EA) && ((c
) <= 0x06ED)) ||
713 (((c
) >= 0x0901) && ( /* accelerator */
714 (((c
) >= 0x0901) && ((c
) <= 0x0903)) ||
716 (((c
) >= 0x093E) && ((c
) <= 0x094C)) ||
718 (((c
) >= 0x0951) && ((c
) <= 0x0954)) ||
719 (((c
) >= 0x0962) && ((c
) <= 0x0963)) ||
720 (((c
) >= 0x0981) && ((c
) <= 0x0983)) ||
724 (((c
) >= 0x09C0) && ((c
) <= 0x09C4)) ||
725 (((c
) >= 0x09C7) && ((c
) <= 0x09C8)) ||
726 (((c
) >= 0x09CB) && ((c
) <= 0x09CD)) ||
728 (((c
) >= 0x09E2) && ((c
) <= 0x09E3)) ||
729 (((c
) >= 0x0A02) && ( /* accelerator */
734 (((c
) >= 0x0A40) && ((c
) <= 0x0A42)) ||
735 (((c
) >= 0x0A47) && ((c
) <= 0x0A48)) ||
736 (((c
) >= 0x0A4B) && ((c
) <= 0x0A4D)) ||
737 (((c
) >= 0x0A70) && ((c
) <= 0x0A71)) ||
738 (((c
) >= 0x0A81) && ((c
) <= 0x0A83)) ||
740 (((c
) >= 0x0ABE) && ((c
) <= 0x0AC5)) ||
741 (((c
) >= 0x0AC7) && ((c
) <= 0x0AC9)) ||
742 (((c
) >= 0x0ACB) && ((c
) <= 0x0ACD)) ||
743 (((c
) >= 0x0B01) && ((c
) <= 0x0B03)) ||
745 (((c
) >= 0x0B3E) && ((c
) <= 0x0B43)) ||
746 (((c
) >= 0x0B47) && ((c
) <= 0x0B48)) ||
747 (((c
) >= 0x0B4B) && ((c
) <= 0x0B4D)) ||
748 (((c
) >= 0x0B56) && ((c
) <= 0x0B57)) ||
749 (((c
) >= 0x0B82) && ((c
) <= 0x0B83)) ||
750 (((c
) >= 0x0BBE) && ((c
) <= 0x0BC2)) ||
751 (((c
) >= 0x0BC6) && ((c
) <= 0x0BC8)) ||
752 (((c
) >= 0x0BCA) && ((c
) <= 0x0BCD)) ||
754 (((c
) >= 0x0C01) && ((c
) <= 0x0C03)) ||
755 (((c
) >= 0x0C3E) && ((c
) <= 0x0C44)) ||
756 (((c
) >= 0x0C46) && ((c
) <= 0x0C48)) ||
757 (((c
) >= 0x0C4A) && ((c
) <= 0x0C4D)) ||
758 (((c
) >= 0x0C55) && ((c
) <= 0x0C56)) ||
759 (((c
) >= 0x0C82) && ((c
) <= 0x0C83)) ||
760 (((c
) >= 0x0CBE) && ((c
) <= 0x0CC4)) ||
761 (((c
) >= 0x0CC6) && ((c
) <= 0x0CC8)) ||
762 (((c
) >= 0x0CCA) && ((c
) <= 0x0CCD)) ||
763 (((c
) >= 0x0CD5) && ((c
) <= 0x0CD6)) ||
764 (((c
) >= 0x0D02) && ((c
) <= 0x0D03)) ||
765 (((c
) >= 0x0D3E) && ((c
) <= 0x0D43)) ||
766 (((c
) >= 0x0D46) && ((c
) <= 0x0D48)) ||
767 (((c
) >= 0x0D4A) && ((c
) <= 0x0D4D)) ||
769 (((c
) >= 0x0E31) && ( /* accelerator */
771 (((c
) >= 0x0E34) && ((c
) <= 0x0E3A)) ||
772 (((c
) >= 0x0E47) && ((c
) <= 0x0E4E)) ||
774 (((c
) >= 0x0EB4) && ((c
) <= 0x0EB9)) ||
775 (((c
) >= 0x0EBB) && ((c
) <= 0x0EBC)) ||
776 (((c
) >= 0x0EC8) && ((c
) <= 0x0ECD)) ||
777 (((c
) >= 0x0F18) && ((c
) <= 0x0F19)) ||
783 (((c
) >= 0x0F71) && ((c
) <= 0x0F84)) ||
784 (((c
) >= 0x0F86) && ((c
) <= 0x0F8B)) ||
785 (((c
) >= 0x0F90) && ((c
) <= 0x0F95)) ||
787 (((c
) >= 0x0F99) && ((c
) <= 0x0FAD)) ||
788 (((c
) >= 0x0FB1) && ((c
) <= 0x0FB7)) ||
790 (((c
) >= 0x20D0) && ((c
) <= 0x20DC)) ||
792 (((c
) >= 0x302A) && ((c
) <= 0x302F)) ||
794 ((c
) == 0x309A))))))))));
799 * @c: a Unicode character (int)
801 * Check whether the character is allowed by the production
802 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
803 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
804 * [#x309D-#x309E] | [#x30FC-#x30FE]
806 * Returns 0 if not, non-zero otherwise
809 xmlIsExtender(int c
) {
811 case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
812 case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
813 case 0x3031: case 0x3032: case 0x3033: case 0x3034:
814 case 0x3035: case 0x309D: case 0x309E: case 0x30FC:
815 case 0x30FD: case 0x30FE:
824 * @c: a Unicode character (int)
826 * Check whether the character is allowed by the production
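827 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 * NOTE(review): the range test below also accepts [#xF900-#xFA2D], which is
 * not part of production [86] as quoted here -- confirm this is intended.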
829 * Returns 0 if not, non-zero otherwise
832 xmlIsIdeographic(int c
) {
833 return(((c
) < 0x0100) ? 0 :
834 (((c
) >= 0x4e00) && ((c
) <= 0x9fa5)) ||
835 (((c
) >= 0xf900) && ((c
) <= 0xfa2d)) ||
836 (((c
) >= 0x3021) && ((c
) <= 0x3029)) ||
842 * @c: a Unicode character (int)
844 * Check whether the character is allowed by the production
845 * [84] Letter ::= BaseChar | Ideographic
847 * Returns 0 if not, non-zero otherwise
851 return(IS_BASECHAR(c
) || IS_IDEOGRAPHIC(c
));
856 * @c: a Unicode character (int)
858 * Check whether the character is allowed by the production
859 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
861 * Returns 0 if not, non-zero otherwise
864 xmlIsPubidChar(int c
) {
866 ((c
) == 0x20) || ((c
) == 0x0D) || ((c
) == 0x0A) ||
867 (((c
) >= 'a') && ((c
) <= 'z')) ||
868 (((c
) >= 'A') && ((c
) <= 'Z')) ||
869 (((c
) >= '0') && ((c
) <= '9')) ||
870 ((c
) == '-') || ((c
) == '\'') || ((c
) == '(') || ((c
) == ')') ||
871 ((c
) == '+') || ((c
) == ',') || ((c
) == '.') || ((c
) == '/') ||
872 ((c
) == ':') || ((c
) == '=') || ((c
) == '?') || ((c
) == ';') ||
873 ((c
) == '!') || ((c
) == '*') || ((c
) == '#') || ((c
) == '@') ||
874 ((c
) == '$') || ((c
) == '_') || ((c
) == '%'));
877 /************************************************************************
879 * Input handling functions for progressive parsing *
881 ************************************************************************/
883 /* #define DEBUG_INPUT */
884 /* #define DEBUG_STACK */
885 /* #define DEBUG_PUSH */
888 /* we need to keep enough input to show errors in context */
892 #define CHECK_BUFFER(in) check_buffer(in)
894 void check_buffer(xmlParserInputPtr in
) {
895 if (in
->base
!= in
->buf
->buffer
->content
) {
896 xmlGenericError(xmlGenericErrorContext
,
897 "xmlParserInput: base mismatch problem\n");
899 if (in
->cur
< in
->base
) {
900 xmlGenericError(xmlGenericErrorContext
,
901 "xmlParserInput: cur < base problem\n");
903 if (in
->cur
> in
->base
+ in
->buf
->buffer
->use
) {
904 xmlGenericError(xmlGenericErrorContext
,
905 "xmlParserInput: cur > base + use problem\n");
907 xmlGenericError(xmlGenericErrorContext
,"buffer %x : content %x, cur %d, use %d, size %d\n",
908 (int) in
, (int) in
->buf
->buffer
->content
, in
->cur
- in
->base
,
909 in
->buf
->buffer
->use
, in
->buf
->buffer
->size
);
913 #define CHECK_BUFFER(in)
918 * xmlParserInputRead:
919 * @in: an XML parser input
920 * @len: an indicative size for the lookahead
922 * This function refreshes the input for the parser. It doesn't try to
923 * preserve pointers to the input buffer, and discards already-read data
925 * Returns the number of xmlChars read, or -1 in case of error, 0 indicates the
929 xmlParserInputRead(xmlParserInputPtr in
, int len
) {
935 xmlGenericError(xmlGenericErrorContext
, "Read\n");
937 if (in
->buf
== NULL
) return(-1);
938 if (in
->base
== NULL
) return(-1);
939 if (in
->cur
== NULL
) return(-1);
940 if (in
->buf
->buffer
== NULL
) return(-1);
941 if (in
->buf
->readcallback
== NULL
) return(-1);
945 used
= in
->cur
- in
->buf
->buffer
->content
;
946 ret
= xmlBufferShrink(in
->buf
->buffer
, used
);
951 ret
= xmlParserInputBufferRead(in
->buf
, len
);
952 if (in
->base
!= in
->buf
->buffer
->content
) {
954 * the buffer has been reallocated
956 indx
= in
->cur
- in
->base
;
957 in
->base
= in
->buf
->buffer
->content
;
958 in
->cur
= &in
->buf
->buffer
->content
[indx
];
960 in
->end
= &in
->buf
->buffer
->content
[in
->buf
->buffer
->use
];
968 * xmlParserInputGrow:
969 * @in: an XML parser input
970 * @len: an indicative size for the lookahead
972 * This function increase the input for the parser. It tries to
973 * preserve pointers to the input buffer, and keep already read data
975 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
979 xmlParserInputGrow(xmlParserInputPtr in
, int len
) {
984 xmlGenericError(xmlGenericErrorContext
, "Grow\n");
986 if (in
->buf
== NULL
) return(-1);
987 if (in
->base
== NULL
) return(-1);
988 if (in
->cur
== NULL
) return(-1);
989 if (in
->buf
->buffer
== NULL
) return(-1);
993 indx
= in
->cur
- in
->base
;
994 if (in
->buf
->buffer
->use
> (unsigned int) indx
+ INPUT_CHUNK
) {
1000 if (in
->buf
->readcallback
!= NULL
)
1001 ret
= xmlParserInputBufferGrow(in
->buf
, len
);
1006 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
1007 * block, but we use it really as an integer to do some
1008 * pointer arithmetic. Insure will raise it as a bug but in
1009 * that specific case, that's not !
1011 if (in
->base
!= in
->buf
->buffer
->content
) {
1013 * the buffer has been reallocated
1015 indx
= in
->cur
- in
->base
;
1016 in
->base
= in
->buf
->buffer
->content
;
1017 in
->cur
= &in
->buf
->buffer
->content
[indx
];
1019 in
->end
= &in
->buf
->buffer
->content
[in
->buf
->buffer
->use
];
1027 * xmlParserInputShrink:
1028 * @in: an XML parser input
1030 * This function removes used input for the parser.
1033 xmlParserInputShrink(xmlParserInputPtr in
) {
1039 xmlGenericError(xmlGenericErrorContext
, "Shrink\n");
1041 if (in
->buf
== NULL
) return;
1042 if (in
->base
== NULL
) return;
1043 if (in
->cur
== NULL
) return;
1044 if (in
->buf
->buffer
== NULL
) return;
1048 used
= in
->cur
- in
->buf
->buffer
->content
;
1050 * Do not shrink on large buffers whose only a tiny fraction
1053 if ((int) in
->buf
->buffer
->use
> used
+ 2 * INPUT_CHUNK
)
1055 if (used
> INPUT_CHUNK
) {
1056 ret
= xmlBufferShrink(in
->buf
->buffer
, used
- LINE_LEN
);
1059 in
->consumed
+= ret
;
1061 in
->end
= &in
->buf
->buffer
->content
[in
->buf
->buffer
->use
];
1066 if (in
->buf
->buffer
->use
> INPUT_CHUNK
) {
1069 xmlParserInputBufferRead(in
->buf
, 2 * INPUT_CHUNK
);
1070 if (in
->base
!= in
->buf
->buffer
->content
) {
1072 * the buffer has been reallocated
1074 indx
= in
->cur
- in
->base
;
1075 in
->base
= in
->buf
->buffer
->content
;
1076 in
->cur
= &in
->buf
->buffer
->content
[indx
];
1078 in
->end
= &in
->buf
->buffer
->content
[in
->buf
->buffer
->use
];
1083 /************************************************************************
1085 * UTF8 character input and related functions *
1087 ************************************************************************/
1091 * @ctxt: the XML parser context
1093 * Skip to the next char input char.
1097 xmlNextChar(xmlParserCtxtPtr ctxt
) {
1098 if (ctxt
->instate
== XML_PARSER_EOF
)
1102 * 2.11 End-of-Line Handling
1103 * the literal two-character sequence "#xD#xA" or a standalone
1104 * literal #xD, an XML processor must pass to the application
1105 * the single character #xA.
1107 if (ctxt
->charset
== XML_CHAR_ENCODING_UTF8
) {
1108 if ((*ctxt
->input
->cur
== 0) &&
1109 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0) &&
1110 (ctxt
->instate
!= XML_PARSER_COMMENT
)) {
1112 * If we are at the end of the current entity and
1113 * the context allows it, we pop consumed entities
1115 * the auto closing should be blocked in other cases
1119 if (*(ctxt
->input
->cur
) == '\n') {
1120 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
1121 } else ctxt
->input
->col
++;
1122 if (ctxt
->charset
== XML_CHAR_ENCODING_UTF8
) {
1124 * We are supposed to handle UTF8, check it's valid
1125 * From rfc2044: encoding of the Unicode values on UTF-8:
1127 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128 * 0000 0000-0000 007F 0xxxxxxx
1129 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1132 * Check for the 0x110000 limit too
1134 const unsigned char *cur
= ctxt
->input
->cur
;
1140 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1141 if ((cur
[1] & 0xc0) != 0x80)
1142 goto encoding_error
;
1143 if ((c
& 0xe0) == 0xe0) {
1147 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1148 if ((cur
[2] & 0xc0) != 0x80)
1149 goto encoding_error
;
1150 if ((c
& 0xf0) == 0xf0) {
1152 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1153 if (((c
& 0xf8) != 0xf0) ||
1154 ((cur
[3] & 0xc0) != 0x80))
1155 goto encoding_error
;
1157 ctxt
->input
->cur
+= 4;
1158 val
= (cur
[0] & 0x7) << 18;
1159 val
|= (cur
[1] & 0x3f) << 12;
1160 val
|= (cur
[2] & 0x3f) << 6;
1161 val
|= cur
[3] & 0x3f;
1164 ctxt
->input
->cur
+= 3;
1165 val
= (cur
[0] & 0xf) << 12;
1166 val
|= (cur
[1] & 0x3f) << 6;
1167 val
|= cur
[2] & 0x3f;
1169 if (((val
> 0xd7ff) && (val
< 0xe000)) ||
1170 ((val
> 0xfffd) && (val
< 0x10000)) ||
1171 (val
>= 0x110000)) {
1172 if ((ctxt
->sax
!= NULL
) &&
1173 (ctxt
->sax
->error
!= NULL
))
1174 ctxt
->sax
->error(ctxt
->userData
,
1175 "Char 0x%X out of allowed range\n", val
);
1176 ctxt
->errNo
= XML_ERR_INVALID_ENCODING
;
1177 ctxt
->wellFormed
= 0;
1178 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
1182 ctxt
->input
->cur
+= 2;
1188 * Assume it's a fixed length encoding (1) with
1189 * a compatible encoding for the ASCII set, since
1190 * XML constructs only use < 128 chars
1195 if (*ctxt
->input
->cur
== 0)
1196 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1201 if (*ctxt
->input
->cur
== 0)
1202 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1204 if ((*ctxt
->input
->cur
== '%') && (!ctxt
->html
))
1205 xmlParserHandlePEReference(ctxt
);
1206 if ((*ctxt
->input
->cur
== 0) &&
1207 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
1212 * If we detect an UTF8 error that probably mean that the
1213 * input encoding didn't get properly advertised in the
1214 * declaration header. Report the error and switch the encoding
1215 * to ISO-Latin-1 (if you don't like this policy, just declare the
1218 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
)) {
1219 ctxt
->sax
->error(ctxt
->userData
,
1220 "Input is not proper UTF-8, indicate encoding !\n");
1221 ctxt
->sax
->error(ctxt
->userData
, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1222 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
1223 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
1225 ctxt
->wellFormed
= 0;
1226 ctxt
->errNo
= XML_ERR_INVALID_ENCODING
;
1228 ctxt
->charset
= XML_CHAR_ENCODING_8859_1
;
1235 * @ctxt: the XML parser context
1236 * @len: pointer to the length of the char read
1238 * The current char value, if using UTF-8 this may actually span multiple
1239 * bytes in the input buffer. Implement the end of line normalization:
1240 * 2.11 End-of-Line Handling
1241 * Wherever an external parsed entity or the literal entity value
1242 * of an internal parsed entity contains either the literal two-character
1243 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1244 * must pass to the application the single character #xA.
1245 * This behavior can conveniently be produced by normalizing all
1246 * line breaks to #xA on input, before parsing.)
1248 * Returns the current char value and its length
1252 xmlCurrentChar(xmlParserCtxtPtr ctxt
, int *len
) {
1253 if (ctxt
->instate
== XML_PARSER_EOF
)
1256 if ((*ctxt
->input
->cur
>= 0x20) && (*ctxt
->input
->cur
<= 0x7F)) {
1258 return((int) *ctxt
->input
->cur
);
1260 if (ctxt
->charset
== XML_CHAR_ENCODING_UTF8
) {
1262 * We are supposed to handle UTF8, check it's valid
1263 * From rfc2044: encoding of the Unicode values on UTF-8:
1265 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1266 * 0000 0000-0000 007F 0xxxxxxx
1267 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1268 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1270 * Check for the 0x110000 limit too
1272 const unsigned char *cur
= ctxt
->input
->cur
;
1279 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1280 if ((cur
[1] & 0xc0) != 0x80)
1281 goto encoding_error
;
1282 if ((c
& 0xe0) == 0xe0) {
1285 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1286 if ((cur
[2] & 0xc0) != 0x80)
1287 goto encoding_error
;
1288 if ((c
& 0xf0) == 0xf0) {
1290 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1291 if (((c
& 0xf8) != 0xf0) ||
1292 ((cur
[3] & 0xc0) != 0x80))
1293 goto encoding_error
;
1296 val
= (cur
[0] & 0x7) << 18;
1297 val
|= (cur
[1] & 0x3f) << 12;
1298 val
|= (cur
[2] & 0x3f) << 6;
1299 val
|= cur
[3] & 0x3f;
1303 val
= (cur
[0] & 0xf) << 12;
1304 val
|= (cur
[1] & 0x3f) << 6;
1305 val
|= cur
[2] & 0x3f;
1310 val
= (cur
[0] & 0x1f) << 6;
1311 val
|= cur
[1] & 0x3f;
1313 if (!IS_CHAR(val
)) {
1314 if ((ctxt
->sax
!= NULL
) &&
1315 (ctxt
->sax
->error
!= NULL
))
1316 ctxt
->sax
->error(ctxt
->userData
,
1317 "Char 0x%X out of allowed range\n", val
);
1318 ctxt
->errNo
= XML_ERR_INVALID_ENCODING
;
1319 ctxt
->wellFormed
= 0;
1320 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
1326 if (*ctxt
->input
->cur
== 0xD) {
1327 if (ctxt
->input
->cur
[1] == 0xA) {
1333 return((int) *ctxt
->input
->cur
);
1337 * Assume it's a fixed length encoding (1) with
1338 * a compatible encoding for the ASCII set, since
1339 * XML constructs only use < 128 chars
1342 if (*ctxt
->input
->cur
== 0xD) {
1343 if (ctxt
->input
->cur
[1] == 0xA) {
1349 return((int) *ctxt
->input
->cur
);
1352 * An encoding problem may arise from a truncated input buffer
1353 * splitting a character in the middle. In that case do not raise
1354 * an error but return 0 to endicate an end of stream problem
1356 if (ctxt
->input
->end
- ctxt
->input
->cur
< 4) {
1362 * If we detect an UTF8 error that probably mean that the
1363 * input encoding didn't get properly advertised in the
1364 * declaration header. Report the error and switch the encoding
1365 * to ISO-Latin-1 (if you don't like this policy, just declare the
1368 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
)) {
1369 ctxt
->sax
->error(ctxt
->userData
,
1370 "Input is not proper UTF-8, indicate encoding !\n");
1371 ctxt
->sax
->error(ctxt
->userData
, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1372 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
1373 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
1375 ctxt
->wellFormed
= 0;
1376 ctxt
->errNo
= XML_ERR_INVALID_ENCODING
;
1378 ctxt
->charset
= XML_CHAR_ENCODING_8859_1
;
1380 return((int) *ctxt
->input
->cur
);
1384 * xmlStringCurrentChar:
1385 * @ctxt: the XML parser context
1386 * @cur: pointer to the beginning of the char
1387 * @len: pointer to the length of the char read
1389 * The current char value, if using UTF-8 this may actually span multiple
1390 * bytes in the input buffer.
1392 * Returns the current char value and its length
1396 xmlStringCurrentChar(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
, int *len
)
1398 if ((ctxt
== NULL
) || (ctxt
->charset
== XML_CHAR_ENCODING_UTF8
)) {
1400 * We are supposed to handle UTF8, check it's valid
1401 * From rfc2044: encoding of the Unicode values on UTF-8:
1403 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1404 * 0000 0000-0000 007F 0xxxxxxx
1405 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1406 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1408 * Check for the 0x110000 limit too
1415 if ((cur
[1] & 0xc0) != 0x80)
1416 goto encoding_error
;
1417 if ((c
& 0xe0) == 0xe0) {
1419 if ((cur
[2] & 0xc0) != 0x80)
1420 goto encoding_error
;
1421 if ((c
& 0xf0) == 0xf0) {
1422 if (((c
& 0xf8) != 0xf0) || ((cur
[3] & 0xc0) != 0x80))
1423 goto encoding_error
;
1426 val
= (cur
[0] & 0x7) << 18;
1427 val
|= (cur
[1] & 0x3f) << 12;
1428 val
|= (cur
[2] & 0x3f) << 6;
1429 val
|= cur
[3] & 0x3f;
1433 val
= (cur
[0] & 0xf) << 12;
1434 val
|= (cur
[1] & 0x3f) << 6;
1435 val
|= cur
[2] & 0x3f;
1440 val
= (cur
[0] & 0x1f) << 6;
1441 val
|= cur
[1] & 0x3f;
1443 if (!IS_CHAR(val
)) {
1444 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
1445 (ctxt
->sax
->error
!= NULL
))
1446 ctxt
->sax
->error(ctxt
->userData
,
1447 "Char 0x%X out of allowed range\n",
1450 ctxt
->errNo
= XML_ERR_INVALID_ENCODING
;
1451 ctxt
->wellFormed
= 0;
1452 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
1459 return ((int) *cur
);
1463 * Assume it's a fixed length encoding (1) with
1464 * a compatible encoding for the ASCII set, since
1465 * XML constructs only use < 128 chars
1468 return ((int) *cur
);
1472 * If we detect an UTF8 error that probably mean that the
1473 * input encoding didn't get properly advertised in the
1474 * declaration header. Report the error and switch the encoding
1475 * to ISO-Latin-1 (if you don't like this policy, just declare the
1479 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
)) {
1480 ctxt
->sax
->error(ctxt
->userData
,
1481 "Input is not proper UTF-8, indicate encoding !\n");
1482 ctxt
->sax
->error(ctxt
->userData
,
1483 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1484 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
1485 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
1487 ctxt
->errNo
= XML_ERR_INVALID_ENCODING
;
1488 ctxt
->wellFormed
= 0;
1492 return ((int) *cur
);
1496 * xmlCopyCharMultiByte:
1497 * @out: pointer to an array of xmlChar
1498 * @val: the char value
1500 * append the char value in the array
1502 * Returns the number of xmlChar written
1505 xmlCopyCharMultiByte(xmlChar
*out
, int val
) {
1507 * We are supposed to handle UTF8, check it's valid
1508 * From rfc2044: encoding of the Unicode values on UTF-8:
1510 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1511 * 0000 0000-0000 007F 0xxxxxxx
1512 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1513 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1516 xmlChar
*savedout
= out
;
1518 if (val
< 0x800) { *out
++= (val
>> 6) | 0xC0; bits
= 0; }
1519 else if (val
< 0x10000) { *out
++= (val
>> 12) | 0xE0; bits
= 6;}
1520 else if (val
< 0x110000) { *out
++= (val
>> 18) | 0xF0; bits
= 12; }
1522 xmlGenericError(xmlGenericErrorContext
,
1523 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1527 for ( ; bits
>= 0; bits
-= 6)
1528 *out
++= ((val
>> bits
) & 0x3F) | 0x80 ;
1529 return (out
- savedout
);
1531 *out
= (xmlChar
) val
;
1537 * @len: Ignored, compatibility
1538 * @out: pointer to an array of xmlChar
1539 * @val: the char value
1541 * append the char value in the array
1543 * Returns the number of xmlChar written
1547 xmlCopyChar(int len ATTRIBUTE_UNUSED
, xmlChar
*out
, int val
) {
1548 /* the len parameter is ignored */
1550 return(xmlCopyCharMultiByte (out
, val
));
1552 *out
= (xmlChar
) val
;
1556 /************************************************************************
1558 * Commodity functions to switch encodings *
1560 ************************************************************************/
1563 * xmlSwitchEncoding:
1564 * @ctxt: the parser context
1565 * @enc: the encoding value (number)
1567 * change the input functions when discovering the character encoding
1568 * of a given entity.
1570 * Returns 0 in case of success, -1 otherwise
1573 xmlSwitchEncoding(xmlParserCtxtPtr ctxt
, xmlCharEncoding enc
)
1575 xmlCharEncodingHandlerPtr handler
;
1578 case XML_CHAR_ENCODING_ERROR
:
1579 ctxt
->errNo
= XML_ERR_UNKNOWN_ENCODING
;
1580 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1581 ctxt
->sax
->error(ctxt
->userData
, "encoding unknown\n");
1582 ctxt
->wellFormed
= 0;
1583 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
1585 case XML_CHAR_ENCODING_NONE
:
1586 /* let's assume it's UTF-8 without the XML decl */
1587 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1589 case XML_CHAR_ENCODING_UTF8
:
1590 /* default encoding, no conversion should be needed */
1591 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1594 * Errata on XML-1.0 June 20 2001
1595 * Specific handling of the Byte Order Mark for
1598 if ((ctxt
->input
!= NULL
) &&
1599 (ctxt
->input
->cur
[0] == 0xEF) &&
1600 (ctxt
->input
->cur
[1] == 0xBB) &&
1601 (ctxt
->input
->cur
[2] == 0xBF)) {
1602 ctxt
->input
->cur
+= 3;
1608 handler
= xmlGetCharEncodingHandler(enc
);
1609 if (handler
== NULL
) {
1614 case XML_CHAR_ENCODING_ERROR
:
1615 ctxt
->errNo
= XML_ERR_UNKNOWN_ENCODING
;
1616 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1617 ctxt
->sax
->error(ctxt
->userData
, "encoding unknown\n");
1618 ctxt
->wellFormed
= 0;
1619 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
1620 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1622 case XML_CHAR_ENCODING_NONE
:
1623 /* let's assume it's UTF-8 without the XML decl */
1624 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1626 case XML_CHAR_ENCODING_UTF8
:
1627 case XML_CHAR_ENCODING_ASCII
:
1628 /* default encoding, no conversion should be needed */
1629 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1631 case XML_CHAR_ENCODING_UTF16LE
:
1633 case XML_CHAR_ENCODING_UTF16BE
:
1635 case XML_CHAR_ENCODING_UCS4LE
:
1636 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1637 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1638 ctxt
->sax
->error(ctxt
->userData
,
1639 "char encoding USC4 little endian not supported\n");
1641 case XML_CHAR_ENCODING_UCS4BE
:
1642 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1643 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1644 ctxt
->sax
->error(ctxt
->userData
,
1645 "char encoding USC4 big endian not supported\n");
1647 case XML_CHAR_ENCODING_EBCDIC
:
1648 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1649 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1650 ctxt
->sax
->error(ctxt
->userData
,
1651 "char encoding EBCDIC not supported\n");
1653 case XML_CHAR_ENCODING_UCS4_2143
:
1654 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1655 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1656 ctxt
->sax
->error(ctxt
->userData
,
1657 "char encoding UCS4 2143 not supported\n");
1659 case XML_CHAR_ENCODING_UCS4_3412
:
1660 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1661 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1662 ctxt
->sax
->error(ctxt
->userData
,
1663 "char encoding UCS4 3412 not supported\n");
1665 case XML_CHAR_ENCODING_UCS2
:
1666 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1667 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1668 ctxt
->sax
->error(ctxt
->userData
,
1669 "char encoding UCS2 not supported\n");
1671 case XML_CHAR_ENCODING_8859_1
:
1672 case XML_CHAR_ENCODING_8859_2
:
1673 case XML_CHAR_ENCODING_8859_3
:
1674 case XML_CHAR_ENCODING_8859_4
:
1675 case XML_CHAR_ENCODING_8859_5
:
1676 case XML_CHAR_ENCODING_8859_6
:
1677 case XML_CHAR_ENCODING_8859_7
:
1678 case XML_CHAR_ENCODING_8859_8
:
1679 case XML_CHAR_ENCODING_8859_9
:
1681 * We used to keep the internal content in the
1682 * document encoding however this turns being unmaintainable
1683 * So xmlGetCharEncodingHandler() will return non-null
1684 * values for this now.
1686 if ((ctxt
->inputNr
== 1) &&
1687 (ctxt
->encoding
== NULL
) &&
1688 (ctxt
->input
->encoding
!= NULL
)) {
1689 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
1691 ctxt
->charset
= enc
;
1693 case XML_CHAR_ENCODING_2022_JP
:
1694 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1695 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1696 ctxt
->sax
->error(ctxt
->userData
,
1697 "char encoding ISO-2022-JPnot supported\n");
1699 case XML_CHAR_ENCODING_SHIFT_JIS
:
1700 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1701 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1702 ctxt
->sax
->error(ctxt
->userData
,
1703 "char encoding Shift_JIS not supported\n");
1705 case XML_CHAR_ENCODING_EUC_JP
:
1706 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
1707 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1708 ctxt
->sax
->error(ctxt
->userData
,
1709 "char encoding EUC-JPnot supported\n");
1713 if (handler
== NULL
)
1715 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1716 return(xmlSwitchToEncoding(ctxt
, handler
));
1720 * xmlSwitchToEncoding:
1721 * @ctxt: the parser context
1722 * @handler: the encoding handler
1724 * change the input functions when discovering the character encoding
1725 * of a given entity.
1727 * Returns 0 in case of success, -1 otherwise
1730 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt
, xmlCharEncodingHandlerPtr handler
)
1734 if (handler
!= NULL
) {
1735 if (ctxt
->input
!= NULL
) {
1736 if (ctxt
->input
->buf
!= NULL
) {
1737 if (ctxt
->input
->buf
->encoder
!= NULL
) {
1739 * Check in case the auto encoding detetection triggered
1742 if (ctxt
->input
->buf
->encoder
== handler
)
1746 * "UTF-16" can be used for both LE and BE
1748 if ((!xmlStrncmp(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
1749 BAD_CAST
"UTF-16", 6)) &&
1750 (!xmlStrncmp(BAD_CAST handler
->name
,
1751 BAD_CAST
"UTF-16", 6))) {
1756 * Note: this is a bit dangerous, but that's what it
1757 * takes to use nearly compatible signature for different
1760 xmlCharEncCloseFunc(ctxt
->input
->buf
->encoder
);
1761 ctxt
->input
->buf
->encoder
= handler
;
1764 ctxt
->input
->buf
->encoder
= handler
;
1767 * Is there already some content down the pipe to convert ?
1769 if ((ctxt
->input
->buf
->buffer
!= NULL
) &&
1770 (ctxt
->input
->buf
->buffer
->use
> 0)) {
1774 * Specific handling of the Byte Order Mark for
1777 if ((handler
->name
!= NULL
) &&
1778 (!strcmp(handler
->name
, "UTF-16LE")) &&
1779 (ctxt
->input
->cur
[0] == 0xFF) &&
1780 (ctxt
->input
->cur
[1] == 0xFE)) {
1781 ctxt
->input
->cur
+= 2;
1783 if ((handler
->name
!= NULL
) &&
1784 (!strcmp(handler
->name
, "UTF-16BE")) &&
1785 (ctxt
->input
->cur
[0] == 0xFE) &&
1786 (ctxt
->input
->cur
[1] == 0xFF)) {
1787 ctxt
->input
->cur
+= 2;
1790 * Errata on XML-1.0 June 20 2001
1791 * Specific handling of the Byte Order Mark for
1794 if ((handler
->name
!= NULL
) &&
1795 (!strcmp(handler
->name
, "UTF-8")) &&
1796 (ctxt
->input
->cur
[0] == 0xEF) &&
1797 (ctxt
->input
->cur
[1] == 0xBB) &&
1798 (ctxt
->input
->cur
[2] == 0xBF)) {
1799 ctxt
->input
->cur
+= 3;
1803 * Shrink the current input buffer.
1804 * Move it as the raw buffer and create a new input buffer
1806 processed
= ctxt
->input
->cur
- ctxt
->input
->base
;
1807 xmlBufferShrink(ctxt
->input
->buf
->buffer
, processed
);
1808 ctxt
->input
->buf
->raw
= ctxt
->input
->buf
->buffer
;
1809 ctxt
->input
->buf
->buffer
= xmlBufferCreate();
1813 * convert as much as possible of the buffer
1815 nbchars
= xmlCharEncInFunc(ctxt
->input
->buf
->encoder
,
1816 ctxt
->input
->buf
->buffer
,
1817 ctxt
->input
->buf
->raw
);
1820 * convert just enough to get
1821 * '<?xml version="1.0" encoding="xxx"?>'
1822 * parsed with the autodetected encoding
1823 * into the parser reading buffer.
1825 nbchars
= xmlCharEncFirstLine(ctxt
->input
->buf
->encoder
,
1826 ctxt
->input
->buf
->buffer
,
1827 ctxt
->input
->buf
->raw
);
1830 xmlGenericError(xmlGenericErrorContext
,
1831 "xmlSwitchToEncoding: encoder error\n");
1835 ctxt
->input
->cur
= ctxt
->input
->buf
->buffer
->content
;
1837 &ctxt
->input
->base
[ctxt
->input
->buf
->buffer
->use
];
1842 if ((ctxt
->input
->length
== 0) || (ctxt
->input
->buf
== NULL
)) {
1844 * When parsing a static memory array one must know the
1845 * size to be able to convert the buffer.
1847 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1848 ctxt
->sax
->error(ctxt
->userData
,
1849 "xmlSwitchToEncoding : no input\n");
1855 * Shrink the current input buffer.
1856 * Move it as the raw buffer and create a new input buffer
1858 processed
= ctxt
->input
->cur
- ctxt
->input
->base
;
1860 ctxt
->input
->buf
->raw
= xmlBufferCreate();
1861 xmlBufferAdd(ctxt
->input
->buf
->raw
, ctxt
->input
->cur
,
1862 ctxt
->input
->length
- processed
);
1863 ctxt
->input
->buf
->buffer
= xmlBufferCreate();
1866 * convert as much as possible of the raw input
1867 * to the parser reading buffer.
1869 nbchars
= xmlCharEncInFunc(ctxt
->input
->buf
->encoder
,
1870 ctxt
->input
->buf
->buffer
,
1871 ctxt
->input
->buf
->raw
);
1873 xmlGenericError(xmlGenericErrorContext
,
1874 "xmlSwitchToEncoding: encoder error\n");
1879 * Conversion succeeded, get rid of the old buffer
1881 if ((ctxt
->input
->free
!= NULL
) &&
1882 (ctxt
->input
->base
!= NULL
))
1883 ctxt
->input
->free((xmlChar
*) ctxt
->input
->base
);
1885 ctxt
->input
->cur
= ctxt
->input
->buf
->buffer
->content
;
1887 &ctxt
->input
->base
[ctxt
->input
->buf
->buffer
->use
];
1891 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1892 ctxt
->sax
->error(ctxt
->userData
,
1893 "xmlSwitchToEncoding : no input\n");
1897 * The parsing is now done in UTF8 natively
1899 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1906 /************************************************************************
1908 * Commodity functions to handle entities processing *
1910 ************************************************************************/
1913 * xmlFreeInputStream:
1914 * @input: an xmlParserInputPtr
1916 * Free up an input stream.
1919 xmlFreeInputStream(xmlParserInputPtr input
) {
1920 if (input
== NULL
) return;
1922 if (input
->filename
!= NULL
) xmlFree((char *) input
->filename
);
1923 if (input
->directory
!= NULL
) xmlFree((char *) input
->directory
);
1924 if (input
->encoding
!= NULL
) xmlFree((char *) input
->encoding
);
1925 if (input
->version
!= NULL
) xmlFree((char *) input
->version
);
1926 if ((input
->free
!= NULL
) && (input
->base
!= NULL
))
1927 input
->free((xmlChar
*) input
->base
);
1928 if (input
->buf
!= NULL
)
1929 xmlFreeParserInputBuffer(input
->buf
);
1934 * xmlNewInputStream:
1935 * @ctxt: an XML parser context
1937 * Create a new input stream structure
1938 * Returns the new input stream or NULL
1941 xmlNewInputStream(xmlParserCtxtPtr ctxt
) {
1942 xmlParserInputPtr input
;
1944 input
= (xmlParserInputPtr
) xmlMalloc(sizeof(xmlParserInput
));
1945 if (input
== NULL
) {
1947 ctxt
->errNo
= XML_ERR_NO_MEMORY
;
1948 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
1949 ctxt
->sax
->error(ctxt
->userData
,
1950 "malloc: couldn't allocate a new input stream\n");
1951 ctxt
->errNo
= XML_ERR_NO_MEMORY
;
1955 memset(input
, 0, sizeof(xmlParserInput
));
1958 input
->standalone
= -1;
1963 * xmlNewIOInputStream:
1964 * @ctxt: an XML parser context
1965 * @input: an I/O Input
1966 * @enc: the charset encoding if known
1968 * Create a new input stream structure encapsulating the @input into
1969 * a stream suitable for the parser.
1971 * Returns the new input stream or NULL
1974 xmlNewIOInputStream(xmlParserCtxtPtr ctxt
, xmlParserInputBufferPtr input
,
1975 xmlCharEncoding enc
) {
1976 xmlParserInputPtr inputStream
;
1978 if (xmlParserDebugEntities
)
1979 xmlGenericError(xmlGenericErrorContext
, "new input from I/O\n");
1980 inputStream
= xmlNewInputStream(ctxt
);
1981 if (inputStream
== NULL
) {
1984 inputStream
->filename
= NULL
;
1985 inputStream
->buf
= input
;
1986 inputStream
->base
= inputStream
->buf
->buffer
->content
;
1987 inputStream
->cur
= inputStream
->buf
->buffer
->content
;
1988 inputStream
->end
= &inputStream
->base
[inputStream
->buf
->buffer
->use
];
1989 if (enc
!= XML_CHAR_ENCODING_NONE
) {
1990 xmlSwitchEncoding(ctxt
, enc
);
1993 return(inputStream
);
1997 * xmlNewEntityInputStream:
1998 * @ctxt: an XML parser context
1999 * @entity: an Entity pointer
2001 * Create a new input stream based on an xmlEntityPtr
2003 * Returns the new input stream or NULL
2006 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
2007 xmlParserInputPtr input
;
2009 if (entity
== NULL
) {
2010 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
2011 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2012 ctxt
->sax
->error(ctxt
->userData
,
2013 "internal: xmlNewEntityInputStream entity = NULL\n");
2014 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
2017 if (xmlParserDebugEntities
)
2018 xmlGenericError(xmlGenericErrorContext
,
2019 "new input from entity: %s\n", entity
->name
);
2020 if (entity
->content
== NULL
) {
2021 switch (entity
->etype
) {
2022 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
:
2023 ctxt
->errNo
= XML_ERR_UNPARSED_ENTITY
;
2024 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2025 ctxt
->sax
->error(ctxt
->userData
,
2026 "xmlNewEntityInputStream unparsed entity !\n");
2028 case XML_EXTERNAL_GENERAL_PARSED_ENTITY
:
2029 case XML_EXTERNAL_PARAMETER_ENTITY
:
2030 return(xmlLoadExternalEntity((char *) entity
->URI
,
2031 (char *) entity
->ExternalID
, ctxt
));
2032 case XML_INTERNAL_GENERAL_ENTITY
:
2033 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2034 ctxt
->sax
->error(ctxt
->userData
,
2035 "Internal entity %s without content !\n", entity
->name
);
2037 case XML_INTERNAL_PARAMETER_ENTITY
:
2038 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
2039 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2040 ctxt
->sax
->error(ctxt
->userData
,
2041 "Internal parameter entity %s without content !\n", entity
->name
);
2043 case XML_INTERNAL_PREDEFINED_ENTITY
:
2044 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
2045 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2046 ctxt
->sax
->error(ctxt
->userData
,
2047 "Predefined entity %s without content !\n", entity
->name
);
2052 input
= xmlNewInputStream(ctxt
);
2053 if (input
== NULL
) {
2056 input
->filename
= (char *) entity
->URI
;
2057 input
->base
= entity
->content
;
2058 input
->cur
= entity
->content
;
2059 input
->length
= entity
->length
;
2060 input
->end
= &entity
->content
[input
->length
];
2065 * xmlNewStringInputStream:
2066 * @ctxt: an XML parser context
2067 * @buffer: an memory buffer
2069 * Create a new input stream based on a memory buffer.
2070 * Returns the new input stream
2073 xmlNewStringInputStream(xmlParserCtxtPtr ctxt
, const xmlChar
*buffer
) {
2074 xmlParserInputPtr input
;
2076 if (buffer
== NULL
) {
2077 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
2078 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2079 ctxt
->sax
->error(ctxt
->userData
,
2080 "internal: xmlNewStringInputStream string = NULL\n");
2083 if (xmlParserDebugEntities
)
2084 xmlGenericError(xmlGenericErrorContext
,
2085 "new fixed input: %.30s\n", buffer
);
2086 input
= xmlNewInputStream(ctxt
);
2087 if (input
== NULL
) {
2090 input
->base
= buffer
;
2091 input
->cur
= buffer
;
2092 input
->length
= xmlStrlen(buffer
);
2093 input
->end
= &buffer
[input
->length
];
2098 * xmlNewInputFromFile:
2099 * @ctxt: an XML parser context
2100 * @filename: the filename to use as entity
2102 * Create a new input stream based on a file.
2104 * Returns the new input stream or NULL in case of error
2107 xmlNewInputFromFile(xmlParserCtxtPtr ctxt
, const char *filename
) {
2108 xmlParserInputBufferPtr buf
;
2109 xmlParserInputPtr inputStream
;
2110 char *directory
= NULL
;
2111 xmlChar
*URI
= NULL
;
2113 if (xmlParserDebugEntities
)
2114 xmlGenericError(xmlGenericErrorContext
,
2115 "new input from file: %s\n", filename
);
2116 if (ctxt
== NULL
) return(NULL
);
2117 buf
= xmlParserInputBufferCreateFilename(filename
, XML_CHAR_ENCODING_NONE
);
2121 URI
= xmlStrdup((xmlChar
*) filename
);
2122 directory
= xmlParserGetDirectory((const char *) URI
);
2124 inputStream
= xmlNewInputStream(ctxt
);
2125 if (inputStream
== NULL
) {
2126 if (directory
!= NULL
) xmlFree((char *) directory
);
2127 if (URI
!= NULL
) xmlFree((char *) URI
);
2131 inputStream
->filename
= (const char *) URI
;
2132 inputStream
->directory
= directory
;
2133 inputStream
->buf
= buf
;
2135 inputStream
->base
= inputStream
->buf
->buffer
->content
;
2136 inputStream
->cur
= inputStream
->buf
->buffer
->content
;
2137 inputStream
->end
= &inputStream
->base
[inputStream
->buf
->buffer
->use
];
2138 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
2139 ctxt
->directory
= (char *) xmlStrdup((const xmlChar
*) directory
);
2140 return(inputStream
);
2143 /************************************************************************
2145 * Commodity functions to handle parser contexts *
2147 ************************************************************************/
2150 * xmlInitParserCtxt:
2151 * @ctxt: an XML parser context
2153 * Initialize a parser context
2157 xmlInitParserCtxt(xmlParserCtxtPtr ctxt
)
2160 xmlGenericError(xmlGenericErrorContext
,
2161 "xmlInitParserCtxt: NULL context given\n");
2165 xmlDefaultSAXHandlerInit();
2167 ctxt
->sax
= (xmlSAXHandler
*) xmlMalloc(sizeof(xmlSAXHandler
));
2168 if (ctxt
->sax
== NULL
) {
2169 xmlGenericError(xmlGenericErrorContext
,
2170 "xmlInitParserCtxt: out of memory\n");
2173 memcpy(ctxt
->sax
, &xmlDefaultSAXHandler
, sizeof(xmlSAXHandler
));
2175 /* Allocate the Input stack */
2176 ctxt
->inputTab
= (xmlParserInputPtr
*)
2177 xmlMalloc(5 * sizeof(xmlParserInputPtr
));
2178 if (ctxt
->inputTab
== NULL
) {
2179 xmlGenericError(xmlGenericErrorContext
,
2180 "xmlInitParserCtxt: out of memory\n");
2190 ctxt
->version
= NULL
;
2191 ctxt
->encoding
= NULL
;
2192 ctxt
->standalone
= -1;
2193 ctxt
->hasExternalSubset
= 0;
2194 ctxt
->hasPErefs
= 0;
2197 ctxt
->instate
= XML_PARSER_START
;
2199 ctxt
->directory
= NULL
;
2201 /* Allocate the Node stack */
2202 ctxt
->nodeTab
= (xmlNodePtr
*) xmlMalloc(10 * sizeof(xmlNodePtr
));
2203 if (ctxt
->nodeTab
== NULL
) {
2204 xmlGenericError(xmlGenericErrorContext
,
2205 "xmlInitParserCtxt: out of memory\n");
2218 /* Allocate the Name stack */
2219 ctxt
->nameTab
= (xmlChar
**) xmlMalloc(10 * sizeof(xmlChar
*));
2220 if (ctxt
->nameTab
== NULL
) {
2221 xmlGenericError(xmlGenericErrorContext
,
2222 "xmlInitParserCtxt: out of memory\n");
2238 /* Allocate the space stack */
2239 ctxt
->spaceTab
= (int *) xmlMalloc(10 * sizeof(int));
2240 if (ctxt
->spaceTab
== NULL
) {
2241 xmlGenericError(xmlGenericErrorContext
,
2242 "xmlInitParserCtxt: out of memory\n");
2258 ctxt
->spaceMax
= 10;
2259 ctxt
->spaceTab
[0] = -1;
2260 ctxt
->space
= &ctxt
->spaceTab
[0];
2261 ctxt
->userData
= ctxt
;
2263 ctxt
->wellFormed
= 1;
2265 ctxt
->loadsubset
= xmlLoadExtDtdDefaultValue
;
2266 ctxt
->validate
= xmlDoValidityCheckingDefaultValue
;
2267 ctxt
->pedantic
= xmlPedanticParserDefaultValue
;
2268 ctxt
->linenumbers
= xmlLineNumbersDefaultValue
;
2269 ctxt
->keepBlanks
= xmlKeepBlanksDefaultValue
;
2270 if (ctxt
->keepBlanks
== 0)
2271 ctxt
->sax
->ignorableWhitespace
= ignorableWhitespace
;
2273 ctxt
->vctxt
.userData
= ctxt
;
2274 ctxt
->vctxt
.error
= xmlParserValidityError
;
2275 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
2276 if (ctxt
->validate
) {
2277 if (xmlGetWarningsDefaultValue
== 0)
2278 ctxt
->vctxt
.warning
= NULL
;
2280 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
2281 ctxt
->vctxt
.nodeMax
= 0;
2283 ctxt
->replaceEntities
= xmlSubstituteEntitiesDefaultValue
;
2284 ctxt
->record_info
= 0;
2286 ctxt
->checkIndex
= 0;
2288 ctxt
->errNo
= XML_ERR_OK
;
2290 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
2291 ctxt
->catalogs
= NULL
;
2292 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
2296 * xmlFreeParserCtxt:
2297 * @ctxt: an XML parser context
2299 * Free all the memory used by a parser context. However the parsed
2300 * document in ctxt->myDoc is not freed.
2304 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt
)
2306 xmlParserInputPtr input
;
2309 if (ctxt
== NULL
) return;
2311 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
2312 xmlFreeInputStream(input
);
2314 while ((oldname
= namePop(ctxt
)) != NULL
) { /* Non consuming */
2317 if (ctxt
->spaceTab
!= NULL
) xmlFree(ctxt
->spaceTab
);
2318 if (ctxt
->nameTab
!= NULL
) xmlFree(ctxt
->nameTab
);
2319 if (ctxt
->nodeTab
!= NULL
) xmlFree(ctxt
->nodeTab
);
2320 if (ctxt
->inputTab
!= NULL
) xmlFree(ctxt
->inputTab
);
2321 if (ctxt
->version
!= NULL
) xmlFree((char *) ctxt
->version
);
2322 if (ctxt
->encoding
!= NULL
) xmlFree((char *) ctxt
->encoding
);
2323 if (ctxt
->intSubName
!= NULL
) xmlFree((char *) ctxt
->intSubName
);
2324 if (ctxt
->extSubURI
!= NULL
) xmlFree((char *) ctxt
->extSubURI
);
2325 if (ctxt
->extSubSystem
!= NULL
) xmlFree((char *) ctxt
->extSubSystem
);
2326 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
!= &xmlDefaultSAXHandler
))
2328 if (ctxt
->directory
!= NULL
) xmlFree((char *) ctxt
->directory
);
2329 if (ctxt
->vctxt
.nodeTab
!= NULL
) xmlFree(ctxt
->vctxt
.nodeTab
);
2330 #ifdef LIBXML_CATALOG_ENABLED
2331 if (ctxt
->catalogs
!= NULL
)
2332 xmlCatalogFreeLocal(ctxt
->catalogs
);
2340 * Allocate and initialize a new parser context.
2342 * Returns the xmlParserCtxtPtr or NULL
2348 xmlParserCtxtPtr ctxt
;
2350 ctxt
= (xmlParserCtxtPtr
) xmlMalloc(sizeof(xmlParserCtxt
));
2352 xmlGenericError(xmlGenericErrorContext
,
2353 "xmlNewParserCtxt : cannot allocate context\n");
2354 xmlGenericError(xmlGenericErrorContext
, "malloc failed");
2357 memset(ctxt
, 0, sizeof(xmlParserCtxt
));
2358 xmlInitParserCtxt(ctxt
);
2362 /************************************************************************
2364 * Handling of node information *
2366 ************************************************************************/
2369 * xmlClearParserCtxt:
2370 * @ctxt: an XML parser context
2372 * Clear (release owned resources) and reinitialize a parser context
2376 xmlClearParserCtxt(xmlParserCtxtPtr ctxt
)
2380 xmlClearNodeInfoSeq(&ctxt
->node_seq
);
2381 xmlInitParserCtxt(ctxt
);
2385 * xmlParserFindNodeInfo:
2386 * @ctxt: an XML parser context
2387 * @node: an XML node within the tree
2389 * Find the parser node info struct for a given node
2391 * Returns an xmlParserNodeInfo block pointer or NULL
2393 const xmlParserNodeInfo
* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx
,
2394 const xmlNodePtr node
)
2398 /* Find position where node should be at */
2399 pos
= xmlParserFindNodeInfoIndex(&ctx
->node_seq
, node
);
2400 if (pos
< ctx
->node_seq
.length
&& ctx
->node_seq
.buffer
[pos
].node
== node
)
2401 return &ctx
->node_seq
.buffer
[pos
];
2408 * xmlInitNodeInfoSeq:
2409 * @seq: a node info sequence pointer
2411 * -- Initialize (set to initial state) node info sequence
2414 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq
)
2422 * xmlClearNodeInfoSeq:
2423 * @seq: a node info sequence pointer
2425 * -- Clear (release memory and reinitialize) node
2429 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq
)
2431 if ( seq
->buffer
!= NULL
)
2432 xmlFree(seq
->buffer
);
2433 xmlInitNodeInfoSeq(seq
);
2438 * xmlParserFindNodeInfoIndex:
2439 * @seq: a node info sequence pointer
2440 * @node: an XML node pointer
2443 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2444 * the given node is or should be at in a sorted sequence
2446 * Returns a long indicating the position of the record
2448 unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq
,
2449 const xmlNodePtr node
)
2451 unsigned long upper
, lower
, middle
;
2454 /* Do a binary search for the key */
2456 upper
= seq
->length
;
2458 while ( lower
<= upper
&& !found
) {
2459 middle
= lower
+ (upper
- lower
) / 2;
2460 if ( node
== seq
->buffer
[middle
- 1].node
)
2462 else if ( node
< seq
->buffer
[middle
- 1].node
)
2468 /* Return position */
2469 if ( middle
== 0 || seq
->buffer
[middle
- 1].node
< node
)
2477 * xmlParserAddNodeInfo:
2478 * @ctxt: an XML parser context
2479 * @info: a node info sequence pointer
2481 * Insert node info record into the sorted sequence
2484 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt
,
2485 const xmlParserNodeInfoPtr info
)
2489 /* Find pos and check to see if node is already in the sequence */
2490 pos
= xmlParserFindNodeInfoIndex(&ctxt
->node_seq
, (const xmlNodePtr
)
2492 if (pos
< ctxt
->node_seq
.length
2493 && ctxt
->node_seq
.buffer
[pos
].node
== info
->node
) {
2494 ctxt
->node_seq
.buffer
[pos
] = *info
;
2497 /* Otherwise, we need to add new node to buffer */
2499 if (ctxt
->node_seq
.length
+ 1 > ctxt
->node_seq
.maximum
) {
2500 xmlParserNodeInfo
*tmp_buffer
;
2501 unsigned int byte_size
;
2503 if (ctxt
->node_seq
.maximum
== 0)
2504 ctxt
->node_seq
.maximum
= 2;
2505 byte_size
= (sizeof(*ctxt
->node_seq
.buffer
) *
2506 (2 * ctxt
->node_seq
.maximum
));
2508 if (ctxt
->node_seq
.buffer
== NULL
)
2509 tmp_buffer
= (xmlParserNodeInfo
*) xmlMalloc(byte_size
);
2512 (xmlParserNodeInfo
*) xmlRealloc(ctxt
->node_seq
.buffer
,
2515 if (tmp_buffer
== NULL
) {
2516 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2517 ctxt
->sax
->error(ctxt
->userData
, "Out of memory\n");
2518 ctxt
->errNo
= XML_ERR_NO_MEMORY
;
2521 ctxt
->node_seq
.buffer
= tmp_buffer
;
2522 ctxt
->node_seq
.maximum
*= 2;
2525 /* If position is not at end, move elements out of the way */
2526 if (pos
!= ctxt
->node_seq
.length
) {
2529 for (i
= ctxt
->node_seq
.length
; i
> pos
; i
--)
2530 ctxt
->node_seq
.buffer
[i
] = ctxt
->node_seq
.buffer
[i
- 1];
2533 /* Copy element and increase length */
2534 ctxt
->node_seq
.buffer
[pos
] = *info
;
2535 ctxt
->node_seq
.length
++;
2539 /************************************************************************
2541 * Default settings *
2543 ************************************************************************/
2545 * xmlPedanticParserDefault:
2548 * Set and return the previous value for enabling pedantic warnings.
2550 * Returns the last value for 0 for no substitution, 1 for substitution.
2554 xmlPedanticParserDefault(int val
) {
2555 int old
= xmlPedanticParserDefaultValue
;
2557 xmlPedanticParserDefaultValue
= val
;
2562 * xmlLineNumbersDefault:
2565 * Set and return the previous value for enabling line numbers in elements
2566 * contents. This may break on old application and is turned off by default.
2568 * Returns the last value for 0 for no substitution, 1 for substitution.
2572 xmlLineNumbersDefault(int val
) {
2573 int old
= xmlLineNumbersDefaultValue
;
2575 xmlLineNumbersDefaultValue
= val
;
2580 * xmlSubstituteEntitiesDefault:
2583 * Set and return the previous value for default entity support.
2584 * Initially the parser always keep entity references instead of substituting
2585 * entity values in the output. This function has to be used to change the
2586 * default parser behavior
2587 * SAX::substituteEntities() has to be used for changing that on a file by
2590 * Returns the last value for 0 for no substitution, 1 for substitution.
2594 xmlSubstituteEntitiesDefault(int val
) {
2595 int old
= xmlSubstituteEntitiesDefaultValue
;
2597 xmlSubstituteEntitiesDefaultValue
= val
;
2602 * xmlKeepBlanksDefault:
2605 * Set and return the previous value for default blanks text nodes support.
2606 * The 1.x version of the parser used an heuristic to try to detect
2607 * ignorable white spaces. As a result the SAX callback was generating
2608 * ignorableWhitespace() callbacks instead of characters() one, and when
2609 * using the DOM output text nodes containing those blanks were not generated.
2610 * The 2.x and later version will switch to the XML standard way and
2611 * ignorableWhitespace() are only generated when running the parser in
2612 * validating mode and when the current element doesn't allow CDATA or
2614 * This function is provided as a way to force the standard behavior
2615 * on 1.X libs and to switch back to the old mode for compatibility when
2616 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2617 * by using xmlIsBlankNode() commodity function to detect the "empty"
2619 * This value also affect autogeneration of indentation when saving code
2620 * if blanks sections are kept, indentation is not generated.
2622 * Returns the last value for 0 for no substitution, 1 for substitution.
2626 xmlKeepBlanksDefault(int val
) {
2627 int old
= xmlKeepBlanksDefaultValue
;
2629 xmlKeepBlanksDefaultValue
= val
;
2630 xmlIndentTreeOutput
= !val
;
2634 /************************************************************************
2636 * Deprecated functions kept for compatibility *
2638 ************************************************************************/
2641 * xmlCheckLanguageID:
2642 * @lang: pointer to the string value
2644 * Checks that the value conforms to the LanguageID production:
2646 * NOTE: this is somewhat deprecated, those productions were removed from
2647 * the XML Second edition.
2649 * [33] LanguageID ::= Langcode ('-' Subcode)*
2650 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2651 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2652 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2653 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2654 * [38] Subcode ::= ([a-z] | [A-Z])+
2656 * Returns 1 if correct 0 otherwise
2659 xmlCheckLanguageID(const xmlChar
*lang
) {
2660 const xmlChar
*cur
= lang
;
2664 if (((cur
[0] == 'i') && (cur
[1] == '-')) ||
2665 ((cur
[0] == 'I') && (cur
[1] == '-'))) {
2670 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
2671 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
2673 } else if (((cur
[0] == 'x') && (cur
[1] == '-')) ||
2674 ((cur
[0] == 'X') && (cur
[1] == '-'))) {
2679 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
2680 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
2682 } else if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
2683 ((cur
[0] >= 'a') && (cur
[0] <= 'z'))) {
2688 if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
2689 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
2695 while (cur
[0] != 0) { /* non input consuming */
2699 if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
2700 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
2704 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
2705 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
2712 * xmlDecodeEntities:
2713 * @ctxt: the parser context
2714 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2715 * @len: the len to decode (in bytes !), -1 for no size limit
2716 * @end: an end marker xmlChar, 0 if none
2717 * @end2: an end marker xmlChar, 0 if none
2718 * @end3: an end marker xmlChar, 0 if none
2720 * This function is deprecated, we now always process entities content
2721 * through xmlStringDecodeEntities
2723 * TODO: remove it in next major release.
2725 * [67] Reference ::= EntityRef | CharRef
2727 * [69] PEReference ::= '%' Name ';'
2729 * Returns A newly allocated string with the substitution done. The caller
2730 * must deallocate it !
2733 xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
, int len ATTRIBUTE_UNUSED
, int what ATTRIBUTE_UNUSED
,
2734 xmlChar end ATTRIBUTE_UNUSED
, xmlChar end2 ATTRIBUTE_UNUSED
, xmlChar end3 ATTRIBUTE_UNUSED
) {
2736 xmlChar
*buffer
= NULL
;
2737 unsigned int buffer_size
= 0;
2738 unsigned int nbchars
= 0;
2740 xmlChar
*current
= NULL
;
2742 unsigned int max
= (unsigned int) len
;
2746 static int deprecated
= 0;
2748 xmlGenericError(xmlGenericErrorContext
,
2749 "xmlDecodeEntities() deprecated function reached\n");
2754 if (ctxt
->depth
> 40) {
2755 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
2756 ctxt
->sax
->error(ctxt
->userData
,
2757 "Detected entity reference loop\n");
2758 ctxt
->wellFormed
= 0;
2759 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
2760 ctxt
->errNo
= XML_ERR_ENTITY_LOOP
;
2765 * allocate a translation buffer.
2767 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
2768 buffer
= (xmlChar
*) xmlMalloc(buffer_size
* sizeof(xmlChar
));
2769 if (buffer
== NULL
) {
2770 xmlGenericError(xmlGenericErrorContext
,
2771 "xmlDecodeEntities: malloc failed");
2776 * OK loop until we reach one of the ending char or a size limit.
2780 while ((nbchars
< max
) && (c
!= end
) && /* NOTUSED */
2781 (c
!= end2
) && (c
!= end3
)) {
2784 if ((c
== '&') && (NXT(1) == '#')) {
2785 int val
= xmlParseCharRef(ctxt
);
2786 COPY_BUF(0,buffer
,nbchars
,val
);
2788 } else if (c
== '&') &&
2789 (what
& XML_SUBSTITUTE_REF
)) {
2790 if (xmlParserDebugEntities
)
2791 xmlGenericError(xmlGenericErrorContext
,
2792 "decoding Entity Reference\n");
2793 ent
= xmlParseEntityRef(ctxt
);
2794 if ((ent
!= NULL
) &&
2795 (ctxt
->replaceEntities
!= 0)) {
2796 current
= ent
->content
;
2797 while (*current
!= 0) { /* non input consuming loop */
2798 buffer
[nbchars
++] = *current
++;
2799 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2803 } else if (ent
!= NULL
) {
2804 const xmlChar
*cur
= ent
->name
;
2806 buffer
[nbchars
++] = '&';
2807 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2810 while (*cur
!= 0) { /* non input consuming loop */
2811 buffer
[nbchars
++] = *cur
++;
2813 buffer
[nbchars
++] = ';';
2815 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
2817 * a PEReference induce to switch the entity flow,
2818 * we break here to flush the current set of chars
2819 * parsed if any. We will be called back later.
2821 if (xmlParserDebugEntities
)
2822 xmlGenericError(xmlGenericErrorContext
,
2823 "decoding PE Reference\n");
2824 if (nbchars
!= 0) break;
2826 xmlParsePEReference(ctxt
);
2829 * Pop-up of finished entities.
2831 while ((RAW
== 0) && (ctxt
->inputNr
> 1)) /* non input consuming */
2836 COPY_BUF(l
,buffer
,nbchars
,c
);
2838 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2844 buffer
[nbchars
++] = 0;
2851 * xmlNamespaceParseNCName:
2852 * @ctxt: an XML parser context
2854 * parse an XML namespace name.
2856 * TODO: this seems not in use anymore, the namespace handling is done on
2857 * top of the SAX interfaces, i.e. not on raw input.
2859 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2861 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2862 * CombiningChar | Extender
2864 * Returns the namespace name or NULL
2868 xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
) {
2870 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2872 int cur
= CUR_CHAR(l
);
2875 static int deprecated
= 0;
2877 xmlGenericError(xmlGenericErrorContext
,
2878 "xmlNamespaceParseNCName() deprecated function reached\n");
2883 /* load first the value of the char !!! */
2885 if (!IS_LETTER(cur
) && (cur
!= '_')) return(NULL
);
2887 xmlGenericError(xmlGenericErrorContext
,
2888 "xmlNamespaceParseNCName: reached loop 3\n");
2889 while ((IS_LETTER(cur
)) || (IS_DIGIT(cur
)) || /* NOT REACHED */
2890 (cur
== '.') || (cur
== '-') ||
2892 (IS_COMBINING(cur
)) ||
2893 (IS_EXTENDER(cur
))) {
2894 COPY_BUF(l
,buf
,len
,cur
);
2897 if (len
>= XML_MAX_NAMELEN
) {
2898 xmlGenericError(xmlGenericErrorContext
,
2899 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2900 while ((IS_LETTER(cur
)) || (IS_DIGIT(cur
)) ||/* NOT REACHED */
2901 (cur
== '.') || (cur
== '-') ||
2903 (IS_COMBINING(cur
)) ||
2904 (IS_EXTENDER(cur
))) {
2911 return(xmlStrndup(buf
, len
));
2917 * xmlNamespaceParseQName:
2918 * @ctxt: an XML parser context
2919 * @prefix: a xmlChar **
2921 * TODO: this seems not in use anymore, the namespace handling is done on
2922 * top of the SAX interfaces, i.e. not on raw input.
2924 * parse an XML qualified name
2926 * [NS 5] QName ::= (Prefix ':')? LocalPart
2928 * [NS 6] Prefix ::= NCName
2930 * [NS 7] LocalPart ::= NCName
2932 * Returns the local part, and prefix is updated
2933 * to get the Prefix if any.
2937 xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
, xmlChar
**prefix ATTRIBUTE_UNUSED
) {
2939 static int deprecated
= 0;
2941 xmlGenericError(xmlGenericErrorContext
,
2942 "xmlNamespaceParseQName() deprecated function reached\n");
2947 xmlChar
*ret
= NULL
;
2950 ret
= xmlNamespaceParseNCName(ctxt
);
2954 ret
= xmlNamespaceParseNCName(ctxt
);
2963 * xmlNamespaceParseNSDef:
2964 * @ctxt: an XML parser context
2966 * parse a namespace prefix declaration
2968 * TODO: this seems not in use anymore, the namespace handling is done on
2969 * top of the SAX interfaces, i.e. not on raw input.
2971 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2973 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2975 * Returns the namespace name
2979 xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
) {
2980 static int deprecated
= 0;
2982 xmlGenericError(xmlGenericErrorContext
,
2983 "xmlNamespaceParseNSDef() deprecated function reached\n");
2988 xmlChar
*name
= NULL
;
2990 if ((RAW
== 'x') && (NXT(1) == 'm') &&
2991 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2996 name
= xmlNamespaceParseNCName(ctxt
);
3004 * xmlParseQuotedString:
3005 * @ctxt: an XML parser context
3007 * Parse and return a string between quotes or doublequotes
3009 * TODO: Deprecated, to be removed at next drop of binary compatibility
3011 * Returns the string parser or NULL.
3014 xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
) {
3015 static int deprecated
= 0;
3017 xmlGenericError(xmlGenericErrorContext
,
3018 "xmlParseQuotedString() deprecated function reached\n");
3024 xmlChar
*buf
= NULL
;
3026 int size
= XML_PARSER_BUFFER_SIZE
;
3029 buf
= (xmlChar
*) xmlMalloc(size
* sizeof(xmlChar
));
3031 xmlGenericError(xmlGenericErrorContext
,
3032 "malloc of %d byte failed\n", size
);
3035 xmlGenericError(xmlGenericErrorContext
,
3036 "xmlParseQuotedString: reached loop 4\n");
3040 while (IS_CHAR(c
) && (c
!= '"')) { /* NOTUSED */
3041 if (len
+ 5 >= size
) {
3043 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3045 xmlGenericError(xmlGenericErrorContext
,
3046 "realloc of %d byte failed\n", size
);
3050 COPY_BUF(l
,buf
,len
,c
);
3055 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
3056 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3057 ctxt
->sax
->error(ctxt
->userData
,
3058 "String not closed \"%.50s\"\n", buf
);
3059 ctxt
->wellFormed
= 0;
3060 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3064 } else if (RAW
== '\''){
3067 while (IS_CHAR(c
) && (c
!= '\'')) { /* NOTUSED */
3068 if (len
+ 1 >= size
) {
3070 buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3072 xmlGenericError(xmlGenericErrorContext
,
3073 "realloc of %d byte failed\n", size
);
3082 ctxt
->errNo
= XML_ERR_STRING_NOT_CLOSED
;
3083 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3084 ctxt
->sax
->error(ctxt
->userData
,
3085 "String not closed \"%.50s\"\n", buf
);
3086 ctxt
->wellFormed
= 0;
3087 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3097 * xmlParseNamespace:
3098 * @ctxt: an XML parser context
3100 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3102 * This is what the older xml-name Working Draft specified, a bunch of
3103 * other stuff may still rely on it, so support is still here as
3104 * if it was declared on the root of the Tree:-(
3106 * TODO: remove from library
3108 * To be removed at next drop of binary compatibility
3112 xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
) {
3113 static int deprecated
= 0;
3115 xmlGenericError(xmlGenericErrorContext
,
3116 "xmlParseNamespace() deprecated function reached\n");
3121 xmlChar
*href
= NULL
;
3122 xmlChar
*prefix
= NULL
;
3126 * We just skipped "namespace" or "xml:namespace"
3130 xmlGenericError(xmlGenericErrorContext
,
3131 "xmlParseNamespace: reached loop 5\n");
3132 while (IS_CHAR(RAW
) && (RAW
!= '>')) { /* NOT REACHED */
3134 * We can have "ns" or "prefix" attributes
3135 * Old encoding as 'href' or 'AS' attributes is still supported
3137 if ((RAW
== 'n') && (NXT(1) == 's')) {
3142 if (RAW
!= '=') continue;
3146 href
= xmlParseQuotedString(ctxt
);
3148 } else if ((RAW
== 'h') && (NXT(1) == 'r') &&
3149 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3154 if (RAW
!= '=') continue;
3158 href
= xmlParseQuotedString(ctxt
);
3160 } else if ((RAW
== 'p') && (NXT(1) == 'r') &&
3161 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3162 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3167 if (RAW
!= '=') continue;
3171 prefix
= xmlParseQuotedString(ctxt
);
3173 } else if ((RAW
== 'A') && (NXT(1) == 'S')) {
3178 if (RAW
!= '=') continue;
3182 prefix
= xmlParseQuotedString(ctxt
);
3184 } else if ((RAW
== '?') && (NXT(1) == '>')) {
3189 * Found garbage when parsing the namespace
3192 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3193 ctxt
->sax
->error(ctxt
->userData
,
3194 "xmlParseNamespace found garbage\n");
3196 ctxt
->errNo
= XML_ERR_NS_DECL_ERROR
;
3197 ctxt
->wellFormed
= 0;
3198 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3203 MOVETO_ENDTAG(CUR_PTR
);
3209 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3210 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3213 if (prefix
!= NULL
) xmlFree(prefix
);
3214 if (href
!= NULL
) xmlFree(href
);
3220 * @ctxt: an XML parser context
3222 * Trickery: parse an XML name but without consuming the input flow
3223 * Needed for rollback cases. Used only when parsing entities references.
3225 * TODO: seems deprecated now, only used in the default part of
3226 * xmlParserHandleReference
3228 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3229 * CombiningChar | Extender
3231 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3233 * [6] Names ::= Name (S Name)*
3235 * Returns the Name parsed or NULL
3239 xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
) {
3240 static int deprecated
= 0;
3242 xmlGenericError(xmlGenericErrorContext
,
3243 "xmlScanName() deprecated function reached\n");
3249 xmlChar buf
[XML_MAX_NAMELEN
];
3253 if (!IS_LETTER(RAW
) && (RAW
!= '_') &&
3259 while ((IS_LETTER(NXT(len
))) || (IS_DIGIT(NXT(len
))) || /* NOT REACHED */
3260 (NXT(len
) == '.') || (NXT(len
) == '-') ||
3261 (NXT(len
) == '_') || (NXT(len
) == ':') ||
3262 (IS_COMBINING(NXT(len
))) ||
3263 (IS_EXTENDER(NXT(len
)))) {
3265 buf
[len
] = NXT(len
);
3267 if (len
>= XML_MAX_NAMELEN
) {
3268 xmlGenericError(xmlGenericErrorContext
,
3269 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3270 while ((IS_LETTER(NXT(len
))) || /* NOT REACHED */
3271 (IS_DIGIT(NXT(len
))) ||
3272 (NXT(len
) == '.') || (NXT(len
) == '-') ||
3273 (NXT(len
) == '_') || (NXT(len
) == ':') ||
3274 (IS_COMBINING(NXT(len
))) ||
3275 (IS_EXTENDER(NXT(len
))))
3280 return(xmlStrndup(buf
, len
));
3285 * xmlParserHandleReference:
3286 * @ctxt: the parser context
3288 * TODO: Remove, now deprecated ... the test is done directly in the
3292 * [67] Reference ::= EntityRef | CharRef
3294 * [68] EntityRef ::= '&' Name ';'
3296 * [ WFC: Entity Declared ]
3297 * the Name given in the entity reference must match that in an entity
3298 * declaration, except that well-formed documents need not declare any
3299 * of the following entities: amp, lt, gt, apos, quot.
3301 * [ WFC: Parsed Entity ]
3302 * An entity reference must not contain the name of an unparsed entity
3304 * [66] CharRef ::= '&#' [0-9]+ ';' |
3305 * '&#x' [0-9a-fA-F]+ ';'
3307 * A PEReference may have been detected in the current input stream
3308 * the handling is done accordingly to
3309 * http://www.w3.org/TR/REC-xml#entproc
3312 xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
) {
3313 static int deprecated
= 0;
3315 xmlGenericError(xmlGenericErrorContext
,
3316 "xmlParserHandleReference() deprecated function reached\n");
3325 * @ctxt: an XML parser context
3326 * @entity: an XML entity pointer.
3328 * Default handling of defined entities, when should we define a new input
3329 * stream ? When do we just handle that as a set of chars ?
3331 * OBSOLETE: to be removed at some point.
3335 xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED
, xmlEntityPtr entity ATTRIBUTE_UNUSED
) {
3336 static int deprecated
= 0;
3338 xmlGenericError(xmlGenericErrorContext
,
3339 "xmlHandleEntity() deprecated function reached\n");
3345 xmlParserInputPtr input
;
3347 if (entity
->content
== NULL
) {
3348 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
3349 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->error
!= NULL
))
3350 ctxt
->sax
->error(ctxt
->userData
, "xmlHandleEntity %s: content == NULL\n",
3352 ctxt
->wellFormed
= 0;
3353 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
3356 len
= xmlStrlen(entity
->content
);
3357 if (len
<= 2) goto handle_as_char
;
3360 * Redefine its content as an input stream.
3362 input
= xmlNewEntityInputStream(ctxt
, entity
);
3363 xmlPushInput(ctxt
, input
);
3368 * Just handle the content as a set of chars.
3370 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
3371 (ctxt
->sax
->characters
!= NULL
))
3372 ctxt
->sax
->characters(ctxt
->userData
, entity
->content
, len
);
3378 * @doc: the document carrying the namespace
3379 * @href: the URI associated
3380 * @prefix: the prefix for the namespace
3382 * Creation of a Namespace, the old way using PI and without scoping
3384 * It now create a namespace on the root element of the document if found.
3385 * Returns NULL this functionality had been removed
3388 xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED
, const xmlChar
*href ATTRIBUTE_UNUSED
,
3389 const xmlChar
*prefix ATTRIBUTE_UNUSED
) {
3390 static int deprecated
= 0;
3392 xmlGenericError(xmlGenericErrorContext
,
3393 "xmlNewGlobalNs() deprecated function reached\n");
3402 root
= xmlDocGetRootElement(doc
);
3404 return(xmlNewNs(root
, href
, prefix
));
3407 * if there is no root element yet, create an old Namespace type
3408 * and it will be moved to the root at save time.
3410 cur
= (xmlNsPtr
) xmlMalloc(sizeof(xmlNs
));
3412 xmlGenericError(xmlGenericErrorContext
,
3413 "xmlNewGlobalNs : malloc failed\n");
3416 memset(cur
, 0, sizeof(xmlNs
));
3417 cur
->type
= XML_GLOBAL_NAMESPACE
;
3420 cur
->href
= xmlStrdup(href
);
3422 cur
->prefix
= xmlStrdup(prefix
);
3425 * Add it at the end to preserve parsing order ...
3428 if (doc
->oldNs
== NULL
) {
3431 xmlNsPtr prev
= doc
->oldNs
;
3433 while (prev
->next
!= NULL
) prev
= prev
->next
;
3444 * @doc: a document pointer
3446 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3450 xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED
) {
3451 static int deprecated
= 0;
3453 xmlGenericError(xmlGenericErrorContext
,
3454 "xmlUpgradeOldNs() deprecated function reached\n");
3460 if ((doc
== NULL
) || (doc
->oldNs
== NULL
)) return;
3461 if (doc
->children
== NULL
) {
3463 xmlGenericError(xmlGenericErrorContext
,
3464 "xmlUpgradeOldNs: failed no root !\n");
3470 while (cur
->next
!= NULL
) {
3471 cur
->type
= XML_LOCAL_NAMESPACE
;
3474 cur
->type
= XML_LOCAL_NAMESPACE
;
3475 cur
->next
= doc
->children
->nsDef
;
3476 doc
->children
->nsDef
= doc
->oldNs
;