Move setting of ioready 'wait' earlier in call chain, to
[python/dscho.git] / Modules / pyexpat.c
blob0f6608a6f96c2169848e180bd4e6a6b58ad99708
1 #include "Python.h"
2 #include <ctype.h>
4 #include "compile.h"
5 #include "frameobject.h"
6 #include "expat.h"
8 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10 #ifndef PyDoc_STRVAR
13 * fdrake says:
14 * Don't change the PyDoc_STR macro definition to (str), because
15 * '''the parentheses cause compile failures
16 * ("non-constant static initializer" or something like that)
17 * on some platforms (Irix?)'''
19 #define PyDoc_STR(str) str
20 #define PyDoc_VAR(name) static char name[]
21 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
22 #endif
24 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
25 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
26 #define Py_USING_UNICODE
27 #define NOFIX_TRACE
28 #endif
/* Slot numbers used to index both the per-parser `handlers` array and the
 * module-level `handler_info` table.  `_DummyDecl` is a trailing
 * placeholder so the conditional entry above it may end with a comma.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    _DummyDecl
};
58 static PyObject *ErrorObject;
60 /* ----------------------------------------------------- */
62 /* Declarations for objects of type xmlparser */
64 typedef struct {
65 PyObject_HEAD
67 XML_Parser itself;
68 int returns_unicode; /* True if Unicode strings are returned;
69 if false, UTF-8 strings are returned */
70 int ordered_attributes; /* Return attributes as a list. */
71 int specified_attributes; /* Report only specified attributes. */
72 int in_callback; /* Is a callback active? */
73 int ns_prefixes; /* Namespace-triplets mode? */
74 XML_Char *buffer; /* Buffer used when accumulating characters */
75 /* NULL if not enabled */
76 int buffer_size; /* Size of buffer, in XML_Char units */
77 int buffer_used; /* Buffer units in use */
78 PyObject *intern; /* Dictionary to intern strings */
79 PyObject **handlers;
80 } xmlparseobject;
82 #define CHARACTER_DATA_BUFFER_SIZE 8192
84 static PyTypeObject Xmlparsetype;
86 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
87 typedef void* xmlhandler;
89 struct HandlerInfo {
90 const char *name;
91 xmlhandlersetter setter;
92 xmlhandler handler;
93 PyCodeObject *tb_code;
94 PyObject *nameobj;
97 static struct HandlerInfo handler_info[64];
99 /* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
102 static int
103 set_error_attr(PyObject *err, char *name, int value)
105 PyObject *v = PyInt_FromLong(value);
107 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
108 Py_DECREF(v);
109 return 0;
111 return 1;
114 /* Build and set an Expat exception, including positioning
115 * information. Always returns NULL.
117 static PyObject *
118 set_error(xmlparseobject *self, enum XML_Error code)
120 PyObject *err;
121 char buffer[256];
122 XML_Parser parser = self->itself;
123 int lineno = XML_GetErrorLineNumber(parser);
124 int column = XML_GetErrorColumnNumber(parser);
126 /* There is no risk of overflowing this buffer, since
127 even for 64-bit integers, there is sufficient space. */
128 sprintf(buffer, "%.200s: line %i, column %i",
129 XML_ErrorString(code), lineno, column);
130 err = PyObject_CallFunction(ErrorObject, "s", buffer);
131 if ( err != NULL
132 && set_error_attr(err, "code", code)
133 && set_error_attr(err, "offset", column)
134 && set_error_attr(err, "lineno", lineno)) {
135 PyErr_SetObject(ErrorObject, err);
137 return NULL;
140 static int
141 have_handler(xmlparseobject *self, int type)
143 PyObject *handler = self->handlers[type];
144 return handler != NULL;
147 static PyObject *
148 get_handler_name(struct HandlerInfo *hinfo)
150 PyObject *name = hinfo->nameobj;
151 if (name == NULL) {
152 name = PyString_FromString(hinfo->name);
153 hinfo->nameobj = name;
155 Py_XINCREF(name);
156 return name;
160 #ifdef Py_USING_UNICODE
161 /* Convert a string of XML_Chars into a Unicode string.
162 Returns None if str is a null pointer. */
164 static PyObject *
165 conv_string_to_unicode(const XML_Char *str)
167 /* XXX currently this code assumes that XML_Char is 8-bit,
168 and hence in UTF-8. */
169 /* UTF-8 from Expat, Unicode desired */
170 if (str == NULL) {
171 Py_INCREF(Py_None);
172 return Py_None;
174 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
177 static PyObject *
178 conv_string_len_to_unicode(const XML_Char *str, int len)
180 /* XXX currently this code assumes that XML_Char is 8-bit,
181 and hence in UTF-8. */
182 /* UTF-8 from Expat, Unicode desired */
183 if (str == NULL) {
184 Py_INCREF(Py_None);
185 return Py_None;
187 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
189 #endif
191 /* Convert a string of XML_Chars into an 8-bit Python string.
192 Returns None if str is a null pointer. */
194 static PyObject *
195 conv_string_to_utf8(const XML_Char *str)
197 /* XXX currently this code assumes that XML_Char is 8-bit,
198 and hence in UTF-8. */
199 /* UTF-8 from Expat, UTF-8 desired */
200 if (str == NULL) {
201 Py_INCREF(Py_None);
202 return Py_None;
204 return PyString_FromString(str);
207 static PyObject *
208 conv_string_len_to_utf8(const XML_Char *str, int len)
210 /* XXX currently this code assumes that XML_Char is 8-bit,
211 and hence in UTF-8. */
212 /* UTF-8 from Expat, UTF-8 desired */
213 if (str == NULL) {
214 Py_INCREF(Py_None);
215 return Py_None;
217 return PyString_FromStringAndSize((const char *)str, len);
220 /* Callback routines */
222 static void clear_handlers(xmlparseobject *self, int initial);
224 /* This handler is used when an error has been detected, in the hope
225 that actual parsing can be terminated early. This will only help
226 if an external entity reference is encountered. */
227 static int
228 error_external_entity_ref_handler(XML_Parser parser,
229 const XML_Char *context,
230 const XML_Char *base,
231 const XML_Char *systemId,
232 const XML_Char *publicId)
234 return 0;
237 static void
238 flag_error(xmlparseobject *self)
240 clear_handlers(self, 0);
241 XML_SetExternalEntityRefHandler(self->itself,
242 error_external_entity_ref_handler);
245 static PyCodeObject*
246 getcode(enum HandlerTypes slot, char* func_name, int lineno)
248 PyObject *code = NULL;
249 PyObject *name = NULL;
250 PyObject *nulltuple = NULL;
251 PyObject *filename = NULL;
253 if (handler_info[slot].tb_code == NULL) {
254 code = PyString_FromString("");
255 if (code == NULL)
256 goto failed;
257 name = PyString_FromString(func_name);
258 if (name == NULL)
259 goto failed;
260 nulltuple = PyTuple_New(0);
261 if (nulltuple == NULL)
262 goto failed;
263 filename = PyString_FromString(__FILE__);
264 handler_info[slot].tb_code =
265 PyCode_New(0, /* argcount */
266 0, /* nlocals */
267 0, /* stacksize */
268 0, /* flags */
269 code, /* code */
270 nulltuple, /* consts */
271 nulltuple, /* names */
272 nulltuple, /* varnames */
273 #if PYTHON_API_VERSION >= 1010
274 nulltuple, /* freevars */
275 nulltuple, /* cellvars */
276 #endif
277 filename, /* filename */
278 name, /* name */
279 lineno, /* firstlineno */
280 code /* lnotab */
282 if (handler_info[slot].tb_code == NULL)
283 goto failed;
284 Py_DECREF(code);
285 Py_DECREF(nulltuple);
286 Py_DECREF(filename);
287 Py_DECREF(name);
289 return handler_info[slot].tb_code;
290 failed:
291 Py_XDECREF(code);
292 Py_XDECREF(name);
293 return NULL;
296 #ifndef NOFIX_TRACE
297 static int
298 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
300 int result = 0;
301 if (!tstate->use_tracing || tstate->tracing)
302 return 0;
303 if (tstate->c_profilefunc != NULL) {
304 tstate->tracing++;
305 result = tstate->c_profilefunc(tstate->c_profileobj,
306 f, code , val);
307 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
308 || (tstate->c_profilefunc != NULL));
309 tstate->tracing--;
310 if (result)
311 return result;
313 if (tstate->c_tracefunc != NULL) {
314 tstate->tracing++;
315 result = tstate->c_tracefunc(tstate->c_traceobj,
316 f, code , val);
317 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
318 || (tstate->c_profilefunc != NULL));
319 tstate->tracing--;
321 return result;
323 #endif
325 static PyObject*
326 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
328 PyThreadState *tstate = PyThreadState_GET();
329 PyFrameObject *f;
330 PyObject *res;
332 if (c == NULL)
333 return NULL;
335 f = PyFrame_New(
336 tstate, /*back*/
337 c, /*code*/
338 PyEval_GetGlobals(), /*globals*/
339 NULL /*locals*/
341 if (f == NULL)
342 return NULL;
343 tstate->frame = f;
344 #ifndef NOFIX_TRACE
345 if (trace_frame(tstate, f, PyTrace_CALL, Py_None)) {
346 Py_DECREF(f);
347 return NULL;
349 #endif
350 res = PyEval_CallObject(func, args);
351 if (res == NULL && tstate->curexc_traceback == NULL)
352 PyTraceBack_Here(f);
353 #ifndef NOFIX_TRACE
354 else {
355 if (trace_frame(tstate, f, PyTrace_RETURN, res)) {
356 Py_XDECREF(res);
357 res = NULL;
360 #endif
361 tstate->frame = f->f_back;
362 Py_DECREF(f);
363 return res;
366 #ifndef Py_USING_UNICODE
367 #define STRING_CONV_FUNC conv_string_to_utf8
368 #else
369 /* Python 2.0 and later versions, when built with Unicode support */
370 #define STRING_CONV_FUNC (self->returns_unicode \
371 ? conv_string_to_unicode : conv_string_to_utf8)
372 #endif
374 static PyObject*
375 string_intern(xmlparseobject *self, const char* str)
377 PyObject *result = STRING_CONV_FUNC(str);
378 PyObject *value;
379 if (!self->intern)
380 return result;
381 value = PyDict_GetItem(self->intern, result);
382 if (!value) {
383 if (PyDict_SetItem(self->intern, result, result) == 0)
384 return result;
385 else
386 return NULL;
388 Py_INCREF(value);
389 Py_DECREF(result);
390 return value;
393 /* Return 0 on success, -1 on exception.
394 * flag_error() will be called before return if needed.
396 static int
397 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
399 PyObject *args;
400 PyObject *temp;
402 args = PyTuple_New(1);
403 if (args == NULL)
404 return -1;
405 #ifdef Py_USING_UNICODE
406 temp = (self->returns_unicode
407 ? conv_string_len_to_unicode(buffer, len)
408 : conv_string_len_to_utf8(buffer, len));
409 #else
410 temp = conv_string_len_to_utf8(buffer, len);
411 #endif
412 if (temp == NULL) {
413 Py_DECREF(args);
414 flag_error(self);
415 return -1;
417 PyTuple_SET_ITEM(args, 0, temp);
418 /* temp is now a borrowed reference; consider it unused. */
419 self->in_callback = 1;
420 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
421 self->handlers[CharacterData], args);
422 /* temp is an owned reference again, or NULL */
423 self->in_callback = 0;
424 Py_DECREF(args);
425 if (temp == NULL) {
426 flag_error(self);
427 return -1;
429 Py_DECREF(temp);
430 return 0;
433 static int
434 flush_character_buffer(xmlparseobject *self)
436 int rc;
437 if (self->buffer == NULL || self->buffer_used == 0)
438 return 0;
439 rc = call_character_handler(self, self->buffer, self->buffer_used);
440 self->buffer_used = 0;
441 return rc;
444 static void
445 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
447 xmlparseobject *self = (xmlparseobject *) userData;
448 if (self->buffer == NULL)
449 call_character_handler(self, data, len);
450 else {
451 if ((self->buffer_used + len) > self->buffer_size) {
452 if (flush_character_buffer(self) < 0)
453 return;
454 /* handler might have changed; drop the rest on the floor
455 * if there isn't a handler anymore
457 if (!have_handler(self, CharacterData))
458 return;
460 if (len > self->buffer_size) {
461 call_character_handler(self, data, len);
462 self->buffer_used = 0;
464 else {
465 memcpy(self->buffer + self->buffer_used,
466 data, len * sizeof(XML_Char));
467 self->buffer_used += len;
472 static void
473 my_StartElementHandler(void *userData,
474 const XML_Char *name, const XML_Char *atts[])
476 xmlparseobject *self = (xmlparseobject *)userData;
478 if (have_handler(self, StartElement)) {
479 PyObject *container, *rv, *args;
480 int i, max;
482 if (flush_character_buffer(self) < 0)
483 return;
484 /* Set max to the number of slots filled in atts[]; max/2 is
485 * the number of attributes we need to process.
487 if (self->specified_attributes) {
488 max = XML_GetSpecifiedAttributeCount(self->itself);
490 else {
491 max = 0;
492 while (atts[max] != NULL)
493 max += 2;
495 /* Build the container. */
496 if (self->ordered_attributes)
497 container = PyList_New(max);
498 else
499 container = PyDict_New();
500 if (container == NULL) {
501 flag_error(self);
502 return;
504 for (i = 0; i < max; i += 2) {
505 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
506 PyObject *v;
507 if (n == NULL) {
508 flag_error(self);
509 Py_DECREF(container);
510 return;
512 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
513 if (v == NULL) {
514 flag_error(self);
515 Py_DECREF(container);
516 Py_DECREF(n);
517 return;
519 if (self->ordered_attributes) {
520 PyList_SET_ITEM(container, i, n);
521 PyList_SET_ITEM(container, i+1, v);
523 else if (PyDict_SetItem(container, n, v)) {
524 flag_error(self);
525 Py_DECREF(n);
526 Py_DECREF(v);
527 return;
529 else {
530 Py_DECREF(n);
531 Py_DECREF(v);
534 args = Py_BuildValue("(NN)", string_intern(self, name), container);
535 if (args == NULL) {
536 Py_DECREF(container);
537 return;
539 /* Container is now a borrowed reference; ignore it. */
540 self->in_callback = 1;
541 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
542 self->handlers[StartElement], args);
543 self->in_callback = 0;
544 Py_DECREF(args);
545 if (rv == NULL) {
546 flag_error(self);
547 return;
549 Py_DECREF(rv);
/* RC_HANDLER generates the C callback `my_<NAME>Handler` for slot NAME.
 *
 *   RC           - C return type of the callback
 *   PARAMS       - parenthesised parameter list of the callback
 *   INIT         - extra declarations placed at the top of the body
 *   PARAM_FORMAT - parenthesised argument list for Py_BuildValue
 *   CONVERSION   - statement run on the handler's result `rv`
 *   RETURN       - expression returned from the callback (may be empty)
 *   GETUSERDATA  - expression yielding the xmlparseobject
 *
 * The generated function does nothing unless a Python handler is
 * installed; it flushes buffered character data first and calls
 * flag_error() when building the arguments or calling the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Callback returning void whose parser object arrives as `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Callback returning the handler's result converted with PyInt_AsLong
   (0 when no handler ran or an error occurred first). */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyInt_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
591 VOID_HANDLER(EndElement,
592 (void *userData, const XML_Char *name),
593 ("(N)", string_intern(self, name)))
595 VOID_HANDLER(ProcessingInstruction,
596 (void *userData,
597 const XML_Char *target,
598 const XML_Char *data),
599 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
601 VOID_HANDLER(UnparsedEntityDecl,
602 (void *userData,
603 const XML_Char *entityName,
604 const XML_Char *base,
605 const XML_Char *systemId,
606 const XML_Char *publicId,
607 const XML_Char *notationName),
608 ("(NNNNN)",
609 string_intern(self, entityName), string_intern(self, base),
610 string_intern(self, systemId), string_intern(self, publicId),
611 string_intern(self, notationName)))
613 #ifndef Py_USING_UNICODE
614 VOID_HANDLER(EntityDecl,
615 (void *userData,
616 const XML_Char *entityName,
617 int is_parameter_entity,
618 const XML_Char *value,
619 int value_length,
620 const XML_Char *base,
621 const XML_Char *systemId,
622 const XML_Char *publicId,
623 const XML_Char *notationName),
624 ("NiNNNNN",
625 string_intern(self, entityName), is_parameter_entity,
626 conv_string_len_to_utf8(value, value_length),
627 string_intern(self, base), string_intern(self, systemId),
628 string_intern(self, publicId),
629 string_intern(self, notationName)))
630 #else
631 VOID_HANDLER(EntityDecl,
632 (void *userData,
633 const XML_Char *entityName,
634 int is_parameter_entity,
635 const XML_Char *value,
636 int value_length,
637 const XML_Char *base,
638 const XML_Char *systemId,
639 const XML_Char *publicId,
640 const XML_Char *notationName),
641 ("NiNNNNN",
642 string_intern(self, entityName), is_parameter_entity,
643 (self->returns_unicode
644 ? conv_string_len_to_unicode(value, value_length)
645 : conv_string_len_to_utf8(value, value_length)),
646 string_intern(self, base), string_intern(self, systemId),
647 string_intern(self, publicId),
648 string_intern(self, notationName)))
649 #endif
651 VOID_HANDLER(XmlDecl,
652 (void *userData,
653 const XML_Char *version,
654 const XML_Char *encoding,
655 int standalone),
656 ("(O&O&i)",
657 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
658 standalone))
660 static PyObject *
661 conv_content_model(XML_Content * const model,
662 PyObject *(*conv_string)(const XML_Char *))
664 PyObject *result = NULL;
665 PyObject *children = PyTuple_New(model->numchildren);
666 int i;
668 if (children != NULL) {
669 assert(model->numchildren < INT_MAX);
670 for (i = 0; i < (int)model->numchildren; ++i) {
671 PyObject *child = conv_content_model(&model->children[i],
672 conv_string);
673 if (child == NULL) {
674 Py_XDECREF(children);
675 return NULL;
677 PyTuple_SET_ITEM(children, i, child);
679 result = Py_BuildValue("(iiO&N)",
680 model->type, model->quant,
681 conv_string,model->name, children);
683 return result;
686 static void
687 my_ElementDeclHandler(void *userData,
688 const XML_Char *name,
689 XML_Content *model)
691 xmlparseobject *self = (xmlparseobject *)userData;
692 PyObject *args = NULL;
694 if (have_handler(self, ElementDecl)) {
695 PyObject *rv = NULL;
696 PyObject *modelobj, *nameobj;
698 if (flush_character_buffer(self) < 0)
699 goto finally;
700 #ifdef Py_USING_UNICODE
701 modelobj = conv_content_model(model,
702 (self->returns_unicode
703 ? conv_string_to_unicode
704 : conv_string_to_utf8));
705 #else
706 modelobj = conv_content_model(model, conv_string_to_utf8);
707 #endif
708 if (modelobj == NULL) {
709 flag_error(self);
710 goto finally;
712 nameobj = string_intern(self, name);
713 if (nameobj == NULL) {
714 Py_DECREF(modelobj);
715 flag_error(self);
716 goto finally;
718 args = Py_BuildValue("NN", string_intern(self, name), modelobj);
719 if (args == NULL) {
720 Py_DECREF(modelobj);
721 flag_error(self);
722 goto finally;
724 self->in_callback = 1;
725 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
726 self->handlers[ElementDecl], args);
727 self->in_callback = 0;
728 if (rv == NULL) {
729 flag_error(self);
730 goto finally;
732 Py_DECREF(rv);
734 finally:
735 Py_XDECREF(args);
736 XML_FreeContentModel(self->itself, model);
737 return;
740 VOID_HANDLER(AttlistDecl,
741 (void *userData,
742 const XML_Char *elname,
743 const XML_Char *attname,
744 const XML_Char *att_type,
745 const XML_Char *dflt,
746 int isrequired),
747 ("(NNO&O&i)",
748 string_intern(self, elname), string_intern(self, attname),
749 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
750 isrequired))
752 #if XML_COMBINED_VERSION >= 19504
753 VOID_HANDLER(SkippedEntity,
754 (void *userData,
755 const XML_Char *entityName,
756 int is_parameter_entity),
757 ("Ni",
758 string_intern(self, entityName), is_parameter_entity))
759 #endif
761 VOID_HANDLER(NotationDecl,
762 (void *userData,
763 const XML_Char *notationName,
764 const XML_Char *base,
765 const XML_Char *systemId,
766 const XML_Char *publicId),
767 ("(NNNN)",
768 string_intern(self, notationName), string_intern(self, base),
769 string_intern(self, systemId), string_intern(self, publicId)))
771 VOID_HANDLER(StartNamespaceDecl,
772 (void *userData,
773 const XML_Char *prefix,
774 const XML_Char *uri),
775 ("(NN)",
776 string_intern(self, prefix), string_intern(self, uri)))
778 VOID_HANDLER(EndNamespaceDecl,
779 (void *userData,
780 const XML_Char *prefix),
781 ("(N)", string_intern(self, prefix)))
783 VOID_HANDLER(Comment,
784 (void *userData, const XML_Char *data),
785 ("(O&)", STRING_CONV_FUNC,data))
787 VOID_HANDLER(StartCdataSection,
788 (void *userData),
789 ("()"))
791 VOID_HANDLER(EndCdataSection,
792 (void *userData),
793 ("()"))
795 #ifndef Py_USING_UNICODE
796 VOID_HANDLER(Default,
797 (void *userData, const XML_Char *s, int len),
798 ("(N)", conv_string_len_to_utf8(s,len)))
800 VOID_HANDLER(DefaultHandlerExpand,
801 (void *userData, const XML_Char *s, int len),
802 ("(N)", conv_string_len_to_utf8(s,len)))
803 #else
804 VOID_HANDLER(Default,
805 (void *userData, const XML_Char *s, int len),
806 ("(N)", (self->returns_unicode
807 ? conv_string_len_to_unicode(s,len)
808 : conv_string_len_to_utf8(s,len))))
810 VOID_HANDLER(DefaultHandlerExpand,
811 (void *userData, const XML_Char *s, int len),
812 ("(N)", (self->returns_unicode
813 ? conv_string_len_to_unicode(s,len)
814 : conv_string_len_to_utf8(s,len))))
815 #endif
817 INT_HANDLER(NotStandalone,
818 (void *userData),
819 ("()"))
821 RC_HANDLER(int, ExternalEntityRef,
822 (XML_Parser parser,
823 const XML_Char *context,
824 const XML_Char *base,
825 const XML_Char *systemId,
826 const XML_Char *publicId),
827 int rc=0;,
828 ("(O&NNN)",
829 STRING_CONV_FUNC,context, string_intern(self, base),
830 string_intern(self, systemId), string_intern(self, publicId)),
831 rc = PyInt_AsLong(rv);, rc,
832 XML_GetUserData(parser))
834 /* XXX UnknownEncodingHandler */
836 VOID_HANDLER(StartDoctypeDecl,
837 (void *userData, const XML_Char *doctypeName,
838 const XML_Char *sysid, const XML_Char *pubid,
839 int has_internal_subset),
840 ("(NNNi)", string_intern(self, doctypeName),
841 string_intern(self, sysid), string_intern(self, pubid),
842 has_internal_subset))
844 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
846 /* ---------------------------------------------------------------- */
848 static PyObject *
849 get_parse_result(xmlparseobject *self, int rv)
851 if (PyErr_Occurred()) {
852 return NULL;
854 if (rv == 0) {
855 return set_error(self, XML_GetErrorCode(self->itself));
857 if (flush_character_buffer(self) < 0) {
858 return NULL;
860 return PyInt_FromLong(rv);
863 PyDoc_STRVAR(xmlparse_Parse__doc__,
864 "Parse(data[, isfinal])\n\
865 Parse XML data. `isfinal' should be true at end of input.");
867 static PyObject *
868 xmlparse_Parse(xmlparseobject *self, PyObject *args)
870 char *s;
871 int slen;
872 int isFinal = 0;
874 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
875 return NULL;
877 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
880 /* File reading copied from cPickle */
882 #define BUF_SIZE 2048
884 static int
885 readinst(char *buf, int buf_size, PyObject *meth)
887 PyObject *arg = NULL;
888 PyObject *bytes = NULL;
889 PyObject *str = NULL;
890 int len = -1;
892 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
893 goto finally;
895 if ((arg = PyTuple_New(1)) == NULL)
896 goto finally;
898 PyTuple_SET_ITEM(arg, 0, bytes);
900 if ((str = PyObject_Call(meth, arg, NULL)) == NULL)
901 goto finally;
903 /* XXX what to do if it returns a Unicode string? */
904 if (!PyString_Check(str)) {
905 PyErr_Format(PyExc_TypeError,
906 "read() did not return a string object (type=%.400s)",
907 str->ob_type->tp_name);
908 goto finally;
910 len = PyString_GET_SIZE(str);
911 if (len > buf_size) {
912 PyErr_Format(PyExc_ValueError,
913 "read() returned too much data: "
914 "%i bytes requested, %i returned",
915 buf_size, len);
916 Py_DECREF(str);
917 goto finally;
919 memcpy(buf, PyString_AsString(str), len);
920 finally:
921 Py_XDECREF(arg);
922 Py_XDECREF(str);
923 return len;
926 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
927 "ParseFile(file)\n\
928 Parse XML data from file-like object.");
930 static PyObject *
931 xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
933 int rv = 1;
934 PyObject *f;
935 FILE *fp;
936 PyObject *readmethod = NULL;
938 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
939 return NULL;
941 if (PyFile_Check(f)) {
942 fp = PyFile_AsFile(f);
944 else{
945 fp = NULL;
946 readmethod = PyObject_GetAttrString(f, "read");
947 if (readmethod == NULL) {
948 PyErr_Clear();
949 PyErr_SetString(PyExc_TypeError,
950 "argument must have 'read' attribute");
951 return NULL;
954 for (;;) {
955 int bytes_read;
956 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
957 if (buf == NULL)
958 return PyErr_NoMemory();
960 if (fp) {
961 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
962 if (bytes_read < 0) {
963 PyErr_SetFromErrno(PyExc_IOError);
964 return NULL;
967 else {
968 bytes_read = readinst(buf, BUF_SIZE, readmethod);
969 if (bytes_read < 0)
970 return NULL;
972 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
973 if (PyErr_Occurred())
974 return NULL;
976 if (!rv || bytes_read == 0)
977 break;
979 return get_parse_result(self, rv);
982 PyDoc_STRVAR(xmlparse_SetBase__doc__,
983 "SetBase(base_url)\n\
984 Set the base URL for the parser.");
986 static PyObject *
987 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
989 char *base;
991 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
992 return NULL;
993 if (!XML_SetBase(self->itself, base)) {
994 return PyErr_NoMemory();
996 Py_INCREF(Py_None);
997 return Py_None;
1000 PyDoc_STRVAR(xmlparse_GetBase__doc__,
1001 "GetBase() -> url\n\
1002 Return base URL string for the parser.");
1004 static PyObject *
1005 xmlparse_GetBase(xmlparseobject *self, PyObject *args)
1007 if (!PyArg_ParseTuple(args, ":GetBase"))
1008 return NULL;
1010 return Py_BuildValue("z", XML_GetBase(self->itself));
1013 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
1014 "GetInputContext() -> string\n\
1015 Return the untranslated text of the input that caused the current event.\n\
1016 If the event was generated by a large amount of text (such as a start tag\n\
1017 for an element with many attributes), not all of the text may be available.");
1019 static PyObject *
1020 xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
1022 PyObject *result = NULL;
1024 if (PyArg_ParseTuple(args, ":GetInputContext")) {
1025 if (self->in_callback) {
1026 int offset, size;
1027 const char *buffer
1028 = XML_GetInputContext(self->itself, &offset, &size);
1030 if (buffer != NULL)
1031 result = PyString_FromStringAndSize(buffer + offset, size);
1032 else {
1033 result = Py_None;
1034 Py_INCREF(result);
1037 else {
1038 result = Py_None;
1039 Py_INCREF(result);
1042 return result;
1045 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
1046 "ExternalEntityParserCreate(context[, encoding])\n\
1047 Create a parser for parsing an external entity based on the\n\
1048 information passed to the ExternalEntityRefHandler.");
1050 static PyObject *
1051 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1053 char *context;
1054 char *encoding = NULL;
1055 xmlparseobject *new_parser;
1056 int i;
1058 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
1059 &context, &encoding)) {
1060 return NULL;
1063 #ifndef Py_TPFLAGS_HAVE_GC
1064 /* Python versions 2.0 and 2.1 */
1065 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
1066 #else
1067 /* Python versions 2.2 and later */
1068 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1069 #endif
1071 if (new_parser == NULL)
1072 return NULL;
1073 new_parser->buffer_size = self->buffer_size;
1074 new_parser->buffer_used = 0;
1075 if (self->buffer != NULL) {
1076 new_parser->buffer = malloc(new_parser->buffer_size);
1077 if (new_parser->buffer == NULL) {
1078 #ifndef Py_TPFLAGS_HAVE_GC
1079 /* Code for versions 2.0 and 2.1 */
1080 PyObject_Del(new_parser);
1081 #else
1082 /* Code for versions 2.2 and later. */
1083 PyObject_GC_Del(new_parser);
1084 #endif
1085 return PyErr_NoMemory();
1088 else
1089 new_parser->buffer = NULL;
1090 new_parser->returns_unicode = self->returns_unicode;
1091 new_parser->ordered_attributes = self->ordered_attributes;
1092 new_parser->specified_attributes = self->specified_attributes;
1093 new_parser->in_callback = 0;
1094 new_parser->ns_prefixes = self->ns_prefixes;
1095 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
1096 encoding);
1097 new_parser->handlers = 0;
1098 new_parser->intern = self->intern;
1099 Py_XINCREF(new_parser->intern);
1100 #ifdef Py_TPFLAGS_HAVE_GC
1101 PyObject_GC_Track(new_parser);
1102 #else
1103 PyObject_GC_Init(new_parser);
1104 #endif
1106 if (!new_parser->itself) {
1107 Py_DECREF(new_parser);
1108 return PyErr_NoMemory();
1111 XML_SetUserData(new_parser->itself, (void *)new_parser);
1113 /* allocate and clear handlers first */
1114 for (i = 0; handler_info[i].name != NULL; i++)
1115 /* do nothing */;
1117 new_parser->handlers = malloc(sizeof(PyObject *) * i);
1118 if (!new_parser->handlers) {
1119 Py_DECREF(new_parser);
1120 return PyErr_NoMemory();
1122 clear_handlers(new_parser, 1);
1124 /* then copy handlers from self */
1125 for (i = 0; handler_info[i].name != NULL; i++) {
1126 PyObject *handler = self->handlers[i];
1127 if (handler != NULL) {
1128 Py_INCREF(handler);
1129 new_parser->handlers[i] = handler;
1130 handler_info[i].setter(new_parser->itself,
1131 handler_info[i].handler);
1134 return (PyObject *)new_parser;
1137 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1138 "SetParamEntityParsing(flag) -> success\n\
1139 Controls parsing of parameter entities (including the external DTD\n\
1140 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1141 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1142 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1143 was successful.");
1145 static PyObject*
1146 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1148 int flag;
1149 if (!PyArg_ParseTuple(args, "i", &flag))
1150 return NULL;
1151 flag = XML_SetParamEntityParsing(p->itself, flag);
1152 return PyInt_FromLong(flag);
1156 #if XML_COMBINED_VERSION >= 19505
1157 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1158 "UseForeignDTD([flag])\n\
1159 Allows the application to provide an artificial external subset if one is\n\
1160 not specified as part of the document instance. This readily allows the\n\
1161 use of a 'default' document type controlled by the application, while still\n\
1162 getting the advantage of providing document type information to the parser.\n\
1163 'flag' defaults to True if not provided.");
1165 static PyObject *
1166 xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1168 PyObject *flagobj = NULL;
1169 XML_Bool flag = XML_TRUE;
1170 enum XML_Error rc;
1171 if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
1172 return NULL;
1173 if (flagobj != NULL)
1174 flag = PyObject_IsTrue(flagobj) ? XML_TRUE : XML_FALSE;
1175 rc = XML_UseForeignDTD(self->itself, flag);
1176 if (rc != XML_ERROR_NONE) {
1177 return set_error(self, rc);
1179 Py_INCREF(Py_None);
1180 return Py_None;
1182 #endif
1184 static struct PyMethodDef xmlparse_methods[] = {
1185 {"Parse", (PyCFunction)xmlparse_Parse,
1186 METH_VARARGS, xmlparse_Parse__doc__},
1187 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
1188 METH_VARARGS, xmlparse_ParseFile__doc__},
1189 {"SetBase", (PyCFunction)xmlparse_SetBase,
1190 METH_VARARGS, xmlparse_SetBase__doc__},
1191 {"GetBase", (PyCFunction)xmlparse_GetBase,
1192 METH_VARARGS, xmlparse_GetBase__doc__},
1193 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
1194 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
1195 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1196 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
1197 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1198 METH_VARARGS, xmlparse_GetInputContext__doc__},
1199 #if XML_COMBINED_VERSION >= 19505
1200 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1201 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
1202 #endif
1203 {NULL, NULL} /* sentinel */
1206 /* ---------- */
1209 #ifdef Py_USING_UNICODE
1211 /* pyexpat international encoding support.
1212 Make it as simple as possible.
1215 static char template_buffer[257];
1216 PyObject *template_string = NULL;
1218 static void
1219 init_template_buffer(void)
1221 int i;
1222 for (i = 0; i < 256; i++) {
1223 template_buffer[i] = i;
1225 template_buffer[256] = 0;
1228 static int
1229 PyUnknownEncodingHandler(void *encodingHandlerData,
1230 const XML_Char *name,
1231 XML_Encoding *info)
1233 PyUnicodeObject *_u_string = NULL;
1234 int result = 0;
1235 int i;
1237 /* Yes, supports only 8bit encodings */
1238 _u_string = (PyUnicodeObject *)
1239 PyUnicode_Decode(template_buffer, 256, name, "replace");
1241 if (_u_string == NULL)
1242 return result;
1244 for (i = 0; i < 256; i++) {
1245 /* Stupid to access directly, but fast */
1246 Py_UNICODE c = _u_string->str[i];
1247 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1248 info->map[i] = -1;
1249 else
1250 info->map[i] = c;
1252 info->data = NULL;
1253 info->convert = NULL;
1254 info->release = NULL;
1255 result = 1;
1256 Py_DECREF(_u_string);
1257 return result;
1260 #endif
1262 static PyObject *
1263 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1265 int i;
1266 xmlparseobject *self;
1268 #ifdef Py_TPFLAGS_HAVE_GC
1269 /* Code for versions 2.2 and later */
1270 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1271 #else
1272 self = PyObject_New(xmlparseobject, &Xmlparsetype);
1273 #endif
1274 if (self == NULL)
1275 return NULL;
1277 #ifdef Py_USING_UNICODE
1278 self->returns_unicode = 1;
1279 #else
1280 self->returns_unicode = 0;
1281 #endif
1283 self->buffer = NULL;
1284 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1285 self->buffer_used = 0;
1286 self->ordered_attributes = 0;
1287 self->specified_attributes = 0;
1288 self->in_callback = 0;
1289 self->ns_prefixes = 0;
1290 self->handlers = NULL;
1291 if (namespace_separator != NULL) {
1292 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1294 else {
1295 self->itself = XML_ParserCreate(encoding);
1297 self->intern = intern;
1298 Py_XINCREF(self->intern);
1299 #ifdef Py_TPFLAGS_HAVE_GC
1300 PyObject_GC_Track(self);
1301 #else
1302 PyObject_GC_Init(self);
1303 #endif
1304 if (self->itself == NULL) {
1305 PyErr_SetString(PyExc_RuntimeError,
1306 "XML_ParserCreate failed");
1307 Py_DECREF(self);
1308 return NULL;
1310 XML_SetUserData(self->itself, (void *)self);
1311 #ifdef Py_USING_UNICODE
1312 XML_SetUnknownEncodingHandler(self->itself,
1313 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1314 #endif
1316 for (i = 0; handler_info[i].name != NULL; i++)
1317 /* do nothing */;
1319 self->handlers = malloc(sizeof(PyObject *) * i);
1320 if (!self->handlers) {
1321 Py_DECREF(self);
1322 return PyErr_NoMemory();
1324 clear_handlers(self, 1);
1326 return (PyObject*)self;
1330 static void
1331 xmlparse_dealloc(xmlparseobject *self)
1333 int i;
1334 #ifdef Py_TPFLAGS_HAVE_GC
1335 PyObject_GC_UnTrack(self);
1336 #else
1337 PyObject_GC_Fini(self);
1338 #endif
1339 if (self->itself != NULL)
1340 XML_ParserFree(self->itself);
1341 self->itself = NULL;
1343 if (self->handlers != NULL) {
1344 PyObject *temp;
1345 for (i = 0; handler_info[i].name != NULL; i++) {
1346 temp = self->handlers[i];
1347 self->handlers[i] = NULL;
1348 Py_XDECREF(temp);
1350 free(self->handlers);
1351 self->handlers = NULL;
1353 if (self->buffer != NULL) {
1354 free(self->buffer);
1355 self->buffer = NULL;
1357 Py_XDECREF(self->intern);
1358 #ifndef Py_TPFLAGS_HAVE_GC
1359 /* Code for versions 2.0 and 2.1 */
1360 PyObject_Del(self);
1361 #else
1362 /* Code for versions 2.2 and later. */
1363 PyObject_GC_Del(self);
1364 #endif
1367 static int
1368 handlername2int(const char *name)
1370 int i;
1371 for (i = 0; handler_info[i].name != NULL; i++) {
1372 if (strcmp(name, handler_info[i].name) == 0) {
1373 return i;
1376 return -1;
1379 static PyObject *
1380 get_pybool(int istrue)
1382 PyObject *result = istrue ? Py_True : Py_False;
1383 Py_INCREF(result);
1384 return result;
1387 static PyObject *
1388 xmlparse_getattr(xmlparseobject *self, char *name)
1390 int handlernum = handlername2int(name);
1392 if (handlernum != -1) {
1393 PyObject *result = self->handlers[handlernum];
1394 if (result == NULL)
1395 result = Py_None;
1396 Py_INCREF(result);
1397 return result;
1399 if (name[0] == 'E') {
1400 if (strcmp(name, "ErrorCode") == 0)
1401 return PyInt_FromLong((long)
1402 XML_GetErrorCode(self->itself));
1403 if (strcmp(name, "ErrorLineNumber") == 0)
1404 return PyInt_FromLong((long)
1405 XML_GetErrorLineNumber(self->itself));
1406 if (strcmp(name, "ErrorColumnNumber") == 0)
1407 return PyInt_FromLong((long)
1408 XML_GetErrorColumnNumber(self->itself));
1409 if (strcmp(name, "ErrorByteIndex") == 0)
1410 return PyInt_FromLong((long)
1411 XML_GetErrorByteIndex(self->itself));
1413 if (name[0] == 'b') {
1414 if (strcmp(name, "buffer_size") == 0)
1415 return PyInt_FromLong((long) self->buffer_size);
1416 if (strcmp(name, "buffer_text") == 0)
1417 return get_pybool(self->buffer != NULL);
1418 if (strcmp(name, "buffer_used") == 0)
1419 return PyInt_FromLong((long) self->buffer_used);
1421 if (strcmp(name, "namespace_prefixes") == 0)
1422 return get_pybool(self->ns_prefixes);
1423 if (strcmp(name, "ordered_attributes") == 0)
1424 return get_pybool(self->ordered_attributes);
1425 if (strcmp(name, "returns_unicode") == 0)
1426 return get_pybool((long) self->returns_unicode);
1427 if (strcmp(name, "specified_attributes") == 0)
1428 return get_pybool((long) self->specified_attributes);
1429 if (strcmp(name, "intern") == 0) {
1430 if (self->intern == NULL) {
1431 Py_INCREF(Py_None);
1432 return Py_None;
1434 else {
1435 Py_INCREF(self->intern);
1436 return self->intern;
1440 #define APPEND(list, str) \
1441 do { \
1442 PyObject *o = PyString_FromString(str); \
1443 if (o != NULL) \
1444 PyList_Append(list, o); \
1445 Py_XDECREF(o); \
1446 } while (0)
1448 if (strcmp(name, "__members__") == 0) {
1449 int i;
1450 PyObject *rc = PyList_New(0);
1451 for (i = 0; handler_info[i].name != NULL; i++) {
1452 PyObject *o = get_handler_name(&handler_info[i]);
1453 if (o != NULL)
1454 PyList_Append(rc, o);
1455 Py_XDECREF(o);
1457 APPEND(rc, "ErrorCode");
1458 APPEND(rc, "ErrorLineNumber");
1459 APPEND(rc, "ErrorColumnNumber");
1460 APPEND(rc, "ErrorByteIndex");
1461 APPEND(rc, "buffer_size");
1462 APPEND(rc, "buffer_text");
1463 APPEND(rc, "buffer_used");
1464 APPEND(rc, "namespace_prefixes");
1465 APPEND(rc, "ordered_attributes");
1466 APPEND(rc, "returns_unicode");
1467 APPEND(rc, "specified_attributes");
1468 APPEND(rc, "intern");
1470 #undef APPEND
1471 return rc;
1473 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
1476 static int
1477 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1479 int handlernum = handlername2int(name);
1480 if (handlernum >= 0) {
1481 xmlhandler c_handler = NULL;
1482 PyObject *temp = self->handlers[handlernum];
1484 if (v == Py_None)
1485 v = NULL;
1486 else if (v != NULL) {
1487 Py_INCREF(v);
1488 c_handler = handler_info[handlernum].handler;
1490 self->handlers[handlernum] = v;
1491 Py_XDECREF(temp);
1492 handler_info[handlernum].setter(self->itself, c_handler);
1493 return 1;
1495 return 0;
1498 static int
1499 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1501 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1502 if (v == NULL) {
1503 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1504 return -1;
1506 if (strcmp(name, "buffer_text") == 0) {
1507 if (PyObject_IsTrue(v)) {
1508 if (self->buffer == NULL) {
1509 self->buffer = malloc(self->buffer_size);
1510 if (self->buffer == NULL) {
1511 PyErr_NoMemory();
1512 return -1;
1514 self->buffer_used = 0;
1517 else if (self->buffer != NULL) {
1518 if (flush_character_buffer(self) < 0)
1519 return -1;
1520 free(self->buffer);
1521 self->buffer = NULL;
1523 return 0;
1525 if (strcmp(name, "namespace_prefixes") == 0) {
1526 if (PyObject_IsTrue(v))
1527 self->ns_prefixes = 1;
1528 else
1529 self->ns_prefixes = 0;
1530 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1531 return 0;
1533 if (strcmp(name, "ordered_attributes") == 0) {
1534 if (PyObject_IsTrue(v))
1535 self->ordered_attributes = 1;
1536 else
1537 self->ordered_attributes = 0;
1538 return 0;
1540 if (strcmp(name, "returns_unicode") == 0) {
1541 if (PyObject_IsTrue(v)) {
1542 #ifndef Py_USING_UNICODE
1543 PyErr_SetString(PyExc_ValueError,
1544 "Unicode support not available");
1545 return -1;
1546 #else
1547 self->returns_unicode = 1;
1548 #endif
1550 else
1551 self->returns_unicode = 0;
1552 return 0;
1554 if (strcmp(name, "specified_attributes") == 0) {
1555 if (PyObject_IsTrue(v))
1556 self->specified_attributes = 1;
1557 else
1558 self->specified_attributes = 0;
1559 return 0;
1561 if (strcmp(name, "CharacterDataHandler") == 0) {
1562 /* If we're changing the character data handler, flush all
1563 * cached data with the old handler. Not sure there's a
1564 * "right" thing to do, though, but this probably won't
1565 * happen.
1567 if (flush_character_buffer(self) < 0)
1568 return -1;
1570 if (sethandler(self, name, v)) {
1571 return 0;
1573 PyErr_SetString(PyExc_AttributeError, name);
1574 return -1;
1577 #ifdef WITH_CYCLE_GC
1578 static int
1579 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1581 int i, err;
1582 for (i = 0; handler_info[i].name != NULL; i++) {
1583 if (!op->handlers[i])
1584 continue;
1585 err = visit(op->handlers[i], arg);
1586 if (err)
1587 return err;
1589 return 0;
1592 static int
1593 xmlparse_clear(xmlparseobject *op)
1595 clear_handlers(op, 0);
1596 Py_XDECREF(op->intern);
1597 op->intern = 0;
1598 return 0;
1600 #endif
1602 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1604 static PyTypeObject Xmlparsetype = {
1605 PyObject_HEAD_INIT(NULL)
1606 0, /*ob_size*/
1607 "pyexpat.xmlparser", /*tp_name*/
1608 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1609 0, /*tp_itemsize*/
1610 /* methods */
1611 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1612 (printfunc)0, /*tp_print*/
1613 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1614 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1615 (cmpfunc)0, /*tp_compare*/
1616 (reprfunc)0, /*tp_repr*/
1617 0, /*tp_as_number*/
1618 0, /*tp_as_sequence*/
1619 0, /*tp_as_mapping*/
1620 (hashfunc)0, /*tp_hash*/
1621 (ternaryfunc)0, /*tp_call*/
1622 (reprfunc)0, /*tp_str*/
1623 0, /* tp_getattro */
1624 0, /* tp_setattro */
1625 0, /* tp_as_buffer */
1626 #ifdef Py_TPFLAGS_HAVE_GC
1627 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1628 #else
1629 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1630 #endif
1631 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1632 #ifdef WITH_CYCLE_GC
1633 (traverseproc)xmlparse_traverse, /* tp_traverse */
1634 (inquiry)xmlparse_clear /* tp_clear */
1635 #else
1636 0, 0
1637 #endif
1640 /* End of code for xmlparser objects */
1641 /* -------------------------------------------------------- */
1643 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1644 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1645 Return a new XML parser object.");
1647 static PyObject *
1648 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1650 char *encoding = NULL;
1651 char *namespace_separator = NULL;
1652 PyObject *intern = NULL;
1653 PyObject *result;
1654 int intern_decref = 0;
1655 static char *kwlist[] = {"encoding", "namespace_separator",
1656 "intern", NULL};
1658 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1659 &encoding, &namespace_separator, &intern))
1660 return NULL;
1661 if (namespace_separator != NULL
1662 && strlen(namespace_separator) > 1) {
1663 PyErr_SetString(PyExc_ValueError,
1664 "namespace_separator must be at most one"
1665 " character, omitted, or None");
1666 return NULL;
1668 /* Explicitly passing None means no interning is desired.
1669 Not passing anything means that a new dictionary is used. */
1670 if (intern == Py_None)
1671 intern = NULL;
1672 else if (intern == NULL) {
1673 intern = PyDict_New();
1674 if (!intern)
1675 return NULL;
1676 intern_decref = 1;
1678 else if (!PyDict_Check(intern)) {
1679 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1680 return NULL;
1683 result = newxmlparseobject(encoding, namespace_separator, intern);
1684 if (intern_decref) {
1685 Py_DECREF(intern);
1687 return result;
1690 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1691 "ErrorString(errno) -> string\n\
1692 Returns string error for given number.");
1694 static PyObject *
1695 pyexpat_ErrorString(PyObject *self, PyObject *args)
1697 long code = 0;
1699 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1700 return NULL;
1701 return Py_BuildValue("z", XML_ErrorString((int)code));
1704 /* List of methods defined in the module */
1706 static struct PyMethodDef pyexpat_methods[] = {
1707 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1708 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1709 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1710 METH_VARARGS, pyexpat_ErrorString__doc__},
1712 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
1715 /* Module docstring */
1717 PyDoc_STRVAR(pyexpat_module_documentation,
1718 "Python wrapper for Expat parser.");
1720 /* Return a Python string that represents the version number without the
1721 * extra cruft added by revision control, even if the right options were
1722 * given to the "cvs export" command to make it not include the extra
1723 * cruft.
1725 static PyObject *
1726 get_version_string(void)
1728 static char *rcsid = "$Revision$";
1729 char *rev = rcsid;
1730 int i = 0;
1732 while (!isdigit((int)*rev))
1733 ++rev;
1734 while (rev[i] != ' ' && rev[i] != '\0')
1735 ++i;
1737 return PyString_FromStringAndSize(rev, i);
1740 /* Initialization function for the module */
1742 #ifndef MODULE_NAME
1743 #define MODULE_NAME "pyexpat"
1744 #endif
1746 #ifndef MODULE_INITFUNC
1747 #define MODULE_INITFUNC initpyexpat
1748 #endif
1750 #ifndef PyMODINIT_FUNC
1751 # ifdef MS_WINDOWS
1752 # define PyMODINIT_FUNC __declspec(dllexport) void
1753 # else
1754 # define PyMODINIT_FUNC void
1755 # endif
1756 #endif
1758 PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
1760 PyMODINIT_FUNC
1761 MODULE_INITFUNC(void)
1763 PyObject *m, *d;
1764 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1765 PyObject *errors_module;
1766 PyObject *modelmod_name;
1767 PyObject *model_module;
1768 PyObject *sys_modules;
1770 if (errmod_name == NULL)
1771 return;
1772 modelmod_name = PyString_FromString(MODULE_NAME ".model");
1773 if (modelmod_name == NULL)
1774 return;
1776 Xmlparsetype.ob_type = &PyType_Type;
1778 /* Create the module and add the functions */
1779 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1780 pyexpat_module_documentation);
1782 /* Add some symbolic constants to the module */
1783 if (ErrorObject == NULL) {
1784 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1785 NULL, NULL);
1786 if (ErrorObject == NULL)
1787 return;
1789 Py_INCREF(ErrorObject);
1790 PyModule_AddObject(m, "error", ErrorObject);
1791 Py_INCREF(ErrorObject);
1792 PyModule_AddObject(m, "ExpatError", ErrorObject);
1793 Py_INCREF(&Xmlparsetype);
1794 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1796 PyModule_AddObject(m, "__version__", get_version_string());
1797 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1798 (char *) XML_ExpatVersion());
1800 XML_Expat_Version info = XML_ExpatVersionInfo();
1801 PyModule_AddObject(m, "version_info",
1802 Py_BuildValue("(iii)", info.major,
1803 info.minor, info.micro));
1805 #ifdef Py_USING_UNICODE
1806 init_template_buffer();
1807 #endif
1808 /* XXX When Expat supports some way of figuring out how it was
1809 compiled, this should check and set native_encoding
1810 appropriately.
1812 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1814 sys_modules = PySys_GetObject("modules");
1815 d = PyModule_GetDict(m);
1816 errors_module = PyDict_GetItem(d, errmod_name);
1817 if (errors_module == NULL) {
1818 errors_module = PyModule_New(MODULE_NAME ".errors");
1819 if (errors_module != NULL) {
1820 PyDict_SetItem(sys_modules, errmod_name, errors_module);
1821 /* gives away the reference to errors_module */
1822 PyModule_AddObject(m, "errors", errors_module);
1825 Py_DECREF(errmod_name);
1826 model_module = PyDict_GetItem(d, modelmod_name);
1827 if (model_module == NULL) {
1828 model_module = PyModule_New(MODULE_NAME ".model");
1829 if (model_module != NULL) {
1830 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1831 /* gives away the reference to model_module */
1832 PyModule_AddObject(m, "model", model_module);
1835 Py_DECREF(modelmod_name);
1836 if (errors_module == NULL || model_module == NULL)
1837 /* Don't core dump later! */
1838 return;
1840 #if XML_COMBINED_VERSION > 19505
1842 const XML_Feature *features = XML_GetFeatureList();
1843 PyObject *list = PyList_New(0);
1844 if (list == NULL)
1845 /* just ignore it */
1846 PyErr_Clear();
1847 else {
1848 int i = 0;
1849 for (; features[i].feature != XML_FEATURE_END; ++i) {
1850 int ok;
1851 PyObject *item = Py_BuildValue("si", features[i].name,
1852 features[i].value);
1853 if (item == NULL) {
1854 Py_DECREF(list);
1855 list = NULL;
1856 break;
1858 ok = PyList_Append(list, item);
1859 Py_DECREF(item);
1860 if (ok < 0) {
1861 PyErr_Clear();
1862 break;
1865 if (list != NULL)
1866 PyModule_AddObject(m, "features", list);
1869 #endif
1871 #define MYCONST(name) \
1872 PyModule_AddStringConstant(errors_module, #name, \
1873 (char*)XML_ErrorString(name))
1875 MYCONST(XML_ERROR_NO_MEMORY);
1876 MYCONST(XML_ERROR_SYNTAX);
1877 MYCONST(XML_ERROR_NO_ELEMENTS);
1878 MYCONST(XML_ERROR_INVALID_TOKEN);
1879 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1880 MYCONST(XML_ERROR_PARTIAL_CHAR);
1881 MYCONST(XML_ERROR_TAG_MISMATCH);
1882 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1883 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1884 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1885 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1886 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1887 MYCONST(XML_ERROR_ASYNC_ENTITY);
1888 MYCONST(XML_ERROR_BAD_CHAR_REF);
1889 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1890 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1891 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1892 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1893 MYCONST(XML_ERROR_INCORRECT_ENCODING);
1894 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1895 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1896 MYCONST(XML_ERROR_NOT_STANDALONE);
1898 PyModule_AddStringConstant(errors_module, "__doc__",
1899 "Constants used to describe error conditions.");
1901 #undef MYCONST
1903 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1904 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1905 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1906 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1907 #undef MYCONST
1909 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1910 PyModule_AddStringConstant(model_module, "__doc__",
1911 "Constants used to interpret content model information.");
1913 MYCONST(XML_CTYPE_EMPTY);
1914 MYCONST(XML_CTYPE_ANY);
1915 MYCONST(XML_CTYPE_MIXED);
1916 MYCONST(XML_CTYPE_NAME);
1917 MYCONST(XML_CTYPE_CHOICE);
1918 MYCONST(XML_CTYPE_SEQ);
1920 MYCONST(XML_CQUANT_NONE);
1921 MYCONST(XML_CQUANT_OPT);
1922 MYCONST(XML_CQUANT_REP);
1923 MYCONST(XML_CQUANT_PLUS);
1924 #undef MYCONST
1927 static void
1928 clear_handlers(xmlparseobject *self, int initial)
1930 int i = 0;
1931 PyObject *temp;
1933 for (; handler_info[i].name != NULL; i++) {
1934 if (initial)
1935 self->handlers[i] = NULL;
1936 else {
1937 temp = self->handlers[i];
1938 self->handlers[i] = NULL;
1939 Py_XDECREF(temp);
1940 handler_info[i].setter(self->itself, NULL);
1945 static struct HandlerInfo handler_info[] = {
1946 {"StartElementHandler",
1947 (xmlhandlersetter)XML_SetStartElementHandler,
1948 (xmlhandler)my_StartElementHandler},
1949 {"EndElementHandler",
1950 (xmlhandlersetter)XML_SetEndElementHandler,
1951 (xmlhandler)my_EndElementHandler},
1952 {"ProcessingInstructionHandler",
1953 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1954 (xmlhandler)my_ProcessingInstructionHandler},
1955 {"CharacterDataHandler",
1956 (xmlhandlersetter)XML_SetCharacterDataHandler,
1957 (xmlhandler)my_CharacterDataHandler},
1958 {"UnparsedEntityDeclHandler",
1959 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
1960 (xmlhandler)my_UnparsedEntityDeclHandler},
1961 {"NotationDeclHandler",
1962 (xmlhandlersetter)XML_SetNotationDeclHandler,
1963 (xmlhandler)my_NotationDeclHandler},
1964 {"StartNamespaceDeclHandler",
1965 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
1966 (xmlhandler)my_StartNamespaceDeclHandler},
1967 {"EndNamespaceDeclHandler",
1968 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
1969 (xmlhandler)my_EndNamespaceDeclHandler},
1970 {"CommentHandler",
1971 (xmlhandlersetter)XML_SetCommentHandler,
1972 (xmlhandler)my_CommentHandler},
1973 {"StartCdataSectionHandler",
1974 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
1975 (xmlhandler)my_StartCdataSectionHandler},
1976 {"EndCdataSectionHandler",
1977 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
1978 (xmlhandler)my_EndCdataSectionHandler},
1979 {"DefaultHandler",
1980 (xmlhandlersetter)XML_SetDefaultHandler,
1981 (xmlhandler)my_DefaultHandler},
1982 {"DefaultHandlerExpand",
1983 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
1984 (xmlhandler)my_DefaultHandlerExpandHandler},
1985 {"NotStandaloneHandler",
1986 (xmlhandlersetter)XML_SetNotStandaloneHandler,
1987 (xmlhandler)my_NotStandaloneHandler},
1988 {"ExternalEntityRefHandler",
1989 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
1990 (xmlhandler)my_ExternalEntityRefHandler},
1991 {"StartDoctypeDeclHandler",
1992 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
1993 (xmlhandler)my_StartDoctypeDeclHandler},
1994 {"EndDoctypeDeclHandler",
1995 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
1996 (xmlhandler)my_EndDoctypeDeclHandler},
1997 {"EntityDeclHandler",
1998 (xmlhandlersetter)XML_SetEntityDeclHandler,
1999 (xmlhandler)my_EntityDeclHandler},
2000 {"XmlDeclHandler",
2001 (xmlhandlersetter)XML_SetXmlDeclHandler,
2002 (xmlhandler)my_XmlDeclHandler},
2003 {"ElementDeclHandler",
2004 (xmlhandlersetter)XML_SetElementDeclHandler,
2005 (xmlhandler)my_ElementDeclHandler},
2006 {"AttlistDeclHandler",
2007 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2008 (xmlhandler)my_AttlistDeclHandler},
2009 #if XML_COMBINED_VERSION >= 19504
2010 {"SkippedEntityHandler",
2011 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2012 (xmlhandler)my_SkippedEntityHandler},
2013 #endif
2015 {NULL, NULL, NULL} /* sentinel */