This commit was manufactured by cvs2svn to create tag 'r23a1-fork'.
[python/dscho.git] / Modules / pyexpat.c
blob f74751ba616a1cb91f0be0ed27e2859039762107
1 #include "Python.h"
2 #include <ctype.h>
4 #include "compile.h"
5 #include "frameobject.h"
6 #include "expat.h"
/* Fallback definitions of the PyDoc_* docstring macros, used only when the
 * Python headers in use do not already provide PyDoc_STRVAR.
 */
#ifndef PyDoc_STRVAR
#define PyDoc_STR(str)         str
#define PyDoc_VAR(name)        static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif

#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE
#endif
/* Slot numbers for the callbacks a parser object can hold.  The values are
 * used as indexes into xmlparseobject.handlers[] and handler_info[].
 * _DummyDecl is the last enumerator; no visible code uses it as a slot.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    _DummyDecl
};
44 static PyObject *ErrorObject;
46 /* ----------------------------------------------------- */
48 /* Declarations for objects of type xmlparser */
50 typedef struct {
51 PyObject_HEAD
53 XML_Parser itself;
54 int returns_unicode; /* True if Unicode strings are returned;
55 if false, UTF-8 strings are returned */
56 int ordered_attributes; /* Return attributes as a list. */
57 int specified_attributes; /* Report only specified attributes. */
58 int in_callback; /* Is a callback active? */
59 XML_Char *buffer; /* Buffer used when accumulating characters */
60 /* NULL if not enabled */
61 int buffer_size; /* Size of buffer, in XML_Char units */
62 int buffer_used; /* Buffer units in use */
63 PyObject *intern; /* Dictionary to intern strings */
64 PyObject **handlers;
65 } xmlparseobject;
67 #define CHARACTER_DATA_BUFFER_SIZE 8192
69 static PyTypeObject Xmlparsetype;
71 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
72 typedef void* xmlhandler;
74 struct HandlerInfo {
75 const char *name;
76 xmlhandlersetter setter;
77 xmlhandler handler;
78 PyCodeObject *tb_code;
79 PyObject *nameobj;
82 static struct HandlerInfo handler_info[64];
84 /* Set an integer attribute on the error object; return true on success,
85 * false on an exception.
87 static int
88 set_error_attr(PyObject *err, char *name, int value)
90 PyObject *v = PyInt_FromLong(value);
92 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
93 Py_DECREF(v);
94 return 0;
96 return 1;
99 /* Build and set an Expat exception, including positioning
100 * information. Always returns NULL.
102 static PyObject *
103 set_error(xmlparseobject *self)
105 PyObject *err;
106 char buffer[256];
107 XML_Parser parser = self->itself;
108 int lineno = XML_GetErrorLineNumber(parser);
109 int column = XML_GetErrorColumnNumber(parser);
110 enum XML_Error code = XML_GetErrorCode(parser);
112 /* There is no risk of overflowing this buffer, since
113 even for 64-bit integers, there is sufficient space. */
114 sprintf(buffer, "%.200s: line %i, column %i",
115 XML_ErrorString(code), lineno, column);
116 err = PyObject_CallFunction(ErrorObject, "s", buffer);
117 if ( err != NULL
118 && set_error_attr(err, "code", code)
119 && set_error_attr(err, "offset", column)
120 && set_error_attr(err, "lineno", lineno)) {
121 PyErr_SetObject(ErrorObject, err);
123 return NULL;
126 static int
127 have_handler(xmlparseobject *self, int type)
129 PyObject *handler = self->handlers[type];
130 return handler != NULL;
133 static PyObject *
134 get_handler_name(struct HandlerInfo *hinfo)
136 PyObject *name = hinfo->nameobj;
137 if (name == NULL) {
138 name = PyString_FromString(hinfo->name);
139 hinfo->nameobj = name;
141 Py_XINCREF(name);
142 return name;
#ifdef Py_USING_UNICODE
/* Convert a string of XML_Chars into a Unicode string.
   Returns None if str is a null pointer. */

static PyObject *
conv_string_to_unicode(const XML_Char *str)
{
    /* XXX currently this code assumes that XML_Char is 8-bit,
       and hence in UTF-8.  */
    /* UTF-8 from Expat, Unicode desired */
    if (str == NULL) {
        Py_INCREF(Py_None);
        return Py_None;
    }
    return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
}

/* Same as conv_string_to_unicode(), for a string given by pointer and
   length instead of NUL termination. */

static PyObject *
conv_string_len_to_unicode(const XML_Char *str, int len)
{
    /* XXX currently this code assumes that XML_Char is 8-bit,
       and hence in UTF-8.  */
    /* UTF-8 from Expat, Unicode desired */
    if (str == NULL) {
        Py_INCREF(Py_None);
        return Py_None;
    }
    return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
}
#endif
177 /* Convert a string of XML_Chars into an 8-bit Python string.
178 Returns None if str is a null pointer. */
180 static PyObject *
181 conv_string_to_utf8(const XML_Char *str)
183 /* XXX currently this code assumes that XML_Char is 8-bit,
184 and hence in UTF-8. */
185 /* UTF-8 from Expat, UTF-8 desired */
186 if (str == NULL) {
187 Py_INCREF(Py_None);
188 return Py_None;
190 return PyString_FromString(str);
193 static PyObject *
194 conv_string_len_to_utf8(const XML_Char *str, int len)
196 /* XXX currently this code assumes that XML_Char is 8-bit,
197 and hence in UTF-8. */
198 /* UTF-8 from Expat, UTF-8 desired */
199 if (str == NULL) {
200 Py_INCREF(Py_None);
201 return Py_None;
203 return PyString_FromStringAndSize((const char *)str, len);
206 /* Callback routines */
208 static void clear_handlers(xmlparseobject *self, int initial);
210 static void
211 flag_error(xmlparseobject *self)
213 clear_handlers(self, 0);
216 static PyCodeObject*
217 getcode(enum HandlerTypes slot, char* func_name, int lineno)
219 PyObject *code = NULL;
220 PyObject *name = NULL;
221 PyObject *nulltuple = NULL;
222 PyObject *filename = NULL;
224 if (handler_info[slot].tb_code == NULL) {
225 code = PyString_FromString("");
226 if (code == NULL)
227 goto failed;
228 name = PyString_FromString(func_name);
229 if (name == NULL)
230 goto failed;
231 nulltuple = PyTuple_New(0);
232 if (nulltuple == NULL)
233 goto failed;
234 filename = PyString_FromString(__FILE__);
235 handler_info[slot].tb_code =
236 PyCode_New(0, /* argcount */
237 0, /* nlocals */
238 0, /* stacksize */
239 0, /* flags */
240 code, /* code */
241 nulltuple, /* consts */
242 nulltuple, /* names */
243 nulltuple, /* varnames */
244 #if PYTHON_API_VERSION >= 1010
245 nulltuple, /* freevars */
246 nulltuple, /* cellvars */
247 #endif
248 filename, /* filename */
249 name, /* name */
250 lineno, /* firstlineno */
251 code /* lnotab */
253 if (handler_info[slot].tb_code == NULL)
254 goto failed;
255 Py_DECREF(code);
256 Py_DECREF(nulltuple);
257 Py_DECREF(filename);
258 Py_DECREF(name);
260 return handler_info[slot].tb_code;
261 failed:
262 Py_XDECREF(code);
263 Py_XDECREF(name);
264 return NULL;
267 static int
268 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
270 int result = 0;
271 if (!tstate->use_tracing || tstate->tracing)
272 return 0;
273 if (tstate->c_profilefunc != NULL) {
274 tstate->tracing++;
275 result = tstate->c_profilefunc(tstate->c_profileobj,
276 f, code , val);
277 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
278 || (tstate->c_profilefunc != NULL));
279 tstate->tracing--;
280 if (result)
281 return result;
283 if (tstate->c_tracefunc != NULL) {
284 tstate->tracing++;
285 result = tstate->c_tracefunc(tstate->c_traceobj,
286 f, code , val);
287 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
288 || (tstate->c_profilefunc != NULL));
289 tstate->tracing--;
291 return result;
294 static PyObject*
295 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
297 PyThreadState *tstate = PyThreadState_GET();
298 PyFrameObject *f;
299 PyObject *res;
301 if (c == NULL)
302 return NULL;
304 f = PyFrame_New(
305 tstate, /*back*/
306 c, /*code*/
307 PyEval_GetGlobals(), /*globals*/
308 NULL /*locals*/
310 if (f == NULL)
311 return NULL;
312 tstate->frame = f;
313 if (trace_frame(tstate, f, PyTrace_CALL, Py_None)) {
314 Py_DECREF(f);
315 return NULL;
317 res = PyEval_CallObject(func, args);
318 if (res == NULL && tstate->curexc_traceback == NULL)
319 PyTraceBack_Here(f);
320 else {
321 if (trace_frame(tstate, f, PyTrace_RETURN, res)) {
322 Py_XDECREF(res);
323 res = NULL;
326 tstate->frame = f->f_back;
327 Py_DECREF(f);
328 return res;
/* Converter for NUL-terminated XML_Char strings.  In Unicode builds the
 * expansion reads `self->returns_unicode`, so a variable named `self` must
 * be in scope wherever the macro is used.
 */
#ifndef Py_USING_UNICODE
#define STRING_CONV_FUNC conv_string_to_utf8
#else
/* Python 2.0 and later versions */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#endif
339 static PyObject*
340 string_intern(xmlparseobject *self, const char* str)
342 PyObject *result = STRING_CONV_FUNC(str);
343 PyObject *value;
344 if (!self->intern)
345 return result;
346 value = PyDict_GetItem(self->intern, result);
347 if (!value) {
348 if (PyDict_SetItem(self->intern, result, result) == 0)
349 return result;
350 else
351 return NULL;
353 Py_INCREF(value);
354 Py_DECREF(result);
355 return value;
358 /* Return 0 on success, -1 on exception.
359 * flag_error() will be called before return if needed.
361 static int
362 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
364 PyObject *args;
365 PyObject *temp;
367 args = PyTuple_New(1);
368 if (args == NULL)
369 return -1;
370 #ifdef Py_USING_UNICODE
371 temp = (self->returns_unicode
372 ? conv_string_len_to_unicode(buffer, len)
373 : conv_string_len_to_utf8(buffer, len));
374 #else
375 temp = conv_string_len_to_utf8(buffer, len);
376 #endif
377 if (temp == NULL) {
378 Py_DECREF(args);
379 flag_error(self);
380 return -1;
382 PyTuple_SET_ITEM(args, 0, temp);
383 /* temp is now a borrowed reference; consider it unused. */
384 self->in_callback = 1;
385 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
386 self->handlers[CharacterData], args);
387 /* temp is an owned reference again, or NULL */
388 self->in_callback = 0;
389 Py_DECREF(args);
390 if (temp == NULL) {
391 flag_error(self);
392 return -1;
394 Py_DECREF(temp);
395 return 0;
398 static int
399 flush_character_buffer(xmlparseobject *self)
401 int rc;
402 if (self->buffer == NULL || self->buffer_used == 0)
403 return 0;
404 rc = call_character_handler(self, self->buffer, self->buffer_used);
405 self->buffer_used = 0;
406 return rc;
409 static void
410 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
412 xmlparseobject *self = (xmlparseobject *) userData;
413 if (self->buffer == NULL)
414 call_character_handler(self, data, len);
415 else {
416 if ((self->buffer_used + len) > self->buffer_size) {
417 if (flush_character_buffer(self) < 0)
418 return;
419 /* handler might have changed; drop the rest on the floor
420 * if there isn't a handler anymore
422 if (!have_handler(self, CharacterData))
423 return;
425 if (len > self->buffer_size) {
426 call_character_handler(self, data, len);
427 self->buffer_used = 0;
429 else {
430 memcpy(self->buffer + self->buffer_used,
431 data, len * sizeof(XML_Char));
432 self->buffer_used += len;
437 static void
438 my_StartElementHandler(void *userData,
439 const XML_Char *name, const XML_Char *atts[])
441 xmlparseobject *self = (xmlparseobject *)userData;
443 if (have_handler(self, StartElement)) {
444 PyObject *container, *rv, *args;
445 int i, max;
447 if (flush_character_buffer(self) < 0)
448 return;
449 /* Set max to the number of slots filled in atts[]; max/2 is
450 * the number of attributes we need to process.
452 if (self->specified_attributes) {
453 max = XML_GetSpecifiedAttributeCount(self->itself);
455 else {
456 max = 0;
457 while (atts[max] != NULL)
458 max += 2;
460 /* Build the container. */
461 if (self->ordered_attributes)
462 container = PyList_New(max);
463 else
464 container = PyDict_New();
465 if (container == NULL) {
466 flag_error(self);
467 return;
469 for (i = 0; i < max; i += 2) {
470 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
471 PyObject *v;
472 if (n == NULL) {
473 flag_error(self);
474 Py_DECREF(container);
475 return;
477 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
478 if (v == NULL) {
479 flag_error(self);
480 Py_DECREF(container);
481 Py_DECREF(n);
482 return;
484 if (self->ordered_attributes) {
485 PyList_SET_ITEM(container, i, n);
486 PyList_SET_ITEM(container, i+1, v);
488 else if (PyDict_SetItem(container, n, v)) {
489 flag_error(self);
490 Py_DECREF(n);
491 Py_DECREF(v);
492 return;
494 else {
495 Py_DECREF(n);
496 Py_DECREF(v);
499 args = Py_BuildValue("(NN)", string_intern(self, name), container);
500 if (args == NULL) {
501 Py_DECREF(container);
502 return;
504 /* Container is now a borrowed reference; ignore it. */
505 self->in_callback = 1;
506 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
507 self->handlers[StartElement], args);
508 self->in_callback = 0;
509 Py_DECREF(args);
510 if (rv == NULL) {
511 flag_error(self);
512 return;
514 Py_DECREF(rv);
/* RC_HANDLER generates a static function my_<NAME>Handler that forwards an
 * Expat callback to the Python handler stored in slot NAME.
 *
 *   RC            C return type of the generated function
 *   NAME          HandlerTypes enumerator (also used as the traceback name)
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statement run on the handler's result `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function flushes buffered character data first, and calls
 * flag_error() if building the arguments or calling the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Handler returning nothing; user data is the `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Handler returning the Python result converted with PyInt_AsLong(). */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyInt_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
556 VOID_HANDLER(EndElement,
557 (void *userData, const XML_Char *name),
558 ("(N)", string_intern(self, name)))
560 VOID_HANDLER(ProcessingInstruction,
561 (void *userData,
562 const XML_Char *target,
563 const XML_Char *data),
564 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
566 VOID_HANDLER(UnparsedEntityDecl,
567 (void *userData,
568 const XML_Char *entityName,
569 const XML_Char *base,
570 const XML_Char *systemId,
571 const XML_Char *publicId,
572 const XML_Char *notationName),
573 ("(NNNNN)",
574 string_intern(self, entityName), string_intern(self, base),
575 string_intern(self, systemId), string_intern(self, publicId),
576 string_intern(self, notationName)))
578 #ifndef Py_USING_UNICODE
579 VOID_HANDLER(EntityDecl,
580 (void *userData,
581 const XML_Char *entityName,
582 int is_parameter_entity,
583 const XML_Char *value,
584 int value_length,
585 const XML_Char *base,
586 const XML_Char *systemId,
587 const XML_Char *publicId,
588 const XML_Char *notationName),
589 ("NiNNNNN",
590 string_intern(self, entityName), is_parameter_entity,
591 conv_string_len_to_utf8(value, value_length),
592 string_intern(self, base), string_intern(self, systemId),
593 string_intern(self, publicId),
594 string_intern(self, notationName)))
595 #else
596 VOID_HANDLER(EntityDecl,
597 (void *userData,
598 const XML_Char *entityName,
599 int is_parameter_entity,
600 const XML_Char *value,
601 int value_length,
602 const XML_Char *base,
603 const XML_Char *systemId,
604 const XML_Char *publicId,
605 const XML_Char *notationName),
606 ("NiNNNNN",
607 string_intern(self, entityName), is_parameter_entity,
608 (self->returns_unicode
609 ? conv_string_len_to_unicode(value, value_length)
610 : conv_string_len_to_utf8(value, value_length)),
611 string_intern(self, base), string_intern(self, systemId),
612 string_intern(self, publicId),
613 string_intern(self, notationName)))
614 #endif
616 VOID_HANDLER(XmlDecl,
617 (void *userData,
618 const XML_Char *version,
619 const XML_Char *encoding,
620 int standalone),
621 ("(O&O&i)",
622 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
623 standalone))
625 static PyObject *
626 conv_content_model(XML_Content * const model,
627 PyObject *(*conv_string)(const XML_Char *))
629 PyObject *result = NULL;
630 PyObject *children = PyTuple_New(model->numchildren);
631 int i;
633 if (children != NULL) {
634 assert(model->numchildren < INT_MAX);
635 for (i = 0; i < (int)model->numchildren; ++i) {
636 PyObject *child = conv_content_model(&model->children[i],
637 conv_string);
638 if (child == NULL) {
639 Py_XDECREF(children);
640 return NULL;
642 PyTuple_SET_ITEM(children, i, child);
644 result = Py_BuildValue("(iiO&N)",
645 model->type, model->quant,
646 conv_string,model->name, children);
648 return result;
651 static PyObject *
652 conv_content_model_utf8(XML_Content * const model)
654 return conv_content_model(model, conv_string_to_utf8);
657 #ifdef Py_USING_UNICODE
658 static PyObject *
659 conv_content_model_unicode(XML_Content * const model)
661 return conv_content_model(model, conv_string_to_unicode);
664 VOID_HANDLER(ElementDecl,
665 (void *userData,
666 const XML_Char *name,
667 XML_Content *model),
668 ("NO&",
669 string_intern(self, name),
670 (self->returns_unicode ? conv_content_model_unicode
671 : conv_content_model_utf8),model))
672 #else
673 VOID_HANDLER(ElementDecl,
674 (void *userData,
675 const XML_Char *name,
676 XML_Content *model),
677 ("NO&",
678 string_intern(self, name), conv_content_model_utf8,model))
679 #endif
681 VOID_HANDLER(AttlistDecl,
682 (void *userData,
683 const XML_Char *elname,
684 const XML_Char *attname,
685 const XML_Char *att_type,
686 const XML_Char *dflt,
687 int isrequired),
688 ("(NNO&O&i)",
689 string_intern(self, elname), string_intern(self, attname),
690 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
691 isrequired))
693 VOID_HANDLER(NotationDecl,
694 (void *userData,
695 const XML_Char *notationName,
696 const XML_Char *base,
697 const XML_Char *systemId,
698 const XML_Char *publicId),
699 ("(NNNN)",
700 string_intern(self, notationName), string_intern(self, base),
701 string_intern(self, systemId), string_intern(self, publicId)))
703 VOID_HANDLER(StartNamespaceDecl,
704 (void *userData,
705 const XML_Char *prefix,
706 const XML_Char *uri),
707 ("(NN)",
708 string_intern(self, prefix), string_intern(self, uri)))
710 VOID_HANDLER(EndNamespaceDecl,
711 (void *userData,
712 const XML_Char *prefix),
713 ("(N)", string_intern(self, prefix)))
715 VOID_HANDLER(Comment,
716 (void *userData, const XML_Char *data),
717 ("(O&)", STRING_CONV_FUNC,data))
719 VOID_HANDLER(StartCdataSection,
720 (void *userData),
721 ("()"))
723 VOID_HANDLER(EndCdataSection,
724 (void *userData),
725 ("()"))
727 #ifndef Py_USING_UNICODE
728 VOID_HANDLER(Default,
729 (void *userData, const XML_Char *s, int len),
730 ("(N)", conv_string_len_to_utf8(s,len)))
732 VOID_HANDLER(DefaultHandlerExpand,
733 (void *userData, const XML_Char *s, int len),
734 ("(N)", conv_string_len_to_utf8(s,len)))
735 #else
736 VOID_HANDLER(Default,
737 (void *userData, const XML_Char *s, int len),
738 ("(N)", (self->returns_unicode
739 ? conv_string_len_to_unicode(s,len)
740 : conv_string_len_to_utf8(s,len))))
742 VOID_HANDLER(DefaultHandlerExpand,
743 (void *userData, const XML_Char *s, int len),
744 ("(N)", (self->returns_unicode
745 ? conv_string_len_to_unicode(s,len)
746 : conv_string_len_to_utf8(s,len))))
747 #endif
749 INT_HANDLER(NotStandalone,
750 (void *userData),
751 ("()"))
753 RC_HANDLER(int, ExternalEntityRef,
754 (XML_Parser parser,
755 const XML_Char *context,
756 const XML_Char *base,
757 const XML_Char *systemId,
758 const XML_Char *publicId),
759 int rc=0;,
760 ("(O&NNN)",
761 STRING_CONV_FUNC,context, string_intern(self, base),
762 string_intern(self, systemId), string_intern(self, publicId)),
763 rc = PyInt_AsLong(rv);, rc,
764 XML_GetUserData(parser))
766 /* XXX UnknownEncodingHandler */
768 VOID_HANDLER(StartDoctypeDecl,
769 (void *userData, const XML_Char *doctypeName,
770 const XML_Char *sysid, const XML_Char *pubid,
771 int has_internal_subset),
772 ("(NNNi)", string_intern(self, doctypeName),
773 string_intern(self, sysid), string_intern(self, pubid),
774 has_internal_subset))
776 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
778 /* ---------------------------------------------------------------- */
780 static PyObject *
781 get_parse_result(xmlparseobject *self, int rv)
783 if (PyErr_Occurred()) {
784 return NULL;
786 if (rv == 0) {
787 return set_error(self);
789 if (flush_character_buffer(self) < 0) {
790 return NULL;
792 return PyInt_FromLong(rv);
795 PyDoc_STRVAR(xmlparse_Parse__doc__,
796 "Parse(data[, isfinal])\n\
797 Parse XML data. `isfinal' should be true at end of input.");
799 static PyObject *
800 xmlparse_Parse(xmlparseobject *self, PyObject *args)
802 char *s;
803 int slen;
804 int isFinal = 0;
806 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
807 return NULL;
809 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
812 /* File reading copied from cPickle */
814 #define BUF_SIZE 2048
816 static int
817 readinst(char *buf, int buf_size, PyObject *meth)
819 PyObject *arg = NULL;
820 PyObject *bytes = NULL;
821 PyObject *str = NULL;
822 int len = -1;
824 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
825 goto finally;
827 if ((arg = PyTuple_New(1)) == NULL)
828 goto finally;
830 PyTuple_SET_ITEM(arg, 0, bytes);
832 if ((str = PyObject_Call(meth, arg, NULL)) == NULL)
833 goto finally;
835 /* XXX what to do if it returns a Unicode string? */
836 if (!PyString_Check(str)) {
837 PyErr_Format(PyExc_TypeError,
838 "read() did not return a string object (type=%.400s)",
839 str->ob_type->tp_name);
840 goto finally;
842 len = PyString_GET_SIZE(str);
843 if (len > buf_size) {
844 PyErr_Format(PyExc_ValueError,
845 "read() returned too much data: "
846 "%i bytes requested, %i returned",
847 buf_size, len);
848 Py_DECREF(str);
849 goto finally;
851 memcpy(buf, PyString_AsString(str), len);
852 finally:
853 Py_XDECREF(arg);
854 Py_XDECREF(str);
855 return len;
858 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
859 "ParseFile(file)\n\
860 Parse XML data from file-like object.");
862 static PyObject *
863 xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
865 int rv = 1;
866 PyObject *f;
867 FILE *fp;
868 PyObject *readmethod = NULL;
870 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
871 return NULL;
873 if (PyFile_Check(f)) {
874 fp = PyFile_AsFile(f);
876 else{
877 fp = NULL;
878 readmethod = PyObject_GetAttrString(f, "read");
879 if (readmethod == NULL) {
880 PyErr_Clear();
881 PyErr_SetString(PyExc_TypeError,
882 "argument must have 'read' attribute");
883 return NULL;
886 for (;;) {
887 int bytes_read;
888 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
889 if (buf == NULL)
890 return PyErr_NoMemory();
892 if (fp) {
893 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
894 if (bytes_read < 0) {
895 PyErr_SetFromErrno(PyExc_IOError);
896 return NULL;
899 else {
900 bytes_read = readinst(buf, BUF_SIZE, readmethod);
901 if (bytes_read < 0)
902 return NULL;
904 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
905 if (PyErr_Occurred())
906 return NULL;
908 if (!rv || bytes_read == 0)
909 break;
911 return get_parse_result(self, rv);
914 PyDoc_STRVAR(xmlparse_SetBase__doc__,
915 "SetBase(base_url)\n\
916 Set the base URL for the parser.");
918 static PyObject *
919 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
921 char *base;
923 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
924 return NULL;
925 if (!XML_SetBase(self->itself, base)) {
926 return PyErr_NoMemory();
928 Py_INCREF(Py_None);
929 return Py_None;
932 PyDoc_STRVAR(xmlparse_GetBase__doc__,
933 "GetBase() -> url\n\
934 Return base URL string for the parser.");
936 static PyObject *
937 xmlparse_GetBase(xmlparseobject *self, PyObject *args)
939 if (!PyArg_ParseTuple(args, ":GetBase"))
940 return NULL;
942 return Py_BuildValue("z", XML_GetBase(self->itself));
945 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
946 "GetInputContext() -> string\n\
947 Return the untranslated text of the input that caused the current event.\n\
948 If the event was generated by a large amount of text (such as a start tag\n\
949 for an element with many attributes), not all of the text may be available.");
951 static PyObject *
952 xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
954 PyObject *result = NULL;
956 if (PyArg_ParseTuple(args, ":GetInputContext")) {
957 if (self->in_callback) {
958 int offset, size;
959 const char *buffer
960 = XML_GetInputContext(self->itself, &offset, &size);
962 if (buffer != NULL)
963 result = PyString_FromStringAndSize(buffer + offset, size);
964 else {
965 result = Py_None;
966 Py_INCREF(result);
969 else {
970 result = Py_None;
971 Py_INCREF(result);
974 return result;
977 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
978 "ExternalEntityParserCreate(context[, encoding])\n\
979 Create a parser for parsing an external entity based on the\n\
980 information passed to the ExternalEntityRefHandler.");
982 static PyObject *
983 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
985 char *context;
986 char *encoding = NULL;
987 xmlparseobject *new_parser;
988 int i;
990 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
991 &context, &encoding)) {
992 return NULL;
995 #ifndef Py_TPFLAGS_HAVE_GC
996 /* Python versions 2.0 and 2.1 */
997 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
998 #else
999 /* Python versions 2.2 and later */
1000 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1001 #endif
1003 if (new_parser == NULL)
1004 return NULL;
1005 new_parser->buffer_size = self->buffer_size;
1006 new_parser->buffer_used = 0;
1007 if (self->buffer != NULL) {
1008 new_parser->buffer = malloc(new_parser->buffer_size);
1009 if (new_parser->buffer == NULL) {
1010 #ifndef Py_TPFLAGS_HAVE_GC
1011 /* Code for versions 2.0 and 2.1 */
1012 PyObject_Del(new_parser);
1013 #else
1014 /* Code for versions 2.2 and later. */
1015 PyObject_GC_Del(new_parser);
1016 #endif
1017 return PyErr_NoMemory();
1020 else
1021 new_parser->buffer = NULL;
1022 new_parser->returns_unicode = self->returns_unicode;
1023 new_parser->ordered_attributes = self->ordered_attributes;
1024 new_parser->specified_attributes = self->specified_attributes;
1025 new_parser->in_callback = 0;
1026 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
1027 encoding);
1028 new_parser->handlers = 0;
1029 new_parser->intern = self->intern;
1030 Py_XINCREF(new_parser->intern);
1031 #ifdef Py_TPFLAGS_HAVE_GC
1032 PyObject_GC_Track(new_parser);
1033 #else
1034 PyObject_GC_Init(new_parser);
1035 #endif
1037 if (!new_parser->itself) {
1038 Py_DECREF(new_parser);
1039 return PyErr_NoMemory();
1042 XML_SetUserData(new_parser->itself, (void *)new_parser);
1044 /* allocate and clear handlers first */
1045 for (i = 0; handler_info[i].name != NULL; i++)
1046 /* do nothing */;
1048 new_parser->handlers = malloc(sizeof(PyObject *) * i);
1049 if (!new_parser->handlers) {
1050 Py_DECREF(new_parser);
1051 return PyErr_NoMemory();
1053 clear_handlers(new_parser, 1);
1055 /* then copy handlers from self */
1056 for (i = 0; handler_info[i].name != NULL; i++) {
1057 PyObject *handler = self->handlers[i];
1058 if (handler != NULL) {
1059 Py_INCREF(handler);
1060 new_parser->handlers[i] = handler;
1061 handler_info[i].setter(new_parser->itself,
1062 handler_info[i].handler);
1065 return (PyObject *)new_parser;
1068 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1069 "SetParamEntityParsing(flag) -> success\n\
1070 Controls parsing of parameter entities (including the external DTD\n\
1071 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1072 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1073 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1074 was successful.");
1076 static PyObject*
1077 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1079 int flag;
1080 if (!PyArg_ParseTuple(args, "i", &flag))
1081 return NULL;
1082 flag = XML_SetParamEntityParsing(p->itself, flag);
1083 return PyInt_FromLong(flag);
1086 static struct PyMethodDef xmlparse_methods[] = {
1087 {"Parse", (PyCFunction)xmlparse_Parse,
1088 METH_VARARGS, xmlparse_Parse__doc__},
1089 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
1090 METH_VARARGS, xmlparse_ParseFile__doc__},
1091 {"SetBase", (PyCFunction)xmlparse_SetBase,
1092 METH_VARARGS, xmlparse_SetBase__doc__},
1093 {"GetBase", (PyCFunction)xmlparse_GetBase,
1094 METH_VARARGS, xmlparse_GetBase__doc__},
1095 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
1096 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
1097 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1098 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
1099 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1100 METH_VARARGS, xmlparse_GetInputContext__doc__},
1101 {NULL, NULL} /* sentinel */
/* ---------- */


#ifdef Py_USING_UNICODE

/* pyexpat international encoding support.
   Make it as simple as possible.
*/

/* Bytes 0..255 in order, decoded by PyUnknownEncodingHandler() to learn
   how an encoding maps each byte. */
static char template_buffer[257];
PyObject *template_string = NULL;

/* Fill template_buffer with the byte values 0..255 and a terminating 0. */
static void
init_template_buffer(void)
{
    int i;
    for (i = 0; i < 256; i++) {
        template_buffer[i] = i;
    }
    template_buffer[256] = 0;
}

/* Expat unknown-encoding callback.  Decodes template_buffer with the
 * Python codec called `name` and stores the resulting code point for each
 * byte in info->map; bytes the codec replaced are mapped to -1.
 *
 * Returns 1 on success and 0 if the codec could not decode the template
 * or did not produce one character per byte.
 */
static int
PyUnknownEncodingHandler(void *encodingHandlerData,
                         const XML_Char *name,
                         XML_Encoding *info)
{
    PyUnicodeObject *_u_string = NULL;
    int result = 0;
    int i;

    /* Yes, supports only 8bit encodings */
    _u_string = (PyUnicodeObject *)
        PyUnicode_Decode(template_buffer, 256, name, "replace");

    if (_u_string == NULL)
        return result;

    /* A codec that yields fewer than 256 characters cannot be described
       by a byte map; refuse it rather than read past the string. */
    if (_u_string->length < 256) {
        Py_DECREF(_u_string);
        return result;
    }

    for (i = 0; i < 256; i++) {
        /* Stupid to access directly, but fast */
        Py_UNICODE c = _u_string->str[i];
        if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[i] = -1;
        else
            info->map[i] = c;
    }
    info->data = NULL;
    info->convert = NULL;
    info->release = NULL;
    result = 1;
    Py_DECREF(_u_string);
    return result;
}

#endif
1160 static PyObject *
1161 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1163 int i;
1164 xmlparseobject *self;
1166 #ifdef Py_TPFLAGS_HAVE_GC
1167 /* Code for versions 2.2 and later */
1168 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1169 #else
1170 self = PyObject_New(xmlparseobject, &Xmlparsetype);
1171 #endif
1172 if (self == NULL)
1173 return NULL;
1175 #ifdef Py_USING_UNICODE
1176 self->returns_unicode = 1;
1177 #else
1178 self->returns_unicode = 0;
1179 #endif
1181 self->buffer = NULL;
1182 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1183 self->buffer_used = 0;
1184 self->ordered_attributes = 0;
1185 self->specified_attributes = 0;
1186 self->in_callback = 0;
1187 self->handlers = NULL;
1188 if (namespace_separator != NULL) {
1189 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1191 else {
1192 self->itself = XML_ParserCreate(encoding);
1194 self->intern = intern;
1195 Py_XINCREF(self->intern);
1196 #ifdef Py_TPFLAGS_HAVE_GC
1197 PyObject_GC_Track(self);
1198 #else
1199 PyObject_GC_Init(self);
1200 #endif
1201 if (self->itself == NULL) {
1202 PyErr_SetString(PyExc_RuntimeError,
1203 "XML_ParserCreate failed");
1204 Py_DECREF(self);
1205 return NULL;
1207 XML_SetUserData(self->itself, (void *)self);
1208 #ifdef Py_USING_UNICODE
1209 XML_SetUnknownEncodingHandler(self->itself,
1210 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1211 #endif
1213 for (i = 0; handler_info[i].name != NULL; i++)
1214 /* do nothing */;
1216 self->handlers = malloc(sizeof(PyObject *) * i);
1217 if (!self->handlers) {
1218 Py_DECREF(self);
1219 return PyErr_NoMemory();
1221 clear_handlers(self, 1);
1223 return (PyObject*)self;
1227 static void
1228 xmlparse_dealloc(xmlparseobject *self)
1230 int i;
1231 #ifdef Py_TPFLAGS_HAVE_GC
1232 PyObject_GC_UnTrack(self);
1233 #else
1234 PyObject_GC_Fini(self);
1235 #endif
1236 if (self->itself != NULL)
1237 XML_ParserFree(self->itself);
1238 self->itself = NULL;
1240 if (self->handlers != NULL) {
1241 PyObject *temp;
1242 for (i = 0; handler_info[i].name != NULL; i++) {
1243 temp = self->handlers[i];
1244 self->handlers[i] = NULL;
1245 Py_XDECREF(temp);
1247 free(self->handlers);
1248 self->handlers = NULL;
1250 if (self->buffer != NULL) {
1251 free(self->buffer);
1252 self->buffer = NULL;
1254 Py_XDECREF(self->intern);
1255 #ifndef Py_TPFLAGS_HAVE_GC
1256 /* Code for versions 2.0 and 2.1 */
1257 PyObject_Del(self);
1258 #else
1259 /* Code for versions 2.2 and later. */
1260 PyObject_GC_Del(self);
1261 #endif
1264 static int
1265 handlername2int(const char *name)
1267 int i;
1268 for (i = 0; handler_info[i].name != NULL; i++) {
1269 if (strcmp(name, handler_info[i].name) == 0) {
1270 return i;
1273 return -1;
1276 static PyObject *
1277 get_pybool(int istrue)
1279 PyObject *result = istrue ? Py_True : Py_False;
1280 Py_INCREF(result);
1281 return result;
1284 static PyObject *
1285 xmlparse_getattr(xmlparseobject *self, char *name)
1287 int handlernum = handlername2int(name);
1289 if (handlernum != -1) {
1290 PyObject *result = self->handlers[handlernum];
1291 if (result == NULL)
1292 result = Py_None;
1293 Py_INCREF(result);
1294 return result;
1296 if (name[0] == 'E') {
1297 if (strcmp(name, "ErrorCode") == 0)
1298 return PyInt_FromLong((long)
1299 XML_GetErrorCode(self->itself));
1300 if (strcmp(name, "ErrorLineNumber") == 0)
1301 return PyInt_FromLong((long)
1302 XML_GetErrorLineNumber(self->itself));
1303 if (strcmp(name, "ErrorColumnNumber") == 0)
1304 return PyInt_FromLong((long)
1305 XML_GetErrorColumnNumber(self->itself));
1306 if (strcmp(name, "ErrorByteIndex") == 0)
1307 return PyInt_FromLong((long)
1308 XML_GetErrorByteIndex(self->itself));
1310 if (name[0] == 'b') {
1311 if (strcmp(name, "buffer_size") == 0)
1312 return PyInt_FromLong((long) self->buffer_size);
1313 if (strcmp(name, "buffer_text") == 0)
1314 return get_pybool(self->buffer != NULL);
1315 if (strcmp(name, "buffer_used") == 0)
1316 return PyInt_FromLong((long) self->buffer_used);
1318 if (strcmp(name, "ordered_attributes") == 0)
1319 return get_pybool(self->ordered_attributes);
1320 if (strcmp(name, "returns_unicode") == 0)
1321 return get_pybool((long) self->returns_unicode);
1322 if (strcmp(name, "specified_attributes") == 0)
1323 return get_pybool((long) self->specified_attributes);
1324 if (strcmp(name, "intern") == 0) {
1325 if (self->intern == NULL) {
1326 Py_INCREF(Py_None);
1327 return Py_None;
1329 else {
1330 Py_INCREF(self->intern);
1331 return self->intern;
1335 if (strcmp(name, "__members__") == 0) {
1336 int i;
1337 PyObject *rc = PyList_New(0);
1338 for (i = 0; handler_info[i].name != NULL; i++) {
1339 PyList_Append(rc, get_handler_name(&handler_info[i]));
1341 PyList_Append(rc, PyString_FromString("ErrorCode"));
1342 PyList_Append(rc, PyString_FromString("ErrorLineNumber"));
1343 PyList_Append(rc, PyString_FromString("ErrorColumnNumber"));
1344 PyList_Append(rc, PyString_FromString("ErrorByteIndex"));
1345 PyList_Append(rc, PyString_FromString("buffer_size"));
1346 PyList_Append(rc, PyString_FromString("buffer_text"));
1347 PyList_Append(rc, PyString_FromString("buffer_used"));
1348 PyList_Append(rc, PyString_FromString("ordered_attributes"));
1349 PyList_Append(rc, PyString_FromString("returns_unicode"));
1350 PyList_Append(rc, PyString_FromString("specified_attributes"));
1351 PyList_Append(rc, PyString_FromString("intern"));
1353 return rc;
1355 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
1358 static int
1359 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1361 int handlernum = handlername2int(name);
1362 if (handlernum >= 0) {
1363 xmlhandler c_handler = NULL;
1364 PyObject *temp = self->handlers[handlernum];
1366 if (v == Py_None)
1367 v = NULL;
1368 else if (v != NULL) {
1369 Py_INCREF(v);
1370 c_handler = handler_info[handlernum].handler;
1372 self->handlers[handlernum] = v;
1373 Py_XDECREF(temp);
1374 handler_info[handlernum].setter(self->itself, c_handler);
1375 return 1;
1377 return 0;
1380 static int
1381 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1383 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1384 if (v == NULL) {
1385 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1386 return -1;
1388 if (strcmp(name, "buffer_text") == 0) {
1389 if (PyObject_IsTrue(v)) {
1390 if (self->buffer == NULL) {
1391 self->buffer = malloc(self->buffer_size);
1392 if (self->buffer == NULL) {
1393 PyErr_NoMemory();
1394 return -1;
1396 self->buffer_used = 0;
1399 else if (self->buffer != NULL) {
1400 if (flush_character_buffer(self) < 0)
1401 return -1;
1402 free(self->buffer);
1403 self->buffer = NULL;
1405 return 0;
1407 if (strcmp(name, "ordered_attributes") == 0) {
1408 if (PyObject_IsTrue(v))
1409 self->ordered_attributes = 1;
1410 else
1411 self->ordered_attributes = 0;
1412 return 0;
1414 if (strcmp(name, "returns_unicode") == 0) {
1415 if (PyObject_IsTrue(v)) {
1416 #ifndef Py_USING_UNICODE
1417 PyErr_SetString(PyExc_ValueError,
1418 "Unicode support not available");
1419 return -1;
1420 #else
1421 self->returns_unicode = 1;
1422 #endif
1424 else
1425 self->returns_unicode = 0;
1426 return 0;
1428 if (strcmp(name, "specified_attributes") == 0) {
1429 if (PyObject_IsTrue(v))
1430 self->specified_attributes = 1;
1431 else
1432 self->specified_attributes = 0;
1433 return 0;
1435 if (strcmp(name, "CharacterDataHandler") == 0) {
1436 /* If we're changing the character data handler, flush all
1437 * cached data with the old handler. Not sure there's a
1438 * "right" thing to do, though, but this probably won't
1439 * happen.
1441 if (flush_character_buffer(self) < 0)
1442 return -1;
1444 if (sethandler(self, name, v)) {
1445 return 0;
1447 PyErr_SetString(PyExc_AttributeError, name);
1448 return -1;
1451 #ifdef WITH_CYCLE_GC
1452 static int
1453 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1455 int i, err;
1456 for (i = 0; handler_info[i].name != NULL; i++) {
1457 if (!op->handlers[i])
1458 continue;
1459 err = visit(op->handlers[i], arg);
1460 if (err)
1461 return err;
1463 return 0;
1466 static int
1467 xmlparse_clear(xmlparseobject *op)
1469 clear_handlers(op, 0);
1470 Py_XDECREF(op->intern);
1471 op->intern = 0;
1472 return 0;
1474 #endif
1476 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1478 static PyTypeObject Xmlparsetype = {
1479 PyObject_HEAD_INIT(NULL)
1480 0, /*ob_size*/
1481 "pyexpat.xmlparser", /*tp_name*/
1482 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1483 0, /*tp_itemsize*/
1484 /* methods */
1485 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1486 (printfunc)0, /*tp_print*/
1487 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1488 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1489 (cmpfunc)0, /*tp_compare*/
1490 (reprfunc)0, /*tp_repr*/
1491 0, /*tp_as_number*/
1492 0, /*tp_as_sequence*/
1493 0, /*tp_as_mapping*/
1494 (hashfunc)0, /*tp_hash*/
1495 (ternaryfunc)0, /*tp_call*/
1496 (reprfunc)0, /*tp_str*/
1497 0, /* tp_getattro */
1498 0, /* tp_setattro */
1499 0, /* tp_as_buffer */
1500 #ifdef Py_TPFLAGS_HAVE_GC
1501 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1502 #else
1503 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1504 #endif
1505 Xmlparsetype__doc__, /* Documentation string */
1506 #ifdef WITH_CYCLE_GC
1507 (traverseproc)xmlparse_traverse, /* tp_traverse */
1508 (inquiry)xmlparse_clear /* tp_clear */
1509 #else
1510 0, 0
1511 #endif
1514 /* End of code for xmlparser objects */
1515 /* -------------------------------------------------------- */
1517 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1518 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1519 Return a new XML parser object.");
1521 static PyObject *
1522 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1524 char *encoding = NULL;
1525 char *namespace_separator = NULL;
1526 PyObject *intern = NULL;
1527 PyObject *result;
1528 int intern_decref = 0;
1529 static char *kwlist[] = {"encoding", "namespace_separator",
1530 "intern", NULL};
1532 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1533 &encoding, &namespace_separator, &intern))
1534 return NULL;
1535 if (namespace_separator != NULL
1536 && strlen(namespace_separator) > 1) {
1537 PyErr_SetString(PyExc_ValueError,
1538 "namespace_separator must be at most one"
1539 " character, omitted, or None");
1540 return NULL;
1542 /* Explicitly passing None means no interning is desired.
1543 Not passing anything means that a new dictionary is used. */
1544 if (intern == Py_None)
1545 intern = NULL;
1546 else if (intern == NULL) {
1547 intern = PyDict_New();
1548 if (!intern)
1549 return NULL;
1550 intern_decref = 1;
1552 else if (!PyDict_Check(intern)) {
1553 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1554 return NULL;
1557 result = newxmlparseobject(encoding, namespace_separator, intern);
1558 if (intern_decref) {
1559 Py_DECREF(intern);
1561 return result;
1564 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1565 "ErrorString(errno) -> string\n\
1566 Returns string error for given number.");
1568 static PyObject *
1569 pyexpat_ErrorString(PyObject *self, PyObject *args)
1571 long code = 0;
1573 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1574 return NULL;
1575 return Py_BuildValue("z", XML_ErrorString((int)code));
1578 /* List of methods defined in the module */
1580 static struct PyMethodDef pyexpat_methods[] = {
1581 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1582 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1583 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1584 METH_VARARGS, pyexpat_ErrorString__doc__},
1586 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
1589 /* Module docstring */
1591 PyDoc_STRVAR(pyexpat_module_documentation,
1592 "Python wrapper for Expat parser.");
1594 /* Return a Python string that represents the version number without the
1595 * extra cruft added by revision control, even if the right options were
1596 * given to the "cvs export" command to make it not include the extra
1597 * cruft.
1599 static PyObject *
1600 get_version_string(void)
1602 static char *rcsid = "$Revision$";
1603 char *rev = rcsid;
1604 int i = 0;
1606 while (!isdigit((int)*rev))
1607 ++rev;
1608 while (rev[i] != ' ' && rev[i] != '\0')
1609 ++i;
1611 return PyString_FromStringAndSize(rev, i);
1614 /* Initialization function for the module */
1616 #ifndef MODULE_NAME
1617 #define MODULE_NAME "pyexpat"
1618 #endif
1620 #ifndef MODULE_INITFUNC
1621 #define MODULE_INITFUNC initpyexpat
1622 #endif
1624 PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
1626 PyMODINIT_FUNC MODULE_INITFUNC(void)
1628 PyObject *m, *d;
1629 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1630 PyObject *errors_module;
1631 PyObject *modelmod_name;
1632 PyObject *model_module;
1633 PyObject *sys_modules;
1635 if (errmod_name == NULL)
1636 return;
1637 modelmod_name = PyString_FromString(MODULE_NAME ".model");
1638 if (modelmod_name == NULL)
1639 return;
1641 Xmlparsetype.ob_type = &PyType_Type;
1643 /* Create the module and add the functions */
1644 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1645 pyexpat_module_documentation);
1647 /* Add some symbolic constants to the module */
1648 if (ErrorObject == NULL) {
1649 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1650 NULL, NULL);
1651 if (ErrorObject == NULL)
1652 return;
1654 Py_INCREF(ErrorObject);
1655 PyModule_AddObject(m, "error", ErrorObject);
1656 Py_INCREF(ErrorObject);
1657 PyModule_AddObject(m, "ExpatError", ErrorObject);
1658 Py_INCREF(&Xmlparsetype);
1659 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1661 PyModule_AddObject(m, "__version__", get_version_string());
1662 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1663 (char *) XML_ExpatVersion());
1665 XML_Expat_Version info = XML_ExpatVersionInfo();
1666 PyModule_AddObject(m, "version_info",
1667 Py_BuildValue("(iii)", info.major,
1668 info.minor, info.micro));
1670 #ifdef Py_USING_UNICODE
1671 init_template_buffer();
1672 #endif
1673 /* XXX When Expat supports some way of figuring out how it was
1674 compiled, this should check and set native_encoding
1675 appropriately.
1677 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1679 sys_modules = PySys_GetObject("modules");
1680 d = PyModule_GetDict(m);
1681 errors_module = PyDict_GetItem(d, errmod_name);
1682 if (errors_module == NULL) {
1683 errors_module = PyModule_New(MODULE_NAME ".errors");
1684 if (errors_module != NULL) {
1685 PyDict_SetItem(sys_modules, errmod_name, errors_module);
1686 /* gives away the reference to errors_module */
1687 PyModule_AddObject(m, "errors", errors_module);
1690 Py_DECREF(errmod_name);
1691 model_module = PyDict_GetItem(d, modelmod_name);
1692 if (model_module == NULL) {
1693 model_module = PyModule_New(MODULE_NAME ".model");
1694 if (model_module != NULL) {
1695 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1696 /* gives away the reference to model_module */
1697 PyModule_AddObject(m, "model", model_module);
1700 Py_DECREF(modelmod_name);
1701 if (errors_module == NULL || model_module == NULL)
1702 /* Don't core dump later! */
1703 return;
1705 #define MYCONST(name) \
1706 PyModule_AddStringConstant(errors_module, #name, \
1707 (char*)XML_ErrorString(name))
1709 MYCONST(XML_ERROR_NO_MEMORY);
1710 MYCONST(XML_ERROR_SYNTAX);
1711 MYCONST(XML_ERROR_NO_ELEMENTS);
1712 MYCONST(XML_ERROR_INVALID_TOKEN);
1713 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1714 MYCONST(XML_ERROR_PARTIAL_CHAR);
1715 MYCONST(XML_ERROR_TAG_MISMATCH);
1716 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1717 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1718 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1719 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1720 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1721 MYCONST(XML_ERROR_ASYNC_ENTITY);
1722 MYCONST(XML_ERROR_BAD_CHAR_REF);
1723 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1724 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1725 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1726 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1727 MYCONST(XML_ERROR_INCORRECT_ENCODING);
1728 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1729 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1730 MYCONST(XML_ERROR_NOT_STANDALONE);
1732 PyModule_AddStringConstant(errors_module, "__doc__",
1733 "Constants used to describe error conditions.");
1735 #undef MYCONST
1737 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1738 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1739 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1740 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1741 #undef MYCONST
1743 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1744 PyModule_AddStringConstant(model_module, "__doc__",
1745 "Constants used to interpret content model information.");
1747 MYCONST(XML_CTYPE_EMPTY);
1748 MYCONST(XML_CTYPE_ANY);
1749 MYCONST(XML_CTYPE_MIXED);
1750 MYCONST(XML_CTYPE_NAME);
1751 MYCONST(XML_CTYPE_CHOICE);
1752 MYCONST(XML_CTYPE_SEQ);
1754 MYCONST(XML_CQUANT_NONE);
1755 MYCONST(XML_CQUANT_OPT);
1756 MYCONST(XML_CQUANT_REP);
1757 MYCONST(XML_CQUANT_PLUS);
1758 #undef MYCONST
1761 static void
1762 clear_handlers(xmlparseobject *self, int initial)
1764 int i = 0;
1765 PyObject *temp;
1767 for (; handler_info[i].name != NULL; i++) {
1768 if (initial)
1769 self->handlers[i] = NULL;
1770 else {
1771 temp = self->handlers[i];
1772 self->handlers[i] = NULL;
1773 Py_XDECREF(temp);
1774 handler_info[i].setter(self->itself, NULL);
1779 static struct HandlerInfo handler_info[] = {
1780 {"StartElementHandler",
1781 (xmlhandlersetter)XML_SetStartElementHandler,
1782 (xmlhandler)my_StartElementHandler},
1783 {"EndElementHandler",
1784 (xmlhandlersetter)XML_SetEndElementHandler,
1785 (xmlhandler)my_EndElementHandler},
1786 {"ProcessingInstructionHandler",
1787 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1788 (xmlhandler)my_ProcessingInstructionHandler},
1789 {"CharacterDataHandler",
1790 (xmlhandlersetter)XML_SetCharacterDataHandler,
1791 (xmlhandler)my_CharacterDataHandler},
1792 {"UnparsedEntityDeclHandler",
1793 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
1794 (xmlhandler)my_UnparsedEntityDeclHandler},
1795 {"NotationDeclHandler",
1796 (xmlhandlersetter)XML_SetNotationDeclHandler,
1797 (xmlhandler)my_NotationDeclHandler},
1798 {"StartNamespaceDeclHandler",
1799 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
1800 (xmlhandler)my_StartNamespaceDeclHandler},
1801 {"EndNamespaceDeclHandler",
1802 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
1803 (xmlhandler)my_EndNamespaceDeclHandler},
1804 {"CommentHandler",
1805 (xmlhandlersetter)XML_SetCommentHandler,
1806 (xmlhandler)my_CommentHandler},
1807 {"StartCdataSectionHandler",
1808 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
1809 (xmlhandler)my_StartCdataSectionHandler},
1810 {"EndCdataSectionHandler",
1811 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
1812 (xmlhandler)my_EndCdataSectionHandler},
1813 {"DefaultHandler",
1814 (xmlhandlersetter)XML_SetDefaultHandler,
1815 (xmlhandler)my_DefaultHandler},
1816 {"DefaultHandlerExpand",
1817 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
1818 (xmlhandler)my_DefaultHandlerExpandHandler},
1819 {"NotStandaloneHandler",
1820 (xmlhandlersetter)XML_SetNotStandaloneHandler,
1821 (xmlhandler)my_NotStandaloneHandler},
1822 {"ExternalEntityRefHandler",
1823 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
1824 (xmlhandler)my_ExternalEntityRefHandler},
1825 {"StartDoctypeDeclHandler",
1826 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
1827 (xmlhandler)my_StartDoctypeDeclHandler},
1828 {"EndDoctypeDeclHandler",
1829 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
1830 (xmlhandler)my_EndDoctypeDeclHandler},
1831 {"EntityDeclHandler",
1832 (xmlhandlersetter)XML_SetEntityDeclHandler,
1833 (xmlhandler)my_EntityDeclHandler},
1834 {"XmlDeclHandler",
1835 (xmlhandlersetter)XML_SetXmlDeclHandler,
1836 (xmlhandler)my_XmlDeclHandler},
1837 {"ElementDeclHandler",
1838 (xmlhandlersetter)XML_SetElementDeclHandler,
1839 (xmlhandler)my_ElementDeclHandler},
1840 {"AttlistDeclHandler",
1841 (xmlhandlersetter)XML_SetAttlistDeclHandler,
1842 (xmlhandler)my_AttlistDeclHandler},
1844 {NULL, NULL, NULL} /* sentinel */