5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
15 #define MODULE_VERSION "1.0"
18 #include "structmember.h"
21 /* begin 2.2 compatibility macros */
23 /* Define macros for inline documentation. */
24 #define PyDoc_VAR(name) static char name[]
25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
26 #ifdef WITH_DOC_STRINGS
27 #define PyDoc_STR(str) str
29 #define PyDoc_STR(str) ""
31 #endif /* ifndef PyDoc_STRVAR */
33 #ifndef PyMODINIT_FUNC
34 # if defined(__cplusplus)
35 # define PyMODINIT_FUNC extern "C" void
36 # else /* __cplusplus */
37 # define PyMODINIT_FUNC void
38 # endif /* __cplusplus */
40 /* end 2.2 compatibility macros */
42 static PyObject
*error_obj
; /* CSV exception */
43 static PyObject
*dialects
; /* Dialect registry */
46 START_RECORD
, START_FIELD
, ESCAPED_CHAR
, IN_FIELD
,
47 IN_QUOTED_FIELD
, ESCAPE_IN_QUOTED_FIELD
, QUOTE_IN_QUOTED_FIELD
51 QUOTE_MINIMAL
, QUOTE_ALL
, QUOTE_NONNUMERIC
, QUOTE_NONE
59 static StyleDesc quote_styles
[] = {
60 { QUOTE_MINIMAL
, "QUOTE_MINIMAL" },
61 { QUOTE_ALL
, "QUOTE_ALL" },
62 { QUOTE_NONNUMERIC
, "QUOTE_NONNUMERIC" },
63 { QUOTE_NONE
, "QUOTE_NONE" },
70 int doublequote
; /* is " represented by ""? */
71 char delimiter
; /* field separator */
72 char quotechar
; /* quote character */
73 char escapechar
; /* escape character */
74 int skipinitialspace
; /* ignore spaces following delimiter? */
75 PyObject
*lineterminator
; /* string to write between records */
76 QuoteStyle quoting
; /* style of quoting to write */
78 int strict
; /* raise exception on bad CSV */
81 staticforward PyTypeObject Dialect_Type
;
86 PyObject
*input_iter
; /* iterate over this for input lines */
88 DialectObj
*dialect
; /* parsing dialect */
90 PyObject
*fields
; /* field list for current record */
91 ParserState state
; /* current CSV parse state */
92 char *field
; /* build current field in here */
93 int field_size
; /* size of allocated buffer */
94 int field_len
; /* length of current field */
95 int had_parse_error
; /* did we have a parse error? */
98 staticforward PyTypeObject Reader_Type
;
100 #define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
105 PyObject
*writeline
; /* write output lines to this file */
107 DialectObj
*dialect
; /* parsing dialect */
109 char *rec
; /* buffer for parser.join */
110 int rec_size
; /* size of allocated record */
111 int rec_len
; /* length of record */
112 int num_fields
; /* number of fields in record */
115 staticforward PyTypeObject Writer_Type
;
122 get_dialect_from_registry(PyObject
* name_obj
)
124 PyObject
*dialect_obj
;
126 dialect_obj
= PyDict_GetItem(dialects
, name_obj
);
127 if (dialect_obj
== NULL
)
128 return PyErr_Format(error_obj
, "unknown dialect");
129 Py_INCREF(dialect_obj
);
/*
 * Helper for the attribute setters in this file: raises TypeError
 * ("Cannot delete attribute").  Callers test the result with
 * `check_delattr(v) < 0`, so a negative value signals failure.
 *
 * NOTE(review): the condition guarding the error is not visible in this
 * listing; presumably it fires when v is NULL (i.e. the attribute is
 * being deleted) -- confirm against the full source.
 */
134 check_delattr(PyObject
*v
)
137 PyErr_SetString(PyExc_TypeError
,
138 "Cannot delete attribute");
145 get_string(PyObject
*str
)
152 set_string(PyObject
**str
, PyObject
*v
)
154 if (check_delattr(v
) < 0)
156 if (!PyString_Check(v
)
157 #ifdef Py_USING_UNICODE
158 && !PyUnicode_Check(v
)
171 get_nullchar_as_None(char c
)
178 return PyString_FromStringAndSize((char*)&c
, 1);
182 set_None_as_nullchar(char * addr
, PyObject
*v
)
184 if (check_delattr(v
) < 0)
188 else if (!PyString_Check(v
) || PyString_Size(v
) != 1) {
193 char *s
= PyString_AsString(v
);
202 Dialect_get_lineterminator(DialectObj
*self
)
204 return get_string(self
->lineterminator
);
208 Dialect_set_lineterminator(DialectObj
*self
, PyObject
*value
)
210 return set_string(&self
->lineterminator
, value
);
214 Dialect_get_escapechar(DialectObj
*self
)
216 return get_nullchar_as_None(self
->escapechar
);
220 Dialect_set_escapechar(DialectObj
*self
, PyObject
*value
)
222 return set_None_as_nullchar(&self
->escapechar
, value
);
226 Dialect_get_quoting(DialectObj
*self
)
228 return PyInt_FromLong(self
->quoting
);
232 Dialect_set_quoting(DialectObj
*self
, PyObject
*v
)
235 StyleDesc
*qs
= quote_styles
;
237 if (check_delattr(v
) < 0)
239 if (!PyInt_Check(v
)) {
243 quoting
= PyInt_AsLong(v
);
244 for (qs
= quote_styles
; qs
->name
; qs
++) {
245 if (qs
->style
== quoting
) {
246 self
->quoting
= quoting
;
254 static struct PyMethodDef Dialect_methods
[] = {
258 #define D_OFF(x) offsetof(DialectObj, x)
260 static struct PyMemberDef Dialect_memberlist
[] = {
261 { "quotechar", T_CHAR
, D_OFF(quotechar
) },
262 { "delimiter", T_CHAR
, D_OFF(delimiter
) },
263 { "skipinitialspace", T_INT
, D_OFF(skipinitialspace
) },
264 { "doublequote", T_INT
, D_OFF(doublequote
) },
265 { "strict", T_INT
, D_OFF(strict
) },
269 static PyGetSetDef Dialect_getsetlist
[] = {
270 { "escapechar", (getter
)Dialect_get_escapechar
,
271 (setter
)Dialect_set_escapechar
},
272 { "lineterminator", (getter
)Dialect_get_lineterminator
,
273 (setter
)Dialect_set_lineterminator
},
274 { "quoting", (getter
)Dialect_get_quoting
,
275 (setter
)Dialect_set_quoting
},
280 Dialect_dealloc(DialectObj
*self
)
282 Py_XDECREF(self
->lineterminator
);
283 self
->ob_type
->tp_free((PyObject
*)self
);
287 dialect_init(DialectObj
* self
, PyObject
* args
, PyObject
* kwargs
)
289 PyObject
*dialect
= NULL
, *name_obj
, *value_obj
;
291 self
->quotechar
= '"';
292 self
->delimiter
= ',';
293 self
->escapechar
= '\0';
294 self
->skipinitialspace
= 0;
295 Py_XDECREF(self
->lineterminator
);
296 self
->lineterminator
= PyString_FromString("\r\n");
297 if (self
->lineterminator
== NULL
)
299 self
->quoting
= QUOTE_MINIMAL
;
300 self
->doublequote
= 1;
303 if (!PyArg_ParseTuple(args
, "|O", &dialect
))
306 if (kwargs
!= NULL
) {
307 PyObject
* key
= PyString_FromString("dialect");
310 d
= PyDict_GetItem(kwargs
, key
);
314 PyDict_DelItem(kwargs
, key
);
319 if (dialect
!= NULL
) {
323 /* If dialect is a string, look it up in our registry */
324 if (PyString_Check(dialect
)
325 #ifdef Py_USING_UNICODE
326 || PyUnicode_Check(dialect
)
330 new_dia
= get_dialect_from_registry(dialect
);
336 /* A class rather than an instance? Instantiate */
337 if (PyObject_TypeCheck(dialect
, &PyClass_Type
)) {
339 new_dia
= PyObject_CallFunction(dialect
, "");
345 /* Make sure we finally have an instance */
346 if (!PyInstance_Check(dialect
) ||
347 (dir_list
= PyObject_Dir(dialect
)) == NULL
) {
348 PyErr_SetString(PyExc_TypeError
,
349 "dialect must be an instance");
353 /* And extract the attributes */
354 for (i
= 0; i
< PyList_GET_SIZE(dir_list
); ++i
) {
356 name_obj
= PyList_GET_ITEM(dir_list
, i
);
357 s
= PyString_AsString(name_obj
);
362 value_obj
= PyObject_GetAttr(dialect
, name_obj
);
364 if (PyObject_SetAttr((PyObject
*)self
,
365 name_obj
, value_obj
)) {
366 Py_DECREF(value_obj
);
371 Py_DECREF(value_obj
);
377 if (kwargs
!= NULL
) {
380 while (PyDict_Next(kwargs
, &pos
, &name_obj
, &value_obj
)) {
381 if (PyObject_SetAttr((PyObject
*)self
,
382 name_obj
, value_obj
))
390 dialect_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwargs
)
393 self
= (DialectObj
*)type
->tp_alloc(type
, 0);
395 self
->lineterminator
= NULL
;
397 return (PyObject
*)self
;
401 PyDoc_STRVAR(Dialect_Type_doc
,
404 "The Dialect type records CSV parsing and generation options.\n");
406 static PyTypeObject Dialect_Type
= {
407 PyObject_HEAD_INIT(NULL
)
409 "_csv.Dialect", /* tp_name */
410 sizeof(DialectObj
), /* tp_basicsize */
413 (destructor
)Dialect_dealloc
, /* tp_dealloc */
414 (printfunc
)0, /* tp_print */
415 (getattrfunc
)0, /* tp_getattr */
416 (setattrfunc
)0, /* tp_setattr */
417 (cmpfunc
)0, /* tp_compare */
418 (reprfunc
)0, /* tp_repr */
419 0, /* tp_as_number */
420 0, /* tp_as_sequence */
421 0, /* tp_as_mapping */
422 (hashfunc
)0, /* tp_hash */
423 (ternaryfunc
)0, /* tp_call */
424 (reprfunc
)0, /* tp_str */
427 0, /* tp_as_buffer */
428 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
429 Dialect_Type_doc
, /* tp_doc */
432 0, /* tp_richcompare */
433 0, /* tp_weaklistoffset */
436 Dialect_methods
, /* tp_methods */
437 Dialect_memberlist
, /* tp_members */
438 Dialect_getsetlist
, /* tp_getset */
441 0, /* tp_descr_get */
442 0, /* tp_descr_set */
443 0, /* tp_dictoffset */
444 (initproc
)dialect_init
, /* tp_init */
445 PyType_GenericAlloc
, /* tp_alloc */
446 dialect_new
, /* tp_new */
/*
 * Turn the bytes accumulated in self->field (self->field_len of them)
 * into a Python string and append it to the self->fields list.
 *
 * NOTE(review): in the lines visible here neither the result of
 * PyString_FromStringAndSize() nor that of PyList_Append() is checked,
 * and no release of the new string or reset of field_len is shown --
 * several lines of this function are missing from the listing; confirm
 * against the full source.
 */
451 parse_save_field(ReaderObj
*self
)
455 field
= PyString_FromStringAndSize(self
->field
, self
->field_len
);
457 PyList_Append(self
->fields
, field
);
464 parse_grow_buff(ReaderObj
*self
)
466 if (self
->field_size
== 0) {
467 self
->field_size
= 4096;
468 if (self
->field
!= NULL
)
469 PyMem_Free(self
->field
);
470 self
->field
= PyMem_Malloc(self
->field_size
);
473 self
->field_size
*= 2;
474 self
->field
= PyMem_Realloc(self
->field
, self
->field_size
);
476 if (self
->field
== NULL
) {
484 parse_add_char(ReaderObj
*self
, char c
)
486 if (self
->field_len
== self
->field_size
&& !parse_grow_buff(self
))
488 self
->field
[self
->field_len
++] = c
;
492 parse_process_char(ReaderObj
*self
, char c
)
494 DialectObj
*dialect
= self
->dialect
;
496 switch (self
->state
) {
498 /* start of record */
500 /* empty line - return [] */
502 /* normal character - handle as START_FIELD */
503 self
->state
= START_FIELD
;
506 /* expecting field */
508 /* save empty field - return [fields] */
509 parse_save_field(self
);
510 self
->state
= START_RECORD
;
512 else if (c
== dialect
->quotechar
) {
513 /* start quoted field */
514 self
->state
= IN_QUOTED_FIELD
;
516 else if (c
== dialect
->escapechar
) {
517 /* possible escaped character */
518 self
->state
= ESCAPED_CHAR
;
520 else if (c
== ' ' && dialect
->skipinitialspace
)
521 /* ignore space at start of field */
523 else if (c
== dialect
->delimiter
) {
524 /* save empty field */
525 parse_save_field(self
);
528 /* begin new unquoted field */
529 parse_add_char(self
, c
);
530 self
->state
= IN_FIELD
;
535 if (c
!= dialect
->escapechar
&&
536 c
!= dialect
->delimiter
&&
537 c
!= dialect
->quotechar
)
538 parse_add_char(self
, dialect
->escapechar
);
539 parse_add_char(self
, c
);
540 self
->state
= IN_FIELD
;
544 /* in unquoted field */
546 /* end of line - return [fields] */
547 parse_save_field(self
);
548 self
->state
= START_RECORD
;
550 else if (c
== dialect
->escapechar
) {
551 /* possible escaped character */
552 self
->state
= ESCAPED_CHAR
;
554 else if (c
== dialect
->delimiter
) {
555 /* save field - wait for new field */
556 parse_save_field(self
);
557 self
->state
= START_FIELD
;
560 /* normal character - save in field */
561 parse_add_char(self
, c
);
565 case IN_QUOTED_FIELD
:
566 /* in quoted field */
568 /* end of line - save '\n' in field */
569 parse_add_char(self
, '\n');
571 else if (c
== dialect
->escapechar
) {
572 /* Possible escape character */
573 self
->state
= ESCAPE_IN_QUOTED_FIELD
;
575 else if (c
== dialect
->quotechar
) {
576 if (dialect
->doublequote
) {
577 /* doublequote; " represented by "" */
578 self
->state
= QUOTE_IN_QUOTED_FIELD
;
581 /* end of quote part of field */
582 self
->state
= IN_FIELD
;
586 /* normal character - save in field */
587 parse_add_char(self
, c
);
591 case ESCAPE_IN_QUOTED_FIELD
:
592 if (c
!= dialect
->escapechar
&&
593 c
!= dialect
->delimiter
&&
594 c
!= dialect
->quotechar
)
595 parse_add_char(self
, dialect
->escapechar
);
596 parse_add_char(self
, c
);
597 self
->state
= IN_QUOTED_FIELD
;
600 case QUOTE_IN_QUOTED_FIELD
:
601 /* doublequote - seen a quote in a quoted field */
602 if (dialect
->quoting
!= QUOTE_NONE
&&
603 c
== dialect
->quotechar
) {
605 parse_add_char(self
, c
);
606 self
->state
= IN_QUOTED_FIELD
;
608 else if (c
== dialect
->delimiter
) {
609 /* save field - wait for new field */
610 parse_save_field(self
);
611 self
->state
= START_FIELD
;
613 else if (c
== '\n') {
614 /* end of line - return [fields] */
615 parse_save_field(self
);
616 self
->state
= START_RECORD
;
618 else if (!dialect
->strict
) {
619 parse_add_char(self
, c
);
620 self
->state
= IN_FIELD
;
624 self
->had_parse_error
= 1;
625 PyErr_Format(error_obj
, "%c expected after %c",
637 #define R_OFF(x) offsetof(ReaderObj, x)
639 static struct PyMemberDef Reader_memberlist
[] = {
640 { "dialect", T_OBJECT
, R_OFF(dialect
), RO
},
645 Reader_getiter(ReaderObj
*self
)
648 return (PyObject
*)self
;
652 Reader_iternext(ReaderObj
*self
)
659 lineobj
= PyIter_Next(self
->input_iter
);
660 if (lineobj
== NULL
) {
661 /* End of input OR exception */
662 if (!PyErr_Occurred() && self
->field_len
!= 0)
663 return PyErr_Format(error_obj
,
664 "newline inside string");
668 if (self
->had_parse_error
) {
670 Py_XDECREF(self
->fields
);
672 self
->fields
= PyList_New(0);
674 self
->state
= START_RECORD
;
675 self
->had_parse_error
= 0;
677 line
= PyString_AsString(lineobj
);
683 if (strlen(line
) < (size_t)PyString_GET_SIZE(lineobj
)) {
684 self
->had_parse_error
= 1;
686 return PyErr_Format(error_obj
,
687 "string with NUL bytes");
690 /* Process line of text - send '\n' to processing code to
691 represent end of line. End of line which is not at end of
692 string is an error. */
700 /* macintosh end of line */
705 /* DOS end of line */
708 self
->had_parse_error
= 1;
710 return PyErr_Format(error_obj
,
711 "newline inside string");
716 /* unix end of line */
718 self
->had_parse_error
= 1;
720 return PyErr_Format(error_obj
,
721 "newline inside string");
723 parse_process_char(self
, c
);
724 if (PyErr_Occurred()) {
729 parse_process_char(self
, '\n');
731 } while (self
->state
!= START_RECORD
);
733 fields
= self
->fields
;
734 self
->fields
= PyList_New(0);
739 Reader_dealloc(ReaderObj
*self
)
741 Py_XDECREF(self
->dialect
);
742 Py_XDECREF(self
->input_iter
);
743 Py_XDECREF(self
->fields
);
744 if (self
->field
!= NULL
)
745 PyMem_Free(self
->field
);
746 PyObject_GC_Del(self
);
/*
 * tp_traverse for reader objects: reports the objects the reader refers
 * to (its dialect and its input iterator) to the garbage collector by
 * calling `visit` on each through the local VISIT macro.
 *
 * NOTE(review): no VISIT of self->fields is visible in this listing even
 * though Reader_clear and Reader_dealloc release that member -- confirm
 * against the full source whether the field list is traversed.
 */
750 Reader_traverse(ReaderObj
*self
, visitproc visit
, void *arg
)
753 #define VISIT(SLOT) \
755 err = visit((PyObject *)(SLOT), arg); \
759 VISIT(self
->dialect
);
760 VISIT(self
->input_iter
);
766 Reader_clear(ReaderObj
*self
)
768 Py_XDECREF(self
->dialect
);
769 Py_XDECREF(self
->input_iter
);
770 Py_XDECREF(self
->fields
);
771 self
->dialect
= NULL
;
772 self
->input_iter
= NULL
;
777 PyDoc_STRVAR(Reader_Type_doc
,
780 "Reader objects are responsible for reading and parsing tabular data\n"
784 static struct PyMethodDef Reader_methods
[] = {
788 static PyTypeObject Reader_Type
= {
789 PyObject_HEAD_INIT(NULL
)
791 "_csv.reader", /*tp_name*/
792 sizeof(ReaderObj
), /*tp_basicsize*/
795 (destructor
)Reader_dealloc
, /*tp_dealloc*/
796 (printfunc
)0, /*tp_print*/
797 (getattrfunc
)0, /*tp_getattr*/
798 (setattrfunc
)0, /*tp_setattr*/
799 (cmpfunc
)0, /*tp_compare*/
800 (reprfunc
)0, /*tp_repr*/
802 0, /*tp_as_sequence*/
804 (hashfunc
)0, /*tp_hash*/
805 (ternaryfunc
)0, /*tp_call*/
806 (reprfunc
)0, /*tp_str*/
810 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
811 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
812 Reader_Type_doc
, /*tp_doc*/
813 (traverseproc
)Reader_traverse
, /*tp_traverse*/
814 (inquiry
)Reader_clear
, /*tp_clear*/
815 0, /*tp_richcompare*/
816 0, /*tp_weaklistoffset*/
817 (getiterfunc
)Reader_getiter
, /*tp_iter*/
818 (getiterfunc
)Reader_iternext
, /*tp_iternext*/
819 Reader_methods
, /*tp_methods*/
820 Reader_memberlist
, /*tp_members*/
826 csv_reader(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
828 PyObject
* iterator
, * dialect
= NULL
, *ctor_args
;
829 ReaderObj
* self
= PyObject_GC_New(ReaderObj
, &Reader_Type
);
834 self
->dialect
= NULL
;
835 self
->input_iter
= self
->fields
= NULL
;
838 self
->input_iter
= NULL
;
839 self
->had_parse_error
= 0;
841 self
->field_size
= 0;
843 self
->state
= START_RECORD
;
845 if (!PyArg_ParseTuple(args
, "O|O", &iterator
, &dialect
)) {
849 self
->input_iter
= PyObject_GetIter(iterator
);
850 if (self
->input_iter
== NULL
) {
851 PyErr_SetString(PyExc_TypeError
,
852 "argument 1 must be an iterator");
856 ctor_args
= Py_BuildValue(dialect
? "(O)" : "()", dialect
);
857 if (ctor_args
== NULL
) {
861 self
->dialect
= (DialectObj
*)PyObject_Call((PyObject
*)&Dialect_Type
,
862 ctor_args
, keyword_args
);
863 Py_DECREF(ctor_args
);
864 if (self
->dialect
== NULL
) {
868 self
->fields
= PyList_New(0);
869 if (self
->fields
== NULL
) {
874 return (PyObject
*)self
;
880 /* ---------------------------------------------------------------- */
/*
 * Reset the writer's per-record state before a new record is joined.
 * The visible statement zeroes the field counter (num_fields).
 * NOTE(review): one line of this function is missing from the listing;
 * presumably it also resets rec_len -- confirm against the full source.
 */
882 join_reset(WriterObj
*self
)
885 self
->num_fields
= 0;
888 #define MEM_INCR 32768
890 /* Calculate new record length or append field to record. Return new
894 join_append_data(WriterObj
*self
, char *field
, int quote_empty
,
895 int *quoted
, int copy_phase
)
897 DialectObj
*dialect
= self
->dialect
;
900 rec_len
= self
->rec_len
;
902 /* If this is not the first field we need a field separator.
904 if (self
->num_fields
> 0) {
906 self
->rec
[rec_len
] = dialect
->delimiter
;
909 /* Handle preceding quote.
911 switch (dialect
->quoting
) {
915 self
->rec
[rec_len
] = dialect
->quotechar
;
919 case QUOTE_NONNUMERIC
:
920 /* We only know about quoted in the copy phase.
922 if (copy_phase
&& *quoted
) {
923 self
->rec
[rec_len
] = dialect
->quotechar
;
930 /* Copy/count field data.
937 /* If in doublequote mode we escape quote chars with a
940 if (dialect
->quoting
!= QUOTE_NONE
&&
941 c
== dialect
->quotechar
&& dialect
->doublequote
) {
943 self
->rec
[rec_len
] = dialect
->quotechar
;
948 /* Some special characters need to be escaped. If we have a
949 * quote character switch to quoted field instead of escaping
950 * individual characters.
953 && (c
== dialect
->delimiter
||
954 c
== dialect
->escapechar
||
955 c
== '\n' || c
== '\r')) {
956 if (dialect
->quoting
!= QUOTE_NONE
)
958 else if (dialect
->escapechar
) {
960 self
->rec
[rec_len
] = dialect
->escapechar
;
964 PyErr_Format(error_obj
,
965 "delimiter must be quoted or escaped");
969 /* Copy field character into record buffer.
972 self
->rec
[rec_len
] = c
;
976 /* If field is empty check if it needs to be quoted.
978 if (i
== 0 && quote_empty
) {
979 if (dialect
->quoting
== QUOTE_NONE
) {
980 PyErr_Format(error_obj
,
981 "single empty field record must be quoted");
987 /* Handle final quote character on field.
991 self
->rec
[rec_len
] = dialect
->quotechar
;
993 /* Didn't know about leading quote until we found it
994 * necessary in field data - compensate for it now.
1004 join_check_rec_size(WriterObj
*self
, int rec_len
)
1006 if (rec_len
> self
->rec_size
) {
1007 if (self
->rec_size
== 0) {
1008 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1009 if (self
->rec
!= NULL
)
1010 PyMem_Free(self
->rec
);
1011 self
->rec
= PyMem_Malloc(self
->rec_size
);
1014 char *old_rec
= self
->rec
;
1016 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1017 self
->rec
= PyMem_Realloc(self
->rec
, self
->rec_size
);
1018 if (self
->rec
== NULL
)
1019 PyMem_Free(old_rec
);
1021 if (self
->rec
== NULL
) {
/*
 * Append one field to the record being built, in two passes over
 * join_append_data():
 *   1. with copy_phase == 0 to obtain the record length the field needs;
 *   2. after the record buffer has been grown to that length, with
 *      copy_phase == 1 to actually write the data; the result becomes
 *      the new self->rec_len.
 *
 * NOTE(review): the handling of a failed first pass, the update of
 * num_fields and the return statements are on lines missing from this
 * listing -- confirm against the full source.
 */
1030 join_append(WriterObj
*self
, char *field
, int *quoted
, int quote_empty
)
1034 rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 0);
1038 /* grow record buffer if necessary */
1039 if (!join_check_rec_size(self
, rec_len
))
1042 self
->rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 1);
1049 join_append_lineterminator(WriterObj
*self
)
1053 terminator_len
= PyString_Size(self
->dialect
->lineterminator
);
1055 /* grow record buffer if necessary */
1056 if (!join_check_rec_size(self
, self
->rec_len
+ terminator_len
))
1059 memmove(self
->rec
+ self
->rec_len
,
1060 /* should not be NULL */
1061 PyString_AsString(self
->dialect
->lineterminator
),
1063 self
->rec_len
+= terminator_len
;
1068 PyDoc_STRVAR(csv_writerow_doc
,
1069 "writerow(sequence)\n"
1071 "Construct and write a CSV record from a sequence of fields. Non-string\n"
1072 "elements will be converted to string.");
1075 csv_writerow(WriterObj
*self
, PyObject
*seq
)
1077 DialectObj
*dialect
= self
->dialect
;
1080 if (!PySequence_Check(seq
))
1081 return PyErr_Format(error_obj
, "sequence expected");
1083 len
= PySequence_Length(seq
);
1087 /* Join all fields in internal buffer.
1090 for (i
= 0; i
< len
; i
++) {
1095 field
= PySequence_GetItem(seq
, i
);
1100 if (dialect
->quoting
== QUOTE_NONNUMERIC
) {
1103 num
= PyNumber_Float(field
);
1113 if (PyString_Check(field
)) {
1114 append_ok
= join_append(self
,
1115 PyString_AS_STRING(field
),
1119 else if (field
== Py_None
) {
1120 append_ok
= join_append(self
, "", "ed
, len
== 1);
1126 str
= PyObject_Str(field
);
1131 append_ok
= join_append(self
, PyString_AS_STRING(str
),
1139 /* Add line terminator.
1141 if (!join_append_lineterminator(self
))
1144 return PyObject_CallFunction(self
->writeline
,
1145 "(s#)", self
->rec
, self
->rec_len
);
1148 PyDoc_STRVAR(csv_writerows_doc
,
1149 "writerows(sequence of sequences)\n"
1151 "Construct and write a series of sequences to a csv file. Non-string\n"
1152 "elements will be converted to string.");
/*
 * writerows(): write every row produced by iterating `seqseq`.
 *
 * Obtains an iterator over the argument (TypeError "writerows() argument
 * must be iterable" when that fails), then passes each item to
 * csv_writerow().  The iterator is released both on the early-exit path
 * inside the loop and after the loop; PyErr_Occurred() is consulted
 * after the loop to tell exhaustion apart from an iteration error.
 *
 * NOTE(review): the release of row_obj / result and the return
 * statements are on lines missing from this listing -- confirm against
 * the full source.
 */
1155 csv_writerows(WriterObj
*self
, PyObject
*seqseq
)
1157 PyObject
*row_iter
, *row_obj
, *result
;
1159 row_iter
= PyObject_GetIter(seqseq
);
1160 if (row_iter
== NULL
) {
1161 PyErr_SetString(PyExc_TypeError
,
1162 "writerows() argument must be iterable");
1165 while ((row_obj
= PyIter_Next(row_iter
))) {
1166 result
= csv_writerow(self
, row_obj
);
1169 Py_DECREF(row_iter
);
1175 Py_DECREF(row_iter
);
1176 if (PyErr_Occurred())
1182 static struct PyMethodDef Writer_methods
[] = {
1183 { "writerow", (PyCFunction
)csv_writerow
, METH_O
, csv_writerow_doc
},
1184 { "writerows", (PyCFunction
)csv_writerows
, METH_O
, csv_writerows_doc
},
1188 #define W_OFF(x) offsetof(WriterObj, x)
1190 static struct PyMemberDef Writer_memberlist
[] = {
1191 { "dialect", T_OBJECT
, W_OFF(dialect
), RO
},
1196 Writer_dealloc(WriterObj
*self
)
1198 Py_XDECREF(self
->dialect
);
1199 Py_XDECREF(self
->writeline
);
1200 if (self
->rec
!= NULL
)
1201 PyMem_Free(self
->rec
);
1202 PyObject_GC_Del(self
);
/*
 * tp_traverse for writer objects: reports the objects the writer refers
 * to (its dialect and its write callable) to the garbage collector by
 * calling `visit` on each through the local VISIT macro.
 *
 * NOTE(review): the macro body and the return statement are only partly
 * visible in this listing.
 */
1206 Writer_traverse(WriterObj
*self
, visitproc visit
, void *arg
)
1209 #define VISIT(SLOT) \
1211 err = visit((PyObject *)(SLOT), arg); \
1215 VISIT(self
->dialect
);
1216 VISIT(self
->writeline
);
1221 Writer_clear(WriterObj
*self
)
1223 Py_XDECREF(self
->dialect
);
1224 Py_XDECREF(self
->writeline
);
1225 self
->dialect
= NULL
;
1226 self
->writeline
= NULL
;
1230 PyDoc_STRVAR(Writer_Type_doc
,
1233 "Writer objects are responsible for generating tabular data\n"
1234 "in CSV format from sequence input.\n"
1237 static PyTypeObject Writer_Type
= {
1238 PyObject_HEAD_INIT(NULL
)
1240 "_csv.writer", /*tp_name*/
1241 sizeof(WriterObj
), /*tp_basicsize*/
1244 (destructor
)Writer_dealloc
, /*tp_dealloc*/
1245 (printfunc
)0, /*tp_print*/
1246 (getattrfunc
)0, /*tp_getattr*/
1247 (setattrfunc
)0, /*tp_setattr*/
1248 (cmpfunc
)0, /*tp_compare*/
1249 (reprfunc
)0, /*tp_repr*/
1251 0, /*tp_as_sequence*/
1252 0, /*tp_as_mapping*/
1253 (hashfunc
)0, /*tp_hash*/
1254 (ternaryfunc
)0, /*tp_call*/
1255 (reprfunc
)0, /*tp_str*/
1259 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
1260 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1262 (traverseproc
)Writer_traverse
, /*tp_traverse*/
1263 (inquiry
)Writer_clear
, /*tp_clear*/
1264 0, /*tp_richcompare*/
1265 0, /*tp_weaklistoffset*/
1266 (getiterfunc
)0, /*tp_iter*/
1267 (getiterfunc
)0, /*tp_iternext*/
1268 Writer_methods
, /*tp_methods*/
1269 Writer_memberlist
, /*tp_members*/
1274 csv_writer(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
1276 PyObject
* output_file
, * dialect
= NULL
, *ctor_args
;
1277 WriterObj
* self
= PyObject_GC_New(WriterObj
, &Writer_Type
);
1282 self
->dialect
= NULL
;
1283 self
->writeline
= NULL
;
1288 self
->num_fields
= 0;
1290 if (!PyArg_ParseTuple(args
, "O|O", &output_file
, &dialect
)) {
1294 self
->writeline
= PyObject_GetAttrString(output_file
, "write");
1295 if (self
->writeline
== NULL
|| !PyCallable_Check(self
->writeline
)) {
1296 PyErr_SetString(PyExc_TypeError
,
1297 "argument 1 must be an instance with a write method");
1301 ctor_args
= Py_BuildValue(dialect
? "(O)" : "()", dialect
);
1302 if (ctor_args
== NULL
) {
1306 self
->dialect
= (DialectObj
*)PyObject_Call((PyObject
*)&Dialect_Type
,
1307 ctor_args
, keyword_args
);
1308 Py_DECREF(ctor_args
);
1309 if (self
->dialect
== NULL
) {
1313 return (PyObject
*)self
;
1320 csv_list_dialects(PyObject
*module
, PyObject
*args
)
1322 return PyDict_Keys(dialects
);
1326 csv_register_dialect(PyObject
*module
, PyObject
*args
)
1328 PyObject
*name_obj
, *dialect_obj
;
1330 if (!PyArg_ParseTuple(args
, "OO", &name_obj
, &dialect_obj
))
1332 if (!PyString_Check(name_obj
)
1333 #ifdef Py_USING_UNICODE
1334 && !PyUnicode_Check(name_obj
)
1337 PyErr_SetString(PyExc_TypeError
,
1338 "dialect name must be a string or unicode");
1341 Py_INCREF(dialect_obj
);
1342 /* A class rather than an instance? Instantiate */
1343 if (PyObject_TypeCheck(dialect_obj
, &PyClass_Type
)) {
1345 new_dia
= PyObject_CallFunction(dialect_obj
, "");
1346 Py_DECREF(dialect_obj
);
1347 if (new_dia
== NULL
)
1349 dialect_obj
= new_dia
;
1351 /* Make sure we finally have an instance */
1352 if (!PyInstance_Check(dialect_obj
)) {
1353 PyErr_SetString(PyExc_TypeError
, "dialect must be an instance");
1354 Py_DECREF(dialect_obj
);
1357 if (PyObject_SetAttrString(dialect_obj
, "_name", name_obj
) < 0) {
1358 Py_DECREF(dialect_obj
);
1361 if (PyDict_SetItem(dialects
, name_obj
, dialect_obj
) < 0) {
1362 Py_DECREF(dialect_obj
);
1365 Py_DECREF(dialect_obj
);
/*
 * unregister_dialect(name): remove `name_obj` from the module's dialect
 * registry.  When PyDict_DelItem() fails, the CSV error object is raised
 * with the message "unknown dialect".
 *
 * NOTE(review): the success-path return is on lines missing from this
 * listing (presumably it returns None) -- confirm.  Also note that any
 * PyDict_DelItem failure, not only a missing key, is reported as
 * "unknown dialect".
 */
1371 csv_unregister_dialect(PyObject
*module
, PyObject
*name_obj
)
1373 if (PyDict_DelItem(dialects
, name_obj
) < 0)
1374 return PyErr_Format(error_obj
, "unknown dialect");
1380 csv_get_dialect(PyObject
*module
, PyObject
*name_obj
)
1382 return get_dialect_from_registry(name_obj
);
1389 PyDoc_STRVAR(csv_module_doc
,
1390 "CSV parsing and writing.\n"
1392 "This module provides classes that assist in the reading and writing\n"
1393 "of Comma Separated Value (CSV) files, and implements the interface\n"
1394 "described by PEP 305. Although many CSV files are simple to parse,\n"
1395 "the format is not formally defined by a stable specification and\n"
1396 "is subtle enough that parsing lines of a CSV file with something\n"
1397 "like line.split(\",\") is bound to fail. The module supports three\n"
1398 "basic APIs: reading, writing, and registration of dialects.\n"
1401 "DIALECT REGISTRATION:\n"
1403 "Readers and writers support a dialect argument, which is a convenient\n"
1404 "handle on a group of settings. When the dialect argument is a string,\n"
1405 "it identifies one of the dialects previously registered with the module.\n"
1406 "If it is a class or instance, the attributes of the argument are used as\n"
1407 "the settings for the reader or writer:\n"
1410 " delimiter = ','\n"
1411 " quotechar = '\"'\n"
1412 " escapechar = None\n"
1413 " doublequote = True\n"
1414 " skipinitialspace = False\n"
1415 " lineterminator = '\r\n'\n"
1416 " quoting = QUOTE_MINIMAL\n"
1420 " * quotechar - specifies a one-character string to use as the \n"
1421 " quoting character. It defaults to '\"'.\n"
1422 " * delimiter - specifies a one-character string to use as the \n"
1423 " field separator. It defaults to ','.\n"
1424 " * skipinitialspace - specifies how to interpret whitespace which\n"
1425 " immediately follows a delimiter. It defaults to False, which\n"
1426 " means that whitespace immediately following a delimiter is part\n"
1427 " of the following field.\n"
1428 " * lineterminator - specifies the character sequence which should \n"
1429 " terminate rows.\n"
1430 " * quoting - controls when quotes should be generated by the writer.\n"
1431 " It can take on any of the following module constants:\n"
1433 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1434 " field contains either the quotechar or the delimiter\n"
1435 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1436 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1437 " fields which do not parse as integers or floating point\n"
1439 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1440 " * escapechar - specifies a one-character string used to escape \n"
1441 " the delimiter when quoting is set to QUOTE_NONE.\n"
1442 " * doublequote - controls the handling of quotes inside fields. When\n"
1443 " True, two consecutive quotes are interpreted as one during read,\n"
1444 " and when writing, each quote character embedded in the data is\n"
1445 " written as two quotes\n");
1447 PyDoc_STRVAR(csv_reader_doc
,
1448 " csv_reader = reader(iterable [, dialect='excel']\n"
1449 " [optional keyword args])\n"
1450 " for row in csv_reader:\n"
1453 "The \"iterable\" argument can be any object that returns a line\n"
1454 "of input for each iteration, such as a file object or a list. The\n"
1455 "optional \"dialect\" parameter is discussed below. The function\n"
1456 "also accepts optional keyword arguments which override settings\n"
1457 "provided by the dialect.\n"
1459 "The returned object is an iterator. Each iteration returns a row\n"
1460 "of the CSV file (which can span multiple input lines):\n");
1462 PyDoc_STRVAR(csv_writer_doc
,
1463 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1464 " [optional keyword args])\n"
1465 " for row in csv_writer:\n"
1466 " csv_writer.writerow(row)\n"
1470 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1471 " [optional keyword args])\n"
1472 " csv_writer.writerows(rows)\n"
1474 "The \"fileobj\" argument can be any object that supports the file API.\n");
1476 PyDoc_STRVAR(csv_list_dialects_doc
,
1477 "Return a list of all know dialect names.\n"
1478 " names = csv.list_dialects()");
1480 PyDoc_STRVAR(csv_get_dialect_doc
,
1481 "Return the dialect instance associated with name.\n"
1482 " dialect = csv.get_dialect(name)");
1484 PyDoc_STRVAR(csv_register_dialect_doc
,
1485 "Create a mapping from a string name to a dialect class.\n"
1486 " dialect = csv.register_dialect(name, dialect)");
1488 PyDoc_STRVAR(csv_unregister_dialect_doc
,
1489 "Delete the name/dialect mapping associated with a string name.\n"
1490 " csv.unregister_dialect(name)");
1492 static struct PyMethodDef csv_methods
[] = {
1493 { "reader", (PyCFunction
)csv_reader
,
1494 METH_VARARGS
| METH_KEYWORDS
, csv_reader_doc
},
1495 { "writer", (PyCFunction
)csv_writer
,
1496 METH_VARARGS
| METH_KEYWORDS
, csv_writer_doc
},
1497 { "list_dialects", (PyCFunction
)csv_list_dialects
,
1498 METH_NOARGS
, csv_list_dialects_doc
},
1499 { "register_dialect", (PyCFunction
)csv_register_dialect
,
1500 METH_VARARGS
, csv_register_dialect_doc
},
1501 { "unregister_dialect", (PyCFunction
)csv_unregister_dialect
,
1502 METH_O
, csv_unregister_dialect_doc
},
1503 { "get_dialect", (PyCFunction
)csv_get_dialect
,
1504 METH_O
, csv_get_dialect_doc
},
1514 if (PyType_Ready(&Dialect_Type
) < 0)
1517 if (PyType_Ready(&Reader_Type
) < 0)
1520 if (PyType_Ready(&Writer_Type
) < 0)
1523 /* Create the module and add the functions */
1524 module
= Py_InitModule3("_csv", csv_methods
, csv_module_doc
);
1528 /* Add version to the module. */
1529 if (PyModule_AddStringConstant(module
, "__version__",
1530 MODULE_VERSION
) == -1)
1533 /* Add _dialects dictionary */
1534 dialects
= PyDict_New();
1535 if (dialects
== NULL
)
1537 if (PyModule_AddObject(module
, "_dialects", dialects
))
1540 /* Add quote styles into dictionary */
1541 for (style
= quote_styles
; style
->name
; style
++) {
1542 if (PyModule_AddIntConstant(module
, style
->name
,
1543 style
->style
) == -1)
1547 /* Add the Dialect type */
1548 if (PyModule_AddObject(module
, "Dialect", (PyObject
*)&Dialect_Type
))
1551 /* Add the CSV exception object to the module. */
1552 error_obj
= PyErr_NewException("_csv.Error", NULL
, NULL
);
1553 if (error_obj
== NULL
)
1555 PyModule_AddObject(module
, "Error", error_obj
);