Modules/_csv.c

   1 /* csv module */
   2
   3 /*
   4
   5 This module provides the low-level underpinnings of a CSV reading/writing
   6 module.  Users should not use this module directly, but import the csv.py
   7 module instead.
   8
   9 **** For people modifying this code, please note that as of this writing
  10 **** (2003-03-23), it is intended that this code should work with Python
  11 **** 2.2.
  12
  13 */
  14
  15 #define MODULE_VERSION "1.0"
  16
#include "Python.h"
#include "structmember.h"

#include <limits.h>
  19
  20
  21 /* begin 2.2 compatibility macros */
  22 #ifndef PyDoc_STRVAR
  23 /* Define macros for inline documentation. */
  24 #define PyDoc_VAR(name) static char name[]
  25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
  26 #ifdef WITH_DOC_STRINGS
  27 #define PyDoc_STR(str) str
  28 #else
  29 #define PyDoc_STR(str) ""
  30 #endif
  31 #endif /* ifndef PyDoc_STRVAR */
  32
  33 #ifndef PyMODINIT_FUNC
  34 #       if defined(__cplusplus)
  35 #               define PyMODINIT_FUNC extern "C" void
  36 #       else /* __cplusplus */
  37 #               define PyMODINIT_FUNC void
  38 #       endif /* __cplusplus */
  39 #endif
  40 /* end 2.2 compatibility macros */
  41
  42 static PyObject *error_obj;     /* CSV exception */
  43 static PyObject *dialects;      /* Dialect registry */
  44
  45 typedef enum {
  46         START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
  47         IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD
  48 } ParserState;
  49
  50 typedef enum {
  51         QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
  52 } QuoteStyle;
  53
  54 typedef struct {
  55         QuoteStyle style;
  56         char *name;
  57 } StyleDesc;
  58
  59 static StyleDesc quote_styles[] = {
  60         { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
  61         { QUOTE_ALL,        "QUOTE_ALL" },
  62         { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
  63         { QUOTE_NONE,       "QUOTE_NONE" },
  64         { 0 }
  65 };
  66
  67 typedef struct {
  68         PyObject_HEAD
  69
  70         int doublequote;        /* is " represented by ""? */
  71         char delimiter;         /* field separator */
  72         char quotechar;         /* quote character */
  73         char escapechar;        /* escape character */
  74         int skipinitialspace;   /* ignore spaces following delimiter? */
  75         PyObject *lineterminator; /* string to write between records */
  76         QuoteStyle quoting;     /* style of quoting to write */
  77
  78         int strict;             /* raise exception on bad CSV */
  79 } DialectObj;
  80
  81 staticforward PyTypeObject Dialect_Type;
  82
  83 typedef struct {
  84         PyObject_HEAD
  85
  86         PyObject *input_iter;   /* iterate over this for input lines */
  87
  88         DialectObj *dialect;    /* parsing dialect */
  89
  90         PyObject *fields;       /* field list for current record */
  91         ParserState state;      /* current CSV parse state */
  92         char *field;            /* build current field in here */
  93         int field_size;         /* size of allocated buffer */
  94         int field_len;          /* length of current field */
  95         int had_parse_error;    /* did we have a parse error? */
  96 } ReaderObj;
  97
  98 staticforward PyTypeObject Reader_Type;
  99
 100 #define ReaderObject_Check(v)   ((v)->ob_type == &Reader_Type)
 101
 102 typedef struct {
 103         PyObject_HEAD
 104
 105         PyObject *writeline;    /* write output lines to this file */
 106
 107         DialectObj *dialect;    /* parsing dialect */
 108
 109         char *rec;              /* buffer for parser.join */
 110         int rec_size;           /* size of allocated record */
 111         int rec_len;            /* length of record */
 112         int num_fields;         /* number of fields in record */
 113 } WriterObj;
 114
 115 staticforward PyTypeObject Writer_Type;
 116
 117 /*
 118  * DIALECT class
 119  */
 120
 121 static PyObject *
 122 get_dialect_from_registry(PyObject * name_obj)
 123 {
 124         PyObject *dialect_obj;
 125
 126         dialect_obj = PyDict_GetItem(dialects, name_obj);
 127         if (dialect_obj == NULL)
 128             return PyErr_Format(error_obj, "unknown dialect");
 129         Py_INCREF(dialect_obj);
 130         return dialect_obj;
 131 }
 132
 133 static int
 134 check_delattr(PyObject *v)
 135 {
 136         if (v == NULL) {
 137                 PyErr_SetString(PyExc_TypeError,
 138                                 "Cannot delete attribute");
 139                 return -1;
 140         }
 141         return 0;
 142 }
 143
 144 static PyObject *
 145 get_string(PyObject *str)
 146 {
 147         Py_XINCREF(str);
 148         return str;
 149 }
 150
 151 static int
 152 set_string(PyObject **str, PyObject *v)
 153 {
 154         if (check_delattr(v) < 0)
 155                 return -1;
 156         if (!PyString_Check(v)
 157 #ifdef Py_USING_UNICODE
 158 && !PyUnicode_Check(v)
 159 #endif
 160 ) {
 161                 PyErr_BadArgument();
 162                 return -1;
 163         }
 164         Py_XDECREF(*str);
 165         Py_INCREF(v);
 166         *str = v;
 167         return 0;
 168 }
 169
 170 static PyObject *
 171 get_nullchar_as_None(char c)
 172 {
 173         if (c == '\0') {
 174                 Py_INCREF(Py_None);
 175                 return Py_None;
 176         }
 177         else
 178                 return PyString_FromStringAndSize((char*)&c, 1);
 179 }
 180
 181 static int
 182 set_None_as_nullchar(char * addr, PyObject *v)
 183 {
 184         if (check_delattr(v) < 0)
 185                 return -1;
 186         if (v == Py_None)
 187                 *addr = '\0';
 188         else if (!PyString_Check(v) || PyString_Size(v) != 1) {
 189                 PyErr_BadArgument();
 190                 return -1;
 191         }
 192         else {
 193                 char *s = PyString_AsString(v);
 194                 if (s == NULL)
 195                         return -1;
 196                 *addr = s[0];
 197         }
 198         return 0;
 199 }
 200
 201 static PyObject *
 202 Dialect_get_lineterminator(DialectObj *self)
 203 {
 204         return get_string(self->lineterminator);
 205 }
 206
 207 static int
 208 Dialect_set_lineterminator(DialectObj *self, PyObject *value)
 209 {
 210         return set_string(&self->lineterminator, value);
 211 }
 212
 213 static PyObject *
 214 Dialect_get_escapechar(DialectObj *self)
 215 {
 216         return get_nullchar_as_None(self->escapechar);
 217 }
 218
 219 static int
 220 Dialect_set_escapechar(DialectObj *self, PyObject *value)
 221 {
 222         return set_None_as_nullchar(&self->escapechar, value);
 223 }
 224
 225 static PyObject *
 226 Dialect_get_quoting(DialectObj *self)
 227 {
 228         return PyInt_FromLong(self->quoting);
 229 }
 230
 231 static int
 232 Dialect_set_quoting(DialectObj *self, PyObject *v)
 233 {
 234         int quoting;
 235         StyleDesc *qs = quote_styles;
 236
 237         if (check_delattr(v) < 0)
 238                 return -1;
 239         if (!PyInt_Check(v)) {
 240                 PyErr_BadArgument();
 241                 return -1;
 242         }
 243         quoting = PyInt_AsLong(v);
 244         for (qs = quote_styles; qs->name; qs++) {
 245                 if (qs->style == quoting) {
 246                         self->quoting = quoting;
 247                         return 0;
 248                 }
 249         }
 250         PyErr_BadArgument();
 251         return -1;
 252 }
 253
 254 static struct PyMethodDef Dialect_methods[] = {
 255         { NULL, NULL }
 256 };
 257
 258 #define D_OFF(x) offsetof(DialectObj, x)
 259
 260 static struct PyMemberDef Dialect_memberlist[] = {
 261         { "quotechar",          T_CHAR, D_OFF(quotechar) },
 262         { "delimiter",          T_CHAR, D_OFF(delimiter) },
 263         { "skipinitialspace",   T_INT, D_OFF(skipinitialspace) },
 264         { "doublequote",        T_INT, D_OFF(doublequote) },
 265         { "strict",             T_INT, D_OFF(strict) },
 266         { NULL }
 267 };
 268
 269 static PyGetSetDef Dialect_getsetlist[] = {
 270         { "escapechar", (getter)Dialect_get_escapechar,
 271                 (setter)Dialect_set_escapechar },
 272         { "lineterminator", (getter)Dialect_get_lineterminator,
 273                 (setter)Dialect_set_lineterminator },
 274         { "quoting", (getter)Dialect_get_quoting,
 275                 (setter)Dialect_set_quoting },
 276         {NULL},
 277 };
 278
 279 static void
 280 Dialect_dealloc(DialectObj *self)
 281 {
 282         Py_XDECREF(self->lineterminator);
 283         self->ob_type->tp_free((PyObject *)self);
 284 }
 285
 286 static int
 287 dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
 288 {
 289         PyObject *dialect = NULL, *name_obj, *value_obj;
 290
 291         self->quotechar = '"';
 292         self->delimiter = ',';
 293         self->escapechar = '\0';
 294         self->skipinitialspace = 0;
 295         Py_XDECREF(self->lineterminator);
 296         self->lineterminator = PyString_FromString("\r\n");
 297         if (self->lineterminator == NULL)
 298                 return -1;
 299         self->quoting = QUOTE_MINIMAL;
 300         self->doublequote = 1;
 301         self->strict = 0;
 302
 303         if (!PyArg_ParseTuple(args, "|O", &dialect))
 304                 return -1;
 305         Py_XINCREF(dialect);
 306         if (kwargs != NULL) {
 307                 PyObject * key = PyString_FromString("dialect");
 308                 PyObject * d;
 309
 310                 d = PyDict_GetItem(kwargs, key);
 311                 if (d) {
 312                         Py_INCREF(d);
 313                         Py_XDECREF(dialect);
 314                         PyDict_DelItem(kwargs, key);
 315                         dialect = d;
 316                 }
 317                 Py_DECREF(key);
 318         }
 319         if (dialect != NULL) {
 320                 int i;
 321                 PyObject * dir_list;
 322
 323                 /* If dialect is a string, look it up in our registry */
 324                 if (PyString_Check(dialect)
 325 #ifdef Py_USING_UNICODE
 326                     || PyUnicode_Check(dialect)
 327 #endif
 328                         ) {
 329                         PyObject * new_dia;
 330                         new_dia = get_dialect_from_registry(dialect);
 331                         Py_DECREF(dialect);
 332                         if (new_dia == NULL)
 333                                 return -1;
 334                         dialect = new_dia;
 335                 }
 336                 /* A class rather than an instance? Instantiate */
 337                 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
 338                         PyObject * new_dia;
 339                         new_dia = PyObject_CallFunction(dialect, "");
 340                         Py_DECREF(dialect);
 341                         if (new_dia == NULL)
 342                                 return -1;
 343                         dialect = new_dia;
 344                 }
 345                 /* Make sure we finally have an instance */
 346                 if (!PyInstance_Check(dialect) ||
 347                     (dir_list = PyObject_Dir(dialect)) == NULL) {
 348                         PyErr_SetString(PyExc_TypeError,
 349                                         "dialect must be an instance");
 350                         Py_DECREF(dialect);
 351                         return -1;
 352                 }
 353                 /* And extract the attributes */
 354                 for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
 355                         char *s;
 356                         name_obj = PyList_GET_ITEM(dir_list, i);
 357                         s = PyString_AsString(name_obj);
 358                         if (s == NULL)
 359                                 return -1;
 360                         if (s[0] == '_')
 361                                 continue;
 362                         value_obj = PyObject_GetAttr(dialect, name_obj);
 363                         if (value_obj) {
 364                                 if (PyObject_SetAttr((PyObject *)self,
 365                                                      name_obj, value_obj)) {
 366                                         Py_DECREF(value_obj);
 367                                         Py_DECREF(dir_list);
 368                                         Py_DECREF(dialect);
 369                                         return -1;
 370                                 }
 371                                 Py_DECREF(value_obj);
 372                         }
 373                 }
 374                 Py_DECREF(dir_list);
 375                 Py_DECREF(dialect);
 376         }
 377         if (kwargs != NULL) {
 378                 int pos = 0;
 379
 380                 while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
 381                         if (PyObject_SetAttr((PyObject *)self,
 382                                              name_obj, value_obj))
 383                                 return -1;
 384                 }
 385         }
 386         return 0;
 387 }
 388
 389 static PyObject *
 390 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 391 {
 392         DialectObj *self;
 393         self = (DialectObj *)type->tp_alloc(type, 0);
 394         if (self != NULL) {
 395                 self->lineterminator = NULL;
 396         }
 397         return (PyObject *)self;
 398 }
 399
 400
 401 PyDoc_STRVAR(Dialect_Type_doc,
 402 "CSV dialect\n"
 403 "\n"
 404 "The Dialect type records CSV parsing and generation options.\n");
 405
 406 static PyTypeObject Dialect_Type = {
 407         PyObject_HEAD_INIT(NULL)
 408         0,                                      /* ob_size */
 409         "_csv.Dialect",                         /* tp_name */
 410         sizeof(DialectObj),                     /* tp_basicsize */
 411         0,                                      /* tp_itemsize */
 412         /*  methods  */
 413         (destructor)Dialect_dealloc,            /* tp_dealloc */
 414         (printfunc)0,                           /* tp_print */
 415         (getattrfunc)0,                         /* tp_getattr */
 416         (setattrfunc)0,                         /* tp_setattr */
 417         (cmpfunc)0,                             /* tp_compare */
 418         (reprfunc)0,                            /* tp_repr */
 419         0,                                      /* tp_as_number */
 420         0,                                      /* tp_as_sequence */
 421         0,                                      /* tp_as_mapping */
 422         (hashfunc)0,                            /* tp_hash */
 423         (ternaryfunc)0,                         /* tp_call */
 424         (reprfunc)0,                            /* tp_str */
 425         0,                                      /* tp_getattro */
 426         0,                                      /* tp_setattro */
 427         0,                                      /* tp_as_buffer */
 428         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
 429         Dialect_Type_doc,                       /* tp_doc */
 430         0,                                      /* tp_traverse */
 431         0,                                      /* tp_clear */
 432         0,                                      /* tp_richcompare */
 433         0,                                      /* tp_weaklistoffset */
 434         0,                                      /* tp_iter */
 435         0,                                      /* tp_iternext */
 436         Dialect_methods,                        /* tp_methods */
 437         Dialect_memberlist,                     /* tp_members */
 438         Dialect_getsetlist,                     /* tp_getset */
 439         0,                                      /* tp_base */
 440         0,                                      /* tp_dict */
 441         0,                                      /* tp_descr_get */
 442         0,                                      /* tp_descr_set */
 443         0,                                      /* tp_dictoffset */
 444         (initproc)dialect_init,                 /* tp_init */
 445         PyType_GenericAlloc,                    /* tp_alloc */
 446         dialect_new,                            /* tp_new */
 447         0,                                      /* tp_free */
 448 };
 449
 450 static void
 451 parse_save_field(ReaderObj *self)
 452 {
 453         PyObject *field;
 454
 455         field = PyString_FromStringAndSize(self->field, self->field_len);
 456         if (field != NULL) {
 457                 PyList_Append(self->fields, field);
 458                 Py_XDECREF(field);
 459         }
 460         self->field_len = 0;
 461 }
 462
 463 static int
 464 parse_grow_buff(ReaderObj *self)
 465 {
 466         if (self->field_size == 0) {
 467                 self->field_size = 4096;
 468                 if (self->field != NULL)
 469                         PyMem_Free(self->field);
 470                 self->field = PyMem_Malloc(self->field_size);
 471         }
 472         else {
 473                 self->field_size *= 2;
 474                 self->field = PyMem_Realloc(self->field, self->field_size);
 475         }
 476         if (self->field == NULL) {
 477                 PyErr_NoMemory();
 478                 return 0;
 479         }
 480         return 1;
 481 }
 482
 483 static void
 484 parse_add_char(ReaderObj *self, char c)
 485 {
 486         if (self->field_len == self->field_size && !parse_grow_buff(self))
 487                 return;
 488         self->field[self->field_len++] = c;
 489 }
 490
 491 static void
 492 parse_process_char(ReaderObj *self, char c)
 493 {
 494         DialectObj *dialect = self->dialect;
 495
 496         switch (self->state) {
 497         case START_RECORD:
 498                 /* start of record */
 499                 if (c == '\n')
 500                         /* empty line - return [] */
 501                         break;
 502                 /* normal character - handle as START_FIELD */
 503                 self->state = START_FIELD;
 504                 /* fallthru */
 505         case START_FIELD:
 506                 /* expecting field */
 507                 if (c == '\n') {
 508                         /* save empty field - return [fields] */
 509                         parse_save_field(self);
 510                         self->state = START_RECORD;
 511                 }
 512                 else if (c == dialect->quotechar) {
 513                         /* start quoted field */
 514                         self->state = IN_QUOTED_FIELD;
 515                 }
 516                 else if (c == dialect->escapechar) {
 517                         /* possible escaped character */
 518                         self->state = ESCAPED_CHAR;
 519                 }
 520                 else if (c == ' ' && dialect->skipinitialspace)
 521                         /* ignore space at start of field */
 522                         ;
 523                 else if (c == dialect->delimiter) {
 524                         /* save empty field */
 525                         parse_save_field(self);
 526                 }
 527                 else {
 528                         /* begin new unquoted field */
 529                         parse_add_char(self, c);
 530                         self->state = IN_FIELD;
 531                 }
 532                 break;
 533
 534         case ESCAPED_CHAR:
 535                 if (c != dialect->escapechar &&
 536                     c != dialect->delimiter &&
 537                     c != dialect->quotechar)
 538                         parse_add_char(self, dialect->escapechar);
 539                 parse_add_char(self, c);
 540                 self->state = IN_FIELD;
 541                 break;
 542
 543         case IN_FIELD:
 544                 /* in unquoted field */
 545                 if (c == '\n') {
 546                         /* end of line - return [fields] */
 547                         parse_save_field(self);
 548                         self->state = START_RECORD;
 549                 }
 550                 else if (c == dialect->escapechar) {
 551                         /* possible escaped character */
 552                         self->state = ESCAPED_CHAR;
 553                 }
 554                 else if (c == dialect->delimiter) {
 555                         /* save field - wait for new field */
 556                         parse_save_field(self);
 557                         self->state = START_FIELD;
 558                 }
 559                 else {
 560                         /* normal character - save in field */
 561                         parse_add_char(self, c);
 562                 }
 563                 break;
 564
 565         case IN_QUOTED_FIELD:
 566                 /* in quoted field */
 567                 if (c == '\n') {
 568                         /* end of line - save '\n' in field */
 569                         parse_add_char(self, '\n');
 570                 }
 571                 else if (c == dialect->escapechar) {
 572                         /* Possible escape character */
 573                         self->state = ESCAPE_IN_QUOTED_FIELD;
 574                 }
 575                 else if (c == dialect->quotechar) {
 576                         if (dialect->doublequote) {
 577                                 /* doublequote; " represented by "" */
 578                                 self->state = QUOTE_IN_QUOTED_FIELD;
 579                         }
 580                         else {
 581                                 /* end of quote part of field */
 582                                 self->state = IN_FIELD;
 583                         }
 584                 }
 585                 else {
 586                         /* normal character - save in field */
 587                         parse_add_char(self, c);
 588                 }
 589                 break;
 590
 591         case ESCAPE_IN_QUOTED_FIELD:
 592                 if (c != dialect->escapechar &&
 593                     c != dialect->delimiter &&
 594                     c != dialect->quotechar)
 595                         parse_add_char(self, dialect->escapechar);
 596                 parse_add_char(self, c);
 597                 self->state = IN_QUOTED_FIELD;
 598                 break;
 599
 600         case QUOTE_IN_QUOTED_FIELD:
 601                 /* doublequote - seen a quote in an quoted field */
 602                 if (dialect->quoting != QUOTE_NONE &&
 603                     c == dialect->quotechar) {
 604                         /* save "" as " */
 605                         parse_add_char(self, c);
 606                         self->state = IN_QUOTED_FIELD;
 607                 }
 608                 else if (c == dialect->delimiter) {
 609                         /* save field - wait for new field */
 610                         parse_save_field(self);
 611                         self->state = START_FIELD;
 612                 }
 613                 else if (c == '\n') {
 614                         /* end of line - return [fields] */
 615                         parse_save_field(self);
 616                         self->state = START_RECORD;
 617                 }
 618                 else if (!dialect->strict) {
 619                         parse_add_char(self, c);
 620                         self->state = IN_FIELD;
 621                 }
 622                 else {
 623                         /* illegal */
 624                         self->had_parse_error = 1;
 625                         PyErr_Format(error_obj, "%c expected after %c",
 626                                         dialect->delimiter,
 627                                         dialect->quotechar);
 628                 }
 629                 break;
 630
 631         }
 632 }
 633
 634 /*
 635  * READER
 636  */
 637 #define R_OFF(x) offsetof(ReaderObj, x)
 638
 639 static struct PyMemberDef Reader_memberlist[] = {
 640         { "dialect", T_OBJECT, R_OFF(dialect), RO },
 641         { NULL }
 642 };
 643
 644 static PyObject *
 645 Reader_getiter(ReaderObj *self)
 646 {
 647         Py_INCREF(self);
 648         return (PyObject *)self;
 649 }
 650
 651 static PyObject *
 652 Reader_iternext(ReaderObj *self)
 653 {
 654         PyObject *lineobj;
 655         PyObject *fields;
 656         char *line;
 657
 658         do {
 659                 lineobj = PyIter_Next(self->input_iter);
 660                 if (lineobj == NULL) {
 661                         /* End of input OR exception */
 662                         if (!PyErr_Occurred() && self->field_len != 0)
 663                                 return PyErr_Format(error_obj,
 664                                                     "newline inside string");
 665                         return NULL;
 666                 }
 667
 668                 if (self->had_parse_error) {
 669                         if (self->fields) {
 670                                 Py_XDECREF(self->fields);
 671                         }
 672                         self->fields = PyList_New(0);
 673                         self->field_len = 0;
 674                         self->state = START_RECORD;
 675                         self->had_parse_error = 0;
 676                 }
 677                 line = PyString_AsString(lineobj);
 678
 679                 if (line == NULL) {
 680                         Py_DECREF(lineobj);
 681                         return NULL;
 682                 }
 683                 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
 684                         self->had_parse_error = 1;
 685                         Py_DECREF(lineobj);
 686                         return PyErr_Format(error_obj,
 687                                             "string with NUL bytes");
 688                 }
 689
 690                 /* Process line of text - send '\n' to processing code to
 691                 represent end of line.  End of line which is not at end of
 692                 string is an error. */
 693                 while (*line) {
 694                         char c;
 695
 696                         c = *line++;
 697                         if (c == '\r') {
 698                                 c = *line++;
 699                                 if (c == '\0')
 700                                         /* macintosh end of line */
 701                                         break;
 702                                 if (c == '\n') {
 703                                         c = *line++;
 704                                         if (c == '\0')
 705                                                 /* DOS end of line */
 706                                                 break;
 707                                 }
 708                                 self->had_parse_error = 1;
 709                                 Py_DECREF(lineobj);
 710                                 return PyErr_Format(error_obj,
 711                                                     "newline inside string");
 712                         }
 713                         if (c == '\n') {
 714                                 c = *line++;
 715                                 if (c == '\0')
 716                                         /* unix end of line */
 717                                         break;
 718                                 self->had_parse_error = 1;
 719                                 Py_DECREF(lineobj);
 720                                 return PyErr_Format(error_obj,
 721                                                     "newline inside string");
 722                         }
 723                         parse_process_char(self, c);
 724                         if (PyErr_Occurred()) {
 725                                 Py_DECREF(lineobj);
 726                                 return NULL;
 727                         }
 728                 }
 729                 parse_process_char(self, '\n');
 730                 Py_DECREF(lineobj);
 731         } while (self->state != START_RECORD);
 732
 733         fields = self->fields;
 734         self->fields = PyList_New(0);
 735         return fields;
 736 }
 737
 738 static void
 739 Reader_dealloc(ReaderObj *self)
 740 {
 741         Py_XDECREF(self->dialect);
 742         Py_XDECREF(self->input_iter);
 743         Py_XDECREF(self->fields);
 744         if (self->field != NULL)
 745                 PyMem_Free(self->field);
 746         PyObject_GC_Del(self);
 747 }
 748
 749 static int
 750 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
 751 {
 752         int err;
 753 #define VISIT(SLOT) \
 754         if (SLOT) { \
 755                 err = visit((PyObject *)(SLOT), arg); \
 756                 if (err) \
 757                         return err; \
 758         }
 759         VISIT(self->dialect);
 760         VISIT(self->input_iter);
 761         VISIT(self->fields);
 762         return 0;
 763 }
 764
 765 static int
 766 Reader_clear(ReaderObj *self)
 767 {
 768         Py_XDECREF(self->dialect);
 769         Py_XDECREF(self->input_iter);
 770         Py_XDECREF(self->fields);
 771         self->dialect = NULL;
 772         self->input_iter = NULL;
 773         self->fields = NULL;
 774         return 0;
 775 }
 776
 777 PyDoc_STRVAR(Reader_Type_doc,
 778 "CSV reader\n"
 779 "\n"
 780 "Reader objects are responsible for reading and parsing tabular data\n"
 781 "in CSV format.\n"
 782 );
 783
 784 static struct PyMethodDef Reader_methods[] = {
 785         { NULL, NULL }
 786 };
 787
 788 static PyTypeObject Reader_Type = {
 789         PyObject_HEAD_INIT(NULL)
 790         0,                                      /*ob_size*/
 791         "_csv.reader",                          /*tp_name*/
 792         sizeof(ReaderObj),                      /*tp_basicsize*/
 793         0,                                      /*tp_itemsize*/
 794         /* methods */
 795         (destructor)Reader_dealloc,             /*tp_dealloc*/
 796         (printfunc)0,                           /*tp_print*/
 797         (getattrfunc)0,                         /*tp_getattr*/
 798         (setattrfunc)0,                         /*tp_setattr*/
 799         (cmpfunc)0,                             /*tp_compare*/
 800         (reprfunc)0,                            /*tp_repr*/
 801         0,                                      /*tp_as_number*/
 802         0,                                      /*tp_as_sequence*/
 803         0,                                      /*tp_as_mapping*/
 804         (hashfunc)0,                            /*tp_hash*/
 805         (ternaryfunc)0,                         /*tp_call*/
 806         (reprfunc)0,                            /*tp_str*/
 807         0,                                      /*tp_getattro*/
 808         0,                                      /*tp_setattro*/
 809         0,                                      /*tp_as_buffer*/
 810         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
 811                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
 812         Reader_Type_doc,                        /*tp_doc*/
 813         (traverseproc)Reader_traverse,          /*tp_traverse*/
 814         (inquiry)Reader_clear,                  /*tp_clear*/
 815         0,                                      /*tp_richcompare*/
 816         0,                                      /*tp_weaklistoffset*/
 817         (getiterfunc)Reader_getiter,            /*tp_iter*/
 818         (getiterfunc)Reader_iternext,           /*tp_iternext*/
 819         Reader_methods,                         /*tp_methods*/
 820         Reader_memberlist,                      /*tp_members*/
 821         0,                                      /*tp_getset*/
 822
 823 };
 824
 825 static PyObject *
 826 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
 827 {
 828         PyObject * iterator, * dialect = NULL, *ctor_args;
 829         ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
 830
 831         if (!self)
 832                 return NULL;
 833
 834         self->dialect = NULL;
 835         self->input_iter = self->fields = NULL;
 836
 837         self->fields = NULL;
 838         self->input_iter = NULL;
 839         self->had_parse_error = 0;
 840         self->field = NULL;
 841         self->field_size = 0;
 842         self->field_len = 0;
 843         self->state = START_RECORD;
 844
 845         if (!PyArg_ParseTuple(args, "O|O", &iterator, &dialect)) {
 846                 Py_DECREF(self);
 847                 return NULL;
 848         }
 849         self->input_iter = PyObject_GetIter(iterator);
 850         if (self->input_iter == NULL) {
 851                 PyErr_SetString(PyExc_TypeError,
 852                                 "argument 1 must be an iterator");
 853                 Py_DECREF(self);
 854                 return NULL;
 855         }
 856         ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
 857         if (ctor_args == NULL) {
 858                 Py_DECREF(self);
 859                 return NULL;
 860         }
 861         self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
 862                                                     ctor_args, keyword_args);
 863         Py_DECREF(ctor_args);
 864         if (self->dialect == NULL) {
 865                 Py_DECREF(self);
 866                 return NULL;
 867         }
 868         self->fields = PyList_New(0);
 869         if (self->fields == NULL) {
 870                 Py_DECREF(self);
 871                 return NULL;
 872         }
 873
 874         return (PyObject *)self;
 875 }
 876
 877 /*
 878  * WRITER
 879  */
 880 /* ---------------------------------------------------------------- */
 881 static void
 882 join_reset(WriterObj *self)
 883 {
 884         self->rec_len = 0;
 885         self->num_fields = 0;
 886 }
 887
 888 #define MEM_INCR 32768
 889
 890 /* Calculate new record length or append field to record.  Return new
 891  * record length.
 892  */
 893 static int
 894 join_append_data(WriterObj *self, char *field, int quote_empty,
 895                  int *quoted, int copy_phase)
 896 {
 897         DialectObj *dialect = self->dialect;
 898         int i, rec_len;
 899
 900         rec_len = self->rec_len;
 901
 902         /* If this is not the first field we need a field separator.
 903          */
 904         if (self->num_fields > 0) {
 905                 if (copy_phase)
 906                         self->rec[rec_len] = dialect->delimiter;
 907                 rec_len++;
 908         }
 909         /* Handle preceding quote.
 910          */
 911         switch (dialect->quoting) {
 912         case QUOTE_ALL:
 913                 *quoted = 1;
 914                 if (copy_phase)
 915                         self->rec[rec_len] = dialect->quotechar;
 916                 rec_len++;
 917                 break;
 918         case QUOTE_MINIMAL:
 919         case QUOTE_NONNUMERIC:
 920                 /* We only know about quoted in the copy phase.
 921                  */
 922                 if (copy_phase && *quoted) {
 923                         self->rec[rec_len] = dialect->quotechar;
 924                         rec_len++;
 925                 }
 926                 break;
 927         case QUOTE_NONE:
 928                 break;
 929         }
 930         /* Copy/count field data.
 931          */
 932         for (i = 0;; i++) {
 933                 char c = field[i];
 934
 935                 if (c == '\0')
 936                         break;
 937                 /* If in doublequote mode we escape quote chars with a
 938                  * quote.
 939                  */
 940                 if (dialect->quoting != QUOTE_NONE &&
 941                     c == dialect->quotechar && dialect->doublequote) {
 942                         if (copy_phase)
 943                                 self->rec[rec_len] = dialect->quotechar;
 944                         *quoted = 1;
 945                         rec_len++;
 946                 }
 947
 948                 /* Some special characters need to be escaped.  If we have a
 949                  * quote character switch to quoted field instead of escaping
 950                  * individual characters.
 951                  */
 952                 if (!*quoted
 953                     && (c == dialect->delimiter ||
 954                         c == dialect->escapechar ||
 955                         c == '\n' || c == '\r')) {
 956                         if (dialect->quoting != QUOTE_NONE)
 957                                 *quoted = 1;
 958                         else if (dialect->escapechar) {
 959                                 if (copy_phase)
 960                                         self->rec[rec_len] = dialect->escapechar;
 961                                 rec_len++;
 962                         }
 963                         else {
 964                                 PyErr_Format(error_obj,
 965                                              "delimiter must be quoted or escaped");
 966                                 return -1;
 967                         }
 968                 }
 969                 /* Copy field character into record buffer.
 970                  */
 971                 if (copy_phase)
 972                         self->rec[rec_len] = c;
 973                 rec_len++;
 974         }
 975
 976         /* If field is empty check if it needs to be quoted.
 977          */
 978         if (i == 0 && quote_empty) {
 979                 if (dialect->quoting == QUOTE_NONE) {
 980                         PyErr_Format(error_obj,
 981                                      "single empty field record must be quoted");
 982                         return -1;
 983                 } else
 984                         *quoted = 1;
 985         }
 986
 987         /* Handle final quote character on field.
 988          */
 989         if (*quoted) {
 990                 if (copy_phase)
 991                         self->rec[rec_len] = dialect->quotechar;
 992                 else
 993                         /* Didn't know about leading quote until we found it
 994                          * necessary in field data - compensate for it now.
 995                          */
 996                         rec_len++;
 997                 rec_len++;
 998         }
 999
1000         return rec_len;
1001 }
1002
1003 static int
1004 join_check_rec_size(WriterObj *self, int rec_len)
1005 {
1006         if (rec_len > self->rec_size) {
1007                 if (self->rec_size == 0) {
1008                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1009                         if (self->rec != NULL)
1010                                 PyMem_Free(self->rec);
1011                         self->rec = PyMem_Malloc(self->rec_size);
1012                 }
1013                 else {
1014                         char *old_rec = self->rec;
1015
1016                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1017                         self->rec = PyMem_Realloc(self->rec, self->rec_size);
1018                         if (self->rec == NULL)
1019                                 PyMem_Free(old_rec);
1020                 }
1021                 if (self->rec == NULL) {
1022                         PyErr_NoMemory();
1023                         return 0;
1024                 }
1025         }
1026         return 1;
1027 }
1028
1029 static int
1030 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1031 {
1032         int rec_len;
1033
1034         rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1035         if (rec_len < 0)
1036                 return 0;
1037
1038         /* grow record buffer if necessary */
1039         if (!join_check_rec_size(self, rec_len))
1040                 return 0;
1041
1042         self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1043         self->num_fields++;
1044
1045         return 1;
1046 }
1047
1048 static int
1049 join_append_lineterminator(WriterObj *self)
1050 {
1051         int terminator_len;
1052
1053         terminator_len = PyString_Size(self->dialect->lineterminator);
1054
1055         /* grow record buffer if necessary */
1056         if (!join_check_rec_size(self, self->rec_len + terminator_len))
1057                 return 0;
1058
1059         memmove(self->rec + self->rec_len,
1060                 /* should not be NULL */
1061                 PyString_AsString(self->dialect->lineterminator),
1062                 terminator_len);
1063         self->rec_len += terminator_len;
1064
1065         return 1;
1066 }
1067
1068 PyDoc_STRVAR(csv_writerow_doc,
1069 "writerow(sequence)\n"
1070 "\n"
1071 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1072 "elements will be converted to string.");
1073
1074 static PyObject *
1075 csv_writerow(WriterObj *self, PyObject *seq)
1076 {
1077         DialectObj *dialect = self->dialect;
1078         int len, i;
1079
1080         if (!PySequence_Check(seq))
1081                 return PyErr_Format(error_obj, "sequence expected");
1082
1083         len = PySequence_Length(seq);
1084         if (len < 0)
1085                 return NULL;
1086
1087         /* Join all fields in internal buffer.
1088          */
1089         join_reset(self);
1090         for (i = 0; i < len; i++) {
1091                 PyObject *field;
1092                 int append_ok;
1093                 int quoted;
1094
1095                 field = PySequence_GetItem(seq, i);
1096                 if (field == NULL)
1097                         return NULL;
1098
1099                 quoted = 0;
1100                 if (dialect->quoting == QUOTE_NONNUMERIC) {
1101                         PyObject *num;
1102
1103                         num = PyNumber_Float(field);
1104                         if (num == NULL) {
1105                                 quoted = 1;
1106                                 PyErr_Clear();
1107                         }
1108                         else {
1109                                 Py_DECREF(num);
1110                         }
1111                 }
1112
1113                 if (PyString_Check(field)) {
1114                         append_ok = join_append(self,
1115                                                 PyString_AS_STRING(field),
1116                                                 &quoted, len == 1);
1117                         Py_DECREF(field);
1118                 }
1119                 else if (field == Py_None) {
1120                         append_ok = join_append(self, "", &quoted, len == 1);
1121                         Py_DECREF(field);
1122                 }
1123                 else {
1124                         PyObject *str;
1125
1126                         str = PyObject_Str(field);
1127                         Py_DECREF(field);
1128                         if (str == NULL)
1129                                 return NULL;
1130
1131                         append_ok = join_append(self, PyString_AS_STRING(str),
1132                                                 &quoted, len == 1);
1133                         Py_DECREF(str);
1134                 }
1135                 if (!append_ok)
1136                         return NULL;
1137         }
1138
1139         /* Add line terminator.
1140          */
1141         if (!join_append_lineterminator(self))
1142                 return 0;
1143
1144         return PyObject_CallFunction(self->writeline,
1145                                      "(s#)", self->rec, self->rec_len);
1146 }
1147
1148 PyDoc_STRVAR(csv_writerows_doc,
1149 "writerows(sequence of sequences)\n"
1150 "\n"
1151 "Construct and write a series of sequences to a csv file.  Non-string\n"
1152 "elements will be converted to string.");
1153
1154 static PyObject *
1155 csv_writerows(WriterObj *self, PyObject *seqseq)
1156 {
1157         PyObject *row_iter, *row_obj, *result;
1158
1159         row_iter = PyObject_GetIter(seqseq);
1160         if (row_iter == NULL) {
1161                 PyErr_SetString(PyExc_TypeError,
1162                                 "writerows() argument must be iterable");
1163                 return NULL;
1164         }
1165         while ((row_obj = PyIter_Next(row_iter))) {
1166                 result = csv_writerow(self, row_obj);
1167                 Py_DECREF(row_obj);
1168                 if (!result) {
1169                         Py_DECREF(row_iter);
1170                         return NULL;
1171                 }
1172                 else
1173                      Py_DECREF(result);
1174         }
1175         Py_DECREF(row_iter);
1176         if (PyErr_Occurred())
1177                 return NULL;
1178         Py_INCREF(Py_None);
1179         return Py_None;
1180 }
1181
1182 static struct PyMethodDef Writer_methods[] = {
1183         { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1184         { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1185         { NULL, NULL }
1186 };
1187
1188 #define W_OFF(x) offsetof(WriterObj, x)
1189
1190 static struct PyMemberDef Writer_memberlist[] = {
1191         { "dialect", T_OBJECT, W_OFF(dialect), RO },
1192         { NULL }
1193 };
1194
1195 static void
1196 Writer_dealloc(WriterObj *self)
1197 {
1198         Py_XDECREF(self->dialect);
1199         Py_XDECREF(self->writeline);
1200         if (self->rec != NULL)
1201                 PyMem_Free(self->rec);
1202         PyObject_GC_Del(self);
1203 }
1204
1205 static int
1206 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1207 {
1208         int err;
1209 #define VISIT(SLOT) \
1210         if (SLOT) { \
1211                 err = visit((PyObject *)(SLOT), arg); \
1212                 if (err) \
1213                         return err; \
1214         }
1215         VISIT(self->dialect);
1216         VISIT(self->writeline);
1217         return 0;
1218 }
1219
1220 static int
1221 Writer_clear(WriterObj *self)
1222 {
1223         Py_XDECREF(self->dialect);
1224         Py_XDECREF(self->writeline);
1225         self->dialect = NULL;
1226         self->writeline = NULL;
1227         return 0;
1228 }
1229
1230 PyDoc_STRVAR(Writer_Type_doc,
1231 "CSV writer\n"
1232 "\n"
1233 "Writer objects are responsible for generating tabular data\n"
1234 "in CSV format from sequence input.\n"
1235 );
1236
1237 static PyTypeObject Writer_Type = {
1238         PyObject_HEAD_INIT(NULL)
1239         0,                                      /*ob_size*/
1240         "_csv.writer",                          /*tp_name*/
1241         sizeof(WriterObj),                      /*tp_basicsize*/
1242         0,                                      /*tp_itemsize*/
1243         /* methods */
1244         (destructor)Writer_dealloc,             /*tp_dealloc*/
1245         (printfunc)0,                           /*tp_print*/
1246         (getattrfunc)0,                         /*tp_getattr*/
1247         (setattrfunc)0,                         /*tp_setattr*/
1248         (cmpfunc)0,                             /*tp_compare*/
1249         (reprfunc)0,                            /*tp_repr*/
1250         0,                                      /*tp_as_number*/
1251         0,                                      /*tp_as_sequence*/
1252         0,                                      /*tp_as_mapping*/
1253         (hashfunc)0,                            /*tp_hash*/
1254         (ternaryfunc)0,                         /*tp_call*/
1255         (reprfunc)0,                            /*tp_str*/
1256         0,                                      /*tp_getattro*/
1257         0,                                      /*tp_setattro*/
1258         0,                                      /*tp_as_buffer*/
1259         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1260                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
1261         Writer_Type_doc,
1262         (traverseproc)Writer_traverse,          /*tp_traverse*/
1263         (inquiry)Writer_clear,                  /*tp_clear*/
1264         0,                                      /*tp_richcompare*/
1265         0,                                      /*tp_weaklistoffset*/
1266         (getiterfunc)0,                         /*tp_iter*/
1267         (getiterfunc)0,                         /*tp_iternext*/
1268         Writer_methods,                         /*tp_methods*/
1269         Writer_memberlist,                      /*tp_members*/
1270         0,                                      /*tp_getset*/
1271 };
1272
1273 static PyObject *
1274 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1275 {
1276         PyObject * output_file, * dialect = NULL, *ctor_args;
1277         WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1278
1279         if (!self)
1280                 return NULL;
1281
1282         self->dialect = NULL;
1283         self->writeline = NULL;
1284
1285         self->rec = NULL;
1286         self->rec_size = 0;
1287         self->rec_len = 0;
1288         self->num_fields = 0;
1289
1290         if (!PyArg_ParseTuple(args, "O|O", &output_file, &dialect)) {
1291                 Py_DECREF(self);
1292                 return NULL;
1293         }
1294         self->writeline = PyObject_GetAttrString(output_file, "write");
1295         if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1296                 PyErr_SetString(PyExc_TypeError,
1297                                 "argument 1 must be an instance with a write method");
1298                 Py_DECREF(self);
1299                 return NULL;
1300         }
1301         ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1302         if (ctor_args == NULL) {
1303                 Py_DECREF(self);
1304                 return NULL;
1305         }
1306         self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1307                                                     ctor_args, keyword_args);
1308         Py_DECREF(ctor_args);
1309         if (self->dialect == NULL) {
1310                 Py_DECREF(self);
1311                 return NULL;
1312         }
1313         return (PyObject *)self;
1314 }
1315
1316 /*
1317  * DIALECT REGISTRY
1318  */
1319 static PyObject *
1320 csv_list_dialects(PyObject *module, PyObject *args)
1321 {
1322         return PyDict_Keys(dialects);
1323 }
1324
1325 static PyObject *
1326 csv_register_dialect(PyObject *module, PyObject *args)
1327 {
1328         PyObject *name_obj, *dialect_obj;
1329
1330         if (!PyArg_ParseTuple(args, "OO", &name_obj, &dialect_obj))
1331                 return NULL;
1332         if (!PyString_Check(name_obj)
1333 #ifdef Py_USING_UNICODE
1334 && !PyUnicode_Check(name_obj)
1335 #endif
1336 ) {
1337                 PyErr_SetString(PyExc_TypeError,
1338                                 "dialect name must be a string or unicode");
1339                 return NULL;
1340         }
1341         Py_INCREF(dialect_obj);
1342         /* A class rather than an instance? Instanciate */
1343         if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1344                 PyObject * new_dia;
1345                 new_dia = PyObject_CallFunction(dialect_obj, "");
1346                 Py_DECREF(dialect_obj);
1347                 if (new_dia == NULL)
1348                         return NULL;
1349                 dialect_obj = new_dia;
1350         }
1351         /* Make sure we finally have an instance */
1352         if (!PyInstance_Check(dialect_obj)) {
1353                 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1354                 Py_DECREF(dialect_obj);
1355                 return NULL;
1356         }
1357         if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1358                 Py_DECREF(dialect_obj);
1359                 return NULL;
1360         }
1361         if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1362                 Py_DECREF(dialect_obj);
1363                 return NULL;
1364         }
1365         Py_DECREF(dialect_obj);
1366         Py_INCREF(Py_None);
1367         return Py_None;
1368 }
1369
1370 static PyObject *
1371 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1372 {
1373         if (PyDict_DelItem(dialects, name_obj) < 0)
1374                 return PyErr_Format(error_obj, "unknown dialect");
1375         Py_INCREF(Py_None);
1376         return Py_None;
1377 }
1378
1379 static PyObject *
1380 csv_get_dialect(PyObject *module, PyObject *name_obj)
1381 {
1382         return get_dialect_from_registry(name_obj);
1383 }
1384
1385 /*
1386  * MODULE
1387  */
1388
1389 PyDoc_STRVAR(csv_module_doc,
1390 "CSV parsing and writing.\n"
1391 "\n"
1392 "This module provides classes that assist in the reading and writing\n"
1393 "of Comma Separated Value (CSV) files, and implements the interface\n"
1394 "described by PEP 305.  Although many CSV files are simple to parse,\n"
1395 "the format is not formally defined by a stable specification and\n"
1396 "is subtle enough that parsing lines of a CSV file with something\n"
1397 "like line.split(\",\") is bound to fail.  The module supports three\n"
1398 "basic APIs: reading, writing, and registration of dialects.\n"
1399 "\n"
1400 "\n"
1401 "DIALECT REGISTRATION:\n"
1402 "\n"
1403 "Readers and writers support a dialect argument, which is a convenient\n"
1404 "handle on a group of settings.  When the dialect argument is a string,\n"
1405 "it identifies one of the dialects previously registered with the module.\n"
1406 "If it is a class or instance, the attributes of the argument are used as\n"
1407 "the settings for the reader or writer:\n"
1408 "\n"
1409 "    class excel:\n"
1410 "        delimiter = ','\n"
1411 "        quotechar = '\"'\n"
1412 "        escapechar = None\n"
1413 "        doublequote = True\n"
1414 "        skipinitialspace = False\n"
1415 "        lineterminator = '\r\n'\n"
1416 "        quoting = QUOTE_MINIMAL\n"
1417 "\n"
1418 "SETTINGS:\n"
1419 "\n"
1420 "    * quotechar - specifies a one-character string to use as the \n"
1421 "        quoting character.  It defaults to '\"'.\n"
1422 "    * delimiter - specifies a one-character string to use as the \n"
1423 "        field separator.  It defaults to ','.\n"
1424 "    * skipinitialspace - specifies how to interpret whitespace which\n"
1425 "        immediately follows a delimiter.  It defaults to False, which\n"
1426 "        means that whitespace immediately following a delimiter is part\n"
1427 "        of the following field.\n"
1428 "    * lineterminator -  specifies the character sequence which should \n"
1429 "        terminate rows.\n"
1430 "    * quoting - controls when quotes should be generated by the writer.\n"
1431 "        It can take on any of the following module constants:\n"
1432 "\n"
1433 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1434 "            field contains either the quotechar or the delimiter\n"
1435 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1436 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1437 "            fields which do not parse as integers or floating point\n"
1438 "            numbers.\n"
1439 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1440 "    * escapechar - specifies a one-character string used to escape \n"
1441 "        the delimiter when quoting is set to QUOTE_NONE.\n"
1442 "    * doublequote - controls the handling of quotes inside fields.  When\n"
1443 "        True, two consecutive quotes are interpreted as one during read,\n"
1444 "        and when writing, each quote character embedded in the data is\n"
1445 "        written as two quotes\n");
1446
1447 PyDoc_STRVAR(csv_reader_doc,
1448 "    csv_reader = reader(iterable [, dialect='excel']\n"
1449 "                        [optional keyword args])\n"
1450 "    for row in csv_reader:\n"
1451 "        process(row)\n"
1452 "\n"
1453 "The \"iterable\" argument can be any object that returns a line\n"
1454 "of input for each iteration, such as a file object or a list.  The\n"
1455 "optional \"dialect\" parameter is discussed below.  The function\n"
1456 "also accepts optional keyword arguments which override settings\n"
1457 "provided by the dialect.\n"
1458 "\n"
1459 "The returned object is an iterator.  Each iteration returns a row\n"
1460              "of the CSV file (which can span multiple input lines):\n");
1461
1462 PyDoc_STRVAR(csv_writer_doc,
1463 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1464 "                            [optional keyword args])\n"
1465 "    for row in csv_writer:\n"
1466 "        csv_writer.writerow(row)\n"
1467 "\n"
1468 "    [or]\n"
1469 "\n"
1470 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1471 "                            [optional keyword args])\n"
1472 "    csv_writer.writerows(rows)\n"
1473 "\n"
1474 "The \"fileobj\" argument can be any object that supports the file API.\n");
1475
1476 PyDoc_STRVAR(csv_list_dialects_doc,
1477 "Return a list of all know dialect names.\n"
1478 "    names = csv.list_dialects()");
1479
1480 PyDoc_STRVAR(csv_get_dialect_doc,
1481 "Return the dialect instance associated with name.\n"
1482 "    dialect = csv.get_dialect(name)");
1483
1484 PyDoc_STRVAR(csv_register_dialect_doc,
1485 "Create a mapping from a string name to a dialect class.\n"
1486 "    dialect = csv.register_dialect(name, dialect)");
1487
1488 PyDoc_STRVAR(csv_unregister_dialect_doc,
1489 "Delete the name/dialect mapping associated with a string name.\n"
1490 "    csv.unregister_dialect(name)");
1491
1492 static struct PyMethodDef csv_methods[] = {
1493         { "reader", (PyCFunction)csv_reader,
1494             METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1495         { "writer", (PyCFunction)csv_writer,
1496             METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1497         { "list_dialects", (PyCFunction)csv_list_dialects,
1498             METH_NOARGS, csv_list_dialects_doc},
1499         { "register_dialect", (PyCFunction)csv_register_dialect,
1500             METH_VARARGS, csv_register_dialect_doc},
1501         { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1502             METH_O, csv_unregister_dialect_doc},
1503         { "get_dialect", (PyCFunction)csv_get_dialect,
1504             METH_O, csv_get_dialect_doc},
1505         { NULL, NULL }
1506 };
1507
1508 PyMODINIT_FUNC
1509 init_csv(void)
1510 {
1511         PyObject *module;
1512         StyleDesc *style;
1513
1514         if (PyType_Ready(&Dialect_Type) < 0)
1515                 return;
1516
1517         if (PyType_Ready(&Reader_Type) < 0)
1518                 return;
1519
1520         if (PyType_Ready(&Writer_Type) < 0)
1521                 return;
1522
1523         /* Create the module and add the functions */
1524         module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1525         if (module == NULL)
1526                 return;
1527
1528         /* Add version to the module. */
1529         if (PyModule_AddStringConstant(module, "__version__",
1530                                        MODULE_VERSION) == -1)
1531                 return;
1532
1533         /* Add _dialects dictionary */
1534         dialects = PyDict_New();
1535         if (dialects == NULL)
1536                 return;
1537         if (PyModule_AddObject(module, "_dialects", dialects))
1538                 return;
1539
1540         /* Add quote styles into dictionary */
1541         for (style = quote_styles; style->name; style++) {
1542                 if (PyModule_AddIntConstant(module, style->name,
1543                                             style->style) == -1)
1544                         return;
1545         }
1546
1547         /* Add the Dialect type */
1548         if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1549                 return;
1550
1551         /* Add the CSV exception object to the module. */
1552         error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1553         if (error_obj == NULL)
1554                 return;
1555         PyModule_AddObject(module, "Error", error_obj);
1556 }