1 # Copyright (C) 2001,2002 Python Software Foundation
2 # csv package unit tests
6 from StringIO
import StringIO
9 from test
.test_support
import verbose
11 class Test_Csv(unittest
.TestCase
):
13 Test the underlying C csv parser in ways that are not appropriate
14 from the high level interface. Further tests of this nature are done
15 in TestDialectRegistry.
17 def test_reader_arg_valid(self
):
18 self
.assertRaises(TypeError, csv
.reader
)
19 self
.assertRaises(TypeError, csv
.reader
, None)
20 self
.assertRaises(AttributeError, csv
.reader
, [], bad_attr
= 0)
21 self
.assertRaises(csv
.Error
, csv
.reader
, [], 'foo')
25 self
.assertRaises(IOError, csv
.reader
, [], BadClass
)
26 self
.assertRaises(TypeError, csv
.reader
, [], None)
29 self
.assertRaises(AttributeError, csv
.reader
, [], BadDialect
)
31 def test_writer_arg_valid(self
):
32 self
.assertRaises(TypeError, csv
.writer
)
33 self
.assertRaises(TypeError, csv
.writer
, None)
34 self
.assertRaises(AttributeError, csv
.writer
, StringIO(), bad_attr
= 0)
36 def _test_attrs(self
, obj
):
37 self
.assertEqual(obj
.dialect
.delimiter
, ',')
38 obj
.dialect
.delimiter
= '\t'
39 self
.assertEqual(obj
.dialect
.delimiter
, '\t')
40 self
.assertRaises(TypeError, delattr, obj
.dialect
, 'delimiter')
41 self
.assertRaises(TypeError, setattr, obj
.dialect
,
42 'lineterminator', None)
43 obj
.dialect
.escapechar
= None
44 self
.assertEqual(obj
.dialect
.escapechar
, None)
45 self
.assertRaises(TypeError, delattr, obj
.dialect
, 'quoting')
46 self
.assertRaises(TypeError, setattr, obj
.dialect
, 'quoting', None)
47 obj
.dialect
.quoting
= csv
.QUOTE_MINIMAL
48 self
.assertEqual(obj
.dialect
.quoting
, csv
.QUOTE_MINIMAL
)
50 def test_reader_attrs(self
):
51 self
._test
_attrs
(csv
.reader([]))
53 def test_writer_attrs(self
):
54 self
._test
_attrs
(csv
.writer(StringIO()))
56 def _write_test(self
, fields
, expect
, **kwargs
):
58 writer
= csv
.writer(fileobj
, **kwargs
)
59 writer
.writerow(fields
)
60 self
.assertEqual(fileobj
.getvalue(),
61 expect
+ writer
.dialect
.lineterminator
)
63 def test_write_arg_valid(self
):
64 self
.assertRaises(csv
.Error
, self
._write
_test
, None, '')
65 self
._write
_test
((), '')
66 self
._write
_test
([None], '""')
67 self
.assertRaises(csv
.Error
, self
._write
_test
,
68 [None], None, quoting
= csv
.QUOTE_NONE
)
69 # Check that exceptions are passed up the chain
73 def __getitem__(self
, i
):
76 self
.assertRaises(IOError, self
._write
_test
, BadList(), '')
80 self
.assertRaises(IOError, self
._write
_test
, [BadItem()], '')
82 def test_write_bigfield(self
):
83 # This exercises the buffer realloc functionality
84 bigstring
= 'X' * 50000
85 self
._write
_test
([bigstring
,bigstring
], '%s,%s' % \
86 (bigstring
, bigstring
))
88 def test_write_quoting(self
):
89 self
._write
_test
(['a','1','p,q'], 'a,1,"p,q"')
90 self
.assertRaises(csv
.Error
,
92 ['a','1','p,q'], 'a,1,"p,q"',
93 quoting
= csv
.QUOTE_NONE
)
94 self
._write
_test
(['a','1','p,q'], 'a,1,"p,q"',
95 quoting
= csv
.QUOTE_MINIMAL
)
96 self
._write
_test
(['a','1','p,q'], '"a",1,"p,q"',
97 quoting
= csv
.QUOTE_NONNUMERIC
)
98 self
._write
_test
(['a','1','p,q'], '"a","1","p,q"',
99 quoting
= csv
.QUOTE_ALL
)
101 def test_write_escape(self
):
102 self
._write
_test
(['a','1','p,q'], 'a,1,"p,q"',
104 # FAILED - needs to be fixed [am]:
105 # self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
106 # escapechar='\\', doublequote = 0)
107 self
._write
_test
(['a','1','p,q'], 'a,1,p\\,q',
108 escapechar
='\\', quoting
= csv
.QUOTE_NONE
)
110 def test_writerows(self
):
112 def write(self
, buf
):
114 writer
= csv
.writer(BrokenFile())
115 self
.assertRaises(IOError, writer
.writerows
, [['a']])
117 writer
= csv
.writer(fileobj
)
118 self
.assertRaises(TypeError, writer
.writerows
, None)
119 writer
.writerows([['a','b'],['c','d']])
120 self
.assertEqual(fileobj
.getvalue(), "a,b\r\nc,d\r\n")
122 def _read_test(self
, input, expect
, **kwargs
):
123 reader
= csv
.reader(input, **kwargs
)
124 result
= list(reader
)
125 self
.assertEqual(result
, expect
)
127 def test_read_oddinputs(self
):
128 self
._read
_test
([], [])
129 self
._read
_test
([''], [[]])
130 self
.assertRaises(csv
.Error
, self
._read
_test
,
131 ['"ab"c'], None, strict
= 1)
132 # cannot handle null bytes for the moment
133 self
.assertRaises(csv
.Error
, self
._read
_test
,
134 ['ab\0c'], None, strict
= 1)
135 self
._read
_test
(['"ab"c'], [['abc']], doublequote
= 0)
137 def test_read_eol(self
):
138 self
._read
_test
(['a,b'], [['a','b']])
139 self
._read
_test
(['a,b\n'], [['a','b']])
140 self
._read
_test
(['a,b\r\n'], [['a','b']])
141 self
._read
_test
(['a,b\r'], [['a','b']])
142 self
.assertRaises(csv
.Error
, self
._read
_test
, ['a,b\rc,d'], [])
143 self
.assertRaises(csv
.Error
, self
._read
_test
, ['a,b\nc,d'], [])
144 self
.assertRaises(csv
.Error
, self
._read
_test
, ['a,b\r\nc,d'], [])
146 def test_read_escape(self
):
147 self
._read
_test
(['a,\\b,c'], [['a', '\\b', 'c']], escapechar
='\\')
148 self
._read
_test
(['a,b\\,c'], [['a', 'b,c']], escapechar
='\\')
149 self
._read
_test
(['a,"b\\,c"'], [['a', 'b,c']], escapechar
='\\')
150 self
._read
_test
(['a,"b,\\c"'], [['a', 'b,\\c']], escapechar
='\\')
151 self
._read
_test
(['a,"b,c\\""'], [['a', 'b,c"']], escapechar
='\\')
152 self
._read
_test
(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar
='\\')
154 def test_read_bigfield(self
):
155 # This exercises the buffer realloc functionality
156 bigstring
= 'X' * 50000
157 bigline
= '%s,%s' % (bigstring
, bigstring
)
158 self
._read
_test
([bigline
], [[bigstring
, bigstring
]])
160 class TestDialectRegistry(unittest
.TestCase
):
161 def test_registry_badargs(self
):
162 self
.assertRaises(TypeError, csv
.list_dialects
, None)
163 self
.assertRaises(TypeError, csv
.get_dialect
)
164 self
.assertRaises(csv
.Error
, csv
.get_dialect
, None)
165 self
.assertRaises(csv
.Error
, csv
.get_dialect
, "nonesuch")
166 self
.assertRaises(TypeError, csv
.unregister_dialect
)
167 self
.assertRaises(csv
.Error
, csv
.unregister_dialect
, None)
168 self
.assertRaises(csv
.Error
, csv
.unregister_dialect
, "nonesuch")
169 self
.assertRaises(TypeError, csv
.register_dialect
, None)
170 self
.assertRaises(TypeError, csv
.register_dialect
, None, None)
171 self
.assertRaises(TypeError, csv
.register_dialect
, "nonesuch", None)
175 self
.assertRaises(KeyError, csv
.register_dialect
, "nonesuch", bogus
)
177 def test_registry(self
):
178 class myexceltsv(csv
.excel
):
181 expected_dialects
= csv
.list_dialects() + [name
]
182 expected_dialects
.sort()
183 csv
.register_dialect(name
, myexceltsv
)
185 self
.failUnless(isinstance(csv
.get_dialect(name
), myexceltsv
))
186 got_dialects
= csv
.list_dialects()
188 self
.assertEqual(expected_dialects
, got_dialects
)
190 csv
.unregister_dialect(name
)
192 def test_incomplete_dialect(self
):
193 class myexceltsv(csv
.Dialect
):
195 self
.assertRaises(csv
.Error
, myexceltsv
)
197 def test_space_dialect(self
):
198 class space(csv
.excel
):
200 quoting
= csv
.QUOTE_NONE
203 s
= StringIO("abc def\nc1ccccc1 benzene\n")
204 rdr
= csv
.reader(s
, dialect
=space())
205 self
.assertEqual(rdr
.next(), ["abc", "def"])
206 self
.assertEqual(rdr
.next(), ["c1ccccc1", "benzene"])
208 def test_dialect_apply(self
):
209 class testA(csv
.excel
):
211 class testB(csv
.excel
):
213 class testC(csv
.excel
):
216 csv
.register_dialect('testC', testC
)
219 writer
= csv
.writer(fileobj
)
220 writer
.writerow([1,2,3])
221 self
.assertEqual(fileobj
.getvalue(), "1,2,3\r\n")
224 writer
= csv
.writer(fileobj
, testA
)
225 writer
.writerow([1,2,3])
226 self
.assertEqual(fileobj
.getvalue(), "1\t2\t3\r\n")
229 writer
= csv
.writer(fileobj
, dialect
=testB())
230 writer
.writerow([1,2,3])
231 self
.assertEqual(fileobj
.getvalue(), "1:2:3\r\n")
234 writer
= csv
.writer(fileobj
, dialect
='testC')
235 writer
.writerow([1,2,3])
236 self
.assertEqual(fileobj
.getvalue(), "1|2|3\r\n")
239 writer
= csv
.writer(fileobj
, dialect
=testA
, delimiter
=';')
240 writer
.writerow([1,2,3])
241 self
.assertEqual(fileobj
.getvalue(), "1;2;3\r\n")
243 csv
.unregister_dialect('testC')
245 def test_bad_dialect(self
):
247 self
.assertRaises(AttributeError, csv
.reader
, [], bad_attr
= 0)
249 self
.assertRaises(TypeError, csv
.reader
, [], delimiter
= None)
250 self
.assertRaises(TypeError, csv
.reader
, [], quoting
= -1)
251 self
.assertRaises(TypeError, csv
.reader
, [], quoting
= 100)
class TestCsvBase(unittest.TestCase):
    """Shared helpers for dialect tests.

    Subclasses provide a ``dialect`` attribute, which both helpers pass to
    the csv reader/writer they create.
    """

    def readerAssertEqual(self, input, expected_result):
        # Parse the string ``input`` with self.dialect and compare all rows.
        reader = csv.reader(StringIO(input), dialect=self.dialect)
        fields = list(reader)
        self.assertEqual(fields, expected_result)

    def writerAssertEqual(self, input, expected_result):
        # Write the rows in ``input`` with self.dialect and compare the text
        # produced with ``expected_result``.
        fileobj = StringIO()
        writer = csv.writer(fileobj, dialect=self.dialect)
        writer.writerows(input)
        self.assertEqual(fileobj.getvalue(), expected_result)
class TestDialectExcel(TestCsvBase):
    """Reader and writer behaviour of the built-in 'excel' dialect."""

    dialect = 'excel'

    # -- reader tests ------------------------------------------------------

    def test_single(self):
        self.readerAssertEqual('abc', [['abc']])

    def test_simple(self):
        self.readerAssertEqual('1,2,3,4,5', [['1', '2', '3', '4', '5']])

    def test_blankline(self):
        self.readerAssertEqual('', [])

    def test_empty_fields(self):
        self.readerAssertEqual(',', [['', '']])

    def test_singlequoted(self):
        self.readerAssertEqual('""', [['']])

    def test_singlequoted_left_empty(self):
        self.readerAssertEqual('"",', [['', '']])

    def test_singlequoted_right_empty(self):
        self.readerAssertEqual(',""', [['', '']])

    def test_single_quoted_quote(self):
        self.readerAssertEqual('""""', [['"']])

    def test_quoted_quotes(self):
        self.readerAssertEqual('""""""', [['""']])

    def test_inline_quote(self):
        self.readerAssertEqual('a""b', [['a""b']])

    def test_inline_quotes(self):
        self.readerAssertEqual('a"b"c', [['a"b"c']])

    def test_quotes_and_more(self):
        self.readerAssertEqual('"a"b', [['ab']])

    def test_lone_quote(self):
        self.readerAssertEqual('a"b', [['a"b']])

    def test_quote_and_quote(self):
        self.readerAssertEqual('"a" "b"', [['a "b"']])

    def test_space_and_quote(self):
        self.readerAssertEqual(' "a"', [[' "a"']])

    def test_quoted(self):
        self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6',
                               [['1', '2', '3',
                                 'I think, therefore I am',
                                 '5', '6']])

    def test_quoted_quote(self):
        self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"',
                               [['1', '2', '3',
                                 '"I see," said the blind man',
                                 'as he picked up his hammer and saw']])

    def test_quoted_nl(self):
        # Quoted fields may span physical lines; the newline is preserved.
        input = '''\
1,2,3,"""I see,""
said the blind man","as he picked up his
hammer and saw"
9,8,7,6'''
        self.readerAssertEqual(input,
                               [['1', '2', '3',
                                 '"I see,"\nsaid the blind man',
                                 'as he picked up his\nhammer and saw'],
                                ['9', '8', '7', '6']])

    def test_dubious_quote(self):
        self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']])

    # -- writer tests ------------------------------------------------------
    # The single/simple writer tests carry a ``_writer`` suffix: reusing the
    # names test_single/test_simple would rebind them in the class body and
    # silently drop the reader tests of the same name above.

    def test_null(self):
        self.writerAssertEqual([], '')

    def test_single_writer(self):
        self.writerAssertEqual([['abc']], 'abc\r\n')

    def test_simple_writer(self):
        self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n')

    def test_quotes(self):
        self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]], '1,2,"a""bc""",3,4\r\n')

    def test_quote_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_newlines(self):
        self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]], '1,2,"a\nbc",3,4\r\n')
class EscapedExcel(csv.excel):
    """Excel dialect that never quotes and escapes special characters."""

    quoting = csv.QUOTE_NONE
    # Without quoting, a field containing the delimiter can only be written
    # (and read back) by escaping it.
    escapechar = '\\'
class TestEscapedExcel(TestCsvBase):
    """Round-trip checks for the non-quoting EscapedExcel dialect."""

    dialect = EscapedExcel()

    def test_escape_fieldsep(self):
        rows = [['abc,def']]
        self.writerAssertEqual(rows, 'abc\\,def\r\n')

    def test_read_escape_fieldsep(self):
        text = 'abc\\,def\r\n'
        self.readerAssertEqual(text, [['abc,def']])
class QuotedEscapedExcel(csv.excel):
    """Excel dialect that quotes non-numeric fields and honours escapes."""

    quoting = csv.QUOTE_NONNUMERIC
    # Needed so that an escaped delimiter inside a quoted field is read
    # back without the escape character.
    escapechar = '\\'
class TestQuotedEscapedExcel(TestCsvBase):
    """Round-trip checks for the quoting QuotedEscapedExcel dialect."""

    dialect = QuotedEscapedExcel()

    def test_write_escape_fieldsep(self):
        rows = [['abc,def']]
        self.writerAssertEqual(rows, '"abc,def"\r\n')

    def test_read_escape_fieldsep(self):
        text = '"abc\\,def"\r\n'
        self.readerAssertEqual(text, [['abc,def']])
384 # Disabled, pending support in csv.utils module
385 class TestDictFields(unittest
.TestCase
):
386 ### "long" means the row is longer than the number of fieldnames
387 ### "short" means there are fewer elements in the row than fieldnames
388 def test_write_simple_dict(self
):
390 writer
= csv
.DictWriter(fileobj
, fieldnames
= ["f1", "f2", "f3"])
391 writer
.writerow({"f1": 10, "f3": "abc"})
392 self
.assertEqual(fileobj
.getvalue(), "10,,abc\r\n")
394 def test_write_no_fields(self
):
396 self
.assertRaises(TypeError, csv
.DictWriter
, fileobj
)
398 def test_read_dict_fields(self
):
399 reader
= csv
.DictReader(StringIO("1,2,abc\r\n"),
400 fieldnames
=["f1", "f2", "f3"])
401 self
.assertEqual(reader
.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
403 def test_read_long(self
):
404 reader
= csv
.DictReader(StringIO("1,2,abc,4,5,6\r\n"),
405 fieldnames
=["f1", "f2"])
406 self
.assertEqual(reader
.next(), {"f1": '1', "f2": '2',
407 None: ["abc", "4", "5", "6"]})
409 def test_read_long_with_rest(self
):
410 reader
= csv
.DictReader(StringIO("1,2,abc,4,5,6\r\n"),
411 fieldnames
=["f1", "f2"], restkey
="_rest")
412 self
.assertEqual(reader
.next(), {"f1": '1', "f2": '2',
413 "_rest": ["abc", "4", "5", "6"]})
415 def test_read_short(self
):
416 reader
= csv
.DictReader(["1,2,abc,4,5,6\r\n","1,2,abc\r\n"],
417 fieldnames
="1 2 3 4 5 6".split(),
419 self
.assertEqual(reader
.next(), {"1": '1', "2": '2', "3": 'abc',
420 "4": '4', "5": '5', "6": '6'})
421 self
.assertEqual(reader
.next(), {"1": '1', "2": '2', "3": 'abc',
422 "4": 'DEFAULT', "5": 'DEFAULT',
425 def test_read_with_blanks(self
):
426 reader
= csv
.DictReader(["1,2,abc,4,5,6\r\n","\r\n",
427 "1,2,abc,4,5,6\r\n"],
428 fieldnames
="1 2 3 4 5 6".split())
429 self
.assertEqual(reader
.next(), {"1": '1', "2": '2', "3": 'abc',
430 "4": '4', "5": '5', "6": '6'})
431 self
.assertEqual(reader
.next(), {"1": '1', "2": '2', "3": 'abc',
432 "4": '4', "5": '5', "6": '6'})
434 class TestArrayWrites(unittest
.TestCase
):
435 def test_int_write(self
):
437 contents
= [(20-i
) for i
in range(20)]
438 a
= array
.array('i', contents
)
440 writer
= csv
.writer(fileobj
, dialect
="excel")
442 expected
= ",".join([str(i
) for i
in a
])+"\r\n"
443 self
.assertEqual(fileobj
.getvalue(), expected
)
445 def test_double_write(self
):
447 contents
= [(20-i
)*0.1 for i
in range(20)]
448 a
= array
.array('d', contents
)
450 writer
= csv
.writer(fileobj
, dialect
="excel")
452 expected
= ",".join([str(i
) for i
in a
])+"\r\n"
453 self
.assertEqual(fileobj
.getvalue(), expected
)
455 def test_float_write(self
):
457 contents
= [(20-i
)*0.1 for i
in range(20)]
458 a
= array
.array('f', contents
)
460 writer
= csv
.writer(fileobj
, dialect
="excel")
462 expected
= ",".join([str(i
) for i
in a
])+"\r\n"
463 self
.assertEqual(fileobj
.getvalue(), expected
)
465 def test_char_write(self
):
467 a
= array
.array('c', string
.letters
)
469 writer
= csv
.writer(fileobj
, dialect
="excel")
471 expected
= ",".join(a
)+"\r\n"
472 self
.assertEqual(fileobj
.getvalue(), expected
)
474 class TestDialectValidity(unittest
.TestCase
):
475 def test_quoting(self
):
476 class mydialect(csv
.Dialect
):
480 skipinitialspace
= True
481 lineterminator
= '\r\n'
482 quoting
= csv
.QUOTE_NONE
485 mydialect
.quoting
= None
486 self
.assertRaises(csv
.Error
, mydialect
)
488 mydialect
.quoting
= csv
.QUOTE_NONE
489 mydialect
.escapechar
= None
490 self
.assertRaises(csv
.Error
, mydialect
)
492 mydialect
.doublequote
= True
493 mydialect
.quoting
= csv
.QUOTE_ALL
494 mydialect
.quotechar
= '"'
497 mydialect
.quotechar
= "''"
498 self
.assertRaises(csv
.Error
, mydialect
)
500 mydialect
.quotechar
= 4
501 self
.assertRaises(csv
.Error
, mydialect
)
503 def test_delimiter(self
):
504 class mydialect(csv
.Dialect
):
508 skipinitialspace
= True
509 lineterminator
= '\r\n'
510 quoting
= csv
.QUOTE_NONE
513 mydialect
.delimiter
= ":::"
514 self
.assertRaises(csv
.Error
, mydialect
)
516 mydialect
.delimiter
= 4
517 self
.assertRaises(csv
.Error
, mydialect
)
519 def test_lineterminator(self
):
520 class mydialect(csv
.Dialect
):
524 skipinitialspace
= True
525 lineterminator
= '\r\n'
526 quoting
= csv
.QUOTE_NONE
529 mydialect
.lineterminator
= ":::"
532 mydialect
.lineterminator
= 4
533 self
.assertRaises(csv
.Error
, mydialect
)
class TestSniffer(unittest.TestCase):
    """Tests for csv.Sniffer dialect and header detection."""

    # Comma-delimited sample with a space after each delimiter.
    sample1 = """\
Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
Shark City, Glendale Heights, IL, 12/28/02, Prezence
Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
"""
    # Colon-delimited sample with single-quoted fields.
    sample2 = """\
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
"""

    # Header row matching the columns of sample1.
    header = '''\
"venue","city","state","date","performers"
'''

    def test_has_header(self):
        sniffer = csv.Sniffer()
        self.assertEqual(sniffer.has_header(self.sample1), False)
        self.assertEqual(sniffer.has_header(self.header + self.sample1), True)

    def test_sniff(self):
        sniffer = csv.Sniffer()
        dialect = sniffer.sniff(self.sample1)
        self.assertEqual(dialect.delimiter, ",")
        self.assertEqual(dialect.quotechar, '"')
        self.assertEqual(dialect.skipinitialspace, True)

        dialect = sniffer.sniff(self.sample2)
        self.assertEqual(dialect.delimiter, ":")
        self.assertEqual(dialect.quotechar, "'")
        self.assertEqual(dialect.skipinitialspace, False)
# The leak tests compare successive values of sys.gettotalrefcount(), which
# this block only uses when the interpreter provides it.
if not hasattr(sys, "gettotalrefcount"):
    if verbose:
        print("*** skipping leakage tests ***")
else:
    class NUL:
        """File-like sink that discards everything written to it."""

        def write(s, *args):
            pass
        writelines = write

    class TestLeaks(unittest.TestCase):
        """Reference-leak checks for csv readers and writers.

        Every test repeats the same operations 20 times and records the
        change in total reference count between consecutive rounds.  Once
        warmed up, a leak-free implementation shows a delta smaller than
        the number of operations performed per round.
        """

        def test_create_read(self):
            delta = 0
            lastrc = sys.gettotalrefcount()
            for i in range(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                csv.reader(["a,b,c\r\n"])
                csv.reader(["a,b,c\r\n"])
                csv.reader(["a,b,c\r\n"])
                delta = rc - lastrc
                lastrc = rc
            # if csv.reader() leaks, last delta should be 3 or more
            self.assertEqual(delta < 3, True)

        def test_create_write(self):
            delta = 0
            lastrc = sys.gettotalrefcount()
            s = NUL()
            for i in range(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                csv.writer(s)
                csv.writer(s)
                csv.writer(s)
                delta = rc - lastrc
                lastrc = rc
            # if csv.writer() leaks, last delta should be 3 or more
            self.assertEqual(delta < 3, True)

        def test_read(self):
            delta = 0
            rows = ["a,b,c\r\n"]*5
            lastrc = sys.gettotalrefcount()
            for i in range(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                rdr = csv.reader(rows)
                for row in rdr:
                    pass
                delta = rc - lastrc
                lastrc = rc
            # if reader leaks during read, delta should be 5 or more
            self.assertEqual(delta < 5, True)

        def test_write(self):
            delta = 0
            rows = [[1, 2, 3]]*5
            s = NUL()
            lastrc = sys.gettotalrefcount()
            for i in range(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                writer = csv.writer(s)
                for row in rows:
                    writer.writerow(row)
                delta = rc - lastrc
                lastrc = rc
            # if writer leaks during write, last delta should be 5 or more
            self.assertEqual(delta < 5, True)
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'."""
    mod = sys.modules[__name__]
    return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
def suite():
    """Build a TestSuite holding the tests of every Test* class found."""
    # Local name differs from the function name to avoid shadowing it.
    tests = unittest.TestSuite()
    for testclass in _testclasses():
        tests.addTest(unittest.makeSuite(testclass))
    return tests
# When run as a script, hand the module-level 'suite' to unittest's runner.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')