1 """ Test script for the Unicode implementation.
3 Written by Marc-Andre Lemburg (mal@lemburg.com).
5 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8 from test_support
import verify
, verbose
, TestFailed
11 if not sys
.platform
.startswith('java'):
12 # Test basic sanity of repr()
13 verify(repr(u
'abc') == "u'abc'")
14 verify(repr(u
'ab\\c') == "u'ab\\\\c'")
15 verify(repr(u
'ab\\') == "u'ab\\\\'")
16 verify(repr(u
'\\c') == "u'\\\\c'")
17 verify(repr(u
'\\') == "u'\\\\'")
18 verify(repr(u
'\n') == "u'\\n'")
19 verify(repr(u
'\r') == "u'\\r'")
20 verify(repr(u
'\t') == "u'\\t'")
21 verify(repr(u
'\b') == "u'\\x08'")
22 verify(repr(u
"'\"") == """u'\\'"'""")
23 verify(repr(u
"'\"") == """u'\\'"'""")
24 verify(repr(u
"'") == '''u"'"''')
25 verify(repr(u
'"') == """u'"'""")
26 verify(repr(u
''.join(map(unichr, range(256)))) ==
27 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
28 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
29 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
30 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
31 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
32 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
33 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
34 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
35 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
36 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
37 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
38 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
39 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
42 def test(method
, input, output
, *args
):
44 print '%s.%s%s =? %s... ' % (repr(input), method
, args
, repr(output
)),
46 f
= getattr(input, method
)
47 value
= apply(f
, args
)
50 exc
= sys
.exc_info()[:2]
53 if value
!= output
or type(value
) is not type(output
):
56 print '*',f
, `
input`
, `output`
, `value`
58 print ' value == %s: %s' % (exc
)
63 test('capitalize', u
' hello ', u
' hello ')
64 test('capitalize', u
'hello ', u
'Hello ')
65 test('capitalize', u
'aaaa', u
'Aaaa')
66 test('capitalize', u
'AaAa', u
'Aaaa')
68 test('count', u
'aaa', 3, u
'a')
69 test('count', u
'aaa', 0, u
'b')
70 test('count', 'aaa', 3, u
'a')
71 test('count', 'aaa', 0, u
'b')
72 test('count', u
'aaa', 3, 'a')
73 test('count', u
'aaa', 0, 'b')
75 test('title', u
' hello ', u
' Hello ')
76 test('title', u
'hello ', u
'Hello ')
77 test('title', u
"fOrMaT thIs aS titLe String", u
'Format This As Title String')
78 test('title', u
"fOrMaT,thIs-aS*titLe;String", u
'Format,This-As*Title;String')
79 test('title', u
"getInt", u
'Getint')
81 test('find', u
'abcdefghiabc', 0, u
'abc')
82 test('find', u
'abcdefghiabc', 9, u
'abc', 1)
83 test('find', u
'abcdefghiabc', -1, u
'def', 4)
85 test('rfind', u
'abcdefghiabc', 9, u
'abc')
87 test('lower', u
'HeLLo', u
'hello')
88 test('lower', u
'hello', u
'hello')
90 test('upper', u
'HeLLo', u
'HELLO')
91 test('upper', u
'HELLO', u
'HELLO')
94 transtable
= '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
96 test('maketrans', u
'abc', transtable
, u
'xyz')
97 test('maketrans', u
'abc', ValueError, u
'xyzq')
99 test('split', u
'this is the split function',
100 [u
'this', u
'is', u
'the', u
'split', u
'function'])
101 test('split', u
'a|b|c|d', [u
'a', u
'b', u
'c', u
'd'], u
'|')
102 test('split', u
'a|b|c|d', [u
'a', u
'b', u
'c|d'], u
'|', 2)
103 test('split', u
'a b c d', [u
'a', u
'b c d'], None, 1)
104 test('split', u
'a b c d', [u
'a', u
'b', u
'c d'], None, 2)
105 test('split', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'], None, 3)
106 test('split', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'], None, 4)
107 test('split', u
'a b c d', [u
'a b c d'], None, 0)
108 test('split', u
'a b c d', [u
'a', u
'b', u
'c d'], None, 2)
109 test('split', u
'a b c d ', [u
'a', u
'b', u
'c', u
'd'])
110 test('split', u
'a//b//c//d', [u
'a', u
'b', u
'c', u
'd'], u
'//')
111 test('split', u
'a//b//c//d', [u
'a', u
'b', u
'c', u
'd'], '//')
112 test('split', 'a//b//c//d', [u
'a', u
'b', u
'c', u
'd'], u
'//')
113 test('split', u
'endcase test', [u
'endcase ', u
''], u
'test')
114 test('split', u
'endcase test', [u
'endcase ', u
''], 'test')
115 test('split', 'endcase test', [u
'endcase ', u
''], u
'test')
118 # join now works with any sequence type
120 def __init__(self
, seq
): self
.seq
= seq
121 def __len__(self
): return len(self
.seq
)
122 def __getitem__(self
, i
): return self
.seq
[i
]
124 test('join', u
' ', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'])
125 test('join', u
' ', u
'a b c d', ['a', 'b', u
'c', u
'd'])
126 test('join', u
'', u
'abcd', (u
'a', u
'b', u
'c', u
'd'))
127 test('join', u
' ', u
'w x y z', Sequence('wxyz'))
128 test('join', u
' ', TypeError, 7)
129 test('join', u
' ', TypeError, Sequence([7, u
'hello', 123L]))
130 test('join', ' ', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'])
131 test('join', ' ', u
'a b c d', ['a', 'b', u
'c', u
'd'])
132 test('join', '', u
'abcd', (u
'a', u
'b', u
'c', u
'd'))
133 test('join', ' ', u
'w x y z', Sequence(u
'wxyz'))
134 test('join', ' ', TypeError, 7)
139 result
= result
+ u
':'
140 result
= result
+ u
'x'*10
141 test('join', u
':', result
, [u
'x' * 10] * 10)
142 test('join', u
':', result
, (u
'x' * 10,) * 10)
144 test('strip', u
' hello ', u
'hello')
145 test('lstrip', u
' hello ', u
'hello ')
146 test('rstrip', u
' hello ', u
' hello')
147 test('strip', u
'hello', u
'hello')
149 test('swapcase', u
'HeLLo cOmpUteRs', u
'hEllO CoMPuTErS')
152 test('translate', u
'xyzabcdef', u
'xyzxyz', transtable
, u
'def')
154 table
= string
.maketrans('a', u
'A')
155 test('translate', u
'abc', u
'Abc', table
)
156 test('translate', u
'xyz', u
'xyz', table
)
158 test('replace', u
'one!two!three!', u
'one@two!three!', u
'!', u
'@', 1)
159 test('replace', u
'one!two!three!', u
'onetwothree', '!', '')
160 test('replace', u
'one!two!three!', u
'one@two@three!', u
'!', u
'@', 2)
161 test('replace', u
'one!two!three!', u
'one@two@three@', u
'!', u
'@', 3)
162 test('replace', u
'one!two!three!', u
'one@two@three@', u
'!', u
'@', 4)
163 test('replace', u
'one!two!three!', u
'one!two!three!', u
'!', u
'@', 0)
164 test('replace', u
'one!two!three!', u
'one@two@three@', u
'!', u
'@')
165 test('replace', u
'one!two!three!', u
'one!two!three!', u
'x', u
'@')
166 test('replace', u
'one!two!three!', u
'one!two!three!', u
'x', u
'@', 2)
168 test('startswith', u
'hello', 1, u
'he')
169 test('startswith', u
'hello', 1, u
'hello')
170 test('startswith', u
'hello', 0, u
'hello world')
171 test('startswith', u
'hello', 1, u
'')
172 test('startswith', u
'hello', 0, u
'ello')
173 test('startswith', u
'hello', 1, u
'ello', 1)
174 test('startswith', u
'hello', 1, u
'o', 4)
175 test('startswith', u
'hello', 0, u
'o', 5)
176 test('startswith', u
'hello', 1, u
'', 5)
177 test('startswith', u
'hello', 0, u
'lo', 6)
178 test('startswith', u
'helloworld', 1, u
'lowo', 3)
179 test('startswith', u
'helloworld', 1, u
'lowo', 3, 7)
180 test('startswith', u
'helloworld', 0, u
'lowo', 3, 6)
182 test('endswith', u
'hello', 1, u
'lo')
183 test('endswith', u
'hello', 0, u
'he')
184 test('endswith', u
'hello', 1, u
'')
185 test('endswith', u
'hello', 0, u
'hello world')
186 test('endswith', u
'helloworld', 0, u
'worl')
187 test('endswith', u
'helloworld', 1, u
'worl', 3, 9)
188 test('endswith', u
'helloworld', 1, u
'world', 3, 12)
189 test('endswith', u
'helloworld', 1, u
'lowo', 1, 7)
190 test('endswith', u
'helloworld', 1, u
'lowo', 2, 7)
191 test('endswith', u
'helloworld', 1, u
'lowo', 3, 7)
192 test('endswith', u
'helloworld', 0, u
'lowo', 4, 7)
193 test('endswith', u
'helloworld', 0, u
'lowo', 3, 8)
194 test('endswith', u
'ab', 0, u
'ab', 0, 1)
195 test('endswith', u
'ab', 0, u
'ab', 0, 0)
197 test('expandtabs', u
'abc\rab\tdef\ng\thi', u
'abc\rab def\ng hi')
198 test('expandtabs', u
'abc\rab\tdef\ng\thi', u
'abc\rab def\ng hi', 8)
199 test('expandtabs', u
'abc\rab\tdef\ng\thi', u
'abc\rab def\ng hi', 4)
200 test('expandtabs', u
'abc\r\nab\tdef\ng\thi', u
'abc\r\nab def\ng hi', 4)
203 test('capwords', u
'abc def ghi', u
'Abc Def Ghi')
204 test('capwords', u
'abc\tdef\nghi', u
'Abc Def Ghi')
205 test('capwords', u
'abc\t def \nghi', u
'Abc Def Ghi')
208 print 'Testing Unicode comparisons...',
209 verify(u
'abc' == 'abc')
210 verify('abc' == u
'abc')
211 verify(u
'abc' == u
'abc')
212 verify(u
'abcd' > 'abc')
213 verify('abcd' > u
'abc')
214 verify(u
'abcd' > u
'abc')
215 verify(u
'abc' < 'abcd')
216 verify('abc' < u
'abcd')
217 verify(u
'abc' < u
'abcd')
221 # Move these tests to a Unicode collation module test...
223 print 'Testing UTF-16 code point order comparisons...',
224 #No surrogates, no fixup required.
225 verify(u
'\u0061' < u
'\u20ac')
226 # Non surrogate below surrogate value, no fixup required
227 verify(u
'\u0061' < u
'\ud800\udc02')
229 # Non surrogate above surrogate value, fixup required
230 def test_lecmp(s
, s2
):
231 verify(s
< s2
, "comparison failed on %s < %s" % (s
, s2
))
267 test_fixup(u
'\ue000')
268 test_fixup(u
'\uff61')
270 # Surrogates on both sides, no fixup required
271 verify(u
'\ud800\udc02' < u
'\ud84d\udc56')
274 test('ljust', u
'abc', u
'abc ', 10)
275 test('rjust', u
'abc', u
' abc', 10)
276 test('center', u
'abc', u
' abc ', 10)
277 test('ljust', u
'abc', u
'abc ', 6)
278 test('rjust', u
'abc', u
' abc', 6)
279 test('center', u
'abc', u
' abc ', 6)
280 test('ljust', u
'abc', u
'abc', 2)
281 test('rjust', u
'abc', u
'abc', 2)
282 test('center', u
'abc', u
'abc', 2)
284 test('islower', u
'a', 1)
285 test('islower', u
'A', 0)
286 test('islower', u
'\n', 0)
287 test('islower', u
'\u1FFc', 0)
288 test('islower', u
'abc', 1)
289 test('islower', u
'aBc', 0)
290 test('islower', u
'abc\n', 1)
292 test('isupper', u
'a', 0)
293 test('isupper', u
'A', 1)
294 test('isupper', u
'\n', 0)
295 if sys
.platform
[:4] != 'java':
296 test('isupper', u
'\u1FFc', 0)
297 test('isupper', u
'ABC', 1)
298 test('isupper', u
'AbC', 0)
299 test('isupper', u
'ABC\n', 1)
301 test('istitle', u
'a', 0)
302 test('istitle', u
'A', 1)
303 test('istitle', u
'\n', 0)
304 test('istitle', u
'\u1FFc', 1)
305 test('istitle', u
'A Titlecased Line', 1)
306 test('istitle', u
'A\nTitlecased Line', 1)
307 test('istitle', u
'A Titlecased, Line', 1)
308 test('istitle', u
'Greek \u1FFcitlecases ...', 1)
309 test('istitle', u
'Not a capitalized String', 0)
310 test('istitle', u
'Not\ta Titlecase String', 0)
311 test('istitle', u
'Not--a Titlecase String', 0)
313 test('isalpha', u
'a', 1)
314 test('isalpha', u
'A', 1)
315 test('isalpha', u
'\n', 0)
316 test('isalpha', u
'\u1FFc', 1)
317 test('isalpha', u
'abc', 1)
318 test('isalpha', u
'aBc123', 0)
319 test('isalpha', u
'abc\n', 0)
321 test('isalnum', u
'a', 1)
322 test('isalnum', u
'A', 1)
323 test('isalnum', u
'\n', 0)
324 test('isalnum', u
'123abc456', 1)
325 test('isalnum', u
'a1b3c', 1)
326 test('isalnum', u
'aBc000 ', 0)
327 test('isalnum', u
'abc\n', 0)
329 test('splitlines', u
"abc\ndef\n\rghi", [u
'abc', u
'def', u
'', u
'ghi'])
330 test('splitlines', u
"abc\ndef\n\r\nghi", [u
'abc', u
'def', u
'', u
'ghi'])
331 test('splitlines', u
"abc\ndef\r\nghi", [u
'abc', u
'def', u
'ghi'])
332 test('splitlines', u
"abc\ndef\r\nghi\n", [u
'abc', u
'def', u
'ghi'])
333 test('splitlines', u
"abc\ndef\r\nghi\n\r", [u
'abc', u
'def', u
'ghi', u
''])
334 test('splitlines', u
"\nabc\ndef\r\nghi\n\r", [u
'', u
'abc', u
'def', u
'ghi', u
''])
335 test('splitlines', u
"\nabc\ndef\r\nghi\n\r", [u
'\n', u
'abc\n', u
'def\r\n', u
'ghi\n', u
'\r'], 1)
337 test('translate', u
"abababc", u
'bbbc', {ord('a'):None})
338 test('translate', u
"abababc", u
'iiic', {ord('a'):None, ord('b'):ord('i')})
339 test('translate', u
"abababc", u
'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u
'x'})
342 print 'Testing Unicode contains method...',
343 verify(('a' in u
'abdb') == 1)
344 verify(('a' in u
'bdab') == 1)
345 verify(('a' in u
'bdaba') == 1)
346 verify(('a' in u
'bdba') == 1)
347 verify(('a' in u
'bdba') == 1)
348 verify((u
'a' in u
'bdba') == 1)
349 verify((u
'a' in u
'bdb') == 0)
350 verify((u
'a' in 'bdb') == 0)
351 verify((u
'a' in 'bdba') == 1)
352 verify((u
'a' in ('a',1,None)) == 1)
353 verify((u
'a' in (1,None,'a')) == 1)
354 verify((u
'a' in (1,None,u
'a')) == 1)
355 verify(('a' in ('a',1,None)) == 1)
356 verify(('a' in (1,None,'a')) == 1)
357 verify(('a' in (1,None,u
'a')) == 1)
358 verify(('a' in ('x',1,u
'y')) == 0)
359 verify(('a' in ('x',1,None)) == 0)
363 print 'Testing Unicode formatting strings...',
364 verify(u
"%s, %s" % (u
"abc", "abc") == u
'abc, abc')
365 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", 1, 2, 3) == u
'abc, abc, 1, 2.000000, 3.00')
366 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", 1, -2, 3) == u
'abc, abc, 1, -2.000000, 3.00')
367 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", -1, -2, 3.5) == u
'abc, abc, -1, -2.000000, 3.50')
368 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", -1, -2, 3.57) == u
'abc, abc, -1, -2.000000, 3.57')
369 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", -1, -2, 1003.57) == u
'abc, abc, -1, -2.000000, 1003.57')
370 verify(u
"%c" % (u
"a",) == u
'a')
371 verify(u
"%c" % ("a",) == u
'a')
372 verify(u
"%c" % (34,) == u
'"')
373 verify(u
"%c" % (36,) == u
'$')
374 if sys
.platform
[:4] != 'java':
375 value
= u
"%r, %r" % (u
"abc", "abc")
376 if value
!= u
"u'abc', 'abc'":
377 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
379 verify(u
"%(x)s, %(y)s" % {'x':u
"abc", 'y':"def"} == u
'abc, def')
381 value
= u
"%(x)s, %(ä)s" % {'x':u
"abc", u
'ä':"def"}
383 print '*** formatting failed for "%s"' % "u'abc, def'"
385 verify(value
== u
'abc, def')
387 # formatting jobs delegated from the string implementation:
388 verify('...%(foo)s...' % {'foo':u
"abc"} == u
'...abc...')
389 verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
390 verify('...%(foo)s...' % {u
'foo':"abc"} == '...abc...')
391 verify('...%(foo)s...' % {u
'foo':u
"abc"} == u
'...abc...')
392 verify('...%(foo)s...' % {u
'foo':u
"abc",'def':123} == u
'...abc...')
393 verify('...%(foo)s...' % {u
'foo':u
"abc",u
'def':123} == u
'...abc...')
394 verify('...%s...%s...%s...%s...' % (1,2,3,u
"abc") == u
'...1...2...3...abc...')
395 verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u
"abc") == u
'...%...%s...1...2...3...abc...')
396 verify('...%s...' % u
"abc" == u
'...abc...')
397 verify('%*s' % (5,u
'abc',) == u
' abc')
398 verify('%*s' % (-5,u
'abc',) == u
'abc ')
399 verify('%*.*s' % (5,2,u
'abc',) == u
' ab')
400 verify('%*.*s' % (5,3,u
'abc',) == u
' abc')
401 verify('%i %*.*s' % (10, 5,3,u
'abc',) == u
'10 abc')
402 verify('%i%s %*.*s' % (10, 3, 5,3,u
'abc',) == u
'103 abc')
405 print 'Testing builtin unicode()...',
407 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
409 verify(unicode(u
'unicode remains unicode') == u
'unicode remains unicode')
411 class UnicodeSubclass(unicode):
414 verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
415 == u
'unicode subclass becomes unicode')
417 verify(unicode('strings are converted to unicode')
418 == u
'strings are converted to unicode')
421 def __init__(self
, x
):
423 def __unicode__(self
):
426 verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
427 == u
'__unicode__ compatible objects are recognized')
430 def __init__(self
, x
):
435 verify(unicode(StringCompat('__str__ compatible objects are recognized'))
436 == u
'__str__ compatible objects are recognized')
438 # unicode(obj) is compatible to str():
440 o
= StringCompat('unicode(obj) is compatible to str()')
441 verify(unicode(o
) == u
'unicode(obj) is compatible to str()')
442 verify(str(o
) == 'unicode(obj) is compatible to str()')
444 for obj
in (123, 123.45, 123L):
445 verify(unicode(obj
) == unicode(str(obj
)))
447 # unicode(obj, encoding, error) tests (this maps to
448 # PyUnicode_FromEncodedObject() at C level)
450 if not sys
.platform
.startswith('java'):
452 unicode(u
'decoding unicode is not supported', 'utf-8', 'strict')
456 raise TestFailed
, "decoding unicode should NOT be supported"
458 verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
459 == u
'strings are decoded to unicode')
461 if not sys
.platform
.startswith('java'):
462 verify(unicode(buffer('character buffers are decoded to unicode'),
464 == u
'character buffers are decoded to unicode')
468 # Test builtin codecs
469 print 'Testing builtin codecs...',
471 # UTF-7 specific encoding tests:
472 utfTests
= [(u
'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
473 (u
'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
474 (u
'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
475 (u
'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
481 (ur
'\\?', '+AFwAXA?'),
482 (ur
'\\\?', '+AFwAXABc?'),
483 (ur
'++--', '+-+---')]
486 verify( x
.encode('utf-7') == y
)
489 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
493 raise TestFailed
, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
495 verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u
'\ufffd')
497 # UTF-8 specific encoding tests:
498 verify(u
'\u20ac'.encode('utf-8') == \
499 ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
500 verify(u
'\ud800\udc02'.encode('utf-8') == \
501 ''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))) )
502 verify(u
'\ud84d\udc56'.encode('utf-8') == \
503 ''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
504 # UTF-8 specific decoding tests
505 verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
506 'utf-8') == u
'\U00023456' )
507 verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
508 'utf-8') == u
'\U00010002' )
509 verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
510 'utf-8') == u
'\u20ac' )
512 # Other possible utf-8 test cases:
513 # * strict decoding testing for all of the
514 # UTF8_ERROR cases in PyUnicode_DecodeUTF8
516 verify(unicode('hello','ascii') == u
'hello')
517 verify(unicode('hello','utf-8') == u
'hello')
518 verify(unicode('hello','utf8') == u
'hello')
519 verify(unicode('hello','latin-1') == u
'hello')
523 u
'Andr\202 x'.encode('ascii')
524 u
'Andr\202 x'.encode('ascii','strict')
528 raise TestFailed
, "u'Andr\202'.encode('ascii') failed to raise an exception"
529 verify(u
'Andr\202 x'.encode('ascii','ignore') == "Andr x")
530 verify(u
'Andr\202 x'.encode('ascii','replace') == "Andr? x")
533 unicode('Andr\202 x','ascii')
534 unicode('Andr\202 x','ascii','strict')
538 raise TestFailed
, "unicode('Andr\202') failed to raise an exception"
539 verify(unicode('Andr\202 x','ascii','ignore') == u
"Andr x")
540 verify(unicode('Andr\202 x','ascii','replace') == u
'Andr\uFFFD x')
542 verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u
"xx")
544 "\\".decode("unicode-escape")
548 raise TestFailed
, '"\\".decode("unicode-escape") should fail'
550 verify(u
'hello'.encode('ascii') == 'hello')
551 verify(u
'hello'.encode('utf-7') == 'hello')
552 verify(u
'hello'.encode('utf-8') == 'hello')
553 verify(u
'hello'.encode('utf8') == 'hello')
554 verify(u
'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
555 verify(u
'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
556 verify(u
'hello'.encode('latin-1') == 'hello')
558 # Roundtrip safety for BMP (just the first 1024 chars)
559 u
= u
''.join(map(unichr, range(1024)))
560 for encoding
in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
561 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
562 verify(unicode(u
.encode(encoding
),encoding
) == u
)
564 # Roundtrip safety for non-BMP (just a few chars)
565 u
= u
'\U00010001\U00020002\U00030003\U00040004\U00050005'
566 for encoding
in ('utf-8',
567 'utf-16', 'utf-16-le', 'utf-16-be',
568 #'raw_unicode_escape',
569 'unicode_escape', 'unicode_internal'):
570 verify(unicode(u
.encode(encoding
),encoding
) == u
)
572 u
= u
''.join(map(unichr, range(256)))
577 verify(unicode(u
.encode(encoding
),encoding
) == u
)
579 print '*** codec "%s" failed round-trip' % encoding
580 except ValueError,why
:
581 print '*** codec for "%s" failed: %s' % (encoding
, why
)
583 u
= u
''.join(map(unichr, range(128)))
588 verify(unicode(u
.encode(encoding
),encoding
) == u
)
590 print '*** codec "%s" failed round-trip' % encoding
591 except ValueError,why
:
592 print '*** codec for "%s" failed: %s' % (encoding
, why
)
596 print 'Testing standard mapping codecs...',
599 s
= ''.join(map(chr, range(128)))
602 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
603 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
604 'cp863', 'cp865', 'cp866',
605 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
606 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
607 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
608 'mac_cyrillic', 'mac_latin2',
610 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
611 'cp1256', 'cp1257', 'cp1258',
612 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
614 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
615 'cp1006', 'iso8859_8',
617 ### These have undefined mappings:
620 ### These fail the round-trip:
625 verify(unicode(s
,encoding
).encode(encoding
) == s
)
627 print '*** codec "%s" failed round-trip' % encoding
628 except ValueError,why
:
629 print '*** codec for "%s" failed: %s' % (encoding
, why
)
632 s
= ''.join(map(chr, range(128,256)))
635 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
636 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
637 'cp863', 'cp865', 'cp866',
638 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
639 'iso8859_2', 'iso8859_4', 'iso8859_5',
640 'iso8859_9', 'koi8_r', 'latin_1',
641 'mac_cyrillic', 'mac_latin2',
643 ### These have undefined mappings:
644 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
645 #'cp1256', 'cp1257', 'cp1258',
646 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
647 #'iso8859_3', 'iso8859_6', 'iso8859_7',
648 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
650 ### These fail the round-trip:
651 #'cp1006', 'cp875', 'iso8859_8',
655 verify(unicode(s
,encoding
).encode(encoding
) == s
)
657 print '*** codec "%s" failed round-trip' % encoding
658 except ValueError,why
:
659 print '*** codec for "%s" failed: %s' % (encoding
, why
)
663 print 'Testing Unicode string concatenation...',
664 verify((u
"abc" u
"def") == u
"abcdef")
665 verify(("abc" u
"def") == u
"abcdef")
666 verify((u
"abc" "def") == u
"abcdef")
667 verify((u
"abc" u
"def" "ghi") == u
"abcdefghi")
668 verify(("abc" "def" u
"ghi") == u
"abcdefghi")
671 print 'Testing Unicode printing...',