1 """ Test script for the Unicode implementation.
3 Written by Marc-Andre Lemburg (mal@lemburg.com).
5 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8 from test_support
import verify
, verbose
, TestFailed
11 if not sys
.platform
.startswith('java'):
12 # Test basic sanity of repr()
13 verify(repr(u
'abc') == "u'abc'")
14 verify(repr(u
'ab\\c') == "u'ab\\\\c'")
15 verify(repr(u
'ab\\') == "u'ab\\\\'")
16 verify(repr(u
'\\c') == "u'\\\\c'")
17 verify(repr(u
'\\') == "u'\\\\'")
18 verify(repr(u
'\n') == "u'\\n'")
19 verify(repr(u
'\r') == "u'\\r'")
20 verify(repr(u
'\t') == "u'\\t'")
21 verify(repr(u
'\b') == "u'\\x08'")
22 verify(repr(u
"'\"") == """u'\\'"'""")
23 verify(repr(u
"'\"") == """u'\\'"'""")
24 verify(repr(u
"'") == '''u"'"''')
25 verify(repr(u
'"') == """u'"'""")
27 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
28 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
29 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
30 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
31 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
32 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
33 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
34 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
35 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
36 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
37 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
38 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
39 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
41 testrepr
= repr(u
''.join(map(unichr, range(256))))
42 verify(testrepr
== latin1repr
)
# test(method, input, output, *args): driver used by the method tests below.
# From the statements visible here it prints a progress line, looks `method`
# up on `input` with getattr(), calls it with `args` through apply(), and
# compares the result with `output` by value and by exact type.
# NOTE(review): the numbering embedded in this text skips lines (45, 47,
# 50-51, 53-54, 58-60, ...), so the try/except and class statements that
# surround these fragments are not visible here -- confirm against the
# complete file before relying on the control flow.
44 def test(method
, input, output
, *args
):
# Progress line: repr of the receiver, method name, args tuple and expected
# value; the trailing comma keeps the cursor on the same output line.
46 print '%s.%s%s =? %s... ' % (repr(input), method
, args
, repr(output
)),
# Fetch the bound method and invoke it with the extra positional arguments.
48 f
= getattr(input, method
)
49 value
= apply(f
, args
)
# Keep the first two items of sys.exc_info() (exception type and value).
52 exc
= sys
.exc_info()[:2]
# Both the value and the concrete type of the result have to match.
55 if value
== output
and type(value
) is type(output
):
56 # if the original is returned make sure that
57 # this doesn't happen with subclasses
# Presumably the body of a __repr__ on a local unicode subclass; its
# class/def lines are not present in this view -- TODO confirm.
61 return 'usub(%r)' % unicode.__repr
__(self
)
# Second lookup and call of the same method (presumably on the subclass
# instance mentioned above; the line that rebinds `input` is not visible).
64 f
= getattr(input, method
)
65 value
= apply(f
, args
)
68 exc
= sys
.exc_info()[:2]
# Failure report: the method object plus the repr (backticks) of the
# receiver, the expected value and the actual value.
72 print '*',f
, `
input`
, `output`
, `value`
# Mismatch in either value or type.
74 if value
!= output
or type(value
) is not type(output
):
77 print '*',f
, `
input`
, `output`
, `value`
# `exc` is the two-item slice captured above, so it fills both %s slots.
79 print ' value == %s: %s' % (exc
)
# Case conversion, counting and searching.  Each row is the complete
# argument list for test(): method name, receiver, expected result, then
# any extra arguments that are passed on to the method itself.
_basic_method_cases = [
    # capitalize
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'Hello ', u'Hello '),
    ('capitalize', u'hello ', u'Hello '),
    ('capitalize', u'aaaa', u'Aaaa'),
    ('capitalize', u'AaAa', u'Aaaa'),

    # count, with every str/unicode mix of receiver and argument
    ('count', u'aaa', 3, u'a'),
    ('count', u'aaa', 0, u'b'),
    ('count', 'aaa', 3, u'a'),
    ('count', 'aaa', 0, u'b'),
    ('count', u'aaa', 3, 'a'),
    ('count', u'aaa', 0, 'b'),

    # title
    ('title', u' hello ', u' Hello '),
    ('title', u'Hello ', u'Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    # find / rfind
    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),
    ('rfind', u'abcdefghiabc', 9, u'abc'),

    # lower / upper
    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),
    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
]
for _case in _basic_method_cases:
    test(*_case)
117 transtable
= '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
119 test('maketrans', u
'abc', transtable
, u
'xyz')
120 test('maketrans', u
'abc', ValueError, u
'xyzq')
122 test('split', u
'this is the split function',
123 [u
'this', u
'is', u
'the', u
'split', u
'function'])
124 test('split', u
'a|b|c|d', [u
'a', u
'b', u
'c', u
'd'], u
'|')
125 test('split', u
'a|b|c|d', [u
'a', u
'b', u
'c|d'], u
'|', 2)
126 test('split', u
'a b c d', [u
'a', u
'b c d'], None, 1)
127 test('split', u
'a b c d', [u
'a', u
'b', u
'c d'], None, 2)
128 test('split', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'], None, 3)
129 test('split', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'], None, 4)
130 test('split', u
'a b c d', [u
'a b c d'], None, 0)
131 test('split', u
'a b c d', [u
'a', u
'b', u
'c d'], None, 2)
132 test('split', u
'a b c d ', [u
'a', u
'b', u
'c', u
'd'])
133 test('split', u
'a//b//c//d', [u
'a', u
'b', u
'c', u
'd'], u
'//')
134 test('split', u
'a//b//c//d', [u
'a', u
'b', u
'c', u
'd'], '//')
135 test('split', 'a//b//c//d', [u
'a', u
'b', u
'c', u
'd'], u
'//')
136 test('split', u
'endcase test', [u
'endcase ', u
''], u
'test')
137 test('split', u
'endcase test', [u
'endcase ', u
''], 'test')
138 test('split', 'endcase test', [u
'endcase ', u
''], u
'test')
141 # join now works with any sequence type
# Constructor: keep a reference to the wrapped sequence on the instance.
143 def __init__(self
, seq
): self
.seq
= seq
# len(obj) reports the length of the wrapped sequence.
144 def __len__(self
): return len(self
.seq
)
# obj[i] indexes straight into the wrapped sequence; whatever the wrapped
# object raises for a bad index propagates unchanged.
145 def __getitem__(self
, i
): return self
.seq
[i
]
147 test('join', u
' ', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'])
148 test('join', u
' ', u
'a b c d', ['a', 'b', u
'c', u
'd'])
149 test('join', u
'', u
'abcd', (u
'a', u
'b', u
'c', u
'd'))
150 test('join', u
' ', u
'w x y z', Sequence('wxyz'))
151 test('join', u
' ', TypeError, 7)
152 test('join', u
' ', TypeError, Sequence([7, u
'hello', 123L]))
153 test('join', ' ', u
'a b c d', [u
'a', u
'b', u
'c', u
'd'])
154 test('join', ' ', u
'a b c d', ['a', 'b', u
'c', u
'd'])
155 test('join', '', u
'abcd', (u
'a', u
'b', u
'c', u
'd'))
156 test('join', ' ', u
'w x y z', Sequence(u
'wxyz'))
157 test('join', ' ', TypeError, 7)
162 result
= result
+ u
':'
163 result
= result
+ u
'x'*10
164 test('join', u
':', result
, [u
'x' * 10] * 10)
165 test('join', u
':', result
, (u
'x' * 10,) * 10)
167 test('strip', u
' hello ', u
'hello')
168 test('lstrip', u
' hello ', u
'hello ')
169 test('rstrip', u
' hello ', u
' hello')
170 test('strip', u
'hello', u
'hello')
172 # strip/lstrip/rstrip with None arg
173 test('strip', u
' hello ', u
'hello', None)
174 test('lstrip', u
' hello ', u
'hello ', None)
175 test('rstrip', u
' hello ', u
' hello', None)
176 test('strip', u
'hello', u
'hello', None)
178 # strip/lstrip/rstrip with unicode arg
179 test('strip', u
'xyzzyhelloxyzzy', u
'hello', u
'xyz')
180 test('lstrip', u
'xyzzyhelloxyzzy', u
'helloxyzzy', u
'xyz')
181 test('rstrip', u
'xyzzyhelloxyzzy', u
'xyzzyhello', u
'xyz')
182 test('strip', u
'hello', u
'hello', u
'xyz')
184 # strip/lstrip/rstrip with str arg
185 test('strip', u
'xyzzyhelloxyzzy', u
'hello', 'xyz')
186 test('lstrip', u
'xyzzyhelloxyzzy', u
'helloxyzzy', 'xyz')
187 test('rstrip', u
'xyzzyhelloxyzzy', u
'xyzzyhello', 'xyz')
188 test('strip', u
'hello', u
'hello', 'xyz')
190 test('swapcase', u
'HeLLo cOmpUteRs', u
'hEllO CoMPuTErS')
193 test('translate', u
'xyzabcdef', u
'xyzxyz', transtable
, u
'def')
195 table
= string
.maketrans('a', u
'A')
196 test('translate', u
'abc', u
'Abc', table
)
197 test('translate', u
'xyz', u
'xyz', table
)
199 test('replace', u
'one!two!three!', u
'one@two!three!', u
'!', u
'@', 1)
200 test('replace', u
'one!two!three!', u
'onetwothree', '!', '')
201 test('replace', u
'one!two!three!', u
'one@two@three!', u
'!', u
'@', 2)
202 test('replace', u
'one!two!three!', u
'one@two@three@', u
'!', u
'@', 3)
203 test('replace', u
'one!two!three!', u
'one@two@three@', u
'!', u
'@', 4)
204 test('replace', u
'one!two!three!', u
'one!two!three!', u
'!', u
'@', 0)
205 test('replace', u
'one!two!three!', u
'one@two@three@', u
'!', u
'@')
206 test('replace', u
'one!two!three!', u
'one!two!three!', u
'x', u
'@')
207 test('replace', u
'one!two!three!', u
'one!two!three!', u
'x', u
'@', 2)
# Prefix and suffix matching.  Rows are (receiver, expected, affix[, start
# [, end]]); the method name is supplied by the enclosing table entry.
_affix_cases = (
    ('startswith', (
        (u'hello', True, u'he'),
        (u'hello', True, u'hello'),
        (u'hello', False, u'hello world'),
        (u'hello', True, u''),
        (u'hello', False, u'ello'),
        (u'hello', True, u'ello', 1),
        (u'hello', True, u'o', 4),
        (u'hello', False, u'o', 5),
        (u'hello', True, u'', 5),
        (u'hello', False, u'lo', 6),
        (u'helloworld', True, u'lowo', 3),
        (u'helloworld', True, u'lowo', 3, 7),
        (u'helloworld', False, u'lowo', 3, 6),
    )),
    ('endswith', (
        (u'hello', True, u'lo'),
        (u'hello', False, u'he'),
        (u'hello', True, u''),
        (u'hello', False, u'hello world'),
        (u'helloworld', False, u'worl'),
        (u'helloworld', True, u'worl', 3, 9),
        (u'helloworld', True, u'world', 3, 12),
        (u'helloworld', True, u'lowo', 1, 7),
        (u'helloworld', True, u'lowo', 2, 7),
        (u'helloworld', True, u'lowo', 3, 7),
        (u'helloworld', False, u'lowo', 4, 7),
        (u'helloworld', False, u'lowo', 3, 8),
        (u'ab', False, u'ab', 0, 1),
        (u'ab', False, u'ab', 0, 0),
    )),
)
for _method, _rows in _affix_cases:
    for _row in _rows:
        test(_method, *_row)
238 test('expandtabs', u
'abc\rab\tdef\ng\thi', u
'abc\rab def\ng hi')
239 test('expandtabs', u
'abc\rab\tdef\ng\thi', u
'abc\rab def\ng hi', 8)
240 test('expandtabs', u
'abc\rab\tdef\ng\thi', u
'abc\rab def\ng hi', 4)
241 test('expandtabs', u
'abc\r\nab\tdef\ng\thi', u
'abc\r\nab def\ng hi', 4)
242 test('expandtabs', u
'abc\r\nab\r\ndef\ng\r\nhi', u
'abc\r\nab\r\ndef\ng\r\nhi', 4)
245 test('capwords', u
'abc def ghi', u
'Abc Def Ghi')
246 test('capwords', u
'abc\tdef\nghi', u
'Abc Def Ghi')
247 test('capwords', u
'abc\t def \nghi', u
'Abc Def Ghi')
# zfill: (receiver, expected result, requested width).  Covers widths
# below, equal to and above the input length, with and without a sign.
for _text, _padded, _width in (
    (u'123', u'123', 2),
    (u'123', u'123', 3),
    (u'123', u'0123', 4),
    (u'+123', u'+123', 3),
    (u'+123', u'+123', 4),
    (u'+123', u'+0123', 5),
    (u'-123', u'-123', 3),
    (u'-123', u'-123', 4),
    (u'-123', u'-0123', 5),
    (u'', u'000', 3),
    (u'34', u'34', 1),
    (u'34', u'00034', 5),
):
    test('zfill', _text, _padded, _width)
263 print 'Testing Unicode comparisons...',
264 verify(u
'abc' == 'abc')
265 verify('abc' == u
'abc')
266 verify(u
'abc' == u
'abc')
267 verify(u
'abcd' > 'abc')
268 verify('abcd' > u
'abc')
269 verify(u
'abcd' > u
'abc')
270 verify(u
'abc' < 'abcd')
271 verify('abc' < u
'abcd')
272 verify(u
'abc' < u
'abcd')
276 # Move these tests to a Unicode collation module test...
278 print 'Testing UTF-16 code point order comparisons...',
279 # No surrogates, no fixup required.
280 verify(u
'\u0061' < u
'\u20ac')
281 # Non-surrogate below surrogate value, no fixup required
282 verify(u
'\u0061' < u
'\ud800\udc02')
284 # Non surrogate above surrogate value, fixup required
def test_lecmp(s, s2):
    """Check that *s* compares strictly less than *s2*.

    The comparison result is passed to verify() together with a message
    that names both operands.
    """
    is_less = s < s2
    failure_text = "comparison failed on %s < %s" % (s, s2)
    verify(is_less, failure_text)
322 test_fixup(u
'\ue000')
323 test_fixup(u
'\uff61')
325 # Surrogates on both sides, no fixup required
326 verify(u
'\ud800\udc02' < u
'\ud84d\udc56')
329 test('ljust', u
'abc', u
'abc ', 10)
330 test('rjust', u
'abc', u
' abc', 10)
331 test('center', u
'abc', u
' abc ', 10)
332 test('ljust', u
'abc', u
'abc ', 6)
333 test('rjust', u
'abc', u
' abc', 6)
334 test('center', u
'abc', u
' abc ', 6)
335 test('ljust', u
'abc', u
'abc', 2)
336 test('rjust', u
'abc', u
'abc', 2)
337 test('center', u
'abc', u
'abc', 2)
339 test('islower', u
'a', True)
340 test('islower', u
'A', False)
341 test('islower', u
'\n', False)
342 test('islower', u
'\u1FFc', False)
343 test('islower', u
'abc', True)
344 test('islower', u
'aBc', False)
345 test('islower', u
'abc\n', True)
347 test('isupper', u
'a', False)
348 test('isupper', u
'A', True)
349 test('isupper', u
'\n', False)
350 if sys
.platform
[:4] != 'java':
351 test('isupper', u
'\u1FFc', False)
352 test('isupper', u
'ABC', True)
353 test('isupper', u
'AbC', False)
354 test('isupper', u
'ABC\n', True)
356 test('istitle', u
'a', False)
357 test('istitle', u
'A', True)
358 test('istitle', u
'\n', False)
359 test('istitle', u
'\u1FFc', True)
360 test('istitle', u
'A Titlecased Line', True)
361 test('istitle', u
'A\nTitlecased Line', True)
362 test('istitle', u
'A Titlecased, Line', True)
363 test('istitle', u
'Greek \u1FFcitlecases ...', True)
364 test('istitle', u
'Not a capitalized String', False)
365 test('istitle', u
'Not\ta Titlecase String', False)
366 test('istitle', u
'Not--a Titlecase String', False)
368 test('isalpha', u
'a', True)
369 test('isalpha', u
'A', True)
370 test('isalpha', u
'\n', False)
371 test('isalpha', u
'\u1FFc', True)
372 test('isalpha', u
'abc', True)
373 test('isalpha', u
'aBc123', False)
374 test('isalpha', u
'abc\n', False)
376 test('isalnum', u
'a', True)
377 test('isalnum', u
'A', True)
378 test('isalnum', u
'\n', False)
379 test('isalnum', u
'123abc456', True)
380 test('isalnum', u
'a1b3c', True)
381 test('isalnum', u
'aBc000 ', False)
382 test('isalnum', u
'abc\n', False)
384 test('splitlines', u
"abc\ndef\n\rghi", [u
'abc', u
'def', u
'', u
'ghi'])
385 test('splitlines', u
"abc\ndef\n\r\nghi", [u
'abc', u
'def', u
'', u
'ghi'])
386 test('splitlines', u
"abc\ndef\r\nghi", [u
'abc', u
'def', u
'ghi'])
387 test('splitlines', u
"abc\ndef\r\nghi\n", [u
'abc', u
'def', u
'ghi'])
388 test('splitlines', u
"abc\ndef\r\nghi\n\r", [u
'abc', u
'def', u
'ghi', u
''])
389 test('splitlines', u
"\nabc\ndef\r\nghi\n\r", [u
'', u
'abc', u
'def', u
'ghi', u
''])
390 test('splitlines', u
"\nabc\ndef\r\nghi\n\r", [u
'\n', u
'abc\n', u
'def\r\n', u
'ghi\n', u
'\r'], True)
392 test('translate', u
"abababc", u
'bbbc', {ord('a'):None})
393 test('translate', u
"abababc", u
'iiic', {ord('a'):None, ord('b'):ord('i')})
394 test('translate', u
"abababc", u
'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u
'x'})
397 print 'Testing Unicode contains method...',
398 verify(('a' in u
'abdb') == 1)
399 verify(('a' in u
'bdab') == 1)
400 verify(('a' in u
'bdaba') == 1)
401 verify(('a' in u
'bdba') == 1)
402 verify(('a' in u
'bdba') == 1)
403 verify((u
'a' in u
'bdba') == 1)
404 verify((u
'a' in u
'bdb') == 0)
405 verify((u
'a' in 'bdb') == 0)
406 verify((u
'a' in 'bdba') == 1)
407 verify((u
'a' in ('a',1,None)) == 1)
408 verify((u
'a' in (1,None,'a')) == 1)
409 verify((u
'a' in (1,None,u
'a')) == 1)
410 verify(('a' in ('a',1,None)) == 1)
411 verify(('a' in (1,None,'a')) == 1)
412 verify(('a' in (1,None,u
'a')) == 1)
413 verify(('a' in ('x',1,u
'y')) == 0)
414 verify(('a' in ('x',1,None)) == 0)
418 print 'Testing Unicode formatting strings...',
419 verify(u
"%s, %s" % (u
"abc", "abc") == u
'abc, abc')
420 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", 1, 2, 3) == u
'abc, abc, 1, 2.000000, 3.00')
421 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", 1, -2, 3) == u
'abc, abc, 1, -2.000000, 3.00')
422 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", -1, -2, 3.5) == u
'abc, abc, -1, -2.000000, 3.50')
423 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", -1, -2, 3.57) == u
'abc, abc, -1, -2.000000, 3.57')
424 verify(u
"%s, %s, %i, %f, %5.2f" % (u
"abc", "abc", -1, -2, 1003.57) == u
'abc, abc, -1, -2.000000, 1003.57')
425 verify(u
"%c" % (u
"a",) == u
'a')
426 verify(u
"%c" % ("a",) == u
'a')
427 verify(u
"%c" % (34,) == u
'"')
428 verify(u
"%c" % (36,) == u
'$')
429 if sys
.platform
[:4] != 'java':
430 value
= u
"%r, %r" % (u
"abc", "abc")
431 if value
!= u
"u'abc', 'abc'":
432 print '*** formatting failed for "%s"' % 'u"%r, %r" % (u"abc", "abc")'
434 verify(u
"%(x)s, %(y)s" % {'x':u
"abc", 'y':"def"} == u
'abc, def')
436 value
= u
"%(x)s, %(ä)s" % {'x':u
"abc", u
'ä':"def"}
438 print '*** formatting failed for "%s"' % "u'abc, def'"
440 verify(value
== u
'abc, def')
442 # formatting jobs delegated from the string implementation:
443 verify('...%(foo)s...' % {'foo':u
"abc"} == u
'...abc...')
444 verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
445 verify('...%(foo)s...' % {u
'foo':"abc"} == '...abc...')
446 verify('...%(foo)s...' % {u
'foo':u
"abc"} == u
'...abc...')
447 verify('...%(foo)s...' % {u
'foo':u
"abc",'def':123} == u
'...abc...')
448 verify('...%(foo)s...' % {u
'foo':u
"abc",u
'def':123} == u
'...abc...')
449 verify('...%s...%s...%s...%s...' % (1,2,3,u
"abc") == u
'...1...2...3...abc...')
450 verify('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u
"abc") == u
'...%...%s...1...2...3...abc...')
451 verify('...%s...' % u
"abc" == u
'...abc...')
452 verify('%*s' % (5,u
'abc',) == u
' abc')
453 verify('%*s' % (-5,u
'abc',) == u
'abc ')
454 verify('%*.*s' % (5,2,u
'abc',) == u
' ab')
455 verify('%*.*s' % (5,3,u
'abc',) == u
' abc')
456 verify('%i %*.*s' % (10, 5,3,u
'abc',) == u
'10 abc')
457 verify('%i%s %*.*s' % (10, 3, 5,3,u
'abc',) == u
'103 abc')
460 print 'Testing builtin unicode()...',
462 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
464 verify(unicode(u
'unicode remains unicode') == u
'unicode remains unicode')
466 class UnicodeSubclass(unicode):
469 verify(unicode(UnicodeSubclass('unicode subclass becomes unicode'))
470 == u
'unicode subclass becomes unicode')
472 verify(unicode('strings are converted to unicode')
473 == u
'strings are converted to unicode')
476 def __init__(self
, x
):
478 def __unicode__(self
):
481 verify(unicode(UnicodeCompat('__unicode__ compatible objects are recognized'))
482 == u
'__unicode__ compatible objects are recognized')
485 def __init__(self
, x
):
490 verify(unicode(StringCompat('__str__ compatible objects are recognized'))
491 == u
'__str__ compatible objects are recognized')
493 # unicode(obj) is compatible with str():
495 o
= StringCompat('unicode(obj) is compatible to str()')
496 verify(unicode(o
) == u
'unicode(obj) is compatible to str()')
497 verify(str(o
) == 'unicode(obj) is compatible to str()')
499 for obj
in (123, 123.45, 123L):
500 verify(unicode(obj
) == unicode(str(obj
)))
502 # unicode(obj, encoding, errors) tests (this maps to
503 # PyUnicode_FromEncodedObject() at C level)
505 if not sys
.platform
.startswith('java'):
507 unicode(u
'decoding unicode is not supported', 'utf-8', 'strict')
511 raise TestFailed
, "decoding unicode should NOT be supported"
513 verify(unicode('strings are decoded to unicode', 'utf-8', 'strict')
514 == u
'strings are decoded to unicode')
516 if not sys
.platform
.startswith('java'):
517 verify(unicode(buffer('character buffers are decoded to unicode'),
519 == u
'character buffers are decoded to unicode')
523 # Test builtin codecs
524 print 'Testing builtin codecs...',
526 # UTF-7 specific encoding tests:
527 utfTests
= [(u
'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
528 (u
'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
529 (u
'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
530 (u
'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
536 (ur
'\\?', '+AFwAXA?'),
537 (ur
'\\\?', '+AFwAXABc?'),
538 (ur
'++--', '+-+---')]
541 verify( x
.encode('utf-7') == y
)
544 unicode('+3ADYAA-', 'utf-7') # surrogates not supported
548 raise TestFailed
, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
550 verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u
'\ufffd')
552 # UTF-8 specific encoding tests:
553 verify(u
''.encode('utf-8') == '')
554 verify(u
'\u20ac'.encode('utf-8') == '\xe2\x82\xac')
555 verify(u
'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82')
556 verify(u
'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96')
557 verify(u
'\ud800'.encode('utf-8') == '\xed\xa0\x80')
558 verify(u
'\udc00'.encode('utf-8') == '\xed\xb0\x80')
559 verify((u
'\ud800\udc02'*1000).encode('utf-8') ==
560 '\xf0\x90\x80\x82'*1000)
561 verify(u
'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
562 u
'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
563 u
'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
564 u
'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
565 u
'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
566 u
' Nunstuck git und'.encode('utf-8') ==
567 '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
568 '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
569 '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
570 '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
571 '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
572 '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
573 '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
574 '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
575 '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
576 '\xe3\x80\x8cWenn ist das Nunstuck git und')
578 # UTF-8 specific decoding tests
579 verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u
'\U00023456' )
580 verify(unicode('\xf0\x90\x80\x82', 'utf-8') == u
'\U00010002' )
581 verify(unicode('\xe2\x82\xac', 'utf-8') == u
'\u20ac' )
583 # Other possible utf-8 test cases:
584 # * strict decoding testing for all of the
585 # UTF8_ERROR cases in PyUnicode_DecodeUTF8
587 verify(unicode('hello','ascii') == u
'hello')
588 verify(unicode('hello','utf-8') == u
'hello')
589 verify(unicode('hello','utf8') == u
'hello')
590 verify(unicode('hello','latin-1') == u
'hello')
594 u
'Andr\202 x'.encode('ascii')
595 u
'Andr\202 x'.encode('ascii','strict')
599 raise TestFailed
, "u'Andr\202'.encode('ascii') failed to raise an exception"
600 verify(u
'Andr\202 x'.encode('ascii','ignore') == "Andr x")
601 verify(u
'Andr\202 x'.encode('ascii','replace') == "Andr? x")
604 unicode('Andr\202 x','ascii')
605 unicode('Andr\202 x','ascii','strict')
609 raise TestFailed
, "unicode('Andr\202') failed to raise an exception"
610 verify(unicode('Andr\202 x','ascii','ignore') == u
"Andr x")
611 verify(unicode('Andr\202 x','ascii','replace') == u
'Andr\uFFFD x')
613 verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u
"xx")
615 "\\".decode("unicode-escape")
619 raise TestFailed
, '"\\".decode("unicode-escape") should fail'
621 verify(u
'hello'.encode('ascii') == 'hello')
622 verify(u
'hello'.encode('utf-7') == 'hello')
623 verify(u
'hello'.encode('utf-8') == 'hello')
624 verify(u
'hello'.encode('utf8') == 'hello')
625 verify(u
'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
626 verify(u
'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
627 verify(u
'hello'.encode('latin-1') == 'hello')
629 # Roundtrip safety for BMP (just the first 1024 chars)
630 u
= u
''.join(map(unichr, range(1024)))
631 for encoding
in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
632 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
633 verify(unicode(u
.encode(encoding
),encoding
) == u
)
635 # Roundtrip safety for BMP (just the first 256 chars)
636 u
= u
''.join(map(unichr, range(256)))
641 verify(unicode(u
.encode(encoding
),encoding
) == u
)
643 print '*** codec "%s" failed round-trip' % encoding
644 except ValueError,why
:
645 print '*** codec for "%s" failed: %s' % (encoding
, why
)
647 # Roundtrip safety for BMP (just the first 128 chars)
648 u
= u
''.join(map(unichr, range(128)))
653 verify(unicode(u
.encode(encoding
),encoding
) == u
)
655 print '*** codec "%s" failed round-trip' % encoding
656 except ValueError,why
:
657 print '*** codec for "%s" failed: %s' % (encoding
, why
)
659 # Roundtrip safety for non-BMP (just a few chars)
660 u
= u
'\U00010001\U00020002\U00030003\U00040004\U00050005'
661 for encoding
in ('utf-8',
662 'utf-16', 'utf-16-le', 'utf-16-be',
663 #'raw_unicode_escape',
664 'unicode_escape', 'unicode_internal'):
665 verify(unicode(u
.encode(encoding
),encoding
) == u
)
667 # UTF-8 must be roundtrip safe for all UCS-2 code points
668 u
= u
''.join(map(unichr, range(0x10000)))
669 for encoding
in ('utf-8',):
670 verify(unicode(u
.encode(encoding
),encoding
) == u
)
674 print 'Testing standard mapping codecs...',
677 s
= ''.join(map(chr, range(128)))
680 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
681 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
682 'cp863', 'cp865', 'cp866',
683 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
684 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
685 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
686 'mac_cyrillic', 'mac_latin2',
688 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
689 'cp1256', 'cp1257', 'cp1258',
690 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
692 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
693 'cp1006', 'iso8859_8',
695 ### These have undefined mappings:
698 ### These fail the round-trip:
703 verify(unicode(s
,encoding
).encode(encoding
) == s
)
705 print '*** codec "%s" failed round-trip' % encoding
706 except ValueError,why
:
707 print '*** codec for "%s" failed: %s' % (encoding
, why
)
710 s
= ''.join(map(chr, range(128,256)))
713 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
714 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
715 'cp863', 'cp865', 'cp866',
716 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
717 'iso8859_2', 'iso8859_4', 'iso8859_5',
718 'iso8859_9', 'koi8_r', 'latin_1',
719 'mac_cyrillic', 'mac_latin2',
721 ### These have undefined mappings:
722 #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
723 #'cp1256', 'cp1257', 'cp1258',
724 #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
725 #'iso8859_3', 'iso8859_6', 'iso8859_7',
726 #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
728 ### These fail the round-trip:
729 #'cp1006', 'cp875', 'iso8859_8',
733 verify(unicode(s
,encoding
).encode(encoding
) == s
)
735 print '*** codec "%s" failed round-trip' % encoding
736 except ValueError,why
:
737 print '*** codec for "%s" failed: %s' % (encoding
, why
)
741 print 'Testing Unicode string concatenation...',
742 verify((u
"abc" u
"def") == u
"abcdef")
743 verify(("abc" u
"def") == u
"abcdef")
744 verify((u
"abc" "def") == u
"abcdef")
745 verify((u
"abc" u
"def" "ghi") == u
"abcdefghi")
746 verify(("abc" "def" u
"ghi") == u
"abcdefghi")
749 print 'Testing Unicode printing...',