- Got rid of newmodule.c
[python/dscho.git] / Lib / test / test_ucn.py
blobf7d3ce43a9116c00411e6934ae789eaab27e4a9f
1 """ Test script for the Unicode implementation.
3 Written by Bill Tutt.
4 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8 """#"
9 from test_support import verify, verbose
11 print 'Testing General Unicode Character Name, and case insensitivity...',
13 # General and case insensitivity test:
14 try:
15 # put all \N escapes inside exec'd raw strings, to make sure this
16 # script runs even if the compiler chokes on \N escapes
17 exec r"""
18 s = u"\N{LATIN CAPITAL LETTER T}" \
19 u"\N{LATIN SMALL LETTER H}" \
20 u"\N{LATIN SMALL LETTER E}" \
21 u"\N{SPACE}" \
22 u"\N{LATIN SMALL LETTER R}" \
23 u"\N{LATIN CAPITAL LETTER E}" \
24 u"\N{LATIN SMALL LETTER D}" \
25 u"\N{SPACE}" \
26 u"\N{LATIN SMALL LETTER f}" \
27 u"\N{LATIN CAPITAL LeTtEr o}" \
28 u"\N{LATIN SMaLl LETTER x}" \
29 u"\N{SPACE}" \
30 u"\N{LATIN SMALL LETTER A}" \
31 u"\N{LATIN SMALL LETTER T}" \
32 u"\N{LATIN SMALL LETTER E}" \
33 u"\N{SPACE}" \
34 u"\N{LATIN SMALL LETTER T}" \
35 u"\N{LATIN SMALL LETTER H}" \
36 u"\N{LATIN SMALL LETTER E}" \
37 u"\N{SpAcE}" \
38 u"\N{LATIN SMALL LETTER S}" \
39 u"\N{LATIN SMALL LETTER H}" \
40 u"\N{LATIN SMALL LETTER E}" \
41 u"\N{LATIN SMALL LETTER E}" \
42 u"\N{LATIN SMALL LETTER P}" \
43 u"\N{FULL STOP}"
44 verify(s == u"The rEd fOx ate the sheep.", s)
45 """
46 except UnicodeError, v:
47 print v
48 print "done."
50 import unicodedata
52 print "Testing name to code mapping....",
53 for char in "SPAM":
54 name = "LATIN SMALL LETTER %s" % char
55 code = unicodedata.lookup(name)
56 verify(unicodedata.name(code) == name)
57 print "done."
59 print "Testing code to name mapping for all characters....",
60 count = 0
61 for code in range(65536):
62 try:
63 char = unichr(code)
64 name = unicodedata.name(char)
65 verify(unicodedata.lookup(name) == char)
66 count += 1
67 except (KeyError, ValueError):
68 pass
69 print "done."
71 print "Found", count, "characters in the unicode name database"
73 # misc. symbol testing
74 print "Testing misc. symbols for unicode character name expansion....",
75 exec r"""
76 verify(u"\N{PILCROW SIGN}" == u"\u00b6")
77 verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD")
78 verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F")
79 verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41")
80 """
81 print "done."
83 # strict error testing:
84 print "Testing unicode character name expansion strict error handling....",
85 try:
86 unicode("\N{blah}", 'unicode-escape', 'strict')
87 except UnicodeError:
88 pass
89 else:
90 raise AssertionError, "failed to raise an exception when given a bogus character name"
92 try:
93 unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict')
94 except UnicodeError:
95 pass
96 else:
97 raise AssertionError, "failed to raise an exception when given a very " \
98 "long bogus character name"
100 try:
101 unicode("\N{SPACE", 'unicode-escape', 'strict')
102 except UnicodeError:
103 pass
104 else:
105 raise AssertionError, "failed to raise an exception for a missing closing brace."
107 try:
108 unicode("\NSPACE", 'unicode-escape', 'strict')
109 except UnicodeError:
110 pass
111 else:
112 raise AssertionError, "failed to raise an exception for a missing opening brace."
113 print "done."