Lib/test/test_ucn.py

   1 """ Test script for the Unicode implementation.
   2
   3 Written by Bill Tutt.
   4 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
   5
   6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
   7
   8 """#"
   9 from test_support import verify, verbose
  10
  11 print 'Testing General Unicode Character Name, and case insensitivity...',
  12
  13 # General and case insensitivity test:
  14 try:
  15     # put all \N escapes inside exec'd raw strings, to make sure this
  16     # script runs even if the compiler chokes on \N escapes
  17     exec r"""
  18 s = u"\N{LATIN CAPITAL LETTER T}" \
  19     u"\N{LATIN SMALL LETTER H}" \
  20     u"\N{LATIN SMALL LETTER E}" \
  21     u"\N{SPACE}" \
  22     u"\N{LATIN SMALL LETTER R}" \
  23     u"\N{LATIN CAPITAL LETTER E}" \
  24     u"\N{LATIN SMALL LETTER D}" \
  25     u"\N{SPACE}" \
  26     u"\N{LATIN SMALL LETTER f}" \
  27     u"\N{LATIN CAPITAL LeTtEr o}" \
  28     u"\N{LATIN SMaLl LETTER x}" \
  29     u"\N{SPACE}" \
  30     u"\N{LATIN SMALL LETTER A}" \
  31     u"\N{LATIN SMALL LETTER T}" \
  32     u"\N{LATIN SMALL LETTER E}" \
  33     u"\N{SPACE}" \
  34     u"\N{LATIN SMALL LETTER T}" \
  35     u"\N{LATIN SMALL LETTER H}" \
  36     u"\N{LATIN SMALL LETTER E}" \
  37     u"\N{SpAcE}" \
  38     u"\N{LATIN SMALL LETTER S}" \
  39     u"\N{LATIN SMALL LETTER H}" \
  40     u"\N{LATIN SMALL LETTER E}" \
  41     u"\N{LATIN SMALL LETTER E}" \
  42     u"\N{LATIN SMALL LETTER P}" \
  43     u"\N{FULL STOP}"
  44 verify(s == u"The rEd fOx ate the sheep.", s)
  45 """
  46 except UnicodeError, v:
  47     print v
  48 print "done."
  49
  50 import unicodedata
  51
  52 print "Testing name to code mapping....",
  53 for char in "SPAM":
  54     name = "LATIN SMALL LETTER %s" % char
  55     code = unicodedata.lookup(name)
  56     verify(unicodedata.name(code) == name)
  57 print "done."
  58
  59 print "Testing code to name mapping for all characters....",
  60 count = 0
  61 for code in range(65536):
  62     try:
  63         char = unichr(code)
  64         name = unicodedata.name(char)
  65         verify(unicodedata.lookup(name) == char)
  66         count += 1
  67     except (KeyError, ValueError):
  68         pass
  69 print "done."
  70
  71 print "Found", count, "characters in the unicode name database"
  72
# misc. symbol testing
# Check that a few named symbols expand to the expected \uXXXX code points.
print "Testing misc. symbols for unicode character name expansion....",
# The checks are exec'd from a raw string, so the \N escapes are compiled
# only when this statement runs rather than when the file itself is compiled.
exec r"""
verify(u"\N{PILCROW SIGN}" == u"\u00b6")
verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD")
verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F")
verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41")
"""
print "done."
  82
  83 # strict error testing:
  84 print "Testing unicode character name expansion strict error handling....",
  85 try:
  86     unicode("\N{blah}", 'unicode-escape', 'strict')
  87 except UnicodeError:
  88     pass
  89 else:
  90     raise AssertionError, "failed to raise an exception when given a bogus character name"
  91
  92 try:
  93     unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict')
  94 except UnicodeError:
  95     pass
  96 else:
  97     raise AssertionError, "failed to raise an exception when given a very " \
  98                           "long bogus character name"
  99
 100 try:
 101     unicode("\N{SPACE", 'unicode-escape', 'strict')
 102 except UnicodeError:
 103     pass
 104 else:
 105     raise AssertionError, "failed to raise an exception for a missing closing brace."
 106
 107 try:
 108     unicode("\NSPACE", 'unicode-escape', 'strict')
 109 except UnicodeError:
 110     pass
 111 else:
 112     raise AssertionError, "failed to raise an exception for a missing opening brace."
 113 print "done."