Update version number and release date.
[python/dscho.git] / Lib / test / test_normalization.py
blob94d07d524d43d5ac786f0b9abdc6172deb865661
1 from test.test_support import verbose, TestFailed, TestSkipped, verify
2 import sys
3 import os
4 from unicodedata import normalize
TESTDATAFILE = "NormalizationTest.txt"

# Look for the data file in the current directory first and then in its
# parent.  Checking the parent lets a build directory that sits directly
# inside the source directory share the single copy kept in the source
# tree, instead of needing its own copy.
# There might be a better way to do this.
skip_expected = True
for path in (os.path.curdir, os.path.pardir):
    fn = os.path.join(path, TESTDATAFILE)
    if os.path.exists(fn):
        # Found it: remember the full path and stop searching.
        TESTDATAFILE = fn
        skip_expected = False
        break
class RangeError(Exception):
    """Raised by unistr() when a code point is greater than sys.maxunicode.

    Derives from Exception so that it is a proper exception class rather
    than a plain class that merely happens to be raised.
    """
    pass
def NFC(str):
    """Return *str* normalized with unicodedata.normalize() form "NFC"."""
    form = "NFC"
    return normalize(form, str)
def NFKC(str):
    """Return *str* normalized with unicodedata.normalize() form "NFKC"."""
    form = "NFKC"
    return normalize(form, str)
def NFD(str):
    """Return *str* normalized with unicodedata.normalize() form "NFD"."""
    form = "NFD"
    return normalize(form, str)
def NFKD(str):
    """Return *str* normalized with unicodedata.normalize() form "NFKD"."""
    form = "NFKD"
    return normalize(form, str)
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    *data* is split on single spaces and every field is parsed as a
    base-16 integer.  RangeError is raised if any resulting code point is
    greater than sys.maxunicode; otherwise the characters are joined into
    one unicode string.
    """
    # Parse every field first so a malformed field is reported before any
    # range check is made.
    codepoints = []
    for field in data.split(" "):
        codepoints.append(int(field, 16))

    limit = sys.maxunicode
    for codepoint in codepoints:
        if codepoint > limit:
            raise RangeError

    return u"".join(map(unichr, codepoints))
def test_main():
    """Check unicodedata.normalize() against the data in TESTDATAFILE.

    Every data line is expected to hold five ';'-terminated fields
    (c1..c5), each a space-separated list of hexadecimal code points.
    Text after a '#' is ignored, as are empty lines.  Lines starting with
    "@Part" name the part that the following data lines belong to.  Lines
    containing a code point above sys.maxunicode are skipped.

    After the file has been processed, every code point up to
    sys.maxunicode whose character was not seen as c1 in "@Part1" must be
    left unchanged by all four normalization forms.

    Raises TestSkipped if the data file was not found; mismatches are
    reported through verify().
    """
    if skip_expected:
        raise TestSkipped(TESTDATAFILE + " not found, download from " +
                    "http://www.unicode.org/Public/UNIDATA/" + TESTDATAFILE)

    part1_data = {}
    # Initialised up front so that a data line appearing before the first
    # "@Part" header is simply not recorded instead of raising NameError.
    part = None

    datafile = open(TESTDATAFILE)
    try:
        for line in datafile:
            # Drop trailing comments and surrounding whitespace.
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line
                continue
            try:
                # The last ';' terminates the fifth field, so the final
                # element of the split is discarded.
                c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters
                continue

            if verbose:
                print(line)

            # Perform tests
            verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            verify(c4 == NFC(c4) == NFC(c5), line)
            verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            verify(c5 == NFD(c4) == NFD(c5), line)
            verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5),
                   line)
            verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5),
                   line)

            # Record part 1 data
            if part == "@Part1":
                part1_data[c1] = 1
    finally:
        # Always release the file handle, even when a check fails.
        datafile.close()

    # Perform tests for all other data
    for c in range(sys.maxunicode+1):
        X = unichr(c)
        if X in part1_data:
            continue
        # verify() rather than assert: the check must not vanish under -O
        # and should fail the same way as the checks above.
        verify(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
# Run the test when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()