1 from test
.test_support
import verbose
, TestFailed
, TestSkipped
, verify
4 from unicodedata
import normalize
# File name of the normalization test data read by this module.
6 TESTDATAFILE
= "NormalizationTest.txt"
8 # This search allows using a build directory just inside the source
9 # directory, and saving just one copy of the test data in the source
10 # tree, rather than having a copy in each build directory.
11 # There might be a better way to do this.
# Try the current directory, then its parent. `fn` is the candidate path and
# `skip_expected` is True when that candidate does not exist.
# NOTE(review): no early exit from this loop is visible in this excerpt; without
# one, both names end up describing only the parent-directory candidate - confirm.
# NOTE(review): the data file is later opened as open(TESTDATAFILE), not
# open(fn), so a copy located in the parent directory would not be the one
# actually read - confirm.
13 for path
in [os
.path
.curdir
, os
.path
.pardir
]:
14 fn
= os
.path
.join(path
, TESTDATAFILE
)
15 skip_expected
= not os
.path
.exists(fn
)
# Each of the four returns below calls unicodedata.normalize with one fixed
# form name ("NFC", "NFKC", "NFD", "NFKD") on `str`.
# NOTE(review): the enclosing `def` lines are not visible in this excerpt;
# presumably these are the bodies of the NFC/NFKC/NFD/NFKD helpers that the
# checks further down call - confirm.
# NOTE(review): `str` is presumably a parameter name here, which would shadow
# the builtin of the same name - confirm against the (unseen) signatures.
24 return normalize("NFC", str)
27 return normalize("NFKC", str)
30 return normalize("NFD", str)
33 return normalize("NFKD", str)
# NOTE(review): presumably the body of the `unistr` helper that the data-line
# parsing calls; its `def` line is not visible in this excerpt - confirm.
# Parse `data`, a string of space-separated hexadecimal numbers, into a list
# of ints (rebinding `data` to that list).
36 data
= [int(x
, 16) for x
in data
.split(" ")]
# Detect a value above sys.maxunicode. Neither the loop that supplies `x` nor
# the action taken on a match is visible in this excerpt.
38 if x
> sys
.maxunicode
:
# Build a unicode string with one character (unichr) per parsed value.
40 return u
"".join([unichr(x
) for x
in data
])
# Report the test as skipped, naming the missing data file and the URL to
# download it from.
# NOTE(review): the condition guarding this raise and the enclosing `def` line
# are not visible in this excerpt.
44 raise TestSkipped(TESTDATAFILE
+ " not found, download from " +
45 "http://www.unicode.org/Public/UNIDATA/" + TESTDATAFILE
)
# Read the data file line by line.
# NOTE(review): the file object is not bound to a name, so it is never closed
# explicitly; and the bare file name is opened rather than the `fn` path
# computed by the module-level search - confirm.
48 for line
in open(TESTDATAFILE
):
# Keep only the text before the first '#'.
50 line
= line
.split('#')[0]
# Lines starting with "@Part" get separate handling (not visible here).
54 if line
.startswith("@Part"):
# Split the line on ';', discard the last piece, and convert each remaining
# piece with unistr; the unpacking requires exactly five pieces.
58 c1
,c2
,c3
,c4
,c5
= [unistr(x
) for x
in line
.split(';')[:-1]]
60 # Skip unsupported characters
# Check the expected equalities between the five columns under NFC and NFD;
# `line` is passed as verify's second argument.
67 verify(c2
== NFC(c1
) == NFC(c2
) == NFC(c3
), line
)
68 verify(c4
== NFC(c4
) == NFC(c5
), line
)
69 verify(c3
== NFD(c1
) == NFD(c2
) == NFD(c3
), line
)
70 verify(c5
== NFD(c4
) == NFD(c5
), line
)
# Same checks for the compatibility forms: all five columns must map to c4
# under NFKC and to c5 under NFKD.
# NOTE(review): the final argument and closing parenthesis of the next two
# verify calls continue on lines that are not visible in this excerpt.
71 verify(c4
== NFKC(c1
) == NFKC(c2
) == NFKC(c3
) == NFKC(c4
) == NFKC(c5
),
73 verify(c5
== NFKD(c1
) == NFKD(c2
) == NFKD(c3
) == NFKD(c4
) == NFKD(c5
),
80 # Perform tests for all other data
# Iterate over every integer from 0 through sys.maxunicode inclusive.
81 for c
in range(sys
.maxunicode
+1):
# X must be left unchanged by all four normalization forms; `c` is used as the
# assertion message.
# NOTE(review): the assignment of X is not visible in this excerpt (presumably
# derived from c) - confirm.
# NOTE(review): this is a bare assert, which is removed when Python runs with
# -O, whereas the earlier checks go through verify() - confirm this is intended.
85 assert X
== NFC(X
) == NFD(X
) == NFKC(X
) == NFKD(X
), c
# Entry guard for running this file directly as a script; the guarded
# statement is not visible in this excerpt.
87 if __name__
== "__main__":