Fix typo in 9b54bd30006c008b4a951331b273613d5bac3abf
[pm.git] / intl / unicharutil / tests / NormalizationTest.cpp
blob94b5d67bb5b93e6ccb1a02d728440c190eba489c
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <stdio.h>
#include <string.h>
#include "nsXPCOM.h"
#include "nsIUnicodeNormalizer.h"
#include "nsStringAPI.h"
#include "nsCharTraits.h"
#include "nsServiceManagerUtils.h"
/**
 * One row of normalization test data.
 *
 * c1..c5 are the five string columns that TestInvariants() compares against
 * each other under the NFC, NFD, NFKC and NFKD forms. They are stored as
 * wchar_t strings and reinterpreted as char16_t by the tests, which is why
 * main() refuses to run unless sizeof(wchar_t) == 2.
 */
struct testcaseLine {
  wchar_t* c1;
  wchar_t* c2;
  wchar_t* c3;
  wchar_t* c4;
  wchar_t* c5;
  char* description;  // label passed to showError() when a comparison fails
};
// Debug-only tracing. When DEBUG_smontagu is defined, prints the label `t`
// (which must be a string literal, since it is concatenated with ": ")
// followed by every code unit of string `s` in hexadecimal. Otherwise the
// macro expands to nothing.
//
// The debug variant is wrapped in do { ... } while (0) so that it behaves as
// a single statement wherever it is used (e.g. as an unbraced if/else body).
#ifdef DEBUG_smontagu
#define DEBUG_NAMED_TESTCASE(t, s) \
  do { \
    printf(t ": "); \
    for (uint32_t i = 0; i < (s).Length(); ++i) \
      printf("%x ", (s).CharAt(i)); \
    printf("\n"); \
  } while (0)
#else
#define DEBUG_NAMED_TESTCASE(t, s)
#endif

// Trace a string variable, using the variable's own name as the label.
#define DEBUG_TESTCASE(x) DEBUG_NAMED_TESTCASE(#x, x)
// Normalize `comparison` with normalizer->NormalizeUnicode<form>() and check
// that the result equals `base`.
//
// The expansion site must have in scope:
//   - `normalized`: a string that receives the normalization result,
//   - `rv`:         a bool that is set to false on mismatch,
//   - `normalizer`: the normalizer service pointer.
// On mismatch the failure is reported through showError() using
// `description` plus a message built from the stringified arguments.
//
// Wrapped in do { ... } while (0) so the multi-statement body acts as one
// statement and consumes the caller's trailing semicolon.
#define NORMALIZE_AND_COMPARE(base, comparison, form, description) \
  do { \
    normalized.Truncate(); \
    normalizer->NormalizeUnicode##form(comparison, normalized); \
    DEBUG_NAMED_TESTCASE(#form "(" #comparison ")", normalized); \
    if (!(base).Equals(normalized)) { \
      rv = false; \
      showError(description, #base " != " #form "(" #comparison ")\n"); \
    } \
  } while (0)
43 NS_DEFINE_CID(kUnicodeNormalizerCID, NS_UNICODE_NORMALIZER_CID);
45 nsIUnicodeNormalizer *normalizer;
46 bool verboseMode = false;
48 #include "NormalizationData.h"
50 void showError(const char* description, const char* errorText)
52 if (verboseMode)
53 printf("%s failed: %s", description, errorText);
56 bool TestInvariants(testcaseLine* testLine)
58 nsAutoString c1, c2, c3, c4, c5, normalized;
59 c1 = nsDependentString((char16_t*)testLine->c1);
60 c2 = nsDependentString((char16_t*)testLine->c2);
61 c3 = nsDependentString((char16_t*)testLine->c3);
62 c4 = nsDependentString((char16_t*)testLine->c4);
63 c5 = nsDependentString((char16_t*)testLine->c5);
64 bool rv = true;
67 1. The following invariants must be true for all conformant implementations
69 NFC
70 c2 == NFC(c1) == NFC(c2) == NFC(c3)
72 DEBUG_TESTCASE(c2);
73 NORMALIZE_AND_COMPARE(c2, c1, NFC, testLine->description);
74 NORMALIZE_AND_COMPARE(c2, c2, NFC, testLine->description);
75 NORMALIZE_AND_COMPARE(c2, c3, NFC, testLine->description);
78 c4 == NFC(c4) == NFC(c5)
80 DEBUG_TESTCASE(c4);
81 NORMALIZE_AND_COMPARE(c4, c4, NFC, testLine->description);
82 NORMALIZE_AND_COMPARE(c4, c5, NFC, testLine->description);
85 NFD
86 c3 == NFD(c1) == NFD(c2) == NFD(c3)
88 DEBUG_TESTCASE(c3);
89 NORMALIZE_AND_COMPARE(c3, c1, NFD, testLine->description);
90 NORMALIZE_AND_COMPARE(c3, c2, NFD, testLine->description);
91 NORMALIZE_AND_COMPARE(c3, c3, NFD, testLine->description);
93 c5 == NFD(c4) == NFD(c5)
95 DEBUG_TESTCASE(c5);
96 NORMALIZE_AND_COMPARE(c5, c4, NFD, testLine->description);
97 NORMALIZE_AND_COMPARE(c5, c5, NFD, testLine->description);
100 NFKC
101 c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
103 DEBUG_TESTCASE(c4);
104 NORMALIZE_AND_COMPARE(c4, c1, NFKC, testLine->description);
105 NORMALIZE_AND_COMPARE(c4, c2, NFKC, testLine->description);
106 NORMALIZE_AND_COMPARE(c4, c3, NFKC, testLine->description);
107 NORMALIZE_AND_COMPARE(c4, c4, NFKC, testLine->description);
108 NORMALIZE_AND_COMPARE(c4, c5, NFKC, testLine->description);
111 NFKD
112 c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
114 DEBUG_TESTCASE(c5);
115 NORMALIZE_AND_COMPARE(c5, c1, NFKD, testLine->description);
116 NORMALIZE_AND_COMPARE(c5, c2, NFKD, testLine->description);
117 NORMALIZE_AND_COMPARE(c5, c3, NFKD, testLine->description);
118 NORMALIZE_AND_COMPARE(c5, c4, NFKD, testLine->description);
119 NORMALIZE_AND_COMPARE(c5, c5, NFKD, testLine->description);
121 return rv;
124 uint32_t UTF32CodepointFromTestcase(testcaseLine* testLine)
126 if (!IS_SURROGATE(testLine->c1[0]))
127 return testLine->c1[0];
129 NS_ASSERTION(NS_IS_HIGH_SURROGATE(testLine->c1[0]) &&
130 NS_IS_LOW_SURROGATE(testLine->c1[1]),
131 "Test data neither in BMP nor legal surrogate pair");
132 return SURROGATE_TO_UCS4(testLine->c1[0], testLine->c1[1]);
135 bool TestUnspecifiedCodepoint(uint32_t codepoint)
137 bool rv = true;
138 char16_t unicharArray[3];
139 nsAutoString X, normalized;
140 char description[9];
142 if (IS_IN_BMP(codepoint)) {
143 unicharArray[0] = codepoint;
144 unicharArray[1] = 0;
145 X = nsDependentString(unicharArray);
147 else {
148 unicharArray[0] = H_SURROGATE(codepoint);
149 unicharArray[1] = L_SURROGATE(codepoint);
150 unicharArray[2] = 0;
151 X = nsDependentString(unicharArray);
155 2. For every code point X assigned in this version of Unicode that is not specifically
156 listed in Part 1, the following invariants must be true for all conformant
157 implementations:
159 X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
161 DEBUG_TESTCASE(X);
162 sprintf(description, "U+%04X", codepoint);
163 NORMALIZE_AND_COMPARE(X, X, NFC, description);
164 NORMALIZE_AND_COMPARE(X, X, NFD, description);
165 NORMALIZE_AND_COMPARE(X, X, NFKC, description);
166 NORMALIZE_AND_COMPARE(X, X, NFKD, description);
167 return rv;
170 void TestPart0()
172 printf("Test Part0: Specific cases\n");
174 uint32_t i = 0;
175 uint32_t numFailed = 0;
176 uint32_t numPassed = 0;
178 while (Part0TestData[i].c1[0] != 0) {
179 if (TestInvariants(&Part0TestData[i++]))
180 ++numPassed;
181 else
182 ++numFailed;
184 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
187 void TestPart1()
189 printf("Test Part1: Character by character test\n");
191 uint32_t i = 0;
192 uint32_t numFailed = 0;
193 uint32_t numPassed = 0;
194 uint32_t codepoint;
195 uint32_t testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[i]);
197 for (codepoint = 1; codepoint < 0x110000; ++codepoint) {
198 if (testDataCodepoint == codepoint) {
199 if (TestInvariants(&Part1TestData[i]))
200 ++numPassed;
201 else
202 ++numFailed;
203 testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[++i]);
204 } else {
205 if (TestUnspecifiedCodepoint(codepoint))
206 ++numPassed;
207 else
208 ++numFailed;
211 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
214 void TestPart2()
216 printf("Test Part2: Canonical Order Test\n");
218 uint32_t i = 0;
219 uint32_t numFailed = 0;
220 uint32_t numPassed = 0;
222 while (Part2TestData[i].c1[0] != 0) {
223 if (TestInvariants(&Part2TestData[i++]))
224 ++numPassed;
225 else
226 ++numFailed;
228 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
231 void TestPart3()
233 printf("Test Part3: PRI #29 Test\n");
235 uint32_t i = 0;
236 uint32_t numFailed = 0;
237 uint32_t numPassed = 0;
239 while (Part3TestData[i].c1[0] != 0) {
240 if (TestInvariants(&Part3TestData[i++]))
241 ++numPassed;
242 else
243 ++numFailed;
245 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
248 int main(int argc, char** argv) {
249 if (sizeof(wchar_t) != 2) {
250 printf("This test can only be run where sizeof(wchar_t) == 2\n");
251 return 1;
253 if (strlen(versionText) == 0) {
254 printf("No testcases: to run the tests generate the header file using\n");
255 printf(" perl genNormalizationData.pl\n");
256 printf("in intl/unichar/tools and rebuild\n");
257 return 1;
260 printf("NormalizationTest: test nsIUnicodeNormalizer. UCD version: %s\n",
261 versionText);
262 if (argc <= 1)
263 verboseMode = false;
264 else if ((argc == 2) && (!strcmp(argv[1], "-v")))
265 verboseMode = true;
266 else {
267 printf(" Usage: NormalizationTest [OPTION]..\n");
268 printf("Options:\n");
269 printf(" -v Verbose mode\n");
270 return 1;
273 nsresult rv = NS_InitXPCOM2(nullptr, nullptr, nullptr);
274 if (NS_FAILED(rv)) {
275 printf("NS_InitXPCOM2 failed\n");
276 return 1;
279 normalizer = nullptr;
280 nsresult res;
281 res = CallGetService(kUnicodeNormalizerCID, &normalizer);
283 if(NS_FAILED(res) || !normalizer) {
284 printf("GetService failed\n");
285 return 1;
288 TestPart0();
289 TestPart1();
290 TestPart2();
291 TestPart3();
293 NS_RELEASE(normalizer);
295 printf("Test finished \n");
296 return 0;