// Test banner. description() is passed the one-line summary of what this
// file covers: IDNA2003 handling of domain name labels.
// NOTE(review): description() and debug() are not defined in this file;
// presumably they are test-harness helpers -- confirm.
1 description("IDNA2003 handling in domain name labels.");
// debug() is passed a note that the expected values below follow IDNA2003
// behavior.
3 debug("The PASS/FAIL results of this test are set to the behavior in IDNA2003.");
6 // For IDNA Compatibility test material see
7 // http://www.unicode.org/reports/tr46/
8 // 1) Deviant character tests (deviant from IDNA2008)
9 // U+00DF normalizes to "ss" during IDNA2003's mapping phase
10 ["fa\u00DF.de","fass.de"],
11 // The ς U+03C2 GREEK SMALL LETTER FINAL SIGMA is mapped to U+03C3
12 ["\u03B2\u03CC\u03BB\u03BF\u03C2.com","xn--nxasmq6b.com"],
13 // The ZWJ U+200D ZERO WIDTH JOINER is mapped to nothing.
14 ["\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com","xn--10cl1a0b.com"],
15 // The ZWNJ U+200C ZERO WIDTH NON-JOINER is mapped to nothing.
16 ["\u0646\u0627\u0645\u0647\u200C\u0627\u06CC.com","xn--mgba3gch31f.com"],
17 // 2) Normalization tests
18 ["www.loo\u0138out.net","www.xn--looout-5bb.net"],
19 ["\u15EF\u15EF\u15EF.lookout.net","xn--1qeaa.lookout.net"],
20 ["www.lookout.\u0441\u043E\u043C","www.lookout.xn--l1adi"],
21 ["www.lookout.net\uFF1A80","www.lookout.net:80"],
22 ["www\u2025lookout.net","www..lookout.net"],
23 ["www.lookout\u2027net","www.xn--lookoutnet-406e"],
24 // Using U+0138 LATIN SMALL LETTER KRA ‘ĸ’ in a domain label (same vector as the first normalization test above)
25 ["www.loo\u0138out.net","www.xn--looout-5bb.net"],
26 // \u2A74 decomposes into ::=
27 ["www.lookout.net\u2A7480","www.lookout.net::%3D80"],
28 // U+0341; COMBINING ACUTE TONE MARK is normalized to U+0301
29 ["lookout\u0341.net","xn--lookout-zge.net"],
30 // 3) Characters mapped away : See RFC 3454 B.1
31 // U+2060 WORD JOINER is mapped to nothing.
32 ["look\u2060out.net","lookout.net"],
33 // U+FEFF ZERO WIDTH NO-BREAK SPACE is mapped to nothing.
34 ["look\uFEFFout.net","lookout.net"],
35 // U+FE00 VARIATION SELECTOR-1 is mapped to nothing.
36 ["look\uFE00out.net","lookout.net"],
37 // 4) Prohibited code points
38 // Using prohibited non-ASCII space character U+00A0 NO-BREAK SPACE
39 ["www\u00A0.lookout.net","www%20.lookout.net"],
40 // Using prohibited non-ASCII space character U+1680 OGHAM SPACE MARK
41 ["\u1680lookout.net","%E1%9A%80lookout.net"],
42 // Using prohibited lower ASCII control character \u001F
43 ["\u001Flookout.net","%1Flookout.net"],
44 // Using prohibited U+06DD ARABIC END OF AYAH
45 ["look\u06DDout.net","look%DB%9Dout.net"],
46 // Using prohibited U+180E MONGOLIAN VOWEL SEPARATOR
47 ["look\u180Eout.net","look%E1%A0%8Eout.net"],
48 // Using prohibited non-character code point U+1FFFE (written as the surrogate pair \uD83F\uDFFE)
49 ["look\uD83F\uDFFEout.net","look%F0%9F%BF%BEout.net"],
50 // Using prohibited U+DEAD half surrogate code point
51 // FIXME: ["look\uDEADout.net","look%ED%BA%ADout.net"],
52 // Using prohibited Inappropriate for plain text U+FFFA; INTERLINEAR ANNOTATION SEPARATOR
53 ["look\uFFFAout.net","look%EF%BF%BAout.net"],
54 // Using prohibited Inappropriate for canonical representation 2FF0-2FFB; [IDEOGRAPHIC DESCRIPTION CHARACTERS]
55 ["look\u2FF0out.net","look%E2%BF%B0out.net"],
56 // Using prohibited Change display properties or are deprecated 202E; RIGHT-TO-LEFT OVERRIDE
57 ["look\u202Eout.net","look%E2%80%AEout.net"],
58 // Using prohibited Change display properties or are deprecated 206B; ACTIVATE SYMMETRIC SWAPPING
59 ["look\u206Bout.net","look%E2%81%ABout.net"],
60 // Using prohibited Tagging characters E0001; LANGUAGE TAG
61 ["look\uDB40\uDC01out.net","look%F3%A0%80%81out.net"],
62 // Using prohibited Tagging characters E0020-E007F; [TAGGING CHARACTERS]
63 ["look\uDB40\uDC20out.net","look%F3%A0%80%A0out.net"],
64 // Using prohibited Characters with bidirectional property 05BE
65 ["look\u05BEout.net","look%D6%BEout.net"]
68 for (var i = 0; i < cases.length; ++i) {
69 test_vector = cases[i][0];
70 expected_result = cases[i][1];
71 shouldBe("canonicalize('http://" + test_vector + "/')",
72 "'http://" + expected_result + "/'");