1 // Test algorithm for Unicode display of IDNA URLs (bug 722299)
6 // Original Punycode or Expected UTF-8
10 ["cuillère", "xn--cuillre-6xa", true],
12 // repeated non-spacing marks
13 ["gruz̀̀ere", "xn--gruzere-ogea", false],
16 ["I♥NY", "xn--iny-zx5a", false],
19 Behaviour of this test changed in IDNA2008, replacing the non-XID
20 character with U+FFFD replacement character - when all platforms use
21 IDNA2008 it can be uncommented and the punycode URL changed to
24 // new non-XID character in Unicode 6.3
25 ["حلا\u061cل", "xn--bgbvr6gc", false],
28 // U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
29 ["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup", true],
32 ["толсто́й", "xn--lsa83dealbred", true],
34 // Mixed script Cyrillic/Latin
35 ["толсто́й-in-Russian", "xn---in-russian-1jg071b0a8bb4cpd", false],
37 // Mixed script Latin/Cyrillic
38 ["war-and-миръ", "xn--war-and--b9g3b7b3h", false],
40 // Cherokee (Restricted script)
41 ["ᏣᎳᎩ", "xn--f9dt7l", false],
43 // Yi (former Aspirational script, now Restricted per Unicode 10.0 update to UAX 31)
44 ["ꆈꌠꁱꂷ", "xn--4o7a6e1x64c", false],
47 ["πλάτων", "xn--hxa3ahjw4a", true],
49 // Mixed script Greek/Latin
50 ["πλάτωνicrelationship", "xn--icrelationship-96j4t9a3cwe2e", false],
52 // Mixed script Latin/Greek
53 ["spaceὈδύσσεια", "xn--space-h9dui0b0ga2j1562b", false],
56 ["मराठी", "xn--d2b1ag0dl", true],
58 // Devanagari with Armenian
59 ["मराठीՀայաստան", "xn--y9aaa1d0ai1cq964f8dwa2o1a", false],
61 // Devanagari with common
62 ["मराठी123", "xn--123-mhh3em2hra", true],
64 // Common with Devanagari
65 ["123मराठी", "xn--123-phh3em2hra", true],
68 ["chairman毛", "xn--chairman-k65r", true],
71 ["山葵sauce", "xn--sauce-6j9ii40v", true],
73 // Latin with Han, Hiragana and Katakana
74 ["van語ではドイ", "xn--van-ub4bpb6w0in486d", true],
76 // Latin with Han, Katakana and Hiragana
77 ["van語ドイでは", "xn--van-ub4bpb4w0ip486d", true],
79 // Latin with Hiragana, Han and Katakana
80 ["vanでは語ドイ", "xn--van-ub4bpb6w0ip486d", true],
82 // Latin with Hiragana, Katakana and Han
83 ["vanではドイ語", "xn--van-ub4bpb6w0ir486d", true],
85 // Latin with Katakana, Han and Hiragana
86 ["vanドイ語では", "xn--van-ub4bpb4w0ir486d", true],
88 // Latin with Katakana, Hiragana and Han
89 ["vanドイでは語", "xn--van-ub4bpb4w0it486d", true],
91 // Han with Latin, Hiragana and Katakana
92 ["語vanではドイ", "xn--van-ub4bpb6w0ik486d", true],
94 // Han with Latin, Katakana and Hiragana
95 ["語vanドイでは", "xn--van-ub4bpb4w0im486d", true],
97 // Han with Hiragana, Latin and Katakana
98 ["語ではvanドイ", "xn--van-rb4bpb9w0ik486d", true],
100 // Han with Hiragana, Katakana and Latin
101 ["語ではドイvan", "xn--van-rb4bpb6w0in486d", true],
103 // Han with Katakana, Latin and Hiragana
104 ["語ドイvanでは", "xn--van-ub4bpb1w0ip486d", true],
106 // Han with Katakana, Hiragana and Latin
107 ["語ドイではvan", "xn--van-rb4bpb4w0ip486d", true],
109 // Hiragana with Latin, Han and Katakana
110 ["イツvan語ではド", "xn--van-ub4bpb1wvhsbx330n", true],
112 // Hiragana with Latin, Katakana and Han
113 ["ではvanドイ語", "xn--van-rb4bpb9w0ir486d", true],
115 // Hiragana with Han, Latin and Katakana
116 ["では語vanドイ", "xn--van-rb4bpb9w0im486d", true],
118 // Hiragana with Han, Katakana and Latin
119 ["では語ドイvan", "xn--van-rb4bpb6w0ip486d", true],
121 // Hiragana with Katakana, Latin and Han
122 ["ではドイvan語", "xn--van-rb4bpb6w0iu486d", true],
124 // Hiragana with Katakana, Han and Latin
125 ["ではドイ語van", "xn--van-rb4bpb6w0ir486d", true],
127 // Katakana with Latin, Han and Hiragana
128 ["ドイvan語では", "xn--van-ub4bpb1w0iu486d", true],
130 // Katakana with Latin, Hiragana and Han
131 ["ドイvanでは語", "xn--van-ub4bpb1w0iw486d", true],
133 // Katakana with Han, Latin and Hiragana
134 ["ドイ語vanでは", "xn--van-ub4bpb1w0ir486d", true],
136 // Katakana with Han, Hiragana and Latin
137 ["ドイ語ではvan", "xn--van-rb4bpb4w0ir486d", true],
139 // Katakana with Hiragana, Latin and Han
140 ["ドイではvan語", "xn--van-rb4bpb4w0iw486d", true],
142 // Katakana with Hiragana, Han and Latin
143 ["ドイでは語van", "xn--van-rb4bpb4w0it486d", true],
146 ["中国123", "xn--123-u68dy61b", true],
149 ["123中国", "xn--123-x68dy61b", true],
151 // Characters that normalize to permitted characters
152 // (also tests Plane 1 supplementary characters)
153 ["super𝟖", "super8", true],
156 ["𠀀𠀁𠀂", "xn--j50icd", true],
158 // Han from Plane 2 with js (UTF-16) escapes
159 ["\uD840\uDC00\uD840\uDC01\uD840\uDC02", "xn--j50icd", true],
161 // Same with a lone high surrogate at the end
162 // Throws due to unpaired surrogate
163 // ["\uD840\uDC00\uD840\uDC01\uD840", "xn--zn7c0336bda", false],
165 // Latin text and Bengali digits
166 ["super৪", "xn--super-k2l", false],
168 // Bengali digits and Latin text
169 ["৫ab", "xn--ab-x5f", false],
171 // Bengali text and Latin digits
172 ["অঙ্কুর8", "xn--8-70d2cp0j6dtd", true],
174 // Latin digits and Bengali text
175 ["5াব", "xn--5-h3d7c", true],
177 // Mixed numbering systems
178 // Throws due to bidi rule violation
179 // ["٢٠۰٠", "xn--8hbae38c", false],
181 // Traditional Chinese
182 ["萬城", "xn--uis754h", true],
184 // Simplified Chinese
185 ["万城", "xn--chq31v", true],
187 // Simplified-only and Traditional-only Chinese in the same label
188 ["万萬城", "xn--chq31vsl1b", true],
190 // Traditional-only and Simplified-only Chinese in the same label
191 ["萬万城", "xn--chq31vrl1b", true],
193 // Han and Latin and Bopomofo
194 ["注音符号bopomofoㄅㄆㄇㄈ", "xn--bopomofo-hj5gkalm1637i876cuw0brk5f", true],
196 // Han, bopomofo, Latin
197 // Bug 1885096: Since the last character of "ㄅㄆㄇㄈ" is a CJK Ideograph,
198 // just use the first character "ㄅ" from the sequence "ㄅㄆㄇㄈ".
199 ["注音符号ㄅbopomofo", "xn--bopomofo-8i5gx891aylvccz9asi4e", true],
201 // Latin, Han, Bopomofo
202 ["bopomofo注音符号ㄅㄆㄇㄈ", "xn--bopomofo-hj5gkalm9637i876cuw0brk5f", true],
204 // Latin, Bopomofo, Han
205 ["bopomofoㄅㄆㄇㄈ注音符号", "xn--bopomofo-hj5gkalm3737i876cuw0brk5f", true],
207 // Bopomofo, Han, Latin
208 ["ㄅㄆㄇㄈ注音符号bopomofo", "xn--bopomofo-8i5gkalm3737i876cuw0brk5f", true],
210 // Bopomofo, Latin, Han
211 // Bug 1885096: Since the last character of "ㄅㄆㄇㄈ" is a CJK Ideograph,
212 // just use the first character "ㄅ" from the sequence "ㄅㄆㄇㄈ".
213 ["ㄅbopomofo注音符号", "xn--bopomofo-8i5g6891aylvccz9asi4e", true],
215 // Han, bopomofo and katakana
216 ["注音符号ㄅㄆㄇㄈボポモフォ", "xn--jckteuaez1shij0450gylvccz9asi4e", false],
218 // Han, katakana, bopomofo
219 ["注音符号ボポモフォㄅㄆㄇㄈ", "xn--jckteuaez6shij5350gylvccz9asi4e", false],
221 // bopomofo, han, katakana
222 ["ㄅㄆㄇㄈ注音符号ボポモフォ", "xn--jckteuaez1shij4450gylvccz9asi4e", false],
224 // bopomofo, katakana, han
225 ["ㄅㄆㄇㄈボポモフォ注音符号", "xn--jckteuaez1shij9450gylvccz9asi4e", false],
227 // katakana, Han, bopomofo
228 ["ボポモフォ注音符号ㄅㄆㄇㄈ", "xn--jckteuaez6shij0450gylvccz9asi4e", false],
230 // katakana, bopomofo, Han
231 ["ボポモフォㄅㄆㄇㄈ注音符号", "xn--jckteuaez6shij4450gylvccz9asi4e", false],
233 // Han, Hangul and Latin
234 ["韓한글hangul", "xn--hangul-2m5ti09k79ze", true],
236 // Han, Latin and Hangul
237 ["韓hangul한글", "xn--hangul-2m5to09k79ze", true],
239 // Hangul, Han and Latin
240 ["한글韓hangul", "xn--hangul-2m5th09k79ze", true],
242 // Hangul, Latin and Han
243 ["한글hangul韓", "xn--hangul-8m5t898k79ze", true],
245 // Latin, Han and Hangul
246 ["hangul韓한글", "xn--hangul-8m5ti09k79ze", true],
248 // Latin, Hangul and Han
249 ["hangul한글韓", "xn--hangul-8m5th09k79ze", true],
251 // Hangul and katakana
252 ["한글ハングル", "xn--qck1c2d4a9266lkmzb", false],
254 // Katakana and Hangul
255 ["ハングル한글", "xn--qck1c2d4a2366lkmzb", false],
257 // Thai (also tests that node with over 63 UTF-8 octets doesn't fail)
259 "เครื่องทําน้ําทําน้ําแข็ง",
260 "xn--22cdjb2fanb9fyepcbbb9dwh4a3igze4fdcd",
264 // Effect of adding valid or invalid subdomains (bug 1399540)
265 ["䕮䕵䕶䕱.ascii", "xn--google.ascii", true],
266 ["ascii.䕮䕵䕶䕱", "ascii.xn--google", true],
267 ["中国123.䕮䕵䕶䕱", "xn--123-u68dy61b.xn--google", true],
268 ["䕮䕵䕶䕱.中国123", "xn--google.xn--123-u68dy61b", true],
269 // Throws due to bogus Punycode
271 // "xn--accountlogin.䕮䕵䕶䕱",
272 // "xn--accountlogin.xn--google",
276 // "䕮䕵䕶䕱.xn--accountlogin",
277 // "xn--google.xn--accountlogin",
281 // Arabic diacritic not allowed in Latin text (bug 1370497)
282 ["goo\u0650gle", "xn--google-yri", false],
283 // ...but Arabic diacritics are allowed on Arabic text
284 ["العَرَبِي", "xn--mgbc0a5a6cxbzabt", true],
286 // Hebrew diacritic also not allowed in Latin text (bug 1404349)
287 ["goo\u05b4gle", "xn--google-rvh", false],
289 // Accents above dotless-i are not allowed
290 ["na\u0131\u0308ve", "xn--nave-mza04z", false],
291 ["d\u0131\u0302ner", "xn--dner-lza40z", false],
292 // but the corresponding accented-i (based on dotted i) is OK
293 ["na\u00efve.com", "xn--nave-6pa.com", true],
294 ["d\u00eener.com", "xn--dner-0pa.com", true],
297 function run_test() {
298 var idnService
= Cc
["@mozilla.org/network/idn-service;1"].getService(
302 for (var j
= 0; j
< testcases
.length
; ++j
) {
303 var test
= testcases
[j
];
304 var URL
= test
[0] + ".com";
305 var punycodeURL
= test
[1] + ".com";
306 var expectedUnicode
= test
[2];
310 result
= idnService
.convertToDisplayIDN(URL
);
315 punycodeURL
.substr(0, 4) == "xn--" ||
316 punycodeURL
.indexOf(".xn--") > 0
318 // test convertToDisplayIDN with a Unicode URL and with a
319 // Punycode URL if we have one
322 expectedUnicode
? escape(URL
) : escape(punycodeURL
)
325 result
= idnService
.convertToDisplayIDN(punycodeURL
);
328 expectedUnicode
? escape(URL
) : escape(punycodeURL
)
331 // The "punycode" URL isn't punycode. This happens in testcases
332 // where the Unicode URL has become normalized to an ASCII URL,
333 // so, even though expectedUnicode is true, the expected result
334 // is equal to punycodeURL
335 Assert
.equal(escape(result
), escape(punycodeURL
));