1 <!DOCTYPE HTML PUBLIC
"-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
6 <meta http-equiv=
"content-type" content=
"text/html; charset=UTF-8">
7 <title>test of JavaScript URI encoding and decoding methods
</title>
12 <script type=
"text/javascript">
// Only a test harness exposes window.testRunner; when it is present, request its text dump.
if (window.testRunner) {
    testRunner.dumpAsText();
}
// Converts a small number into a hexadecimal digit character.
// The branch visible here adds 55 to the value, so 10..15 map to "A".."F" (char codes 65..70).
// NOTE(review): the handling of 0..9 is not visible in this excerpt — presumably a separate
// branch adds 48 to produce "0".."9"; confirm against the full file.
20 function hexDigit(number
)
23 return String
.fromCharCode(number
+ 55);
// Body fragment of the helper that renders a string as a quoted, escaped literal for the
// failure messages (the reporting code calls it as printable(...)). The function header and
// several branches are not visible in this excerpt, so the notes below cover only what is shown.
// Label used in place of a quoted literal — presumably for an empty input; the guard is not visible.
30 return "empty string";
// Walk the string one UTF-16 code unit at a time.
32 for (var i
= 0; i
< s
.length
; i
++) {
// cc is the numeric code of the unit at index i; c (assigned on a line not shown) is compared
// against individual characters below.
34 var cc
= s
.charCodeAt(i
);
37 } else if (c
== "\"") {
39 } else if (c
== "\n") {
41 } else if (c
== "\r") {
43 } else if (c
== "\t") {
// NOTE(review): the lower bound is decimal 20 while the upper bound is hexadecimal 0x7F. If the
// intent is "printable ASCII", this was probably meant to be 0x20 (space) — confirm before changing.
45 } else if (cc
>= 20 && cc
< 0x7F) {
// Remaining codes up to 0xFF are written as \x followed by two hex digits.
47 } else if (cc
<= 0xFF) {
48 p
+= "\\x" + hexDigit(cc
>> 4) + hexDigit(cc
& 0xF);
// Codes up to 0xFFFF are written as \u followed by four hex digits.
49 } else if (cc
<= 0xFFFF) {
50 p
+= "\\u" + hexDigit((cc
>> 12) & 0xF) + hexDigit((cc
>> 8) & 0xF) + hexDigit((cc
>> 4) & 0xF) + hexDigit(cc
& 0xF);
// Eight-hex-digit \U form; the branch header that leads here is not visible in this excerpt.
52 p
+= "\\U" + hexDigit((cc
>> 28) & 0xF) + hexDigit((cc
>> 24) & 0xF) + hexDigit((cc
>> 20) & 0xF) + hexDigit((cc
>> 16) & 0xF)
53 + hexDigit((cc
>> 12) & 0xF) + hexDigit((cc
>> 8) & 0xF) + hexDigit((cc
>> 4) & 0xF) + hexDigit(cc
& 0xF);
// The accumulated text p is returned wrapped in double quotes.
56 return "\"" + p
+ "\"";
// Returns the percent-encoded form expected from encodeURI/encodeURIComponent for character
// code c. Non-ASCII codes are supported only through the hard-coded cases listed below.
59 function encodedCharacter(c
)
61 // UTF-8 is what Gecko does, but not what WinIE 6 does.
62 // It makes much more sense, though, to produce encodings that actually work in URLs.
63 // So for JavaScriptCore, we want to match Gecko on this, WinIE on most other things.
65 // Instead of writing a JavaScript implementation of UTF-8 escaping, just do some specific cases here.
// Fixed UTF-8 percent-encodings for the specific non-ASCII codes the tests use
// (the switch header itself is not visible in this excerpt).
67 case 0x80: return "%C2%80";
68 case 0x7FF: return "%DF%BF";
69 case 0x800: return "%E0%A0%80";
70 case 0x2022: return "%E2%80%A2";
71 case 0xD7FF: return "%ED%9F%BF";
72 case 0xD800: return "%ED%A0%80";
73 case 0xE000: return "%EE%80%80";
74 case 0xFFFC: return "%EF%BF%BC";
75 case 0xFFFD: return "%EF%BF%BD";
// Any other code outside 0..0x7F has no known encoding here and is reported with an alert.
// What follows the alert inside this branch is not visible in this excerpt.
78 if (c
< 0 || c
> 0x7F) {
79 window
.alert("encodedCharacter doesn't know how to escape character code " + c
);
// "%" plus two hex digits: the high nibble, then c - (c >> 4 << 4), which is the low four bits of c.
83 return "%" + hexDigit(c
>> 4) + hexDigit(c
- (c
>> 4 << 4));
// Returns the form expected from escape() for character code c. Non-ASCII codes are supported
// only through the hard-coded cases listed below.
86 function escapedCharacter(c
)
88 // UTF-8 is what Gecko does, but we do what Win IE does instead. We use Latin1 for
89 // unicode characters less than or equal to 255, and %u encoding for everything else.
// Fixed results for the specific non-ASCII codes the tests use: %XX for 0x80, %uXXXX for the
// rest (the switch header itself is not visible in this excerpt).
91 case 0x80: return "%80";
92 case 0x7FF: return "%u07FF";
93 case 0x800: return "%u0800";
94 case 0x2022: return "%u2022";
95 case 0xD7FF: return "%uD7FF";
96 case 0xD800: return "%uD800";
97 case 0xE000: return "%uE000";
98 case 0xFFFC: return "%uFFFC";
99 case 0xFFFD: return "%uFFFD";
// Any other code outside 0..0x7F has no known escape here and is reported with an alert.
// What follows the alert inside this branch is not visible in this excerpt.
102 if (c
< 0 || c
> 0x7F) {
103 window
.alert("escapedCharacter doesn't know how to escape character code " + c
);
// "%" plus two hex digits: the high nibble, then c - (c >> 4 << 4), which is the low four bits of c.
107 return "%" + hexDigit(c
>> 4) + hexDigit(c
- (c
>> 4 << 4));
// Visits every index of string s on behalf of callback f.
// NOTE(review): the loop body is not visible in this excerpt. Callers pass functions that take
// a character code, so it presumably calls f(s.charCodeAt(i)) — confirm against the full file.
110 function forEachCharacterCode(f
, s
)
112 for (var i
= 0; i
< s
.length
; i
++) {
// Invokes the function named by functionName with parameter as its single argument.
// NOTE(review): the surrounding control flow is not visible in this excerpt; the two
// assignments below look like the success path and a fallback (presumably a catch block).
117 function call(functionName
, parameter
)
// eval resolves the function by name; "parameter" inside the evaluated text refers to this
// function's own argument. Every caller visible in this file passes a fixed literal name.
120 result
= eval(functionName
+ "(parameter)");
// Sentinel string that the test tables use as the expected value for failing calls.
122 result
= "exception";
129 // Build up tables with expected results.
// Table of expected results, keyed by strings of the form "functionName(argument)".
var expectedResult = {};
// Records that calling the function named f on the single character with code c is expected
// to return that character unchanged.
function addExpectedNonEscaped(f, c)
{
    var character = String.fromCharCode(c);
    var key = f + "(" + character + ")";
    expectedResult[key] = character;
}
// Registers character code c as passing unchanged through escape, encodeURI and
// encodeURIComponent, in that order.
function addNoEscape(c)
{
    var functionNames = ["escape", "encodeURI", "encodeURIComponent"];
    for (var index = 0; index < functionNames.length; index++)
        addExpectedNonEscaped(functionNames[index], c);
}
// Registers character code c as passing unchanged through escape only.
function addEscapeNoEscape(c)
{
    var functionName = "escape";
    addExpectedNonEscaped(functionName, c);
}
// Registers character code c as passing unchanged through encodeURI and encodeURIComponent,
// in that order.
function addURIComponentNoEscape(c)
{
    var functionNames = ["encodeURI", "encodeURIComponent"];
    for (var index = 0; index < functionNames.length; index++)
        addExpectedNonEscaped(functionNames[index], c);
}
// Registers character code c as left alone by encodeURI, and records that decodeURI is
// expected to leave its percent-escape in place, for both upper- and lower-case hex digits.
function addURINoEscape(c)
{
    addExpectedNonEscaped("encodeURI", c);

    // Helpers are called once per use, in the same order as before, rather than cached.
    var upperCaseKey = "decodeURI(" + escapedCharacter(c) + ")";
    expectedResult[upperCaseKey] = encodedCharacter(c);

    var lowerCaseKey = "decodeURI(" + escapedCharacter(c).toLowerCase() + ")";
    expectedResult[lowerCaseKey] = encodedCharacter(c).toLowerCase();
}
// Fill the expectation table. Each entry pairs a registration helper with the characters it
// applies to; entries are processed in the order listed.
(function () {
    var characterGroups = [
        [addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"],
        [addEscapeNoEscape, "+/"],
        [addURINoEscape, "@#$&+,/:;=?"],
        [addURIComponentNoEscape, "!'()~"]
    ];

    for (var groupIndex = 0; groupIndex < characterGroups.length; groupIndex++)
        forEachCharacterCode(characterGroups[groupIndex][0], characterGroups[groupIndex][1]);

    // WinIE 6's escape function does not escape @, although Gecko's does.
    expectedResult["escape(@)"] = "@";
})();
// Count of failed checks; it is read when the summary message is built at the end of the script.
175 var failureCount
= 0;
// Runs the function named functionName on parameter (through call()) and writes a paragraph to
// the document when the outcome differs from the expected value.
// Parts of this function (the guard on the table override and the end of the failure branch)
// are not visible in this excerpt.
177 function test(functionName
, parameter
, desiredResult
)
// Look for a table entry keyed as "functionName(parameter)".
179 var alternateResult
= expectedResult
[functionName
+ "(" + parameter
+ ")"];
// The table entry replaces the caller-supplied expectation; the condition guarding this
// assignment is not visible here.
181 desiredResult
= alternateResult
;
182 var result
= call(functionName
, parameter
);
// Loose (!=) comparison between the actual and expected values.
183 if (result
!= desiredResult
) {
// All three values are passed through printable() before being put in the message.
184 var s
= "called " + functionName
+ " on " + printable(parameter
) + " and got " + printable(result
) + " instead of " + printable(desiredResult
);
185 document
.writeln("<p>" + s
+ "</p>");
// Checks one input against all three encoders: escape against escapeExpected, then encodeURI
// and encodeURIComponent against encodeExpected.
function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
{
    var checks = [
        ["escape", escapeExpected],
        ["encodeURI", encodeExpected],
        ["encodeURIComponent", encodeExpected]
    ];

    for (var index = 0; index < checks.length; index++)
        test(checks[index][0], parameter, checks[index][1]);
}
// Checks that unescape applied to parameter yields expected.
function testUnescape(parameter, expected)
{
    var functionName = "unescape";
    test(functionName, parameter, expected);
}
// Checks parameter against both decodeURI and decodeURIComponent using the same expectation.
202 function testDecode(parameter
, expected
)
// U+FFFE and U+FFFF are special-cased. The statement controlled by this condition is not
// visible in this excerpt, so the calls below should not be read as its body.
204 if (expected
== "\uFFFE" || expected
== "\uFFFF")
207 test("decodeURI", parameter
, expected
);
208 test("decodeURIComponent", parameter
, expected
);
// Checks one input against unescape (expecting expectedUnescape) and then against the
// decodeURI family (expecting expectedDecode).
function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
{
    // unescape is checked first so that failures are reported in the same order as before.
    test("unescape", parameter, expectedUnescape);
    testDecode(parameter, expectedDecode);
}
// Round-trips a single character code c: encodes the character with every encoder, then
// decodes both the upper-case and lower-case spellings of each encoded form.
function testCharacter(c)
{
    var character = String.fromCharCode(c);
    var escapedForm = escapedCharacter(c);
    var encodedForm = encodedCharacter(c);
    var pass;

    testEscapeAndEncode(character, escapedForm, encodedForm);

    // Pass 0 uses the form as produced; pass 1 lower-cases it just before the call.
    for (pass = 0; pass < 2; pass++)
        testUnescape(pass ? escapedForm.toLowerCase() : escapedForm, character);

    for (pass = 0; pass < 2; pass++)
        testDecode(pass ? encodedForm.toLowerCase() : encodedForm, character);
}
// Sweep character codes 0 through 128 inclusive.
// NOTE(review): the loop body is not visible in this excerpt — presumably testCharacter(c); confirm.
230 for (var c
= 0; c
<= 128; c
++) {
// Spot checks for individual codes above the sweep. 0x7FF/0x800 straddle the switch from
// two-byte to three-byte UTF-8, and 0xD7FF/0xE000 sit just outside the surrogate range.
233 testCharacter(0x7FF);
234 testCharacter(0x800);
235 testCharacter(0x2022);
236 testCharacter(0xD7FF);
237 testCharacter(0xE000);
238 testCharacter(0xFFFC);
239 testCharacter(0xFFFD);
241 // These tests are currently turned off because it's not yet entirely clear what correct behavior
242 // is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
243 // U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
244 // And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
245 // than U+FFFF (outside the BMP) from working properly.
247 //testCharacter(0xD800);
248 //testCharacter(0xDBFF);
249 //testCharacter(0xDC00);
250 //testCharacter(0xDFFF);
251 //testCharacter(0xFFFE);
252 //testCharacter(0xFFFF);
253 //testCharacter(0x10000);
// Decoder edge cases, run in a fixed order inside a function scope so the helper variables
// do not become globals.
(function () {
    // Incomplete or invalid escapes: each pair is [input, expected unescape result]; the
    // decodeURI family is expected to produce "exception" for every one of them.
    var malformedEscapes = [
        ["%", "%"],
        ["%0", "%0"],
        ["%a", "%a"],
        ["%u", "%u"],
        ["%xx", "%xx"],
        ["%u004", "%u004"],
        ["%u0041", "A"],
        ["%uxxxx", "%uxxxx"]
    ];
    for (var index = 0; index < malformedEscapes.length; index++)
        testUnescapeAndDecode(malformedEscapes[index][0], malformedEscapes[index][1], "exception");

    var char80 = String.fromCharCode(0x80);
    var charC2 = String.fromCharCode(0xC2);
    var charD800 = String.fromCharCode(0xD800);

    // Raw, unescaped characters are expected to come back unchanged from every decoder.
    testUnescapeAndDecode(char80, char80, char80);
    testUnescapeAndDecode(charD800, charD800, charD800);

    // A complete two-byte UTF-8 escape decodes to one character; a lone lead byte, or a lead
    // byte followed by a raw (unescaped) character, is expected to produce "exception".
    testUnescapeAndDecode("%C2%80", charC2 + char80, char80);
    testUnescapeAndDecode("%C2", charC2, "exception");
    testUnescapeAndDecode("%C2" + char80, charC2 + char80, "exception");
})();
// The characters below lie outside the BMP. String.fromCharCode will make a single character
// from one code, and a single \u escape won't allow us to specify a value higher than U+FFFF,
// so the first two have to be literal. For most JavaScript engines, each will turn into two
// characters because they use UTF-16 instead of Unicode; it's not clear to me at the moment if
// the standard asks for this UTF-16 behavior, forbids it, or doesn't say either way.
testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80");
testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀");
testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5");
testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵");
// U+2FFFF is a noncharacter, and text tooling tends to drop it from source files, which
// silently turns the literal into an empty string. It is therefore spelled as the UTF-16
// surrogate pair that the expected "%uD87F%uDFFF" output already assumes.
testEscapeAndEncode("\uD87F\uDFFF", "%uD87F%uDFFF", "%F0%AF%BF%BF");
testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", "\uD87F\uDFFF");
// Final summary. The two assignments below are alternative messages; the branching that
// chooses between them is not visible in this excerpt.
// Message that includes the number of failed checks.
289 failuresMessage
= failureCount
+ " tests failed.";
// Message for a clean run.
291 failuresMessage
= "No failures.";
// Write the chosen message to the page as the last paragraph of output.
293 document
.writeln("<p>Testing complete. " + failuresMessage
+ "</p>");