third_party/WebKit/LayoutTests/fast/js/encode-URI-test.html

   1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
   2
   3 <html>
   4
   5 <head>
   6 <meta http-equiv="content-type" content="text/html; charset=UTF-8">
   7 <title>test of JavaScript URI encoding and decoding methods</title>
   8 </head>
   9
  10 <body>
  11
  12 <script type="text/javascript">
  13
  14 if (window.testRunner)  testRunner.dumpAsText();
  15
  16 // --------
  17
  18 // Helper functions.
  19
  20 function hexDigit(number)
  21 {
  22     if (number >= 10)
  23         return String.fromCharCode(number + 55);
  24     return number;
  25 }
  26
  27 function printable(s)
  28 {
  29     if (s == "")
  30         return "empty string";
  31     var p = "";
  32     for (var i = 0; i < s.length; i++) {
  33         var c = s.charAt(i);
  34         var cc = s.charCodeAt(i);
  35         if (c == "\\") {
  36             p += "\\\\";
  37         } else if (c == "\"") {
  38             p += "\\\"";
  39         } else if (c == "\n") {
  40             p += "\\n";
  41         } else if (c == "\r") {
  42             p += "\\r";
  43         } else if (c == "\t") {
  44             p += "\\t";
  45         } else if (cc >= 20 && cc < 0x7F) {
  46             p += c;
  47         } else if (cc <= 0xFF) {
  48             p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
  49         } else if (cc <= 0xFFFF) {
  50             p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
  51         } else {
  52             p += "\\U" + hexDigit((cc >> 28) & 0xF) + hexDigit((cc >> 24) & 0xF) + hexDigit((cc >> 20) & 0xF) + hexDigit((cc >> 16) & 0xF)
  53                        + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
  54         }
  55     }
  56     return "\"" + p + "\"";
  57 }
  58
  59 function encodedCharacter(c)
  60 {
  61     // UTF-8 is what Gecko does, but not what WinIE 6 does.
  62     // It makes much more sense, though, to produce encodings that actually work in URLs.
  63     // So for JavaScriptCore, we want to match Gecko on this, WinIE on most other things.
  64
  65     // Instead of writing a JavaScript implementation of UTF-8 escaping, just do some specific cases here.
  66     switch (c) {
  67         case    0x80: return "%C2%80";
  68         case   0x7FF: return "%DF%BF";
  69         case   0x800: return "%E0%A0%80";
  70         case  0x2022: return "%E2%80%A2";
  71         case  0xD7FF: return "%ED%9F%BF";
  72         case  0xD800: return "%ED%A0%80";
  73         case  0xE000: return "%EE%80%80";
  74         case  0xFFFC: return "%EF%BF%BC";
  75         case  0xFFFD: return "%EF%BF%BD";
  76     }
  77
  78     if (c < 0 || c > 0x7F) {
  79         window.alert("encodedCharacter doesn't know how to escape character code " + c);
  80         return "?";
  81     }
  82
  83     return "%" + hexDigit(c >> 4) + hexDigit(c - (c >> 4 << 4));
  84 }
  85
  86 function escapedCharacter(c)
  87 {
  88     // UTF-8 is what Gecko does, but we do what Win IE does instead. We use Latin1 for
  89     // unicode characters less than or equal to 255, and %u encoding for everything else.
  90     switch (c) {
  91         case    0x80: return "%80";
  92         case   0x7FF: return "%u07FF";
  93         case   0x800: return "%u0800";
  94         case  0x2022: return "%u2022";
  95         case  0xD7FF: return "%uD7FF";
  96         case  0xD800: return "%uD800";
  97         case  0xE000: return "%uE000";
  98         case  0xFFFC: return "%uFFFC";
  99         case  0xFFFD: return "%uFFFD";
 100     }
 101
 102     if (c < 0 || c > 0x7F) {
 103         window.alert("escapedCharacter doesn't know how to escape character code " + c);
 104         return "?";
 105     }
 106
 107     return "%" + hexDigit(c >> 4) + hexDigit(c - (c >> 4 << 4));
 108 }
 109
 110 function forEachCharacterCode(f, s)
 111 {
 112     for (var i = 0; i < s.length; i++) {
 113         f(s.charCodeAt(i));
 114     }
 115 }
 116
 117 function call(functionName, parameter)
 118 {
 119     try {
 120         result = eval(functionName + "(parameter)");
 121     } catch (e) {
 122         result = "exception";
 123     }
 124     return result;
 125 }
 126
 127 // --------
 128
 129 // Build up tables with expected results.
 130
 131 var expectedResult = new Object;
 132
 133 function addExpectedNonEscaped(f, c)
 134 {
 135     expectedResult[f + "(" + String.fromCharCode(c) + ")"] = String.fromCharCode(c);
 136 }
 137
 138 function addNoEscape(c)
 139 {
 140     addExpectedNonEscaped("escape", c);
 141     addExpectedNonEscaped("encodeURI", c);
 142     addExpectedNonEscaped("encodeURIComponent", c);
 143 }
 144
 145 function addEscapeNoEscape(c)
 146 {
 147     addExpectedNonEscaped("escape", c);
 148 }
 149
 150 function addURIComponentNoEscape(c)
 151 {
 152     addExpectedNonEscaped("encodeURI", c);
 153     addExpectedNonEscaped("encodeURIComponent", c);
 154 }
 155
 156 function addURINoEscape(c)
 157 {
 158     addExpectedNonEscaped("encodeURI", c);
 159     expectedResult["decodeURI(" + escapedCharacter(c) + ")"] = encodedCharacter(c);
 160     expectedResult["decodeURI(" + escapedCharacter(c).toLowerCase() + ")"] = encodedCharacter(c).toLowerCase();
 161 }
 162
 163 forEachCharacterCode(addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_");
 164 forEachCharacterCode(addEscapeNoEscape, "+/");
 165 forEachCharacterCode(addURINoEscape, "@#$&+,/:;=?");
 166 forEachCharacterCode(addURIComponentNoEscape, "!'()~");
 167
 168 // WinIE 6's escape function does not escape @, although Gecko's does.
 169 expectedResult["escape(@)"] = "@";
 170
 171 // --------
 172
 173 // Run tests.
 174
 175 var failureCount = 0;
 176
 177 function test(functionName, parameter, desiredResult)
 178 {
 179     var alternateResult = expectedResult[functionName + "(" + parameter + ")"];
 180     if (alternateResult)
 181         desiredResult = alternateResult;
 182     var result = call(functionName, parameter);
 183     if (result != desiredResult) {
 184         var s = "called " + functionName + " on " + printable(parameter) + " and got " + printable(result) + " instead of " + printable(desiredResult);
 185         document.writeln("<p>" + s + "</p>");
 186         failureCount += 1;
 187     }
 188 }
 189
 190 function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
 191 {
 192     test("escape", parameter, escapeExpected);
 193     test("encodeURI", parameter, encodeExpected);
 194     test("encodeURIComponent", parameter, encodeExpected);
 195 }
 196
 197 function testUnescape(parameter, expected)
 198 {
 199     test("unescape", parameter, expected);
 200 }
 201
 202 function testDecode(parameter, expected)
 203 {
 204     if (expected == "\uFFFE" || expected == "\uFFFF")
 205         expected = "\uFFFD";
 206
 207     test("decodeURI", parameter, expected);
 208     test("decodeURIComponent", parameter, expected);
 209 }
 210
 211 function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
 212 {
 213     testUnescape(parameter, expectedUnescape);
 214     testDecode(parameter, expectedDecode);
 215 }
 216
 217 function testCharacter(c)
 218 {
 219     var s = String.fromCharCode(c);
 220     var escaped = escapedCharacter(c);
 221     var encoded = encodedCharacter(c);
 222
 223     testEscapeAndEncode(s, escaped, encoded);
 224     testUnescape(escaped, s);
 225     testUnescape(escaped.toLowerCase(), s);
 226     testDecode(encoded, s);
 227     testDecode(encoded.toLowerCase(), s);
 228 }
 229
 230 for (var c = 0; c <= 128; c++) {
 231     testCharacter(c);
 232 }
 233 testCharacter(0x7FF);
 234 testCharacter(0x800);
 235 testCharacter(0x2022);
 236 testCharacter(0xD7FF);
 237 testCharacter(0xE000);
 238 testCharacter(0xFFFC);
 239 testCharacter(0xFFFD);
 240
 241 // These tests are currently turned off because it's not yet entirely clear what correct behavior
 242 // is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
 243 // U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
 244 // And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
 245 // than U+FFFF (outside the BMP) from working properly.
 246
 247 //testCharacter(0xD800);
 248 //testCharacter(0xDBFF);
 249 //testCharacter(0xDC00);
 250 //testCharacter(0xDFFF);
 251 //testCharacter(0xFFFE);
 252 //testCharacter(0xFFFF);
 253 //testCharacter(0x10000);
 254
 255 testUnescapeAndDecode("%", "%", "exception");
 256 testUnescapeAndDecode("%0", "%0", "exception");
 257 testUnescapeAndDecode("%a", "%a", "exception");
 258 testUnescapeAndDecode("%u", "%u", "exception");
 259 testUnescapeAndDecode("%xx", "%xx", "exception");
 260 testUnescapeAndDecode("%u004", "%u004", "exception");
 261 testUnescapeAndDecode("%u0041", "A", "exception");
 262 testUnescapeAndDecode("%uxxxx", "%uxxxx", "exception");
 263
 264 testUnescapeAndDecode(String.fromCharCode(0x80), String.fromCharCode(0x80), String.fromCharCode(0x80));
 265 testUnescapeAndDecode(String.fromCharCode(0xD800), String.fromCharCode(0xD800), String.fromCharCode(0xD800));
 266
 267 testUnescapeAndDecode("%C2%80", String.fromCharCode(0xC2) + String.fromCharCode(0x80), String.fromCharCode(0x80));
 268 testUnescapeAndDecode("%C2", String.fromCharCode(0xC2), "exception");
 269 testUnescapeAndDecode("%C2" + String.fromCharCode(0x80), String.fromCharCode(0xC2) + String.fromCharCode(0x80), "exception");
 270
  271 // The characters below have to be literal because String.fromCharCode will make a single character
 272 // and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
 273 // For most JavaScript engines, this will turn into two characters because they use UTF-16
 274 // instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
 275 // behavior, forbids it, or doesn't say either way.
 276 testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80");
 277 testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀");
 278 testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5");
 279 testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵");
 280 testEscapeAndEncode("𯿿", "%uD87F%uDFFF", "%F0%AF%BF%BF");
 281 testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", "𯿿");
 282
 283 // --------
 284
 285 // Summarize.
 286
 287 var failuresMessage;
 288 if (failureCount) {
 289     failuresMessage = failureCount + " tests failed.";
 290 } else {
 291     failuresMessage = "No failures.";
 292 }
 293 document.writeln("<p>Testing complete. " + failuresMessage + "</p>");
 294
 295 // --------
 296
 297 </script>
 298
 299 </body>
 300
 301 </html>