1 description("Canonicalization of paths.");
8 // double dots followed by a slash or the end of the string count
9 ["/foo/bar/..", "/foo/"],
10 ["/foo/bar/../", "/foo/"],
11 // don't count double dots when they aren't followed by a slash
12 ["/foo/..bar", "/foo/..bar"],
14 ["/foo/bar/../ton", "/foo/ton"],
15 ["/foo/bar/../ton/../../a", "/a"],
16 // we should not be able to go above the root
17 ["/foo/../../..", "/"],
18 ["/foo/../../../ton", "/ton"],
19 // escaped dots should be unescaped and treated the same as dots
20 ["/foo/%2e", "/foo/"],
21 ["/foo/%2e%2", "/foo/.%2"],
22 ["/foo/%2e./%2e%2e/.%2e/%2e.bar", "/..bar"],
23 // Multiple slashes in a row should be preserved and treated like empty directory names.
26 ["/foo/bar//../..", "/foo/"],
27 ["/foo/bar//..", "/foo/bar/"],
28 ["/foo/bar/..", "/foo/"],
30 // ----- escaping tests -----
32 // Valid escape sequence
33 ["/%20foo", "/%20foo"],
34 // Invalid escape sequences should be passed through unchanged.
37 // Invalid escape sequence: bad characters should be treated the same as
38 // the surrounding text, not as escaped (in this case, UTF-8).
39 ["/foo%2zbar", "/foo%2zbar"],
40 // (Disabled because requires UTF8)
41 // ["/foo%2\xc2\xa9zbar", "/foo%2%C2%A9zbar"],
42 ["/foo%2\u00c2\u00a9zbar", "/foo%2%C3%82%C2%A9zbar"],
43 // Regular characters that are escaped should be unescaped
44 ["/foo%41%7a", "/fooAz"],
45 // Funny characters that are unescaped should be escaped
46 ["/foo\u0009\u0091%91", "/foo%C2%91%91"],
47 // Invalid characters that are escaped should cause a failure.
48 ["/foo%00%51", "/foo%00Q"],
49 // Some characters should be passed through unchanged regardless of esc.
50 ["/(%28:%3A%29)", "/(%28:%3A%29)"],
51 // Characters that are properly escaped should not have the case changed
53 ["/%3A%3a%3C%3c", "/%3A%3a%3C%3c"],
54 // Unescaped tab characters should be stripped from the path, not escaped
55 ["/foo\tbar", "/foobar"],
56 // Backslashes should get converted to forward slashes
57 ["\\\\foo\\\\bar", "/foo/bar"],
58 // Hashes found in paths (possibly only when the caller explicitly sets
59 // the path on an already-parsed URL) should be escaped.
60 // (Disabled because requires ability to set path directly.)
61 // ["/foo#bar", "/foo%23bar"],
62 // %7f should be allowed and %3D should not be unescaped (these were wrong
63 // in a previous version).
64 ["/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd"],
65 // @ should be passed through unchanged (escaped or unescaped).
66 ["/@asdf%40", "/@asdf%40"],
68 // ----- encoding tests -----
70 ["/\u4f60\u597d\u4f60\u597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD"],
71 // Invalid unicode characters should fail. We only do validation on
72 // UTF-16 input, so this doesn't happen on 8-bit.
73 ["/\ufdd0zyx", "/%EF%BF%BDzyx"],
74 // U+2025 TWO DOT LEADER should not be normalized to .. in the path
75 ["/\u2025/foo", "/%E2%80%A5/foo"],
76 // A half-surrogate is an error by itself U+DEAD
77 // FIXME: ["/\uDEAD/foo", "/\uFFFD/foo"],
78 // BOM code point with special meaning U+FEFF ZERO WIDTH NO-BREAK SPACE
79 ["/\uFEFF/foo", "/%EF%BB%BF/foo"],
80 // The BIDI override code points RLO and LRO
81 ["/\u202E/foo/\u202D/bar", "/%E2%80%AE/foo/%E2%80%AD/bar"],
82 // U+FF0F FULLWIDTH SOLIDUS should normalize to / in a hostname
83 ["\uFF0Ffoo/", "%2Ffoo/"],
87 for (var i
= 0; i
< cases
.length
; ++i
) {
88 test_vector
= cases
[i
][0];
89 expected_result
= cases
[i
][1];
90 shouldBe("canonicalize('http://example.com" + test_vector
+ "')",
91 "'http://example.com" + expected_result
+ "'");