1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/tools/dump_cache/url_to_filename_encoder.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/stringprintf.h"
12 #include "base/strings/string_piece.h"
13 #include "testing/gtest/include/gtest/gtest.h"
15 using base::StringPiece
;
// Path separators used by the URL-level helpers further down: kDirSeparator
// seeds dir_sep_ and is passed to UrlToFilenameEncoder::Decode, while
// kOtherDirSeparator is the slash that ValidateUrl tells CheckValidChars to
// reject.
// NOTE(review): each name is defined twice with the values swapped, and the
// embedded source numbering jumps from 22 to 24, so a platform #if/#else guard
// was presumably dropped from this extract -- confirm against upstream.
21 char kDirSeparator
= '\\';
22 char kOtherDirSeparator
= '/';
24 char kDirSeparator
= '/';
25 char kOtherDirSeparator
= '\\';
// Test fixture shared by every UrlToFilenameEncoder test in this file.
// The constructor initializes escape_ with one copy of
// UrlToFilenameEncoder::kEscapeChar and dir_sep_ with one copy of
// kDirSeparator; the tests concatenate both into expected ("gold") filenames.
// NOTE(review): the declarations of escape_ and dir_sep_ are not visible in
// this extract.
28 class UrlToFilenameEncoderTest
: public ::testing::Test
{
30 UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar
),
31 dir_sep_(1, kDirSeparator
) {
// Splits |escaped_word| on "/" with Tokenize and expects every resulting
// component to be no longer than
// UrlToFilenameEncoder::kMaximumSubdirectoryLength.
34 void CheckSegmentLength(const StringPiece
& escaped_word
) {
35 std::vector
<StringPiece
> components
;
36 Tokenize(escaped_word
, StringPiece("/"), &components
);
37 for (size_t i
= 0; i
< components
.size(); ++i
) {
38 EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength
,
39 components
[i
].size());
// Expects every character of |escaped_word| to be usable in a filename: it
// must not appear in kInvalidChars, must differ from |invalid_slash|, must not
// be NUL, and must be below 0x7E.
43 void CheckValidChars(const StringPiece
& escaped_word
, char invalid_slash
) {
44 // These characters are invalid in Windows. We add in ', as that's pretty
45 // inconvenient in a Unix filename.
47 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
48 const string kInvalidChars
= "<>:\"|?*'";
49 for (size_t i
= 0; i
< escaped_word
.size(); ++i
) {
50 char c
= escaped_word
[i
];
51 EXPECT_EQ(string::npos
, kInvalidChars
.find(c
));
52 EXPECT_NE(invalid_slash
, c
);
53 EXPECT_NE('\0', c
); // only invalid character in Posix
// Compare as unsigned char: where plain char is signed, bytes >= 0x80 are
// negative and would slip past a raw `0x7E > c` comparison.
54 EXPECT_GT(0x7E, static_cast<unsigned char>(c
)); // only English printable characters
// Encodes |in_word| with UrlToFilenameEncoder::EncodeSegment (passing an empty
// std::string and '/'), expects the result to equal |gold_word|, runs
// CheckSegmentLength and CheckValidChars on it (backslash is the rejected
// slash), then expects UrlToFilenameEncoder::Decode to give back |in_word|.
58 void Validate(const string
& in_word
, const string
& gold_word
) {
59 string escaped_word
, url
;
60 UrlToFilenameEncoder::EncodeSegment(
61 std::string(), in_word
, '/', &escaped_word
);
62 EXPECT_EQ(gold_word
, escaped_word
);
63 CheckSegmentLength(escaped_word
);
64 CheckValidChars(escaped_word
, '\\');
65 UrlToFilenameEncoder::Decode(escaped_word
, '/', &url
);
66 EXPECT_EQ(in_word
, url
);
// Same round-trip as Validate but without a gold value: encodes |in_word|,
// checks segment lengths and valid characters on the encoded form, and expects
// Decode to give back |in_word|.
69 void ValidateAllSegmentsSmall(const string
& in_word
) {
70 string escaped_word
, url
;
71 UrlToFilenameEncoder::EncodeSegment(
72 std::string(), in_word
, '/', &escaped_word
);
73 CheckSegmentLength(escaped_word
);
74 CheckValidChars(escaped_word
, '\\');
75 UrlToFilenameEncoder::Decode(escaped_word
, '/', &url
);
76 EXPECT_EQ(in_word
, url
);
// Expects |word| to encode to itself followed by escape_.
// NOTE(review): DoesNotEscape also passes an empty string through this helper,
// which then expects just escape_; check that against the "unless the leaf is
// empty" wording below.
79 void ValidateNoChange(const string
& word
) {
80 // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
81 Validate(word
, word
+ escape_
);
// Expects the single character |ch| to encode as kEscapeChar, the two-digit
// uppercase hex value of |ch|, and a trailing kEscapeChar (format "%c%02X%c").
// NOTE(review): the declaration of |escaped| (embedded source line 86) is
// missing from this extract; sizeof(escaped) suggests a fixed-size char buffer.
84 void ValidateEscaped(unsigned char ch
) {
85 // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
87 const char escape
= UrlToFilenameEncoder::kEscapeChar
;
88 base::snprintf(escaped
, sizeof(escaped
), "%c%02X%c", escape
, ch
, escape
);
89 Validate(string(1, ch
), escaped
);
// Encodes |url| with UrlToFilenameEncoder::Encode(url, base_path,
// legacy_escape) and expects |gold_filename|. The visible remainder checks
// segment lengths and valid characters (kOtherDirSeparator is the rejected
// slash) and decodes the filename with kDirSeparator; when the decoded URL
// differs from |url| it must match once "http://" is prepended.
// NOTE(review): embedded source lines 97, 100 and 102 are missing from this
// extract (the declaration of decoded_url and the end of the Decode call among
// them), so any guard around the decode checks cannot be confirmed here.
92 void ValidateUrl(const string
& url
, const string
& base_path
,
93 bool legacy_escape
, const string
& gold_filename
) {
94 string encoded_filename
= UrlToFilenameEncoder::Encode(
95 url
, base_path
, legacy_escape
);
96 EXPECT_EQ(gold_filename
, encoded_filename
);
98 CheckSegmentLength(encoded_filename
);
99 CheckValidChars(encoded_filename
, kOtherDirSeparator
);
101 UrlToFilenameEncoder::Decode(encoded_filename
, kDirSeparator
,
103 if (url
!= decoded_url
) {
104 EXPECT_EQ(url
, "http://" + decoded_url
);
// Runs ValidateUrl on |url| twice with an empty base path: once with
// legacy_escape=true against |gold_old_filename| and once with
// legacy_escape=false against |gold_new_filename|.
109 void ValidateUrlOldNew(const string
& url
, const string
& gold_old_filename
,
110 const string
& gold_new_filename
) {
111 ValidateUrl(url
, std::string(), true, gold_old_filename
);
112 ValidateUrl(url
, std::string(), false, gold_new_filename
);
// Expects |url1| and |url2| to produce the same filename from
// UrlToFilenameEncoder::Encode (empty base path, legacy_escape=false).
115 void ValidateEncodeSame(const string
& url1
, const string
& url2
) {
116 string filename1
= UrlToFilenameEncoder::Encode(url1
, std::string(), false);
117 string filename2
= UrlToFilenameEncoder::Encode(url2
, std::string(), false);
118 EXPECT_EQ(filename1
, filename2
);
// Inputs that must come back unchanged apart from the trailing escape_ that
// ValidateNoChange appends to the expected value: letters, digits, "_.=+-",
// and a few "/" and "." path pieces.
// NOTE(review): the call that starts at embedded source line 133 is cut off
// (line 134 is missing from this extract).
125 TEST_F(UrlToFilenameEncoderTest
, DoesNotEscape
) {
126 ValidateNoChange(std::string());
127 ValidateNoChange("abcdefg");
128 ValidateNoChange("abcdefghijklmnopqrstuvwxyz");
129 ValidateNoChange("ZYXWVUT");
130 ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA");
131 ValidateNoChange("01234567689");
132 ValidateNoChange("_.=+-");
133 ValidateNoChange("abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA"
135 ValidateNoChange("index.html");
136 ValidateNoChange("/");
137 ValidateNoChange("/.");
138 ValidateNoChange(".");
139 ValidateNoChange("..");
// Every character in bad_chars, the NUL character, and every byte value from
// 127 through 255 must be encoded in the escaped hex form that ValidateEscaped
// expects.
142 TEST_F(UrlToFilenameEncoderTest
, Escapes
) {
143 const string bad_chars
=
144 "<>:\"\\|?*" // Illegal on Windows
145 "~`!$^&(){}[]';" // Bad for Unix shells
146 "^@" // Build tool doesn't like
147 "#%" // Tool doesn't like
148 ","; // The escape char has to be escaped
150 for (size_t i
= 0; i
< bad_chars
.size(); ++i
) {
151 ValidateEscaped(bad_chars
[i
]);
154 // Check non-printable characters.
155 ValidateEscaped('\0');
156 for (size_t i
= 127; i
< 256; ++i
) {
157 ValidateEscaped(static_cast<char>(i
));
// Pins exact encodings for inputs that mix plain and escaped characters:
// '&' becomes escape_ + "26", '?' becomes escape_ + "3F", '~' becomes
// escape_ + "7E", and a slash directly after another slash becomes
// escape_ + "2F". Every expected value ends with a trailing escape_.
161 TEST_F(UrlToFilenameEncoderTest
, DoesEscapeCorrectly
) {
162 Validate("mysite.com&x", "mysite.com" + escape_
+ "26x" + escape_
);
163 Validate("/./", "/" + escape_
+ "./" + escape_
);
164 Validate("/../", "/" + escape_
+ "../" + escape_
);
165 Validate("//", "/" + escape_
+ "2F" + escape_
);
166 Validate("/./leaf", "/" + escape_
+ "./leaf" + escape_
);
167 Validate("/../leaf", "/" + escape_
+ "../leaf" + escape_
);
168 Validate("//leaf", "/" + escape_
+ "2Fleaf" + escape_
);
// The input must contain a literal '&' before "param2": the gold value below
// expects escape_ + "26param2=y", and 0x26 is '&'.
169 Validate("mysite/u?param1=x&param2=y",
170 "mysite/u" + escape_
+ "3Fparam1=x" + escape_
+ "26param2=y" +
// NOTE(review): embedded source line 171, which finishes the call above, is
// missing from this extract.
172 Validate("search?q=dogs&go=&form=QBLH&qs=n", // from Latency Labs bing test.
173 "search" + escape_
+ "3Fq=dogs" + escape_
+ "26go=" + escape_
+
174 "26form=QBLH" + escape_
+ "26qs=n" + escape_
);
175 Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true",
176 "" + escape_
+ "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_
+
177 "3Fid=138" + escape_
+ "26content=true" + escape_
);
// Checks whole-URL encoding. ValidateUrlOldNew calls take the URL, then the
// expected filename with legacy escaping, then the expected filename with the
// current escaping; expected values are assembled from dir_sep_ and escape_.
// NOTE(review): several embedded source lines of this test (187, 190, 194-195,
// 198-199, 203, 206, 209 among them) are missing from this extract, so some
// calls below are cut off or have lost their opening line.
180 TEST_F(UrlToFilenameEncoderTest
, EncodeUrlCorrectly
) {
181 ValidateUrlOldNew("http://www.google.com/index.html",
182 "www.google.com" + dir_sep_
+ "indexx2Ehtml",
183 "www.google.com" + dir_sep_
+ "index.html" + escape_
);
184 ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=",
185 "www.google.com" + dir_sep_
+ "x" + dir_sep_
+
186 "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D",
188 "www.google.com" + dir_sep_
+ "x" + dir_sep_
+ "search" +
189 escape_
+ "3Fhl=en" + escape_
+ "26q=dogs" + escape_
+
191 ValidateUrlOldNew("http://www.foo.com/a//",
192 "www.foo.com" + dir_sep_
+ "ax255Cx255Cindexx2Ehtml",
193 "www.foo.com" + dir_sep_
+ "a" + dir_sep_
+ escape_
+ "2F" +
196 // From bug: Double slash preserved.
197 ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html",
200 "www.foo.com" + dir_sep_
+ "u" + escape_
+ "3Fsite=http" +
201 escape_
+ "3A" + dir_sep_
+ escape_
+ "2Fwww.google.com" +
202 dir_sep_
+ "index.html" + escape_
);
204 "http://blogutils.net/olct/online.php?"
205 "site=http://thelwordfanfics.blogspot.&interval=600",
207 "blogutils.net" + dir_sep_
+ "olct" + dir_sep_
+ "onlinex2Ephpx3F"
208 "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600",
210 "blogutils.net" + dir_sep_
+ "olct" + dir_sep_
+ "online.php" + escape_
+
211 "3Fsite=http" + escape_
+ "3A" + dir_sep_
+ escape_
+
212 "2Fthelwordfanfics.blogspot." + escape_
+ "26interval=600" + escape_
);
215 // From bug: Escapes treated the same as normal char.
// For every i in [0, 128) the one-character string and its "%XX" form (built
// with "%%%02X") must encode to the same filename via ValidateEncodeSame.
// NOTE(review): embedded source lines 221-223 and 226 are missing from this
// extract, so the call that wraps the two URL literals at the end is not
// visible.
216 TEST_F(UrlToFilenameEncoderTest
, UnescapeUrlsBeforeEncode
) {
217 for (int i
= 0; i
< 128; ++i
) {
218 string
unescaped(1, static_cast<char>(i
));
219 string escaped
= base::StringPrintf("%%%02X", i
);
220 ValidateEncodeSame(unescaped
, escaped
);
224 "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot"
225 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch",
227 "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot"
228 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch");
231 // From bug: Filename encoding is not prefix-free.
// Expected encodings for one, two and three consecutive slashes: the first
// slash stays literal, the second is written as escape_ + "2F", and a third
// one is literal again; each expected value ends with escape_.
232 TEST_F(UrlToFilenameEncoderTest
, EscapeSecondSlash
) {
233 Validate("/", "/" + escape_
);
234 Validate("//", "/" + escape_
+ "2F" + escape_
);
235 Validate("///", "/" + escape_
+ "2F" + "/" + escape_
);
// Encodes a word whose last path segment is a long run of digits and compares
// it with gold_long_word via Validate.
// NOTE(review): embedded source lines 253, 256, 259 and 262 are missing from
// this extract -- presumably the pieces that join the gold chunks and the
// second argument of the EXPECT_LT below; confirm against upstream.
238 TEST_F(UrlToFilenameEncoderTest
, LongTail
) {
239 static char long_word
[] =
240 "~joebob/briggs/12345678901234567890123456789012345678901234567890"
241 "1234567890123456789012345678901234567890123456789012345678901234567890"
242 "1234567890123456789012345678901234567890123456789012345678901234567890"
243 "1234567890123456789012345678901234567890123456789012345678901234567890"
244 "1234567890123456789012345678901234567890123456789012345678901234567890"
245 "1234567890123456789012345678901234567890123456789012345678901234567890";
247 // the long lines in the string below are 64 characters, so we can see
248 // the slashes every 128.
249 string gold_long_word
=
250 escape_
+ "7Ejoebob/briggs/"
251 "1234567890123456789012345678901234567890123456789012345678901234"
252 "56789012345678901234567890123456789012345678901234567890123456" +
254 "7890123456789012345678901234567890123456789012345678901234567890"
255 "12345678901234567890123456789012345678901234567890123456789012" +
257 "3456789012345678901234567890123456789012345678901234567890123456"
258 "78901234567890123456789012345678901234567890123456789012345678" +
260 "9012345678901234567890" + escape_
;
261 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength
,
263 Validate(long_word
, gold_long_word
);
// Like LongTail, but the last path segment is full of '?' characters, each of
// which is expected as escape_ + "3F" in gold_long_word.
// NOTE(review): embedded source lines 288-289, 293 and 296 are missing from
// this extract, so parts of the gold expression and the second argument of the
// EXPECT_LT below are not visible.
266 TEST_F(UrlToFilenameEncoderTest
, LongTailQuestion
) {
267 // Here the '?' in the last path segment expands to escape_ + "3F", making
268 // it hit 128 chars before the input segment gets that big.
269 static char long_word
[] =
270 "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?"
271 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
272 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
273 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
274 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
275 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?";
277 // Notice that at the end of the third segment, we avoid splitting
278 // the (escape_ + "3F") that was generated from the "?", so that segment is
279 // only 127 characters.
280 string pattern
= "1234567" + escape_
+ "3F"; // 10 characters
281 string gold_long_word
=
282 escape_
+ "7Ejoebob/briggs/" +
283 pattern
+ pattern
+ pattern
+ pattern
+ pattern
+ pattern
+ "1234"
284 "567" + escape_
+ "3F" + pattern
+ pattern
+ pattern
+ pattern
+ pattern
+
285 "123456" + escape_
+ "-/"
286 "7" + escape_
+ "3F" + pattern
+ pattern
+ pattern
+ pattern
+ pattern
+
287 pattern
+ pattern
+ pattern
+ pattern
+ pattern
+ pattern
+ pattern
+
290 "34567" + escape_
+ "3F" + pattern
+ pattern
+ pattern
+ pattern
+ pattern
291 + "1234567" + escape_
+ "3F" + pattern
+ pattern
+ pattern
+ pattern
292 + pattern
+ "1234567" +
294 escape_
+ "3F" + pattern
+ pattern
+ escape_
;
295 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength
,
297 Validate(long_word
, gold_long_word
);
// For each i from -4 to 4, appends kMaximumSubdirectoryLength + i copies of
// 'x' to |input| and round-trips it through ValidateAllSegmentsSmall.
// NOTE(review): the declaration of |input| (embedded source line 303) is
// missing from this extract.
300 TEST_F(UrlToFilenameEncoderTest
, CornerCasesNearMaxLenNoEscape
) {
301 // hit corner cases, +/- 4 characters from kMaximumSubdirectoryLength
302 for (int i
= -4; i
<= 4; ++i
) {
304 input
.append(i
+ UrlToFilenameEncoder::kMaximumSubdirectoryLength
, 'x');
305 ValidateAllSegmentsSmall(input
);
// For each i from -4 to 4, appends kMaximumSubdirectoryLength + i - 1 copies
// of 'x' followed by a single '.' to |input| and round-trips it through
// ValidateAllSegmentsSmall.
// NOTE(review): the declaration of |input| (embedded source line 314) is
// missing from this extract. Also, DoesNotEscape expects "." and "index.html"
// to stay unescaped, so the "expand to 3 characters" remark below may be
// stale -- confirm against the encoder.
309 TEST_F(UrlToFilenameEncoderTest
, CornerCasesNearMaxLenWithEscape
) {
310 // hit corner cases, +/- 4 characters from kMaxLen. This time we
311 // leave off the last 'x' and put in a '.', which ensures that we
312 // are truncating with '/' *after* the expansion.
313 for (int i
= -4; i
<= 4; ++i
) {
315 input
.append(i
+ UrlToFilenameEncoder::kMaximumSubdirectoryLength
- 1, 'x');
316 input
.append(1, '.'); // this will expand to 3 characters.
317 ValidateAllSegmentsSmall(input
);
// Expected encodings for nested paths: intermediate components are left bare
// and escape_ is appended only at the very end of the encoded word.
321 TEST_F(UrlToFilenameEncoderTest
, LeafBranchAlias
) {
322 Validate("/a/b/c", "/a/b/c" + escape_
); // c is leaf file "c,"
323 Validate("/a/b/c/d", "/a/b/c/d" + escape_
); // c is directory "c"
324 Validate("/a/b/c/d/", "/a/b/c/d/" + escape_
);
// Appends kMaximumSubdirectoryLength + 1 copies of 'x' to long_word, encodes
// it with a backslash as EncodeSegment's third argument, and expects the
// result to be four characters longer with a backslash at index
// kMaximumSubdirectoryLength.
// NOTE(review): the declarations of long_word and escaped_word (embedded
// source lines 329-330) are missing from this extract.
328 TEST_F(UrlToFilenameEncoderTest
, BackslashSeparator
) {
331 long_word
.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength
+ 1, 'x');
332 UrlToFilenameEncoder::EncodeSegment(
333 std::string(), long_word
, '\\', &escaped_word
);
335 // check that one backslash, plus the escape ",-", and the ending , got added.
336 EXPECT_EQ(long_word
.size() + 4, escaped_word
.size());
337 ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength
,
338 escaped_word
.size());
339 // Check that the backslash got inserted at the correct spot.
340 EXPECT_EQ('\\', escaped_word
[
341 UrlToFilenameEncoder::kMaximumSubdirectoryLength
]);