Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / net / tools / flip_server / url_to_filename_encoder_unittest.cc
blob2662f756d2f9edec98ff62709ebf0de399be6734
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/tools/flip_server/url_to_filename_encoder.h"
7 #include <string>
8 #include <vector>
10 #include "base/strings/string_piece.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/stringprintf.h"
14 #include "testing/gtest/include/gtest/gtest.h"
16 using base::StringPiece;
17 using std::string;
19 namespace net {
// Path separators used by the tests below. kDirSeparator is the separator
// expected in filenames produced by UrlToFilenameEncoder::Encode() and handed
// to Decode(); kOtherDirSeparator is the opposite slash, which ValidateUrl()
// asserts never appears in an encoded filename. Both are immutable.
#ifdef WIN32
const char kDirSeparator = '\\';
const char kOtherDirSeparator = '/';
#else
const char kDirSeparator = '/';
const char kOtherDirSeparator = '\\';
#endif
29 class UrlToFilenameEncoderTest : public ::testing::Test {
30 protected:
31 UrlToFilenameEncoderTest()
32 : escape_(1, UrlToFilenameEncoder::kEscapeChar),
33 dir_sep_(1, kDirSeparator) {}
35 void CheckSegmentLength(const StringPiece& escaped_word) {
36 for (const base::StringPiece& component :
37 base::SplitStringPiece(escaped_word, "/", base::KEEP_WHITESPACE,
38 base::SPLIT_WANT_NONEMPTY)) {
39 EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
40 component.size());
44 void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) {
45 // These characters are invalid in Windows. We add in ', as that's pretty
46 // inconvenient in a Unix filename.
48 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
49 const string kInvalidChars = "<>:\"|?*'";
50 for (size_t i = 0; i < escaped_word.size(); ++i) {
51 char c = escaped_word[i];
52 EXPECT_EQ(string::npos, kInvalidChars.find(c));
53 EXPECT_NE(invalid_slash, c);
54 EXPECT_NE('\0', c); // only invalid character in Posix
55 EXPECT_GT(0x7E, c); // only English printable characters
59 void Validate(const string& in_word, const string& gold_word) {
60 string escaped_word, url;
61 UrlToFilenameEncoder::EncodeSegment(std::string(), in_word, '/',
62 &escaped_word);
63 EXPECT_EQ(gold_word, escaped_word);
64 CheckSegmentLength(escaped_word);
65 CheckValidChars(escaped_word, '\\');
66 UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
67 EXPECT_EQ(in_word, url);
70 void ValidateAllSegmentsSmall(const string& in_word) {
71 string escaped_word, url;
72 UrlToFilenameEncoder::EncodeSegment(std::string(), in_word, '/',
73 &escaped_word);
74 CheckSegmentLength(escaped_word);
75 CheckValidChars(escaped_word, '\\');
76 UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
77 EXPECT_EQ(in_word, url);
80 void ValidateNoChange(const string& word) {
81 // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
82 Validate(word, word + escape_);
85 void ValidateEscaped(unsigned char ch) {
86 // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
87 char escaped[100];
88 const char escape = UrlToFilenameEncoder::kEscapeChar;
89 base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape);
90 Validate(string(1, ch), escaped);
93 void ValidateUrl(const string& url,
94 const string& base_path,
95 bool legacy_escape,
96 const string& gold_filename) {
97 string encoded_filename =
98 UrlToFilenameEncoder::Encode(url, base_path, legacy_escape);
99 EXPECT_EQ(gold_filename, encoded_filename);
100 if (!legacy_escape) {
101 CheckSegmentLength(encoded_filename);
102 CheckValidChars(encoded_filename, kOtherDirSeparator);
103 string decoded_url;
104 UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator,
105 &decoded_url);
106 if (url != decoded_url) {
107 EXPECT_EQ(url, "http://" + decoded_url);
112 void ValidateUrlOldNew(const string& url,
113 const string& gold_old_filename,
114 const string& gold_new_filename) {
115 ValidateUrl(url, std::string(), true, gold_old_filename);
116 ValidateUrl(url, std::string(), false, gold_new_filename);
119 void ValidateEncodeSame(const string& url1, const string& url2) {
120 string filename1 = UrlToFilenameEncoder::Encode(url1, std::string(), false);
121 string filename2 = UrlToFilenameEncoder::Encode(url2, std::string(), false);
122 EXPECT_EQ(filename1, filename2);
125 string escape_;
126 string dir_sep_;
129 TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) {
130 ValidateNoChange(std::string());
131 ValidateNoChange("abcdefg");
132 ValidateNoChange("abcdefghijklmnopqrstuvwxyz");
133 ValidateNoChange("ZYXWVUT");
134 ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA");
135 ValidateNoChange("01234567689");
136 ValidateNoChange("_.=+-");
137 ValidateNoChange(
138 "abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA"
139 "01234567689_.=+-");
140 ValidateNoChange("index.html");
141 ValidateNoChange("/");
142 ValidateNoChange("/.");
143 ValidateNoChange(".");
144 ValidateNoChange("..");
147 TEST_F(UrlToFilenameEncoderTest, Escapes) {
148 const string bad_chars =
149 "<>:\"\\|?*" // Illegal on Windows
150 "~`!$^&(){}[]';" // Bad for Unix shells
151 "^@" // Build tool doesn't like
152 "#%" // Tool doesn't like
153 ","; // The escape char has to be escaped
155 for (size_t i = 0; i < bad_chars.size(); ++i) {
156 ValidateEscaped(bad_chars[i]);
159 // Check non-printable characters.
160 ValidateEscaped('\0');
161 for (size_t i = 127; i < 256; ++i) {
162 ValidateEscaped(static_cast<char>(i));
166 TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) {
167 Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_);
168 Validate("/./", "/" + escape_ + "./" + escape_);
169 Validate("/../", "/" + escape_ + "../" + escape_);
170 Validate("//", "/" + escape_ + "2F" + escape_);
171 Validate("/./leaf", "/" + escape_ + "./leaf" + escape_);
172 Validate("/../leaf", "/" + escape_ + "../leaf" + escape_);
173 Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_);
174 Validate("mysite/u?param1=x&param2=y", "mysite/u" + escape_ + "3Fparam1=x" +
175 escape_ + "26param2=y" + escape_);
176 Validate("search?q=dogs&go=&form=QBLH&qs=n", // from Latency Labs bing test.
177 "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ +
178 "26form=QBLH" + escape_ + "26qs=n" + escape_);
179 Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true",
180 "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ +
181 "3Fid=138" + escape_ + "26content=true" + escape_);
184 TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) {
185 ValidateUrlOldNew("http://www.google.com/index.html",
186 "www.google.com" + dir_sep_ + "indexx2Ehtml",
187 "www.google.com" + dir_sep_ + "index.html" + escape_);
188 ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=",
189 "www.google.com" + dir_sep_ + "x" + dir_sep_ +
190 "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D",
192 "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" +
193 escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ +
194 "26oq=" + escape_);
195 ValidateUrlOldNew(
196 "http://www.foo.com/a//",
197 "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml",
198 "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" + escape_);
200 // From bug: Double slash preserved.
201 ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html",
202 std::string(), false,
203 "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" +
204 escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" +
205 dir_sep_ + "index.html" + escape_);
206 ValidateUrlOldNew(
207 "http://blogutils.net/olct/online.php?"
208 "site=http://thelwordfanfics.blogspot.&interval=600",
210 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ +
211 "onlinex2Ephpx3F"
212 "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D6"
213 "00",
215 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ +
216 "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ +
217 "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_);
220 // From bug: Escapes treated the same as normal char.
221 TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) {
222 for (int i = 0; i < 128; ++i) {
223 string unescaped(1, static_cast<char>(i));
224 string escaped = base::StringPrintf("%%%02X", i);
225 ValidateEncodeSame(unescaped, escaped);
228 ValidateEncodeSame(
229 "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot"
230 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch",
232 "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot"
233 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch");
236 // From bug: Filename encoding is not prefix-free.
237 TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) {
238 Validate("/", "/" + escape_);
239 Validate("//", "/" + escape_ + "2F" + escape_);
240 Validate("///", "/" + escape_ + "2F" + "/" + escape_);
243 TEST_F(UrlToFilenameEncoderTest, LongTail) {
244 static char long_word[] =
245 "~joebob/briggs/12345678901234567890123456789012345678901234567890"
246 "1234567890123456789012345678901234567890123456789012345678901234567890"
247 "1234567890123456789012345678901234567890123456789012345678901234567890"
248 "1234567890123456789012345678901234567890123456789012345678901234567890"
249 "1234567890123456789012345678901234567890123456789012345678901234567890"
250 "1234567890123456789012345678901234567890123456789012345678901234567890";
252 // the long lines in the string below are 64 characters, so we can see
253 // the slashes every 128.
254 string gold_long_word =
255 escape_ +
256 "7Ejoebob/briggs/"
257 "1234567890123456789012345678901234567890123456789012345678901234"
258 "56789012345678901234567890123456789012345678901234567890123456" +
259 escape_ +
260 "-/"
261 "7890123456789012345678901234567890123456789012345678901234567890"
262 "12345678901234567890123456789012345678901234567890123456789012" +
263 escape_ +
264 "-/"
265 "3456789012345678901234567890123456789012345678901234567890123456"
266 "78901234567890123456789012345678901234567890123456789012345678" +
267 escape_ +
268 "-/"
269 "9012345678901234567890" +
270 escape_;
271 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
272 sizeof(long_word));
273 Validate(long_word, gold_long_word);
276 TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) {
277 // Here the '?' in the last path segment expands to @3F, making
278 // it hit 128 chars before the input segment gets that big.
279 static char long_word[] =
280 "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?"
281 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
282 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
283 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
284 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
285 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?";
287 // Notice that at the end of the third segment, we avoid splitting
288 // the (escape_ + "3F") that was generated from the "?", so that segment is
289 // only 127 characters.
290 string pattern = "1234567" + escape_ + "3F"; // 10 characters
291 string gold_long_word =
292 escape_ + "7Ejoebob/briggs/" + pattern + pattern + pattern + pattern +
293 pattern + pattern +
294 "1234"
295 "567" +
296 escape_ + "3F" + pattern + pattern + pattern + pattern + pattern +
297 "123456" + escape_ +
298 "-/"
299 "7" +
300 escape_ + "3F" + pattern + pattern + pattern + pattern + pattern +
301 pattern + pattern + pattern + pattern + pattern + pattern + pattern +
302 "12" + escape_ +
303 "-/"
304 "34567" +
305 escape_ + "3F" + pattern + pattern + pattern + pattern + pattern +
306 "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern +
307 pattern + "1234567" + escape_ + "-/" + escape_ + "3F" + pattern +
308 pattern + escape_;
309 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
310 sizeof(long_word));
311 Validate(long_word, gold_long_word);
314 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) {
315 // hit corner cases, +/- 4 characters from kMaxLen
316 for (int i = -4; i <= 4; ++i) {
317 string input;
318 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x');
319 ValidateAllSegmentsSmall(input);
323 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) {
324 // hit corner cases, +/- 4 characters from kMaxLen. This time we
325 // leave off the last 'x' and put in a '.', which ensures that we
326 // are truncating with '/' *after* the expansion.
327 for (int i = -4; i <= 4; ++i) {
328 string input;
329 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x');
330 input.append(1, '.'); // this will expand to 3 characters.
331 ValidateAllSegmentsSmall(input);
335 TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) {
336 Validate("/a/b/c", "/a/b/c" + escape_); // c is leaf file "c,"
337 Validate("/a/b/c/d", "/a/b/c/d" + escape_); // c is directory "c"
338 Validate("/a/b/c/d/", "/a/b/c/d/" + escape_);
341 TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) {
342 string long_word;
343 string escaped_word;
344 long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x');
345 UrlToFilenameEncoder::EncodeSegment(std::string(), long_word, '\\',
346 &escaped_word);
348 // check that one backslash, plus the escape ",-", and the ending , got added.
349 EXPECT_EQ(long_word.size() + 4, escaped_word.size());
350 ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
351 escaped_word.size());
352 // Check that the backslash got inserted at the correct spot.
353 EXPECT_EQ('\\',
354 escaped_word[UrlToFilenameEncoder::kMaximumSubdirectoryLength]);
357 } // namespace net