Supervised user whitelists: Cleanup
[chromium-blink-merge.git] / net / base / net_util_icu_unittest.cc
blob1b315c297c45edcb773d884c058208d3c9991aed
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
7 #include <string.h>
9 #include <vector>
11 #include "base/format_macros.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/stringprintf.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/time/time.h"
16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "url/gurl.h"
19 using base::ASCIIToUTF16;
20 using base::WideToUTF16;
22 namespace net {
24 namespace {
// Local shorthand for base::string16::npos.
26 const size_t kNpos = base::string16::npos;
// Language lists handed to IDNToUnicode() by the IDN tests.
//
// The order is significant: IDNTestCase::unicode_allowed is sized by and
// indexed in parallel with this array, and the IDNToUnicodeFast/Slow tests
// pick entries by index (3, 17 and 18).
const char* const kLanguages[] = {
    "",          // 0
    "en",        // 1
    "zh-CN",     // 2
    "ja",        // 3
    "ko",        // 4
    "he",        // 5
    "ar",        // 6
    "ru",        // 7
    "el",        // 8
    "fr",        // 9
    "de",        // 10
    "pt",        // 11
    "sv",        // 12
    "th",        // 13
    "hi",        // 14
    "de,en",     // 15
    "el,en",     // 16
    "zh-TW,en",  // 17
    "ko,ja",     // 18
    "he,ru,en",  // 19
    "zh,ru,en"   // 20
};
36 struct IDNTestCase {
37 const char* const input;
38 const wchar_t* unicode_output;
39 const bool unicode_allowed[arraysize(kLanguages)];
// Expectation table shared by the IDNToUnicodeFast and IDNToUnicodeSlow tests.
// Every entry gives the input host name, its Unicode form, and one boolean per
// kLanguages entry (in the same order). For language j the tests expect the
// Unicode form when the j-th boolean is true and the unchanged input otherwise.
42 // TODO(jungshik) This is just a random sample of languages and is far
43 // from exhaustive. We may have to generate all the combinations
44 // of languages (powerset of a set of all the languages).
45 const IDNTestCase idn_cases[] = {
46 // No IDN
47 {"www.google.com", L"www.google.com",
48 {true, true, true, true, true,
49 true, true, true, true, true,
50 true, true, true, true, true,
51 true, true, true, true, true,
52 true}},
53 {"www.google.com.", L"www.google.com.",
54 {true, true, true, true, true,
55 true, true, true, true, true,
56 true, true, true, true, true,
57 true, true, true, true, true,
58 true}},
59 {".", L".",
60 {true, true, true, true, true,
61 true, true, true, true, true,
62 true, true, true, true, true,
63 true, true, true, true, true,
64 true}},
65 {"", L"",
66 {true, true, true, true, true,
67 true, true, true, true, true,
68 true, true, true, true, true,
69 true, true, true, true, true,
70 true}},
71 // IDN
72 // Hanzi (Traditional Chinese)
73 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
74 {true, false, true, true, false,
75 false, false, false, false, false,
76 false, false, false, false, false,
77 false, false, true, true, false,
78 true}},
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
80 {"xn--cy2a840a.com", L"\x89c6\x9891.com",
81 {true, false, true, false, false,
82 false, false, false, false, false,
83 false, false, false, false, false,
84 false, false, false, false, false,
85 true}},
86 // Hanzi + '123'
87 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
88 {true, false, true, true, false,
89 false, false, false, false, false,
90 false, false, false, false, false,
91 false, false, true, true, false,
92 true}},
93 // Hanzi + Latin : U+56FD is simplified and is regarded
94 // as not supported in zh-TW.
95 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
96 {false, false, true, true, false,
97 false, false, false, false, false,
98 false, false, false, false, false,
99 false, false, false, true, false,
100 true}},
101 // Kanji + Kana (Japanese)
102 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
103 {true, false, false, true, false,
104 false, false, false, false, false,
105 false, false, false, false, false,
106 false, false, false, true, false,
107 false}},
108 // Katakana including U+30FC
109 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
110 {true, false, false, true, false,
111 false, false, false, false, false,
112 false, false, false, false, false,
113 false, false, false, true, false,
115 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
116 {true, false, false, true, false,
117 false, false, false, false, false,
118 false, false, false, false, false,
119 false, false, false, true, false,
121 // Katakana + Latin (Japanese)
122 // TODO(jungshik): Change 'false' in the first element to 'true'
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
124 // of our IsIDNComponentInSingleScript().
125 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
126 {false, false, false, true, false,
127 false, false, false, false, false,
128 false, false, false, false, false,
129 false, false, false, true, false,
131 {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
132 {false, false, false, true, false,
133 false, false, false, false, false,
134 false, false, false, false, false,
135 false, false, false, true, false,
137 // Hangul (Korean)
138 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
139 {true, false, false, false, true,
140 false, false, false, false, false,
141 false, false, false, false, false,
142 false, false, false, true, false,
143 false}},
144 // b<u-umlaut>cher (German)
145 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
146 {true, false, false, false, false,
147 false, false, false, false, true,
148 true, false, false, false, false,
149 true, false, false, false, false,
150 false}},
151 // a with diaeresis
152 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
153 {true, false, false, false, false,
154 false, false, false, false, false,
155 true, false, true, false, false,
156 true, false, false, false, false,
157 false}},
158 // c-cedilla (French)
159 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
160 {true, false, false, false, false,
161 false, false, false, false, true,
162 false, true, false, false, false,
163 false, false, false, false, false,
164 false}},
165 // cafe with an acute accent on the final e (French)
166 {"xn--caf-dma.fr", L"caf\x00e9.fr",
167 {true, false, false, false, false,
168 false, false, false, false, true,
169 false, true, true, false, false,
170 false, false, false, false, false,
171 false}},
172 // c-cedilla and a with tilde (Portuguese)
173 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
174 {true, false, false, false, false,
175 false, false, false, false, false,
176 false, true, false, false, false,
177 false, false, false, false, false,
178 false}},
179 // s with caron
180 {"xn--achy-f6a.com", L"\x0161" L"achy.com",
181 {true, false, false, false, false,
182 false, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
185 false}},
186 // TODO(jungshik) : Add examples with Cyrillic letters
187 // only used in some languages written in Cyrillic.
188 // Eutopia (Greek)
189 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
190 {true, false, false, false, false,
191 false, false, false, true, false,
192 false, false, false, false, false,
193 false, true, false, false, false,
194 false}},
195 // Eutopia + 123 (Greek)
196 {"xn---123-pldm0haj2bk.gr",
197 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
198 {true, false, false, false, false,
199 false, false, false, true, false,
200 false, false, false, false, false,
201 false, true, false, false, false,
202 false}},
203 // Cyrillic (Russian)
204 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
205 {true, false, false, false, false,
206 false, false, true, false, false,
207 false, false, false, false, false,
208 false, false, false, false, true,
209 true}},
210 // Cyrillic + 123 (Russian)
211 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
212 {true, false, false, false, false,
213 false, false, true, false, false,
214 false, false, false, false, false,
215 false, false, false, false, true,
216 true}},
217 // Arabic
218 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
219 {true, false, false, false, false,
220 false, true, false, false, false,
221 false, false, false, false, false,
222 false, false, false, false, false,
223 false}},
224 // Hebrew
225 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
226 {true, false, false, false, false,
227 true, false, false, false, false,
228 false, false, false, false, false,
229 false, false, false, false, true,
230 false}},
231 // Thai
232 {"xn--12c2cc4ag3b4ccu.th",
233 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
234 {true, false, false, false, false,
235 false, false, false, false, false,
236 false, false, false, true, false,
237 false, false, false, false, false,
238 false}},
239 // Devanagari (Hindi)
240 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
241 {true, false, false, false, false,
242 false, false, false, false, false,
243 false, false, false, false, true,
244 false, false, false, false, false,
245 false}},
246 // Invalid IDN
247 {"xn--hello?world.com", NULL,
248 {false, false, false, false, false,
249 false, false, false, false, false,
250 false, false, false, false, false,
251 false, false, false, false, false,
252 false}},
253 // Unsafe IDNs
254 // "payp<alpha>l.com"
255 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
256 {false, false, false, false, false,
257 false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
260 false}},
261 // google.gr with Greek omicron and epsilon
262 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
263 {false, false, false, false, false,
264 false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
267 false}},
268 // google.ru with Cyrillic o
269 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
270 {false, false, false, false, false,
271 false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
274 false}},
275 // h<e with acute>llo<China in Han>.cn
276 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
277 {false, false, false, false, false,
278 false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
281 false}},
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru
283 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
284 {false, false, false, false, false,
285 false, false, false, false, false,
286 false, false, false, false, false,
287 false, false, false, false, false,
288 false}},
289 // One that's really long that will force a buffer realloc
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
291 "aaaaaaa",
292 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
293 L"aaaaaaaa",
294 {true, true, true, true, true,
295 true, true, true, true, true,
296 true, true, true, true, true,
297 true, true, true, true, true,
298 true}},
299 // Test cases for characters we blacklisted although allowed in IDN.
300 // Embedded spaces will be turned to %20 in the display.
301 // TODO(jungshik): We need to have more cases. This is a typical
302 // data-driven trap. The following test cases need to be separated
303 // and tested only for a couple of languages.
304 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
305 {false, false, false, false, false,
306 false, false, false, false, false,
307 false, false, false, false, false,
308 false, false, false, false, false,
309 false}},
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
311 {false, false, false, false, false,
312 false, false, false, false, false,
313 false, false, false, false, false,
314 false, false, false, false, false,
316 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
317 {false, false, false, false, false,
318 false, false, false, false, false,
319 false, false, false, false, false,
320 false, false, false, false, false,
322 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
323 {false, false, false, false, false,
324 false, false, false, false, false,
325 false, false, false, false, false,
326 false, false, false, false, false,
328 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
329 {false, false, false, false, false,
330 false, false, false, false, false,
331 false, false, false, false, false,
332 false, false, false, false, false,
// The two entries below are compiled out by the surrounding #if 0.
334 #if 0
335 // These two cases are special. We need a separate test.
336 // U+3000 and U+3002 are normalized to ASCII space and dot.
337 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
338 {false, false, true, false, false,
339 false, false, false, false, false,
340 false, false, false, false, false,
341 false, false, true, false, false,
342 true}},
343 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
344 {false, false, true, false, false,
345 false, false, false, false, false,
346 false, false, false, false, false,
347 false, false, true, false, false,
348 true}},
349 #endif
// Pairs an offset into an input string with the offset expected after
// adjustment.
struct AdjustOffsetCase {
  size_t input_offset;   // Offset before adjustment.
  size_t output_offset;  // Expected offset after adjustment.
};
357 struct UrlTestData {
358 const char* const description;
359 const char* const input;
360 const char* const languages;
361 FormatUrlTypes format_types;
362 UnescapeRule::Type escape_rules;
363 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
364 size_t prefix_len;
367 // A helper for IDN*{Fast,Slow}.
368 // Append "::<language list>" to |expected| and |actual| to make it
369 // easy to tell which sub-case fails without debugging.
370 void AppendLanguagesToOutputs(const char* languages,
371 base::string16* expected,
372 base::string16* actual) {
373 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
374 expected->append(to_append);
375 actual->append(to_append);
378 // A pair of helpers for the FormatUrlWithOffsets() test.
379 void VerboseExpect(size_t expected,
380 size_t actual,
381 const std::string& original_url,
382 size_t position,
383 const base::string16& formatted_url) {
384 EXPECT_EQ(expected, actual) << "Original URL: " << original_url
385 << " (at char " << position << ")\nFormatted URL: " << formatted_url;
388 void CheckAdjustedOffsets(const std::string& url_string,
389 const std::string& languages,
390 FormatUrlTypes format_types,
391 UnescapeRule::Type unescape_rules,
392 const size_t* output_offsets) {
393 GURL url(url_string);
394 size_t url_length = url_string.length();
395 std::vector<size_t> offsets;
396 for (size_t i = 0; i <= url_length + 1; ++i)
397 offsets.push_back(i);
398 offsets.push_back(500000); // Something larger than any input length.
399 offsets.push_back(std::string::npos);
400 base::string16 formatted_url = FormatUrlWithOffsets(url, languages,
401 format_types, unescape_rules, NULL, NULL, &offsets);
402 for (size_t i = 0; i < url_length; ++i)
403 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
404 VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
405 url_length, formatted_url);
406 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
407 500000, formatted_url);
408 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
409 std::string::npos, formatted_url);
412 } // anonymous namespace
414 TEST(NetUtilTest, IDNToUnicodeFast) {
415 for (size_t i = 0; i < arraysize(idn_cases); i++) {
416 for (size_t j = 0; j < arraysize(kLanguages); j++) {
417 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
418 if (j == 3 || j == 17 || j == 18)
419 continue;
420 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
421 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
422 WideToUTF16(idn_cases[i].unicode_output) :
423 ASCIIToUTF16(idn_cases[i].input));
424 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
425 EXPECT_EQ(expected, output);
430 TEST(NetUtilTest, IDNToUnicodeSlow) {
431 for (size_t i = 0; i < arraysize(idn_cases); i++) {
432 for (size_t j = 0; j < arraysize(kLanguages); j++) {
433 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
434 if (!(j == 3 || j == 17 || j == 18))
435 continue;
436 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
437 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
438 WideToUTF16(idn_cases[i].unicode_output) :
439 ASCIIToUTF16(idn_cases[i].input));
440 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
441 EXPECT_EQ(expected, output);
446 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
447 // te), which was causing a crash (See http://crbug.com/510551). This may be an
448 // icu bug, but regardless, that should not cause a crash.
449 TEST(NetUtilTest, IDNToUnicodeNeverCrashes) {
450 for (char c1 = 'a'; c1 <= 'z'; c1++) {
451 for (char c2 = 'a'; c2 <= 'z'; c2++) {
452 std::string lang = base::StringPrintf("%c%c", c1, c2);
453 base::string16 output(IDNToUnicode("xn--74h", lang));
458 TEST(NetUtilTest, StripWWW) {
459 EXPECT_EQ(base::string16(), StripWWW(base::string16()));
460 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www.")));
461 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
462 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
// Test for GetDirectoryListingEntry(); compiled only when OS_WIN is defined.
465 // This is currently a windows specific function.
466 #if defined(OS_WIN)
467 namespace {
// One test row. The fields |name|, |raw_bytes|, |is_dir|, |filesize| and
// |time| are passed, in that order, to GetDirectoryListingEntry(), and
// |expected| is the exact string the call must return.
469 struct GetDirectoryListingEntryCase {
470 const wchar_t* name;
471 const char* const raw_bytes;
472 bool is_dir;
473 int64 filesize;
474 base::Time time;
475 const char* const expected;
478 } // namespace
// Feeds each row of the table to GetDirectoryListingEntry() and compares the
// returned string with the row's |expected| value.
480 TEST(NetUtilTest, GetDirectoryListingEntry) {
481 const GetDirectoryListingEntryCase test_cases[] = {
482 {L"Foo",
484 false,
485 10000,
486 base::Time(),
487 "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
488 {L"quo\"tes",
490 false,
491 10000,
492 base::Time(),
493 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
494 "\n"},
495 {L"quo\"tes",
496 "quo\"tes",
497 false,
498 10000,
499 base::Time(),
500 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
501 "\n"},
502 // U+D55C U+AE00. raw_bytes is empty (either a local file with
503 // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
504 {L"\xD55C\xAE00.txt",
506 false,
507 10000,
508 base::Time(),
509 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\","
510 "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"},
511 // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
512 // a local or remote file in EUC-KR.
513 {L"\xD55C\xAE00.txt",
514 "\xC7\xD1\xB1\xDB.txt",
515 false,
516 10000,
517 base::Time(),
518 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\""
519 ",0,\"9.8 kB\",\"\");</script>\n"},
522 for (size_t i = 0; i < arraysize(test_cases); ++i) {
523 const std::string results = GetDirectoryListingEntry(
524 WideToUTF16(test_cases[i].name),
525 test_cases[i].raw_bytes,
526 test_cases[i].is_dir,
527 test_cases[i].filesize,
528 test_cases[i].time);
529 EXPECT_EQ(test_cases[i].expected, results);
533 #endif
// Table-driven test for FormatUrl(). Each UrlTestData row supplies the input
// URL, the languages string, the format flags and the unescape rule; the test
// checks both the formatted string and the |prefix_len| out-parameter. The
// row's |description| is streamed into any failure message.
535 TEST(NetUtilTest, FormatUrl) {
// Flags used by every row that does not exercise a specific omit option.
536 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
537 const UrlTestData tests[] = {
538 {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
540 {"Simple URL",
541 "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
542 L"http://www.google.com/", 7},
544 {"With a port number and a reference",
545 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
546 UnescapeRule::NORMAL,
547 L"http://www.google.com:8080/#\x30B0", 7},
549 // -------- IDN tests --------
550 {"Japanese IDN with ja",
551 "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
552 UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
554 {"Japanese IDN with en",
555 "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
556 UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
558 {"Japanese IDN without any languages",
559 "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
560 UnescapeRule::NORMAL,
561 // Single script is safe for empty languages.
562 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
564 {"mailto: with Japanese IDN",
565 "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
566 UnescapeRule::NORMAL,
567 // GURL doesn't assume an email address's domain part as a host name.
568 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
570 {"file: with Japanese IDN",
571 "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
572 UnescapeRule::NORMAL,
573 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
575 {"ftp: with Japanese IDN",
576 "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
577 UnescapeRule::NORMAL,
578 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
580 // -------- omit_username_password flag tests --------
581 {"With username and password, omit_username_password=false",
582 "http://user:passwd@example.com/foo", "",
583 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
584 L"http://user:passwd@example.com/foo", 19},
586 {"With username and password, omit_username_password=true",
587 "http://user:passwd@example.com/foo", "", default_format_type,
588 UnescapeRule::NORMAL, L"http://example.com/foo", 7},
590 {"With username and no password",
591 "http://user@example.com/foo", "", default_format_type,
592 UnescapeRule::NORMAL, L"http://example.com/foo", 7},
594 {"Just '@' without username and password",
595 "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
596 L"http://example.com/foo", 7},
598 // GURL doesn't think local-part of an email address is username for URL.
599 {"mailto:, omit_username_password=true",
600 "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
601 L"mailto:foo@example.com", 7},
603 // -------- unescape flag tests --------
604 {"Do not unescape",
605 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
606 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
607 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
608 UnescapeRule::NONE,
609 // GURL parses %-encoded hostnames into Punycode.
610 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
611 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
613 {"Unescape normally",
614 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
615 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
616 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
617 UnescapeRule::NORMAL,
618 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
619 L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
621 {"Unescape normally with BiDi control character",
622 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type,
623 UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
625 {"Unescape normally including unescape spaces",
626 "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
627 UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
// NOTE(review): the "Disabled" remark after the next entry presumably refers
// to that entry; confirm whether it is meant to be commented out.
630 {"unescape=true with some special characters",
631 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
632 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
633 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
635 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
637 // -------- omit http: --------
638 {"omit http with user name",
639 "http://user@example.com/foo", "", kFormatUrlOmitAll,
640 UnescapeRule::NORMAL, L"example.com/foo", 0},
642 {"omit http",
643 "http://www.google.com/", "en", kFormatUrlOmitHTTP,
644 UnescapeRule::NORMAL, L"www.google.com/",
647 {"omit http with https",
648 "https://www.google.com/", "en", kFormatUrlOmitHTTP,
649 UnescapeRule::NORMAL, L"https://www.google.com/",
652 {"omit http starts with ftp.",
653 "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
654 UnescapeRule::NORMAL, L"http://ftp.google.com/",
657 // -------- omit trailing slash on bare hostname --------
658 {"omit slash when it's the entire path",
659 "http://www.google.com/", "en",
660 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
661 L"http://www.google.com", 7},
662 {"omit slash when there's a ref",
663 "http://www.google.com/#ref", "en",
664 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
665 L"http://www.google.com/#ref", 7},
666 {"omit slash when there's a query",
667 "http://www.google.com/?", "en",
668 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
669 L"http://www.google.com/?", 7},
670 {"omit slash when it's not the entire path",
671 "http://www.google.com/foo", "en",
672 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
673 L"http://www.google.com/foo", 7},
674 {"omit slash for nonstandard URLs",
675 "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
676 UnescapeRule::NORMAL, L"data:/", 5},
677 {"omit slash for file URLs",
678 "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
679 UnescapeRule::NORMAL, L"file:///", 7},
681 // -------- view-source: --------
682 {"view-source",
683 "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
684 UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
685 19},
687 {"view-source of view-source",
688 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
689 default_format_type, UnescapeRule::NORMAL,
690 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
692 // view-source should omit http and trailing slash where non-view-source
693 // would.
694 {"view-source omit http",
695 "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
696 UnescapeRule::NORMAL, L"view-source:a.b/c",
697 12},
698 {"view-source omit http starts with ftp.",
699 "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
700 UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
701 19},
702 {"view-source omit slash when it's the entire path",
703 "view-source:http://a.b/", "en", kFormatUrlOmitAll,
704 UnescapeRule::NORMAL, L"view-source:a.b",
705 12},
// Format every row and compare both the output string and the prefix length.
708 for (size_t i = 0; i < arraysize(tests); ++i) {
709 size_t prefix_len;
710 base::string16 formatted = FormatUrl(
711 GURL(tests[i].input), tests[i].languages, tests[i].format_types,
712 tests[i].escape_rules, NULL, &prefix_len, NULL);
713 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
714 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
// Checks the url::Parsed structure that FormatUrl() fills in. In each scenario
// the component ranges in |parsed| are applied to the *formatted* string with
// substr() and compared with the expected component text; components expected
// to be absent are checked with is_valid(). The same |parsed| object is reused
// for every FormatUrl() call.
718 TEST(NetUtilTest, FormatUrlParsed) {
719 // No unescape case.
720 url::Parsed parsed;
721 base::string16 formatted = FormatUrl(
722 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
723 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
724 "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
725 NULL);
726 EXPECT_EQ(WideToUTF16(
727 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
728 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
729 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
730 formatted.substr(parsed.username.begin, parsed.username.len));
731 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
732 formatted.substr(parsed.password.begin, parsed.password.len));
733 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
734 formatted.substr(parsed.host.begin, parsed.host.len));
735 EXPECT_EQ(WideToUTF16(L"8080"),
736 formatted.substr(parsed.port.begin, parsed.port.len));
737 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
738 formatted.substr(parsed.path.begin, parsed.path.len));
739 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
740 formatted.substr(parsed.query.begin, parsed.query.len));
741 EXPECT_EQ(WideToUTF16(L"\x30B0"),
742 formatted.substr(parsed.ref.begin, parsed.ref.len));
744 // Unescape case.
745 formatted = FormatUrl(
746 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
747 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
748 "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
749 NULL);
750 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
751 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
752 EXPECT_EQ(WideToUTF16(L"\x30B0"),
753 formatted.substr(parsed.username.begin, parsed.username.len));
754 EXPECT_EQ(WideToUTF16(L"\x30FC"),
755 formatted.substr(parsed.password.begin, parsed.password.len));
756 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
757 formatted.substr(parsed.host.begin, parsed.host.len));
758 EXPECT_EQ(WideToUTF16(L"8080"),
759 formatted.substr(parsed.port.begin, parsed.port.len));
760 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
761 formatted.substr(parsed.path.begin, parsed.path.len));
762 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
763 formatted.substr(parsed.query.begin, parsed.query.len));
764 EXPECT_EQ(WideToUTF16(L"\x30B0"),
765 formatted.substr(parsed.ref.begin, parsed.ref.len));
767 // Omit_username_password + unescape case.
768 formatted = FormatUrl(
769 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
770 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
771 "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
772 NULL, NULL);
773 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
774 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
// With the credentials omitted, their components must be reported as invalid.
775 EXPECT_FALSE(parsed.username.is_valid());
776 EXPECT_FALSE(parsed.password.is_valid());
777 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
778 formatted.substr(parsed.host.begin, parsed.host.len));
779 EXPECT_EQ(WideToUTF16(L"8080"),
780 formatted.substr(parsed.port.begin, parsed.port.len));
781 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
782 formatted.substr(parsed.path.begin, parsed.path.len));
783 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
784 formatted.substr(parsed.query.begin, parsed.query.len));
785 EXPECT_EQ(WideToUTF16(L"\x30B0"),
786 formatted.substr(parsed.ref.begin, parsed.ref.len));
788 // View-source case.
789 formatted =
790 FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"),
791 std::string(),
792 kFormatUrlOmitUsernamePassword,
793 UnescapeRule::NORMAL,
794 &parsed,
795 NULL,
796 NULL);
797 EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
798 formatted);
// The scheme range is expected to span "view-source:http".
799 EXPECT_EQ(WideToUTF16(L"view-source:http"),
800 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
801 EXPECT_FALSE(parsed.username.is_valid());
802 EXPECT_FALSE(parsed.password.is_valid());
803 EXPECT_EQ(WideToUTF16(L"host"),
804 formatted.substr(parsed.host.begin, parsed.host.len));
805 EXPECT_EQ(WideToUTF16(L"81"),
806 formatted.substr(parsed.port.begin, parsed.port.len));
807 EXPECT_EQ(WideToUTF16(L"/path"),
808 formatted.substr(parsed.path.begin, parsed.path.len));
809 EXPECT_EQ(WideToUTF16(L"query"),
810 formatted.substr(parsed.query.begin, parsed.query.len));
811 EXPECT_EQ(WideToUTF16(L"ref"),
812 formatted.substr(parsed.ref.begin, parsed.ref.len));
814 // omit http case.
815 formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"),
816 std::string(),
817 kFormatUrlOmitHTTP,
818 UnescapeRule::NORMAL,
819 &parsed,
820 NULL,
821 NULL);
822 EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
// The omitted scheme must be reported as invalid.
823 EXPECT_FALSE(parsed.scheme.is_valid());
824 EXPECT_FALSE(parsed.username.is_valid());
825 EXPECT_FALSE(parsed.password.is_valid());
826 EXPECT_EQ(WideToUTF16(L"host"),
827 formatted.substr(parsed.host.begin, parsed.host.len));
828 EXPECT_EQ(WideToUTF16(L"8000"),
829 formatted.substr(parsed.port.begin, parsed.port.len));
830 EXPECT_EQ(WideToUTF16(L"/a"),
831 formatted.substr(parsed.path.begin, parsed.path.len));
832 EXPECT_EQ(WideToUTF16(L"b=c"),
833 formatted.substr(parsed.query.begin, parsed.query.len));
834 EXPECT_EQ(WideToUTF16(L"d"),
835 formatted.substr(parsed.ref.begin, parsed.ref.len));
837 // omit http starts with ftp case.
838 formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"),
839 std::string(),
840 kFormatUrlOmitHTTP,
841 UnescapeRule::NORMAL,
842 &parsed,
843 NULL,
844 NULL);
// The expected output keeps "http://" for a host starting with "ftp.".
845 EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
846 EXPECT_TRUE(parsed.scheme.is_valid());
847 EXPECT_FALSE(parsed.username.is_valid());
848 EXPECT_FALSE(parsed.password.is_valid());
849 EXPECT_EQ(WideToUTF16(L"http"),
850 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
851 EXPECT_EQ(WideToUTF16(L"ftp.host"),
852 formatted.substr(parsed.host.begin, parsed.host.len));
853 EXPECT_EQ(WideToUTF16(L"8000"),
854 formatted.substr(parsed.port.begin, parsed.port.len));
855 EXPECT_EQ(WideToUTF16(L"/a"),
856 formatted.substr(parsed.path.begin, parsed.path.len));
857 EXPECT_EQ(WideToUTF16(L"b=c"),
858 formatted.substr(parsed.query.begin, parsed.query.len));
859 EXPECT_EQ(WideToUTF16(L"d"),
860 formatted.substr(parsed.ref.begin, parsed.ref.len));
862 // omit http starts with 'f' case.
863 formatted = FormatUrl(GURL("http://f/"),
864 std::string(),
865 kFormatUrlOmitHTTP,
866 UnescapeRule::NORMAL,
867 &parsed,
868 NULL,
869 NULL);
// A host that merely begins with 'f' is expected to lose its "http://".
870 EXPECT_EQ(WideToUTF16(L"f/"), formatted);
871 EXPECT_FALSE(parsed.scheme.is_valid());
872 EXPECT_FALSE(parsed.username.is_valid());
873 EXPECT_FALSE(parsed.password.is_valid());
874 EXPECT_FALSE(parsed.port.is_valid());
875 EXPECT_TRUE(parsed.path.is_valid());
876 EXPECT_FALSE(parsed.query.is_valid());
877 EXPECT_FALSE(parsed.ref.is_valid());
878 EXPECT_EQ(WideToUTF16(L"f"),
879 formatted.substr(parsed.host.begin, parsed.host.len));
880 EXPECT_EQ(WideToUTF16(L"/"),
881 formatted.substr(parsed.path.begin, parsed.path.len));
884 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
885 // results in the original GURL, for each ASCII character in the path.
886 TEST(NetUtilTest, FormatUrlRoundTripPathASCII) {
887 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
888 GURL url(std::string("http://www.google.com/") +
889 static_cast<char>(test_char));
890 size_t prefix_len;
891 base::string16 formatted = FormatUrl(url,
892 std::string(),
893 kFormatUrlOmitUsernamePassword,
894 UnescapeRule::NORMAL,
895 NULL,
896 &prefix_len,
897 NULL);
898 EXPECT_EQ(url.spec(), GURL(formatted).spec());
902 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
903 // results in the original GURL, for each escaped ASCII character in the path.
904 TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) {
905 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
906 std::string original_url("http://www.google.com/");
907 original_url.push_back('%');
908 original_url.append(base::HexEncode(&test_char, 1));
910 GURL url(original_url);
911 size_t prefix_len;
912 base::string16 formatted = FormatUrl(url,
913 std::string(),
914 kFormatUrlOmitUsernamePassword,
915 UnescapeRule::NORMAL,
916 NULL,
917 &prefix_len,
918 NULL);
919 EXPECT_EQ(url.spec(), GURL(formatted).spec());
923 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
924 // results in the original GURL, for each ASCII character in the query.
925 TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) {
926 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
927 GURL url(std::string("http://www.google.com/?") +
928 static_cast<char>(test_char));
929 size_t prefix_len;
930 base::string16 formatted = FormatUrl(url,
931 std::string(),
932 kFormatUrlOmitUsernamePassword,
933 UnescapeRule::NORMAL,
934 NULL,
935 &prefix_len,
936 NULL);
937 EXPECT_EQ(url.spec(), GURL(formatted).spec());
941 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
942 // only results in a different GURL for certain characters.
943 TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) {
944 // A full list of characters which FormatURL should unescape and GURL should
945 // not escape again, when they appear in a query string.
946 const char kUnescapedCharacters[] =
947 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
948 for (unsigned char test_char = 0; test_char < 128; ++test_char) {
949 std::string original_url("http://www.google.com/?");
950 original_url.push_back('%');
951 original_url.append(base::HexEncode(&test_char, 1));
953 GURL url(original_url);
954 size_t prefix_len;
955 base::string16 formatted = FormatUrl(url,
956 std::string(),
957 kFormatUrlOmitUsernamePassword,
958 UnescapeRule::NORMAL,
959 NULL,
960 &prefix_len,
961 NULL);
963 if (test_char &&
964 strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
965 EXPECT_NE(url.spec(), GURL(formatted).spec());
966 } else {
967 EXPECT_EQ(url.spec(), GURL(formatted).spec());
// Drives CheckAdjustedOffsets (defined outside this excerpt) over a series of
// URLs. Every call passes the URL text, a language string, a set of
// kFormatUrl* flags, an UnescapeRule and a table of expected offsets. kNpos is
// base::string16::npos.
// NOTE(review): each table presumably holds, per offset into the input URL,
// the offset expected in the formatted string, with kNpos for positions that
// no longer map to a single output position - confirm against
// CheckAdjustedOffsets.
// NOTE(review): the embedded line numbering skips wherever a line held only
// "};" or "}", so those lines are not visible in this excerpt.
972 TEST(NetUtilTest, FormatUrlWithOffsets) {
// Empty URL: no expected-offset table is supplied (NULL).
973 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing,
974 UnescapeRule::NORMAL, NULL);
// Plain URL with kFormatUrlOmitNothing: the table is the identity 0..25.
976 const size_t basic_offsets[] = {
977 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
978 21, 22, 23, 24, 25
980 CheckAdjustedOffsets("http://www.google.com/foo/", "en",
981 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
982 basic_offsets);
// Username and password present, formatted with
// kFormatUrlOmitUsernamePassword.
984 const size_t omit_auth_offsets_1[] = {
985 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
986 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
988 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
989 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
990 omit_auth_offsets_1);
// Username only, formatted with kFormatUrlOmitUsernamePassword.
992 const size_t omit_auth_offsets_2[] = {
993 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
994 15, 16, 17, 18, 19, 20, 21
996 CheckAdjustedOffsets("http://foo@www.google.com/", "en",
997 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
998 omit_auth_offsets_2);
// Percent-escaped username and password, formatted with
// kFormatUrlOmitNothing.
1000 const size_t dont_omit_auth_offsets[] = {
1001 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1002 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1003 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
1004 30, 31
1006 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
1007 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
1008 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1009 dont_omit_auth_offsets);
// "view-source:" prefix in front of a URL carrying a username, formatted with
// kFormatUrlOmitUsernamePassword.
1011 const size_t view_source_offsets[] = {
1012 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
1013 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
1015 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
1016 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1017 view_source_offsets);
// Punycode ("xn--") host with a single encoded label; language is "ja".
1019 const size_t idn_hostname_offsets_1[] = {
1020 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1021 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
1022 13, 14, 15, 16, 17, 18, 19
1024 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
1025 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
1026 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1027 idn_hostname_offsets_1);
// Punycode host with two encoded labels between plain "test" labels; language
// is "zh-CN".
1029 const size_t idn_hostname_offsets_2[] = {
1030 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
1031 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
1032 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1033 kNpos, 19, 20, 21, 22, 23, 24
1035 // Convert punycode to
1036 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
1037 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
1038 "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1039 idn_hostname_offsets_2);
// Percent-escaped space and multi-byte sequences in the path. This is the only
// call in this test that uses UnescapeRule::SPACES instead of NORMAL.
1041 const size_t unescape_offsets[] = {
1042 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1043 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
1044 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
1045 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1046 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
1048 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
1049 CheckAdjustedOffsets(
1050 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
1051 "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets);
// Raw (not percent-escaped) multi-byte sequences in the ref.
// NOTE(review): the embedded numbering jumps from 1055 to 1058 below, so the
// end of this table is not visible in this excerpt - confirm the remaining
// entries against the upstream file.
1053 const size_t ref_offsets[] = {
1054 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1055 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
1058 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
1059 CheckAdjustedOffsets(
1060 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
1061 kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets);
// http URL formatted with kFormatUrlOmitHTTP.
1063 const size_t omit_http_offsets[] = {
1064 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
1065 10, 11, 12, 13, 14
1067 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP,
1068 UnescapeRule::NORMAL, omit_http_offsets);
// kFormatUrlOmitHTTP with a host that begins with "ftp.": the table is the
// identity 0..21.
1070 const size_t omit_http_start_with_ftp_offsets[] = {
1071 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1073 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
1074 UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets);
// http URL with a username, formatted with kFormatUrlOmitAll.
1076 const size_t omit_all_offsets[] = {
1077 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
1078 0, 1, 2, 3, 4, 5, 6, 7
1080 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
1081 UnescapeRule::NORMAL, omit_all_offsets);
1084 } // namespace net