Pin Chrome's shortcut to the Win10 Start menu on install and OS upgrade.
[chromium-blink-merge.git] / net / base / net_util_icu_unittest.cc
blobf643426aa678ed48f36e96877adde254a26624b5
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
7 #include <string.h>
9 #include <vector>
11 #include "base/format_macros.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/stringprintf.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/time/time.h"
16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "url/gurl.h"
19 using base::ASCIIToUTF16;
20 using base::WideToUTF16;
22 namespace net {
24 namespace {
26 const size_t kNpos = base::string16::npos;
// Accept-language lists exercised by the IDN tests. The position of each entry
// matters: IDNTestCase::unicode_allowed is indexed in parallel with this
// array, so entries must not be reordered without updating the case tables.
const char* const kLanguages[] = {
    "",          // 0
    "en",        // 1
    "zh-CN",     // 2
    "ja",        // 3
    "ko",        // 4
    "he",        // 5
    "ar",        // 6
    "ru",        // 7
    "el",        // 8
    "fr",        // 9
    "de",        // 10
    "pt",        // 11
    "sv",        // 12
    "th",        // 13
    "hi",        // 14
    "de,en",     // 15
    "el,en",     // 16
    "zh-TW,en",  // 17
    "ko,ja",     // 18
    "he,ru,en",  // 19
    "zh,ru,en",  // 20
};
36 struct IDNTestCase {
37 const char* const input;
38 const wchar_t* unicode_output;
39 const bool unicode_allowed[arraysize(kLanguages)];
// IDN conversion cases shared by the IDNToUnicodeFast and IDNToUnicodeSlow
// tests. Each entry is {input host, Unicode form expected when conversion is
// allowed, per-language allow flags}. The flags are indexed in parallel with
// kLanguages; where a flag is false the tests expect |input| back unchanged.
// NOTE(review): several entries below list only 20 flags and show no closing
// "}}" in this text; presumably the omitted last flag is value-initialized to
// false -- confirm against the upstream file.
42 // TODO(jungshik) This is just a random sample of languages and is far
43 // from exhaustive. We may have to generate all the combinations
44 // of languages (powerset of a set of all the languages).
45 const IDNTestCase idn_cases[] = {
46 // No IDN
47 {"www.google.com", L"www.google.com",
48 {true, true, true, true, true,
49 true, true, true, true, true,
50 true, true, true, true, true,
51 true, true, true, true, true,
52 true}},
53 {"www.google.com.", L"www.google.com.",
54 {true, true, true, true, true,
55 true, true, true, true, true,
56 true, true, true, true, true,
57 true, true, true, true, true,
58 true}},
59 {".", L".",
60 {true, true, true, true, true,
61 true, true, true, true, true,
62 true, true, true, true, true,
63 true, true, true, true, true,
64 true}},
65 {"", L"",
66 {true, true, true, true, true,
67 true, true, true, true, true,
68 true, true, true, true, true,
69 true, true, true, true, true,
70 true}},
71 // IDN
72 // Hanzi (Traditional Chinese)
73 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
74 {true, false, true, true, false,
75 false, false, false, false, false,
76 false, false, false, false, false,
77 false, false, true, true, false,
78 true}},
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
80 {"xn--cy2a840a.com", L"\x89c6\x9891.com",
81 {true, false, true, false, false,
82 false, false, false, false, false,
83 false, false, false, false, false,
84 false, false, false, false, false,
85 true}},
86 // Hanzi + '123'
87 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
88 {true, false, true, true, false,
89 false, false, false, false, false,
90 false, false, false, false, false,
91 false, false, true, true, false,
92 true}},
93 // Hanzi + Latin : U+56FD is simplified and is regarded
94 // as not supported in zh-TW.
95 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
96 {false, false, true, true, false,
97 false, false, false, false, false,
98 false, false, false, false, false,
99 false, false, false, true, false,
100 true}},
101 // Kanji + Kana (Japanese)
102 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
103 {true, false, false, true, false,
104 false, false, false, false, false,
105 false, false, false, false, false,
106 false, false, false, true, false,
107 false}},
108 // Katakana including U+30FC
109 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
110 {true, false, false, true, false,
111 false, false, false, false, false,
112 false, false, false, false, false,
113 false, false, false, true, false,
115 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
116 {true, false, false, true, false,
117 false, false, false, false, false,
118 false, false, false, false, false,
119 false, false, false, true, false,
121 // Katakana + Latin (Japanese)
122 // TODO(jungshik): Change 'false' in the first element to 'true'
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
124 // of our IsIDNComponentInSingleScript().
125 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
126 {false, false, false, true, false,
127 false, false, false, false, false,
128 false, false, false, false, false,
129 false, false, false, true, false,
131 {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
132 {false, false, false, true, false,
133 false, false, false, false, false,
134 false, false, false, false, false,
135 false, false, false, true, false,
137 // Hangul (Korean)
138 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
139 {true, false, false, false, true,
140 false, false, false, false, false,
141 false, false, false, false, false,
142 false, false, false, true, false,
143 false}},
144 // b<u-umlaut>cher (German)
145 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
146 {true, false, false, false, false,
147 false, false, false, false, true,
148 true, false, false, false, false,
149 true, false, false, false, false,
150 false}},
151 // a with diaeresis
152 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
153 {true, false, false, false, false,
154 false, false, false, false, false,
155 true, false, true, false, false,
156 true, false, false, false, false,
157 false}},
158 // c-cedilla (French)
159 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
160 {true, false, false, false, false,
161 false, false, false, false, true,
162 false, true, false, false, false,
163 false, false, false, false, false,
164 false}},
165 // caf<e-acute> (French)
166 {"xn--caf-dma.fr", L"caf\x00e9.fr",
167 {true, false, false, false, false,
168 false, false, false, false, true,
169 false, true, true, false, false,
170 false, false, false, false, false,
171 false}},
172 // c-cedilla and a with tilde (Portuguese)
173 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
174 {true, false, false, false, false,
175 false, false, false, false, false,
176 false, true, false, false, false,
177 false, false, false, false, false,
178 false}},
179 // s with caron
180 {"xn--achy-f6a.com", L"\x0161" L"achy.com",
181 {true, false, false, false, false,
182 false, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
185 false}},
186 // TODO(jungshik) : Add examples with Cyrillic letters
187 // only used in some languages written in Cyrillic.
188 // Eutopia (Greek)
189 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
190 {true, false, false, false, false,
191 false, false, false, true, false,
192 false, false, false, false, false,
193 false, true, false, false, false,
194 false}},
195 // Eutopia + 123 (Greek)
196 {"xn---123-pldm0haj2bk.gr",
197 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
198 {true, false, false, false, false,
199 false, false, false, true, false,
200 false, false, false, false, false,
201 false, true, false, false, false,
202 false}},
203 // Cyrillic (Russian)
204 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
205 {true, false, false, false, false,
206 false, false, true, false, false,
207 false, false, false, false, false,
208 false, false, false, false, true,
209 true}},
210 // Cyrillic + 123 (Russian)
211 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
212 {true, false, false, false, false,
213 false, false, true, false, false,
214 false, false, false, false, false,
215 false, false, false, false, true,
216 true}},
217 // Arabic
218 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
219 {true, false, false, false, false,
220 false, true, false, false, false,
221 false, false, false, false, false,
222 false, false, false, false, false,
223 false}},
224 // Hebrew
225 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
226 {true, false, false, false, false,
227 true, false, false, false, false,
228 false, false, false, false, false,
229 false, false, false, false, true,
230 false}},
231 // Thai
232 {"xn--12c2cc4ag3b4ccu.th",
233 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
234 {true, false, false, false, false,
235 false, false, false, false, false,
236 false, false, false, true, false,
237 false, false, false, false, false,
238 false}},
239 // Devanagari (Hindi)
240 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
241 {true, false, false, false, false,
242 false, false, false, false, false,
243 false, false, false, false, true,
244 false, false, false, false, false,
245 false}},
246 // Invalid IDN
// unicode_output is NULL here; every flag is false, so the tests never read it.
247 {"xn--hello?world.com", NULL,
248 {false, false, false, false, false,
249 false, false, false, false, false,
250 false, false, false, false, false,
251 false, false, false, false, false,
252 false}},
253 // Unsafe IDNs
254 // "payp<alpha>l.com"
255 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
256 {false, false, false, false, false,
257 false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
260 false}},
261 // google.gr with Greek omicron and epsilon
262 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
263 {false, false, false, false, false,
264 false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
267 false}},
268 // google.ru with Cyrillic o
269 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
270 {false, false, false, false, false,
271 false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
274 false}},
275 // h<e with acute>llo<China in Han>.cn
276 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
277 {false, false, false, false, false,
278 false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
281 false}},
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru
283 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
284 {false, false, false, false, false,
285 false, false, false, false, false,
286 false, false, false, false, false,
287 false, false, false, false, false,
288 false}},
289 // One that's really long that will force a buffer realloc
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
291 "aaaaaaa",
292 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
293 L"aaaaaaaa",
294 {true, true, true, true, true,
295 true, true, true, true, true,
296 true, true, true, true, true,
297 true, true, true, true, true,
298 true}},
299 // Test cases for characters we blacklisted although allowed in IDN.
300 // Embedded spaces will be turned to %20 in the display.
301 // TODO(jungshik): We need to have more cases. This is a typical
302 // data-driven trap. The following test cases need to be separated
303 // and tested only for a couple of languages.
304 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
305 {false, false, false, false, false,
306 false, false, false, false, false,
307 false, false, false, false, false,
308 false, false, false, false, false,
309 false}},
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
311 {false, false, false, false, false,
312 false, false, false, false, false,
313 false, false, false, false, false,
314 false, false, false, false, false,
316 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
317 {false, false, false, false, false,
318 false, false, false, false, false,
319 false, false, false, false, false,
320 false, false, false, false, false,
322 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
323 {false, false, false, false, false,
324 false, false, false, false, false,
325 false, false, false, false, false,
326 false, false, false, false, false,
328 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
329 {false, false, false, false, false,
330 false, false, false, false, false,
331 false, false, false, false, false,
332 false, false, false, false, false,
334 // Padlock icon spoof.
335 {"xn--google-hj64e", L"\U0001f512google.com",
336 {false, false, false, false, false,
337 false, false, false, false, false,
338 false, false, false, false, false,
339 false, false, false, false, false,
341 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
342 // all strings with the surrogate '\xdd12'.
343 {"xn--fk9c.com", L"\U00010912.com",
344 {true, false, false, false, false,
345 false, false, false, false, false,
346 false, false, false, false, false,
347 false, false, false, false, false,
// The two entries inside "#if 0" are compiled out and therefore never run.
349 #if 0
350 // These two cases are special. We need a separate test.
351 // U+3000 and U+3002 are normalized to ASCII space and dot.
352 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
353 {false, false, true, false, false,
354 false, false, false, false, false,
355 false, false, false, false, false,
356 false, false, true, false, false,
357 true}},
358 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
359 {false, false, true, false, false,
360 false, false, false, false, false,
361 false, false, false, false, false,
362 false, false, true, false, false,
363 true}},
364 #endif
// An (input offset, output offset) pair.
struct AdjustOffsetCase {
  size_t input_offset;
  size_t output_offset;
};
372 struct UrlTestData {
373 const char* const description;
374 const char* const input;
375 const char* const languages;
376 FormatUrlTypes format_types;
377 UnescapeRule::Type escape_rules;
378 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
379 size_t prefix_len;
382 // A helper for IDN*{Fast,Slow}.
383 // Append "::<language list>" to |expected| and |actual| to make it
384 // easy to tell which sub-case fails without debugging.
385 void AppendLanguagesToOutputs(const char* languages,
386 base::string16* expected,
387 base::string16* actual) {
388 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
389 expected->append(to_append);
390 actual->append(to_append);
393 // A pair of helpers for the FormatUrlWithOffsets() test.
394 void VerboseExpect(size_t expected,
395 size_t actual,
396 const std::string& original_url,
397 size_t position,
398 const base::string16& formatted_url) {
399 EXPECT_EQ(expected, actual) << "Original URL: " << original_url
400 << " (at char " << position << ")\nFormatted URL: " << formatted_url;
403 void CheckAdjustedOffsets(const std::string& url_string,
404 const std::string& languages,
405 FormatUrlTypes format_types,
406 UnescapeRule::Type unescape_rules,
407 const size_t* output_offsets) {
408 GURL url(url_string);
409 size_t url_length = url_string.length();
410 std::vector<size_t> offsets;
411 for (size_t i = 0; i <= url_length + 1; ++i)
412 offsets.push_back(i);
413 offsets.push_back(500000); // Something larger than any input length.
414 offsets.push_back(std::string::npos);
415 base::string16 formatted_url = FormatUrlWithOffsets(url, languages,
416 format_types, unescape_rules, NULL, NULL, &offsets);
417 for (size_t i = 0; i < url_length; ++i)
418 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
419 VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
420 url_length, formatted_url);
421 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
422 500000, formatted_url);
423 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
424 std::string::npos, formatted_url);
427 } // anonymous namespace
429 TEST(NetUtilTest, IDNToUnicodeFast) {
430 for (size_t i = 0; i < arraysize(idn_cases); i++) {
431 for (size_t j = 0; j < arraysize(kLanguages); j++) {
432 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
433 if (j == 3 || j == 17 || j == 18)
434 continue;
435 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
436 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
437 WideToUTF16(idn_cases[i].unicode_output) :
438 ASCIIToUTF16(idn_cases[i].input));
439 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
440 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
441 << "\", languages: \"" << kLanguages[j]
442 << "\"";
447 TEST(NetUtilTest, IDNToUnicodeSlow) {
448 for (size_t i = 0; i < arraysize(idn_cases); i++) {
449 for (size_t j = 0; j < arraysize(kLanguages); j++) {
450 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
451 if (!(j == 3 || j == 17 || j == 18))
452 continue;
453 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
454 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
455 WideToUTF16(idn_cases[i].unicode_output) :
456 ASCIIToUTF16(idn_cases[i].input));
457 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
458 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
459 << "\", languages: \"" << kLanguages[j]
460 << "\"";
465 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
466 // te), which was causing a crash (See http://crbug.com/510551). This may be an
467 // icu bug, but regardless, that should not cause a crash.
468 TEST(NetUtilTest, IDNToUnicodeNeverCrashes) {
469 for (char c1 = 'a'; c1 <= 'z'; c1++) {
470 for (char c2 = 'a'; c2 <= 'z'; c2++) {
471 std::string lang = base::StringPrintf("%c%c", c1, c2);
472 base::string16 output(IDNToUnicode("xn--74h", lang));
477 TEST(NetUtilTest, StripWWW) {
478 EXPECT_EQ(base::string16(), StripWWW(base::string16()));
479 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www.")));
480 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
481 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
484 // This is currently a windows specific function.
485 #if defined(OS_WIN)
486 namespace {
488 struct GetDirectoryListingEntryCase {
489 const wchar_t* name;
490 const char* const raw_bytes;
491 bool is_dir;
492 int64_t filesize;
493 base::Time time;
494 const char* const expected;
497 } // namespace
// Table-driven check of GetDirectoryListingEntry(): each case's name (converted
// with WideToUTF16), raw_bytes, is_dir, filesize and time are passed to the
// function and the returned string must equal |expected| exactly.
499 TEST(NetUtilTest, GetDirectoryListingEntry) {
500 const GetDirectoryListingEntryCase test_cases[] = {
// NOTE(review): no raw_bytes value is visible for this entry or for the
// second and fourth ones in this text; presumably an empty string was there --
// confirm against the upstream file.
501 {L"Foo",
503 false,
504 10000,
505 base::Time(),
506 "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
507 {L"quo\"tes",
509 false,
510 10000,
511 base::Time(),
512 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
513 "\n"},
// Same name as the previous case, this time with explicit raw_bytes.
514 {L"quo\"tes",
515 "quo\"tes",
516 false,
517 10000,
518 base::Time(),
519 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
520 "\n"},
521 // U+D55C U+AE00. raw_bytes is empty (either a local file with
522 // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
523 {L"\xD55C\xAE00.txt",
525 false,
526 10000,
527 base::Time(),
528 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\","
529 "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"},
530 // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
531 // a local or remote file in EUC-KR.
532 {L"\xD55C\xAE00.txt",
533 "\xC7\xD1\xB1\xDB.txt",
534 false,
535 10000,
536 base::Time(),
537 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\""
538 ",0,\"9.8 kB\",\"\");</script>\n"},
// Run every case and compare the produced string with the expectation.
541 for (size_t i = 0; i < arraysize(test_cases); ++i) {
542 const std::string results = GetDirectoryListingEntry(
543 WideToUTF16(test_cases[i].name),
544 test_cases[i].raw_bytes,
545 test_cases[i].is_dir,
546 test_cases[i].filesize,
547 test_cases[i].time);
548 EXPECT_EQ(test_cases[i].expected, results);
552 #endif
// Table-driven check of FormatUrl(): each case formats |input| with the given
// language list, format flags and unescape rule, then compares both the
// returned string and the reported prefix length with the expected values.
554 TEST(NetUtilTest, FormatUrl) {
// Format flags used by every case that does not exercise a specific flag.
555 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
556 const UrlTestData tests[] = {
557 {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
559 {"Simple URL",
560 "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
561 L"http://www.google.com/", 7},
563 {"With a port number and a reference",
564 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
565 UnescapeRule::NORMAL,
566 L"http://www.google.com:8080/#\x30B0", 7},
568 // -------- IDN tests --------
569 {"Japanese IDN with ja",
570 "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
571 UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
573 {"Japanese IDN with en",
574 "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
575 UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
577 {"Japanese IDN without any languages",
578 "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
579 UnescapeRule::NORMAL,
580 // Single script is safe for empty languages.
581 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
583 {"mailto: with Japanese IDN",
584 "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
585 UnescapeRule::NORMAL,
586 // GURL doesn't assume an email address's domain part as a host name.
587 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
589 {"file: with Japanese IDN",
590 "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
591 UnescapeRule::NORMAL,
592 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
594 {"ftp: with Japanese IDN",
595 "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
596 UnescapeRule::NORMAL,
597 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
599 // -------- omit_username_password flag tests --------
600 {"With username and password, omit_username_password=false",
601 "http://user:passwd@example.com/foo", "",
602 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
603 L"http://user:passwd@example.com/foo", 19},
605 {"With username and password, omit_username_password=true",
606 "http://user:passwd@example.com/foo", "", default_format_type,
607 UnescapeRule::NORMAL, L"http://example.com/foo", 7},
609 {"With username and no password",
610 "http://user@example.com/foo", "", default_format_type,
611 UnescapeRule::NORMAL, L"http://example.com/foo", 7},
613 {"Just '@' without username and password",
614 "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
615 L"http://example.com/foo", 7},
617 // GURL doesn't think local-part of an email address is username for URL.
618 {"mailto:, omit_username_password=true",
619 "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
620 L"mailto:foo@example.com", 7},
622 // -------- unescape flag tests --------
623 {"Do not unescape",
624 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
625 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
626 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
627 UnescapeRule::NONE,
628 // GURL parses %-encoded hostnames into Punycode.
629 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
630 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
632 {"Unescape normally",
633 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
634 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
635 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
636 UnescapeRule::NORMAL,
637 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
638 L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
640 {"Unescape normally with BiDi control character",
641 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type,
642 UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
644 {"Unescape normally including unescape spaces",
645 "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
646 UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
// NOTE(review): the remark after the next case calls it "Disabled", but no
// comment delimiters around it are visible in this text; it may have been
// commented out upstream -- confirm before relying on it.
649 {"unescape=true with some special characters",
650 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
651 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
652 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
654 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
656 // -------- omit http: --------
657 {"omit http with user name",
658 "http://user@example.com/foo", "", kFormatUrlOmitAll,
659 UnescapeRule::NORMAL, L"example.com/foo", 0},
// NOTE(review): the prefix_len value and closing brace of the next three
// entries are not visible in this text -- confirm against the upstream file.
661 {"omit http",
662 "http://www.google.com/", "en", kFormatUrlOmitHTTP,
663 UnescapeRule::NORMAL, L"www.google.com/",
666 {"omit http with https",
667 "https://www.google.com/", "en", kFormatUrlOmitHTTP,
668 UnescapeRule::NORMAL, L"https://www.google.com/",
671 {"omit http starts with ftp.",
672 "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
673 UnescapeRule::NORMAL, L"http://ftp.google.com/",
676 // -------- omit trailing slash on bare hostname --------
677 {"omit slash when it's the entire path",
678 "http://www.google.com/", "en",
679 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
680 L"http://www.google.com", 7},
681 {"omit slash when there's a ref",
682 "http://www.google.com/#ref", "en",
683 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
684 L"http://www.google.com/#ref", 7},
685 {"omit slash when there's a query",
686 "http://www.google.com/?", "en",
687 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
688 L"http://www.google.com/?", 7},
689 {"omit slash when it's not the entire path",
690 "http://www.google.com/foo", "en",
691 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
692 L"http://www.google.com/foo", 7},
693 {"omit slash for nonstandard URLs",
694 "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
695 UnescapeRule::NORMAL, L"data:/", 5},
696 {"omit slash for file URLs",
697 "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
698 UnescapeRule::NORMAL, L"file:///", 7},
700 // -------- view-source: --------
701 {"view-source",
702 "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
703 UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
704 19},
706 {"view-source of view-source",
707 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
708 default_format_type, UnescapeRule::NORMAL,
709 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
711 // view-source should omit http and trailing slash where non-view-source
712 // would.
713 {"view-source omit http",
714 "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
715 UnescapeRule::NORMAL, L"view-source:a.b/c",
716 12},
717 {"view-source omit http starts with ftp.",
718 "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
719 UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
720 19},
721 {"view-source omit slash when it's the entire path",
722 "view-source:http://a.b/", "en", kFormatUrlOmitAll,
723 UnescapeRule::NORMAL, L"view-source:a.b",
724 12},
// Format every case; the description is attached to each failure message.
727 for (size_t i = 0; i < arraysize(tests); ++i) {
728 size_t prefix_len;
729 base::string16 formatted = FormatUrl(
730 GURL(tests[i].input), tests[i].languages, tests[i].format_types,
731 tests[i].escape_rules, NULL, &prefix_len, NULL);
732 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
733 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
737 TEST(NetUtilTest, FormatUrlParsed) {
738 // No unescape case.
739 url::Parsed parsed;
740 base::string16 formatted = FormatUrl(
741 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
742 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
743 "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
744 NULL);
745 EXPECT_EQ(WideToUTF16(
746 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
747 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
748 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
749 formatted.substr(parsed.username.begin, parsed.username.len));
750 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
751 formatted.substr(parsed.password.begin, parsed.password.len));
752 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
753 formatted.substr(parsed.host.begin, parsed.host.len));
754 EXPECT_EQ(WideToUTF16(L"8080"),
755 formatted.substr(parsed.port.begin, parsed.port.len));
756 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
757 formatted.substr(parsed.path.begin, parsed.path.len));
758 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
759 formatted.substr(parsed.query.begin, parsed.query.len));
760 EXPECT_EQ(WideToUTF16(L"\x30B0"),
761 formatted.substr(parsed.ref.begin, parsed.ref.len));
763 // Unescape case.
764 formatted = FormatUrl(
765 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
766 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
767 "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
768 NULL);
769 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
770 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
771 EXPECT_EQ(WideToUTF16(L"\x30B0"),
772 formatted.substr(parsed.username.begin, parsed.username.len));
773 EXPECT_EQ(WideToUTF16(L"\x30FC"),
774 formatted.substr(parsed.password.begin, parsed.password.len));
775 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
776 formatted.substr(parsed.host.begin, parsed.host.len));
777 EXPECT_EQ(WideToUTF16(L"8080"),
778 formatted.substr(parsed.port.begin, parsed.port.len));
779 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
780 formatted.substr(parsed.path.begin, parsed.path.len));
781 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
782 formatted.substr(parsed.query.begin, parsed.query.len));
783 EXPECT_EQ(WideToUTF16(L"\x30B0"),
784 formatted.substr(parsed.ref.begin, parsed.ref.len));
786 // Omit_username_password + unescape case.
787 formatted = FormatUrl(
788 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
789 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
790 "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
791 NULL, NULL);
792 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
793 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
794 EXPECT_FALSE(parsed.username.is_valid());
795 EXPECT_FALSE(parsed.password.is_valid());
796 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
797 formatted.substr(parsed.host.begin, parsed.host.len));
798 EXPECT_EQ(WideToUTF16(L"8080"),
799 formatted.substr(parsed.port.begin, parsed.port.len));
800 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
801 formatted.substr(parsed.path.begin, parsed.path.len));
802 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
803 formatted.substr(parsed.query.begin, parsed.query.len));
804 EXPECT_EQ(WideToUTF16(L"\x30B0"),
805 formatted.substr(parsed.ref.begin, parsed.ref.len));
807 // View-source case.
808 formatted =
809 FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"),
810 std::string(),
811 kFormatUrlOmitUsernamePassword,
812 UnescapeRule::NORMAL,
813 &parsed,
814 NULL,
815 NULL);
816 EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
817 formatted);
818 EXPECT_EQ(WideToUTF16(L"view-source:http"),
819 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
820 EXPECT_FALSE(parsed.username.is_valid());
821 EXPECT_FALSE(parsed.password.is_valid());
822 EXPECT_EQ(WideToUTF16(L"host"),
823 formatted.substr(parsed.host.begin, parsed.host.len));
824 EXPECT_EQ(WideToUTF16(L"81"),
825 formatted.substr(parsed.port.begin, parsed.port.len));
826 EXPECT_EQ(WideToUTF16(L"/path"),
827 formatted.substr(parsed.path.begin, parsed.path.len));
828 EXPECT_EQ(WideToUTF16(L"query"),
829 formatted.substr(parsed.query.begin, parsed.query.len));
830 EXPECT_EQ(WideToUTF16(L"ref"),
831 formatted.substr(parsed.ref.begin, parsed.ref.len));
833 // omit http case.
834 formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"),
835 std::string(),
836 kFormatUrlOmitHTTP,
837 UnescapeRule::NORMAL,
838 &parsed,
839 NULL,
840 NULL);
841 EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
842 EXPECT_FALSE(parsed.scheme.is_valid());
843 EXPECT_FALSE(parsed.username.is_valid());
844 EXPECT_FALSE(parsed.password.is_valid());
845 EXPECT_EQ(WideToUTF16(L"host"),
846 formatted.substr(parsed.host.begin, parsed.host.len));
847 EXPECT_EQ(WideToUTF16(L"8000"),
848 formatted.substr(parsed.port.begin, parsed.port.len));
849 EXPECT_EQ(WideToUTF16(L"/a"),
850 formatted.substr(parsed.path.begin, parsed.path.len));
851 EXPECT_EQ(WideToUTF16(L"b=c"),
852 formatted.substr(parsed.query.begin, parsed.query.len));
853 EXPECT_EQ(WideToUTF16(L"d"),
854 formatted.substr(parsed.ref.begin, parsed.ref.len));
856 // omit http starts with ftp case.
857 formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"),
858 std::string(),
859 kFormatUrlOmitHTTP,
860 UnescapeRule::NORMAL,
861 &parsed,
862 NULL,
863 NULL);
864 EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
865 EXPECT_TRUE(parsed.scheme.is_valid());
866 EXPECT_FALSE(parsed.username.is_valid());
867 EXPECT_FALSE(parsed.password.is_valid());
868 EXPECT_EQ(WideToUTF16(L"http"),
869 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
870 EXPECT_EQ(WideToUTF16(L"ftp.host"),
871 formatted.substr(parsed.host.begin, parsed.host.len));
872 EXPECT_EQ(WideToUTF16(L"8000"),
873 formatted.substr(parsed.port.begin, parsed.port.len));
874 EXPECT_EQ(WideToUTF16(L"/a"),
875 formatted.substr(parsed.path.begin, parsed.path.len));
876 EXPECT_EQ(WideToUTF16(L"b=c"),
877 formatted.substr(parsed.query.begin, parsed.query.len));
878 EXPECT_EQ(WideToUTF16(L"d"),
879 formatted.substr(parsed.ref.begin, parsed.ref.len));
881 // omit http starts with 'f' case.
882 formatted = FormatUrl(GURL("http://f/"),
883 std::string(),
884 kFormatUrlOmitHTTP,
885 UnescapeRule::NORMAL,
886 &parsed,
887 NULL,
888 NULL);
889 EXPECT_EQ(WideToUTF16(L"f/"), formatted);
890 EXPECT_FALSE(parsed.scheme.is_valid());
891 EXPECT_FALSE(parsed.username.is_valid());
892 EXPECT_FALSE(parsed.password.is_valid());
893 EXPECT_FALSE(parsed.port.is_valid());
894 EXPECT_TRUE(parsed.path.is_valid());
895 EXPECT_FALSE(parsed.query.is_valid());
896 EXPECT_FALSE(parsed.ref.is_valid());
897 EXPECT_EQ(WideToUTF16(L"f"),
898 formatted.substr(parsed.host.begin, parsed.host.len));
899 EXPECT_EQ(WideToUTF16(L"/"),
900 formatted.substr(parsed.path.begin, parsed.path.len));
903 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
904 // results in the original GURL, for each ASCII character in the path.
905 TEST(NetUtilTest, FormatUrlRoundTripPathASCII) {
906 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
907 GURL url(std::string("http://www.google.com/") +
908 static_cast<char>(test_char));
909 size_t prefix_len;
910 base::string16 formatted = FormatUrl(url,
911 std::string(),
912 kFormatUrlOmitUsernamePassword,
913 UnescapeRule::NORMAL,
914 NULL,
915 &prefix_len,
916 NULL);
917 EXPECT_EQ(url.spec(), GURL(formatted).spec());
921 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
922 // results in the original GURL, for each escaped ASCII character in the path.
923 TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) {
924 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
925 std::string original_url("http://www.google.com/");
926 original_url.push_back('%');
927 original_url.append(base::HexEncode(&test_char, 1));
929 GURL url(original_url);
930 size_t prefix_len;
931 base::string16 formatted = FormatUrl(url,
932 std::string(),
933 kFormatUrlOmitUsernamePassword,
934 UnescapeRule::NORMAL,
935 NULL,
936 &prefix_len,
937 NULL);
938 EXPECT_EQ(url.spec(), GURL(formatted).spec());
942 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
943 // results in the original GURL, for each ASCII character in the query.
944 TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) {
945 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
946 GURL url(std::string("http://www.google.com/?") +
947 static_cast<char>(test_char));
948 size_t prefix_len;
949 base::string16 formatted = FormatUrl(url,
950 std::string(),
951 kFormatUrlOmitUsernamePassword,
952 UnescapeRule::NORMAL,
953 NULL,
954 &prefix_len,
955 NULL);
956 EXPECT_EQ(url.spec(), GURL(formatted).spec());
960 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
961 // only results in a different GURL for certain characters.
962 TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) {
963 // A full list of characters which FormatURL should unescape and GURL should
964 // not escape again, when they appear in a query string.
965 const char kUnescapedCharacters[] =
966 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
967 for (unsigned char test_char = 0; test_char < 128; ++test_char) {
968 std::string original_url("http://www.google.com/?");
969 original_url.push_back('%');
970 original_url.append(base::HexEncode(&test_char, 1));
972 GURL url(original_url);
973 size_t prefix_len;
974 base::string16 formatted = FormatUrl(url,
975 std::string(),
976 kFormatUrlOmitUsernamePassword,
977 UnescapeRule::NORMAL,
978 NULL,
979 &prefix_len,
980 NULL);
982 if (test_char &&
983 strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
984 EXPECT_NE(url.spec(), GURL(formatted).spec());
985 } else {
986 EXPECT_EQ(url.spec(), GURL(formatted).spec());
// Drives CheckAdjustedOffsets() (defined elsewhere in this file) over a series
// of URL / language / format-type / unescape-rule combinations. Each case
// passes a table of expected size_t values in which kNpos
// (base::string16::npos) fills the positions that have no numeric entry.
// NOTE(review): presumably each table holds one expected output offset per
// input position, with kNpos marking positions that fall inside a collapsed or
// removed span -- confirm against CheckAdjustedOffsets().
// NOTE(review): in this listing the closing "};" of every table is absent
// (brace-only lines appear to have been dropped) -- verify against the
// original file before relying on the exact table contents.
991 TEST(NetUtilTest, FormatUrlWithOffsets) {
// Empty URL, no expected-offset table (NULL).
992 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing,
993 UnescapeRule::NORMAL, NULL);
// Plain URL with nothing omitted: the table is the identity sequence 0..25.
995 const size_t basic_offsets[] = {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
997 21, 22, 23, 24, 25
999 CheckAdjustedOffsets("http://www.google.com/foo/", "en",
1000 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1001 basic_offsets);
// Username and password present, formatted with kFormatUrlOmitUsernamePassword.
1003 const size_t omit_auth_offsets_1[] = {
1004 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
1005 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1007 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
1008 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1009 omit_auth_offsets_1);
// Username only, formatted with kFormatUrlOmitUsernamePassword.
1011 const size_t omit_auth_offsets_2[] = {
1012 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
1013 15, 16, 17, 18, 19, 20, 21
1015 CheckAdjustedOffsets("http://foo@www.google.com/", "en",
1016 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1017 omit_auth_offsets_2);
// Percent-escaped username and password, formatted with kFormatUrlOmitNothing.
1019 const size_t dont_omit_auth_offsets[] = {
1020 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1021 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1022 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
1023 30, 31
1025 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
1026 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
1027 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1028 dont_omit_auth_offsets);
// view-source: URL with a username, formatted with
// kFormatUrlOmitUsernamePassword.
1030 const size_t view_source_offsets[] = {
1031 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
1032 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
1034 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
1035 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1036 view_source_offsets);
// Punycode hostname with a single IDN label, language "ja".
1038 const size_t idn_hostname_offsets_1[] = {
1039 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1040 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
1041 13, 14, 15, 16, 17, 18, 19
1043 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
1044 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
1045 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1046 idn_hostname_offsets_1);
// Punycode hostname with two IDN labels between ASCII labels, language "zh-CN".
1048 const size_t idn_hostname_offsets_2[] = {
1049 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
1050 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
1051 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1052 kNpos, 19, 20, 21, 22, 23, 24
1054 // Convert punycode to
1055 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
1056 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
1057 "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1058 idn_hostname_offsets_2);
// Percent-escaped space and multi-byte sequences in the path; this is the only
// case here that uses UnescapeRule::SPACES.
1060 const size_t unescape_offsets[] = {
1061 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1062 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
1063 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
1064 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1065 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
// Raw multi-byte sequences in the ref (fragment) component.
// NOTE(review): the table below stops after a trailing comma and the embedded
// numbering skips from 1074 to 1077, so its final entries look truncated in
// this listing -- restore them from the original file.
1067 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
1068 CheckAdjustedOffsets(
1069 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
1070 "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets);
1072 const size_t ref_offsets[] = {
1073 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1074 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
1077 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
1078 CheckAdjustedOffsets(
1079 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
1080 kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets);
// kFormatUrlOmitHTTP on a host that does not begin with "ftp".
1082 const size_t omit_http_offsets[] = {
1083 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
1084 10, 11, 12, 13, 14
1086 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP,
1087 UnescapeRule::NORMAL, omit_http_offsets);
// kFormatUrlOmitHTTP on a host beginning with "ftp.": the table is the
// identity sequence 0..21.
1089 const size_t omit_http_start_with_ftp_offsets[] = {
1090 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1092 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
1093 UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets);
// kFormatUrlOmitAll on a URL that carries a username.
1095 const size_t omit_all_offsets[] = {
1096 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
1097 0, 1, 2, 3, 4, 5, 6, 7
1099 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
1100 UnescapeRule::NORMAL, omit_all_offsets);
1103 } // namespace net