1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
11 #include "base/format_macros.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/stringprintf.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/time/time.h"
16 #include "testing/gtest/include/gtest/gtest.h"
19 using base::ASCIIToUTF16
;
20 using base::WideToUTF16
;
// File-local shorthand for base::string16::npos.
26 const size_t kNpos
= base::string16::npos
;
28 const char* const kLanguages
[] = {
29 "", "en", "zh-CN", "ja", "ko",
30 "he", "ar", "ru", "el", "fr",
31 "de", "pt", "sv", "th", "hi",
32 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
37 const char* const input
;
38 const wchar_t* unicode_output
;
39 const bool unicode_allowed
[arraysize(kLanguages
)];
42 // TODO(jungshik) This is just a random sample of languages and is far
43 // from exhaustive. We may have to generate all the combinations
44 // of languages (powerset of a set of all the languages).
45 const IDNTestCase idn_cases
[] = {
47 {"www.google.com", L
"www.google.com",
48 {true, true, true, true, true,
49 true, true, true, true, true,
50 true, true, true, true, true,
51 true, true, true, true, true,
53 {"www.google.com.", L
"www.google.com.",
54 {true, true, true, true, true,
55 true, true, true, true, true,
56 true, true, true, true, true,
57 true, true, true, true, true,
60 {true, true, true, true, true,
61 true, true, true, true, true,
62 true, true, true, true, true,
63 true, true, true, true, true,
66 {true, true, true, true, true,
67 true, true, true, true, true,
68 true, true, true, true, true,
69 true, true, true, true, true,
72 // Hanzi (Traditional Chinese)
73 {"xn--1lq90ic7f1rc.cn", L
"\x5317\x4eac\x5927\x5b78.cn",
74 {true, false, true, true, false,
75 false, false, false, false, false,
76 false, false, false, false, false,
77 false, false, true, true, false,
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
80 {"xn--cy2a840a.com", L
"\x89c6\x9891.com",
81 {true, false, true, false, false,
82 false, false, false, false, false,
83 false, false, false, false, false,
84 false, false, false, false, false,
87 {"www.xn--123-p18d.com", L
"www.\x4e00" L
"123.com",
88 {true, false, true, true, false,
89 false, false, false, false, false,
90 false, false, false, false, false,
91 false, false, true, true, false,
93 // Hanzi + Latin : U+56FD is simplified and is regarded
94 // as not supported in zh-TW.
95 {"www.xn--hello-9n1hm04c.com", L
"www.hello\x4e2d\x56fd.com",
96 {false, false, true, true, false,
97 false, false, false, false, false,
98 false, false, false, false, false,
99 false, false, false, true, false,
101 // Kanji + Kana (Japanese)
102 {"xn--l8jvb1ey91xtjb.jp", L
"\x671d\x65e5\x3042\x3055\x3072.jp",
103 {true, false, false, true, false,
104 false, false, false, false, false,
105 false, false, false, false, false,
106 false, false, false, true, false,
108 // Katakana including U+30FC
109 {"xn--tckm4i2e.jp", L
"\x30b3\x30de\x30fc\x30b9.jp",
110 {true, false, false, true, false,
111 false, false, false, false, false,
112 false, false, false, false, false,
113 false, false, false, true, false,
115 {"xn--3ck7a7g.jp", L
"\u30ce\u30f3\u30bd.jp",
116 {true, false, false, true, false,
117 false, false, false, false, false,
118 false, false, false, false, false,
119 false, false, false, true, false,
121 // Katakana + Latin (Japanese)
122 // TODO(jungshik): Change 'false' in the first element to 'true'
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
124 // of our IsIDNComponentInSingleScript().
125 {"xn--e-efusa1mzf.jp", L
"e\x30b3\x30de\x30fc\x30b9.jp",
126 {false, false, false, true, false,
127 false, false, false, false, false,
128 false, false, false, false, false,
129 false, false, false, true, false,
131 {"xn--3bkxe.jp", L
"\x30c8\x309a.jp",
132 {false, false, false, true, false,
133 false, false, false, false, false,
134 false, false, false, false, false,
135 false, false, false, true, false,
138 {"www.xn--or3b17p6jjc.kr", L
"www.\xc804\xc790\xc815\xbd80.kr",
139 {true, false, false, false, true,
140 false, false, false, false, false,
141 false, false, false, false, false,
142 false, false, false, true, false,
144 // b<u-umlaut>cher (German)
145 {"xn--bcher-kva.de", L
"b\x00fc" L
"cher.de",
146 {true, false, false, false, false,
147 false, false, false, false, true,
148 true, false, false, false, false,
149 true, false, false, false, false,
152 {"www.xn--frgbolaget-q5a.se", L
"www.f\x00e4rgbolaget.se",
153 {true, false, false, false, false,
154 false, false, false, false, false,
155 true, false, true, false, false,
156 true, false, false, false, false,
158 // c-cedilla (French)
159 {"www.xn--alliancefranaise-npb.fr", L
"www.alliancefran\x00e7" L
"aise.fr",
160 {true, false, false, false, false,
161 false, false, false, false, true,
162 false, true, false, false, false,
163 false, false, false, false, false,
165 // caf<e-acute> (French)
166 {"xn--caf-dma.fr", L
"caf\x00e9.fr",
167 {true, false, false, false, false,
168 false, false, false, false, true,
169 false, true, true, false, false,
170 false, false, false, false, false,
172 // c-cedilla and a with tilde (Portuguese)
173 {"xn--poema-9qae5a.com.br", L
"p\x00e3oema\x00e7\x00e3.com.br",
174 {true, false, false, false, false,
175 false, false, false, false, false,
176 false, true, false, false, false,
177 false, false, false, false, false,
180 {"xn--achy-f6a.com", L
"\x0161" L
"achy.com",
181 {true, false, false, false, false,
182 false, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
186 // TODO(jungshik) : Add examples with Cyrillic letters
187 // only used in some languages written in Cyrillic.
189 {"xn--kxae4bafwg.gr", L
"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
190 {true, false, false, false, false,
191 false, false, false, true, false,
192 false, false, false, false, false,
193 false, true, false, false, false,
195 // Eutopia + 123 (Greek)
196 {"xn---123-pldm0haj2bk.gr",
197 L
"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
198 {true, false, false, false, false,
199 false, false, false, true, false,
200 false, false, false, false, false,
201 false, true, false, false, false,
203 // Cyrillic (Russian)
204 {"xn--n1aeec9b.ru", L
"\x0442\x043e\x0440\x0442\x044b.ru",
205 {true, false, false, false, false,
206 false, false, true, false, false,
207 false, false, false, false, false,
208 false, false, false, false, true,
210 // Cyrillic + 123 (Russian)
211 {"xn---123-45dmmc5f.ru", L
"\x0442\x043e\x0440\x0442\x044b-123.ru",
212 {true, false, false, false, false,
213 false, false, true, false, false,
214 false, false, false, false, false,
215 false, false, false, false, true,
218 {"xn--mgba1fmg.ar", L
"\x0627\x0641\x0644\x0627\x0645.ar",
219 {true, false, false, false, false,
220 false, true, false, false, false,
221 false, false, false, false, false,
222 false, false, false, false, false,
225 {"xn--4dbib.he", L
"\x05d5\x05d0\x05d4.he",
226 {true, false, false, false, false,
227 true, false, false, false, false,
228 false, false, false, false, false,
229 false, false, false, false, true,
232 {"xn--12c2cc4ag3b4ccu.th",
233 L
"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
234 {true, false, false, false, false,
235 false, false, false, false, false,
236 false, false, false, true, false,
237 false, false, false, false, false,
240 {"www.xn--l1b6a9e1b7c.in", L
"www.\x0905\x0915\x094b\x0932\x093e.in",
241 {true, false, false, false, false,
242 false, false, false, false, false,
243 false, false, false, false, true,
244 false, false, false, false, false,
247 {"xn--hello?world.com", NULL
,
248 {false, false, false, false, false,
249 false, false, false, false, false,
250 false, false, false, false, false,
251 false, false, false, false, false,
254 // "payp<alpha>l.com"
255 {"www.xn--paypl-g9d.com", L
"payp\x03b1l.com",
256 {false, false, false, false, false,
257 false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
261 // google.gr with Greek omicron and epsilon
262 {"xn--ggl-6xc1ca.gr", L
"g\x03bf\x03bfgl\x03b5.gr",
263 {false, false, false, false, false,
264 false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
268 // google.ru with Cyrillic o
269 {"xn--ggl-tdd6ba.ru", L
"g\x043e\x043egl\x0435.ru",
270 {false, false, false, false, false,
271 false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
275 // h<e with acute>llo<China in Han>.cn
276 {"xn--hllo-bpa7979ih5m.cn", L
"h\x00e9llo\x4e2d\x56fd.cn",
277 {false, false, false, false, false,
278 false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru
283 {"xn--2xa6t2b.ru", L
"\x03c1\x0430\x0443.ru",
284 {false, false, false, false, false,
285 false, false, false, false, false,
286 false, false, false, false, false,
287 false, false, false, false, false,
289 // One that's really long that will force a buffer realloc
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
292 L
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
294 {true, true, true, true, true,
295 true, true, true, true, true,
296 true, true, true, true, true,
297 true, true, true, true, true,
299 // Test cases for characters we blacklisted although allowed in IDN.
300 // Embedded spaces will be turned to %20 in the display.
301 // TODO(jungshik): We need to have more cases. This is a typical
302 // data-driven trap. The following test cases need to be separated
303 // and tested only for a couple of languages.
304 {"xn--osd3820f24c.kr", L
"\xac00\xb098\x115f.kr",
305 {false, false, false, false, false,
306 false, false, false, false, false,
307 false, false, false, false, false,
308 false, false, false, false, false,
310 {"www.xn--google-ho0coa.com", L
"www.\x2039google\x203a.com",
311 {false, false, false, false, false,
312 false, false, false, false, false,
313 false, false, false, false, false,
314 false, false, false, false, false,
316 {"google.xn--comabc-k8d", L
"google.com\x0338" L
"abc",
317 {false, false, false, false, false,
318 false, false, false, false, false,
319 false, false, false, false, false,
320 false, false, false, false, false,
322 {"google.xn--com-oh4ba.evil.jp", L
"google.com\x309a\x309a.evil.jp",
323 {false, false, false, false, false,
324 false, false, false, false, false,
325 false, false, false, false, false,
326 false, false, false, false, false,
328 {"google.xn--comevil-v04f.jp", L
"google.com\x30ce" L
"evil.jp",
329 {false, false, false, false, false,
330 false, false, false, false, false,
331 false, false, false, false, false,
332 false, false, false, false, false,
334 // Padlock icon spoof.
335 {"xn--google-hj64e", L
"\U0001f512google.com",
336 {false, false, false, false, false,
337 false, false, false, false, false,
338 false, false, false, false, false,
339 false, false, false, false, false,
341 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
342 // all strings with the surrogate '\xdd12'.
343 {"xn--fk9c.com", L
"\U00010912.com",
344 {true, false, false, false, false,
345 false, false, false, false, false,
346 false, false, false, false, false,
347 false, false, false, false, false,
350 // These two cases are special. We need a separate test.
351 // U+3000 and U+3002 are normalized to ASCII space and dot.
352 {"xn-- -kq6ay5z.cn", L
"\x4e2d\x56fd\x3000.cn",
353 {false, false, true, false, false,
354 false, false, false, false, false,
355 false, false, false, false, false,
356 false, false, true, false, false,
358 {"xn--fiqs8s.cn", L
"\x4e2d\x56fd\x3002" L
"cn",
359 {false, false, true, false, false,
360 false, false, false, false, false,
361 false, false, false, false, false,
362 false, false, true, false, false,
367 struct AdjustOffsetCase
{
369 size_t output_offset
;
373 const char* const description
;
374 const char* const input
;
375 const char* const languages
;
376 FormatUrlTypes format_types
;
377 UnescapeRule::Type escape_rules
;
378 const wchar_t* output
; // Use |wchar_t| to handle Unicode constants easily.
382 // A helper for IDN*{Fast,Slow}.
383 // Append "::<language list>" to |expected| and |actual| to make it
384 // easy to tell which sub-case fails without debugging.
385 void AppendLanguagesToOutputs(const char* languages
,
386 base::string16
* expected
,
387 base::string16
* actual
) {
388 base::string16 to_append
= ASCIIToUTF16("::") + ASCIIToUTF16(languages
);
389 expected
->append(to_append
);
390 actual
->append(to_append
);
393 // A pair of helpers for the FormatUrlWithOffsets() test.
//
// VerboseExpect: expects |actual| to equal |expected|. The streamed failure
// message prints |original_url|, |position| and |formatted_url| so the
// offending offset can be identified.
// NOTE(review): |actual| and |position| are used below, but their parameter
// declarations are not visible in this listing (listing lines 395 and 397
// appear to be missing) -- confirm against the full file.
394 void VerboseExpect(size_t expected
,
396 const std::string
& original_url
,
398 const base::string16
& formatted_url
) {
399 EXPECT_EQ(expected
, actual
) << "Original URL: " << original_url
400 << " (at char " << position
<< ")\nFormatted URL: " << formatted_url
;
403 void CheckAdjustedOffsets(const std::string
& url_string
,
404 const std::string
& languages
,
405 FormatUrlTypes format_types
,
406 UnescapeRule::Type unescape_rules
,
407 const size_t* output_offsets
) {
408 GURL
url(url_string
);
409 size_t url_length
= url_string
.length();
410 std::vector
<size_t> offsets
;
411 for (size_t i
= 0; i
<= url_length
+ 1; ++i
)
412 offsets
.push_back(i
);
413 offsets
.push_back(500000); // Something larger than any input length.
414 offsets
.push_back(std::string::npos
);
415 base::string16 formatted_url
= FormatUrlWithOffsets(url
, languages
,
416 format_types
, unescape_rules
, NULL
, NULL
, &offsets
);
417 for (size_t i
= 0; i
< url_length
; ++i
)
418 VerboseExpect(output_offsets
[i
], offsets
[i
], url_string
, i
, formatted_url
);
419 VerboseExpect(formatted_url
.length(), offsets
[url_length
], url_string
,
420 url_length
, formatted_url
);
421 VerboseExpect(base::string16::npos
, offsets
[url_length
+ 1], url_string
,
422 500000, formatted_url
);
423 VerboseExpect(base::string16::npos
, offsets
[url_length
+ 2], url_string
,
424 std::string::npos
, formatted_url
);
427 } // anonymous namespace
// Runs IDNToUnicode() on idn_cases[i].input with kLanguages[j] for each pair
// (i, j) and compares the result with idn_cases[i].unicode_output when
// unicode_allowed[j] is true, or with the unchanged ASCII input otherwise.
// Both strings get the language list appended before the comparison.
429 TEST(NetUtilTest
, IDNToUnicodeFast
) {
430 for (size_t i
= 0; i
< arraysize(idn_cases
); i
++) {
431 for (size_t j
= 0; j
< arraysize(kLanguages
); j
++) {
432 // kLanguages[3] "ja" || [17] "zh-TW,en" || [18] "ko,ja" -> IDNToUnicodeSlow
// NOTE(review): the statement guarded by this `if` is not visible in this
// listing (listing line 434 is missing); presumably these languages are
// skipped here and covered by IDNToUnicodeSlow -- confirm.
433 if (j
== 3 || j
== 17 || j
== 18)
435 base::string16
output(IDNToUnicode(idn_cases
[i
].input
, kLanguages
[j
]));
// Expected value: the Unicode form if allowed for this language list,
// otherwise the punycode input as-is.
436 base::string16
expected(idn_cases
[i
].unicode_allowed
[j
] ?
437 WideToUTF16(idn_cases
[i
].unicode_output
) :
438 ASCIIToUTF16(idn_cases
[i
].input
));
439 AppendLanguagesToOutputs(kLanguages
[j
], &expected
, &output
);
440 EXPECT_EQ(expected
, output
) << "input: \"" << idn_cases
[i
].input
441 << "\", languages: \"" << kLanguages
[j
]
// Counterpart of IDNToUnicodeFast: same comparison of IDNToUnicode() output
// against unicode_output / the unchanged input, with the index condition
// below negated.
447 TEST(NetUtilTest
, IDNToUnicodeSlow
) {
448 for (size_t i
= 0; i
< arraysize(idn_cases
); i
++) {
449 for (size_t j
= 0; j
< arraysize(kLanguages
); j
++) {
450 // !(kLanguages[3] "ja" || [17] "zh-TW,en" || [18] "ko,ja") -> IDNToUnicodeFast
// NOTE(review): the statement guarded by this `if` is not visible in this
// listing (listing line 452 is missing); presumably every other language
// list is skipped here and covered by IDNToUnicodeFast -- confirm.
451 if (!(j
== 3 || j
== 17 || j
== 18))
453 base::string16
output(IDNToUnicode(idn_cases
[i
].input
, kLanguages
[j
]));
// Expected value: the Unicode form if allowed for this language list,
// otherwise the punycode input as-is.
454 base::string16
expected(idn_cases
[i
].unicode_allowed
[j
] ?
455 WideToUTF16(idn_cases
[i
].unicode_output
) :
456 ASCIIToUTF16(idn_cases
[i
].input
));
457 AppendLanguagesToOutputs(kLanguages
[j
], &expected
, &output
);
458 EXPECT_EQ(expected
, output
) << "input: \"" << idn_cases
[i
].input
459 << "\", languages: \"" << kLanguages
[j
]
465 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
466 // te), which was causing a crash (See http://crbug.com/510551). This may be an
467 // icu bug, but regardless, that should not cause a crash.
468 TEST(NetUtilTest
, IDNToUnicodeNeverCrashes
) {
469 for (char c1
= 'a'; c1
<= 'z'; c1
++) {
470 for (char c2
= 'a'; c2
<= 'z'; c2
++) {
471 std::string lang
= base::StringPrintf("%c%c", c1
, c2
);
472 base::string16
output(IDNToUnicode("xn--74h", lang
));
477 TEST(NetUtilTest
, StripWWW
) {
478 EXPECT_EQ(base::string16(), StripWWW(base::string16()));
479 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www.")));
480 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
481 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
484 // This is currently a Windows-specific function.
488 struct GetDirectoryListingEntryCase
{
490 const char* const raw_bytes
;
494 const char* const expected
;
499 TEST(NetUtilTest
, GetDirectoryListingEntry
) {
500 const GetDirectoryListingEntryCase test_cases
[] = {
506 "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
512 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
519 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
521 // U+D55C U+AE00. raw_bytes is empty (either a local file with
522 // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
523 {L
"\xD55C\xAE00.txt",
528 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\","
529 "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"},
530 // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
531 // a local or remote file in EUC-KR.
532 {L
"\xD55C\xAE00.txt",
533 "\xC7\xD1\xB1\xDB.txt",
537 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\""
538 ",0,\"9.8 kB\",\"\");</script>\n"},
541 for (size_t i
= 0; i
< arraysize(test_cases
); ++i
) {
542 const std::string results
= GetDirectoryListingEntry(
543 WideToUTF16(test_cases
[i
].name
),
544 test_cases
[i
].raw_bytes
,
545 test_cases
[i
].is_dir
,
546 test_cases
[i
].filesize
,
548 EXPECT_EQ(test_cases
[i
].expected
, results
);
554 TEST(NetUtilTest
, FormatUrl
) {
555 FormatUrlTypes default_format_type
= kFormatUrlOmitUsernamePassword
;
556 const UrlTestData tests
[] = {
557 {"Empty URL", "", "", default_format_type
, UnescapeRule::NORMAL
, L
"", 0},
560 "http://www.google.com/", "", default_format_type
, UnescapeRule::NORMAL
,
561 L
"http://www.google.com/", 7},
563 {"With a port number and a reference",
564 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type
,
565 UnescapeRule::NORMAL
,
566 L
"http://www.google.com:8080/#\x30B0", 7},
568 // -------- IDN tests --------
569 {"Japanese IDN with ja",
570 "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type
,
571 UnescapeRule::NORMAL
, L
"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
573 {"Japanese IDN with en",
574 "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type
,
575 UnescapeRule::NORMAL
, L
"http://xn--l8jvb1ey91xtjb.jp/", 7},
577 {"Japanese IDN without any languages",
578 "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type
,
579 UnescapeRule::NORMAL
,
580 // Single script is safe for empty languages.
581 L
"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
583 {"mailto: with Japanese IDN",
584 "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type
,
585 UnescapeRule::NORMAL
,
586 // GURL doesn't assume an email address's domain part as a host name.
587 L
"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
589 {"file: with Japanese IDN",
590 "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type
,
591 UnescapeRule::NORMAL
,
592 L
"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
594 {"ftp: with Japanese IDN",
595 "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type
,
596 UnescapeRule::NORMAL
,
597 L
"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
599 // -------- omit_username_password flag tests --------
600 {"With username and password, omit_username_password=false",
601 "http://user:passwd@example.com/foo", "",
602 kFormatUrlOmitNothing
, UnescapeRule::NORMAL
,
603 L
"http://user:passwd@example.com/foo", 19},
605 {"With username and password, omit_username_password=true",
606 "http://user:passwd@example.com/foo", "", default_format_type
,
607 UnescapeRule::NORMAL
, L
"http://example.com/foo", 7},
609 {"With username and no password",
610 "http://user@example.com/foo", "", default_format_type
,
611 UnescapeRule::NORMAL
, L
"http://example.com/foo", 7},
613 {"Just '@' without username and password",
614 "http://@example.com/foo", "", default_format_type
, UnescapeRule::NORMAL
,
615 L
"http://example.com/foo", 7},
617 // GURL doesn't think local-part of an email address is username for URL.
618 {"mailto:, omit_username_password=true",
619 "mailto:foo@example.com", "", default_format_type
, UnescapeRule::NORMAL
,
620 L
"mailto:foo@example.com", 7},
622 // -------- unescape flag tests --------
624 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
625 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
626 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type
,
628 // GURL parses %-encoded hostnames into Punycode.
629 L
"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
630 L
"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
632 {"Unescape normally",
633 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
634 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
635 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type
,
636 UnescapeRule::NORMAL
,
637 L
"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
638 L
"?q=\x30B0\x30FC\x30B0\x30EB", 7},
640 {"Unescape normally with BiDi control character",
641 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type
,
642 UnescapeRule::NORMAL
, L
"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
644 {"Unescape normally including unescape spaces",
645 "http://www.google.com/search?q=Hello%20World", "en", default_format_type
,
646 UnescapeRule::SPACES
, L
"http://www.google.com/search?q=Hello World", 7},
649 {"unescape=true with some special characters",
650 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
651 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
652 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
654 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
656 // -------- omit http: --------
657 {"omit http with user name",
658 "http://user@example.com/foo", "", kFormatUrlOmitAll
,
659 UnescapeRule::NORMAL
, L
"example.com/foo", 0},
662 "http://www.google.com/", "en", kFormatUrlOmitHTTP
,
663 UnescapeRule::NORMAL
, L
"www.google.com/",
666 {"omit http with https",
667 "https://www.google.com/", "en", kFormatUrlOmitHTTP
,
668 UnescapeRule::NORMAL
, L
"https://www.google.com/",
671 {"omit http starts with ftp.",
672 "http://ftp.google.com/", "en", kFormatUrlOmitHTTP
,
673 UnescapeRule::NORMAL
, L
"http://ftp.google.com/",
676 // -------- omit trailing slash on bare hostname --------
677 {"omit slash when it's the entire path",
678 "http://www.google.com/", "en",
679 kFormatUrlOmitTrailingSlashOnBareHostname
, UnescapeRule::NORMAL
,
680 L
"http://www.google.com", 7},
681 {"omit slash when there's a ref",
682 "http://www.google.com/#ref", "en",
683 kFormatUrlOmitTrailingSlashOnBareHostname
, UnescapeRule::NORMAL
,
684 L
"http://www.google.com/#ref", 7},
685 {"omit slash when there's a query",
686 "http://www.google.com/?", "en",
687 kFormatUrlOmitTrailingSlashOnBareHostname
, UnescapeRule::NORMAL
,
688 L
"http://www.google.com/?", 7},
689 {"omit slash when it's not the entire path",
690 "http://www.google.com/foo", "en",
691 kFormatUrlOmitTrailingSlashOnBareHostname
, UnescapeRule::NORMAL
,
692 L
"http://www.google.com/foo", 7},
693 {"omit slash for nonstandard URLs",
694 "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname
,
695 UnescapeRule::NORMAL
, L
"data:/", 5},
696 {"omit slash for file URLs",
697 "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname
,
698 UnescapeRule::NORMAL
, L
"file:///", 7},
700 // -------- view-source: --------
702 "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type
,
703 UnescapeRule::NORMAL
, L
"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
706 {"view-source of view-source",
707 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
708 default_format_type
, UnescapeRule::NORMAL
,
709 L
"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
711 // view-source should omit http and trailing slash where non-view-source
713 {"view-source omit http",
714 "view-source:http://a.b/c", "en", kFormatUrlOmitAll
,
715 UnescapeRule::NORMAL
, L
"view-source:a.b/c",
717 {"view-source omit http starts with ftp.",
718 "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll
,
719 UnescapeRule::NORMAL
, L
"view-source:http://ftp.b/c",
721 {"view-source omit slash when it's the entire path",
722 "view-source:http://a.b/", "en", kFormatUrlOmitAll
,
723 UnescapeRule::NORMAL
, L
"view-source:a.b",
727 for (size_t i
= 0; i
< arraysize(tests
); ++i
) {
729 base::string16 formatted
= FormatUrl(
730 GURL(tests
[i
].input
), tests
[i
].languages
, tests
[i
].format_types
,
731 tests
[i
].escape_rules
, NULL
, &prefix_len
, NULL
);
732 EXPECT_EQ(WideToUTF16(tests
[i
].output
), formatted
) << tests
[i
].description
;
733 EXPECT_EQ(tests
[i
].prefix_len
, prefix_len
) << tests
[i
].description
;
737 TEST(NetUtilTest
, FormatUrlParsed
) {
740 base::string16 formatted
= FormatUrl(
741 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
742 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
743 "ja", kFormatUrlOmitNothing
, UnescapeRule::NONE
, &parsed
, NULL
,
745 EXPECT_EQ(WideToUTF16(
746 L
"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
747 L
"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted
);
748 EXPECT_EQ(WideToUTF16(L
"%E3%82%B0"),
749 formatted
.substr(parsed
.username
.begin
, parsed
.username
.len
));
750 EXPECT_EQ(WideToUTF16(L
"%E3%83%BC"),
751 formatted
.substr(parsed
.password
.begin
, parsed
.password
.len
));
752 EXPECT_EQ(WideToUTF16(L
"\x30B0\x30FC\x30B0\x30EB.jp"),
753 formatted
.substr(parsed
.host
.begin
, parsed
.host
.len
));
754 EXPECT_EQ(WideToUTF16(L
"8080"),
755 formatted
.substr(parsed
.port
.begin
, parsed
.port
.len
));
756 EXPECT_EQ(WideToUTF16(L
"/%E3%82%B0/"),
757 formatted
.substr(parsed
.path
.begin
, parsed
.path
.len
));
758 EXPECT_EQ(WideToUTF16(L
"q=%E3%82%B0"),
759 formatted
.substr(parsed
.query
.begin
, parsed
.query
.len
));
760 EXPECT_EQ(WideToUTF16(L
"\x30B0"),
761 formatted
.substr(parsed
.ref
.begin
, parsed
.ref
.len
));
764 formatted
= FormatUrl(
765 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
766 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
767 "ja", kFormatUrlOmitNothing
, UnescapeRule::NORMAL
, &parsed
, NULL
,
769 EXPECT_EQ(WideToUTF16(L
"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
770 L
"/\x30B0/?q=\x30B0#\x30B0"), formatted
);
771 EXPECT_EQ(WideToUTF16(L
"\x30B0"),
772 formatted
.substr(parsed
.username
.begin
, parsed
.username
.len
));
773 EXPECT_EQ(WideToUTF16(L
"\x30FC"),
774 formatted
.substr(parsed
.password
.begin
, parsed
.password
.len
));
775 EXPECT_EQ(WideToUTF16(L
"\x30B0\x30FC\x30B0\x30EB.jp"),
776 formatted
.substr(parsed
.host
.begin
, parsed
.host
.len
));
777 EXPECT_EQ(WideToUTF16(L
"8080"),
778 formatted
.substr(parsed
.port
.begin
, parsed
.port
.len
));
779 EXPECT_EQ(WideToUTF16(L
"/\x30B0/"),
780 formatted
.substr(parsed
.path
.begin
, parsed
.path
.len
));
781 EXPECT_EQ(WideToUTF16(L
"q=\x30B0"),
782 formatted
.substr(parsed
.query
.begin
, parsed
.query
.len
));
783 EXPECT_EQ(WideToUTF16(L
"\x30B0"),
784 formatted
.substr(parsed
.ref
.begin
, parsed
.ref
.len
));
786 // Omit_username_password + unescape case.
787 formatted
= FormatUrl(
788 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
789 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
790 "ja", kFormatUrlOmitUsernamePassword
, UnescapeRule::NORMAL
, &parsed
,
792 EXPECT_EQ(WideToUTF16(L
"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
793 L
"/\x30B0/?q=\x30B0#\x30B0"), formatted
);
794 EXPECT_FALSE(parsed
.username
.is_valid());
795 EXPECT_FALSE(parsed
.password
.is_valid());
796 EXPECT_EQ(WideToUTF16(L
"\x30B0\x30FC\x30B0\x30EB.jp"),
797 formatted
.substr(parsed
.host
.begin
, parsed
.host
.len
));
798 EXPECT_EQ(WideToUTF16(L
"8080"),
799 formatted
.substr(parsed
.port
.begin
, parsed
.port
.len
));
800 EXPECT_EQ(WideToUTF16(L
"/\x30B0/"),
801 formatted
.substr(parsed
.path
.begin
, parsed
.path
.len
));
802 EXPECT_EQ(WideToUTF16(L
"q=\x30B0"),
803 formatted
.substr(parsed
.query
.begin
, parsed
.query
.len
));
804 EXPECT_EQ(WideToUTF16(L
"\x30B0"),
805 formatted
.substr(parsed
.ref
.begin
, parsed
.ref
.len
));
809 FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"),
811 kFormatUrlOmitUsernamePassword
,
812 UnescapeRule::NORMAL
,
816 EXPECT_EQ(WideToUTF16(L
"view-source:http://host:81/path?query#ref"),
818 EXPECT_EQ(WideToUTF16(L
"view-source:http"),
819 formatted
.substr(parsed
.scheme
.begin
, parsed
.scheme
.len
));
820 EXPECT_FALSE(parsed
.username
.is_valid());
821 EXPECT_FALSE(parsed
.password
.is_valid());
822 EXPECT_EQ(WideToUTF16(L
"host"),
823 formatted
.substr(parsed
.host
.begin
, parsed
.host
.len
));
824 EXPECT_EQ(WideToUTF16(L
"81"),
825 formatted
.substr(parsed
.port
.begin
, parsed
.port
.len
));
826 EXPECT_EQ(WideToUTF16(L
"/path"),
827 formatted
.substr(parsed
.path
.begin
, parsed
.path
.len
));
828 EXPECT_EQ(WideToUTF16(L
"query"),
829 formatted
.substr(parsed
.query
.begin
, parsed
.query
.len
));
830 EXPECT_EQ(WideToUTF16(L
"ref"),
831 formatted
.substr(parsed
.ref
.begin
, parsed
.ref
.len
));
834 formatted
= FormatUrl(GURL("http://host:8000/a?b=c#d"),
837 UnescapeRule::NORMAL
,
841 EXPECT_EQ(WideToUTF16(L
"host:8000/a?b=c#d"), formatted
);
842 EXPECT_FALSE(parsed
.scheme
.is_valid());
843 EXPECT_FALSE(parsed
.username
.is_valid());
844 EXPECT_FALSE(parsed
.password
.is_valid());
845 EXPECT_EQ(WideToUTF16(L
"host"),
846 formatted
.substr(parsed
.host
.begin
, parsed
.host
.len
));
847 EXPECT_EQ(WideToUTF16(L
"8000"),
848 formatted
.substr(parsed
.port
.begin
, parsed
.port
.len
));
849 EXPECT_EQ(WideToUTF16(L
"/a"),
850 formatted
.substr(parsed
.path
.begin
, parsed
.path
.len
));
851 EXPECT_EQ(WideToUTF16(L
"b=c"),
852 formatted
.substr(parsed
.query
.begin
, parsed
.query
.len
));
853 EXPECT_EQ(WideToUTF16(L
"d"),
854 formatted
.substr(parsed
.ref
.begin
, parsed
.ref
.len
));
856 // omit http starts with ftp case.
857 formatted
= FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"),
860 UnescapeRule::NORMAL
,
864 EXPECT_EQ(WideToUTF16(L
"http://ftp.host:8000/a?b=c#d"), formatted
);
865 EXPECT_TRUE(parsed
.scheme
.is_valid());
866 EXPECT_FALSE(parsed
.username
.is_valid());
867 EXPECT_FALSE(parsed
.password
.is_valid());
868 EXPECT_EQ(WideToUTF16(L
"http"),
869 formatted
.substr(parsed
.scheme
.begin
, parsed
.scheme
.len
));
870 EXPECT_EQ(WideToUTF16(L
"ftp.host"),
871 formatted
.substr(parsed
.host
.begin
, parsed
.host
.len
));
872 EXPECT_EQ(WideToUTF16(L
"8000"),
873 formatted
.substr(parsed
.port
.begin
, parsed
.port
.len
));
874 EXPECT_EQ(WideToUTF16(L
"/a"),
875 formatted
.substr(parsed
.path
.begin
, parsed
.path
.len
));
876 EXPECT_EQ(WideToUTF16(L
"b=c"),
877 formatted
.substr(parsed
.query
.begin
, parsed
.query
.len
));
878 EXPECT_EQ(WideToUTF16(L
"d"),
879 formatted
.substr(parsed
.ref
.begin
, parsed
.ref
.len
));
881 // omit http starts with 'f' case.
882 formatted
= FormatUrl(GURL("http://f/"),
885 UnescapeRule::NORMAL
,
889 EXPECT_EQ(WideToUTF16(L
"f/"), formatted
);
890 EXPECT_FALSE(parsed
.scheme
.is_valid());
891 EXPECT_FALSE(parsed
.username
.is_valid());
892 EXPECT_FALSE(parsed
.password
.is_valid());
893 EXPECT_FALSE(parsed
.port
.is_valid());
894 EXPECT_TRUE(parsed
.path
.is_valid());
895 EXPECT_FALSE(parsed
.query
.is_valid());
896 EXPECT_FALSE(parsed
.ref
.is_valid());
897 EXPECT_EQ(WideToUTF16(L
"f"),
898 formatted
.substr(parsed
.host
.begin
, parsed
.host
.len
));
899 EXPECT_EQ(WideToUTF16(L
"/"),
900 formatted
.substr(parsed
.path
.begin
, parsed
.path
.len
));
903 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
904 // results in the original GURL, for each ASCII character in the path.
// The loop covers character codes 32 through 127, each appended unescaped as
// the whole path of http://www.google.com/.
// NOTE(review): some FormatUrl() arguments are not visible in this listing
// (listing lines 911 and 914-916 are missing) -- confirm against the full file.
905 TEST(NetUtilTest
, FormatUrlRoundTripPathASCII
) {
906 for (unsigned char test_char
= 32; test_char
< 128; ++test_char
) {
907 GURL
url(std::string("http://www.google.com/") +
908 static_cast<char>(test_char
));
910 base::string16 formatted
= FormatUrl(url
,
912 kFormatUrlOmitUsernamePassword
,
913 UnescapeRule::NORMAL
,
// Round trip: re-parsing the formatted text must give the same spec.
917 EXPECT_EQ(url
.spec(), GURL(formatted
).spec());
921 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
922 // results in the original GURL, for each escaped ASCII character in the path.
// Only character codes 32 through 127 are exercised.
923 TEST(NetUtilTest
, FormatUrlRoundTripPathEscaped
) {
924 for (unsigned char test_char
= 32; test_char
< 128; ++test_char
) {
// The path is '%' followed by the base::HexEncode output for the single byte
// test_char, i.e. the character appears in escaped form rather than raw.
925 std::string
original_url("http://www.google.com/");
926 original_url
.push_back('%');
927 original_url
.append(base::HexEncode(&test_char
, 1));
929 GURL
url(original_url
);
// Format the URL, passing kFormatUrlOmitUsernamePassword and
// UnescapeRule::NORMAL.
931 base::string16 formatted
= FormatUrl(url
,
933 kFormatUrlOmitUsernamePassword
,
934 UnescapeRule::NORMAL
,
// Re-parsing the formatted text as a GURL must give back the original spec.
938 EXPECT_EQ(url
.spec(), GURL(formatted
).spec());
942 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
943 // results in the original GURL, for each ASCII character in the query.
// Only character codes 32 through 127 are exercised; codes below 32 are not
// covered by this loop.
944 TEST(NetUtilTest
, FormatUrlRoundTripQueryASCII
) {
// Use each character code in [32, 128) as the single character of the query.
945 for (unsigned char test_char
= 32; test_char
< 128; ++test_char
) {
// The raw (unescaped) character is appended directly after the '?'.
946 GURL
url(std::string("http://www.google.com/?") +
947 static_cast<char>(test_char
));
// Format the URL, passing kFormatUrlOmitUsernamePassword and
// UnescapeRule::NORMAL.
949 base::string16 formatted
= FormatUrl(url
,
951 kFormatUrlOmitUsernamePassword
,
952 UnescapeRule::NORMAL
,
// Re-parsing the formatted text as a GURL must give back the original spec.
956 EXPECT_EQ(url
.spec(), GURL(formatted
).spec());
960 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
961 // only results in a different GURL for certain characters.
962 TEST(NetUtilTest
, FormatUrlRoundTripQueryEscaped
) {
963 // A full list of characters which FormatUrl should unescape and GURL should
964 // not escape again, when they appear in a query string.
965 const char kUnescapedCharacters
[] =
966 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
// This loop starts at 0, so character codes below 32 are included here.
967 for (unsigned char test_char
= 0; test_char
< 128; ++test_char
) {
// The query is '%' followed by the base::HexEncode output for the single byte
// test_char, i.e. the character appears in escaped form rather than raw.
968 std::string
original_url("http://www.google.com/?");
969 original_url
.push_back('%');
970 original_url
.append(base::HexEncode(&test_char
, 1));
972 GURL
url(original_url
);
// Format the URL, passing kFormatUrlOmitUsernamePassword and
// UnescapeRule::NORMAL.
974 base::string16 formatted
= FormatUrl(url
,
976 kFormatUrlOmitUsernamePassword
,
977 UnescapeRule::NORMAL
,
// Characters found in kUnescapedCharacters are expected to change the spec
// on the round trip (unescaped by FormatUrl and not re-escaped by GURL).
983 strchr(kUnescapedCharacters
, static_cast<char>(test_char
))) {
984 EXPECT_NE(url
.spec(), GURL(formatted
).spec());
// NOTE(review): presumably the alternative branch - every other character
// must round-trip to the identical spec; confirm against the full condition.
986 EXPECT_EQ(url
.spec(), GURL(formatted
).spec());
991 TEST(NetUtilTest
, FormatUrlWithOffsets
) {
992 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing
,
993 UnescapeRule::NORMAL
, NULL
);
995 const size_t basic_offsets
[] = {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
999 CheckAdjustedOffsets("http://www.google.com/foo/", "en",
1000 kFormatUrlOmitNothing
, UnescapeRule::NORMAL
,
1003 const size_t omit_auth_offsets_1
[] = {
1004 0, 1, 2, 3, 4, 5, 6, 7, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, 7,
1005 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1007 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
1008 kFormatUrlOmitUsernamePassword
, UnescapeRule::NORMAL
,
1009 omit_auth_offsets_1
);
1011 const size_t omit_auth_offsets_2
[] = {
1012 0, 1, 2, 3, 4, 5, 6, 7, kNpos
, kNpos
, kNpos
, 7, 8, 9, 10, 11, 12, 13, 14,
1013 15, 16, 17, 18, 19, 20, 21
1015 CheckAdjustedOffsets("http://foo@www.google.com/", "en",
1016 kFormatUrlOmitUsernamePassword
, UnescapeRule::NORMAL
,
1017 omit_auth_offsets_2
);
1019 const size_t dont_omit_auth_offsets
[] = {
1020 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
,
1021 kNpos
, kNpos
, 11, 12, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
,
1022 kNpos
, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
1025 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
1026 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
1027 kFormatUrlOmitNothing
, UnescapeRule::NORMAL
,
1028 dont_omit_auth_offsets
);
1030 const size_t view_source_offsets
[] = {
1031 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos
,
1032 kNpos
, kNpos
, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
1034 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
1035 kFormatUrlOmitUsernamePassword
, UnescapeRule::NORMAL
,
1036 view_source_offsets
);
1038 const size_t idn_hostname_offsets_1
[] = {
1039 0, 1, 2, 3, 4, 5, 6, 7, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
,
1040 kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, 12,
1041 13, 14, 15, 16, 17, 18, 19
1043 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
1044 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
1045 kFormatUrlOmitNothing
, UnescapeRule::NORMAL
,
1046 idn_hostname_offsets_1
);
1048 const size_t idn_hostname_offsets_2
[] = {
1049 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
,
1050 kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, 14, 15, kNpos
, kNpos
, kNpos
,
1051 kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
,
1052 kNpos
, 19, 20, 21, 22, 23, 24
1054 // Convert punycode to
1055 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
1056 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
1057 "zh-CN", kFormatUrlOmitNothing
, UnescapeRule::NORMAL
,
1058 idn_hostname_offsets_2
);
1060 const size_t unescape_offsets
[] = {
1061 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1062 21, 22, 23, 24, 25, kNpos
, kNpos
, 26, 27, 28, 29, 30, kNpos
, kNpos
, kNpos
,
1063 kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, 31, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
,
1064 kNpos
, kNpos
, kNpos
, 32, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
,
1065 kNpos
, 33, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
1067 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
1068 CheckAdjustedOffsets(
1069 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
1070 "en", kFormatUrlOmitNothing
, UnescapeRule::SPACES
, unescape_offsets
);
1072 const size_t ref_offsets
[] = {
1073 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1074 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos
, kNpos
, 32, kNpos
, kNpos
,
1077 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
1078 CheckAdjustedOffsets(
1079 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
1080 kFormatUrlOmitNothing
, UnescapeRule::NORMAL
, ref_offsets
);
1082 const size_t omit_http_offsets
[] = {
1083 0, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
1086 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP
,
1087 UnescapeRule::NORMAL
, omit_http_offsets
);
1089 const size_t omit_http_start_with_ftp_offsets
[] = {
1090 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1092 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP
,
1093 UnescapeRule::NORMAL
, omit_http_start_with_ftp_offsets
);
1095 const size_t omit_all_offsets
[] = {
1096 0, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, kNpos
, 0, kNpos
, kNpos
, kNpos
, kNpos
,
1097 0, 1, 2, 3, 4, 5, 6, 7
1099 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll
,
1100 UnescapeRule::NORMAL
, omit_all_offsets
);