Update V8 to version 4.7.21.
[chromium-blink-merge.git] / components / url_formatter / url_formatter_unittest.cc
blob0dd635a9488c9dc89b43e574e11ccd548d1b30e6
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/url_formatter/url_formatter.h"
7 #include <string.h>
9 #include <vector>
11 #include "base/macros.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/stringprintf.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "url/gurl.h"
19 namespace url_formatter {
21 namespace {
23 using base::WideToUTF16;
24 using base::ASCIIToUTF16;
26 const size_t kNpos = base::string16::npos;
// Language lists passed as the |languages| argument of IDNToUnicode() by the
// IDN tests. IDNTestCase::unicode_allowed is indexed in parallel with this
// array, and the IDNToUnicode{Fast,Slow} tests select entries 3, 17 and 18 by
// index, so entries must not be reordered without updating both.
const char* const kLanguages[] = {
    "",          // 0
    "en",        // 1
    "zh-CN",     // 2
    "ja",        // 3
    "ko",        // 4
    "he",        // 5
    "ar",        // 6
    "ru",        // 7
    "el",        // 8
    "fr",        // 9
    "de",        // 10
    "pt",        // 11
    "sv",        // 12
    "th",        // 13
    "hi",        // 14
    "de,en",     // 15
    "el,en",     // 16
    "zh-TW,en",  // 17
    "ko,ja",     // 18
    "he,ru,en",  // 19
    "zh,ru,en",  // 20
};
36 struct IDNTestCase {
37 const char* const input;
38 const wchar_t* unicode_output;
39 const bool unicode_allowed[arraysize(kLanguages)];
42 // TODO(jungshik) This is just a random sample of languages and is far
43 // from exhaustive. We may have to generate all the combinations
44 // of languages (powerset of a set of all the languages).
45 const IDNTestCase idn_cases[] = {
46 // No IDN
47 {"www.google.com", L"www.google.com",
48 {true, true, true, true, true,
49 true, true, true, true, true,
50 true, true, true, true, true,
51 true, true, true, true, true,
52 true}},
53 {"www.google.com.", L"www.google.com.",
54 {true, true, true, true, true,
55 true, true, true, true, true,
56 true, true, true, true, true,
57 true, true, true, true, true,
58 true}},
59 {".", L".",
60 {true, true, true, true, true,
61 true, true, true, true, true,
62 true, true, true, true, true,
63 true, true, true, true, true,
64 true}},
65 {"", L"",
66 {true, true, true, true, true,
67 true, true, true, true, true,
68 true, true, true, true, true,
69 true, true, true, true, true,
70 true}},
71 // IDN
72 // Hanzi (Traditional Chinese)
73 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
74 {true, false, true, true, false,
75 false, false, false, false, false,
76 false, false, false, false, false,
77 false, false, true, true, false,
78 true}},
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
80 {"xn--cy2a840a.com", L"\x89c6\x9891.com",
81 {true, false, true, false, false,
82 false, false, false, false, false,
83 false, false, false, false, false,
84 false, false, false, false, false,
85 true}},
86 // Hanzi + '123'
87 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
88 {true, false, true, true, false,
89 false, false, false, false, false,
90 false, false, false, false, false,
91 false, false, true, true, false,
92 true}},
93 // Hanzi + Latin : U+56FD is simplified and is regarded
94 // as not supported in zh-TW.
95 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
96 {false, false, true, true, false,
97 false, false, false, false, false,
98 false, false, false, false, false,
99 false, false, false, true, false,
100 true}},
101 // Kanji + Kana (Japanese)
102 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
103 {true, false, false, true, false,
104 false, false, false, false, false,
105 false, false, false, false, false,
106 false, false, false, true, false,
107 false}},
108 // Katakana including U+30FC
109 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
110 {true, false, false, true, false,
111 false, false, false, false, false,
112 false, false, false, false, false,
113 false, false, false, true, false,
115 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
116 {true, false, false, true, false,
117 false, false, false, false, false,
118 false, false, false, false, false,
119 false, false, false, true, false,
121 // Katakana + Latin (Japanese)
122 // TODO(jungshik): Change 'false' in the first element to 'true'
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
124 // of our IsIDNComponentInSingleScript().
125 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
126 {false, false, false, true, false,
127 false, false, false, false, false,
128 false, false, false, false, false,
129 false, false, false, true, false,
131 {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
132 {false, false, false, true, false,
133 false, false, false, false, false,
134 false, false, false, false, false,
135 false, false, false, true, false,
137 // Hangul (Korean)
138 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
139 {true, false, false, false, true,
140 false, false, false, false, false,
141 false, false, false, false, false,
142 false, false, false, true, false,
143 false}},
144 // b<u-umlaut>cher (German)
145 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
146 {true, false, false, false, false,
147 false, false, false, false, true,
148 true, false, false, false, false,
149 true, false, false, false, false,
150 false}},
151 // a with diaeresis
152 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
153 {true, false, false, false, false,
154 false, false, false, false, false,
155 true, false, true, false, false,
156 true, false, false, false, false,
157 false}},
158 // c-cedilla (French)
159 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
160 {true, false, false, false, false,
161 false, false, false, false, true,
162 false, true, false, false, false,
163 false, false, false, false, false,
164 false}},
165 // caf<e with acute accent> (French)
166 {"xn--caf-dma.fr", L"caf\x00e9.fr",
167 {true, false, false, false, false,
168 false, false, false, false, true,
169 false, true, true, false, false,
170 false, false, false, false, false,
171 false}},
172 // c-cedilla and a with tilde (Portuguese)
173 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
174 {true, false, false, false, false,
175 false, false, false, false, false,
176 false, true, false, false, false,
177 false, false, false, false, false,
178 false}},
179 // s with caron
180 {"xn--achy-f6a.com", L"\x0161" L"achy.com",
181 {true, false, false, false, false,
182 false, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
185 false}},
186 // TODO(jungshik) : Add examples with Cyrillic letters
187 // only used in some languages written in Cyrillic.
188 // Eutopia (Greek)
189 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
190 {true, false, false, false, false,
191 false, false, false, true, false,
192 false, false, false, false, false,
193 false, true, false, false, false,
194 false}},
195 // Eutopia + 123 (Greek)
196 {"xn---123-pldm0haj2bk.gr",
197 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
198 {true, false, false, false, false,
199 false, false, false, true, false,
200 false, false, false, false, false,
201 false, true, false, false, false,
202 false}},
203 // Cyrillic (Russian)
204 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
205 {true, false, false, false, false,
206 false, false, true, false, false,
207 false, false, false, false, false,
208 false, false, false, false, true,
209 true}},
210 // Cyrillic + 123 (Russian)
211 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
212 {true, false, false, false, false,
213 false, false, true, false, false,
214 false, false, false, false, false,
215 false, false, false, false, true,
216 true}},
217 // Arabic
218 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
219 {true, false, false, false, false,
220 false, true, false, false, false,
221 false, false, false, false, false,
222 false, false, false, false, false,
223 false}},
224 // Hebrew
225 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
226 {true, false, false, false, false,
227 true, false, false, false, false,
228 false, false, false, false, false,
229 false, false, false, false, true,
230 false}},
231 // Thai
232 {"xn--12c2cc4ag3b4ccu.th",
233 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
234 {true, false, false, false, false,
235 false, false, false, false, false,
236 false, false, false, true, false,
237 false, false, false, false, false,
238 false}},
239 // Devanagari (Hindi)
240 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
241 {true, false, false, false, false,
242 false, false, false, false, false,
243 false, false, false, false, true,
244 false, false, false, false, false,
245 false}},
246 // Invalid IDN
247 {"xn--hello?world.com", NULL,
248 {false, false, false, false, false,
249 false, false, false, false, false,
250 false, false, false, false, false,
251 false, false, false, false, false,
252 false}},
253 // Unsafe IDNs
254 // "payp<alpha>l.com"
255 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
256 {false, false, false, false, false,
257 false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
260 false}},
261 // google.gr with Greek omicron and epsilon
262 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
263 {false, false, false, false, false,
264 false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
267 false}},
268 // google.ru with Cyrillic o
269 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
270 {false, false, false, false, false,
271 false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
274 false}},
275 // h<e with acute>llo<China in Han>.cn
276 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
277 {false, false, false, false, false,
278 false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
281 false}},
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru
283 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
284 {false, false, false, false, false,
285 false, false, false, false, false,
286 false, false, false, false, false,
287 false, false, false, false, false,
288 false}},
289 // One that's really long that will force a buffer realloc
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
291 "aaaaaaa",
292 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
293 L"aaaaaaaa",
294 {true, true, true, true, true,
295 true, true, true, true, true,
296 true, true, true, true, true,
297 true, true, true, true, true,
298 true}},
299 // Test cases for characters we blacklisted although allowed in IDN.
300 // Embedded spaces will be turned to %20 in the display.
301 // TODO(jungshik): We need to have more cases. This is a typical
302 // data-driven trap. The following test cases need to be separated
303 // and tested only for a couple of languages.
304 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
305 {false, false, false, false, false,
306 false, false, false, false, false,
307 false, false, false, false, false,
308 false, false, false, false, false,
309 false}},
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
311 {false, false, false, false, false,
312 false, false, false, false, false,
313 false, false, false, false, false,
314 false, false, false, false, false,
316 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
317 {false, false, false, false, false,
318 false, false, false, false, false,
319 false, false, false, false, false,
320 false, false, false, false, false,
322 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
323 {false, false, false, false, false,
324 false, false, false, false, false,
325 false, false, false, false, false,
326 false, false, false, false, false,
328 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
329 {false, false, false, false, false,
330 false, false, false, false, false,
331 false, false, false, false, false,
332 false, false, false, false, false,
334 // Padlock icon spoof.
335 {"xn--google-hj64e", L"\U0001f512google.com",
336 {false, false, false, false, false,
337 false, false, false, false, false,
338 false, false, false, false, false,
339 false, false, false, false, false,
341 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
342 // all strings with the surrogate '\xdd12'.
343 {"xn--fk9c.com", L"\U00010912.com",
344 {true, false, false, false, false,
345 false, false, false, false, false,
346 false, false, false, false, false,
347 false, false, false, false, false,
349 #if 0
350 // These two cases are special. We need a separate test.
351 // U+3000 and U+3002 are normalized to ASCII space and dot.
352 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
353 {false, false, true, false, false,
354 false, false, false, false, false,
355 false, false, false, false, false,
356 false, false, true, false, false,
357 true}},
358 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
359 {false, false, true, false, false,
360 false, false, false, false, false,
361 false, false, false, false, false,
362 false, false, true, false, false,
363 true}},
364 #endif
// A pair of string offsets.
// NOTE(review): no use of this struct is visible in this part of the file;
// presumably |input_offset| is an offset into the original URL and
// |output_offset| the expected offset after formatting -- confirm at the use
// site.
struct AdjustOffsetCase {
  size_t input_offset;
  size_t output_offset;
};
372 struct UrlTestData {
373 const char* const description;
374 const char* const input;
375 const char* const languages;
376 FormatUrlTypes format_types;
377 net::UnescapeRule::Type escape_rules;
378 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
379 size_t prefix_len;
382 // A helper for IDN*{Fast,Slow}.
383 // Append "::<language list>" to |expected| and |actual| to make it
384 // easy to tell which sub-case fails without debugging.
385 void AppendLanguagesToOutputs(const char* languages,
386 base::string16* expected,
387 base::string16* actual) {
388 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
389 expected->append(to_append);
390 actual->append(to_append);
393 // A pair of helpers for the FormatUrlWithOffsets() test.
394 void VerboseExpect(size_t expected,
395 size_t actual,
396 const std::string& original_url,
397 size_t position,
398 const base::string16& formatted_url) {
399 EXPECT_EQ(expected, actual) << "Original URL: " << original_url
400 << " (at char " << position << ")\nFormatted URL: " << formatted_url;
403 void CheckAdjustedOffsets(const std::string& url_string,
404 const std::string& languages,
405 FormatUrlTypes format_types,
406 net::UnescapeRule::Type unescape_rules,
407 const size_t* output_offsets) {
408 GURL url(url_string);
409 size_t url_length = url_string.length();
410 std::vector<size_t> offsets;
411 for (size_t i = 0; i <= url_length + 1; ++i)
412 offsets.push_back(i);
413 offsets.push_back(500000); // Something larger than any input length.
414 offsets.push_back(std::string::npos);
415 base::string16 formatted_url = FormatUrlWithOffsets(url, languages,
416 format_types, unescape_rules, NULL, NULL, &offsets);
417 for (size_t i = 0; i < url_length; ++i)
418 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
419 VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
420 url_length, formatted_url);
421 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
422 500000, formatted_url);
423 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
424 std::string::npos, formatted_url);
427 TEST(UrlFormatterTest, IDNToUnicodeFast) {
428 for (size_t i = 0; i < arraysize(idn_cases); i++) {
429 for (size_t j = 0; j < arraysize(kLanguages); j++) {
430 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
431 if (j == 3 || j == 17 || j == 18)
432 continue;
433 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
434 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
435 WideToUTF16(idn_cases[i].unicode_output) :
436 ASCIIToUTF16(idn_cases[i].input));
437 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
438 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
439 << "\", languages: \"" << kLanguages[j]
440 << "\"";
445 TEST(UrlFormatterTest, IDNToUnicodeSlow) {
446 for (size_t i = 0; i < arraysize(idn_cases); i++) {
447 for (size_t j = 0; j < arraysize(kLanguages); j++) {
448 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
449 if (!(j == 3 || j == 17 || j == 18))
450 continue;
451 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
452 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
453 WideToUTF16(idn_cases[i].unicode_output) :
454 ASCIIToUTF16(idn_cases[i].input));
455 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
456 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
457 << "\", languages: \"" << kLanguages[j]
458 << "\"";
463 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
464 // te), which was causing a crash (See http://crbug.com/510551). This may be an
465 // icu bug, but regardless, that should not cause a crash.
466 TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) {
467 for (char c1 = 'a'; c1 <= 'z'; c1++) {
468 for (char c2 = 'a'; c2 <= 'z'; c2++) {
469 std::string lang = base::StringPrintf("%c%c", c1, c2);
470 base::string16 output(IDNToUnicode("xn--74h", lang));
475 TEST(UrlFormatterTest, FormatUrl) {
476 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
477 const UrlTestData tests[] = {
478 {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"",
481 {"Simple URL", "http://www.google.com/", "", default_format_type,
482 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7},
484 {"With a port number and a reference",
485 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
486 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
488 // -------- IDN tests --------
489 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja",
490 default_format_type, net::UnescapeRule::NORMAL,
491 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
493 {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en",
494 default_format_type, net::UnescapeRule::NORMAL,
495 L"http://xn--l8jvb1ey91xtjb.jp/", 7},
497 {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "",
498 default_format_type, net::UnescapeRule::NORMAL,
499 // Single script is safe for empty languages.
500 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
502 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja",
503 default_format_type, net::UnescapeRule::NORMAL,
504 // GURL doesn't assume an email address's domain part as a host name.
505 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
507 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
508 "ja", default_format_type, net::UnescapeRule::NORMAL,
509 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
511 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja",
512 default_format_type, net::UnescapeRule::NORMAL,
513 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
515 // -------- omit_username_password flag tests --------
516 {"With username and password, omit_username_password=false",
517 "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing,
518 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
520 {"With username and password, omit_username_password=true",
521 "http://user:passwd@example.com/foo", "", default_format_type,
522 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7},
524 {"With username and no password", "http://user@example.com/foo", "",
525 default_format_type, net::UnescapeRule::NORMAL,
526 L"http://example.com/foo", 7},
528 {"Just '@' without username and password", "http://@example.com/foo", "",
529 default_format_type, net::UnescapeRule::NORMAL,
530 L"http://example.com/foo", 7},
532 // GURL doesn't think local-part of an email address is username for URL.
533 {"mailto:, omit_username_password=true", "mailto:foo@example.com", "",
534 default_format_type, net::UnescapeRule::NORMAL,
535 L"mailto:foo@example.com", 7},
537 // -------- unescape flag tests --------
538 {"Do not unescape",
539 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
540 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
541 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
542 "en", default_format_type, net::UnescapeRule::NONE,
543 // GURL parses %-encoded hostnames into Punycode.
544 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
545 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
548 {"Unescape normally",
549 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
550 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
551 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
552 "en", default_format_type, net::UnescapeRule::NORMAL,
553 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
554 L"?q=\x30B0\x30FC\x30B0\x30EB",
557 {"Unescape normally with BiDi control character",
558 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en",
559 default_format_type, net::UnescapeRule::NORMAL,
560 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
562 {"Unescape normally including unescape spaces",
563 "http://www.google.com/search?q=Hello%20World", "en",
564 default_format_type, net::UnescapeRule::SPACES,
565 L"http://www.google.com/search?q=Hello World", 7},
568 {"unescape=true with some special characters",
569 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
570 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
571 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
573 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
575 // -------- omit http: --------
576 {"omit http with user name", "http://user@example.com/foo", "",
577 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0},
579 {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP,
580 net::UnescapeRule::NORMAL, L"www.google.com/", 0},
582 {"omit http with https", "https://www.google.com/", "en",
583 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL,
584 L"https://www.google.com/", 8},
586 {"omit http starts with ftp.", "http://ftp.google.com/", "en",
587 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/",
590 // -------- omit trailing slash on bare hostname --------
591 {"omit slash when it's the entire path", "http://www.google.com/", "en",
592 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
593 L"http://www.google.com", 7},
594 {"omit slash when there's a ref", "http://www.google.com/#ref", "en",
595 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
596 L"http://www.google.com/#ref", 7},
597 {"omit slash when there's a query", "http://www.google.com/?", "en",
598 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
599 L"http://www.google.com/?", 7},
600 {"omit slash when it's not the entire path", "http://www.google.com/foo",
601 "en", kFormatUrlOmitTrailingSlashOnBareHostname,
602 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7},
603 {"omit slash for nonstandard URLs", "data:/", "en",
604 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
605 L"data:/", 5},
606 {"omit slash for file URLs", "file:///", "en",
607 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
608 L"file:///", 7},
610 // -------- view-source: --------
611 {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja",
612 default_format_type, net::UnescapeRule::NORMAL,
613 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
615 {"view-source of view-source",
616 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
617 default_format_type, net::UnescapeRule::NORMAL,
618 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
620 // view-source should omit http and trailing slash where non-view-source
621 // would.
622 {"view-source omit http", "view-source:http://a.b/c", "en",
623 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
624 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
625 "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL,
626 L"view-source:http://ftp.b/c", 19},
627 {"view-source omit slash when it's the entire path",
628 "view-source:http://a.b/", "en", kFormatUrlOmitAll,
629 net::UnescapeRule::NORMAL, L"view-source:a.b", 12},
632 for (size_t i = 0; i < arraysize(tests); ++i) {
633 size_t prefix_len;
634 base::string16 formatted = FormatUrl(
635 GURL(tests[i].input), tests[i].languages, tests[i].format_types,
636 tests[i].escape_rules, NULL, &prefix_len, NULL);
637 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
638 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
642 TEST(UrlFormatterTest, FormatUrlParsed) {
643 // No unescape case.
644 url::Parsed parsed;
645 base::string16 formatted =
646 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
647 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
648 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed,
649 NULL, NULL);
650 EXPECT_EQ(WideToUTF16(
651 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
652 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
653 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
654 formatted.substr(parsed.username.begin, parsed.username.len));
655 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
656 formatted.substr(parsed.password.begin, parsed.password.len));
657 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
658 formatted.substr(parsed.host.begin, parsed.host.len));
659 EXPECT_EQ(WideToUTF16(L"8080"),
660 formatted.substr(parsed.port.begin, parsed.port.len));
661 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
662 formatted.substr(parsed.path.begin, parsed.path.len));
663 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
664 formatted.substr(parsed.query.begin, parsed.query.len));
665 EXPECT_EQ(WideToUTF16(L"\x30B0"),
666 formatted.substr(parsed.ref.begin, parsed.ref.len));
668 // Unescape case.
669 formatted =
670 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
671 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
672 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, &parsed,
673 NULL, NULL);
674 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
675 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
676 EXPECT_EQ(WideToUTF16(L"\x30B0"),
677 formatted.substr(parsed.username.begin, parsed.username.len));
678 EXPECT_EQ(WideToUTF16(L"\x30FC"),
679 formatted.substr(parsed.password.begin, parsed.password.len));
680 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
681 formatted.substr(parsed.host.begin, parsed.host.len));
682 EXPECT_EQ(WideToUTF16(L"8080"),
683 formatted.substr(parsed.port.begin, parsed.port.len));
684 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
685 formatted.substr(parsed.path.begin, parsed.path.len));
686 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
687 formatted.substr(parsed.query.begin, parsed.query.len));
688 EXPECT_EQ(WideToUTF16(L"\x30B0"),
689 formatted.substr(parsed.ref.begin, parsed.ref.len));
691 // Omit_username_password + unescape case.
692 formatted =
693 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
694 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
695 "ja", kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL,
696 &parsed, NULL, NULL);
697 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
698 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
699 EXPECT_FALSE(parsed.username.is_valid());
700 EXPECT_FALSE(parsed.password.is_valid());
701 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
702 formatted.substr(parsed.host.begin, parsed.host.len));
703 EXPECT_EQ(WideToUTF16(L"8080"),
704 formatted.substr(parsed.port.begin, parsed.port.len));
705 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
706 formatted.substr(parsed.path.begin, parsed.path.len));
707 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
708 formatted.substr(parsed.query.begin, parsed.query.len));
709 EXPECT_EQ(WideToUTF16(L"\x30B0"),
710 formatted.substr(parsed.ref.begin, parsed.ref.len));
712 // View-source case.
713 formatted =
714 FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"),
715 std::string(), kFormatUrlOmitUsernamePassword,
716 net::UnescapeRule::NORMAL, &parsed, NULL, NULL);
717 EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
718 formatted);
719 EXPECT_EQ(WideToUTF16(L"view-source:http"),
720 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
721 EXPECT_FALSE(parsed.username.is_valid());
722 EXPECT_FALSE(parsed.password.is_valid());
723 EXPECT_EQ(WideToUTF16(L"host"),
724 formatted.substr(parsed.host.begin, parsed.host.len));
725 EXPECT_EQ(WideToUTF16(L"81"),
726 formatted.substr(parsed.port.begin, parsed.port.len));
727 EXPECT_EQ(WideToUTF16(L"/path"),
728 formatted.substr(parsed.path.begin, parsed.path.len));
729 EXPECT_EQ(WideToUTF16(L"query"),
730 formatted.substr(parsed.query.begin, parsed.query.len));
731 EXPECT_EQ(WideToUTF16(L"ref"),
732 formatted.substr(parsed.ref.begin, parsed.ref.len));
734 // omit http case.
735 formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), std::string(),
736 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, &parsed,
737 NULL, NULL);
738 EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
739 EXPECT_FALSE(parsed.scheme.is_valid());
740 EXPECT_FALSE(parsed.username.is_valid());
741 EXPECT_FALSE(parsed.password.is_valid());
742 EXPECT_EQ(WideToUTF16(L"host"),
743 formatted.substr(parsed.host.begin, parsed.host.len));
744 EXPECT_EQ(WideToUTF16(L"8000"),
745 formatted.substr(parsed.port.begin, parsed.port.len));
746 EXPECT_EQ(WideToUTF16(L"/a"),
747 formatted.substr(parsed.path.begin, parsed.path.len));
748 EXPECT_EQ(WideToUTF16(L"b=c"),
749 formatted.substr(parsed.query.begin, parsed.query.len));
750 EXPECT_EQ(WideToUTF16(L"d"),
751 formatted.substr(parsed.ref.begin, parsed.ref.len));
753 // omit http starts with ftp case.
754 formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), std::string(),
755 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, &parsed,
756 NULL, NULL);
757 EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
758 EXPECT_TRUE(parsed.scheme.is_valid());
759 EXPECT_FALSE(parsed.username.is_valid());
760 EXPECT_FALSE(parsed.password.is_valid());
761 EXPECT_EQ(WideToUTF16(L"http"),
762 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
763 EXPECT_EQ(WideToUTF16(L"ftp.host"),
764 formatted.substr(parsed.host.begin, parsed.host.len));
765 EXPECT_EQ(WideToUTF16(L"8000"),
766 formatted.substr(parsed.port.begin, parsed.port.len));
767 EXPECT_EQ(WideToUTF16(L"/a"),
768 formatted.substr(parsed.path.begin, parsed.path.len));
769 EXPECT_EQ(WideToUTF16(L"b=c"),
770 formatted.substr(parsed.query.begin, parsed.query.len));
771 EXPECT_EQ(WideToUTF16(L"d"),
772 formatted.substr(parsed.ref.begin, parsed.ref.len));
774 // omit http starts with 'f' case.
775 formatted = FormatUrl(GURL("http://f/"), std::string(), kFormatUrlOmitHTTP,
776 net::UnescapeRule::NORMAL, &parsed, NULL, NULL);
777 EXPECT_EQ(WideToUTF16(L"f/"), formatted);
778 EXPECT_FALSE(parsed.scheme.is_valid());
779 EXPECT_FALSE(parsed.username.is_valid());
780 EXPECT_FALSE(parsed.password.is_valid());
781 EXPECT_FALSE(parsed.port.is_valid());
782 EXPECT_TRUE(parsed.path.is_valid());
783 EXPECT_FALSE(parsed.query.is_valid());
784 EXPECT_FALSE(parsed.ref.is_valid());
785 EXPECT_EQ(WideToUTF16(L"f"),
786 formatted.substr(parsed.host.begin, parsed.host.len));
787 EXPECT_EQ(WideToUTF16(L"/"),
788 formatted.substr(parsed.path.begin, parsed.path.len));
791 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
792 // results in the original GURL, for each ASCII character in the path.
793 TEST(UrlFormatterTest, FormatUrlRoundTripPathASCII) {
794 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
795 GURL url(std::string("http://www.google.com/") +
796 static_cast<char>(test_char));
797 size_t prefix_len;
798 base::string16 formatted =
799 FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
800 net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
801 EXPECT_EQ(url.spec(), GURL(formatted).spec());
805 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
806 // results in the original GURL, for each escaped ASCII character in the path.
807 TEST(UrlFormatterTest, FormatUrlRoundTripPathEscaped) {
808 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
809 std::string original_url("http://www.google.com/");
810 original_url.push_back('%');
811 original_url.append(base::HexEncode(&test_char, 1));
813 GURL url(original_url);
814 size_t prefix_len;
815 base::string16 formatted =
816 FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
817 net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
818 EXPECT_EQ(url.spec(), GURL(formatted).spec());
822 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
823 // results in the original GURL, for each ASCII character in the query.
824 TEST(UrlFormatterTest, FormatUrlRoundTripQueryASCII) {
825 for (unsigned char test_char = 32; test_char < 128; ++test_char) {
826 GURL url(std::string("http://www.google.com/?") +
827 static_cast<char>(test_char));
828 size_t prefix_len;
829 base::string16 formatted =
830 FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
831 net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
832 EXPECT_EQ(url.spec(), GURL(formatted).spec());
836 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL
837 // only results in a different GURL for certain characters.
838 TEST(UrlFormatterTest, FormatUrlRoundTripQueryEscaped) {
839 // A full list of characters which FormatURL should unescape and GURL should
840 // not escape again, when they appear in a query string.
841 const char kUnescapedCharacters[] =
842 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
843 for (unsigned char test_char = 0; test_char < 128; ++test_char) {
844 std::string original_url("http://www.google.com/?");
845 original_url.push_back('%');
846 original_url.append(base::HexEncode(&test_char, 1));
848 GURL url(original_url);
849 size_t prefix_len;
850 base::string16 formatted =
851 FormatUrl(url, std::string(), kFormatUrlOmitUsernamePassword,
852 net::UnescapeRule::NORMAL, NULL, &prefix_len, NULL);
854 if (test_char &&
855 strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
856 EXPECT_NE(url.spec(), GURL(formatted).spec());
857 } else {
858 EXPECT_EQ(url.spec(), GURL(formatted).spec());
// Drives CheckAdjustedOffsets() with a series of URLs, each paired with a
// language string, a set of format flags, an unescape rule and a table of
// expected offsets.
// NOTE(review): CheckAdjustedOffsets() is defined outside this view.
// Presumably table entry i is the expected position in the formatted output
// for input offset i, with kNpos (base::string16::npos) marking input offsets
// that have no counterpart in the output - confirm against the helper.
863 TEST(UrlFormatterTest, FormatUrlWithOffsets) {
// Empty URL; NULL is passed in place of an expected-offset table.
864 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing,
865 net::UnescapeRule::NORMAL, NULL);
// Plain ASCII URL with kFormatUrlOmitNothing: the table is the identity
// mapping, one entry per input character.
867 const size_t basic_offsets[] = {
868 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
869 21, 22, 23, 24, 25
871 CheckAdjustedOffsets("http://www.google.com/foo/", "en",
872 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
873 basic_offsets);
// "foo:bar@" with kFormatUrlOmitUsernamePassword: the offset at the start of
// the credentials stays 7, offsets inside them are kNpos, and the host
// resumes at 7.
875 const size_t omit_auth_offsets_1[] = {
876 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
877 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
879 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
880 kFormatUrlOmitUsernamePassword,
881 net::UnescapeRule::NORMAL, omit_auth_offsets_1);
// Same as above but with a username only ("foo@").
883 const size_t omit_auth_offsets_2[] = {
884 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
885 15, 16, 17, 18, 19, 20, 21
887 CheckAdjustedOffsets("http://foo@www.google.com/", "en",
888 kFormatUrlOmitUsernamePassword,
889 net::UnescapeRule::NORMAL, omit_auth_offsets_2);
// Credentials are kept (kFormatUrlOmitNothing) but contain two 9-character
// percent-escaped sequences; in the table only the first offset of each
// sequence has a position, the remaining eight are kNpos.
891 const size_t dont_omit_auth_offsets[] = {
892 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
893 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
894 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
895 30, 31
897 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
898 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
899 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
900 dont_omit_auth_offsets);
// "view-source:" prefix in front of a URL whose "foo@" username is omitted:
// the kNpos run sits after the prefix and "http://" (offsets 0-19).
902 const size_t view_source_offsets[] = {
903 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
904 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
906 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
907 kFormatUrlOmitUsernamePassword,
908 net::UnescapeRule::NORMAL, view_source_offsets);
// Punycode host label with language "ja": every offset inside the label after
// its first character is kNpos, and ".jp" resumes at 12.
910 const size_t idn_hostname_offsets_1[] = {
911 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
912 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
913 13, 14, 15, 16, 17, 18, 19
915 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
916 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
917 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
918 idn_hostname_offsets_1);
// Two punycode labels between plain ASCII labels, language "zh-CN".
920 const size_t idn_hostname_offsets_2[] = {
921 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
922 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
923 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
924 kNpos, 19, 20, 21, 22, 23, 24
926 // Convert punycode to
927 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
928 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
929 "zh-CN", kFormatUrlOmitNothing,
930 net::UnescapeRule::NORMAL, idn_hostname_offsets_2);
// Path containing "%20" and four 9-character percent-escaped sequences; this
// is the only call here that uses net::UnescapeRule::SPACES.
932 const size_t unescape_offsets[] = {
933 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
934 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
935 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
936 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
937 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
939 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
940 CheckAdjustedOffsets(
941 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
942 "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets);
// Ref (fragment) holding two raw 3-byte sequences followed by 'z'.
// NOTE(review): only 37 entries are visible below for this 38-byte URL, while
// the other tables in this test carry one entry per input byte; the final
// entry appears to be missing from this copy - confirm against upstream.
944 const size_t ref_offsets[] = {
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
946 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
949 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
950 CheckAdjustedOffsets(
951 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
952 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets);
// kFormatUrlOmitHTTP: offset 0 stays 0, the rest of "http://" is kNpos, and
// the host restarts at 0.
954 const size_t omit_http_offsets[] = {
955 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
956 10, 11, 12, 13, 14
958 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP,
959 net::UnescapeRule::NORMAL, omit_http_offsets);
// kFormatUrlOmitHTTP with a host beginning "ftp.": the table is the identity
// mapping, i.e. the expected output keeps the "http://" prefix.
961 const size_t omit_http_start_with_ftp_offsets[] = {
962 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
964 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
965 net::UnescapeRule::NORMAL,
966 omit_http_start_with_ftp_offsets);
// kFormatUrlOmitAll: both "http://" and "user@" collapse onto position 0, and
// the host restarts at 0.
968 const size_t omit_all_offsets[] = {
969 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
970 0, 1, 2, 3, 4, 5, 6, 7
972 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
973 net::UnescapeRule::NORMAL, omit_all_offsets);
976 } // namespace
978 } // namespace url_formatter