1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/utility/importer/bookmark_html_reader.h"
8 #include "base/bind_helpers.h"
9 #include "base/callback.h"
10 #include "base/files/file_path.h"
11 #include "base/path_service.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "chrome/common/chrome_paths.h"
16 #include "chrome/common/importer/imported_bookmark_entry.h"
17 #include "testing/gtest/include/gtest/gtest.h"
19 using base::ASCIIToUTF16
;
20 using base::UTF16ToWide
;
22 namespace bookmark_html_reader
{
// Exercises the line-level parsers in bookmark_html_reader::internal
// (ParseCharsetFromLine, ParseFolderNameFromLine, ParseBookmarkFromLine and
// ParseMinimumBookmarkFromLine) on single hand-written lines of bookmark HTML.
// After the charset and folder-name cases, the bookmark cases cover, in order:
// non-ASCII title/shortcut, a URL containing %22, a quote inside HREF, an
// ADD_DATE attribute, a POST_DATA/SHORTCUTURL keyword bookmark, a truncated
// line that must fail to parse, and a lowercase <dt><a> anchor.
// NOTE(review): the folder-name input literal and the "?g=" HREF literal look
// like they lost their HTML entity escapes (the expectations compare against
// the unescaped characters); as written those literals are not well-formed --
// confirm against the upstream file before relying on this listing.
24 TEST(BookmarkHTMLReaderTest
, ParseTests
) {
// Charset declared through a META Content-Type tag.
29 result
= internal::ParseCharsetFromLine(
30 "<META HTTP-EQUIV=\"Content-Type\" "
31 "CONTENT=\"text/html; charset=UTF-8\">",
34 EXPECT_EQ("UTF-8", charset
);
36 // Escaped characters in name.
37 base::string16 folder_name
;
38 bool is_toolbar_folder
;
39 base::Time folder_add_date
;
40 result
= internal::ParseFolderNameFromLine(
41 "<DT><H3 ADD_DATE=\"1207558707\" >< >"
42 " & " ' \\ /</H3>",
43 charset
, &folder_name
, &is_toolbar_folder
, &folder_add_date
);
45 EXPECT_EQ(ASCIIToUTF16("< > & \" ' \\ /"), folder_name
);
46 EXPECT_FALSE(is_toolbar_folder
);
47 EXPECT_TRUE(base::Time::FromTimeT(1207558707) == folder_add_date
);
49 // Empty name and toolbar folder attribute.
50 result
= internal::ParseFolderNameFromLine(
51 "<DT><H3 PERSONAL_TOOLBAR_FOLDER=\"true\"></H3>",
52 charset
, &folder_name
, &is_toolbar_folder
, &folder_add_date
);
54 EXPECT_EQ(base::string16(), folder_name
);
55 EXPECT_TRUE(is_toolbar_folder
);
57 // Unicode characters in title and shortcut.
60 base::string16 shortcut
;
61 base::string16 post_data
;
63 result
= internal::ParseBookmarkFromLine(
64 "<DT><A HREF=\"http://chinese.site.cn/path?query=1#ref\" "
65 "SHORTCUTURL=\"\xE4\xB8\xAD\">\xE4\xB8\xAD\xE6\x96\x87</A>",
66 charset
, &title
, &url
, &favicon
, &shortcut
, &add_date
, &post_data
);
68 EXPECT_EQ(L
"\x4E2D\x6587", UTF16ToWide(title
));
69 EXPECT_EQ("http://chinese.site.cn/path?query=1#ref", url
.spec());
70 EXPECT_EQ(L
"\x4E2D", UTF16ToWide(shortcut
));
71 EXPECT_EQ(base::string16(), post_data
);
72 EXPECT_TRUE(base::Time() == add_date
);
74 // No shortcut, and url contains %22 ('"' character).
75 result
= internal::ParseBookmarkFromLine(
76 "<DT><A HREF=\"http://domain.com/?q=%22<>%22\">name</A>",
77 charset
, &title
, &url
, &favicon
, &shortcut
, &add_date
, &post_data
);
79 EXPECT_EQ(ASCIIToUTF16("name"), title
);
80 EXPECT_EQ("http://domain.com/?q=%22%3C%3E%22", url
.spec());
81 EXPECT_EQ(base::string16(), shortcut
);
82 EXPECT_EQ(base::string16(), post_data
);
83 EXPECT_TRUE(base::Time() == add_date
);
85 result
= internal::ParseBookmarkFromLine(
86 "<DT><A HREF=\"http://domain.com/?g="\"\">name</A>",
87 charset
, &title
, &url
, &favicon
, &shortcut
, &add_date
, &post_data
);
89 EXPECT_EQ(ASCIIToUTF16("name"), title
);
90 EXPECT_EQ("http://domain.com/?g=%22", url
.spec());
91 EXPECT_EQ(base::string16(), shortcut
);
92 EXPECT_EQ(base::string16(), post_data
);
93 EXPECT_TRUE(base::Time() == add_date
);
96 result
= internal::ParseBookmarkFromLine(
97 "<DT><A HREF=\"http://site/\" ADD_DATE=\"1121301154\">name</A>",
98 charset
, &title
, &url
, &favicon
, &shortcut
, &add_date
, &post_data
);
100 EXPECT_EQ(ASCIIToUTF16("name"), title
);
101 EXPECT_EQ(GURL("http://site/"), url
);
102 EXPECT_EQ(base::string16(), shortcut
);
103 EXPECT_EQ(base::string16(), post_data
);
104 EXPECT_TRUE(base::Time::FromTimeT(1121301154) == add_date
);
107 result
= internal::ParseBookmarkFromLine(
108 "<DT><A HREF=\"http://localhost:8080/test/hello.html\" ADD_DATE=\""
109 "1212447159\" LAST_VISIT=\"1212447251\" LAST_MODIFIED=\"1212447248\""
110 "SHORTCUTURL=\"post\" ICON=\"data:\" POST_DATA=\"lname%3D%25s\""
111 "LAST_CHARSET=\"UTF-8\" ID=\"rdf:#$weKaR3\">Test Post keyword</A>",
112 charset
, &title
, &url
, &favicon
, &shortcut
, &add_date
, &post_data
);
114 EXPECT_EQ(ASCIIToUTF16("Test Post keyword"), title
);
115 EXPECT_EQ("http://localhost:8080/test/hello.html", url
.spec());
116 EXPECT_EQ(ASCIIToUTF16("post"), shortcut
);
117 EXPECT_EQ(ASCIIToUTF16("lname%3D%25s"), post_data
);
118 EXPECT_TRUE(base::Time::FromTimeT(1212447159) == add_date
);
121 result
= internal::ParseBookmarkFromLine(
122 "<DT><A HREF=\"http://domain.com/?q=%22",
123 charset
, &title
, &url
, &favicon
, &shortcut
, &add_date
, &post_data
);
124 EXPECT_FALSE(result
);
125 EXPECT_EQ(base::string16(), title
);
126 EXPECT_EQ("", url
.spec());
127 EXPECT_EQ(base::string16(), shortcut
);
128 EXPECT_EQ(base::string16(), post_data
);
129 EXPECT_TRUE(base::Time() == add_date
);
132 result
= internal::ParseMinimumBookmarkFromLine(
133 "<dt><a href=\"http://www.google.com/\">Google</a></dt>",
134 charset
, &title
, &url
);
136 EXPECT_EQ(ASCIIToUTF16("Google"), title
);
137 EXPECT_EQ("http://www.google.com/", url
.spec());
// Table-driven check of CanImportURLAsSearchEngine(): every row pairs a URL
// string with the boolean the function is expected to return for it.
140 TEST(BookmarkHTMLReaderTest
, CanImportURLAsSearchEngineTest
) {
// Fields of one table row: the input URL and the expected return value.
142 const std::string url
;
143 const bool can_be_imported_as_search_engine
;
// Rows are { url, can_be_imported_as_search_engine }. In the visible rows,
// URLs containing a %s / %S placeholder expect true, and URLs without one
// (or without a "scheme:" prefix, as in "http//google.com") expect false.
145 { "http://www.example.%s.com", true },
146 { "http://www.example.%S.com", true },
147 { "http://www.example.%x.com", false },
148 { "http://www.example.com", false },
149 { "http://%s.example.com", true },
150 { "http://www.example.%s.test.%s.com", true },
151 { "http://www.test&test.%s.com", true },
152 { "http://www.example.com?q=%s&foo=bar", true },
153 { "http://www.example.com/%s/?q=%s&foo=bar", true },
154 { "http//google.com", false },
156 { "http:/path/%s/", true },
// Output argument of CanImportURLAsSearchEngine(); only the boolean result
// is compared in the visible code, the produced string is not inspected.
161 std::string search_engine_url
;
162 for (size_t i
= 0; i
< arraysize(test_cases
); ++i
) {
163 EXPECT_EQ(test_cases
[i
].can_be_imported_as_search_engine
,
164 CanImportURLAsSearchEngine(GURL(test_cases
[i
].url
),
165 &search_engine_url
));
// Test fixture for the file-import tests below. It holds the directory that
// contains the bookmark HTML test files and a set of helpers that each verify
// one expected imported entry.
171 class BookmarkHTMLReaderTestWithData
: public testing::Test
{
// Points test_data_path_ at the "bookmark_html_reader" test-data directory.
173 void SetUp() override
;
// Per-entry expectation helpers, named after the test file and the position
// of the entry they verify (firefox2.html, epiphany.html, firefox23.html).
176 void ExpectFirstFirefox2Bookmark(const ImportedBookmarkEntry
& entry
);
177 void ExpectSecondFirefox2Bookmark(const ImportedBookmarkEntry
& entry
);
178 void ExpectThirdFirefox2Bookmark(const ImportedBookmarkEntry
& entry
);
179 void ExpectFirstEpiphanyBookmark(const ImportedBookmarkEntry
& entry
);
180 void ExpectSecondEpiphanyBookmark(const ImportedBookmarkEntry
& entry
);
181 void ExpectFirstFirefox23Bookmark(const ImportedBookmarkEntry
& entry
);
182 void ExpectSecondFirefox23Bookmark(const ImportedBookmarkEntry
& entry
);
183 void ExpectThirdFirefox23Bookmark(const ImportedBookmarkEntry
& entry
);
// Expectation helpers for the search engines imported from
// firefox_bookmark_keyword.html.
184 void ExpectFirstFirefoxBookmarkWithKeyword(
185 const importer::SearchEngineInfo
& info
);
186 void ExpectSecondFirefoxBookmarkWithKeyword(
187 const importer::SearchEngineInfo
& info
);
// Directory holding the bookmark HTML test files; assigned in SetUp().
189 base::FilePath test_data_path_
;
192 void BookmarkHTMLReaderTestWithData::SetUp() {
193 ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA
, &test_data_path_
));
194 test_data_path_
= test_data_path_
.AppendASCII("bookmark_html_reader");
197 void BookmarkHTMLReaderTestWithData::ExpectFirstFirefox2Bookmark(
198 const ImportedBookmarkEntry
& entry
) {
199 EXPECT_EQ(ASCIIToUTF16("Empty"), entry
.title
);
200 EXPECT_TRUE(entry
.is_folder
);
201 EXPECT_EQ(base::Time::FromTimeT(1295938143), entry
.creation_time
);
202 EXPECT_EQ(1U, entry
.path
.size());
203 if (entry
.path
.size() == 1)
204 EXPECT_EQ(ASCIIToUTF16("Empty's Parent"), entry
.path
.front());
207 void BookmarkHTMLReaderTestWithData::ExpectSecondFirefox2Bookmark(
208 const ImportedBookmarkEntry
& entry
) {
209 EXPECT_EQ(ASCIIToUTF16("[Tamura Yukari.com]"), entry
.title
);
210 EXPECT_FALSE(entry
.is_folder
);
211 EXPECT_EQ(base::Time::FromTimeT(1234567890), entry
.creation_time
);
212 EXPECT_EQ(1U, entry
.path
.size());
213 if (entry
.path
.size() == 1)
214 EXPECT_EQ(ASCIIToUTF16("Not Empty"), entry
.path
.front());
215 EXPECT_EQ("http://www.tamurayukari.com/", entry
.url
.spec());
218 void BookmarkHTMLReaderTestWithData::ExpectThirdFirefox2Bookmark(
219 const ImportedBookmarkEntry
& entry
) {
220 EXPECT_EQ(ASCIIToUTF16("Google"), entry
.title
);
221 EXPECT_FALSE(entry
.is_folder
);
222 EXPECT_EQ(base::Time::FromTimeT(0000000000), entry
.creation_time
);
223 EXPECT_EQ(1U, entry
.path
.size());
224 if (entry
.path
.size() == 1)
225 EXPECT_EQ(ASCIIToUTF16("Not Empty But Default"), entry
.path
.front());
226 EXPECT_EQ("http://www.google.com/", entry
.url
.spec());
229 void BookmarkHTMLReaderTestWithData::ExpectFirstEpiphanyBookmark(
230 const ImportedBookmarkEntry
& entry
) {
231 EXPECT_EQ(ASCIIToUTF16("[Tamura Yukari.com]"), entry
.title
);
232 EXPECT_EQ("http://www.tamurayukari.com/", entry
.url
.spec());
233 EXPECT_EQ(0U, entry
.path
.size());
236 void BookmarkHTMLReaderTestWithData::ExpectSecondEpiphanyBookmark(
237 const ImportedBookmarkEntry
& entry
) {
238 EXPECT_EQ(ASCIIToUTF16("Google"), entry
.title
);
239 EXPECT_EQ("http://www.google.com/", entry
.url
.spec());
240 EXPECT_EQ(0U, entry
.path
.size());
243 void BookmarkHTMLReaderTestWithData::ExpectFirstFirefox23Bookmark(
244 const ImportedBookmarkEntry
& entry
) {
245 EXPECT_EQ(ASCIIToUTF16("Google"), entry
.title
);
246 EXPECT_FALSE(entry
.is_folder
);
247 EXPECT_EQ(base::Time::FromTimeT(1376102167), entry
.creation_time
);
248 EXPECT_EQ(0U, entry
.path
.size());
249 EXPECT_EQ("https://www.google.com/", entry
.url
.spec());
252 void BookmarkHTMLReaderTestWithData::ExpectSecondFirefox23Bookmark(
253 const ImportedBookmarkEntry
& entry
) {
254 EXPECT_EQ(ASCIIToUTF16("Issues"), entry
.title
);
255 EXPECT_FALSE(entry
.is_folder
);
256 EXPECT_EQ(base::Time::FromTimeT(1376102304), entry
.creation_time
);
257 EXPECT_EQ(1U, entry
.path
.size());
258 EXPECT_EQ(ASCIIToUTF16("Chromium"), entry
.path
.front());
259 EXPECT_EQ("https://code.google.com/p/chromium/issues/list", entry
.url
.spec());
262 void BookmarkHTMLReaderTestWithData::ExpectThirdFirefox23Bookmark(
263 const ImportedBookmarkEntry
& entry
) {
264 EXPECT_EQ(ASCIIToUTF16("CodeSearch"), entry
.title
);
265 EXPECT_FALSE(entry
.is_folder
);
266 EXPECT_EQ(base::Time::FromTimeT(1376102224), entry
.creation_time
);
267 EXPECT_EQ(1U, entry
.path
.size());
268 EXPECT_EQ(ASCIIToUTF16("Chromium"), entry
.path
.front());
269 EXPECT_EQ("http://code.google.com/p/chromium/codesearch", entry
.url
.spec());
272 void BookmarkHTMLReaderTestWithData::ExpectFirstFirefoxBookmarkWithKeyword(
273 const importer::SearchEngineInfo
& info
) {
274 EXPECT_EQ(ASCIIToUTF16("http://example.{searchTerms}.com/"), info
.url
);
275 EXPECT_EQ(ASCIIToUTF16("keyword"), info
.keyword
);
276 EXPECT_EQ(ASCIIToUTF16("Bookmark Keyword"), info
.display_name
);
279 void BookmarkHTMLReaderTestWithData::ExpectSecondFirefoxBookmarkWithKeyword(
280 const importer::SearchEngineInfo
& info
) {
281 EXPECT_EQ(ASCIIToUTF16("http://example.com/?q={searchTerms}"), info
.url
);
282 EXPECT_EQ(ASCIIToUTF16("keyword"), info
.keyword
);
283 EXPECT_EQ(ASCIIToUTF16("BookmarkName"), info
.display_name
);
288 TEST_F(BookmarkHTMLReaderTestWithData
, Firefox2BookmarkFileImport
) {
289 base::FilePath path
= test_data_path_
.AppendASCII("firefox2.html");
291 std::vector
<ImportedBookmarkEntry
> bookmarks
;
292 ImportBookmarksFile(base::Callback
<bool(void)>(),
293 base::Callback
<bool(const GURL
&)>(),
294 path
, &bookmarks
, NULL
, NULL
);
296 ASSERT_EQ(3U, bookmarks
.size());
297 ExpectFirstFirefox2Bookmark(bookmarks
[0]);
298 ExpectSecondFirefox2Bookmark(bookmarks
[1]);
299 ExpectThirdFirefox2Bookmark(bookmarks
[2]);
302 TEST_F(BookmarkHTMLReaderTestWithData
, BookmarkFileWithHrTagImport
) {
303 base::FilePath path
= test_data_path_
.AppendASCII("firefox23.html");
305 std::vector
<ImportedBookmarkEntry
> bookmarks
;
306 ImportBookmarksFile(base::Callback
<bool(void)>(),
307 base::Callback
<bool(const GURL
&)>(),
308 path
, &bookmarks
, NULL
, NULL
);
310 ASSERT_EQ(3U, bookmarks
.size());
311 ExpectFirstFirefox23Bookmark(bookmarks
[0]);
312 ExpectSecondFirefox23Bookmark(bookmarks
[1]);
313 ExpectThirdFirefox23Bookmark(bookmarks
[2]);
316 TEST_F(BookmarkHTMLReaderTestWithData
, EpiphanyBookmarkFileImport
) {
317 base::FilePath path
= test_data_path_
.AppendASCII("epiphany.html");
319 std::vector
<ImportedBookmarkEntry
> bookmarks
;
320 ImportBookmarksFile(base::Callback
<bool(void)>(),
321 base::Callback
<bool(const GURL
&)>(),
322 path
, &bookmarks
, NULL
, NULL
);
324 ASSERT_EQ(2U, bookmarks
.size());
325 ExpectFirstEpiphanyBookmark(bookmarks
[0]);
326 ExpectSecondEpiphanyBookmark(bookmarks
[1]);
329 TEST_F(BookmarkHTMLReaderTestWithData
, FirefoxBookmarkFileWithKeywordImport
) {
330 base::FilePath path
= test_data_path_
.AppendASCII(
331 "firefox_bookmark_keyword.html");
333 std::vector
<importer::SearchEngineInfo
> search_engines
;
334 ImportBookmarksFile(base::Callback
<bool(void)>(),
335 base::Callback
<bool(const GURL
&)>(),
336 path
, NULL
, &search_engines
, NULL
);
338 ASSERT_EQ(2U, search_engines
.size());
339 ExpectFirstFirefoxBookmarkWithKeyword(search_engines
[0]);
340 ExpectSecondFirefoxBookmarkWithKeyword(search_engines
[1]);
343 // Verifies that importing a bookmarks file without a charset specified succeeds
344 // (by falling back to a default charset). Per [ http://crbug.com/460423 ], this
345 // sort of bookmarks file is generated by IE.
346 TEST_F(BookmarkHTMLReaderTestWithData
,
347 InternetExplorerBookmarkFileWithoutCharsetImport
) {
348 base::FilePath path
= test_data_path_
.AppendASCII("ie_sans_charset.html");
350 std::vector
<ImportedBookmarkEntry
> bookmarks
;
351 ImportBookmarksFile(base::Callback
<bool(void)>(),
352 base::Callback
<bool(const GURL
&)>(),
353 path
, &bookmarks
, NULL
, NULL
);
355 ASSERT_EQ(3U, bookmarks
.size());
356 EXPECT_EQ(ASCIIToUTF16("Google"), bookmarks
[0].title
);
357 EXPECT_EQ(ASCIIToUTF16("Outlook"), bookmarks
[1].title
);
358 EXPECT_EQ(ASCIIToUTF16("Speed Test"), bookmarks
[2].title
);
// Helper for the CancellationCallback test: its ShouldCancel() method is
// bound as the cancellation callback passed to ImportBookmarksFile().
// NOTE(review): the body of ShouldCancel() and the declaration of |count| are
// not visible in this listing; judging by the class name, it presumably starts
// reporting "cancel" after fifteen calls -- confirm against the full source.
363 class CancelAfterFifteenCalls
{
// Starts the call counter at zero.
366 CancelAfterFifteenCalls() : count(0) { }
367 bool ShouldCancel() {
374 TEST_F(BookmarkHTMLReaderTestWithData
, CancellationCallback
) {
375 // Use a file for testing that has multiple bookmarks.
376 base::FilePath path
= test_data_path_
.AppendASCII("firefox2.html");
378 std::vector
<ImportedBookmarkEntry
> bookmarks
;
379 CancelAfterFifteenCalls cancel_fifteen
;
380 ImportBookmarksFile(base::Bind(&CancelAfterFifteenCalls::ShouldCancel
,
381 base::Unretained(&cancel_fifteen
)),
382 base::Callback
<bool(const GURL
&)>(),
383 path
, &bookmarks
, NULL
, NULL
);
385 // The cancellation callback is checked before each line is read, so fifteen
386 // lines are imported. The first fifteen lines of firefox2.html include only
388 ASSERT_EQ(1U, bookmarks
.size());
389 ExpectFirstFirefox2Bookmark(bookmarks
[0]);
394 bool IsURLValid(const GURL
& url
) {
395 // No offense to whomever owns this domain...
396 return !url
.DomainIs("tamurayukari.com");
401 TEST_F(BookmarkHTMLReaderTestWithData
, ValidURLCallback
) {
402 // Use a file for testing that has multiple bookmarks.
403 base::FilePath path
= test_data_path_
.AppendASCII("firefox2.html");
405 std::vector
<ImportedBookmarkEntry
> bookmarks
;
406 ImportBookmarksFile(base::Callback
<bool(void)>(),
407 base::Bind(&IsURLValid
),
408 path
, &bookmarks
, NULL
, NULL
);
410 ASSERT_EQ(2U, bookmarks
.size());
411 ExpectFirstFirefox2Bookmark(bookmarks
[0]);
412 ExpectThirdFirefox2Bookmark(bookmarks
[1]);
415 } // namespace bookmark_html_reader