[safe-browsing] Database full hash matches like prefix match.
[chromium-blink-merge.git] / chrome / browser / autocomplete / bookmark_provider_unittest.cc
blobc2a2e043655663a66d4c0f0d03682a0800048798
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/autocomplete/bookmark_provider.h"
7 #include <algorithm>
8 #include <string>
9 #include <vector>
11 #include "base/memory/ref_counted.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/strings/string16.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "chrome/browser/autocomplete/autocomplete_provider.h"
18 #include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
19 #include "chrome/browser/bookmarks/test_bookmark_client.h"
20 #include "chrome/test/base/testing_profile.h"
21 #include "components/bookmarks/core/browser/bookmark_match.h"
22 #include "components/bookmarks/core/browser/bookmark_model.h"
23 #include "testing/gtest/include/gtest/gtest.h"
// The bookmark corpus against which we will simulate searches. Each entry is
// added to the bookmark model by BookmarkProviderTest::SetUp().
struct BookmarksTestInfo {
  std::string title;  // Bookmark title.
  std::string url;    // Bookmark URL.
} bookmark_provider_test_data[] = {
  { "abc def", "http://www.catsanddogs.com/a" },
  { "abcde", "http://www.catsanddogs.com/b" },
  { "abcdef", "http://www.catsanddogs.com/c" },
  { "a definition", "http://www.catsanddogs.com/d" },
  { "carry carbon carefully", "http://www.catsanddogs.com/e" },
  { "ghi jkl", "http://www.catsanddogs.com/f" },
  { "jkl ghi", "http://www.catsanddogs.com/g" },
  { "frankly frankly frank", "http://www.catsanddogs.com/h" },
  { "foobar foobar", "http://www.foobar.com/" },
  { "domain", "http://www.domain.com/http/" },
  { "repeat", "http://www.repeat.com/1/repeat/2/" },
  // For testing inline_autocompletion.
  { "http://blah.com/", "http://blah.com/" },
  { "http://fiddle.com/", "http://fiddle.com/" },
  { "http://www.www.com/", "http://www.www.com/" },
  { "chrome://version", "chrome://version" },
  { "chrome://omnibox", "chrome://omnibox" },
  // For testing ranking with different URLs.
  {"achlorhydric featherheads resuscitates mockingbirds",
   "http://www.featherheads.com/a" },
  {"achlorhydric mockingbirds resuscitates featherhead",
   "http://www.featherheads.com/b" },
  {"featherhead resuscitates achlorhydric mockingbirds",
   "http://www.featherheads.com/c" },
  {"mockingbirds resuscitates featherheads achlorhydric",
   "http://www.featherheads.com/d" },
  // For testing URL boosting.
  {"burning worms #1", "http://www.burned.com/" },
  {"burning worms #2", "http://www.worms.com/" },
  {"worming burns #10", "http://www.burned.com/" },
  {"worming burns #20", "http://www.worms.com/" },
  {"jive music", "http://www.worms.com/" },
};
64 class BookmarkProviderTest : public testing::Test,
65 public AutocompleteProviderListener {
66 public:
67 BookmarkProviderTest();
69 // AutocompleteProviderListener: Not called.
70 virtual void OnProviderUpdate(bool updated_matches) OVERRIDE {}
72 protected:
73 virtual void SetUp() OVERRIDE;
75 test::TestBookmarkClient client_;
76 scoped_ptr<TestingProfile> profile_;
77 scoped_ptr<BookmarkModel> model_;
78 scoped_refptr<BookmarkProvider> provider_;
80 private:
81 DISALLOW_COPY_AND_ASSIGN(BookmarkProviderTest);
84 BookmarkProviderTest::BookmarkProviderTest() {
85 model_ = client_.CreateModel(false);
88 void BookmarkProviderTest::SetUp() {
89 profile_.reset(new TestingProfile());
90 DCHECK(profile_.get());
91 provider_ = new BookmarkProvider(this, profile_.get());
92 DCHECK(provider_.get());
93 provider_->set_bookmark_model_for_testing(model_.get());
95 const BookmarkNode* other_node = model_->other_node();
96 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(bookmark_provider_test_data); ++i) {
97 const BookmarksTestInfo& cur(bookmark_provider_test_data[i]);
98 const GURL url(cur.url);
99 model_->AddURL(other_node, other_node->child_count(),
100 base::ASCIIToUTF16(cur.title), url);
// Structures and functions supporting the BookmarkProviderTest.Positions
// unit test.

// One matched range, stored as a pair of offsets: |begin| is where the match
// starts and |end| is where it stops.
struct TestBookmarkPosition {
  TestBookmarkPosition(size_t begin, size_t end)
      : begin(begin), end(end) {}

  size_t begin;
  size_t end;
};
typedef std::vector<TestBookmarkPosition> TestBookmarkPositions;
116 // Return |positions| as a formatted string for unit test diagnostic output.
117 std::string TestBookmarkPositionsAsString(
118 const TestBookmarkPositions& positions) {
119 std::string position_string("{");
120 for (TestBookmarkPositions::const_iterator i = positions.begin();
121 i != positions.end(); ++i) {
122 if (i != positions.begin())
123 position_string += ", ";
124 position_string += "{" + base::IntToString(i->begin) + ", " +
125 base::IntToString(i->end) + "}";
127 position_string += "}\n";
128 return position_string;
131 // Return the positions in |matches| as a formatted string for unit test
132 // diagnostic output.
133 base::string16 MatchesAsString16(const ACMatches& matches) {
134 base::string16 matches_string;
135 for (ACMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
136 matches_string.append(base::ASCIIToUTF16(" '"));
137 matches_string.append(i->description);
138 matches_string.append(base::ASCIIToUTF16("'\n"));
140 return matches_string;
143 // Comparison function for sorting search terms by descending length.
144 bool TestBookmarkPositionsEqual(const TestBookmarkPosition& pos_a,
145 const TestBookmarkPosition& pos_b) {
146 return pos_a.begin == pos_b.begin && pos_a.end == pos_b.end;
149 // Convience function to make comparing ACMatchClassifications against the
150 // test expectations structure easier.
151 TestBookmarkPositions PositionsFromAutocompleteMatch(
152 const AutocompleteMatch& match) {
153 TestBookmarkPositions positions;
154 bool started = false;
155 size_t start = 0;
156 for (AutocompleteMatch::ACMatchClassifications::const_iterator
157 i = match.description_class.begin();
158 i != match.description_class.end(); ++i) {
159 if (i->style & AutocompleteMatch::ACMatchClassification::MATCH) {
160 // We have found the start of a match.
161 EXPECT_FALSE(started);
162 started = true;
163 start = i->offset;
164 } else if (started) {
165 // We have found the end of a match.
166 started = false;
167 positions.push_back(TestBookmarkPosition(start, i->offset));
168 start = 0;
171 // Record the final position if the last match goes to the end of the
172 // candidate string.
173 if (started)
174 positions.push_back(TestBookmarkPosition(start, match.description.size()));
175 return positions;
178 // Convience function to make comparing test expectations structure against the
179 // actual ACMatchClassifications easier.
180 TestBookmarkPositions PositionsFromExpectations(
181 const size_t expectations[9][2]) {
182 TestBookmarkPositions positions;
183 size_t i = 0;
184 // The array is zero-terminated in the [1]th element.
185 while (expectations[i][1]) {
186 positions.push_back(
187 TestBookmarkPosition(expectations[i][0], expectations[i][1]));
188 ++i;
190 return positions;
193 TEST_F(BookmarkProviderTest, Positions) {
194 // Simulate searches.
195 // Description of |positions|:
196 // The first index represents the collection of positions for each expected
197 // match. The count of the actual subarrays in each instance of |query_data|
198 // must equal |match_count|. The second index represents each expected
199 // match position. The third index represents the |start| and |end| of the
200 // expected match's position within the |test_data|. This array must be
201 // terminated by an entry with a value of '0' for |end|.
202 // Example:
203 // Consider the line for 'def' below:
204 // {"def", 2, {{{4, 7}, {XXX, 0}}, {{2, 5}, {11, 14}, {XXX, 0}}}},
205 // There are two expected matches:
206 // 0. {{4, 7}, {XXX, 0}}
207 // 1. {{2, 5}, {11 ,14}, {XXX, 0}}
208 // For the first match, [0], there is one match within the bookmark's title
209 // expected, {4, 7}, which maps to the 'def' within "abc def". The 'XXX'
210 // value is ignored. The second match, [1], indicates that two matches are
211 // expected within the bookmark title "a definite definition". In each case,
212 // the {XXX, 0} indicates the end of the subarray. Or:
213 // Match #1 Match #2
214 // ------------------ ----------------------------
215 // Pos1 Term Pos1 Pos2 Term
216 // ------ -------- ------ -------- --------
217 // {"def", 2, {{{4, 7}, {999, 0}}, {{2, 5}, {11, 14}, {999, 0}}}},
219 struct QueryData {
220 const std::string query;
221 const size_t match_count; // This count must match the number of major
222 // elements in the following |positions| array.
223 const size_t positions[99][9][2];
224 } query_data[] = {
225 // This first set is primarily for position detection validation.
226 {"abc", 3, {{{0, 3}, {0, 0}},
227 {{0, 3}, {0, 0}},
228 {{0, 3}, {0, 0}}}},
229 {"abcde", 2, {{{0, 5}, {0, 0}},
230 {{0, 5}, {0, 0}}}},
231 {"foo bar", 0, {{{0, 0}}}},
232 {"fooey bark", 0, {{{0, 0}}}},
233 {"def", 2, {{{2, 5}, {0, 0}},
234 {{4, 7}, {0, 0}}}},
235 {"ghi jkl", 2, {{{0, 3}, {4, 7}, {0, 0}},
236 {{0, 3}, {4, 7}, {0, 0}}}},
237 // NB: GetBookmarksWithTitlesMatching(...) uses exact match for "a".
238 {"a", 1, {{{0, 1}, {0, 0}}}},
239 {"a d", 0, {{{0, 0}}}},
240 {"carry carbon", 1, {{{0, 5}, {6, 12}, {0, 0}}}},
241 // NB: GetBookmarksWithTitlesMatching(...) sorts the match positions.
242 {"carbon carry", 1, {{{0, 5}, {6, 12}, {0, 0}}}},
243 {"arbon", 0, {{{0, 0}}}},
244 {"ar", 0, {{{0, 0}}}},
245 {"arry", 0, {{{0, 0}}}},
246 // Quoted terms are single terms.
247 {"\"carry carbon\"", 1, {{{0, 12}, {0, 0}}}},
248 {"\"carry carbon\" care", 1, {{{0, 12}, {13, 17}, {0, 0}}}},
249 // Quoted terms require complete word matches.
250 {"\"carry carbo\"", 0, {{{0, 0}}}},
251 // This set uses duplicated and/or overlaps search terms in the title.
252 {"frank", 1, {{{0, 5}, {8, 13}, {16, 21}, {0, 0}}}},
253 {"frankly", 1, {{{0, 7}, {8, 15}, {0, 0}}}},
254 {"frankly frankly", 1, {{{0, 7}, {8, 15}, {0, 0}}}},
255 {"foobar foo", 1, {{{0, 6}, {7, 13}, {0, 0}}}},
256 {"foo foobar", 1, {{{0, 6}, {7, 13}, {0, 0}}}},
259 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
260 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
261 base::string16::npos, base::string16(), GURL(),
262 AutocompleteInput::INVALID_SPEC, false, false,
263 false, true);
264 provider_->Start(input, false);
265 const ACMatches& matches(provider_->matches());
266 // Validate number of results is as expected.
267 EXPECT_LE(matches.size(), query_data[i].match_count)
268 << "One or more of the following matches were unexpected:\n"
269 << MatchesAsString16(matches)
270 << "For query '" << query_data[i].query << "'.";
271 EXPECT_GE(matches.size(), query_data[i].match_count)
272 << "One or more expected matches are missing. Matches found:\n"
273 << MatchesAsString16(matches)
274 << "for query '" << query_data[i].query << "'.";
275 // Validate positions within each match is as expected.
276 for (size_t j = 0; j < matches.size(); ++j) {
277 // Collect the expected positions as a vector, collect the match's
278 // classifications for match positions as a vector, then compare.
279 TestBookmarkPositions expected_positions(
280 PositionsFromExpectations(query_data[i].positions[j]));
281 TestBookmarkPositions actual_positions(
282 PositionsFromAutocompleteMatch(matches[j]));
283 EXPECT_TRUE(std::equal(expected_positions.begin(),
284 expected_positions.end(),
285 actual_positions.begin(),
286 TestBookmarkPositionsEqual))
287 << "EXPECTED: " << TestBookmarkPositionsAsString(expected_positions)
288 << "ACTUAL: " << TestBookmarkPositionsAsString(actual_positions)
289 << " for query: '" << query_data[i].query << "'.";
294 TEST_F(BookmarkProviderTest, Rankings) {
295 // Simulate searches.
296 struct QueryData {
297 const std::string query;
298 // |match_count| must match the number of elements in the following
299 // |matches| array.
300 const size_t match_count;
301 // |matches| specifies the titles for all bookmarks expected to be matched
302 // by the |query|
303 const std::string matches[3];
304 } query_data[] = {
305 // Basic ranking test.
306 {"abc", 3, {"abcde", // Most complete match.
307 "abcdef",
308 "abc def"}}, // Least complete match.
309 {"ghi", 2, {"ghi jkl", // Matched earlier.
310 "jkl ghi", // Matched later.
311 ""}},
312 // Rankings of exact-word matches with different URLs.
313 {"achlorhydric",
314 3, {"achlorhydric mockingbirds resuscitates featherhead",
315 "achlorhydric featherheads resuscitates mockingbirds",
316 "featherhead resuscitates achlorhydric mockingbirds"}},
317 {"achlorhydric featherheads",
318 2, {"achlorhydric featherheads resuscitates mockingbirds",
319 "mockingbirds resuscitates featherheads achlorhydric",
320 ""}},
321 {"mockingbirds resuscitates",
322 3, {"mockingbirds resuscitates featherheads achlorhydric",
323 "achlorhydric mockingbirds resuscitates featherhead",
324 "featherhead resuscitates achlorhydric mockingbirds"}},
325 // Ranking of exact-word matches with URL boost.
326 {"worms", 2, {"burning worms #2", // boosted
327 "burning worms #1", // not boosted
328 ""}},
329 // Ranking of prefix matches with URL boost. Note that a query of
330 // "worm burn" will have the same results.
331 {"burn worm", 3, {"burning worms #2", // boosted
332 "worming burns #20", // boosted
333 "burning worms #1"}}, // not boosted but shorter
336 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
337 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
338 base::string16::npos, base::string16(), GURL(),
339 AutocompleteInput::INVALID_SPEC, false, false,
340 false, true);
341 provider_->Start(input, false);
342 const ACMatches& matches(provider_->matches());
343 // Validate number and content of results is as expected.
344 for (size_t j = 0; j < std::max(query_data[i].match_count, matches.size());
345 ++j) {
346 EXPECT_LT(j, query_data[i].match_count) << " Unexpected match '"
347 << base::UTF16ToUTF8(matches[j].description) << "' for query: '"
348 << query_data[i].query << "'.";
349 if (j >= query_data[i].match_count)
350 continue;
351 EXPECT_LT(j, matches.size()) << " Missing match '"
352 << query_data[i].matches[j] << "' for query: '"
353 << query_data[i].query << "'.";
354 if (j >= matches.size())
355 continue;
356 EXPECT_EQ(query_data[i].matches[j],
357 base::UTF16ToUTF8(matches[j].description))
358 << " Mismatch at [" << base::IntToString(j) << "] for query '"
359 << query_data[i].query << "'.";
364 TEST_F(BookmarkProviderTest, InlineAutocompletion) {
365 // Simulate searches.
366 struct QueryData {
367 const std::string query;
368 const std::string url;
369 const bool allowed_to_be_default_match;
370 const std::string inline_autocompletion;
371 } query_data[] = {
372 { "bla", "http://blah.com/", true, "h.com" },
373 { "blah ", "http://blah.com/", false, ".com" },
374 { "http://bl", "http://blah.com/", true, "ah.com" },
375 { "fiddle.c", "http://fiddle.com/", true, "om" },
376 { "www", "http://www.www.com/", true, ".com" },
377 { "chro", "chrome://version", true, "me://version" },
378 { "chrome://ve", "chrome://version", true, "rsion" },
379 { "chrome ver", "chrome://version", false, "" },
380 { "versi", "chrome://version", false, "" },
381 { "abou", "chrome://omnibox", false, "" },
382 { "about:om", "chrome://omnibox", true, "nibox" }
383 // Note: when adding a new URL to this test, be sure to add it to the list
384 // of bookmarks at the top of the file as well. All items in this list
385 // need to be in the bookmarks list because BookmarkProvider's
386 // TitleMatchToACMatch() has an assertion that verifies the URL is
387 // actually bookmarked.
390 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
391 const std::string description = "for query=" + query_data[i].query +
392 " and url=" + query_data[i].url;
393 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
394 base::string16::npos, base::string16(), GURL(),
395 AutocompleteInput::INVALID_SPEC, false, false,
396 false, true);
397 AutocompleteInput fixed_up_input(input);
398 provider_->FixupUserInput(&fixed_up_input);
399 BookmarkNode node(GURL(query_data[i].url));
400 node.SetTitle(base::ASCIIToUTF16(query_data[i].url));
401 BookmarkMatch bookmark_match;
402 bookmark_match.node = &node;
403 const AutocompleteMatch& ac_match = provider_->BookmarkMatchToACMatch(
404 input, fixed_up_input, bookmark_match);
405 EXPECT_EQ(query_data[i].allowed_to_be_default_match,
406 ac_match.allowed_to_be_default_match) << description;
407 EXPECT_EQ(base::ASCIIToUTF16(query_data[i].inline_autocompletion),
408 ac_match.inline_autocompletion) << description;
412 TEST_F(BookmarkProviderTest, StripHttpAndAdjustOffsets) {
413 // Simulate searches.
414 struct QueryData {
415 const std::string query;
416 const std::string expected_contents;
417 // |expected_contents_class| is in format offset:style,offset:style,...
418 const std::string expected_contents_class;
419 } query_data[] = {
420 { "foo", "www.foobar.com", "0:1,4:3,7:1" },
421 { "www foo", "www.foobar.com", "0:3,3:1,4:3,7:1" },
422 { "foo www", "www.foobar.com", "0:3,3:1,4:3,7:1" },
423 { "foo http", "http://www.foobar.com", "0:3,4:1,11:3,14:1" },
424 { "blah", "blah.com", "0:3,4:1" },
425 { "http blah", "http://blah.com", "0:3,4:1,7:3,11:1" },
426 { "dom", "www.domain.com/http/", "0:1,4:3,7:1" },
427 { "dom http", "http://www.domain.com/http/",
428 "0:3,4:1,11:3,14:1,22:3,26:1" },
429 { "rep", "www.repeat.com/1/repeat/2/", "0:1,4:3,7:1,17:3,20:1" },
430 { "versi", "chrome://version", "0:1,9:3,14:1" }
433 // Reload the bookmarks index with |index_urls| == true.
434 model_ = client_.CreateModel(true);
435 SetUp();
437 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
438 std::string description = "for query=" + query_data[i].query;
439 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
440 base::string16::npos, base::string16(), GURL(),
441 AutocompleteInput::INVALID_SPEC, false, false,
442 false, true);
443 provider_->Start(input, false);
444 const ACMatches& matches(provider_->matches());
445 ASSERT_EQ(1U, matches.size()) << description;
446 const AutocompleteMatch& match = matches[0];
447 EXPECT_EQ(base::ASCIIToUTF16(query_data[i].expected_contents),
448 match.contents) << description;
449 std::vector<std::string> class_strings;
450 base::SplitString(
451 query_data[i].expected_contents_class, ',', &class_strings);
452 ASSERT_EQ(class_strings.size(), match.contents_class.size())
453 << description;
454 for (size_t i = 0; i < class_strings.size(); ++i) {
455 std::vector<std::string> chunks;
456 base::SplitString(class_strings[i], ':', &chunks);
457 ASSERT_EQ(2U, chunks.size()) << description;
458 size_t offset;
459 EXPECT_TRUE(base::StringToSizeT(chunks[0], &offset)) << description;
460 EXPECT_EQ(offset, match.contents_class[i].offset) << description;
461 int style;
462 EXPECT_TRUE(base::StringToInt(chunks[1], &style)) << description;
463 EXPECT_EQ(style, match.contents_class[i].style) << description;