// Give names to all utility processes.
// [chromium-blink-merge.git] / chrome / browser / autocomplete / scored_history_match_unittest.cc
// blob 25a91efb28d2fa94979eda49c4a9203262bbec86
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/autocomplete/scored_history_match.h"
7 #include <algorithm>
9 #include "base/auto_reset.h"
10 #include "base/bind.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "testing/gmock/include/gmock/gmock.h"
15 #include "testing/gtest/include/gtest/gtest.h"
17 using base::ASCIIToUTF16;
18 using testing::ElementsAre;
19 using testing::Pair;
21 namespace {
23 // Returns a VisitInfoVector that includes |num_visits| spread over the
24 // last |frequency|*|num_visits| days (relative to |now|). A frequency of
25 // one means one visit each day, two means every other day, etc.
26 VisitInfoVector CreateVisitInfoVector(int num_visits,
27 int frequency,
28 base::Time now) {
29 VisitInfoVector visits;
30 for (int i = 0; i < num_visits; ++i) {
31 visits.push_back(
32 std::make_pair(now - base::TimeDelta::FromDays(i * frequency),
33 ui::PAGE_TRANSITION_LINK));
35 return visits;
38 } // namespace
40 class ScoredHistoryMatchTest : public testing::Test {
41 protected:
42 // Convenience function to create a history::URLRow with basic data for |url|,
43 // |title|, |visit_count|, and |typed_count|. |days_since_last_visit| gives
44 // the number of days ago to which to set the URL's last_visit.
45 history::URLRow MakeURLRow(const char* url,
46 const char* title,
47 int visit_count,
48 int days_since_last_visit,
49 int typed_count);
51 // Convenience function to set the word starts information from a
52 // history::URLRow's URL and title.
53 void PopulateWordStarts(const history::URLRow& url_row,
54 RowWordStarts* word_starts);
56 // Convenience functions for easily creating vectors of search terms.
57 String16Vector Make1Term(const char* term) const;
58 String16Vector Make2Terms(const char* term_1, const char* term_2) const;
60 // Convenience function for GetTopicalityScore() that builds the term match
61 // and word break information automatically that are needed to call
62 // GetTopicalityScore(). It only works for scoring a single term, not
63 // multiple terms.
64 float GetTopicalityScoreOfTermAgainstURLAndTitle(const base::string16& term,
65 const base::string16& url,
66 const base::string16& title);
69 history::URLRow ScoredHistoryMatchTest::MakeURLRow(const char* url,
70 const char* title,
71 int visit_count,
72 int days_since_last_visit,
73 int typed_count) {
74 history::URLRow row(GURL(url), 0);
75 row.set_title(ASCIIToUTF16(title));
76 row.set_visit_count(visit_count);
77 row.set_typed_count(typed_count);
78 row.set_last_visit(base::Time::NowFromSystemTime() -
79 base::TimeDelta::FromDays(days_since_last_visit));
80 return row;
83 void ScoredHistoryMatchTest::PopulateWordStarts(const history::URLRow& url_row,
84 RowWordStarts* word_starts) {
85 String16SetFromString16(ASCIIToUTF16(url_row.url().spec()),
86 &word_starts->url_word_starts_);
87 String16SetFromString16(url_row.title(), &word_starts->title_word_starts_);
90 String16Vector ScoredHistoryMatchTest::Make1Term(const char* term) const {
91 String16Vector original_terms;
92 original_terms.push_back(ASCIIToUTF16(term));
93 return original_terms;
96 String16Vector ScoredHistoryMatchTest::Make2Terms(const char* term_1,
97 const char* term_2) const {
98 String16Vector original_terms;
99 original_terms.push_back(ASCIIToUTF16(term_1));
100 original_terms.push_back(ASCIIToUTF16(term_2));
101 return original_terms;
104 float ScoredHistoryMatchTest::GetTopicalityScoreOfTermAgainstURLAndTitle(
105 const base::string16& term,
106 const base::string16& url,
107 const base::string16& title) {
108 // Make an empty match and simply populate the fields we need in order
109 // to call GetTopicalityScore().
110 ScoredHistoryMatch scored_match;
111 scored_match.url_matches = MatchTermInString(term, url, 0);
112 scored_match.title_matches = MatchTermInString(term, title, 0);
113 RowWordStarts word_starts;
114 String16SetFromString16(url, &word_starts.url_word_starts_);
115 String16SetFromString16(title, &word_starts.title_word_starts_);
116 WordStarts one_word_no_offset(1, 0u);
117 return scored_match.GetTopicalityScore(1, url, one_word_no_offset,
118 word_starts);
121 TEST_F(ScoredHistoryMatchTest, Scoring) {
122 // We use NowFromSystemTime() because MakeURLRow uses the same function
123 // to calculate last visit time when building a row.
124 base::Time now = base::Time::NowFromSystemTime();
126 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1));
127 RowWordStarts word_starts_a;
128 PopulateWordStarts(row_a, &word_starts_a);
129 WordStarts one_word_no_offset(1, 0u);
130 VisitInfoVector visits_a = CreateVisitInfoVector(3, 30, now);
131 // Mark one visit as typed.
132 visits_a[0].second = ui::PAGE_TRANSITION_TYPED;
133 ScoredHistoryMatch scored_a(row_a, visits_a, std::string(),
134 ASCIIToUTF16("abc"), Make1Term("abc"),
135 one_word_no_offset, word_starts_a, false, now);
137 // Test scores based on visit_count.
138 history::URLRow row_b(MakeURLRow("http://abcdef", "abcd bcd", 10, 30, 1));
139 RowWordStarts word_starts_b;
140 PopulateWordStarts(row_b, &word_starts_b);
141 VisitInfoVector visits_b = CreateVisitInfoVector(10, 30, now);
142 visits_b[0].second = ui::PAGE_TRANSITION_TYPED;
143 ScoredHistoryMatch scored_b(row_b, visits_b, std::string(),
144 ASCIIToUTF16("abc"), Make1Term("abc"),
145 one_word_no_offset, word_starts_b, false, now);
146 EXPECT_GT(scored_b.raw_score, scored_a.raw_score);
148 // Test scores based on last_visit.
149 history::URLRow row_c(MakeURLRow("http://abcdef", "abcd bcd", 3, 10, 1));
150 RowWordStarts word_starts_c;
151 PopulateWordStarts(row_c, &word_starts_c);
152 VisitInfoVector visits_c = CreateVisitInfoVector(3, 10, now);
153 visits_c[0].second = ui::PAGE_TRANSITION_TYPED;
154 ScoredHistoryMatch scored_c(row_c, visits_c, std::string(),
155 ASCIIToUTF16("abc"), Make1Term("abc"),
156 one_word_no_offset, word_starts_c, false, now);
157 EXPECT_GT(scored_c.raw_score, scored_a.raw_score);
159 // Test scores based on typed_count.
160 history::URLRow row_d(MakeURLRow("http://abcdef", "abcd bcd", 3, 30, 3));
161 RowWordStarts word_starts_d;
162 PopulateWordStarts(row_d, &word_starts_d);
163 VisitInfoVector visits_d = CreateVisitInfoVector(3, 30, now);
164 visits_d[0].second = ui::PAGE_TRANSITION_TYPED;
165 visits_d[1].second = ui::PAGE_TRANSITION_TYPED;
166 visits_d[2].second = ui::PAGE_TRANSITION_TYPED;
167 ScoredHistoryMatch scored_d(row_d, visits_d, std::string(),
168 ASCIIToUTF16("abc"), Make1Term("abc"),
169 one_word_no_offset, word_starts_d, false, now);
170 EXPECT_GT(scored_d.raw_score, scored_a.raw_score);
172 // Test scores based on a terms appearing multiple times.
173 history::URLRow row_e(MakeURLRow(
174 "http://csi.csi.csi/csi_csi",
175 "CSI Guide to CSI Las Vegas, CSI New York, CSI Provo", 3, 30, 3));
176 RowWordStarts word_starts_e;
177 PopulateWordStarts(row_e, &word_starts_e);
178 const VisitInfoVector visits_e = visits_d;
179 ScoredHistoryMatch scored_e(row_e, visits_e, std::string(),
180 ASCIIToUTF16("csi"), Make1Term("csi"),
181 one_word_no_offset, word_starts_e, false, now);
182 EXPECT_LT(scored_e.raw_score, 1400);
184 // Test that a result with only a mid-term match (i.e., not at a word
185 // boundary) scores 0.
186 ScoredHistoryMatch scored_f(row_a, visits_a, std::string(),
187 ASCIIToUTF16("cd"), Make1Term("cd"),
188 one_word_no_offset, word_starts_a, false, now);
189 EXPECT_EQ(scored_f.raw_score, 0);
192 TEST_F(ScoredHistoryMatchTest, ScoringBookmarks) {
193 // We use NowFromSystemTime() because MakeURLRow uses the same function
194 // to calculate last visit time when building a row.
195 base::Time now = base::Time::NowFromSystemTime();
197 std::string url_string("http://fedcba");
198 const GURL url(url_string);
199 history::URLRow row(MakeURLRow(url_string.c_str(), "abcd bcd", 8, 3, 1));
200 RowWordStarts word_starts;
201 PopulateWordStarts(row, &word_starts);
202 WordStarts one_word_no_offset(1, 0u);
203 VisitInfoVector visits = CreateVisitInfoVector(8, 3, now);
204 ScoredHistoryMatch scored(row, visits, std::string(), ASCIIToUTF16("abc"),
205 Make1Term("abc"), one_word_no_offset, word_starts,
206 false, now);
207 // Now check that if URL is bookmarked then its score increases.
208 base::AutoReset<int> reset(&ScoredHistoryMatch::bookmark_value_, 5);
209 ScoredHistoryMatch scored_with_bookmark(
210 row, visits, std::string(), ASCIIToUTF16("abc"), Make1Term("abc"),
211 one_word_no_offset, word_starts, true, now);
212 EXPECT_GT(scored_with_bookmark.raw_score, scored.raw_score);
215 TEST_F(ScoredHistoryMatchTest, ScoringTLD) {
216 // We use NowFromSystemTime() because MakeURLRow uses the same function
217 // to calculate last visit time when building a row.
218 base::Time now = base::Time::NowFromSystemTime();
220 // By default the URL should not be returned for a query that includes "com".
221 std::string url_string("http://fedcba.com/");
222 const GURL url(url_string);
223 history::URLRow row(MakeURLRow(url_string.c_str(), "", 8, 3, 1));
224 RowWordStarts word_starts;
225 PopulateWordStarts(row, &word_starts);
226 WordStarts two_words_no_offsets(2, 0u);
227 VisitInfoVector visits = CreateVisitInfoVector(8, 3, now);
228 ScoredHistoryMatch scored(row, visits, std::string(), ASCIIToUTF16("fed com"),
229 Make2Terms("fed", "com"), two_words_no_offsets,
230 word_starts, false, now);
231 EXPECT_EQ(0, scored.raw_score);
233 // Now allow credit for the match in the TLD.
234 base::AutoReset<bool> reset(&ScoredHistoryMatch::allow_tld_matches_, true);
235 ScoredHistoryMatch scored_with_tld(
236 row, visits, std::string(), ASCIIToUTF16("fed com"),
237 Make2Terms("fed", "com"), two_words_no_offsets, word_starts, false, now);
238 EXPECT_GT(scored_with_tld.raw_score, 0);
241 TEST_F(ScoredHistoryMatchTest, ScoringScheme) {
242 // We use NowFromSystemTime() because MakeURLRow uses the same function
243 // to calculate last visit time when building a row.
244 base::Time now = base::Time::NowFromSystemTime();
246 // By default the URL should not be returned for a query that includes "http".
247 std::string url_string("http://fedcba/");
248 const GURL url(url_string);
249 history::URLRow row(MakeURLRow(url_string.c_str(), "", 8, 3, 1));
250 RowWordStarts word_starts;
251 PopulateWordStarts(row, &word_starts);
252 WordStarts two_words_no_offsets(2, 0u);
253 VisitInfoVector visits = CreateVisitInfoVector(8, 3, now);
254 ScoredHistoryMatch scored(row, visits, std::string(),
255 ASCIIToUTF16("fed http"), Make2Terms("fed", "http"),
256 two_words_no_offsets, word_starts, false, now);
257 EXPECT_EQ(0, scored.raw_score);
259 // Now allow credit for the match in the scheme.
260 base::AutoReset<bool> reset(&ScoredHistoryMatch::allow_scheme_matches_, true);
261 ScoredHistoryMatch scored_with_scheme(
262 row, visits, std::string(), ASCIIToUTF16("fed http"),
263 Make2Terms("fed", "http"), two_words_no_offsets, word_starts, false, now);
264 EXPECT_GT(scored_with_scheme.raw_score, 0);
267 TEST_F(ScoredHistoryMatchTest, Inlining) {
268 // We use NowFromSystemTime() because MakeURLRow uses the same function
269 // to calculate last visit time when building a row.
270 base::Time now = base::Time::NowFromSystemTime();
271 RowWordStarts word_starts;
272 WordStarts one_word_no_offset(1, 0u);
273 VisitInfoVector visits;
276 history::URLRow row(
277 MakeURLRow("http://www.google.com", "abcdef", 3, 30, 1));
278 PopulateWordStarts(row, &word_starts);
279 ScoredHistoryMatch scored_a(row, visits, std::string(), ASCIIToUTF16("g"),
280 Make1Term("g"), one_word_no_offset, word_starts,
281 false, now);
282 EXPECT_TRUE(scored_a.can_inline);
283 EXPECT_FALSE(scored_a.match_in_scheme);
284 ScoredHistoryMatch scored_b(row, visits, std::string(), ASCIIToUTF16("w"),
285 Make1Term("w"), one_word_no_offset, word_starts,
286 false, now);
287 EXPECT_TRUE(scored_b.can_inline);
288 EXPECT_FALSE(scored_b.match_in_scheme);
289 ScoredHistoryMatch scored_c(row, visits, std::string(), ASCIIToUTF16("h"),
290 Make1Term("h"), one_word_no_offset, word_starts,
291 false, now);
292 EXPECT_TRUE(scored_c.can_inline);
293 EXPECT_TRUE(scored_c.match_in_scheme);
294 ScoredHistoryMatch scored_d(row, visits, std::string(), ASCIIToUTF16("o"),
295 Make1Term("o"), one_word_no_offset, word_starts,
296 false, now);
297 EXPECT_FALSE(scored_d.can_inline);
298 EXPECT_FALSE(scored_d.match_in_scheme);
302 history::URLRow row(MakeURLRow("http://teams.foo.com", "abcdef", 3, 30, 1));
303 PopulateWordStarts(row, &word_starts);
304 ScoredHistoryMatch scored_a(row, visits, std::string(), ASCIIToUTF16("t"),
305 Make1Term("t"), one_word_no_offset, word_starts,
306 false, now);
307 EXPECT_TRUE(scored_a.can_inline);
308 EXPECT_FALSE(scored_a.match_in_scheme);
309 ScoredHistoryMatch scored_b(row, visits, std::string(), ASCIIToUTF16("f"),
310 Make1Term("f"), one_word_no_offset, word_starts,
311 false, now);
312 EXPECT_FALSE(scored_b.can_inline);
313 EXPECT_FALSE(scored_b.match_in_scheme);
314 ScoredHistoryMatch scored_c(row, visits, std::string(), ASCIIToUTF16("o"),
315 Make1Term("o"), one_word_no_offset, word_starts,
316 false, now);
317 EXPECT_FALSE(scored_c.can_inline);
318 EXPECT_FALSE(scored_c.match_in_scheme);
322 history::URLRow row(
323 MakeURLRow("https://www.testing.com", "abcdef", 3, 30, 1));
324 PopulateWordStarts(row, &word_starts);
325 ScoredHistoryMatch scored_a(row, visits, std::string(), ASCIIToUTF16("t"),
326 Make1Term("t"), one_word_no_offset, word_starts,
327 false, now);
328 EXPECT_TRUE(scored_a.can_inline);
329 EXPECT_FALSE(scored_a.match_in_scheme);
330 ScoredHistoryMatch scored_b(row, visits, std::string(), ASCIIToUTF16("h"),
331 Make1Term("h"), one_word_no_offset, word_starts,
332 false, now);
333 EXPECT_TRUE(scored_b.can_inline);
334 EXPECT_TRUE(scored_b.match_in_scheme);
335 ScoredHistoryMatch scored_c(row, visits, std::string(), ASCIIToUTF16("w"),
336 Make1Term("w"), one_word_no_offset, word_starts,
337 false, now);
338 EXPECT_TRUE(scored_c.can_inline);
339 EXPECT_FALSE(scored_c.match_in_scheme);
343 TEST_F(ScoredHistoryMatchTest, GetTopicalityScoreTrailingSlash) {
344 const float hostname = GetTopicalityScoreOfTermAgainstURLAndTitle(
345 ASCIIToUTF16("def"), ASCIIToUTF16("http://abc.def.com/"),
346 ASCIIToUTF16("Non-Matching Title"));
347 const float hostname_no_slash = GetTopicalityScoreOfTermAgainstURLAndTitle(
348 ASCIIToUTF16("def"), ASCIIToUTF16("http://abc.def.com"),
349 ASCIIToUTF16("Non-Matching Title"));
350 EXPECT_EQ(hostname_no_slash, hostname);
353 // This function only tests scoring of single terms that match exactly
354 // once somewhere in the URL or title.
355 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) {
356 base::string16 url = ASCIIToUTF16(
357 "http://abc.def.com/path1/path2?"
358 "arg1=val1&arg2=val2#hash_component");
359 base::string16 title = ASCIIToUTF16("here is a title");
360 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
361 ASCIIToUTF16("abc"), url, title);
362 const float hostname_mid_word_score =
363 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url,
364 title);
365 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
366 ASCIIToUTF16("def"), url, title);
367 const float domain_name_mid_word_score =
368 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url,
369 title);
370 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
371 ASCIIToUTF16("com"), url, title);
372 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
373 ASCIIToUTF16("om"), url, title);
374 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
375 ASCIIToUTF16("path1"), url, title);
376 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
377 ASCIIToUTF16("ath1"), url, title);
378 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
379 ASCIIToUTF16("arg2"), url, title);
380 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
381 ASCIIToUTF16("rg2"), url, title);
382 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
383 ASCIIToUTF16("htt"), url, title);
384 const float protocol_mid_word_score =
385 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url,
386 title);
387 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
388 ASCIIToUTF16("her"), url, title);
389 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
390 ASCIIToUTF16("er"), url, title);
391 // Verify hostname and domain name > path > arg.
392 EXPECT_GT(hostname_score, path_score);
393 EXPECT_GT(domain_name_score, path_score);
394 EXPECT_GT(path_score, arg_score);
395 // Verify that domain name > path and domain name > arg for non-word
396 // boundaries.
397 EXPECT_GT(hostname_mid_word_score, path_mid_word_score);
398 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score);
399 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score);
400 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score);
401 // Also verify that the matches at non-word-boundaries all score
402 // worse than the matches at word boundaries. These three sets suffice.
403 EXPECT_GT(arg_score, hostname_mid_word_score);
404 EXPECT_GT(arg_score, domain_name_mid_word_score);
405 EXPECT_GT(title_score, title_mid_word_score);
406 // Check that title matches fit somewhere reasonable compared to the
407 // various types of URL matches.
408 EXPECT_GT(title_score, arg_score);
409 EXPECT_GT(arg_score, title_mid_word_score);
410 // Finally, verify that protocol matches and top level domain name
411 // matches (.com, .net, etc.) score worse than some of the mid-word
412 // matches that actually count.
413 EXPECT_GT(hostname_mid_word_score, protocol_score);
414 EXPECT_GT(hostname_mid_word_score, protocol_mid_word_score);
415 EXPECT_GT(hostname_mid_word_score, tld_score);
416 EXPECT_GT(hostname_mid_word_score, tld_mid_word_score);
419 // Test the function GetFinalRelevancyScore().
420 TEST_F(ScoredHistoryMatchTest, GetFinalRelevancyScore) {
421 // hqp_relevance_buckets = "0.0:100,1.0:200,4.0:500,8.0:900,10.0:1000";
422 std::vector<ScoredHistoryMatch::ScoreMaxRelevance> hqp_buckets;
423 hqp_buckets.push_back(std::make_pair(0.0, 100));
424 hqp_buckets.push_back(std::make_pair(1.0, 200));
425 hqp_buckets.push_back(std::make_pair(4.0, 500));
426 hqp_buckets.push_back(std::make_pair(8.0, 900));
427 hqp_buckets.push_back(std::make_pair(10.0, 1000));
428 // Check when topicality score is zero.
429 float topicality_score = 0.0;
430 float frequency_score = 10.0;
431 // intermediate_score = 0.0 * 10.0 = 0.0.
432 EXPECT_EQ(0, ScoredHistoryMatch::GetFinalRelevancyScore(
433 topicality_score, frequency_score, hqp_buckets));
435 // Check when intermediate score falls at the border range.
436 topicality_score = 0.4f;
437 frequency_score = 10.0f;
438 // intermediate_score = 0.5 * 10.0 = 4.0.
439 EXPECT_EQ(500, ScoredHistoryMatch::GetFinalRelevancyScore(
440 topicality_score, frequency_score, hqp_buckets));
442 // Checking the score that falls into one of the buckets.
443 topicality_score = 0.5f;
444 frequency_score = 10.0f;
445 // intermediate_score = 0.5 * 10.0 = 5.0.
446 EXPECT_EQ(600, // 500 + (((900 - 500)/(8 -4)) * 1) = 600.
447 ScoredHistoryMatch::GetFinalRelevancyScore(
448 topicality_score, frequency_score, hqp_buckets));
450 // Never give the score greater than maximum specified.
451 topicality_score = 0.5f;
452 frequency_score = 22.0f;
453 // intermediate_score = 0.5 * 22.0 = 11.0
454 EXPECT_EQ(1000, ScoredHistoryMatch::GetFinalRelevancyScore(
455 topicality_score, frequency_score, hqp_buckets));
458 // Test the function GetHQPBucketsFromString().
459 TEST_F(ScoredHistoryMatchTest, GetHQPBucketsFromString) {
460 std::string buckets_str = "0.0:400,1.5:600,12.0:1300,20.0:1399";
461 std::vector<ScoredHistoryMatch::ScoreMaxRelevance> hqp_buckets;
463 EXPECT_TRUE(
464 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets));
465 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600),
466 Pair(12.0, 1300), Pair(20.0, 1399)));
467 // invalid string.
468 buckets_str = "0.0,400,1.5,600";
469 EXPECT_FALSE(
470 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets));