From ff34ad59ce2dd38e33ff85ed64a3b14f4a324a31 Mon Sep 17 00:00:00 2001 From: "bartn@chromium.org" Date: Thu, 26 Dec 2013 22:41:34 +0000 Subject: [PATCH] HUP Experimental Scoring framework. High level summary: (a) Introduce a new set of Finch variation params and use it in the Omnibox bundled experiment (b) Group together HUP scoring params in a HUPScoringParams struct and initialize it based on the new experiment params (c) Modify HUP scoring by applying optional demotion This change is fairly safe because of the following reasons: (1) It is disabled by default (2) It never changes the relative order of HUP matches (3) It can only demote a HUP match (4) It is fairly isolated and unobtrusive BUG=295756 TESTS=OmniboxFieldTrialTest,HistoryURLProviderTest Review URL: https://codereview.chromium.org/23707058 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@242569 0039d316-1c4b-4281-b951-d872f2087c98 --- .../browser/autocomplete/history_url_provider.cc | 74 ++++++- chrome/browser/autocomplete/history_url_provider.h | 18 ++ .../autocomplete/history_url_provider_unittest.cc | 232 +++++++++++++++------ chrome/browser/omnibox/omnibox_field_trial.cc | 108 +++++++++- chrome/browser/omnibox/omnibox_field_trial.h | 85 ++++++++ .../omnibox/omnibox_field_trial_unittest.cc | 58 ++++++ 6 files changed, 511 insertions(+), 64 deletions(-) diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc index 7c9d4797f880..037904d91794 100644 --- a/chrome/browser/autocomplete/history_url_provider.cc +++ b/chrome/browser/autocomplete/history_url_provider.cc @@ -14,8 +14,10 @@ #include "base/prefs/pref_service.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" +#include "base/time/time.h" #include "chrome/browser/autocomplete/autocomplete_match.h" #include "chrome/browser/autocomplete/autocomplete_provider_listener.h" +#include "chrome/browser/autocomplete/autocomplete_result.h" #include 
"chrome/browser/history/history_backend.h" #include "chrome/browser/history/history_database.h" #include "chrome/browser/history/history_service.h" @@ -187,6 +189,37 @@ void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, match->RecordAdditionalInfo("last visit", info.last_visit()); } +// Calculates a new relevance score applying half-life time decaying to |count| +// using |time_since_last_visit| and |score_buckets|. +// This function will never return a score higher than |undecayed_relevance|. +// In other words, it can only demote the old score. +double CalculateRelevanceUsingScoreBuckets( + const HUPScoringParams::ScoreBuckets& score_buckets, + const base::TimeDelta& time_since_last_visit, + int undecayed_relevance, + int count) { + // Back off if above relevance cap. + if ((score_buckets.relevance_cap() != -1) && + (undecayed_relevance >= score_buckets.relevance_cap())) + return undecayed_relevance; + + // Time based decay using half-life time. + double decayed_count = count; + if (decayed_count > 0) + decayed_count *= score_buckets.HalfLifeTimeDecay(time_since_last_visit); + + // Find a threshold where decayed_count >= bucket. + const HUPScoringParams::ScoreBuckets::CountMaxRelevance* score_bucket = NULL; + for (size_t i = 0; i < score_buckets.buckets().size(); ++i) { + score_bucket = &score_buckets.buckets()[i]; + if (decayed_count >= score_bucket->first) + break; // Buckets are in descending order, so we can ignore the rest. + } + + return (score_bucket && (undecayed_relevance > score_bucket->second)) ? + score_bucket->second : undecayed_relevance; +} + } // namespace // ----------------------------------------------------------------- @@ -372,6 +405,8 @@ HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, !OmniboxFieldTrial:: InHUPCreateShorterMatchFieldTrialExperimentGroup()), search_url_database_(true) { + // Initialize HUP scoring params based on the current experiment. 
+ OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params_); } void HistoryURLProvider::Start(const AutocompleteInput& input, @@ -606,6 +641,11 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, CalculateRelevance(NORMAL, history_matches.size() - 1 - i); AutocompleteMatch ac_match = HistoryMatchToACMatch(*params, match, NORMAL, relevance); + // The experimental scoring must not change the top result's score. + if (!params->matches.empty()) { + relevance = CalculateRelevanceScoreUsingScoringParams(match, relevance); + ac_match.relevance = relevance; + } params->matches.push_back(ac_match); } } @@ -877,8 +917,10 @@ bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( // future pass from suggesting the exact input as a better match. if (params) { params->dont_suggest_exact_input = true; - params->matches.push_back(HistoryMatchToACMatch(*params, match, - INLINE_AUTOCOMPLETE, CalculateRelevance(INLINE_AUTOCOMPLETE, 0))); + AutocompleteMatch ac_match = HistoryMatchToACMatch( + *params, match, INLINE_AUTOCOMPLETE, + CalculateRelevance(INLINE_AUTOCOMPLETE, 0)); + params->matches.push_back(ac_match); } return true; } @@ -1095,11 +1137,37 @@ AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( return match; } +int HistoryURLProvider::CalculateRelevanceScoreUsingScoringParams( + const history::HistoryMatch& match, + int old_relevance) const { + if (!scoring_params_.experimental_scoring_enabled) + return old_relevance; + + const base::TimeDelta time_since_last_visit = + base::Time::Now() - match.url_info.last_visit(); + + int relevance = CalculateRelevanceUsingScoreBuckets( + scoring_params_.typed_count_buckets, time_since_last_visit, old_relevance, + match.url_info.typed_count()); + + // Additional demotion (on top of typed_count demotion) of URLs that were + // never typed. 
+ if (match.url_info.typed_count() == 0) { + relevance = CalculateRelevanceUsingScoreBuckets( + scoring_params_.visited_count_buckets, time_since_last_visit, relevance, + match.url_info.visit_count()); + } + + DCHECK_LE(relevance, old_relevance); + return relevance; +} + // static ACMatchClassifications HistoryURLProvider::ClassifyDescription( const base::string16& input_text, const base::string16& description) { - base::string16 clean_description = history::CleanUpTitleForMatching(description); + base::string16 clean_description = history::CleanUpTitleForMatching( + description); history::TermMatches description_matches(SortAndDeoverlapMatches( history::MatchTermInString(input_text, clean_description, 0))); history::WordStarts description_word_starts; diff --git a/chrome/browser/autocomplete/history_url_provider.h b/chrome/browser/autocomplete/history_url_provider.h index 3926e0327be4..aec0bb97bd85 100644 --- a/chrome/browser/autocomplete/history_url_provider.h +++ b/chrome/browser/autocomplete/history_url_provider.h @@ -14,6 +14,7 @@ #include "chrome/browser/autocomplete/history_provider.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/autocomplete/url_prefix.h" +#include "chrome/browser/omnibox/omnibox_field_trial.h" #include "chrome/browser/search_engines/search_terms_data.h" #include "chrome/browser/search_engines/template_url.h" @@ -197,6 +198,8 @@ class HistoryURLProvider : public HistoryProvider { void QueryComplete(HistoryURLProviderParams* params_gets_deleted); private: + FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest, HUPScoringExperiment); + enum MatchType { NORMAL, WHAT_YOU_TYPED, @@ -285,6 +288,8 @@ class HistoryURLProvider : public HistoryProvider { const std::vector& remove) const; // Converts a line from the database into an autocomplete match for display. + // If experimental scoring is enabled, the final relevance score might be + // different from the given |relevance|. 
AutocompleteMatch HistoryMatchToACMatch( const HistoryURLProviderParams& params, const history::HistoryMatch& history_match, @@ -297,12 +302,25 @@ class HistoryURLProvider : public HistoryProvider { const base::string16& input_text, const base::string16& description); + // Returns a new relevance score for the given |match| based on the + // |old_relevance| score and |scoring_params_|. The new relevance score is + // guaranteed to be less than or equal to |old_relevance|. In other words, + // this function can only demote a score, never boost it. + // Returns |old_relevance| unchanged if experimental scoring is disabled. + // Exempting the top match from demotion is the caller's responsibility. + int CalculateRelevanceScoreUsingScoringParams( + const history::HistoryMatch& match, + int old_relevance) const; + // Params for the current query. The provider should not free this directly; // instead, it is passed as a parameter through the history backend, and the // parameter itself is freed once it's no longer needed. The only reason we // keep this member is so we can set the cancel bit on it. HistoryURLProviderParams* params_; + // Params controlling experimental behavior of this provider. + HUPScoringParams scoring_params_; + // If true, HistoryURL provider should lookup and cull redirects.
If // false, it returns matches that may be redirects to each other and // simply hopes the default AutoCompleteController behavior to remove diff --git a/chrome/browser/autocomplete/history_url_provider_unittest.cc b/chrome/browser/autocomplete/history_url_provider_unittest.cc index 50e745eaee0d..a5f63b389032 100644 --- a/chrome/browser/autocomplete/history_url_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_url_provider_unittest.cc @@ -39,97 +39,107 @@ struct TestURLInfo { const char* title; int visit_count; int typed_count; + int age_in_days; } test_db[] = { - {"http://www.google.com/", "Google", 3, 3}, + {"http://www.google.com/", "Google", 3, 3, 80}, // High-quality pages should get a host synthesized as a lower-quality match. - {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100}, + {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100, 80}, // Less popular pages should have hosts synthesized as higher-quality // matches. - {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0}, + {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0, 80}, // Unpopular pages should not appear in the results at all. - {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 0}, + {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 0, 80}, // If a host has a match, we should pick it up during host synthesis. - {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2}, - {"http://news.google.com/", "Google News", 1, 1}, + {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 80}, + {"http://news.google.com/", "Google News", 1, 1, 80}, // Matches that are normally not inline-autocompletable should be // autocompleted if they are shorter substitutes for longer matches that would // have been inline autocompleted. 
- {"http://synthesisatest.com/foo/", "Test A", 1, 1}, - {"http://synthesisbtest.com/foo/", "Test B", 1, 1}, - {"http://synthesisbtest.com/foo/bar.html", "Test B Bar", 2, 2}, + {"http://synthesisatest.com/foo/", "Test A", 1, 1, 80}, + {"http://synthesisbtest.com/foo/", "Test B", 1, 1, 80}, + {"http://synthesisbtest.com/foo/bar.html", "Test B Bar", 2, 2, 80}, // Suggested short URLs must be "good enough" and must match user input. - {"http://foo.com/", "Dir", 5, 5}, - {"http://foo.com/dir/", "Dir", 2, 2}, - {"http://foo.com/dir/another/", "Dir", 5, 1}, - {"http://foo.com/dir/another/again/", "Dir", 10, 0}, - {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2}, + {"http://foo.com/", "Dir", 5, 5, 80}, + {"http://foo.com/dir/", "Dir", 2, 2, 80}, + {"http://foo.com/dir/another/", "Dir", 5, 1, 80}, + {"http://foo.com/dir/another/again/", "Dir", 10, 0, 80}, + {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 80}, // We throw in a lot of extra URLs here to make sure we're testing the // history database's query, not just the autocomplete provider. 
- {"http://startest.com/y/a", "A", 2, 2}, - {"http://startest.com/y/b", "B", 5, 2}, - {"http://startest.com/x/c", "C", 5, 2}, - {"http://startest.com/x/d", "D", 5, 5}, - {"http://startest.com/y/e", "E", 4, 2}, - {"http://startest.com/y/f", "F", 3, 2}, - {"http://startest.com/y/g", "G", 3, 2}, - {"http://startest.com/y/h", "H", 3, 2}, - {"http://startest.com/y/i", "I", 3, 2}, - {"http://startest.com/y/j", "J", 3, 2}, - {"http://startest.com/y/k", "K", 3, 2}, - {"http://startest.com/y/l", "L", 3, 2}, - {"http://startest.com/y/m", "M", 3, 2}, + {"http://startest.com/y/a", "A", 2, 2, 80}, + {"http://startest.com/y/b", "B", 5, 2, 80}, + {"http://startest.com/x/c", "C", 5, 2, 80}, + {"http://startest.com/x/d", "D", 5, 5, 80}, + {"http://startest.com/y/e", "E", 4, 2, 80}, + {"http://startest.com/y/f", "F", 3, 2, 80}, + {"http://startest.com/y/g", "G", 3, 2, 80}, + {"http://startest.com/y/h", "H", 3, 2, 80}, + {"http://startest.com/y/i", "I", 3, 2, 80}, + {"http://startest.com/y/j", "J", 3, 2, 80}, + {"http://startest.com/y/k", "K", 3, 2, 80}, + {"http://startest.com/y/l", "L", 3, 2, 80}, + {"http://startest.com/y/m", "M", 3, 2, 80}, // A file: URL is useful for testing that fixup does the right thing w.r.t. // the number of trailing slashes on the user's input. - {"file:///C:/foo.txt", "", 2, 2}, + {"file:///C:/foo.txt", "", 2, 2, 80}, // Results with absurdly high typed_counts so that very generic queries like // "http" will give consistent results even if more data is added above. - {"http://bogussite.com/a", "Bogus A", 10002, 10000}, - {"http://bogussite.com/b", "Bogus B", 10001, 10000}, - {"http://bogussite.com/c", "Bogus C", 10000, 10000}, + {"http://bogussite.com/a", "Bogus A", 10002, 10000, 80}, + {"http://bogussite.com/b", "Bogus B", 10001, 10000, 80}, + {"http://bogussite.com/c", "Bogus C", 10000, 10000, 80}, // Domain name with number. 
- {"http://www.17173.com/", "Domain with number", 3, 3}, + {"http://www.17173.com/", "Domain with number", 3, 3, 80}, // URLs to test exact-matching behavior. - {"http://go/", "Intranet URL", 1, 1}, - {"http://gooey/", "Intranet URL 2", 5, 5}, + {"http://go/", "Intranet URL", 1, 1, 80}, + {"http://gooey/", "Intranet URL 2", 5, 5, 80}, // URLs for testing offset adjustment. - {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2}, - {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2}, - {"http://ms/c++%20style%20guide", "Style guide", 2, 2}, + {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2, 80}, + {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 80}, + {"http://ms/c++%20style%20guide", "Style guide", 2, 2, 80}, // URLs for testing ctrl-enter behavior. - {"http://binky/", "Intranet binky", 2, 2}, - {"http://winky/", "Intranet winky", 2, 2}, - {"http://www.winky.com/", "Internet winky", 5, 0}, + {"http://binky/", "Intranet binky", 2, 2, 80}, + {"http://winky/", "Intranet winky", 2, 2, 80}, + {"http://www.winky.com/", "Internet winky", 5, 0, 80}, // URLs used by EmptyVisits. - {"http://pandora.com/", "Pandora", 2, 2}, + {"http://pandora.com/", "Pandora", 2, 2, 80}, // This entry is explicitly added more recently than // history::kLowQualityMatchAgeLimitInDays. - // {"http://p/", "p", 0, 0}, + // {"http://p/", "p", 0, 0, 80}, // For intranet based tests. 
- {"http://intra/one", "Intranet", 2, 2}, - {"http://intra/two", "Intranet two", 1, 1}, - {"http://intra/three", "Intranet three", 2, 2}, - {"http://moo/bar", "Intranet moo", 1, 1}, - {"http://typedhost/typedpath", "Intranet typed", 1, 1}, - {"http://typedhost/untypedpath", "Intranet untyped", 1, 0}, - - {"http://x.com/one", "Internet", 2, 2}, - {"http://x.com/two", "Internet two", 1, 1}, - {"http://x.com/three", "Internet three", 2, 2}, + {"http://intra/one", "Intranet", 2, 2, 80}, + {"http://intra/two", "Intranet two", 1, 1, 80}, + {"http://intra/three", "Intranet three", 2, 2, 80}, + {"http://moo/bar", "Intranet moo", 1, 1, 80}, + {"http://typedhost/typedpath", "Intranet typed", 1, 1, 80}, + {"http://typedhost/untypedpath", "Intranet untyped", 1, 0, 80}, + + {"http://x.com/one", "Internet", 2, 2, 80}, + {"http://x.com/two", "Internet two", 1, 1, 80}, + {"http://x.com/three", "Internet three", 2, 2, 80}, + + // For experimental HUP scoring test. + {"http://7.com/1a", "One", 8, 4, 4}, + {"http://7.com/2a", "Two A", 4, 2, 8}, + {"http://7.com/2b", "Two B", 4, 1, 8}, + {"http://7.com/3a", "Three", 2, 1, 16}, + {"http://7.com/4a", "Four A", 1, 1, 32}, + {"http://7.com/4b", "Four B", 1, 1, 64}, + {"http://7.com/5a", "Five A", 8, 0, 64}, // never typed. }; class HistoryURLProviderTest : public testing::Test, @@ -236,23 +246,22 @@ void HistoryURLProviderTest::TearDown() { } void HistoryURLProviderTest::FillData() { - // All visits are a long time ago (some tests require this since we do some + // Most visits are a long time ago (some tests require this since we do some // special logic for things visited very recently). Note that this time must // be more recent than the "archived history" threshold for the data to go // into the main database. // // TODO(brettw) It would be nice if we could test this behavior, in which // case the time would be specifed in the test_db structure. 
- Time visit_time = Time::Now() - TimeDelta::FromDays(80); + const Time now = Time::Now(); for (size_t i = 0; i < arraysize(test_db); ++i) { const TestURLInfo& cur = test_db[i]; const GURL current_url(cur.url); - history_service_->AddPageWithDetails(current_url, - base::UTF8ToUTF16(cur.title), - cur.visit_count, cur.typed_count, - visit_time, false, - history::SOURCE_BROWSED); + history_service_->AddPageWithDetails( + current_url, base::UTF8ToUTF16(cur.title), cur.visit_count, + cur.typed_count, now - TimeDelta::FromDays(cur.age_in_days), false, + history::SOURCE_BROWSED); } history_service_->AddPageWithDetails( @@ -909,3 +918,108 @@ TEST_F(HistoryURLProviderTest, SuggestExactInput) { EXPECT_EQ(npos, test_cases[i].offsets[match.contents_class.size()]); } } + +TEST_F(HistoryURLProviderTest, HUPScoringExperiment) { + HUPScoringParams max_2000_no_time_decay; + max_2000_no_time_decay.typed_count_buckets.buckets().push_back( + std::make_pair(0.0, 2000)); + HUPScoringParams max_1250_no_time_decay; + max_1250_no_time_decay.typed_count_buckets.buckets().push_back( + std::make_pair(0.0, 1250)); + HUPScoringParams max_1000_no_time_decay; + max_1000_no_time_decay.typed_count_buckets.buckets().push_back( + std::make_pair(0.0, 1000)); + + HUPScoringParams max_1100_with_time_decay_and_max_cap; + max_1100_with_time_decay_and_max_cap.typed_count_buckets. + set_relevance_cap(1400); + max_1100_with_time_decay_and_max_cap.typed_count_buckets. 
+ set_half_life_days(16); + max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back( + std::make_pair(0.5, 1100)); + max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back( + std::make_pair(0.24, 200)); + max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back( + std::make_pair(0.0, 100)); + + HUPScoringParams max_1100_visit_typed_decays; + max_1100_visit_typed_decays.typed_count_buckets.set_half_life_days(16); + max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back( + std::make_pair(0.5, 1100)); + max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back( + std::make_pair(0.0, 100)); + max_1100_visit_typed_decays.visited_count_buckets.set_half_life_days(16); + max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back( + std::make_pair(0.5, 550)); + max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back( + std::make_pair(0.0, 50)); + + const int kMaxMatches = 3; + struct TestCase { + const char* input; + HUPScoringParams scoring_params; + struct ExpectedMatch { + const char* url; + int control_relevance; + int experiment_relevance; + }; + ExpectedMatch matches[kMaxMatches]; + } test_cases[] = { + // Max score 2000 -> no demotion. + { "7.com/1", max_2000_no_time_decay, + {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} }, + + // Limit score to 1250/1000 and make sure that the top match is unchanged. + { "7.com/1", max_1250_no_time_decay, + {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} }, + { "7.com/2", max_1250_no_time_decay, + {{"7.com/2a", 1413, 1413}, {"7.com/2b", 1412, 1250}, {NULL, 0, 0}} }, + { "7.com/4", max_1000_no_time_decay, + {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 1000}, + {"7.com/4b", 1201, 999}} }, + + // Max relevance cap is 1400 and half-life is 16 days. 
+ { "7.com/1", max_1100_with_time_decay_and_max_cap, + {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} }, + { "7.com/4", max_1100_with_time_decay_and_max_cap, + {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 200}, + {"7.com/4b", 1201, 100}} }, + + // No relevance cap; half-life is 16 days for both typed and visited counts. + { "7.com/5", max_1100_visit_typed_decays, + {{"7.com/5", 1203, 1203}, {"7.com/5a", 1202, 50}, {NULL, 0, 0}} }, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { + SCOPED_TRACE(test_cases[i].input); + UrlAndLegalDefault output[kMaxMatches]; + int max_matches; + for (max_matches = 0; max_matches < kMaxMatches; ++max_matches) { + if (test_cases[i].matches[max_matches].url == NULL) + break; + output[max_matches].url = URLFixerUpper::FixupURL( + test_cases[i].matches[max_matches].url, std::string()).spec(); + output[max_matches].allowed_to_be_default_match = true; + } + autocomplete_->scoring_params_ = test_cases[i].scoring_params; + + // Test the control (scoring disabled). + autocomplete_->scoring_params_.experimental_scoring_enabled = false; + ASSERT_NO_FATAL_FAILURE( + RunTest(ASCIIToUTF16(test_cases[i].input), + string16(), false, output, max_matches)); + for (int j = 0; j < max_matches; ++j) { + EXPECT_EQ(test_cases[i].matches[j].control_relevance, + matches_[j].relevance); + } + + // Test the experiment (scoring enabled).
+ autocomplete_->scoring_params_.experimental_scoring_enabled = true; + ASSERT_NO_FATAL_FAILURE( + RunTest(ASCIIToUTF16(test_cases[i].input), + string16(), false, output, max_matches)); + for (int j = 0; j < max_matches; ++j) { + EXPECT_EQ(test_cases[i].matches[j].experiment_relevance, + matches_[j].relevance); + } + } +} diff --git a/chrome/browser/omnibox/omnibox_field_trial.cc b/chrome/browser/omnibox/omnibox_field_trial.cc index 8b6de0205a08..93ec15b188d9 100644 --- a/chrome/browser/omnibox/omnibox_field_trial.cc +++ b/chrome/browser/omnibox/omnibox_field_trial.cc @@ -4,6 +4,7 @@ #include "chrome/browser/omnibox/omnibox_field_trial.h" +#include #include #include "base/metrics/field_trial.h" @@ -11,6 +12,7 @@ #include "base/strings/string_split.h" #include "base/strings/string_util.h" #include "base/strings/stringprintf.h" +#include "base/time/time.h" #include "chrome/browser/autocomplete/autocomplete_input.h" #include "chrome/browser/search/search.h" #include "chrome/common/metrics/variations/variation_ids.h" @@ -19,6 +21,9 @@ namespace { +typedef std::map<std::string, std::string> VariationParams; +typedef HUPScoringParams::ScoreBuckets ScoreBuckets; + // Field trial names.
const char kHUPCullRedirectsFieldTrialName[] = "OmniboxHUPCullRedirects"; const char kHUPCreateShorterMatchFieldTrialName[] = @@ -83,8 +88,66 @@ std::string DynamicFieldTrialName(int id) { return base::StringPrintf("%s%d", kAutocompleteDynamicFieldTrialPrefix, id); } +void InitializeScoreBuckets(const VariationParams& params, + const char* relevance_cap_param, + const char* half_life_param, + const char* score_buckets_param, + ScoreBuckets* score_buckets) { + VariationParams::const_iterator it = params.find(relevance_cap_param); + if (it != params.end()) { + int relevance_cap; + if (base::StringToInt(it->second, &relevance_cap)) + score_buckets->set_relevance_cap(relevance_cap); + } + + it = params.find(half_life_param); + if (it != params.end()) { + int half_life_days; + if (base::StringToInt(it->second, &half_life_days)) + score_buckets->set_half_life_days(half_life_days); + } + + it = params.find(score_buckets_param); + if (it != params.end()) { + // The value of the score bucket is a comma-separated list of + // {DecayedCount + ":" + MaxRelevance}. 
+ base::StringPairs kv_pairs; + if (base::SplitStringIntoKeyValuePairs(it->second, ':', ',', &kv_pairs)) { + for (base::StringPairs::const_iterator it = kv_pairs.begin(); + it != kv_pairs.end(); ++it) { + ScoreBuckets::CountMaxRelevance bucket; + base::StringToDouble(it->first, &bucket.first); + base::StringToInt(it->second, &bucket.second); + score_buckets->buckets().push_back(bucket); + } + std::sort(score_buckets->buckets().begin(), + score_buckets->buckets().end(), + std::greater<ScoreBuckets::CountMaxRelevance>()); + } + } +} + } // namespace +HUPScoringParams::ScoreBuckets::ScoreBuckets() + : relevance_cap_(-1), + half_life_days_(-1) { +} + +HUPScoringParams::ScoreBuckets::~ScoreBuckets() { +} + +double HUPScoringParams::ScoreBuckets::HalfLifeTimeDecay( + const base::TimeDelta& elapsed_time) const { + double time_ms; + if ((half_life_days_ <= 0) || + ((time_ms = elapsed_time.InMillisecondsF()) <= 0)) + return 1.0; + + const double half_life_intervals = + time_ms / base::TimeDelta::FromDays(half_life_days_).InMillisecondsF(); + return pow(2.0, -half_life_intervals); +} void OmniboxFieldTrial::ActivateStaticTrials() { DCHECK(!static_field_trials_initialized); @@ -305,6 +368,32 @@ bool OmniboxFieldTrial::ReorderForLegalDefaultMatch( kReorderForLegalDefaultMatchRuleEnabled; } +void OmniboxFieldTrial::GetExperimentalHUPScoringParams( + HUPScoringParams* scoring_params) { + scoring_params->experimental_scoring_enabled = false; + + VariationParams params; + if (!chrome_variations::GetVariationParams(kBundledExperimentFieldTrialName, + &params)) + return; + + VariationParams::const_iterator it = params.find(kHUPNewScoringEnabledParam); + if (it != params.end()) { + int enabled = 0; + if (base::StringToInt(it->second, &enabled)) + scoring_params->experimental_scoring_enabled = (enabled != 0); + } + + InitializeScoreBuckets(params, kHUPNewScoringTypedCountRelevanceCapParam, + kHUPNewScoringTypedCountHalfLifeTimeParam, + kHUPNewScoringTypedCountScoreBucketsParam, + &scoring_params->typed_count_buckets); +
InitializeScoreBuckets(params, kHUPNewScoringVisitedCountRelevanceCapParam, + kHUPNewScoringVisitedCountHalfLifeTimeParam, + kHUPNewScoringVisitedCountScoreBucketsParam, + &scoring_params->visited_count_buckets); +} + int OmniboxFieldTrial::HQPBookmarkValue() { std::string bookmark_value_str = chrome_variations:: GetVariationParamValue(kBundledExperimentFieldTrialName, @@ -356,6 +445,21 @@ const char OmniboxFieldTrial::kHQPAllowMatchInSchemeRule[] = const char OmniboxFieldTrial::kReorderForLegalDefaultMatchRuleEnabled[] = "ReorderForLegalDefaultMatch"; +const char OmniboxFieldTrial::kHUPNewScoringEnabledParam[] = + "HUPExperimentalScoringEnabled"; +const char OmniboxFieldTrial::kHUPNewScoringTypedCountRelevanceCapParam[] = + "TypedCountRelevanceCap"; +const char OmniboxFieldTrial::kHUPNewScoringTypedCountHalfLifeTimeParam[] = + "TypedCountHalfLifeTime"; +const char OmniboxFieldTrial::kHUPNewScoringTypedCountScoreBucketsParam[] = + "TypedCountScoreBuckets"; +const char OmniboxFieldTrial::kHUPNewScoringVisitedCountRelevanceCapParam[] = + "VisitedCountRelevanceCap"; +const char OmniboxFieldTrial::kHUPNewScoringVisitedCountHalfLifeTimeParam[] = + "VisitedCountHalfLifeTime"; +const char OmniboxFieldTrial::kHUPNewScoringVisitedCountScoreBucketsParam[] = + "VisitedCountScoreBuckets"; + // Background and implementation details: // // Each experiment group in any field trial can come with an optional set of @@ -392,7 +496,7 @@ const char OmniboxFieldTrial::kReorderForLegalDefaultMatchRuleEnabled[] = std::string OmniboxFieldTrial::GetValueForRuleInContext( const std::string& rule, AutocompleteInput::PageClassification page_classification) { - std::map<std::string, std::string> params; + VariationParams params; if (!chrome_variations::GetVariationParams(kBundledExperimentFieldTrialName, &params)) { return std::string(); @@ -402,7 +506,7 @@ std::string OmniboxFieldTrial::GetValueForRuleInContext( const std::string instant_extended = chrome::IsInstantExtendedAPIEnabled() ?
"1" : "0"; // Look up rule in this exact context. - std::map<std::string, std::string>::iterator it = params.find( + VariationParams::const_iterator it = params.find( rule + ":" + page_classification_str + ":" + instant_extended); if (it != params.end()) return it->second; diff --git a/chrome/browser/omnibox/omnibox_field_trial.h b/chrome/browser/omnibox/omnibox_field_trial.h index 9e9ee55f45c8..a75eccc9d74f 100644 --- a/chrome/browser/omnibox/omnibox_field_trial.h +++ b/chrome/browser/omnibox/omnibox_field_trial.h @@ -15,6 +15,74 @@ #include "chrome/browser/autocomplete/autocomplete_input.h" #include "chrome/common/autocomplete_match_type.h" +namespace base { +class TimeDelta; +} + +// The set of parameters customizing the HUP scoring. +struct HUPScoringParams { + // A set of parameters describing how to cap a given count score. First, + // we apply a half-life based decay of the given count and then find the + // maximum relevance score in the corresponding bucket list. + class ScoreBuckets { + public: + // (decayed_count, max_relevance) pair. + typedef std::pair<double, int> CountMaxRelevance; + + ScoreBuckets(); + ~ScoreBuckets(); + + // Computes a half-life time decay given the |elapsed_time|. + double HalfLifeTimeDecay(const base::TimeDelta& elapsed_time) const; + + int relevance_cap() const { return relevance_cap_; } + void set_relevance_cap(int relevance_cap) { + relevance_cap_ = relevance_cap; + } + + int half_life_days() const { return half_life_days_; } + void set_half_life_days(int half_life_days) { + half_life_days_ = half_life_days; + } + + std::vector<CountMaxRelevance>& buckets() { return buckets_; } + const std::vector<CountMaxRelevance>& buckets() const { return buckets_; } + + private: + // History matches with relevance score greater or equal to |relevance_cap_| + // are not affected by this experiment. + // Set to -1, if there is no relevance cap in place and all matches are + // subject to demotion. + int relevance_cap_; + + // Half life time for a decayed count as measured since the last visit.
+ // Set to -1 if not used. + int half_life_days_; + + // The relevance score caps for given decayed count values. + // Each pair (decayed_count, max_score) indicates what the maximum relevance + // score is of a decayed count equal or greater than decayed_count. + // + // Consider this example: + // [(1, 1000), (0.5, 500), (0, 100)] + // If decayed count is 2 (which is >= 1), the corresponding match's maximum + // relevance will be capped at 1000. In case of 0.5, the score is capped + // at 500. Anything below 0.5 is capped at 100. + // + // This list is sorted by the pair's first element in descending order. + std::vector<CountMaxRelevance> buckets_; + }; + + HUPScoringParams() : experimental_scoring_enabled(false) {} + + bool experimental_scoring_enabled; + + ScoreBuckets typed_count_buckets; + + // Used only when the typed count is 0. + ScoreBuckets visited_count_buckets; +}; + // This class manages the Omnibox field trials. class OmniboxFieldTrial { public: @@ -175,6 +243,14 @@ class OmniboxFieldTrial { AutocompleteInput::PageClassification current_page_classification); // --------------------------------------------------------- + // For the HistoryURL provider new scoring experiment that is part of the + // bundled omnibox field trial. + + // Initializes the HUP |scoring_params| based on the active HUP scoring + // experiment. If there is no such experiment, this function simply sets + // |scoring_params|->experimental_scoring_enabled to false. + static void GetExperimentalHUPScoringParams(HUPScoringParams* scoring_params); + // For the HQPBookmarkValue experiment that's part of the // bundled omnibox field trial. @@ -228,6 +304,15 @@ class OmniboxFieldTrial { // Rule values. static const char kReorderForLegalDefaultMatchRuleEnabled[]; + // Parameter names used by the HUP new scoring experiments.
+ static const char kHUPNewScoringEnabledParam[]; + static const char kHUPNewScoringTypedCountRelevanceCapParam[]; + static const char kHUPNewScoringTypedCountHalfLifeTimeParam[]; + static const char kHUPNewScoringTypedCountScoreBucketsParam[]; + static const char kHUPNewScoringVisitedCountRelevanceCapParam[]; + static const char kHUPNewScoringVisitedCountHalfLifeTimeParam[]; + static const char kHUPNewScoringVisitedCountScoreBucketsParam[]; + private: friend class OmniboxFieldTrialTest; diff --git a/chrome/browser/omnibox/omnibox_field_trial_unittest.cc b/chrome/browser/omnibox/omnibox_field_trial_unittest.cc index 4eda3542abf0..32e1d80aae66 100644 --- a/chrome/browser/omnibox/omnibox_field_trial_unittest.cc +++ b/chrome/browser/omnibox/omnibox_field_trial_unittest.cc @@ -325,3 +325,61 @@ TEST_F(OmniboxFieldTrialTest, GetValueForRuleInContext) { "rule5", AutocompleteInput::OTHER); // no rule at all } } + +TEST_F(OmniboxFieldTrialTest, HUPNewScoringFieldTrial) { + { + std::map<std::string, std::string> params; + params[std::string(OmniboxFieldTrial::kHUPNewScoringEnabledParam)] = "1"; + params[std::string( + OmniboxFieldTrial::kHUPNewScoringTypedCountRelevanceCapParam)] = "56"; + params[std::string( + OmniboxFieldTrial::kHUPNewScoringTypedCountHalfLifeTimeParam)] = "77"; + params[std::string( + OmniboxFieldTrial::kHUPNewScoringTypedCountScoreBucketsParam)] = + "0.2:25,0.1:1001,2.3:777"; + params[std::string( + OmniboxFieldTrial::kHUPNewScoringVisitedCountRelevanceCapParam)] = "11"; + params[std::string( + OmniboxFieldTrial::kHUPNewScoringVisitedCountHalfLifeTimeParam)] = "31"; + params[std::string( + OmniboxFieldTrial::kHUPNewScoringVisitedCountScoreBucketsParam)] = + "5:300,0:200"; + ASSERT_TRUE(chrome_variations::AssociateVariationParams( + OmniboxFieldTrial::kBundledExperimentFieldTrialName, "A", params)); + } + base::FieldTrialList::CreateFieldTrial( + OmniboxFieldTrial::kBundledExperimentFieldTrialName, "A"); + + HUPScoringParams scoring_params; +
OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params); + EXPECT_TRUE(scoring_params.experimental_scoring_enabled); + EXPECT_EQ(56, scoring_params.typed_count_buckets.relevance_cap()); + EXPECT_EQ(77, scoring_params.typed_count_buckets.half_life_days()); + ASSERT_EQ(3u, scoring_params.typed_count_buckets.buckets().size()); + EXPECT_EQ(std::make_pair(2.3, 777), + scoring_params.typed_count_buckets.buckets()[0]); + EXPECT_EQ(std::make_pair(0.2, 25), + scoring_params.typed_count_buckets.buckets()[1]); + EXPECT_EQ(std::make_pair(0.1, 1001), + scoring_params.typed_count_buckets.buckets()[2]); + EXPECT_EQ(11, scoring_params.visited_count_buckets.relevance_cap()); + EXPECT_EQ(31, scoring_params.visited_count_buckets.half_life_days()); + ASSERT_EQ(2u, scoring_params.visited_count_buckets.buckets().size()); + EXPECT_EQ(std::make_pair(5.0, 300), + scoring_params.visited_count_buckets.buckets()[0]); + EXPECT_EQ(std::make_pair(0.0, 200), + scoring_params.visited_count_buckets.buckets()[1]); +} + +TEST_F(OmniboxFieldTrialTest, HalfLifeTimeDecay) { + HUPScoringParams::ScoreBuckets buckets; + + // No decay by default. + EXPECT_EQ(1.0, buckets.HalfLifeTimeDecay(base::TimeDelta::FromDays(7))); + + buckets.set_half_life_days(7); + EXPECT_EQ(0.5, buckets.HalfLifeTimeDecay(base::TimeDelta::FromDays(7))); + EXPECT_EQ(0.25, buckets.HalfLifeTimeDecay(base::TimeDelta::FromDays(14))); + EXPECT_EQ(1.0, buckets.HalfLifeTimeDecay(base::TimeDelta::FromDays(0))); + EXPECT_EQ(1.0, buckets.HalfLifeTimeDecay(base::TimeDelta::FromDays(-1))); +} -- 2.11.4.GIT