[safe-browsing] Database full hash matches like prefix match.
[chromium-blink-merge.git] / chrome / browser / autocomplete / bookmark_provider.cc
blob75b74e7faf0b597c31ff5c293de6a4dd943ff6cb
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/autocomplete/bookmark_provider.h"
7 #include <algorithm>
8 #include <functional>
9 #include <vector>
11 #include "base/prefs/pref_service.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "chrome/browser/autocomplete/autocomplete_result.h"
14 #include "chrome/browser/autocomplete/history_provider.h"
15 #include "chrome/browser/autocomplete/url_prefix.h"
16 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
17 #include "chrome/browser/omnibox/omnibox_field_trial.h"
18 #include "chrome/browser/profiles/profile.h"
19 #include "chrome/common/pref_names.h"
20 #include "components/bookmarks/core/browser/bookmark_match.h"
21 #include "components/bookmarks/core/browser/bookmark_model.h"
22 #include "net/base/net_util.h"
24 typedef std::vector<BookmarkMatch> BookmarkMatches;
26 // BookmarkProvider ------------------------------------------------------------
28 BookmarkProvider::BookmarkProvider(
29 AutocompleteProviderListener* listener,
30 Profile* profile)
31 : AutocompleteProvider(listener, profile,
32 AutocompleteProvider::TYPE_BOOKMARK),
33 bookmark_model_(NULL),
34 score_using_url_matches_(OmniboxFieldTrial::BookmarksIndexURLsValue()) {
35 if (profile) {
36 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
37 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
41 void BookmarkProvider::Start(const AutocompleteInput& input,
42 bool minimal_changes) {
43 if (minimal_changes)
44 return;
45 matches_.clear();
47 if (input.text().empty() ||
48 (input.type() == AutocompleteInput::FORCED_QUERY))
49 return;
51 DoAutocomplete(input);
54 BookmarkProvider::~BookmarkProvider() {}
56 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) {
57 // We may not have a bookmark model for some unit tests.
58 if (!bookmark_model_)
59 return;
61 BookmarkMatches matches;
62 // Retrieve enough bookmarks so that we have a reasonable probability of
63 // suggesting the one that the user desires.
64 const size_t kMaxBookmarkMatches = 50;
66 // GetBookmarksMatching returns bookmarks matching the user's
67 // search terms using the following rules:
68 // - The search text is broken up into search terms. Each term is searched
69 // for separately.
70 // - Term matches are always performed against the start of a word. 'def'
71 // will match against 'define' but not against 'indefinite'.
72 // - Terms must be at least three characters in length in order to perform
73 // partial word matches. Any term of lesser length will only be used as an
74 // exact match. 'def' will match against 'define' but 'de' will not match.
75 // - A search containing multiple terms will return results with those words
76 // occuring in any order.
77 // - Terms enclosed in quotes comprises a phrase that must match exactly.
78 // - Multiple terms enclosed in quotes will require those exact words in that
79 // exact order to match.
81 // Please refer to the code for BookmarkIndex::GetBookmarksMatching for
82 // complete details of how searches are performed against the user's
83 // bookmarks.
84 bookmark_model_->GetBookmarksMatching(input.text(),
85 kMaxBookmarkMatches,
86 &matches);
87 if (matches.empty())
88 return; // There were no matches.
89 AutocompleteInput fixed_up_input(input);
90 FixupUserInput(&fixed_up_input);
91 for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end();
92 ++i) {
93 // Create and score the AutocompleteMatch. If its score is 0 then the
94 // match is discarded.
95 AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i));
96 if (match.relevance > 0)
97 matches_.push_back(match);
100 // Sort and clip the resulting matches.
101 size_t num_matches =
102 std::min(matches_.size(), AutocompleteProvider::kMaxMatches);
103 std::partial_sort(matches_.begin(), matches_.begin() + num_matches,
104 matches_.end(), AutocompleteMatch::MoreRelevant);
105 matches_.resize(num_matches);
108 namespace {
110 // for_each helper functor that calculates a match factor for each query term
111 // when calculating the final score.
113 // Calculate a 'factor' from 0 to the bookmark's title length for a match
114 // based on 1) how many characters match and 2) where the match is positioned.
115 class ScoringFunctor {
116 public:
117 // |title_length| is the length of the bookmark title against which this
118 // match will be scored.
119 explicit ScoringFunctor(size_t title_length)
120 : title_length_(static_cast<double>(title_length)),
121 scoring_factor_(0.0) {
124 void operator()(const query_parser::Snippet::MatchPosition& match) {
125 double term_length = static_cast<double>(match.second - match.first);
126 scoring_factor_ += term_length *
127 (title_length_ - match.first) / title_length_;
130 double ScoringFactor() { return scoring_factor_; }
132 private:
133 double title_length_;
134 double scoring_factor_;
137 } // namespace
139 AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch(
140 const AutocompleteInput& input,
141 const AutocompleteInput& fixed_up_input,
142 const BookmarkMatch& bookmark_match) {
143 // The AutocompleteMatch we construct is non-deletable because the only
144 // way to support this would be to delete the underlying bookmark, which is
145 // unlikely to be what the user intends.
146 AutocompleteMatch match(this, 0, false,
147 AutocompleteMatchType::BOOKMARK_TITLE);
148 base::string16 title(bookmark_match.node->GetTitle());
149 const GURL& url(bookmark_match.node->url());
150 const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec());
151 size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset(
152 input, fixed_up_input, false, url_utf16);
153 match.destination_url = url;
154 const size_t match_start = bookmark_match.url_match_positions.empty() ?
155 0 : bookmark_match.url_match_positions[0].first;
156 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) &&
157 ((match_start == base::string16::npos) || (match_start != 0));
158 std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions(
159 bookmark_match.url_match_positions);
160 // In addition to knowing how |offsets| is transformed, we need to know how
161 // |inline_autocomplete_offset| is transformed. We add it to the end of
162 // |offsets|, compute how everything is transformed, then remove it from the
163 // end.
164 offsets.push_back(inline_autocomplete_offset);
165 match.contents = net::FormatUrlWithOffsets(url, languages_,
166 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP),
167 net::UnescapeRule::SPACES, NULL, NULL, &offsets);
168 inline_autocomplete_offset = offsets.back();
169 offsets.pop_back();
170 BookmarkMatch::MatchPositions new_url_match_positions =
171 BookmarkMatch::ReplaceOffsetsInMatchPositions(
172 bookmark_match.url_match_positions, offsets);
173 match.contents_class =
174 ClassificationsFromMatch(new_url_match_positions,
175 match.contents.size(),
176 true);
177 match.fill_into_edit =
178 AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
179 match.contents);
180 if (inline_autocomplete_offset != base::string16::npos) {
181 // |inline_autocomplete_offset| may be beyond the end of the
182 // |fill_into_edit| if the user has typed an URL with a scheme and the
183 // last character typed is a slash. That slash is removed by the
184 // FormatURLWithOffsets call above.
185 if (inline_autocomplete_offset < match.fill_into_edit.length()) {
186 match.inline_autocompletion =
187 match.fill_into_edit.substr(inline_autocomplete_offset);
189 match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
190 !HistoryProvider::PreventInlineAutocomplete(input);
192 match.description = title;
193 match.description_class =
194 ClassificationsFromMatch(bookmark_match.title_match_positions,
195 match.description.size(),
196 false);
197 match.starred = true;
199 // Summary on how a relevance score is determined for the match:
201 // For each match within the bookmark's title or URL (or both), calculate a
202 // 'factor', sum up those factors, then use the sum to figure out a value
203 // between the base score and the maximum score.
205 // The factor for each match is the product of:
207 // 1) how many characters in the bookmark's title/URL are part of this match.
208 // This is capped at the length of the bookmark's title
209 // to prevent terms that match in both the title and the URL from
210 // scoring too strongly.
212 // 2) where the match occurs within the bookmark's title or URL,
213 // giving more points for matches that appear earlier in the string:
214 // ((string_length - position of match start) / string_length).
216 // Example: Given a bookmark title of 'abcde fghijklm', with a title length
217 // of 14, and two different search terms, 'abcde' and 'fghij', with
218 // start positions of 0 and 6, respectively, 'abcde' will score higher
219 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
220 // a partial factor of (14-6)/14 = 0.571 ). (In this example neither
221 // term matches in the URL.)
223 // Once all match factors have been calculated they are summed. If URL
224 // matches are not considered, the resulting sum will never be greater than
225 // the length of the bookmark title because of the way the bookmark model
226 // matches and removes overlaps. (In particular, the bookmark model only
227 // matches terms to the beginning of words and it removes all overlapping
228 // matches, keeping only the longest. Together these mean that each
229 // character is included in at most one match.) If URL matches are
230 // considered, the sum can be greater.
232 // This sum is then normalized by the length of the bookmark title (if URL
233 // matches are not considered) or by the length of the bookmark title + 10
234 // (if URL matches are considered) and capped at 1.0. (If URL matches
235 // are considered, we want to expand the scoring range so fewer bookmarks
236 // will hit the 1.0 cap and hence lose all ability to distinguish between
237 // these high-quality bookmarks.)
239 // The normalized value is multiplied against the scoring range available,
240 // which is 299. The 299 is calculated by subtracting the minimum possible
241 // score, 900, from the maximum possible score, 1199. This product, ranging
242 // from 0 to 299, is added to the minimum possible score, 900, giving the
243 // preliminary score.
245 // If the preliminary score is less than the maximum possible score, 1199,
246 // it can be boosted up to that maximum possible score if the URL referenced
247 // by the bookmark is also referenced by any of the user's other bookmarks.
248 // A count of how many times the bookmark's URL is referenced is determined
249 // and, for each additional reference beyond the one for the bookmark being
250 // scored up to a maximum of three, the score is boosted by a fixed amount
251 // given by |kURLCountBoost|, below.
253 if (score_using_url_matches_) {
254 // Pretend empty titles are identical to the URL.
255 if (title.empty())
256 title = base::ASCIIToUTF16(url.spec());
257 } else {
258 DCHECK(!title.empty());
260 ScoringFunctor title_position_functor =
261 for_each(bookmark_match.title_match_positions.begin(),
262 bookmark_match.title_match_positions.end(),
263 ScoringFunctor(title.size()));
264 ScoringFunctor url_position_functor =
265 for_each(bookmark_match.url_match_positions.begin(),
266 bookmark_match.url_match_positions.end(),
267 ScoringFunctor(bookmark_match.node->url().spec().length()));
268 const double summed_factors = title_position_functor.ScoringFactor() +
269 (score_using_url_matches_ ? url_position_functor.ScoringFactor() : 0);
270 const double normalized_sum = std::min(
271 summed_factors / (title.size() + (score_using_url_matches_ ? 10 : 0)),
272 1.0);
273 const int kBaseBookmarkScore = 900;
274 const int kMaxBookmarkScore = 1199;
275 const double kBookmarkScoreRange =
276 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
277 match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) +
278 kBaseBookmarkScore;
279 // Don't waste any time searching for additional referenced URLs if we
280 // already have a perfect title match.
281 if (match.relevance >= kMaxBookmarkScore)
282 return match;
283 // Boost the score if the bookmark's URL is referenced by other bookmarks.
284 const int kURLCountBoost[4] = { 0, 75, 125, 150 };
285 std::vector<const BookmarkNode*> nodes;
286 bookmark_model_->GetNodesByURL(url, &nodes);
287 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
288 match.relevance +=
289 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
290 match.relevance = std::min(kMaxBookmarkScore, match.relevance);
291 return match;
294 // static
295 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
296 const query_parser::Snippet::MatchPositions& positions,
297 size_t text_length,
298 bool is_url) {
299 ACMatchClassification::Style url_style =
300 is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
301 ACMatchClassifications classifications;
302 if (positions.empty()) {
303 classifications.push_back(
304 ACMatchClassification(0, url_style));
305 return classifications;
308 for (query_parser::Snippet::MatchPositions::const_iterator i =
309 positions.begin();
310 i != positions.end();
311 ++i) {
312 AutocompleteMatch::ACMatchClassifications new_class;
313 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
314 text_length, url_style, &new_class);
315 classifications = AutocompleteMatch::MergeClassifications(
316 classifications, new_class);
318 return classifications;