1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/omnibox/bookmark_provider.h"
11 #include "base/prefs/pref_service.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "components/bookmarks/browser/bookmark_match.h"
15 #include "components/bookmarks/browser/bookmark_model.h"
16 #include "components/metrics/proto/omnibox_input_type.pb.h"
17 #include "components/omnibox/autocomplete_provider_client.h"
18 #include "components/omnibox/autocomplete_result.h"
19 #include "components/omnibox/history_provider.h"
20 #include "components/omnibox/url_prefix.h"
21 #include "net/base/net_util.h"
22 #include "url/url_constants.h"
24 using bookmarks::BookmarkMatch
;
25 using bookmarks::BookmarkNode
;
27 typedef std::vector
<BookmarkMatch
> BookmarkMatches
;
31 // Removes leading spaces from |title| before displaying, otherwise it looks
32 // funny. In the process, corrects |title_match_positions| so the correct
33 // characters are highlighted.
34 void CorrectTitleAndMatchPositions(
35 base::string16
* title
,
36 BookmarkMatch::MatchPositions
* title_match_positions
) {
37 size_t leading_whitespace_chars
= title
->length();
38 base::TrimWhitespace(*title
, base::TRIM_LEADING
, title
);
39 leading_whitespace_chars
-= title
->length();
40 if (leading_whitespace_chars
== 0)
42 for (query_parser::Snippet::MatchPositions::iterator it
=
43 title_match_positions
->begin();
44 it
!= title_match_positions
->end(); ++it
) {
45 (*it
) = query_parser::Snippet::MatchPosition(
46 it
->first
- leading_whitespace_chars
,
47 it
->second
- leading_whitespace_chars
);
53 // BookmarkProvider ------------------------------------------------------------
55 BookmarkProvider::BookmarkProvider(AutocompleteProviderClient
* client
)
56 : AutocompleteProvider(AutocompleteProvider::TYPE_BOOKMARK
),
58 bookmark_model_(NULL
) {
60 bookmark_model_
= client_
->GetBookmarkModel();
61 languages_
= client_
->GetAcceptLanguages();
65 void BookmarkProvider::Start(const AutocompleteInput
& input
,
66 bool minimal_changes
) {
71 if (input
.from_omnibox_focus() || input
.text().empty() ||
72 (input
.type() == metrics::OmniboxInputType::FORCED_QUERY
))
75 DoAutocomplete(input
);
78 BookmarkProvider::~BookmarkProvider() {}
80 void BookmarkProvider::DoAutocomplete(const AutocompleteInput
& input
) {
81 // We may not have a bookmark model for some unit tests.
85 BookmarkMatches matches
;
86 // Retrieve enough bookmarks so that we have a reasonable probability of
87 // suggesting the one that the user desires.
88 const size_t kMaxBookmarkMatches
= 50;
90 // GetBookmarksMatching returns bookmarks matching the user's
91 // search terms using the following rules:
92 // - The search text is broken up into search terms. Each term is searched
94 // - Term matches are always performed against the start of a word. 'def'
95 // will match against 'define' but not against 'indefinite'.
96 // - Terms must be at least three characters in length in order to perform
97 // partial word matches. Any term of lesser length will only be used as an
98 // exact match. 'def' will match against 'define' but 'de' will not match.
99 // - A search containing multiple terms will return results with those words
100 // occuring in any order.
101 // - Terms enclosed in quotes comprises a phrase that must match exactly.
102 // - Multiple terms enclosed in quotes will require those exact words in that
103 // exact order to match.
105 // Please refer to the code for BookmarkIndex::GetBookmarksMatching for
106 // complete details of how searches are performed against the user's
108 bookmark_model_
->GetBookmarksMatching(input
.text(),
112 return; // There were no matches.
113 const base::string16
fixed_up_input(FixupUserInput(input
).second
);
114 for (BookmarkMatches::const_iterator i
= matches
.begin(); i
!= matches
.end();
116 // Create and score the AutocompleteMatch. If its score is 0 then the
117 // match is discarded.
118 AutocompleteMatch
match(BookmarkMatchToACMatch(input
, fixed_up_input
, *i
));
119 if (match
.relevance
> 0)
120 matches_
.push_back(match
);
123 // Sort and clip the resulting matches.
125 std::min(matches_
.size(), AutocompleteProvider::kMaxMatches
);
126 std::partial_sort(matches_
.begin(), matches_
.begin() + num_matches
,
127 matches_
.end(), AutocompleteMatch::MoreRelevant
);
128 matches_
.resize(num_matches
);
133 // for_each helper functor that calculates a match factor for each query term
134 // when calculating the final score.
136 // Calculate a 'factor' from 0 to the bookmark's title length for a match
137 // based on 1) how many characters match and 2) where the match is positioned.
138 class ScoringFunctor
{
140 // |title_length| is the length of the bookmark title against which this
141 // match will be scored.
142 explicit ScoringFunctor(size_t title_length
)
143 : title_length_(static_cast<double>(title_length
)),
144 scoring_factor_(0.0) {
147 void operator()(const query_parser::Snippet::MatchPosition
& match
) {
148 double term_length
= static_cast<double>(match
.second
- match
.first
);
149 scoring_factor_
+= term_length
*
150 (title_length_
- match
.first
) / title_length_
;
153 double ScoringFactor() { return scoring_factor_
; }
156 double title_length_
;
157 double scoring_factor_
;
162 AutocompleteMatch
BookmarkProvider::BookmarkMatchToACMatch(
163 const AutocompleteInput
& input
,
164 const base::string16
& fixed_up_input_text
,
165 const BookmarkMatch
& bookmark_match
) {
166 // The AutocompleteMatch we construct is non-deletable because the only
167 // way to support this would be to delete the underlying bookmark, which is
168 // unlikely to be what the user intends.
169 AutocompleteMatch
match(this, 0, false,
170 AutocompleteMatchType::BOOKMARK_TITLE
);
171 base::string16
title(bookmark_match
.node
->GetTitle());
172 BookmarkMatch::MatchPositions new_title_match_positions
=
173 bookmark_match
.title_match_positions
;
174 CorrectTitleAndMatchPositions(&title
, &new_title_match_positions
);
175 const GURL
& url(bookmark_match
.node
->url());
176 const base::string16
& url_utf16
= base::UTF8ToUTF16(url
.spec());
177 size_t inline_autocomplete_offset
= URLPrefix::GetInlineAutocompleteOffset(
178 input
.text(), fixed_up_input_text
, false, url_utf16
);
179 match
.destination_url
= url
;
180 const size_t match_start
= bookmark_match
.url_match_positions
.empty() ?
181 0 : bookmark_match
.url_match_positions
[0].first
;
182 const bool trim_http
= !AutocompleteInput::HasHTTPScheme(input
.text()) &&
183 ((match_start
== base::string16::npos
) || (match_start
!= 0));
184 std::vector
<size_t> offsets
= BookmarkMatch::OffsetsFromMatchPositions(
185 bookmark_match
.url_match_positions
);
186 // In addition to knowing how |offsets| is transformed, we need to know how
187 // |inline_autocomplete_offset| is transformed. We add it to the end of
188 // |offsets|, compute how everything is transformed, then remove it from the
190 offsets
.push_back(inline_autocomplete_offset
);
191 match
.contents
= net::FormatUrlWithOffsets(url
, languages_
,
192 net::kFormatUrlOmitAll
& ~(trim_http
? 0 : net::kFormatUrlOmitHTTP
),
193 net::UnescapeRule::SPACES
, NULL
, NULL
, &offsets
);
194 inline_autocomplete_offset
= offsets
.back();
196 BookmarkMatch::MatchPositions new_url_match_positions
=
197 BookmarkMatch::ReplaceOffsetsInMatchPositions(
198 bookmark_match
.url_match_positions
, offsets
);
199 match
.contents_class
=
200 ClassificationsFromMatch(new_url_match_positions
,
201 match
.contents
.size(),
203 match
.fill_into_edit
=
204 AutocompleteInput::FormattedStringWithEquivalentMeaning(
205 url
, match
.contents
, client_
->GetSchemeClassifier());
206 if (inline_autocomplete_offset
!= base::string16::npos
) {
207 // |inline_autocomplete_offset| may be beyond the end of the
208 // |fill_into_edit| if the user has typed an URL with a scheme and the
209 // last character typed is a slash. That slash is removed by the
210 // FormatURLWithOffsets call above.
211 if (inline_autocomplete_offset
< match
.fill_into_edit
.length()) {
212 match
.inline_autocompletion
=
213 match
.fill_into_edit
.substr(inline_autocomplete_offset
);
215 match
.allowed_to_be_default_match
= match
.inline_autocompletion
.empty() ||
216 !HistoryProvider::PreventInlineAutocomplete(input
);
218 match
.description
= title
;
219 match
.description_class
=
220 ClassificationsFromMatch(bookmark_match
.title_match_positions
,
221 match
.description
.size(),
224 // Summary on how a relevance score is determined for the match:
226 // For each match within the bookmark's title or URL (or both), calculate a
227 // 'factor', sum up those factors, then use the sum to figure out a value
228 // between the base score and the maximum score.
230 // The factor for each match is the product of:
232 // 1) how many characters in the bookmark's title/URL are part of this match.
233 // This is capped at the length of the bookmark's title
234 // to prevent terms that match in both the title and the URL from
235 // scoring too strongly.
237 // 2) where the match occurs within the bookmark's title or URL,
238 // giving more points for matches that appear earlier in the string:
239 // ((string_length - position of match start) / string_length).
241 // Example: Given a bookmark title of 'abcde fghijklm', with a title length
242 // of 14, and two different search terms, 'abcde' and 'fghij', with
243 // start positions of 0 and 6, respectively, 'abcde' will score higher
244 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
245 // a partial factor of (14-6)/14 = 0.571 ). (In this example neither
246 // term matches in the URL.)
248 // Once all match factors have been calculated they are summed. If there
249 // are no URL matches, the resulting sum will never be greater than the
250 // length of the bookmark title because of the way the bookmark model matches
251 // and removes overlaps. (In particular, the bookmark model only
252 // matches terms to the beginning of words and it removes all overlapping
253 // matches, keeping only the longest. Together these mean that each
254 // character is included in at most one match.) If there are matches in the
255 // URL, the sum can be greater.
257 // This sum is then normalized by the length of the bookmark title + 10
258 // and capped at 1.0. The +10 is to expand the scoring range so fewer
259 // bookmarks will hit the 1.0 cap and hence lose all ability to distinguish
260 // between these high-quality bookmarks.
262 // The normalized value is multiplied against the scoring range available,
263 // which is the difference between the minimum possible score and the maximum
264 // possible score. This product is added to the minimum possible score to
265 // give the preliminary score.
267 // If the preliminary score is less than the maximum possible score, 1199,
268 // it can be boosted up to that maximum possible score if the URL referenced
269 // by the bookmark is also referenced by any of the user's other bookmarks.
270 // A count of how many times the bookmark's URL is referenced is determined
271 // and, for each additional reference beyond the one for the bookmark being
272 // scored up to a maximum of three, the score is boosted by a fixed amount
273 // given by |kURLCountBoost|, below.
276 // Pretend empty titles are identical to the URL.
278 title
= base::ASCIIToUTF16(url
.spec());
279 ScoringFunctor title_position_functor
=
280 for_each(bookmark_match
.title_match_positions
.begin(),
281 bookmark_match
.title_match_positions
.end(),
282 ScoringFunctor(title
.size()));
283 ScoringFunctor url_position_functor
=
284 for_each(bookmark_match
.url_match_positions
.begin(),
285 bookmark_match
.url_match_positions
.end(),
286 ScoringFunctor(bookmark_match
.node
->url().spec().length()));
287 const double title_match_strength
= title_position_functor
.ScoringFactor();
288 const double summed_factors
= title_match_strength
+
289 url_position_functor
.ScoringFactor();
290 const double normalized_sum
=
291 std::min(summed_factors
/ (title
.size() + 10), 1.0);
292 // Bookmarks with javascript scheme ("bookmarklets") that do not have title
293 // matches get a lower base and lower maximum score because returning them
294 // for matches in their (often very long) URL looks stupid and is often not
295 // intended by the user.
296 const bool bookmarklet_without_title_match
=
297 url
.SchemeIs(url::kJavaScriptScheme
) && (title_match_strength
== 0.0);
298 const int kBaseBookmarkScore
= bookmarklet_without_title_match
? 400 : 900;
299 const int kMaxBookmarkScore
= bookmarklet_without_title_match
? 799 : 1199;
300 const double kBookmarkScoreRange
=
301 static_cast<double>(kMaxBookmarkScore
- kBaseBookmarkScore
);
302 match
.relevance
= static_cast<int>(normalized_sum
* kBookmarkScoreRange
) +
304 // Don't waste any time searching for additional referenced URLs if we
305 // already have a perfect title match.
306 if (match
.relevance
>= kMaxBookmarkScore
)
308 // Boost the score if the bookmark's URL is referenced by other bookmarks.
309 const int kURLCountBoost
[4] = { 0, 75, 125, 150 };
310 std::vector
<const BookmarkNode
*> nodes
;
311 bookmark_model_
->GetNodesByURL(url
, &nodes
);
312 DCHECK_GE(std::min(arraysize(kURLCountBoost
), nodes
.size()), 1U);
314 kURLCountBoost
[std::min(arraysize(kURLCountBoost
), nodes
.size()) - 1];
315 match
.relevance
= std::min(kMaxBookmarkScore
, match
.relevance
);
320 ACMatchClassifications
BookmarkProvider::ClassificationsFromMatch(
321 const query_parser::Snippet::MatchPositions
& positions
,
324 ACMatchClassification::Style url_style
=
325 is_url
? ACMatchClassification::URL
: ACMatchClassification::NONE
;
326 ACMatchClassifications classifications
;
327 if (positions
.empty()) {
329 classifications
.push_back(ACMatchClassification(0, url_style
));
330 return classifications
;
333 for (query_parser::Snippet::MatchPositions::const_iterator i
=
335 i
!= positions
.end();
337 AutocompleteMatch::ACMatchClassifications new_class
;
338 AutocompleteMatch::ClassifyLocationInString(i
->first
, i
->second
- i
->first
,
339 text_length
, url_style
, &new_class
);
340 classifications
= AutocompleteMatch::MergeClassifications(
341 classifications
, new_class
);
343 return classifications
;