1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/autocomplete/bookmark_provider.h"
11 #include "base/metrics/histogram.h"
12 #include "base/prefs/pref_service.h"
13 #include "base/time/time.h"
14 #include "chrome/browser/autocomplete/autocomplete_result.h"
15 #include "chrome/browser/bookmarks/bookmark_model.h"
16 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
17 #include "chrome/browser/bookmarks/bookmark_title_match.h"
18 #include "chrome/browser/profiles/profile.h"
19 #include "chrome/common/pref_names.h"
20 #include "net/base/net_util.h"
22 typedef std::vector
<BookmarkTitleMatch
> TitleMatches
;
24 // BookmarkProvider ------------------------------------------------------------
26 BookmarkProvider::BookmarkProvider(
27 AutocompleteProviderListener
* listener
,
29 : AutocompleteProvider(listener
, profile
,
30 AutocompleteProvider::TYPE_BOOKMARK
),
31 bookmark_model_(NULL
) {
33 bookmark_model_
= BookmarkModelFactory::GetForProfile(profile
);
34 languages_
= profile_
->GetPrefs()->GetString(prefs::kAcceptLanguages
);
38 void BookmarkProvider::Start(const AutocompleteInput
& input
,
39 bool minimal_changes
) {
44 // Short-circuit any matching when inline autocompletion is disabled and
45 // we're looking for BEST_MATCH because none of the BookmarkProvider's
46 // matches can score high enough to qualify.
47 if (input
.text().empty() ||
48 ((input
.type() != AutocompleteInput::UNKNOWN
) &&
49 (input
.type() != AutocompleteInput::QUERY
)) ||
50 ((input
.matches_requested() == AutocompleteInput::BEST_MATCH
) &&
51 input
.prevent_inline_autocomplete()))
54 base::TimeTicks start_time
= base::TimeTicks::Now();
56 input
.matches_requested() == AutocompleteInput::BEST_MATCH
);
57 UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime",
58 base::TimeTicks::Now() - start_time
);
61 BookmarkProvider::~BookmarkProvider() {}
63 void BookmarkProvider::DoAutocomplete(const AutocompleteInput
& input
,
65 // We may not have a bookmark model for some unit tests.
70 // Retrieve enough bookmarks so that we have a reasonable probability of
71 // suggesting the one that the user desires.
72 const size_t kMaxBookmarkMatches
= 50;
74 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's
75 // search terms using the following rules:
76 // - The search text is broken up into search terms. Each term is searched
78 // - Term matches are always performed against the start of a word. 'def'
79 // will match against 'define' but not against 'indefinite'.
80 // - Terms must be at least three characters in length in order to perform
81 // partial word matches. Any term of lesser length will only be used as an
82 // exact match. 'def' will match against 'define' but 'de' will not match.
83 // - A search containing multiple terms will return results with those words
84 // occuring in any order.
85 // - Terms enclosed in quotes comprises a phrase that must match exactly.
86 // - Multiple terms enclosed in quotes will require those exact words in that
87 // exact order to match.
89 // Note: GetBookmarksWithTitlesMatching() will never return a match span
90 // greater than the length of the title against which it is being matched,
91 // nor can those spans ever overlap because the match spans are coalesced
92 // for all matched terms.
94 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching
95 // for complete details of how title searches are performed against the user's
97 bookmark_model_
->GetBookmarksWithTitlesMatching(input
.text(),
101 return; // There were no matches.
102 for (TitleMatches::const_iterator i
= matches
.begin(); i
!= matches
.end();
104 // Create and score the AutocompleteMatch. If its score is 0 then the
105 // match is discarded.
106 AutocompleteMatch
match(TitleMatchToACMatch(*i
));
107 if (match
.relevance
> 0)
108 matches_
.push_back(match
);
111 // Sort and clip the resulting matches.
112 size_t max_matches
= best_match
? 1 : AutocompleteProvider::kMaxMatches
;
113 if (matches_
.size() > max_matches
) {
114 std::partial_sort(matches_
.begin(),
115 matches_
.begin() + max_matches
,
117 AutocompleteMatch::MoreRelevant
);
118 matches_
.resize(max_matches
);
120 std::sort(matches_
.begin(), matches_
.end(),
121 AutocompleteMatch::MoreRelevant
);
127 // for_each helper functor that calculates a match factor for each query term
128 // when calculating the final score.
130 // Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's
131 // title the term matches, and 2) where the match is positioned within the
132 // bookmark's title. A full length match earns a 1.0. A half-length match earns
133 // at most a 0.5 and at least a 0.25. A single character match against a title
134 // that is 100 characters long where the match is at the first character will
135 // earn a 0.01 and at the last character will earn a 0.0001.
136 class ScoringFunctor
{
138 // |title_length| is the length of the bookmark title against which this
139 // match will be scored.
140 explicit ScoringFunctor(size_t title_length
)
141 : title_length_(static_cast<double>(title_length
)),
142 scoring_factor_(0.0) {
145 void operator()(const Snippet::MatchPosition
& match
) {
146 double term_length
= static_cast<double>(match
.second
- match
.first
);
147 scoring_factor_
+= term_length
/ title_length_
*
148 (title_length_
- match
.first
) / title_length_
;
151 double ScoringFactor() { return scoring_factor_
; }
154 double title_length_
;
155 double scoring_factor_
;
160 AutocompleteMatch
BookmarkProvider::TitleMatchToACMatch(
161 const BookmarkTitleMatch
& title_match
) {
162 // The AutocompleteMatch we construct is non-deletable because the only
163 // way to support this would be to delete the underlying bookmark, which is
164 // unlikely to be what the user intends.
165 AutocompleteMatch
match(this, 0, false,
166 AutocompleteMatchType::BOOKMARK_TITLE
);
167 const base::string16
& title(title_match
.node
->GetTitle());
168 DCHECK(!title
.empty());
169 const GURL
& url(title_match
.node
->url());
170 match
.destination_url
= url
;
171 match
.contents
= net::FormatUrl(url
, languages_
,
172 net::kFormatUrlOmitAll
& net::kFormatUrlOmitHTTP
,
173 net::UnescapeRule::SPACES
, NULL
, NULL
, NULL
);
174 match
.contents_class
.push_back(
175 ACMatchClassification(0, ACMatchClassification::NONE
));
176 match
.fill_into_edit
=
177 AutocompleteInput::FormattedStringWithEquivalentMeaning(url
,
179 match
.description
= title
;
180 match
.description_class
=
181 ClassificationsFromMatch(title_match
.match_positions
,
182 match
.description
.size());
183 match
.starred
= true;
185 // Summary on how a relevance score is determined for the match:
187 // For each term matching within the bookmark's title (as given by the set of
188 // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then
189 // use the sum to figure out a value between the base score and the maximum
192 // The factor for each term is the product of:
194 // 1) how much of the bookmark's title has been matched by the term:
195 // (term length / title length).
197 // Example: Given a bookmark title 'abcde fghijklm', with a title length
198 // of 14, and two different search terms, 'abcde' and 'fghijklm', with
199 // term lengths of 5 and 8, respectively, 'fghijklm' will score higher
200 // (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357).
202 // 2) where the term match occurs within the bookmark's title, giving more
203 // points for matches that appear earlier in the title:
204 // ((title length - position of match start) / title_length).
206 // Example: Given a bookmark title of 'abcde fghijklm', with a title length
207 // of 14, and two different search terms, 'abcde' and 'fghij', with
208 // start positions of 0 and 6, respectively, 'abcde' will score higher
209 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
210 // a partial factor of (14-6)/14 = 0.571 ).
212 // Once all term factors have been calculated they are summed. The resulting
213 // sum will never be greater than 1.0 because of the way the bookmark model
214 // matches and removes overlaps. (In particular, the bookmark model only
215 // matches terms to the beginning of words and it removes all overlapping
216 // matches, keeping only the longest. Together these mean that each
217 // character is included in at most one match. This property ensures the
218 // sum of factors is at most 1.) This sum is then multiplied against the
219 // scoring range available, which is 299. The 299 is calculated by
220 // subtracting the minimum possible score, 900, from the maximum possible
221 // score, 1199. This product, ranging from 0 to 299, is added to the minimum
222 // possible score, 900, giving the preliminary score.
224 // If the preliminary score is less than the maximum possible score, 1199,
225 // it can be boosted up to that maximum possible score if the URL referenced
226 // by the bookmark is also referenced by any of the user's other bookmarks.
227 // A count of how many times the bookmark's URL is referenced is determined
228 // and, for each additional reference beyond the one for the bookmark being
229 // scored up to a maximum of three, the score is boosted by a fixed amount
230 // given by |kURLCountBoost|, below.
232 ScoringFunctor position_functor
=
233 for_each(title_match
.match_positions
.begin(),
234 title_match
.match_positions
.end(), ScoringFunctor(title
.size()));
235 const int kBaseBookmarkScore
= 900;
236 const int kMaxBookmarkScore
= AutocompleteResult::kLowestDefaultScore
- 1;
237 const double kBookmarkScoreRange
=
238 static_cast<double>(kMaxBookmarkScore
- kBaseBookmarkScore
);
239 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping
240 // matches but let's play it safe.
241 match
.relevance
= std::min(kMaxBookmarkScore
,
242 static_cast<int>(position_functor
.ScoringFactor() * kBookmarkScoreRange
) +
244 // Don't waste any time searching for additional referenced URLs if we
245 // already have a perfect title match.
246 if (match
.relevance
>= kMaxBookmarkScore
)
248 // Boost the score if the bookmark's URL is referenced by other bookmarks.
249 const int kURLCountBoost
[4] = { 0, 75, 125, 150 };
250 std::vector
<const BookmarkNode
*> nodes
;
251 bookmark_model_
->GetNodesByURL(url
, &nodes
);
252 DCHECK_GE(std::min(arraysize(kURLCountBoost
), nodes
.size()), 1U);
254 kURLCountBoost
[std::min(arraysize(kURLCountBoost
), nodes
.size()) - 1];
255 match
.relevance
= std::min(kMaxBookmarkScore
, match
.relevance
);
260 ACMatchClassifications
BookmarkProvider::ClassificationsFromMatch(
261 const Snippet::MatchPositions
& positions
,
262 size_t text_length
) {
263 ACMatchClassifications classifications
;
264 if (positions
.empty()) {
265 classifications
.push_back(
266 ACMatchClassification(0, ACMatchClassification::NONE
));
267 return classifications
;
270 for (Snippet::MatchPositions::const_iterator i
= positions
.begin();
271 i
!= positions
.end(); ++i
) {
272 AutocompleteMatch::ACMatchClassifications new_class
;
273 AutocompleteMatch::ClassifyLocationInString(i
->first
, i
->second
- i
->first
,
274 text_length
, 0, &new_class
);
275 classifications
= AutocompleteMatch::MergeClassifications(
276 classifications
, new_class
);
278 return classifications
;