1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/autocomplete/shortcuts_provider.h"
12 #include "base/i18n/break_iterator.h"
13 #include "base/i18n/case_conversion.h"
14 #include "base/logging.h"
15 #include "base/metrics/histogram.h"
16 #include "base/prefs/pref_service.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/autocomplete/autocomplete_input.h"
22 #include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
23 #include "chrome/browser/autocomplete/autocomplete_result.h"
24 #include "chrome/browser/autocomplete/history_provider.h"
25 #include "chrome/browser/autocomplete/url_prefix.h"
26 #include "chrome/browser/history/history_notifications.h"
27 #include "chrome/browser/history/history_service.h"
28 #include "chrome/browser/history/history_service_factory.h"
29 #include "chrome/browser/history/shortcuts_backend_factory.h"
30 #include "chrome/browser/omnibox/omnibox_field_trial.h"
31 #include "chrome/browser/profiles/profile.h"
32 #include "chrome/common/net/url_fixer_upper.h"
33 #include "chrome/common/pref_names.h"
34 #include "chrome/common/url_constants.h"
35 #include "url/url_parse.h"
39 class DestinationURLEqualsURL
{
41 explicit DestinationURLEqualsURL(const GURL
& url
) : url_(url
) {}
42 bool operator()(const AutocompleteMatch
& match
) const {
43 return match
.destination_url
== url_
;
49 // Like URLPrefix::BestURLPrefix() except also handles the prefix of
50 // "www.". This is needed because sometimes the string we're matching
51 // against here (which comes from |fill_into_edit|) can start with
52 // "www." without having a protocol at the beginning. Because "www."
53 // is not on the default prefix list, we test for it explicitly here
54 // and use that match if the default list didn't have a match or the
55 // default list's match was shorter than it could've been.
56 const URLPrefix
* BestURLPrefixWithWWWCase(
57 const base::string16
& text
,
58 const base::string16
& prefix_suffix
) {
59 CR_DEFINE_STATIC_LOCAL(URLPrefix
, www_prefix
,
60 (base::ASCIIToUTF16("www."), 1));
61 const URLPrefix
* best_prefix
= URLPrefix::BestURLPrefix(text
, prefix_suffix
);
62 if ((best_prefix
== NULL
) ||
63 (best_prefix
->num_components
< www_prefix
.num_components
)) {
64 if (URLPrefix::PrefixMatch(www_prefix
, text
, prefix_suffix
))
65 best_prefix
= &www_prefix
;
72 ShortcutsProvider::ShortcutsProvider(AutocompleteProviderListener
* listener
,
74 : AutocompleteProvider(listener
, profile
,
75 AutocompleteProvider::TYPE_SHORTCUTS
),
76 languages_(profile_
->GetPrefs()->GetString(prefs::kAcceptLanguages
)),
78 scoped_refptr
<history::ShortcutsBackend
> backend
=
79 ShortcutsBackendFactory::GetForProfile(profile_
);
81 backend
->AddObserver(this);
82 if (backend
->initialized())
87 void ShortcutsProvider::Start(const AutocompleteInput
& input
,
88 bool minimal_changes
) {
91 if ((input
.type() == AutocompleteInput::INVALID
) ||
92 (input
.type() == AutocompleteInput::FORCED_QUERY
))
95 if (input
.text().empty())
101 base::TimeTicks start_time
= base::TimeTicks::Now();
103 if (input
.text().length() < 6) {
104 base::TimeTicks end_time
= base::TimeTicks::Now();
105 std::string name
= "ShortcutsProvider.QueryIndexTime." +
106 base::IntToString(input
.text().size());
107 base::HistogramBase
* counter
= base::Histogram::FactoryGet(
108 name
, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag
);
109 counter
->Add(static_cast<int>((end_time
- start_time
).InMilliseconds()));
111 UpdateStarredStateOfMatches();
114 void ShortcutsProvider::DeleteMatch(const AutocompleteMatch
& match
) {
115 // Copy the URL since DeleteMatchesWithURLs() will invalidate |match|.
116 GURL
url(match
.destination_url
);
118 // When a user deletes a match, he probably means for the URL to disappear out
119 // of history entirely. So nuke all shortcuts that map to this URL.
120 scoped_refptr
<history::ShortcutsBackend
> backend
=
121 ShortcutsBackendFactory::GetForProfileIfExists(profile_
);
122 if (backend
) // Can be NULL in Incognito.
123 backend
->DeleteShortcutsWithUrl(url
);
124 matches_
.erase(std::remove_if(matches_
.begin(), matches_
.end(),
125 DestinationURLEqualsURL(url
)),
127 // NOTE: |match| is now dead!
128 listener_
->OnProviderUpdate(true);
130 // Delete the match from the history DB. This will eventually result in a
131 // second call to DeleteShortcutsWithURLs(), which is harmless.
132 HistoryService
* const history_service
=
133 HistoryServiceFactory::GetForProfile(profile_
, Profile::EXPLICIT_ACCESS
);
135 DCHECK(history_service
&& url
.is_valid());
136 history_service
->DeleteURL(url
);
139 ShortcutsProvider::~ShortcutsProvider() {
140 scoped_refptr
<history::ShortcutsBackend
> backend
=
141 ShortcutsBackendFactory::GetForProfileIfExists(profile_
);
143 backend
->RemoveObserver(this);
146 void ShortcutsProvider::OnShortcutsLoaded() {
150 void ShortcutsProvider::GetMatches(const AutocompleteInput
& input
) {
151 scoped_refptr
<history::ShortcutsBackend
> backend
=
152 ShortcutsBackendFactory::GetForProfileIfExists(profile_
);
155 // Get the URLs from the shortcuts database with keys that partially or
156 // completely match the search term.
157 base::string16
term_string(base::i18n::ToLower(input
.text()));
158 DCHECK(!term_string
.empty());
160 base::string16
fixed_up_term_string(term_string
);
161 AutocompleteInput
fixed_up_input(input
);
162 if (FixupUserInput(&fixed_up_input
))
163 fixed_up_term_string
= fixed_up_input
.text();
166 if (!OmniboxFieldTrial::ShortcutsScoringMaxRelevance(
167 input
.current_page_classification(), &max_relevance
))
168 max_relevance
= AutocompleteResult::kLowestDefaultScore
- 1;
170 for (history::ShortcutsBackend::ShortcutMap::const_iterator it
=
171 FindFirstMatch(term_string
, backend
.get());
172 it
!= backend
->shortcuts_map().end() &&
173 StartsWith(it
->first
, term_string
, true); ++it
) {
174 // Don't return shortcuts with zero relevance.
175 int relevance
= CalculateScore(term_string
, it
->second
, max_relevance
);
177 matches_
.push_back(ShortcutToACMatch(
178 it
->second
, relevance
, term_string
, fixed_up_term_string
,
179 input
.prevent_inline_autocomplete()));
182 std::partial_sort(matches_
.begin(),
184 std::min(AutocompleteProvider::kMaxMatches
, matches_
.size()),
185 matches_
.end(), &AutocompleteMatch::MoreRelevant
);
186 if (matches_
.size() > AutocompleteProvider::kMaxMatches
) {
187 matches_
.erase(matches_
.begin() + AutocompleteProvider::kMaxMatches
,
190 // Reset relevance scores to guarantee no match is given a score that may
191 // allow it to become the highest ranked match (i.e., the default match)
192 // unless either it is a legal default match (i.e., inlineable) or the
193 // omnibox will reorder matches as necessary to correct the problem. In
194 // the process of resetting scores, guarantee that all scores are decreasing
195 // (but do not assign any scores below 1).
196 if (!OmniboxFieldTrial::ReorderForLegalDefaultMatch(
197 input
.current_page_classification()) &&
198 (matches_
.empty() || !matches_
.front().allowed_to_be_default_match
)) {
199 max_relevance
= std::min(max_relevance
,
200 AutocompleteResult::kLowestDefaultScore
- 1);
202 for (ACMatches::iterator it
= matches_
.begin(); it
!= matches_
.end(); ++it
) {
203 max_relevance
= std::min(max_relevance
, it
->relevance
);
204 it
->relevance
= max_relevance
;
205 if (max_relevance
> 1)
210 AutocompleteMatch
ShortcutsProvider::ShortcutToACMatch(
211 const history::ShortcutsBackend::Shortcut
& shortcut
,
213 const base::string16
& term_string
,
214 const base::string16
& fixed_up_term_string
,
215 const bool prevent_inline_autocomplete
) {
216 DCHECK(!term_string
.empty());
217 AutocompleteMatch
match(shortcut
.match_core
.ToMatch());
218 match
.provider
= this;
219 match
.relevance
= relevance
;
220 match
.deletable
= true;
221 DCHECK(match
.destination_url
.is_valid());
222 match
.RecordAdditionalInfo("number of hits", shortcut
.number_of_hits
);
223 match
.RecordAdditionalInfo("last access time", shortcut
.last_access_time
);
224 match
.RecordAdditionalInfo("original input text",
225 base::UTF16ToUTF8(shortcut
.text
));
227 // Set |inline_autocompletion| and |allowed_to_be_default_match| if possible.
228 // If the match is a search query this is easy: simply check whether the
229 // user text is a prefix of the query. If the match is a navigation, we
230 // assume the fill_into_edit looks something like a URL, so we use
231 // BestURLPrefix() to try and strip off any prefixes that the user might
232 // not think would change the meaning, but would otherwise prevent inline
233 // autocompletion. This allows, for example, the input of "foo.c" to
234 // autocomplete to "foo.com" for a fill_into_edit of "http://foo.com".
235 if (AutocompleteMatch::IsSearchType(match
.type
)) {
236 if (StartsWith(match
.fill_into_edit
, term_string
, false)) {
237 match
.inline_autocompletion
=
238 match
.fill_into_edit
.substr(term_string
.length());
239 match
.allowed_to_be_default_match
=
240 !prevent_inline_autocomplete
|| match
.inline_autocompletion
.empty();
243 const URLPrefix
* best_prefix
=
244 BestURLPrefixWithWWWCase(match
.fill_into_edit
, term_string
);
245 const base::string16
* matching_string
= &term_string
;
246 // If we failed to find a best_prefix initially, try again using a
247 // fixed-up version of the user input. This is especially useful to
248 // get about: URLs to inline against chrome:// shortcuts. (about:
249 // URLs are fixed up to the chrome:// scheme.)
250 if ((best_prefix
== NULL
) && !fixed_up_term_string
.empty() &&
251 (fixed_up_term_string
!= term_string
)) {
252 best_prefix
= BestURLPrefixWithWWWCase(
253 match
.fill_into_edit
, fixed_up_term_string
);
254 matching_string
= &fixed_up_term_string
;
256 if (best_prefix
!= NULL
) {
257 match
.inline_autocompletion
= match
.fill_into_edit
.substr(
258 best_prefix
->prefix
.length() + matching_string
->length());
259 match
.allowed_to_be_default_match
=
260 !prevent_inline_autocomplete
|| match
.inline_autocompletion
.empty();
264 // Try to mark pieces of the contents and description as matches if they
265 // appear in |term_string|.
266 WordMap
terms_map(CreateWordMapForString(term_string
));
267 if (!terms_map
.empty()) {
268 match
.contents_class
= ClassifyAllMatchesInString(term_string
, terms_map
,
269 match
.contents
, match
.contents_class
);
270 match
.description_class
= ClassifyAllMatchesInString(term_string
, terms_map
,
271 match
.description
, match
.description_class
);
277 ShortcutsProvider::WordMap
ShortcutsProvider::CreateWordMapForString(
278 const base::string16
& text
) {
279 // First, convert |text| to a vector of the unique words in it.
281 base::i18n::BreakIterator
word_iter(text
,
282 base::i18n::BreakIterator::BREAK_WORD
);
283 if (!word_iter
.Init())
285 std::vector
<base::string16
> words
;
286 while (word_iter
.Advance()) {
287 if (word_iter
.IsWord())
288 words
.push_back(word_iter
.GetString());
292 std::sort(words
.begin(), words
.end());
293 words
.erase(std::unique(words
.begin(), words
.end()), words
.end());
295 // Now create a map from (first character) to (words beginning with that
296 // character). We insert in reverse lexicographical order and rely on the
297 // multimap preserving insertion order for values with the same key. (This
298 // is mandated in C++11, and part of that decision was based on a survey of
299 // existing implementations that found that it was already true everywhere.)
300 std::reverse(words
.begin(), words
.end());
301 for (std::vector
<base::string16
>::const_iterator
i(words
.begin());
302 i
!= words
.end(); ++i
)
303 word_map
.insert(std::make_pair((*i
)[0], *i
));
308 ACMatchClassifications
ShortcutsProvider::ClassifyAllMatchesInString(
309 const base::string16
& find_text
,
310 const WordMap
& find_words
,
311 const base::string16
& text
,
312 const ACMatchClassifications
& original_class
) {
313 DCHECK(!find_text
.empty());
314 DCHECK(!find_words
.empty());
316 // The code below assumes |text| is nonempty and therefore the resulting
317 // classification vector should always be nonempty as well. Returning early
318 // if |text| is empty assures we'll return the (correct) empty vector rather
319 // than a vector with a single (0, NONE) match.
321 return original_class
;
323 // First check whether |text| begins with |find_text| and mark that whole
324 // section as a match if so.
325 base::string16
text_lowercase(base::i18n::ToLower(text
));
326 ACMatchClassifications match_class
;
327 size_t last_position
= 0;
328 if (StartsWith(text_lowercase
, find_text
, true)) {
329 match_class
.push_back(
330 ACMatchClassification(0, ACMatchClassification::MATCH
));
331 last_position
= find_text
.length();
332 // If |text_lowercase| is actually equal to |find_text|, we don't need to
333 // (and in fact shouldn't) put a trailing NONE classification after the end
335 if (last_position
< text_lowercase
.length()) {
336 match_class
.push_back(
337 ACMatchClassification(last_position
, ACMatchClassification::NONE
));
340 // |match_class| should start at position 0. If the first matching word is
341 // found at position 0, this will be popped from the vector further down.
342 match_class
.push_back(
343 ACMatchClassification(0, ACMatchClassification::NONE
));
346 // Now, starting with |last_position|, check each character in
347 // |text_lowercase| to see if we have words starting with that character in
348 // |find_words|. If so, check each of them to see if they match the portion
349 // of |text_lowercase| beginning with |last_position|. Accept the first
350 // matching word found (which should be the longest possible match at this
351 // location, given the construction of |find_words|) and add a MATCH region to
352 // |match_class|, moving |last_position| to be after the matching word. If we
353 // found no matching words, move to the next character and repeat.
354 while (last_position
< text_lowercase
.length()) {
355 std::pair
<WordMap::const_iterator
, WordMap::const_iterator
> range(
356 find_words
.equal_range(text_lowercase
[last_position
]));
357 size_t next_character
= last_position
+ 1;
358 for (WordMap::const_iterator
i(range
.first
); i
!= range
.second
; ++i
) {
359 const base::string16
& word
= i
->second
;
360 size_t word_end
= last_position
+ word
.length();
361 if ((word_end
<= text_lowercase
.length()) &&
362 !text_lowercase
.compare(last_position
, word
.length(), word
)) {
363 // Collapse adjacent ranges into one.
364 if (match_class
.back().offset
== last_position
)
365 match_class
.pop_back();
367 AutocompleteMatch::AddLastClassificationIfNecessary(&match_class
,
368 last_position
, ACMatchClassification::MATCH
);
369 if (word_end
< text_lowercase
.length()) {
370 match_class
.push_back(
371 ACMatchClassification(word_end
, ACMatchClassification::NONE
));
373 last_position
= word_end
;
377 last_position
= std::max(last_position
, next_character
);
380 return AutocompleteMatch::MergeClassifications(original_class
, match_class
);
383 history::ShortcutsBackend::ShortcutMap::const_iterator
384 ShortcutsProvider::FindFirstMatch(const base::string16
& keyword
,
385 history::ShortcutsBackend
* backend
) {
387 history::ShortcutsBackend::ShortcutMap::const_iterator it
=
388 backend
->shortcuts_map().lower_bound(keyword
);
389 // Lower bound not necessarily matches the keyword, check for item pointed by
390 // the lower bound iterator to at least start with keyword.
391 return ((it
== backend
->shortcuts_map().end()) ||
392 StartsWith(it
->first
, keyword
, true)) ? it
:
393 backend
->shortcuts_map().end();
396 int ShortcutsProvider::CalculateScore(
397 const base::string16
& terms
,
398 const history::ShortcutsBackend::Shortcut
& shortcut
,
400 DCHECK(!terms
.empty());
401 DCHECK_LE(terms
.length(), shortcut
.text
.length());
403 // The initial score is based on how much of the shortcut the user has typed.
404 // Using the square root of the typed fraction boosts the base score rapidly
405 // as characters are typed, compared with simply using the typed fraction
406 // directly. This makes sense since the first characters typed are much more
407 // important for determining how likely it is a user wants a particular
408 // shortcut than are the remaining continued characters.
409 double base_score
= max_relevance
*
410 sqrt(static_cast<double>(terms
.length()) / shortcut
.text
.length());
412 // Then we decay this by half each week.
413 const double kLn2
= 0.6931471805599453;
414 base::TimeDelta time_passed
= base::Time::Now() - shortcut
.last_access_time
;
415 // Clamp to 0 in case time jumps backwards (e.g. due to DST).
416 double decay_exponent
= std::max(0.0, kLn2
* static_cast<double>(
417 time_passed
.InMicroseconds()) / base::Time::kMicrosecondsPerWeek
);
419 // We modulate the decay factor based on how many times the shortcut has been
420 // used. Newly created shortcuts decay at full speed; otherwise, decaying by
421 // half takes |n| times as much time, where n increases by
422 // (1.0 / each 5 additional hits), up to a maximum of 5x as long.
423 const double kMaxDecaySpeedDivisor
= 5.0;
424 const double kNumUsesPerDecaySpeedDivisorIncrement
= 5.0;
425 double decay_divisor
= std::min(kMaxDecaySpeedDivisor
,
426 (shortcut
.number_of_hits
+ kNumUsesPerDecaySpeedDivisorIncrement
- 1) /
427 kNumUsesPerDecaySpeedDivisorIncrement
);
429 return static_cast<int>((base_score
/ exp(decay_exponent
/ decay_divisor
)) +