1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/autocomplete/history_quick_provider.h"
9 #include "base/basictypes.h"
10 #include "base/command_line.h"
11 #include "base/i18n/break_iterator.h"
12 #include "base/logging.h"
13 #include "base/metrics/field_trial.h"
14 #include "base/metrics/histogram.h"
15 #include "base/prefs/pref_service.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/autocomplete/autocomplete_result.h"
21 #include "chrome/browser/autocomplete/history_url_provider.h"
22 #include "chrome/browser/history/history_database.h"
23 #include "chrome/browser/history/history_service.h"
24 #include "chrome/browser/history/history_service_factory.h"
25 #include "chrome/browser/history/in_memory_url_index.h"
26 #include "chrome/browser/history/in_memory_url_index_types.h"
27 #include "chrome/browser/history/scored_history_match.h"
28 #include "chrome/browser/omnibox/omnibox_field_trial.h"
29 #include "chrome/browser/profiles/profile.h"
30 #include "chrome/browser/search/search.h"
31 #include "chrome/browser/search_engines/template_url.h"
32 #include "chrome/browser/search_engines/template_url_service.h"
33 #include "chrome/browser/search_engines/template_url_service_factory.h"
34 #include "chrome/common/autocomplete_match_type.h"
35 #include "chrome/common/chrome_switches.h"
36 #include "chrome/common/net/url_fixer_upper.h"
37 #include "chrome/common/pref_names.h"
38 #include "chrome/common/url_constants.h"
39 #include "content/public/browser/browser_thread.h"
40 #include "content/public/browser/notification_source.h"
41 #include "content/public/browser/notification_types.h"
42 #include "net/base/escape.h"
43 #include "net/base/net_util.h"
44 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
45 #include "url/url_parse.h"
46 #include "url/url_util.h"
48 using history::InMemoryURLIndex
;
49 using history::ScoredHistoryMatch
;
50 using history::ScoredHistoryMatches
;
// Out-of-class definition of the static flag HistoryQuickProvider::disabled_;
// it is initialized to false.
// NOTE(review): no reader of this flag is visible in this excerpt -- confirm
// how it is consumed against the class header and its callers.
52 bool HistoryQuickProvider::disabled_
= false;
54 HistoryQuickProvider::HistoryQuickProvider(
55 AutocompleteProviderListener
* listener
,
57 : HistoryProvider(listener
, profile
,
58 AutocompleteProvider::TYPE_HISTORY_QUICK
),
59 languages_(profile_
->GetPrefs()->GetString(prefs::kAcceptLanguages
)) {
62 void HistoryQuickProvider::Start(const AutocompleteInput
& input
,
63 bool minimal_changes
) {
68 // Don't bother with INVALID and FORCED_QUERY. Also pass when looking for
69 // BEST_MATCH and there is no inline autocompletion because none of the HQP
70 // matches can score highly enough to qualify.
71 if ((input
.type() == AutocompleteInput::INVALID
) ||
72 (input
.type() == AutocompleteInput::FORCED_QUERY
) ||
73 (input
.matches_requested() == AutocompleteInput::BEST_MATCH
&&
74 input
.prevent_inline_autocomplete()))
77 autocomplete_input_
= input
;
79 // TODO(pkasting): We should just block here until this loads. Any time
80 // someone unloads the history backend, we'll get inconsistent inline
81 // autocomplete behavior here.
83 base::TimeTicks start_time
= base::TimeTicks::Now();
85 if (input
.text().length() < 6) {
86 base::TimeTicks end_time
= base::TimeTicks::Now();
87 std::string name
= "HistoryQuickProvider.QueryIndexTime." +
88 base::IntToString(input
.text().length());
89 base::HistogramBase
* counter
= base::Histogram::FactoryGet(
90 name
, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag
);
91 counter
->Add(static_cast<int>((end_time
- start_time
).InMilliseconds()));
93 UpdateStarredStateOfMatches();
// Handles deletion of |match|. DCHECKs that the match is flagged deletable and
// that its destination URL is valid, then calls DeleteURL() with that URL on
// the index returned by GetIndex(), and finally calls
// DeleteMatchFromMatches(match).
// NOTE(review): the GetIndex() result is dereferenced without a null check
// here -- confirm it cannot be NULL on this path.
97 void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch
& match
) {
98 DCHECK(match
.deletable
);
99 DCHECK(match
.destination_url
.is_valid());
100 // Delete the match from the InMemoryURLIndex.
101 GetIndex()->DeleteURL(match
.destination_url
);
102 DeleteMatchFromMatches(match
);
// Destructor. The body is empty: no explicit teardown is performed here.
105 HistoryQuickProvider::~HistoryQuickProvider() {}
107 void HistoryQuickProvider::DoAutocomplete() {
108 // Get the matching URLs from the DB.
109 ScoredHistoryMatches matches
= GetIndex()->HistoryItemsForTerms(
110 autocomplete_input_
.text(),
111 autocomplete_input_
.cursor_position());
115 // Figure out if HistoryURL provider has a URL-what-you-typed match
116 // that ought to go first and what its score will be.
117 bool will_have_url_what_you_typed_match_first
= false;
118 int url_what_you_typed_match_score
= -1; // undefined
119 // These are necessary (but not sufficient) conditions for the omnibox
120 // input to be a URL-what-you-typed match. The username test checks that
121 // either the username does not exist (a regular URL such as http://site/)
122 // or, if the username exists (http://user@site/), there must be either
123 // a password or a port. Together these exclude pure username@site
124 // inputs because these are likely to be an e-mail address. HistoryURL
125 // provider won't promote the URL-what-you-typed match to first
127 const bool can_have_url_what_you_typed_match_first
=
128 autocomplete_input_
.canonicalized_url().is_valid() &&
129 (autocomplete_input_
.type() != AutocompleteInput::QUERY
) &&
130 (autocomplete_input_
.type() != AutocompleteInput::FORCED_QUERY
) &&
131 (!autocomplete_input_
.parts().username
.is_nonempty() ||
132 autocomplete_input_
.parts().password
.is_nonempty() ||
133 autocomplete_input_
.parts().path
.is_nonempty());
134 if (can_have_url_what_you_typed_match_first
) {
135 HistoryService
* const history_service
=
136 HistoryServiceFactory::GetForProfile(profile_
,
137 Profile::EXPLICIT_ACCESS
);
138 // We expect HistoryService to be available. In case it's not,
139 // (e.g., due to Profile corruption) we let HistoryQuick provider
140 // completions (which may be available because it's a different
141 // data structure) compete with the URL-what-you-typed match as
143 if (history_service
) {
144 history::URLDatabase
* url_db
= history_service
->InMemoryDatabase();
145 // url_db can be NULL if it hasn't finished initializing (or
146 // failed to initialize). In this case, we let HistoryQuick
147 // provider completions compete with the URL-what-you-typed
150 const std::string
host(base::UTF16ToUTF8(
151 autocomplete_input_
.text().substr(
152 autocomplete_input_
.parts().host
.begin
,
153 autocomplete_input_
.parts().host
.len
)));
154 // We want to put the URL-what-you-typed match first if either
155 // * the user visited the URL before (intranet or internet).
156 // * it's a URL on a host that user visited before and this
157 // is the root path of the host. (If the user types some
158 // of a path--more than a simple "/"--we let autocomplete compete
159 // normally with the URL-what-you-typed match.)
160 // TODO(mpearson): Remove this hacky code and simply score URL-what-
161 // you-typed in some sane way relative to possible completions:
162 // URL-what-you-typed should get some sort of a boost relative
163 // to completions, but completions should naturally win if
164 // they're a lot more popular. In this process, if the input
165 // is a bare intranet hostname that has been visited before, we
166 // may want to enforce that the only completions that can outscore
167 // the URL-what-you-typed match are on the same host (i.e., aren't
168 // from a longer internet hostname for which the omnibox input is
170 if (url_db
->GetRowForURL(
171 autocomplete_input_
.canonicalized_url(), NULL
) != 0) {
172 // We visited this URL before.
173 will_have_url_what_you_typed_match_first
= true;
174 // HistoryURLProvider gives visited what-you-typed URLs a high score.
175 url_what_you_typed_match_score
=
176 HistoryURLProvider::kScoreForBestInlineableResult
;
177 } else if (url_db
->IsTypedHost(host
) &&
178 (!autocomplete_input_
.parts().path
.is_nonempty() ||
179 ((autocomplete_input_
.parts().path
.len
== 1) &&
180 (autocomplete_input_
.text()[
181 autocomplete_input_
.parts().path
.begin
] == '/'))) &&
182 !autocomplete_input_
.parts().query
.is_nonempty() &&
183 !autocomplete_input_
.parts().ref
.is_nonempty()) {
184 // Not visited, but we've seen the host before.
185 will_have_url_what_you_typed_match_first
= true;
186 const size_t registry_length
=
187 net::registry_controlled_domains::GetRegistryLength(
189 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES
,
190 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
);
191 if (registry_length
== 0) {
192 // Known intranet hosts get one score.
193 url_what_you_typed_match_score
=
194 HistoryURLProvider::kScoreForUnvisitedIntranetResult
;
196 // Known internet hosts get another.
197 url_what_you_typed_match_score
=
198 HistoryURLProvider::kScoreForWhatYouTypedResult
;
205 // Loop over every result and add it to matches_. In the process,
206 // guarantee that scores are decreasing. |max_match_score| keeps
207 // track of the highest score we can assign to any later results we
208 // see. Also, if we're not allowing inline autocompletions in
209 // general or the current best suggestion isn't inlineable,
210 // artificially reduce the starting |max_match_score| (which
211 // therefore applies to all results) to something low enough that
212 // guarantees no result will be offered as an inline autocomplete
213 // suggestion. Also do a similar reduction if we think there will be
214 // a URL-what-you-typed match. (We want URL-what-you-typed matches for
215 // visited URLs to beat out any longer URLs, no matter how frequently
216 // they're visited.) The strength of this last reduction depends on the
217 // likely score for the URL-what-you-typed result.
219 // |template_url_service| or |template_url| can be NULL in unit tests.
220 TemplateURLService
* template_url_service
=
221 TemplateURLServiceFactory::GetForProfile(profile_
);
222 TemplateURL
* template_url
= template_url_service
?
223 template_url_service
->GetDefaultSearchProvider() : NULL
;
224 int max_match_score
=
225 (OmniboxFieldTrial::ReorderForLegalDefaultMatch(
226 autocomplete_input_
.current_page_classification()) ||
227 (!PreventInlineAutocomplete(autocomplete_input_
) &&
228 matches
.begin()->can_inline())) ?
229 matches
.begin()->raw_score() :
230 (AutocompleteResult::kLowestDefaultScore
- 1);
231 if (will_have_url_what_you_typed_match_first
) {
232 max_match_score
= std::min(max_match_score
,
233 url_what_you_typed_match_score
- 1);
235 for (ScoredHistoryMatches::const_iterator match_iter
= matches
.begin();
236 match_iter
!= matches
.end(); ++match_iter
) {
237 const ScoredHistoryMatch
& history_match(*match_iter
);
238 // Culls results corresponding to queries from the default search engine.
239 // These are low-quality, difficult-to-understand matches for users, and the
240 // SearchProvider should surface past queries in a better way anyway.
242 !template_url
->IsSearchURL(history_match
.url_info
.url())) {
243 // Set max_match_score to the score we'll assign this result:
244 max_match_score
= std::min(max_match_score
, history_match
.raw_score());
245 matches_
.push_back(QuickMatchToACMatch(history_match
, max_match_score
));
246 // Mark this max_match_score as being used:
252 AutocompleteMatch
HistoryQuickProvider::QuickMatchToACMatch(
253 const ScoredHistoryMatch
& history_match
,
255 const history::URLRow
& info
= history_match
.url_info
;
256 AutocompleteMatch
match(
257 this, score
, !!info
.visit_count(),
258 history_match
.url_matches().empty() ?
259 AutocompleteMatchType::HISTORY_TITLE
:
260 AutocompleteMatchType::HISTORY_URL
);
261 match
.typed_count
= info
.typed_count();
262 match
.destination_url
= info
.url();
263 DCHECK(match
.destination_url
.is_valid());
265 // Format the URL autocomplete presentation.
266 std::vector
<size_t> offsets
=
267 OffsetsFromTermMatches(history_match
.url_matches());
268 const net::FormatUrlTypes format_types
= net::kFormatUrlOmitAll
&
269 ~(!history_match
.match_in_scheme
? 0 : net::kFormatUrlOmitHTTP
);
270 match
.fill_into_edit
=
271 AutocompleteInput::FormattedStringWithEquivalentMeaning(info
.url(),
272 net::FormatUrlWithOffsets(info
.url(), languages_
, format_types
,
273 net::UnescapeRule::SPACES
, NULL
, NULL
, &offsets
));
274 history::TermMatches new_matches
=
275 ReplaceOffsetsInTermMatches(history_match
.url_matches(), offsets
);
276 match
.contents
= net::FormatUrl(info
.url(), languages_
, format_types
,
277 net::UnescapeRule::SPACES
, NULL
, NULL
, NULL
);
278 match
.contents_class
=
279 SpansFromTermMatch(new_matches
, match
.contents
.length(), true);
281 match
.allowed_to_be_default_match
= history_match
.can_inline() &&
282 !PreventInlineAutocomplete(autocomplete_input_
);
283 if (match
.allowed_to_be_default_match
) {
284 DCHECK(!new_matches
.empty());
285 size_t inline_autocomplete_offset
= new_matches
[0].offset
+
286 new_matches
[0].length
;
287 // |inline_autocomplete_offset| may be beyond the end of the
288 // |fill_into_edit| if the user has typed an URL with a scheme and the
289 // last character typed is a slash. That slash is removed by the
290 // FormatURLWithOffsets call above.
291 if (inline_autocomplete_offset
< match
.fill_into_edit
.length()) {
292 match
.inline_autocompletion
=
293 match
.fill_into_edit
.substr(inline_autocomplete_offset
);
297 // Format the description autocomplete presentation.
298 match
.description
= info
.title();
299 match
.description_class
= SpansFromTermMatch(
300 history_match
.title_matches(), match
.description
.length(), false);
302 match
.RecordAdditionalInfo("typed count", info
.typed_count());
303 match
.RecordAdditionalInfo("visit count", info
.visit_count());
304 match
.RecordAdditionalInfo("last visit", info
.last_visit());
309 history::InMemoryURLIndex
* HistoryQuickProvider::GetIndex() {
310 if (index_for_testing_
.get())
311 return index_for_testing_
.get();
313 HistoryService
* const history_service
=
314 HistoryServiceFactory::GetForProfile(profile_
, Profile::EXPLICIT_ACCESS
);
315 if (!history_service
)
318 return history_service
->InMemoryIndex();