1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/omnibox/history_quick_provider.h"
9 #include "base/basictypes.h"
10 #include "base/debug/crash_logging.h"
11 #include "base/i18n/break_iterator.h"
12 #include "base/logging.h"
13 #include "base/metrics/field_trial.h"
14 #include "base/prefs/pref_service.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "components/bookmarks/browser/bookmark_model.h"
19 #include "components/history/core/browser/history_database.h"
20 #include "components/history/core/browser/history_service.h"
21 #include "components/metrics/proto/omnibox_input_type.pb.h"
22 #include "components/omnibox/autocomplete_match_type.h"
23 #include "components/omnibox/autocomplete_provider_client.h"
24 #include "components/omnibox/autocomplete_result.h"
25 #include "components/omnibox/history_url_provider.h"
26 #include "components/omnibox/in_memory_url_index.h"
27 #include "components/omnibox/in_memory_url_index_types.h"
28 #include "components/omnibox/omnibox_field_trial.h"
29 #include "components/search_engines/template_url.h"
30 #include "components/search_engines/template_url_service.h"
31 #include "net/base/escape.h"
32 #include "net/base/net_util.h"
33 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
34 #include "url/third_party/mozilla/url_parse.h"
35 #include "url/url_util.h"
39 // Used to help investigate bug 464926. NOTE: This value is defined in
40 // multiple places in the codebase due to layering issues. DO NOT change the
41 // value here without changing it in all other places that it is defined in
42 // the codebase (search for |kBug464926CrashKey|).
43 const char kBug464926CrashKey
[] = "bug-464926-info";
// Definition of the static |disabled_| flag; it starts out false. Start()
// checks this flag together with input.from_omnibox_focus().
47 bool HistoryQuickProvider::disabled_
= false;
// Constructs the provider as a TYPE_HISTORY_QUICK HistoryProvider for
// |client|. The accept-languages value and the in-memory URL index are fetched
// from |client| once here and kept in |languages_| and |in_memory_url_index_|.
49 HistoryQuickProvider::HistoryQuickProvider(AutocompleteProviderClient
* client
)
50 : HistoryProvider(AutocompleteProvider::TYPE_HISTORY_QUICK
, client
),
51 languages_(client
->GetAcceptLanguages()),
52 in_memory_url_index_(client
->GetInMemoryURLIndex()) {
// Entry point for a new omnibox |input|. The checks visible here are, in
// order:
//   1. |disabled_| or input.from_omnibox_focus();
//   2. the input type being INVALID or FORCED_QUERY;
// after which |input| is copied into |autocomplete_input_| and
// |in_memory_url_index_| is tested for being set.
// NOTE(review): the statements guarded by these conditions are not visible in
// this listing -- presumably early returns for (1) and (2) and a call to
// DoAutocomplete() for the index check; confirm against the full file.
// |minimal_changes| is not referenced in the visible lines.
55 void HistoryQuickProvider::Start(const AutocompleteInput
& input
,
56 bool minimal_changes
) {
58 if (disabled_
|| input
.from_omnibox_focus())
61 // Don't bother with INVALID and FORCED_QUERY.
62 if ((input
.type() == metrics::OmniboxInputType::INVALID
) ||
63 (input
.type() == metrics::OmniboxInputType::FORCED_QUERY
))
66 autocomplete_input_
= input
;
68 // TODO(pkasting): We should just block here until this loads. Any time
69 // someone unloads the history backend, we'll get inconsistent inline
70 // autocomplete behavior here.
71 if (in_memory_url_index_
) {
// Destructor. No body statements are visible in this listing.
76 HistoryQuickProvider::~HistoryQuickProvider() {
// Fills |matches_| from the in-memory URL index for |autocomplete_input_|.
// Steps visible here:
//   1. Ask |in_memory_url_index_| for up to kMaxMatches scored matches for the
//      input text and cursor position.
//   2. Predict whether HistoryURLProvider will rank a URL-what-you-typed match
//      first and, if so, with which score
//      (|will_have_url_what_you_typed_match_first| /
//      |url_what_you_typed_match_score|).
//   3. Walk the scored matches, filtering on IsSearchURL() for the default
//      search provider, and append the survivors to |matches_| via
//      QuickMatchToACMatch() with scores capped by |max_match_score|.
// NOTE(review): the embedded line numbering in this listing skips values, so
// some guards (e.g. NULL / emptiness checks) and closing braces may exist in
// the full file without being shown here.
79 void HistoryQuickProvider::DoAutocomplete() {
80 // Get the matching URLs from the DB.
81 ScoredHistoryMatches matches
= in_memory_url_index_
->HistoryItemsForTerms(
82 autocomplete_input_
.text(), autocomplete_input_
.cursor_position(),
83 AutocompleteProvider::kMaxMatches
);
87 // Figure out if HistoryURL provider has a URL-what-you-typed match
88 // that ought to go first and what its score will be.
89 bool will_have_url_what_you_typed_match_first
= false;
90 int url_what_you_typed_match_score
= -1; // undefined
91 // These are necessary (but not sufficient) conditions for the omnibox
92 // input to be a URL-what-you-typed match. The username test checks that
93 // either the username does not exist (a regular URL such as http://site/)
94 // or, if the username exists (http://user@site/), there must be either
95 // a password or a port. Together these exclude pure username@site
96 // inputs because these are likely to be an e-mail address. HistoryURL
97 // provider won't promote the URL-what-you-typed match to first
// NOTE(review): the comment above says "a password or a port", but the
// expression below tests parts().path, not parts().port. Confirm which one is
// intended and bring the comment and the code into agreement.
99 const bool can_have_url_what_you_typed_match_first
=
100 (autocomplete_input_
.type() != metrics::OmniboxInputType::QUERY
) &&
101 (!autocomplete_input_
.parts().username
.is_nonempty() ||
102 autocomplete_input_
.parts().password
.is_nonempty() ||
103 autocomplete_input_
.parts().path
.is_nonempty());
104 if (can_have_url_what_you_typed_match_first
) {
105 history::HistoryService
* const history_service
=
106 client()->GetHistoryService();
107 // We expect HistoryService to be available. In case it's not,
108 // (e.g., due to Profile corruption) we let HistoryQuick provider
109 // completions (which may be available because it's a different
110 // data structure) compete with the URL-what-you-typed match as
112 if (history_service
) {
113 history::URLDatabase
* url_db
= history_service
->InMemoryDatabase();
114 // url_db can be NULL if it hasn't finished initializing (or
115 // failed to initialize). In this case, we let HistoryQuick
116 // provider completions compete with the URL-what-you-typed
119 const std::string
host(base::UTF16ToUTF8(
120 autocomplete_input_
.text().substr(
121 autocomplete_input_
.parts().host
.begin
,
122 autocomplete_input_
.parts().host
.len
)));
123 // We want to put the URL-what-you-typed match first if either
124 // * the user visited the URL before (intranet or internet).
125 // * it's a URL on a host that user visited before and this
126 // is the root path of the host. (If the user types some
127 // of a path--more than a simple "/"--we let autocomplete compete
128 // normally with the URL-what-you-typed match.)
129 // TODO(mpearson): Remove this hacky code and simply score URL-what-
130 // you-typed in some sane way relative to possible completions:
131 // URL-what-you-typed should get some sort of a boost relative
132 // to completions, but completions should naturally win if
133 // they're a lot more popular. In this process, if the input
134 // is a bare intranet hostname that has been visited before, we
135 // may want to enforce that the only completions that can outscore
136 // the URL-what-you-typed match are on the same host (i.e., aren't
137 // from a longer internet hostname for which the omnibox input is
139 if (url_db
->GetRowForURL(
140 autocomplete_input_
.canonicalized_url(), NULL
) != 0) {
141 // We visited this URL before.
142 will_have_url_what_you_typed_match_first
= true;
143 // HistoryURLProvider gives visited what-you-typed URLs a high score.
144 url_what_you_typed_match_score
=
145 HistoryURLProvider::kScoreForBestInlineableResult
;
146 } else if (url_db
->IsTypedHost(host
) &&
147 (!autocomplete_input_
.parts().path
.is_nonempty() ||
148 ((autocomplete_input_
.parts().path
.len
== 1) &&
149 (autocomplete_input_
.text()[
150 autocomplete_input_
.parts().path
.begin
] == '/'))) &&
151 !autocomplete_input_
.parts().query
.is_nonempty() &&
152 !autocomplete_input_
.parts().ref
.is_nonempty()) {
153 // Not visited, but we've seen the host before.
154 will_have_url_what_you_typed_match_first
= true;
155 const size_t registry_length
=
156 net::registry_controlled_domains::GetRegistryLength(
158 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES
,
159 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
);
160 if (registry_length
== 0) {
161 // Known intranet hosts get one score.
162 url_what_you_typed_match_score
=
163 HistoryURLProvider::kScoreForUnvisitedIntranetResult
;
165 // Known internet hosts get another.
166 url_what_you_typed_match_score
=
167 HistoryURLProvider::kScoreForWhatYouTypedResult
;
174 // Loop over every result and add it to matches_. In the process,
175 // guarantee that scores are decreasing. |max_match_score| keeps
176 // track of the highest score we can assign to any later results we
177 // see. Also, reduce |max_match_score| if we think there will be
178 // a URL-what-you-typed match. (We want URL-what-you-typed matches for
179 // visited URLs to beat out any longer URLs, no matter how frequently
180 // they're visited.) The strength of this reduction depends on the
181 // likely score for the URL-what-you-typed result.
183 // |template_url_service| or |template_url| can be NULL in unit tests.
184 TemplateURLService
* template_url_service
= client()->GetTemplateURLService();
185 TemplateURL
* template_url
= template_url_service
?
186 template_url_service
->GetDefaultSearchProvider() : NULL
;
// NOTE(review): matches.begin() is dereferenced below, which requires
// |matches| to be non-empty. No emptiness check is visible in this listing;
// confirm the full file returns early when |matches| is empty.
187 int max_match_score
= matches
.begin()->raw_score
;
// When a URL-what-you-typed match is expected, cap our scores strictly below
// its predicted score (hence the "- 1").
188 if (will_have_url_what_you_typed_match_first
) {
189 max_match_score
= std::min(max_match_score
,
190 url_what_you_typed_match_score
- 1);
192 for (ScoredHistoryMatches::const_iterator match_iter
= matches
.begin();
193 match_iter
!= matches
.end(); ++match_iter
) {
194 const ScoredHistoryMatch
& history_match(*match_iter
);
195 // Culls results corresponding to queries from the default search engine.
196 // These are low-quality, difficult-to-understand matches for users, and the
197 // SearchProvider should surface past queries in a better way anyway.
199 !template_url
->IsSearchURL(history_match
.url_info
.url(),
200 template_url_service
->search_terms_data())) {
201 // Set max_match_score to the score we'll assign this result:
202 max_match_score
= std::min(max_match_score
, history_match
.raw_score
);
203 matches_
.push_back(QuickMatchToACMatch(history_match
, max_match_score
));
204 // Mark this max_match_score as being used:
// Converts one ScoredHistoryMatch into an AutocompleteMatch attributed to this
// provider.
//   history_match: the scored index hit; its url_info, url_matches,
//                  title_matches, match_in_scheme and can_inline are read.
//   score:         relevance handed to the AutocompleteMatch constructor
//                  (DoAutocomplete() passes its |max_match_score| here).
// The match type is HISTORY_TITLE when |history_match.url_matches| is empty
// and HISTORY_URL otherwise. The declared return type is AutocompleteMatch;
// the return statement itself lies beyond the lines visible in this listing.
210 AutocompleteMatch
HistoryQuickProvider::QuickMatchToACMatch(
211 const ScoredHistoryMatch
& history_match
,
213 const history::URLRow
& info
= history_match
.url_info
;
214 AutocompleteMatch
match(
215 this, score
, !!info
.visit_count(),
216 history_match
.url_matches
.empty() ?
217 AutocompleteMatchType::HISTORY_TITLE
:
218 AutocompleteMatchType::HISTORY_URL
);
219 match
.typed_count
= info
.typed_count();
220 match
.destination_url
= info
.url();
221 DCHECK(match
.destination_url
.is_valid());
223 // Format the URL autocomplete presentation.
// Start from kFormatUrlOmitAll and clear the kFormatUrlOmitHTTP bit only when
// the match was in the scheme, so that the scheme stays visible in that case.
224 const net::FormatUrlTypes format_types
= net::kFormatUrlOmitAll
&
225 ~(!history_match
.match_in_scheme
? 0 : net::kFormatUrlOmitHTTP
);
226 match
.fill_into_edit
=
227 AutocompleteInput::FormattedStringWithEquivalentMeaning(
229 net::FormatUrl(info
.url(), languages_
, format_types
,
230 net::UnescapeRule::SPACES
, NULL
, NULL
, NULL
),
231 client()->GetSchemeClassifier());
232 std::vector
<size_t> offsets
=
233 OffsetsFromTermMatches(history_match
.url_matches
);
234 base::OffsetAdjuster::Adjustments adjustments
;
235 match
.contents
= net::FormatUrlWithAdjustments(
236 info
.url(), languages_
, format_types
, net::UnescapeRule::SPACES
, NULL
,
238 base::OffsetAdjuster::AdjustOffsets(adjustments
, &offsets
);
239 TermMatches new_matches
=
240 ReplaceOffsetsInTermMatches(history_match
.url_matches
, offsets
);
241 match
.contents_class
=
242 SpansFromTermMatch(new_matches
, match
.contents
.length(), true);
244 // Set |inline_autocompletion| and |allowed_to_be_default_match| if possible.
245 if (history_match
.can_inline
) {
246 base::debug::ScopedCrashKey
crash_info(
248 info
.url().spec().substr(0, 30) + " " +
249 base::UTF16ToUTF8(autocomplete_input_
.text()).substr(0, 20) + " " +
250 base::SizeTToString(history_match
.url_matches
.size()) + " " +
251 base::SizeTToString(offsets
.size()));
252 CHECK(!new_matches
.empty());
253 size_t inline_autocomplete_offset
= new_matches
[0].offset
+
254 new_matches
[0].length
;
255 // |inline_autocomplete_offset| may be beyond the end of the
256 // |fill_into_edit| if the user has typed an URL with a scheme and the
257 // last character typed is a slash. That slash is removed by the
258 // FormatURLWithOffsets call above.
// NOTE(review): no FormatURLWithOffsets call is visible above; the formatting
// calls shown are net::FormatUrl and net::FormatUrlWithAdjustments, so the
// function name in the comment above may be stale.
259 if (inline_autocomplete_offset
< match
.fill_into_edit
.length()) {
260 match
.inline_autocompletion
=
261 match
.fill_into_edit
.substr(inline_autocomplete_offset
);
263 match
.allowed_to_be_default_match
= match
.inline_autocompletion
.empty() ||
264 !PreventInlineAutocomplete(autocomplete_input_
);
266 match
.EnsureUWYTIsAllowedToBeDefault(autocomplete_input_
.canonicalized_url(),
267 client()->GetTemplateURLService());
269 // Format the description autocomplete presentation.
270 match
.description
= info
.title();
271 match
.description_class
= SpansFromTermMatch(
272 history_match
.title_matches
, match
.description
.length(), false);
// Attach the history row's typed count, visit count and last-visit time to
// the match as additional info.
274 match
.RecordAdditionalInfo("typed count", info
.typed_count());
275 match
.RecordAdditionalInfo("visit count", info
.visit_count());
276 match
.RecordAdditionalInfo("last visit", info
.last_visit());