// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/omnibox/search_suggestion_parser.h"

#include "base/i18n/icu_string_conversions.h"
#include "base/json/json_string_value_serializer.h"
#include "base/json/json_writer.h"
#include "base/logging.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "components/omnibox/autocomplete_input.h"
#include "components/omnibox/url_prefix.h"
#include "components/url_fixer/url_fixer.h"
#include "net/base/net_util.h"
#include "net/http/http_response_headers.h"
#include "net/url_request/url_fetcher.h"
#include "url/url_constants.h"

25 AutocompleteMatchType::Type
GetAutocompleteMatchType(const std::string
& type
) {
27 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY
;
28 if (type
== "INFINITE")
29 return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE
;
30 if (type
== "PERSONALIZED_QUERY")
31 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED
;
32 if (type
== "PROFILE")
33 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE
;
34 if (type
== "NAVIGATION")
35 return AutocompleteMatchType::NAVSUGGEST
;
36 if (type
== "PERSONALIZED_NAVIGATION")
37 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
;
38 return AutocompleteMatchType::SEARCH_SUGGEST
;
// SearchSuggestionParser::Result ----------------------------------------------

45 SearchSuggestionParser::Result::Result(bool from_keyword_provider
,
47 bool relevance_from_server
,
48 AutocompleteMatchType::Type type
,
49 const std::string
& deletion_url
)
50 : from_keyword_provider_(from_keyword_provider
),
52 relevance_(relevance
),
53 relevance_from_server_(relevance_from_server
),
54 received_after_last_keystroke_(true),
55 deletion_url_(deletion_url
) {}
57 SearchSuggestionParser::Result::~Result() {}
// SearchSuggestionParser::SuggestResult ---------------------------------------

61 SearchSuggestionParser::SuggestResult::SuggestResult(
62 const base::string16
& suggestion
,
63 AutocompleteMatchType::Type type
,
64 const base::string16
& match_contents
,
65 const base::string16
& match_contents_prefix
,
66 const base::string16
& annotation
,
67 const base::string16
& answer_contents
,
68 const base::string16
& answer_type
,
69 scoped_ptr
<SuggestionAnswer
> answer
,
70 const std::string
& suggest_query_params
,
71 const std::string
& deletion_url
,
72 bool from_keyword_provider
,
74 bool relevance_from_server
,
76 const base::string16
& input_text
)
77 : Result(from_keyword_provider
,
79 relevance_from_server
,
82 suggestion_(suggestion
),
83 match_contents_prefix_(match_contents_prefix
),
84 annotation_(annotation
),
85 suggest_query_params_(suggest_query_params
),
86 answer_contents_(answer_contents
),
87 answer_type_(answer_type
),
88 answer_(answer
.Pass()),
89 should_prefetch_(should_prefetch
) {
90 match_contents_
= match_contents
;
91 DCHECK(!match_contents_
.empty());
92 ClassifyMatchContents(true, input_text
);
95 SearchSuggestionParser::SuggestResult::SuggestResult(
96 const SuggestResult
& result
)
98 suggestion_(result
.suggestion_
),
99 match_contents_prefix_(result
.match_contents_prefix_
),
100 annotation_(result
.annotation_
),
101 suggest_query_params_(result
.suggest_query_params_
),
102 answer_contents_(result
.answer_contents_
),
103 answer_type_(result
.answer_type_
),
104 answer_(SuggestionAnswer::copy(result
.answer_
.get())),
105 should_prefetch_(result
.should_prefetch_
) {
108 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
110 SearchSuggestionParser::SuggestResult
&
111 SearchSuggestionParser::SuggestResult::operator=(const SuggestResult
& rhs
) {
115 // Assign via parent class first.
116 Result::operator=(rhs
);
118 suggestion_
= rhs
.suggestion_
;
119 match_contents_prefix_
= rhs
.match_contents_prefix_
;
120 annotation_
= rhs
.annotation_
;
121 suggest_query_params_
= rhs
.suggest_query_params_
;
122 answer_contents_
= rhs
.answer_contents_
;
123 answer_type_
= rhs
.answer_type_
;
124 answer_
= SuggestionAnswer::copy(rhs
.answer_
.get());
125 should_prefetch_
= rhs
.should_prefetch_
;
130 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
131 const bool allow_bolding_all
,
132 const base::string16
& input_text
) {
133 if (input_text
.empty()) {
134 // In case of zero-suggest results, do not highlight matches.
135 match_contents_class_
.push_back(
136 ACMatchClassification(0, ACMatchClassification::NONE
));
140 base::string16 lookup_text
= input_text
;
141 if (type_
== AutocompleteMatchType::SEARCH_SUGGEST_INFINITE
) {
142 const size_t contents_index
=
143 suggestion_
.length() - match_contents_
.length();
144 // Ensure the query starts with the input text, and ends with the match
145 // contents, and the input text has an overlap with contents.
146 if (StartsWith(suggestion_
, input_text
, true) &&
147 EndsWith(suggestion_
, match_contents_
, true) &&
148 (input_text
.length() > contents_index
)) {
149 lookup_text
= input_text
.substr(contents_index
);
152 size_t lookup_position
= match_contents_
.find(lookup_text
);
153 if (!allow_bolding_all
&& (lookup_position
== base::string16::npos
)) {
154 // Bail if the code below to update the bolding would bold the whole
155 // string. Note that the string may already be entirely bolded; if
156 // so, leave it as is.
159 match_contents_class_
.clear();
160 // We do intra-string highlighting for suggestions - the suggested segment
161 // will be highlighted, e.g. for input_text = "you" the suggestion may be
162 // "youtube", so we'll bold the "tube" section: you*tube*.
163 if (input_text
!= match_contents_
) {
164 if (lookup_position
== base::string16::npos
) {
165 // The input text is not a substring of the query string, e.g. input
166 // text is "slasdot" and the query string is "slashdot", so we bold the
168 match_contents_class_
.push_back(
169 ACMatchClassification(0, ACMatchClassification::MATCH
));
171 // We don't iterate over the string here annotating all matches because
172 // it looks odd to have every occurrence of a substring that may be as
173 // short as a single character highlighted in a query suggestion result,
174 // e.g. for input text "s" and query string "southwest airlines", it
175 // looks odd if both the first and last s are highlighted.
176 if (lookup_position
!= 0) {
177 match_contents_class_
.push_back(
178 ACMatchClassification(0, ACMatchClassification::MATCH
));
180 match_contents_class_
.push_back(
181 ACMatchClassification(lookup_position
, ACMatchClassification::NONE
));
182 size_t next_fragment_position
= lookup_position
+ lookup_text
.length();
183 if (next_fragment_position
< match_contents_
.length()) {
184 match_contents_class_
.push_back(ACMatchClassification(
185 next_fragment_position
, ACMatchClassification::MATCH
));
189 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
190 // for the default provider or a keyword search provider.
191 match_contents_class_
.push_back(
192 ACMatchClassification(0, ACMatchClassification::NONE
));
196 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
197 const AutocompleteInput
& input
,
198 bool keyword_provider_requested
) const {
199 if (!from_keyword_provider_
&& keyword_provider_requested
)
201 return ((input
.type() == metrics::OmniboxInputType::URL
) ? 300 : 600);
// SearchSuggestionParser::NavigationResult ------------------------------------

206 SearchSuggestionParser::NavigationResult::NavigationResult(
207 const AutocompleteSchemeClassifier
& scheme_classifier
,
209 AutocompleteMatchType::Type type
,
210 const base::string16
& description
,
211 const std::string
& deletion_url
,
212 bool from_keyword_provider
,
214 bool relevance_from_server
,
215 const base::string16
& input_text
,
216 const std::string
& languages
)
217 : Result(from_keyword_provider
, relevance
, relevance_from_server
, type
,
220 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
221 url
, net::FormatUrl(url
, languages
,
222 net::kFormatUrlOmitAll
& ~net::kFormatUrlOmitHTTP
,
223 net::UnescapeRule::SPACES
, NULL
, NULL
, NULL
),
225 description_(description
) {
226 DCHECK(url_
.is_valid());
227 CalculateAndClassifyMatchContents(true, input_text
, languages
);
230 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
233 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
234 const bool allow_bolding_nothing
,
235 const base::string16
& input_text
,
236 const std::string
& languages
) {
237 if (input_text
.empty()) {
238 // In case of zero-suggest results, do not highlight matches.
239 match_contents_class_
.push_back(
240 ACMatchClassification(0, ACMatchClassification::NONE
));
244 // First look for the user's input inside the formatted url as it would be
245 // without trimming the scheme, so we can find matches at the beginning of the
247 const URLPrefix
* prefix
=
248 URLPrefix::BestURLPrefix(formatted_url_
, input_text
);
249 size_t match_start
= (prefix
== NULL
) ?
250 formatted_url_
.find(input_text
) : prefix
->prefix
.length();
251 bool trim_http
= !AutocompleteInput::HasHTTPScheme(input_text
) &&
252 (!prefix
|| (match_start
!= 0));
253 const net::FormatUrlTypes format_types
=
254 net::kFormatUrlOmitAll
& ~(trim_http
? 0 : net::kFormatUrlOmitHTTP
);
256 base::string16 match_contents
= net::FormatUrl(url_
, languages
, format_types
,
257 net::UnescapeRule::SPACES
, NULL
, NULL
, &match_start
);
258 // If the first match in the untrimmed string was inside a scheme that we
259 // trimmed, look for a subsequent match.
260 if (match_start
== base::string16::npos
)
261 match_start
= match_contents
.find(input_text
);
262 // Update |match_contents_| and |match_contents_class_| if it's allowed.
263 if (allow_bolding_nothing
|| (match_start
!= base::string16::npos
)) {
264 match_contents_
= match_contents
;
265 // Safe if |match_start| is npos; also safe if the input is longer than the
266 // remaining contents after |match_start|.
267 AutocompleteMatch::ClassifyLocationInString(match_start
,
268 input_text
.length(), match_contents_
.length(),
269 ACMatchClassification::URL
, &match_contents_class_
);
273 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
274 const AutocompleteInput
& input
,
275 bool keyword_provider_requested
) const {
276 return (from_keyword_provider_
|| !keyword_provider_requested
) ? 800 : 150;
// SearchSuggestionParser::Results ---------------------------------------------

281 SearchSuggestionParser::Results::Results()
282 : verbatim_relevance(-1),
283 field_trial_triggered(false),
284 relevances_from_server(false) {}
286 SearchSuggestionParser::Results::~Results() {}
288 void SearchSuggestionParser::Results::Clear() {
289 suggest_results
.clear();
290 navigation_results
.clear();
291 verbatim_relevance
= -1;
295 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
296 if (verbatim_relevance
>= 0)
299 // Right now either all results of one type will be server-scored or they will
300 // all be locally scored, but in case we change this later, we'll just check
302 for (SuggestResults::const_iterator
i(suggest_results
.begin());
303 i
!= suggest_results
.end(); ++i
) {
304 if (i
->relevance_from_server())
307 for (NavigationResults::const_iterator
i(navigation_results
.begin());
308 i
!= navigation_results
.end(); ++i
) {
309 if (i
->relevance_from_server())
// SearchSuggestionParser ------------------------------------------------------

319 std::string
SearchSuggestionParser::ExtractJsonData(
320 const net::URLFetcher
* source
) {
321 const net::HttpResponseHeaders
* const response_headers
=
322 source
->GetResponseHeaders();
323 std::string json_data
;
324 source
->GetResponseAsString(&json_data
);
326 // JSON is supposed to be UTF-8, but some suggest service providers send
327 // JSON files in non-UTF-8 encodings. The actual encoding is usually
328 // specified in the Content-Type header field.
329 if (response_headers
) {
331 if (response_headers
->GetCharset(&charset
)) {
332 base::string16 data_16
;
333 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
334 if (base::CodepageToUTF16(json_data
, charset
.c_str(),
335 base::OnStringConversionError::FAIL
,
337 json_data
= base::UTF16ToUTF8(data_16
);
344 scoped_ptr
<base::Value
> SearchSuggestionParser::DeserializeJsonData(
345 std::string json_data
) {
346 // The JSON response should be an array.
347 for (size_t response_start_index
= json_data
.find("["), i
= 0;
348 response_start_index
!= std::string::npos
&& i
< 5;
349 response_start_index
= json_data
.find("[", 1), i
++) {
350 // Remove any XSSI guards to allow for JSON parsing.
351 if (response_start_index
> 0)
352 json_data
.erase(0, response_start_index
);
354 JSONStringValueSerializer
deserializer(json_data
);
355 deserializer
.set_allow_trailing_comma(true);
357 scoped_ptr
<base::Value
> data(deserializer
.Deserialize(&error_code
, NULL
));
361 return scoped_ptr
<base::Value
>();
365 bool SearchSuggestionParser::ParseSuggestResults(
366 const base::Value
& root_val
,
367 const AutocompleteInput
& input
,
368 const AutocompleteSchemeClassifier
& scheme_classifier
,
369 int default_result_relevance
,
370 const std::string
& languages
,
371 bool is_keyword_result
,
373 base::string16 query
;
374 const base::ListValue
* root_list
= NULL
;
375 const base::ListValue
* results_list
= NULL
;
377 if (!root_val
.GetAsList(&root_list
) || !root_list
->GetString(0, &query
) ||
378 query
!= input
.text() || !root_list
->GetList(1, &results_list
))
381 // 3rd element: Description list.
382 const base::ListValue
* descriptions
= NULL
;
383 root_list
->GetList(2, &descriptions
);
385 // 4th element: Disregard the query URL list for now.
387 // Reset suggested relevance information.
388 results
->verbatim_relevance
= -1;
390 // 5th element: Optional key-value pairs from the Suggest server.
391 const base::ListValue
* types
= NULL
;
392 const base::ListValue
* relevances
= NULL
;
393 const base::ListValue
* suggestion_details
= NULL
;
394 const base::DictionaryValue
* extras
= NULL
;
395 int prefetch_index
= -1;
396 if (root_list
->GetDictionary(4, &extras
)) {
397 extras
->GetList("google:suggesttype", &types
);
399 // Discard this list if its size does not match that of the suggestions.
400 if (extras
->GetList("google:suggestrelevance", &relevances
) &&
401 (relevances
->GetSize() != results_list
->GetSize()))
403 extras
->GetInteger("google:verbatimrelevance",
404 &results
->verbatim_relevance
);
406 // Check if the active suggest field trial (if any) has triggered either
407 // for the default provider or keyword provider.
408 results
->field_trial_triggered
= false;
409 extras
->GetBoolean("google:fieldtrialtriggered",
410 &results
->field_trial_triggered
);
412 const base::DictionaryValue
* client_data
= NULL
;
413 if (extras
->GetDictionary("google:clientdata", &client_data
) && client_data
)
414 client_data
->GetInteger("phi", &prefetch_index
);
416 if (extras
->GetList("google:suggestdetail", &suggestion_details
) &&
417 suggestion_details
->GetSize() != results_list
->GetSize())
418 suggestion_details
= NULL
;
420 // Store the metadata that came with the response in case we need to pass it
421 // along with the prefetch query to Instant.
422 JSONStringValueSerializer
json_serializer(&results
->metadata
);
423 json_serializer
.Serialize(*extras
);
426 // Clear the previous results now that new results are available.
427 results
->suggest_results
.clear();
428 results
->navigation_results
.clear();
429 results
->answers_image_urls
.clear();
431 base::string16 suggestion
;
433 int relevance
= default_result_relevance
;
434 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
435 const bool allow_navsuggest
=
436 input
.type() != metrics::OmniboxInputType::FORCED_QUERY
;
437 const base::string16
& trimmed_input
=
438 base::CollapseWhitespace(input
.text(), false);
439 for (size_t index
= 0; results_list
->GetString(index
, &suggestion
); ++index
) {
440 // Google search may return empty suggestions for weird input characters,
441 // they make no sense at all and can cause problems in our code.
442 if (suggestion
.empty())
445 // Apply valid suggested relevance scores; discard invalid lists.
446 if (relevances
!= NULL
&& !relevances
->GetInteger(index
, &relevance
))
448 AutocompleteMatchType::Type match_type
=
449 AutocompleteMatchType::SEARCH_SUGGEST
;
450 if (types
&& types
->GetString(index
, &type
))
451 match_type
= GetAutocompleteMatchType(type
);
452 const base::DictionaryValue
* suggestion_detail
= NULL
;
453 std::string deletion_url
;
455 if (suggestion_details
&&
456 suggestion_details
->GetDictionary(index
, &suggestion_detail
))
457 suggestion_detail
->GetString("du", &deletion_url
);
459 if ((match_type
== AutocompleteMatchType::NAVSUGGEST
) ||
460 (match_type
== AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
)) {
461 // Do not blindly trust the URL coming from the server to be valid.
463 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion
), std::string()));
464 if (url
.is_valid() && allow_navsuggest
) {
465 base::string16 title
;
466 if (descriptions
!= NULL
)
467 descriptions
->GetString(index
, &title
);
468 results
->navigation_results
.push_back(NavigationResult(
469 scheme_classifier
, url
, match_type
, title
, deletion_url
,
470 is_keyword_result
, relevance
, relevances
!= NULL
, input
.text(),
474 base::string16 match_contents
= suggestion
;
475 base::string16 match_contents_prefix
;
476 base::string16 annotation
;
477 base::string16 answer_contents
;
478 base::string16 answer_type_str
;
479 scoped_ptr
<SuggestionAnswer
> answer
;
480 std::string suggest_query_params
;
482 if (suggestion_details
) {
483 suggestion_details
->GetDictionary(index
, &suggestion_detail
);
484 if (suggestion_detail
) {
485 suggestion_detail
->GetString("t", &match_contents
);
486 suggestion_detail
->GetString("mp", &match_contents_prefix
);
487 // Error correction for bad data from server.
488 if (match_contents
.empty())
489 match_contents
= suggestion
;
490 suggestion_detail
->GetString("a", &annotation
);
491 suggestion_detail
->GetString("q", &suggest_query_params
);
493 // Extract the Answer, if provided.
494 const base::DictionaryValue
* answer_json
= NULL
;
495 if (suggestion_detail
->GetDictionary("ansa", &answer_json
) &&
496 suggestion_detail
->GetString("ansb", &answer_type_str
)) {
497 answer
= SuggestionAnswer::ParseAnswer(answer_json
);
500 if (answer
&& base::StringToInt(answer_type_str
, &answer_type
)) {
501 match_type
= AutocompleteMatchType::SEARCH_SUGGEST_ANSWER
;
503 answer
->set_type(answer_type
);
504 answer
->AddImageURLsTo(&results
->answers_image_urls
);
506 std::string contents
;
507 base::JSONWriter::Write(answer_json
, &contents
);
508 answer_contents
= base::UTF8ToUTF16(contents
);
510 answer_type_str
= base::string16();
516 bool should_prefetch
= static_cast<int>(index
) == prefetch_index
;
517 // TODO(kochi): Improve calculator suggestion presentation.
518 results
->suggest_results
.push_back(SuggestResult(
519 base::CollapseWhitespace(suggestion
, false), match_type
,
520 base::CollapseWhitespace(match_contents
, false),
521 match_contents_prefix
, annotation
, answer_contents
, answer_type_str
,
522 answer
.Pass(), suggest_query_params
, deletion_url
, is_keyword_result
,
523 relevance
, relevances
!= NULL
, should_prefetch
, trimmed_input
));
526 results
->relevances_from_server
= relevances
!= NULL
;