1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/omnibox/browser/search_suggestion_parser.h"
9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/json/json_string_value_serializer.h"
11 #include "base/json/json_writer.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram_macros.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/values.h"
18 #include "components/omnibox/browser/autocomplete_i18n.h"
19 #include "components/omnibox/browser/autocomplete_input.h"
20 #include "components/omnibox/browser/url_prefix.h"
21 #include "components/url_formatter/url_fixer.h"
22 #include "components/url_formatter/url_formatter.h"
23 #include "net/http/http_response_headers.h"
24 #include "net/url_request/url_fetcher.h"
25 #include "url/url_constants.h"
29 AutocompleteMatchType::Type
GetAutocompleteMatchType(const std::string
& type
) {
30 if (type
== "CALCULATOR")
31 return AutocompleteMatchType::CALCULATOR
;
33 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY
;
35 return AutocompleteMatchType::SEARCH_SUGGEST_TAIL
;
36 if (type
== "PERSONALIZED_QUERY")
37 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED
;
38 if (type
== "PROFILE")
39 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE
;
40 if (type
== "NAVIGATION")
41 return AutocompleteMatchType::NAVSUGGEST
;
42 if (type
== "PERSONALIZED_NAVIGATION")
43 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
;
44 return AutocompleteMatchType::SEARCH_SUGGEST
;
// SearchSuggestionParser::Result ----------------------------------------------
51 SearchSuggestionParser::Result::Result(bool from_keyword_provider
,
53 bool relevance_from_server
,
54 AutocompleteMatchType::Type type
,
55 const std::string
& deletion_url
)
56 : from_keyword_provider_(from_keyword_provider
),
58 relevance_(relevance
),
59 relevance_from_server_(relevance_from_server
),
60 received_after_last_keystroke_(true),
61 deletion_url_(deletion_url
) {}
63 SearchSuggestionParser::Result::~Result() {}
// SearchSuggestionParser::SuggestResult ---------------------------------------
67 SearchSuggestionParser::SuggestResult::SuggestResult(
68 const base::string16
& suggestion
,
69 AutocompleteMatchType::Type type
,
70 const base::string16
& match_contents
,
71 const base::string16
& match_contents_prefix
,
72 const base::string16
& annotation
,
73 const base::string16
& answer_contents
,
74 const base::string16
& answer_type
,
75 scoped_ptr
<SuggestionAnswer
> answer
,
76 const std::string
& suggest_query_params
,
77 const std::string
& deletion_url
,
78 bool from_keyword_provider
,
80 bool relevance_from_server
,
82 const base::string16
& input_text
)
83 : Result(from_keyword_provider
,
85 relevance_from_server
,
88 suggestion_(suggestion
),
89 match_contents_prefix_(match_contents_prefix
),
90 annotation_(annotation
),
91 suggest_query_params_(suggest_query_params
),
92 answer_contents_(answer_contents
),
93 answer_type_(answer_type
),
94 answer_(answer
.Pass()),
95 should_prefetch_(should_prefetch
) {
96 match_contents_
= match_contents
;
97 DCHECK(!match_contents_
.empty());
98 ClassifyMatchContents(true, input_text
);
101 SearchSuggestionParser::SuggestResult::SuggestResult(
102 const SuggestResult
& result
)
104 suggestion_(result
.suggestion_
),
105 match_contents_prefix_(result
.match_contents_prefix_
),
106 annotation_(result
.annotation_
),
107 suggest_query_params_(result
.suggest_query_params_
),
108 answer_contents_(result
.answer_contents_
),
109 answer_type_(result
.answer_type_
),
110 answer_(SuggestionAnswer::copy(result
.answer_
.get())),
111 should_prefetch_(result
.should_prefetch_
) {
114 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
116 SearchSuggestionParser::SuggestResult
&
117 SearchSuggestionParser::SuggestResult::operator=(const SuggestResult
& rhs
) {
121 // Assign via parent class first.
122 Result::operator=(rhs
);
124 suggestion_
= rhs
.suggestion_
;
125 match_contents_prefix_
= rhs
.match_contents_prefix_
;
126 annotation_
= rhs
.annotation_
;
127 suggest_query_params_
= rhs
.suggest_query_params_
;
128 answer_contents_
= rhs
.answer_contents_
;
129 answer_type_
= rhs
.answer_type_
;
130 answer_
= SuggestionAnswer::copy(rhs
.answer_
.get());
131 should_prefetch_
= rhs
.should_prefetch_
;
136 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
137 const bool allow_bolding_all
,
138 const base::string16
& input_text
) {
139 if (input_text
.empty()) {
140 // In case of zero-suggest results, do not highlight matches.
141 match_contents_class_
.push_back(
142 ACMatchClassification(0, ACMatchClassification::NONE
));
146 base::string16 lookup_text
= input_text
;
147 if (type_
== AutocompleteMatchType::SEARCH_SUGGEST_TAIL
) {
148 const size_t contents_index
=
149 suggestion_
.length() - match_contents_
.length();
150 // Ensure the query starts with the input text, and ends with the match
151 // contents, and the input text has an overlap with contents.
152 if (base::StartsWith(suggestion_
, input_text
,
153 base::CompareCase::SENSITIVE
) &&
154 base::EndsWith(suggestion_
, match_contents_
,
155 base::CompareCase::SENSITIVE
) &&
156 (input_text
.length() > contents_index
)) {
157 lookup_text
= input_text
.substr(contents_index
);
160 // Do a case-insensitive search for |lookup_text|.
161 base::string16::const_iterator lookup_position
= std::search(
162 match_contents_
.begin(), match_contents_
.end(), lookup_text
.begin(),
163 lookup_text
.end(), SimpleCaseInsensitiveCompareUCS2());
164 if (!allow_bolding_all
&& (lookup_position
== match_contents_
.end())) {
165 // Bail if the code below to update the bolding would bold the whole
166 // string. Note that the string may already be entirely bolded; if
167 // so, leave it as is.
170 match_contents_class_
.clear();
171 // We do intra-string highlighting for suggestions - the suggested segment
172 // will be highlighted, e.g. for input_text = "you" the suggestion may be
173 // "youtube", so we'll bold the "tube" section: you*tube*.
174 if (input_text
!= match_contents_
) {
175 if (lookup_position
== match_contents_
.end()) {
176 // The input text is not a substring of the query string, e.g. input
177 // text is "slasdot" and the query string is "slashdot", so we bold the
179 match_contents_class_
.push_back(
180 ACMatchClassification(0, ACMatchClassification::MATCH
));
182 // We don't iterate over the string here annotating all matches because
183 // it looks odd to have every occurrence of a substring that may be as
184 // short as a single character highlighted in a query suggestion result,
185 // e.g. for input text "s" and query string "southwest airlines", it
186 // looks odd if both the first and last s are highlighted.
187 const size_t lookup_index
= lookup_position
- match_contents_
.begin();
188 if (lookup_index
!= 0) {
189 match_contents_class_
.push_back(
190 ACMatchClassification(0, ACMatchClassification::MATCH
));
192 match_contents_class_
.push_back(
193 ACMatchClassification(lookup_index
, ACMatchClassification::NONE
));
194 size_t next_fragment_position
= lookup_index
+ lookup_text
.length();
195 if (next_fragment_position
< match_contents_
.length()) {
196 match_contents_class_
.push_back(ACMatchClassification(
197 next_fragment_position
, ACMatchClassification::MATCH
));
201 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
202 // for the default provider or a keyword search provider.
203 match_contents_class_
.push_back(
204 ACMatchClassification(0, ACMatchClassification::NONE
));
208 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
209 const AutocompleteInput
& input
,
210 bool keyword_provider_requested
) const {
211 if (!from_keyword_provider_
&& keyword_provider_requested
)
213 return ((input
.type() == metrics::OmniboxInputType::URL
) ? 300 : 600);
// SearchSuggestionParser::NavigationResult ------------------------------------
218 SearchSuggestionParser::NavigationResult::NavigationResult(
219 const AutocompleteSchemeClassifier
& scheme_classifier
,
221 AutocompleteMatchType::Type type
,
222 const base::string16
& description
,
223 const std::string
& deletion_url
,
224 bool from_keyword_provider
,
226 bool relevance_from_server
,
227 const base::string16
& input_text
,
228 const std::string
& languages
)
229 : Result(from_keyword_provider
,
231 relevance_from_server
,
235 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
237 url_formatter::FormatUrl(url
,
239 url_formatter::kFormatUrlOmitAll
&
240 ~url_formatter::kFormatUrlOmitHTTP
,
241 net::UnescapeRule::SPACES
,
246 description_(description
) {
247 DCHECK(url_
.is_valid());
248 CalculateAndClassifyMatchContents(true, input_text
, languages
);
251 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
254 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
255 const bool allow_bolding_nothing
,
256 const base::string16
& input_text
,
257 const std::string
& languages
) {
258 if (input_text
.empty()) {
259 // In case of zero-suggest results, do not highlight matches.
260 match_contents_class_
.push_back(
261 ACMatchClassification(0, ACMatchClassification::NONE
));
265 // First look for the user's input inside the formatted url as it would be
266 // without trimming the scheme, so we can find matches at the beginning of the
268 const URLPrefix
* prefix
=
269 URLPrefix::BestURLPrefix(formatted_url_
, input_text
);
270 size_t match_start
= (prefix
== NULL
) ?
271 formatted_url_
.find(input_text
) : prefix
->prefix
.length();
272 bool trim_http
= !AutocompleteInput::HasHTTPScheme(input_text
) &&
273 (!prefix
|| (match_start
!= 0));
274 const url_formatter::FormatUrlTypes format_types
=
275 url_formatter::kFormatUrlOmitAll
&
276 ~(trim_http
? 0 : url_formatter::kFormatUrlOmitHTTP
);
278 base::string16 match_contents
= url_formatter::FormatUrl(
279 url_
, languages
, format_types
, net::UnescapeRule::SPACES
, nullptr,
280 nullptr, &match_start
);
281 // If the first match in the untrimmed string was inside a scheme that we
282 // trimmed, look for a subsequent match.
283 if (match_start
== base::string16::npos
)
284 match_start
= match_contents
.find(input_text
);
285 // Update |match_contents_| and |match_contents_class_| if it's allowed.
286 if (allow_bolding_nothing
|| (match_start
!= base::string16::npos
)) {
287 match_contents_
= match_contents
;
288 // Safe if |match_start| is npos; also safe if the input is longer than the
289 // remaining contents after |match_start|.
290 AutocompleteMatch::ClassifyLocationInString(match_start
,
291 input_text
.length(), match_contents_
.length(),
292 ACMatchClassification::URL
, &match_contents_class_
);
296 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
297 const AutocompleteInput
& input
,
298 bool keyword_provider_requested
) const {
299 return (from_keyword_provider_
|| !keyword_provider_requested
) ? 800 : 150;
// SearchSuggestionParser::Results ---------------------------------------------
304 SearchSuggestionParser::Results::Results()
305 : verbatim_relevance(-1),
306 field_trial_triggered(false),
307 relevances_from_server(false) {}
309 SearchSuggestionParser::Results::~Results() {}
311 void SearchSuggestionParser::Results::Clear() {
312 suggest_results
.clear();
313 navigation_results
.clear();
314 verbatim_relevance
= -1;
318 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
319 if (verbatim_relevance
>= 0)
322 // Right now either all results of one type will be server-scored or they will
323 // all be locally scored, but in case we change this later, we'll just check
325 for (SuggestResults::const_iterator
i(suggest_results
.begin());
326 i
!= suggest_results
.end(); ++i
) {
327 if (i
->relevance_from_server())
330 for (NavigationResults::const_iterator
i(navigation_results
.begin());
331 i
!= navigation_results
.end(); ++i
) {
332 if (i
->relevance_from_server())
// SearchSuggestionParser ------------------------------------------------------
342 std::string
SearchSuggestionParser::ExtractJsonData(
343 const net::URLFetcher
* source
) {
344 const net::HttpResponseHeaders
* const response_headers
=
345 source
->GetResponseHeaders();
346 std::string json_data
;
347 source
->GetResponseAsString(&json_data
);
349 // JSON is supposed to be UTF-8, but some suggest service providers send
350 // JSON files in non-UTF-8 encodings. The actual encoding is usually
351 // specified in the Content-Type header field.
352 if (response_headers
) {
354 if (response_headers
->GetCharset(&charset
)) {
355 base::string16 data_16
;
356 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
357 if (base::CodepageToUTF16(json_data
, charset
.c_str(),
358 base::OnStringConversionError::FAIL
,
360 json_data
= base::UTF16ToUTF8(data_16
);
367 scoped_ptr
<base::Value
> SearchSuggestionParser::DeserializeJsonData(
368 base::StringPiece json_data
) {
369 // The JSON response should be an array.
370 for (size_t response_start_index
= json_data
.find("["), i
= 0;
371 response_start_index
!= base::StringPiece::npos
&& i
< 5;
372 response_start_index
= json_data
.find("[", 1), i
++) {
373 // Remove any XSSI guards to allow for JSON parsing.
374 json_data
.remove_prefix(response_start_index
);
376 JSONStringValueDeserializer
deserializer(json_data
);
377 deserializer
.set_allow_trailing_comma(true);
379 scoped_ptr
<base::Value
> data(deserializer
.Deserialize(&error_code
, NULL
));
383 return scoped_ptr
<base::Value
>();
387 bool SearchSuggestionParser::ParseSuggestResults(
388 const base::Value
& root_val
,
389 const AutocompleteInput
& input
,
390 const AutocompleteSchemeClassifier
& scheme_classifier
,
391 int default_result_relevance
,
392 const std::string
& languages
,
393 bool is_keyword_result
,
395 base::string16 query
;
396 const base::ListValue
* root_list
= NULL
;
397 const base::ListValue
* results_list
= NULL
;
399 if (!root_val
.GetAsList(&root_list
) || !root_list
->GetString(0, &query
) ||
400 query
!= input
.text() || !root_list
->GetList(1, &results_list
))
403 // 3rd element: Description list.
404 const base::ListValue
* descriptions
= NULL
;
405 root_list
->GetList(2, &descriptions
);
407 // 4th element: Disregard the query URL list for now.
409 // Reset suggested relevance information.
410 results
->verbatim_relevance
= -1;
412 // 5th element: Optional key-value pairs from the Suggest server.
413 const base::ListValue
* types
= NULL
;
414 const base::ListValue
* relevances
= NULL
;
415 const base::ListValue
* suggestion_details
= NULL
;
416 const base::DictionaryValue
* extras
= NULL
;
417 int prefetch_index
= -1;
418 if (root_list
->GetDictionary(4, &extras
)) {
419 extras
->GetList("google:suggesttype", &types
);
421 // Discard this list if its size does not match that of the suggestions.
422 if (extras
->GetList("google:suggestrelevance", &relevances
) &&
423 (relevances
->GetSize() != results_list
->GetSize()))
425 extras
->GetInteger("google:verbatimrelevance",
426 &results
->verbatim_relevance
);
428 // Check if the active suggest field trial (if any) has triggered either
429 // for the default provider or keyword provider.
430 results
->field_trial_triggered
= false;
431 extras
->GetBoolean("google:fieldtrialtriggered",
432 &results
->field_trial_triggered
);
434 const base::DictionaryValue
* client_data
= NULL
;
435 if (extras
->GetDictionary("google:clientdata", &client_data
) && client_data
)
436 client_data
->GetInteger("phi", &prefetch_index
);
438 if (extras
->GetList("google:suggestdetail", &suggestion_details
) &&
439 suggestion_details
->GetSize() != results_list
->GetSize())
440 suggestion_details
= NULL
;
442 // Store the metadata that came with the response in case we need to pass it
443 // along with the prefetch query to Instant.
444 JSONStringValueSerializer
json_serializer(&results
->metadata
);
445 json_serializer
.Serialize(*extras
);
448 // Clear the previous results now that new results are available.
449 results
->suggest_results
.clear();
450 results
->navigation_results
.clear();
451 results
->answers_image_urls
.clear();
453 base::string16 suggestion
;
455 int relevance
= default_result_relevance
;
456 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
457 const bool allow_navsuggest
=
458 input
.type() != metrics::OmniboxInputType::FORCED_QUERY
;
459 const base::string16
& trimmed_input
=
460 base::CollapseWhitespace(input
.text(), false);
461 for (size_t index
= 0; results_list
->GetString(index
, &suggestion
); ++index
) {
462 // Google search may return empty suggestions for weird input characters,
463 // they make no sense at all and can cause problems in our code.
464 if (suggestion
.empty())
467 // Apply valid suggested relevance scores; discard invalid lists.
468 if (relevances
!= NULL
&& !relevances
->GetInteger(index
, &relevance
))
470 AutocompleteMatchType::Type match_type
=
471 AutocompleteMatchType::SEARCH_SUGGEST
;
472 if (types
&& types
->GetString(index
, &type
))
473 match_type
= GetAutocompleteMatchType(type
);
474 const base::DictionaryValue
* suggestion_detail
= NULL
;
475 std::string deletion_url
;
477 if (suggestion_details
&&
478 suggestion_details
->GetDictionary(index
, &suggestion_detail
))
479 suggestion_detail
->GetString("du", &deletion_url
);
481 if ((match_type
== AutocompleteMatchType::NAVSUGGEST
) ||
482 (match_type
== AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
)) {
483 // Do not blindly trust the URL coming from the server to be valid.
484 GURL
url(url_formatter::FixupURL(base::UTF16ToUTF8(suggestion
),
486 if (url
.is_valid() && allow_navsuggest
) {
487 base::string16 title
;
488 if (descriptions
!= NULL
)
489 descriptions
->GetString(index
, &title
);
490 results
->navigation_results
.push_back(NavigationResult(
491 scheme_classifier
, url
, match_type
, title
, deletion_url
,
492 is_keyword_result
, relevance
, relevances
!= NULL
, input
.text(),
496 // TODO(dschuyler) If the "= " is no longer sent from the back-end
497 // then this may be removed.
498 if ((match_type
== AutocompleteMatchType::CALCULATOR
) &&
499 !suggestion
.compare(0, 2, base::UTF8ToUTF16("= ")))
500 suggestion
.erase(0, 2);
502 base::string16 match_contents
= suggestion
;
503 base::string16 match_contents_prefix
;
504 base::string16 annotation
;
505 base::string16 answer_contents
;
506 base::string16 answer_type_str
;
507 scoped_ptr
<SuggestionAnswer
> answer
;
508 std::string suggest_query_params
;
510 if (suggestion_details
) {
511 suggestion_details
->GetDictionary(index
, &suggestion_detail
);
512 if (suggestion_detail
) {
513 suggestion_detail
->GetString("t", &match_contents
);
514 suggestion_detail
->GetString("mp", &match_contents_prefix
);
515 // Error correction for bad data from server.
516 if (match_contents
.empty())
517 match_contents
= suggestion
;
518 suggestion_detail
->GetString("a", &annotation
);
519 suggestion_detail
->GetString("q", &suggest_query_params
);
521 // Extract the Answer, if provided.
522 const base::DictionaryValue
* answer_json
= NULL
;
523 if (suggestion_detail
->GetDictionary("ansa", &answer_json
) &&
524 suggestion_detail
->GetString("ansb", &answer_type_str
)) {
525 bool answer_parsed_successfully
= false;
526 answer
= SuggestionAnswer::ParseAnswer(answer_json
);
528 if (answer
&& base::StringToInt(answer_type_str
, &answer_type
)) {
529 answer_parsed_successfully
= true;
531 answer
->set_type(answer_type
);
532 answer
->AddImageURLsTo(&results
->answers_image_urls
);
534 std::string contents
;
535 base::JSONWriter::Write(*answer_json
, &contents
);
536 answer_contents
= base::UTF8ToUTF16(contents
);
538 answer_type_str
= base::string16();
540 UMA_HISTOGRAM_BOOLEAN("Omnibox.AnswerParseSuccess",
541 answer_parsed_successfully
);
546 bool should_prefetch
= static_cast<int>(index
) == prefetch_index
;
547 results
->suggest_results
.push_back(SuggestResult(
548 base::CollapseWhitespace(suggestion
, false), match_type
,
549 base::CollapseWhitespace(match_contents
, false),
550 match_contents_prefix
, annotation
, answer_contents
, answer_type_str
,
551 answer
.Pass(), suggest_query_params
, deletion_url
, is_keyword_result
,
552 relevance
, relevances
!= NULL
, should_prefetch
, trimmed_input
));
555 results
->relevances_from_server
= relevances
!= NULL
;