1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/omnibox/search_suggestion_parser.h"
7 #include "base/i18n/icu_string_conversions.h"
8 #include "base/json/json_string_value_serializer.h"
9 #include "base/json/json_writer.h"
10 #include "base/logging.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/values.h"
16 #include "components/omnibox/autocomplete_input.h"
17 #include "components/omnibox/url_prefix.h"
18 #include "components/url_fixer/url_fixer.h"
19 #include "net/base/net_util.h"
20 #include "net/http/http_response_headers.h"
21 #include "net/url_request/url_fetcher.h"
22 #include "url/url_constants.h"
// Maps the suggestion type string |type| to an AutocompleteMatchType::Type.
// The comparisons visible here cover "PERSONALIZED_QUERY", "PROFILE",
// "NAVIGATION" and "PERSONALIZED_NAVIGATION"; the last statement returns
// SEARCH_SUGGEST when none of them match.
26 AutocompleteMatchType::Type
GetAutocompleteMatchType(const std::string
& type
) {
// NOTE(review): no condition is visible in front of the next two returns, so
// as written the first one always fires and everything after it is
// unreachable. Presumably each was guarded by a comparison on |type| like the
// ones below -- confirm against the upstream file before relying on this.
28 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY
;
30 return AutocompleteMatchType::SEARCH_SUGGEST_TAIL
;
31 if (type
== "PERSONALIZED_QUERY")
32 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED
;
33 if (type
== "PROFILE")
34 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE
;
35 if (type
== "NAVIGATION")
36 return AutocompleteMatchType::NAVSUGGEST
;
37 if (type
== "PERSONALIZED_NAVIGATION")
38 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
;
// Fallback for any type string not handled above.
39 return AutocompleteMatchType::SEARCH_SUGGEST
;
44 // SearchSuggestionParser::Result ----------------------------------------------
// Constructs a Result from the provider flag, the server-relevance flag and the
// deletion URL. |received_after_last_keystroke_| always starts out as true.
// NOTE(review): |relevance| is used in the initializer list but is not among
// the parameters visible here, and |type| is accepted without a visible
// initializer that stores it -- both lines appear to be missing from this
// view; confirm.
46 SearchSuggestionParser::Result::Result(bool from_keyword_provider
,
48 bool relevance_from_server
,
49 AutocompleteMatchType::Type type
,
50 const std::string
& deletion_url
)
51 : from_keyword_provider_(from_keyword_provider
),
53 relevance_(relevance
),
54 relevance_from_server_(relevance_from_server
),
55 received_after_last_keystroke_(true),
56 deletion_url_(deletion_url
) {}
// Destructor; the body is empty.
58 SearchSuggestionParser::Result::~Result() {}
60 // SearchSuggestionParser::SuggestResult ---------------------------------------
// Constructs a SuggestResult. The base Result is initialized first, then the
// suggestion text, contents prefix, annotation, query params and answer fields
// are copied from the arguments; ownership of |answer| is transferred into
// |answer_| via Pass(). The body stores |match_contents|, DCHECKs that it is
// non-empty and classifies it against |input_text|.
// NOTE(review): |should_prefetch| is used below but is not among the visible
// parameters, and the Result(...) base initializer is not visibly closed --
// some lines appear to be missing from this view; confirm.
62 SearchSuggestionParser::SuggestResult::SuggestResult(
63 const base::string16
& suggestion
,
64 AutocompleteMatchType::Type type
,
65 const base::string16
& match_contents
,
66 const base::string16
& match_contents_prefix
,
67 const base::string16
& annotation
,
68 const base::string16
& answer_contents
,
69 const base::string16
& answer_type
,
70 scoped_ptr
<SuggestionAnswer
> answer
,
71 const std::string
& suggest_query_params
,
72 const std::string
& deletion_url
,
73 bool from_keyword_provider
,
75 bool relevance_from_server
,
77 const base::string16
& input_text
)
78 : Result(from_keyword_provider
,
80 relevance_from_server
,
83 suggestion_(suggestion
),
84 match_contents_prefix_(match_contents_prefix
),
85 annotation_(annotation
),
86 suggest_query_params_(suggest_query_params
),
87 answer_contents_(answer_contents
),
88 answer_type_(answer_type
),
89 answer_(answer
.Pass()),
90 should_prefetch_(should_prefetch
) {
// |match_contents_| is assigned in the body rather than in the initializer
// list, and must be non-empty before it is classified.
91 match_contents_
= match_contents
;
92 DCHECK(!match_contents_
.empty());
// Passes true for |allow_bolding_all|, so the classification is always
// (re)computed here.
93 ClassifyMatchContents(true, input_text
);
// Copy constructor. Each visible member is copied from |result|; |answer_| is
// not shared with |result| but is set from SuggestionAnswer::copy() applied to
// the raw pointer held by |result.answer_|.
// NOTE(review): the base-class initializer and anything following the opening
// brace are not visible in this view; confirm.
96 SearchSuggestionParser::SuggestResult::SuggestResult(
97 const SuggestResult
& result
)
99 suggestion_(result
.suggestion_
),
100 match_contents_prefix_(result
.match_contents_prefix_
),
101 annotation_(result
.annotation_
),
102 suggest_query_params_(result
.suggest_query_params_
),
103 answer_contents_(result
.answer_contents_
),
104 answer_type_(result
.answer_type_
),
105 answer_(SuggestionAnswer::copy(result
.answer_
.get())),
106 should_prefetch_(result
.should_prefetch_
) {
// Destructor; the body is empty.
109 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
// Copy assignment. Delegates to Result::operator= for the base-class state and
// then assigns each SuggestResult member from |rhs|; |answer_| is replaced by
// the value SuggestionAnswer::copy() produces from |rhs.answer_|.
// NOTE(review): the declared return type is SuggestResult&, but no return
// statement is visible, and the lines between the signature and the base-class
// assignment are not visible either; confirm against the full file.
111 SearchSuggestionParser::SuggestResult
&
112 SearchSuggestionParser::SuggestResult::operator=(const SuggestResult
& rhs
) {
116 // Assign via parent class first.
117 Result::operator=(rhs
);
119 suggestion_
= rhs
.suggestion_
;
120 match_contents_prefix_
= rhs
.match_contents_prefix_
;
121 annotation_
= rhs
.annotation_
;
122 suggest_query_params_
= rhs
.suggest_query_params_
;
123 answer_contents_
= rhs
.answer_contents_
;
124 answer_type_
= rhs
.answer_type_
;
// Same copy path as the copy constructor uses for the answer.
125 answer_
= SuggestionAnswer::copy(rhs
.answer_
.get());
126 should_prefetch_
= rhs
.should_prefetch_
;
// Rebuilds |match_contents_class_|, the list of ACMatchClassification entries
// describing which parts of |match_contents_| are styled MATCH versus NONE,
// relative to |input_text|.
//   allow_bolding_all: when false and the lookup text is not found in
//       |match_contents_|, the function takes the "bail" branch below instead
//       of producing a classification that would bold the whole string.
//   input_text: the text to look for inside |match_contents_|.
// NOTE(review): the statements that leave the function in the empty-input and
// bail branches, and several closing braces, are not visible in this view.
131 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
132 const bool allow_bolding_all
,
133 const base::string16
& input_text
) {
134 if (input_text
.empty()) {
135 // In case of zero-suggest results, do not highlight matches.
136 match_contents_class_
.push_back(
137 ACMatchClassification(0, ACMatchClassification::NONE
));
// By default the whole input is what gets looked up in |match_contents_|.
141 base::string16 lookup_text
= input_text
;
142 if (type_
== AutocompleteMatchType::SEARCH_SUGGEST_TAIL
) {
// |contents_index| is the offset in |suggestion_| at which |match_contents_|
// would start if it is a suffix. The subtraction is on unsigned lengths, so if
// |match_contents_| were longer than |suggestion_| it would wrap to a very
// large value; the length comparison in the condition below then fails and
// |lookup_text| is left unchanged.
143 const size_t contents_index
=
144 suggestion_
.length() - match_contents_
.length();
145 // Ensure the query starts with the input text, and ends with the match
146 // contents, and the input text has an overlap with contents.
147 if (StartsWith(suggestion_
, input_text
, true) &&
148 EndsWith(suggestion_
, match_contents_
, true) &&
149 (input_text
.length() > contents_index
)) {
// Only the part of the input that overlaps the tail contents is looked up.
150 lookup_text
= input_text
.substr(contents_index
);
153 size_t lookup_position
= match_contents_
.find(lookup_text
);
154 if (!allow_bolding_all
&& (lookup_position
== base::string16::npos
)) {
155 // Bail if the code below to update the bolding would bold the whole
156 // string. Note that the string may already be entirely bolded; if
157 // so, leave it as is.
// From here on the previous classification is discarded and rebuilt.
160 match_contents_class_
.clear();
161 // We do intra-string highlighting for suggestions - the suggested segment
162 // will be highlighted, e.g. for input_text = "you" the suggestion may be
163 // "youtube", so we'll bold the "tube" section: you*tube*.
164 if (input_text
!= match_contents_
) {
165 if (lookup_position
== base::string16::npos
) {
166 // The input text is not a substring of the query string, e.g. input
167 // text is "slasdot" and the query string is "slashdot", so we bold the
// (the sentence above is cut off in this view; the code that follows marks the
// string as MATCH starting at offset 0.)
169 match_contents_class_
.push_back(
170 ACMatchClassification(0, ACMatchClassification::MATCH
));
172 // We don't iterate over the string here annotating all matches because
173 // it looks odd to have every occurrence of a substring that may be as
174 // short as a single character highlighted in a query suggestion result,
175 // e.g. for input text "s" and query string "southwest airlines", it
176 // looks odd if both the first and last s are highlighted.
// Anything before the found text is marked MATCH, the found text itself NONE,
// and anything after it MATCH again.
177 if (lookup_position
!= 0) {
178 match_contents_class_
.push_back(
179 ACMatchClassification(0, ACMatchClassification::MATCH
));
181 match_contents_class_
.push_back(
182 ACMatchClassification(lookup_position
, ACMatchClassification::NONE
));
183 size_t next_fragment_position
= lookup_position
+ lookup_text
.length();
// Only add a trailing MATCH entry if there is text after the found fragment.
184 if (next_fragment_position
< match_contents_
.length()) {
185 match_contents_class_
.push_back(ACMatchClassification(
186 next_fragment_position
, ACMatchClassification::MATCH
));
190 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
191 // for the default provider or a keyword search provider.
192 match_contents_class_
.push_back(
193 ACMatchClassification(0, ACMatchClassification::NONE
));
// Returns a locally computed relevance score for this suggestion. The visible
// return yields 300 when the input type is URL and 600 otherwise.
// NOTE(review): the statement controlled by the |if| below is not visible in
// this view, so what is returned for a non-keyword result while a keyword
// provider is requested cannot be read from here; confirm.
197 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
198 const AutocompleteInput
& input
,
199 bool keyword_provider_requested
) const {
200 if (!from_keyword_provider_
&& keyword_provider_requested
)
202 return ((input
.type() == metrics::OmniboxInputType::URL
) ? 300 : 600);
205 // SearchSuggestionParser::NavigationResult ------------------------------------
// Constructs a NavigationResult. The base Result is initialized from the
// provider flag, relevance, server-relevance flag and type; |formatted_url_|
// is derived from the URL via net::FormatUrl and
// AutocompleteInput::FormattedStringWithEquivalentMeaning; |description_| is
// copied. The body DCHECKs that |url_| is valid and computes the match
// contents and their classification for |input_text|.
// NOTE(review): |url| and |relevance| are used but are not among the visible
// parameters, the initializer for |url_| is not visible, and the base and
// |formatted_url_| initializers are not visibly closed -- lines appear to be
// missing from this view; confirm.
207 SearchSuggestionParser::NavigationResult::NavigationResult(
208 const AutocompleteSchemeClassifier
& scheme_classifier
,
210 AutocompleteMatchType::Type type
,
211 const base::string16
& description
,
212 const std::string
& deletion_url
,
213 bool from_keyword_provider
,
215 bool relevance_from_server
,
216 const base::string16
& input_text
,
217 const std::string
& languages
)
218 : Result(from_keyword_provider
, relevance
, relevance_from_server
, type
,
// The format-type mask below is kFormatUrlOmitAll with the kFormatUrlOmitHTTP
// bit cleared.
221 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
222 url
, net::FormatUrl(url
, languages
,
223 net::kFormatUrlOmitAll
& ~net::kFormatUrlOmitHTTP
,
224 net::UnescapeRule::SPACES
, NULL
, NULL
, NULL
),
226 description_(description
) {
227 DCHECK(url_
.is_valid());
// Passes true for |allow_bolding_nothing|, so the contents are always set.
228 CalculateAndClassifyMatchContents(true, input_text
, languages
);
// Destructor; the body is empty.
231 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
// Computes the displayed form of |url_| and classifies where |input_text|
// occurs in it, updating |match_contents_| and |match_contents_class_|.
//   allow_bolding_nothing: when true the members are updated even if
//       |input_text| is not found in the formatted URL; when false they are
//       only updated if a match position was found.
//   input_text: the text to locate inside the formatted URL.
//   languages: forwarded to net::FormatUrl.
// NOTE(review): the return type line, the statement that leaves the function
// in the empty-input branch, and the closing braces are not visible in this
// view; confirm.
234 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
235 const bool allow_bolding_nothing
,
236 const base::string16
& input_text
,
237 const std::string
& languages
) {
238 if (input_text
.empty()) {
239 // In case of zero-suggest results, do not highlight matches.
240 match_contents_class_
.push_back(
241 ACMatchClassification(0, ACMatchClassification::NONE
));
245 // First look for the user's input inside the formatted url as it would be
246 // without trimming the scheme, so we can find matches at the beginning of the
// (the sentence above is cut off in this view.)
248 const URLPrefix
* prefix
=
249 URLPrefix::BestURLPrefix(formatted_url_
, input_text
);
// With no matching prefix, fall back to a plain substring search (which may
// yield npos); otherwise the match starts right after the prefix.
250 size_t match_start
= (prefix
== NULL
) ?
251 formatted_url_
.find(input_text
) : prefix
->prefix
.length();
// "http" is trimmed only if the input itself has no HTTP scheme and either no
// prefix matched or the match does not begin at offset 0.
252 bool trim_http
= !AutocompleteInput::HasHTTPScheme(input_text
) &&
253 (!prefix
|| (match_start
!= 0));
// When |trim_http| is true the mask is kFormatUrlOmitAll unchanged; otherwise
// the kFormatUrlOmitHTTP bit is cleared from it.
254 const net::FormatUrlTypes format_types
=
255 net::kFormatUrlOmitAll
& ~(trim_http
? 0 : net::kFormatUrlOmitHTTP
);
// |match_start| is passed by address as the last argument -- presumably
// FormatUrl adjusts it to the corresponding offset in the returned string, as
// the npos check that follows suggests; confirm against net::FormatUrl.
257 base::string16 match_contents
= net::FormatUrl(url_
, languages
, format_types
,
258 net::UnescapeRule::SPACES
, NULL
, NULL
, &match_start
);
259 // If the first match in the untrimmed string was inside a scheme that we
260 // trimmed, look for a subsequent match.
261 if (match_start
== base::string16::npos
)
262 match_start
= match_contents
.find(input_text
);
263 // Update |match_contents_| and |match_contents_class_| if it's allowed.
264 if (allow_bolding_nothing
|| (match_start
!= base::string16::npos
)) {
265 match_contents_
= match_contents
;
266 // Safe if |match_start| is npos; also safe if the input is longer than the
267 // remaining contents after |match_start|.
268 AutocompleteMatch::ClassifyLocationInString(match_start
,
269 input_text
.length(), match_contents_
.length(),
270 ACMatchClassification::URL
, &match_contents_class_
);
// Returns a locally computed relevance score for this navigation result: 800
// when the result came from the keyword provider or no keyword provider was
// requested, and 150 otherwise. |input| is not used by the visible body.
274 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
275 const AutocompleteInput
& input
,
276 bool keyword_provider_requested
) const {
277 return (from_keyword_provider_
|| !keyword_provider_requested
) ? 800 : 150;
280 // SearchSuggestionParser::Results ---------------------------------------------
// Constructs an empty Results: |verbatim_relevance| starts at -1 and both
// |field_trial_triggered| and |relevances_from_server| start as false.
282 SearchSuggestionParser::Results::Results()
283 : verbatim_relevance(-1),
284 field_trial_triggered(false),
285 relevances_from_server(false) {}
// Destructor; the body is empty.
287 SearchSuggestionParser::Results::~Results() {}
// Empties |suggest_results| and |navigation_results| and resets
// |verbatim_relevance| to -1, the same value the constructor uses.
// NOTE(review): any further resets and the closing brace are not visible in
// this view; confirm whether other members are cleared as well.
289 void SearchSuggestionParser::Results::Clear() {
290 suggest_results
.clear();
291 navigation_results
.clear();
292 verbatim_relevance
= -1;
// Checks whether any score in this Results object came from the server. Three
// things are tested in order: a non-negative |verbatim_relevance|, any entry
// of |suggest_results| whose relevance_from_server() is true, and any entry of
// |navigation_results| whose relevance_from_server() is true.
// NOTE(review): the statements controlled by each |if| and the final return
// are not visible in this view -- presumably each hit returns true and the
// fall-through returns false; confirm.
296 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
297 if (verbatim_relevance
>= 0)
300 // Right now either all results of one type will be server-scored or they will
301 // all be locally scored, but in case we change this later, we'll just check
// (the sentence above is cut off in this view; the two loops below visit every
// element of both result lists.)
303 for (SuggestResults::const_iterator
i(suggest_results
.begin());
304 i
!= suggest_results
.end(); ++i
) {
305 if (i
->relevance_from_server())
308 for (NavigationResults::const_iterator
i(navigation_results
.begin());
309 i
!= navigation_results
.end(); ++i
) {
310 if (i
->relevance_from_server())
317 // SearchSuggestionParser ------------------------------------------------------
// Reads the response body of |source| into a string and, when the response
// headers name a charset, converts the body from that charset to UTF-8 by way
// of UTF-16.
// NOTE(review): the declaration of |charset|, the remaining arguments of the
// CodepageToUTF16 call, and the final return statement are not visible in this
// view; confirm. With OnStringConversionError::FAIL, |json_data| is only
// overwritten inside the |if| that tests the conversion call.
320 std::string
SearchSuggestionParser::ExtractJsonData(
321 const net::URLFetcher
* source
) {
322 const net::HttpResponseHeaders
* const response_headers
=
323 source
->GetResponseHeaders();
324 std::string json_data
;
325 source
->GetResponseAsString(&json_data
);
327 // JSON is supposed to be UTF-8, but some suggest service providers send
328 // JSON files in non-UTF-8 encodings. The actual encoding is usually
329 // specified in the Content-Type header field.
// Headers may be absent; in that case the body is used as received.
330 if (response_headers
) {
332 if (response_headers
->GetCharset(&charset
)) {
333 base::string16 data_16
;
334 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
335 if (base::CodepageToUTF16(json_data
, charset
.c_str(),
336 base::OnStringConversionError::FAIL
,
338 json_data
= base::UTF16ToUTF8(data_16
);
// Tries to deserialize |json_data| into a base::Value, skipping over any
// leading text that precedes the JSON array. Returns an empty scoped_ptr when
// the loop finishes without producing a value.
// NOTE(review): the declaration of |error_code| and the statement that returns
// the parsed |data| from inside the loop are not visible in this view --
// presumably |data| is returned when the error code indicates success;
// confirm.
345 scoped_ptr
<base::Value
> SearchSuggestionParser::DeserializeJsonData(
346 base::StringPiece json_data
) {
347 // The JSON response should be an array.
// At most five attempts are made. The first looks for "[" from the start of
// |json_data|; each later attempt searches from index 1, i.e. past the "["
// that the previous remove_prefix() left at the front.
348 for (size_t response_start_index
= json_data
.find("["), i
= 0;
349 response_start_index
!= base::StringPiece::npos
&& i
< 5;
350 response_start_index
= json_data
.find("[", 1), i
++) {
351 // Remove any XSSI guards to allow for JSON parsing.
352 json_data
.remove_prefix(response_start_index
);
354 JSONStringValueSerializer
deserializer(json_data
);
355 deserializer
.set_allow_trailing_comma(true);
357 scoped_ptr
<base::Value
> data(deserializer
.Deserialize(&error_code
, NULL
));
// Reached when no attempt yielded a value (or no "[" was found at all).
361 return scoped_ptr
<base::Value
>();
// Parses a suggest response held in |root_val| and fills |results| with
// SuggestResult and NavigationResult entries. As read from the code below, the
// root is expected to be a list whose element 0 is the query string (it must
// equal input.text()), element 1 the list of suggestion strings, element 2 an
// optional list of descriptions, and element 4 an optional dictionary of
// extras ("google:suggesttype", "google:suggestrelevance",
// "google:verbatimrelevance", "google:fieldtrialtriggered",
// "google:clientdata", "google:suggestdetail").
//   default_result_relevance: initial value of the per-suggestion relevance.
//   is_keyword_result: forwarded to each constructed result.
// NOTE(review): |results| is dereferenced throughout but its declaration in
// the parameter list is not visible, and neither are the function's return
// statements or a number of single-statement |if| bodies flagged below --
// lines appear to be missing from this view; confirm against the full file.
365 bool SearchSuggestionParser::ParseSuggestResults(
366 const base::Value
& root_val
,
367 const AutocompleteInput
& input
,
368 const AutocompleteSchemeClassifier
& scheme_classifier
,
369 int default_result_relevance
,
370 const std::string
& languages
,
371 bool is_keyword_result
,
373 base::string16 query
;
374 const base::ListValue
* root_list
= NULL
;
375 const base::ListValue
* results_list
= NULL
;
// Validates the overall shape of the response and that it answers the current
// input. NOTE(review): the statement taken on failure is not visible here.
377 if (!root_val
.GetAsList(&root_list
) || !root_list
->GetString(0, &query
) ||
378 query
!= input
.text() || !root_list
->GetList(1, &results_list
))
381 // 3rd element: Description list.
// The result of GetList() is ignored, so |descriptions| may stay NULL; it is
// NULL-checked before use further down.
382 const base::ListValue
* descriptions
= NULL
;
383 root_list
->GetList(2, &descriptions
);
385 // 4th element: Disregard the query URL list for now.
387 // Reset suggested relevance information.
388 results
->verbatim_relevance
= -1;
390 // 5th element: Optional key-value pairs from the Suggest server.
391 const base::ListValue
* types
= NULL
;
392 const base::ListValue
* relevances
= NULL
;
393 const base::ListValue
* suggestion_details
= NULL
;
394 const base::DictionaryValue
* extras
= NULL
;
// -1 means no suggestion index has been designated for prefetching.
395 int prefetch_index
= -1;
396 if (root_list
->GetDictionary(4, &extras
)) {
397 extras
->GetList("google:suggesttype", &types
);
399 // Discard this list if its size does not match that of the suggestions.
// NOTE(review): the statement that performs the discard is not visible here.
400 if (extras
->GetList("google:suggestrelevance", &relevances
) &&
401 (relevances
->GetSize() != results_list
->GetSize()))
403 extras
->GetInteger("google:verbatimrelevance",
404 &results
->verbatim_relevance
);
406 // Check if the active suggest field trial (if any) has triggered either
407 // for the default provider or keyword provider.
408 results
->field_trial_triggered
= false;
409 extras
->GetBoolean("google:fieldtrialtriggered",
410 &results
->field_trial_triggered
);
// "phi" inside the client data supplies |prefetch_index|, which is compared
// against the loop index below to compute |should_prefetch|.
412 const base::DictionaryValue
* client_data
= NULL
;
413 if (extras
->GetDictionary("google:clientdata", &client_data
) && client_data
)
414 client_data
->GetInteger("phi", &prefetch_index
);
// The detail list is dropped when its size differs from the suggestion list.
416 if (extras
->GetList("google:suggestdetail", &suggestion_details
) &&
417 suggestion_details
->GetSize() != results_list
->GetSize())
418 suggestion_details
= NULL
;
420 // Store the metadata that came with the response in case we need to pass it
421 // along with the prefetch query to Instant.
422 JSONStringValueSerializer
json_serializer(&results
->metadata
);
423 json_serializer
.Serialize(*extras
);
426 // Clear the previous results now that new results are available.
427 results
->suggest_results
.clear();
428 results
->navigation_results
.clear();
429 results
->answers_image_urls
.clear();
431 base::string16 suggestion
;
// |relevance| is declared outside the loop, so a value read for one suggestion
// stays in place for later ones unless it is overwritten.
433 int relevance
= default_result_relevance
;
434 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
435 const bool allow_navsuggest
=
436 input
.type() != metrics::OmniboxInputType::FORCED_QUERY
;
437 const base::string16
& trimmed_input
=
438 base::CollapseWhitespace(input
.text(), false);
// The loop ends at the first index for which GetString() fails.
439 for (size_t index
= 0; results_list
->GetString(index
, &suggestion
); ++index
) {
440 // Google search may return empty suggestions for weird input characters,
441 // they make no sense at all and can cause problems in our code.
// NOTE(review): the statement taken for an empty suggestion is not visible.
442 if (suggestion
.empty())
445 // Apply valid suggested relevance scores; discard invalid lists.
// NOTE(review): the statement taken when the integer lookup fails is not
// visible here.
446 if (relevances
!= NULL
&& !relevances
->GetInteger(index
, &relevance
))
// Defaults to SEARCH_SUGGEST when no type string is available for this index.
// NOTE(review): the declaration of |type| is not visible in this view.
448 AutocompleteMatchType::Type match_type
=
449 AutocompleteMatchType::SEARCH_SUGGEST
;
450 if (types
&& types
->GetString(index
, &type
))
451 match_type
= GetAutocompleteMatchType(type
);
452 const base::DictionaryValue
* suggestion_detail
= NULL
;
453 std::string deletion_url
;
// "du" in the per-suggestion detail dictionary supplies the deletion URL.
455 if (suggestion_details
&&
456 suggestion_details
->GetDictionary(index
, &suggestion_detail
))
457 suggestion_detail
->GetString("du", &deletion_url
);
459 if ((match_type
== AutocompleteMatchType::NAVSUGGEST
) ||
460 (match_type
== AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
)) {
461 // Do not blindly trust the URL coming from the server to be valid.
// NOTE(review): the declaration of |url| that this FixupURL() call initializes
// is not visible in this view.
463 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion
), std::string()));
464 if (url
.is_valid() && allow_navsuggest
) {
465 base::string16 title
;
466 if (descriptions
!= NULL
)
467 descriptions
->GetString(index
, &title
);
// |relevances != NULL| is what marks the result as server-scored.
468 results
->navigation_results
.push_back(NavigationResult(
469 scheme_classifier
, url
, match_type
, title
, deletion_url
,
470 is_keyword_result
, relevance
, relevances
!= NULL
, input
.text(),
// NOTE(review): the end of the call above and the start of the branch that
// handles non-navigation suggestions are not visible in this view.
474 base::string16 match_contents
= suggestion
;
475 base::string16 match_contents_prefix
;
476 base::string16 annotation
;
477 base::string16 answer_contents
;
478 base::string16 answer_type_str
;
479 scoped_ptr
<SuggestionAnswer
> answer
;
480 std::string suggest_query_params
;
// Optional per-suggestion details: "t" (match contents), "mp" (contents
// prefix), "a" (annotation), "q" (query params), "ansa"/"ansb" (answer).
482 if (suggestion_details
) {
483 suggestion_details
->GetDictionary(index
, &suggestion_detail
);
484 if (suggestion_detail
) {
485 suggestion_detail
->GetString("t", &match_contents
);
486 suggestion_detail
->GetString("mp", &match_contents_prefix
);
487 // Error correction for bad data from server.
488 if (match_contents
.empty())
489 match_contents
= suggestion
;
490 suggestion_detail
->GetString("a", &annotation
);
491 suggestion_detail
->GetString("q", &suggest_query_params
);
493 // Extract the Answer, if provided.
// An answer is only considered when both the "ansa" dictionary and the "ansb"
// string are present.
494 const base::DictionaryValue
* answer_json
= NULL
;
495 if (suggestion_detail
->GetDictionary("ansa", &answer_json
) &&
496 suggestion_detail
->GetString("ansb", &answer_type_str
)) {
497 bool answer_parsed_successfully
= false;
498 answer
= SuggestionAnswer::ParseAnswer(answer_json
);
// NOTE(review): the declaration of |answer_type| is not visible in this view.
500 if (answer
&& base::StringToInt(answer_type_str
, &answer_type
)) {
501 answer_parsed_successfully
= true;
503 answer
->set_type(answer_type
);
504 answer
->AddImageURLsTo(&results
->answers_image_urls
);
// The raw answer JSON is also kept, re-serialized, as a UTF-16 string.
506 std::string contents
;
507 base::JSONWriter::Write(answer_json
, &contents
);
508 answer_contents
= base::UTF8ToUTF16(contents
);
// NOTE(review): the condition guarding the reset below is not visible here --
// presumably it applies only when parsing did not succeed; confirm.
510 answer_type_str
= base::string16();
512 UMA_HISTOGRAM_BOOLEAN("Omnibox.AnswerParseSuccess",
513 answer_parsed_successfully
);
// True only for the suggestion whose index equals the "phi" value read above.
518 bool should_prefetch
= static_cast<int>(index
) == prefetch_index
;
519 // TODO(kochi): Improve calculator suggestion presentation.
520 results
->suggest_results
.push_back(SuggestResult(
521 base::CollapseWhitespace(suggestion
, false), match_type
,
522 base::CollapseWhitespace(match_contents
, false),
523 match_contents_prefix
, annotation
, answer_contents
, answer_type_str
,
524 answer
.Pass(), suggest_query_params
, deletion_url
, is_keyword_result
,
525 relevance
, relevances
!= NULL
, should_prefetch
, trimmed_input
));
// Records whether a relevance list from the server is in effect.
528 results
->relevances_from_server
= relevances
!= NULL
;