// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "components/omnibox/search_suggestion_parser.h"
7 #include "base/i18n/icu_string_conversions.h"
8 #include "base/json/json_string_value_serializer.h"
9 #include "base/json/json_writer.h"
10 #include "base/logging.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/values.h"
14 #include "components/omnibox/autocomplete_input.h"
15 #include "components/omnibox/url_prefix.h"
16 #include "components/url_fixer/url_fixer.h"
17 #include "net/base/net_util.h"
18 #include "net/http/http_response_headers.h"
19 #include "net/url_request/url_fetcher.h"
20 #include "url/url_constants.h"
24 AutocompleteMatchType::Type
GetAutocompleteMatchType(const std::string
& type
) {
26 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY
;
27 if (type
== "INFINITE")
28 return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE
;
29 if (type
== "PERSONALIZED_QUERY")
30 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED
;
31 if (type
== "PROFILE")
32 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE
;
33 if (type
== "NAVIGATION")
34 return AutocompleteMatchType::NAVSUGGEST
;
35 if (type
== "PERSONALIZED_NAVIGATION")
36 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
;
37 return AutocompleteMatchType::SEARCH_SUGGEST
;
// SearchSuggestionParser::Result ----------------------------------------------
44 SearchSuggestionParser::Result::Result(bool from_keyword_provider
,
46 bool relevance_from_server
,
47 AutocompleteMatchType::Type type
,
48 const std::string
& deletion_url
)
49 : from_keyword_provider_(from_keyword_provider
),
51 relevance_(relevance
),
52 relevance_from_server_(relevance_from_server
),
53 received_after_last_keystroke_(true),
54 deletion_url_(deletion_url
) {}
56 SearchSuggestionParser::Result::~Result() {}
// SearchSuggestionParser::SuggestResult ---------------------------------------
60 SearchSuggestionParser::SuggestResult::SuggestResult(
61 const base::string16
& suggestion
,
62 AutocompleteMatchType::Type type
,
63 const base::string16
& match_contents
,
64 const base::string16
& match_contents_prefix
,
65 const base::string16
& annotation
,
66 const base::string16
& answer_contents
,
67 const base::string16
& answer_type
,
68 const std::string
& suggest_query_params
,
69 const std::string
& deletion_url
,
70 bool from_keyword_provider
,
72 bool relevance_from_server
,
74 const base::string16
& input_text
)
75 : Result(from_keyword_provider
,
77 relevance_from_server
,
80 suggestion_(suggestion
),
81 match_contents_prefix_(match_contents_prefix
),
82 annotation_(annotation
),
83 suggest_query_params_(suggest_query_params
),
84 answer_contents_(answer_contents
),
85 answer_type_(answer_type
),
86 should_prefetch_(should_prefetch
) {
87 match_contents_
= match_contents
;
88 DCHECK(!match_contents_
.empty());
89 ClassifyMatchContents(true, input_text
);
92 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
94 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
95 const bool allow_bolding_all
,
96 const base::string16
& input_text
) {
97 if (input_text
.empty()) {
98 // In case of zero-suggest results, do not highlight matches.
99 match_contents_class_
.push_back(
100 ACMatchClassification(0, ACMatchClassification::NONE
));
104 base::string16 lookup_text
= input_text
;
105 if (type_
== AutocompleteMatchType::SEARCH_SUGGEST_INFINITE
) {
106 const size_t contents_index
=
107 suggestion_
.length() - match_contents_
.length();
108 // Ensure the query starts with the input text, and ends with the match
109 // contents, and the input text has an overlap with contents.
110 if (StartsWith(suggestion_
, input_text
, true) &&
111 EndsWith(suggestion_
, match_contents_
, true) &&
112 (input_text
.length() > contents_index
)) {
113 lookup_text
= input_text
.substr(contents_index
);
116 size_t lookup_position
= match_contents_
.find(lookup_text
);
117 if (!allow_bolding_all
&& (lookup_position
== base::string16::npos
)) {
118 // Bail if the code below to update the bolding would bold the whole
119 // string. Note that the string may already be entirely bolded; if
120 // so, leave it as is.
123 match_contents_class_
.clear();
124 // We do intra-string highlighting for suggestions - the suggested segment
125 // will be highlighted, e.g. for input_text = "you" the suggestion may be
126 // "youtube", so we'll bold the "tube" section: you*tube*.
127 if (input_text
!= match_contents_
) {
128 if (lookup_position
== base::string16::npos
) {
129 // The input text is not a substring of the query string, e.g. input
130 // text is "slasdot" and the query string is "slashdot", so we bold the
132 match_contents_class_
.push_back(
133 ACMatchClassification(0, ACMatchClassification::MATCH
));
135 // We don't iterate over the string here annotating all matches because
136 // it looks odd to have every occurrence of a substring that may be as
137 // short as a single character highlighted in a query suggestion result,
138 // e.g. for input text "s" and query string "southwest airlines", it
139 // looks odd if both the first and last s are highlighted.
140 if (lookup_position
!= 0) {
141 match_contents_class_
.push_back(
142 ACMatchClassification(0, ACMatchClassification::MATCH
));
144 match_contents_class_
.push_back(
145 ACMatchClassification(lookup_position
, ACMatchClassification::NONE
));
146 size_t next_fragment_position
= lookup_position
+ lookup_text
.length();
147 if (next_fragment_position
< match_contents_
.length()) {
148 match_contents_class_
.push_back(ACMatchClassification(
149 next_fragment_position
, ACMatchClassification::MATCH
));
153 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
154 // for the default provider or a keyword search provider.
155 match_contents_class_
.push_back(
156 ACMatchClassification(0, ACMatchClassification::NONE
));
160 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
161 const AutocompleteInput
& input
,
162 bool keyword_provider_requested
) const {
163 if (!from_keyword_provider_
&& keyword_provider_requested
)
165 return ((input
.type() == metrics::OmniboxInputType::URL
) ? 300 : 600);
// SearchSuggestionParser::NavigationResult ------------------------------------
170 SearchSuggestionParser::NavigationResult::NavigationResult(
171 const AutocompleteSchemeClassifier
& scheme_classifier
,
173 AutocompleteMatchType::Type type
,
174 const base::string16
& description
,
175 const std::string
& deletion_url
,
176 bool from_keyword_provider
,
178 bool relevance_from_server
,
179 const base::string16
& input_text
,
180 const std::string
& languages
)
181 : Result(from_keyword_provider
, relevance
, relevance_from_server
, type
,
184 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
185 url
, net::FormatUrl(url
, languages
,
186 net::kFormatUrlOmitAll
& ~net::kFormatUrlOmitHTTP
,
187 net::UnescapeRule::SPACES
, NULL
, NULL
, NULL
),
189 description_(description
) {
190 DCHECK(url_
.is_valid());
191 CalculateAndClassifyMatchContents(true, input_text
, languages
);
194 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
197 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
198 const bool allow_bolding_nothing
,
199 const base::string16
& input_text
,
200 const std::string
& languages
) {
201 if (input_text
.empty()) {
202 // In case of zero-suggest results, do not highlight matches.
203 match_contents_class_
.push_back(
204 ACMatchClassification(0, ACMatchClassification::NONE
));
208 // First look for the user's input inside the formatted url as it would be
209 // without trimming the scheme, so we can find matches at the beginning of the
211 const URLPrefix
* prefix
=
212 URLPrefix::BestURLPrefix(formatted_url_
, input_text
);
213 size_t match_start
= (prefix
== NULL
) ?
214 formatted_url_
.find(input_text
) : prefix
->prefix
.length();
215 bool trim_http
= !AutocompleteInput::HasHTTPScheme(input_text
) &&
216 (!prefix
|| (match_start
!= 0));
217 const net::FormatUrlTypes format_types
=
218 net::kFormatUrlOmitAll
& ~(trim_http
? 0 : net::kFormatUrlOmitHTTP
);
220 base::string16 match_contents
= net::FormatUrl(url_
, languages
, format_types
,
221 net::UnescapeRule::SPACES
, NULL
, NULL
, &match_start
);
222 // If the first match in the untrimmed string was inside a scheme that we
223 // trimmed, look for a subsequent match.
224 if (match_start
== base::string16::npos
)
225 match_start
= match_contents
.find(input_text
);
226 // Update |match_contents_| and |match_contents_class_| if it's allowed.
227 if (allow_bolding_nothing
|| (match_start
!= base::string16::npos
)) {
228 match_contents_
= match_contents
;
229 // Safe if |match_start| is npos; also safe if the input is longer than the
230 // remaining contents after |match_start|.
231 AutocompleteMatch::ClassifyLocationInString(match_start
,
232 input_text
.length(), match_contents_
.length(),
233 ACMatchClassification::URL
, &match_contents_class_
);
237 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
238 const AutocompleteInput
& input
,
239 bool keyword_provider_requested
) const {
240 return (from_keyword_provider_
|| !keyword_provider_requested
) ? 800 : 150;
// SearchSuggestionParser::Results ---------------------------------------------
245 SearchSuggestionParser::Results::Results()
246 : verbatim_relevance(-1),
247 field_trial_triggered(false),
248 relevances_from_server(false) {}
250 SearchSuggestionParser::Results::~Results() {}
252 void SearchSuggestionParser::Results::Clear() {
253 suggest_results
.clear();
254 navigation_results
.clear();
255 verbatim_relevance
= -1;
259 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
260 if (verbatim_relevance
>= 0)
263 // Right now either all results of one type will be server-scored or they will
264 // all be locally scored, but in case we change this later, we'll just check
266 for (SuggestResults::const_iterator
i(suggest_results
.begin());
267 i
!= suggest_results
.end(); ++i
) {
268 if (i
->relevance_from_server())
271 for (NavigationResults::const_iterator
i(navigation_results
.begin());
272 i
!= navigation_results
.end(); ++i
) {
273 if (i
->relevance_from_server())
// SearchSuggestionParser ------------------------------------------------------
283 std::string
SearchSuggestionParser::ExtractJsonData(
284 const net::URLFetcher
* source
) {
285 const net::HttpResponseHeaders
* const response_headers
=
286 source
->GetResponseHeaders();
287 std::string json_data
;
288 source
->GetResponseAsString(&json_data
);
290 // JSON is supposed to be UTF-8, but some suggest service providers send
291 // JSON files in non-UTF-8 encodings. The actual encoding is usually
292 // specified in the Content-Type header field.
293 if (response_headers
) {
295 if (response_headers
->GetCharset(&charset
)) {
296 base::string16 data_16
;
297 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
298 if (base::CodepageToUTF16(json_data
, charset
.c_str(),
299 base::OnStringConversionError::FAIL
,
301 json_data
= base::UTF16ToUTF8(data_16
);
308 scoped_ptr
<base::Value
> SearchSuggestionParser::DeserializeJsonData(
309 std::string json_data
) {
310 // The JSON response should be an array.
311 for (size_t response_start_index
= json_data
.find("["), i
= 0;
312 response_start_index
!= std::string::npos
&& i
< 5;
313 response_start_index
= json_data
.find("[", 1), i
++) {
314 // Remove any XSSI guards to allow for JSON parsing.
315 if (response_start_index
> 0)
316 json_data
.erase(0, response_start_index
);
318 JSONStringValueSerializer
deserializer(json_data
);
319 deserializer
.set_allow_trailing_comma(true);
321 scoped_ptr
<base::Value
> data(deserializer
.Deserialize(&error_code
, NULL
));
325 return scoped_ptr
<base::Value
>();
329 bool SearchSuggestionParser::ParseSuggestResults(
330 const base::Value
& root_val
,
331 const AutocompleteInput
& input
,
332 const AutocompleteSchemeClassifier
& scheme_classifier
,
333 int default_result_relevance
,
334 const std::string
& languages
,
335 bool is_keyword_result
,
337 base::string16 query
;
338 const base::ListValue
* root_list
= NULL
;
339 const base::ListValue
* results_list
= NULL
;
341 if (!root_val
.GetAsList(&root_list
) || !root_list
->GetString(0, &query
) ||
342 query
!= input
.text() || !root_list
->GetList(1, &results_list
))
345 // 3rd element: Description list.
346 const base::ListValue
* descriptions
= NULL
;
347 root_list
->GetList(2, &descriptions
);
349 // 4th element: Disregard the query URL list for now.
351 // Reset suggested relevance information.
352 results
->verbatim_relevance
= -1;
354 // 5th element: Optional key-value pairs from the Suggest server.
355 const base::ListValue
* types
= NULL
;
356 const base::ListValue
* relevances
= NULL
;
357 const base::ListValue
* suggestion_details
= NULL
;
358 const base::DictionaryValue
* extras
= NULL
;
359 int prefetch_index
= -1;
360 if (root_list
->GetDictionary(4, &extras
)) {
361 extras
->GetList("google:suggesttype", &types
);
363 // Discard this list if its size does not match that of the suggestions.
364 if (extras
->GetList("google:suggestrelevance", &relevances
) &&
365 (relevances
->GetSize() != results_list
->GetSize()))
367 extras
->GetInteger("google:verbatimrelevance",
368 &results
->verbatim_relevance
);
370 // Check if the active suggest field trial (if any) has triggered either
371 // for the default provider or keyword provider.
372 results
->field_trial_triggered
= false;
373 extras
->GetBoolean("google:fieldtrialtriggered",
374 &results
->field_trial_triggered
);
376 const base::DictionaryValue
* client_data
= NULL
;
377 if (extras
->GetDictionary("google:clientdata", &client_data
) && client_data
)
378 client_data
->GetInteger("phi", &prefetch_index
);
380 if (extras
->GetList("google:suggestdetail", &suggestion_details
) &&
381 suggestion_details
->GetSize() != results_list
->GetSize())
382 suggestion_details
= NULL
;
384 // Store the metadata that came with the response in case we need to pass it
385 // along with the prefetch query to Instant.
386 JSONStringValueSerializer
json_serializer(&results
->metadata
);
387 json_serializer
.Serialize(*extras
);
390 // Clear the previous results now that new results are available.
391 results
->suggest_results
.clear();
392 results
->navigation_results
.clear();
393 results
->answers_image_urls
.clear();
395 base::string16 suggestion
;
397 int relevance
= default_result_relevance
;
398 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
399 const bool allow_navsuggest
=
400 input
.type() != metrics::OmniboxInputType::FORCED_QUERY
;
401 const base::string16
& trimmed_input
=
402 base::CollapseWhitespace(input
.text(), false);
403 for (size_t index
= 0; results_list
->GetString(index
, &suggestion
); ++index
) {
404 // Google search may return empty suggestions for weird input characters,
405 // they make no sense at all and can cause problems in our code.
406 if (suggestion
.empty())
409 // Apply valid suggested relevance scores; discard invalid lists.
410 if (relevances
!= NULL
&& !relevances
->GetInteger(index
, &relevance
))
412 AutocompleteMatchType::Type match_type
=
413 AutocompleteMatchType::SEARCH_SUGGEST
;
414 if (types
&& types
->GetString(index
, &type
))
415 match_type
= GetAutocompleteMatchType(type
);
416 const base::DictionaryValue
* suggestion_detail
= NULL
;
417 std::string deletion_url
;
419 if (suggestion_details
&&
420 suggestion_details
->GetDictionary(index
, &suggestion_detail
))
421 suggestion_detail
->GetString("du", &deletion_url
);
423 if ((match_type
== AutocompleteMatchType::NAVSUGGEST
) ||
424 (match_type
== AutocompleteMatchType::NAVSUGGEST_PERSONALIZED
)) {
425 // Do not blindly trust the URL coming from the server to be valid.
427 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion
), std::string()));
428 if (url
.is_valid() && allow_navsuggest
) {
429 base::string16 title
;
430 if (descriptions
!= NULL
)
431 descriptions
->GetString(index
, &title
);
432 results
->navigation_results
.push_back(NavigationResult(
433 scheme_classifier
, url
, match_type
, title
, deletion_url
,
434 is_keyword_result
, relevance
, relevances
!= NULL
, input
.text(),
438 base::string16 match_contents
= suggestion
;
439 base::string16 match_contents_prefix
;
440 base::string16 annotation
;
441 base::string16 answer_contents
;
442 base::string16 answer_type
;
443 std::string suggest_query_params
;
445 if (suggestion_details
) {
446 suggestion_details
->GetDictionary(index
, &suggestion_detail
);
447 if (suggestion_detail
) {
448 suggestion_detail
->GetString("t", &match_contents
);
449 suggestion_detail
->GetString("mp", &match_contents_prefix
);
450 // Error correction for bad data from server.
451 if (match_contents
.empty())
452 match_contents
= suggestion
;
453 suggestion_detail
->GetString("a", &annotation
);
454 suggestion_detail
->GetString("q", &suggest_query_params
);
456 // Extract Answers, if provided.
457 const base::DictionaryValue
* answer_json
= NULL
;
458 if (suggestion_detail
->GetDictionary("ansa", &answer_json
)) {
459 match_type
= AutocompleteMatchType::SEARCH_SUGGEST_ANSWER
;
460 GetAnswersImageURLs(answer_json
, &results
->answers_image_urls
);
461 std::string contents
;
462 base::JSONWriter::Write(answer_json
, &contents
);
463 answer_contents
= base::UTF8ToUTF16(contents
);
464 suggestion_detail
->GetString("ansb", &answer_type
);
469 bool should_prefetch
= static_cast<int>(index
) == prefetch_index
;
470 // TODO(kochi): Improve calculator suggestion presentation.
471 results
->suggest_results
.push_back(SuggestResult(
472 base::CollapseWhitespace(suggestion
, false), match_type
,
473 base::CollapseWhitespace(match_contents
, false),
474 match_contents_prefix
, annotation
, answer_contents
, answer_type
,
475 suggest_query_params
, deletion_url
, is_keyword_result
, relevance
,
476 relevances
!= NULL
, should_prefetch
, trimmed_input
));
479 results
->relevances_from_server
= relevances
!= NULL
;
484 void SearchSuggestionParser::GetAnswersImageURLs(
485 const base::DictionaryValue
* answer_json
,
486 std::vector
<GURL
>* urls
) {
489 const base::ListValue
* lines
= NULL
;
490 if (!answer_json
->GetList("l", &lines
) || !lines
|| lines
->GetSize() == 0)
493 for (base::ListValue::const_iterator iter
= lines
->begin();
494 iter
!= lines
->end();
496 const base::DictionaryValue
* line
= NULL
;
497 if (!(*iter
)->GetAsDictionary(&line
) || !line
)
500 std::string image_host_and_path
;
501 if (!line
->GetString("il.i.d", &image_host_and_path
) ||
502 image_host_and_path
.empty())
504 // Concatenate scheme and host/path using only ':' as separator. This is
505 // due to the results delivering strings of the form '//host/path', which
506 // is web-speak for "use the enclosing page's scheme", but not a valid path
509 GURL(std::string(url::kHttpsScheme
) + ":" + image_host_and_path
));
510 if (image_url
.is_valid())
511 urls
->push_back(image_url
);