Explicitly add python-numpy dependency to install-build-deps.
[chromium-blink-merge.git] / components / omnibox / search_suggestion_parser.cc
blobd8452ee9465435f6046366ce829e6e5fe1dd27fe
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/omnibox/search_suggestion_parser.h"
7 #include "base/i18n/icu_string_conversions.h"
8 #include "base/json/json_string_value_serializer.h"
9 #include "base/json/json_writer.h"
10 #include "base/logging.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/values.h"
16 #include "components/omnibox/autocomplete_input.h"
17 #include "components/omnibox/url_prefix.h"
18 #include "components/url_fixer/url_fixer.h"
19 #include "net/base/net_util.h"
20 #include "net/http/http_response_headers.h"
21 #include "net/url_request/url_fetcher.h"
22 #include "url/url_constants.h"
24 namespace {
26 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
27 if (type == "ENTITY")
28 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY;
29 if (type == "INFINITE")
30 return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE;
31 if (type == "PERSONALIZED_QUERY")
32 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED;
33 if (type == "PROFILE")
34 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE;
35 if (type == "NAVIGATION")
36 return AutocompleteMatchType::NAVSUGGEST;
37 if (type == "PERSONALIZED_NAVIGATION")
38 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED;
39 return AutocompleteMatchType::SEARCH_SUGGEST;
42 } // namespace
44 // SearchSuggestionParser::Result ----------------------------------------------
46 SearchSuggestionParser::Result::Result(bool from_keyword_provider,
47 int relevance,
48 bool relevance_from_server,
49 AutocompleteMatchType::Type type,
50 const std::string& deletion_url)
51 : from_keyword_provider_(from_keyword_provider),
52 type_(type),
53 relevance_(relevance),
54 relevance_from_server_(relevance_from_server),
55 received_after_last_keystroke_(true),
56 deletion_url_(deletion_url) {}
58 SearchSuggestionParser::Result::~Result() {}
60 // SearchSuggestionParser::SuggestResult ---------------------------------------
62 SearchSuggestionParser::SuggestResult::SuggestResult(
63 const base::string16& suggestion,
64 AutocompleteMatchType::Type type,
65 const base::string16& match_contents,
66 const base::string16& match_contents_prefix,
67 const base::string16& annotation,
68 const base::string16& answer_contents,
69 const base::string16& answer_type,
70 scoped_ptr<SuggestionAnswer> answer,
71 const std::string& suggest_query_params,
72 const std::string& deletion_url,
73 bool from_keyword_provider,
74 int relevance,
75 bool relevance_from_server,
76 bool should_prefetch,
77 const base::string16& input_text)
78 : Result(from_keyword_provider,
79 relevance,
80 relevance_from_server,
81 type,
82 deletion_url),
83 suggestion_(suggestion),
84 match_contents_prefix_(match_contents_prefix),
85 annotation_(annotation),
86 suggest_query_params_(suggest_query_params),
87 answer_contents_(answer_contents),
88 answer_type_(answer_type),
89 answer_(answer.Pass()),
90 should_prefetch_(should_prefetch) {
91 match_contents_ = match_contents;
92 DCHECK(!match_contents_.empty());
93 ClassifyMatchContents(true, input_text);
96 SearchSuggestionParser::SuggestResult::SuggestResult(
97 const SuggestResult& result)
98 : Result(result),
99 suggestion_(result.suggestion_),
100 match_contents_prefix_(result.match_contents_prefix_),
101 annotation_(result.annotation_),
102 suggest_query_params_(result.suggest_query_params_),
103 answer_contents_(result.answer_contents_),
104 answer_type_(result.answer_type_),
105 answer_(SuggestionAnswer::copy(result.answer_.get())),
106 should_prefetch_(result.should_prefetch_) {
109 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
111 SearchSuggestionParser::SuggestResult&
112 SearchSuggestionParser::SuggestResult::operator=(const SuggestResult& rhs) {
113 if (this == &rhs)
114 return *this;
116 // Assign via parent class first.
117 Result::operator=(rhs);
119 suggestion_ = rhs.suggestion_;
120 match_contents_prefix_ = rhs.match_contents_prefix_;
121 annotation_ = rhs.annotation_;
122 suggest_query_params_ = rhs.suggest_query_params_;
123 answer_contents_ = rhs.answer_contents_;
124 answer_type_ = rhs.answer_type_;
125 answer_ = SuggestionAnswer::copy(rhs.answer_.get());
126 should_prefetch_ = rhs.should_prefetch_;
128 return *this;
131 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
132 const bool allow_bolding_all,
133 const base::string16& input_text) {
134 if (input_text.empty()) {
135 // In case of zero-suggest results, do not highlight matches.
136 match_contents_class_.push_back(
137 ACMatchClassification(0, ACMatchClassification::NONE));
138 return;
141 base::string16 lookup_text = input_text;
142 if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) {
143 const size_t contents_index =
144 suggestion_.length() - match_contents_.length();
145 // Ensure the query starts with the input text, and ends with the match
146 // contents, and the input text has an overlap with contents.
147 if (StartsWith(suggestion_, input_text, true) &&
148 EndsWith(suggestion_, match_contents_, true) &&
149 (input_text.length() > contents_index)) {
150 lookup_text = input_text.substr(contents_index);
153 size_t lookup_position = match_contents_.find(lookup_text);
154 if (!allow_bolding_all && (lookup_position == base::string16::npos)) {
155 // Bail if the code below to update the bolding would bold the whole
156 // string. Note that the string may already be entirely bolded; if
157 // so, leave it as is.
158 return;
160 match_contents_class_.clear();
161 // We do intra-string highlighting for suggestions - the suggested segment
162 // will be highlighted, e.g. for input_text = "you" the suggestion may be
163 // "youtube", so we'll bold the "tube" section: you*tube*.
164 if (input_text != match_contents_) {
165 if (lookup_position == base::string16::npos) {
166 // The input text is not a substring of the query string, e.g. input
167 // text is "slasdot" and the query string is "slashdot", so we bold the
168 // whole thing.
169 match_contents_class_.push_back(
170 ACMatchClassification(0, ACMatchClassification::MATCH));
171 } else {
172 // We don't iterate over the string here annotating all matches because
173 // it looks odd to have every occurrence of a substring that may be as
174 // short as a single character highlighted in a query suggestion result,
175 // e.g. for input text "s" and query string "southwest airlines", it
176 // looks odd if both the first and last s are highlighted.
177 if (lookup_position != 0) {
178 match_contents_class_.push_back(
179 ACMatchClassification(0, ACMatchClassification::MATCH));
181 match_contents_class_.push_back(
182 ACMatchClassification(lookup_position, ACMatchClassification::NONE));
183 size_t next_fragment_position = lookup_position + lookup_text.length();
184 if (next_fragment_position < match_contents_.length()) {
185 match_contents_class_.push_back(ACMatchClassification(
186 next_fragment_position, ACMatchClassification::MATCH));
189 } else {
190 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
191 // for the default provider or a keyword search provider.
192 match_contents_class_.push_back(
193 ACMatchClassification(0, ACMatchClassification::NONE));
197 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
198 const AutocompleteInput& input,
199 bool keyword_provider_requested) const {
200 if (!from_keyword_provider_ && keyword_provider_requested)
201 return 100;
202 return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600);
205 // SearchSuggestionParser::NavigationResult ------------------------------------
207 SearchSuggestionParser::NavigationResult::NavigationResult(
208 const AutocompleteSchemeClassifier& scheme_classifier,
209 const GURL& url,
210 AutocompleteMatchType::Type type,
211 const base::string16& description,
212 const std::string& deletion_url,
213 bool from_keyword_provider,
214 int relevance,
215 bool relevance_from_server,
216 const base::string16& input_text,
217 const std::string& languages)
218 : Result(from_keyword_provider, relevance, relevance_from_server, type,
219 deletion_url),
220 url_(url),
221 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
222 url, net::FormatUrl(url, languages,
223 net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP,
224 net::UnescapeRule::SPACES, NULL, NULL, NULL),
225 scheme_classifier)),
226 description_(description) {
227 DCHECK(url_.is_valid());
228 CalculateAndClassifyMatchContents(true, input_text, languages);
231 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
233 void
234 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
235 const bool allow_bolding_nothing,
236 const base::string16& input_text,
237 const std::string& languages) {
238 if (input_text.empty()) {
239 // In case of zero-suggest results, do not highlight matches.
240 match_contents_class_.push_back(
241 ACMatchClassification(0, ACMatchClassification::NONE));
242 return;
245 // First look for the user's input inside the formatted url as it would be
246 // without trimming the scheme, so we can find matches at the beginning of the
247 // scheme.
248 const URLPrefix* prefix =
249 URLPrefix::BestURLPrefix(formatted_url_, input_text);
250 size_t match_start = (prefix == NULL) ?
251 formatted_url_.find(input_text) : prefix->prefix.length();
252 bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) &&
253 (!prefix || (match_start != 0));
254 const net::FormatUrlTypes format_types =
255 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP);
257 base::string16 match_contents = net::FormatUrl(url_, languages, format_types,
258 net::UnescapeRule::SPACES, NULL, NULL, &match_start);
259 // If the first match in the untrimmed string was inside a scheme that we
260 // trimmed, look for a subsequent match.
261 if (match_start == base::string16::npos)
262 match_start = match_contents.find(input_text);
263 // Update |match_contents_| and |match_contents_class_| if it's allowed.
264 if (allow_bolding_nothing || (match_start != base::string16::npos)) {
265 match_contents_ = match_contents;
266 // Safe if |match_start| is npos; also safe if the input is longer than the
267 // remaining contents after |match_start|.
268 AutocompleteMatch::ClassifyLocationInString(match_start,
269 input_text.length(), match_contents_.length(),
270 ACMatchClassification::URL, &match_contents_class_);
274 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
275 const AutocompleteInput& input,
276 bool keyword_provider_requested) const {
277 return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150;
280 // SearchSuggestionParser::Results ---------------------------------------------
282 SearchSuggestionParser::Results::Results()
283 : verbatim_relevance(-1),
284 field_trial_triggered(false),
285 relevances_from_server(false) {}
287 SearchSuggestionParser::Results::~Results() {}
289 void SearchSuggestionParser::Results::Clear() {
290 suggest_results.clear();
291 navigation_results.clear();
292 verbatim_relevance = -1;
293 metadata.clear();
296 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
297 if (verbatim_relevance >= 0)
298 return true;
300 // Right now either all results of one type will be server-scored or they will
301 // all be locally scored, but in case we change this later, we'll just check
302 // them all.
303 for (SuggestResults::const_iterator i(suggest_results.begin());
304 i != suggest_results.end(); ++i) {
305 if (i->relevance_from_server())
306 return true;
308 for (NavigationResults::const_iterator i(navigation_results.begin());
309 i != navigation_results.end(); ++i) {
310 if (i->relevance_from_server())
311 return true;
314 return false;
317 // SearchSuggestionParser ------------------------------------------------------
319 // static
320 std::string SearchSuggestionParser::ExtractJsonData(
321 const net::URLFetcher* source) {
322 const net::HttpResponseHeaders* const response_headers =
323 source->GetResponseHeaders();
324 std::string json_data;
325 source->GetResponseAsString(&json_data);
327 // JSON is supposed to be UTF-8, but some suggest service providers send
328 // JSON files in non-UTF-8 encodings. The actual encoding is usually
329 // specified in the Content-Type header field.
330 if (response_headers) {
331 std::string charset;
332 if (response_headers->GetCharset(&charset)) {
333 base::string16 data_16;
334 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
335 if (base::CodepageToUTF16(json_data, charset.c_str(),
336 base::OnStringConversionError::FAIL,
337 &data_16))
338 json_data = base::UTF16ToUTF8(data_16);
341 return json_data;
344 // static
345 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
346 std::string json_data) {
347 // The JSON response should be an array.
348 for (size_t response_start_index = json_data.find("["), i = 0;
349 response_start_index != std::string::npos && i < 5;
350 response_start_index = json_data.find("[", 1), i++) {
351 // Remove any XSSI guards to allow for JSON parsing.
352 if (response_start_index > 0)
353 json_data.erase(0, response_start_index);
355 JSONStringValueSerializer deserializer(json_data);
356 deserializer.set_allow_trailing_comma(true);
357 int error_code = 0;
358 scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL));
359 if (error_code == 0)
360 return data.Pass();
362 return scoped_ptr<base::Value>();
// Parses an already-deserialized suggest response (|root_val|) into |results|.
//
// The response must be a list whose element 0 is a string equal to
// input.text() and whose element 1 is a list of suggestions; otherwise false
// is returned and |results| is left unmodified. Elements read from the root
// list:
//   [0] echoed query string
//   [1] suggestion strings
//   [2] description list (optional; used as titles for navigation results)
//   [4] dictionary of extras (optional), from which the keys
//       "google:suggesttype", "google:suggestrelevance",
//       "google:verbatimrelevance", "google:fieldtrialtriggered",
//       "google:clientdata" ("phi") and "google:suggestdetail" are read.
//
// |default_result_relevance| is the relevance used until a usable
// "google:suggestrelevance" entry overrides it. |is_keyword_result| is
// forwarded to each created result as its from-keyword-provider flag.
// |scheme_classifier| and |languages| are forwarded to NavigationResult.
//
// On success the previous suggest/navigation results and answer image URLs in
// |results| are replaced and true is returned.
365 // static
366 bool SearchSuggestionParser::ParseSuggestResults(
367 const base::Value& root_val,
368 const AutocompleteInput& input,
369 const AutocompleteSchemeClassifier& scheme_classifier,
370 int default_result_relevance,
371 const std::string& languages,
372 bool is_keyword_result,
373 Results* results) {
374 base::string16 query;
375 const base::ListValue* root_list = NULL;
376 const base::ListValue* results_list = NULL;
// Reject responses that are not a list, that echo a query different from the
// current input text, or that lack the suggestions list.
378 if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
379 query != input.text() || !root_list->GetList(1, &results_list))
380 return false;
382 // 3rd element: Description list.
383 const base::ListValue* descriptions = NULL;
// The return value is ignored: |descriptions| is NULL-checked before use below.
384 root_list->GetList(2, &descriptions);
386 // 4th element: Disregard the query URL list for now.
388 // Reset suggested relevance information.
389 results->verbatim_relevance = -1;
391 // 5th element: Optional key-value pairs from the Suggest server.
392 const base::ListValue* types = NULL;
393 const base::ListValue* relevances = NULL;
394 const base::ListValue* suggestion_details = NULL;
395 const base::DictionaryValue* extras = NULL;
// -1 means "no suggestion is marked for prefetch"; it never equals a valid
// index in the comparison made inside the loop below.
396 int prefetch_index = -1;
397 if (root_list->GetDictionary(4, &extras)) {
398 extras->GetList("google:suggesttype", &types);
400 // Discard this list if its size does not match that of the suggestions.
401 if (extras->GetList("google:suggestrelevance", &relevances) &&
402 (relevances->GetSize() != results_list->GetSize()))
403 relevances = NULL;
404 extras->GetInteger("google:verbatimrelevance",
405 &results->verbatim_relevance);
407 // Check if the active suggest field trial (if any) has triggered either
408 // for the default provider or keyword provider.
409 results->field_trial_triggered = false;
410 extras->GetBoolean("google:fieldtrialtriggered",
411 &results->field_trial_triggered);
// "phi" inside "google:clientdata" is read as the index of the suggestion to
// flag with should_prefetch.
413 const base::DictionaryValue* client_data = NULL;
414 if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
415 client_data->GetInteger("phi", &prefetch_index);
// As with the relevance list, per-suggestion details are only used when there
// is exactly one entry per suggestion.
417 if (extras->GetList("google:suggestdetail", &suggestion_details) &&
418 suggestion_details->GetSize() != results_list->GetSize())
419 suggestion_details = NULL;
421 // Store the metadata that came with the response in case we need to pass it
422 // along with the prefetch query to Instant.
423 JSONStringValueSerializer json_serializer(&results->metadata);
424 json_serializer.Serialize(*extras);
427 // Clear the previous results now that new results are available.
428 results->suggest_results.clear();
429 results->navigation_results.clear();
430 results->answers_image_urls.clear();
// |suggestion|, |type| and |relevance| are declared outside the loop, so a
// failed lookup for one index leaves the value from an earlier iteration (or
// the initial value) in place.
432 base::string16 suggestion;
433 std::string type;
434 int relevance = default_result_relevance;
435 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
436 const bool allow_navsuggest =
437 input.type() != metrics::OmniboxInputType::FORCED_QUERY;
438 const base::string16& trimmed_input =
439 base::CollapseWhitespace(input.text(), false);
// Iteration stops at the first index for which GetString() fails, i.e. at the
// end of the list or at the first element that cannot be read as a string.
440 for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
441 // Google search may return empty suggestions for weird input characters,
442 // they make no sense at all and can cause problems in our code.
443 if (suggestion.empty())
444 continue;
446 // Apply valid suggested relevance scores; discard invalid lists.
447 if (relevances != NULL && !relevances->GetInteger(index, &relevance))
448 relevances = NULL;
449 AutocompleteMatchType::Type match_type =
450 AutocompleteMatchType::SEARCH_SUGGEST;
451 if (types && types->GetString(index, &type))
452 match_type = GetAutocompleteMatchType(type);
453 const base::DictionaryValue* suggestion_detail = NULL;
454 std::string deletion_url;
// "du" in the per-suggestion detail dictionary supplies the deletion URL.
456 if (suggestion_details &&
457 suggestion_details->GetDictionary(index, &suggestion_detail))
458 suggestion_detail->GetString("du", &deletion_url);
460 if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
461 (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
462 // Do not blindly trust the URL coming from the server to be valid.
463 GURL url(
464 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string()));
// Navigation suggestions with an invalid URL, or received while navsuggest is
// disallowed, are dropped without adding any result.
465 if (url.is_valid() && allow_navsuggest) {
466 base::string16 title;
467 if (descriptions != NULL)
468 descriptions->GetString(index, &title);
469 results->navigation_results.push_back(NavigationResult(
470 scheme_classifier, url, match_type, title, deletion_url,
471 is_keyword_result, relevance, relevances != NULL, input.text(),
472 languages));
474 } else {
// Query suggestion: the displayed contents default to the suggestion text and
// may be overridden by the detail dictionary below.
475 base::string16 match_contents = suggestion;
476 base::string16 match_contents_prefix;
477 base::string16 annotation;
478 base::string16 answer_contents;
479 base::string16 answer_type_str;
480 scoped_ptr<SuggestionAnswer> answer;
481 std::string suggest_query_params;
483 if (suggestion_details) {
484 suggestion_details->GetDictionary(index, &suggestion_detail);
485 if (suggestion_detail) {
// Detail keys read here: "t" -> match contents, "mp" -> match contents prefix,
// "a" -> annotation, "q" -> suggest query params.
486 suggestion_detail->GetString("t", &match_contents);
487 suggestion_detail->GetString("mp", &match_contents_prefix);
488 // Error correction for bad data from server.
489 if (match_contents.empty())
490 match_contents = suggestion;
491 suggestion_detail->GetString("a", &annotation);
492 suggestion_detail->GetString("q", &suggest_query_params);
494 // Extract the Answer, if provided.
// An answer needs both "ansa" (answer dictionary) and "ansb" (answer type as a
// string); the histogram below is only recorded when both are present.
495 const base::DictionaryValue* answer_json = NULL;
496 if (suggestion_detail->GetDictionary("ansa", &answer_json) &&
497 suggestion_detail->GetString("ansb", &answer_type_str)) {
498 bool answer_parsed_successfully = false;
499 answer = SuggestionAnswer::ParseAnswer(answer_json);
500 int answer_type = 0;
501 if (answer && base::StringToInt(answer_type_str, &answer_type)) {
502 answer_parsed_successfully = true;
503 match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER;
505 answer->set_type(answer_type);
506 answer->AddImageURLsTo(&results->answers_image_urls);
// The raw answer JSON is also kept, re-serialized, as |answer_contents|.
508 std::string contents;
509 base::JSONWriter::Write(answer_json, &contents);
510 answer_contents = base::UTF8ToUTF16(contents);
511 } else {
// Parsing failed: blank the type string so it is not passed on to the result.
512 answer_type_str = base::string16();
514 UMA_HISTOGRAM_BOOLEAN("Omnibox.AnswerParseSuccess",
515 answer_parsed_successfully);
520 bool should_prefetch = static_cast<int>(index) == prefetch_index;
521 // TODO(kochi): Improve calculator suggestion presentation.
// Whitespace is collapsed in the suggestion and its contents; the classifier
// is given the whitespace-collapsed input (|trimmed_input|) rather than
// input.text().
522 results->suggest_results.push_back(SuggestResult(
523 base::CollapseWhitespace(suggestion, false), match_type,
524 base::CollapseWhitespace(match_contents, false),
525 match_contents_prefix, annotation, answer_contents, answer_type_str,
526 answer.Pass(), suggest_query_params, deletion_url, is_keyword_result,
527 relevance, relevances != NULL, should_prefetch, trimmed_input));
// |relevances| is still non-NULL here only if the list had the right size and
// every index read from it yielded an integer.
530 results->relevances_from_server = relevances != NULL;
531 return true;