Linux: Depend on liberation-fonts package for RPMs.
[chromium-blink-merge.git] / components / omnibox / browser / search_suggestion_parser.cc
blob24ab6370806b3c6b890de5032f75736d6ad7dee9
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/omnibox/browser/search_suggestion_parser.h"
7 #include <algorithm>
9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/json/json_string_value_serializer.h"
11 #include "base/json/json_writer.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram_macros.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/values.h"
18 #include "components/omnibox/browser/autocomplete_i18n.h"
19 #include "components/omnibox/browser/autocomplete_input.h"
20 #include "components/omnibox/browser/url_prefix.h"
21 #include "components/url_formatter/url_fixer.h"
22 #include "components/url_formatter/url_formatter.h"
23 #include "net/http/http_response_headers.h"
24 #include "net/url_request/url_fetcher.h"
25 #include "url/url_constants.h"
27 namespace {
29 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
30 if (type == "CALCULATOR")
31 return AutocompleteMatchType::CALCULATOR;
32 if (type == "ENTITY")
33 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY;
34 if (type == "TAIL")
35 return AutocompleteMatchType::SEARCH_SUGGEST_TAIL;
36 if (type == "PERSONALIZED_QUERY")
37 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED;
38 if (type == "PROFILE")
39 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE;
40 if (type == "NAVIGATION")
41 return AutocompleteMatchType::NAVSUGGEST;
42 if (type == "PERSONALIZED_NAVIGATION")
43 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED;
44 return AutocompleteMatchType::SEARCH_SUGGEST;
47 } // namespace
49 // SearchSuggestionParser::Result ----------------------------------------------
51 SearchSuggestionParser::Result::Result(bool from_keyword_provider,
52 int relevance,
53 bool relevance_from_server,
54 AutocompleteMatchType::Type type,
55 const std::string& deletion_url)
56 : from_keyword_provider_(from_keyword_provider),
57 type_(type),
58 relevance_(relevance),
59 relevance_from_server_(relevance_from_server),
60 received_after_last_keystroke_(true),
61 deletion_url_(deletion_url) {}
63 SearchSuggestionParser::Result::~Result() {}
65 // SearchSuggestionParser::SuggestResult ---------------------------------------
67 SearchSuggestionParser::SuggestResult::SuggestResult(
68 const base::string16& suggestion,
69 AutocompleteMatchType::Type type,
70 const base::string16& match_contents,
71 const base::string16& match_contents_prefix,
72 const base::string16& annotation,
73 const base::string16& answer_contents,
74 const base::string16& answer_type,
75 scoped_ptr<SuggestionAnswer> answer,
76 const std::string& suggest_query_params,
77 const std::string& deletion_url,
78 bool from_keyword_provider,
79 int relevance,
80 bool relevance_from_server,
81 bool should_prefetch,
82 const base::string16& input_text)
83 : Result(from_keyword_provider,
84 relevance,
85 relevance_from_server,
86 type,
87 deletion_url),
88 suggestion_(suggestion),
89 match_contents_prefix_(match_contents_prefix),
90 annotation_(annotation),
91 suggest_query_params_(suggest_query_params),
92 answer_contents_(answer_contents),
93 answer_type_(answer_type),
94 answer_(answer.Pass()),
95 should_prefetch_(should_prefetch) {
96 match_contents_ = match_contents;
97 DCHECK(!match_contents_.empty());
98 ClassifyMatchContents(true, input_text);
101 SearchSuggestionParser::SuggestResult::SuggestResult(
102 const SuggestResult& result)
103 : Result(result),
104 suggestion_(result.suggestion_),
105 match_contents_prefix_(result.match_contents_prefix_),
106 annotation_(result.annotation_),
107 suggest_query_params_(result.suggest_query_params_),
108 answer_contents_(result.answer_contents_),
109 answer_type_(result.answer_type_),
110 answer_(SuggestionAnswer::copy(result.answer_.get())),
111 should_prefetch_(result.should_prefetch_) {
114 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
116 SearchSuggestionParser::SuggestResult&
117 SearchSuggestionParser::SuggestResult::operator=(const SuggestResult& rhs) {
118 if (this == &rhs)
119 return *this;
121 // Assign via parent class first.
122 Result::operator=(rhs);
124 suggestion_ = rhs.suggestion_;
125 match_contents_prefix_ = rhs.match_contents_prefix_;
126 annotation_ = rhs.annotation_;
127 suggest_query_params_ = rhs.suggest_query_params_;
128 answer_contents_ = rhs.answer_contents_;
129 answer_type_ = rhs.answer_type_;
130 answer_ = SuggestionAnswer::copy(rhs.answer_.get());
131 should_prefetch_ = rhs.should_prefetch_;
133 return *this;
136 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
137 const bool allow_bolding_all,
138 const base::string16& input_text) {
139 if (input_text.empty()) {
140 // In case of zero-suggest results, do not highlight matches.
141 match_contents_class_.push_back(
142 ACMatchClassification(0, ACMatchClassification::NONE));
143 return;
146 base::string16 lookup_text = input_text;
147 if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_TAIL) {
148 const size_t contents_index =
149 suggestion_.length() - match_contents_.length();
150 // Ensure the query starts with the input text, and ends with the match
151 // contents, and the input text has an overlap with contents.
152 if (base::StartsWith(suggestion_, input_text,
153 base::CompareCase::SENSITIVE) &&
154 base::EndsWith(suggestion_, match_contents_,
155 base::CompareCase::SENSITIVE) &&
156 (input_text.length() > contents_index)) {
157 lookup_text = input_text.substr(contents_index);
160 // Do a case-insensitive search for |lookup_text|.
161 base::string16::const_iterator lookup_position = std::search(
162 match_contents_.begin(), match_contents_.end(), lookup_text.begin(),
163 lookup_text.end(), SimpleCaseInsensitiveCompareUCS2());
164 if (!allow_bolding_all && (lookup_position == match_contents_.end())) {
165 // Bail if the code below to update the bolding would bold the whole
166 // string. Note that the string may already be entirely bolded; if
167 // so, leave it as is.
168 return;
170 match_contents_class_.clear();
171 // We do intra-string highlighting for suggestions - the suggested segment
172 // will be highlighted, e.g. for input_text = "you" the suggestion may be
173 // "youtube", so we'll bold the "tube" section: you*tube*.
174 if (input_text != match_contents_) {
175 if (lookup_position == match_contents_.end()) {
176 // The input text is not a substring of the query string, e.g. input
177 // text is "slasdot" and the query string is "slashdot", so we bold the
178 // whole thing.
179 match_contents_class_.push_back(
180 ACMatchClassification(0, ACMatchClassification::MATCH));
181 } else {
182 // We don't iterate over the string here annotating all matches because
183 // it looks odd to have every occurrence of a substring that may be as
184 // short as a single character highlighted in a query suggestion result,
185 // e.g. for input text "s" and query string "southwest airlines", it
186 // looks odd if both the first and last s are highlighted.
187 const size_t lookup_index = lookup_position - match_contents_.begin();
188 if (lookup_index != 0) {
189 match_contents_class_.push_back(
190 ACMatchClassification(0, ACMatchClassification::MATCH));
192 match_contents_class_.push_back(
193 ACMatchClassification(lookup_index, ACMatchClassification::NONE));
194 size_t next_fragment_position = lookup_index + lookup_text.length();
195 if (next_fragment_position < match_contents_.length()) {
196 match_contents_class_.push_back(ACMatchClassification(
197 next_fragment_position, ACMatchClassification::MATCH));
200 } else {
201 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
202 // for the default provider or a keyword search provider.
203 match_contents_class_.push_back(
204 ACMatchClassification(0, ACMatchClassification::NONE));
208 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
209 const AutocompleteInput& input,
210 bool keyword_provider_requested) const {
211 if (!from_keyword_provider_ && keyword_provider_requested)
212 return 100;
213 return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600);
216 // SearchSuggestionParser::NavigationResult ------------------------------------
218 SearchSuggestionParser::NavigationResult::NavigationResult(
219 const AutocompleteSchemeClassifier& scheme_classifier,
220 const GURL& url,
221 AutocompleteMatchType::Type type,
222 const base::string16& description,
223 const std::string& deletion_url,
224 bool from_keyword_provider,
225 int relevance,
226 bool relevance_from_server,
227 const base::string16& input_text,
228 const std::string& languages)
229 : Result(from_keyword_provider,
230 relevance,
231 relevance_from_server,
232 type,
233 deletion_url),
234 url_(url),
235 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
236 url,
237 url_formatter::FormatUrl(url,
238 languages,
239 url_formatter::kFormatUrlOmitAll &
240 ~url_formatter::kFormatUrlOmitHTTP,
241 net::UnescapeRule::SPACES,
242 nullptr,
243 nullptr,
244 nullptr),
245 scheme_classifier)),
246 description_(description) {
247 DCHECK(url_.is_valid());
248 CalculateAndClassifyMatchContents(true, input_text, languages);
251 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
253 void
254 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
255 const bool allow_bolding_nothing,
256 const base::string16& input_text,
257 const std::string& languages) {
258 if (input_text.empty()) {
259 // In case of zero-suggest results, do not highlight matches.
260 match_contents_class_.push_back(
261 ACMatchClassification(0, ACMatchClassification::NONE));
262 return;
265 // First look for the user's input inside the formatted url as it would be
266 // without trimming the scheme, so we can find matches at the beginning of the
267 // scheme.
268 const URLPrefix* prefix =
269 URLPrefix::BestURLPrefix(formatted_url_, input_text);
270 size_t match_start = (prefix == NULL) ?
271 formatted_url_.find(input_text) : prefix->prefix.length();
272 bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) &&
273 (!prefix || (match_start != 0));
274 const url_formatter::FormatUrlTypes format_types =
275 url_formatter::kFormatUrlOmitAll &
276 ~(trim_http ? 0 : url_formatter::kFormatUrlOmitHTTP);
278 base::string16 match_contents = url_formatter::FormatUrl(
279 url_, languages, format_types, net::UnescapeRule::SPACES, nullptr,
280 nullptr, &match_start);
281 // If the first match in the untrimmed string was inside a scheme that we
282 // trimmed, look for a subsequent match.
283 if (match_start == base::string16::npos)
284 match_start = match_contents.find(input_text);
285 // Update |match_contents_| and |match_contents_class_| if it's allowed.
286 if (allow_bolding_nothing || (match_start != base::string16::npos)) {
287 match_contents_ = match_contents;
288 // Safe if |match_start| is npos; also safe if the input is longer than the
289 // remaining contents after |match_start|.
290 AutocompleteMatch::ClassifyLocationInString(match_start,
291 input_text.length(), match_contents_.length(),
292 ACMatchClassification::URL, &match_contents_class_);
296 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
297 const AutocompleteInput& input,
298 bool keyword_provider_requested) const {
299 return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150;
302 // SearchSuggestionParser::Results ---------------------------------------------
304 SearchSuggestionParser::Results::Results()
305 : verbatim_relevance(-1),
306 field_trial_triggered(false),
307 relevances_from_server(false) {}
309 SearchSuggestionParser::Results::~Results() {}
311 void SearchSuggestionParser::Results::Clear() {
312 suggest_results.clear();
313 navigation_results.clear();
314 verbatim_relevance = -1;
315 metadata.clear();
318 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
319 if (verbatim_relevance >= 0)
320 return true;
322 // Right now either all results of one type will be server-scored or they will
323 // all be locally scored, but in case we change this later, we'll just check
324 // them all.
325 for (SuggestResults::const_iterator i(suggest_results.begin());
326 i != suggest_results.end(); ++i) {
327 if (i->relevance_from_server())
328 return true;
330 for (NavigationResults::const_iterator i(navigation_results.begin());
331 i != navigation_results.end(); ++i) {
332 if (i->relevance_from_server())
333 return true;
336 return false;
339 // SearchSuggestionParser ------------------------------------------------------
341 // static
342 std::string SearchSuggestionParser::ExtractJsonData(
343 const net::URLFetcher* source) {
344 const net::HttpResponseHeaders* const response_headers =
345 source->GetResponseHeaders();
346 std::string json_data;
347 source->GetResponseAsString(&json_data);
349 // JSON is supposed to be UTF-8, but some suggest service providers send
350 // JSON files in non-UTF-8 encodings. The actual encoding is usually
351 // specified in the Content-Type header field.
352 if (response_headers) {
353 std::string charset;
354 if (response_headers->GetCharset(&charset)) {
355 base::string16 data_16;
356 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
357 if (base::CodepageToUTF16(json_data, charset.c_str(),
358 base::OnStringConversionError::FAIL,
359 &data_16))
360 json_data = base::UTF16ToUTF8(data_16);
363 return json_data;
366 // static
367 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
368 base::StringPiece json_data) {
369 // The JSON response should be an array.
370 for (size_t response_start_index = json_data.find("["), i = 0;
371 response_start_index != base::StringPiece::npos && i < 5;
372 response_start_index = json_data.find("[", 1), i++) {
373 // Remove any XSSI guards to allow for JSON parsing.
374 json_data.remove_prefix(response_start_index);
376 JSONStringValueDeserializer deserializer(json_data);
377 deserializer.set_allow_trailing_comma(true);
378 int error_code = 0;
379 scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL));
380 if (error_code == 0)
381 return data.Pass();
383 return scoped_ptr<base::Value>();
386 // static
387 bool SearchSuggestionParser::ParseSuggestResults(
388 const base::Value& root_val,
389 const AutocompleteInput& input,
390 const AutocompleteSchemeClassifier& scheme_classifier,
391 int default_result_relevance,
392 const std::string& languages,
393 bool is_keyword_result,
394 Results* results) {
395 base::string16 query;
396 const base::ListValue* root_list = NULL;
397 const base::ListValue* results_list = NULL;
399 if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
400 query != input.text() || !root_list->GetList(1, &results_list))
401 return false;
403 // 3rd element: Description list.
404 const base::ListValue* descriptions = NULL;
405 root_list->GetList(2, &descriptions);
407 // 4th element: Disregard the query URL list for now.
409 // Reset suggested relevance information.
410 results->verbatim_relevance = -1;
412 // 5th element: Optional key-value pairs from the Suggest server.
413 const base::ListValue* types = NULL;
414 const base::ListValue* relevances = NULL;
415 const base::ListValue* suggestion_details = NULL;
416 const base::DictionaryValue* extras = NULL;
417 int prefetch_index = -1;
418 if (root_list->GetDictionary(4, &extras)) {
419 extras->GetList("google:suggesttype", &types);
421 // Discard this list if its size does not match that of the suggestions.
422 if (extras->GetList("google:suggestrelevance", &relevances) &&
423 (relevances->GetSize() != results_list->GetSize()))
424 relevances = NULL;
425 extras->GetInteger("google:verbatimrelevance",
426 &results->verbatim_relevance);
428 // Check if the active suggest field trial (if any) has triggered either
429 // for the default provider or keyword provider.
430 results->field_trial_triggered = false;
431 extras->GetBoolean("google:fieldtrialtriggered",
432 &results->field_trial_triggered);
434 const base::DictionaryValue* client_data = NULL;
435 if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
436 client_data->GetInteger("phi", &prefetch_index);
438 if (extras->GetList("google:suggestdetail", &suggestion_details) &&
439 suggestion_details->GetSize() != results_list->GetSize())
440 suggestion_details = NULL;
442 // Store the metadata that came with the response in case we need to pass it
443 // along with the prefetch query to Instant.
444 JSONStringValueSerializer json_serializer(&results->metadata);
445 json_serializer.Serialize(*extras);
448 // Clear the previous results now that new results are available.
449 results->suggest_results.clear();
450 results->navigation_results.clear();
451 results->answers_image_urls.clear();
453 base::string16 suggestion;
454 std::string type;
455 int relevance = default_result_relevance;
456 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
457 const bool allow_navsuggest =
458 input.type() != metrics::OmniboxInputType::FORCED_QUERY;
459 const base::string16& trimmed_input =
460 base::CollapseWhitespace(input.text(), false);
461 for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
462 // Google search may return empty suggestions for weird input characters,
463 // they make no sense at all and can cause problems in our code.
464 if (suggestion.empty())
465 continue;
467 // Apply valid suggested relevance scores; discard invalid lists.
468 if (relevances != NULL && !relevances->GetInteger(index, &relevance))
469 relevances = NULL;
470 AutocompleteMatchType::Type match_type =
471 AutocompleteMatchType::SEARCH_SUGGEST;
472 if (types && types->GetString(index, &type))
473 match_type = GetAutocompleteMatchType(type);
474 const base::DictionaryValue* suggestion_detail = NULL;
475 std::string deletion_url;
477 if (suggestion_details &&
478 suggestion_details->GetDictionary(index, &suggestion_detail))
479 suggestion_detail->GetString("du", &deletion_url);
481 if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
482 (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
483 // Do not blindly trust the URL coming from the server to be valid.
484 GURL url(url_formatter::FixupURL(base::UTF16ToUTF8(suggestion),
485 std::string()));
486 if (url.is_valid() && allow_navsuggest) {
487 base::string16 title;
488 if (descriptions != NULL)
489 descriptions->GetString(index, &title);
490 results->navigation_results.push_back(NavigationResult(
491 scheme_classifier, url, match_type, title, deletion_url,
492 is_keyword_result, relevance, relevances != NULL, input.text(),
493 languages));
495 } else {
496 // TODO(dschuyler) If the "= " is no longer sent from the back-end
497 // then this may be removed.
498 if ((match_type == AutocompleteMatchType::CALCULATOR) &&
499 !suggestion.compare(0, 2, base::UTF8ToUTF16("= ")))
500 suggestion.erase(0, 2);
502 base::string16 match_contents = suggestion;
503 base::string16 match_contents_prefix;
504 base::string16 annotation;
505 base::string16 answer_contents;
506 base::string16 answer_type_str;
507 scoped_ptr<SuggestionAnswer> answer;
508 std::string suggest_query_params;
510 if (suggestion_details) {
511 suggestion_details->GetDictionary(index, &suggestion_detail);
512 if (suggestion_detail) {
513 suggestion_detail->GetString("t", &match_contents);
514 suggestion_detail->GetString("mp", &match_contents_prefix);
515 // Error correction for bad data from server.
516 if (match_contents.empty())
517 match_contents = suggestion;
518 suggestion_detail->GetString("a", &annotation);
519 suggestion_detail->GetString("q", &suggest_query_params);
521 // Extract the Answer, if provided.
522 const base::DictionaryValue* answer_json = NULL;
523 if (suggestion_detail->GetDictionary("ansa", &answer_json) &&
524 suggestion_detail->GetString("ansb", &answer_type_str)) {
525 bool answer_parsed_successfully = false;
526 answer = SuggestionAnswer::ParseAnswer(answer_json);
527 int answer_type = 0;
528 if (answer && base::StringToInt(answer_type_str, &answer_type)) {
529 answer_parsed_successfully = true;
531 answer->set_type(answer_type);
532 answer->AddImageURLsTo(&results->answers_image_urls);
534 std::string contents;
535 base::JSONWriter::Write(*answer_json, &contents);
536 answer_contents = base::UTF8ToUTF16(contents);
537 } else {
538 answer_type_str = base::string16();
540 UMA_HISTOGRAM_BOOLEAN("Omnibox.AnswerParseSuccess",
541 answer_parsed_successfully);
546 bool should_prefetch = static_cast<int>(index) == prefetch_index;
547 results->suggest_results.push_back(SuggestResult(
548 base::CollapseWhitespace(suggestion, false), match_type,
549 base::CollapseWhitespace(match_contents, false),
550 match_contents_prefix, annotation, answer_contents, answer_type_str,
551 answer.Pass(), suggest_query_params, deletion_url, is_keyword_result,
552 relevance, relevances != NULL, should_prefetch, trimmed_input));
555 results->relevances_from_server = relevances != NULL;
556 return true;