Roll src/third_party/WebKit eac3800:0237a66 (svn 202606:202607)
[chromium-blink-merge.git] / chrome / browser / predictors / autocomplete_action_predictor.cc
blob0f70fad8a1f93c7ac5af4e868998f8a594292650
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/predictors/autocomplete_action_predictor.h"
7 #include <math.h>
9 #include <vector>
11 #include "base/bind.h"
12 #include "base/guid.h"
13 #include "base/i18n/case_conversion.h"
14 #include "base/metrics/histogram.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "chrome/browser/chrome_notification_types.h"
19 #include "chrome/browser/history/history_service_factory.h"
20 #include "chrome/browser/predictors/autocomplete_action_predictor_factory.h"
21 #include "chrome/browser/predictors/predictor_database.h"
22 #include "chrome/browser/predictors/predictor_database_factory.h"
23 #include "chrome/browser/prerender/prerender_field_trial.h"
24 #include "chrome/browser/prerender/prerender_handle.h"
25 #include "chrome/browser/prerender/prerender_manager.h"
26 #include "chrome/browser/prerender/prerender_manager_factory.h"
27 #include "chrome/browser/profiles/profile.h"
28 #include "components/history/core/browser/history_service.h"
29 #include "components/history/core/browser/in_memory_database.h"
30 #include "components/omnibox/browser/autocomplete_match.h"
31 #include "components/omnibox/browser/autocomplete_result.h"
32 #include "components/omnibox/browser/omnibox_log.h"
33 #include "components/omnibox/browser/omnibox_popup_model.h"
34 #include "content/public/browser/browser_thread.h"
35 #include "content/public/browser/notification_details.h"
36 #include "content/public/browser/notification_service.h"
37 #include "content/public/browser/notification_source.h"
39 namespace {
41 const float kConfidenceCutoff[] = {
42 0.8f,
43 0.5f
46 static_assert(arraysize(kConfidenceCutoff) ==
47 predictors::AutocompleteActionPredictor::LAST_PREDICT_ACTION,
48 "kConfidenceCutoff count should match LAST_PREDICT_ACTION");
50 const size_t kMinimumUserTextLength = 1;
51 const int kMinimumNumberOfHits = 3;
53 enum DatabaseAction {
54 DATABASE_ACTION_ADD,
55 DATABASE_ACTION_UPDATE,
56 DATABASE_ACTION_DELETE_SOME,
57 DATABASE_ACTION_DELETE_ALL,
58 DATABASE_ACTION_COUNT
61 } // namespace
63 namespace predictors {
// Cached entries whose URL was last visited more than this many days ago are
// purged by DeleteOldIdsFromCaches().
65 const int AutocompleteActionPredictor::kMaximumDaysToKeepEntry = 14;
67 AutocompleteActionPredictor::AutocompleteActionPredictor(Profile* profile)
68 : profile_(profile),
69 main_profile_predictor_(NULL),
70 incognito_predictor_(NULL),
71 initialized_(false),
72 history_service_observer_(this) {
73 if (profile_->IsOffTheRecord()) {
74 main_profile_predictor_ = AutocompleteActionPredictorFactory::GetForProfile(
75 profile_->GetOriginalProfile());
76 DCHECK(main_profile_predictor_);
77 main_profile_predictor_->incognito_predictor_ = this;
78 if (main_profile_predictor_->initialized_)
79 CopyFromMainProfile();
80 } else {
81 // Request the in-memory database from the history to force it to load so
82 // it's available as soon as possible.
83 history::HistoryService* history_service =
84 HistoryServiceFactory::GetForProfile(
85 profile_, ServiceAccessType::EXPLICIT_ACCESS);
86 if (history_service)
87 history_service->InMemoryDatabase();
89 table_ =
90 PredictorDatabaseFactory::GetForProfile(profile_)->autocomplete_table();
92 // Observe all main frame loads so we can wait for the first to complete
93 // before accessing DB and IO threads to build the local cache.
94 notification_registrar_.Add(this,
95 content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME,
96 content::NotificationService::AllSources());
100 AutocompleteActionPredictor::~AutocompleteActionPredictor() {
101 if (main_profile_predictor_)
102 main_profile_predictor_->incognito_predictor_ = NULL;
103 else if (incognito_predictor_)
104 incognito_predictor_->main_profile_predictor_ = NULL;
105 if (prerender_handle_.get())
106 prerender_handle_->OnCancel();
109 void AutocompleteActionPredictor::RegisterTransitionalMatches(
110 const base::string16& user_text,
111 const AutocompleteResult& result) {
112 if (user_text.length() < kMinimumUserTextLength)
113 return;
114 const base::string16 lower_user_text(base::i18n::ToLower(user_text));
116 // Merge this in to an existing match if we already saw |user_text|
117 std::vector<TransitionalMatch>::iterator match_it =
118 std::find(transitional_matches_.begin(), transitional_matches_.end(),
119 lower_user_text);
121 if (match_it == transitional_matches_.end()) {
122 TransitionalMatch transitional_match;
123 transitional_match.user_text = lower_user_text;
124 match_it = transitional_matches_.insert(transitional_matches_.end(),
125 transitional_match);
128 for (const auto& i : result) {
129 if (std::find(match_it->urls.begin(), match_it->urls.end(),
130 i.destination_url) == match_it->urls.end()) {
131 match_it->urls.push_back(i.destination_url);
136 void AutocompleteActionPredictor::ClearTransitionalMatches() {
137 transitional_matches_.clear();
140 void AutocompleteActionPredictor::CancelPrerender() {
141 // If the prerender has already been abandoned, leave it to its own timeout;
142 // this normally gets called immediately after OnOmniboxOpenedUrl.
143 if (prerender_handle_ && !prerender_handle_->IsAbandoned()) {
144 prerender_handle_->OnCancel();
145 prerender_handle_.reset();
149 void AutocompleteActionPredictor::StartPrerendering(
150 const GURL& url,
151 content::SessionStorageNamespace* session_storage_namespace,
152 const gfx::Size& size) {
153 // Only cancel the old prerender after starting the new one, so if the URLs
154 // are the same, the underlying prerender will be reused.
155 scoped_ptr<prerender::PrerenderHandle> old_prerender_handle(
156 prerender_handle_.release());
157 if (prerender::PrerenderManager* prerender_manager =
158 prerender::PrerenderManagerFactory::GetForProfile(profile_)) {
159 prerender_handle_.reset(prerender_manager->AddPrerenderFromOmnibox(
160 url, session_storage_namespace, size));
162 if (old_prerender_handle)
163 old_prerender_handle->OnCancel();
166 // Given a match, return a recommended action.
167 AutocompleteActionPredictor::Action
168 AutocompleteActionPredictor::RecommendAction(
169 const base::string16& user_text,
170 const AutocompleteMatch& match) const {
171 bool is_in_db = false;
172 const double confidence = CalculateConfidence(user_text, match, &is_in_db);
173 DCHECK(confidence >= 0.0 && confidence <= 1.0);
175 UMA_HISTOGRAM_BOOLEAN("AutocompleteActionPredictor.MatchIsInDb", is_in_db);
177 if (is_in_db) {
178 // Multiple enties with the same URL are fine as the confidence may be
179 // different.
180 tracked_urls_.push_back(std::make_pair(match.destination_url, confidence));
181 UMA_HISTOGRAM_COUNTS_100("AutocompleteActionPredictor.Confidence",
182 confidence * 100);
185 // Map the confidence to an action.
186 Action action = ACTION_NONE;
187 for (int i = 0; i < LAST_PREDICT_ACTION; ++i) {
188 if (confidence >= kConfidenceCutoff[i]) {
189 action = static_cast<Action>(i);
190 break;
194 // Downgrade prerender to preconnect if this is a search match or if omnibox
195 // prerendering is disabled. There are cases when Instant will not handle a
196 // search suggestion and in those cases it would be good to prerender the
197 // search results, however search engines have not been set up to correctly
198 // handle being prerendered and until they are we should avoid it.
199 // http://crbug.com/117495
200 if (action == ACTION_PRERENDER &&
201 (AutocompleteMatch::IsSearchType(match.type) ||
202 !prerender::IsOmniboxEnabled(profile_))) {
203 action = ACTION_PRECONNECT;
206 return action;
209 // Return true if the suggestion type warrants a TCP/IP preconnection.
210 // i.e., it is now quite likely that the user will select the related domain.
211 // static
212 bool AutocompleteActionPredictor::IsPreconnectable(
213 const AutocompleteMatch& match) {
214 return AutocompleteMatch::IsSearchType(match.type);
217 bool AutocompleteActionPredictor::IsPrerenderAbandonedForTesting() {
218 return prerender_handle_ && prerender_handle_->IsAbandoned();
221 void AutocompleteActionPredictor::OnOmniboxOpenedUrl(const OmniboxLog& log) {
222 if (!initialized_)
223 return;
225 // TODO(dominich): The body of this method doesn't need to be run
226 // synchronously. Investigate posting it as a task to be run later.
228 if (log.text.length() < kMinimumUserTextLength)
229 return;
231 // Do not attempt to learn from omnibox interactions where the omnibox
232 // dropdown is closed. In these cases the user text (|log.text|) that we
233 // learn from is either empty or effectively identical to the destination
234 // string. In either case, it can't teach us much. Also do not attempt
235 // to learn from paste-and-go actions even if the popup is open because
236 // the paste-and-go destination has no relation to whatever text the user
237 // may have typed.
238 if (!log.is_popup_open || log.is_paste_and_go)
239 return;
241 // Abandon the current prerender. If it is to be used, it will be used very
242 // soon, so use the lower timeout.
243 if (prerender_handle_) {
244 prerender_handle_->OnNavigateAway();
245 // Don't release |prerender_handle_| so it is canceled if it survives to the
246 // next StartPrerendering call.
249 UMA_HISTOGRAM_BOOLEAN(
250 base::StringPrintf("Prerender.OmniboxNavigationsCouldPrerender%s",
251 prerender::PrerenderManager::GetModeString()).c_str(),
252 prerender::IsOmniboxEnabled(profile_));
254 const AutocompleteMatch& match = log.result.match_at(log.selected_index);
255 const GURL& opened_url = match.destination_url;
256 const base::string16 lower_user_text(base::i18n::ToLower(log.text));
258 // Traverse transitional matches for those that have a user_text that is a
259 // prefix of |lower_user_text|.
260 std::vector<AutocompleteActionPredictorTable::Row> rows_to_add;
261 std::vector<AutocompleteActionPredictorTable::Row> rows_to_update;
263 for (std::vector<TransitionalMatch>::const_iterator it =
264 transitional_matches_.begin(); it != transitional_matches_.end();
265 ++it) {
266 if (!base::StartsWith(lower_user_text, it->user_text,
267 base::CompareCase::SENSITIVE))
268 continue;
270 // Add entries to the database for those matches.
271 for (std::vector<GURL>::const_iterator url_it = it->urls.begin();
272 url_it != it->urls.end(); ++url_it) {
273 DCHECK(it->user_text.length() >= kMinimumUserTextLength);
274 const DBCacheKey key = { it->user_text, *url_it };
275 const bool is_hit = (*url_it == opened_url);
277 AutocompleteActionPredictorTable::Row row;
278 row.user_text = key.user_text;
279 row.url = key.url;
281 DBCacheMap::iterator it = db_cache_.find(key);
282 if (it == db_cache_.end()) {
283 row.id = base::GenerateGUID();
284 row.number_of_hits = is_hit ? 1 : 0;
285 row.number_of_misses = is_hit ? 0 : 1;
287 rows_to_add.push_back(row);
288 } else {
289 DCHECK(db_id_cache_.find(key) != db_id_cache_.end());
290 row.id = db_id_cache_.find(key)->second;
291 row.number_of_hits = it->second.number_of_hits + (is_hit ? 1 : 0);
292 row.number_of_misses = it->second.number_of_misses + (is_hit ? 0 : 1);
294 rows_to_update.push_back(row);
298 if (rows_to_add.size() > 0 || rows_to_update.size() > 0)
299 AddAndUpdateRows(rows_to_add, rows_to_update);
301 ClearTransitionalMatches();
303 // Check against tracked urls and log accuracy for the confidence we
304 // predicted.
305 for (std::vector<std::pair<GURL, double> >::const_iterator it =
306 tracked_urls_.begin(); it != tracked_urls_.end();
307 ++it) {
308 if (opened_url == it->first) {
309 UMA_HISTOGRAM_COUNTS_100("AutocompleteActionPredictor.AccurateCount",
310 it->second * 100);
313 tracked_urls_.clear();
316 void AutocompleteActionPredictor::Observe(
317 int type,
318 const content::NotificationSource& source,
319 const content::NotificationDetails& details) {
320 DCHECK_EQ(content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME, type);
321 CreateLocalCachesFromDatabase();
322 notification_registrar_.Remove(
323 this, content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME,
324 content::NotificationService::AllSources());
327 void AutocompleteActionPredictor::CreateLocalCachesFromDatabase() {
328 // Create local caches using the database as loaded. We will garbage collect
329 // rows from the caches and the database once the history service is
330 // available.
331 std::vector<AutocompleteActionPredictorTable::Row>* rows =
332 new std::vector<AutocompleteActionPredictorTable::Row>();
333 content::BrowserThread::PostTaskAndReply(
334 content::BrowserThread::DB, FROM_HERE,
335 base::Bind(&AutocompleteActionPredictorTable::GetAllRows, table_, rows),
336 base::Bind(&AutocompleteActionPredictor::CreateCaches, AsWeakPtr(),
337 base::Owned(rows)));
340 void AutocompleteActionPredictor::DeleteAllRows() {
341 if (!initialized_)
342 return;
344 db_cache_.clear();
345 db_id_cache_.clear();
347 if (table_.get()) {
348 content::BrowserThread::PostTask(
349 content::BrowserThread::DB, FROM_HERE,
350 base::Bind(&AutocompleteActionPredictorTable::DeleteAllRows, table_));
353 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
354 DATABASE_ACTION_DELETE_ALL, DATABASE_ACTION_COUNT);
357 void AutocompleteActionPredictor::DeleteRowsWithURLs(
358 const history::URLRows& rows) {
359 if (!initialized_)
360 return;
362 std::vector<AutocompleteActionPredictorTable::Row::Id> id_list;
364 for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) {
365 if (std::find_if(rows.begin(), rows.end(),
366 history::URLRow::URLRowHasURL(it->first.url)) !=
367 rows.end()) {
368 const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first);
369 DCHECK(id_it != db_id_cache_.end());
370 id_list.push_back(id_it->second);
371 db_id_cache_.erase(id_it);
372 db_cache_.erase(it++);
373 } else {
374 ++it;
378 if (table_.get()) {
379 content::BrowserThread::PostTask(
380 content::BrowserThread::DB, FROM_HERE,
381 base::Bind(&AutocompleteActionPredictorTable::DeleteRows, table_,
382 id_list));
385 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
386 DATABASE_ACTION_DELETE_SOME, DATABASE_ACTION_COUNT);
389 void AutocompleteActionPredictor::AddAndUpdateRows(
390 const AutocompleteActionPredictorTable::Rows& rows_to_add,
391 const AutocompleteActionPredictorTable::Rows& rows_to_update) {
392 if (!initialized_)
393 return;
395 for (AutocompleteActionPredictorTable::Rows::const_iterator it =
396 rows_to_add.begin(); it != rows_to_add.end(); ++it) {
397 const DBCacheKey key = { it->user_text, it->url };
398 DBCacheValue value = { it->number_of_hits, it->number_of_misses };
400 DCHECK(db_cache_.find(key) == db_cache_.end());
402 db_cache_[key] = value;
403 db_id_cache_[key] = it->id;
404 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
405 DATABASE_ACTION_ADD, DATABASE_ACTION_COUNT);
407 for (AutocompleteActionPredictorTable::Rows::const_iterator it =
408 rows_to_update.begin(); it != rows_to_update.end(); ++it) {
409 const DBCacheKey key = { it->user_text, it->url };
411 DBCacheMap::iterator db_it = db_cache_.find(key);
412 DCHECK(db_it != db_cache_.end());
413 DCHECK(db_id_cache_.find(key) != db_id_cache_.end());
415 db_it->second.number_of_hits = it->number_of_hits;
416 db_it->second.number_of_misses = it->number_of_misses;
417 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
418 DATABASE_ACTION_UPDATE, DATABASE_ACTION_COUNT);
421 if (table_.get()) {
422 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE,
423 base::Bind(&AutocompleteActionPredictorTable::AddAndUpdateRows,
424 table_, rows_to_add, rows_to_update));
428 void AutocompleteActionPredictor::CreateCaches(
429 std::vector<AutocompleteActionPredictorTable::Row>* rows) {
430 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
431 DCHECK(!profile_->IsOffTheRecord());
432 DCHECK(!initialized_);
433 DCHECK(db_cache_.empty());
434 DCHECK(db_id_cache_.empty());
436 for (std::vector<AutocompleteActionPredictorTable::Row>::const_iterator it =
437 rows->begin(); it != rows->end(); ++it) {
438 const DBCacheKey key = { it->user_text, it->url };
439 const DBCacheValue value = { it->number_of_hits, it->number_of_misses };
440 db_cache_[key] = value;
441 db_id_cache_[key] = it->id;
444 // If the history service is ready, delete any old or invalid entries.
445 history::HistoryService* history_service =
446 HistoryServiceFactory::GetForProfile(profile_,
447 ServiceAccessType::EXPLICIT_ACCESS);
448 if (!TryDeleteOldEntries(history_service)) {
449 // Wait for the notification that the history service is ready and the URL
450 // DB is loaded.
451 if (history_service)
452 history_service_observer_.Add(history_service);
456 bool AutocompleteActionPredictor::TryDeleteOldEntries(
457 history::HistoryService* service) {
458 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
459 DCHECK(!profile_->IsOffTheRecord());
460 DCHECK(!initialized_);
462 if (!service)
463 return false;
465 history::URLDatabase* url_db = service->InMemoryDatabase();
466 if (!url_db)
467 return false;
469 DeleteOldEntries(url_db);
470 return true;
473 void AutocompleteActionPredictor::DeleteOldEntries(
474 history::URLDatabase* url_db) {
475 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
476 DCHECK(!profile_->IsOffTheRecord());
477 DCHECK(!initialized_);
478 DCHECK(table_.get());
480 std::vector<AutocompleteActionPredictorTable::Row::Id> ids_to_delete;
481 DeleteOldIdsFromCaches(url_db, &ids_to_delete);
483 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE,
484 base::Bind(&AutocompleteActionPredictorTable::DeleteRows, table_,
485 ids_to_delete));
487 FinishInitialization();
488 if (incognito_predictor_)
489 incognito_predictor_->CopyFromMainProfile();
492 void AutocompleteActionPredictor::DeleteOldIdsFromCaches(
493 history::URLDatabase* url_db,
494 std::vector<AutocompleteActionPredictorTable::Row::Id>* id_list) {
495 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
496 DCHECK(!profile_->IsOffTheRecord());
497 DCHECK(!initialized_);
498 DCHECK(url_db);
499 DCHECK(id_list);
501 id_list->clear();
502 for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) {
503 history::URLRow url_row;
505 if ((url_db->GetRowForURL(it->first.url, &url_row) == 0) ||
506 ((base::Time::Now() - url_row.last_visit()).InDays() >
507 kMaximumDaysToKeepEntry)) {
508 const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first);
509 DCHECK(id_it != db_id_cache_.end());
510 id_list->push_back(id_it->second);
511 db_id_cache_.erase(id_it);
512 db_cache_.erase(it++);
513 } else {
514 ++it;
519 void AutocompleteActionPredictor::CopyFromMainProfile() {
520 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
521 DCHECK(profile_->IsOffTheRecord());
522 DCHECK(!initialized_);
523 DCHECK(main_profile_predictor_);
524 DCHECK(main_profile_predictor_->initialized_);
526 db_cache_ = main_profile_predictor_->db_cache_;
527 db_id_cache_ = main_profile_predictor_->db_id_cache_;
528 FinishInitialization();
531 void AutocompleteActionPredictor::FinishInitialization() {
532 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
533 DCHECK(!initialized_);
534 initialized_ = true;
537 double AutocompleteActionPredictor::CalculateConfidence(
538 const base::string16& user_text,
539 const AutocompleteMatch& match,
540 bool* is_in_db) const {
541 const DBCacheKey key = { user_text, match.destination_url };
543 *is_in_db = false;
544 if (user_text.length() < kMinimumUserTextLength)
545 return 0.0;
547 const DBCacheMap::const_iterator iter = db_cache_.find(key);
548 if (iter == db_cache_.end())
549 return 0.0;
551 *is_in_db = true;
552 return CalculateConfidenceForDbEntry(iter);
555 double AutocompleteActionPredictor::CalculateConfidenceForDbEntry(
556 DBCacheMap::const_iterator iter) const {
557 const DBCacheValue& value = iter->second;
558 if (value.number_of_hits < kMinimumNumberOfHits)
559 return 0.0;
561 const double number_of_hits = static_cast<double>(value.number_of_hits);
562 return number_of_hits / (number_of_hits + value.number_of_misses);
565 void AutocompleteActionPredictor::Shutdown() {
566 history_service_observer_.RemoveAll();
569 void AutocompleteActionPredictor::OnURLsDeleted(
570 history::HistoryService* history_service,
571 bool all_history,
572 bool expired,
573 const history::URLRows& deleted_rows,
574 const std::set<GURL>& favicon_urls) {
575 if (!initialized_)
576 return;
578 if (all_history)
579 DeleteAllRows();
580 else
581 DeleteRowsWithURLs(deleted_rows);
584 void AutocompleteActionPredictor::OnHistoryServiceLoaded(
585 history::HistoryService* history_service) {
586 TryDeleteOldEntries(history_service);
587 history_service_observer_.Remove(history_service);
590 AutocompleteActionPredictor::TransitionalMatch::TransitionalMatch() {
593 AutocompleteActionPredictor::TransitionalMatch::~TransitionalMatch() {
596 } // namespace predictors