chrome/browser/autocomplete/in_memory_url_index_unittest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <algorithm>
   6 #include <fstream>
   7
   8 #include "base/auto_reset.h"
   9 #include "base/files/file_path.h"
  10 #include "base/files/file_util.h"
  11 #include "base/files/scoped_temp_dir.h"
  12 #include "base/i18n/case_conversion.h"
  13 #include "base/path_service.h"
  14 #include "base/run_loop.h"
  15 #include "base/strings/string16.h"
  16 #include "base/strings/string_split.h"
  17 #include "base/strings/string_util.h"
  18 #include "base/strings/stringprintf.h"
  19 #include "base/strings/utf_string_conversions.h"
  20 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
  21 #include "chrome/browser/history/history_service_factory.h"
  22 #include "chrome/common/chrome_paths.h"
  23 #include "chrome/test/base/history_index_restore_observer.h"
  24 #include "chrome/test/base/testing_profile.h"
  25 #include "components/bookmarks/test/bookmark_test_helpers.h"
  26 #include "components/history/core/browser/history_backend.h"
  27 #include "components/history/core/browser/history_database.h"
  28 #include "components/history/core/browser/history_service.h"
  29 #include "components/omnibox/browser/in_memory_url_index.h"
  30 #include "components/omnibox/browser/in_memory_url_index_types.h"
  31 #include "components/omnibox/browser/url_index_private_data.h"
  32 #include "content/public/browser/browser_thread.h"
  33 #include "content/public/test/test_browser_thread_bundle.h"
  34 #include "sql/transaction.h"
  35 #include "testing/gtest/include/gtest/gtest.h"
  36
  37 using base::ASCIIToUTF16;
  38
// The test version of the history URL database table ('urls') is contained in
// a database file created from a text file
// ('url_history_provider_test.db.txt'). The only difference between this
// table and a live 'urls' table from a profile is that the last_visit_time
// column in the test table contains a number specifying the number of days
// relative to 'today' to which the absolute time should be set during the
// test setup stage.
  45 //
  46 // The format of the test database text file is of a SQLite .dump file.
  47 // Note that only lines whose first character is an upper-case letter are
  48 // processed when creating the test database.
  49
  50 namespace {
  51 const size_t kInvalid = base::string16::npos;
  52 const size_t kMaxMatches = 3;
  53 const char kTestLanguages[] = "en,ja,hi,zh";
  54 const char kClientWhitelistedScheme[] = "xyz";
  55
  56 // Helper function to set lower case |lower_string| and |lower_terms| (words
  57 // list) based on supplied |search_string| and |cursor_position|. If
  58 // |cursor_position| is set and useful (not at either end of the string), allow
  59 // the |search_string| to be broken at |cursor_position|. We do this by
  60 // pretending there's a space where the cursor is. |lower_terms| are obtained by
  61 // splitting the |lower_string| on whitespace into tokens.
  62 void StringToTerms(const char* search_string,
  63                    size_t cursor_position,
  64                    base::string16* lower_string,
  65                    String16Vector* lower_terms) {
  66   *lower_string = base::i18n::ToLower(ASCIIToUTF16(search_string));
  67   if ((cursor_position != kInvalid) &&
  68       (cursor_position < lower_string->length()) && (cursor_position > 0)) {
  69     lower_string->insert(cursor_position, base::ASCIIToUTF16(" "));
  70   }
  71
  72   *lower_terms = base::SplitString(*lower_string, base::kWhitespaceUTF16,
  73                                    base::KEEP_WHITESPACE,
  74                                    base::SPLIT_WANT_NONEMPTY);
  75 }
  76
  77 }  // namespace
  78
  79 // -----------------------------------------------------------------------------
  80
  81 // Observer class so the unit tests can wait while the cache is being saved.
  82 class CacheFileSaverObserver : public InMemoryURLIndex::SaveCacheObserver {
  83  public:
  84   explicit CacheFileSaverObserver(const base::Closure& task);
  85
  86   bool succeeded() { return succeeded_; }
  87
  88  private:
  89   // SaveCacheObserver implementation.
  90   void OnCacheSaveFinished(bool succeeded) override;
  91
  92   base::Closure task_;
  93   bool succeeded_;
  94
  95   DISALLOW_COPY_AND_ASSIGN(CacheFileSaverObserver);
  96 };
  97
  98 CacheFileSaverObserver::CacheFileSaverObserver(const base::Closure& task)
  99     : task_(task),
 100       succeeded_(false) {
 101 }
 102
 103 void CacheFileSaverObserver::OnCacheSaveFinished(bool succeeded) {
 104   succeeded_ = succeeded;
 105   task_.Run();
 106 }
 107
 108 // -----------------------------------------------------------------------------
 109
 110 class InMemoryURLIndexTest : public testing::Test {
 111  public:
 112   InMemoryURLIndexTest();
 113
 114  protected:
 115   // Test setup.
 116   void SetUp() override;
 117   void TearDown() override;
 118
 119   // Allows the database containing the test data to be customized by
 120   // subclasses.
 121   virtual base::FilePath::StringType TestDBName() const;
 122
 123   // Allows the test to control when the InMemoryURLIndex is initialized.
 124   virtual bool InitializeInMemoryURLIndexInSetUp() const;
 125
 126   // Initialize the InMemoryURLIndex for the tests.
 127   void InitializeInMemoryURLIndex();
 128
 129   // Validates that the given |term| is contained in |cache| and that it is
 130   // marked as in-use.
 131   void CheckTerm(const URLIndexPrivateData::SearchTermCacheMap& cache,
 132                  base::string16 term) const;
 133
 134   // Pass-through function to simplify our friendship with HistoryService.
 135   sql::Connection& GetDB();
 136
 137   // Pass-through functions to simplify our friendship with InMemoryURLIndex.
 138   URLIndexPrivateData* GetPrivateData() const;
 139   base::CancelableTaskTracker* GetPrivateDataTracker() const;
 140   void ClearPrivateData();
 141   void set_history_dir(const base::FilePath& dir_path);
 142   bool GetCacheFilePath(base::FilePath* file_path) const;
 143   void PostRestoreFromCacheFileTask();
 144   void PostSaveToCacheFileTask();
 145   const SchemeSet& scheme_whitelist();
 146
 147
 148   // Pass-through functions to simplify our friendship with URLIndexPrivateData.
 149   bool UpdateURL(const history::URLRow& row);
 150   bool DeleteURL(const GURL& url);
 151
 152   // Data verification helper functions.
 153   void ExpectPrivateDataNotEmpty(const URLIndexPrivateData& data);
 154   void ExpectPrivateDataEmpty(const URLIndexPrivateData& data);
 155   void ExpectPrivateDataEqual(const URLIndexPrivateData& expected,
 156                               const URLIndexPrivateData& actual);
 157
 158   content::TestBrowserThreadBundle thread_bundle_;
 159   scoped_ptr<InMemoryURLIndex> url_index_;
 160   TestingProfile profile_;
 161   history::HistoryService* history_service_;
 162   history::HistoryDatabase* history_database_;
 163 };
 164
 165 InMemoryURLIndexTest::InMemoryURLIndexTest()
 166     : history_service_(nullptr), history_database_(nullptr) {
 167 }
 168
 169 sql::Connection& InMemoryURLIndexTest::GetDB() {
 170   return history_database_->GetDB();
 171 }
 172
 173 URLIndexPrivateData* InMemoryURLIndexTest::GetPrivateData() const {
 174   DCHECK(url_index_->private_data());
 175   return url_index_->private_data();
 176 }
 177
 178 base::CancelableTaskTracker* InMemoryURLIndexTest::GetPrivateDataTracker()
 179     const {
 180   DCHECK(url_index_->private_data_tracker());
 181   return url_index_->private_data_tracker();
 182 }
 183
 184 void InMemoryURLIndexTest::ClearPrivateData() {
 185   return url_index_->ClearPrivateData();
 186 }
 187
 188 void InMemoryURLIndexTest::set_history_dir(const base::FilePath& dir_path) {
 189   return url_index_->set_history_dir(dir_path);
 190 }
 191
 192 bool InMemoryURLIndexTest::GetCacheFilePath(base::FilePath* file_path) const {
 193   DCHECK(file_path);
 194   return url_index_->GetCacheFilePath(file_path);
 195 }
 196
 197 void InMemoryURLIndexTest::PostRestoreFromCacheFileTask() {
 198   url_index_->PostRestoreFromCacheFileTask();
 199 }
 200
 201 void InMemoryURLIndexTest::PostSaveToCacheFileTask() {
 202   url_index_->PostSaveToCacheFileTask();
 203 }
 204
 205 const SchemeSet& InMemoryURLIndexTest::scheme_whitelist() {
 206   return url_index_->scheme_whitelist();
 207 }
 208
 209 bool InMemoryURLIndexTest::UpdateURL(const history::URLRow& row) {
 210   return GetPrivateData()->UpdateURL(history_service_,
 211                                      row,
 212                                      url_index_->languages_,
 213                                      url_index_->scheme_whitelist_,
 214                                      GetPrivateDataTracker());
 215 }
 216
 217 bool InMemoryURLIndexTest::DeleteURL(const GURL& url) {
 218   return GetPrivateData()->DeleteURL(url);
 219 }
 220
 221 void InMemoryURLIndexTest::SetUp() {
 222   // We cannot access the database until the backend has been loaded.
 223   ASSERT_TRUE(profile_.CreateHistoryService(true, false));
 224   profile_.CreateBookmarkModel(true);
 225   bookmarks::test::WaitForBookmarkModelToLoad(
 226       BookmarkModelFactory::GetForProfile(&profile_));
 227   profile_.BlockUntilHistoryProcessesPendingRequests();
 228   profile_.BlockUntilHistoryIndexIsRefreshed();
 229   history_service_ = HistoryServiceFactory::GetForProfile(
 230       &profile_, ServiceAccessType::EXPLICIT_ACCESS);
 231   ASSERT_TRUE(history_service_);
 232   history::HistoryBackend* backend = history_service_->history_backend_.get();
 233   history_database_ = backend->db();
 234
 235   // Create and populate a working copy of the URL history database.
 236   base::FilePath history_proto_path;
 237   PathService::Get(chrome::DIR_TEST_DATA, &history_proto_path);
 238   history_proto_path = history_proto_path.Append(
 239       FILE_PATH_LITERAL("History"));
 240   history_proto_path = history_proto_path.Append(TestDBName());
 241   EXPECT_TRUE(base::PathExists(history_proto_path));
 242
 243   std::ifstream proto_file(history_proto_path.value().c_str());
 244   static const size_t kCommandBufferMaxSize = 2048;
 245   char sql_cmd_line[kCommandBufferMaxSize];
 246
 247   sql::Connection& db(GetDB());
 248   ASSERT_TRUE(db.is_open());
 249   {
 250     sql::Transaction transaction(&db);
 251     transaction.Begin();
 252     while (!proto_file.eof()) {
 253       proto_file.getline(sql_cmd_line, kCommandBufferMaxSize);
 254       if (!proto_file.eof()) {
 255         // We only process lines which begin with a upper-case letter.
 256         // TODO(mrossetti): Can iswupper() be used here?
 257         if (sql_cmd_line[0] >= 'A' && sql_cmd_line[0] <= 'Z') {
 258           std::string sql_cmd(sql_cmd_line);
 259           sql::Statement sql_stmt(db.GetUniqueStatement(sql_cmd_line));
 260           EXPECT_TRUE(sql_stmt.Run());
 261         }
 262       }
 263     }
 264     transaction.Commit();
 265   }
 266
 267   // Update the last_visit_time table column in the "urls" table
 268   // such that it represents a time relative to 'now'.
 269   sql::Statement statement(db.GetUniqueStatement(
 270       "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;"));
 271   ASSERT_TRUE(statement.is_valid());
 272   base::Time time_right_now = base::Time::NowFromSystemTime();
 273   base::TimeDelta day_delta = base::TimeDelta::FromDays(1);
 274   {
 275     sql::Transaction transaction(&db);
 276     transaction.Begin();
 277     while (statement.Step()) {
 278       history::URLRow row;
 279       history_database_->FillURLRow(statement, &row);
 280       base::Time last_visit = time_right_now;
 281       for (int64 i = row.last_visit().ToInternalValue(); i > 0; --i)
 282         last_visit -= day_delta;
 283       row.set_last_visit(last_visit);
 284       history_database_->UpdateURLRow(row.id(), row);
 285     }
 286     transaction.Commit();
 287   }
 288
 289   // Update the visit_time table column in the "visits" table
 290   // such that it represents a time relative to 'now'.
 291   statement.Assign(db.GetUniqueStatement(
 292       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits;"));
 293   ASSERT_TRUE(statement.is_valid());
 294   {
 295     sql::Transaction transaction(&db);
 296     transaction.Begin();
 297     while (statement.Step()) {
 298       history::VisitRow row;
 299       history_database_->FillVisitRow(statement, &row);
 300       base::Time last_visit = time_right_now;
 301       for (int64 i = row.visit_time.ToInternalValue(); i > 0; --i)
 302         last_visit -= day_delta;
 303       row.visit_time = last_visit;
 304       history_database_->UpdateVisitRow(row);
 305     }
 306     transaction.Commit();
 307   }
 308
 309   if (InitializeInMemoryURLIndexInSetUp())
 310     InitializeInMemoryURLIndex();
 311 }
 312
 313 void InMemoryURLIndexTest::TearDown() {
 314   // Ensure that the InMemoryURLIndex no longer observes HistoryService before
 315   // it is destroyed in order to prevent HistoryService calling dead observer.
 316   if (url_index_)
 317     url_index_->Shutdown();
 318 }
 319
 320 base::FilePath::StringType InMemoryURLIndexTest::TestDBName() const {
 321     return FILE_PATH_LITERAL("url_history_provider_test.db.txt");
 322 }
 323
 324 bool InMemoryURLIndexTest::InitializeInMemoryURLIndexInSetUp() const {
 325   return true;
 326 }
 327
 328 void InMemoryURLIndexTest::InitializeInMemoryURLIndex() {
 329   DCHECK(!url_index_);
 330
 331   SchemeSet client_schemes_to_whitelist;
 332   client_schemes_to_whitelist.insert(kClientWhitelistedScheme);
 333   url_index_.reset(new InMemoryURLIndex(
 334       nullptr, history_service_, content::BrowserThread::GetBlockingPool(),
 335       base::FilePath(), kTestLanguages, client_schemes_to_whitelist));
 336   url_index_->Init();
 337   url_index_->RebuildFromHistory(history_database_);
 338 }
 339
 340 void InMemoryURLIndexTest::CheckTerm(
 341     const URLIndexPrivateData::SearchTermCacheMap& cache,
 342     base::string16 term) const {
 343   URLIndexPrivateData::SearchTermCacheMap::const_iterator cache_iter(
 344       cache.find(term));
 345   ASSERT_TRUE(cache.end() != cache_iter)
 346       << "Cache does not contain '" << term << "' but should.";
 347   URLIndexPrivateData::SearchTermCacheItem cache_item = cache_iter->second;
 348   EXPECT_TRUE(cache_item.used_)
 349       << "Cache item '" << term << "' should be marked as being in use.";
 350 }
 351
 352 void InMemoryURLIndexTest::ExpectPrivateDataNotEmpty(
 353     const URLIndexPrivateData& data) {
 354   EXPECT_FALSE(data.word_list_.empty());
 355   // available_words_ will be empty since we have freshly built the
 356   // data set for these tests.
 357   EXPECT_TRUE(data.available_words_.empty());
 358   EXPECT_FALSE(data.word_map_.empty());
 359   EXPECT_FALSE(data.char_word_map_.empty());
 360   EXPECT_FALSE(data.word_id_history_map_.empty());
 361   EXPECT_FALSE(data.history_id_word_map_.empty());
 362   EXPECT_FALSE(data.history_info_map_.empty());
 363 }
 364
 365 void InMemoryURLIndexTest::ExpectPrivateDataEmpty(
 366     const URLIndexPrivateData& data) {
 367   EXPECT_TRUE(data.word_list_.empty());
 368   EXPECT_TRUE(data.available_words_.empty());
 369   EXPECT_TRUE(data.word_map_.empty());
 370   EXPECT_TRUE(data.char_word_map_.empty());
 371   EXPECT_TRUE(data.word_id_history_map_.empty());
 372   EXPECT_TRUE(data.history_id_word_map_.empty());
 373   EXPECT_TRUE(data.history_info_map_.empty());
 374 }
 375
 376 // Helper function which compares two maps for equivalence. The maps' values
 377 // are associative containers and their contents are compared as well.
 378 template<typename T>
 379 void ExpectMapOfContainersIdentical(const T& expected, const T& actual) {
 380   ASSERT_EQ(expected.size(), actual.size());
 381   for (typename T::const_iterator expected_iter = expected.begin();
 382        expected_iter != expected.end(); ++expected_iter) {
 383     typename T::const_iterator actual_iter = actual.find(expected_iter->first);
 384     ASSERT_TRUE(actual.end() != actual_iter);
 385     typename T::mapped_type const& expected_values(expected_iter->second);
 386     typename T::mapped_type const& actual_values(actual_iter->second);
 387     ASSERT_EQ(expected_values.size(), actual_values.size());
 388     for (typename T::mapped_type::const_iterator set_iter =
 389          expected_values.begin(); set_iter != expected_values.end(); ++set_iter)
 390       EXPECT_EQ(actual_values.count(*set_iter),
 391                 expected_values.count(*set_iter));
 392   }
 393 }
 394
 395 void InMemoryURLIndexTest::ExpectPrivateDataEqual(
 396     const URLIndexPrivateData& expected,
 397     const URLIndexPrivateData& actual) {
 398   EXPECT_EQ(expected.word_list_.size(), actual.word_list_.size());
 399   EXPECT_EQ(expected.word_map_.size(), actual.word_map_.size());
 400   EXPECT_EQ(expected.char_word_map_.size(), actual.char_word_map_.size());
 401   EXPECT_EQ(expected.word_id_history_map_.size(),
 402             actual.word_id_history_map_.size());
 403   EXPECT_EQ(expected.history_id_word_map_.size(),
 404             actual.history_id_word_map_.size());
 405   EXPECT_EQ(expected.history_info_map_.size(), actual.history_info_map_.size());
 406   EXPECT_EQ(expected.word_starts_map_.size(), actual.word_starts_map_.size());
 407   // WordList must be index-by-index equal.
 408   size_t count = expected.word_list_.size();
 409   for (size_t i = 0; i < count; ++i)
 410     EXPECT_EQ(expected.word_list_[i], actual.word_list_[i]);
 411
 412   ExpectMapOfContainersIdentical(expected.char_word_map_,
 413                                  actual.char_word_map_);
 414   ExpectMapOfContainersIdentical(expected.word_id_history_map_,
 415                                  actual.word_id_history_map_);
 416   ExpectMapOfContainersIdentical(expected.history_id_word_map_,
 417                                  actual.history_id_word_map_);
 418
 419   for (HistoryInfoMap::const_iterator expected_info =
 420       expected.history_info_map_.begin();
 421       expected_info != expected.history_info_map_.end(); ++expected_info) {
 422     HistoryInfoMap::const_iterator actual_info =
 423         actual.history_info_map_.find(expected_info->first);
 424     // NOTE(yfriedman): ASSERT_NE can't be used due to incompatibility between
 425     // gtest and STLPort in the Android build. See
 426     // http://code.google.com/p/googletest/issues/detail?id=359
 427     ASSERT_TRUE(actual_info != actual.history_info_map_.end());
 428     const history::URLRow& expected_row(expected_info->second.url_row);
 429     const history::URLRow& actual_row(actual_info->second.url_row);
 430     EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count());
 431     EXPECT_EQ(expected_row.typed_count(), actual_row.typed_count());
 432     EXPECT_EQ(expected_row.last_visit(), actual_row.last_visit());
 433     EXPECT_EQ(expected_row.url(), actual_row.url());
 434     const VisitInfoVector& expected_visits(expected_info->second.visits);
 435     const VisitInfoVector& actual_visits(actual_info->second.visits);
 436     EXPECT_EQ(expected_visits.size(), actual_visits.size());
 437     for (size_t i = 0;
 438          i < std::min(expected_visits.size(), actual_visits.size()); ++i) {
 439       EXPECT_EQ(expected_visits[i].first, actual_visits[i].first);
 440       EXPECT_EQ(expected_visits[i].second, actual_visits[i].second);
 441     }
 442   }
 443
 444   for (WordStartsMap::const_iterator expected_starts =
 445       expected.word_starts_map_.begin();
 446       expected_starts != expected.word_starts_map_.end();
 447       ++expected_starts) {
 448     WordStartsMap::const_iterator actual_starts =
 449         actual.word_starts_map_.find(expected_starts->first);
 450     // NOTE(yfriedman): ASSERT_NE can't be used due to incompatibility between
 451     // gtest and STLPort in the Android build. See
 452     // http://code.google.com/p/googletest/issues/detail?id=359
 453     ASSERT_TRUE(actual_starts != actual.word_starts_map_.end());
 454     const RowWordStarts& expected_word_starts(expected_starts->second);
 455     const RowWordStarts& actual_word_starts(actual_starts->second);
 456     EXPECT_EQ(expected_word_starts.url_word_starts_.size(),
 457               actual_word_starts.url_word_starts_.size());
 458     EXPECT_TRUE(std::equal(expected_word_starts.url_word_starts_.begin(),
 459                            expected_word_starts.url_word_starts_.end(),
 460                            actual_word_starts.url_word_starts_.begin()));
 461     EXPECT_EQ(expected_word_starts.title_word_starts_.size(),
 462               actual_word_starts.title_word_starts_.size());
 463     EXPECT_TRUE(std::equal(expected_word_starts.title_word_starts_.begin(),
 464                            expected_word_starts.title_word_starts_.end(),
 465                            actual_word_starts.title_word_starts_.begin()));
 466   }
 467 }
 468
 469 //------------------------------------------------------------------------------
 470
 471 class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest {
 472  protected:
 473   base::FilePath::StringType TestDBName() const override;
 474   bool InitializeInMemoryURLIndexInSetUp() const override;
 475 };
 476
 477 base::FilePath::StringType LimitedInMemoryURLIndexTest::TestDBName() const {
 478   return FILE_PATH_LITERAL("url_history_provider_test_limited.db.txt");
 479 }
 480
 481 bool LimitedInMemoryURLIndexTest::InitializeInMemoryURLIndexInSetUp() const {
 482   return false;
 483 }
 484
 485 TEST_F(LimitedInMemoryURLIndexTest, Initialization) {
 486   // Verify that the database contains the expected number of items, which
 487   // is the pre-filtered count, i.e. all of the items.
 488   sql::Statement statement(GetDB().GetUniqueStatement("SELECT * FROM urls;"));
 489   ASSERT_TRUE(statement.is_valid());
 490   uint64 row_count = 0;
 491   while (statement.Step()) ++row_count;
 492   EXPECT_EQ(1U, row_count);
 493
 494   InitializeInMemoryURLIndex();
 495   URLIndexPrivateData& private_data(*GetPrivateData());
 496
 497   // history_info_map_ should have the same number of items as were filtered.
 498   EXPECT_EQ(1U, private_data.history_info_map_.size());
 499   EXPECT_EQ(35U, private_data.char_word_map_.size());
 500   EXPECT_EQ(17U, private_data.word_map_.size());
 501 }
 502
 503 #if defined(OS_WIN)
 504 // Flaky on windows trybots: http://crbug.com/351500
 505 #define MAYBE_Retrieval DISABLED_Retrieval
 506 #else
 507 #define MAYBE_Retrieval Retrieval
 508 #endif
 509 TEST_F(InMemoryURLIndexTest, MAYBE_Retrieval) {
 510   // See if a very specific term gives a single result.
 511   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 512       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches);
 513   ASSERT_EQ(1U, matches.size());
 514
 515   // Verify that we got back the result we expected.
 516   EXPECT_EQ(5, matches[0].url_info.id());
 517   EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
 518   EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
 519   EXPECT_TRUE(matches[0].can_inline);
 520
 521   // Make sure a trailing space prevents inline-ability but still results
 522   // in the expected result.
 523   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("DrudgeReport "),
 524                                              base::string16::npos, kMaxMatches);
 525   ASSERT_EQ(1U, matches.size());
 526   EXPECT_EQ(5, matches[0].url_info.id());
 527   EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
 528   EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
 529   EXPECT_FALSE(matches[0].can_inline);
 530
 531   // Search which should result in multiple results.
 532   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("drudge"),
 533                                              base::string16::npos, kMaxMatches);
 534   ASSERT_EQ(2U, matches.size());
 535   // The results should be in descending score order.
 536   EXPECT_GE(matches[0].raw_score, matches[1].raw_score);
 537
 538   // Search which should result in nearly perfect result.
 539   matches = url_index_->HistoryItemsForTerms(
 540       ASCIIToUTF16("Nearly Perfect Result"), base::string16::npos, kMaxMatches);
 541   ASSERT_EQ(1U, matches.size());
 542   // The results should have a very high score.
 543   EXPECT_GT(matches[0].raw_score, 900);
 544   EXPECT_EQ(32, matches[0].url_info.id());
 545   EXPECT_EQ("https://nearlyperfectresult.com/",
 546             matches[0].url_info.url().spec());  // Note: URL gets lowercased.
 547   EXPECT_EQ(ASCIIToUTF16("Practically Perfect Search Result"),
 548             matches[0].url_info.title());
 549   EXPECT_FALSE(matches[0].can_inline);
 550
 551   // Search which should result in very poor result.
 552   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("qui c"),
 553                                              base::string16::npos, kMaxMatches);
 554   ASSERT_EQ(1U, matches.size());
 555   // The results should have a poor score.
 556   EXPECT_LT(matches[0].raw_score, 500);
 557   EXPECT_EQ(33, matches[0].url_info.id());
 558   EXPECT_EQ("http://quiteuselesssearchresultxyz.com/",
 559             matches[0].url_info.url().spec());  // Note: URL gets lowercased.
 560   EXPECT_EQ(ASCIIToUTF16("Practically Useless Search Result"),
 561             matches[0].url_info.title());
 562   EXPECT_FALSE(matches[0].can_inline);
 563
 564   // Search which will match at the end of an URL with encoded characters.
 565   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("Mice"),
 566                                              base::string16::npos, kMaxMatches);
 567   ASSERT_EQ(1U, matches.size());
 568   EXPECT_EQ(30, matches[0].url_info.id());
 569   EXPECT_FALSE(matches[0].can_inline);
 570
 571   // Check that URLs are not escaped an escape time.
 572   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("1% wikipedia"),
 573                                              base::string16::npos, kMaxMatches);
 574   ASSERT_EQ(1U, matches.size());
 575   EXPECT_EQ(35, matches[0].url_info.id());
 576   EXPECT_EQ("http://en.wikipedia.org/wiki/1%25_rule_(Internet_culture)",
 577             matches[0].url_info.url().spec());
 578
 579   // Verify that a single term can appear multiple times in the URL and as long
 580   // as one starts the URL it is still inlined.
 581   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("fubar"),
 582                                              base::string16::npos, kMaxMatches);
 583   ASSERT_EQ(1U, matches.size());
 584   EXPECT_EQ(34, matches[0].url_info.id());
 585   EXPECT_EQ("http://fubarfubarandfubar.com/", matches[0].url_info.url().spec());
 586   EXPECT_EQ(ASCIIToUTF16("Situation Normal -- FUBARED"),
 587             matches[0].url_info.title());
 588   EXPECT_TRUE(matches[0].can_inline);
 589 }
 590
 591 TEST_F(InMemoryURLIndexTest, CursorPositionRetrieval) {
 592   // See if a very specific term with no cursor gives an empty result.
 593   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 594       ASCIIToUTF16("DrudReport"), base::string16::npos, kMaxMatches);
 595   ASSERT_EQ(0U, matches.size());
 596
 597   // The same test with the cursor at the end should give an empty result.
 598   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("DrudReport"), 10u,
 599                                              kMaxMatches);
 600   ASSERT_EQ(0U, matches.size());
 601
 602   // If the cursor is between Drud and Report, we should find the desired
 603   // result.
 604   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("DrudReport"), 4u,
 605                                              kMaxMatches);
 606   ASSERT_EQ(1U, matches.size());
 607   EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
 608   EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
 609
 610   // Now check multi-word inputs.  No cursor should fail to find a
 611   // result on this input.
 612   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("MORTGAGERATE DROPS"),
 613                                              base::string16::npos, kMaxMatches);
 614   ASSERT_EQ(0U, matches.size());
 615
 616   // Ditto with cursor at end.
 617   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("MORTGAGERATE DROPS"),
 618                                              18u, kMaxMatches);
 619   ASSERT_EQ(0U, matches.size());
 620
 621   // If the cursor is between MORTAGE And RATE, we should find the
 622   // desired result.
 623   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("MORTGAGERATE DROPS"),
 624                                              8u, kMaxMatches);
 625   ASSERT_EQ(1U, matches.size());
 626   EXPECT_EQ("http://www.reuters.com/article/idUSN0839880620100708",
 627             matches[0].url_info.url().spec());
 628   EXPECT_EQ(ASCIIToUTF16(
 629       "UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters"),
 630             matches[0].url_info.title());
 631 }
 632
 633 TEST_F(InMemoryURLIndexTest, URLPrefixMatching) {
 634   // "drudgere" - found, can inline
 635   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 636       ASCIIToUTF16("drudgere"), base::string16::npos, kMaxMatches);
 637   ASSERT_EQ(1U, matches.size());
 638   EXPECT_TRUE(matches[0].can_inline);
 639
 640   // "drudgere" - found, can inline
 641   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("drudgere"),
 642                                              base::string16::npos, kMaxMatches);
 643   ASSERT_EQ(1U, matches.size());
 644   EXPECT_TRUE(matches[0].can_inline);
 645
 646   // "www.atdmt" - not found
 647   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("www.atdmt"),
 648                                              base::string16::npos, kMaxMatches);
 649   EXPECT_EQ(0U, matches.size());
 650
 651   // "atdmt" - found, cannot inline
 652   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("atdmt"),
 653                                              base::string16::npos, kMaxMatches);
 654   ASSERT_EQ(1U, matches.size());
 655   EXPECT_FALSE(matches[0].can_inline);
 656
 657   // "view.atdmt" - found, can inline
 658   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("view.atdmt"),
 659                                              base::string16::npos, kMaxMatches);
 660   ASSERT_EQ(1U, matches.size());
 661   EXPECT_TRUE(matches[0].can_inline);
 662
 663   // "view.atdmt" - found, can inline
 664   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("view.atdmt"),
 665                                              base::string16::npos, kMaxMatches);
 666   ASSERT_EQ(1U, matches.size());
 667   EXPECT_TRUE(matches[0].can_inline);
 668
 669   // "cnn.com" - found, can inline
 670   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("cnn.com"),
 671                                              base::string16::npos, kMaxMatches);
 672   ASSERT_EQ(2U, matches.size());
 673   // One match should be inline-able, the other not.
 674   EXPECT_TRUE(matches[0].can_inline != matches[1].can_inline);
 675
 676   // "www.cnn.com" - found, can inline
 677   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("www.cnn.com"),
 678                                              base::string16::npos, kMaxMatches);
 679   ASSERT_EQ(1U, matches.size());
 680   EXPECT_TRUE(matches[0].can_inline);
 681
 682   // "ww.cnn.com" - found because we allow mid-term matches in hostnames
 683   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("ww.cnn.com"),
 684                                              base::string16::npos, kMaxMatches);
 685   ASSERT_EQ(1U, matches.size());
 686
 687   // "www.cnn.com" - found, can inline
 688   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("www.cnn.com"),
 689                                              base::string16::npos, kMaxMatches);
 690   ASSERT_EQ(1U, matches.size());
 691   EXPECT_TRUE(matches[0].can_inline);
 692
 693   // "tp://www.cnn.com" - not found because we don't allow tp as a mid-term
 694   // match
 695   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("tp://www.cnn.com"),
 696                                              base::string16::npos, kMaxMatches);
 697   ASSERT_EQ(0U, matches.size());
 698 }
 699
 700 TEST_F(InMemoryURLIndexTest, ProperStringMatching) {
 701   // Search for the following with the expected results:
 702   // "atdmt view" - found
 703   // "atdmt.view" - not found
 704   // "view.atdmt" - found
 705   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 706       ASCIIToUTF16("atdmt view"), base::string16::npos, kMaxMatches);
 707   ASSERT_EQ(1U, matches.size());
 708   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("atdmt.view"),
 709                                              base::string16::npos, kMaxMatches);
 710   ASSERT_EQ(0U, matches.size());
 711   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("view.atdmt"),
 712                                              base::string16::npos, kMaxMatches);
 713   ASSERT_EQ(1U, matches.size());
 714 }
 715
 716 TEST_F(InMemoryURLIndexTest, HugeResultSet) {
 717   // Create a huge set of qualifying history items.
 718   for (history::URLID row_id = 5000; row_id < 6000; ++row_id) {
 719     history::URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"),
 720                             row_id);
 721     new_row.set_last_visit(base::Time::Now());
 722     EXPECT_TRUE(UpdateURL(new_row));
 723   }
 724
 725   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 726       ASCIIToUTF16("b"), base::string16::npos, kMaxMatches);
 727   URLIndexPrivateData& private_data(*GetPrivateData());
 728   ASSERT_EQ(kMaxMatches, matches.size());
 729   // There are 7 matches already in the database.
 730   ASSERT_EQ(1008U, private_data.pre_filter_item_count_);
 731   ASSERT_EQ(500U, private_data.post_filter_item_count_);
 732   ASSERT_EQ(kMaxMatches, private_data.post_scoring_item_count_);
 733 }
 734
 735 #if defined(OS_WIN)
 736 // Flaky on windows trybots: http://crbug.com/351500
 737 #define MAYBE_TitleSearch DISABLED_TitleSearch
 738 #else
 739 #define MAYBE_TitleSearch TitleSearch
 740 #endif
 741 TEST_F(InMemoryURLIndexTest, MAYBE_TitleSearch) {
 742   // Signal if someone has changed the test DB.
 743   EXPECT_EQ(29U, GetPrivateData()->history_info_map_.size());
 744
 745   // Ensure title is being searched.
 746   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 747       ASCIIToUTF16("MORTGAGE RATE DROPS"), base::string16::npos, kMaxMatches);
 748   ASSERT_EQ(1U, matches.size());
 749
 750   // Verify that we got back the result we expected.
 751   EXPECT_EQ(1, matches[0].url_info.id());
 752   EXPECT_EQ("http://www.reuters.com/article/idUSN0839880620100708",
 753             matches[0].url_info.url().spec());
 754   EXPECT_EQ(ASCIIToUTF16(
 755       "UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters"),
 756       matches[0].url_info.title());
 757 }
 758
 759 TEST_F(InMemoryURLIndexTest, TitleChange) {
 760   // Verify current title terms retrieves desired item.
 761   base::string16 original_terms =
 762       ASCIIToUTF16("lebronomics could high taxes influence");
 763   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 764       original_terms, base::string16::npos, kMaxMatches);
 765   ASSERT_EQ(1U, matches.size());
 766
 767   // Verify that we got back the result we expected.
 768   const history::URLID expected_id = 3;
 769   EXPECT_EQ(expected_id, matches[0].url_info.id());
 770   EXPECT_EQ("http://www.businessandmedia.org/articles/2010/20100708120415.aspx",
 771             matches[0].url_info.url().spec());
 772   EXPECT_EQ(ASCIIToUTF16(
 773       "LeBronomics: Could High Taxes Influence James' Team Decision?"),
 774       matches[0].url_info.title());
 775   history::URLRow old_row(matches[0].url_info);
 776
 777   // Verify new title terms retrieves nothing.
 778   base::string16 new_terms = ASCIIToUTF16("does eat oats little lambs ivy");
 779   matches = url_index_->HistoryItemsForTerms(new_terms, base::string16::npos,
 780                                              kMaxMatches);
 781   ASSERT_EQ(0U, matches.size());
 782
 783   // Update the row.
 784   old_row.set_title(ASCIIToUTF16("Does eat oats and little lambs eat ivy"));
 785   EXPECT_TRUE(UpdateURL(old_row));
 786
 787   // Verify we get the row using the new terms but not the original terms.
 788   matches = url_index_->HistoryItemsForTerms(new_terms, base::string16::npos,
 789                                              kMaxMatches);
 790   ASSERT_EQ(1U, matches.size());
 791   EXPECT_EQ(expected_id, matches[0].url_info.id());
 792   matches = url_index_->HistoryItemsForTerms(original_terms,
 793                                              base::string16::npos, kMaxMatches);
 794   ASSERT_EQ(0U, matches.size());
 795 }
 796
 797 TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) {
 798   // The presence of duplicate characters should succeed. Exercise by cycling
 799   // through a string with several duplicate characters.
 800   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 801       ASCIIToUTF16("ABRA"), base::string16::npos, kMaxMatches);
 802   ASSERT_EQ(1U, matches.size());
 803   EXPECT_EQ(28, matches[0].url_info.id());
 804   EXPECT_EQ("http://www.ddj.com/windows/184416623",
 805             matches[0].url_info.url().spec());
 806
 807   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("ABRACAD"),
 808                                              base::string16::npos, kMaxMatches);
 809   ASSERT_EQ(1U, matches.size());
 810   EXPECT_EQ(28, matches[0].url_info.id());
 811
 812   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("ABRACADABRA"),
 813                                              base::string16::npos, kMaxMatches);
 814   ASSERT_EQ(1U, matches.size());
 815   EXPECT_EQ(28, matches[0].url_info.id());
 816
 817   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("ABRACADABR"),
 818                                              base::string16::npos, kMaxMatches);
 819   ASSERT_EQ(1U, matches.size());
 820   EXPECT_EQ(28, matches[0].url_info.id());
 821
 822   matches = url_index_->HistoryItemsForTerms(ASCIIToUTF16("ABRACA"),
 823                                              base::string16::npos, kMaxMatches);
 824   ASSERT_EQ(1U, matches.size());
 825   EXPECT_EQ(28, matches[0].url_info.id());
 826 }
 827
 828 TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
 829   // Verify that match results for previously typed characters are retained
 830   // (in the term_char_word_set_cache_) and reused, if possible, in future
 831   // autocompletes.
 832
 833   URLIndexPrivateData::SearchTermCacheMap& cache(
 834       GetPrivateData()->search_term_cache_);
 835
 836   // The cache should be empty at this point.
 837   EXPECT_EQ(0U, cache.size());
 838
 839   // Now simulate typing search terms into the omnibox and check the state of
 840   // the cache as each item is 'typed'.
 841
 842   // Simulate typing "r" giving "r" in the simulated omnibox. The results for
 843   // 'r' will be not cached because it is only 1 character long.
 844   url_index_->HistoryItemsForTerms(ASCIIToUTF16("r"), base::string16::npos,
 845                                    kMaxMatches);
 846   EXPECT_EQ(0U, cache.size());
 847
 848   // Simulate typing "re" giving "r re" in the simulated omnibox.
 849   // 're' should be cached at this point but not 'r' as it is a single
 850   // character.
 851   url_index_->HistoryItemsForTerms(ASCIIToUTF16("r re"), base::string16::npos,
 852                                    kMaxMatches);
 853   ASSERT_EQ(1U, cache.size());
 854   CheckTerm(cache, ASCIIToUTF16("re"));
 855
 856   // Simulate typing "reco" giving "r re reco" in the simulated omnibox.
 857   // 're' and 'reco' should be cached at this point but not 'r' as it is a
 858   // single character.
 859   url_index_->HistoryItemsForTerms(ASCIIToUTF16("r re reco"),
 860                                    base::string16::npos, kMaxMatches);
 861   ASSERT_EQ(2U, cache.size());
 862   CheckTerm(cache, ASCIIToUTF16("re"));
 863   CheckTerm(cache, ASCIIToUTF16("reco"));
 864
 865   // Simulate typing "mort".
 866   // Since we now have only one search term, the cached results for 're' and
 867   // 'reco' should be purged, giving us only 1 item in the cache (for 'mort').
 868   url_index_->HistoryItemsForTerms(ASCIIToUTF16("mort"), base::string16::npos,
 869                                    kMaxMatches);
 870   ASSERT_EQ(1U, cache.size());
 871   CheckTerm(cache, ASCIIToUTF16("mort"));
 872
 873   // Simulate typing "reco" giving "mort reco" in the simulated omnibox.
 874   url_index_->HistoryItemsForTerms(ASCIIToUTF16("mort reco"),
 875                                    base::string16::npos, kMaxMatches);
 876   ASSERT_EQ(2U, cache.size());
 877   CheckTerm(cache, ASCIIToUTF16("mort"));
 878   CheckTerm(cache, ASCIIToUTF16("reco"));
 879
 880   // Simulate a <DELETE> by removing the 'reco' and adding back the 'rec'.
 881   url_index_->HistoryItemsForTerms(ASCIIToUTF16("mort rec"),
 882                                    base::string16::npos, kMaxMatches);
 883   ASSERT_EQ(2U, cache.size());
 884   CheckTerm(cache, ASCIIToUTF16("mort"));
 885   CheckTerm(cache, ASCIIToUTF16("rec"));
 886 }
 887
 888 TEST_F(InMemoryURLIndexTest, AddNewRows) {
 889   // Verify that the row we're going to add does not already exist.
 890   history::URLID new_row_id = 87654321;
 891   // Newly created history::URLRows get a last_visit time of 'right now' so it
 892   // should
 893   // qualify as a quick result candidate.
 894   EXPECT_TRUE(url_index_->HistoryItemsForTerms(ASCIIToUTF16("brokeandalone"),
 895                                                base::string16::npos,
 896                                                kMaxMatches).empty());
 897
 898   // Add a new row.
 899   history::URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"),
 900                           new_row_id++);
 901   new_row.set_last_visit(base::Time::Now());
 902   EXPECT_TRUE(UpdateURL(new_row));
 903
 904   // Verify that we can retrieve it.
 905   EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(ASCIIToUTF16("brokeandalone"),
 906                                                  base::string16::npos,
 907                                                  kMaxMatches).size());
 908
 909   // Add it again just to be sure that is harmless and that it does not update
 910   // the index.
 911   EXPECT_FALSE(UpdateURL(new_row));
 912   EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(ASCIIToUTF16("brokeandalone"),
 913                                                  base::string16::npos,
 914                                                  kMaxMatches).size());
 915
 916   // Make up an URL that does not qualify and try to add it.
 917   history::URLRow unqualified_row(
 918       GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id++);
 919   EXPECT_FALSE(UpdateURL(new_row));
 920 }
 921
 922 TEST_F(InMemoryURLIndexTest, DeleteRows) {
 923   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 924       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches);
 925   ASSERT_EQ(1U, matches.size());
 926
 927   // Delete the URL then search again.
 928   EXPECT_TRUE(DeleteURL(matches[0].url_info.url()));
 929   EXPECT_TRUE(url_index_->HistoryItemsForTerms(ASCIIToUTF16("DrudgeReport"),
 930                                                base::string16::npos,
 931                                                kMaxMatches).empty());
 932
 933   // Make up an URL that does not exist in the database and delete it.
 934   GURL url("http://www.hokeypokey.com/putyourrightfootin.html");
 935   EXPECT_FALSE(DeleteURL(url));
 936 }
 937
 938 TEST_F(InMemoryURLIndexTest, ExpireRow) {
 939   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
 940       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches);
 941   ASSERT_EQ(1U, matches.size());
 942
 943   // Determine the row id for the result, remember that id, broadcast a
 944   // delete notification, then ensure that the row has been deleted.
 945   history::URLRows deleted_rows;
 946   deleted_rows.push_back(matches[0].url_info);
 947   url_index_->OnURLsDeleted(nullptr, false, false, deleted_rows,
 948                             std::set<GURL>());
 949   EXPECT_TRUE(url_index_->HistoryItemsForTerms(ASCIIToUTF16("DrudgeReport"),
 950                                                base::string16::npos,
 951                                                kMaxMatches).empty());
 952 }
 953
 954 TEST_F(InMemoryURLIndexTest, WhitelistedURLs) {
 955   std::string client_whitelisted_url =
 956       base::StringPrintf("%s://foo", kClientWhitelistedScheme);
 957   struct TestData {
 958     const std::string url_spec;
 959     const bool expected_is_whitelisted;
 960   } data[] = {
 961     // URLs with whitelisted schemes.
 962     { "about:histograms", true },
 963     { "file://localhost/Users/joeschmoe/sekrets", true },
 964     { "ftp://public.mycompany.com/myfile.txt", true },
 965     { "http://www.google.com/translate", true },
 966     { "https://www.gmail.com/", true },
 967     { "mailto:support@google.com", true },
 968     { client_whitelisted_url, true },
 969     // URLs with unacceptable schemes.
 970     { "aaa://www.dummyhost.com;frammy", false },
 971     { "aaas://www.dummyhost.com;frammy", false },
 972     { "acap://suzie@somebody.com", false },
 973     { "cap://cal.example.com/Company/Holidays", false },
 974     { "cid:foo4*foo1@bar.net", false },
 975     { "crid://example.com/foobar", false },
 976     { "data:image/png;base64,iVBORw0KGgoAAAANSUhE=", false },
 977     { "dict://dict.org/d:shortcake:", false },
 978     { "dns://192.168.1.1/ftp.example.org?type=A", false },
 979     { "fax:+358.555.1234567", false },
 980     { "geo:13.4125,103.8667", false },
 981     { "go:Mercedes%20Benz", false },
 982     { "gopher://farnsworth.ca:666/gopher", false },
 983     { "h323:farmer-john;sixpence", false },
 984     { "iax:johnQ@example.com/12022561414", false },
 985     { "icap://icap.net/service?mode=translate&lang=french", false },
 986     { "im:fred@example.com", false },
 987     { "imap://michael@minbari.org/users.*", false },
 988     { "info:ddc/22/eng//004.678", false },
 989     { "ipp://example.com/printer/fox", false },
 990     { "iris:dreg1//example.com/local/myhosts", false },
 991     { "iris.beep:dreg1//example.com/local/myhosts", false },
 992     { "iris.lws:dreg1//example.com/local/myhosts", false },
 993     { "iris.xpc:dreg1//example.com/local/myhosts", false },
 994     { "iris.xpcs:dreg1//example.com/local/myhosts", false },
 995     { "ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US", false },
 996     { "mid:foo4%25foo1@bar.net", false },
 997     { "modem:+3585551234567;type=v32b?7e1;type=v110", false },
 998     { "msrp://atlanta.example.com:7654/jshA7weztas;tcp", false },
 999     { "msrps://atlanta.example.com:7654/jshA7weztas;tcp", false },
1000     { "news:colorectal.info.banned", false },
1001     { "nfs://server/d/e/f", false },
1002     { "nntp://www.example.com:6543/info.comp.lies/1234", false },
1003     { "pop://rg;AUTH=+APOP@mail.mycompany.com:8110", false },
1004     { "pres:fred@example.com", false },
1005     { "prospero://host.dom//pros/name", false },
1006     { "rsync://syler@lost.com/Source", false },
1007     { "rtsp://media.example.com:554/twister/audiotrack", false },
1008     { "service:acap://some.where.net;authentication=KERBEROSV4", false },
1009     { "shttp://www.terces.com/secret", false },
1010     { "sieve://example.com//script", false },
1011     { "sip:+1-212-555-1212:1234@gateway.com;user=phone", false },
1012     { "sips:+1-212-555-1212:1234@gateway.com;user=phone", false },
1013     { "sms:+15105551212?body=hello%20there", false },
1014     { "snmp://tester5@example.com:8161/bridge1;800002b804616263", false },
1015     { "soap.beep://stockquoteserver.example.com/StockQuote", false },
1016     { "soap.beeps://stockquoteserver.example.com/StockQuote", false },
1017     { "tag:blogger.com,1999:blog-555", false },
1018     { "tel:+358-555-1234567;postd=pp22", false },
1019     { "telnet://mayor_margie:one2rule4All@www.mycity.com:6789/", false },
1020     { "tftp://example.com/mystartupfile", false },
1021     { "tip://123.123.123.123/?urn:xopen:xid", false },
1022     { "tv:nbc.com", false },
1023     { "urn:foo:A123,456", false },
1024     { "vemmi://zeus.mctel.fr/demo", false },
1025     { "wais://www.mydomain.net:8765/mydatabase", false },
1026     { "xmpp:node@example.com", false },
1027     { "xmpp://guest@example.com", false },
1028   };
1029
1030   const SchemeSet& whitelist(scheme_whitelist());
1031   for (size_t i = 0; i < arraysize(data); ++i) {
1032     GURL url(data[i].url_spec);
1033     EXPECT_EQ(data[i].expected_is_whitelisted,
1034               URLIndexPrivateData::URLSchemeIsWhitelisted(url, whitelist));
1035   }
1036 }
1037
1038 TEST_F(InMemoryURLIndexTest, ReadVisitsFromHistory) {
1039   const HistoryInfoMap& history_info_map = GetPrivateData()->history_info_map_;
1040
1041   // Check (for URL with id 1) that the number of visits and their
1042   // transition types are what we expect.  We don't bother checking
1043   // the timestamps because it's too much trouble.  (The timestamps go
1044   // through a transformation in InMemoryURLIndexTest::SetUp().  We
1045   // assume that if the count and transitions show up with the right
1046   // information, we're getting the right information from the history
1047   // database file.)
1048   HistoryInfoMap::const_iterator entry = history_info_map.find(1);
1049   ASSERT_TRUE(entry != history_info_map.end());
1050   {
1051     const VisitInfoVector& visits = entry->second.visits;
1052     EXPECT_EQ(3u, visits.size());
1053     EXPECT_EQ(0u, visits[0].second);
1054     EXPECT_EQ(1u, visits[1].second);
1055     EXPECT_EQ(0u, visits[2].second);
1056   }
1057
1058   // Ditto but for URL with id 35.
1059   entry = history_info_map.find(35);
1060   ASSERT_TRUE(entry != history_info_map.end());
1061   {
1062     const VisitInfoVector& visits = entry->second.visits;
1063     EXPECT_EQ(2u, visits.size());
1064     EXPECT_EQ(1u, visits[0].second);
1065     EXPECT_EQ(1u, visits[1].second);
1066   }
1067
1068   // The URL with id 32 has many visits listed in the database, but we
1069   // should only read the most recent 10 (which are all transition type 0).
1070   entry = history_info_map.find(32);
1071   ASSERT_TRUE(entry != history_info_map.end());
1072   {
1073     const VisitInfoVector& visits = entry->second.visits;
1074     EXPECT_EQ(10u, visits.size());
1075     for (size_t i = 0; i < visits.size(); ++i)
1076       EXPECT_EQ(0u, visits[i].second);
1077   }
1078 }
1079
1080 TEST_F(InMemoryURLIndexTest, CacheSaveRestore) {
1081   base::ScopedTempDir temp_directory;
1082   ASSERT_TRUE(temp_directory.CreateUniqueTempDir());
1083   set_history_dir(temp_directory.path());
1084
1085   URLIndexPrivateData& private_data(*GetPrivateData());
1086
1087   // Ensure that there is really something there to be saved.
1088   EXPECT_FALSE(private_data.word_list_.empty());
1089   // available_words_ will already be empty since we have freshly built the
1090   // data set for this test.
1091   EXPECT_TRUE(private_data.available_words_.empty());
1092   EXPECT_FALSE(private_data.word_map_.empty());
1093   EXPECT_FALSE(private_data.char_word_map_.empty());
1094   EXPECT_FALSE(private_data.word_id_history_map_.empty());
1095   EXPECT_FALSE(private_data.history_id_word_map_.empty());
1096   EXPECT_FALSE(private_data.history_info_map_.empty());
1097   EXPECT_FALSE(private_data.word_starts_map_.empty());
1098
1099   // Make sure the data we have was built from history.  (Version 0
1100   // means rebuilt from history.)
1101   EXPECT_EQ(0, private_data.restored_cache_version_);
1102
1103   // Capture the current private data for later comparison to restored data.
1104   scoped_refptr<URLIndexPrivateData> old_data(private_data.Duplicate());
1105   const base::Time rebuild_time = private_data.last_time_rebuilt_from_history_;
1106
1107   {
1108     // Save then restore our private data.
1109     base::RunLoop run_loop;
1110     CacheFileSaverObserver save_observer(run_loop.QuitClosure());
1111     url_index_->set_save_cache_observer(&save_observer);
1112     PostSaveToCacheFileTask();
1113     run_loop.Run();
1114     EXPECT_TRUE(save_observer.succeeded());
1115   }
1116
1117   // Clear and then prove it's clear before restoring.
1118   ClearPrivateData();
1119   EXPECT_TRUE(private_data.word_list_.empty());
1120   EXPECT_TRUE(private_data.available_words_.empty());
1121   EXPECT_TRUE(private_data.word_map_.empty());
1122   EXPECT_TRUE(private_data.char_word_map_.empty());
1123   EXPECT_TRUE(private_data.word_id_history_map_.empty());
1124   EXPECT_TRUE(private_data.history_id_word_map_.empty());
1125   EXPECT_TRUE(private_data.history_info_map_.empty());
1126   EXPECT_TRUE(private_data.word_starts_map_.empty());
1127
1128   {
1129     base::RunLoop run_loop;
1130     HistoryIndexRestoreObserver restore_observer(run_loop.QuitClosure());
1131     url_index_->set_restore_cache_observer(&restore_observer);
1132     PostRestoreFromCacheFileTask();
1133     run_loop.Run();
1134     EXPECT_TRUE(restore_observer.succeeded());
1135   }
1136
1137   URLIndexPrivateData& new_data(*GetPrivateData());
1138
1139   // Make sure the data we have was reloaded from cache.  (Version 0
1140   // means rebuilt from history; anything else means restored from
1141   // a cache version.)  Also, the rebuild time should not have changed.
1142   EXPECT_GT(new_data.restored_cache_version_, 0);
1143   EXPECT_EQ(rebuild_time, new_data.last_time_rebuilt_from_history_);
1144
1145   // Compare the captured and restored for equality.
1146   ExpectPrivateDataEqual(*old_data.get(), new_data);
1147 }
1148
1149 #if defined(OS_WIN)
1150 // http://crbug.com/351500
1151 #define MAYBE_RebuildFromHistoryIfCacheOld DISABLED_RebuildFromHistoryIfCacheOld
1152 #else
1153 #define MAYBE_RebuildFromHistoryIfCacheOld RebuildFromHistoryIfCacheOld
1154 #endif
1155 TEST_F(InMemoryURLIndexTest, MAYBE_RebuildFromHistoryIfCacheOld) {
1156   base::ScopedTempDir temp_directory;
1157   ASSERT_TRUE(temp_directory.CreateUniqueTempDir());
1158   set_history_dir(temp_directory.path());
1159
1160   URLIndexPrivateData& private_data(*GetPrivateData());
1161
1162   // Ensure that there is really something there to be saved.
1163   EXPECT_FALSE(private_data.word_list_.empty());
1164   // available_words_ will already be empty since we have freshly built the
1165   // data set for this test.
1166   EXPECT_TRUE(private_data.available_words_.empty());
1167   EXPECT_FALSE(private_data.word_map_.empty());
1168   EXPECT_FALSE(private_data.char_word_map_.empty());
1169   EXPECT_FALSE(private_data.word_id_history_map_.empty());
1170   EXPECT_FALSE(private_data.history_id_word_map_.empty());
1171   EXPECT_FALSE(private_data.history_info_map_.empty());
1172   EXPECT_FALSE(private_data.word_starts_map_.empty());
1173
1174   // Make sure the data we have was built from history.  (Version 0
1175   // means rebuilt from history.)
1176   EXPECT_EQ(0, private_data.restored_cache_version_);
1177
1178   // Overwrite the build time so that we'll think the data is too old
1179   // and rebuild the cache from history.
1180   const base::Time fake_rebuild_time =
1181       private_data.last_time_rebuilt_from_history_ -
1182       base::TimeDelta::FromDays(30);
1183   private_data.last_time_rebuilt_from_history_ = fake_rebuild_time;
1184
1185   // Capture the current private data for later comparison to restored data.
1186   scoped_refptr<URLIndexPrivateData> old_data(private_data.Duplicate());
1187
1188   {
1189     // Save then restore our private data.
1190     base::RunLoop run_loop;
1191     CacheFileSaverObserver save_observer(run_loop.QuitClosure());
1192     url_index_->set_save_cache_observer(&save_observer);
1193     PostSaveToCacheFileTask();
1194     run_loop.Run();
1195     EXPECT_TRUE(save_observer.succeeded());
1196   }
1197
1198   // Clear and then prove it's clear before restoring.
1199   ClearPrivateData();
1200   EXPECT_TRUE(private_data.word_list_.empty());
1201   EXPECT_TRUE(private_data.available_words_.empty());
1202   EXPECT_TRUE(private_data.word_map_.empty());
1203   EXPECT_TRUE(private_data.char_word_map_.empty());
1204   EXPECT_TRUE(private_data.word_id_history_map_.empty());
1205   EXPECT_TRUE(private_data.history_id_word_map_.empty());
1206   EXPECT_TRUE(private_data.history_info_map_.empty());
1207   EXPECT_TRUE(private_data.word_starts_map_.empty());
1208
1209   {
1210     base::RunLoop run_loop;
1211     HistoryIndexRestoreObserver restore_observer(run_loop.QuitClosure());
1212     url_index_->set_restore_cache_observer(&restore_observer);
1213     PostRestoreFromCacheFileTask();
1214     run_loop.Run();
1215     EXPECT_TRUE(restore_observer.succeeded());
1216   }
1217
1218   URLIndexPrivateData& new_data(*GetPrivateData());
1219
1220   // Make sure the data we have was rebuilt from history.  (Version 0
1221   // means rebuilt from history; anything else means restored from
1222   // a cache version.)
1223   EXPECT_EQ(0, new_data.restored_cache_version_);
1224   EXPECT_NE(fake_rebuild_time, new_data.last_time_rebuilt_from_history_);
1225
1226   // Compare the captured and restored for equality.
1227   ExpectPrivateDataEqual(*old_data.get(), new_data);
1228 }
1229
1230 TEST_F(InMemoryURLIndexTest, AddHistoryMatch) {
1231   const struct {
1232     const char* search_string;
1233     size_t cursor_position;
1234     const size_t expected_word_starts_offsets_size;
1235     const size_t expected_word_starts_offsets[3];
1236   } test_cases[] = {
1237     /* No punctuations, only cursor position change. */
1238     { "ABCD", kInvalid, 1, {0, kInvalid, kInvalid} },
1239     { "abcd", 0,        1, {0, kInvalid, kInvalid} },
1240     { "AbcD", 1,        2, {0, 0, kInvalid} },
1241     { "abcd", 4,        1, {0, kInvalid, kInvalid} },
1242
1243     /* Starting with punctuation. */
1244     { ".abcd",  kInvalid, 1, {1, kInvalid, kInvalid} },
1245     { ".abcd",  0,        1, {1, kInvalid, kInvalid} },
1246     { "!abcd",  1,        2, {1, 0, kInvalid} },
1247     { "::abcd", 1,        2, {1, 1, kInvalid} },
1248     { ":abcd",  5,        1, {1, kInvalid, kInvalid} },
1249
1250     /* Ending with punctuation. */
1251     { "abcd://", kInvalid, 1, {0, kInvalid, kInvalid} },
1252     { "ABCD://", 0,        1, {0, kInvalid, kInvalid} },
1253     { "abcd://", 1,        2, {0, 0, kInvalid} },
1254     { "abcd://", 4,        2, {0, 3, kInvalid} },
1255     { "abcd://", 7,        1, {0, kInvalid, kInvalid} },
1256
1257     /* Punctuation in the middle. */
1258     { "ab.cd", kInvalid, 1, {0, kInvalid, kInvalid} },
1259     { "ab.cd", 0,        1, {0, kInvalid, kInvalid} },
1260     { "ab!cd", 1,        2, {0, 0, kInvalid} },
1261     { "AB.cd", 2,        2, {0, 1, kInvalid} },
1262     { "AB.cd", 3,        2, {0, 0, kInvalid} },
1263     { "ab:cd", 5,        1, {0, kInvalid, kInvalid} },
1264
1265     /* Hyphenation */
1266     { "Ab-cd", kInvalid, 1, {0, kInvalid, kInvalid} },
1267     { "ab-cd", 0,        1, {0, kInvalid, kInvalid} },
1268     { "-abcd", 0,        1, {1, kInvalid, kInvalid} },
1269     { "-abcd", 1,        2, {1, 0, kInvalid} },
1270     { "abcd-", 2,        2, {0, 0, kInvalid} },
1271     { "abcd-", 4,        2, {0, 1, kInvalid} },
1272     { "ab-cd", 5,        1, {0, kInvalid, kInvalid} },
1273
1274     /* Whitespace */
1275     { "Ab cd",  kInvalid, 2, {0, 0, kInvalid} },
1276     { "ab cd",  0,        2, {0, 0, kInvalid} },
1277     { " abcd",  0,        1, {0, kInvalid, kInvalid} },
1278     { " abcd",  1,        1, {0, kInvalid, kInvalid} },
1279     { "abcd ",  2,        2, {0, 0, kInvalid} },
1280     { "abcd :", 4,        2, {0, 1, kInvalid} },
1281     { "abcd :", 5,        2, {0, 1, kInvalid} },
1282     { "abcd :", 2,        3, {0, 0, 1} }
1283   };
1284
1285   for (size_t i = 0; i < arraysize(test_cases); ++i) {
1286     SCOPED_TRACE(testing::Message()
1287                  << "search_string = " << test_cases[i].search_string
1288                  << ", cursor_position = " << test_cases[i].cursor_position);
1289
1290     base::string16 lower_string;
1291     String16Vector lower_terms;
1292     StringToTerms(test_cases[i].search_string, test_cases[i].cursor_position,
1293                   &lower_string, &lower_terms);
1294     URLIndexPrivateData::AddHistoryMatch match(nullptr, *GetPrivateData(),
1295                                                kTestLanguages, lower_string,
1296                                                lower_terms, base::Time::Now());
1297
1298     // Verify against expectations.
1299     EXPECT_EQ(test_cases[i].expected_word_starts_offsets_size,
1300               match.lower_terms_to_word_starts_offsets_.size());
1301     for (size_t j = 0; j < test_cases[i].expected_word_starts_offsets_size;
1302          ++j) {
1303       EXPECT_EQ(test_cases[i].expected_word_starts_offsets[j],
1304                 match.lower_terms_to_word_starts_offsets_[j]);
1305     }
1306   }
1307 }
1308
1309 class InMemoryURLIndexCacheTest : public testing::Test {
1310  public:
1311   InMemoryURLIndexCacheTest() {}
1312
1313  protected:
1314   void SetUp() override;
1315   void TearDown() override;
1316
1317   // Pass-through functions to simplify our friendship with InMemoryURLIndex.
1318   void set_history_dir(const base::FilePath& dir_path);
1319   bool GetCacheFilePath(base::FilePath* file_path) const;
1320
1321   content::TestBrowserThreadBundle thread_bundle_;
1322   base::ScopedTempDir temp_dir_;
1323   scoped_ptr<InMemoryURLIndex> url_index_;
1324 };
1325
1326 void InMemoryURLIndexCacheTest::SetUp() {
1327   ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
1328   base::FilePath path(temp_dir_.path());
1329   url_index_.reset(new InMemoryURLIndex(
1330       nullptr, nullptr, content::BrowserThread::GetBlockingPool(), path,
1331       kTestLanguages, SchemeSet()));
1332 }
1333
1334 void InMemoryURLIndexCacheTest::TearDown() {
1335   if (url_index_)
1336     url_index_->Shutdown();
1337 }
1338
1339 void InMemoryURLIndexCacheTest::set_history_dir(
1340     const base::FilePath& dir_path) {
1341   return url_index_->set_history_dir(dir_path);
1342 }
1343
1344 bool InMemoryURLIndexCacheTest::GetCacheFilePath(
1345     base::FilePath* file_path) const {
1346   DCHECK(file_path);
1347   return url_index_->GetCacheFilePath(file_path);
1348 }
1349
1350 TEST_F(InMemoryURLIndexCacheTest, CacheFilePath) {
1351   base::FilePath expectedPath =
1352       temp_dir_.path().Append(FILE_PATH_LITERAL("History Provider Cache"));
1353   std::vector<base::FilePath::StringType> expected_parts;
1354   expectedPath.GetComponents(&expected_parts);
1355   base::FilePath full_file_path;
1356   ASSERT_TRUE(GetCacheFilePath(&full_file_path));
1357   std::vector<base::FilePath::StringType> actual_parts;
1358   full_file_path.GetComponents(&actual_parts);
1359   ASSERT_EQ(expected_parts.size(), actual_parts.size());
1360   size_t count = expected_parts.size();
1361   for (size_t i = 0; i < count; ++i)
1362     EXPECT_EQ(expected_parts[i], actual_parts[i]);
1363   // Must clear the history_dir_ to satisfy the dtor's DCHECK.
1364   set_history_dir(base::FilePath());
1365 }