Componentize HistoryURLProvider/ScoredHistoryMatch.
[chromium-blink-merge.git] / chrome / browser / autocomplete / in_memory_url_index.h
blobc881a1ff132ca4566b059cb4a5dd5d8f539d6a79
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_AUTOCOMPLETE_IN_MEMORY_URL_INDEX_H_
6 #define CHROME_BROWSER_AUTOCOMPLETE_IN_MEMORY_URL_INDEX_H_
8 #include <functional>
9 #include <map>
10 #include <set>
11 #include <string>
12 #include <vector>
14 #include "base/basictypes.h"
15 #include "base/files/file_path.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/ref_counted.h"
18 #include "base/memory/weak_ptr.h"
19 #include "base/strings/string16.h"
20 #include "base/task/cancelable_task_tracker.h"
21 #include "components/history/core/browser/history_db_task.h"
22 #include "components/history/core/browser/history_service_observer.h"
23 #include "components/history/core/browser/history_types.h"
24 #include "components/keyed_service/core/keyed_service.h"
25 #include "components/omnibox/scored_history_match.h"
27 class HistoryQuickProviderTest;
29 namespace base {
30 class SequencedTaskRunner;
31 class Time;
34 namespace bookmarks {
35 class BookmarkModel;
38 namespace in_memory_url_index {
39 class InMemoryURLIndexCacheItem;
42 namespace history {
43 class HistoryDatabase;
44 class HistoryService;
47 class URLIndexPrivateData;
49 // The URL history source.
50 // Holds portions of the URL database in memory in an indexed form. Used to
51 // quickly look up matching URLs for a given query string. Used by
52 // the HistoryURLProvider for inline autocomplete and to provide URL
53 // matches to the omnibox.
55 // Note about multi-byte codepoints and the data structures in the
56 // InMemoryURLIndex class: One will quickly notice that no effort is made to
57 // ensure that multi-byte character boundaries are detected when indexing the
58 // words and characters in the URL history database except when converting
59 // URL strings to lowercase. Multi-byte-edness makes no difference when
60 // indexing or when searching the index as the final filtering of results
61 // is dependent on the comparison of a string of bytes, not individual
62 // characters. While the lookup of those bytes during a search in the
63 // |char_word_map_| could serve up words in which the individual char16
64 // occurs as a portion of a composite character, the next filtering step
65 // will eliminate such words except in the case where a single character
66 // is being searched on and which character occurs as the second char16 of a
67 // multi-char16 instance.
68 class InMemoryURLIndex : public KeyedService,
69 public history::HistoryServiceObserver,
70 public base::SupportsWeakPtr<InMemoryURLIndex> {
71 public:
72 // Defines an abstract class which is notified upon completion of restoring
73 // the index's private data either by reading from the cache file or by
74 // rebuilding from the history database.
75 class RestoreCacheObserver {
76 public:
77 virtual ~RestoreCacheObserver();
79 // Callback that lets the observer know that the restore operation has
80 // completed. |succeeded| indicates if the restore was successful. This is
81 // called on the UI thread.
82 virtual void OnCacheRestoreFinished(bool succeeded) = 0;
85 // Defines an abstract class which is notified upon completion of saving
86 // the index's private data to the cache file.
87 class SaveCacheObserver {
88 public:
89 virtual ~SaveCacheObserver();
91 // Callback that lets the observer know that the save has completed;
92 // |succeeded| indicates if it was successful. Called on the UI thread.
93 virtual void OnCacheSaveFinished(bool succeeded) = 0;
96 // |history_service|, which may be null during unit testing, is used to
97 // register as a HistoryServiceObserver. |history_dir| is a path to the
98 // directory containing the history database within the profile wherein the
99 // cache and transaction journals will be stored. |languages| gives a list of
100 // language encodings by which URLs and omnibox searches are broken down into
101 // words and characters.
102 InMemoryURLIndex(bookmarks::BookmarkModel* bookmark_model,
103 history::HistoryService* history_service,
104 const base::FilePath& history_dir,
105 const std::string& languages);
106 ~InMemoryURLIndex() override;
108 // Opens and prepares the index of historical URL visits. If the index private
109 // data cannot be restored from its cache file then it is rebuilt from the
110 // history database.
111 void Init();
113 // Scans the history index and returns a vector with all scored, matching
114 // history items. This entry point simply forwards the call on to the
115 // URLIndexPrivateData class. For a complete description of this function
116 // refer to that class. If |cursor_position| is base::string16::npos, the
117 // function doesn't do anything special with the cursor; this is equivalent
118 // to the cursor being at the end. In total, |max_matches| of items will be
119 // returned in the |ScoredHistoryMatches| vector.
120 ScoredHistoryMatches HistoryItemsForTerms(const base::string16& term_string,
121 size_t cursor_position,
122 size_t max_matches);
124 // Deletes the index entry, if any, for the given |url|.
125 void DeleteURL(const GURL& url);
127 // Sets the optional observers for completion of restoral and saving of the
128 // index's private data.
129 void set_restore_cache_observer(
130 RestoreCacheObserver* restore_cache_observer) {
131 restore_cache_observer_ = restore_cache_observer;
133 void set_save_cache_observer(SaveCacheObserver* save_cache_observer) {
134 save_cache_observer_ = save_cache_observer;
137 // Indicates that the index restoration is complete.
138 bool restored() const {
139 return restored_;
142 private:
143 friend class ::HistoryQuickProviderTest;
144 friend class InMemoryURLIndexTest;
145 friend class InMemoryURLIndexCacheTest;
146 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, ExpireRow);
147 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
149 // HistoryDBTask used to rebuild our private data from the history database.
150 class RebuildPrivateDataFromHistoryDBTask : public history::HistoryDBTask {
151 public:
152 explicit RebuildPrivateDataFromHistoryDBTask(
153 InMemoryURLIndex* index,
154 const std::string& languages,
155 const std::set<std::string>& scheme_whitelist);
157 bool RunOnDBThread(history::HistoryBackend* backend,
158 history::HistoryDatabase* db) override;
159 void DoneRunOnMainThread() override;
161 private:
162 ~RebuildPrivateDataFromHistoryDBTask() override;
164 InMemoryURLIndex* index_; // Call back to this index at completion.
165 std::string languages_; // Languages for word-breaking.
166 std::set<std::string> scheme_whitelist_; // Schemes to be indexed.
167 bool succeeded_; // Indicates if the rebuild was successful.
168 scoped_refptr<URLIndexPrivateData> data_; // The rebuilt private data.
170 DISALLOW_COPY_AND_ASSIGN(RebuildPrivateDataFromHistoryDBTask);
173 // Initializes all index data members in preparation for restoring the index
174 // from the cache or a complete rebuild from the history database.
175 void ClearPrivateData();
177 // Constructs a file path for the cache file within the same directory where
178 // the history database is kept and saves that path to |file_path|. Returns
179 // true if |file_path| can be successfully constructed. (This function is
180 // provided as a hook for unit testing.)
181 bool GetCacheFilePath(base::FilePath* file_path);
183 // Restores the index's private data from the cache file stored in the history
184 // directory.
185 void PostRestoreFromCacheFileTask();
187 // Schedules a history task to rebuild our private data from the history
188 // database.
189 void ScheduleRebuildFromHistory();
191 // Callback used by RebuildPrivateDataFromHistoryDBTask to signal completion
192 // of rebuilding our private data from the history database. |succeeded|
193 // will be true if the rebuild was successful. |private_data| will point to a
194 // new instance of the private data just rebuilt.
195 void DoneRebuidingPrivateDataFromHistoryDB(
196 bool succeeded,
197 scoped_refptr<URLIndexPrivateData> private_data);
199 // Rebuilds the history index from the history database in |history_db|.
200 // Used for unit testing only.
201 void RebuildFromHistory(history::HistoryDatabase* history_db);
203 // Determines if the private data was successfully reloaded from the cache
204 // file or if the private data must be rebuilt from the history database.
205 // |private_data_ptr|'s data will be NULL if the cache file load failed. If
206 // successful, sets the private data and notifies any
207 // |restore_cache_observer_|. Otherwise, kicks off a rebuild from the history
208 // database.
209 void OnCacheLoadDone(scoped_refptr<URLIndexPrivateData> private_data_ptr);
211 // Callback function that sets the private data from the just-restored-from-
212 // file |private_data|. Notifies any |restore_cache_observer_| that the
213 // restore has succeeded.
214 void OnCacheRestored(URLIndexPrivateData* private_data);
216 // Posts a task to cache the index private data and write the cache file to
217 // the history directory.
218 void PostSaveToCacheFileTask();
220 // Saves private_data_ to the given |path|. Runs on the UI thread.
221 // Provided for unit testing so that a test cache file can be used.
222 void DoSaveToCacheFile(const base::FilePath& path);
224 // Notifies the observer, if any, of the success of the private data caching.
225 // |succeeded| is true on a successful save.
226 void OnCacheSaveDone(bool succeeded);
228 // KeyedService:
229 // Signals that any outstanding initialization should be canceled and
230 // flushes the cache to disk.
231 void Shutdown() override;
233 // HistoryServiceObserver:
234 void OnURLVisited(history::HistoryService* history_service,
235 ui::PageTransition transition,
236 const history::URLRow& row,
237 const history::RedirectList& redirects,
238 base::Time visit_time) override;
239 void OnURLsModified(history::HistoryService* history_service,
240 const history::URLRows& changed_urls) override;
241 void OnURLsDeleted(history::HistoryService* history_service,
242 bool all_history,
243 bool expired,
244 const history::URLRows& deleted_rows,
245 const std::set<GURL>& favicon_urls) override;
246 void OnHistoryServiceLoaded(
247 history::HistoryService* history_service) override;
249 // Sets the directory wherein the cache file will be maintained.
250 // For unit test usage only.
251 void set_history_dir(const base::FilePath& dir_path) {
252 history_dir_ = dir_path;
255 // Returns a pointer to our private data. For unit testing only.
256 URLIndexPrivateData* private_data() { return private_data_.get(); }
258 // Returns a pointer to our private data cancelable request tracker. For
259 // unit testing only.
260 base::CancelableTaskTracker* private_data_tracker() {
261 return &private_data_tracker_;
264 // Returns the set of whitelisted schemes. For unit testing only.
265 const std::set<std::string>& scheme_whitelist() { return scheme_whitelist_; }
267 // The BookmarkModel; may be null when testing.
268 bookmarks::BookmarkModel* bookmark_model_;
270 // The HistoryService; may be null when testing.
271 history::HistoryService* history_service_;
273 // Directory where cache file resides. This is, except when unit testing,
274 // the same directory in which the history database is found. It should never
275 // be empty.
276 base::FilePath history_dir_;
278 // Languages used during the word-breaking process during indexing.
279 std::string languages_;
281 // Only URLs with a whitelisted scheme are indexed.
282 std::set<std::string> scheme_whitelist_;
284 // The index's durable private data.
285 scoped_refptr<URLIndexPrivateData> private_data_;
287 // Observers to notify upon restoral or save of the private data cache.
288 RestoreCacheObserver* restore_cache_observer_;
289 SaveCacheObserver* save_cache_observer_;
291 // Task runner from the worker pool, used for operations which require disk
292 // access.
293 scoped_refptr<base::SequencedTaskRunner> task_runner_;
295 base::CancelableTaskTracker private_data_tracker_;
296 base::CancelableTaskTracker cache_reader_tracker_;
298 // Set to true once the shutdown process has begun.
299 bool shutdown_;
301 // Set to true once the index restoration is complete.
302 bool restored_;
304 // Set to true when changes to the index have been made and the index needs
305 // to be cached. Set to false when the index has been cached. Used as a
306 // temporary safety check to ensure that the cache is saved before the
307 // index has been destructed.
308 bool needs_to_be_cached_;
310 // This flag is set to true if we want to listen to the
311 // HistoryServiceLoaded Notification.
312 bool listen_to_history_service_loaded_;
314 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);
317 #endif // CHROME_BROWSER_AUTOCOMPLETE_IN_MEMORY_URL_INDEX_H_