1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_IN_MEMORY_URL_INDEX_H_
6 #define COMPONENTS_OMNIBOX_BROWSER_IN_MEMORY_URL_INDEX_H_
14 #include "base/basictypes.h"
15 #include "base/files/file_path.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/ref_counted.h"
18 #include "base/memory/weak_ptr.h"
19 #include "base/strings/string16.h"
20 #include "base/task/cancelable_task_tracker.h"
21 #include "base/threading/sequenced_worker_pool.h"
22 #include "base/threading/thread_checker.h"
23 #include "components/history/core/browser/history_db_task.h"
24 #include "components/history/core/browser/history_service_observer.h"
25 #include "components/history/core/browser/history_types.h"
26 #include "components/keyed_service/core/keyed_service.h"
27 #include "components/omnibox/browser/scored_history_match.h"
29 class HistoryQuickProviderTest
;
32 class SequencedTaskRunner
;
40 namespace in_memory_url_index
{
41 class InMemoryURLIndexCacheItem
;
45 class HistoryDatabase
;
49 class URLIndexPrivateData
;
// Set of URL scheme strings; used for the whitelist of schemes to be indexed.
51 typedef std::set
<std::string
> SchemeSet
;
53 // The URL history source.
54 // Holds portions of the URL database in memory in an indexed form. Used to
55 // quickly look up matching URLs for a given query string. Used by
56 // the HistoryURLProvider for inline autocomplete and to provide URL
57 // matches to the omnibox.
59 // Note about multi-byte codepoints and the data structures in the
60 // InMemoryURLIndex class: One will quickly notice that no effort is made to
61 // ensure that multi-byte character boundaries are detected when indexing the
62 // words and characters in the URL history database, except when converting
63 // URL strings to lowercase. Multi-byte-edness makes no difference when
64 // indexing or when searching the index, as the final filtering of results
65 // is dependent on the comparison of a string of bytes, not individual
66 // characters. While the lookup of those bytes during a search in the
67 // |char_word_map_| could serve up words in which the individual char16
68 // occurs as a portion of a composite character, the next filtering step
69 // will eliminate such words, except in the case where a single character
70 // is being searched on and that character occurs as the second char16 of a
71 // multi-char16 instance.
72 class InMemoryURLIndex
: public KeyedService
,
73 public history::HistoryServiceObserver
,
74 public base::SupportsWeakPtr
<InMemoryURLIndex
> {
76 // Observer interface that is notified when restoring the index's private
77 // data has completed, whether the data was read from the cache file or
78 // rebuilt from the history database.
// Install an observer with set_restore_cache_observer().
79 class RestoreCacheObserver
{
81 virtual ~RestoreCacheObserver();
83 // Callback that lets the observer know that the restore operation has
84 // completed. |succeeded| indicates if the restore was successful. This is
85 // called on the UI thread.
// Pure virtual: every concrete observer must implement it.
86 virtual void OnCacheRestoreFinished(bool succeeded
) = 0;
89 // Defines an abstract class which is notified upon completion of saving
90 // the index's private data to the cache file.
// Install an observer with set_save_cache_observer().
91 class SaveCacheObserver
{
93 virtual ~SaveCacheObserver();
95 // Callback that lets the observer know that the save operation has
96 // completed. |succeeded| indicates if the save was successful. This is
// called on the UI thread.
// Pure virtual: every concrete observer must implement it.
97 virtual void OnCacheSaveFinished(bool succeeded
) = 0;
100 // |history_service|, which may be null during unit testing, is used to register
101 // as a HistoryServiceObserver. |history_dir| is a path to the directory
102 // containing the history database within the profile wherein the cache and
103 // transaction journals will be stored. |languages| gives a list of language
104 // encodings by which URLs and omnibox searches are broken down into words and
106 InMemoryURLIndex(bookmarks::BookmarkModel
* bookmark_model
,
107 history::HistoryService
* history_service
,
108 base::SequencedWorkerPool
* worker_pool
,
109 const base::FilePath
& history_dir
,
110 const std::string
& languages
,
111 const SchemeSet
& client_schemes_to_whitelist
);
112 ~InMemoryURLIndex() override
;
114 // Opens and prepares the index of historical URL visits. If the index private
115 // data cannot be restored from its cache file then it is rebuilt from the
119 // Scans the history index and returns a vector with all scored, matching
120 // history items. This entry point simply forwards the call on to the
121 // URLIndexPrivateData class. For a complete description of this function
122 // refer to that class. If |cursor_position| is base::string16::npos, the
123 // function doesn't do anything special with the cursor; this is equivalent
124 // to the cursor being at the end. In total, |max_matches| of items will be
125 // returned in the |ScoredHistoryMatches| vector.
126 ScoredHistoryMatches
HistoryItemsForTerms(const base::string16
& term_string
,
127 size_t cursor_position
,
130 // Deletes the index entry, if any, for the given |url|.
131 void DeleteURL(const GURL
& url
);
133 // Sets the optional observers for completion of restoral and saving of the
134 // index's private data.
135 void set_restore_cache_observer(
136 RestoreCacheObserver
* restore_cache_observer
) {
137 restore_cache_observer_
= restore_cache_observer
;
139 void set_save_cache_observer(SaveCacheObserver
* save_cache_observer
) {
140 save_cache_observer_
= save_cache_observer
;
143 // Indicates that the index restoration is complete.
144 bool restored() const {
// Test fixtures and individual test cases that are granted access to this
// class's non-public members.
149 friend class ::HistoryQuickProviderTest
;
150 friend class InMemoryURLIndexTest
;
151 friend class InMemoryURLIndexCacheTest
;
152 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest
, ExpireRow
);
153 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest
, Initialization
);
155 // HistoryDBTask used to rebuild our private data from the history database.
// At completion the task calls back to the InMemoryURLIndex held in |index_|.
156 class RebuildPrivateDataFromHistoryDBTask
: public history::HistoryDBTask
{
// The parameters correspond by name to the |index_|, |languages_| and
// |scheme_whitelist_| members documented below.
158 explicit RebuildPrivateDataFromHistoryDBTask(
159 InMemoryURLIndex
* index
,
160 const std::string
& languages
,
161 const SchemeSet
& scheme_whitelist
);
// history::HistoryDBTask overrides:
163 bool RunOnDBThread(history::HistoryBackend
* backend
,
164 history::HistoryDatabase
* db
) override
;
165 void DoneRunOnMainThread() override
;
168 ~RebuildPrivateDataFromHistoryDBTask() override
;
170 InMemoryURLIndex
* index_
; // Call back to this index at completion.
171 std::string languages_
; // Languages for word-breaking.
172 SchemeSet scheme_whitelist_
; // Schemes to be indexed.
173 bool succeeded_
; // Indicates if the rebuild was successful.
174 scoped_refptr
<URLIndexPrivateData
> data_
; // The rebuilt private data.
176 DISALLOW_COPY_AND_ASSIGN(RebuildPrivateDataFromHistoryDBTask
);
179 // Initializes all index data members in preparation for restoring the index
180 // from the cache or a complete rebuild from the history database.
181 void ClearPrivateData();
183 // Constructs a file path for the cache file within the same directory where
184 // the history database is kept and saves that path to |file_path|. Returns
185 // true if |file_path| can be successfully constructed. (This function
186 // provided as a hook for unit testing.)
187 bool GetCacheFilePath(base::FilePath
* file_path
);
189 // Restores the index's private data from the cache file stored in the history
191 void PostRestoreFromCacheFileTask();
193 // Schedules a history task to rebuild our private data from the history
195 void ScheduleRebuildFromHistory();
197 // Callback used by RebuildPrivateDataFromHistoryDBTask to signal completion
198 // of rebuilding our private data from the history database. |succeeded|
199 // will be true if the rebuild was successful. |private_data| will point to a
200 // new instance of the private data just rebuilt.
201 void DoneRebuidingPrivateDataFromHistoryDB(
203 scoped_refptr
<URLIndexPrivateData
> private_data
);
205 // Rebuilds the history index from the history database in |history_db|.
206 // Used for unit testing only.
207 void RebuildFromHistory(history::HistoryDatabase
* history_db
);
209 // Determines if the private data was successfully reloaded from the cache
210 // file or if the private data must be rebuilt from the history database.
211 // |private_data_ptr|'s data will be NULL if the cache file load failed. If
212 // successful, sets the private data and notifies any
213 // |restore_cache_observer_|. Otherwise, kicks off a rebuild from the history
215 void OnCacheLoadDone(scoped_refptr
<URLIndexPrivateData
> private_data_ptr
);
217 // Callback function that sets the private data from the just-restored-from-
218 // file |private_data|. Notifies any |restore_cache_observer_| that the
219 // restore has succeeded.
220 void OnCacheRestored(URLIndexPrivateData
* private_data
);
222 // Posts a task to cache the index private data and write the cache file to
223 // the history directory.
224 void PostSaveToCacheFileTask();
226 // Saves private_data_ to the given |path|. Runs on the UI thread.
227 // Provided for unit testing so that a test cache file can be used.
228 void DoSaveToCacheFile(const base::FilePath
& path
);
230 // Notifies the observer, if any, of the success of the private data caching.
231 // |succeeded| is true on a successful save.
232 void OnCacheSaveDone(bool succeeded
);
235 // Signals that any outstanding initialization should be canceled and
236 // flushes the cache to disk.
237 void Shutdown() override
;
239 // HistoryServiceObserver:
240 void OnURLVisited(history::HistoryService
* history_service
,
241 ui::PageTransition transition
,
242 const history::URLRow
& row
,
243 const history::RedirectList
& redirects
,
244 base::Time visit_time
) override
;
245 void OnURLsModified(history::HistoryService
* history_service
,
246 const history::URLRows
& changed_urls
) override
;
247 void OnURLsDeleted(history::HistoryService
* history_service
,
250 const history::URLRows
& deleted_rows
,
251 const std::set
<GURL
>& favicon_urls
) override
;
252 void OnHistoryServiceLoaded(
253 history::HistoryService
* history_service
) override
;
255 // Sets the directory wherein the cache file will be maintained.
256 // For unit test usage only.
257 void set_history_dir(const base::FilePath
& dir_path
) {
258 history_dir_
= dir_path
;
261 // Returns a pointer to our private data. For unit testing only.
262 URLIndexPrivateData
* private_data() { return private_data_
.get(); }
264 // Returns a pointer to our private data cancelable request tracker. For
265 // unit testing only.
266 base::CancelableTaskTracker
* private_data_tracker() {
267 return &private_data_tracker_
;
270 // Returns the set of whitelisted schemes. For unit testing only.
271 const SchemeSet
& scheme_whitelist() { return scheme_whitelist_
; }
273 // The BookmarkModel; may be null when testing.
274 bookmarks::BookmarkModel
* bookmark_model_
;
276 // The HistoryService; may be null when testing.
277 history::HistoryService
* history_service_
;
279 // Directory where cache file resides. This is, except when unit testing,
280 // the same directory in which the history database is found. It should never
282 base::FilePath history_dir_
;
284 // Languages used during the word-breaking process during indexing.
285 std::string languages_
;
287 // Only URLs with a whitelisted scheme are indexed.
288 SchemeSet scheme_whitelist_
;
290 // The index's durable private data.
291 scoped_refptr
<URLIndexPrivateData
> private_data_
;
293 // Observers to notify upon restoral or save of the private data cache.
294 RestoreCacheObserver
* restore_cache_observer_
;
295 SaveCacheObserver
* save_cache_observer_
;
297 // Task runner from the worker pool, used for operations which require disk
299 scoped_refptr
<base::SequencedTaskRunner
> task_runner_
;
// Trackers for cancelable tasks. |private_data_tracker_| is exposed to unit
// tests through private_data_tracker().
// NOTE(review): |cache_reader_tracker_| presumably tracks the cache-file
// read task; confirm against the implementation file.
301 base::CancelableTaskTracker private_data_tracker_
;
302 base::CancelableTaskTracker cache_reader_tracker_
;
304 // Set to true once the shutdown process has begun.
307 // Set to true once the index restoration is complete.
310 // Set to true when changes to the index have been made and the index needs
311 // to be cached. Set to false when the index has been cached. Used as a
312 // temporary safety check to ensure that the cache is saved before the
313 // index has been destructed.
314 bool needs_to_be_cached_
;
316 // This flag is set to true if we want to listen to the
317 // HistoryServiceLoaded Notification.
318 bool listen_to_history_service_loaded_
;
// NOTE(review): presumably used to verify that the index is only accessed
// on the thread that created it; confirm against the implementation file.
320 base::ThreadChecker thread_checker_
;
// Declares the class non-copyable and non-assignable.
322 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex
);
325 #endif // COMPONENTS_OMNIBOX_BROWSER_IN_MEMORY_URL_INDEX_H_