1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
6 #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
14 #include "base/callback.h"
15 #include "base/callback_forward.h"
16 #include "base/files/file_path.h"
17 #include "base/gtest_prod_util.h"
18 #include "base/memory/shared_memory.h"
19 #include "base/threading/sequenced_worker_pool.h"
20 #include "components/visitedlink/common/visitedlink_common.h"
22 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
23 #include "base/logging.h"
32 namespace visitedlink
{
34 class VisitedLinkDelegate
;
36 // Controls the link coloring database. The master controls all writing to the
37 // database as well as disk I/O. There should be only one master.
39 // This class will defer writing operations to the file thread. This means that
40 // class destruction, the file may still be open since operations are pending on
42 class VisitedLinkMaster
: public VisitedLinkCommon
{
44 // Listens to the link coloring database events. The master is given this
45 // event as a constructor argument and dispatches events using it.
48 virtual ~Listener() {}
50 // Called when link coloring database has been created or replaced. The
51 // argument is the new table handle.
52 virtual void NewTable(base::SharedMemory
*) = 0;
54 // Called when new link has been added. The argument is the fingerprint
55 // (hash) of the link.
56 virtual void Add(Fingerprint fingerprint
) = 0;
58 // Called when link coloring state has been reset. This may occur when
59 // entire or parts of history were deleted.
60 virtual void Reset() = 0;
63 VisitedLinkMaster(content::BrowserContext
* browser_context
,
64 VisitedLinkDelegate
* delegate
,
65 bool persist_to_disk
);
67 // In unit test mode, we allow the caller to optionally specify the database
68 // filename so that it can be run from a unit test. The directory where this
69 // file resides must exist in this mode. You can also specify the default
70 // table size to test table resizing. If this parameter is 0, we will use the
73 // In the unit test mode, we also allow the caller to provide a history
74 // service pointer (the history service can't be fetched from the browser
75 // process when we're in unit test mode). This can be NULL to try to access
76 // the main version, which will probably fail (which can be good for testing
77 // this failure mode).
79 // When |suppress_rebuild| is set, we'll not attempt to load data from
80 // history if the file can't be loaded. This should generally be set for
81 // testing except when you want to test the rebuild process explicitly.
82 VisitedLinkMaster(Listener
* listener
,
83 VisitedLinkDelegate
* delegate
,
85 bool suppress_rebuild
,
86 const base::FilePath
& filename
,
87 int32 default_table_size
);
88 virtual ~VisitedLinkMaster();
90 // Must be called immediately after object creation. Nothing else will work
91 // until this is called. Returns true on success, false means that this
95 base::SharedMemory
* shared_memory() { return shared_memory_
; }
97 // Adds a URL to the table.
98 void AddURL(const GURL
& url
);
100 // Adds a set of URLs to the table.
101 void AddURLs(const std::vector
<GURL
>& url
);
106 // HasNextURL must return true when this is called. Returns the next URL
107 // then advances the iterator. Note that the returned reference is only
108 // valid until the next call of NextURL.
109 virtual const GURL
& NextURL() = 0;
111 // Returns true if still has URLs to be iterated.
112 virtual bool HasNextURL() const = 0;
115 virtual ~URLIterator() {}
118 // Deletes the specified URLs from |rows| from the table.
119 void DeleteURLs(URLIterator
* iterator
);
121 // Clears the visited links table by deleting the file from disk. Used as
122 // part of history clearing.
123 void DeleteAllURLs();
125 // Returns the Delegate of this Master.
126 VisitedLinkDelegate
* GetDelegate();
128 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
129 // This is a debugging function that can be called to double-check internal
130 // data structures. It will assert if the check fails.
131 void DebugValidate();
133 // Sets a task to execute when the next rebuild from history is complete.
134 // This is used by unit tests to wait for the rebuild to complete before
135 // they continue. The pointer will be owned by this object after the call.
136 void set_rebuild_complete_task(const base::Closure
& task
) {
137 DCHECK(rebuild_complete_task_
.is_null());
138 rebuild_complete_task_
= task
;
141 // returns the number of items in the table for testing verification
142 int32
GetUsedCount() const {
146 // Returns the listener.
147 VisitedLinkMaster::Listener
* GetListener() const {
148 return listener_
.get();
151 // Call to cause the entire database file to be re-written from scratch
152 // to disk. Used by the performance tester.
159 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest
, Delete
);
160 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest
, BigDelete
);
161 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest
, BigImport
);
163 // Object to rebuild the table on the history thread (see the .cc file).
166 // Byte offsets of values in the header.
167 static const int32 kFileHeaderSignatureOffset
;
168 static const int32 kFileHeaderVersionOffset
;
169 static const int32 kFileHeaderLengthOffset
;
170 static const int32 kFileHeaderUsedOffset
;
171 static const int32 kFileHeaderSaltOffset
;
173 // The signature at the beginning of a file.
174 static const int32 kFileSignature
;
176 // version of the file format this module currently uses
177 static const int32 kFileCurrentVersion
;
179 // Bytes in the file header, including the salt.
180 static const size_t kFileHeaderSize
;
182 // When creating a fresh new table, we use this many entries.
183 static const unsigned kDefaultTableSize
;
185 // When the user is deleting a boatload of URLs, we don't really want to do
186 // individual writes for each of them. When the count exceeds this threshold,
187 // we will write the whole table to disk at once instead of individual items.
188 static const size_t kBigDeleteThreshold
;
190 // Backend for the constructors initializing the members.
193 // If a rebuild is in progress, we save the URL in the temporary list.
194 // Otherwise, we add this to the table. Returns the index of the
195 // inserted fingerprint or null_hash_ on failure.
196 Hash
TryToAddURL(const GURL
& url
);
198 // File I/O functions
199 // ------------------
200 // These functions are only called if |persist_to_disk_| is true.
202 // Posts the given task to the blocking worker pool with our options.
203 void PostIOTask(const tracked_objects::Location
& from_here
,
204 const base::Closure
& task
);
206 // Writes the entire table to disk. It will leave the table file open and
207 // the handle to it will be stored in file_.
208 void WriteFullTable();
210 // Try to load the table from the database file. If the file doesn't exist or
211 // is corrupt, this will return failure.
214 // Reads the header of the link coloring database from disk. Assumes the
215 // file pointer is at the beginning of the file and that there are no pending
216 // asynchronous I/O operations.
218 // Returns true on success and places the size of the table in num_entries
219 // and the number of nonzero fingerprints in used_count. This will fail if
220 // the version of the file is not the current version of the database.
221 bool ReadFileHeader(FILE* hfile
, int32
* num_entries
, int32
* used_count
,
222 uint8 salt
[LINK_SALT_LENGTH
]);
224 // Fills *filename with the name of the link database filename
225 bool GetDatabaseFileName(base::FilePath
* filename
);
227 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
228 // the write to a background thread.
229 void WriteToFile(FILE** hfile
, off_t offset
, void* data
, int32 data_size
);
231 // Helper function to schedule and asynchronous write of the used count to
232 // disk (this is a common operation).
233 void WriteUsedItemCountToFile();
235 // Helper function to schedule an asynchronous write of the given range of
236 // hash functions to disk. The range is inclusive on both ends. The range can
237 // wrap around at 0 and this function will handle it.
238 void WriteHashRangeToFile(Hash first_hash
, Hash last_hash
);
240 // Synchronous read from the file. Assumes there are no pending asynchronous
241 // I/O functions. Returns true if the entire buffer was successfully filled.
242 bool ReadFromFile(FILE* hfile
, off_t offset
, void* data
, size_t data_size
);
244 // General table handling
245 // ----------------------
247 // Called to add a fingerprint to the table. If |send_notifications| is true
248 // and the item is added successfully, Listener::Add will be invoked.
249 // Returns the index of the inserted fingerprint or null_hash_ if there was a
250 // duplicate and this item was skippped.
251 Hash
AddFingerprint(Fingerprint fingerprint
, bool send_notifications
);
253 // Deletes all fingerprints from the given vector from the current hash table
254 // and syncs it to disk if there are changes. This does not update the
255 // deleted_since_rebuild_ list, the caller must update this itself if there
256 // is an update pending.
257 void DeleteFingerprintsFromCurrentTable(
258 const std::set
<Fingerprint
>& fingerprints
);
260 // Removes the indicated fingerprint from the table. If the update_file flag
261 // is set, the changes will also be written to disk. Returns true if the
262 // fingerprint was deleted, false if it was not in the table to delete.
263 bool DeleteFingerprint(Fingerprint fingerprint
, bool update_file
);
265 // Creates a new empty table, call if InitFromFile() fails. Normally, when
266 // |suppress_rebuild| is false, the table will be rebuilt from history,
267 // keeping us in sync. When |suppress_rebuild| is true, the new table will be
268 // empty and we will not consult history. This is used when clearing the
269 // database and for unit tests.
270 bool InitFromScratch(bool suppress_rebuild
);
272 // Allocates the Fingerprint structure and length. When init_to_empty is set,
273 // the table will be filled with 0s and used_items_ will be set to 0 as well.
274 // If the flag is not set, these things are untouched and it is the
275 // responsibility of the caller to fill them (like when we are reading from
277 bool CreateURLTable(int32 num_entries
, bool init_to_empty
);
279 // A wrapper for CreateURLTable, this will allocate a new table, initialized
280 // to empty. The caller is responsible for saving the shared memory pointer
281 // and handles before this call (they will be replaced with new ones) and
282 // releasing them later. This is designed for callers that make a new table
283 // and then copy values from the old table to the new one, then release the
286 // Returns true on success. On failure, the old table will be restored. The
287 // caller should not attemp to release the pointer/handle in this case.
288 bool BeginReplaceURLTable(int32 num_entries
);
290 // unallocates the Fingerprint table
293 // For growing the table. ResizeTableIfNecessary will check to see if the
294 // table should be resized and calls ResizeTable if needed. Returns true if
295 // we decided to resize the table.
296 bool ResizeTableIfNecessary();
298 // Resizes the table (growing or shrinking) as necessary to accomodate the
300 void ResizeTable(int32 new_size
);
302 // Returns the desired table size for |item_count| URLs.
303 uint32
NewTableSizeForCount(int32 item_count
) const;
305 // Computes the table load as fraction. For example, if 1/4 of the entries are
306 // full, this value will be 0.25
307 float ComputeTableLoad() const {
308 return static_cast<float>(used_items_
) / static_cast<float>(table_length_
);
311 // Initializes a rebuild of the visited link database based on the browser
312 // history. This will set table_builder_ while working, and there should not
313 // already be a rebuild in place when called. See the definition for more
314 // details on how this works.
316 // Returns true on success. Failure means we're not attempting to rebuild
317 // the database because something failed.
318 bool RebuildTableFromDelegate();
320 // Callback that the table rebuilder uses when the rebuild is complete.
321 // |success| is true if the fingerprint generation succeeded, in which case
322 // |fingerprints| will contain the computed fingerprints. On failure, there
323 // will be no fingerprints.
324 void OnTableRebuildComplete(bool success
,
325 const std::vector
<Fingerprint
>& fingerprints
);
327 // Increases or decreases the given hash value by one, wrapping around as
328 // necessary. Used for probing.
329 inline Hash
IncrementHash(Hash hash
) {
330 if (hash
>= table_length_
- 1)
331 return 0; // Wrap around.
334 inline Hash
DecrementHash(Hash hash
) {
336 return table_length_
- 1; // Wrap around.
341 // Indicates whether any asynchronous operation has ever been completed.
342 // We do some synchronous reads that require that no asynchronous operations
343 // are pending, yet we don't track whether they have been completed. This
344 // flag is a sanity check that these reads only happen before any
345 // asynchronous writes have been fired.
346 bool posted_asynchronous_operation_
;
349 // Reference to the browser context that this object belongs to
350 // (it knows the path to where the data is stored)
351 content::BrowserContext
* browser_context_
;
353 // Client owns the delegate and is responsible for it being valid through
354 // the life time this VisitedLinkMaster.
355 VisitedLinkDelegate
* delegate_
;
357 // VisitedLinkEventListener to handle incoming events.
358 scoped_ptr
<Listener
> listener_
;
360 // Lazily initialized sequence token for posting file tasks.
361 base::SequencedWorkerPool::SequenceToken sequence_token_
;
363 // When non-NULL, indicates we are in database rebuild mode and points to
364 // the class collecting fingerprint information from the history system.
365 // The pointer is owned by this class, but it must remain valid while the
366 // history query is running. We must only delete it when the query is done.
367 scoped_refptr
<TableBuilder
> table_builder_
;
369 // Indicates URLs added and deleted since we started rebuilding the table.
370 std::set
<Fingerprint
> added_since_rebuild_
;
371 std::set
<Fingerprint
> deleted_since_rebuild_
;
373 // TODO(brettw) Support deletion, we need to track whether anything was
374 // deleted during the rebuild here. Then we should delete any of these
375 // entries from the complete table later.
376 // std::vector<Fingerprint> removed_since_rebuild_;
378 // The currently open file with the table in it. This may be NULL if we're
379 // rebuilding and haven't written a new version yet or if |persist_to_disk_|
380 // is false. Writing to the file may be safely ignored in this case. Also
381 // |file_| may be non-NULL but point to a NULL pointer. That would mean that
382 // opening of the file is already scheduled in a background thread and any
383 // writing to the file can also be scheduled to the background thread as it's
384 // guaranteed to be executed after the opening.
385 // The class owns both the |file_| pointer and the pointer pointed
389 // If true, will try to persist the hash table to disk. Will rebuild from
390 // VisitedLinkDelegate::RebuildTable if there are disk corruptions.
391 bool persist_to_disk_
;
393 // Shared memory consists of a SharedHeader followed by the table.
394 base::SharedMemory
*shared_memory_
;
396 // When we generate new tables, we increment the serial number of the
397 // shared memory object.
398 int32 shared_memory_serial_
;
400 // Number of non-empty items in the table, used to compute fullness.
403 // Testing values -----------------------------------------------------------
405 // The following fields exist for testing purposes. They are not used in
406 // release builds. It'd be nice to eliminate them in release builds, but we
407 // don't want to change the signature of the object between the unit test and
408 // regular builds. Instead, we just have "default" values that these take
409 // in release builds that give "regular" behavior.
411 // Overridden database file name for testing
412 base::FilePath database_name_override_
;
414 // When nonzero, overrides the table size for new databases for testing
415 int32 table_size_override_
;
417 // When set, indicates the task that should be run after the next rebuild from
418 // history is complete.
419 base::Closure rebuild_complete_task_
;
421 // Set to prevent us from attempting to rebuild the database from global
422 // history if we have an error opening the file. This is used for testing,
423 // will be false in production.
424 bool suppress_rebuild_
;
426 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster
);
429 // NOTE: These methods are defined inline here, so we can share the compilation
430 // of visitedlink_master.cc between the browser and the unit/perf tests.
432 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
433 inline void VisitedLinkMaster::DebugValidate() {
434 int32 used_count
= 0;
435 for (int32 i
= 0; i
< table_length_
; i
++) {
439 DCHECK_EQ(used_count
, used_items_
);
443 } // namespace visitedlink
445 #endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_