Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / components / visitedlink / browser / visitedlink_master.cc
blobee493415d2da9e4d44ed468d918d8bcad18ba6e6
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/visitedlink/browser/visitedlink_master.h"
7 #if defined(OS_WIN)
8 #include <windows.h>
9 #include <io.h>
10 #include <shlobj.h>
11 #endif // defined(OS_WIN)
12 #include <stdio.h>
14 #include <algorithm>
16 #include "base/bind.h"
17 #include "base/bind_helpers.h"
18 #include "base/containers/stack_container.h"
19 #include "base/files/file_util.h"
20 #include "base/files/scoped_file.h"
21 #include "base/logging.h"
22 #include "base/message_loop/message_loop.h"
23 #include "base/rand_util.h"
24 #include "base/strings/string_util.h"
25 #include "base/threading/thread_restrictions.h"
26 #include "components/visitedlink/browser/visitedlink_delegate.h"
27 #include "components/visitedlink/browser/visitedlink_event_listener.h"
28 #include "content/public/browser/browser_context.h"
29 #include "content/public/browser/browser_thread.h"
30 #include "url/gurl.h"
32 using content::BrowserThread;
34 namespace visitedlink {
36 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
37 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
38 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
39 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
40 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
42 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
44 // the signature at the beginning of the URL table = "VLnk" (visited links)
45 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
46 const size_t VisitedLinkMaster::kFileHeaderSize =
47 kFileHeaderSaltOffset + LINK_SALT_LENGTH;
49 // This value should also be the same as the smallest size in the lookup
50 // table in NewTableSizeForCount (prime number).
51 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
53 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
55 namespace {
57 // Fills the given salt structure with some quasi-random values
58 // It is not necessary to generate a cryptographically strong random string,
59 // only that it be reasonably different for different users.
60 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
61 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
62 uint64 randval = base::RandUint64();
63 memcpy(salt, &randval, 8);
66 // Opens file on a background thread to not block UI thread.
67 void AsyncOpen(FILE** file, const base::FilePath& filename) {
68 *file = base::OpenFile(filename, "wb+");
69 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
72 // Returns true if the write was complete.
73 static bool WriteToFile(FILE* file,
74 off_t offset,
75 const void* data,
76 size_t data_len) {
77 if (fseek(file, offset, SEEK_SET) != 0)
78 return false; // Don't write to an invalid part of the file.
80 size_t num_written = fwrite(data, 1, data_len, file);
82 // The write may not make it to the kernel (stdlib may buffer the write)
83 // until the next fseek/fclose call. If we crash, it's easy for our used
84 // item count to be out of sync with the number of hashes we write.
85 // Protect against this by calling fflush.
86 int ret = fflush(file);
87 DCHECK_EQ(0, ret);
88 return num_written == data_len;
91 // This task executes on a background thread and executes a write. This
92 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
93 // is used because file may still not be opened by the time of scheduling
94 // the task for execution.
95 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
96 if (*file)
97 WriteToFile(*file, offset, data.data(), data.size());
100 // Truncates the file to the current position asynchronously on a background
101 // thread. Double pointer to FILE is used because file may still not be opened
102 // by the time of scheduling the task for execution.
103 void AsyncTruncate(FILE** file) {
104 if (*file)
105 base::IgnoreResult(base::TruncateFile(*file));
108 // Closes the file on a background thread and releases memory used for storage
109 // of FILE* value. Double pointer to FILE is used because file may still not
110 // be opened by the time of scheduling the task for execution.
111 void AsyncClose(FILE** file) {
112 if (*file)
113 base::IgnoreResult(fclose(*file));
114 free(file);
117 } // namespace
119 // TableBuilder ---------------------------------------------------------------
121 // How rebuilding from history works
122 // ---------------------------------
124 // We mark that we're rebuilding from history by setting the table_builder_
125 // member in VisitedLinkMaster to the TableBuilder we create. This builder
126 // will be called on the history thread by the history system for every URL
127 // in the database.
129 // The builder will store the fingerprints for those URLs, and then marshalls
130 // back to the main thread where the VisitedLinkMaster will be notified. The
131 // master then replaces its table with a new table containing the computed
132 // fingerprints.
134 // The builder must remain active while the history system is using it.
135 // Sometimes, the master will be deleted before the rebuild is complete, in
136 // which case it notifies the builder via DisownMaster(). The builder will
137 // delete itself once rebuilding is complete, and not execute any callback.
138 class VisitedLinkMaster::TableBuilder
139 : public VisitedLinkDelegate::URLEnumerator {
140 public:
141 TableBuilder(VisitedLinkMaster* master,
142 const uint8 salt[LINK_SALT_LENGTH]);
144 // Called on the main thread when the master is being destroyed. This will
145 // prevent a crash when the query completes and the master is no longer
146 // around. We can not actually do anything but mark this fact, since the
147 // table will be being rebuilt simultaneously on the other thread.
148 void DisownMaster();
150 // VisitedLinkDelegate::URLEnumerator
151 void OnURL(const GURL& url) override;
152 void OnComplete(bool succeed) override;
154 private:
155 ~TableBuilder() override {}
157 // OnComplete mashals to this function on the main thread to do the
158 // notification.
159 void OnCompleteMainThread();
161 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
162 VisitedLinkMaster* master_;
164 // Indicates whether the operation has failed or not.
165 bool success_;
167 // Salt for this new table.
168 uint8 salt_[LINK_SALT_LENGTH];
170 // Stores the fingerprints we computed on the background thread.
171 VisitedLinkCommon::Fingerprints fingerprints_;
173 DISALLOW_COPY_AND_ASSIGN(TableBuilder);
176 // VisitedLinkMaster ----------------------------------------------------------
178 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
179 VisitedLinkDelegate* delegate,
180 bool persist_to_disk)
181 : browser_context_(browser_context),
182 delegate_(delegate),
183 listener_(new VisitedLinkEventListener(this, browser_context)),
184 persist_to_disk_(persist_to_disk) {
185 InitMembers();
188 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
189 VisitedLinkDelegate* delegate,
190 bool persist_to_disk,
191 bool suppress_rebuild,
192 const base::FilePath& filename,
193 int32 default_table_size)
194 : browser_context_(NULL),
195 delegate_(delegate),
196 persist_to_disk_(persist_to_disk) {
197 listener_.reset(listener);
198 DCHECK(listener_.get());
199 InitMembers();
201 database_name_override_ = filename;
202 table_size_override_ = default_table_size;
203 suppress_rebuild_ = suppress_rebuild;
206 VisitedLinkMaster::~VisitedLinkMaster() {
207 if (table_builder_.get()) {
208 // Prevent the table builder from calling us back now that we're being
209 // destroyed. Note that we DON'T delete the object, since the history
210 // system is still writing into it. When that is complete, the table
211 // builder will destroy itself when it finds we are gone.
212 table_builder_->DisownMaster();
214 FreeURLTable();
215 // FreeURLTable() will schedule closing of the file and deletion of |file_|.
216 // So nothing should be done here.
219 void VisitedLinkMaster::InitMembers() {
220 file_ = NULL;
221 shared_memory_ = NULL;
222 shared_memory_serial_ = 0;
223 used_items_ = 0;
224 table_size_override_ = 0;
225 suppress_rebuild_ = false;
226 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
228 #ifndef NDEBUG
229 posted_asynchronous_operation_ = false;
230 #endif
233 bool VisitedLinkMaster::Init() {
234 // We probably shouldn't be loading this from the UI thread,
235 // but it does need to happen early on in startup.
236 // http://code.google.com/p/chromium/issues/detail?id=24163
237 base::ThreadRestrictions::ScopedAllowIO allow_io;
239 if (persist_to_disk_) {
240 if (InitFromFile())
241 return true;
243 return InitFromScratch(suppress_rebuild_);
246 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
247 // Extra check that we are not incognito. This should not happen.
248 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
249 // removed from BrowserContext.
250 if (browser_context_ && browser_context_->IsOffTheRecord()) {
251 NOTREACHED();
252 return null_hash_;
255 if (!url.is_valid())
256 return null_hash_; // Don't add invalid URLs.
258 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
259 url.spec().size(),
260 salt_);
261 if (table_builder_.get()) {
262 // If we have a pending delete for this fingerprint, cancel it.
263 std::set<Fingerprint>::iterator found =
264 deleted_since_rebuild_.find(fingerprint);
265 if (found != deleted_since_rebuild_.end())
266 deleted_since_rebuild_.erase(found);
268 // A rebuild is in progress, save this addition in the temporary list so
269 // it can be added once rebuild is complete.
270 added_since_rebuild_.insert(fingerprint);
273 // If the table is "full", we don't add URLs and just drop them on the floor.
274 // This can happen if we get thousands of new URLs and something causes
275 // the table resizing to fail. This check prevents a hang in that case. Note
276 // that this is *not* the resize limit, this is just a sanity check.
277 if (used_items_ / 8 > table_length_ / 10)
278 return null_hash_; // Table is more than 80% full.
280 return AddFingerprint(fingerprint, true);
283 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
284 const base::Closure& task) {
285 DCHECK(persist_to_disk_);
286 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
287 from_here, task);
290 void VisitedLinkMaster::AddURL(const GURL& url) {
291 Hash index = TryToAddURL(url);
292 if (!table_builder_.get() && index != null_hash_) {
293 // Not rebuilding, so we want to keep the file on disk up-to-date.
294 if (persist_to_disk_) {
295 WriteUsedItemCountToFile();
296 WriteHashRangeToFile(index, index);
298 ResizeTableIfNecessary();
302 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
303 for (std::vector<GURL>::const_iterator i = url.begin();
304 i != url.end(); ++i) {
305 Hash index = TryToAddURL(*i);
306 if (!table_builder_.get() && index != null_hash_)
307 ResizeTableIfNecessary();
310 // Keeps the file on disk up-to-date.
311 if (!table_builder_.get() && persist_to_disk_)
312 WriteFullTable();
315 void VisitedLinkMaster::DeleteAllURLs() {
316 // Any pending modifications are invalid.
317 added_since_rebuild_.clear();
318 deleted_since_rebuild_.clear();
320 // Clear the hash table.
321 used_items_ = 0;
322 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
324 // Resize it if it is now too empty. Resize may write the new table out for
325 // us, otherwise, schedule writing the new table to disk ourselves.
326 if (!ResizeTableIfNecessary() && persist_to_disk_)
327 WriteFullTable();
329 listener_->Reset();
332 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
333 return delegate_;
336 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
337 if (!urls->HasNextURL())
338 return;
340 listener_->Reset();
342 if (table_builder_.get()) {
343 // A rebuild is in progress, save this deletion in the temporary list so
344 // it can be added once rebuild is complete.
345 while (urls->HasNextURL()) {
346 const GURL& url(urls->NextURL());
347 if (!url.is_valid())
348 continue;
350 Fingerprint fingerprint =
351 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
352 deleted_since_rebuild_.insert(fingerprint);
354 // If the URL was just added and now we're deleting it, it may be in the
355 // list of things added since the last rebuild. Delete it from that list.
356 std::set<Fingerprint>::iterator found =
357 added_since_rebuild_.find(fingerprint);
358 if (found != added_since_rebuild_.end())
359 added_since_rebuild_.erase(found);
361 // Delete the URLs from the in-memory table, but don't bother writing
362 // to disk since it will be replaced soon.
363 DeleteFingerprint(fingerprint, false);
365 return;
368 // Compute the deleted URLs' fingerprints and delete them
369 std::set<Fingerprint> deleted_fingerprints;
370 while (urls->HasNextURL()) {
371 const GURL& url(urls->NextURL());
372 if (!url.is_valid())
373 continue;
374 deleted_fingerprints.insert(
375 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
377 DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
380 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
381 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
382 Fingerprint fingerprint,
383 bool send_notifications) {
384 if (!hash_table_ || table_length_ == 0) {
385 NOTREACHED(); // Not initialized.
386 return null_hash_;
389 Hash cur_hash = HashFingerprint(fingerprint);
390 Hash first_hash = cur_hash;
391 while (true) {
392 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
393 if (cur_fingerprint == fingerprint)
394 return null_hash_; // This fingerprint is already in there, do nothing.
396 if (cur_fingerprint == null_fingerprint_) {
397 // End of probe sequence found, insert here.
398 hash_table_[cur_hash] = fingerprint;
399 used_items_++;
400 // If allowed, notify listener that a new visited link was added.
401 if (send_notifications)
402 listener_->Add(fingerprint);
403 return cur_hash;
406 // Advance in the probe sequence.
407 cur_hash = IncrementHash(cur_hash);
408 if (cur_hash == first_hash) {
409 // This means that we've wrapped around and are about to go into an
410 // infinite loop. Something was wrong with the hashtable resizing
411 // logic, so stop here.
412 NOTREACHED();
413 return null_hash_;
418 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
419 const std::set<Fingerprint>& fingerprints) {
420 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
422 // Delete the URLs from the table.
423 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
424 i != fingerprints.end(); ++i)
425 DeleteFingerprint(*i, !bulk_write);
427 // These deleted fingerprints may make us shrink the table.
428 if (ResizeTableIfNecessary())
429 return; // The resize function wrote the new table to disk for us.
431 // Nobody wrote this out for us, write the full file to disk.
432 if (bulk_write && persist_to_disk_)
433 WriteFullTable();
436 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
437 bool update_file) {
438 if (!hash_table_ || table_length_ == 0) {
439 NOTREACHED(); // Not initialized.
440 return false;
442 if (!IsVisited(fingerprint))
443 return false; // Not in the database to delete.
445 // First update the header used count.
446 used_items_--;
447 if (update_file && persist_to_disk_)
448 WriteUsedItemCountToFile();
450 Hash deleted_hash = HashFingerprint(fingerprint);
452 // Find the range of "stuff" in the hash table that is adjacent to this
453 // fingerprint. These are things that could be affected by the change in
454 // the hash table. Since we use linear probing, anything after the deleted
455 // item up until an empty item could be affected.
456 Hash end_range = deleted_hash;
457 while (true) {
458 Hash next_hash = IncrementHash(end_range);
459 if (next_hash == deleted_hash)
460 break; // We wrapped around and the whole table is full.
461 if (!hash_table_[next_hash])
462 break; // Found the last spot.
463 end_range = next_hash;
466 // We could get all fancy and move the affected fingerprints around, but
467 // instead we just remove them all and re-add them (minus our deleted one).
468 // This will mean there's a small window of time where the affected links
469 // won't be marked visited.
470 base::StackVector<Fingerprint, 32> shuffled_fingerprints;
471 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
472 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
473 if (hash_table_[i] != fingerprint) {
474 // Don't save the one we're deleting!
475 shuffled_fingerprints->push_back(hash_table_[i]);
477 // This will balance the increment of this value in AddFingerprint below
478 // so there is no net change.
479 used_items_--;
481 hash_table_[i] = null_fingerprint_;
484 if (!shuffled_fingerprints->empty()) {
485 // Need to add the new items back.
486 for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
487 AddFingerprint(shuffled_fingerprints[i], false);
490 // Write the affected range to disk [deleted_hash, end_range].
491 if (update_file && persist_to_disk_)
492 WriteHashRangeToFile(deleted_hash, end_range);
494 return true;
497 void VisitedLinkMaster::WriteFullTable() {
498 // This function can get called when the file is open, for example, when we
499 // resize the table. We must handle this case and not try to reopen the file,
500 // since there may be write operations pending on the file I/O thread.
502 // Note that once we start writing, we do not delete on error. This means
503 // there can be a partial file, but the short file will be detected next time
504 // we start, and will be replaced.
506 // This might possibly get corrupted if we crash in the middle of writing.
507 // We should pick up the most common types of these failures when we notice
508 // that the file size is different when we load it back in, and then we will
509 // regenerate the table.
510 DCHECK(persist_to_disk_);
512 if (!file_) {
513 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
514 base::FilePath filename;
515 GetDatabaseFileName(&filename);
516 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
519 // Write the new header.
520 int32 header[4];
521 header[0] = kFileSignature;
522 header[1] = kFileCurrentVersion;
523 header[2] = table_length_;
524 header[3] = used_items_;
525 WriteToFile(file_, 0, header, sizeof(header));
526 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
528 // Write the hash data.
529 WriteToFile(file_, kFileHeaderSize,
530 hash_table_, table_length_ * sizeof(Fingerprint));
532 // The hash table may have shrunk, so make sure this is the end.
533 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
536 bool VisitedLinkMaster::InitFromFile() {
537 DCHECK(file_ == NULL);
538 DCHECK(persist_to_disk_);
540 base::FilePath filename;
541 GetDatabaseFileName(&filename);
542 base::ScopedFILE file_closer(base::OpenFile(filename, "rb+"));
543 if (!file_closer.get())
544 return false;
546 int32 num_entries, used_count;
547 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
548 return false; // Header isn't valid.
550 // Allocate and read the table.
551 if (!CreateURLTable(num_entries, false))
552 return false;
553 if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
554 hash_table_, num_entries * sizeof(Fingerprint))) {
555 FreeURLTable();
556 return false;
558 used_items_ = used_count;
560 #ifndef NDEBUG
561 DebugValidate();
562 #endif
564 file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
565 *file_ = file_closer.release();
566 return true;
569 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
570 int32 table_size = kDefaultTableSize;
571 if (table_size_override_)
572 table_size = table_size_override_;
574 // The salt must be generated before the table so that it can be copied to
575 // the shared memory.
576 GenerateSalt(salt_);
577 if (!CreateURLTable(table_size, true))
578 return false;
580 #ifndef NDEBUG
581 DebugValidate();
582 #endif
584 if (suppress_rebuild && persist_to_disk_) {
585 // When we disallow rebuilds (normally just unit tests), just use the
586 // current empty table.
587 WriteFullTable();
588 return true;
591 // This will build the table from history. On the first run, history will
592 // be empty, so this will be correct. This will also write the new table
593 // to disk. We don't want to save explicitly here, since the rebuild may
594 // not complete, leaving us with an empty but valid visited link database.
595 // In the future, we won't know we need to try rebuilding again.
596 return RebuildTableFromDelegate();
599 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
600 int32* num_entries,
601 int32* used_count,
602 uint8 salt[LINK_SALT_LENGTH]) {
603 DCHECK(persist_to_disk_);
605 // Get file size.
606 // Note that there is no need to seek back to the original location in the
607 // file since ReadFromFile() [which is the next call accessing the file]
608 // seeks before reading.
609 if (fseek(file, 0, SEEK_END) == -1)
610 return false;
611 size_t file_size = ftell(file);
613 if (file_size <= kFileHeaderSize)
614 return false;
616 uint8 header[kFileHeaderSize];
617 if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
618 return false;
620 // Verify the signature.
621 int32 signature;
622 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
623 if (signature != kFileSignature)
624 return false;
626 // Verify the version is up-to-date. As with other read errors, a version
627 // mistmatch will trigger a rebuild of the database from history, which will
628 // have the effect of migrating the database.
629 int32 version;
630 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
631 if (version != kFileCurrentVersion)
632 return false; // Bad version.
634 // Read the table size and make sure it matches the file size.
635 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
636 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
637 return false; // Bad size.
639 // Read the used item count.
640 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
641 if (*used_count > *num_entries)
642 return false; // Bad used item count;
644 // Read the salt.
645 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
647 // This file looks OK from the header's perspective.
648 return true;
651 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
652 if (!database_name_override_.empty()) {
653 // use this filename, the directory must exist
654 *filename = database_name_override_;
655 return true;
658 if (!browser_context_ || browser_context_->GetPath().empty())
659 return false;
661 base::FilePath profile_dir = browser_context_->GetPath();
662 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
663 return true;
666 // Initializes the shared memory structure. The salt should already be filled
667 // in so that it can be written to the shared memory
668 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
669 // The table is the size of the table followed by the entries.
670 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
672 // Create the shared memory object.
673 shared_memory_ = new base::SharedMemory();
674 if (!shared_memory_)
675 return false;
677 base::SharedMemoryCreateOptions options;
678 options.size = alloc_size;
679 options.share_read_only = true;
681 if (!shared_memory_->Create(options) || !shared_memory_->Map(alloc_size)) {
682 delete shared_memory_;
683 shared_memory_ = NULL;
684 return false;
687 if (init_to_empty) {
688 memset(shared_memory_->memory(), 0, alloc_size);
689 used_items_ = 0;
691 table_length_ = num_entries;
693 // Save the header for other processes to read.
694 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
695 header->length = table_length_;
696 memcpy(header->salt, salt_, LINK_SALT_LENGTH);
698 // Our table pointer is just the data immediately following the size.
699 hash_table_ = reinterpret_cast<Fingerprint*>(
700 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
702 return true;
705 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
706 base::SharedMemory *old_shared_memory = shared_memory_;
707 Fingerprint* old_hash_table = hash_table_;
708 int32 old_table_length = table_length_;
709 if (!CreateURLTable(num_entries, true)) {
710 // Try to put back the old state.
711 shared_memory_ = old_shared_memory;
712 hash_table_ = old_hash_table;
713 table_length_ = old_table_length;
714 return false;
717 #ifndef NDEBUG
718 DebugValidate();
719 #endif
721 return true;
724 void VisitedLinkMaster::FreeURLTable() {
725 if (shared_memory_) {
726 delete shared_memory_;
727 shared_memory_ = NULL;
729 if (!persist_to_disk_ || !file_)
730 return;
731 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
732 // AsyncClose() will close the file and free the memory pointed by |file_|.
733 file_ = NULL;
736 bool VisitedLinkMaster::ResizeTableIfNecessary() {
737 DCHECK(table_length_ > 0) << "Must have a table";
739 // Load limits for good performance/space. We are pretty conservative about
740 // keeping the table not very full. This is because we use linear probing
741 // which increases the likelihood of clumps of entries which will reduce
742 // performance.
743 const float max_table_load = 0.5f; // Grow when we're > this full.
744 const float min_table_load = 0.2f; // Shrink when we're < this full.
746 float load = ComputeTableLoad();
747 if (load < max_table_load &&
748 (table_length_ <= static_cast<float>(kDefaultTableSize) ||
749 load > min_table_load))
750 return false;
752 // Table needs to grow or shrink.
753 int new_size = NewTableSizeForCount(used_items_);
754 DCHECK(new_size > used_items_);
755 DCHECK(load <= min_table_load || new_size > table_length_);
756 ResizeTable(new_size);
757 return true;
760 void VisitedLinkMaster::ResizeTable(int32 new_size) {
761 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
762 shared_memory_serial_++;
764 #ifndef NDEBUG
765 DebugValidate();
766 #endif
768 base::SharedMemory* old_shared_memory = shared_memory_;
769 Fingerprint* old_hash_table = hash_table_;
770 int32 old_table_length = table_length_;
771 if (!BeginReplaceURLTable(new_size))
772 return;
774 // Now we have two tables, our local copy which is the old one, and the new
775 // one loaded into this object where we need to copy the data.
776 for (int32 i = 0; i < old_table_length; i++) {
777 Fingerprint cur = old_hash_table[i];
778 if (cur)
779 AddFingerprint(cur, false);
782 // On error unmapping, just forget about it since we can't do anything
783 // else to release it.
784 delete old_shared_memory;
786 // Send an update notification to all child processes so they read the new
787 // table.
788 listener_->NewTable(shared_memory_);
790 #ifndef NDEBUG
791 DebugValidate();
792 #endif
794 // The new table needs to be written to disk.
795 if (persist_to_disk_)
796 WriteFullTable();
799 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
800 // These table sizes are selected to be the maximum prime number less than
801 // a "convenient" multiple of 1K.
802 static const int table_sizes[] = {
803 16381, // 16K = 16384 <- don't shrink below this table size
804 // (should be == default_table_size)
805 32767, // 32K = 32768
806 65521, // 64K = 65536
807 130051, // 128K = 131072
808 262127, // 256K = 262144
809 524269, // 512K = 524288
810 1048549, // 1M = 1048576
811 2097143, // 2M = 2097152
812 4194301, // 4M = 4194304
813 8388571, // 8M = 8388608
814 16777199, // 16M = 16777216
815 33554347}; // 32M = 33554432
817 // Try to leave the table 33% full.
818 int desired = item_count * 3;
820 // Find the closest prime.
821 for (size_t i = 0; i < arraysize(table_sizes); i ++) {
822 if (table_sizes[i] > desired)
823 return table_sizes[i];
826 // Growing very big, just approximate a "good" number, not growing as much
827 // as normal.
828 return item_count * 2 - 1;
831 // See the TableBuilder definition in the header file for how this works.
832 bool VisitedLinkMaster::RebuildTableFromDelegate() {
833 DCHECK(!table_builder_.get());
835 // TODO(brettw) make sure we have reasonable salt!
836 table_builder_ = new TableBuilder(this, salt_);
837 delegate_->RebuildTable(table_builder_);
838 return true;
841 // See the TableBuilder declaration above for how this works.
842 void VisitedLinkMaster::OnTableRebuildComplete(
843 bool success,
844 const std::vector<Fingerprint>& fingerprints) {
845 if (success) {
846 // Replace the old table with a new blank one.
847 shared_memory_serial_++;
849 // We are responsible for freeing it AFTER it has been replaced if
850 // replacement succeeds.
851 base::SharedMemory* old_shared_memory = shared_memory_;
853 int new_table_size = NewTableSizeForCount(
854 static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
855 if (BeginReplaceURLTable(new_table_size)) {
856 // Free the old table.
857 delete old_shared_memory;
859 // Add the stored fingerprints to the hash table.
860 for (size_t i = 0; i < fingerprints.size(); i++)
861 AddFingerprint(fingerprints[i], false);
863 // Also add anything that was added while we were asynchronously
864 // generating the new table.
865 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
866 i != added_since_rebuild_.end(); ++i)
867 AddFingerprint(*i, false);
868 added_since_rebuild_.clear();
870 // Now handle deletions.
871 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
872 deleted_since_rebuild_.clear();
874 // Send an update notification to all child processes.
875 listener_->NewTable(shared_memory_);
877 if (persist_to_disk_)
878 WriteFullTable();
881 table_builder_ = NULL; // Will release our reference to the builder.
883 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
884 if (!rebuild_complete_task_.is_null()) {
885 rebuild_complete_task_.Run();
886 rebuild_complete_task_.Reset();
890 void VisitedLinkMaster::WriteToFile(FILE** file,
891 off_t offset,
892 void* data,
893 int32 data_size) {
894 DCHECK(persist_to_disk_);
895 #ifndef NDEBUG
896 posted_asynchronous_operation_ = true;
897 #endif
898 PostIOTask(FROM_HERE,
899 base::Bind(&AsyncWrite, file, offset,
900 std::string(static_cast<const char*>(data), data_size)));
903 void VisitedLinkMaster::WriteUsedItemCountToFile() {
904 DCHECK(persist_to_disk_);
905 if (!file_)
906 return; // See comment on the file_ variable for why this might happen.
907 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
910 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
911 DCHECK(persist_to_disk_);
913 if (!file_)
914 return; // See comment on the file_ variable for why this might happen.
915 if (last_hash < first_hash) {
916 // Handle wraparound at 0. This first write is first_hash->EOF
917 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
918 &hash_table_[first_hash],
919 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
921 // Now do 0->last_lash.
922 WriteToFile(file_, kFileHeaderSize, hash_table_,
923 (last_hash + 1) * sizeof(Fingerprint));
924 } else {
925 // Normal case, just write the range.
926 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
927 &hash_table_[first_hash],
928 (last_hash - first_hash + 1) * sizeof(Fingerprint));
932 bool VisitedLinkMaster::ReadFromFile(FILE* file,
933 off_t offset,
934 void* data,
935 size_t data_size) {
936 DCHECK(persist_to_disk_);
937 #ifndef NDEBUG
938 // Since this function is synchronous, we require that no asynchronous
939 // operations could possibly be pending.
940 DCHECK(!posted_asynchronous_operation_);
941 #endif
943 if (fseek(file, offset, SEEK_SET) != 0)
944 return false;
946 size_t num_read = fread(data, 1, data_size, file);
947 return num_read == data_size;
950 // VisitedLinkTableBuilder ----------------------------------------------------
952 VisitedLinkMaster::TableBuilder::TableBuilder(
953 VisitedLinkMaster* master,
954 const uint8 salt[LINK_SALT_LENGTH])
955 : master_(master),
956 success_(true) {
957 fingerprints_.reserve(4096);
958 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
961 // TODO(brettw): Do we want to try to cancel the request if this happens? It
962 // could delay shutdown if there are a lot of URLs.
963 void VisitedLinkMaster::TableBuilder::DisownMaster() {
964 master_ = NULL;
967 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
968 if (!url.is_empty()) {
969 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
970 url.spec().data(), url.spec().length(), salt_));
974 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
975 success_ = success;
976 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
978 // Marshal to the main thread to notify the VisitedLinkMaster that the
979 // rebuild is complete.
980 BrowserThread::PostTask(
981 BrowserThread::UI, FROM_HERE,
982 base::Bind(&TableBuilder::OnCompleteMainThread, this));
985 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
986 if (master_)
987 master_->OnTableRebuildComplete(success_, fingerprints_);
990 } // namespace visitedlink