cc: Added inline to Tile::IsReadyToDraw
[chromium-blink-merge.git] / components / visitedlink / browser / visitedlink_master.cc
blob7cf68911af7fb69c4290f4946182d7e7f67a8662
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/visitedlink/browser/visitedlink_master.h"
7 #if defined(OS_WIN)
8 #include <windows.h>
9 #include <io.h>
10 #include <shlobj.h>
11 #endif // defined(OS_WIN)
12 #include <stdio.h>
14 #include <algorithm>
16 #include "base/bind.h"
17 #include "base/bind_helpers.h"
18 #include "base/containers/stack_container.h"
19 #include "base/file_util.h"
20 #include "base/logging.h"
21 #include "base/message_loop/message_loop.h"
22 #include "base/path_service.h"
23 #include "base/rand_util.h"
24 #include "base/strings/string_util.h"
25 #include "base/threading/thread_restrictions.h"
26 #include "components/visitedlink/browser/visitedlink_delegate.h"
27 #include "components/visitedlink/browser/visitedlink_event_listener.h"
28 #include "content/public/browser/browser_context.h"
29 #include "content/public/browser/browser_thread.h"
30 #include "url/gurl.h"
32 using content::BrowserThread;
33 using file_util::ScopedFILE;
34 using file_util::OpenFile;
35 using file_util::TruncateFile;
37 namespace visitedlink {
39 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
40 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
41 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
42 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
43 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
45 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
47 // the signature at the beginning of the URL table = "VLnk" (visited links)
48 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
49 const size_t VisitedLinkMaster::kFileHeaderSize =
50 kFileHeaderSaltOffset + LINK_SALT_LENGTH;
52 // This value should also be the same as the smallest size in the lookup
53 // table in NewTableSizeForCount (prime number).
54 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
56 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
58 namespace {
60 // Fills the given salt structure with some quasi-random values
61 // It is not necessary to generate a cryptographically strong random string,
62 // only that it be reasonably different for different users.
63 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
64 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
65 uint64 randval = base::RandUint64();
66 memcpy(salt, &randval, 8);
69 // Opens file on a background thread to not block UI thread.
70 void AsyncOpen(FILE** file, const base::FilePath& filename) {
71 *file = OpenFile(filename, "wb+");
72 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
75 // Returns true if the write was complete.
76 static bool WriteToFile(FILE* file,
77 off_t offset,
78 const void* data,
79 size_t data_len) {
80 if (fseek(file, offset, SEEK_SET) != 0)
81 return false; // Don't write to an invalid part of the file.
83 size_t num_written = fwrite(data, 1, data_len, file);
85 // The write may not make it to the kernel (stdlib may buffer the write)
86 // until the next fseek/fclose call. If we crash, it's easy for our used
87 // item count to be out of sync with the number of hashes we write.
88 // Protect against this by calling fflush.
89 int ret = fflush(file);
90 DCHECK_EQ(0, ret);
91 return num_written == data_len;
94 // This task executes on a background thread and executes a write. This
95 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
96 // is used because file may still not be opened by the time of scheduling
97 // the task for execution.
98 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
99 if (*file)
100 WriteToFile(*file, offset, data.data(), data.size());
103 // Truncates the file to the current position asynchronously on a background
104 // thread. Double pointer to FILE is used because file may still not be opened
105 // by the time of scheduling the task for execution.
106 void AsyncTruncate(FILE** file) {
107 if (*file)
108 base::IgnoreResult(TruncateFile(*file));
111 // Closes the file on a background thread and releases memory used for storage
112 // of FILE* value. Double pointer to FILE is used because file may still not
113 // be opened by the time of scheduling the task for execution.
114 void AsyncClose(FILE** file) {
115 if (*file)
116 base::IgnoreResult(fclose(*file));
117 free(file);
120 } // namespace
122 // TableBuilder ---------------------------------------------------------------
124 // How rebuilding from history works
125 // ---------------------------------
127 // We mark that we're rebuilding from history by setting the table_builder_
128 // member in VisitedLinkMaster to the TableBuilder we create. This builder
129 // will be called on the history thread by the history system for every URL
130 // in the database.
132 // The builder will store the fingerprints for those URLs, and then marshalls
133 // back to the main thread where the VisitedLinkMaster will be notified. The
134 // master then replaces its table with a new table containing the computed
135 // fingerprints.
137 // The builder must remain active while the history system is using it.
138 // Sometimes, the master will be deleted before the rebuild is complete, in
139 // which case it notifies the builder via DisownMaster(). The builder will
140 // delete itself once rebuilding is complete, and not execute any callback.
141 class VisitedLinkMaster::TableBuilder
142 : public VisitedLinkDelegate::URLEnumerator {
143 public:
144 TableBuilder(VisitedLinkMaster* master,
145 const uint8 salt[LINK_SALT_LENGTH]);
147 // Called on the main thread when the master is being destroyed. This will
148 // prevent a crash when the query completes and the master is no longer
149 // around. We can not actually do anything but mark this fact, since the
150 // table will be being rebuilt simultaneously on the other thread.
151 void DisownMaster();
153 // VisitedLinkDelegate::URLEnumerator
154 virtual void OnURL(const GURL& url) OVERRIDE;
155 virtual void OnComplete(bool succeed) OVERRIDE;
157 private:
158 virtual ~TableBuilder() {}
160 // OnComplete mashals to this function on the main thread to do the
161 // notification.
162 void OnCompleteMainThread();
164 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
165 VisitedLinkMaster* master_;
167 // Indicates whether the operation has failed or not.
168 bool success_;
170 // Salt for this new table.
171 uint8 salt_[LINK_SALT_LENGTH];
173 // Stores the fingerprints we computed on the background thread.
174 VisitedLinkCommon::Fingerprints fingerprints_;
176 DISALLOW_COPY_AND_ASSIGN(TableBuilder);
179 // VisitedLinkMaster ----------------------------------------------------------
181 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
182 VisitedLinkDelegate* delegate,
183 bool persist_to_disk)
184 : browser_context_(browser_context),
185 delegate_(delegate),
186 listener_(new VisitedLinkEventListener(this, browser_context)),
187 persist_to_disk_(persist_to_disk) {
188 InitMembers();
191 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
192 VisitedLinkDelegate* delegate,
193 bool persist_to_disk,
194 bool suppress_rebuild,
195 const base::FilePath& filename,
196 int32 default_table_size)
197 : browser_context_(NULL),
198 delegate_(delegate),
199 persist_to_disk_(persist_to_disk) {
200 listener_.reset(listener);
201 DCHECK(listener_.get());
202 InitMembers();
204 database_name_override_ = filename;
205 table_size_override_ = default_table_size;
206 suppress_rebuild_ = suppress_rebuild;
209 VisitedLinkMaster::~VisitedLinkMaster() {
210 if (table_builder_.get()) {
211 // Prevent the table builder from calling us back now that we're being
212 // destroyed. Note that we DON'T delete the object, since the history
213 // system is still writing into it. When that is complete, the table
214 // builder will destroy itself when it finds we are gone.
215 table_builder_->DisownMaster();
217 FreeURLTable();
218 // FreeURLTable() will schedule closing of the file and deletion of |file_|.
219 // So nothing should be done here.
222 void VisitedLinkMaster::InitMembers() {
223 file_ = NULL;
224 shared_memory_ = NULL;
225 shared_memory_serial_ = 0;
226 used_items_ = 0;
227 table_size_override_ = 0;
228 suppress_rebuild_ = false;
229 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
231 #ifndef NDEBUG
232 posted_asynchronous_operation_ = false;
233 #endif
236 bool VisitedLinkMaster::Init() {
237 // We probably shouldn't be loading this from the UI thread,
238 // but it does need to happen early on in startup.
239 // http://code.google.com/p/chromium/issues/detail?id=24163
240 base::ThreadRestrictions::ScopedAllowIO allow_io;
242 if (persist_to_disk_) {
243 if (InitFromFile())
244 return true;
246 return InitFromScratch(suppress_rebuild_);
249 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
250 // Extra check that we are not incognito. This should not happen.
251 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
252 // removed from BrowserContext.
253 if (browser_context_ && browser_context_->IsOffTheRecord()) {
254 NOTREACHED();
255 return null_hash_;
258 if (!url.is_valid())
259 return null_hash_; // Don't add invalid URLs.
261 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
262 url.spec().size(),
263 salt_);
264 if (table_builder_.get()) {
265 // If we have a pending delete for this fingerprint, cancel it.
266 std::set<Fingerprint>::iterator found =
267 deleted_since_rebuild_.find(fingerprint);
268 if (found != deleted_since_rebuild_.end())
269 deleted_since_rebuild_.erase(found);
271 // A rebuild is in progress, save this addition in the temporary list so
272 // it can be added once rebuild is complete.
273 added_since_rebuild_.insert(fingerprint);
276 // If the table is "full", we don't add URLs and just drop them on the floor.
277 // This can happen if we get thousands of new URLs and something causes
278 // the table resizing to fail. This check prevents a hang in that case. Note
279 // that this is *not* the resize limit, this is just a sanity check.
280 if (used_items_ / 8 > table_length_ / 10)
281 return null_hash_; // Table is more than 80% full.
283 return AddFingerprint(fingerprint, true);
286 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
287 const base::Closure& task) {
288 DCHECK(persist_to_disk_);
289 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
290 from_here, task);
293 void VisitedLinkMaster::AddURL(const GURL& url) {
294 Hash index = TryToAddURL(url);
295 if (!table_builder_.get() && index != null_hash_) {
296 // Not rebuilding, so we want to keep the file on disk up-to-date.
297 if (persist_to_disk_) {
298 WriteUsedItemCountToFile();
299 WriteHashRangeToFile(index, index);
301 ResizeTableIfNecessary();
305 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
306 for (std::vector<GURL>::const_iterator i = url.begin();
307 i != url.end(); ++i) {
308 Hash index = TryToAddURL(*i);
309 if (!table_builder_.get() && index != null_hash_)
310 ResizeTableIfNecessary();
313 // Keeps the file on disk up-to-date.
314 if (!table_builder_.get() && persist_to_disk_)
315 WriteFullTable();
318 void VisitedLinkMaster::DeleteAllURLs() {
319 // Any pending modifications are invalid.
320 added_since_rebuild_.clear();
321 deleted_since_rebuild_.clear();
323 // Clear the hash table.
324 used_items_ = 0;
325 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
327 // Resize it if it is now too empty. Resize may write the new table out for
328 // us, otherwise, schedule writing the new table to disk ourselves.
329 if (!ResizeTableIfNecessary() && persist_to_disk_)
330 WriteFullTable();
332 listener_->Reset();
335 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
336 return delegate_;
339 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
340 if (!urls->HasNextURL())
341 return;
343 listener_->Reset();
345 if (table_builder_.get()) {
346 // A rebuild is in progress, save this deletion in the temporary list so
347 // it can be added once rebuild is complete.
348 while (urls->HasNextURL()) {
349 const GURL& url(urls->NextURL());
350 if (!url.is_valid())
351 continue;
353 Fingerprint fingerprint =
354 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
355 deleted_since_rebuild_.insert(fingerprint);
357 // If the URL was just added and now we're deleting it, it may be in the
358 // list of things added since the last rebuild. Delete it from that list.
359 std::set<Fingerprint>::iterator found =
360 added_since_rebuild_.find(fingerprint);
361 if (found != added_since_rebuild_.end())
362 added_since_rebuild_.erase(found);
364 // Delete the URLs from the in-memory table, but don't bother writing
365 // to disk since it will be replaced soon.
366 DeleteFingerprint(fingerprint, false);
368 return;
371 // Compute the deleted URLs' fingerprints and delete them
372 std::set<Fingerprint> deleted_fingerprints;
373 while (urls->HasNextURL()) {
374 const GURL& url(urls->NextURL());
375 if (!url.is_valid())
376 continue;
377 deleted_fingerprints.insert(
378 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
380 DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
383 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
384 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
385 Fingerprint fingerprint,
386 bool send_notifications) {
387 if (!hash_table_ || table_length_ == 0) {
388 NOTREACHED(); // Not initialized.
389 return null_hash_;
392 Hash cur_hash = HashFingerprint(fingerprint);
393 Hash first_hash = cur_hash;
394 while (true) {
395 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
396 if (cur_fingerprint == fingerprint)
397 return null_hash_; // This fingerprint is already in there, do nothing.
399 if (cur_fingerprint == null_fingerprint_) {
400 // End of probe sequence found, insert here.
401 hash_table_[cur_hash] = fingerprint;
402 used_items_++;
403 // If allowed, notify listener that a new visited link was added.
404 if (send_notifications)
405 listener_->Add(fingerprint);
406 return cur_hash;
409 // Advance in the probe sequence.
410 cur_hash = IncrementHash(cur_hash);
411 if (cur_hash == first_hash) {
412 // This means that we've wrapped around and are about to go into an
413 // infinite loop. Something was wrong with the hashtable resizing
414 // logic, so stop here.
415 NOTREACHED();
416 return null_hash_;
421 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
422 const std::set<Fingerprint>& fingerprints) {
423 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
425 // Delete the URLs from the table.
426 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
427 i != fingerprints.end(); ++i)
428 DeleteFingerprint(*i, !bulk_write);
430 // These deleted fingerprints may make us shrink the table.
431 if (ResizeTableIfNecessary())
432 return; // The resize function wrote the new table to disk for us.
434 // Nobody wrote this out for us, write the full file to disk.
435 if (bulk_write && persist_to_disk_)
436 WriteFullTable();
439 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
440 bool update_file) {
441 if (!hash_table_ || table_length_ == 0) {
442 NOTREACHED(); // Not initialized.
443 return false;
445 if (!IsVisited(fingerprint))
446 return false; // Not in the database to delete.
448 // First update the header used count.
449 used_items_--;
450 if (update_file && persist_to_disk_)
451 WriteUsedItemCountToFile();
453 Hash deleted_hash = HashFingerprint(fingerprint);
455 // Find the range of "stuff" in the hash table that is adjacent to this
456 // fingerprint. These are things that could be affected by the change in
457 // the hash table. Since we use linear probing, anything after the deleted
458 // item up until an empty item could be affected.
459 Hash end_range = deleted_hash;
460 while (true) {
461 Hash next_hash = IncrementHash(end_range);
462 if (next_hash == deleted_hash)
463 break; // We wrapped around and the whole table is full.
464 if (!hash_table_[next_hash])
465 break; // Found the last spot.
466 end_range = next_hash;
469 // We could get all fancy and move the affected fingerprints around, but
470 // instead we just remove them all and re-add them (minus our deleted one).
471 // This will mean there's a small window of time where the affected links
472 // won't be marked visited.
473 base::StackVector<Fingerprint, 32> shuffled_fingerprints;
474 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
475 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
476 if (hash_table_[i] != fingerprint) {
477 // Don't save the one we're deleting!
478 shuffled_fingerprints->push_back(hash_table_[i]);
480 // This will balance the increment of this value in AddFingerprint below
481 // so there is no net change.
482 used_items_--;
484 hash_table_[i] = null_fingerprint_;
487 if (!shuffled_fingerprints->empty()) {
488 // Need to add the new items back.
489 for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
490 AddFingerprint(shuffled_fingerprints[i], false);
493 // Write the affected range to disk [deleted_hash, end_range].
494 if (update_file && persist_to_disk_)
495 WriteHashRangeToFile(deleted_hash, end_range);
497 return true;
500 void VisitedLinkMaster::WriteFullTable() {
501 // This function can get called when the file is open, for example, when we
502 // resize the table. We must handle this case and not try to reopen the file,
503 // since there may be write operations pending on the file I/O thread.
505 // Note that once we start writing, we do not delete on error. This means
506 // there can be a partial file, but the short file will be detected next time
507 // we start, and will be replaced.
509 // This might possibly get corrupted if we crash in the middle of writing.
510 // We should pick up the most common types of these failures when we notice
511 // that the file size is different when we load it back in, and then we will
512 // regenerate the table.
513 DCHECK(persist_to_disk_);
515 if (!file_) {
516 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
517 base::FilePath filename;
518 GetDatabaseFileName(&filename);
519 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
522 // Write the new header.
523 int32 header[4];
524 header[0] = kFileSignature;
525 header[1] = kFileCurrentVersion;
526 header[2] = table_length_;
527 header[3] = used_items_;
528 WriteToFile(file_, 0, header, sizeof(header));
529 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
531 // Write the hash data.
532 WriteToFile(file_, kFileHeaderSize,
533 hash_table_, table_length_ * sizeof(Fingerprint));
535 // The hash table may have shrunk, so make sure this is the end.
536 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
539 bool VisitedLinkMaster::InitFromFile() {
540 DCHECK(file_ == NULL);
541 DCHECK(persist_to_disk_);
543 base::FilePath filename;
544 GetDatabaseFileName(&filename);
545 ScopedFILE file_closer(OpenFile(filename, "rb+"));
546 if (!file_closer.get())
547 return false;
549 int32 num_entries, used_count;
550 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
551 return false; // Header isn't valid.
553 // Allocate and read the table.
554 if (!CreateURLTable(num_entries, false))
555 return false;
556 if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
557 hash_table_, num_entries * sizeof(Fingerprint))) {
558 FreeURLTable();
559 return false;
561 used_items_ = used_count;
563 #ifndef NDEBUG
564 DebugValidate();
565 #endif
567 file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
568 *file_ = file_closer.release();
569 return true;
572 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
573 int32 table_size = kDefaultTableSize;
574 if (table_size_override_)
575 table_size = table_size_override_;
577 // The salt must be generated before the table so that it can be copied to
578 // the shared memory.
579 GenerateSalt(salt_);
580 if (!CreateURLTable(table_size, true))
581 return false;
583 #ifndef NDEBUG
584 DebugValidate();
585 #endif
587 if (suppress_rebuild && persist_to_disk_) {
588 // When we disallow rebuilds (normally just unit tests), just use the
589 // current empty table.
590 WriteFullTable();
591 return true;
594 // This will build the table from history. On the first run, history will
595 // be empty, so this will be correct. This will also write the new table
596 // to disk. We don't want to save explicitly here, since the rebuild may
597 // not complete, leaving us with an empty but valid visited link database.
598 // In the future, we won't know we need to try rebuilding again.
599 return RebuildTableFromDelegate();
602 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
603 int32* num_entries,
604 int32* used_count,
605 uint8 salt[LINK_SALT_LENGTH]) {
606 DCHECK(persist_to_disk_);
608 // Get file size.
609 // Note that there is no need to seek back to the original location in the
610 // file since ReadFromFile() [which is the next call accessing the file]
611 // seeks before reading.
612 if (fseek(file, 0, SEEK_END) == -1)
613 return false;
614 size_t file_size = ftell(file);
616 if (file_size <= kFileHeaderSize)
617 return false;
619 uint8 header[kFileHeaderSize];
620 if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
621 return false;
623 // Verify the signature.
624 int32 signature;
625 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
626 if (signature != kFileSignature)
627 return false;
629 // Verify the version is up-to-date. As with other read errors, a version
630 // mistmatch will trigger a rebuild of the database from history, which will
631 // have the effect of migrating the database.
632 int32 version;
633 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
634 if (version != kFileCurrentVersion)
635 return false; // Bad version.
637 // Read the table size and make sure it matches the file size.
638 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
639 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
640 return false; // Bad size.
642 // Read the used item count.
643 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
644 if (*used_count > *num_entries)
645 return false; // Bad used item count;
647 // Read the salt.
648 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
650 // This file looks OK from the header's perspective.
651 return true;
654 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
655 if (!database_name_override_.empty()) {
656 // use this filename, the directory must exist
657 *filename = database_name_override_;
658 return true;
661 if (!browser_context_ || browser_context_->GetPath().empty())
662 return false;
664 base::FilePath profile_dir = browser_context_->GetPath();
665 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
666 return true;
669 // Initializes the shared memory structure. The salt should already be filled
670 // in so that it can be written to the shared memory
671 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
672 // The table is the size of the table followed by the entries.
673 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
675 // Create the shared memory object.
676 shared_memory_ = new base::SharedMemory();
677 if (!shared_memory_)
678 return false;
680 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
681 delete shared_memory_;
682 shared_memory_ = NULL;
683 return false;
686 if (init_to_empty) {
687 memset(shared_memory_->memory(), 0, alloc_size);
688 used_items_ = 0;
690 table_length_ = num_entries;
692 // Save the header for other processes to read.
693 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
694 header->length = table_length_;
695 memcpy(header->salt, salt_, LINK_SALT_LENGTH);
697 // Our table pointer is just the data immediately following the size.
698 hash_table_ = reinterpret_cast<Fingerprint*>(
699 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
701 return true;
704 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
705 base::SharedMemory *old_shared_memory = shared_memory_;
706 Fingerprint* old_hash_table = hash_table_;
707 int32 old_table_length = table_length_;
708 if (!CreateURLTable(num_entries, true)) {
709 // Try to put back the old state.
710 shared_memory_ = old_shared_memory;
711 hash_table_ = old_hash_table;
712 table_length_ = old_table_length;
713 return false;
716 #ifndef NDEBUG
717 DebugValidate();
718 #endif
720 return true;
723 void VisitedLinkMaster::FreeURLTable() {
724 if (shared_memory_) {
725 delete shared_memory_;
726 shared_memory_ = NULL;
728 if (!persist_to_disk_ || !file_)
729 return;
730 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
731 // AsyncClose() will close the file and free the memory pointed by |file_|.
732 file_ = NULL;
735 bool VisitedLinkMaster::ResizeTableIfNecessary() {
736 DCHECK(table_length_ > 0) << "Must have a table";
738 // Load limits for good performance/space. We are pretty conservative about
739 // keeping the table not very full. This is because we use linear probing
740 // which increases the likelihood of clumps of entries which will reduce
741 // performance.
742 const float max_table_load = 0.5f; // Grow when we're > this full.
743 const float min_table_load = 0.2f; // Shrink when we're < this full.
745 float load = ComputeTableLoad();
746 if (load < max_table_load &&
747 (table_length_ <= static_cast<float>(kDefaultTableSize) ||
748 load > min_table_load))
749 return false;
751 // Table needs to grow or shrink.
752 int new_size = NewTableSizeForCount(used_items_);
753 DCHECK(new_size > used_items_);
754 DCHECK(load <= min_table_load || new_size > table_length_);
755 ResizeTable(new_size);
756 return true;
759 void VisitedLinkMaster::ResizeTable(int32 new_size) {
760 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
761 shared_memory_serial_++;
763 #ifndef NDEBUG
764 DebugValidate();
765 #endif
767 base::SharedMemory* old_shared_memory = shared_memory_;
768 Fingerprint* old_hash_table = hash_table_;
769 int32 old_table_length = table_length_;
770 if (!BeginReplaceURLTable(new_size))
771 return;
773 // Now we have two tables, our local copy which is the old one, and the new
774 // one loaded into this object where we need to copy the data.
775 for (int32 i = 0; i < old_table_length; i++) {
776 Fingerprint cur = old_hash_table[i];
777 if (cur)
778 AddFingerprint(cur, false);
781 // On error unmapping, just forget about it since we can't do anything
782 // else to release it.
783 delete old_shared_memory;
785 // Send an update notification to all child processes so they read the new
786 // table.
787 listener_->NewTable(shared_memory_);
789 #ifndef NDEBUG
790 DebugValidate();
791 #endif
793 // The new table needs to be written to disk.
794 if (persist_to_disk_)
795 WriteFullTable();
798 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
799 // These table sizes are selected to be the maximum prime number less than
800 // a "convenient" multiple of 1K.
801 static const int table_sizes[] = {
802 16381, // 16K = 16384 <- don't shrink below this table size
803 // (should be == default_table_size)
804 32767, // 32K = 32768
805 65521, // 64K = 65536
806 130051, // 128K = 131072
807 262127, // 256K = 262144
808 524269, // 512K = 524288
809 1048549, // 1M = 1048576
810 2097143, // 2M = 2097152
811 4194301, // 4M = 4194304
812 8388571, // 8M = 8388608
813 16777199, // 16M = 16777216
814 33554347}; // 32M = 33554432
816 // Try to leave the table 33% full.
817 int desired = item_count * 3;
819 // Find the closest prime.
820 for (size_t i = 0; i < arraysize(table_sizes); i ++) {
821 if (table_sizes[i] > desired)
822 return table_sizes[i];
825 // Growing very big, just approximate a "good" number, not growing as much
826 // as normal.
827 return item_count * 2 - 1;
830 // See the TableBuilder definition in the header file for how this works.
831 bool VisitedLinkMaster::RebuildTableFromDelegate() {
832 DCHECK(!table_builder_.get());
834 // TODO(brettw) make sure we have reasonable salt!
835 table_builder_ = new TableBuilder(this, salt_);
836 delegate_->RebuildTable(table_builder_);
837 return true;
840 // See the TableBuilder declaration above for how this works.
841 void VisitedLinkMaster::OnTableRebuildComplete(
842 bool success,
843 const std::vector<Fingerprint>& fingerprints) {
844 if (success) {
845 // Replace the old table with a new blank one.
846 shared_memory_serial_++;
848 // We are responsible for freeing it AFTER it has been replaced if
849 // replacement succeeds.
850 base::SharedMemory* old_shared_memory = shared_memory_;
852 int new_table_size = NewTableSizeForCount(
853 static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
854 if (BeginReplaceURLTable(new_table_size)) {
855 // Free the old table.
856 delete old_shared_memory;
858 // Add the stored fingerprints to the hash table.
859 for (size_t i = 0; i < fingerprints.size(); i++)
860 AddFingerprint(fingerprints[i], false);
862 // Also add anything that was added while we were asynchronously
863 // generating the new table.
864 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
865 i != added_since_rebuild_.end(); ++i)
866 AddFingerprint(*i, false);
867 added_since_rebuild_.clear();
869 // Now handle deletions.
870 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
871 deleted_since_rebuild_.clear();
873 // Send an update notification to all child processes.
874 listener_->NewTable(shared_memory_);
876 if (persist_to_disk_)
877 WriteFullTable();
880 table_builder_ = NULL; // Will release our reference to the builder.
882 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
883 if (!rebuild_complete_task_.is_null()) {
884 rebuild_complete_task_.Run();
885 rebuild_complete_task_.Reset();
889 void VisitedLinkMaster::WriteToFile(FILE** file,
890 off_t offset,
891 void* data,
892 int32 data_size) {
893 DCHECK(persist_to_disk_);
894 #ifndef NDEBUG
895 posted_asynchronous_operation_ = true;
896 #endif
897 PostIOTask(FROM_HERE,
898 base::Bind(&AsyncWrite, file, offset,
899 std::string(static_cast<const char*>(data), data_size)));
902 void VisitedLinkMaster::WriteUsedItemCountToFile() {
903 DCHECK(persist_to_disk_);
904 if (!file_)
905 return; // See comment on the file_ variable for why this might happen.
906 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
909 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
910 DCHECK(persist_to_disk_);
912 if (!file_)
913 return; // See comment on the file_ variable for why this might happen.
914 if (last_hash < first_hash) {
915 // Handle wraparound at 0. This first write is first_hash->EOF
916 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
917 &hash_table_[first_hash],
918 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
920 // Now do 0->last_lash.
921 WriteToFile(file_, kFileHeaderSize, hash_table_,
922 (last_hash + 1) * sizeof(Fingerprint));
923 } else {
924 // Normal case, just write the range.
925 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
926 &hash_table_[first_hash],
927 (last_hash - first_hash + 1) * sizeof(Fingerprint));
931 bool VisitedLinkMaster::ReadFromFile(FILE* file,
932 off_t offset,
933 void* data,
934 size_t data_size) {
935 DCHECK(persist_to_disk_);
936 #ifndef NDEBUG
937 // Since this function is synchronous, we require that no asynchronous
938 // operations could possibly be pending.
939 DCHECK(!posted_asynchronous_operation_);
940 #endif
942 if (fseek(file, offset, SEEK_SET) != 0)
943 return false;
945 size_t num_read = fread(data, 1, data_size, file);
946 return num_read == data_size;
949 // VisitedLinkTableBuilder ----------------------------------------------------
951 VisitedLinkMaster::TableBuilder::TableBuilder(
952 VisitedLinkMaster* master,
953 const uint8 salt[LINK_SALT_LENGTH])
954 : master_(master),
955 success_(true) {
956 fingerprints_.reserve(4096);
957 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
960 // TODO(brettw): Do we want to try to cancel the request if this happens? It
961 // could delay shutdown if there are a lot of URLs.
962 void VisitedLinkMaster::TableBuilder::DisownMaster() {
963 master_ = NULL;
966 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
967 if (!url.is_empty()) {
968 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
969 url.spec().data(), url.spec().length(), salt_));
973 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
974 success_ = success;
975 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
977 // Marshal to the main thread to notify the VisitedLinkMaster that the
978 // rebuild is complete.
979 BrowserThread::PostTask(
980 BrowserThread::UI, FROM_HERE,
981 base::Bind(&TableBuilder::OnCompleteMainThread, this));
984 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
985 if (master_)
986 master_->OnTableRebuildComplete(success_, fingerprints_);
989 } // namespace visitedlink