1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/visitedlink/browser/visitedlink_master.h"
11 #endif // defined(OS_WIN)
16 #include "base/bind.h"
17 #include "base/bind_helpers.h"
18 #include "base/containers/stack_container.h"
19 #include "base/file_util.h"
20 #include "base/files/scoped_file.h"
21 #include "base/logging.h"
22 #include "base/message_loop/message_loop.h"
23 #include "base/path_service.h"
24 #include "base/rand_util.h"
25 #include "base/strings/string_util.h"
26 #include "base/threading/thread_restrictions.h"
27 #include "components/visitedlink/browser/visitedlink_delegate.h"
28 #include "components/visitedlink/browser/visitedlink_event_listener.h"
29 #include "content/public/browser/browser_context.h"
30 #include "content/public/browser/browser_thread.h"
33 using content::BrowserThread
;
35 namespace visitedlink
{
37 const int32
VisitedLinkMaster::kFileHeaderSignatureOffset
= 0;
38 const int32
VisitedLinkMaster::kFileHeaderVersionOffset
= 4;
39 const int32
VisitedLinkMaster::kFileHeaderLengthOffset
= 8;
40 const int32
VisitedLinkMaster::kFileHeaderUsedOffset
= 12;
41 const int32
VisitedLinkMaster::kFileHeaderSaltOffset
= 16;
43 const int32
VisitedLinkMaster::kFileCurrentVersion
= 3;
45 // the signature at the beginning of the URL table = "VLnk" (visited links)
46 const int32
VisitedLinkMaster::kFileSignature
= 0x6b6e4c56;
47 const size_t VisitedLinkMaster::kFileHeaderSize
=
48 kFileHeaderSaltOffset
+ LINK_SALT_LENGTH
;
50 // This value should also be the same as the smallest size in the lookup
51 // table in NewTableSizeForCount (prime number).
52 const unsigned VisitedLinkMaster::kDefaultTableSize
= 16381;
54 const size_t VisitedLinkMaster::kBigDeleteThreshold
= 64;
58 // Fills the given salt structure with some quasi-random values
59 // It is not necessary to generate a cryptographically strong random string,
60 // only that it be reasonably different for different users.
61 void GenerateSalt(uint8 salt
[LINK_SALT_LENGTH
]) {
62 DCHECK_EQ(LINK_SALT_LENGTH
, 8) << "This code assumes the length of the salt";
63 uint64 randval
= base::RandUint64();
64 memcpy(salt
, &randval
, 8);
67 // Opens file on a background thread to not block UI thread.
68 void AsyncOpen(FILE** file
, const base::FilePath
& filename
) {
69 *file
= base::OpenFile(filename
, "wb+");
70 DLOG_IF(ERROR
, !(*file
)) << "Failed to open file " << filename
.value();
73 // Returns true if the write was complete.
74 static bool WriteToFile(FILE* file
,
78 if (fseek(file
, offset
, SEEK_SET
) != 0)
79 return false; // Don't write to an invalid part of the file.
81 size_t num_written
= fwrite(data
, 1, data_len
, file
);
83 // The write may not make it to the kernel (stdlib may buffer the write)
84 // until the next fseek/fclose call. If we crash, it's easy for our used
85 // item count to be out of sync with the number of hashes we write.
86 // Protect against this by calling fflush.
87 int ret
= fflush(file
);
89 return num_written
== data_len
;
92 // This task executes on a background thread and executes a write. This
93 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
94 // is used because file may still not be opened by the time of scheduling
95 // the task for execution.
96 void AsyncWrite(FILE** file
, int32 offset
, const std::string
& data
) {
98 WriteToFile(*file
, offset
, data
.data(), data
.size());
101 // Truncates the file to the current position asynchronously on a background
102 // thread. Double pointer to FILE is used because file may still not be opened
103 // by the time of scheduling the task for execution.
104 void AsyncTruncate(FILE** file
) {
106 base::IgnoreResult(base::TruncateFile(*file
));
109 // Closes the file on a background thread and releases memory used for storage
110 // of FILE* value. Double pointer to FILE is used because file may still not
111 // be opened by the time of scheduling the task for execution.
112 void AsyncClose(FILE** file
) {
114 base::IgnoreResult(fclose(*file
));
120 // TableBuilder ---------------------------------------------------------------
122 // How rebuilding from history works
123 // ---------------------------------
125 // We mark that we're rebuilding from history by setting the table_builder_
126 // member in VisitedLinkMaster to the TableBuilder we create. This builder
127 // will be called on the history thread by the history system for every URL
130 // The builder will store the fingerprints for those URLs, and then marshalls
131 // back to the main thread where the VisitedLinkMaster will be notified. The
132 // master then replaces its table with a new table containing the computed
135 // The builder must remain active while the history system is using it.
136 // Sometimes, the master will be deleted before the rebuild is complete, in
137 // which case it notifies the builder via DisownMaster(). The builder will
138 // delete itself once rebuilding is complete, and not execute any callback.
139 class VisitedLinkMaster::TableBuilder
140 : public VisitedLinkDelegate::URLEnumerator
{
142 TableBuilder(VisitedLinkMaster
* master
,
143 const uint8 salt
[LINK_SALT_LENGTH
]);
145 // Called on the main thread when the master is being destroyed. This will
146 // prevent a crash when the query completes and the master is no longer
147 // around. We can not actually do anything but mark this fact, since the
148 // table will be being rebuilt simultaneously on the other thread.
151 // VisitedLinkDelegate::URLEnumerator
152 virtual void OnURL(const GURL
& url
) OVERRIDE
;
153 virtual void OnComplete(bool succeed
) OVERRIDE
;
156 virtual ~TableBuilder() {}
158 // OnComplete mashals to this function on the main thread to do the
160 void OnCompleteMainThread();
162 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
163 VisitedLinkMaster
* master_
;
165 // Indicates whether the operation has failed or not.
168 // Salt for this new table.
169 uint8 salt_
[LINK_SALT_LENGTH
];
171 // Stores the fingerprints we computed on the background thread.
172 VisitedLinkCommon::Fingerprints fingerprints_
;
174 DISALLOW_COPY_AND_ASSIGN(TableBuilder
);
177 // VisitedLinkMaster ----------------------------------------------------------
179 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext
* browser_context
,
180 VisitedLinkDelegate
* delegate
,
181 bool persist_to_disk
)
182 : browser_context_(browser_context
),
184 listener_(new VisitedLinkEventListener(this, browser_context
)),
185 persist_to_disk_(persist_to_disk
) {
189 VisitedLinkMaster::VisitedLinkMaster(Listener
* listener
,
190 VisitedLinkDelegate
* delegate
,
191 bool persist_to_disk
,
192 bool suppress_rebuild
,
193 const base::FilePath
& filename
,
194 int32 default_table_size
)
195 : browser_context_(NULL
),
197 persist_to_disk_(persist_to_disk
) {
198 listener_
.reset(listener
);
199 DCHECK(listener_
.get());
202 database_name_override_
= filename
;
203 table_size_override_
= default_table_size
;
204 suppress_rebuild_
= suppress_rebuild
;
207 VisitedLinkMaster::~VisitedLinkMaster() {
208 if (table_builder_
.get()) {
209 // Prevent the table builder from calling us back now that we're being
210 // destroyed. Note that we DON'T delete the object, since the history
211 // system is still writing into it. When that is complete, the table
212 // builder will destroy itself when it finds we are gone.
213 table_builder_
->DisownMaster();
216 // FreeURLTable() will schedule closing of the file and deletion of |file_|.
217 // So nothing should be done here.
220 void VisitedLinkMaster::InitMembers() {
222 shared_memory_
= NULL
;
223 shared_memory_serial_
= 0;
225 table_size_override_
= 0;
226 suppress_rebuild_
= false;
227 sequence_token_
= BrowserThread::GetBlockingPool()->GetSequenceToken();
230 posted_asynchronous_operation_
= false;
234 bool VisitedLinkMaster::Init() {
235 // We probably shouldn't be loading this from the UI thread,
236 // but it does need to happen early on in startup.
237 // http://code.google.com/p/chromium/issues/detail?id=24163
238 base::ThreadRestrictions::ScopedAllowIO allow_io
;
240 if (persist_to_disk_
) {
244 return InitFromScratch(suppress_rebuild_
);
247 VisitedLinkMaster::Hash
VisitedLinkMaster::TryToAddURL(const GURL
& url
) {
248 // Extra check that we are not incognito. This should not happen.
249 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
250 // removed from BrowserContext.
251 if (browser_context_
&& browser_context_
->IsOffTheRecord()) {
257 return null_hash_
; // Don't add invalid URLs.
259 Fingerprint fingerprint
= ComputeURLFingerprint(url
.spec().data(),
262 if (table_builder_
.get()) {
263 // If we have a pending delete for this fingerprint, cancel it.
264 std::set
<Fingerprint
>::iterator found
=
265 deleted_since_rebuild_
.find(fingerprint
);
266 if (found
!= deleted_since_rebuild_
.end())
267 deleted_since_rebuild_
.erase(found
);
269 // A rebuild is in progress, save this addition in the temporary list so
270 // it can be added once rebuild is complete.
271 added_since_rebuild_
.insert(fingerprint
);
274 // If the table is "full", we don't add URLs and just drop them on the floor.
275 // This can happen if we get thousands of new URLs and something causes
276 // the table resizing to fail. This check prevents a hang in that case. Note
277 // that this is *not* the resize limit, this is just a sanity check.
278 if (used_items_
/ 8 > table_length_
/ 10)
279 return null_hash_
; // Table is more than 80% full.
281 return AddFingerprint(fingerprint
, true);
284 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location
& from_here
,
285 const base::Closure
& task
) {
286 DCHECK(persist_to_disk_
);
287 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_
,
291 void VisitedLinkMaster::AddURL(const GURL
& url
) {
292 Hash index
= TryToAddURL(url
);
293 if (!table_builder_
.get() && index
!= null_hash_
) {
294 // Not rebuilding, so we want to keep the file on disk up-to-date.
295 if (persist_to_disk_
) {
296 WriteUsedItemCountToFile();
297 WriteHashRangeToFile(index
, index
);
299 ResizeTableIfNecessary();
303 void VisitedLinkMaster::AddURLs(const std::vector
<GURL
>& url
) {
304 for (std::vector
<GURL
>::const_iterator i
= url
.begin();
305 i
!= url
.end(); ++i
) {
306 Hash index
= TryToAddURL(*i
);
307 if (!table_builder_
.get() && index
!= null_hash_
)
308 ResizeTableIfNecessary();
311 // Keeps the file on disk up-to-date.
312 if (!table_builder_
.get() && persist_to_disk_
)
316 void VisitedLinkMaster::DeleteAllURLs() {
317 // Any pending modifications are invalid.
318 added_since_rebuild_
.clear();
319 deleted_since_rebuild_
.clear();
321 // Clear the hash table.
323 memset(hash_table_
, 0, this->table_length_
* sizeof(Fingerprint
));
325 // Resize it if it is now too empty. Resize may write the new table out for
326 // us, otherwise, schedule writing the new table to disk ourselves.
327 if (!ResizeTableIfNecessary() && persist_to_disk_
)
333 VisitedLinkDelegate
* VisitedLinkMaster::GetDelegate() {
337 void VisitedLinkMaster::DeleteURLs(URLIterator
* urls
) {
338 if (!urls
->HasNextURL())
343 if (table_builder_
.get()) {
344 // A rebuild is in progress, save this deletion in the temporary list so
345 // it can be added once rebuild is complete.
346 while (urls
->HasNextURL()) {
347 const GURL
& url(urls
->NextURL());
351 Fingerprint fingerprint
=
352 ComputeURLFingerprint(url
.spec().data(), url
.spec().size(), salt_
);
353 deleted_since_rebuild_
.insert(fingerprint
);
355 // If the URL was just added and now we're deleting it, it may be in the
356 // list of things added since the last rebuild. Delete it from that list.
357 std::set
<Fingerprint
>::iterator found
=
358 added_since_rebuild_
.find(fingerprint
);
359 if (found
!= added_since_rebuild_
.end())
360 added_since_rebuild_
.erase(found
);
362 // Delete the URLs from the in-memory table, but don't bother writing
363 // to disk since it will be replaced soon.
364 DeleteFingerprint(fingerprint
, false);
369 // Compute the deleted URLs' fingerprints and delete them
370 std::set
<Fingerprint
> deleted_fingerprints
;
371 while (urls
->HasNextURL()) {
372 const GURL
& url(urls
->NextURL());
375 deleted_fingerprints
.insert(
376 ComputeURLFingerprint(url
.spec().data(), url
.spec().size(), salt_
));
378 DeleteFingerprintsFromCurrentTable(deleted_fingerprints
);
381 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
382 VisitedLinkMaster::Hash
VisitedLinkMaster::AddFingerprint(
383 Fingerprint fingerprint
,
384 bool send_notifications
) {
385 if (!hash_table_
|| table_length_
== 0) {
386 NOTREACHED(); // Not initialized.
390 Hash cur_hash
= HashFingerprint(fingerprint
);
391 Hash first_hash
= cur_hash
;
393 Fingerprint cur_fingerprint
= FingerprintAt(cur_hash
);
394 if (cur_fingerprint
== fingerprint
)
395 return null_hash_
; // This fingerprint is already in there, do nothing.
397 if (cur_fingerprint
== null_fingerprint_
) {
398 // End of probe sequence found, insert here.
399 hash_table_
[cur_hash
] = fingerprint
;
401 // If allowed, notify listener that a new visited link was added.
402 if (send_notifications
)
403 listener_
->Add(fingerprint
);
407 // Advance in the probe sequence.
408 cur_hash
= IncrementHash(cur_hash
);
409 if (cur_hash
== first_hash
) {
410 // This means that we've wrapped around and are about to go into an
411 // infinite loop. Something was wrong with the hashtable resizing
412 // logic, so stop here.
419 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
420 const std::set
<Fingerprint
>& fingerprints
) {
421 bool bulk_write
= (fingerprints
.size() > kBigDeleteThreshold
);
423 // Delete the URLs from the table.
424 for (std::set
<Fingerprint
>::const_iterator i
= fingerprints
.begin();
425 i
!= fingerprints
.end(); ++i
)
426 DeleteFingerprint(*i
, !bulk_write
);
428 // These deleted fingerprints may make us shrink the table.
429 if (ResizeTableIfNecessary())
430 return; // The resize function wrote the new table to disk for us.
432 // Nobody wrote this out for us, write the full file to disk.
433 if (bulk_write
&& persist_to_disk_
)
437 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint
,
439 if (!hash_table_
|| table_length_
== 0) {
440 NOTREACHED(); // Not initialized.
443 if (!IsVisited(fingerprint
))
444 return false; // Not in the database to delete.
446 // First update the header used count.
448 if (update_file
&& persist_to_disk_
)
449 WriteUsedItemCountToFile();
451 Hash deleted_hash
= HashFingerprint(fingerprint
);
453 // Find the range of "stuff" in the hash table that is adjacent to this
454 // fingerprint. These are things that could be affected by the change in
455 // the hash table. Since we use linear probing, anything after the deleted
456 // item up until an empty item could be affected.
457 Hash end_range
= deleted_hash
;
459 Hash next_hash
= IncrementHash(end_range
);
460 if (next_hash
== deleted_hash
)
461 break; // We wrapped around and the whole table is full.
462 if (!hash_table_
[next_hash
])
463 break; // Found the last spot.
464 end_range
= next_hash
;
467 // We could get all fancy and move the affected fingerprints around, but
468 // instead we just remove them all and re-add them (minus our deleted one).
469 // This will mean there's a small window of time where the affected links
470 // won't be marked visited.
471 base::StackVector
<Fingerprint
, 32> shuffled_fingerprints
;
472 Hash stop_loop
= IncrementHash(end_range
); // The end range is inclusive.
473 for (Hash i
= deleted_hash
; i
!= stop_loop
; i
= IncrementHash(i
)) {
474 if (hash_table_
[i
] != fingerprint
) {
475 // Don't save the one we're deleting!
476 shuffled_fingerprints
->push_back(hash_table_
[i
]);
478 // This will balance the increment of this value in AddFingerprint below
479 // so there is no net change.
482 hash_table_
[i
] = null_fingerprint_
;
485 if (!shuffled_fingerprints
->empty()) {
486 // Need to add the new items back.
487 for (size_t i
= 0; i
< shuffled_fingerprints
->size(); i
++)
488 AddFingerprint(shuffled_fingerprints
[i
], false);
491 // Write the affected range to disk [deleted_hash, end_range].
492 if (update_file
&& persist_to_disk_
)
493 WriteHashRangeToFile(deleted_hash
, end_range
);
498 void VisitedLinkMaster::WriteFullTable() {
499 // This function can get called when the file is open, for example, when we
500 // resize the table. We must handle this case and not try to reopen the file,
501 // since there may be write operations pending on the file I/O thread.
503 // Note that once we start writing, we do not delete on error. This means
504 // there can be a partial file, but the short file will be detected next time
505 // we start, and will be replaced.
507 // This might possibly get corrupted if we crash in the middle of writing.
508 // We should pick up the most common types of these failures when we notice
509 // that the file size is different when we load it back in, and then we will
510 // regenerate the table.
511 DCHECK(persist_to_disk_
);
514 file_
= static_cast<FILE**>(calloc(1, sizeof(*file_
)));
515 base::FilePath filename
;
516 GetDatabaseFileName(&filename
);
517 PostIOTask(FROM_HERE
, base::Bind(&AsyncOpen
, file_
, filename
));
520 // Write the new header.
522 header
[0] = kFileSignature
;
523 header
[1] = kFileCurrentVersion
;
524 header
[2] = table_length_
;
525 header
[3] = used_items_
;
526 WriteToFile(file_
, 0, header
, sizeof(header
));
527 WriteToFile(file_
, sizeof(header
), salt_
, LINK_SALT_LENGTH
);
529 // Write the hash data.
530 WriteToFile(file_
, kFileHeaderSize
,
531 hash_table_
, table_length_
* sizeof(Fingerprint
));
533 // The hash table may have shrunk, so make sure this is the end.
534 PostIOTask(FROM_HERE
, base::Bind(&AsyncTruncate
, file_
));
537 bool VisitedLinkMaster::InitFromFile() {
538 DCHECK(file_
== NULL
);
539 DCHECK(persist_to_disk_
);
541 base::FilePath filename
;
542 GetDatabaseFileName(&filename
);
543 base::ScopedFILE
file_closer(base::OpenFile(filename
, "rb+"));
544 if (!file_closer
.get())
547 int32 num_entries
, used_count
;
548 if (!ReadFileHeader(file_closer
.get(), &num_entries
, &used_count
, salt_
))
549 return false; // Header isn't valid.
551 // Allocate and read the table.
552 if (!CreateURLTable(num_entries
, false))
554 if (!ReadFromFile(file_closer
.get(), kFileHeaderSize
,
555 hash_table_
, num_entries
* sizeof(Fingerprint
))) {
559 used_items_
= used_count
;
565 file_
= static_cast<FILE**>(malloc(sizeof(*file_
)));
566 *file_
= file_closer
.release();
570 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild
) {
571 int32 table_size
= kDefaultTableSize
;
572 if (table_size_override_
)
573 table_size
= table_size_override_
;
575 // The salt must be generated before the table so that it can be copied to
576 // the shared memory.
578 if (!CreateURLTable(table_size
, true))
585 if (suppress_rebuild
&& persist_to_disk_
) {
586 // When we disallow rebuilds (normally just unit tests), just use the
587 // current empty table.
592 // This will build the table from history. On the first run, history will
593 // be empty, so this will be correct. This will also write the new table
594 // to disk. We don't want to save explicitly here, since the rebuild may
595 // not complete, leaving us with an empty but valid visited link database.
596 // In the future, we won't know we need to try rebuilding again.
597 return RebuildTableFromDelegate();
600 bool VisitedLinkMaster::ReadFileHeader(FILE* file
,
603 uint8 salt
[LINK_SALT_LENGTH
]) {
604 DCHECK(persist_to_disk_
);
607 // Note that there is no need to seek back to the original location in the
608 // file since ReadFromFile() [which is the next call accessing the file]
609 // seeks before reading.
610 if (fseek(file
, 0, SEEK_END
) == -1)
612 size_t file_size
= ftell(file
);
614 if (file_size
<= kFileHeaderSize
)
617 uint8 header
[kFileHeaderSize
];
618 if (!ReadFromFile(file
, 0, &header
, kFileHeaderSize
))
621 // Verify the signature.
623 memcpy(&signature
, &header
[kFileHeaderSignatureOffset
], sizeof(signature
));
624 if (signature
!= kFileSignature
)
627 // Verify the version is up-to-date. As with other read errors, a version
628 // mistmatch will trigger a rebuild of the database from history, which will
629 // have the effect of migrating the database.
631 memcpy(&version
, &header
[kFileHeaderVersionOffset
], sizeof(version
));
632 if (version
!= kFileCurrentVersion
)
633 return false; // Bad version.
635 // Read the table size and make sure it matches the file size.
636 memcpy(num_entries
, &header
[kFileHeaderLengthOffset
], sizeof(*num_entries
));
637 if (*num_entries
* sizeof(Fingerprint
) + kFileHeaderSize
!= file_size
)
638 return false; // Bad size.
640 // Read the used item count.
641 memcpy(used_count
, &header
[kFileHeaderUsedOffset
], sizeof(*used_count
));
642 if (*used_count
> *num_entries
)
643 return false; // Bad used item count;
646 memcpy(salt
, &header
[kFileHeaderSaltOffset
], LINK_SALT_LENGTH
);
648 // This file looks OK from the header's perspective.
652 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath
* filename
) {
653 if (!database_name_override_
.empty()) {
654 // use this filename, the directory must exist
655 *filename
= database_name_override_
;
659 if (!browser_context_
|| browser_context_
->GetPath().empty())
662 base::FilePath profile_dir
= browser_context_
->GetPath();
663 *filename
= profile_dir
.Append(FILE_PATH_LITERAL("Visited Links"));
667 // Initializes the shared memory structure. The salt should already be filled
668 // in so that it can be written to the shared memory
669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries
, bool init_to_empty
) {
670 // The table is the size of the table followed by the entries.
671 uint32 alloc_size
= num_entries
* sizeof(Fingerprint
) + sizeof(SharedHeader
);
673 // Create the shared memory object.
674 shared_memory_
= new base::SharedMemory();
678 base::SharedMemoryCreateOptions options
;
679 options
.size
= alloc_size
;
680 options
.share_read_only
= true;
682 if (!shared_memory_
->Create(options
) || !shared_memory_
->Map(alloc_size
)) {
683 delete shared_memory_
;
684 shared_memory_
= NULL
;
689 memset(shared_memory_
->memory(), 0, alloc_size
);
692 table_length_
= num_entries
;
694 // Save the header for other processes to read.
695 SharedHeader
* header
= static_cast<SharedHeader
*>(shared_memory_
->memory());
696 header
->length
= table_length_
;
697 memcpy(header
->salt
, salt_
, LINK_SALT_LENGTH
);
699 // Our table pointer is just the data immediately following the size.
700 hash_table_
= reinterpret_cast<Fingerprint
*>(
701 static_cast<char*>(shared_memory_
->memory()) + sizeof(SharedHeader
));
706 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries
) {
707 base::SharedMemory
*old_shared_memory
= shared_memory_
;
708 Fingerprint
* old_hash_table
= hash_table_
;
709 int32 old_table_length
= table_length_
;
710 if (!CreateURLTable(num_entries
, true)) {
711 // Try to put back the old state.
712 shared_memory_
= old_shared_memory
;
713 hash_table_
= old_hash_table
;
714 table_length_
= old_table_length
;
725 void VisitedLinkMaster::FreeURLTable() {
726 if (shared_memory_
) {
727 delete shared_memory_
;
728 shared_memory_
= NULL
;
730 if (!persist_to_disk_
|| !file_
)
732 PostIOTask(FROM_HERE
, base::Bind(&AsyncClose
, file_
));
733 // AsyncClose() will close the file and free the memory pointed by |file_|.
737 bool VisitedLinkMaster::ResizeTableIfNecessary() {
738 DCHECK(table_length_
> 0) << "Must have a table";
740 // Load limits for good performance/space. We are pretty conservative about
741 // keeping the table not very full. This is because we use linear probing
742 // which increases the likelihood of clumps of entries which will reduce
744 const float max_table_load
= 0.5f
; // Grow when we're > this full.
745 const float min_table_load
= 0.2f
; // Shrink when we're < this full.
747 float load
= ComputeTableLoad();
748 if (load
< max_table_load
&&
749 (table_length_
<= static_cast<float>(kDefaultTableSize
) ||
750 load
> min_table_load
))
753 // Table needs to grow or shrink.
754 int new_size
= NewTableSizeForCount(used_items_
);
755 DCHECK(new_size
> used_items_
);
756 DCHECK(load
<= min_table_load
|| new_size
> table_length_
);
757 ResizeTable(new_size
);
761 void VisitedLinkMaster::ResizeTable(int32 new_size
) {
762 DCHECK(shared_memory_
&& shared_memory_
->memory() && hash_table_
);
763 shared_memory_serial_
++;
769 base::SharedMemory
* old_shared_memory
= shared_memory_
;
770 Fingerprint
* old_hash_table
= hash_table_
;
771 int32 old_table_length
= table_length_
;
772 if (!BeginReplaceURLTable(new_size
))
775 // Now we have two tables, our local copy which is the old one, and the new
776 // one loaded into this object where we need to copy the data.
777 for (int32 i
= 0; i
< old_table_length
; i
++) {
778 Fingerprint cur
= old_hash_table
[i
];
780 AddFingerprint(cur
, false);
783 // On error unmapping, just forget about it since we can't do anything
784 // else to release it.
785 delete old_shared_memory
;
787 // Send an update notification to all child processes so they read the new
789 listener_
->NewTable(shared_memory_
);
795 // The new table needs to be written to disk.
796 if (persist_to_disk_
)
800 uint32
VisitedLinkMaster::NewTableSizeForCount(int32 item_count
) const {
801 // These table sizes are selected to be the maximum prime number less than
802 // a "convenient" multiple of 1K.
803 static const int table_sizes
[] = {
804 16381, // 16K = 16384 <- don't shrink below this table size
805 // (should be == default_table_size)
806 32767, // 32K = 32768
807 65521, // 64K = 65536
808 130051, // 128K = 131072
809 262127, // 256K = 262144
810 524269, // 512K = 524288
811 1048549, // 1M = 1048576
812 2097143, // 2M = 2097152
813 4194301, // 4M = 4194304
814 8388571, // 8M = 8388608
815 16777199, // 16M = 16777216
816 33554347}; // 32M = 33554432
818 // Try to leave the table 33% full.
819 int desired
= item_count
* 3;
821 // Find the closest prime.
822 for (size_t i
= 0; i
< arraysize(table_sizes
); i
++) {
823 if (table_sizes
[i
] > desired
)
824 return table_sizes
[i
];
827 // Growing very big, just approximate a "good" number, not growing as much
829 return item_count
* 2 - 1;
832 // See the TableBuilder definition in the header file for how this works.
833 bool VisitedLinkMaster::RebuildTableFromDelegate() {
834 DCHECK(!table_builder_
.get());
836 // TODO(brettw) make sure we have reasonable salt!
837 table_builder_
= new TableBuilder(this, salt_
);
838 delegate_
->RebuildTable(table_builder_
);
842 // See the TableBuilder declaration above for how this works.
843 void VisitedLinkMaster::OnTableRebuildComplete(
845 const std::vector
<Fingerprint
>& fingerprints
) {
847 // Replace the old table with a new blank one.
848 shared_memory_serial_
++;
850 // We are responsible for freeing it AFTER it has been replaced if
851 // replacement succeeds.
852 base::SharedMemory
* old_shared_memory
= shared_memory_
;
854 int new_table_size
= NewTableSizeForCount(
855 static_cast<int>(fingerprints
.size() + added_since_rebuild_
.size()));
856 if (BeginReplaceURLTable(new_table_size
)) {
857 // Free the old table.
858 delete old_shared_memory
;
860 // Add the stored fingerprints to the hash table.
861 for (size_t i
= 0; i
< fingerprints
.size(); i
++)
862 AddFingerprint(fingerprints
[i
], false);
864 // Also add anything that was added while we were asynchronously
865 // generating the new table.
866 for (std::set
<Fingerprint
>::iterator i
= added_since_rebuild_
.begin();
867 i
!= added_since_rebuild_
.end(); ++i
)
868 AddFingerprint(*i
, false);
869 added_since_rebuild_
.clear();
871 // Now handle deletions.
872 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_
);
873 deleted_since_rebuild_
.clear();
875 // Send an update notification to all child processes.
876 listener_
->NewTable(shared_memory_
);
878 if (persist_to_disk_
)
882 table_builder_
= NULL
; // Will release our reference to the builder.
884 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
885 if (!rebuild_complete_task_
.is_null()) {
886 rebuild_complete_task_
.Run();
887 rebuild_complete_task_
.Reset();
891 void VisitedLinkMaster::WriteToFile(FILE** file
,
895 DCHECK(persist_to_disk_
);
897 posted_asynchronous_operation_
= true;
899 PostIOTask(FROM_HERE
,
900 base::Bind(&AsyncWrite
, file
, offset
,
901 std::string(static_cast<const char*>(data
), data_size
)));
904 void VisitedLinkMaster::WriteUsedItemCountToFile() {
905 DCHECK(persist_to_disk_
);
907 return; // See comment on the file_ variable for why this might happen.
908 WriteToFile(file_
, kFileHeaderUsedOffset
, &used_items_
, sizeof(used_items_
));
911 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash
, Hash last_hash
) {
912 DCHECK(persist_to_disk_
);
915 return; // See comment on the file_ variable for why this might happen.
916 if (last_hash
< first_hash
) {
917 // Handle wraparound at 0. This first write is first_hash->EOF
918 WriteToFile(file_
, first_hash
* sizeof(Fingerprint
) + kFileHeaderSize
,
919 &hash_table_
[first_hash
],
920 (table_length_
- first_hash
+ 1) * sizeof(Fingerprint
));
922 // Now do 0->last_lash.
923 WriteToFile(file_
, kFileHeaderSize
, hash_table_
,
924 (last_hash
+ 1) * sizeof(Fingerprint
));
926 // Normal case, just write the range.
927 WriteToFile(file_
, first_hash
* sizeof(Fingerprint
) + kFileHeaderSize
,
928 &hash_table_
[first_hash
],
929 (last_hash
- first_hash
+ 1) * sizeof(Fingerprint
));
933 bool VisitedLinkMaster::ReadFromFile(FILE* file
,
937 DCHECK(persist_to_disk_
);
939 // Since this function is synchronous, we require that no asynchronous
940 // operations could possibly be pending.
941 DCHECK(!posted_asynchronous_operation_
);
944 if (fseek(file
, offset
, SEEK_SET
) != 0)
947 size_t num_read
= fread(data
, 1, data_size
, file
);
948 return num_read
== data_size
;
951 // VisitedLinkTableBuilder ----------------------------------------------------
953 VisitedLinkMaster::TableBuilder::TableBuilder(
954 VisitedLinkMaster
* master
,
955 const uint8 salt
[LINK_SALT_LENGTH
])
958 fingerprints_
.reserve(4096);
959 memcpy(salt_
, salt
, LINK_SALT_LENGTH
* sizeof(uint8
));
962 // TODO(brettw): Do we want to try to cancel the request if this happens? It
963 // could delay shutdown if there are a lot of URLs.
964 void VisitedLinkMaster::TableBuilder::DisownMaster() {
968 void VisitedLinkMaster::TableBuilder::OnURL(const GURL
& url
) {
969 if (!url
.is_empty()) {
970 fingerprints_
.push_back(VisitedLinkMaster::ComputeURLFingerprint(
971 url
.spec().data(), url
.spec().length(), salt_
));
975 void VisitedLinkMaster::TableBuilder::OnComplete(bool success
) {
977 DLOG_IF(WARNING
, !success
) << "Unable to rebuild visited links";
979 // Marshal to the main thread to notify the VisitedLinkMaster that the
980 // rebuild is complete.
981 BrowserThread::PostTask(
982 BrowserThread::UI
, FROM_HERE
,
983 base::Bind(&TableBuilder::OnCompleteMainThread
, this));
986 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
988 master_
->OnTableRebuildComplete(success_
, fingerprints_
);
991 } // namespace visitedlink