Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / components / visitedlink / common / visitedlink_common.h
blob37afe1d157581a010214801f361142ca12b652d7
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
6 #define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
8 #include <vector>
10 #include "base/basictypes.h"
12 class GURL;
14 namespace visitedlink {
16 // number of bytes in the salt
17 #define LINK_SALT_LENGTH 8
19 // A multiprocess-safe database of the visited links for the browser. There
20 // should be exactly one process that has write access (implemented by
21 // VisitedLinkMaster), while all other processes should be read-only
22 // (implemented by VisitedLinkSlave). These other processes add links by calling
23 // the writer process to add them for it. The writer may also notify the readers
24 // to replace their table when the table is resized.
26 // IPC is not implemented in these classes. This is done through callback
27 // functions supplied by the creator of these objects to allow more flexibility,
28 // especially for testing.
30 // This class defines the common base for these others. We implement accessors
31 // for looking things up in the hash table, and for computing hash values and
32 // fingerprints. Both the master and the slave inherit from this, and add their
33 // own code to set up and change these values as their design requires. The
34 // slave pretty much just sets up the shared memory and saves the pointer. The
35 // master does a lot of work to manage the table, reading and writing it to and
36 // from disk, and resizing it when it gets too full.
38 // To ask whether a page is in history, we compute a 64-bit fingerprint of the
39 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
40 // we consider it visited. Otherwise, it is unvisited. Note that it is possible
41 // to get collisions, which is the penalty for not storing all URL strings in
42 // memory (which could get to be more than we want to have in memory). We use
43 // a salt value for the links on one computer so that an attacker can not
44 // manually create a link that causes a collision.
45 class VisitedLinkCommon {
46 public:
47 // A number that identifies the URL.
48 typedef uint64 Fingerprint;
49 typedef std::vector<Fingerprint> Fingerprints;
51 // A hash value of a fingerprint
52 typedef int32 Hash;
54 // A fingerprint or hash value that does not exist
55 static const Fingerprint null_fingerprint_;
56 static const Hash null_hash_;
58 VisitedLinkCommon();
59 virtual ~VisitedLinkCommon();
61 // Returns the fingerprint for the given URL.
62 Fingerprint ComputeURLFingerprint(const char* canonical_url,
63 size_t url_len) const {
64 return ComputeURLFingerprint(canonical_url, url_len, salt_);
67 // Looks up the given key in the table. The fingerprint for the URL is
68 // computed if you call one with the string argument. Returns true if found.
69 // Does not modify the hastable.
70 bool IsVisited(const char* canonical_url, size_t url_len) const;
71 bool IsVisited(const GURL& url) const;
72 bool IsVisited(Fingerprint fingerprint) const;
74 #ifdef UNIT_TEST
75 // Returns statistics about DB usage
76 void GetUsageStatistics(int32* table_size,
77 VisitedLinkCommon::Fingerprint** fingerprints) {
78 *table_size = table_length_;
79 *fingerprints = hash_table_;
81 #endif
83 protected:
84 // This structure is at the beginning of the shared memory so that the slaves
85 // can get stats on the table
86 struct SharedHeader {
87 // see goes into table_length_
88 uint32 length;
90 // goes into salt_
91 uint8 salt[LINK_SALT_LENGTH];
94 // Returns the fingerprint at the given index into the URL table. This
95 // function should be called instead of accessing the table directly to
96 // contain endian issues.
97 Fingerprint FingerprintAt(int32 table_offset) const {
98 if (!hash_table_)
99 return null_fingerprint_;
100 return hash_table_[table_offset];
103 // Computes the fingerprint of the given canonical URL. It is static so the
104 // same algorithm can be re-used by the table rebuilder, so you will have to
105 // pass the salt as a parameter. See the non-static version above if you
106 // want to use the current class' salt.
107 static Fingerprint ComputeURLFingerprint(const char* canonical_url,
108 size_t url_len,
109 const uint8 salt[LINK_SALT_LENGTH]);
111 // Computes the hash value of the given fingerprint, this is used as a lookup
112 // into the hashtable.
113 static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
114 if (table_length == 0)
115 return null_hash_;
116 return static_cast<Hash>(fingerprint % table_length);
118 // Uses the current hashtable.
119 Hash HashFingerprint(Fingerprint fingerprint) const {
120 return HashFingerprint(fingerprint, table_length_);
123 // pointer to the first item
124 VisitedLinkCommon::Fingerprint* hash_table_;
126 // the number of items in the hash table
127 int32 table_length_;
129 // salt used for each URL when computing the fingerprint
130 uint8 salt_[LINK_SALT_LENGTH];
132 private:
133 DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
136 } // namespace visitedlink
138 #endif // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_