1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
6 #define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
10 #include "base/basictypes.h"
14 namespace visitedlink
{
16 // number of bytes in the salt
17 #define LINK_SALT_LENGTH 8
19 // A multiprocess-safe database of the visited links for the browser. There
20 // should be exactly one process that has write access (implemented by
21 // VisitedLinkMaster), while all other processes should be read-only
22 // (implemented by VisitedLinkSlave). These other processes add links by calling
23 // the writer process to add them for it. The writer may also notify the readers
24 // to replace their table when the table is resized.
26 // IPC is not implemented in these classes. This is done through callback
27 // functions supplied by the creator of these objects to allow more flexibility,
28 // especially for testing.
30 // This class defines the common base for these others. We implement accessors
31 // for looking things up in the hash table, and for computing hash values and
32 // fingerprints. Both the master and the slave inherit from this, and add their
33 // own code to set up and change these values as their design requires. The
34 // slave pretty much just sets up the shared memory and saves the pointer. The
35 // master does a lot of work to manage the table, reading and writing it to and
36 // from disk, and resizing it when it gets too full.
38 // To ask whether a page is in history, we compute a 64-bit fingerprint of the
39 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
40 // we consider it visited. Otherwise, it is unvisited. Note that it is possible
41 // to get collisions, which is the penalty for not storing all URL strings in
42 // memory (which could get to be more than we want to have in memory). We use
43 // a salt value for the links on one computer so that an attacker can not
44 // manually create a link that causes a collision.
45 class VisitedLinkCommon
{
47 // A number that identifies the URL.
48 typedef uint64 Fingerprint
;
49 typedef std::vector
<Fingerprint
> Fingerprints
;
51 // A hash value of a fingerprint
54 // A fingerprint or hash value that does not exist
55 static const Fingerprint null_fingerprint_
;
56 static const Hash null_hash_
;
59 virtual ~VisitedLinkCommon();
61 // Returns the fingerprint for the given URL.
62 Fingerprint
ComputeURLFingerprint(const char* canonical_url
,
63 size_t url_len
) const {
64 return ComputeURLFingerprint(canonical_url
, url_len
, salt_
);
67 // Looks up the given key in the table. The fingerprint for the URL is
68 // computed if you call one with the string argument. Returns true if found.
69 // Does not modify the hastable.
70 bool IsVisited(const char* canonical_url
, size_t url_len
) const;
71 bool IsVisited(const GURL
& url
) const;
72 bool IsVisited(Fingerprint fingerprint
) const;
75 // Returns statistics about DB usage
76 void GetUsageStatistics(int32
* table_size
,
77 VisitedLinkCommon::Fingerprint
** fingerprints
) {
78 *table_size
= table_length_
;
79 *fingerprints
= hash_table_
;
84 // This structure is at the beginning of the shared memory so that the slaves
85 // can get stats on the table
87 // see goes into table_length_
91 uint8 salt
[LINK_SALT_LENGTH
];
94 // Returns the fingerprint at the given index into the URL table. This
95 // function should be called instead of accessing the table directly to
96 // contain endian issues.
97 Fingerprint
FingerprintAt(int32 table_offset
) const {
99 return null_fingerprint_
;
100 return hash_table_
[table_offset
];
103 // Computes the fingerprint of the given canonical URL. It is static so the
104 // same algorithm can be re-used by the table rebuilder, so you will have to
105 // pass the salt as a parameter. See the non-static version above if you
106 // want to use the current class' salt.
107 static Fingerprint
ComputeURLFingerprint(const char* canonical_url
,
109 const uint8 salt
[LINK_SALT_LENGTH
]);
111 // Computes the hash value of the given fingerprint, this is used as a lookup
112 // into the hashtable.
113 static Hash
HashFingerprint(Fingerprint fingerprint
, int32 table_length
) {
114 if (table_length
== 0)
116 return static_cast<Hash
>(fingerprint
% table_length
);
118 // Uses the current hashtable.
119 Hash
HashFingerprint(Fingerprint fingerprint
) const {
120 return HashFingerprint(fingerprint
, table_length_
);
123 // pointer to the first item
124 VisitedLinkCommon::Fingerprint
* hash_table_
;
126 // the number of items in the hash table
129 // salt used for each URL when computing the fingerprint
130 uint8 salt_
[LINK_SALT_LENGTH
];
133 DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon
);
136 } // namespace visitedlink
138 #endif // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_