1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/visitedlink/common/visitedlink_common.h"
7 #include <string.h> // for memset()
9 #include "base/logging.h"
13 namespace visitedlink
{
15 const VisitedLinkCommon::Fingerprint
VisitedLinkCommon::null_fingerprint_
= 0;
16 const VisitedLinkCommon::Hash
VisitedLinkCommon::null_hash_
= -1;
18 VisitedLinkCommon::VisitedLinkCommon()
21 memset(salt_
, 0, sizeof(salt_
));
24 VisitedLinkCommon::~VisitedLinkCommon() {
27 // FIXME: this uses linear probing, it should be replaced with quadratic
28 // probing or something better. See VisitedLinkMaster::AddFingerprint
29 bool VisitedLinkCommon::IsVisited(const char* canonical_url
,
30 size_t url_len
) const {
33 if (!hash_table_
|| table_length_
== 0)
35 return IsVisited(ComputeURLFingerprint(canonical_url
, url_len
));
38 bool VisitedLinkCommon::IsVisited(const GURL
& url
) const {
39 return IsVisited(url
.spec().data(), url
.spec().size());
42 bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint
) const {
43 // Go through the table until we find the item or an empty spot (meaning it
44 // wasn't found). This loop will terminate as long as the table isn't full,
45 // which should be enforced by AddFingerprint.
46 Hash first_hash
= HashFingerprint(fingerprint
);
47 Hash cur_hash
= first_hash
;
49 Fingerprint cur_fingerprint
= FingerprintAt(cur_hash
);
50 if (cur_fingerprint
== null_fingerprint_
)
51 return false; // End of probe sequence found.
52 if (cur_fingerprint
== fingerprint
)
53 return true; // Found a match.
55 // This spot was taken, but not by the item we're looking for, search in
58 if (cur_hash
== table_length_
)
60 if (cur_hash
== first_hash
) {
61 // Wrapped around and didn't find an empty space, this means we're in an
62 // infinite loop because AddFingerprint didn't do its job resizing.
69 // Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint,
70 // this is as random as any other subset of the MD5SUM.
72 // FIXME: this uses the MD5SUM of the 16-bit character version. For systems
73 // where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be
74 // compatable. We should define explicitly what should happen here across
75 // platforms, and convert if necessary (probably to UTF-16).
78 VisitedLinkCommon::Fingerprint
VisitedLinkCommon::ComputeURLFingerprint(
79 const char* canonical_url
,
81 const uint8 salt
[LINK_SALT_LENGTH
]) {
82 DCHECK(url_len
> 0) << "Canonical URLs should not be empty";
86 base::MD5Update(&ctx
, base::StringPiece(reinterpret_cast<const char*>(salt
),
88 base::MD5Update(&ctx
, base::StringPiece(canonical_url
, url_len
));
90 base::MD5Digest digest
;
91 base::MD5Final(&digest
, &ctx
);
93 // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that
94 // direct cast the alignment could be wrong, and we can't access a 64-bit int
95 // on arbitrary alignment on some processors. This reinterpret_casts it
96 // down to a char array of the same size as fingerprint, and then does the
97 // bit cast, which amounts to a memcpy. This does not handle endian issues.
98 return bit_cast
<Fingerprint
, uint8
[8]>(
99 *reinterpret_cast<uint8(*)[8]>(&digest
.a
));
102 } // namespace visitedlink