Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / components / rappor / bloom_filter.cc
blob6c6f65409151fa37f089a9bf86c4ea756c418756
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/rappor/bloom_filter.h"
7 #include "base/logging.h"
8 #include "third_party/smhasher/src/City.h"
10 namespace rappor {
12 namespace {
14 uint32_t ComputeHash(const std::string& str, uint32_t seed) {
15 // Using CityHash here because we have support for it in Dremel. Many hash
16 // functions, such as MD5, SHA1, or Murmur, would probably also work.
17 return CityHash64WithSeed(str.data(), str.size(), seed);
20 } // namespace
22 BloomFilter::BloomFilter(uint32_t bytes_size,
23 uint32_t hash_function_count,
24 uint32_t hash_seed_offset)
25 : bytes_(bytes_size),
26 hash_function_count_(hash_function_count),
27 hash_seed_offset_(hash_seed_offset) {
28 DCHECK_GT(bytes_size, 0u);
31 BloomFilter::~BloomFilter() {}
33 void BloomFilter::SetString(const std::string& str) {
34 for (size_t i = 0; i < bytes_.size(); ++i) {
35 bytes_[i] = 0;
37 for (size_t i = 0; i < hash_function_count_; ++i) {
38 uint32_t index = ComputeHash(str, hash_seed_offset_ + i);
39 // Note that the "bytes" are uint8_t, so they are always 8-bits.
40 uint32_t byte_index = (index / 8) % bytes_.size();
41 uint32_t bit_index = index % 8;
42 bytes_[byte_index] |= 1 << bit_index;
46 void BloomFilter::SetBytesForTesting(const ByteVector& bytes) {
47 DCHECK_EQ(bytes_.size(), bytes.size());
48 for (size_t i = 0; i < bytes_.size(); ++i) {
49 bytes_[i] = bytes[i];
53 namespace internal {
55 uint64_t GetBloomBits(uint32_t bytes_size,
56 uint32_t hash_function_count,
57 uint32_t hash_seed_offset,
58 const std::string& str) {
59 // Make sure result fits in uint64.
60 DCHECK_LE(bytes_size, 8u);
61 uint64_t output = 0;
62 const uint32_t bits_size = bytes_size * 8;
63 for (size_t i = 0; i < hash_function_count; ++i) {
64 uint32_t index = ComputeHash(str, hash_seed_offset + i);
65 output |= 1ULL << uint64_t(index % bits_size);
67 return output;
70 } // namespace internal
72 } // namespace rappor