1 /* Copyright (c) 2003-2004, Roger Dingledine
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2021, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
8 * \brief Uses bitarray_t to implement a bloom filter.
13 #include "lib/malloc/malloc.h"
14 #include "lib/container/bloomfilt.h"
15 #include "lib/intmath/bits.h"
16 #include "lib/log/util_bug.h"
17 #include "ext/siphash.h"
19 /** How many bloom-filter bits we set per address. This is twice the
20 * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
22 #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
25 /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
27 struct sipkey key
[BLOOMFILT_N_HASHES
];
28 bloomfilt_hash_fn hashfn
; /**< Function used to generate hashes */
29 uint32_t mask
; /**< One less than the number of bits in <b>ba</b>; always
30 * one less than a power of two. */
31 bitarray_t
*ba
; /**< A bit array to implement the Bloom filter. */
34 #define BIT(set, n) ((n) & (set)->mask)
36 /** Add the element <b>item</b> to <b>set</b>. */
38 bloomfilt_add(bloomfilt_t
*set
,
42 for (i
= 0; i
< BLOOMFILT_N_HASHES
; ++i
) {
43 uint64_t h
= set
->hashfn(&set
->key
[i
], item
);
44 uint32_t high_bits
= (uint32_t)(h
>> 32);
45 uint32_t low_bits
= (uint32_t)(h
);
46 bitarray_set(set
->ba
, BIT(set
, high_bits
));
47 bitarray_set(set
->ba
, BIT(set
, low_bits
));
51 /** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise,
52 * <em>probably</em> return zero. */
54 bloomfilt_probably_contains(const bloomfilt_t
*set
,
58 for (i
= 0; i
< BLOOMFILT_N_HASHES
; ++i
) {
59 uint64_t h
= set
->hashfn(&set
->key
[i
], item
);
60 uint32_t high_bits
= (uint32_t)(h
>> 32);
61 uint32_t low_bits
= (uint32_t)(h
);
62 // Note that !! is necessary here, since bitarray_is_set does not
63 // necessarily return 1 on true.
64 matches
+= !! bitarray_is_set(set
->ba
, BIT(set
, high_bits
));
65 matches
+= !! bitarray_is_set(set
->ba
, BIT(set
, low_bits
));
67 return matches
== N_BITS_PER_ITEM
;
70 /** Return a newly allocated bloomfilt_t, optimized to hold a total of
71 * <b>max_elements</b> elements with a reasonably low false positive weight.
73 * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
74 * functions of the items, and the key material <b>random_key</b> to
75 * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
78 bloomfilt_new(int max_elements
,
79 bloomfilt_hash_fn hashfn
,
80 const uint8_t *random_key
)
82 /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
83 * is the number of hash functions per entry, m is the bits in the array,
84 * and n is the number of elements inserted. For us, k==4, n<=max_elements,
85 * and m==n_bits= approximately max_elements*32. This gives
86 * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
88 * It would be more optimal in space vs false positives to get this false
89 * positive rate by going for k==13, and m==18.5n, but we also want to
90 * conserve CPU, and k==13 is pretty big.
92 int n_bits
= 1u << (tor_log2(max_elements
)+5);
93 bloomfilt_t
*r
= tor_malloc(sizeof(bloomfilt_t
));
95 r
->ba
= bitarray_init_zero(n_bits
);
97 tor_assert(sizeof(r
->key
) == BLOOMFILT_KEY_LEN
);
98 memcpy(r
->key
, random_key
, sizeof(r
->key
));
105 /** Free all storage held in <b>set</b>. */
107 bloomfilt_free_(bloomfilt_t
*set
)
111 bitarray_free(set
->ba
);