scripts: Fix Python3 warnings
[tor.git] / src / lib / container / bloomfilt.c
blobace3f6786e18ffc997a4bd7acd533ebc253c9f41
1 /* Copyright (c) 2003-2004, Roger Dingledine
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2021, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
6 /**
7 * \file bloomfilt.c
8 * \brief Uses bitarray_t to implement a bloom filter.
9 **/
11 #include <stdlib.h>
13 #include "lib/malloc/malloc.h"
14 #include "lib/container/bloomfilt.h"
15 #include "lib/intmath/bits.h"
16 #include "lib/log/util_bug.h"
17 #include "ext/siphash.h"
19 /** How many bloom-filter bits we set per address. This is twice the
20 * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
21 * values. */
22 #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
24 struct bloomfilt_t {
25 /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
26 * items. */
27 struct sipkey key[BLOOMFILT_N_HASHES];
28 bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */
29 uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always
30 * one less than a power of two. */
31 bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
34 #define BIT(set, n) ((n) & (set)->mask)
36 /** Add the element <b>item</b> to <b>set</b>. */
37 void
38 bloomfilt_add(bloomfilt_t *set,
39 const void *item)
41 int i;
42 for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
43 uint64_t h = set->hashfn(&set->key[i], item);
44 uint32_t high_bits = (uint32_t)(h >> 32);
45 uint32_t low_bits = (uint32_t)(h);
46 bitarray_set(set->ba, BIT(set, high_bits));
47 bitarray_set(set->ba, BIT(set, low_bits));
51 /** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise,
52 * <em>probably</em> return zero. */
53 int
54 bloomfilt_probably_contains(const bloomfilt_t *set,
55 const void *item)
57 int i, matches = 0;
58 for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
59 uint64_t h = set->hashfn(&set->key[i], item);
60 uint32_t high_bits = (uint32_t)(h >> 32);
61 uint32_t low_bits = (uint32_t)(h);
62 // Note that !! is necessary here, since bitarray_is_set does not
63 // necessarily return 1 on true.
64 matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
65 matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
67 return matches == N_BITS_PER_ITEM;
70 /** Return a newly allocated bloomfilt_t, optimized to hold a total of
71 * <b>max_elements</b> elements with a reasonably low false positive weight.
73 * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
74 * functions of the items, and the key material <b>random_key</b> to
75 * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
76 **/
77 bloomfilt_t *
78 bloomfilt_new(int max_elements,
79 bloomfilt_hash_fn hashfn,
80 const uint8_t *random_key)
82 /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
83 * is the number of hash functions per entry, m is the bits in the array,
84 * and n is the number of elements inserted. For us, k==4, n<=max_elements,
85 * and m==n_bits= approximately max_elements*32. This gives
86 * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
88 * It would be more optimal in space vs false positives to get this false
89 * positive rate by going for k==13, and m==18.5n, but we also want to
90 * conserve CPU, and k==13 is pretty big.
92 int n_bits = 1u << (tor_log2(max_elements)+5);
93 bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
94 r->mask = n_bits - 1;
95 r->ba = bitarray_init_zero(n_bits);
97 tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
98 memcpy(r->key, random_key, sizeof(r->key));
100 r->hashfn = hashfn;
102 return r;
105 /** Free all storage held in <b>set</b>. */
106 void
107 bloomfilt_free_(bloomfilt_t *set)
109 if (!set)
110 return;
111 bitarray_free(set->ba);
112 tor_free(set);