1 /*-------------------------------------------------------------------------
4 * Space-efficient set membership testing
6 * A Bloom filter is a probabilistic data structure that is used to test an
7 * element's membership of a set. False positives are possible, but false
8 * negatives are not; a test of membership of the set returns either "possibly
9 * in set" or "definitely not in set". This is typically very space efficient,
10 * which can be a decisive advantage.
12 * Elements can be added to the set, but not removed. The more elements that
13 * are added, the larger the probability of false positives. Caller must hint
14 * an estimated total size of the set when the Bloom filter is initialized.
15 * This is used to balance the use of memory against the final false positive
18 * The implementation is well suited to data synchronization problems between
19 * unordered sets, especially where predictable performance is important and
20 * some false positives are acceptable. It's also well suited to cache
21 * filtering problems where a relatively small and/or low cardinality set is
22 * fingerprinted, especially when many subsequent membership tests end up
23 * indicating that values of interest are not present. That should save the
24 * caller many authoritative lookups, such as expensive probes of a much larger
27 * Copyright (c) 2018-2022, PostgreSQL Global Development Group
30 * src/backend/lib/bloomfilter.c
32 *-------------------------------------------------------------------------
38 #include "common/hashfn.h"
39 #include "lib/bloomfilter.h"
40 #include "port/pg_bitutils.h"
42 #define MAX_HASH_FUNCS 10
46 /* K hash functions are used, seeded by caller's seed */
49 /* m is bitset size, in bits. Must be a power of two <= 2^32. */
51 unsigned char bitset
[FLEXIBLE_ARRAY_MEMBER
];
54 static int my_bloom_power(uint64 target_bitset_bits
);
55 static int optimal_k(uint64 bitset_bits
, int64 total_elems
);
56 static void k_hashes(bloom_filter
*filter
, uint32
*hashes
, unsigned char *elem
,
58 static inline uint32
mod_m(uint32 val
, uint64 m
);
61 * Create Bloom filter in caller's memory context. We aim for a false positive
62 * rate of between 1% and 2% when bitset size is not constrained by memory
65 * total_elems is an estimate of the final size of the set. It should be
66 * approximately correct, but the implementation can cope well with it being
67 * off by perhaps a factor of five or more. See "Bloom Filters in
68 * Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why
71 * bloom_work_mem is sized in KB, in line with the general work_mem convention.
72 * This determines the size of the underlying bitset (trivial bookkeeping space
73 * isn't counted). The bitset is always sized as a power of two number of
74 * bits, and the largest possible bitset is 512MB (2^32 bits). The
75 * implementation allocates only enough memory to target its standard false
76 * positive rate, using a simple formula with caller's total_elems estimate as
77 * an input. The bitset might be as small as 1MB, even when bloom_work_mem is
80 * The Bloom filter is seeded using a value provided by the caller. Using a
81 * distinct seed value on every call makes it unlikely that the same false
82 * positives will reoccur when the same set is fingerprinted a second time.
83 * Callers that don't care about this pass a constant as their seed, typically
84 * 0. Callers can also use a pseudo-random seed, eg from pg_prng_uint64().
87 bloom_create(int64 total_elems
, int bloom_work_mem
, uint64 seed
)
95 * Aim for two bytes per element; this is sufficient to get a false
96 * positive rate below 1%, independent of the size of the bitset or total
97 * number of elements. Also, if rounding down the size of the bitset to
98 * the next lowest power of two turns out to be a significant drop, the
99 * false positive rate still won't exceed 2% in almost all cases.
101 bitset_bytes
= Min(bloom_work_mem
* UINT64CONST(1024), total_elems
* 2);
102 bitset_bytes
= Max(1024 * 1024, bitset_bytes
);
105 * Size in bits should be the highest power of two <= target. bitset_bits
106 * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32
108 bloom_power
= my_bloom_power(bitset_bytes
* BITS_PER_BYTE
);
109 bitset_bits
= UINT64CONST(1) << bloom_power
;
110 bitset_bytes
= bitset_bits
/ BITS_PER_BYTE
;
112 /* Allocate bloom filter with unset bitset */
113 filter
= palloc0(offsetof(bloom_filter
, bitset
) +
114 sizeof(unsigned char) * bitset_bytes
);
115 filter
->k_hash_funcs
= optimal_k(bitset_bits
, total_elems
);
117 filter
->m
= bitset_bits
;
126 bloom_free(bloom_filter
*filter
)
132 * Add element to Bloom filter
135 bloom_add_element(bloom_filter
*filter
, unsigned char *elem
, size_t len
)
137 uint32 hashes
[MAX_HASH_FUNCS
];
140 k_hashes(filter
, hashes
, elem
, len
);
142 /* Map a bit-wise address to a byte-wise address + bit offset */
143 for (i
= 0; i
< filter
->k_hash_funcs
; i
++)
145 filter
->bitset
[hashes
[i
] >> 3] |= 1 << (hashes
[i
] & 7);
150 * Test if Bloom filter definitely lacks element.
152 * Returns true if the element is definitely not in the set of elements
153 * observed by bloom_add_element(). Otherwise, returns false, indicating that
154 * element is probably present in set.
157 bloom_lacks_element(bloom_filter
*filter
, unsigned char *elem
, size_t len
)
159 uint32 hashes
[MAX_HASH_FUNCS
];
162 k_hashes(filter
, hashes
, elem
, len
);
164 /* Map a bit-wise address to a byte-wise address + bit offset */
165 for (i
= 0; i
< filter
->k_hash_funcs
; i
++)
167 if (!(filter
->bitset
[hashes
[i
] >> 3] & (1 << (hashes
[i
] & 7))))
175 * What proportion of bits are currently set?
177 * Returns proportion, expressed as a multiplier of filter size. That should
178 * generally be close to 0.5, even when we have more than enough memory to
179 * ensure a false positive rate within target 1% to 2% band, since more hash
180 * functions are used as more memory is available per element.
182 * This is the only instrumentation that is low overhead enough to appear in
183 * debug traces. When debugging Bloom filter code, it's likely to be far more
184 * interesting to directly test the false positive rate.
187 bloom_prop_bits_set(bloom_filter
*filter
)
189 int bitset_bytes
= filter
->m
/ BITS_PER_BYTE
;
190 uint64 bits_set
= pg_popcount((char *) filter
->bitset
, bitset_bytes
);
192 return bits_set
/ (double) filter
->m
;
196 * Which element in the sequence of powers of two is less than or equal to
197 * target_bitset_bits?
199 * Value returned here must be generally safe as the basis for actual bitset
202 * Bitset is never allowed to exceed 2 ^ 32 bits (512MB). This is sufficient
203 * for the needs of all current callers, and allows us to use 32-bit hash
204 * functions. It also makes it easy to stay under the MaxAllocSize restriction
205 * (caller needs to leave room for non-bitset fields that appear before
206 * flexible array member, so a 1GB bitset would use an allocation that just
207 * exceeds MaxAllocSize).
210 my_bloom_power(uint64 target_bitset_bits
)
212 int bloom_power
= -1;
214 while (target_bitset_bits
> 0 && bloom_power
< 32)
217 target_bitset_bits
>>= 1;
224 * Determine optimal number of hash functions based on size of filter in bits,
225 * and projected total number of elements. The optimal number is the number
226 * that minimizes the false positive rate.
229 optimal_k(uint64 bitset_bits
, int64 total_elems
)
231 int k
= rint(log(2.0) * bitset_bits
/ total_elems
);
233 return Max(1, Min(k
, MAX_HASH_FUNCS
));
237 * Generate k hash values for element.
239 * Caller passes array, which is filled-in with k values determined by hashing
242 * Only 2 real independent hash functions are actually used to support an
243 * interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is
244 * used to make this work. The main reason we prefer enhanced double hashing
245 * to classic double hashing is that the latter has an issue with collisions
246 * when using power of two sized bitsets. See Dillinger & Manolios for full
250 k_hashes(bloom_filter
*filter
, uint32
*hashes
, unsigned char *elem
, size_t len
)
258 /* Use 64-bit hashing to get two independent 32-bit hashes */
259 hash
= DatumGetUInt64(hash_any_extended(elem
, len
, filter
->seed
));
261 y
= (uint32
) (hash
>> 32);
267 /* Accumulate hashes */
269 for (i
= 1; i
< filter
->k_hash_funcs
; i
++)
279 * Calculate "val MOD m" inexpensively.
281 * Assumes that m (which is bitset size) is a power of two.
283 * Using a power of two number of bits for bitset size allows us to use bitwise
284 * AND operations to calculate the modulo of a hash value. It's also a simple
285 * way of avoiding the modulo bias effect.
288 mod_m(uint32 val
, uint64 m
)
290 Assert(m
<= PG_UINT32_MAX
+ UINT64CONST(1));
291 Assert(((m
- 1) & m
) == 0);
293 return val
& (m
- 1);