src/backend/lib/bloomfilter.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * bloomfilter.c
   4  *              Space-efficient set membership testing
   5  *
   6  * A Bloom filter is a probabilistic data structure that is used to test an
   7  * element's membership of a set.  False positives are possible, but false
   8  * negatives are not; a test of membership of the set returns either "possibly
   9  * in set" or "definitely not in set".  This is typically very space efficient,
  10  * which can be a decisive advantage.
  11  *
  12  * Elements can be added to the set, but not removed.  The more elements that
  13  * are added, the larger the probability of false positives.  Caller must hint
  14  * an estimated total size of the set when the Bloom filter is initialized.
  15  * This is used to balance the use of memory against the final false positive
  16  * rate.
  17  *
  18  * The implementation is well suited to data synchronization problems between
  19  * unordered sets, especially where predictable performance is important and
  20  * some false positives are acceptable.  It's also well suited to cache
  21  * filtering problems where a relatively small and/or low cardinality set is
  22  * fingerprinted, especially when many subsequent membership tests end up
  23  * indicating that values of interest are not present.  That should save the
  24  * caller many authoritative lookups, such as expensive probes of a much larger
  25  * on-disk structure.
  26  *
  27  * Copyright (c) 2018-2022, PostgreSQL Global Development Group
  28  *
  29  * IDENTIFICATION
  30  *        src/backend/lib/bloomfilter.c
  31  *
  32  *-------------------------------------------------------------------------
  33  */
  34 #include "postgres.h"
  35
  36 #include <math.h>
  37
  38 #include "common/hashfn.h"
  39 #include "lib/bloomfilter.h"
  40 #include "port/pg_bitutils.h"
  41
  42 #define MAX_HASH_FUNCS          10
  43
  44 struct bloom_filter
  45 {
  46         /* K hash functions are used, seeded by caller's seed */
  47         int                     k_hash_funcs;
  48         uint64          seed;
  49         /* m is bitset size, in bits.  Must be a power of two <= 2^32.  */
  50         uint64          m;
  51         unsigned char bitset[FLEXIBLE_ARRAY_MEMBER];
  52 };
  53
  54 static int      my_bloom_power(uint64 target_bitset_bits);
  55 static int      optimal_k(uint64 bitset_bits, int64 total_elems);
  56 static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem,
  57                                          size_t len);
  58 static inline uint32 mod_m(uint32 val, uint64 m);
  59
  60 /*
  61  * Create Bloom filter in caller's memory context.  We aim for a false positive
  62  * rate of between 1% and 2% when bitset size is not constrained by memory
  63  * availability.
  64  *
  65  * total_elems is an estimate of the final size of the set.  It should be
  66  * approximately correct, but the implementation can cope well with it being
  67  * off by perhaps a factor of five or more.  See "Bloom Filters in
  68  * Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why
  69  * this is the case.
  70  *
  71  * bloom_work_mem is sized in KB, in line with the general work_mem convention.
  72  * This determines the size of the underlying bitset (trivial bookkeeping space
  73  * isn't counted).  The bitset is always sized as a power of two number of
  74  * bits, and the largest possible bitset is 512MB (2^32 bits).  The
  75  * implementation allocates only enough memory to target its standard false
  76  * positive rate, using a simple formula with caller's total_elems estimate as
  77  * an input.  The bitset might be as small as 1MB, even when bloom_work_mem is
  78  * much higher.
  79  *
  80  * The Bloom filter is seeded using a value provided by the caller.  Using a
  81  * distinct seed value on every call makes it unlikely that the same false
  82  * positives will reoccur when the same set is fingerprinted a second time.
  83  * Callers that don't care about this pass a constant as their seed, typically
  84  * 0.  Callers can also use a pseudo-random seed, eg from pg_prng_uint64().
  85  */
  86 bloom_filter *
  87 bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed)
  88 {
  89         bloom_filter *filter;
  90         int                     bloom_power;
  91         uint64          bitset_bytes;
  92         uint64          bitset_bits;
  93
  94         /*
  95          * Aim for two bytes per element; this is sufficient to get a false
  96          * positive rate below 1%, independent of the size of the bitset or total
  97          * number of elements.  Also, if rounding down the size of the bitset to
  98          * the next lowest power of two turns out to be a significant drop, the
  99          * false positive rate still won't exceed 2% in almost all cases.
 100          */
 101         bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2);
 102         bitset_bytes = Max(1024 * 1024, bitset_bytes);
 103
 104         /*
 105          * Size in bits should be the highest power of two <= target.  bitset_bits
 106          * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32
 107          */
 108         bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE);
 109         bitset_bits = UINT64CONST(1) << bloom_power;
 110         bitset_bytes = bitset_bits / BITS_PER_BYTE;
 111
 112         /* Allocate bloom filter with unset bitset */
 113         filter = palloc0(offsetof(bloom_filter, bitset) +
 114                                          sizeof(unsigned char) * bitset_bytes);
 115         filter->k_hash_funcs = optimal_k(bitset_bits, total_elems);
 116         filter->seed = seed;
 117         filter->m = bitset_bits;
 118
 119         return filter;
 120 }
 121
 122 /*
 123  * Free Bloom filter
 124  */
 125 void
 126 bloom_free(bloom_filter *filter)
 127 {
 128         pfree(filter);
 129 }
 130
 131 /*
 132  * Add element to Bloom filter
 133  */
 134 void
 135 bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len)
 136 {
 137         uint32          hashes[MAX_HASH_FUNCS];
 138         int                     i;
 139
 140         k_hashes(filter, hashes, elem, len);
 141
 142         /* Map a bit-wise address to a byte-wise address + bit offset */
 143         for (i = 0; i < filter->k_hash_funcs; i++)
 144         {
 145                 filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7);
 146         }
 147 }
 148
 149 /*
 150  * Test if Bloom filter definitely lacks element.
 151  *
 152  * Returns true if the element is definitely not in the set of elements
 153  * observed by bloom_add_element().  Otherwise, returns false, indicating that
 154  * element is probably present in set.
 155  */
 156 bool
 157 bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len)
 158 {
 159         uint32          hashes[MAX_HASH_FUNCS];
 160         int                     i;
 161
 162         k_hashes(filter, hashes, elem, len);
 163
 164         /* Map a bit-wise address to a byte-wise address + bit offset */
 165         for (i = 0; i < filter->k_hash_funcs; i++)
 166         {
 167                 if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7))))
 168                         return true;
 169         }
 170
 171         return false;
 172 }
 173
 174 /*
 175  * What proportion of bits are currently set?
 176  *
 177  * Returns proportion, expressed as a multiplier of filter size.  That should
 178  * generally be close to 0.5, even when we have more than enough memory to
 179  * ensure a false positive rate within target 1% to 2% band, since more hash
 180  * functions are used as more memory is available per element.
 181  *
 182  * This is the only instrumentation that is low overhead enough to appear in
 183  * debug traces.  When debugging Bloom filter code, it's likely to be far more
 184  * interesting to directly test the false positive rate.
 185  */
 186 double
 187 bloom_prop_bits_set(bloom_filter *filter)
 188 {
 189         int                     bitset_bytes = filter->m / BITS_PER_BYTE;
 190         uint64          bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
 191
 192         return bits_set / (double) filter->m;
 193 }
 194
 195 /*
 196  * Which element in the sequence of powers of two is less than or equal to
 197  * target_bitset_bits?
 198  *
 199  * Value returned here must be generally safe as the basis for actual bitset
 200  * size.
 201  *
 202  * Bitset is never allowed to exceed 2 ^ 32 bits (512MB).  This is sufficient
 203  * for the needs of all current callers, and allows us to use 32-bit hash
 204  * functions.  It also makes it easy to stay under the MaxAllocSize restriction
 205  * (caller needs to leave room for non-bitset fields that appear before
 206  * flexible array member, so a 1GB bitset would use an allocation that just
 207  * exceeds MaxAllocSize).
 208  */
 209 static int
 210 my_bloom_power(uint64 target_bitset_bits)
 211 {
 212         int                     bloom_power = -1;
 213
 214         while (target_bitset_bits > 0 && bloom_power < 32)
 215         {
 216                 bloom_power++;
 217                 target_bitset_bits >>= 1;
 218         }
 219
 220         return bloom_power;
 221 }
 222
 223 /*
 224  * Determine optimal number of hash functions based on size of filter in bits,
 225  * and projected total number of elements.  The optimal number is the number
 226  * that minimizes the false positive rate.
 227  */
 228 static int
 229 optimal_k(uint64 bitset_bits, int64 total_elems)
 230 {
 231         int                     k = rint(log(2.0) * bitset_bits / total_elems);
 232
 233         return Max(1, Min(k, MAX_HASH_FUNCS));
 234 }
 235
 236 /*
 237  * Generate k hash values for element.
 238  *
 239  * Caller passes array, which is filled-in with k values determined by hashing
 240  * caller's element.
 241  *
 242  * Only 2 real independent hash functions are actually used to support an
 243  * interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is
 244  * used to make this work.  The main reason we prefer enhanced double hashing
 245  * to classic double hashing is that the latter has an issue with collisions
 246  * when using power of two sized bitsets.  See Dillinger & Manolios for full
 247  * details.
 248  */
 249 static void
 250 k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len)
 251 {
 252         uint64          hash;
 253         uint32          x,
 254                                 y;
 255         uint64          m;
 256         int                     i;
 257
 258         /* Use 64-bit hashing to get two independent 32-bit hashes */
 259         hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed));
 260         x = (uint32) hash;
 261         y = (uint32) (hash >> 32);
 262         m = filter->m;
 263
 264         x = mod_m(x, m);
 265         y = mod_m(y, m);
 266
 267         /* Accumulate hashes */
 268         hashes[0] = x;
 269         for (i = 1; i < filter->k_hash_funcs; i++)
 270         {
 271                 x = mod_m(x + y, m);
 272                 y = mod_m(y + i, m);
 273
 274                 hashes[i] = x;
 275         }
 276 }
 277
 278 /*
 279  * Calculate "val MOD m" inexpensively.
 280  *
 281  * Assumes that m (which is bitset size) is a power of two.
 282  *
 283  * Using a power of two number of bits for bitset size allows us to use bitwise
 284  * AND operations to calculate the modulo of a hash value.  It's also a simple
 285  * way of avoiding the modulo bias effect.
 286  */
 287 static inline uint32
 288 mod_m(uint32 val, uint64 m)
 289 {
 290         Assert(m <= PG_UINT32_MAX + UINT64CONST(1));
 291         Assert(((m - 1) & m) == 0);
 292
 293         return val & (m - 1);
 294 }