2 * Copyright (c) 2012-2019, Jyri J. Virkki
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /* Obtained from https://github.com/jvirkki/libbloom */
32 * Refer to bloom.h for documentation on the public interfaces.
43 #include <sys/types.h>
47 #include "murmurhash2.h"
49 #define MAKESTRING(n) STRING(n)
53 inline static int test_bit_set_bit(unsigned char * buf
,
54 unsigned int x
, int set_bit
)
56 unsigned int byte
= x
>> 3;
57 unsigned char c
= buf
[byte
]; // expensive memory access
58 unsigned int mask
= 1 << (x
% 8);
71 static int bloom_check_add(struct bloom
* bloom
,
72 const void * buffer
, int len
, int add
)
74 if (bloom
->ready
== 0) {
75 printf("bloom at %p not initialized!\n", (void *)bloom
);
80 register unsigned int a
= murmurhash2(buffer
, len
, bloom
->seed
);
81 register unsigned int b
= murmurhash2(buffer
, len
, a
);
82 register unsigned int x
;
83 register unsigned int i
;
85 for (i
= 0; i
< bloom
->hashes
; i
++) {
86 x
= (a
+ i
*b
) % bloom
->bits
;
87 if (test_bit_set_bit(bloom
->bf
, x
, add
)) {
90 // Don't care about the presence of all the bits. Just our own.
95 if (hits
== bloom
->hashes
) {
96 return 1; // 1 == element already in (or collision)
103 int bloom_init_size(struct bloom
* bloom
, int entries
, double error
,
104 unsigned int cache_size
)
106 return bloom_init(bloom
, entries
, error
);
110 int bloom_init(struct bloom
* bloom
, int entries
, double error
)
114 bloom
->seed
= arc4random();
116 if (entries
< 1000 || error
== 0) {
120 bloom
->entries
= entries
;
121 bloom
->error
= error
;
123 double num
= log(bloom
->error
);
124 double denom
= 0.480453013918201; // ln(2)^2
125 bloom
->bpe
= -(num
/ denom
);
127 double dentries
= (double)entries
;
128 bloom
->bits
= (int)(dentries
* bloom
->bpe
);
130 if (bloom
->bits
% 8) {
131 bloom
->bytes
= (bloom
->bits
/ 8) + 1;
133 bloom
->bytes
= bloom
->bits
/ 8;
136 bloom
->hashes
= (int)ceil(0.693147180559945 * bloom
->bpe
); // ln(2)
138 bloom
->bf
= (unsigned char *)calloc(bloom
->bytes
, sizeof(unsigned char));
139 if (bloom
->bf
== NULL
) { // LCOV_EXCL_START
148 int bloom_check(struct bloom
* bloom
, const void * buffer
, int len
)
150 return bloom_check_add(bloom
, buffer
, len
, 0);
154 int bloom_add(struct bloom
* bloom
, const void * buffer
, int len
)
156 return bloom_check_add(bloom
, buffer
, len
, 1);
160 void bloom_print(struct bloom
* bloom
)
162 printf("bloom at %p\n", (void *)bloom
);
163 printf(" ->entries = %d\n", bloom
->entries
);
164 printf(" ->error = %f\n", bloom
->error
);
165 printf(" ->bits = %d\n", bloom
->bits
);
166 printf(" ->bits per elem = %f\n", bloom
->bpe
);
167 printf(" ->bytes = %d\n", bloom
->bytes
);
168 printf(" ->hash functions = %d\n", bloom
->hashes
);
172 void bloom_free(struct bloom
* bloom
)
181 int bloom_reset(struct bloom
* bloom
)
183 if (!bloom
->ready
) return 1;
184 memset(bloom
->bf
, 0, bloom
->bytes
);