doc: s/bogomips.org/yhbt.net/
[ruby-tdb.git] / ext / tdb / murmur2.c
blob6b6f8a6b10675e0fbea25750d07e4771fd3978c3
1 #include "rbtdb.h"
2 /*
3 * https://sites.google.com/site/murmurhash/
5 * Public Domain hash functions by Austin Appleby.
7 * Trivially adapted for use with Ruby TDB by Eric Wong.
8 */
11 * 'm' and 'r' are mixing constants generated offline.
12 * They're not really 'magic', they just happen to work well.
15 static const unsigned int m = 0x5bd1e995;
16 static const int r = 24;
17 static const unsigned int seed;
19 unsigned int rbtdb_murmur2(TDB_DATA * key)
21 const unsigned char *data = key->dptr;
22 int len = (int)key->dsize;
23 /* Initialize the hash to a 'random' value */
24 unsigned int h = seed ^ len;
26 while (len >= 4) {
27 unsigned int k = *(const unsigned int *)data;
29 k *= m;
30 k ^= k >> r;
31 k *= m;
33 h *= m;
34 h ^= k;
36 data += 4;
37 len -= 4;
40 /* Handle the last few bytes of the input array */
41 switch (len) {
42 case 3:
43 h ^= data[2] << 16;
44 case 2:
45 h ^= data[1] << 8;
46 case 1:
47 h ^= data[0];
48 h *= m;
52 * Do a few final mixes of the hash to ensure the last few
53 * bytes are well-incorporated.
56 h ^= h >> 13;
57 h *= m;
58 h ^= h >> 15;
60 return h;
64 * This is a variant of MurmurHash2 modified to use the Merkle-Damgard
65 * construction. Bulk speed should be identical to Murmur2, small-key speed
66 * will be 10%-20% slower due to the added overhead at the end of the hash.
68 * This variant fixes a minor issue where null keys were more likely to
69 * collide with each other than expected, and also makes the algorithm
70 * more amenable to incremental implementations. All other caveats from
71 * MurmurHash2 still apply.
74 #define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
76 unsigned int rbtdb_murmur2a(TDB_DATA * key)
78 const unsigned char *data = key->dptr;
79 int len = (int)key->dsize;
80 unsigned int l = (unsigned int)len;
81 unsigned int h = seed;
82 unsigned int t = 0;
84 while (len >= 4) {
85 unsigned int k = *(const unsigned int *)data;
87 mmix(h, k);
89 data += 4;
90 len -= 4;
93 switch (len) {
94 case 3:
95 t ^= data[2] << 16;
96 case 2:
97 t ^= data[1] << 8;
98 case 1:
99 t ^= data[0];
102 mmix(h, t);
103 mmix(h, l);
105 h ^= h >> 13;
106 h *= m;
107 h ^= h >> 15;
109 return h;
113 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
114 * on certain platforms
116 * Performance will be lower than MurmurHash2
119 #define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
121 unsigned int rbtdb_murmur2_aligned(TDB_DATA * key)
123 const unsigned char *data = key->dptr;
124 int len = (int)key->dsize;
125 unsigned int h = seed ^ len;
126 union { const unsigned char *byte; int integer; } cast = { data };
127 int align = cast.integer & 3;
129 if (align && (len >= 4)) {
130 /* Pre-load the temp registers */
131 unsigned int t = 0, d = 0;
132 int sl, sr;
134 switch (align) {
135 case 1:
136 t |= data[2] << 16;
137 case 2:
138 t |= data[1] << 8;
139 case 3:
140 t |= data[0];
143 t <<= (8 * align);
145 data += 4 - align;
146 len -= 4 - align;
148 sl = 8 * (4 - align);
149 sr = 8 * align;
151 /* Mix */
152 while (len >= 4) {
153 unsigned int k;
155 d = *(const unsigned int *)data;
156 t = (t >> sr) | (d << sl);
158 k = t;
160 MIX(h, k, m);
162 t = d;
164 data += 4;
165 len -= 4;
168 /* Handle leftover data in temp registers */
169 d = 0;
170 if (len >= align) {
171 unsigned int k;
173 switch (align) {
174 case 3:
175 d |= data[2] << 16;
176 case 2:
177 d |= data[1] << 8;
178 case 1:
179 d |= data[0];
182 k = (t >> sr) | (d << sl);
183 MIX(h, k, m);
185 data += align;
186 len -= align;
188 /* Handle tail bytes */
189 switch (len) {
190 case 3:
191 h ^= data[2] << 16;
192 case 2:
193 h ^= data[1] << 8;
194 case 1:
195 h ^= data[0];
196 h *= m;
198 } else {
199 switch (len) {
200 case 3:
201 d |= data[2] << 16;
202 case 2:
203 d |= data[1] << 8;
204 case 1:
205 d |= data[0];
206 case 0:
207 h ^= (t >> sr) | (d << sl);
208 h *= m;
212 h ^= h >> 13;
213 h *= m;
214 h ^= h >> 15;
216 return h;
217 } else {
218 while (len >= 4) {
219 unsigned int k = *(const unsigned int *)data;
221 MIX(h, k, m);
223 data += 4;
224 len -= 4;
227 /* Handle tail bytes */
228 switch (len) {
229 case 3:
230 h ^= data[2] << 16;
231 case 2:
232 h ^= data[1] << 8;
233 case 1:
234 h ^= data[0];
235 h *= m;
238 h ^= h >> 13;
239 h *= m;
240 h ^= h >> 15;
242 return h;
247 * Same as MurmurHash2, but endian- and alignment-neutral.
248 * Half the speed though, alas.
250 unsigned int rbtdb_murmur2_neutral(TDB_DATA * key)
252 const unsigned char *data = key->dptr;
253 int len = (int)key->dsize;
254 unsigned int h = seed ^ len;
256 while (len >= 4) {
257 unsigned int k;
259 k = data[0];
260 k |= data[1] << 8;
261 k |= data[2] << 16;
262 k |= data[3] << 24;
264 k *= m;
265 k ^= k >> r;
266 k *= m;
268 h *= m;
269 h ^= k;
271 data += 4;
272 len -= 4;
275 switch (len) {
276 case 3:
277 h ^= data[2] << 16;
278 case 2:
279 h ^= data[1] << 8;
280 case 1:
281 h ^= data[0];
282 h *= m;
285 h ^= h >> 13;
286 h *= m;
287 h ^= h >> 15;
289 return h;