1 /** Hashing infrastructure
12 #include "util/hash.h"
13 #include "util/memory.h"
16 /* String hashes functions chooser. */
17 /* #define MIX_HASH */ /* Far more better but slower */
18 #define X31_HASH /* Weaker but faster */
21 /* We provide common infrastructure for hashing - each hash consists from one
22 * particularly large array full of small lists of keys with same index in the
23 * array (same hash value). */
25 #define hash_mask(n) (hash_size(n) - 1)
26 #define hash_size(n) (1 << (n))
28 static hash_value_T
strhash(unsigned char *k
, unsigned int length
, hash_value_T initval
);
30 static inline struct hash
*
31 init_hash(unsigned int width
, hash_func_T func
)
36 assert(width
> 0 && func
);
37 if_assert_failed
return NULL
;
39 /* One is already reserved in struct hash, so use size - 1. */
40 hash
= mem_alloc(sizeof(*hash
) + (hash_size(width
) - 1)
41 * sizeof(struct list_head
));
42 if (!hash
) return NULL
;
47 /* Initialize dummy list_heads */
48 for (; i
< hash_size(width
); i
++)
49 init_list(hash
->hash
[i
]);
58 return init_hash(8, &strhash
);
63 free_hash(struct hash
**hashp
)
67 assert(hashp
&& *hashp
);
68 if_assert_failed
return;
70 for (; i
< hash_size((*hashp
)->width
); i
++)
71 free_list((*hashp
)->hash
[i
]);
73 mem_free_set(hashp
, NULL
);
77 /** Initialization vector for the hash function.
78 * I've no much idea about what to set here.. I think it doesn't matter much
79 * anyway.. ;) --pasky */
80 #define HASH_MAGIC 0xdeadbeef
82 /** @returns hash_item if ok, NULL if error.
85 add_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
,
89 struct hash_item
*item
= mem_alloc(sizeof(*item
));
91 if (!item
) return NULL
;
93 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
96 item
->keylen
= keylen
;
99 add_to_list(hash
->hash
[hashval
], item
);
106 get_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
)
108 struct list_head
*list
;
109 struct hash_item
*item
;
110 hash_value_T hashval
;
112 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
113 list
= &hash
->hash
[hashval
];
115 foreach (item
, *list
) {
116 if (keylen
!= item
->keylen
) continue;
117 if (memcmp(key
, item
->key
, keylen
)) continue;
119 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
120 * the list by popularity of its items. Inspired from Links,
121 * probably PerM. --pasky */
122 move_to_top_of_list(*list
, item
);
132 /** Delete @a item from @a hash.
133 * If key and/or value were dynamically allocated, think about freeing them.
134 * This function doesn't do that.
137 del_hash_item(struct hash
*hash
, struct hash_item
*item
)
140 if_assert_failed
return;
149 /** Fast string hashing.
151 * @param length the length of the key
152 * @param initval the previous hash, or an arbitrary value */
154 strhash(unsigned char *k
,
156 hash_value_T initval
)
158 const unsigned char *p
= (const unsigned char *) k
;
159 hash_value_T h
= initval
;
162 assert(k
&& length
> 0);
163 if_assert_failed
return h
;
165 /* This loop was unrolled, should improve performance on most cpus,
166 * After some tests, 8 seems a good value for most keys. --Zas */
168 h
= (h
<< 5) - h
+ p
[i
];
169 if (++i
== length
) break;
170 h
= (h
<< 5) - h
+ p
[i
];
171 if (++i
== length
) break;
172 h
= (h
<< 5) - h
+ p
[i
];
173 if (++i
== length
) break;
174 h
= (h
<< 5) - h
+ p
[i
];
175 if (++i
== length
) break;
176 h
= (h
<< 5) - h
+ p
[i
];
177 if (++i
== length
) break;
178 h
= (h
<< 5) - h
+ p
[i
];
179 if (++i
== length
) break;
180 h
= (h
<< 5) - h
+ p
[i
];
181 if (++i
== length
) break;
182 h
= (h
<< 5) - h
+ p
[i
];
183 if (++i
== length
) break;
191 /* String hashing function follows; it is not written by me, somewhere below
192 * are credits. I only hacked it a bit. --pasky */
194 /* This is a big CPU hog, in fact:
196 * % cumulative self self total-----------
197 * time seconds seconds calls us/call us/call name----
198 * 6.00 0.35 0.06 10126 5.93 5.93 strhash
200 * It should be investigated whether we couldn't push this down a little. */
203 * --------------------------------------------------------------------
204 * mix -- mix 3 32-bit values reversibly.
205 * For every delta with one or two bits set, and the deltas of all three
206 * high bits or all three low bits, whether the original value of a,b,c
207 * is almost all zero or is uniformly distributed,
208 * * If mix() is run forward or backward, at least 32 bits in a,b,c
209 * have at least 1/4 probability of changing.
210 * * If mix() is run forward, every bit of c will change between 1/3 and
211 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
212 * mix() was built out of 36 single-cycle latency instructions in a
213 * structure that could supported 2x parallelism, like so:
215 * a -= c; x = (c>>13);
217 * b -= a; x = (a<<8);
219 * c -= b; x = (b>>13);
221 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
222 * of that parallelism. They've also turned some of those single-cycle
223 * latency instructions into multi-cycle latency instructions. Still,
224 * this is the fastest good hash I could find. There were about 2^^68
225 * to choose from. I only looked at a billion or so.
226 * --------------------------------------------------------------------
229 #define mix(a, b, c) { \
230 a -= b; a -= c; a ^= (c>>13); \
231 b -= c; b -= a; b ^= (a<<8); \
232 c -= a; c -= b; c ^= (b>>13); \
233 a -= b; a -= c; a ^= (c>>12); \
234 b -= c; b -= a; b ^= (a<<16); \
235 c -= a; c -= b; c ^= (b>>5); \
236 a -= b; a -= c; a ^= (c>>3); \
237 b -= c; b -= a; b ^= (a<<10); \
238 c -= a; c -= b; c ^= (b>>15); \
242 --------------------------------------------------------------------
243 hash() -- hash a variable-length key into a 32-bit value
244 k : the key (the unaligned variable-length array of bytes)
245 len : the length of the key, counting by bytes
246 initval : can be any 4-byte value
247 Returns a 32-bit value. Every bit of the key affects every bit of
248 the return value. Every 1-bit and 2-bit delta achieves avalanche.
249 About 6*len+35 instructions.
251 The best hash table sizes are powers of 2. There is no need to do
252 mod a prime (mod is sooo slow!). If you need less than 32 bits,
253 use a bitmask. For example, if you need only 10 bits, do
254 h = (h & hashmask(10));
255 In which case, the hash table should have hashsize(10) elements.
257 If you are hashing n strings (ub1 **) k, do it like this:
258 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
260 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
261 code any way you wish, private, educational, or commercial. It's free.
263 See http://burtleburtle.net/bob/hash/evahash.html
264 Use for hash table lookup, or anything where one collision in 2^^32 is
265 acceptable. Do NOT use for cryptographic purposes.
266 --------------------------------------------------------------------
269 #define keycompute(a) ((k[(a)]) \
270 + ((hash_value_T) (k[(a)+1])<<8) \
271 + ((hash_value_T) (k[(a)+2])<<16) \
272 + ((hash_value_T) (k[(a)+3])<<24))
274 /** Hash an array of bytes.
276 * @param length the length of the key
277 * @param initval the previous hash, or an arbitrary value */
279 strhash(unsigned char *k
,
281 hash_value_T initval
)
284 hash_value_T a
, b
, c
;
286 /* Set up the internal state */
288 a
= b
= 0x9e3779b9; /* the golden ratio; an arbitrary value */
289 c
= initval
; /* the previous hash value */
291 /*---------------------------------------- handle most of the key */
301 /*------------------------------------- handle the last 11 bytes */
303 switch (len
) { /* all the case statements fall through */
304 case 11: c
+= ((hash_value_T
) (k
[10])<<24);
305 case 10: c
+= ((hash_value_T
) (k
[9])<<16);
306 case 9 : c
+= ((hash_value_T
) (k
[8])<<8);
307 /* the first byte of c is reserved for the length */
308 case 8 : b
+= ((hash_value_T
) (k
[7])<<24);
309 case 7 : b
+= ((hash_value_T
) (k
[6])<<16);
310 case 6 : b
+= ((hash_value_T
) (k
[5])<<8);
311 case 5 : b
+= (k
[4]);
312 case 4 : a
+= ((hash_value_T
) (k
[3])<<24);
313 case 3 : a
+= ((hash_value_T
) (k
[2])<<16);
314 case 2 : a
+= ((hash_value_T
) (k
[1])<<8);
315 case 1 : a
+= (k
[0]);
316 /* case 0: nothing left to add */
321 /*-------------------------------------------- report the result */
329 #endif /* MIX_HASH */