merge: be more careful about aligning hunk-headers with newlines.
[wiggle/upstream.git] / ccan / hash / hash.h
blob0400e6a3b29bbc4505e707ed5231ed197ad6f22a
1 #ifndef CCAN_HASH_H
2 #define CCAN_HASH_H
3 #include "config.h"
4 #include <stdint.h>
5 #include <stdlib.h>
6 #include <ccan/build_assert/build_assert.h>
8 /* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain.
9 *
10 * http://burtleburtle.net/bob/c/lookup3.c
13 /**
14 * hash - fast hash of an array for internal use
15 * @p: the array or pointer to first element
16 * @num: the number of elements to hash
17 * @base: the base number to roll into the hash (usually 0)
19 * The memory region pointed to by p is combined with the base to form
20 * a 32-bit hash.
22 * This hash will have different results on different machines, so is
23 * only useful for internal hashes (ie. not hashes sent across the
24 * network or saved to disk).
26 * It may also change with future versions: it could even detect at runtime
27 * what the fastest hash to use is.
29 * See also: hash64, hash_stable.
31 * Example:
32 * #include <ccan/hash/hash.h>
33 * #include <err.h>
34 * #include <stdio.h>
35 * #include <string.h>
37 * // Simple demonstration: idential strings will have the same hash, but
38 * // two different strings will probably not.
39 * int main(int argc, char *argv[])
40 * {
41 * uint32_t hash1, hash2;
43 * if (argc != 3)
44 * err(1, "Usage: %s <string1> <string2>", argv[0]);
46 * hash1 = hash(argv[1], strlen(argv[1]), 0);
47 * hash2 = hash(argv[2], strlen(argv[2]), 0);
48 * printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
49 * return 0;
50 * }
52 #define hash(p, num, base) hash_any((p), (num)*sizeof(*(p)), (base))
54 /**
55 * hash_stable - hash of an array for external use
56 * @p: the array or pointer to first element
57 * @num: the number of elements to hash
58 * @base: the base number to roll into the hash (usually 0)
60 * The array of simple integer types pointed to by p is combined with
61 * the base to form a 32-bit hash.
63 * This hash will have the same results on different machines, so can
64 * be used for external hashes (ie. hashes sent across the network or
65 * saved to disk). The results will not change in future versions of
66 * this module.
68 * Note that it is only legal to hand an array of simple integer types
69 * to this hash (ie. char, uint16_t, int64_t, etc). In these cases,
70 * the same values will have the same hash result, even though the
71 * memory representations of integers depend on the machine
72 * endianness.
74 * See also:
75 * hash64_stable
77 * Example:
78 * #include <ccan/hash/hash.h>
79 * #include <err.h>
80 * #include <stdio.h>
81 * #include <string.h>
83 * int main(int argc, char *argv[])
84 * {
85 * if (argc != 2)
86 * err(1, "Usage: %s <string-to-hash>", argv[0]);
88 * printf("Hash stable result is %u\n",
89 * hash_stable(argv[1], strlen(argv[1]), 0));
90 * return 0;
91 * }
93 #define hash_stable(p, num, base) \
94 (BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4 \
95 || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) + \
96 sizeof(*(p)) == 8 ? hash_stable_64((p), (num), (base)) \
97 : sizeof(*(p)) == 4 ? hash_stable_32((p), (num), (base)) \
98 : sizeof(*(p)) == 2 ? hash_stable_16((p), (num), (base)) \
99 : hash_stable_8((p), (num), (base)))
102 * hash_u32 - fast hash an array of 32-bit values for internal use
103 * @key: the array of uint32_t
104 * @num: the number of elements to hash
105 * @base: the base number to roll into the hash (usually 0)
107 * The array of uint32_t pointed to by @key is combined with the base
108 * to form a 32-bit hash. This is 2-3 times faster than hash() on small
109 * arrays, but the advantage vanishes over large hashes.
111 * This hash will have different results on different machines, so is
112 * only useful for internal hashes (ie. not hashes sent across the
113 * network or saved to disk).
115 uint32_t hash_u32(const uint32_t *key, size_t num, uint32_t base);
118 * hash_string - very fast hash of an ascii string
119 * @str: the nul-terminated string
121 * The string is hashed, using a hash function optimized for ASCII and
122 * similar strings. It's weaker than the other hash functions.
124 * This hash may have different results on different machines, so is
125 * only useful for internal hashes (ie. not hashes sent across the
126 * network or saved to disk). The results will be different from the
127 * other hash functions in this module, too.
129 static inline uint32_t hash_string(const char *string)
131 /* This is Karl Nelson <kenelson@ece.ucdavis.edu>'s X31 hash.
132 * It's a little faster than the (much better) lookup3 hash(): 56ns vs
133 * 84ns on my 2GHz Intel Core Duo 2 laptop for a 10 char string. */
134 uint32_t ret;
136 for (ret = 0; *string; string++)
137 ret = (ret << 5) - ret + *string;
139 return ret;
143 * hash64 - fast 64-bit hash of an array for internal use
144 * @p: the array or pointer to first element
145 * @num: the number of elements to hash
146 * @base: the 64-bit base number to roll into the hash (usually 0)
148 * The memory region pointed to by p is combined with the base to form
149 * a 64-bit hash.
151 * This hash will have different results on different machines, so is
152 * only useful for internal hashes (ie. not hashes sent across the
153 * network or saved to disk).
155 * It may also change with future versions: it could even detect at runtime
156 * what the fastest hash to use is.
158 * See also: hash.
160 * Example:
161 * #include <ccan/hash/hash.h>
162 * #include <err.h>
163 * #include <stdio.h>
164 * #include <string.h>
166 * // Simple demonstration: idential strings will have the same hash, but
167 * // two different strings will probably not.
168 * int main(int argc, char *argv[])
170 * uint64_t hash1, hash2;
172 * if (argc != 3)
173 * err(1, "Usage: %s <string1> <string2>", argv[0]);
175 * hash1 = hash64(argv[1], strlen(argv[1]), 0);
176 * hash2 = hash64(argv[2], strlen(argv[2]), 0);
177 * printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
178 * return 0;
181 #define hash64(p, num, base) hash64_any((p), (num)*sizeof(*(p)), (base))
184 * hash64_stable - 64 bit hash of an array for external use
185 * @p: the array or pointer to first element
186 * @num: the number of elements to hash
187 * @base: the base number to roll into the hash (usually 0)
189 * The array of simple integer types pointed to by p is combined with
190 * the base to form a 64-bit hash.
192 * This hash will have the same results on different machines, so can
193 * be used for external hashes (ie. hashes sent across the network or
194 * saved to disk). The results will not change in future versions of
195 * this module.
197 * Note that it is only legal to hand an array of simple integer types
198 * to this hash (ie. char, uint16_t, int64_t, etc). In these cases,
199 * the same values will have the same hash result, even though the
200 * memory representations of integers depend on the machine
201 * endianness.
203 * See also:
204 * hash_stable
206 * Example:
207 * #include <ccan/hash/hash.h>
208 * #include <err.h>
209 * #include <stdio.h>
210 * #include <string.h>
212 * int main(int argc, char *argv[])
214 * if (argc != 2)
215 * err(1, "Usage: %s <string-to-hash>", argv[0]);
217 * printf("Hash stable result is %llu\n",
218 * (long long)hash64_stable(argv[1], strlen(argv[1]), 0));
219 * return 0;
222 #define hash64_stable(p, num, base) \
223 (BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4 \
224 || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) + \
225 sizeof(*(p)) == 8 ? hash64_stable_64((p), (num), (base)) \
226 : sizeof(*(p)) == 4 ? hash64_stable_32((p), (num), (base)) \
227 : sizeof(*(p)) == 2 ? hash64_stable_16((p), (num), (base)) \
228 : hash64_stable_8((p), (num), (base)))
232 * hashl - fast 32/64-bit hash of an array for internal use
233 * @p: the array or pointer to first element
234 * @num: the number of elements to hash
235 * @base: the base number to roll into the hash (usually 0)
237 * This is either hash() or hash64(), on 32/64 bit long machines.
239 #define hashl(p, num, base) \
240 (BUILD_ASSERT_OR_ZERO(sizeof(long) == sizeof(uint32_t) \
241 || sizeof(long) == sizeof(uint64_t)) + \
242 (sizeof(long) == sizeof(uint64_t) \
243 ? hash64((p), (num), (base)) : hash((p), (num), (base))))
245 /* Our underlying operations. */
246 uint32_t hash_any(const void *key, size_t length, uint32_t base);
247 uint32_t hash_stable_64(const void *key, size_t n, uint32_t base);
248 uint32_t hash_stable_32(const void *key, size_t n, uint32_t base);
249 uint32_t hash_stable_16(const void *key, size_t n, uint32_t base);
250 uint32_t hash_stable_8(const void *key, size_t n, uint32_t base);
251 uint64_t hash64_any(const void *key, size_t length, uint64_t base);
252 uint64_t hash64_stable_64(const void *key, size_t n, uint64_t base);
253 uint64_t hash64_stable_32(const void *key, size_t n, uint64_t base);
254 uint64_t hash64_stable_16(const void *key, size_t n, uint64_t base);
255 uint64_t hash64_stable_8(const void *key, size_t n, uint64_t base);
258 * hash_pointer - hash a pointer for internal use
259 * @p: the pointer value to hash
260 * @base: the base number to roll into the hash (usually 0)
262 * The pointer p (not what p points to!) is combined with the base to form
263 * a 32-bit hash.
265 * This hash will have different results on different machines, so is
266 * only useful for internal hashes (ie. not hashes sent across the
267 * network or saved to disk).
269 * Example:
270 * #include <ccan/hash/hash.h>
272 * // Code to keep track of memory regions.
273 * struct region {
274 * struct region *chain;
275 * void *start;
276 * unsigned int size;
277 * };
278 * // We keep a simple hash table.
279 * static struct region *region_hash[128];
281 * static void add_region(struct region *r)
283 * unsigned int h = hash_pointer(r->start, 0);
285 * r->chain = region_hash[h];
286 * region_hash[h] = r->chain;
289 * static struct region *find_region(const void *start)
291 * struct region *r;
293 * for (r = region_hash[hash_pointer(start, 0)]; r; r = r->chain)
294 * if (r->start == start)
295 * return r;
296 * return NULL;
299 static inline uint32_t hash_pointer(const void *p, uint32_t base)
301 if (sizeof(p) % sizeof(uint32_t) == 0) {
302 /* This convoluted union is the right way of aliasing. */
303 union {
304 uint32_t u32[sizeof(p) / sizeof(uint32_t)];
305 const void *p;
306 } u;
307 u.p = p;
308 return hash_u32(u.u32, sizeof(p) / sizeof(uint32_t), base);
309 } else
310 return hash(&p, 1, base);
312 #endif /* HASH_H */