2 Copied from Pandas: https://github.com/pydata/pandas/blob/master/pandas/src/khash.h
6 Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice shall be
17 included in all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
23 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
24 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 KHASH_MAP_INIT_INT(32, char)
37 khash_t(32) *h = kh_init(32);
38 k = kh_put(32, h, 5, &ret);
39 if (!ret) kh_del(32, h, k);
41 k = kh_get(32, h, 10);
42 is_missing = (k == kh_end(h));
45 for (k = kh_begin(h); k != kh_end(h); ++k)
46 if (kh_exist(h, k)) kh_value(h, k) = 1;
55 * The capacity is a power of 2. This seems to dramatically improve the
56 speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
58 - http://code.google.com/p/ulib/
59 - http://nothings.org/computer/judy/
61 * Allow to optionally use linear probing which usually has better
62 performance for random input. Double hashing is still the default as it
63 is more robust to certain non-random input.
65 * Added Wang's integer hash function (not used by default). This hash
66 function is more robust to certain non-random input.
70 * Allow to declare global functions.
78 * Corrected the example
83 * Improved speed a little in kh_put()
88 * Fixed a compiling error
92 * Changed to token concatenation which increases flexibility.
96 * Fixed a bug in kh_get(), which has not been tested previously.
110 Generic hash table library.
113 #define AC_VERSION_KHASH_H "0.2.6"
120 /* compipler specific configuration */
122 #if UINT_MAX == 0xffffffffu
123 typedef unsigned int khint32_t
;
124 #elif ULONG_MAX == 0xffffffffu
125 typedef unsigned long khint32_t
;
128 #if ULONG_MAX == ULLONG_MAX
129 typedef unsigned long khuint64_t
;
130 typedef signed long khint64_t
;
132 typedef unsigned long long khuint64_t
;
133 typedef signed long long khint64_t
;
136 typedef double khfloat64_t
;
138 #ifndef PANDAS_INLINE
139 #if defined(__GNUC__)
140 #define PANDAS_INLINE __inline__
141 #elif defined(_MSC_VER)
142 #define PANDAS_INLINE __inline
143 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
144 #define PANDAS_INLINE inline
146 #define PANDAS_INLINE
150 typedef khint32_t khint_t
;
151 typedef khint_t khiter_t
;
153 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
154 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
155 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
156 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
157 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
158 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
159 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
162 #define __ac_inc(k, m) 1
164 #define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
167 #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
170 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
173 static const double __ac_HASH_UPPER
= 0.77;
175 #define KHASH_DECLARE(name, khkey_t, khval_t) \
177 khint_t n_buckets, size, n_occupied, upper_bound; \
182 extern kh_##name##_t *kh_init_##name(); \
183 extern void kh_destroy_##name(kh_##name##_t *h); \
184 extern void kh_clear_##name(kh_##name##_t *h); \
185 extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
186 extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
187 extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
188 extern void kh_del_##name(kh_##name##_t *h, khint_t x);
190 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
192 khint_t n_buckets, size, n_occupied, upper_bound; \
197 SCOPE kh_##name##_t *kh_init_##name(void) { \
198 return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
200 SCOPE void kh_destroy_##name(kh_##name##_t *h) \
203 free(h->keys); free(h->flags); \
208 SCOPE void kh_clear_##name(kh_##name##_t *h) \
210 if (h && h->flags) { \
211 memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
212 h->size = h->n_occupied = 0; \
215 SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
217 if (h->n_buckets) { \
218 khint_t inc, k, i, last, mask; \
219 mask = h->n_buckets - 1; \
220 k = __hash_func(key); i = k & mask; \
221 inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
222 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
223 i = (i + inc) & mask; \
224 if (i == last) return h->n_buckets; \
226 return __ac_iseither(h->flags, i)? h->n_buckets : i; \
229 SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
230 { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
231 khint32_t *new_flags = 0; \
234 kroundup32(new_n_buckets); \
235 if (new_n_buckets < 4) new_n_buckets = 4; \
236 if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
237 else { /* hash table size to be changed (shrink or expand); rehash */ \
238 new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
239 memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
240 if (h->n_buckets < new_n_buckets) { /* expand */ \
241 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
242 if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
243 } /* otherwise shrink */ \
246 if (j) { /* rehashing is needed */ \
247 for (j = 0; j != h->n_buckets; ++j) { \
248 if (__ac_iseither(h->flags, j) == 0) { \
249 khkey_t key = h->keys[j]; \
252 new_mask = new_n_buckets - 1; \
253 if (kh_is_map) val = h->vals[j]; \
254 __ac_set_isdel_true(h->flags, j); \
255 while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
257 k = __hash_func(key); \
259 inc = __ac_inc(k, new_mask); \
260 while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
261 __ac_set_isempty_false(new_flags, i); \
262 if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
263 { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
264 if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
265 __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
266 } else { /* write the element and jump out of the loop */ \
268 if (kh_is_map) h->vals[i] = val; \
274 if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
275 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
276 if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
278 free(h->flags); /* free the working space */ \
279 h->flags = new_flags; \
280 h->n_buckets = new_n_buckets; \
281 h->n_occupied = h->size; \
282 h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
285 SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
288 if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
289 if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \
290 else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
291 } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
293 khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
294 x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
295 if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
297 inc = __ac_inc(k, mask); last = i; \
298 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
299 if (__ac_isdel(h->flags, i)) site = i; \
300 i = (i + inc) & mask; \
301 if (i == last) { x = site; break; } \
303 if (x == h->n_buckets) { \
304 if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
309 if (__ac_isempty(h->flags, x)) { /* not present at all */ \
311 __ac_set_isboth_false(h->flags, x); \
312 ++h->size; ++h->n_occupied; \
314 } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
316 __ac_set_isboth_false(h->flags, x); \
319 } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
322 SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
324 if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
325 __ac_set_isdel_true(h->flags, x); \
330 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
331 KHASH_INIT2(name, static PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
333 /* --- BEGIN OF HASH FUNCTIONS --- */
336 @abstract Integer hash function
337 @param key The integer [khint32_t]
338 @return The hash value [khint_t]
340 #define kh_int_hash_func(key) (khint32_t)(key)
342 @abstract Integer comparison function
344 #define kh_int_hash_equal(a, b) ((a) == (b))
346 @abstract 64-bit integer hash function
347 @param key The integer [khint64_t]
348 @return The hash value [khint_t]
350 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
352 @abstract 64-bit integer comparison function
354 #define kh_int64_hash_equal(a, b) ((a) == (b))
358 #define kh_float64_hash_func _Py_HashDouble
359 #define kh_float64_hash_equal kh_int64_hash_equal
362 @abstract const char* hash function
363 @param s Pointer to a null terminated string
364 @return The hash value
366 static PANDAS_INLINE khint_t
__ac_X31_hash_string(const char *s
)
369 if (h
) for (++s
; *s
; ++s
) h
= (h
<< 5) - h
+ *s
;
373 @abstract Another interface to const char* hash function
374 @param key Pointer to a null terminated string [const char*]
375 @return The hash value [khint_t]
377 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
379 @abstract Const char* comparison function
381 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
383 static PANDAS_INLINE khint_t
__ac_Wang_hash(khint_t key
)
393 #define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
395 /* --- END OF HASH FUNCTIONS --- */
397 /* Other convenient macros... */
400 @abstract Type of the hash table.
401 @param name Name of the hash table [symbol]
403 #define khash_t(name) kh_##name##_t
406 @abstract Initiate a hash table.
407 @param name Name of the hash table [symbol]
408 @return Pointer to the hash table [khash_t(name)*]
410 #define kh_init(name) kh_init_##name(void)
413 @abstract Destroy a hash table.
414 @param name Name of the hash table [symbol]
415 @param h Pointer to the hash table [khash_t(name)*]
417 #define kh_destroy(name, h) kh_destroy_##name(h)
420 @abstract Reset a hash table without deallocating memory.
421 @param name Name of the hash table [symbol]
422 @param h Pointer to the hash table [khash_t(name)*]
424 #define kh_clear(name, h) kh_clear_##name(h)
427 @abstract Resize a hash table.
428 @param name Name of the hash table [symbol]
429 @param h Pointer to the hash table [khash_t(name)*]
430 @param s New size [khint_t]
432 #define kh_resize(name, h, s) kh_resize_##name(h, s)
435 @abstract Insert a key to the hash table.
436 @param name Name of the hash table [symbol]
437 @param h Pointer to the hash table [khash_t(name)*]
438 @param k Key [type of keys]
439 @param r Extra return code: 0 if the key is present in the hash table;
440 1 if the bucket is empty (never used); 2 if the element in
441 the bucket has been deleted [int*]
442 @return Iterator to the inserted element [khint_t]
444 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
447 @abstract Retrieve a key from the hash table.
448 @param name Name of the hash table [symbol]
449 @param h Pointer to the hash table [khash_t(name)*]
450 @param k Key [type of keys]
451 @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
453 #define kh_get(name, h, k) kh_get_##name(h, k)
456 @abstract Remove a key from the hash table.
457 @param name Name of the hash table [symbol]
458 @param h Pointer to the hash table [khash_t(name)*]
459 @param k Iterator to the element to be deleted [khint_t]
461 #define kh_del(name, h, k) kh_del_##name(h, k)
464 @abstract Test whether a bucket contains data.
465 @param h Pointer to the hash table [khash_t(name)*]
466 @param x Iterator to the bucket [khint_t]
467 @return 1 if containing data; 0 otherwise [int]
469 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
472 @abstract Get key given an iterator
473 @param h Pointer to the hash table [khash_t(name)*]
474 @param x Iterator to the bucket [khint_t]
475 @return Key [type of keys]
477 #define kh_key(h, x) ((h)->keys[x])
480 @abstract Get value given an iterator
481 @param h Pointer to the hash table [khash_t(name)*]
482 @param x Iterator to the bucket [khint_t]
483 @return Value [type of values]
484 @discussion For hash sets, calling this results in segfault.
486 #define kh_val(h, x) ((h)->vals[x])
489 @abstract Alias of kh_val()
491 #define kh_value(h, x) ((h)->vals[x])
494 @abstract Get the start iterator
495 @param h Pointer to the hash table [khash_t(name)*]
496 @return The start iterator [khint_t]
498 #define kh_begin(h) (khint_t)(0)
501 @abstract Get the end iterator
502 @param h Pointer to the hash table [khash_t(name)*]
503 @return The end iterator [khint_t]
505 #define kh_end(h) ((h)->n_buckets)
508 @abstract Get the number of elements in the hash table
509 @param h Pointer to the hash table [khash_t(name)*]
510 @return Number of elements in the hash table [khint_t]
512 #define kh_size(h) ((h)->size)
515 @abstract Get the number of buckets in the hash table
516 @param h Pointer to the hash table [khash_t(name)*]
517 @return Number of buckets in the hash table [khint_t]
519 #define kh_n_buckets(h) ((h)->n_buckets)
521 /* More conenient interfaces */
524 @abstract Instantiate a hash set containing integer keys
525 @param name Name of the hash table [symbol]
527 #define KHASH_SET_INIT_INT(name) \
528 KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
531 @abstract Instantiate a hash map containing integer keys
532 @param name Name of the hash table [symbol]
533 @param khval_t Type of values [type]
535 #define KHASH_MAP_INIT_INT(name, khval_t) \
536 KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
539 @abstract Instantiate a hash map containing 64-bit integer keys
540 @param name Name of the hash table [symbol]
542 #define KHASH_SET_INIT_UINT64(name) \
543 KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
545 #define KHASH_SET_INIT_INT64(name) \
546 KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
549 @abstract Instantiate a hash map containing 64-bit integer keys
550 @param name Name of the hash table [symbol]
551 @param khval_t Type of values [type]
553 #define KHASH_MAP_INIT_UINT64(name, khval_t) \
554 KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
556 #define KHASH_MAP_INIT_INT64(name, khval_t) \
557 KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
559 #define KHASH_MAP_INIT_FLOAT64(name, khval_t) \
560 KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_float64_hash_equal)
562 typedef const char *kh_cstr_t
;
564 @abstract Instantiate a hash map containing const char* keys
565 @param name Name of the hash table [symbol]
567 #define KHASH_SET_INIT_STR(name) \
568 KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
571 @abstract Instantiate a hash map containing const char* keys
572 @param name Name of the hash table [symbol]
573 @param khval_t Type of values [type]
575 #define KHASH_MAP_INIT_STR(name, khval_t) \
576 KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
581 #define kh_python_hash_func(key) (PyObject_Hash(key))
582 #define kh_python_hash_equal(a, b) ((a == b) || PyObject_RichCompareBool(a, b, Py_EQ))
587 typedef PyObject
* kh_pyobject_t
;
589 #define KHASH_MAP_INIT_PYOBJECT(name, khval_t) \
590 KHASH_INIT(name, kh_pyobject_t, khval_t, 1, kh_python_hash_func, kh_python_hash_equal)
592 KHASH_MAP_INIT_PYOBJECT(pymap
, Py_ssize_t
)
594 #define KHASH_SET_INIT_PYOBJECT(name) \
595 KHASH_INIT(name, kh_pyobject_t, char, 0, kh_python_hash_func, kh_python_hash_equal)
597 KHASH_SET_INIT_PYOBJECT(pyset
)
599 #define kh_exist_pymap(h, k) (kh_exist(h, k))
600 #define kh_exist_pyset(h, k) (kh_exist(h, k))
601 #define kh_exist_str(h, k) (kh_exist(h, k))
602 #define kh_exist_float64(h, k) (kh_exist(h, k))
603 #define kh_exist_int64(h, k) (kh_exist(h, k))
604 #define kh_exist_int32(h, k) (kh_exist(h, k))
606 KHASH_MAP_INIT_STR(str
, Py_ssize_t
)
608 KHASH_MAP_INIT_INT(int32
, Py_ssize_t
)
609 KHASH_MAP_INIT_INT64(int64
, khfloat64_t
) /* I want this hash to store float type values*/
610 KHASH_MAP_INIT_FLOAT64(float64
, khfloat64_t
) /* I want this hash to store float type values*/
612 #endif /* __AC_KHASH_H */