Merge branch 'maint-0.4.8' into release-0.4.8
[tor.git] / src / ext / ht.h
blob4bfce3690307513811c9f2fe3ac6d50bd3f497c0
1 /* Copyright (c) 2002, Christopher Clark.
2 * Copyright (c) 2005-2006, Nick Mathewson.
3 * Copyright (c) 2007-2019, The Tor Project, Inc. */
4 /* See license at end. */
6 /* Based on ideas by Christopher Clark and interfaces from Niels Provos. */
8 /*
9 These macros provide an intrustive implementation for a typesafe chaining
10 hash table, loosely based on the BSD tree.h and queue.h macros. Here's
11 how to use them.
13 First, pick a the structure that you'll be storing in the hashtable. Let's
14 say that's "struct dinosaur". To this structure, you add an HT_ENTRY()
15 member, as such:
17 struct dinosaur {
18 HT_ENTRY(dinosaur) node; // The name inside the () must match the
19 // struct.
21 // These are just fields from the dinosaur structure...
22 long dinosaur_id;
23 char *name;
24 long age;
25 int is_ornithischian;
26 int is_herbivorous;
29 You can declare the hashtable itself as:
31 HT_HEAD(dinosaur_ht, dinosaur);
33 This declares a new 'struct dinosaur_ht' type.
35 Now you need to declare two functions to help implement the hashtable: one
36 compares two dinosaurs for equality, and one computes the hash of a
37 dinosaur. Let's say that two dinosaurs are equal if they have the same ID
38 and name.
40 int
41 dinosaurs_equal(const struct dinosaur *d1, const struct dinosaur *d2)
43 return d1->dinosaur_id == d2->dinosaur_id &&
44 0 == strcmp(d1->name, d2->name);
47 unsigned
48 dinosaur_hash(const struct dinosaur *d)
50 // This is a very bad hash function. Use siphash24g instead.
51 return (d->dinosaur_id + d->name[0] ) * 1337 + d->name[1] * 1337;
54 Now you'll need to declare the functions that manipulate the hash table.
55 To do this, you put this declaration either in a header file, or inside
56 a regular module, depending on what visibility you want.
58 HT_PROTOTYPE(dinosaur_ht, // The name of the hashtable struct
59 dinosaur, // The name of the element struct,
60 node, // The name of HT_ENTRY member
61 dinosaur_hash, dinosaurs_equal);
63 Later, inside a C function, you use this macro to declare the hashtable
64 functions.
66 HT_GENERATE2(dinosaur_ht, dinosaur, node, dinosaur_hash, dinosaurs_equal,
67 0.6, tor_reallocarray, tor_free_);
69 Note the use of tor_free_, not tor_free. The 0.6 is magic.
71 Now you can use the hashtable! You can initialize one with
73 struct dinosaur_ht my_dinos = HT_INITIALIZER();
75 Or create one in core with
77 struct dinosaur_ht *dinos = tor_malloc(sizeof(dinosaur_ht));
78 HT_INIT(dinosaur_ht, dinos);
80 To the hashtable, you use the HT_FOO(dinosaur_ht, ...) macros. For
81 example, to put new_dino into dinos, you say:
83 HT_REPLACE(dinosaur_ht, dinos, new_dino);
85 If you're searching for an element, you need to use a dummy 'key' element in
86 the search. For example.
88 struct dinosaur dino_key;
89 dino_key.dinosaur_id = 12345;
90 dino_key.name = tor_strdup("Atrociraptor");
92 struct dinosaur *found = HT_FIND(dinosaurs_ht, dinos, &dino_key);
94 Have fun with your hash table!
98 #ifndef HT_H_INCLUDED_
99 #define HT_H_INCLUDED_
101 #define HT_HEAD(name, type) \
102 struct name { \
103 /* The hash table itself. */ \
104 struct type **hth_table; \
105 /* How long is the hash table? */ \
106 unsigned hth_table_length; \
107 /* How many elements does the table contain? */ \
108 unsigned hth_n_entries; \
109 /* How many elements will we allow in the table before resizing it? */ \
110 unsigned hth_load_limit; \
111 /* Position of hth_table_length in the primes table. */ \
112 int hth_prime_idx; \
115 #define HT_INITIALIZER() \
116 { NULL, 0, 0, 0, -1 }
118 #ifdef HT_NO_CACHE_HASH_VALUES
119 #define HT_ENTRY(type) \
120 struct { \
121 struct type *hte_next; \
123 #else
124 #define HT_ENTRY(type) \
125 struct { \
126 struct type *hte_next; \
127 unsigned hte_hash; \
129 #endif
131 /* || 0 is for -Wparentheses-equality (-Wall?) appeasement under clang */
132 #define HT_EMPTY(head) \
133 (((head)->hth_n_entries == 0) || 0)
135 /* How many elements in 'head'? */
136 #define HT_SIZE(head) \
137 ((head)->hth_n_entries)
139 /* Return memory usage for a hashtable (not counting the entries themselves) */
140 #define HT_MEM_USAGE(head) \
141 (sizeof(*head) + (head)->hth_table_length * sizeof(void*))
143 #define HT_FIND(name, head, elm) name##_HT_FIND((head), (elm))
144 #define HT_INSERT(name, head, elm) name##_HT_INSERT((head), (elm))
145 #define HT_REPLACE(name, head, elm) name##_HT_REPLACE((head), (elm))
146 #define HT_REMOVE(name, head, elm) name##_HT_REMOVE((head), (elm))
147 #define HT_START(name, head) name##_HT_START(head)
148 #define HT_NEXT(name, head, elm) name##_HT_NEXT((head), (elm))
149 #define HT_NEXT_RMV(name, head, elm) name##_HT_NEXT_RMV((head), (elm))
150 #define HT_CLEAR(name, head) name##_HT_CLEAR(head)
151 #define HT_INIT(name, head) name##_HT_INIT(head)
152 #define HT_REP_IS_BAD_(name, head) name##_HT_REP_IS_BAD_(head)
153 #define HT_FOREACH_FN(name, head, fn, data) \
154 name##_HT_FOREACH_FN((head), (fn), (data))
155 /* Helper: */
156 static inline unsigned
157 ht_improve_hash(unsigned h)
159 /* Aim to protect against poor hash functions by adding logic here
160 * - logic taken from java 1.4 hashtable source */
161 h += ~(h << 9);
162 h ^= ((h >> 14) | (h << 18)); /* >>> */
163 h += (h << 4);
164 h ^= ((h >> 10) | (h << 22)); /* >>> */
165 return h;
168 #if 0
169 /** Basic string hash function, from Java standard String.hashCode(). */
170 static inline unsigned
171 ht_string_hash(const char *s)
173 unsigned h = 0;
174 int m = 1;
175 while (*s) {
176 h += ((signed char)*s++)*m;
177 m = (m<<5)-1; /* m *= 31 */
179 return h;
181 #endif
183 #if 0
184 /** Basic string hash function, from Python's str.__hash__() */
185 static inline unsigned
186 ht_string_hash(const char *s)
188 unsigned h;
189 const unsigned char *cp = (const unsigned char *)s;
190 h = *cp << 7;
191 while (*cp) {
192 h = (1000003*h) ^ *cp++;
194 /* This conversion truncates the length of the string, but that's ok. */
195 h ^= (unsigned)(cp-(const unsigned char*)s);
196 return h;
198 #endif
200 #ifndef HT_NO_CACHE_HASH_VALUES
201 #define HT_SET_HASH_(elm, field, hashfn) \
202 do { (elm)->field.hte_hash = hashfn(elm); } while (0)
203 #define HT_SET_HASHVAL_(elm, field, val) \
204 do { (elm)->field.hte_hash = (val); } while (0)
205 #define HT_ELT_HASH_(elm, field, hashfn) \
206 ((elm)->field.hte_hash)
207 #else
208 #define HT_SET_HASH_(elm, field, hashfn) \
209 ((void)0)
210 #define HT_ELT_HASH_(elm, field, hashfn) \
211 (hashfn(elm))
212 #define HT_SET_HASHVAL_(elm, field, val) \
213 ((void)0)
214 #endif
216 #define HT_BUCKET_NUM_(head, field, elm, hashfn) \
217 (HT_ELT_HASH_(elm,field,hashfn) % head->hth_table_length)
219 /* Helper: alias for the bucket containing 'elm'. */
220 #define HT_BUCKET_(head, field, elm, hashfn) \
221 ((head)->hth_table[HT_BUCKET_NUM_(head, field, elm, hashfn)])
223 #define HT_FOREACH(x, name, head) \
224 for ((x) = HT_START(name, head); \
225 (x) != NULL; \
226 (x) = HT_NEXT(name, head, x))
228 #ifndef HT_NDEBUG
229 #include "lib/err/torerr.h"
230 #define HT_ASSERT_(x) raw_assert(x)
231 #else
232 #define HT_ASSERT_(x) (void)0
233 #endif
235 /* Macro put at the end of the end of a macro definition so that it
236 * consumes the following semicolon at file scope. Used only inside ht.h. */
237 #define HT_EAT_SEMICOLON__ struct ht_semicolon_eater
239 #define HT_PROTOTYPE(name, type, field, hashfn, eqfn) \
240 int name##_HT_GROW(struct name *ht, unsigned min_capacity); \
241 void name##_HT_CLEAR(struct name *ht); \
242 int name##_HT_REP_IS_BAD_(const struct name *ht); \
243 static inline void \
244 name##_HT_INIT(struct name *head) { \
245 head->hth_table_length = 0; \
246 head->hth_table = NULL; \
247 head->hth_n_entries = 0; \
248 head->hth_load_limit = 0; \
249 head->hth_prime_idx = -1; \
251 /* Helper: returns a pointer to the right location in the table \
252 * 'head' to find or insert the element 'elm'. */ \
253 static inline struct type ** \
254 name##_HT_FIND_P_(struct name *head, struct type *elm) \
256 struct type **p; \
257 if (!head->hth_table) \
258 return NULL; \
259 p = &HT_BUCKET_(head, field, elm, hashfn); \
260 while (*p) { \
261 if (eqfn(*p, elm)) \
262 return p; \
263 p = &(*p)->field.hte_next; \
265 return p; \
267 /* Return a pointer to the element in the table 'head' matching 'elm', \
268 * or NULL if no such element exists */ \
269 ATTR_UNUSED static inline struct type * \
270 name##_HT_FIND(const struct name *head, struct type *elm) \
272 struct type **p; \
273 struct name *h = (struct name *) head; \
274 HT_SET_HASH_(elm, field, hashfn); \
275 p = name##_HT_FIND_P_(h, elm); \
276 return p ? *p : NULL; \
278 /* Insert the element 'elm' into the table 'head'. Do not call this \
279 * function if the table might already contain a matching element. */ \
280 ATTR_UNUSED static inline void \
281 name##_HT_INSERT(struct name *head, struct type *elm) \
283 struct type **p; \
284 if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
285 name##_HT_GROW(head, head->hth_n_entries+1); \
286 ++head->hth_n_entries; \
287 HT_SET_HASH_(elm, field, hashfn); \
288 p = &HT_BUCKET_(head, field, elm, hashfn); \
289 elm->field.hte_next = *p; \
290 *p = elm; \
292 /* Insert the element 'elm' into the table 'head'. If there already \
293 * a matching element in the table, replace that element and return \
294 * it. */ \
295 ATTR_UNUSED static inline struct type * \
296 name##_HT_REPLACE(struct name *head, struct type *elm) \
298 struct type **p, *r; \
299 if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
300 name##_HT_GROW(head, head->hth_n_entries+1); \
301 HT_SET_HASH_(elm, field, hashfn); \
302 p = name##_HT_FIND_P_(head, elm); \
303 HT_ASSERT_(p != NULL); /* this holds because we called HT_GROW */ \
304 r = *p; \
305 *p = elm; \
306 if (r && (r!=elm)) { \
307 elm->field.hte_next = r->field.hte_next; \
308 r->field.hte_next = NULL; \
309 return r; \
310 } else { \
311 ++head->hth_n_entries; \
312 return NULL; \
315 /* Remove any element matching 'elm' from the table 'head'. If such \
316 * an element is found, return it; otherwise return NULL. */ \
317 ATTR_UNUSED static inline struct type * \
318 name##_HT_REMOVE(struct name *head, struct type *elm) \
320 struct type **p, *r; \
321 HT_SET_HASH_(elm, field, hashfn); \
322 p = name##_HT_FIND_P_(head,elm); \
323 if (!p || !*p) \
324 return NULL; \
325 r = *p; \
326 *p = r->field.hte_next; \
327 r->field.hte_next = NULL; \
328 --head->hth_n_entries; \
329 return r; \
331 /* Invoke the function 'fn' on every element of the table 'head', \
332 * using 'data' as its second argument. If the function returns \
333 * nonzero, remove the most recently examined element before invoking \
334 * the function again. */ \
335 ATTR_UNUSED static inline void \
336 name##_HT_FOREACH_FN(struct name *head, \
337 int (*fn)(struct type *, void *), \
338 void *data) \
340 unsigned idx; \
341 struct type **p, **nextp, *next; \
342 if (!head->hth_table) \
343 return; \
344 for (idx=0; idx < head->hth_table_length; ++idx) { \
345 p = &head->hth_table[idx]; \
346 while (*p) { \
347 nextp = &(*p)->field.hte_next; \
348 next = *nextp; \
349 if (fn(*p, data)) { \
350 --head->hth_n_entries; \
351 *p = next; \
352 } else { \
353 p = nextp; \
358 /* Return a pointer to the first element in the table 'head', under \
359 * an arbitrary order. This order is stable under remove operations, \
360 * but not under others. If the table is empty, return NULL. */ \
361 ATTR_UNUSED static inline struct type ** \
362 name##_HT_START(struct name *head) \
364 unsigned b = 0; \
365 while (b < head->hth_table_length) { \
366 if (head->hth_table[b]) { \
367 HT_ASSERT_(b == \
368 HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
369 return &head->hth_table[b]; \
371 ++b; \
373 return NULL; \
375 /* Return the next element in 'head' after 'elm', under the arbitrary \
376 * order used by HT_START. If there are no more elements, return \
377 * NULL. If 'elm' is to be removed from the table, you must call \
378 * this function for the next value before you remove it, or use \
379 * HT_NEXT_RMV instead. \
380 */ \
381 ATTR_UNUSED static inline struct type ** \
382 name##_HT_NEXT(struct name *head, struct type **elm) \
384 if ((*elm)->field.hte_next) { \
385 HT_ASSERT_(HT_BUCKET_NUM_(head,field,*elm,hashfn) == \
386 HT_BUCKET_NUM_(head,field,(*elm)->field.hte_next,hashfn)); \
387 return &(*elm)->field.hte_next; \
388 } else { \
389 unsigned b = HT_BUCKET_NUM_(head,field,*elm,hashfn)+1; \
390 while (b < head->hth_table_length) { \
391 if (head->hth_table[b]) { \
392 HT_ASSERT_(b == \
393 HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
394 return &head->hth_table[b]; \
396 ++b; \
398 return NULL; \
401 /* As HT_NEXT, but also remove the current element 'elm' from the \
402 * table. */ \
403 ATTR_UNUSED static inline struct type ** \
404 name##_HT_NEXT_RMV(struct name *head, struct type **elm) \
406 unsigned h = HT_ELT_HASH_(*elm, field, hashfn); \
407 *elm = (*elm)->field.hte_next; \
408 --head->hth_n_entries; \
409 if (*elm) { \
410 return elm; \
411 } else { \
412 unsigned b = (h % head->hth_table_length)+1; \
413 while (b < head->hth_table_length) { \
414 if (head->hth_table[b]) \
415 return &head->hth_table[b]; \
416 ++b; \
418 return NULL; \
421 HT_EAT_SEMICOLON__
423 #define HT_GENERATE2(name, type, field, hashfn, eqfn, load, reallocarrayfn, \
424 freefn) \
425 /* Primes that aren't too far from powers of two. We stop at */ \
426 /* P=402653189 because P*sizeof(void*) is less than SSIZE_MAX */ \
427 /* even on a 32-bit platform. */ \
428 static unsigned name##_PRIMES[] = { \
429 53, 97, 193, 389, \
430 769, 1543, 3079, 6151, \
431 12289, 24593, 49157, 98317, \
432 196613, 393241, 786433, 1572869, \
433 3145739, 6291469, 12582917, 25165843, \
434 50331653, 100663319, 201326611, 402653189 \
435 }; \
436 static unsigned name##_N_PRIMES = \
437 (unsigned)(sizeof(name##_PRIMES)/sizeof(name##_PRIMES[0])); \
438 /* Expand the internal table of 'head' until it is large enough to \
439 * hold 'size' elements. Return 0 on success, -1 on allocation \
440 * failure. */ \
441 int \
442 name##_HT_GROW(struct name *head, unsigned size) \
444 unsigned new_len, new_load_limit; \
445 int prime_idx; \
446 struct type **new_table; \
447 if (head->hth_prime_idx == (int)name##_N_PRIMES - 1) \
448 return 0; \
449 if (head->hth_load_limit > size) \
450 return 0; \
451 prime_idx = head->hth_prime_idx; \
452 do { \
453 new_len = name##_PRIMES[++prime_idx]; \
454 new_load_limit = (unsigned)(load*new_len); \
455 } while (new_load_limit <= size && \
456 prime_idx < (int)name##_N_PRIMES); \
457 if ((new_table = reallocarrayfn(NULL, new_len, sizeof(struct type*)))) { \
458 unsigned b; \
459 memset(new_table, 0, new_len*sizeof(struct type*)); \
460 for (b = 0; b < head->hth_table_length; ++b) { \
461 struct type *elm, *next; \
462 unsigned b2; \
463 elm = head->hth_table[b]; \
464 while (elm) { \
465 next = elm->field.hte_next; \
466 b2 = HT_ELT_HASH_(elm, field, hashfn) % new_len; \
467 elm->field.hte_next = new_table[b2]; \
468 new_table[b2] = elm; \
469 elm = next; \
472 if (head->hth_table) \
473 freefn(head->hth_table); \
474 head->hth_table = new_table; \
475 } else { \
476 unsigned b, b2; \
477 new_table = reallocarrayfn(head->hth_table, new_len, sizeof(struct type*)); \
478 if (!new_table) return -1; \
479 memset(new_table + head->hth_table_length, 0, \
480 (new_len - head->hth_table_length)*sizeof(struct type*)); \
481 for (b=0; b < head->hth_table_length; ++b) { \
482 struct type *e, **pE; \
483 for (pE = &new_table[b], e = *pE; e != NULL; e = *pE) { \
484 b2 = HT_ELT_HASH_(e, field, hashfn) % new_len; \
485 if (b2 == b) { \
486 pE = &e->field.hte_next; \
487 } else { \
488 *pE = e->field.hte_next; \
489 e->field.hte_next = new_table[b2]; \
490 new_table[b2] = e; \
494 head->hth_table = new_table; \
496 head->hth_table_length = new_len; \
497 head->hth_prime_idx = prime_idx; \
498 head->hth_load_limit = new_load_limit; \
499 return 0; \
501 /* Free all storage held by 'head'. Does not free 'head' itself, or \
502 * individual elements. */ \
503 void \
504 name##_HT_CLEAR(struct name *head) \
506 if (head->hth_table) \
507 freefn(head->hth_table); \
508 head->hth_table_length = 0; \
509 name##_HT_INIT(head); \
511 /* Debugging helper: return false iff the representation of 'head' is \
512 * internally consistent. */ \
513 int \
514 name##_HT_REP_IS_BAD_(const struct name *head) \
516 unsigned n, i; \
517 struct type *elm; \
518 if (!head->hth_table_length) { \
519 if (!head->hth_table && !head->hth_n_entries && \
520 !head->hth_load_limit && head->hth_prime_idx == -1) \
521 return 0; \
522 else \
523 return 1; \
525 if (!head->hth_table || head->hth_prime_idx < 0 || \
526 !head->hth_load_limit) \
527 return 2; \
528 if (head->hth_n_entries > head->hth_load_limit) \
529 return 3; \
530 if (head->hth_table_length != name##_PRIMES[head->hth_prime_idx]) \
531 return 4; \
532 if (head->hth_load_limit != (unsigned)(load*head->hth_table_length)) \
533 return 5; \
534 for (n = i = 0; i < head->hth_table_length; ++i) { \
535 for (elm = head->hth_table[i]; elm; elm = elm->field.hte_next) { \
536 if (HT_ELT_HASH_(elm, field, hashfn) != hashfn(elm)) \
537 return 1000 + i; \
538 if (HT_BUCKET_NUM_(head,field,elm,hashfn) != i) \
539 return 10000 + i; \
540 ++n; \
543 if (n != head->hth_n_entries) \
544 return 6; \
545 return 0; \
547 HT_EAT_SEMICOLON__
549 #define HT_GENERATE(name, type, field, hashfn, eqfn, load, mallocfn, \
550 reallocfn, freefn) \
551 static void * \
552 name##_reallocarray(void *arg, size_t a, size_t b) \
554 if ((b) && (a) > SIZE_MAX / (b)) \
555 return NULL; \
556 if (arg) \
557 return reallocfn((arg),(a)*(b)); \
558 else \
559 return mallocfn((a)*(b)); \
561 HT_GENERATE2(name, type, field, hashfn, eqfn, load, \
562 name##_reallocarray, freefn)
564 /** Implements an over-optimized "find and insert if absent" block;
565 * not meant for direct usage by typical code, or usage outside the critical
566 * path.*/
567 #define HT_FIND_OR_INSERT_(name, field, hashfn, head, eltype, elm, var, y, n) \
569 struct name *var##_head_ = head; \
570 struct eltype **var; \
571 if (!var##_head_->hth_table || \
572 var##_head_->hth_n_entries >= var##_head_->hth_load_limit) \
573 name##_HT_GROW(var##_head_, var##_head_->hth_n_entries+1); \
574 HT_SET_HASH_((elm), field, hashfn); \
575 var = name##_HT_FIND_P_(var##_head_, (elm)); \
576 HT_ASSERT_(var); /* Holds because we called HT_GROW */ \
577 if (*var) { \
578 y; \
579 } else { \
580 n; \
583 #define HT_FOI_INSERT_(field, head, elm, newent, var) \
585 HT_SET_HASHVAL_(newent, field, (elm)->field.hte_hash); \
586 newent->field.hte_next = NULL; \
587 *var = newent; \
588 ++((head)->hth_n_entries); \
592 * Copyright 2005, Nick Mathewson. Implementation logic is adapted from code
593 * by Christopher Clark, retrofit to allow drop-in memory management, and to
594 * use the same interface as Niels Provos's tree.h. This is probably still
595 * a derived work, so the original license below still applies.
597 * Copyright (c) 2002, Christopher Clark
598 * All rights reserved.
600 * Redistribution and use in source and binary forms, with or without
601 * modification, are permitted provided that the following conditions
602 * are met:
604 * * Redistributions of source code must retain the above copyright
605 * notice, this list of conditions and the following disclaimer.
607 * * Redistributions in binary form must reproduce the above copyright
608 * notice, this list of conditions and the following disclaimer in the
609 * documentation and/or other materials provided with the distribution.
611 * * Neither the name of the original author; nor the names of any contributors
612 * may be used to endorse or promote products derived from this software
613 * without specific prior written permission.
616 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
617 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
618 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
619 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
620 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
621 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
622 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
623 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
624 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
625 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
626 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
629 #endif