2 /*--------------------------------------------------------------------*/
3 /*--- A separately-chained hash table. m_hashtable.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2005-2017 Nicholas Nethercote
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "pub_core_basics.h"
30 #include "pub_core_debuglog.h"
31 #include "pub_core_hashtable.h"
32 #include "pub_core_libcassert.h"
33 #include "pub_core_libcbase.h"
34 #include "pub_core_libcprint.h"
35 #include "pub_core_mallocfree.h"
37 /*--------------------------------------------------------------------*/
38 /*--- Declarations ---*/
39 /*--------------------------------------------------------------------*/
41 #define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
44 UInt n_chains
; // should be prime
46 VgHashNode
* iterNode
; // current iterator node
47 UInt iterChain
; // next chain to be traversed by the iterator
48 Bool iterOK
; // table safe to iterate over?
49 VgHashNode
** chains
; // expanding array of hash chains
50 const HChar
* name
; // name of table (for debugging only)
53 #define N_HASH_PRIMES 20
55 static const SizeT primes
[N_HASH_PRIMES
] = {
56 769UL, 1543UL, 3079UL, 6151UL,
57 12289UL, 24593UL, 49157UL, 98317UL,
58 196613UL, 393241UL, 786433UL, 1572869UL,
59 3145739UL, 6291469UL, 12582917UL, 25165843UL,
60 50331653UL, 100663319UL, 201326611UL, 402653189UL
63 /*--------------------------------------------------------------------*/
65 /*--------------------------------------------------------------------*/
67 VgHashTable
*VG_(HT_construct
) ( const HChar
* name
)
69 /* Initialises to zero, ie. all entries NULL */
70 SizeT n_chains
= primes
[0];
71 SizeT sz
= n_chains
* sizeof(VgHashNode
*);
72 VgHashTable
*table
= VG_(calloc
)("hashtable.Hc.1",
73 1, sizeof(struct _VgHashTable
));
74 table
->chains
= VG_(calloc
)("hashtable.Hc.2", 1, sz
);
75 table
->n_chains
= n_chains
;
76 table
->n_elements
= 0;
83 UInt
VG_(HT_count_nodes
) ( const VgHashTable
*table
)
85 return table
->n_elements
;
88 static void resize ( VgHashTable
*table
)
92 SizeT old_chains
= table
->n_chains
;
93 SizeT new_chains
= old_chains
+ 1;
97 /* If we've run out of primes, do nothing. */
98 if (old_chains
== primes
[N_HASH_PRIMES
-1])
101 vg_assert(old_chains
>= primes
[0]
102 && old_chains
< primes
[N_HASH_PRIMES
-1]);
104 for (i
= 0; i
< N_HASH_PRIMES
; i
++) {
105 if (primes
[i
] > new_chains
) {
106 new_chains
= primes
[i
];
111 vg_assert(new_chains
> old_chains
);
112 vg_assert(new_chains
> primes
[0]
113 && new_chains
<= primes
[N_HASH_PRIMES
-1]);
117 "resizing table `%s' from %lu to %lu (total elems %lu)\n",
118 table
->name
, (UWord
)old_chains
, (UWord
)new_chains
,
119 (UWord
)table
->n_elements
);
121 table
->n_chains
= new_chains
;
122 sz
= new_chains
* sizeof(VgHashNode
*);
123 chains
= VG_(calloc
)("hashtable.resize.1", 1, sz
);
125 for (i
= 0; i
< old_chains
; i
++) {
126 node
= table
->chains
[i
];
127 while (node
!= NULL
) {
128 VgHashNode
* next
= node
->next
;
129 UWord chain
= CHAIN_NO(node
->key
, table
);
130 node
->next
= chains
[chain
];
131 chains
[chain
] = node
;
136 VG_(free
)(table
->chains
);
137 table
->chains
= chains
;
140 /* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends
141 the node to the appropriate chain. No duplicate key detection is done. */
142 void VG_(HT_add_node
) ( VgHashTable
*table
, void* vnode
)
144 VgHashNode
* node
= (VgHashNode
*)vnode
;
145 UWord chain
= CHAIN_NO(node
->key
, table
);
146 node
->next
= table
->chains
[chain
];
147 table
->chains
[chain
] = node
;
149 if ( (1 * (ULong
)table
->n_elements
) > (1 * (ULong
)table
->n_chains
) ) {
153 /* Table has been modified; hence HT_Next should assert. */
154 table
->iterOK
= False
;
157 /* Looks up a VgHashNode by key in the table. Returns NULL if not found. */
158 void* VG_(HT_lookup
) ( const VgHashTable
*table
, UWord key
)
160 VgHashNode
* curr
= table
->chains
[ CHAIN_NO(key
, table
) ];
163 if (key
== curr
->key
) {
171 /* Looks up a VgHashNode by node in the table. Returns NULL if not found.
172 GEN!!! marks the lines that differs from VG_(HT_lookup). */
173 void* VG_(HT_gen_lookup
) ( const VgHashTable
*table
, const void* node
,
176 const VgHashNode
* hnode
= node
; // GEN!!!
177 VgHashNode
* curr
= table
->chains
[ CHAIN_NO(hnode
->key
, table
) ]; // GEN!!!
180 if (hnode
->key
== curr
->key
&& cmp (hnode
, curr
) == 0) { // GEN!!!
188 /* Removes a VgHashNode from the table. Returns NULL if not found. */
189 void* VG_(HT_remove
) ( VgHashTable
*table
, UWord key
)
191 UWord chain
= CHAIN_NO(key
, table
);
192 VgHashNode
* curr
= table
->chains
[chain
];
193 VgHashNode
** prev_next_ptr
= &(table
->chains
[chain
]);
195 /* Table has been modified; hence HT_Next should assert. */
196 table
->iterOK
= False
;
199 if (key
== curr
->key
) {
200 *prev_next_ptr
= curr
->next
;
204 prev_next_ptr
= &(curr
->next
);
210 /* Removes a VgHashNode by node from the table. Returns NULL if not found.
211 GEN!!! marks the lines that differs from VG_(HT_remove). */
212 void* VG_(HT_gen_remove
) ( VgHashTable
*table
, const void* node
, HT_Cmp_t cmp
)
214 const VgHashNode
* hnode
= node
; // GEN!!!
215 UWord chain
= CHAIN_NO(hnode
->key
, table
); // GEN!!!
216 VgHashNode
* curr
= table
->chains
[chain
];
217 VgHashNode
** prev_next_ptr
= &(table
->chains
[chain
]);
219 /* Table has been modified; hence HT_Next should assert. */
220 table
->iterOK
= False
;
223 if (hnode
->key
== curr
->key
&& cmp(hnode
, curr
) == 0) { // GEN!!!
224 *prev_next_ptr
= curr
->next
;
228 prev_next_ptr
= &(curr
->next
);
234 void VG_(HT_print_stats
) ( const VgHashTable
*table
, HT_Cmp_t cmp
)
237 UInt elt_occurrences
[MAXOCCUR
+1];
238 UInt key_occurrences
[MAXOCCUR
+1];
239 UInt cno_occurrences
[MAXOCCUR
+1];
240 /* Key occurrence : how many ht elements have the same key.
241 elt_occurrences : how many elements are inserted multiple time.
242 cno_occurrences : how many chains have that length.
243 The last entry in these arrays collects all occurrences >= MAXOCCUR. */
244 #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
246 UInt nkey
, nelt
, ncno
;
247 VgHashNode
*cnode
, *node
;
249 VG_(memset
)(key_occurrences
, 0, sizeof(key_occurrences
));
250 VG_(memset
)(elt_occurrences
, 0, sizeof(elt_occurrences
));
251 VG_(memset
)(cno_occurrences
, 0, sizeof(cno_occurrences
));
253 // Note that the below algorithm is quadractic in nr of elements in a chain
254 // but if that happens, the hash table/function is really bad and that
256 for (i
= 0; i
< table
->n_chains
; i
++) {
258 for (cnode
= table
->chains
[i
]; cnode
!= NULL
; cnode
= cnode
->next
) {
262 // Is the same cnode->key existing before cnode ?
263 for (node
= table
->chains
[i
]; node
!= cnode
; node
= node
->next
) {
264 if (node
->key
== cnode
->key
)
267 // If cnode->key not in a previous node, count occurrences of key.
269 for (node
= cnode
; node
!= NULL
; node
= node
->next
) {
270 if (node
->key
== cnode
->key
)
273 INCOCCUR(key_occurrences
, nkey
);
277 // Is the same cnode element existing before cnode ?
278 for (node
= table
->chains
[i
]; node
!= cnode
; node
= node
->next
) {
279 if (node
->key
== cnode
->key
280 && (cmp
== NULL
|| cmp (node
, cnode
) == 0)) {
284 // If cnode element not in a previous node, count occurrences of elt.
286 for (node
= cnode
; node
!= NULL
; node
= node
->next
) {
287 if (node
->key
== cnode
->key
288 && (cmp
== NULL
|| cmp (node
, cnode
) == 0)) {
292 INCOCCUR(elt_occurrences
, nelt
);
295 INCOCCUR(cno_occurrences
, ncno
);
298 VG_(message
)(Vg_DebugMsg
,
302 " N-plicated elts\n");
303 nkey
= nelt
= ncno
= 0;
304 for (i
= 0; i
<= MAXOCCUR
; i
++) {
305 if (elt_occurrences
[i
] > 0
306 || key_occurrences
[i
] > 0
307 || cno_occurrences
[i
] > 0)
308 VG_(message
)(Vg_DebugMsg
,
309 "%s=%2u : nr chain %6u, nr keys %6u, nr elts %6u\n",
310 i
== MAXOCCUR
? ">" : "N", i
,
311 cno_occurrences
[i
], key_occurrences
[i
], elt_occurrences
[i
]);
312 nkey
+= key_occurrences
[i
];
313 nelt
+= elt_occurrences
[i
];
314 ncno
+= cno_occurrences
[i
];
316 VG_(message
)(Vg_DebugMsg
,
317 "total nr of unique slots: %6u, keys %6u, elts %6u."
318 " Avg chain len %3.1f\n",
320 (Double
)nelt
/(Double
)(ncno
== cno_occurrences
[0] ?
321 1 : ncno
- cno_occurrences
[0]));
325 /* Allocates a suitably-sized array, copies pointers to all the hashtable
326 elements into it, then returns both the array and the size of it. The
327 array must be freed with VG_(free).
329 VgHashNode
** VG_(HT_to_array
) (const VgHashTable
*table
, /*OUT*/ UInt
* n_elems
)
335 *n_elems
= table
->n_elements
;
339 arr
= VG_(malloc
)( "hashtable.Hta.1", *n_elems
* sizeof(VgHashNode
*) );
342 for (i
= 0; i
< table
->n_chains
; i
++) {
343 for (node
= table
->chains
[i
]; node
!= NULL
; node
= node
->next
) {
347 vg_assert(j
== *n_elems
);
352 void VG_(HT_ResetIter
)(VgHashTable
*table
)
355 table
->iterNode
= NULL
;
356 table
->iterChain
= 0;
357 table
->iterOK
= True
;
360 void* VG_(HT_Next
)(VgHashTable
*table
)
364 /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
365 In short if this fails, it means the caller tried to modify the
366 table whilst iterating over it, which is a bug.
367 One exception: HT_remove_at_Iter can remove the current entry and
368 leave the iterator in a valid state for HT_Next. */
369 vg_assert(table
->iterOK
);
371 if (table
->iterNode
&& table
->iterNode
->next
) {
372 table
->iterNode
= table
->iterNode
->next
;
373 return table
->iterNode
;
376 for (i
= table
->iterChain
; i
< table
->n_chains
; i
++) {
377 if (table
->chains
[i
]) {
378 table
->iterNode
= table
->chains
[i
];
379 table
->iterChain
= i
+ 1; // Next chain to be traversed
380 return table
->iterNode
;
386 void VG_(HT_remove_at_Iter
)(VgHashTable
*table
)
389 vg_assert(table
->iterOK
);
390 vg_assert(table
->iterNode
);
392 const UInt curChain
= table
->iterChain
- 1; // chain of iterNode.
395 if (table
->chains
[curChain
] == table
->iterNode
) {
396 /* iterNode is the first of its chain -> remove it from the chain. */
397 table
->chains
[curChain
] = table
->iterNode
->next
;
398 /* Setup the iterator to visit first node of curChain: */
399 table
->iterNode
= NULL
;
400 table
->iterChain
= curChain
;
402 /* iterNode is somewhere inside curChain chain */
403 VgHashNode
* prev
= table
->chains
[curChain
];
405 while (prev
->next
!= table
->iterNode
)
407 /* Remove iterNode from the chain. */
408 prev
->next
= table
->iterNode
->next
;
409 /* Setup the iterator to visit prev->next, which is the node
410 that was after the deleted node. */
411 table
->iterNode
= prev
;
417 void VG_(HT_destruct
)(VgHashTable
*table
, void(*freenode_fn
)(void*))
420 VgHashNode
*node
, *node_next
;
422 for (i
= 0; i
< table
->n_chains
; i
++) {
423 for (node
= table
->chains
[i
]; node
!= NULL
; node
= node_next
) {
424 node_next
= node
->next
;
428 VG_(free
)(table
->chains
);
432 /*--------------------------------------------------------------------*/
434 /*--------------------------------------------------------------------*/