drd: Add a consistency check
[valgrind.git] / coregrind / m_hashtable.c
blob3e52235862a4390096b4cd7dac925bd99235a160
2 /*--------------------------------------------------------------------*/
3 /*--- A separately-chained hash table. m_hashtable.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2005-2013 Nicholas Nethercote
11 njn@valgrind.org
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
28 The GNU General Public License is contained in the file COPYING.
31 #include "pub_core_basics.h"
32 #include "pub_core_debuglog.h"
33 #include "pub_core_hashtable.h"
34 #include "pub_core_libcassert.h"
35 #include "pub_core_libcbase.h"
36 #include "pub_core_libcprint.h"
37 #include "pub_core_mallocfree.h"
39 /*--------------------------------------------------------------------*/
40 /*--- Declarations ---*/
41 /*--------------------------------------------------------------------*/
43 #define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
45 struct _VgHashTable {
46 UInt n_chains; // should be prime
47 UInt n_elements;
48 VgHashNode* iterNode; // current iterator node
49 UInt iterChain; // next chain to be traversed by the iterator
50 VgHashNode** chains; // expanding array of hash chains
51 Bool iterOK; // table safe to iterate over?
52 const HChar* name; // name of table (for debugging only)
55 #define N_HASH_PRIMES 20
57 static const SizeT primes[N_HASH_PRIMES] = {
58 769UL, 1543UL, 3079UL, 6151UL,
59 12289UL, 24593UL, 49157UL, 98317UL,
60 196613UL, 393241UL, 786433UL, 1572869UL,
61 3145739UL, 6291469UL, 12582917UL, 25165843UL,
62 50331653UL, 100663319UL, 201326611UL, 402653189UL
65 /*--------------------------------------------------------------------*/
66 /*--- Functions ---*/
67 /*--------------------------------------------------------------------*/
69 VgHashTable *VG_(HT_construct) ( const HChar* name )
71 /* Initialises to zero, ie. all entries NULL */
72 SizeT n_chains = primes[0];
73 SizeT sz = n_chains * sizeof(VgHashNode*);
74 VgHashTable *table = VG_(calloc)("hashtable.Hc.1",
75 1, sizeof(struct _VgHashTable));
76 table->chains = VG_(calloc)("hashtable.Hc.2", 1, sz);
77 table->n_chains = n_chains;
78 table->n_elements = 0;
79 table->iterOK = True;
80 table->name = name;
81 vg_assert(name);
82 return table;
85 Int VG_(HT_count_nodes) ( const VgHashTable *table )
87 return table->n_elements;
90 static void resize ( VgHashTable *table )
92 Int i;
93 SizeT sz;
94 SizeT old_chains = table->n_chains;
95 SizeT new_chains = old_chains + 1;
96 VgHashNode** chains;
97 VgHashNode * node;
99 /* If we've run out of primes, do nothing. */
100 if (old_chains == primes[N_HASH_PRIMES-1])
101 return;
103 vg_assert(old_chains >= primes[0]
104 && old_chains < primes[N_HASH_PRIMES-1]);
106 for (i = 0; i < N_HASH_PRIMES; i++) {
107 if (primes[i] > new_chains) {
108 new_chains = primes[i];
109 break;
113 vg_assert(new_chains > old_chains);
114 vg_assert(new_chains > primes[0]
115 && new_chains <= primes[N_HASH_PRIMES-1]);
117 VG_(debugLog)(
118 1, "hashtable",
119 "resizing table `%s' from %lu to %lu (total elems %lu)\n",
120 table->name, (UWord)old_chains, (UWord)new_chains,
121 (UWord)table->n_elements );
123 table->n_chains = new_chains;
124 sz = new_chains * sizeof(VgHashNode*);
125 chains = VG_(calloc)("hashtable.resize.1", 1, sz);
127 for (i = 0; i < old_chains; i++) {
128 node = table->chains[i];
129 while (node != NULL) {
130 VgHashNode* next = node->next;
131 UWord chain = CHAIN_NO(node->key, table);
132 node->next = chains[chain];
133 chains[chain] = node;
134 node = next;
138 VG_(free)(table->chains);
139 table->chains = chains;
142 /* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends
143 the node to the appropriate chain. No duplicate key detection is done. */
144 void VG_(HT_add_node) ( VgHashTable *table, void* vnode )
146 VgHashNode* node = (VgHashNode*)vnode;
147 UWord chain = CHAIN_NO(node->key, table);
148 node->next = table->chains[chain];
149 table->chains[chain] = node;
150 table->n_elements++;
151 if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) {
152 resize(table);
155 /* Table has been modified; hence HT_Next should assert. */
156 table->iterOK = False;
159 /* Looks up a VgHashNode by key in the table. Returns NULL if not found. */
160 void* VG_(HT_lookup) ( const VgHashTable *table, UWord key )
162 VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ];
164 while (curr) {
165 if (key == curr->key) {
166 return curr;
168 curr = curr->next;
170 return NULL;
173 /* Looks up a VgHashNode by node in the table. Returns NULL if not found.
174 GEN!!! marks the lines that differs from VG_(HT_lookup). */
175 void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node,
176 HT_Cmp_t cmp )
178 const VgHashNode* hnode = node; // GEN!!!
179 VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!!
181 while (curr) {
182 if (cmp (hnode, curr) == 0) { // GEN!!!
183 return curr;
185 curr = curr->next;
187 return NULL;
190 /* Removes a VgHashNode from the table. Returns NULL if not found. */
191 void* VG_(HT_remove) ( VgHashTable *table, UWord key )
193 UWord chain = CHAIN_NO(key, table);
194 VgHashNode* curr = table->chains[chain];
195 VgHashNode** prev_next_ptr = &(table->chains[chain]);
197 /* Table has been modified; hence HT_Next should assert. */
198 table->iterOK = False;
200 while (curr) {
201 if (key == curr->key) {
202 *prev_next_ptr = curr->next;
203 table->n_elements--;
204 return curr;
206 prev_next_ptr = &(curr->next);
207 curr = curr->next;
209 return NULL;
212 /* Removes a VgHashNode by node from the table. Returns NULL if not found.
213 GEN!!! marks the lines that differs from VG_(HT_remove). */
214 void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp )
216 const VgHashNode* hnode = node; // GEN!!!
217 UWord chain = CHAIN_NO(hnode->key, table); // GEN!!!
218 VgHashNode* curr = table->chains[chain];
219 VgHashNode** prev_next_ptr = &(table->chains[chain]);
221 /* Table has been modified; hence HT_Next should assert. */
222 table->iterOK = False;
224 while (curr) {
225 if (cmp(hnode, curr) == 0) { // GEN!!!
226 *prev_next_ptr = curr->next;
227 table->n_elements--;
228 return curr;
230 prev_next_ptr = &(curr->next);
231 curr = curr->next;
233 return NULL;
236 void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp )
238 #define MAXOCCUR 20
239 UInt elt_occurences[MAXOCCUR+1];
240 UInt key_occurences[MAXOCCUR+1];
241 UInt cno_occurences[MAXOCCUR+1];
242 /* Key occurence : how many ht elements have the same key.
243 elt_occurences : how many elements are inserted multiple time.
244 cno_occurences : how many chains have that length.
245 The last entry in these arrays collects all occurences >= MAXOCCUR. */
246 #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
247 UInt i;
248 UInt nkey, nelt, ncno;
249 VgHashNode *cnode, *node;
251 VG_(memset)(key_occurences, 0, sizeof(key_occurences));
252 VG_(memset)(elt_occurences, 0, sizeof(elt_occurences));
253 VG_(memset)(cno_occurences, 0, sizeof(cno_occurences));
255 // Note that the below algorithm is quadractic in nr of elements in a chain
256 // but if that happens, the hash table/function is really bad and that
257 // should be fixed.
258 for (i = 0; i < table->n_chains; i++) {
259 ncno = 0;
260 for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) {
261 ncno++;
263 nkey = 0;
264 // Is the same cnode->key existing before cnode ?
265 for (node = table->chains[i]; node != cnode; node = node->next) {
266 if (node->key == cnode->key)
267 nkey++;
269 // If cnode->key not in a previous node, count occurences of key.
270 if (nkey == 0) {
271 for (node = cnode; node != NULL; node = node->next) {
272 if (node->key == cnode->key)
273 nkey++;
275 INCOCCUR(key_occurences, nkey);
278 nelt = 0;
279 // Is the same cnode element existing before cnode ?
280 for (node = table->chains[i]; node != cnode; node = node->next) {
281 if (cmp) {
282 if ((*cmp)(node, cnode) == 0)
283 nelt++;
284 } else
285 if (node->key == cnode->key)
286 nelt++;
288 // If cnode element not in a previous node, count occurences of elt.
289 if (nelt == 0) {
290 for (node = cnode; node != NULL; node = node->next) {
291 if (cmp) {
292 if ((*cmp)(node, cnode) == 0)
293 nelt++;
294 } else
295 if (node->key == cnode->key)
296 nelt++;
298 INCOCCUR(elt_occurences, nelt);
301 INCOCCUR(cno_occurences, ncno);
304 VG_(message)(Vg_DebugMsg,
305 "nr occurences of"
306 " chains of len N,"
307 " N-plicated keys,"
308 " N-plicated elts\n");
309 nkey = nelt = ncno = 0;
310 for (i = 0; i <= MAXOCCUR; i++) {
311 if (elt_occurences[i] > 0
312 || key_occurences[i] > 0
313 || cno_occurences[i] > 0)
314 VG_(message)(Vg_DebugMsg,
315 "%s=%2d : nr chain %6d, nr keys %6d, nr elts %6d\n",
316 i == MAXOCCUR ? ">" : "N", i,
317 cno_occurences[i], key_occurences[i], elt_occurences[i]);
318 nkey += key_occurences[i];
319 nelt += elt_occurences[i];
320 ncno += cno_occurences[i];
322 VG_(message)(Vg_DebugMsg,
323 "total nr of unique chains: %6d, keys %6d, elts %6d\n",
324 ncno, nkey, nelt);
328 /* Allocates a suitably-sized array, copies pointers to all the hashtable
329 elements into it, then returns both the array and the size of it. The
330 array must be freed with VG_(free).
332 VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems)
334 UInt i, j;
335 VgHashNode** arr;
336 VgHashNode* node;
338 *n_elems = table->n_elements;
339 if (*n_elems == 0)
340 return NULL;
342 arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) );
344 j = 0;
345 for (i = 0; i < table->n_chains; i++) {
346 for (node = table->chains[i]; node != NULL; node = node->next) {
347 arr[j++] = node;
350 vg_assert(j == *n_elems);
352 return arr;
355 void VG_(HT_ResetIter)(VgHashTable *table)
357 vg_assert(table);
358 table->iterNode = NULL;
359 table->iterChain = 0;
360 table->iterOK = True;
363 void* VG_(HT_Next)(VgHashTable *table)
365 Int i;
366 vg_assert(table);
367 /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
368 In short if this fails, it means the caller tried to modify the
369 table whilst iterating over it, which is a bug. */
370 vg_assert(table->iterOK);
372 if (table->iterNode && table->iterNode->next) {
373 table->iterNode = table->iterNode->next;
374 return table->iterNode;
377 for (i = table->iterChain; i < table->n_chains; i++) {
378 if (table->chains[i]) {
379 table->iterNode = table->chains[i];
380 table->iterChain = i + 1; // Next chain to be traversed
381 return table->iterNode;
384 return NULL;
387 void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*))
389 UInt i;
390 VgHashNode *node, *node_next;
392 for (i = 0; i < table->n_chains; i++) {
393 for (node = table->chains[i]; node != NULL; node = node_next) {
394 node_next = node->next;
395 freenode_fn(node);
398 VG_(free)(table->chains);
399 VG_(free)(table);
402 /*--------------------------------------------------------------------*/
403 /*--- end ---*/
404 /*--------------------------------------------------------------------*/