ms-manual.xml: Put stray ':' inside para.
[valgrind.git] / coregrind / m_hashtable.c
blob19c604d3a68a932076e58571f9e27d79e79671cc
2 /*--------------------------------------------------------------------*/
3 /*--- A separately-chained hash table. m_hashtable.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2005-2017 Nicholas Nethercote
11 njn@valgrind.org
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "pub_core_basics.h"
30 #include "pub_core_debuglog.h"
31 #include "pub_core_hashtable.h"
32 #include "pub_core_libcassert.h"
33 #include "pub_core_libcbase.h"
34 #include "pub_core_libcprint.h"
35 #include "pub_core_mallocfree.h"
37 /*--------------------------------------------------------------------*/
38 /*--- Declarations ---*/
39 /*--------------------------------------------------------------------*/
41 #define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
43 struct _VgHashTable {
44 UInt n_chains; // should be prime
45 UInt n_elements;
46 VgHashNode* iterNode; // current iterator node
47 UInt iterChain; // next chain to be traversed by the iterator
48 VgHashNode** chains; // expanding array of hash chains
49 Bool iterOK; // table safe to iterate over?
50 const HChar* name; // name of table (for debugging only)
53 #define N_HASH_PRIMES 20
55 static const SizeT primes[N_HASH_PRIMES] = {
56 769UL, 1543UL, 3079UL, 6151UL,
57 12289UL, 24593UL, 49157UL, 98317UL,
58 196613UL, 393241UL, 786433UL, 1572869UL,
59 3145739UL, 6291469UL, 12582917UL, 25165843UL,
60 50331653UL, 100663319UL, 201326611UL, 402653189UL
63 /*--------------------------------------------------------------------*/
64 /*--- Functions ---*/
65 /*--------------------------------------------------------------------*/
67 VgHashTable *VG_(HT_construct) ( const HChar* name )
69 /* Initialises to zero, ie. all entries NULL */
70 SizeT n_chains = primes[0];
71 SizeT sz = n_chains * sizeof(VgHashNode*);
72 VgHashTable *table = VG_(calloc)("hashtable.Hc.1",
73 1, sizeof(struct _VgHashTable));
74 table->chains = VG_(calloc)("hashtable.Hc.2", 1, sz);
75 table->n_chains = n_chains;
76 table->n_elements = 0;
77 table->iterOK = True;
78 table->name = name;
79 vg_assert(name);
80 return table;
83 UInt VG_(HT_count_nodes) ( const VgHashTable *table )
85 return table->n_elements;
88 static void resize ( VgHashTable *table )
90 Int i;
91 SizeT sz;
92 SizeT old_chains = table->n_chains;
93 SizeT new_chains = old_chains + 1;
94 VgHashNode** chains;
95 VgHashNode * node;
97 /* If we've run out of primes, do nothing. */
98 if (old_chains == primes[N_HASH_PRIMES-1])
99 return;
101 vg_assert(old_chains >= primes[0]
102 && old_chains < primes[N_HASH_PRIMES-1]);
104 for (i = 0; i < N_HASH_PRIMES; i++) {
105 if (primes[i] > new_chains) {
106 new_chains = primes[i];
107 break;
111 vg_assert(new_chains > old_chains);
112 vg_assert(new_chains > primes[0]
113 && new_chains <= primes[N_HASH_PRIMES-1]);
115 VG_(debugLog)(
116 1, "hashtable",
117 "resizing table `%s' from %lu to %lu (total elems %lu)\n",
118 table->name, (UWord)old_chains, (UWord)new_chains,
119 (UWord)table->n_elements );
121 table->n_chains = new_chains;
122 sz = new_chains * sizeof(VgHashNode*);
123 chains = VG_(calloc)("hashtable.resize.1", 1, sz);
125 for (i = 0; i < old_chains; i++) {
126 node = table->chains[i];
127 while (node != NULL) {
128 VgHashNode* next = node->next;
129 UWord chain = CHAIN_NO(node->key, table);
130 node->next = chains[chain];
131 chains[chain] = node;
132 node = next;
136 VG_(free)(table->chains);
137 table->chains = chains;
140 /* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends
141 the node to the appropriate chain. No duplicate key detection is done. */
142 void VG_(HT_add_node) ( VgHashTable *table, void* vnode )
144 VgHashNode* node = (VgHashNode*)vnode;
145 UWord chain = CHAIN_NO(node->key, table);
146 node->next = table->chains[chain];
147 table->chains[chain] = node;
148 table->n_elements++;
149 if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) {
150 resize(table);
153 /* Table has been modified; hence HT_Next should assert. */
154 table->iterOK = False;
157 /* Looks up a VgHashNode by key in the table. Returns NULL if not found. */
158 void* VG_(HT_lookup) ( const VgHashTable *table, UWord key )
160 VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ];
162 while (curr) {
163 if (key == curr->key) {
164 return curr;
166 curr = curr->next;
168 return NULL;
171 /* Looks up a VgHashNode by node in the table. Returns NULL if not found.
172 GEN!!! marks the lines that differs from VG_(HT_lookup). */
173 void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node,
174 HT_Cmp_t cmp )
176 const VgHashNode* hnode = node; // GEN!!!
177 VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!!
179 while (curr) {
180 if (hnode->key == curr->key && cmp (hnode, curr) == 0) { // GEN!!!
181 return curr;
183 curr = curr->next;
185 return NULL;
188 /* Removes a VgHashNode from the table. Returns NULL if not found. */
189 void* VG_(HT_remove) ( VgHashTable *table, UWord key )
191 UWord chain = CHAIN_NO(key, table);
192 VgHashNode* curr = table->chains[chain];
193 VgHashNode** prev_next_ptr = &(table->chains[chain]);
195 /* Table has been modified; hence HT_Next should assert. */
196 table->iterOK = False;
198 while (curr) {
199 if (key == curr->key) {
200 *prev_next_ptr = curr->next;
201 table->n_elements--;
202 return curr;
204 prev_next_ptr = &(curr->next);
205 curr = curr->next;
207 return NULL;
210 /* Removes a VgHashNode by node from the table. Returns NULL if not found.
211 GEN!!! marks the lines that differs from VG_(HT_remove). */
212 void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp )
214 const VgHashNode* hnode = node; // GEN!!!
215 UWord chain = CHAIN_NO(hnode->key, table); // GEN!!!
216 VgHashNode* curr = table->chains[chain];
217 VgHashNode** prev_next_ptr = &(table->chains[chain]);
219 /* Table has been modified; hence HT_Next should assert. */
220 table->iterOK = False;
222 while (curr) {
223 if (hnode->key == curr->key && cmp(hnode, curr) == 0) { // GEN!!!
224 *prev_next_ptr = curr->next;
225 table->n_elements--;
226 return curr;
228 prev_next_ptr = &(curr->next);
229 curr = curr->next;
231 return NULL;
234 void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp )
236 #define MAXOCCUR 20
237 UInt elt_occurences[MAXOCCUR+1];
238 UInt key_occurences[MAXOCCUR+1];
239 UInt cno_occurences[MAXOCCUR+1];
240 /* Key occurence : how many ht elements have the same key.
241 elt_occurences : how many elements are inserted multiple time.
242 cno_occurences : how many chains have that length.
243 The last entry in these arrays collects all occurences >= MAXOCCUR. */
244 #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
245 UInt i;
246 UInt nkey, nelt, ncno;
247 VgHashNode *cnode, *node;
249 VG_(memset)(key_occurences, 0, sizeof(key_occurences));
250 VG_(memset)(elt_occurences, 0, sizeof(elt_occurences));
251 VG_(memset)(cno_occurences, 0, sizeof(cno_occurences));
253 // Note that the below algorithm is quadractic in nr of elements in a chain
254 // but if that happens, the hash table/function is really bad and that
255 // should be fixed.
256 for (i = 0; i < table->n_chains; i++) {
257 ncno = 0;
258 for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) {
259 ncno++;
261 nkey = 0;
262 // Is the same cnode->key existing before cnode ?
263 for (node = table->chains[i]; node != cnode; node = node->next) {
264 if (node->key == cnode->key)
265 nkey++;
267 // If cnode->key not in a previous node, count occurences of key.
268 if (nkey == 0) {
269 for (node = cnode; node != NULL; node = node->next) {
270 if (node->key == cnode->key)
271 nkey++;
273 INCOCCUR(key_occurences, nkey);
276 nelt = 0;
277 // Is the same cnode element existing before cnode ?
278 for (node = table->chains[i]; node != cnode; node = node->next) {
279 if (node->key == cnode->key
280 && (cmp == NULL || cmp (node, cnode) == 0)) {
281 nelt++;
284 // If cnode element not in a previous node, count occurences of elt.
285 if (nelt == 0) {
286 for (node = cnode; node != NULL; node = node->next) {
287 if (node->key == cnode->key
288 && (cmp == NULL || cmp (node, cnode) == 0)) {
289 nelt++;
292 INCOCCUR(elt_occurences, nelt);
295 INCOCCUR(cno_occurences, ncno);
298 VG_(message)(Vg_DebugMsg,
299 "nr occurrences of"
300 " chains of len N,"
301 " N-plicated keys,"
302 " N-plicated elts\n");
303 nkey = nelt = ncno = 0;
304 for (i = 0; i <= MAXOCCUR; i++) {
305 if (elt_occurences[i] > 0
306 || key_occurences[i] > 0
307 || cno_occurences[i] > 0)
308 VG_(message)(Vg_DebugMsg,
309 "%s=%2u : nr chain %6u, nr keys %6u, nr elts %6u\n",
310 i == MAXOCCUR ? ">" : "N", i,
311 cno_occurences[i], key_occurences[i], elt_occurences[i]);
312 nkey += key_occurences[i];
313 nelt += elt_occurences[i];
314 ncno += cno_occurences[i];
316 VG_(message)(Vg_DebugMsg,
317 "total nr of unique slots: %6u, keys %6u, elts %6u."
318 " Avg chain len %3.1f\n",
319 ncno, nkey, nelt,
320 (Double)nelt/(Double)(ncno == cno_occurences[0] ?
321 1 : ncno - cno_occurences[0]));
325 /* Allocates a suitably-sized array, copies pointers to all the hashtable
326 elements into it, then returns both the array and the size of it. The
327 array must be freed with VG_(free).
329 VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems)
331 UInt i, j;
332 VgHashNode** arr;
333 VgHashNode* node;
335 *n_elems = table->n_elements;
336 if (*n_elems == 0)
337 return NULL;
339 arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) );
341 j = 0;
342 for (i = 0; i < table->n_chains; i++) {
343 for (node = table->chains[i]; node != NULL; node = node->next) {
344 arr[j++] = node;
347 vg_assert(j == *n_elems);
349 return arr;
352 void VG_(HT_ResetIter)(VgHashTable *table)
354 vg_assert(table);
355 table->iterNode = NULL;
356 table->iterChain = 0;
357 table->iterOK = True;
360 void* VG_(HT_Next)(VgHashTable *table)
362 Int i;
363 vg_assert(table);
364 /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
365 In short if this fails, it means the caller tried to modify the
366 table whilst iterating over it, which is a bug.
367 One exception: HT_remove_at_Iter can remove the current entry and
368 leave the iterator in a valid state for HT_Next. */
369 vg_assert(table->iterOK);
371 if (table->iterNode && table->iterNode->next) {
372 table->iterNode = table->iterNode->next;
373 return table->iterNode;
376 for (i = table->iterChain; i < table->n_chains; i++) {
377 if (table->chains[i]) {
378 table->iterNode = table->chains[i];
379 table->iterChain = i + 1; // Next chain to be traversed
380 return table->iterNode;
383 return NULL;
386 void VG_(HT_remove_at_Iter)(VgHashTable *table)
388 vg_assert(table);
389 vg_assert(table->iterOK);
390 vg_assert(table->iterNode);
392 const UInt curChain = table->iterChain - 1; // chain of iterNode.
395 if (table->chains[curChain] == table->iterNode) {
396 /* iterNode is the first of its chain -> remove it from the chain. */
397 table->chains[curChain] = table->iterNode->next;
398 /* Setup the iterator to visit first node of curChain: */
399 table->iterNode = NULL;
400 table->iterChain = curChain;
401 } else {
402 /* iterNode is somewhere inside curChain chain */
403 VgHashNode* prev = table->chains[curChain];
405 while (prev->next != table->iterNode)
406 prev = prev->next;
407 /* Remove iterNode from the chain. */
408 prev->next = table->iterNode->next;
409 /* Setup the iterator to visit prev->next, which is the node
410 that was after the deleted node. */
411 table->iterNode = prev;
414 table->n_elements--;
417 void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*))
419 UInt i;
420 VgHashNode *node, *node_next;
422 for (i = 0; i < table->n_chains; i++) {
423 for (node = table->chains[i]; node != NULL; node = node_next) {
424 node_next = node->next;
425 freenode_fn(node);
428 VG_(free)(table->chains);
429 VG_(free)(table);
432 /*--------------------------------------------------------------------*/
433 /*--- end ---*/
434 /*--------------------------------------------------------------------*/