.
[coreutils.git] / lib / hash.c
blob0e4f2d61c744020fe88e9c1b409614d6320957da
1 /* A generic hash table package. */
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <assert.h>
7 #include "hash.h"
9 #ifdef USE_OBSTACK
10 # define ZALLOC(Ht, N) obstack_alloc (&(ht->ht_obstack), (N))
11 #else
12 # define ZALLOC(Ht, N) malloc ((N))
13 #endif
15 #define BUCKET_HEAD(ht, idx) ((ht)->hash_table[(idx)])
17 static int
18 is_prime (candidate)
19 unsigned long candidate;
21 /* No even number and none less than 10 will be passed here. */
22 unsigned long divn = 3;
23 unsigned long sq = divn * divn;
25 while (sq < candidate && (candidate % divn))
27 divn++;
28 sq += 4 * divn;
29 divn++;
32 return (candidate % divn);
35 /* Round a given number up to the nearest prime. */
37 static unsigned long
38 next_prime (candidate)
39 unsigned long candidate;
41 /* Make it definitely odd. */
42 candidate |= 1;
44 while (!is_prime (candidate))
45 candidate += 2;
47 return candidate;
50 static void
51 hash_free_entry (HT *ht, HASH_ENT *e)
53 e->key = NULL;
54 e->next = ht->hash_free_entry_list;
55 ht->hash_free_entry_list = e;
58 static HASH_ENT *
59 hash_allocate_entry (HT *ht)
61 HASH_ENT *new;
62 if (ht->hash_free_entry_list)
64 new = ht->hash_free_entry_list;
65 ht->hash_free_entry_list = new->next;
67 else
69 new = (HASH_ENT *) ZALLOC (ht, sizeof (HASH_ENT));
71 return new;
74 unsigned int
75 hash_get_n_slots_used (const HT *ht)
77 return ht->hash_n_slots_used;
80 /* Free all storage associated with HT that functions in this package
81 have allocated. If a key_freer function has been supplied (when HT
82 was created), this function applies it to the key of each entry before
83 freeing that entry. */
85 static void
86 hash_free_0 (HT *ht, int free_user_data)
88 if (free_user_data && ht->hash_key_freer != NULL)
90 unsigned int i;
92 for (i = 0; i < ht->hash_table_size; i++)
94 HASH_ENT *p;
95 HASH_ENT *next;
97 for (p = BUCKET_HEAD (ht, i); p; p = next)
99 next = p->next;
100 ht->hash_key_freer (p->key);
105 #ifdef USE_OBSTACK
106 obstack_free (&(ht->ht_obstack), NULL);
107 #else
109 unsigned int i;
110 for (i = 0; i < ht->hash_table_size; i++)
112 HASH_ENT *p;
113 HASH_ENT *next;
115 for (p = BUCKET_HEAD (ht, i); p; p = next)
117 next = p->next;
118 free (p);
122 #endif
123 ht->hash_free_entry_list = NULL;
124 free (ht->hash_table);
127 /* FIXME-comment */
130 hash_rehash (HT *ht, unsigned int new_table_size)
132 HT *ht_new;
133 unsigned int i;
135 if (ht->hash_table_size <= 0 || new_table_size == 0)
136 return 1;
138 ht_new = hash_initialize (new_table_size, ht->hash_key_freer,
139 ht->hash_hash, ht->hash_key_comparator);
141 if (ht_new == NULL)
142 return 1;
144 for (i = 0; i < ht->hash_table_size; i++)
146 HASH_ENT *p = BUCKET_HEAD (ht, i);
147 for ( /* empty */ ; p; p = p->next)
149 int failed;
150 const void *already_in_table;
151 already_in_table = hash_insert_if_absent (ht_new, p->key, &failed);
152 assert (failed == 0 && already_in_table == 0);
156 hash_free_0 (ht, 0);
158 #ifdef TESTING
159 assert (hash_table_ok (ht_new));
160 #endif
161 *ht = *ht_new;
162 free (ht_new);
164 /* FIXME: fill in ht_new->n_slots_used and other statistics fields. */
166 return 0;
169 /* FIXME-comment */
171 unsigned int
172 hash_get_max_chain_length (HT *ht)
174 unsigned int i;
175 unsigned int max_chain_length = 0;
177 if (!ht->hash_dirty_max_chain_length)
178 return ht->hash_max_chain_length;
180 for (i = 0; i < ht->hash_table_size; i++)
182 unsigned int chain_length = 0;
183 HASH_ENT *p = BUCKET_HEAD (ht, i);
184 for ( /* empty */ ; p; p = p->next)
185 ++chain_length;
186 if (chain_length > max_chain_length)
187 max_chain_length = chain_length;
190 ht->hash_max_chain_length = max_chain_length;
191 ht->hash_dirty_max_chain_length = 0;
192 return ht->hash_max_chain_length;
195 unsigned int
196 hash_get_n_keys (const HT *ht)
198 return ht->hash_n_keys;
201 unsigned int
202 hash_get_table_size (const HT *ht)
204 return ht->hash_table_size;
207 /* CANDIDATE_TABLE_SIZE need not be prime. If WHEN_TO_REHASH (FIXME: add
208 this parameter) is positive, when that percentage of table entries have
209 been used, the table size is increased; then a new, larger table
210 (GROW_FACTOR (FIXME: maybe add this parameter) times larger than the previous
211 size) is allocated and all entries in the old table are rehashed into
212 the new, larger one. The old table is freed. If WHEN_TO_REHASH is zero
213 or negative, the table is never resized.
215 The function returns non-zero
216 - if CANDIDATE_TABLE_SIZE is zero or negative
217 - if KEY_COMPARATOR or HASH is null
218 - if it was unable to allocate sufficient storage for the hash table
219 - if WHEN_TO_REHASH is zero or negative
220 Otherwise it returns zero. */
222 HT *
223 hash_initialize (unsigned int candidate_table_size,
224 Hash_key_freer_type key_freer,
225 unsigned int (*hash) (const void *, unsigned int),
226 int (*key_comparator) (const void *, const void *))
228 HT *ht;
229 unsigned int i;
230 unsigned int table_size;
232 if (candidate_table_size <= 0)
233 return NULL;
235 if (hash == NULL || key_comparator == NULL)
236 return NULL;
238 ht = (HT *) malloc (sizeof (HT));
239 if (ht == NULL)
240 return NULL;
242 table_size = next_prime (candidate_table_size);
243 ht->hash_table = (HASH_ENT **) malloc (table_size * sizeof (HASH_ENT *));
244 if (ht->hash_table == NULL)
245 return NULL;
247 for (i = 0; i < table_size; i++)
249 BUCKET_HEAD (ht, i) = NULL;
252 ht->hash_free_entry_list = NULL;
253 ht->hash_table_size = table_size;
254 ht->hash_hash = hash;
255 ht->hash_key_comparator = key_comparator;
256 ht->hash_key_freer = key_freer;
257 ht->hash_n_slots_used = 0;
258 ht->hash_max_chain_length = 0;
259 ht->hash_n_keys = 0;
260 ht->hash_dirty_max_chain_length = 0;
261 #ifdef USE_OBSTACK
262 obstack_init (&(ht->ht_obstack));
263 #endif
265 return ht;
268 /* This private function is used to help with insertion and deletion.
269 If E does *not* compare equal to the key of any entry in the table,
270 return NULL.
271 When E matches an entry in the table, return a pointer to the matching
272 entry. When DELETE is non-zero and E matches an entry in the table,
273 unlink the matching entry. Set *CHAIN_LENGTH to the number of keys
274 that have hashed to the bucket E hashed to. */
276 static HASH_ENT *
277 hash_find_entry (HT *ht, const void *e, unsigned int *table_idx,
278 unsigned int *chain_length, int delete)
280 unsigned int idx;
281 int found;
282 HASH_ENT *p, *prev;
284 idx = ht->hash_hash (e, ht->hash_table_size);
285 assert (idx < ht->hash_table_size);
287 *table_idx = idx;
288 *chain_length = 0;
290 prev = ht->hash_table[idx];
292 if (prev == NULL)
293 return NULL;
295 *chain_length = 1;
296 if (ht->hash_key_comparator (e, prev->key) == 0)
298 if (delete)
299 ht->hash_table[idx] = prev->next;
300 return prev;
303 p = prev->next;
304 found = 0;
305 while (p)
307 ++(*chain_length);
308 if (ht->hash_key_comparator (e, p->key) == 0)
310 found = 1;
311 break;
313 prev = p;
314 p = p->next;
317 if (!found)
318 return NULL;
320 assert (p != NULL);
321 if (delete)
322 prev->next = p->next;
324 return p;
327 /* Return non-zero if E is already in the table, zero otherwise. */
330 hash_query_in_table (const HT *ht, const void *e)
332 unsigned int idx;
333 HASH_ENT *p;
335 idx = ht->hash_hash (e, ht->hash_table_size);
336 assert (idx < ht->hash_table_size);
337 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
338 if (ht->hash_key_comparator (e, p->key) == 0)
339 return 1;
340 return 0;
343 void *
344 hash_lookup (const HT *ht, const void *e)
346 unsigned int idx;
347 HASH_ENT *p;
349 idx = ht->hash_hash (e, ht->hash_table_size);
350 assert (idx < ht->hash_table_size);
351 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
352 if (ht->hash_key_comparator (e, p->key) == 0)
353 return p->key;
354 return NULL;
357 /* If E matches an entry already in the hash table, don't modify the
358 table and return a pointer to the matched entry. If E does not
359 match any item in the table, insert E and return NULL.
360 If the storage required for insertion cannot be allocated
361 set *FAILED to non-zero and return NULL. */
363 void *
364 hash_insert_if_absent (HT *ht, const void *e, int *failed)
366 const HASH_ENT *ent;
367 HASH_ENT *new;
368 unsigned int idx;
369 unsigned int chain_length;
371 assert (e != NULL); /* Can't insert a NULL key. */
373 *failed = 0;
374 ent = hash_find_entry (ht, e, &idx, &chain_length, 0);
375 if (ent != NULL)
377 /* E matches a key from an entry already in the table. */
378 return ent->key;
381 new = hash_allocate_entry (ht);
382 if (new == NULL)
384 *failed = 1;
385 return NULL;
388 new->key = (void *) e;
389 new->next = BUCKET_HEAD (ht, idx);
390 BUCKET_HEAD (ht, idx) = new;
392 if (chain_length == 0)
393 ++(ht->hash_n_slots_used);
395 /* The insertion has just increased chain_length by 1. */
396 ++chain_length;
398 if (chain_length > ht->hash_max_chain_length)
399 ht->hash_max_chain_length = chain_length;
401 ++(ht->hash_n_keys);
402 if ((double) ht->hash_n_keys / ht->hash_table_size > 0.80)
404 unsigned int new_size;
405 new_size = next_prime (2 * ht->hash_table_size + 1);
406 *failed = hash_rehash (ht, new_size);
409 #ifdef TESTING
410 assert (hash_table_ok (ht));
411 #endif
413 return NULL;
416 /* If E is already in the table, remove it and return a pointer to
417 the just-deleted key (the user may want to deallocate its storage).
418 If E is not in the table, don't modify the table and return NULL. */
420 void *
421 hash_delete_if_present (HT *ht, const void *e)
423 HASH_ENT *ent;
424 void *key;
425 unsigned int idx;
426 unsigned int chain_length;
428 ent = hash_find_entry (ht, e, &idx, &chain_length, 1);
429 if (ent == NULL)
430 return NULL;
432 if (ent->next == NULL && chain_length == 1)
433 --(ht->hash_n_slots_used);
435 key = ent->key;
437 --(ht->hash_n_keys);
438 ht->hash_dirty_max_chain_length = 1;
439 if (ent->next == NULL && chain_length < ht->hash_max_chain_length)
440 ht->hash_dirty_max_chain_length = 0;
442 hash_free_entry (ht, ent);
444 #ifdef TESTING
445 assert (hash_table_ok (ht));
446 #endif
447 return key;
450 void
451 hash_print_statistics (const HT *ht, FILE *stream)
453 unsigned int n_slots_used;
454 unsigned int n_keys;
455 unsigned int max_chain_length;
456 int err;
458 err = hash_get_statistics (ht, &n_slots_used, &n_keys, &max_chain_length);
459 assert (err == 0);
460 fprintf (stream, "table size: %d\n", ht->hash_table_size);
461 fprintf (stream, "# slots used: %u (%.2f%%)\n", n_slots_used,
462 (100.0 * n_slots_used) / ht->hash_table_size);
463 fprintf (stream, "# keys: %u\n", n_keys);
464 fprintf (stream, "max chain length: %u\n", max_chain_length);
467 /* If there is *NO* table (so, no meaningful stats) return non-zero
468 and don't reference the argument pointers. Otherwise compute the
469 performance statistics and return non-zero. */
472 hash_get_statistics (const HT *ht,
473 unsigned int *n_slots_used,
474 unsigned int *n_keys,
475 unsigned int *max_chain_length)
477 unsigned int i;
479 if (ht == NULL || ht->hash_table == NULL)
480 return 1;
482 *max_chain_length = 0;
483 *n_slots_used = 0;
484 *n_keys = 0;
486 for (i = 0; i < ht->hash_table_size; i++)
488 unsigned int chain_length = 0;
489 HASH_ENT *p;
491 p = BUCKET_HEAD (ht, i);
492 if (p != NULL)
493 ++(*n_slots_used);
495 for (; p; p = p->next)
496 ++chain_length;
498 *n_keys += chain_length;
499 if (chain_length > *max_chain_length)
500 *max_chain_length = chain_length;
502 return 0;
506 hash_table_ok (HT *ht)
508 int code;
509 unsigned int n_slots_used;
510 unsigned int n_keys;
511 unsigned int max_chain_length;
513 if (ht == NULL || ht->hash_table == NULL)
514 return 1;
516 code = hash_get_statistics (ht, &n_slots_used, &n_keys,
517 &max_chain_length);
519 if (code != 0
520 || n_slots_used != ht->hash_n_slots_used
521 || n_keys != ht->hash_n_keys
522 || max_chain_length != hash_get_max_chain_length (ht))
523 return 0;
525 return 1;
528 /* See hash_do_for_each_2 (below) for a variant. */
530 void
531 hash_do_for_each (HT *ht, void (*f) (void *e, void *aux), void *aux)
533 unsigned int i;
535 #ifdef TESTING
536 assert (hash_table_ok (ht));
537 #endif
539 if (ht->hash_table == NULL)
540 return;
542 for (i = 0; i < ht->hash_table_size; i++)
544 HASH_ENT *p;
545 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
547 (*f) (p->key, aux);
552 /* Just like hash_do_for_each, except that function F returns an int
553 that can signal (when non-zero) we should return early. */
556 hash_do_for_each_2 (HT *ht, int (*f) (void *e, void *aux), void *aux)
558 unsigned int i;
560 #ifdef TESTING
561 assert (hash_table_ok (ht));
562 #endif
564 if (ht->hash_table == NULL)
565 return 0;
567 for (i = 0; i < ht->hash_table_size; i++)
569 HASH_ENT *p;
570 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
572 int return_code;
574 return_code = (*f) (p->key, aux);
575 if (return_code != 0)
576 return return_code;
579 return 0;
582 /* For each entry in the bucket addressed by BUCKET_KEY of the hash
583 table HT, invoke the function F. If F returns non-zero, stop
584 iterating and return that value. Otherwise, apply F to all entries
585 in the selected bucket and return zero. The AUX argument to this
586 function is passed as the last argument in each invocation of F.
587 The first argument to F is BUCKET_KEY, and the second is the key of
588 an entry in the selected bucket. */
591 hash_do_for_each_in_selected_bucket (HT *ht, const void *bucket_key,
592 int (*f) (const void *bucket_key,
593 void *e, void *aux),
594 void *aux)
596 int idx;
597 HASH_ENT *p;
599 #ifdef TESTING
600 assert (hash_table_ok (ht));
601 #endif
603 if (ht->hash_table == NULL)
604 return 0;
606 idx = ht->hash_hash (bucket_key, ht->hash_table_size);
608 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
610 int return_code;
612 return_code = (*f) (bucket_key, p->key, aux);
613 if (return_code != 0)
614 return return_code;
617 return 0;
620 /* Make all buckets empty, placing any chained entries on the free list.
621 As with hash_free, apply the user-specified function key_freer
622 (if it's not NULL) to the keys of any affected entries. */
624 void
625 hash_clear (HT *ht)
627 unsigned int i;
628 HASH_ENT *p;
630 for (i = 0; i < ht->hash_table_size; i++)
632 HASH_ENT *tail = NULL;
633 HASH_ENT *head = BUCKET_HEAD (ht, i);
635 /* Free any keys and get tail pointer to last entry in chain. */
636 for (p = head; p; p = p->next)
638 if (ht->hash_key_freer != NULL)
639 ht->hash_key_freer (p->key);
640 p->key = NULL; /* Make sure no one tries to use this key later. */
641 tail = p;
643 BUCKET_HEAD (ht, i) = NULL;
645 /* If there's a chain in this bucket, tack it onto the
646 beginning of the free list. */
647 if (head != NULL)
649 assert (tail != NULL && tail->next == NULL);
650 tail->next = ht->hash_free_entry_list;
651 ht->hash_free_entry_list = head;
654 ht->hash_n_slots_used = 0;
655 ht->hash_max_chain_length = 0;
656 ht->hash_n_keys = 0;
657 ht->hash_dirty_max_chain_length = 0;
660 void
661 hash_free (HT *ht)
663 hash_free_0 (ht, 1);
664 free (ht);
667 #ifdef TESTING
669 void
670 hash_print (const HT *ht)
672 int i;
674 for (i = 0; i < ht->hash_table_size; i++)
676 HASH_ENT *p;
678 if (BUCKET_HEAD (ht, i) != NULL)
679 printf ("%d:\n", i);
681 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
683 char *s = (char *) p->key;
684 /* FIXME */
685 printf (" %s\n", s);
690 #endif /* TESTING */
692 void
693 hash_get_key_list (const HT *ht, unsigned int bufsize, void **buf)
695 unsigned int i;
696 unsigned int c = 0;
698 for (i = 0; i < ht->hash_table_size; i++)
700 HASH_ENT *p;
702 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
704 if (c >= bufsize)
705 return;
706 buf[c++] = p->key;
711 /* Return the first key in the table. If the table is empty, return NULL. */
713 void *
714 hash_get_first (const HT *ht)
716 unsigned int idx;
717 HASH_ENT *p;
719 if (ht->hash_n_keys == 0)
720 return NULL;
722 for (idx = 0; idx < ht->hash_table_size; idx++)
724 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
725 return p->key;
727 abort ();
730 /* Return the key in the entry following the entry whose key matches E.
731 If there is the only one key in the table and that key matches E,
732 return the matching key. If E is not in the table, return NULL. */
734 void *
735 hash_get_next (const HT *ht, const void *e)
737 unsigned int idx;
738 HASH_ENT *p;
740 idx = ht->hash_hash (e, ht->hash_table_size);
741 assert (idx < ht->hash_table_size);
742 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
744 if (ht->hash_key_comparator (e, p->key) == 0)
746 if (p->next != NULL)
748 return p->next->key;
750 else
752 unsigned int bucket;
754 /* E is the last or only key in the bucket chain. */
755 if (ht->hash_n_keys == 1)
757 /* There is only one key in the table, and it matches E. */
758 return p->key;
760 bucket = idx;
763 idx = (idx + 1) % ht->hash_table_size;
764 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
765 return p->key;
767 while (idx != bucket);
772 /* E is not in the table. */
773 return NULL;