1 /* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
3 /* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
9 #include "ruby/config.h"
10 #include "ruby/defines.h"
20 typedef struct st_table_entry st_table_entry
;
22 struct st_table_entry
{
27 st_table_entry
*fore
, *back
;
30 #define ST_DEFAULT_MAX_DENSITY 5
31 #define ST_DEFAULT_INIT_TABLE_SIZE 11
34 * DEFAULT_MAX_DENSITY is the default for the largest we allow the
35 * average number of items per bin before increasing the number of
38 * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
43 static const struct st_hash_type type_numhash
= {
48 /* extern int strcmp(const char *, const char *); */
49 static int strhash(const char *);
50 static const struct st_hash_type type_strhash
= {
55 static int strcasehash(const char *);
56 static const struct st_hash_type type_strcasehash
= {
61 static void rehash(st_table
*);
64 #define malloc xmalloc
65 #define calloc xcalloc
66 #define free(x) xfree(x)
69 #define alloc(type) (type*)malloc((size_t)sizeof(type))
70 #define Calloc(n,s) (char*)calloc((n),(s))
72 #define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
74 #define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
75 #define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
78 * MINSIZE is the minimum size of a dictionary.
84 Table of prime numbers 2^n+a, 2<=n<=30.
86 static const long primes
[] = {
124 for (i
=3; i
<31; i
++) {
125 if ((1<<i
) > size
) return 1<<i
;
131 for (i
= 0, newsize
= MINSIZE
;
132 i
< (int )(sizeof(primes
)/sizeof(primes
[0]));
135 if (newsize
> size
) return primes
[i
];
137 /* Ran out of polynomials */
138 return -1; /* should raise exception */
143 static int collision
= 0;
144 static int init_st
= 0;
149 FILE *f
= fopen("/tmp/col", "w");
150 fprintf(f
, "collision: %d\n", collision
);
155 #define MAX_PACKED_NUMHASH 5
158 st_init_table_with_size(const struct st_hash_type
*type
, int size
)
169 size
= new_size(size
); /* round up to prime number */
171 tbl
= alloc(st_table
);
173 tbl
->num_entries
= 0;
174 tbl
->entries_packed
= type
== &type_numhash
&& size
/2 <= MAX_PACKED_NUMHASH
;
175 tbl
->num_bins
= size
;
176 tbl
->bins
= (st_table_entry
**)Calloc(size
, sizeof(st_table_entry
*));
183 st_init_table(const struct st_hash_type
*type
)
185 return st_init_table_with_size(type
, 0);
189 st_init_numtable(void)
191 return st_init_table(&type_numhash
);
195 st_init_numtable_with_size(int size
)
197 return st_init_table_with_size(&type_numhash
, size
);
201 st_init_strtable(void)
203 return st_init_table(&type_strhash
);
207 st_init_strtable_with_size(int size
)
209 return st_init_table_with_size(&type_strhash
, size
);
213 st_init_strcasetable(void)
215 return st_init_table(&type_strcasehash
);
219 st_init_strcasetable_with_size(int size
)
221 return st_init_table_with_size(&type_strcasehash
, size
);
225 st_clear(st_table
*table
)
227 register st_table_entry
*ptr
, *next
;
230 if (table
->entries_packed
) {
231 table
->num_entries
= 0;
235 for(i
= 0; i
< table
->num_bins
; i
++) {
236 ptr
= table
->bins
[i
];
244 table
->num_entries
= 0;
249 st_free_table(st_table
*table
)
256 #define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
257 ((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
260 #define COLLISION collision++
265 #define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
266 bin_pos = hash_val%(table)->num_bins;\
267 ptr = (table)->bins[bin_pos];\
268 if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
270 while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
278 st_lookup(st_table
*table
, register st_data_t key
, st_data_t
*value
)
280 unsigned int hash_val
, bin_pos
;
281 register st_table_entry
*ptr
;
283 if (table
->entries_packed
) {
285 for (i
= 0; i
< table
->num_entries
; i
++) {
286 if ((st_data_t
)table
->bins
[i
*2] == key
) {
287 if (value
!=0) *value
= (st_data_t
)table
->bins
[i
*2+1];
294 hash_val
= do_hash(key
, table
);
295 FIND_ENTRY(table
, ptr
, hash_val
, bin_pos
);
301 if (value
!= 0) *value
= ptr
->record
;
307 st_get_key(st_table
*table
, register st_data_t key
, st_data_t
*result
)
309 unsigned int hash_val
, bin_pos
;
310 register st_table_entry
*ptr
;
312 if (table
->entries_packed
) {
314 for (i
= 0; i
< table
->num_entries
; i
++) {
315 if ((st_data_t
)table
->bins
[i
*2] == key
) {
316 if (result
!=0) *result
= (st_data_t
)table
->bins
[i
*2];
323 hash_val
= do_hash(key
, table
);
324 FIND_ENTRY(table
, ptr
, hash_val
, bin_pos
);
330 if (result
!= 0) *result
= ptr
->key
;
335 #define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
337 st_table_entry *entry, *head;\
338 if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
340 bin_pos = hash_val % table->num_bins;\
343 entry = alloc(st_table_entry);\
345 entry->hash = hash_val;\
347 entry->record = value;\
348 entry->next = table->bins[bin_pos];\
349 if ((head = table->head) != 0) {\
351 (entry->back = head->back)->fore = entry;\
355 table->head = entry->fore = entry->back = entry;\
357 table->bins[bin_pos] = entry;\
358 table->num_entries++;\
362 unpack_entries(register st_table
*table
)
365 struct st_table_entry
*packed_bins
[MAX_PACKED_NUMHASH
*2];
366 int num_entries
= table
->num_entries
;
368 memcpy(packed_bins
, table
->bins
, sizeof(struct st_table_entry
*) * num_entries
*2);
369 table
->entries_packed
= 0;
370 table
->num_entries
= 0;
371 memset(table
->bins
, 0, sizeof(struct st_table_entry
*) * table
->num_bins
);
372 for (i
= 0; i
< num_entries
; i
++) {
373 st_insert(table
, (st_data_t
)packed_bins
[i
*2], (st_data_t
)packed_bins
[i
*2+1]);
378 st_insert(register st_table
*table
, register st_data_t key
, st_data_t value
)
380 unsigned int hash_val
, bin_pos
;
381 register st_table_entry
*ptr
;
383 if (table
->entries_packed
) {
385 for (i
= 0; i
< table
->num_entries
; i
++) {
386 if ((st_data_t
)table
->bins
[i
*2] == key
) {
387 table
->bins
[i
*2+1] = (struct st_table_entry
*)value
;
391 if ((table
->num_entries
+1) * 2 <= table
->num_bins
&& table
->num_entries
+1 <= MAX_PACKED_NUMHASH
) {
392 i
= table
->num_entries
++;
393 table
->bins
[i
*2] = (struct st_table_entry
*)key
;
394 table
->bins
[i
*2+1] = (struct st_table_entry
*)value
;
398 unpack_entries(table
);
402 hash_val
= do_hash(key
, table
);
403 FIND_ENTRY(table
, ptr
, hash_val
, bin_pos
);
406 ADD_DIRECT(table
, key
, value
, hash_val
, bin_pos
);
416 st_add_direct(st_table
*table
, st_data_t key
, st_data_t value
)
418 unsigned int hash_val
, bin_pos
;
420 if (table
->entries_packed
) {
422 if ((table
->num_entries
+1) * 2 <= table
->num_bins
&& table
->num_entries
+1 <= MAX_PACKED_NUMHASH
) {
423 i
= table
->num_entries
++;
424 table
->bins
[i
*2] = (struct st_table_entry
*)key
;
425 table
->bins
[i
*2+1] = (struct st_table_entry
*)value
;
429 unpack_entries(table
);
433 hash_val
= do_hash(key
, table
);
434 bin_pos
= hash_val
% table
->num_bins
;
435 ADD_DIRECT(table
, key
, value
, hash_val
, bin_pos
);
439 rehash(register st_table
*table
)
441 register st_table_entry
*ptr
, **new_bins
;
443 unsigned int hash_val
;
445 new_num_bins
= new_size(table
->num_bins
+1);
446 new_bins
= (st_table_entry
**)
447 xrealloc(table
->bins
, new_num_bins
* sizeof(st_table_entry
*));
448 for (i
= 0; i
< new_num_bins
; ++i
) new_bins
[i
] = 0;
449 table
->num_bins
= new_num_bins
;
450 table
->bins
= new_bins
;
452 if ((ptr
= table
->head
) != 0) {
454 hash_val
= ptr
->hash
% new_num_bins
;
455 ptr
->next
= new_bins
[hash_val
];
456 new_bins
[hash_val
] = ptr
;
457 } while ((ptr
= ptr
->fore
) != table
->head
);
462 st_copy(st_table
*old_table
)
465 st_table_entry
*ptr
, *entry
, *prev
, **tail
;
466 int num_bins
= old_table
->num_bins
;
467 unsigned int hash_val
;
469 new_table
= alloc(st_table
);
470 if (new_table
== 0) {
474 *new_table
= *old_table
;
475 new_table
->bins
= (st_table_entry
**)
476 Calloc((unsigned)num_bins
, sizeof(st_table_entry
*));
478 if (new_table
->bins
== 0) {
483 if (old_table
->entries_packed
) {
484 memcpy(new_table
->bins
, old_table
->bins
, sizeof(struct st_table_entry
*) * old_table
->num_bins
);
488 if ((ptr
= old_table
->head
) != 0) {
490 tail
= &new_table
->head
;
492 entry
= alloc(st_table_entry
);
494 st_free_table(new_table
);
498 hash_val
= entry
->hash
% num_bins
;
499 entry
->next
= new_table
->bins
[hash_val
];
500 new_table
->bins
[hash_val
] = entry
;
502 *tail
= prev
= entry
;
504 } while ((ptr
= ptr
->fore
) != old_table
->head
);
505 entry
= new_table
->head
;
513 #define REMOVE_ENTRY(table, ptr) do \
515 if (ptr == ptr->fore) { \
519 st_table_entry *fore = ptr->fore, *back = ptr->back; \
522 if (ptr == table->head) table->head = fore; \
524 table->num_entries--; \
528 st_delete(register st_table
*table
, register st_data_t
*key
, st_data_t
*value
)
530 unsigned int hash_val
;
531 st_table_entry
**prev
;
532 register st_table_entry
*ptr
;
534 if (table
->entries_packed
) {
536 for (i
= 0; i
< table
->num_entries
; i
++) {
537 if ((st_data_t
)table
->bins
[i
*2] == *key
) {
538 if (value
!= 0) *value
= (st_data_t
)table
->bins
[i
*2+1];
539 table
->num_entries
--;
540 memmove(&table
->bins
[i
*2], &table
->bins
[(i
+1)*2],
541 sizeof(struct st_table_entry
*) * 2*(table
->num_entries
-i
));
545 if (value
!= 0) *value
= 0;
549 hash_val
= do_hash_bin(*key
, table
);
551 for (prev
= &table
->bins
[hash_val
]; (ptr
= *prev
) != 0; prev
= &ptr
->next
) {
552 if (EQUAL(table
, *key
, ptr
->key
)) {
554 REMOVE_ENTRY(table
, ptr
);
555 if (value
!= 0) *value
= ptr
->record
;
562 if (value
!= 0) *value
= 0;
567 st_delete_safe(register st_table
*table
, register st_data_t
*key
, st_data_t
*value
, st_data_t never
)
569 unsigned int hash_val
;
570 register st_table_entry
*ptr
;
572 hash_val
= do_hash_bin(*key
, table
);
573 ptr
= table
->bins
[hash_val
];
575 for (; ptr
!= 0; ptr
= ptr
->next
) {
576 if ((ptr
->key
!= never
) && EQUAL(table
, ptr
->key
, *key
)) {
577 REMOVE_ENTRY(table
, ptr
);
579 if (value
!= 0) *value
= ptr
->record
;
580 ptr
->key
= ptr
->record
= never
;
585 if (value
!= 0) *value
= 0;
590 st_cleanup_safe(st_table
*table
, st_data_t never
)
592 st_table_entry
*ptr
, **last
, *tmp
;
595 for (i
= 0; i
< table
->num_bins
; i
++) {
596 ptr
= *(last
= &table
->bins
[i
]);
598 if (ptr
->key
== never
) {
600 *last
= ptr
= ptr
->next
;
604 ptr
= *(last
= &ptr
->next
);
611 st_foreach(st_table
*table
, int (*func
)(ANYARGS
), st_data_t arg
)
613 st_table_entry
*ptr
, **last
, *tmp
;
614 enum st_retval retval
;
617 if (table
->entries_packed
) {
618 for (i
= 0; i
< table
->num_entries
; i
++) {
621 key
= (st_data_t
)table
->bins
[i
*2];
622 val
= (st_data_t
)table
->bins
[i
*2+1];
623 retval
= (*func
)(key
, val
, arg
);
625 case ST_CHECK
: /* check if hash is modified during iteration */
626 for (j
= 0; j
< table
->num_entries
; j
++) {
627 if ((st_data_t
)table
->bins
[j
*2] == key
)
630 if (j
== table
->num_entries
) {
631 /* call func with error notice */
632 retval
= (*func
)(0, 0, arg
, 1);
641 table
->num_entries
--;
642 memmove(&table
->bins
[i
*2], &table
->bins
[(i
+1)*2],
643 sizeof(struct st_table_entry
*) * 2*(table
->num_entries
-i
));
651 if ((ptr
= table
->head
) != 0) {
653 end
= ptr
->fore
== table
->head
;
654 retval
= (*func
)(ptr
->key
, ptr
->record
, arg
);
656 case ST_CHECK
: /* check if hash is modified during iteration */
657 i
= ptr
->hash
% table
->num_bins
;
658 for (tmp
= table
->bins
[i
]; tmp
!= ptr
; tmp
= tmp
->next
) {
660 /* call func with error notice */
661 retval
= (*func
)(0, 0, arg
, 1);
672 last
= &table
->bins
[ptr
->hash
% table
->num_bins
];
673 for (; (tmp
= *last
) != 0; last
= &tmp
->next
) {
677 REMOVE_ENTRY(table
, ptr
);
679 if (ptr
== tmp
) return 0;
685 } while (!end
&& table
->head
);
690 #if 0 /* unused right now */
692 st_reverse_foreach(st_table
*table
, int (*func
)(ANYARGS
), st_data_t arg
)
694 st_table_entry
*ptr
, **last
, *tmp
;
695 enum st_retval retval
;
698 if (table
->entries_packed
) {
699 for (i
= table
->num_entries
-1; 0 <= i
; i
--) {
702 key
= (st_data_t
)table
->bins
[i
*2];
703 val
= (st_data_t
)table
->bins
[i
*2+1];
704 retval
= (*func
)(key
, val
, arg
);
706 case ST_CHECK
: /* check if hash is modified during iteration */
707 for (j
= 0; j
< table
->num_entries
; j
++) {
708 if ((st_data_t
)table
->bins
[j
*2] == key
)
711 if (j
== table
->num_entries
) {
712 /* call func with error notice */
713 retval
= (*func
)(0, 0, arg
, 1);
722 table
->num_entries
--;
723 memmove(&table
->bins
[i
*2], &table
->bins
[(i
+1)*2],
724 sizeof(struct st_table_entry
*) * 2*(table
->num_entries
-i
));
731 if ((ptr
= table
->head
) != 0) {
734 end
= ptr
== table
->head
;
735 retval
= (*func
)(ptr
->key
, ptr
->record
, arg
, 0);
737 case ST_CHECK
: /* check if hash is modified during iteration */
738 i
= ptr
->hash
% table
->num_bins
;
739 for (tmp
= table
->bins
[i
]; tmp
!= ptr
; tmp
= tmp
->next
) {
741 /* call func with error notice */
742 retval
= (*func
)(0, 0, arg
, 1);
753 last
= &table
->bins
[ptr
->hash
% table
->num_bins
];
754 for (; (tmp
= *last
) != 0; last
= &tmp
->next
) {
758 REMOVE_ENTRY(table
, ptr
);
766 table
->num_entries
--;
768 } while (!end
&& table
->head
);
775 * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code
777 * @(#) $Hash32: Revision: 1.1 $
778 * @(#) $Hash32: Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $
779 * @(#) $Hash32: Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $
783 * Fowler/Noll/Vo hash
785 * The basis of this hash algorithm was taken from an idea sent
786 * as reviewer comments to the IEEE POSIX P1003.2 committee by:
788 * Phong Vo (http://www.research.att.com/info/kpv/)
789 * Glenn Fowler (http://www.research.att.com/~gsf/)
791 * In a subsequent ballot round:
793 * Landon Curt Noll (http://www.isthe.com/chongo/)
795 * improved on their algorithm. Some people tried this hash
796 * and found that it worked rather well. In an EMail message
797 * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash.
799 * FNV hashes are designed to be fast while maintaining a low
800 * collision rate. The FNV speed allows one to quickly hash lots
801 * of data while maintaining a reasonable collision rate. See:
803 * http://www.isthe.com/chongo/tech/comp/fnv/index.html
805 * for more details as well as other forms of the FNV hash.
808 * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
809 * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
813 * Please do not copyright this code. This code is in the public domain.
815 * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
816 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
817 * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
818 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
819 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
820 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
821 * PERFORMANCE OF THIS SOFTWARE.
824 * chongo <Landon Curt Noll> /\oo/\
825 * http://www.isthe.com/chongo/
827 * Share and Enjoy! :-)
831 * 32 bit FNV-1 and FNV-1a non-zero initial basis
833 * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets:
835 * chongo <Landon Curt Noll> /\../\
837 * NOTE: The \'s above are not back-slashing escape characters.
838 * They are literal ASCII backslash 0x5c characters.
840 * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition.
842 #define FNV1_32A_INIT 0x811c9dc5
845 * 32 bit magic FNV-1a prime
847 #define FNV_32_PRIME 0x01000193
850 strhash(register const char *string
)
852 register unsigned int hval
= FNV1_32A_INIT
;
855 * FNV-1a hash each octet in the buffer
858 /* xor the bottom with the current octet */
859 hval
^= (unsigned int)*string
++;
861 /* multiply by the 32 bit FNV magic prime mod 2^32 */
862 hval
*= FNV_32_PRIME
;
868 st_strcasecmp(const char *s1
, const char *s2
)
873 c1
= (unsigned char)*s1
++;
874 c2
= (unsigned char)*s2
++;
875 if (c1
== '\0' || c2
== '\0') {
876 if (c1
!= '\0') return 1;
877 if (c2
!= '\0') return -1;
880 if ((unsigned int)(c1
- 'A') <= ('Z' - 'A')) c1
+= 'a' - 'A';
881 if ((unsigned int)(c2
- 'A') <= ('Z' - 'A')) c2
+= 'a' - 'A';
892 st_strncasecmp(const char *s1
, const char *s2
, size_t n
)
897 c1
= (unsigned char)*s1
++;
898 c2
= (unsigned char)*s2
++;
899 if (c1
== '\0' || c2
== '\0') {
900 if (c1
!= '\0') return 1;
901 if (c2
!= '\0') return -1;
904 if ((unsigned int)(c1
- 'A') <= ('Z' - 'A')) c1
+= 'a' - 'A';
905 if ((unsigned int)(c2
- 'A') <= ('Z' - 'A')) c2
+= 'a' - 'A';
917 strcasehash(register const char *string
)
919 register unsigned int hval
= FNV1_32A_INIT
;
922 * FNV-1a hash each octet in the buffer
925 unsigned int c
= (unsigned char)*string
++;
926 if ((unsigned int)(c
- 'A') <= ('Z' - 'A')) c
+= 'a' - 'A';
929 /* multiply by the 32 bit FNV magic prime mod 2^32 */
930 hval
*= FNV_32_PRIME
;
936 st_numcmp(st_data_t x
, st_data_t y
)
942 st_numhash(st_data_t n
)