3 * strcache2 - New string cache.
7 * Copyright (c) 2008-2009 knut st. osmundsen <bird-kBuild-spamix@anduin.net>
9 * This file is part of kBuild.
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild. If not, see <http://www.gnu.org/licenses/>
26 /*******************************************************************************
28 *******************************************************************************/
30 #include "strcache2.h"
37 typedef unsigned char uint8_t;
38 typedef unsigned short uint16_t;
39 typedef unsigned int uint32_t;
40 typedef signed char int8_t;
41 typedef signed short int16_t;
42 typedef signed int int32_t;
51 # define PARSE_IN_WORKER
55 # include <sys/fmutex.h>
63 /*******************************************************************************
64 * Defined Constants And Macros *
65 *******************************************************************************/
66 /* The default size of a memory segment (1MB). */
67 #define STRCACHE2_SEG_SIZE (1024U*1024U)
68 /* The default hash table shift (hash size give as a power of two). */
69 #define STRCACHE2_HASH_SHIFT 16
70 /** Does the modding / masking of a hash number into an index. */
71 #ifdef STRCACHE2_USE_MASK
72 # define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
74 # define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
77 # if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
78 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
79 # define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
81 /* (endian doesn't matter) */
82 # define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
83 | (((const uint8_t *)(ptr))[1]) )
87 /*******************************************************************************
89 *******************************************************************************/
90 /* List of initialized string caches. */
91 static struct strcache2
*strcache_head
;
94 /** Finds the closest primary number for power of two value (or something else
95 * useful if not support). */
96 MY_INLINE
unsigned int strcache2_find_prime(unsigned int shift
)
105 case 10: return 1021;
106 case 11: return 2039;
107 case 12: return 4093;
108 case 13: return 8191;
109 case 14: return 16381;
110 case 15: return 32749;
111 case 16: return 65521;
112 case 17: return 131063;
113 case 18: return 262139;
114 case 19: return 524269;
115 case 20: return 1048573;
116 case 21: return 2097143;
117 case 22: return 4194301;
118 case 23: return 8388593;
122 return (1 << shift
) - 1;
126 /* The following is a bit experiment. It produces longer chains, i.e. worse
127 distribution of the strings in the table, however the actual make
128 performances is better (<time). The explanation is probably that the
129 collisions only really increase for entries that aren't looked up that
130 much and that it actually improoves the situation for those that is. Or
131 that we spend so much less time hashing that it makes up (and more) for
132 the pentalty we suffer from the longer chains and worse distribution.
134 XXX: Check how this works out with different path lengths. I suspect it
135 might depend on the length of PATH_ROOT and the depth of the files
136 in the project as well. If it does, this might make matters worse
137 for some and better for others which isn't very cool... */
140 # define BIG_HASH_SIZE 32 /* kinda fast */
141 # define BIG_HASH_HEAD 16
142 # define BIG_HASH_TAIL 12
144 # define BIG_HASH_SIZE 68 /* kinda safe */
145 # define BIG_HASH_HEAD 24
146 # define BIG_HASH_TAIL 24
148 # define BIG_HASH_SIZE 128 /* safe */
149 # define BIG_HASH_HEAD 32
150 # define BIG_HASH_TAIL 32
154 /* long string: hash head and tail, drop the middle. */
155 MY_INLINE
unsigned int
156 strcache2_case_sensitive_hash_big (const char *str
, unsigned int len
)
162 /* head BIG_HASH_HEAD bytes */
163 head
= (BIG_HASH_HEAD
>> 2);
166 hash
+= strcache2_get_unaligned_16bits (str
);
167 tmp
= (strcache2_get_unaligned_16bits (str
+ 2) << 11) ^ hash
;
168 hash
= (hash
<< 16) ^ tmp
;
169 str
+= 2 * sizeof (uint16_t);
173 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
174 str
+= (len
- BIG_HASH_HEAD
- BIG_HASH_TAIL
) & ~3U;
175 head
= (BIG_HASH_TAIL
>> 2);
178 hash
+= strcache2_get_unaligned_16bits (str
);
179 tmp
= (strcache2_get_unaligned_16bits (str
+ 2) << 11) ^ hash
;
180 hash
= (hash
<< 16) ^ tmp
;
181 str
+= 2 * sizeof (uint16_t);
185 /* force "avalanching" of final 127 bits. */
195 #endif /* BIG_HASH_SIZE */
197 MY_INLINE
unsigned int
198 strcache2_case_sensitive_hash (const char *str
, unsigned int len
)
201 /* Paul Hsieh hash SuperFast function:
202 http://www.azillionmonkeys.com/qed/hash.html
204 This performs very good and as a sligtly better distribution than
205 STRING_N_HASH_1 on a typical kBuild run.
207 It is also 37% faster than return_STRING_N_HASH_1 when running the
208 two 100 times over typical kBuild strings that end up here (did a
209 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
212 FIXME: A path for well aligned data should be added to speed up
213 execution on alignment sensitive systems. */
218 assert (sizeof (uint8_t) == sizeof (char));
220 # ifdef BIG_HASH_SIZE
222 # if 0 /*BIG_HASH_SIZE > 128*/
223 if (MY_PREDICT_FALSE(len
>= BIG_HASH_SIZE
))
225 if (len
>= BIG_HASH_SIZE
)
227 return strcache2_case_sensitive_hash_big (str
, len
);
230 /* short string: main loop, walking on 2 x uint16_t */
236 hash
+= strcache2_get_unaligned_16bits (str
);
237 tmp
= (strcache2_get_unaligned_16bits (str
+ 2) << 11) ^ hash
;
238 hash
= (hash
<< 16) ^ tmp
;
239 str
+= 2 * sizeof (uint16_t);
248 hash
+= strcache2_get_unaligned_16bits (str
);
250 hash
^= str
[sizeof (uint16_t)] << 18;
254 hash
+= strcache2_get_unaligned_16bits (str
);
265 /* force "avalanching" of final 127 bits. */
276 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
277 when running the two 100 times over typical kBuild strings that
278 end up here (did a fprintf here and built kBuild).
279 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
281 unsigned int hash
= 0;
282 if (MY_PREDICT_TRUE(len
>= 2))
284 unsigned int ch0
= *str
++;
289 unsigned int ch1
= *str
++;
290 hash
+= ch0
<< (ch1
& 0xf);
293 hash
+= ch1
<< (ch0
& 0xf);
300 hash
+= ch0
<< (ch1
& 0xf);
310 hash
+= hash
<< (hash
& 0xf);
318 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
319 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
320 first in the assignment made it noticably slower.)
322 However, it is noticably slower in practice, most likely because of more
323 collisions. Hrmpf. */
325 # define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
326 unsigned int hash
= 0;
329 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
330 fast than return_STRING_N_HASH_1.
334 # define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
335 unsigned int hash
= 5381;
341 UPDATE_HASH (str
[0]);
342 UPDATE_HASH (str
[1]);
343 UPDATE_HASH (str
[2]);
344 UPDATE_HASH (str
[3]);
354 UPDATE_HASH (str
[0]);
357 UPDATE_HASH (str
[0]);
358 UPDATE_HASH (str
[1]);
361 UPDATE_HASH (str
[0]);
362 UPDATE_HASH (str
[1]);
363 UPDATE_HASH (str
[2]);
369 MY_INLINE
unsigned int
370 strcache2_case_insensitive_hash (const char *str
, unsigned int len
)
372 unsigned int hash
= 0;
373 if (MY_PREDICT_TRUE(len
>= 2))
375 unsigned int ch0
= *str
++;
381 unsigned int ch1
= *str
++;
383 hash
+= ch0
<< (ch1
& 0xf);
387 hash
+= ch1
<< (ch0
& 0xf);
395 hash
+= ch0
<< (ch1
& 0xf);
405 hash
+= hash
<< (hash
& 0xf);
413 strcache2_memcmp_inline_short (const char *xs
, const char *ys
, unsigned int length
)
417 /* short string compare - ~50% of the kBuild calls. */
418 assert ( !((size_t)ys
& 3) );
419 if (!((size_t)xs
& 3))
425 default: /* memcmp for longer strings */
426 return memcmp (xs
, ys
, length
);
428 result
= *(int32_t*)(xs
+ 4) - *(int32_t*)(ys
+ 4);
429 result
|= *(int32_t*)xs
- *(int32_t*)ys
;
432 result
= xs
[6] - ys
[6];
433 result
|= xs
[5] - ys
[5];
434 result
|= xs
[4] - ys
[4];
435 result
|= *(int32_t*)xs
- *(int32_t*)ys
;
438 result
= xs
[5] - ys
[5];
439 result
|= xs
[4] - ys
[4];
440 result
|= *(int32_t*)xs
- *(int32_t*)ys
;
443 result
= xs
[4] - ys
[4];
444 result
|= *(int32_t*)xs
- *(int32_t*)ys
;
447 return *(int32_t*)xs
- *(int32_t*)ys
;
449 result
= xs
[2] - ys
[2];
450 result
|= xs
[1] - ys
[1];
451 result
|= xs
[0] - ys
[0];
454 result
= xs
[1] - ys
[1];
455 result
|= xs
[0] - ys
[0];
469 case 8: result
|= xs
[7] - ys
[7];
470 case 7: result
|= xs
[6] - ys
[6];
471 case 6: result
|= xs
[5] - ys
[5];
472 case 5: result
|= xs
[4] - ys
[4];
473 case 4: result
|= xs
[3] - ys
[3];
474 case 3: result
|= xs
[2] - ys
[2];
475 case 2: result
|= xs
[1] - ys
[1];
476 case 1: result
|= xs
[0] - ys
[0];
483 /* memcmp for longer strings */
484 return memcmp (xs
, ys
, length
);
488 strcache2_memcmp_inlined (const char *xs
, const char *ys
, unsigned int length
)
490 #ifndef ELECTRIC_HEAP
491 assert ( !((size_t)ys
& 3) );
493 if (!((size_t)xs
& 3))
499 result
= *(int32_t*)xs
- *(int32_t*)ys
;
500 result
|= *(int32_t*)(xs
+ 4) - *(int32_t*)(ys
+ 4);
501 if (MY_PREDICT_FALSE(result
))
510 result
= *(int32_t*)xs
- *(int32_t*)ys
;
511 result
|= xs
[6] - ys
[6];
512 result
|= xs
[5] - ys
[5];
513 result
|= xs
[4] - ys
[4];
516 result
= *(int32_t*)xs
- *(int32_t*)ys
;
517 result
|= xs
[5] - ys
[5];
518 result
|= xs
[4] - ys
[4];
521 result
= *(int32_t*)xs
- *(int32_t*)ys
;
522 result
|= xs
[4] - ys
[4];
525 return *(int32_t*)xs
- *(int32_t*)ys
;
527 result
= xs
[2] - ys
[2];
528 result
|= xs
[1] - ys
[1];
529 result
|= xs
[0] - ys
[0];
532 result
= xs
[1] - ys
[1];
533 result
|= xs
[0] - ys
[0];
548 #if defined(__i386__) || defined(__x86_64__)
549 result
= ( ((int32_t)xs
[3] << 24)
550 | ((int32_t)xs
[2] << 16)
551 | ((int32_t)xs
[1] << 8)
554 result
|= ( ((int32_t)xs
[7] << 24)
555 | ((int32_t)xs
[6] << 16)
556 | ((int32_t)xs
[5] << 8)
558 - *(int32_t*)(ys
+ 4);
560 result
= xs
[3] - ys
[3];
561 result
|= xs
[2] - ys
[2];
562 result
|= xs
[1] - ys
[1];
563 result
|= xs
[0] - ys
[0];
564 result
|= xs
[7] - ys
[7];
565 result
|= xs
[6] - ys
[6];
566 result
|= xs
[5] - ys
[5];
567 result
|= xs
[4] - ys
[4];
569 if (MY_PREDICT_FALSE(result
))
578 case 7: result
|= xs
[6] - ys
[6];
579 case 6: result
|= xs
[5] - ys
[5];
580 case 5: result
|= xs
[4] - ys
[4];
581 case 4: result
|= xs
[3] - ys
[3];
582 case 3: result
|= xs
[2] - ys
[2];
583 case 2: result
|= xs
[1] - ys
[1];
584 case 1: result
|= xs
[0] - ys
[0];
594 strcache2_is_equal (struct strcache2
*cache
, struct strcache2_entry
const *entry
,
595 const char *str
, unsigned int length
, unsigned int hash
)
597 assert (!cache
->case_insensitive
);
599 /* the simple stuff first. */
600 if ( entry
->hash
!= hash
601 || entry
->length
!= length
)
605 return memcmp (str
, entry
+ 1, length
) == 0;
607 return strcache2_memcmp_inlined (str
, (const char *)(entry
+ 1), length
) == 0;
609 return strcache2_memcmp_inline_short (str
, (const char *)(entry
+ 1), length
) == 0;
614 strcache2_is_iequal (struct strcache2
*cache
, struct strcache2_entry
const *entry
,
615 const char *str
, unsigned int length
, unsigned int hash
)
617 assert (cache
->case_insensitive
);
619 /* the simple stuff first. */
620 if ( entry
->hash
!= hash
621 || entry
->length
!= length
)
624 #if defined(_MSC_VER) || defined(__OS2__)
625 return _memicmp (entry
+ 1, str
, length
) == 0;
627 return strncasecmp ((const char *)(entry
+ 1), str
, length
) == 0;
632 strcache2_rehash (struct strcache2
*cache
)
634 unsigned int src
= cache
->hash_size
;
635 struct strcache2_entry
**src_tab
= cache
->hash_tab
;
636 struct strcache2_entry
**dst_tab
;
637 #ifndef STRCACHE2_USE_MASK
638 unsigned int hash_shift
;
641 /* Allocate a new hash table twice the size of the current. */
642 cache
->hash_size
<<= 1;
643 #ifdef STRCACHE2_USE_MASK
644 cache
->hash_mask
<<= 1;
645 cache
->hash_mask
|= 1;
647 for (hash_shift
= 1; (1U << hash_shift
) < cache
->hash_size
; hash_shift
++)
649 cache
->hash_div
= strcache2_find_prime (hash_shift
);
651 cache
->rehash_count
<<= 1;
652 cache
->hash_tab
= dst_tab
= (struct strcache2_entry
**)
653 xmalloc (cache
->hash_size
* sizeof (struct strcache2_entry
*));
654 memset (dst_tab
, '\0', cache
->hash_size
* sizeof (struct strcache2_entry
*));
656 /* Copy the entries from the old to the new hash table. */
657 cache
->collision_count
= 0;
660 struct strcache2_entry
*entry
= src_tab
[src
];
663 struct strcache2_entry
*next
= entry
->next
;
664 unsigned int dst
= STRCACHE2_MOD_IT (cache
, entry
->hash
);
665 if ((entry
->next
= dst_tab
[dst
]) != 0)
666 cache
->collision_count
++;
667 dst_tab
[dst
] = entry
;
673 /* That's it, just free the old table and we're done. */
677 static struct strcache2_seg
*
678 strcache2_new_seg (struct strcache2
*cache
, unsigned int minlen
)
680 struct strcache2_seg
*seg
;
684 size
= cache
->def_seg_size
;
685 if (size
< (size_t)minlen
+ sizeof (struct strcache2_seg
) + STRCACHE2_ENTRY_ALIGNMENT
)
687 size
= (size_t)minlen
* 2;
688 size
= (size
+ 0xfff) & ~(size_t)0xfff;
691 seg
= xmalloc (size
);
692 seg
->start
= (char *)(seg
+ 1);
693 seg
->size
= size
- sizeof (struct strcache2_seg
);
694 off
= (size_t)seg
->start
& (STRCACHE2_ENTRY_ALIGNMENT
- 1);
697 off
= STRCACHE2_ENTRY_ALIGNMENT
- off
;
701 assert (seg
->size
> minlen
);
702 seg
->cursor
= seg
->start
;
703 seg
->avail
= seg
->size
;
705 seg
->next
= cache
->seg_head
;
706 cache
->seg_head
= seg
;
711 /* Internal worker that enters a new string into the cache. */
713 strcache2_enter_string (struct strcache2
*cache
, unsigned int idx
,
714 const char *str
, unsigned int length
,
717 struct strcache2_entry
*entry
;
718 struct strcache2_seg
*seg
;
722 /* Allocate space for the string. */
724 size
= length
+ 1 + sizeof (struct strcache2_entry
);
725 size
= (size
+ STRCACHE2_ENTRY_ALIGNMENT
- 1) & ~(STRCACHE2_ENTRY_ALIGNMENT
- 1U);
727 seg
= cache
->seg_head
;
728 if (MY_PREDICT_FALSE(seg
->avail
< size
))
732 while (seg
&& seg
->avail
< size
);
734 seg
= strcache2_new_seg (cache
, size
);
737 entry
= (struct strcache2_entry
*) seg
->cursor
;
738 assert (!((size_t)entry
& (STRCACHE2_ENTRY_ALIGNMENT
- 1)));
742 /* Setup the entry, copy the string and insert it into the hash table. */
745 entry
->length
= length
;
747 str_copy
= (char *) memcpy (entry
+ 1, str
, length
);
748 str_copy
[length
] = '\0';
750 if ((entry
->next
= cache
->hash_tab
[idx
]) != 0)
751 cache
->collision_count
++;
752 cache
->hash_tab
[idx
] = entry
;
754 if (cache
->count
>= cache
->rehash_count
)
755 strcache2_rehash (cache
);
760 /* The public add string interface. */
762 strcache2_add (struct strcache2
*cache
, const char *str
, unsigned int length
)
764 struct strcache2_entry
const *entry
;
765 unsigned int hash
= strcache2_case_sensitive_hash (str
, length
);
768 assert (!cache
->case_insensitive
);
769 assert (!memchr (str
, '\0', length
));
771 MAKE_STATS (cache
->lookup_count
++);
773 /* Lookup the entry in the hash table, hoping for an
774 early match. If not found, enter the string at IDX. */
775 idx
= STRCACHE2_MOD_IT (cache
, hash
);
776 entry
= cache
->hash_tab
[idx
];
778 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
779 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
780 return (const char *)(entry
+ 1);
781 MAKE_STATS (cache
->collision_1st_count
++);
785 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
786 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
787 return (const char *)(entry
+ 1);
788 MAKE_STATS (cache
->collision_2nd_count
++);
795 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
796 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
797 return (const char *)(entry
+ 1);
798 MAKE_STATS (cache
->collision_3rd_count
++);
803 /* The public add string interface for prehashed strings.
804 Use strcache2_hash_str to calculate the hash of a string. */
806 strcache2_add_hashed (struct strcache2
*cache
, const char *str
,
807 unsigned int length
, unsigned int hash
)
809 struct strcache2_entry
const *entry
;
812 unsigned correct_hash
;
814 assert (!cache
->case_insensitive
);
815 assert (!memchr (str
, '\0', length
));
816 correct_hash
= strcache2_case_sensitive_hash (str
, length
);
817 MY_ASSERT_MSG (hash
== correct_hash
, ("%#x != %#x\n", hash
, correct_hash
));
820 MAKE_STATS (cache
->lookup_count
++);
822 /* Lookup the entry in the hash table, hoping for an
823 early match. If not found, enter the string at IDX. */
824 idx
= STRCACHE2_MOD_IT (cache
, hash
);
825 entry
= cache
->hash_tab
[idx
];
827 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
828 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
829 return (const char *)(entry
+ 1);
830 MAKE_STATS (cache
->collision_1st_count
++);
834 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
835 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
836 return (const char *)(entry
+ 1);
837 MAKE_STATS (cache
->collision_2nd_count
++);
844 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
845 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
846 return (const char *)(entry
+ 1);
847 MAKE_STATS (cache
->collision_3rd_count
++);
852 /* The public lookup (case sensitive) string interface. */
854 strcache2_lookup (struct strcache2
*cache
, const char *str
, unsigned int length
)
856 struct strcache2_entry
const *entry
;
857 unsigned int hash
= strcache2_case_sensitive_hash (str
, length
);
860 assert (!cache
->case_insensitive
);
861 assert (!memchr (str
, '\0', length
));
863 MAKE_STATS (cache
->lookup_count
++);
865 /* Lookup the entry in the hash table, hoping for an
867 idx
= STRCACHE2_MOD_IT (cache
, hash
);
868 entry
= cache
->hash_tab
[idx
];
871 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
872 return (const char *)(entry
+ 1);
873 MAKE_STATS (cache
->collision_1st_count
++);
878 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
879 return (const char *)(entry
+ 1);
880 MAKE_STATS (cache
->collision_2nd_count
++);
888 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
889 return (const char *)(entry
+ 1);
890 MAKE_STATS (cache
->collision_3rd_count
++);
895 #if defined(HAVE_CASE_INSENSITIVE_FS)
897 /* The public add string interface for case insensitive strings. */
899 strcache2_iadd (struct strcache2
*cache
, const char *str
, unsigned int length
)
901 struct strcache2_entry
const *entry
;
902 unsigned int hash
= strcache2_case_insensitive_hash (str
, length
);
905 assert (cache
->case_insensitive
);
906 assert (!memchr (str
, '\0', length
));
908 MAKE_STATS (cache
->lookup_count
++);
910 /* Lookup the entry in the hash table, hoping for an
911 early match. If not found, enter the string at IDX. */
912 idx
= STRCACHE2_MOD_IT (cache
, hash
);
913 entry
= cache
->hash_tab
[idx
];
915 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
916 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
917 return (const char *)(entry
+ 1);
918 MAKE_STATS (cache
->collision_1st_count
++);
922 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
923 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
924 return (const char *)(entry
+ 1);
925 MAKE_STATS (cache
->collision_2nd_count
++);
932 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
933 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
934 return (const char *)(entry
+ 1);
935 MAKE_STATS (cache
->collision_3rd_count
++);
940 /* The public add string interface for prehashed case insensitive strings.
941 Use strcache2_hash_istr to calculate the hash of a string. */
943 strcache2_iadd_hashed (struct strcache2
*cache
, const char *str
,
944 unsigned int length
, unsigned int hash
)
946 struct strcache2_entry
const *entry
;
949 unsigned correct_hash
;
951 assert (cache
->case_insensitive
);
952 assert (!memchr (str
, '\0', length
));
953 correct_hash
= strcache2_case_insensitive_hash (str
, length
);
954 MY_ASSERT_MSG (hash
== correct_hash
, ("%#x != %#x\n", hash
, correct_hash
));
957 MAKE_STATS (cache
->lookup_count
++);
959 /* Lookup the entry in the hash table, hoping for an
960 early match. If not found, enter the string at IDX. */
961 idx
= STRCACHE2_MOD_IT (cache
, hash
);
962 entry
= cache
->hash_tab
[idx
];
964 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
965 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
966 return (const char *)(entry
+ 1);
967 MAKE_STATS (cache
->collision_1st_count
++);
971 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
972 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
973 return (const char *)(entry
+ 1);
974 MAKE_STATS (cache
->collision_2nd_count
++);
981 return strcache2_enter_string (cache
, idx
, str
, length
, hash
);
982 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
983 return (const char *)(entry
+ 1);
984 MAKE_STATS (cache
->collision_3rd_count
++);
989 /* The public lookup (case insensitive) string interface. */
991 strcache2_ilookup (struct strcache2
*cache
, const char *str
, unsigned int length
)
993 struct strcache2_entry
const *entry
;
994 unsigned int hash
= strcache2_case_insensitive_hash (str
, length
);
997 assert (cache
->case_insensitive
);
998 assert (!memchr (str
, '\0', length
));
1000 MAKE_STATS (cache
->lookup_count
++);
1002 /* Lookup the entry in the hash table, hoping for an
1004 idx
= STRCACHE2_MOD_IT (cache
, hash
);
1005 entry
= cache
->hash_tab
[idx
];
1008 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
1009 return (const char *)(entry
+ 1);
1010 MAKE_STATS (cache
->collision_1st_count
++);
1012 entry
= entry
->next
;
1015 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
1016 return (const char *)(entry
+ 1);
1017 MAKE_STATS (cache
->collision_2nd_count
++);
1019 /* Loop the rest. */
1022 entry
= entry
->next
;
1025 if (strcache2_is_equal (cache
, entry
, str
, length
, hash
))
1026 return (const char *)(entry
+ 1);
1027 MAKE_STATS (cache
->collision_3rd_count
++);
1032 #endif /* HAVE_CASE_INSENSITIVE_FS */
1034 /* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1036 strcache2_is_cached (struct strcache2
*cache
, const char *str
)
1038 /* Check mandatory alignment first. */
1039 if (!((size_t)str
& (sizeof (void *) - 1)))
1041 /* Check the segment list and consider the question answered if the
1042 string is within one of them. (Could check it more thoroughly...) */
1043 struct strcache2_seg
const *seg
;
1044 for (seg
= cache
->seg_head
; seg
; seg
= seg
->next
)
1045 if ((size_t)(str
- seg
->start
) < seg
->size
)
1053 /* Verify the integrity of the specified string, returning 0 if OK. */
1055 strcache2_verify_entry (struct strcache2
*cache
, const char *str
)
1057 struct strcache2_entry
const *entry
;
1059 unsigned int length
;
1062 entry
= (struct strcache2_entry
const *)str
- 1;
1063 if ((size_t)entry
& (STRCACHE2_ENTRY_ALIGNMENT
- 1))
1066 "strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1067 cache
->name
, (void *)entry
, (void *)str
, str
);
1071 end
= memchr (str
, '\0', entry
->length
+ 1);
1073 if (length
!= entry
->length
)
1076 "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1077 cache
->name
, (void *)entry
, length
, entry
->length
, str
);
1081 hash
= cache
->case_insensitive
1082 ? strcache2_case_insensitive_hash (str
, entry
->length
)
1083 : strcache2_case_sensitive_hash (str
, entry
->length
);
1084 if (hash
!= entry
->hash
)
1087 "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1088 cache
->name
, (void *)entry
, hash
, entry
->hash
, str
);
1096 /* Calculates the case sensitive hash values of the string.
1097 The first hash is returned, the other is put at HASH2P. */
1098 unsigned int strcache2_hash_str (const char *str
, unsigned int length
, unsigned int *hash2p
)
1101 return strcache2_case_sensitive_hash (str
, length
);
1104 /* Calculates the case insensitive hash values of the string.
1105 The first hash is returned, the other is put at HASH2P. */
1106 unsigned int strcache2_hash_istr (const char *str
, unsigned int length
, unsigned int *hash2p
)
1109 return strcache2_case_insensitive_hash (str
, length
);
1114 /* Initalizes a new cache. */
1116 strcache2_init (struct strcache2
*cache
, const char *name
, unsigned int size
,
1117 unsigned int def_seg_size
, int case_insensitive
, int thread_safe
)
1119 unsigned hash_shift
;
1120 assert (!thread_safe
);
1122 /* calc the size as a power of two */
1124 hash_shift
= STRCACHE2_HASH_SHIFT
;
1127 assert (size
<= (~0U / 2 + 1));
1128 for (hash_shift
= 8; (1U << hash_shift
) < size
; hash_shift
++)
1132 /* adjust the default segment size */
1134 def_seg_size
= STRCACHE2_SEG_SIZE
;
1135 else if (def_seg_size
< sizeof (struct strcache2_seg
) * 10)
1136 def_seg_size
= sizeof (struct strcache2_seg
) * 10;
1137 else if ((def_seg_size
& 0xfff) < 0xf00)
1138 def_seg_size
= ((def_seg_size
+ 0xfff) & ~0xfffU
);
1141 /* init the structure. */
1142 cache
->case_insensitive
= case_insensitive
;
1143 #ifdef STRCACHE2_USE_MASK
1144 cache
->hash_mask
= (1U << hash_shift
) - 1U;
1146 cache
->hash_div
= strcache2_find_prime(hash_shift
);
1149 cache
->collision_count
= 0;
1150 cache
->lookup_count
= 0;
1151 cache
->collision_1st_count
= 0;
1152 cache
->collision_2nd_count
= 0;
1153 cache
->collision_3rd_count
= 0;
1154 cache
->rehash_count
= (1U << hash_shift
) / 4 * 3; /* rehash at 75% */
1155 cache
->init_size
= 1U << hash_shift
;
1156 cache
->hash_size
= 1U << hash_shift
;
1157 cache
->def_seg_size
= def_seg_size
;
1161 /* allocate the hash table and first segment. */
1162 cache
->hash_tab
= (struct strcache2_entry
**)
1163 xmalloc (cache
->init_size
* sizeof (struct strcache2_entry
*));
1164 memset (cache
->hash_tab
, '\0', cache
->init_size
* sizeof (struct strcache2_entry
*));
1165 strcache2_new_seg (cache
, 0);
1168 cache
->next
= strcache_head
;
1169 strcache_head
= cache
;
1173 /* Terminates a string cache, freeing all memory and other
1174 associated resources. */
1176 strcache2_term (struct strcache2
*cache
)
1179 if (strcache_head
== cache
)
1180 strcache_head
= cache
->next
;
1183 struct strcache2
*prev
= strcache_head
;
1184 while (prev
->next
!= cache
)
1187 prev
->next
= cache
->next
;
1190 /* free the memory segments */
1193 void *free_it
= cache
->seg_head
;
1194 cache
->seg_head
= cache
->seg_head
->next
;
1197 while (cache
->seg_head
);
1199 /* free the hash and clear the structure. */
1200 free (cache
->hash_tab
);
1201 memset (cache
, '\0', sizeof (struct strcache2
));
1204 /* Print statistics a string cache. */
1206 strcache2_print_stats (struct strcache2
*cache
, const char *prefix
)
1208 unsigned int seg_count
= 0;
1209 unsigned long seg_total_bytes
= 0;
1210 unsigned long seg_avg_bytes
;
1211 unsigned long seg_avail_bytes
= 0;
1212 unsigned long seg_max_bytes
= 0;
1213 struct strcache2_seg
*seg
;
1214 unsigned int str_count
= 0;
1215 unsigned long str_total_len
= 0;
1216 unsigned int str_avg_len
;
1217 unsigned int str_min_len
= ~0U;
1218 unsigned int str_max_len
= 0;
1220 unsigned int rehashes
;
1221 unsigned int chain_depths
[32];
1223 printf (_("\n%s strcache2: %s\n"), prefix
, cache
->name
);
1225 /* Segment statistics. */
1226 for (seg
= cache
->seg_head
; seg
; seg
= seg
->next
)
1229 seg_total_bytes
+= seg
->size
;
1230 seg_avail_bytes
+= seg
->avail
;
1231 if (seg
->size
> seg_max_bytes
)
1232 seg_max_bytes
= seg
->size
;
1234 seg_avg_bytes
= seg_total_bytes
/ seg_count
;
1235 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1236 prefix
, seg_count
, seg_total_bytes
, seg_max_bytes
, seg_avg_bytes
,
1237 cache
->def_seg_size
, seg_avail_bytes
);
1239 /* String statistics. */
1240 memset (chain_depths
, '\0', sizeof (chain_depths
));
1241 idx
= cache
->hash_size
;
1244 struct strcache2_entry
const *entry
= cache
->hash_tab
[idx
];
1245 unsigned int depth
= 0;
1246 for (; entry
!= 0; entry
= entry
->next
, depth
++)
1248 unsigned int length
= entry
->length
;
1249 str_total_len
+= length
;
1250 if (length
> str_max_len
)
1251 str_max_len
= length
;
1252 if (length
< str_min_len
)
1253 str_min_len
= length
;
1256 chain_depths
[depth
>= 32 ? 31 : depth
]++;
1258 str_avg_len
= cache
->count
? str_total_len
/ cache
->count
: 0;
1259 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1260 prefix
, cache
->count
, str_total_len
, str_max_len
, str_avg_len
, str_min_len
);
1261 if (str_count
!= cache
->count
)
1262 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix
,
1263 cache
->count
, str_count
);
1265 /* Hash statistics. */
1266 idx
= cache
->init_size
;
1268 while (idx
< cache
->hash_size
)
1274 #ifdef STRCACHE2_USE_MASK
1275 printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1276 prefix
, cache
->hash_size
, cache
->hash_mask
, rehashes
);
1278 printf (_("%s hash size = %u div = %#x rehashed %u times"),
1279 prefix
, cache
->hash_size
, cache
->hash_div
, rehashes
);
1281 if (cache
->lookup_count
)
1282 printf (_("%s lookups = %lu\n"
1283 "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1284 prefix
, cache
->lookup_count
,
1286 cache
->collision_1st_count
, (unsigned int)((100.0 * cache
->collision_1st_count
) / cache
->lookup_count
),
1287 cache
->collision_2nd_count
, (unsigned int)((100.0 * cache
->collision_2nd_count
) / cache
->lookup_count
),
1288 cache
->collision_3rd_count
, (unsigned int)((100.0 * cache
->collision_3rd_count
) / cache
->lookup_count
));
1289 printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1290 prefix
, cache
->collision_count
,(unsigned int)((100.0 * cache
->collision_count
) / cache
->count
));
1291 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1292 prefix
, chain_depths
[0], (unsigned int)((100.0 * chain_depths
[0]) / cache
->hash_size
));
1293 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1294 prefix
, chain_depths
[1], (unsigned int)((100.0 * chain_depths
[1]) / cache
->hash_size
));
1295 for (idx
= 2; idx
< 32; idx
++)
1297 unsigned strs_at_this_depth
= chain_depths
[idx
];
1299 for (i
= idx
+ 1; i
< 32; i
++)
1300 strs_at_this_depth
+= chain_depths
[i
];
1301 if (strs_at_this_depth
)
1302 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1303 prefix
, chain_depths
[idx
], (unsigned int)((100.0 * chain_depths
[idx
]) / (cache
->count
- cache
->collision_count
)),
1304 idx
- 1, idx
== 2 ? " " : "s",
1305 strs_at_this_depth
, (unsigned int)((100.0 * strs_at_this_depth
) / cache
->count
), idx
- 1);
1309 /* Print statistics for all string caches. */
1311 strcache2_print_stats_all (const char *prefix
)
1313 struct strcache2
*cur
;
1314 for (cur
= strcache_head
; cur
; cur
= cur
->next
)
1315 strcache2_print_stats (cur
, prefix
);