1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "apr_general.h"
19 #include "mod_cache.h"
20 #include "cache_hash.h"
31 * The internal form of a hash table.
33 * The table is an array indexed by the hash of the key; collisions
34 * are resolved by hanging a linked list of hash entries off each
35 * element of the array. Although this is a really simple design it
36 * isn't too bad given that pools have a low allocation overhead.
/* Forward typedef: lets struct cache_hash_entry_t refer to its own type
 * (its `next` link) by the short name before the struct body is seen. */
39 typedef struct cache_hash_entry_t cache_hash_entry_t
;
41 struct cache_hash_entry_t
{
42 cache_hash_entry_t
*next
;
50 * Data structure for iterating through a hash table.
52 * We keep a pointer to the next hash entry here to allow the current
53 * hash entry to be freed or otherwise mangled between calls to
56 struct cache_hash_index_t
{
58 cache_hash_entry_t
*this, *next
;
63 * The size of the array is always a power of two. We use the maximum
64 * index rather than the size so that we can use bitwise-AND for
66 * The count of hash entries may be greater depending on the chosen
70 cache_hash_entry_t
**array
;
71 cache_hash_index_t iterator
; /* For cache_hash_first(NULL, ...) */
76 * Hash creation functions.
78 static cache_hash_entry_t
**alloc_array(cache_hash_t
*ht
, int max
)
80 return calloc(1, sizeof(*ht
->array
) * (max
+ 1));
83 cache_hash_t
* cache_hash_make(apr_size_t size
)
86 ht
= malloc(sizeof(cache_hash_t
));
92 ht
->array
= alloc_array(ht
, ht
->max
);
100 void cache_hash_free(cache_hash_t
*ht
)
110 * Hash iteration functions.
113 cache_hash_index_t
* cache_hash_next(cache_hash_index_t
*hi
)
117 if (hi
->index
> hi
->ht
->max
)
119 hi
->this = hi
->ht
->array
[hi
->index
++];
121 hi
->next
= hi
->this->next
;
125 cache_hash_index_t
* cache_hash_first(cache_hash_t
*ht
)
127 cache_hash_index_t
*hi
;
134 return cache_hash_next(hi
);
137 void cache_hash_this(cache_hash_index_t
*hi
,
142 if (key
) *key
= hi
->this->key
;
143 if (klen
) *klen
= hi
->this->klen
;
144 if (val
) *val
= (void *)hi
->this->val
;
149 * This is where we keep the details of the hash function and control
150 * the maximum collision rate.
152 * If val is non-NULL it creates and initializes a new hash entry if
153 * there isn't already one there; it returns an updatable pointer so
154 * that hash entries can be removed.
157 static cache_hash_entry_t
**find_entry(cache_hash_t
*ht
,
162 cache_hash_entry_t
**hep
, *he
;
163 const unsigned char *p
;
168 * This is the popular `times 33' hash algorithm which is used by
169 * perl and also appears in Berkeley DB. This is one of the best
170 * known hash functions for strings because it is both computed
171 * very fast and distributes very well.
173 * The originator may be Dan Bernstein but the code in Berkeley DB
174 * cites Chris Torek as the source. The best citation I have found
175 * is "Chris Torek, Hash function for text in C, Usenet message
176 * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
177 * Salz's USENIX 1992 paper about INN which can be found at
178 * <http://citeseer.nj.nec.com/salz92internetnews.html>.
180 * The magic of number 33, i.e. why it works better than many other
181 * constants, prime or not, has never been adequately explained by
182 * anyone. So I try an explanation: if one experimentally tests all
183 * multipliers between 1 and 256 (as I did while writing a low-level
184 * data structure library some time ago) one detects that even
185 * numbers are not useable at all. The remaining 128 odd numbers
186 * (except for the number 1) work more or less all equally well.
187 * They all distribute in an acceptable way and this way fill a hash
188 * table with an average percent of approx. 86%.
190 * If one compares the chi^2 values of the variants (see
191 * Bob Jenkins ``Hashing Frequently Asked Questions'' at
192 * http://burtleburtle.net/bob/hash/hashfaq.html for a description
193 * of chi^2), the number 33 does not even have the best value. But the
194 * number 33 and a few other equally good numbers like 17, 31, 63,
195 * 127 and 129 have nevertheless a great advantage to the remaining
196 * numbers in the large set of possible multipliers: their multiply
197 * operation can be replaced by a faster operation based on just one
198 * shift plus either a single addition or subtraction operation. And
199 * because a hash function has to both distribute well _and_ has to
200 * be very fast to compute, those few numbers should be preferred.
202 * -- Ralf S. Engelschall <rse@engelschall.com>
205 if (klen
== CACHE_HASH_KEY_STRING
) {
206 for (p
= key
; *p
; p
++) {
207 hash
= hash
* 33 + *p
;
209 klen
= p
- (const unsigned char *)key
;
212 for (p
= key
, i
= klen
; i
; i
--, p
++) {
213 hash
= hash
* 33 + *p
;
217 /* scan linked list */
218 for (hep
= &ht
->array
[hash
% ht
->max
], he
= *hep
;
220 hep
= &he
->next
, he
= *hep
) {
221 if (he
->hash
== hash
&&
223 memcmp(he
->key
, key
, klen
) == 0)
228 /* add a new entry for non-NULL values */
229 he
= malloc(sizeof(*he
));
243 void* cache_hash_get(cache_hash_t
*ht
,
247 cache_hash_entry_t
*he
;
248 he
= *find_entry(ht
, key
, klen
, NULL
);
250 return (void *)he
->val
;
255 void* cache_hash_set(cache_hash_t
*ht
,
260 cache_hash_entry_t
**hep
, *tmp
;
262 hep
= find_entry(ht
, key
, klen
, val
);
263 /* If hep == NULL, then the malloc() in find_entry failed */
278 /* Return the object just removed from the cache to let the
279 * caller clean it up. Cast the constness away upon return.
281 return (void *) tval
;
283 /* else key not present and val==NULL */
287 int cache_hash_count(cache_hash_t
*ht
)