/* Commit: removed some of the debug logging and added author details
 * [httpd-crcsyncproxy.git] / modules / cache / cache_hash.c
 * blob: 202cf9f70c40665bddbafcd2eb9ad459bd23c13a
 */
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "apr_general.h"

#include "mod_cache.h"
#include "cache_hash.h"

#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if APR_HAVE_STRING_H
#include <string.h>
#endif
31 * The internal form of a hash table.
33 * The table is an array indexed by the hash of the key; collisions
34 * are resolved by hanging a linked list of hash entries off each
35 * element of the array. Although this is a really simple design it
36 * isn't too bad given that pools have a low allocation overhead.
39 typedef struct cache_hash_entry_t cache_hash_entry_t;
41 struct cache_hash_entry_t {
42 cache_hash_entry_t *next;
43 unsigned int hash;
44 const void *key;
45 apr_ssize_t klen;
46 const void *val;
50 * Data structure for iterating through a hash table.
52 * We keep a pointer to the next hash entry here to allow the current
53 * hash entry to be freed or otherwise mangled between calls to
54 * cache_hash_next().
56 struct cache_hash_index_t {
57 cache_hash_t *ht;
58 cache_hash_entry_t *this, *next;
59 int index;
63 * The size of the array is always a power of two. We use the maximum
64 * index rather than the size so that we can use bitwise-AND for
65 * modular arithmetic.
66 * The count of hash entries may be greater depending on the chosen
67 * collision rate.
69 struct cache_hash_t {
70 cache_hash_entry_t **array;
71 cache_hash_index_t iterator; /* For cache_hash_first(NULL, ...) */
72 int count, max;
76 * Hash creation functions.
78 static cache_hash_entry_t **alloc_array(cache_hash_t *ht, int max)
80 return calloc(1, sizeof(*ht->array) * (max + 1));
83 cache_hash_t* cache_hash_make(apr_size_t size)
85 cache_hash_t *ht;
86 ht = malloc(sizeof(cache_hash_t));
87 if (!ht) {
88 return NULL;
90 ht->count = 0;
91 ht->max = size;
92 ht->array = alloc_array(ht, ht->max);
93 if (!ht->array) {
94 free(ht);
95 return NULL;
97 return ht;
100 void cache_hash_free(cache_hash_t *ht)
102 if (ht) {
103 if (ht->array) {
104 free (ht->array);
106 free (ht);
110 * Hash iteration functions.
113 cache_hash_index_t* cache_hash_next(cache_hash_index_t *hi)
115 hi->this = hi->next;
116 while (!hi->this) {
117 if (hi->index > hi->ht->max)
118 return NULL;
119 hi->this = hi->ht->array[hi->index++];
121 hi->next = hi->this->next;
122 return hi;
125 cache_hash_index_t* cache_hash_first(cache_hash_t *ht)
127 cache_hash_index_t *hi;
129 hi = &ht->iterator;
130 hi->ht = ht;
131 hi->index = 0;
132 hi->this = NULL;
133 hi->next = NULL;
134 return cache_hash_next(hi);
137 void cache_hash_this(cache_hash_index_t *hi,
138 const void **key,
139 apr_ssize_t *klen,
140 void **val)
142 if (key) *key = hi->this->key;
143 if (klen) *klen = hi->this->klen;
144 if (val) *val = (void *)hi->this->val;
149 * This is where we keep the details of the hash function and control
150 * the maximum collision rate.
152 * If val is non-NULL it creates and initializes a new hash entry if
153 * there isn't already one there; it returns an updatable pointer so
154 * that hash entries can be removed.
157 static cache_hash_entry_t **find_entry(cache_hash_t *ht,
158 const void *key,
159 apr_ssize_t klen,
160 const void *val)
162 cache_hash_entry_t **hep, *he;
163 const unsigned char *p;
164 unsigned int hash;
165 apr_ssize_t i;
168 * This is the popular `times 33' hash algorithm which is used by
169 * perl and also appears in Berkeley DB. This is one of the best
170 * known hash functions for strings because it is both computed
171 * very fast and distributes very well.
173 * The originator may be Dan Bernstein but the code in Berkeley DB
174 * cites Chris Torek as the source. The best citation I have found
175 * is "Chris Torek, Hash function for text in C, Usenet message
176 * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
177 * Salz's USENIX 1992 paper about INN which can be found at
178 * <http://citeseer.nj.nec.com/salz92internetnews.html>.
180 * The magic of number 33, i.e. why it works better than many other
181 * constants, prime or not, has never been adequately explained by
182 * anyone. So I try an explanation: if one experimentally tests all
183 * multipliers between 1 and 256 (as I did while writing a low-level
184 * data structure library some time ago) one detects that even
185 * numbers are not useable at all. The remaining 128 odd numbers
186 * (except for the number 1) work more or less all equally well.
187 * They all distribute in an acceptable way and this way fill a hash
188 * table with an average percent of approx. 86%.
190 * If one compares the chi^2 values of the variants (see
191 * Bob Jenkins ``Hashing Frequently Asked Questions'' at
192 * http://burtleburtle.net/bob/hash/hashfaq.html for a description
193 * of chi^2), the number 33 not even has the best value. But the
194 * number 33 and a few other equally good numbers like 17, 31, 63,
195 * 127 and 129 have nevertheless a great advantage to the remaining
196 * numbers in the large set of possible multipliers: their multiply
197 * operation can be replaced by a faster operation based on just one
198 * shift plus either a single addition or subtraction operation. And
199 * because a hash function has to both distribute good _and_ has to
200 * be very fast to compute, those few numbers should be preferred.
202 * -- Ralf S. Engelschall <rse@engelschall.com>
204 hash = 0;
205 if (klen == CACHE_HASH_KEY_STRING) {
206 for (p = key; *p; p++) {
207 hash = hash * 33 + *p;
209 klen = p - (const unsigned char *)key;
211 else {
212 for (p = key, i = klen; i; i--, p++) {
213 hash = hash * 33 + *p;
217 /* scan linked list */
218 for (hep = &ht->array[hash % ht->max], he = *hep;
220 hep = &he->next, he = *hep) {
221 if (he->hash == hash &&
222 he->klen == klen &&
223 memcmp(he->key, key, klen) == 0)
224 break;
226 if (he || !val)
227 return hep;
228 /* add a new entry for non-NULL values */
229 he = malloc(sizeof(*he));
230 if (!he) {
231 return NULL;
233 he->next = NULL;
234 he->hash = hash;
235 he->key = key;
236 he->klen = klen;
237 he->val = val;
238 *hep = he;
239 ht->count++;
240 return hep;
243 void* cache_hash_get(cache_hash_t *ht,
244 const void *key,
245 apr_ssize_t klen)
247 cache_hash_entry_t *he;
248 he = *find_entry(ht, key, klen, NULL);
249 if (he)
250 return (void *)he->val;
251 else
252 return NULL;
255 void* cache_hash_set(cache_hash_t *ht,
256 const void *key,
257 apr_ssize_t klen,
258 const void *val)
260 cache_hash_entry_t **hep, *tmp;
261 const void *tval;
262 hep = find_entry(ht, key, klen, val);
263 /* If hep == NULL, then the malloc() in find_entry failed */
264 if (hep && *hep) {
265 if (!val) {
266 /* delete entry */
267 tval = (*hep)->val;
268 tmp = *hep;
269 *hep = (*hep)->next;
270 free(tmp);
271 --ht->count;
273 else {
274 /* replace entry */
275 tval = (*hep)->val;
276 (*hep)->val = val;
278 /* Return the object just removed from the cache to let the
279 * caller clean it up. Cast the constness away upon return.
281 return (void *) tval;
283 /* else key not present and val==NULL */
284 return NULL;
287 int cache_hash_count(cache_hash_t *ht)
289 return ht->count;