src/util/hash.c

   1 /** Hashing infrastructure
   2  * @file */
   3
   4 #ifdef HAVE_CONFIG_H
   5 #include "config.h"
   6 #endif
   7
   8 #include <string.h>
   9
  10 #include "elinks.h"
  11
  12 #include "util/hash.h"
  13 #include "util/memory.h"
  14
  15
  16 /* String hashes functions chooser. */
  17 /* #define MIX_HASH */ /* Far more better but slower */
  18 #define X31_HASH /* Weaker but faster */
  19
  20
  21 /* We provide common infrastructure for hashing - each hash consists from one
  22  * particularly large array full of small lists of keys with same index in the
  23  * array (same hash value). */
  24
  25 #define hash_mask(n) (hash_size(n) - 1)
  26 #define hash_size(n) (1 << (n))
  27
  28 static hash_value_T strhash(unsigned char *k, unsigned int length, hash_value_T initval);
  29
  30 static inline struct hash *
  31 init_hash(unsigned int width, hash_func_T func)
  32 {
  33         struct hash *hash;
  34         unsigned int i = 0;
  35
  36         assert(width > 0 && func);
  37         if_assert_failed return NULL;
  38
  39         /* One is already reserved in struct hash, so use size - 1. */
  40         hash = mem_alloc(sizeof(*hash) + (hash_size(width) - 1)
  41                          * sizeof(struct list_head));
  42         if (!hash) return NULL;
  43
  44         hash->width = width;
  45         hash->func = func;
  46
  47         /* Initialize dummy list_heads */
  48         for (; i < hash_size(width); i++)
  49                 init_list(hash->hash[i]);
  50
  51         return hash;
  52 }
  53
  54 /** @relates hash */
  55 struct hash *
  56 init_hash8(void)
  57 {
  58         return init_hash(8, &strhash);
  59 }
  60
  61 /** @relates hash */
  62 void
  63 free_hash(struct hash **hashp)
  64 {
  65         unsigned int i = 0;
  66
  67         assert(hashp && *hashp);
  68         if_assert_failed return;
  69
  70         for (; i < hash_size((*hashp)->width); i++)
  71                 free_list((*hashp)->hash[i]);
  72
  73         mem_free_set(hashp, NULL);
  74 }
  75
  76
  77 /** Initialization vector for the hash function.
  78  * I've no much idea about what to set here.. I think it doesn't matter much
  79  * anyway.. ;) --pasky */
  80 #define HASH_MAGIC 0xdeadbeef
  81
  82 /** @returns hash_item if ok, NULL if error.
  83  * @relates hash */
  84 struct hash_item *
  85 add_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen,
  86               void *value)
  87 {
  88         hash_value_T hashval;
  89         struct hash_item *item = mem_alloc(sizeof(*item));
  90
  91         if (!item) return NULL;
  92
  93         hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
  94
  95         item->key = key;
  96         item->keylen = keylen;
  97         item->value = value;
  98
  99         add_to_list(hash->hash[hashval], item);
 100
 101         return item;
 102 }
 103
 104 /** @relates hash */
 105 struct hash_item *
 106 get_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen)
 107 {
 108         struct list_head *list;
 109         struct hash_item *item;
 110         hash_value_T hashval;
 111
 112         hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
 113         list    = &hash->hash[hashval];
 114
 115         foreach (item, *list) {
 116                 if (keylen != item->keylen) continue;
 117                 if (memcmp(key, item->key, keylen)) continue;
 118
 119                 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
 120                  * the list by popularity of its items. Inspired from Links,
 121                  * probably PerM. --pasky */
 122                 move_to_top_of_list(*list, item);
 123
 124                 return item;
 125         }
 126
 127         return NULL;
 128 }
 129
 130 #undef HASH_MAGIC
 131
 132 /** Delete @a item from @a hash.
 133  * If key and/or value were dynamically allocated, think about freeing them.
 134  * This function doesn't do that.
 135  * @relates hash */
 136 void
 137 del_hash_item(struct hash *hash, struct hash_item *item)
 138 {
 139         assert(item);
 140         if_assert_failed return;
 141
 142         del_from_list(item);
 143         mem_free(item);
 144 }
 145
 146
 147 #ifdef X31_HASH
 148
 149 /** Fast string hashing.
 150  * @param k             the key
 151  * @param length        the length of the key
 152  * @param initval       the previous hash, or an arbitrary value */
 153 static hash_value_T
 154 strhash(unsigned char *k,
 155         unsigned int length,
 156         hash_value_T initval)
 157 {
 158         const unsigned char *p = (const unsigned char *) k;
 159         hash_value_T h = initval;
 160         unsigned int i = 0;
 161
 162         assert(k && length > 0);
 163         if_assert_failed return h;
 164
 165         /* This loop was unrolled, should improve performance on most cpus,
 166          * After some tests, 8 seems a good value for most keys. --Zas */
 167         for (;;) {
 168                 h = (h << 5) - h + p[i];
 169                 if (++i == length) break;
 170                 h = (h << 5) - h + p[i];
 171                 if (++i == length) break;
 172                 h = (h << 5) - h + p[i];
 173                 if (++i == length) break;
 174                 h = (h << 5) - h + p[i];
 175                 if (++i == length) break;
 176                 h = (h << 5) - h + p[i];
 177                 if (++i == length) break;
 178                 h = (h << 5) - h + p[i];
 179                 if (++i == length) break;
 180                 h = (h << 5) - h + p[i];
 181                 if (++i == length) break;
 182                 h = (h << 5) - h + p[i];
 183                 if (++i == length) break;
 184         };
 185
 186         return h;
 187 }
 188
 189 #else /* X31_HASH */
 190
 191 /* String hashing function follows; it is not written by me, somewhere below
 192  * are credits. I only hacked it a bit. --pasky */
 193
 194 /* This is a big CPU hog, in fact:
 195  *
 196  *   %   cumulative   self              self     total-----------
 197  *  time   seconds   seconds    calls  us/call  us/call  name----
 198  *   6.00      0.35     0.06    10126     5.93     5.93  strhash
 199  *
 200  * It should be investigated whether we couldn't push this down a little. */
 201
 202 /*
 203  *  --------------------------------------------------------------------
 204  *   mix -- mix 3 32-bit values reversibly.
 205  *   For every delta with one or two bits set, and the deltas of all three
 206  *     high bits or all three low bits, whether the original value of a,b,c
 207  *     is almost all zero or is uniformly distributed,
 208  *   * If mix() is run forward or backward, at least 32 bits in a,b,c
 209  *     have at least 1/4 probability of changing.
 210  *   * If mix() is run forward, every bit of c will change between 1/3 and
 211  *     2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
 212  *     mix() was built out of 36 single-cycle latency instructions in a
 213  *     structure that could supported 2x parallelism, like so:
 214  *       a -= b;
 215  *       a -= c; x = (c>>13);
 216  *       b -= c; a ^= x;
 217  *       b -= a; x = (a<<8);
 218  *       c -= a; b ^= x;
 219  *       c -= b; x = (b>>13);
 220  *       ...
 221  *     Unfortunately, superscalar Pentiums and Sparcs can't take advantage
 222  *     of that parallelism.  They've also turned some of those single-cycle
 223  *     latency instructions into multi-cycle latency instructions.  Still,
 224  *     this is the fastest good hash I could find.  There were about 2^^68
 225  *     to choose from.  I only looked at a billion or so.
 226  *  --------------------------------------------------------------------
 227 */
 228
 229 #define mix(a, b, c) { \
 230         a -= b; a -= c; a ^= (c>>13); \
 231         b -= c; b -= a; b ^= (a<<8); \
 232         c -= a; c -= b; c ^= (b>>13); \
 233         a -= b; a -= c; a ^= (c>>12);  \
 234         b -= c; b -= a; b ^= (a<<16); \
 235         c -= a; c -= b; c ^= (b>>5); \
 236         a -= b; a -= c; a ^= (c>>3);  \
 237         b -= c; b -= a; b ^= (a<<10); \
 238         c -= a; c -= b; c ^= (b>>15); \
 239 }
 240
 241  /*
 242  --------------------------------------------------------------------
 243  hash() -- hash a variable-length key into a 32-bit value
 244    k       : the key (the unaligned variable-length array of bytes)
 245    len     : the length of the key, counting by bytes
 246    initval : can be any 4-byte value
 247  Returns a 32-bit value.  Every bit of the key affects every bit of
 248  the return value.  Every 1-bit and 2-bit delta achieves avalanche.
 249  About 6*len+35 instructions.
 250
 251  The best hash table sizes are powers of 2.  There is no need to do
 252  mod a prime (mod is sooo slow!).  If you need less than 32 bits,
 253  use a bitmask.  For example, if you need only 10 bits, do
 254    h = (h & hashmask(10));
 255  In which case, the hash table should have hashsize(10) elements.
 256
 257  If you are hashing n strings (ub1 **) k, do it like this:
 258    for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
 259
 260  By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
 261  code any way you wish, private, educational, or commercial.  It's free.
 262
 263  See http://burtleburtle.net/bob/hash/evahash.html
 264  Use for hash table lookup, or anything where one collision in 2^^32 is
 265  acceptable.  Do NOT use for cryptographic purposes.
 266  --------------------------------------------------------------------
 267  */
 268
 269 #define keycompute(a) ((k[(a)]) \
 270                         + ((hash_value_T) (k[(a)+1])<<8) \
 271                         + ((hash_value_T) (k[(a)+2])<<16) \
 272                         + ((hash_value_T) (k[(a)+3])<<24))
 273
 274 /** Hash an array of bytes.
 275  * @param k             the key
 276  * @param length        the length of the key
 277  * @param initval       the previous hash, or an arbitrary value */
 278 static hash_value_T
 279 strhash(unsigned char *k,
 280         unsigned int length,
 281         hash_value_T initval)
 282 {
 283         int len;
 284         hash_value_T a, b, c;
 285
 286         /* Set up the internal state */
 287         len = length;
 288         a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
 289         c = initval;         /* the previous hash value */
 290
 291         /*---------------------------------------- handle most of the key */
 292         while (len >= 12) {
 293                 a += keycompute(0);
 294                 b += keycompute(4);
 295                 c += keycompute(8);
 296                 mix(a, b, c);
 297                 k += 12;
 298                 len -= 12;
 299         }
 300
 301         /*------------------------------------- handle the last 11 bytes */
 302         c += length;
 303         switch (len) {  /* all the case statements fall through */
 304                 case 11: c += ((hash_value_T) (k[10])<<24);
 305                 case 10: c += ((hash_value_T) (k[9])<<16);
 306                 case 9 : c += ((hash_value_T) (k[8])<<8);
 307                         /* the first byte of c is reserved for the length */
 308                 case 8 : b += ((hash_value_T) (k[7])<<24);
 309                 case 7 : b += ((hash_value_T) (k[6])<<16);
 310                 case 6 : b += ((hash_value_T) (k[5])<<8);
 311                 case 5 : b += (k[4]);
 312                 case 4 : a += ((hash_value_T) (k[3])<<24);
 313                 case 3 : a += ((hash_value_T) (k[2])<<16);
 314                 case 2 : a += ((hash_value_T) (k[1])<<8);
 315                 case 1 : a += (k[0]);
 316                         /* case 0: nothing left to add */
 317         }
 318
 319         mix(a, b, c);
 320
 321         /*-------------------------------------------- report the result */
 322         return c;
 323 }
 324
 325 #undef keycompute
 326 #undef mix
 327 #undef hash_mask
 328
 329 #endif /* MIX_HASH */