lib/Support/StringMap.cpp

   1 //===--- StringMap.cpp - String Hash table map implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the StringMap class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/StringMap.h"
  15 #include <cassert>
  16 using namespace llvm;
  17
  18 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  19   ItemSize = itemSize;
  20
  21   // If a size is specified, initialize the table with that many buckets.
  22   if (InitSize) {
  23     init(InitSize);
  24     return;
  25   }
  26
  27   // Otherwise, initialize it with zero buckets to avoid the allocation.
  28   TheTable = 0;
  29   NumBuckets = 0;
  30   NumItems = 0;
  31   NumTombstones = 0;
  32 }
  33
  34 void StringMapImpl::init(unsigned InitSize) {
  35   assert((InitSize & (InitSize-1)) == 0 &&
  36          "Init Size must be a power of 2 or zero!");
  37   NumBuckets = InitSize ? InitSize : 16;
  38   NumItems = 0;
  39   NumTombstones = 0;
  40
  41   TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
  42
  43   // Allocate one extra bucket, set it to look filled so the iterators stop at
  44   // end.
  45   TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
  46 }
  47
  48
  49 /// HashString - Compute a hash code for the specified string.
  50 ///
  51 static unsigned HashString(const char *Start, const char *End) {
  52   // Bernstein hash function.
  53   unsigned int Result = 0;
  54   // TODO: investigate whether a modified bernstein hash function performs
  55   // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
  56   //   X*33+c -> X*33^c
  57   while (Start != End)
  58     Result = Result * 33 + *Start++;
  59   Result = Result + (Result >> 5);
  60   return Result;
  61 }
  62
  63 /// LookupBucketFor - Look up the bucket that the specified string should end
  64 /// up in.  If it already exists as a key in the map, the Item pointer for the
  65 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
  66 /// case, the FullHashValue field of the bucket will be set to the hash value
  67 /// of the string.
  68 unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
  69                                         const char *NameEnd) {
  70   unsigned HTSize = NumBuckets;
  71   if (HTSize == 0) {  // Hash table unallocated so far?
  72     init(16);
  73     HTSize = NumBuckets;
  74   }
  75   unsigned FullHashValue = HashString(NameStart, NameEnd);
  76   unsigned BucketNo = FullHashValue & (HTSize-1);
  77
  78   unsigned ProbeAmt = 1;
  79   int FirstTombstone = -1;
  80   while (1) {
  81     ItemBucket &Bucket = TheTable[BucketNo];
  82     StringMapEntryBase *BucketItem = Bucket.Item;
  83     // If we found an empty bucket, this key isn't in the table yet, return it.
  84     if (BucketItem == 0) {
  85       // If we found a tombstone, we want to reuse the tombstone instead of an
  86       // empty bucket.  This reduces probing.
  87       if (FirstTombstone != -1) {
  88         TheTable[FirstTombstone].FullHashValue = FullHashValue;
  89         return FirstTombstone;
  90       }
  91
  92       Bucket.FullHashValue = FullHashValue;
  93       return BucketNo;
  94     }
  95
  96     if (BucketItem == getTombstoneVal()) {
  97       // Skip over tombstones.  However, remember the first one we see.
  98       if (FirstTombstone == -1) FirstTombstone = BucketNo;
  99     } else if (Bucket.FullHashValue == FullHashValue) {
 100       // If the full hash value matches, check deeply for a match.  The common
 101       // case here is that we are only looking at the buckets (for item info
 102       // being non-null and for the full hash value) not at the items.  This
 103       // is important for cache locality.
 104
 105       // Do the comparison like this because NameStart isn't necessarily
 106       // null-terminated!
 107       char *ItemStr = (char*)BucketItem+ItemSize;
 108       unsigned ItemStrLen = BucketItem->getKeyLength();
 109       if (unsigned(NameEnd-NameStart) == ItemStrLen &&
 110           memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
 111         // We found a match!
 112         return BucketNo;
 113       }
 114     }
 115
 116     // Okay, we didn't find the item.  Probe to the next bucket.
 117     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 118
 119     // Use quadratic probing, it has fewer clumping artifacts than linear
 120     // probing and has good cache behavior in the common case.
 121     ++ProbeAmt;
 122   }
 123 }
 124
 125
 126 /// FindKey - Look up the bucket that contains the specified key. If it exists
 127 /// in the map, return the bucket number of the key.  Otherwise return -1.
 128 /// This does not modify the map.
 129 int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
 130   unsigned HTSize = NumBuckets;
 131   if (HTSize == 0) return -1;  // Really empty table?
 132   unsigned FullHashValue = HashString(KeyStart, KeyEnd);
 133   unsigned BucketNo = FullHashValue & (HTSize-1);
 134
 135   unsigned ProbeAmt = 1;
 136   while (1) {
 137     ItemBucket &Bucket = TheTable[BucketNo];
 138     StringMapEntryBase *BucketItem = Bucket.Item;
 139     // If we found an empty bucket, this key isn't in the table yet, return.
 140     if (BucketItem == 0)
 141       return -1;
 142
 143     if (BucketItem == getTombstoneVal()) {
 144       // Ignore tombstones.
 145     } else if (Bucket.FullHashValue == FullHashValue) {
 146       // If the full hash value matches, check deeply for a match.  The common
 147       // case here is that we are only looking at the buckets (for item info
 148       // being non-null and for the full hash value) not at the items.  This
 149       // is important for cache locality.
 150
 151       // Do the comparison like this because NameStart isn't necessarily
 152       // null-terminated!
 153       char *ItemStr = (char*)BucketItem+ItemSize;
 154       unsigned ItemStrLen = BucketItem->getKeyLength();
 155       if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
 156           memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
 157         // We found a match!
 158         return BucketNo;
 159       }
 160     }
 161
 162     // Okay, we didn't find the item.  Probe to the next bucket.
 163     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 164
 165     // Use quadratic probing, it has fewer clumping artifacts than linear
 166     // probing and has good cache behavior in the common case.
 167     ++ProbeAmt;
 168   }
 169 }
 170
 171 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
 172 /// delete it.  This aborts if the value isn't in the table.
 173 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
 174   const char *VStr = (char*)V + ItemSize;
 175   StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
 176   V2 = V2;
 177   assert(V == V2 && "Didn't find key?");
 178 }
 179
 180 /// RemoveKey - Remove the StringMapEntry for the specified key from the
 181 /// table, returning it.  If the key is not in the table, this returns null.
 182 StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
 183                                              const char *KeyEnd) {
 184   int Bucket = FindKey(KeyStart, KeyEnd);
 185   if (Bucket == -1) return 0;
 186
 187   StringMapEntryBase *Result = TheTable[Bucket].Item;
 188   TheTable[Bucket].Item = getTombstoneVal();
 189   --NumItems;
 190   ++NumTombstones;
 191   return Result;
 192 }
 193
 194
 195
 196 /// RehashTable - Grow the table, redistributing values into the buckets with
 197 /// the appropriate mod-of-hashtable-size.
 198 void StringMapImpl::RehashTable() {
 199   unsigned NewSize = NumBuckets*2;
 200   // Allocate one extra bucket which will always be non-empty.  This allows the
 201   // iterators to stop at end.
 202   ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
 203   NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
 204
 205   // Rehash all the items into their new buckets.  Luckily :) we already have
 206   // the hash values available, so we don't have to rehash any strings.
 207   for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
 208     if (IB->Item && IB->Item != getTombstoneVal()) {
 209       // Fast case, bucket available.
 210       unsigned FullHash = IB->FullHashValue;
 211       unsigned NewBucket = FullHash & (NewSize-1);
 212       if (NewTableArray[NewBucket].Item == 0) {
 213         NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
 214         NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
 215         continue;
 216       }
 217
 218       // Otherwise probe for a spot.
 219       unsigned ProbeSize = 1;
 220       do {
 221         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
 222       } while (NewTableArray[NewBucket].Item);
 223
 224       // Finally found a slot.  Fill it in.
 225       NewTableArray[NewBucket].Item = IB->Item;
 226       NewTableArray[NewBucket].FullHashValue = FullHash;
 227     }
 228   }
 229
 230   free(TheTable);
 231
 232   TheTable = NewTableArray;
 233   NumBuckets = NewSize;
 234 }