include/llvm/DebugInfo/PDB/Native/HashTable.h

   1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
  10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
  11
  12 #include "llvm/ADT/SparseBitVector.h"
  13 #include "llvm/ADT/iterator.h"
  14 #include "llvm/DebugInfo/PDB/Native/RawError.h"
  15 #include "llvm/Support/BinaryStreamReader.h"
  16 #include "llvm/Support/BinaryStreamWriter.h"
  17 #include "llvm/Support/Endian.h"
  18 #include "llvm/Support/Error.h"
  19 #include <cstdint>
  20 #include <iterator>
  21 #include <utility>
  22 #include <vector>
  23
  24 namespace llvm {
  25
  26 class BinaryStreamReader;
  27 class BinaryStreamWriter;
  28
  29 namespace pdb {
  30
  31 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
  32 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
  33
  34 template <typename ValueT> class HashTable;
  35
  36 template <typename ValueT>
  37 class HashTableIterator
  38     : public iterator_facade_base<HashTableIterator<ValueT>,
  39                                   std::forward_iterator_tag,
  40                                   const std::pair<uint32_t, ValueT>> {
  41   friend HashTable<ValueT>;
  42
  43   HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
  44                     bool IsEnd)
  45       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
  46
  47 public:
  48   HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
  49     int I = Map.Present.find_first();
  50     if (I == -1) {
  51       Index = 0;
  52       IsEnd = true;
  53     } else {
  54       Index = static_cast<uint32_t>(I);
  55       IsEnd = false;
  56     }
  57   }
  58
  59   HashTableIterator &operator=(const HashTableIterator &R) {
  60     Map = R.Map;
  61     return *this;
  62   }
  63   bool operator==(const HashTableIterator &R) const {
  64     if (IsEnd && R.IsEnd)
  65       return true;
  66     if (IsEnd != R.IsEnd)
  67       return false;
  68
  69     return (Map == R.Map) && (Index == R.Index);
  70   }
  71   const std::pair<uint32_t, ValueT> &operator*() const {
  72     assert(Map->Present.test(Index));
  73     return Map->Buckets[Index];
  74   }
  75
  76   // Implement postfix op++ in terms of prefix op++ by using the superclass
  77   // implementation.
  78   using iterator_facade_base<HashTableIterator<ValueT>,
  79                              std::forward_iterator_tag,
  80                              const std::pair<uint32_t, ValueT>>::operator++;
  81   HashTableIterator &operator++() {
  82     while (Index < Map->Buckets.size()) {
  83       ++Index;
  84       if (Map->Present.test(Index))
  85         return *this;
  86     }
  87
  88     IsEnd = true;
  89     return *this;
  90   }
  91
  92 private:
  93   bool isEnd() const { return IsEnd; }
  94   uint32_t index() const { return Index; }
  95
  96   const HashTable<ValueT> *Map;
  97   uint32_t Index;
  98   bool IsEnd;
  99 };
 100
 101 template <typename ValueT>
 102 class HashTable {
 103   struct Header {
 104     support::ulittle32_t Size;
 105     support::ulittle32_t Capacity;
 106   };
 107
 108   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
 109
 110 public:
 111   using const_iterator = HashTableIterator<ValueT>;
 112   friend const_iterator;
 113
 114   HashTable() { Buckets.resize(8); }
 115   explicit HashTable(uint32_t Capacity) {
 116     Buckets.resize(Capacity);
 117   }
 118
 119   Error load(BinaryStreamReader &Stream) {
 120     const Header *H;
 121     if (auto EC = Stream.readObject(H))
 122       return EC;
 123     if (H->Capacity == 0)
 124       return make_error<RawError>(raw_error_code::corrupt_file,
 125                                   "Invalid Hash Table Capacity");
 126     if (H->Size > maxLoad(H->Capacity))
 127       return make_error<RawError>(raw_error_code::corrupt_file,
 128                                   "Invalid Hash Table Size");
 129
 130     Buckets.resize(H->Capacity);
 131
 132     if (auto EC = readSparseBitVector(Stream, Present))
 133       return EC;
 134     if (Present.count() != H->Size)
 135       return make_error<RawError>(raw_error_code::corrupt_file,
 136                                   "Present bit vector does not match size!");
 137
 138     if (auto EC = readSparseBitVector(Stream, Deleted))
 139       return EC;
 140     if (Present.intersects(Deleted))
 141       return make_error<RawError>(raw_error_code::corrupt_file,
 142                                   "Present bit vector intersects deleted!");
 143
 144     for (uint32_t P : Present) {
 145       if (auto EC = Stream.readInteger(Buckets[P].first))
 146         return EC;
 147       const ValueT *Value;
 148       if (auto EC = Stream.readObject(Value))
 149         return EC;
 150       Buckets[P].second = *Value;
 151     }
 152
 153     return Error::success();
 154   }
 155
 156   uint32_t calculateSerializedLength() const {
 157     uint32_t Size = sizeof(Header);
 158
 159     constexpr int BitsPerWord = 8 * sizeof(uint32_t);
 160
 161     int NumBitsP = Present.find_last() + 1;
 162     int NumBitsD = Deleted.find_last() + 1;
 163
 164     uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
 165     uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
 166
 167     // Present bit set number of words (4 bytes), followed by that many actual
 168     // words (4 bytes each).
 169     Size += sizeof(uint32_t);
 170     Size += NumWordsP * sizeof(uint32_t);
 171
 172     // Deleted bit set number of words (4 bytes), followed by that many actual
 173     // words (4 bytes each).
 174     Size += sizeof(uint32_t);
 175     Size += NumWordsD * sizeof(uint32_t);
 176
 177     // One (Key, ValueT) pair for each entry Present.
 178     Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
 179
 180     return Size;
 181   }
 182
 183   Error commit(BinaryStreamWriter &Writer) const {
 184     Header H;
 185     H.Size = size();
 186     H.Capacity = capacity();
 187     if (auto EC = Writer.writeObject(H))
 188       return EC;
 189
 190     if (auto EC = writeSparseBitVector(Writer, Present))
 191       return EC;
 192
 193     if (auto EC = writeSparseBitVector(Writer, Deleted))
 194       return EC;
 195
 196     for (const auto &Entry : *this) {
 197       if (auto EC = Writer.writeInteger(Entry.first))
 198         return EC;
 199       if (auto EC = Writer.writeObject(Entry.second))
 200         return EC;
 201     }
 202     return Error::success();
 203   }
 204
 205   void clear() {
 206     Buckets.resize(8);
 207     Present.clear();
 208     Deleted.clear();
 209   }
 210
 211   bool empty() const { return size() == 0; }
 212   uint32_t capacity() const { return Buckets.size(); }
 213   uint32_t size() const { return Present.count(); }
 214
 215   const_iterator begin() const { return const_iterator(*this); }
 216   const_iterator end() const { return const_iterator(*this, 0, true); }
 217
 218   /// Find the entry whose key has the specified hash value, using the specified
 219   /// traits defining hash function and equality.
 220   template <typename Key, typename TraitsT>
 221   const_iterator find_as(const Key &K, TraitsT &Traits) const {
 222     uint32_t H = Traits.hashLookupKey(K) % capacity();
 223     uint32_t I = H;
 224     Optional<uint32_t> FirstUnused;
 225     do {
 226       if (isPresent(I)) {
 227         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
 228           return const_iterator(*this, I, false);
 229       } else {
 230         if (!FirstUnused)
 231           FirstUnused = I;
 232         // Insertion occurs via linear probing from the slot hint, and will be
 233         // inserted at the first empty / deleted location.  Therefore, if we are
 234         // probing and find a location that is neither present nor deleted, then
 235         // nothing must have EVER been inserted at this location, and thus it is
 236         // not possible for a matching value to occur later.
 237         if (!isDeleted(I))
 238           break;
 239       }
 240       I = (I + 1) % capacity();
 241     } while (I != H);
 242
 243     // The only way FirstUnused would not be set is if every single entry in the
 244     // table were Present.  But this would violate the load factor constraints
 245     // that we impose, so it should never happen.
 246     assert(FirstUnused);
 247     return const_iterator(*this, *FirstUnused, true);
 248   }
 249
 250   /// Set the entry using a key type that the specified Traits can convert
 251   /// from a real key to an internal key.
 252   template <typename Key, typename TraitsT>
 253   bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
 254     return set_as_internal(K, std::move(V), Traits, None);
 255   }
 256
 257   template <typename Key, typename TraitsT>
 258   ValueT get(const Key &K, TraitsT &Traits) const {
 259     auto Iter = find_as(K, Traits);
 260     assert(Iter != end());
 261     return (*Iter).second;
 262   }
 263
 264 protected:
 265   bool isPresent(uint32_t K) const { return Present.test(K); }
 266   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
 267
 268   BucketList Buckets;
 269   mutable SparseBitVector<> Present;
 270   mutable SparseBitVector<> Deleted;
 271
 272 private:
 273   /// Set the entry using a key type that the specified Traits can convert
 274   /// from a real key to an internal key.
 275   template <typename Key, typename TraitsT>
 276   bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
 277                        Optional<uint32_t> InternalKey) {
 278     auto Entry = find_as(K, Traits);
 279     if (Entry != end()) {
 280       assert(isPresent(Entry.index()));
 281       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
 282       // We're updating, no need to do anything special.
 283       Buckets[Entry.index()].second = V;
 284       return false;
 285     }
 286
 287     auto &B = Buckets[Entry.index()];
 288     assert(!isPresent(Entry.index()));
 289     assert(Entry.isEnd());
 290     B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
 291     B.second = V;
 292     Present.set(Entry.index());
 293     Deleted.reset(Entry.index());
 294
 295     grow(Traits);
 296
 297     assert((find_as(K, Traits)) != end());
 298     return true;
 299   }
 300
 301   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
 302
 303   template <typename TraitsT>
 304   void grow(TraitsT &Traits) {
 305     uint32_t S = size();
 306     uint32_t MaxLoad = maxLoad(capacity());
 307     if (S < maxLoad(capacity()))
 308       return;
 309     assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
 310
 311     uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
 312
 313     // Growing requires rebuilding the table and re-hashing every item.  Make a
 314     // copy with a larger capacity, insert everything into the copy, then swap
 315     // it in.
 316     HashTable NewMap(NewCapacity);
 317     for (auto I : Present) {
 318       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
 319       NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
 320                              Buckets[I].first);
 321     }
 322
 323     Buckets.swap(NewMap.Buckets);
 324     std::swap(Present, NewMap.Present);
 325     std::swap(Deleted, NewMap.Deleted);
 326     assert(capacity() == NewCapacity);
 327     assert(size() == S);
 328   }
 329 };
 330
 331 } // end namespace pdb
 332
 333 } // end namespace llvm
 334
 335 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H