1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
12 #include "llvm/ADT/SparseBitVector.h"
13 #include "llvm/ADT/iterator.h"
14 #include "llvm/DebugInfo/PDB/Native/RawError.h"
15 #include "llvm/Support/BinaryStreamReader.h"
16 #include "llvm/Support/BinaryStreamWriter.h"
17 #include "llvm/Support/Endian.h"
18 #include "llvm/Support/Error.h"
26 class BinaryStreamReader
;
27 class BinaryStreamWriter
;
31 Error
readSparseBitVector(BinaryStreamReader
&Stream
, SparseBitVector
<> &V
);
32 Error
writeSparseBitVector(BinaryStreamWriter
&Writer
, SparseBitVector
<> &Vec
);
34 template <typename ValueT
> class HashTable
;
36 template <typename ValueT
>
37 class HashTableIterator
38 : public iterator_facade_base
<HashTableIterator
<ValueT
>,
39 std::forward_iterator_tag
,
40 const std::pair
<uint32_t, ValueT
>> {
41 friend HashTable
<ValueT
>;
43 HashTableIterator(const HashTable
<ValueT
> &Map
, uint32_t Index
,
45 : Map(&Map
), Index(Index
), IsEnd(IsEnd
) {}
48 HashTableIterator(const HashTable
<ValueT
> &Map
) : Map(&Map
) {
49 int I
= Map
.Present
.find_first();
54 Index
= static_cast<uint32_t>(I
);
59 HashTableIterator
&operator=(const HashTableIterator
&R
) {
63 bool operator==(const HashTableIterator
&R
) const {
69 return (Map
== R
.Map
) && (Index
== R
.Index
);
71 const std::pair
<uint32_t, ValueT
> &operator*() const {
72 assert(Map
->Present
.test(Index
));
73 return Map
->Buckets
[Index
];
76 // Implement postfix op++ in terms of prefix op++ by using the superclass
78 using iterator_facade_base
<HashTableIterator
<ValueT
>,
79 std::forward_iterator_tag
,
80 const std::pair
<uint32_t, ValueT
>>::operator++;
81 HashTableIterator
&operator++() {
82 while (Index
< Map
->Buckets
.size()) {
84 if (Map
->Present
.test(Index
))
93 bool isEnd() const { return IsEnd
; }
94 uint32_t index() const { return Index
; }
96 const HashTable
<ValueT
> *Map
;
101 template <typename ValueT
>
104 support::ulittle32_t Size
;
105 support::ulittle32_t Capacity
;
108 using BucketList
= std::vector
<std::pair
<uint32_t, ValueT
>>;
111 using const_iterator
= HashTableIterator
<ValueT
>;
112 friend const_iterator
;
114 HashTable() { Buckets
.resize(8); }
115 explicit HashTable(uint32_t Capacity
) {
116 Buckets
.resize(Capacity
);
119 Error
load(BinaryStreamReader
&Stream
) {
121 if (auto EC
= Stream
.readObject(H
))
123 if (H
->Capacity
== 0)
124 return make_error
<RawError
>(raw_error_code::corrupt_file
,
125 "Invalid Hash Table Capacity");
126 if (H
->Size
> maxLoad(H
->Capacity
))
127 return make_error
<RawError
>(raw_error_code::corrupt_file
,
128 "Invalid Hash Table Size");
130 Buckets
.resize(H
->Capacity
);
132 if (auto EC
= readSparseBitVector(Stream
, Present
))
134 if (Present
.count() != H
->Size
)
135 return make_error
<RawError
>(raw_error_code::corrupt_file
,
136 "Present bit vector does not match size!");
138 if (auto EC
= readSparseBitVector(Stream
, Deleted
))
140 if (Present
.intersects(Deleted
))
141 return make_error
<RawError
>(raw_error_code::corrupt_file
,
142 "Present bit vector intersects deleted!");
144 for (uint32_t P
: Present
) {
145 if (auto EC
= Stream
.readInteger(Buckets
[P
].first
))
148 if (auto EC
= Stream
.readObject(Value
))
150 Buckets
[P
].second
= *Value
;
153 return Error::success();
156 uint32_t calculateSerializedLength() const {
157 uint32_t Size
= sizeof(Header
);
159 constexpr int BitsPerWord
= 8 * sizeof(uint32_t);
161 int NumBitsP
= Present
.find_last() + 1;
162 int NumBitsD
= Deleted
.find_last() + 1;
164 uint32_t NumWordsP
= alignTo(NumBitsP
, BitsPerWord
) / BitsPerWord
;
165 uint32_t NumWordsD
= alignTo(NumBitsD
, BitsPerWord
) / BitsPerWord
;
167 // Present bit set number of words (4 bytes), followed by that many actual
168 // words (4 bytes each).
169 Size
+= sizeof(uint32_t);
170 Size
+= NumWordsP
* sizeof(uint32_t);
172 // Deleted bit set number of words (4 bytes), followed by that many actual
173 // words (4 bytes each).
174 Size
+= sizeof(uint32_t);
175 Size
+= NumWordsD
* sizeof(uint32_t);
177 // One (Key, ValueT) pair for each entry Present.
178 Size
+= (sizeof(uint32_t) + sizeof(ValueT
)) * size();
183 Error
commit(BinaryStreamWriter
&Writer
) const {
186 H
.Capacity
= capacity();
187 if (auto EC
= Writer
.writeObject(H
))
190 if (auto EC
= writeSparseBitVector(Writer
, Present
))
193 if (auto EC
= writeSparseBitVector(Writer
, Deleted
))
196 for (const auto &Entry
: *this) {
197 if (auto EC
= Writer
.writeInteger(Entry
.first
))
199 if (auto EC
= Writer
.writeObject(Entry
.second
))
202 return Error::success();
211 bool empty() const { return size() == 0; }
212 uint32_t capacity() const { return Buckets
.size(); }
213 uint32_t size() const { return Present
.count(); }
215 const_iterator
begin() const { return const_iterator(*this); }
216 const_iterator
end() const { return const_iterator(*this, 0, true); }
218 /// Find the entry whose key has the specified hash value, using the specified
219 /// traits defining hash function and equality.
220 template <typename Key
, typename TraitsT
>
221 const_iterator
find_as(const Key
&K
, TraitsT
&Traits
) const {
222 uint32_t H
= Traits
.hashLookupKey(K
) % capacity();
224 Optional
<uint32_t> FirstUnused
;
227 if (Traits
.storageKeyToLookupKey(Buckets
[I
].first
) == K
)
228 return const_iterator(*this, I
, false);
232 // Insertion occurs via linear probing from the slot hint, and will be
233 // inserted at the first empty / deleted location. Therefore, if we are
234 // probing and find a location that is neither present nor deleted, then
235 // nothing must have EVER been inserted at this location, and thus it is
236 // not possible for a matching value to occur later.
240 I
= (I
+ 1) % capacity();
243 // The only way FirstUnused would not be set is if every single entry in the
244 // table were Present. But this would violate the load factor constraints
245 // that we impose, so it should never happen.
247 return const_iterator(*this, *FirstUnused
, true);
250 /// Set the entry using a key type that the specified Traits can convert
251 /// from a real key to an internal key.
252 template <typename Key
, typename TraitsT
>
253 bool set_as(const Key
&K
, ValueT V
, TraitsT
&Traits
) {
254 return set_as_internal(K
, std::move(V
), Traits
, None
);
257 template <typename Key
, typename TraitsT
>
258 ValueT
get(const Key
&K
, TraitsT
&Traits
) const {
259 auto Iter
= find_as(K
, Traits
);
260 assert(Iter
!= end());
261 return (*Iter
).second
;
265 bool isPresent(uint32_t K
) const { return Present
.test(K
); }
266 bool isDeleted(uint32_t K
) const { return Deleted
.test(K
); }
269 mutable SparseBitVector
<> Present
;
270 mutable SparseBitVector
<> Deleted
;
273 /// Set the entry using a key type that the specified Traits can convert
274 /// from a real key to an internal key.
275 template <typename Key
, typename TraitsT
>
276 bool set_as_internal(const Key
&K
, ValueT V
, TraitsT
&Traits
,
277 Optional
<uint32_t> InternalKey
) {
278 auto Entry
= find_as(K
, Traits
);
279 if (Entry
!= end()) {
280 assert(isPresent(Entry
.index()));
281 assert(Traits
.storageKeyToLookupKey(Buckets
[Entry
.index()].first
) == K
);
282 // We're updating, no need to do anything special.
283 Buckets
[Entry
.index()].second
= V
;
287 auto &B
= Buckets
[Entry
.index()];
288 assert(!isPresent(Entry
.index()));
289 assert(Entry
.isEnd());
290 B
.first
= InternalKey
? *InternalKey
: Traits
.lookupKeyToStorageKey(K
);
292 Present
.set(Entry
.index());
293 Deleted
.reset(Entry
.index());
297 assert((find_as(K
, Traits
)) != end());
301 static uint32_t maxLoad(uint32_t capacity
) { return capacity
* 2 / 3 + 1; }
303 template <typename TraitsT
>
304 void grow(TraitsT
&Traits
) {
306 uint32_t MaxLoad
= maxLoad(capacity());
307 if (S
< maxLoad(capacity()))
309 assert(capacity() != UINT32_MAX
&& "Can't grow Hash table!");
311 uint32_t NewCapacity
= (capacity() <= INT32_MAX
) ? MaxLoad
* 2 : UINT32_MAX
;
313 // Growing requires rebuilding the table and re-hashing every item. Make a
314 // copy with a larger capacity, insert everything into the copy, then swap
316 HashTable
NewMap(NewCapacity
);
317 for (auto I
: Present
) {
318 auto LookupKey
= Traits
.storageKeyToLookupKey(Buckets
[I
].first
);
319 NewMap
.set_as_internal(LookupKey
, Buckets
[I
].second
, Traits
,
323 Buckets
.swap(NewMap
.Buckets
);
324 std::swap(Present
, NewMap
.Present
);
325 std::swap(Deleted
, NewMap
.Deleted
);
326 assert(capacity() == NewCapacity
);
331 } // end namespace pdb
333 } // end namespace llvm
335 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H