1 //===- TrieRawHashMapTest.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/TrieRawHashMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/Support/Endian.h"
12 #include "llvm/Support/SHA1.h"
13 #include "gtest/gtest.h"
18 class TrieRawHashMapTestHelper
{
20 TrieRawHashMapTestHelper() = default;
22 void setTrie(ThreadSafeTrieRawHashMapBase
*T
) { Trie
= T
; }
24 ThreadSafeTrieRawHashMapBase::PointerBase
getRoot() const {
25 return Trie
->getRoot();
27 unsigned getStartBit(ThreadSafeTrieRawHashMapBase::PointerBase P
) const {
28 return Trie
->getStartBit(P
);
30 unsigned getNumBits(ThreadSafeTrieRawHashMapBase::PointerBase P
) const {
31 return Trie
->getNumBits(P
);
33 unsigned getNumSlotUsed(ThreadSafeTrieRawHashMapBase::PointerBase P
) const {
34 return Trie
->getNumSlotUsed(P
);
36 unsigned getNumTries() const { return Trie
->getNumTries(); }
38 getTriePrefixAsString(ThreadSafeTrieRawHashMapBase::PointerBase P
) const {
39 return Trie
->getTriePrefixAsString(P
);
41 ThreadSafeTrieRawHashMapBase::PointerBase
42 getNextTrie(ThreadSafeTrieRawHashMapBase::PointerBase P
) const {
43 return Trie
->getNextTrie(P
);
47 ThreadSafeTrieRawHashMapBase
*Trie
= nullptr;
52 template <typename DataType
, size_t HashSize
= sizeof(uint64_t)>
53 class SimpleTrieHashMapTest
: public TrieRawHashMapTestHelper
,
54 public ::testing::Test
{
56 using NumType
= DataType
;
57 using HashType
= std::array
<uint8_t, HashSize
>;
58 using TrieType
= ThreadSafeTrieRawHashMap
<DataType
, sizeof(HashType
)>;
60 TrieType
&createTrie(size_t RootBits
, size_t SubtrieBits
) {
61 auto &Ret
= Trie
.emplace(RootBits
, SubtrieBits
);
62 TrieRawHashMapTestHelper::setTrie(&Ret
);
66 void destroyTrie() { Trie
.reset(); }
67 ~SimpleTrieHashMapTest() { destroyTrie(); }
69 // Use the number itself as hash to test the pathological case.
70 static HashType
hash(uint64_t Num
) {
72 llvm::support::endian::byte_swap(Num
, llvm::endianness::big
);
74 memcpy(&Hash
[0], &HashN
, sizeof(HashType
));
79 std::optional
<TrieType
> Trie
;
82 using SmallNodeTrieTest
= SimpleTrieHashMapTest
<uint64_t>;
84 TEST_F(SmallNodeTrieTest
, TrieAllocation
) {
86 0x0, std::numeric_limits
<NumType
>::max(), 0x1, 0x2,
87 0x3, std::numeric_limits
<NumType
>::max() - 1u,
90 unsigned ExpectedTries
[] = {
92 1, // Both on the root.
93 64, // 0 and 1 sinks all the way down.
94 64, // no new allocation needed.
95 65, // need a new node between 2 and 3.
96 65 + 63, // 63 new allocation to sink two big numbers all the way.
99 const char *ExpectedPrefix
[] = {
102 "00000000000000[0000000]",
103 "00000000000000[0000000]",
104 "00000000000000[0000001]",
105 "ffffffffffffff[1111111]",
108 // Use root and subtrie sizes of 1 so this gets sunk quite deep.
109 auto &Trie
= createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
111 for (unsigned I
= 0; I
< 6; ++I
) {
112 // Lookup first to exercise hint code for deep tries.
113 TrieType::pointer Lookup
= Trie
.find(hash(Numbers
[I
]));
114 EXPECT_FALSE(Lookup
);
116 Trie
.insert(Lookup
, TrieType::value_type(hash(Numbers
[I
]), Numbers
[I
]));
117 EXPECT_EQ(getNumTries(), ExpectedTries
[I
]);
118 EXPECT_EQ(getTriePrefixAsString(getNextTrie(getRoot())), ExpectedPrefix
[I
]);
122 TEST_F(SmallNodeTrieTest
, TrieStructure
) {
123 NumType Numbers
[] = {
124 // Three numbers that will nest deeply to test (1) sinking subtries and
125 // (2) deep, non-trivial hints.
126 std::numeric_limits
<NumType
>::max(),
127 std::numeric_limits
<NumType
>::max() - 2u,
128 std::numeric_limits
<NumType
>::max() - 3u,
129 // One number to stay at the top-level.
133 // Use root and subtrie sizes of 1 so this gets sunk quite deep.
134 auto &Trie
= createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
136 for (NumType N
: Numbers
) {
137 // Lookup first to exercise hint code for deep tries.
138 TrieType::pointer Lookup
= Trie
.find(hash(N
));
139 EXPECT_FALSE(Lookup
);
141 Trie
.insert(Lookup
, TrieType::value_type(hash(N
), N
));
143 for (NumType N
: Numbers
) {
144 TrieType::pointer Lookup
= Trie
.find(hash(N
));
148 EXPECT_EQ(hash(N
), Lookup
->Hash
);
149 EXPECT_EQ(N
, Lookup
->Data
);
151 // Confirm a subsequent insertion fails to overwrite by trying to insert a
153 auto Result
= Trie
.insert(Lookup
, TrieType::value_type(hash(N
), N
- 1));
154 EXPECT_EQ(N
, Result
->Data
);
157 // Check the trie so we can confirm the structure is correct. Each subtrie
158 // should have 2 slots. The root's index=0 should have the content for
159 // 0x37 directly, and index=1 should be a linked-list of subtries, finally
160 // ending with content for (max-2) and (max-3).
162 // Note: This structure is not exhaustive (too expensive to update tests),
163 // but it does test that the dump format is somewhat readable and that the
164 // basic structure is correct.
166 // Note: This test requires that the trie reads bytes starting from index 0
167 // of the array of uint8_t, and then reads each byte's bits from high to low.
170 // We should allocated a total of 64 SubTries for 64 bit hash.
171 ASSERT_EQ(getNumTries(), 64u);
172 // Check the root trie. Two slots and both are used.
173 ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
174 // Check last subtrie.
175 // Last allocated trie is the next node in the allocation chain.
176 auto LastAlloctedSubTrie
= getNextTrie(getRoot());
177 ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie
),
178 "ffffffffffffff[1111110]");
179 ASSERT_EQ(getStartBit(LastAlloctedSubTrie
), 63u);
180 ASSERT_EQ(getNumBits(LastAlloctedSubTrie
), 1u);
181 ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie
), 2u);
184 TEST_F(SmallNodeTrieTest
, TrieStructureSmallFinalSubtrie
) {
185 NumType Numbers
[] = {
186 // Three numbers that will nest deeply to test (1) sinking subtries and
187 // (2) deep, non-trivial hints.
188 std::numeric_limits
<NumType
>::max(),
189 std::numeric_limits
<NumType
>::max() - 2u,
190 std::numeric_limits
<NumType
>::max() - 3u,
191 // One number to stay at the top-level.
195 // Use subtrie size of 5 to avoid hitting 64 evenly, making the final subtrie
197 auto &Trie
= createTrie(/*RootBits=*/8, /*SubtrieBits=*/5);
199 for (NumType N
: Numbers
) {
200 // Lookup first to exercise hint code for deep tries.
201 TrieType::pointer Lookup
= Trie
.find(hash(N
));
202 EXPECT_FALSE(Lookup
);
204 Trie
.insert(Lookup
, TrieType::value_type(hash(N
), N
));
206 for (NumType N
: Numbers
) {
207 TrieType::pointer Lookup
= Trie
.find(hash(N
));
209 EXPECT_EQ(hash(N
), Lookup
->Hash
);
210 EXPECT_EQ(N
, Lookup
->Data
);
212 // Confirm a subsequent insertion fails to overwrite by trying to insert a
214 auto Result
= Trie
.insert(Lookup
, TrieType::value_type(hash(N
), N
- 1));
215 EXPECT_EQ(N
, Result
->Data
);
218 // Check the trie so we can confirm the structure is correct. The root
219 // should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the
220 // deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1).
221 // should have 2 slots. The root's index=0 should have the content for
222 // 0x37 directly, and index=1 should be a linked-list of subtries, finally
223 // ending with content for (max-2) and (max-3).
225 // Note: This structure is not exhaustive (too expensive to update tests),
226 // but it does test that the dump format is somewhat readable and that the
227 // basic structure is correct.
229 // Note: This test requires that the trie reads bytes starting from index 0
230 // of the array of uint8_t, and then reads each byte's bits from high to low.
233 // 64 bit hash = 8 + 5 * 11 + 1, so 1 root, 11 8bit subtrie and 1 last level
234 // subtrie, 13 total.
235 ASSERT_EQ(getNumTries(), 13u);
236 // Check the root trie. Two slots and both are used.
237 ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
238 // Check last subtrie.
239 // Last allocated trie is the next node in the allocation chain.
240 auto LastAlloctedSubTrie
= getNextTrie(getRoot());
241 ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie
),
242 "ffffffffffffff[1111110]");
243 ASSERT_EQ(getStartBit(LastAlloctedSubTrie
), 63u);
244 ASSERT_EQ(getNumBits(LastAlloctedSubTrie
), 1u);
245 ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie
), 2u);
248 TEST_F(SmallNodeTrieTest
, TrieDestructionLoop
) {
249 // Test destroying large Trie. Make sure there is no recursion that can
250 // overflow the stack.
252 // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
253 auto &Trie
= createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
255 // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
257 static constexpr uint64_t MaxN
= 100000;
258 for (uint64_t N
= 0; N
!= MaxN
; ++N
) {
259 HashType Hash
= hash(N
);
260 Trie
.insert(TrieType::pointer(), TrieType::value_type(Hash
, NumType
{N
}));
263 // Destroy tries. If destruction is recursive and MaxN is high enough, these
268 struct NumWithDestructorT
{
270 llvm::function_ref
<void()> DestructorCallback
;
271 ~NumWithDestructorT() { DestructorCallback(); }
274 using NodeWithDestructorTrieTest
= SimpleTrieHashMapTest
<NumWithDestructorT
>;
276 TEST_F(NodeWithDestructorTrieTest
, TrieDestructionLoop
) {
277 // Test destroying large Trie. Make sure there is no recursion that can
278 // overflow the stack.
280 // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
281 auto &Trie
= createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
283 // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
285 static constexpr uint64_t MaxN
= 100000;
287 uint64_t DestructorCalled
= 0;
288 auto DtorCallback
= [&DestructorCalled
]() { ++DestructorCalled
; };
289 for (uint64_t N
= 0; N
!= MaxN
; ++N
) {
290 HashType Hash
= hash(N
);
291 Trie
.insert(TrieType::pointer(),
292 TrieType::value_type(Hash
, NumType
{N
, DtorCallback
}));
294 // Reset the count after all the temporaries get destroyed.
295 DestructorCalled
= 0;
297 // Destroy tries. If destruction is recursive and MaxN is high enough, these
301 // Count the number of destructor calls during `destroyTrie()`.
302 ASSERT_EQ(DestructorCalled
, MaxN
);
305 using NumStrNodeTrieTest
= SimpleTrieHashMapTest
<std::string
>;
307 TEST_F(NumStrNodeTrieTest
, TrieInsertLazy
) {
308 for (unsigned RootBits
: {2, 3, 6, 10}) {
309 for (unsigned SubtrieBits
: {2, 3, 4}) {
310 auto &Trie
= createTrie(RootBits
, SubtrieBits
);
311 for (int I
= 0, E
= 1000; I
!= E
; ++I
) {
312 TrieType::pointer Lookup
;
313 HashType H
= hash(I
);
315 Lookup
= Trie
.find(H
);
317 auto insertNum
= [&](uint64_t Num
) {
318 std::string S
= Twine(I
).str();
319 auto Hash
= hash(Num
);
320 return Trie
.insertLazy(
321 Hash
, [&](TrieType::LazyValueConstructor C
) { C(std::move(S
)); });
323 auto S1
= insertNum(I
);
324 // The address of the Data should be the same.
325 EXPECT_EQ(&S1
->Data
, &insertNum(I
)->Data
);
327 auto insertStr
= [&](std::string S
) {
328 int Num
= std::stoi(S
);
329 return insertNum(Num
);
331 std::string S2
= S1
->Data
;
332 // The address of the Data should be the same.
333 EXPECT_EQ(&S1
->Data
, &insertStr(S2
)->Data
);
335 for (int I
= 0, E
= 1000; I
!= E
; ++I
) {
336 std::string S
= Twine(I
).str();
337 TrieType::pointer Lookup
= Trie
.find(hash(I
));
341 EXPECT_EQ(S
, Lookup
->Data
);
346 } // end anonymous namespace