//===-- function_call_trie_test.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "xray_function_call_trie.h"
#include "gtest/gtest.h"
#include <cstdint>
#include <limits>
#include <new>
#include <type_traits>

namespace __xray {

namespace {

TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  profilingFlags()->setDefaults();
  FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);
}

TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  uint64_t TSC = 1;
  uint16_t CPU = 0;
  Trie.enterFunction(1, TSC++, CPU++);
  Trie.exitFunction(1, TSC++, CPU++);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, HandleTSCOverflow) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

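  // Enter at the maximal TSC value and exit at TSC 1, as if the counter had
  // wrapped; the accounted local time should be a single tick rather than an
  // underflowed delta (see the assertion below).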
  Trie.enterFunction(1, std::numeric_limits<uint64_t>::max(), 0);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, MaximalCumulativeTime) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

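  // Entering at TSC 1 and exiting at TSC 0 also looks like a wrapped counter,
  // so the accounted local time should be the maximal representable delta,
  // std::numeric_limits<uint64_t>::max() - 1 (see the assertion below).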
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 0, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime,
            std::numeric_limits<uint64_t>::max() - 1);
}

TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_TRUE(R.empty());
}

TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(2, 1, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(1, 5, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MissingFunctionExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MultipleRoots) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 2, 0);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3, 0);
  Trie.exitFunction(2, 4, 0);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  // Make sure the roots have different IDs.
  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0->FId, R1->FId);

  // Inspect the roots and check that they carry the right data.
  ASSERT_NE(R0, nullptr);
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);

  ASSERT_NE(R1, nullptr);
  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}

// While missing an intermediary exit may be rare in practice, we still enforce
// that we can handle the case where we've missed an exit event somehow, in
// between call entries and exits. To illustrate, imagine the following shadow
// call stack:
//
//   f0@t0 -> f1@t1 -> f2@t2
//
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
// `f0` @ t4 (i.e. no `f1` exit in between) then we need to handle the case of
// accounting local time to `f2` from d = (t3 - t2), then local time to `f1`
// as d' = (t4 - t1), and then local time to `f0` as d'' = (t4 - t0) - d'.
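//
// With the concrete TSC values used below (t0 = 0, t1 = 100, t2 = 200,
// t3 = 300, t4 = 400), that accounting works out to 100 ticks for f2, 300 for
// f1, and 100 for f0, which is what the assertions at the end of this test
// expect for FIds 3, 2, and 1 respectively.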
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 100, 0);
  Trie.enterFunction(3, 200, 0);
  Trie.exitFunction(3, 300, 0);
  Trie.exitFunction(1, 400, 0);

  // What we should see at this point is all the functions in the trie in a
  // specific order (1 -> 2 -> 3) with the appropriate count(s) and local
  // latencies.
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 1u);

  const auto &F1 = *R[0];
  ASSERT_EQ(F1.FId, 1);
  ASSERT_FALSE(F1.Callees.empty());

  const auto &F2 = *F1.Callees[0].NodePtr;
  ASSERT_EQ(F2.FId, 2);
  ASSERT_FALSE(F2.Callees.empty());

  const auto &F3 = *F2.Callees[0].NodePtr;
  ASSERT_EQ(F3.FId, 3);
  ASSERT_TRUE(F3.Callees.empty());

  // Now that we've established the preconditions, we check for specific
  // aspects of the nodes.
  EXPECT_EQ(F3.CallCount, 1u);
  EXPECT_EQ(F2.CallCount, 1u);
  EXPECT_EQ(F1.CallCount, 1u);
  EXPECT_EQ(F3.CumulativeLocalTime, 100u);
  EXPECT_EQ(F2.CumulativeLocalTime, 300u);
  EXPECT_EQ(F1.CumulativeLocalTime, 100u);
}

TEST(FunctionCallTrieTest, DeepCallStack) {
  // Simulate a relatively deep call stack (32 levels) and ensure that we can
  // properly pop all the way up the stack.
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  for (int i = 0; i < 32; ++i)
    Trie.enterFunction(i + 1, i, 0);
  Trie.exitFunction(1, 33, 0);
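  // A single exit for the root (FId 1) is expected to unwind and account all
  // 32 outstanding frames, since none of the nested functions recorded an
  // exit of their own.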

  // Here, validate that we have a 32-level deep function call path from the
  // root (FId 1) down to the leaf (FId 32).
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  auto F = R[0];
  for (int i = 0; i < 32; ++i) {
    EXPECT_EQ(F->FId, i + 1);
    EXPECT_EQ(F->CallCount, 1u);
    if (F->Callees.empty() && i != 31)
      FAIL() << "Empty callees for FId " << F->FId;
    if (i != 31)
      F = F->Callees[0].NodePtr;
  }
}

// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.
TEST(FunctionCallTrieTest, DeepCopy) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 1, 0);
  Trie.exitFunction(2, 2, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(3, 4, 0);
  Trie.exitFunction(1, 5, 0);

  // We want to make a deep copy and compare notes.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(B);
  Trie.deepCopyInto(Copy);

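  // The copy is backed by its own allocator set (B), so the nodes inspected
  // below are distinct objects; in particular, the copied child's Parent
  // pointer should refer back to the copy's root rather than the original's.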
  ASSERT_NE(Trie.getRoots().size(), 0u);
  ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
  const auto &R0Orig = *Trie.getRoots()[0];
  const auto &R0Copy = *Copy.getRoots()[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);

  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);

  const auto &F1Orig =
      *R0Orig.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  const auto &F1Copy =
      *R0Copy.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  EXPECT_EQ(&R0Orig, F1Orig.Parent);
  EXPECT_EQ(&R0Copy, F1Copy.Parent);
}

TEST(FunctionCallTrieTest, MergeInto) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // 1 -> 2 -> 3
  T0.enterFunction(1, 0, 0);
  T0.enterFunction(2, 1, 0);
  T0.enterFunction(3, 2, 0);
  T0.exitFunction(3, 3, 0);
  T0.exitFunction(2, 4, 0);
  T0.exitFunction(1, 5, 0);

  // 1 -> 2 -> 3
  T1.enterFunction(1, 0, 0);
  T1.enterFunction(2, 1, 0);
  T1.enterFunction(3, 2, 0);
  T1.exitFunction(3, 3, 0);
  T1.exitFunction(2, 4, 0);
  T1.exitFunction(1, 5, 0);

  // We use a different allocator here to make sure that we're able to transfer
  // data into a FunctionCallTrie which uses a different allocator. This
  // reflects the intended usage scenario for when we're collecting profiles
  // that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

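  // Merging two identical tries should double each node's CallCount and
  // combine their cumulative local times; the assertions below expect 10, 6,
  // and 2 ticks for FIds 1, 2, and 3 respectively.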
  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2u);
  EXPECT_EQ(R0.CumulativeLocalTime, 10u);
  EXPECT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2u);
  EXPECT_EQ(F1.CumulativeLocalTime, 6u);
  EXPECT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2u);
  EXPECT_EQ(F2.CumulativeLocalTime, 2u);
  EXPECT_EQ(F2.Callees.size(), 0u);
}

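// Exercise constructing the allocators and the trie in raw aligned storage via
// placement new, destroying them in place, and then constructing fresh objects
// in the same storage.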
TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
  profilingFlags()->setDefaults();
  typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                alignof(FunctionCallTrie::Allocators)>::type
      AllocatorsStorage;
  new (&AllocatorsStorage)
      FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  auto *A =
      reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);

  typename std::aligned_storage<sizeof(FunctionCallTrie),
                                alignof(FunctionCallTrie)>::type FCTStorage;
  new (&FCTStorage) FunctionCallTrie(*A);
  auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);

  // Put some data into it.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);

  // Re-initialize the objects in storage.
  T->~FunctionCallTrie();
  A->~Allocators();
  new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  new (T) FunctionCallTrie(*A);

  // Then put some data into it again.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);
}

} // namespace

} // namespace __xray