[AMDGPU][True16][CodeGen] true16 codegen pattern for v_med3_u/i16 (#121850)
[llvm-project.git] / llvm / tools / llvm-profgen / MissingFrameInferrer.cpp
blobee49950f39ca4e2818852b5c5916411d04d6b172
1 //===-- MissingFrameInferrer.cpp - Missing frame inferrer --------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "MissingFrameInferrer.h"
10 #include "PerfReader.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/ADT/SCCIterator.h"
13 #include "llvm/ADT/Statistic.h"
14 #include <algorithm>
15 #include <cstdint>
16 #include <iterator>
17 #include <queue>
18 #include <sys/types.h>
20 #define DEBUG_TYPE "missing-frame-inferrer"
22 using namespace llvm;
23 using namespace sampleprof;
25 STATISTIC(TailCallUniReachable,
26 "Number of frame pairs reachable via a unique tail call path");
27 STATISTIC(TailCallMultiReachable,
28 "Number of frame pairs reachable via a multiple tail call paths");
29 STATISTIC(TailCallUnreachable,
30 "Number of frame pairs unreachable via any tail call path");
31 STATISTIC(TailCallFuncSingleTailCalls,
32 "Number of functions with single tail call site");
33 STATISTIC(TailCallFuncMultipleTailCalls,
34 "Number of functions with multiple tail call sites");
35 STATISTIC(TailCallMaxTailCallPath, "Length of the longest tail call path");
37 static cl::opt<uint32_t>
38 MaximumSearchDepth("max-search-depth", cl::init(UINT32_MAX - 1),
39 cl::desc("The maximum levels the DFS-based missing "
40 "frame search should go with"));
42 void MissingFrameInferrer::initialize(
43 const ContextSampleCounterMap *SampleCounters) {
44 // Refine call edges based on LBR samples.
45 if (SampleCounters) {
46 std::unordered_map<uint64_t, std::unordered_set<uint64_t>> SampledCalls;
47 std::unordered_map<uint64_t, std::unordered_set<uint64_t>> SampledTailCalls;
49 // Populate SampledCalls based on static call sites. Similarly to
50 // SampledTailCalls.
51 for (const auto &CI : *SampleCounters) {
52 for (auto Item : CI.second.BranchCounter) {
53 auto From = Item.first.first;
54 auto To = Item.first.second;
55 if (CallEdges.count(From)) {
56 assert(CallEdges[From].size() == 1 &&
57 "A callsite should only appear once with either a known or a "
58 "zero (unknown) target value at this point");
59 SampledCalls[From].insert(To);
61 if (TailCallEdges.count(From)) {
62 assert(TailCallEdges[From].size() == 1 &&
63 "A callsite should only appear once with either a known or a "
64 "zero (unknown) target value at this point");
65 FuncRange *FromFRange = Binary->findFuncRange(From);
66 FuncRange *ToFRange = Binary->findFuncRange(To);
67 if (FromFRange != ToFRange)
68 SampledTailCalls[From].insert(To);
73 // Replace static edges with dynamic edges.
74 CallEdges = SampledCalls;
75 TailCallEdges = SampledTailCalls;
78 // Populate function-based edges. This is to speed up address to function
79 // translation.
80 for (auto Call : CallEdges)
81 for (auto Target : Call.second)
82 if (FuncRange *ToFRange = Binary->findFuncRange(Target))
83 CallEdgesF[Call.first].insert(ToFRange->Func);
85 for (auto Call : TailCallEdges) {
86 for (auto Target : Call.second) {
87 if (FuncRange *ToFRange = Binary->findFuncRange(Target)) {
88 TailCallEdgesF[Call.first].insert(ToFRange->Func);
89 TailCallTargetFuncs.insert(ToFRange->Func);
92 if (FuncRange *FromFRange = Binary->findFuncRange(Call.first))
93 FuncToTailCallMap[FromFRange->Func].push_back(Call.first);
96 #if LLVM_ENABLE_STATS
97 for (auto F : FuncToTailCallMap) {
98 assert(F.second.size() > 0 && "");
99 if (F.second.size() > 1)
100 TailCallFuncMultipleTailCalls++;
101 else
102 TailCallFuncSingleTailCalls++;
104 #endif
106 #ifndef NDEBUG
107 auto PrintCallTargets =
108 [&](const std::unordered_map<uint64_t, std::unordered_set<uint64_t>>
109 &CallTargets,
110 bool IsTailCall) {
111 for (const auto &Targets : CallTargets) {
112 for (const auto &Target : Targets.second) {
113 dbgs() << (IsTailCall ? "TailCall" : "Call");
114 dbgs() << " From " << format("%8" PRIx64, Targets.first) << " to "
115 << format("%8" PRIx64, Target) << "\n";
120 LLVM_DEBUG(dbgs() << "============================\n ";
121 dbgs() << "Call targets:\n";
122 PrintCallTargets(CallEdges, false);
123 dbgs() << "\nTail call targets:\n";
124 PrintCallTargets(CallEdges, true);
125 dbgs() << "============================\n";);
126 #endif
129 uint64_t MissingFrameInferrer::computeUniqueTailCallPath(
130 BinaryFunction *From, BinaryFunction *To, SmallVectorImpl<uint64_t> &Path) {
131 // Search for a unique path comprised of only tail call edges for a given
132 // source and target frame address on the a tail call graph that consists of
133 // only tail call edges. Note that only a unique path counts. Multiple paths
134 // are treated unreachable.
135 if (From == To)
136 return 1;
138 // Ignore cyclic paths. Since we are doing a recursive DFS walk, if the source
139 // frame being visited is already in the stack, it means we are seeing a
140 // cycle. This is done before querying the cached result because the cached
141 // result may be computed based on the same path. Consider the following case:
142 // A -> B, B -> A, A -> D
143 // When computing unique reachablity from A to D, the cached result for (B,D)
144 // should not be counted since the unique path B->A->D is basically the same
145 // path as A->D. Counting that with invalidate the uniqueness from A to D.
146 if (Visiting.contains(From))
147 return 0;
149 // If already computed, return the cached result.
150 auto I = UniquePaths.find({From, To});
151 if (I != UniquePaths.end()) {
152 Path.append(I->second.begin(), I->second.end());
153 return 1;
156 auto J = NonUniquePaths.find({From, To});
157 if (J != NonUniquePaths.end()) {
158 return J->second;
161 uint64_t Pos = Path.size();
163 // DFS walk each outgoing tail call edges.
164 // Bail out if we are already at the the maximum searching depth.
165 if (CurSearchingDepth == MaximumSearchDepth)
166 return 0;
169 if (!FuncToTailCallMap.count(From))
170 return 0;
172 CurSearchingDepth++;
173 Visiting.insert(From);
174 uint64_t NumPaths = 0;
175 for (auto TailCall : FuncToTailCallMap[From]) {
176 NumPaths += computeUniqueTailCallPath(TailCall, To, Path);
177 // Stop analyzing the remaining if we are already seeing more than one
178 // reachable paths.
179 if (NumPaths > 1)
180 break;
182 CurSearchingDepth--;
183 Visiting.erase(From);
185 // Undo already-computed path if it is not unique.
186 if (NumPaths != 1) {
187 Path.pop_back_n(Path.size() - Pos);
190 // Cache the result.
191 if (NumPaths == 1) {
192 UniquePaths[{From, To}].assign(Path.begin() + Pos, Path.end());
193 #if LLVM_ENABLE_STATS
194 auto &LocalPath = UniquePaths[{From, To}];
195 assert((LocalPath.size() <= MaximumSearchDepth + 1) &&
196 "Path should not be longer than the maximum searching depth");
197 TailCallMaxTailCallPath = std::max(uint64_t(LocalPath.size()),
198 TailCallMaxTailCallPath.getValue());
199 #endif
200 } else {
201 NonUniquePaths[{From, To}] = NumPaths;
204 return NumPaths;
207 uint64_t MissingFrameInferrer::computeUniqueTailCallPath(
208 uint64_t From, BinaryFunction *To, SmallVectorImpl<uint64_t> &Path) {
209 if (!TailCallEdgesF.count(From))
210 return 0;
211 Path.push_back(From);
212 uint64_t NumPaths = 0;
213 for (auto Target : TailCallEdgesF[From]) {
214 NumPaths += computeUniqueTailCallPath(Target, To, Path);
215 // Stop analyzing the remaining if we are already seeing more than one
216 // reachable paths.
217 if (NumPaths > 1)
218 break;
221 // Undo already-computed path if it is not unique.
222 if (NumPaths != 1)
223 Path.pop_back();
224 return NumPaths;
227 bool MissingFrameInferrer::inferMissingFrames(
228 uint64_t From, uint64_t To, SmallVectorImpl<uint64_t> &UniquePath) {
229 assert(!TailCallEdgesF.count(From) &&
230 "transition between From and To cannot be via a tailcall otherwise "
231 "they would not show up at the same time");
232 UniquePath.push_back(From);
233 uint64_t Pos = UniquePath.size();
235 FuncRange *ToFRange = Binary->findFuncRange(To);
236 if (!ToFRange)
237 return false;
239 // Bail out if caller has no known outgoing call edges.
240 if (!CallEdgesF.count(From))
241 return false;
243 // Done with the inference if the calle is reachable via a single callsite.
244 // This may not be accurate but it improves the search throughput.
245 if (llvm::is_contained(CallEdgesF[From], ToFRange->Func))
246 return true;
248 // Bail out if callee is not tailcall reachable at all.
249 if (!TailCallTargetFuncs.contains(ToFRange->Func))
250 return false;
252 Visiting.clear();
253 CurSearchingDepth = 0;
254 uint64_t NumPaths = 0;
255 for (auto Target : CallEdgesF[From]) {
256 NumPaths +=
257 computeUniqueTailCallPath(Target, ToFRange->Func, UniquePath);
258 // Stop analyzing the remaining if we are already seeing more than one
259 // reachable paths.
260 if (NumPaths > 1)
261 break;
264 // Undo already-computed path if it is not unique.
265 if (NumPaths != 1) {
266 UniquePath.pop_back_n(UniquePath.size() - Pos);
267 assert(UniquePath.back() == From && "broken path");
270 #if LLVM_ENABLE_STATS
271 if (NumPaths == 1) {
272 if (ReachableViaUniquePaths.insert({From, ToFRange->StartAddress}).second)
273 TailCallUniReachable++;
274 } else if (NumPaths == 0) {
275 if (Unreachables.insert({From, ToFRange->StartAddress}).second) {
276 TailCallUnreachable++;
277 LLVM_DEBUG(dbgs() << "No path found from "
278 << format("%8" PRIx64 ":", From) << " to "
279 << format("%8" PRIx64 ":", ToFRange->StartAddress)
280 << "\n");
282 } else if (NumPaths > 1) {
283 if (ReachableViaMultiPaths.insert({From, ToFRange->StartAddress})
284 .second) {
285 TailCallMultiReachable++;
286 LLVM_DEBUG(dbgs() << "Multiple paths found from "
287 << format("%8" PRIx64 ":", From) << " to "
288 << format("%8" PRIx64 ":", ToFRange->StartAddress)
289 << "\n");
292 #endif
294 return NumPaths == 1;
297 void MissingFrameInferrer::inferMissingFrames(
298 const SmallVectorImpl<uint64_t> &Context,
299 SmallVectorImpl<uint64_t> &NewContext) {
300 if (Context.size() == 1) {
301 NewContext = Context;
302 return;
305 NewContext.clear();
306 for (uint64_t I = 1; I < Context.size(); I++) {
307 inferMissingFrames(Context[I - 1], Context[I], NewContext);
309 NewContext.push_back(Context.back());
311 assert((NewContext.size() >= Context.size()) &&
312 "Inferred context should include all frames in the original context");
313 assert((NewContext.size() > Context.size() || NewContext == Context) &&
314 "Inferred context should be exactly the same "
315 "with the original context");