[TargetVersion] Only enable on RISC-V and AArch64 (#115991)
[llvm-project.git] / bolt / lib / Core / BinaryFunctionCallGraph.cpp
blobb4b7897aa426ae70e0e04f5ce958728e444888ca
1 //===- bolt/Core/BinaryFunctionCallGraph.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunctionCallGraph class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryFunctionCallGraph.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Timer.h"
18 #include <stack>
20 #define DEBUG_TYPE "callgraph"
22 using namespace llvm;
24 namespace opts {
26 extern cl::opt<bool> TimeOpts;
27 extern cl::opt<unsigned> Verbosity;
28 extern cl::OptionCategory BoltCategory;
30 static cl::opt<std::string>
31 DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
32 cl::cat(BoltCategory));
34 } // namespace opts
36 namespace llvm {
37 namespace bolt {
39 CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
40 uint32_t Size,
41 uint64_t Samples) {
42 NodeId Id = CallGraph::addNode(Size, Samples);
43 assert(size_t(Id) == Funcs.size());
44 Funcs.push_back(BF);
45 FuncToNodeId[BF] = Id;
46 assert(Funcs[Id] == BF);
47 return Id;
50 std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
51 NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
52 "CG breakdown", "CG breakdown", opts::TimeOpts);
53 std::deque<BinaryFunction *> TopologicalOrder;
54 enum NodeStatus { NEW, VISITING, VISITED };
55 std::vector<NodeStatus> NodeStatus(Funcs.size());
56 std::stack<NodeId> Worklist;
58 for (BinaryFunction *Func : Funcs) {
59 auto It = FuncToNodeId.find(Func);
60 assert(It != FuncToNodeId.end());
61 const NodeId Id = It->second;
62 Worklist.push(Id);
63 NodeStatus[Id] = NEW;
66 while (!Worklist.empty()) {
67 const NodeId FuncId = Worklist.top();
68 Worklist.pop();
70 if (NodeStatus[FuncId] == VISITED)
71 continue;
73 if (NodeStatus[FuncId] == VISITING) {
74 TopologicalOrder.push_back(Funcs[FuncId]);
75 NodeStatus[FuncId] = VISITED;
76 continue;
79 assert(NodeStatus[FuncId] == NEW);
80 NodeStatus[FuncId] = VISITING;
81 Worklist.push(FuncId);
82 for (const NodeId Callee : successors(FuncId)) {
83 if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
84 continue;
85 Worklist.push(Callee);
89 return TopologicalOrder;
92 BinaryFunctionCallGraph
93 buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
94 bool IncludeSplitCalls, bool UseFunctionHotSize,
95 bool UseSplitHotSize, bool UseEdgeCounts,
96 bool IgnoreRecursiveCalls) {
97 NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
98 "CG breakdown", opts::TimeOpts);
99 BinaryFunctionCallGraph Cg;
100 static constexpr uint64_t COUNT_NO_PROFILE =
101 BinaryBasicBlock::COUNT_NO_PROFILE;
103 // Compute function size
104 auto functionSize = [&](const BinaryFunction *Function) {
105 return UseFunctionHotSize && Function->isSplit()
106 ? Function->estimateHotSize(UseSplitHotSize)
107 : Function->estimateSize();
110 // Add call graph nodes.
111 auto lookupNode = [&](BinaryFunction *Function) {
112 const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
113 if (Id == CallGraph::InvalidId) {
114 // It's ok to use the hot size here when the function is split. This is
115 // because emitFunctions will emit the hot part first in the order that is
116 // computed by ReorderFunctions. The cold part will be emitted with the
117 // rest of the cold functions and code.
118 const size_t Size = functionSize(Function);
119 // NOTE: for functions without a profile, we set the number of samples
120 // to zero. This will keep these functions from appearing in the hot
121 // section. This is a little weird because we wouldn't be trying to
122 // create a node for a function unless it was the target of a call from
123 // a hot block. The alternative would be to set the count to one or
124 // accumulate the number of calls from the callsite into the function
125 // samples. Results from perfomance testing seem to favor the zero
126 // count though, so I'm leaving it this way for now.
127 return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
129 return Id;
132 // Add call graph edges.
133 uint64_t NotProcessed = 0;
134 uint64_t TotalCallsites = 0;
135 uint64_t NoProfileCallsites = 0;
136 uint64_t NumFallbacks = 0;
137 uint64_t RecursiveCallsites = 0;
138 for (auto &It : BC.getBinaryFunctions()) {
139 BinaryFunction *Function = &It.second;
141 if (Filter(*Function))
142 continue;
144 const CallGraph::NodeId SrcId = lookupNode(Function);
145 // Offset of the current basic block from the beginning of the function
146 uint64_t Offset = 0;
148 auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
149 BinaryFunction *DstFunc =
150 DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
151 if (!DstFunc) {
152 LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
153 << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
154 return false;
157 if (DstFunc == Function) {
158 LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
159 << *DstFunc << "\n");
160 ++RecursiveCallsites;
161 if (IgnoreRecursiveCalls)
162 return false;
164 if (Filter(*DstFunc)) {
165 LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
166 << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
167 << '\n');
168 return false;
171 const CallGraph::NodeId DstId = lookupNode(DstFunc);
172 const bool IsValidCount = Count != COUNT_NO_PROFILE;
173 const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
174 if (!IsValidCount)
175 ++NoProfileCallsites;
176 Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
177 LLVM_DEBUG(if (opts::Verbosity > 1) {
178 dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
179 << *DstFunc << " @ " << Offset << "\n";
181 return true;
184 // Pairs of (symbol, count) for each target at this callsite.
185 using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
186 using CallInfoTy = std::vector<TargetDesc>;
188 // Get pairs of (symbol, count) for each target at this callsite.
189 // If the call is to an unknown function the symbol will be nullptr.
190 // If there is no profiling data the count will be COUNT_NO_PROFILE.
191 auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
192 CallInfoTy Counts;
193 const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
195 // If this is an indirect call use perf data directly.
196 if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
197 const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
198 Inst, "CallProfile");
199 for (const IndirectCallProfile &CSI : ICSP)
200 if (CSI.Symbol)
201 Counts.emplace_back(CSI.Symbol, CSI.Count);
202 } else {
203 const uint64_t Count = BB->getExecutionCount();
204 Counts.emplace_back(DstSym, Count);
207 return Counts;
210 // If the function has an invalid profile, try to use the perf data
211 // directly (if requested). If there is no perf data for this function,
212 // fall back to the CFG walker which attempts to handle missing data.
213 if (!Function->hasValidProfile() && CgFromPerfData &&
214 !Function->getAllCallSites().empty()) {
215 LLVM_DEBUG(
216 dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
217 << " for " << *Function << "\n");
218 ++NumFallbacks;
219 const size_t Size = functionSize(Function);
220 for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
221 ++TotalCallsites;
223 if (!CSI.Symbol)
224 continue;
226 // The computed offset may exceed the hot part of the function; hence,
227 // bound it by the size.
228 Offset = CSI.Offset;
229 if (Offset > Size)
230 Offset = Size;
232 if (!recordCall(CSI.Symbol, CSI.Count))
233 ++NotProcessed;
235 } else {
236 for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
237 // Don't count calls from split blocks unless requested.
238 if (BB->isSplit() && !IncludeSplitCalls)
239 continue;
241 // Determine whether the block is included in Function's (hot) size
242 // See BinaryFunction::estimateHotSize
243 bool BBIncludedInFunctionSize = false;
244 if (UseFunctionHotSize && Function->isSplit()) {
245 if (UseSplitHotSize)
246 BBIncludedInFunctionSize = !BB->isSplit();
247 else
248 BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
249 } else {
250 BBIncludedInFunctionSize = true;
253 for (MCInst &Inst : *BB) {
254 // Find call instructions and extract target symbols from each one.
255 if (BC.MIB->isCall(Inst)) {
256 const CallInfoTy CallInfo = getCallInfo(BB, Inst);
258 if (!CallInfo.empty()) {
259 for (const TargetDesc &CI : CallInfo) {
260 ++TotalCallsites;
261 if (!recordCall(CI.first, CI.second))
262 ++NotProcessed;
264 } else {
265 ++TotalCallsites;
266 ++NotProcessed;
269 // Increase Offset if needed
270 if (BBIncludedInFunctionSize)
271 Offset += BC.computeCodeSize(&Inst, &Inst + 1);
277 #ifndef NDEBUG
278 bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
279 #else
280 bool PrintInfo = false;
281 #endif
282 if (PrintInfo || opts::Verbosity > 0)
283 BC.outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
284 "(%u recursive), density = %.6lf, %u callsites not "
285 "processed, %u callsites with invalid profile, "
286 "used perf data for %u stale functions.\n",
287 Cg.numNodes(), TotalCallsites, RecursiveCallsites,
288 Cg.density(), NotProcessed, NoProfileCallsites,
289 NumFallbacks);
291 if (opts::DumpCGDot.getNumOccurrences()) {
292 Cg.printDot(opts::DumpCGDot, [&](CallGraph::NodeId Id) {
293 return Cg.nodeIdToFunc(Id)->getPrintName();
297 return Cg;
300 } // namespace bolt
301 } // namespace llvm