//===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BinaryFunctionCallGraph class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"

#include <stack>

#define DEBUG_TYPE "callgraph"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltCategory;

static cl::opt<std::string>
    DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
              cl::cat(BoltCategory));

} // namespace opts

namespace llvm {
namespace bolt {
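
// Register \p BF in the callgraph: create a node with the given size and
// sample count, and keep the Funcs vector and the FuncToNodeId map in sync
// with the node id handed out by the base CallGraph.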
CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
                                                   uint32_t Size,
                                                   uint64_t Samples) {
  NodeId Id = CallGraph::addNode(Size, Samples);
  assert(size_t(Id) == Funcs.size());
  Funcs.push_back(BF);
  FuncToNodeId[BF] = Id;
  assert(Funcs[Id] == BF);
  return Id;
}
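
// Build a bottom-up traversal order over the callgraph using an iterative
// DFS: a node is pushed and marked VISITING before its callees are explored,
// and it is appended to the order only when popped a second time, i.e. after
// the callees reachable from it have been emitted. The VISITING marker also
// breaks cycles.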
std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
  NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
                      "CG breakdown", "CG breakdown", opts::TimeOpts);
  std::deque<BinaryFunction *> TopologicalOrder;
  enum NodeStatus { NEW, VISITING, VISITED };
  std::vector<NodeStatus> NodeStatus(Funcs.size());
  std::stack<NodeId> Worklist;

  for (BinaryFunction *Func : Funcs) {
    const NodeId Id = FuncToNodeId.at(Func);
    Worklist.push(Id);
    NodeStatus[Id] = NEW;
  }

  while (!Worklist.empty()) {
    const NodeId FuncId = Worklist.top();
    Worklist.pop();

    if (NodeStatus[FuncId] == VISITED)
      continue;

    if (NodeStatus[FuncId] == VISITING) {
      TopologicalOrder.push_back(Funcs[FuncId]);
      NodeStatus[FuncId] = VISITED;
      continue;
    }

    assert(NodeStatus[FuncId] == NEW);
    NodeStatus[FuncId] = VISITING;
    Worklist.push(FuncId);
    for (const NodeId Callee : successors(FuncId)) {
      if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
        continue;
      Worklist.push(Callee);
    }
  }

  return TopologicalOrder;
}
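
// Build a callgraph with one node per BinaryFunction that passes \p Filter
// and one weighted arc per (caller, callee) pair. Arc weights are taken from
// basic-block execution counts or, for indirect calls and stale profiles,
// from perf data; the Use*/Include* flags control how sizes, offsets, and
// counts are computed.
//
// Illustrative invocation only; the filter and flag values below are made up
// for the example, and real callers (e.g. the function reordering pass)
// supply their own:
//   BinaryFunctionCallGraph Cg = buildCallGraph(
//       BC, [](const BinaryFunction &BF) { return !BF.hasProfile(); },
//       /*CgFromPerfData=*/true, /*IncludeSplitCalls=*/false,
//       /*UseFunctionHotSize=*/true, /*UseSplitHotSize=*/false,
//       /*UseEdgeCounts=*/true, /*IgnoreRecursiveCalls=*/false);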
BinaryFunctionCallGraph
buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
               bool IncludeSplitCalls, bool UseFunctionHotSize,
               bool UseSplitHotSize, bool UseEdgeCounts,
               bool IgnoreRecursiveCalls) {
  NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
                      "CG breakdown", opts::TimeOpts);
  BinaryFunctionCallGraph Cg;
  static constexpr uint64_t COUNT_NO_PROFILE =
      BinaryBasicBlock::COUNT_NO_PROFILE;

  // Compute function size
  auto functionSize = [&](const BinaryFunction *Function) {
    return UseFunctionHotSize && Function->isSplit()
               ? Function->estimateHotSize(UseSplitHotSize)
               : Function->estimateSize();
  };

  // Add call graph nodes.
  auto lookupNode = [&](BinaryFunction *Function) {
    const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split. This is
      // because emitFunctions will emit the hot part first in the order that
      // is computed by ReorderFunctions. The cold part will be emitted with
      // the rest of the cold functions and code.
      const size_t Size = functionSize(Function);
      // NOTE: for functions without a profile, we set the number of samples
      // to zero. This will keep these functions from appearing in the hot
      // section. This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block. The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples. Results from performance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
    }
    return Id;
  };

  // Add call graph edges.
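  // Counters for the summary printed at the end: callsites seen, callsites
  // that could not be processed (unresolved or filtered targets), callsites
  // without a profile count, recursive callsites, and functions for which we
  // fell back to perf data.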
  uint64_t NotProcessed = 0;
  uint64_t TotalCallsites = 0;
  uint64_t NoProfileCallsites = 0;
  uint64_t NumFallbacks = 0;
  uint64_t RecursiveCallsites = 0;
  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction *Function = &It.second;

    if (Filter(*Function))
      continue;

    const CallGraph::NodeId SrcId = lookupNode(Function);
    // Offset of the current basic block from the beginning of the function
    uint64_t Offset = 0;
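
    // Resolve the callee symbol to a BinaryFunction and, if it survives the
    // recursion and filter checks, record the call as a weighted arc from the
    // current function at the current offset. Returns false if the callsite
    // could not be processed.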
    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      BinaryFunction *DstFunc =
          DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
      if (!DstFunc) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
        return false;
      }

      if (DstFunc == Function) {
        LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
                          << *DstFunc << "\n");
        ++RecursiveCallsites;
        if (IgnoreRecursiveCalls)
          return false;
      }
      if (Filter(*DstFunc)) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
                   << "\n");
        return false;
      }

      const CallGraph::NodeId DstId = lookupNode(DstFunc);
      const bool IsValidCount = Count != COUNT_NO_PROFILE;
      const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
      if (!IsValidCount)
        ++NoProfileCallsites;
      Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
      LLVM_DEBUG(if (opts::Verbosity > 1) {
        dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
               << *DstFunc << " @ " << Offset << "\n";
      });
      return true;
    };

    // Pairs of (symbol, count) for each target at this callsite.
    using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
    using CallInfoTy = std::vector<TargetDesc>;

    // Get pairs of (symbol, count) for each target at this callsite.
    // If the call is to an unknown function the symbol will be nullptr.
    // If there is no profiling data the count will be COUNT_NO_PROFILE.
    auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
      CallInfoTy Counts;
      const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);

      // If this is an indirect call use perf data directly.
      if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
        const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
            Inst, "CallProfile");
        for (const IndirectCallProfile &CSI : ICSP)
          if (CSI.Symbol)
            Counts.emplace_back(CSI.Symbol, CSI.Count);
      } else {
        const uint64_t Count = BB->getExecutionCount();
        Counts.emplace_back(DstSym, Count);
      }

      return Counts;
    };

    // If the function has an invalid profile, try to use the perf data
    // directly (if requested). If there is no perf data for this function,
    // fall back to the CFG walker which attempts to handle missing data.
    if (!Function->hasValidProfile() && CgFromPerfData &&
        !Function->getAllCallSites().empty()) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                 << " for " << *Function << "\n");
      ++NumFallbacks;
      const size_t Size = functionSize(Function);
      for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
        ++TotalCallsites;

        if (!CSI.Symbol)
          continue;

        // The computed offset may exceed the hot part of the function; hence,
        // bound it by the size.
        Offset = CSI.Offset;
        if (Offset > Size)
          Offset = Size;

        if (!recordCall(CSI.Symbol, CSI.Count))
          ++NotProcessed;
      }
    } else {
      for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
        // Don't count calls from split blocks unless requested.
        if (BB->isSplit() && !IncludeSplitCalls)
          continue;

        // Determine whether the block is included in Function's (hot) size.
        // See BinaryFunction::estimateHotSize.
        bool BBIncludedInFunctionSize = false;
        if (UseFunctionHotSize && Function->isSplit()) {
          if (UseSplitHotSize)
            BBIncludedInFunctionSize = !BB->isSplit();
          else
            BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
        } else {
          BBIncludedInFunctionSize = true;
        }
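
        // Offset accumulates the code size of the instructions seen so far
        // in blocks that count towards the function size; it serves as the
        // callsite offset passed to recordCall/incArcWeight.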
        for (MCInst &Inst : *BB) {
          // Find call instructions and extract target symbols from each one.
          if (BC.MIB->isCall(Inst)) {
            const CallInfoTy CallInfo = getCallInfo(BB, Inst);

            if (!CallInfo.empty()) {
              for (const TargetDesc &CI : CallInfo) {
                ++TotalCallsites;
                if (!recordCall(CI.first, CI.second))
                  ++NotProcessed;
              }
            } else {
              ++TotalCallsites;
              ++NotProcessed;
            }
          }
          // Increase Offset if needed
          if (BBIncludedInFunctionSize)
            Offset += BC.computeCodeSize(&Inst, &Inst + 1);
        }
      }
    }
  }

#ifndef NDEBUG
  bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
#else
  bool PrintInfo = false;
#endif

  if (PrintInfo || opts::Verbosity > 0)
    outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
                     "(%u recursive), density = %.6lf, %u callsites not "
                     "processed, %u callsites with invalid profile, "
                     "used perf data for %u stale functions.\n",
                     Cg.numNodes(), TotalCallsites, RecursiveCallsites,
                     Cg.density(), NotProcessed, NoProfileCallsites,
                     NumFallbacks);
  if (opts::DumpCGDot.getNumOccurrences()) {
    Cg.printDot(opts::DumpCGDot, [&](CallGraph::NodeId Id) {
      return Cg.nodeIdToFunc(Id)->getPrintName();
    });
  }

  return Cg;
}

} // namespace bolt
} // namespace llvm