1 //===- bolt/Core/BinaryFunctionCallGraph.cpp ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the BinaryFunctionCallGraph class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryFunctionCallGraph.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Timer.h"
20 #define DEBUG_TYPE "callgraph"
26 extern cl::opt
<bool> TimeOpts
;
27 extern cl::opt
<unsigned> Verbosity
;
28 extern cl::OptionCategory BoltCategory
;
30 static cl::opt
<std::string
>
31 DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
32 cl::cat(BoltCategory
));
39 CallGraph::NodeId
BinaryFunctionCallGraph::addNode(BinaryFunction
*BF
,
42 NodeId Id
= CallGraph::addNode(Size
, Samples
);
43 assert(size_t(Id
) == Funcs
.size());
45 FuncToNodeId
[BF
] = Id
;
46 assert(Funcs
[Id
] == BF
);
50 std::deque
<BinaryFunction
*> BinaryFunctionCallGraph::buildTraversalOrder() {
51 NamedRegionTimer
T1("buildcgorder", "Build cg traversal order",
52 "CG breakdown", "CG breakdown", opts::TimeOpts
);
53 std::deque
<BinaryFunction
*> TopologicalOrder
;
54 enum NodeStatus
{ NEW
, VISITING
, VISITED
};
55 std::vector
<NodeStatus
> NodeStatus(Funcs
.size());
56 std::stack
<NodeId
> Worklist
;
58 for (BinaryFunction
*Func
: Funcs
) {
59 auto It
= FuncToNodeId
.find(Func
);
60 assert(It
!= FuncToNodeId
.end());
61 const NodeId Id
= It
->second
;
66 while (!Worklist
.empty()) {
67 const NodeId FuncId
= Worklist
.top();
70 if (NodeStatus
[FuncId
] == VISITED
)
73 if (NodeStatus
[FuncId
] == VISITING
) {
74 TopologicalOrder
.push_back(Funcs
[FuncId
]);
75 NodeStatus
[FuncId
] = VISITED
;
79 assert(NodeStatus
[FuncId
] == NEW
);
80 NodeStatus
[FuncId
] = VISITING
;
81 Worklist
.push(FuncId
);
82 for (const NodeId Callee
: successors(FuncId
)) {
83 if (NodeStatus
[Callee
] == VISITING
|| NodeStatus
[Callee
] == VISITED
)
85 Worklist
.push(Callee
);
89 return TopologicalOrder
;
92 BinaryFunctionCallGraph
93 buildCallGraph(BinaryContext
&BC
, CgFilterFunction Filter
, bool CgFromPerfData
,
94 bool IncludeSplitCalls
, bool UseFunctionHotSize
,
95 bool UseSplitHotSize
, bool UseEdgeCounts
,
96 bool IgnoreRecursiveCalls
) {
97 NamedRegionTimer
T1("buildcg", "Callgraph construction", "CG breakdown",
98 "CG breakdown", opts::TimeOpts
);
99 BinaryFunctionCallGraph Cg
;
100 static constexpr uint64_t COUNT_NO_PROFILE
=
101 BinaryBasicBlock::COUNT_NO_PROFILE
;
103 // Compute function size
104 auto functionSize
= [&](const BinaryFunction
*Function
) {
105 return UseFunctionHotSize
&& Function
->isSplit()
106 ? Function
->estimateHotSize(UseSplitHotSize
)
107 : Function
->estimateSize();
110 // Add call graph nodes.
111 auto lookupNode
= [&](BinaryFunction
*Function
) {
112 const CallGraph::NodeId Id
= Cg
.maybeGetNodeId(Function
);
113 if (Id
== CallGraph::InvalidId
) {
114 // It's ok to use the hot size here when the function is split. This is
115 // because emitFunctions will emit the hot part first in the order that is
116 // computed by ReorderFunctions. The cold part will be emitted with the
117 // rest of the cold functions and code.
118 const size_t Size
= functionSize(Function
);
119 // NOTE: for functions without a profile, we set the number of samples
120 // to zero. This will keep these functions from appearing in the hot
121 // section. This is a little weird because we wouldn't be trying to
122 // create a node for a function unless it was the target of a call from
123 // a hot block. The alternative would be to set the count to one or
124 // accumulate the number of calls from the callsite into the function
125 // samples. Results from perfomance testing seem to favor the zero
126 // count though, so I'm leaving it this way for now.
127 return Cg
.addNode(Function
, Size
, Function
->getKnownExecutionCount());
132 // Add call graph edges.
133 uint64_t NotProcessed
= 0;
134 uint64_t TotalCallsites
= 0;
135 uint64_t NoProfileCallsites
= 0;
136 uint64_t NumFallbacks
= 0;
137 uint64_t RecursiveCallsites
= 0;
138 for (auto &It
: BC
.getBinaryFunctions()) {
139 BinaryFunction
*Function
= &It
.second
;
141 if (Filter(*Function
))
144 const CallGraph::NodeId SrcId
= lookupNode(Function
);
145 // Offset of the current basic block from the beginning of the function
148 auto recordCall
= [&](const MCSymbol
*DestSymbol
, const uint64_t Count
) {
149 BinaryFunction
*DstFunc
=
150 DestSymbol
? BC
.getFunctionForSymbol(DestSymbol
) : nullptr;
152 LLVM_DEBUG(if (opts::Verbosity
> 1) dbgs()
153 << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
157 if (DstFunc
== Function
) {
158 LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
159 << *DstFunc
<< "\n");
160 ++RecursiveCallsites
;
161 if (IgnoreRecursiveCalls
)
164 if (Filter(*DstFunc
)) {
165 LLVM_DEBUG(if (opts::Verbosity
> 1) dbgs()
166 << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
171 const CallGraph::NodeId DstId
= lookupNode(DstFunc
);
172 const bool IsValidCount
= Count
!= COUNT_NO_PROFILE
;
173 const uint64_t AdjCount
= UseEdgeCounts
&& IsValidCount
? Count
: 1;
175 ++NoProfileCallsites
;
176 Cg
.incArcWeight(SrcId
, DstId
, AdjCount
, Offset
);
177 LLVM_DEBUG(if (opts::Verbosity
> 1) {
178 dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
<< " -> "
179 << *DstFunc
<< " @ " << Offset
<< "\n";
184 // Pairs of (symbol, count) for each target at this callsite.
185 using TargetDesc
= std::pair
<const MCSymbol
*, uint64_t>;
186 using CallInfoTy
= std::vector
<TargetDesc
>;
188 // Get pairs of (symbol, count) for each target at this callsite.
189 // If the call is to an unknown function the symbol will be nullptr.
190 // If there is no profiling data the count will be COUNT_NO_PROFILE.
191 auto getCallInfo
= [&](const BinaryBasicBlock
*BB
, const MCInst
&Inst
) {
193 const MCSymbol
*DstSym
= BC
.MIB
->getTargetSymbol(Inst
);
195 // If this is an indirect call use perf data directly.
196 if (!DstSym
&& BC
.MIB
->hasAnnotation(Inst
, "CallProfile")) {
197 const auto &ICSP
= BC
.MIB
->getAnnotationAs
<IndirectCallSiteProfile
>(
198 Inst
, "CallProfile");
199 for (const IndirectCallProfile
&CSI
: ICSP
)
201 Counts
.emplace_back(CSI
.Symbol
, CSI
.Count
);
203 const uint64_t Count
= BB
->getExecutionCount();
204 Counts
.emplace_back(DstSym
, Count
);
210 // If the function has an invalid profile, try to use the perf data
211 // directly (if requested). If there is no perf data for this function,
212 // fall back to the CFG walker which attempts to handle missing data.
213 if (!Function
->hasValidProfile() && CgFromPerfData
&&
214 !Function
->getAllCallSites().empty()) {
216 dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
217 << " for " << *Function
<< "\n");
219 const size_t Size
= functionSize(Function
);
220 for (const IndirectCallProfile
&CSI
: Function
->getAllCallSites()) {
226 // The computed offset may exceed the hot part of the function; hence,
227 // bound it by the size.
232 if (!recordCall(CSI
.Symbol
, CSI
.Count
))
236 for (BinaryBasicBlock
*BB
: Function
->getLayout().blocks()) {
237 // Don't count calls from split blocks unless requested.
238 if (BB
->isSplit() && !IncludeSplitCalls
)
241 // Determine whether the block is included in Function's (hot) size
242 // See BinaryFunction::estimateHotSize
243 bool BBIncludedInFunctionSize
= false;
244 if (UseFunctionHotSize
&& Function
->isSplit()) {
246 BBIncludedInFunctionSize
= !BB
->isSplit();
248 BBIncludedInFunctionSize
= BB
->getKnownExecutionCount() != 0;
250 BBIncludedInFunctionSize
= true;
253 for (MCInst
&Inst
: *BB
) {
254 // Find call instructions and extract target symbols from each one.
255 if (BC
.MIB
->isCall(Inst
)) {
256 const CallInfoTy CallInfo
= getCallInfo(BB
, Inst
);
258 if (!CallInfo
.empty()) {
259 for (const TargetDesc
&CI
: CallInfo
) {
261 if (!recordCall(CI
.first
, CI
.second
))
269 // Increase Offset if needed
270 if (BBIncludedInFunctionSize
)
271 Offset
+= BC
.computeCodeSize(&Inst
, &Inst
+ 1);
278 bool PrintInfo
= DebugFlag
&& isCurrentDebugType("callgraph");
280 bool PrintInfo
= false;
282 if (PrintInfo
|| opts::Verbosity
> 0)
283 BC
.outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
284 "(%u recursive), density = %.6lf, %u callsites not "
285 "processed, %u callsites with invalid profile, "
286 "used perf data for %u stale functions.\n",
287 Cg
.numNodes(), TotalCallsites
, RecursiveCallsites
,
288 Cg
.density(), NotProcessed
, NoProfileCallsites
,
291 if (opts::DumpCGDot
.getNumOccurrences()) {
292 Cg
.printDot(opts::DumpCGDot
, [&](CallGraph::NodeId Id
) {
293 return Cg
.nodeIdToFunc(Id
)->getPrintName();