1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements BinaryFunction member functions related to processing
10 // the execution profile.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
21 #define DEBUG_TYPE "bolt-prof"
28 extern cl::OptionCategory BoltOptCategory
;
30 cl::opt
<IndirectCallPromotionType
> ICP(
31 "indirect-call-promotion", cl::init(ICP_NONE
),
32 cl::desc("indirect call promotion"),
34 clEnumValN(ICP_NONE
, "none", "do not perform indirect call promotion"),
35 clEnumValN(ICP_CALLS
, "calls", "perform ICP on indirect calls"),
36 clEnumValN(ICP_JUMP_TABLES
, "jump-tables",
37 "perform ICP on jump tables"),
38 clEnumValN(ICP_ALL
, "all", "perform ICP on calls and jump tables")),
39 cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
41 static cl::alias
ICPAlias("icp",
42 cl::desc("Alias for --indirect-call-promotion"),
45 extern cl::opt
<JumpTableSupportLevel
> JumpTables
;
47 static cl::opt
<bool> FixFuncCounts(
49 cl::desc("adjust function counts based on basic blocks execution count"),
50 cl::Hidden
, cl::cat(BoltOptCategory
));
52 static cl::opt
<bool> FixBlockCounts(
54 cl::desc("adjust block counts based on outgoing branch counts"),
55 cl::init(true), cl::Hidden
, cl::cat(BoltOptCategory
));
58 InferFallThroughs("infer-fall-throughs",
59 cl::desc("infer execution count for fall-through blocks"),
60 cl::Hidden
, cl::cat(BoltOptCategory
));
67 void BinaryFunction::postProcessProfile() {
68 if (!hasValidProfile()) {
73 if (!(getProfileFlags() & PF_LBR
))
76 // If we have at least some branch data for the function indicate that it
78 if (opts::FixFuncCounts
&& ExecutionCount
== 0)
81 // Compute preliminary execution count for each basic block.
82 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
83 if ((!BB
->isEntryPoint() && !BB
->isLandingPad()) ||
84 BB
->ExecutionCount
== BinaryBasicBlock::COUNT_NO_PROFILE
)
85 BB
->ExecutionCount
= 0;
87 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
88 auto SuccBIIter
= BB
->branch_info_begin();
89 for (BinaryBasicBlock
*Succ
: BB
->successors()) {
90 // All incoming edges to the primary entry have been accounted for, thus
91 // we skip the update here.
92 if (SuccBIIter
->Count
!= BinaryBasicBlock::COUNT_NO_PROFILE
&&
93 Succ
!= BasicBlocks
.front())
94 Succ
->setExecutionCount(Succ
->getExecutionCount() + SuccBIIter
->Count
);
99 // Fix for old profiles.
100 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
101 if (BB
->size() != 1 || BB
->succ_size() != 1)
104 if (BB
->getKnownExecutionCount() == 0)
107 MCInst
*Instr
= BB
->getFirstNonPseudoInstr();
108 assert(Instr
&& "expected non-pseudo instr");
109 if (!BC
.MIB
->hasAnnotation(*Instr
, "NOP"))
112 BinaryBasicBlock
*FTSuccessor
= BB
->getSuccessor();
113 BinaryBasicBlock::BinaryBranchInfo
&BI
= BB
->getBranchInfo(*FTSuccessor
);
115 BI
.Count
= BB
->getKnownExecutionCount();
116 FTSuccessor
->setExecutionCount(FTSuccessor
->getKnownExecutionCount() +
121 if (opts::FixBlockCounts
) {
122 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
123 // Make sure that execution count of a block is at least the branch count
124 // of an incoming/outgoing jump.
125 auto SuccBIIter
= BB
->branch_info_begin();
126 for (BinaryBasicBlock
*Succ
: BB
->successors()) {
127 uint64_t Count
= SuccBIIter
->Count
;
128 if (Count
!= BinaryBasicBlock::COUNT_NO_PROFILE
&& Count
> 0) {
129 Succ
->setExecutionCount(std::max(Succ
->getExecutionCount(), Count
));
130 BB
->setExecutionCount(std::max(BB
->getExecutionCount(), Count
));
134 // Make sure that execution count of a block is at least the number of
135 // function calls from the block.
136 for (MCInst
&Inst
: *BB
) {
137 // Ignore non-call instruction
138 if (!BC
.MIB
->isCall(Inst
))
141 auto CountAnnt
= BC
.MIB
->tryGetAnnotationAs
<uint64_t>(Inst
, "Count");
143 BB
->setExecutionCount(std::max(BB
->getExecutionCount(), *CountAnnt
));
148 if (opts::InferFallThroughs
)
149 inferFallThroughCounts();
151 // Update profile information for jump tables based on CFG branch data.
152 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
153 const MCInst
*LastInstr
= BB
->getLastNonPseudoInstr();
156 const uint64_t JTAddress
= BC
.MIB
->getJumpTable(*LastInstr
);
159 JumpTable
*JT
= getJumpTableContainingAddress(JTAddress
);
163 uint64_t TotalBranchCount
= 0;
164 for (const BinaryBasicBlock::BinaryBranchInfo
&BranchInfo
:
166 TotalBranchCount
+= BranchInfo
.Count
;
168 JT
->Count
+= TotalBranchCount
;
170 if (opts::ICP
< ICP_JUMP_TABLES
&& opts::JumpTables
< JTS_AGGRESSIVE
)
173 if (JT
->Counts
.empty())
174 JT
->Counts
.resize(JT
->Entries
.size());
175 auto EI
= JT
->Entries
.begin();
176 uint64_t Delta
= (JTAddress
- JT
->getAddress()) / JT
->EntrySize
;
178 while (EI
!= JT
->Entries
.end()) {
179 const BinaryBasicBlock
*TargetBB
= getBasicBlockForLabel(*EI
);
181 const BinaryBasicBlock::BinaryBranchInfo
&BranchInfo
=
182 BB
->getBranchInfo(*TargetBB
);
183 assert(Delta
< JT
->Counts
.size());
184 JT
->Counts
[Delta
].Count
+= BranchInfo
.Count
;
185 JT
->Counts
[Delta
].Mispreds
+= BranchInfo
.MispredictedCount
;
189 // A label marks the start of another jump table.
190 if (JT
->Labels
.count(Delta
* JT
->EntrySize
))
196 void BinaryFunction::mergeProfileDataInto(BinaryFunction
&BF
) const {
197 // No reason to merge invalid or empty profiles into BF.
198 if (!hasValidProfile())
201 // Update function execution count.
202 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE
)
203 BF
.setExecutionCount(BF
.getKnownExecutionCount() + getExecutionCount());
205 // Since we are merging a valid profile, the new profile should be valid too.
206 // It has either already been valid, or it has been cleaned up.
207 BF
.ProfileMatchRatio
= 1.0f
;
209 // Update basic block and edge counts.
210 auto BBMergeI
= BF
.begin();
211 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
212 BinaryBasicBlock
*BBMerge
= &*BBMergeI
;
213 assert(getIndex(BB
) == BF
.getIndex(BBMerge
));
215 // Update basic block count.
216 if (BB
->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE
) {
217 BBMerge
->setExecutionCount(BBMerge
->getKnownExecutionCount() +
218 BB
->getExecutionCount());
221 // Update edge count for successors of this basic block.
222 auto BBMergeSI
= BBMerge
->succ_begin();
223 auto BIMergeI
= BBMerge
->branch_info_begin();
224 auto BII
= BB
->branch_info_begin();
225 for (const BinaryBasicBlock
*BBSucc
: BB
->successors()) {
227 assert(getIndex(BBSucc
) == BF
.getIndex(*BBMergeSI
));
230 // At this point no branch count should be set to COUNT_NO_PROFILE.
231 assert(BII
->Count
!= BinaryBasicBlock::COUNT_NO_PROFILE
&&
232 "unexpected unknown branch profile");
233 assert(BIMergeI
->Count
!= BinaryBasicBlock::COUNT_NO_PROFILE
&&
234 "unexpected unknown branch profile");
236 BIMergeI
->Count
+= BII
->Count
;
238 // When we merge inferred and real fall-through branch data, the merged
239 // data is considered inferred.
240 if (BII
->MispredictedCount
!= BinaryBasicBlock::COUNT_INFERRED
&&
241 BIMergeI
->MispredictedCount
!= BinaryBasicBlock::COUNT_INFERRED
) {
242 BIMergeI
->MispredictedCount
+= BII
->MispredictedCount
;
244 BIMergeI
->MispredictedCount
= BinaryBasicBlock::COUNT_INFERRED
;
251 assert(BBMergeSI
== BBMerge
->succ_end());
255 assert(BBMergeI
== BF
.end());
257 // Merge jump tables profile info.
258 auto JTMergeI
= BF
.JumpTables
.begin();
259 for (const auto &JTEntry
: JumpTables
) {
260 if (JTMergeI
->second
->Counts
.empty())
261 JTMergeI
->second
->Counts
.resize(JTEntry
.second
->Counts
.size());
262 auto CountMergeI
= JTMergeI
->second
->Counts
.begin();
263 for (const JumpTable::JumpInfo
&JI
: JTEntry
.second
->Counts
) {
264 CountMergeI
->Count
+= JI
.Count
;
265 CountMergeI
->Mispreds
+= JI
.Mispreds
;
268 assert(CountMergeI
== JTMergeI
->second
->Counts
.end());
272 assert(JTMergeI
== BF
.JumpTables
.end());
275 void BinaryFunction::inferFallThroughCounts() {
276 // Work on a basic block at a time, propagating frequency information
278 // It is important to walk in the layout order.
279 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
280 const uint64_t BBExecCount
= BB
->getExecutionCount();
282 // Propagate this information to successors, filling in fall-through edges
283 // with frequency information
284 if (BB
->succ_size() == 0)
287 // Calculate frequency of outgoing branches from this node according to
289 uint64_t ReportedBranches
= 0;
290 for (const BinaryBasicBlock::BinaryBranchInfo
&SuccBI
: BB
->branch_info())
291 if (SuccBI
.Count
!= BinaryBasicBlock::COUNT_NO_PROFILE
)
292 ReportedBranches
+= SuccBI
.Count
;
294 // Get taken count of conditional tail call if the block ends with one.
295 uint64_t CTCTakenCount
= 0;
296 const MCInst
*CTCInstr
= BB
->getLastNonPseudoInstr();
297 if (CTCInstr
&& BC
.MIB
->getConditionalTailCall(*CTCInstr
)) {
298 CTCTakenCount
= BC
.MIB
->getAnnotationWithDefault
<uint64_t>(
299 *CTCInstr
, "CTCTakenCount");
302 // Calculate frequency of throws from this node according to LBR data
303 // for branching into associated landing pads. Since it is possible
304 // for a landing pad to be associated with more than one basic blocks,
305 // we may overestimate the frequency of throws for such blocks.
306 uint64_t ReportedThrows
= 0;
307 for (const BinaryBasicBlock
*LP
: BB
->landing_pads())
308 ReportedThrows
+= LP
->getExecutionCount();
310 const uint64_t TotalReportedJumps
=
311 ReportedBranches
+ CTCTakenCount
+ ReportedThrows
;
313 // Infer the frequency of the fall-through edge, representing not taking the
315 uint64_t Inferred
= 0;
316 if (BBExecCount
> TotalReportedJumps
)
317 Inferred
= BBExecCount
- TotalReportedJumps
;
320 if (BBExecCount
< TotalReportedJumps
) dbgs()
321 << "Fall-through inference is slightly inconsistent. "
322 "exec frequency is less than the outgoing edges frequency ("
323 << BBExecCount
<< " < " << ReportedBranches
324 << ") for BB at offset 0x"
325 << Twine::utohexstr(getAddress() + BB
->getOffset()) << '\n';);
327 if (BB
->succ_size() <= 2) {
328 // Skip if the last instruction is an unconditional jump.
329 const MCInst
*LastInstr
= BB
->getLastNonPseudoInstr();
330 if (LastInstr
&& (BC
.MIB
->isUnconditionalBranch(*LastInstr
) ||
331 BC
.MIB
->isIndirectBranch(*LastInstr
)))
333 // If there is an FT it will be the last successor.
334 auto &SuccBI
= *BB
->branch_info_rbegin();
335 auto &Succ
= *BB
->succ_rbegin();
336 if (SuccBI
.Count
== 0) {
337 SuccBI
.Count
= Inferred
;
338 SuccBI
.MispredictedCount
= BinaryBasicBlock::COUNT_INFERRED
;
339 Succ
->ExecutionCount
=
340 std::max(Succ
->getKnownExecutionCount(), Inferred
);
346 void BinaryFunction::clearProfile() {
347 // Keep function execution profile the same. Only clear basic block and edge
349 for (BinaryBasicBlock
*BB
: BasicBlocks
) {
350 BB
->ExecutionCount
= 0;
351 for (BinaryBasicBlock::BinaryBranchInfo
&BI
: BB
->branch_info()) {
353 BI
.MispredictedCount
= 0;