Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Core / BinaryFunctionProfile.cpp
blob0d705cd82f5df6c1f23ad29184954e8a102a74c2
1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements BinaryFunction member functions related to processing
10 // the execution profile.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
20 #undef DEBUG_TYPE
21 #define DEBUG_TYPE "bolt-prof"
23 using namespace llvm;
24 using namespace bolt;
namespace opts {

extern cl::OptionCategory BoltOptCategory;

// --indirect-call-promotion: selects which kinds of indirect control flow
// (calls, jump tables, both, or none) ICP applies to. Read below when
// deciding whether to attribute per-entry jump table counts.
cl::opt<IndirectCallPromotionType> ICP(
    "indirect-call-promotion", cl::init(ICP_NONE),
    cl::desc("indirect call promotion"),
    cl::values(
        clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
        clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
        clEnumValN(ICP_JUMP_TABLES, "jump-tables",
                   "perform ICP on jump tables"),
        clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
    cl::ZeroOrMore, cl::cat(BoltOptCategory));

// Short-form alias so users can write --icp instead of the long option name.
static cl::alias ICPAlias("icp",
                          cl::desc("Alias for --indirect-call-promotion"),
                          cl::aliasopt(ICP));

// Defined in another translation unit; gives the jump table support level
// consulted alongside ICP below.
extern cl::opt<JumpTableSupportLevel> JumpTables;

// --fix-func-counts: when set, a function with any branch data gets a
// non-zero execution count even if its entry count was recorded as zero.
static cl::opt<bool> FixFuncCounts(
    "fix-func-counts",
    cl::desc("adjust function counts based on basic blocks execution count"),
    cl::Hidden, cl::cat(BoltOptCategory));

// --fix-block-counts (default on): raise block counts to be at least the
// counts of their incoming/outgoing edges and contained calls.
static cl::opt<bool> FixBlockCounts(
    "fix-block-counts",
    cl::desc("adjust block counts based on outgoing branch counts"),
    cl::init(true), cl::Hidden, cl::cat(BoltOptCategory));

// --infer-fall-throughs: enables inferFallThroughCounts() during
// post-processing.
static cl::opt<bool>
    InferFallThroughs("infer-fall-throughs",
                      cl::desc("infer execution count for fall-through blocks"),
                      cl::Hidden, cl::cat(BoltOptCategory));

} // namespace opts
64 namespace llvm {
65 namespace bolt {
/// Post-process the execution profile attached to this function.
///
/// Phases (in order):
///   1. Drop the profile entirely if it is invalid.
///   2. Bail out for non-LBR profiles.
///   3. Optionally force a non-zero function count (--fix-func-counts).
///   4. Recompute preliminary block counts from incoming edge counts.
///   5. Compensate for an old-profile quirk around single-NOP blocks.
///   6. Optionally raise block counts to edge/call counts
///      (--fix-block-counts).
///   7. Optionally infer fall-through edge counts (--infer-fall-throughs).
///   8. Propagate CFG branch counts onto jump tables (total count always;
///      per-entry counts only when ICP or aggressive jump tables need them).
void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  // Everything below assumes branch (LBR) data; plain sample profiles have
  // no edge counts to process.
  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0)
    ExecutionCount = 1;

  // Compute preliminary execution count for each basic block.
  // Entry points and landing pads keep their recorded counts (they receive
  // control flow not visible as CFG edges); every other block is reset to 0
  // and rebuilt from incoming edge counts below.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }

  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles: a hot block consisting of a single NOP with one
  // successor may have a zero-count fall-through edge; seed that edge with
  // the block's own count so downstream consumers see consistent flow.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;

    if (BB->getKnownExecutionCount() == 0)
      continue;

    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;

    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that execution count of a block is at least the branch count
      // of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that execution count of a block is at least the number of
      // function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instruction
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt)
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    // The table's aggregate count is the sum of all outgoing branch counts
    // of the block that indexes into it.
    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info())
      TotalBranchCount += BranchInfo.Count;
    JT->Count += TotalBranchCount;

    // Per-entry counts are only needed when ICP on jump tables or aggressive
    // jump table optimization is requested.
    if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    auto EI = JT->Entries.begin();
    // JTAddress may point into the middle of the containing table; start at
    // the corresponding entry.
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}
/// Merge this function's profile (counts for the function, its basic blocks,
/// its CFG edges, and its jump tables) into \p BF.
///
/// Precondition implied by the asserts: \p BF has an identical CFG shape
/// (same block indices, same successor lists, same jump table layout), e.g.
/// because the two functions are ICF duplicates.
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts. Walk both block sequences in
  // lockstep; the index asserts verify the CFGs line up.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info. Tables are walked in lockstep as well;
  // per-entry counts and mispredict counts are accumulated element-wise.
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}
/// Infer execution counts for fall-through edges that LBR data cannot record
/// directly (an untaken branch produces no LBR entry).
///
/// For each block: subtract all reported outgoing flow (taken branches,
/// conditional tail call takes, and throws to landing pads) from the block's
/// execution count; the remainder is attributed to the fall-through edge,
/// which by convention is the last successor.
void BinaryFunction::inferFallThroughCounts() {
  // Work on a basic block at a time, propagating frequency information
  // forwards.
  // It is important to walk in the layout order.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const uint64_t BBExecCount = BB->getExecutionCount();

    // Propagate this information to successors, filling in fall-through edges
    // with frequency information
    if (BB->succ_size() == 0)
      continue;

    // Calculate frequency of outgoing branches from this node according to
    // LBR data.
    uint64_t ReportedBranches = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
      if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        ReportedBranches += SuccBI.Count;

    // Get taken count of conditional tail call if the block ends with one.
    uint64_t CTCTakenCount = 0;
    const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
    if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
    }

    // Calculate frequency of throws from this node according to LBR data
    // for branching into associated landing pads. Since it is possible
    // for a landing pad to be associated with more than one basic blocks,
    // we may overestimate the frequency of throws for such blocks.
    uint64_t ReportedThrows = 0;
    for (const BinaryBasicBlock *LP : BB->landing_pads())
      ReportedThrows += LP->getExecutionCount();

    const uint64_t TotalReportedJumps =
        ReportedBranches + CTCTakenCount + ReportedThrows;

    // Infer the frequency of the fall-through edge, representing not taking the
    // branch.
    uint64_t Inferred = 0;
    if (BBExecCount > TotalReportedJumps)
      Inferred = BBExecCount - TotalReportedJumps;

    LLVM_DEBUG(
        if (BBExecCount < TotalReportedJumps) dbgs()
            << "Fall-through inference is slightly inconsistent. "
               "exec frequency is less than the outgoing edges frequency ("
            << BBExecCount << " < " << ReportedBranches
            << ") for BB at offset 0x"
            << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);

    // Only blocks with at most two successors (conditional branch +
    // fall-through) have a well-defined fall-through edge to update.
    if (BB->succ_size() <= 2) {
      // Skip if the last instruction is an unconditional jump.
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
                        BC.MIB->isIndirectBranch(*LastInstr)))
        continue;
      // If there is an FT it will be the last successor.
      auto &SuccBI = *BB->branch_info_rbegin();
      auto &Succ = *BB->succ_rbegin();
      if (SuccBI.Count == 0) {
        SuccBI.Count = Inferred;
        // Mark the count as inferred rather than measured.
        SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
        Succ->ExecutionCount += Inferred;
      }
    }
  }
}
344 void BinaryFunction::clearProfile() {
345 // Keep function execution profile the same. Only clear basic block and edge
346 // counts.
347 for (BinaryBasicBlock *BB : BasicBlocks) {
348 BB->ExecutionCount = 0;
349 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
350 BI.Count = 0;
351 BI.MispredictedCount = 0;
356 } // namespace bolt
357 } // namespace llvm