[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / bolt / lib / Core / BinaryFunctionProfile.cpp
blob55ebe5fc900e65102ccb22a364bbc9262ebce124
1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements BinaryFunction member functions related to processing
10 // the execution profile.
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
20 #undef DEBUG_TYPE
21 #define DEBUG_TYPE "bolt-prof"
23 using namespace llvm;
24 using namespace bolt;
26 namespace opts {
28 extern cl::OptionCategory BoltOptCategory;
30 cl::opt<IndirectCallPromotionType> ICP(
31 "indirect-call-promotion", cl::init(ICP_NONE),
32 cl::desc("indirect call promotion"),
33 cl::values(
34 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
35 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
36 clEnumValN(ICP_JUMP_TABLES, "jump-tables",
37 "perform ICP on jump tables"),
38 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
39 cl::ZeroOrMore, cl::cat(BoltOptCategory));
41 static cl::alias ICPAlias("icp",
42 cl::desc("Alias for --indirect-call-promotion"),
43 cl::aliasopt(ICP));
45 extern cl::opt<JumpTableSupportLevel> JumpTables;
47 static cl::opt<bool> FixFuncCounts(
48 "fix-func-counts",
49 cl::desc("adjust function counts based on basic blocks execution count"),
50 cl::Hidden, cl::cat(BoltOptCategory));
52 static cl::opt<bool> FixBlockCounts(
53 "fix-block-counts",
54 cl::desc("adjust block counts based on outgoing branch counts"),
55 cl::init(true), cl::Hidden, cl::cat(BoltOptCategory));
57 static cl::opt<bool>
58 InferFallThroughs("infer-fall-throughs",
59 cl::desc("infer execution count for fall-through blocks"),
60 cl::Hidden, cl::cat(BoltOptCategory));
62 } // namespace opts
64 namespace llvm {
65 namespace bolt {
67 void BinaryFunction::postProcessProfile() {
68 if (!hasValidProfile()) {
69 clearProfile();
70 return;
73 if (!(getProfileFlags() & PF_LBR))
74 return;
76 // If we have at least some branch data for the function indicate that it
77 // was executed.
78 if (opts::FixFuncCounts && ExecutionCount == 0)
79 ExecutionCount = 1;
81 // Compute preliminary execution count for each basic block.
82 for (BinaryBasicBlock *BB : BasicBlocks) {
83 if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
84 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
85 BB->ExecutionCount = 0;
87 for (BinaryBasicBlock *BB : BasicBlocks) {
88 auto SuccBIIter = BB->branch_info_begin();
89 for (BinaryBasicBlock *Succ : BB->successors()) {
90 // All incoming edges to the primary entry have been accounted for, thus
91 // we skip the update here.
92 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
93 Succ != BasicBlocks.front())
94 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
95 ++SuccBIIter;
99 // Fix for old profiles.
100 for (BinaryBasicBlock *BB : BasicBlocks) {
101 if (BB->size() != 1 || BB->succ_size() != 1)
102 continue;
104 if (BB->getKnownExecutionCount() == 0)
105 continue;
107 MCInst *Instr = BB->getFirstNonPseudoInstr();
108 assert(Instr && "expected non-pseudo instr");
109 if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
110 continue;
112 BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
113 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
114 if (!BI.Count) {
115 BI.Count = BB->getKnownExecutionCount();
116 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
117 BI.Count);
121 if (opts::FixBlockCounts) {
122 for (BinaryBasicBlock *BB : BasicBlocks) {
123 // Make sure that execution count of a block is at least the branch count
124 // of an incoming/outgoing jump.
125 auto SuccBIIter = BB->branch_info_begin();
126 for (BinaryBasicBlock *Succ : BB->successors()) {
127 uint64_t Count = SuccBIIter->Count;
128 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
129 Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
130 BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
132 ++SuccBIIter;
134 // Make sure that execution count of a block is at least the number of
135 // function calls from the block.
136 for (MCInst &Inst : *BB) {
137 // Ignore non-call instruction
138 if (!BC.MIB->isCall(Inst))
139 continue;
141 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
142 if (CountAnnt)
143 BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
148 if (opts::InferFallThroughs)
149 inferFallThroughCounts();
151 // Update profile information for jump tables based on CFG branch data.
152 for (BinaryBasicBlock *BB : BasicBlocks) {
153 const MCInst *LastInstr = BB->getLastNonPseudoInstr();
154 if (!LastInstr)
155 continue;
156 const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
157 if (!JTAddress)
158 continue;
159 JumpTable *JT = getJumpTableContainingAddress(JTAddress);
160 if (!JT)
161 continue;
163 uint64_t TotalBranchCount = 0;
164 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
165 BB->branch_info()) {
166 TotalBranchCount += BranchInfo.Count;
168 JT->Count += TotalBranchCount;
170 if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
171 continue;
173 if (JT->Counts.empty())
174 JT->Counts.resize(JT->Entries.size());
175 auto EI = JT->Entries.begin();
176 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
177 EI += Delta;
178 while (EI != JT->Entries.end()) {
179 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
180 if (TargetBB) {
181 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
182 BB->getBranchInfo(*TargetBB);
183 assert(Delta < JT->Counts.size());
184 JT->Counts[Delta].Count += BranchInfo.Count;
185 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
187 ++Delta;
188 ++EI;
189 // A label marks the start of another jump table.
190 if (JT->Labels.count(Delta * JT->EntrySize))
191 break;
196 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
197 // No reason to merge invalid or empty profiles into BF.
198 if (!hasValidProfile())
199 return;
201 // Update function execution count.
202 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
203 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
205 // Since we are merging a valid profile, the new profile should be valid too.
206 // It has either already been valid, or it has been cleaned up.
207 BF.ProfileMatchRatio = 1.0f;
209 // Update basic block and edge counts.
210 auto BBMergeI = BF.begin();
211 for (BinaryBasicBlock *BB : BasicBlocks) {
212 BinaryBasicBlock *BBMerge = &*BBMergeI;
213 assert(getIndex(BB) == BF.getIndex(BBMerge));
215 // Update basic block count.
216 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
217 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
218 BB->getExecutionCount());
221 // Update edge count for successors of this basic block.
222 auto BBMergeSI = BBMerge->succ_begin();
223 auto BIMergeI = BBMerge->branch_info_begin();
224 auto BII = BB->branch_info_begin();
225 for (const BinaryBasicBlock *BBSucc : BB->successors()) {
226 (void)BBSucc;
227 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
228 (void)BBMergeSI;
230 // At this point no branch count should be set to COUNT_NO_PROFILE.
231 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
232 "unexpected unknown branch profile");
233 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
234 "unexpected unknown branch profile");
236 BIMergeI->Count += BII->Count;
238 // When we merge inferred and real fall-through branch data, the merged
239 // data is considered inferred.
240 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
241 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
242 BIMergeI->MispredictedCount += BII->MispredictedCount;
243 } else {
244 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
247 ++BBMergeSI;
248 ++BII;
249 ++BIMergeI;
251 assert(BBMergeSI == BBMerge->succ_end());
253 ++BBMergeI;
255 assert(BBMergeI == BF.end());
257 // Merge jump tables profile info.
258 auto JTMergeI = BF.JumpTables.begin();
259 for (const auto &JTEntry : JumpTables) {
260 if (JTMergeI->second->Counts.empty())
261 JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
262 auto CountMergeI = JTMergeI->second->Counts.begin();
263 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
264 CountMergeI->Count += JI.Count;
265 CountMergeI->Mispreds += JI.Mispreds;
266 ++CountMergeI;
268 assert(CountMergeI == JTMergeI->second->Counts.end());
270 ++JTMergeI;
272 assert(JTMergeI == BF.JumpTables.end());
275 void BinaryFunction::inferFallThroughCounts() {
276 // Work on a basic block at a time, propagating frequency information
277 // forwards.
278 // It is important to walk in the layout order.
279 for (BinaryBasicBlock *BB : BasicBlocks) {
280 const uint64_t BBExecCount = BB->getExecutionCount();
282 // Propagate this information to successors, filling in fall-through edges
283 // with frequency information
284 if (BB->succ_size() == 0)
285 continue;
287 // Calculate frequency of outgoing branches from this node according to
288 // LBR data.
289 uint64_t ReportedBranches = 0;
290 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
291 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
292 ReportedBranches += SuccBI.Count;
294 // Get taken count of conditional tail call if the block ends with one.
295 uint64_t CTCTakenCount = 0;
296 const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
297 if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
298 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
299 *CTCInstr, "CTCTakenCount");
302 // Calculate frequency of throws from this node according to LBR data
303 // for branching into associated landing pads. Since it is possible
304 // for a landing pad to be associated with more than one basic blocks,
305 // we may overestimate the frequency of throws for such blocks.
306 uint64_t ReportedThrows = 0;
307 for (const BinaryBasicBlock *LP : BB->landing_pads())
308 ReportedThrows += LP->getExecutionCount();
310 const uint64_t TotalReportedJumps =
311 ReportedBranches + CTCTakenCount + ReportedThrows;
313 // Infer the frequency of the fall-through edge, representing not taking the
314 // branch.
315 uint64_t Inferred = 0;
316 if (BBExecCount > TotalReportedJumps)
317 Inferred = BBExecCount - TotalReportedJumps;
319 LLVM_DEBUG(
320 if (BBExecCount < TotalReportedJumps) dbgs()
321 << "Fall-through inference is slightly inconsistent. "
322 "exec frequency is less than the outgoing edges frequency ("
323 << BBExecCount << " < " << ReportedBranches
324 << ") for BB at offset 0x"
325 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);
327 if (BB->succ_size() <= 2) {
328 // Skip if the last instruction is an unconditional jump.
329 const MCInst *LastInstr = BB->getLastNonPseudoInstr();
330 if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
331 BC.MIB->isIndirectBranch(*LastInstr)))
332 continue;
333 // If there is an FT it will be the last successor.
334 auto &SuccBI = *BB->branch_info_rbegin();
335 auto &Succ = *BB->succ_rbegin();
336 if (SuccBI.Count == 0) {
337 SuccBI.Count = Inferred;
338 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
339 Succ->ExecutionCount += Inferred;
345 void BinaryFunction::clearProfile() {
346 // Keep function execution profile the same. Only clear basic block and edge
347 // counts.
348 for (BinaryBasicBlock *BB : BasicBlocks) {
349 BB->ExecutionCount = 0;
350 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
351 BI.Count = 0;
352 BI.MispredictedCount = 0;
357 } // namespace bolt
358 } // namespace llvm