Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Core / DynoStats.cpp
blob5dd55e13e5b31fdea55eb3071d8e62f8c5368b54
1 //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the DynoStats class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/DynoStats.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include <algorithm>
21 #include <string>
23 #undef DEBUG_TYPE
24 #define DEBUG_TYPE "bolt"
26 using namespace llvm;
27 using namespace bolt;
29 namespace opts {
31 extern cl::OptionCategory BoltCategory;
33 static cl::opt<uint32_t>
34 DynoStatsScale("dyno-stats-scale",
35 cl::desc("scale to be applied while reporting dyno stats"),
36 cl::Optional,
37 cl::init(1),
38 cl::Hidden,
39 cl::cat(BoltCategory));
41 static cl::opt<uint32_t>
42 PrintDynoOpcodeStat("print-dyno-opcode-stats",
43 cl::desc("print per instruction opcode dyno stats and the function"
44 "names:BB offsets of the nth highest execution counts"),
45 cl::init(0),
46 cl::Hidden,
47 cl::cat(BoltCategory));
49 } // namespace opts
51 namespace llvm {
52 namespace bolt {
54 constexpr const char *DynoStats::Desc[];
56 bool DynoStats::operator<(const DynoStats &Other) const {
57 return std::lexicographical_compare(
58 &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
59 &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
62 bool DynoStats::operator==(const DynoStats &Other) const {
63 return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
64 &Other.Stats[FIRST_DYNO_STAT]);
67 bool DynoStats::lessThan(const DynoStats &Other,
68 ArrayRef<Category> Keys) const {
69 return std::lexicographical_compare(
70 Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
71 [this, &Other](const Category A, const Category) {
72 return Stats[A] < Other.Stats[A];
73 });
76 void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
77 MCInstPrinter *Printer) const {
78 auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
79 uint64_t OtherStat) {
80 OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
81 if (Other) {
82 if (Stat != OtherStat) {
83 OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
84 OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
85 (float)(OtherStat));
86 } else {
87 OS << " (=)";
90 OS << '\n';
93 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
94 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
96 if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
97 continue;
99 printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
101 if (opts::PrintDynoOpcodeStat && Printer) {
102 outs() << "\nProgram-wide opcode histogram:\n";
103 OS << " Opcode, Execution Count, Max Exec Count, "
104 "Function Name:Offset ...\n";
105 std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
106 for (const OpcodeStatTy &Stat : OpcodeHistogram)
107 SortedHistogram.emplace_back(Stat.second.first, Stat.first);
109 // Sort using lexicographic ordering
110 llvm::sort(SortedHistogram);
112 // Dump in ascending order: Start with Opcode with Highest execution
113 // count.
114 for (auto &Stat : llvm::reverse(SortedHistogram)) {
115 OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat.second).data(),
116 Stat.first * opts::DynoStatsScale);
118 MaxOpcodeHistogramTy MaxMultiMap = OpcodeHistogram.at(Stat.second).second;
119 // Start with function name:BB offset with highest execution count.
120 for (auto &Max : llvm::reverse(MaxMultiMap)) {
121 OS << format(", %'18lld, ", Max.first * opts::DynoStatsScale)
122 << Max.second.first.str() << ':' << Max.second.second;
124 OS << '\n';
129 void DynoStats::operator+=(const DynoStats &Other) {
130 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
131 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
132 Stats[Stat] += Other[Stat];
134 for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
135 auto I = OpcodeHistogram.find(Stat.first);
136 if (I == OpcodeHistogram.end()) {
137 OpcodeHistogram.emplace(Stat);
138 } else {
139 // Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th
140 // maximum counts.
141 I->second.first += Stat.second.first;
142 auto &MMap = I->second.second;
143 auto &OtherMMap = Stat.second.second;
144 auto Size = MMap.size();
145 assert(Size <= opts::PrintDynoOpcodeStat);
146 for (auto OtherMMapPair : llvm::reverse(OtherMMap)) {
147 if (Size++ >= opts::PrintDynoOpcodeStat) {
148 auto First = MMap.begin();
149 if (OtherMMapPair.first <= First->first)
150 break;
151 MMap.erase(First);
153 MMap.emplace(OtherMMapPair);
159 DynoStats getDynoStats(BinaryFunction &BF) {
160 auto &BC = BF.getBinaryContext();
162 DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
164 // Return empty-stats about the function we don't completely understand.
165 if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
166 return Stats;
168 // Update enumeration of basic blocks for correct detection of branch'
169 // direction.
170 BF.getLayout().updateLayoutIndices();
172 for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) {
173 // The basic block execution count equals to the sum of incoming branch
174 // frequencies. This may deviate from the sum of outgoing branches of the
175 // basic block especially since the block may contain a function that
176 // does not return or a function that throws an exception.
177 const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
179 // Ignore empty blocks and blocks that were not executed.
180 if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
181 continue;
183 // Count AArch64 linker-inserted veneers
184 if (BF.isAArch64Veneer())
185 Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
187 // Count various instruction types by iterating through all instructions.
188 // When -print-dyno-opcode-stats is on, count per each opcode and record
189 // maximum execution counts.
190 for (const MCInst &Instr : *BB) {
191 if (opts::PrintDynoOpcodeStat) {
192 unsigned Opcode = Instr.getOpcode();
193 auto I = Stats.OpcodeHistogram.find(Opcode);
194 if (I == Stats.OpcodeHistogram.end()) {
195 DynoStats::MaxOpcodeHistogramTy MMap;
196 MMap.emplace(BBExecutionCount,
197 std::make_pair(BF.getOneName(), BB->getOffset()));
198 Stats.OpcodeHistogram.emplace(Opcode,
199 std::make_pair(BBExecutionCount, MMap));
200 } else {
201 I->second.first += BBExecutionCount;
202 bool Insert = true;
203 if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
204 auto First = I->second.second.begin();
205 if (First->first < BBExecutionCount)
206 I->second.second.erase(First);
207 else
208 Insert = false;
210 if (Insert) {
211 I->second.second.emplace(
212 BBExecutionCount,
213 std::make_pair(BF.getOneName(), BB->getOffset()));
218 if (BC.MIB->mayStore(Instr)) {
219 Stats[DynoStats::STORES] += BBExecutionCount;
221 if (BC.MIB->mayLoad(Instr)) {
222 Stats[DynoStats::LOADS] += BBExecutionCount;
224 if (!BC.MIB->isCall(Instr))
225 continue;
227 uint64_t CallFreq = BBExecutionCount;
228 if (BC.MIB->getConditionalTailCall(Instr)) {
229 CallFreq =
230 BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
232 Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
233 if (BC.MIB->isIndirectCall(Instr)) {
234 Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
235 } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
236 const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
237 if (BF && BF->isPLTFunction()) {
238 Stats[DynoStats::PLT_CALLS] += CallFreq;
240 // We don't process PLT functions and hence have to adjust relevant
241 // dynostats here for:
243 // jmp *GOT_ENTRY(%rip)
245 // NOTE: this is arch-specific.
246 Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
247 Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
248 Stats[DynoStats::LOADS] += CallFreq;
249 Stats[DynoStats::INSTRUCTIONS] += CallFreq;
254 Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
256 // Jump tables.
257 const MCInst *LastInstr = BB->getLastNonPseudoInstr();
258 if (BC.MIB->getJumpTable(*LastInstr)) {
259 Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
260 LLVM_DEBUG(
261 static uint64_t MostFrequentJT;
262 if (BBExecutionCount > MostFrequentJT) {
263 MostFrequentJT = BBExecutionCount;
264 dbgs() << "BOLT-INFO: most frequently executed jump table is in "
265 << "function " << BF << " in basic block " << BB->getName()
266 << " executed totally " << BBExecutionCount << " times.\n";
269 continue;
272 if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
273 Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
274 continue;
277 // Update stats for branches.
278 const MCSymbol *TBB = nullptr;
279 const MCSymbol *FBB = nullptr;
280 MCInst *CondBranch = nullptr;
281 MCInst *UncondBranch = nullptr;
282 if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
283 continue;
285 if (!CondBranch && !UncondBranch)
286 continue;
288 // Simple unconditional branch.
289 if (!CondBranch) {
290 Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
291 continue;
294 // CTCs: instruction annotations could be stripped, hence check the number
295 // of successors to identify conditional tail calls.
296 if (BB->succ_size() == 1) {
297 if (BB->branch_info_begin() != BB->branch_info_end())
298 Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
299 continue;
302 // Conditional branch that could be followed by an unconditional branch.
303 uint64_t TakenCount = BB->getTakenBranchInfo().Count;
304 if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
305 TakenCount = 0;
307 uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
308 if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
309 NonTakenCount = 0;
311 if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
312 Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
313 Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
314 } else {
315 Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
316 Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
319 if (UncondBranch) {
320 Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
324 return Stats;
327 } // namespace bolt
328 } // namespace llvm