1 //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the DynoStats class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/DynoStats.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
24 #define DEBUG_TYPE "bolt"
31 extern cl::OptionCategory BoltCategory
;
33 static cl::opt
<uint32_t>
34 DynoStatsScale("dyno-stats-scale",
35 cl::desc("scale to be applied while reporting dyno stats"),
39 cl::cat(BoltCategory
));
41 static cl::opt
<uint32_t>
42 PrintDynoOpcodeStat("print-dyno-opcode-stats",
43 cl::desc("print per instruction opcode dyno stats and the function"
44 "names:BB offsets of the nth highest execution counts"),
47 cl::cat(BoltCategory
));
54 constexpr const char *DynoStats::Desc
[];
56 bool DynoStats::operator<(const DynoStats
&Other
) const {
57 return std::lexicographical_compare(
58 &Stats
[FIRST_DYNO_STAT
], &Stats
[LAST_DYNO_STAT
],
59 &Other
.Stats
[FIRST_DYNO_STAT
], &Other
.Stats
[LAST_DYNO_STAT
]);
62 bool DynoStats::operator==(const DynoStats
&Other
) const {
63 return std::equal(&Stats
[FIRST_DYNO_STAT
], &Stats
[LAST_DYNO_STAT
],
64 &Other
.Stats
[FIRST_DYNO_STAT
]);
67 bool DynoStats::lessThan(const DynoStats
&Other
,
68 ArrayRef
<Category
> Keys
) const {
69 return std::lexicographical_compare(
70 Keys
.begin(), Keys
.end(), Keys
.begin(), Keys
.end(),
71 [this, &Other
](const Category A
, const Category
) {
72 return Stats
[A
] < Other
.Stats
[A
];
76 void DynoStats::print(raw_ostream
&OS
, const DynoStats
*Other
,
77 MCInstPrinter
*Printer
) const {
78 auto printStatWithDelta
= [&](const std::string
&Name
, uint64_t Stat
,
80 OS
<< format("%'20lld : ", Stat
* opts::DynoStatsScale
) << Name
;
82 if (Stat
!= OtherStat
) {
83 OtherStat
= std::max(OtherStat
, uint64_t(1)); // to prevent divide by 0
84 OS
<< format(" (%+.1f%%)", ((float)Stat
- (float)OtherStat
) * 100.0 /
93 for (auto Stat
= DynoStats::FIRST_DYNO_STAT
+ 1;
94 Stat
< DynoStats::LAST_DYNO_STAT
; ++Stat
) {
96 if (!PrintAArch64Stats
&& Stat
== DynoStats::VENEER_CALLS_AARCH64
)
99 printStatWithDelta(Desc
[Stat
], Stats
[Stat
], Other
? (*Other
)[Stat
] : 0);
101 if (opts::PrintDynoOpcodeStat
&& Printer
) {
102 outs() << "\nProgram-wide opcode histogram:\n";
103 OS
<< " Opcode, Execution Count, Max Exec Count, "
104 "Function Name:Offset ...\n";
105 std::vector
<std::pair
<uint64_t, unsigned>> SortedHistogram
;
106 for (const OpcodeStatTy
&Stat
: OpcodeHistogram
)
107 SortedHistogram
.emplace_back(Stat
.second
.first
, Stat
.first
);
109 // Sort using lexicographic ordering
110 llvm::sort(SortedHistogram
);
112 // Dump in ascending order: Start with Opcode with Highest execution
114 for (auto &Stat
: llvm::reverse(SortedHistogram
)) {
115 OS
<< format("%20s,%'18lld", Printer
->getOpcodeName(Stat
.second
).data(),
116 Stat
.first
* opts::DynoStatsScale
);
118 MaxOpcodeHistogramTy MaxMultiMap
= OpcodeHistogram
.at(Stat
.second
).second
;
119 // Start with function name:BB offset with highest execution count.
120 for (auto &Max
: llvm::reverse(MaxMultiMap
)) {
121 OS
<< format(", %'18lld, ", Max
.first
* opts::DynoStatsScale
)
122 << Max
.second
.first
.str() << ':' << Max
.second
.second
;
129 void DynoStats::operator+=(const DynoStats
&Other
) {
130 for (auto Stat
= DynoStats::FIRST_DYNO_STAT
+ 1;
131 Stat
< DynoStats::LAST_DYNO_STAT
; ++Stat
) {
132 Stats
[Stat
] += Other
[Stat
];
134 for (const OpcodeStatTy
&Stat
: Other
.OpcodeHistogram
) {
135 auto I
= OpcodeHistogram
.find(Stat
.first
);
136 if (I
== OpcodeHistogram
.end()) {
137 OpcodeHistogram
.emplace(Stat
);
139 // Merge other histograms, log only the opts::PrintDynoOpcodeStat'th
141 I
->second
.first
+= Stat
.second
.first
;
142 auto &MMap
= I
->second
.second
;
143 auto &OtherMMap
= Stat
.second
.second
;
144 auto Size
= MMap
.size();
145 assert(Size
<= opts::PrintDynoOpcodeStat
);
146 for (auto OtherMMapPair
: llvm::reverse(OtherMMap
)) {
147 if (Size
++ >= opts::PrintDynoOpcodeStat
) {
148 auto First
= MMap
.begin();
149 if (OtherMMapPair
.first
<= First
->first
)
153 MMap
.emplace(OtherMMapPair
);
159 DynoStats
getDynoStats(BinaryFunction
&BF
) {
160 auto &BC
= BF
.getBinaryContext();
162 DynoStats
Stats(/*PrintAArch64Stats*/ BC
.isAArch64());
164 // Return empty-stats about the function we don't completely understand.
165 if (!BF
.isSimple() || !BF
.hasValidProfile() || !BF
.hasCanonicalCFG())
168 // Update enumeration of basic blocks for correct detection of branch'
170 BF
.getLayout().updateLayoutIndices();
172 for (BinaryBasicBlock
*const BB
: BF
.getLayout().blocks()) {
173 // The basic block execution count equals to the sum of incoming branch
174 // frequencies. This may deviate from the sum of outgoing branches of the
175 // basic block especially since the block may contain a function that
176 // does not return or a function that throws an exception.
177 const uint64_t BBExecutionCount
= BB
->getKnownExecutionCount();
179 // Ignore empty blocks and blocks that were not executed.
180 if (BB
->getNumNonPseudos() == 0 || BBExecutionCount
== 0)
183 // Count AArch64 linker-inserted veneers
184 if (BF
.isAArch64Veneer())
185 Stats
[DynoStats::VENEER_CALLS_AARCH64
] += BF
.getKnownExecutionCount();
187 // Count various instruction types by iterating through all instructions.
188 // When -print-dyno-opcode-stats is on, count per each opcode and record
189 // maximum execution counts.
190 for (const MCInst
&Instr
: *BB
) {
191 if (opts::PrintDynoOpcodeStat
) {
192 unsigned Opcode
= Instr
.getOpcode();
193 auto I
= Stats
.OpcodeHistogram
.find(Opcode
);
194 if (I
== Stats
.OpcodeHistogram
.end()) {
195 DynoStats::MaxOpcodeHistogramTy MMap
;
196 MMap
.emplace(BBExecutionCount
,
197 std::make_pair(BF
.getOneName(), BB
->getOffset()));
198 Stats
.OpcodeHistogram
.emplace(Opcode
,
199 std::make_pair(BBExecutionCount
, MMap
));
201 I
->second
.first
+= BBExecutionCount
;
203 if (I
->second
.second
.size() == opts::PrintDynoOpcodeStat
) {
204 auto First
= I
->second
.second
.begin();
205 if (First
->first
< BBExecutionCount
)
206 I
->second
.second
.erase(First
);
211 I
->second
.second
.emplace(
213 std::make_pair(BF
.getOneName(), BB
->getOffset()));
218 if (BC
.MIB
->mayStore(Instr
)) {
219 Stats
[DynoStats::STORES
] += BBExecutionCount
;
221 if (BC
.MIB
->mayLoad(Instr
)) {
222 Stats
[DynoStats::LOADS
] += BBExecutionCount
;
224 if (!BC
.MIB
->isCall(Instr
))
227 uint64_t CallFreq
= BBExecutionCount
;
228 if (BC
.MIB
->getConditionalTailCall(Instr
)) {
230 BC
.MIB
->getAnnotationWithDefault
<uint64_t>(Instr
, "CTCTakenCount");
232 Stats
[DynoStats::FUNCTION_CALLS
] += CallFreq
;
233 if (BC
.MIB
->isIndirectCall(Instr
)) {
234 Stats
[DynoStats::INDIRECT_CALLS
] += CallFreq
;
235 } else if (const MCSymbol
*CallSymbol
= BC
.MIB
->getTargetSymbol(Instr
)) {
236 const BinaryFunction
*BF
= BC
.getFunctionForSymbol(CallSymbol
);
237 if (BF
&& BF
->isPLTFunction()) {
238 Stats
[DynoStats::PLT_CALLS
] += CallFreq
;
240 // We don't process PLT functions and hence have to adjust relevant
241 // dynostats here for:
243 // jmp *GOT_ENTRY(%rip)
245 // NOTE: this is arch-specific.
246 Stats
[DynoStats::FUNCTION_CALLS
] += CallFreq
;
247 Stats
[DynoStats::INDIRECT_CALLS
] += CallFreq
;
248 Stats
[DynoStats::LOADS
] += CallFreq
;
249 Stats
[DynoStats::INSTRUCTIONS
] += CallFreq
;
254 Stats
[DynoStats::INSTRUCTIONS
] += BB
->getNumNonPseudos() * BBExecutionCount
;
257 const MCInst
*LastInstr
= BB
->getLastNonPseudoInstr();
258 if (BC
.MIB
->getJumpTable(*LastInstr
)) {
259 Stats
[DynoStats::JUMP_TABLE_BRANCHES
] += BBExecutionCount
;
261 static uint64_t MostFrequentJT
;
262 if (BBExecutionCount
> MostFrequentJT
) {
263 MostFrequentJT
= BBExecutionCount
;
264 dbgs() << "BOLT-INFO: most frequently executed jump table is in "
265 << "function " << BF
<< " in basic block " << BB
->getName()
266 << " executed totally " << BBExecutionCount
<< " times.\n";
272 if (BC
.MIB
->isIndirectBranch(*LastInstr
) && !BC
.MIB
->isCall(*LastInstr
)) {
273 Stats
[DynoStats::UNKNOWN_INDIRECT_BRANCHES
] += BBExecutionCount
;
277 // Update stats for branches.
278 const MCSymbol
*TBB
= nullptr;
279 const MCSymbol
*FBB
= nullptr;
280 MCInst
*CondBranch
= nullptr;
281 MCInst
*UncondBranch
= nullptr;
282 if (!BB
->analyzeBranch(TBB
, FBB
, CondBranch
, UncondBranch
))
285 if (!CondBranch
&& !UncondBranch
)
288 // Simple unconditional branch.
290 Stats
[DynoStats::UNCOND_BRANCHES
] += BBExecutionCount
;
294 // CTCs: instruction annotations could be stripped, hence check the number
295 // of successors to identify conditional tail calls.
296 if (BB
->succ_size() == 1) {
297 if (BB
->branch_info_begin() != BB
->branch_info_end())
298 Stats
[DynoStats::UNCOND_BRANCHES
] += BB
->branch_info_begin()->Count
;
302 // Conditional branch that could be followed by an unconditional branch.
303 uint64_t TakenCount
= BB
->getTakenBranchInfo().Count
;
304 if (TakenCount
== BinaryBasicBlock::COUNT_NO_PROFILE
)
307 uint64_t NonTakenCount
= BB
->getFallthroughBranchInfo().Count
;
308 if (NonTakenCount
== BinaryBasicBlock::COUNT_NO_PROFILE
)
311 if (BF
.isForwardBranch(BB
, BB
->getConditionalSuccessor(true))) {
312 Stats
[DynoStats::FORWARD_COND_BRANCHES
] += BBExecutionCount
;
313 Stats
[DynoStats::FORWARD_COND_BRANCHES_TAKEN
] += TakenCount
;
315 Stats
[DynoStats::BACKWARD_COND_BRANCHES
] += BBExecutionCount
;
316 Stats
[DynoStats::BACKWARD_COND_BRANCHES_TAKEN
] += TakenCount
;
320 Stats
[DynoStats::UNCOND_BRANCHES
] += NonTakenCount
;