1 //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the DynoStats class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/DynoStats.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
24 #define DEBUG_TYPE "bolt"
31 extern cl::OptionCategory BoltCategory
;
33 static cl::opt
<uint32_t>
34 DynoStatsScale("dyno-stats-scale",
35 cl::desc("scale to be applied while reporting dyno stats"),
39 cl::cat(BoltCategory
));
41 static cl::opt
<uint32_t>
42 PrintDynoOpcodeStat("print-dyno-opcode-stats",
43 cl::desc("print per instruction opcode dyno stats and the function"
44 "names:BB offsets of the nth highest execution counts"),
47 cl::cat(BoltCategory
));
54 constexpr const char *DynoStats::Desc
[];
56 bool DynoStats::operator<(const DynoStats
&Other
) const {
57 return std::lexicographical_compare(
58 &Stats
[FIRST_DYNO_STAT
], &Stats
[LAST_DYNO_STAT
],
59 &Other
.Stats
[FIRST_DYNO_STAT
], &Other
.Stats
[LAST_DYNO_STAT
]);
62 bool DynoStats::operator==(const DynoStats
&Other
) const {
63 return std::equal(&Stats
[FIRST_DYNO_STAT
], &Stats
[LAST_DYNO_STAT
],
64 &Other
.Stats
[FIRST_DYNO_STAT
]);
67 bool DynoStats::lessThan(const DynoStats
&Other
,
68 ArrayRef
<Category
> Keys
) const {
69 return std::lexicographical_compare(
70 Keys
.begin(), Keys
.end(), Keys
.begin(), Keys
.end(),
71 [this, &Other
](const Category A
, const Category
) {
72 return Stats
[A
] < Other
.Stats
[A
];
76 void DynoStats::print(raw_ostream
&OS
, const DynoStats
*Other
,
77 MCInstPrinter
*Printer
) const {
78 auto printStatWithDelta
= [&](const std::string
&Name
, uint64_t Stat
,
80 OS
<< format("%'20lld : ", Stat
* opts::DynoStatsScale
) << Name
;
82 if (Stat
!= OtherStat
) {
83 OtherStat
= std::max(OtherStat
, uint64_t(1)); // to prevent divide by 0
84 OS
<< format(" (%+.1f%%)", ((float)Stat
- (float)OtherStat
) * 100.0 /
93 for (auto Stat
= DynoStats::FIRST_DYNO_STAT
+ 1;
94 Stat
< DynoStats::LAST_DYNO_STAT
; ++Stat
) {
96 if (!PrintAArch64Stats
&& Stat
== DynoStats::VENEER_CALLS_AARCH64
)
99 printStatWithDelta(Desc
[Stat
], Stats
[Stat
], Other
? (*Other
)[Stat
] : 0);
101 if (opts::PrintDynoOpcodeStat
&& Printer
) {
102 OS
<< "\nProgram-wide opcode histogram:\n";
103 OS
<< " Opcode, Execution Count, Max Exec Count, "
104 "Function Name:Offset ...\n";
105 std::vector
<std::pair
<uint64_t, unsigned>> SortedHistogram
;
106 for (const OpcodeStatTy
&Stat
: OpcodeHistogram
)
107 SortedHistogram
.emplace_back(Stat
.second
.first
, Stat
.first
);
109 // Sort using lexicographic ordering
110 llvm::sort(SortedHistogram
);
112 // Dump in ascending order: Start with Opcode with Highest execution
114 for (auto &Stat
: llvm::reverse(SortedHistogram
)) {
115 OS
<< format("%20s,%'18lld", Printer
->getOpcodeName(Stat
.second
).data(),
116 Stat
.first
* opts::DynoStatsScale
);
117 auto It
= OpcodeHistogram
.find(Stat
.second
);
118 assert(It
!= OpcodeHistogram
.end());
119 MaxOpcodeHistogramTy MaxMultiMap
= It
->second
.second
;
120 // Start with function name:BB offset with highest execution count.
121 for (auto &Max
: llvm::reverse(MaxMultiMap
)) {
122 OS
<< format(", %'18lld, ", Max
.first
* opts::DynoStatsScale
)
123 << Max
.second
.first
.str() << ':' << Max
.second
.second
;
130 void DynoStats::operator+=(const DynoStats
&Other
) {
131 for (auto Stat
= DynoStats::FIRST_DYNO_STAT
+ 1;
132 Stat
< DynoStats::LAST_DYNO_STAT
; ++Stat
) {
133 Stats
[Stat
] += Other
[Stat
];
135 for (const OpcodeStatTy
&Stat
: Other
.OpcodeHistogram
) {
136 auto I
= OpcodeHistogram
.find(Stat
.first
);
137 if (I
== OpcodeHistogram
.end()) {
138 OpcodeHistogram
.emplace(Stat
);
140 // Merge other histograms, log only the opts::PrintDynoOpcodeStat'th
142 I
->second
.first
+= Stat
.second
.first
;
143 auto &MMap
= I
->second
.second
;
144 auto &OtherMMap
= Stat
.second
.second
;
145 auto Size
= MMap
.size();
146 assert(Size
<= opts::PrintDynoOpcodeStat
);
147 for (auto OtherMMapPair
: llvm::reverse(OtherMMap
)) {
148 if (Size
++ >= opts::PrintDynoOpcodeStat
) {
149 auto First
= MMap
.begin();
150 if (OtherMMapPair
.first
<= First
->first
)
154 MMap
.emplace(OtherMMapPair
);
160 DynoStats
getDynoStats(BinaryFunction
&BF
) {
161 auto &BC
= BF
.getBinaryContext();
163 DynoStats
Stats(/*PrintAArch64Stats*/ BC
.isAArch64());
165 // Return empty-stats about the function we don't completely understand.
166 if (!BF
.isSimple() || !BF
.hasValidProfile() || !BF
.hasCanonicalCFG())
169 // Update enumeration of basic blocks for correct detection of branch'
171 BF
.getLayout().updateLayoutIndices();
173 for (BinaryBasicBlock
*const BB
: BF
.getLayout().blocks()) {
174 // The basic block execution count equals to the sum of incoming branch
175 // frequencies. This may deviate from the sum of outgoing branches of the
176 // basic block especially since the block may contain a function that
177 // does not return or a function that throws an exception.
178 const uint64_t BBExecutionCount
= BB
->getKnownExecutionCount();
180 // Ignore empty blocks and blocks that were not executed.
181 if (BB
->getNumNonPseudos() == 0 || BBExecutionCount
== 0)
184 // Count AArch64 linker-inserted veneers
185 if (BF
.isAArch64Veneer())
186 Stats
[DynoStats::VENEER_CALLS_AARCH64
] += BF
.getKnownExecutionCount();
188 // Count various instruction types by iterating through all instructions.
189 // When -print-dyno-opcode-stats is on, count per each opcode and record
190 // maximum execution counts.
191 for (const MCInst
&Instr
: *BB
) {
192 if (opts::PrintDynoOpcodeStat
) {
193 unsigned Opcode
= Instr
.getOpcode();
194 auto I
= Stats
.OpcodeHistogram
.find(Opcode
);
195 if (I
== Stats
.OpcodeHistogram
.end()) {
196 DynoStats::MaxOpcodeHistogramTy MMap
;
197 MMap
.emplace(BBExecutionCount
,
198 std::make_pair(BF
.getOneName(), BB
->getOffset()));
199 Stats
.OpcodeHistogram
.emplace(Opcode
,
200 std::make_pair(BBExecutionCount
, MMap
));
202 I
->second
.first
+= BBExecutionCount
;
204 if (I
->second
.second
.size() == opts::PrintDynoOpcodeStat
) {
205 auto First
= I
->second
.second
.begin();
206 if (First
->first
< BBExecutionCount
)
207 I
->second
.second
.erase(First
);
212 I
->second
.second
.emplace(
214 std::make_pair(BF
.getOneName(), BB
->getOffset()));
219 if (BC
.MIB
->mayStore(Instr
)) {
220 Stats
[DynoStats::STORES
] += BBExecutionCount
;
222 if (BC
.MIB
->mayLoad(Instr
)) {
223 Stats
[DynoStats::LOADS
] += BBExecutionCount
;
225 if (!BC
.MIB
->isCall(Instr
))
228 uint64_t CallFreq
= BBExecutionCount
;
229 if (BC
.MIB
->getConditionalTailCall(Instr
)) {
231 BC
.MIB
->getAnnotationWithDefault
<uint64_t>(Instr
, "CTCTakenCount");
233 Stats
[DynoStats::FUNCTION_CALLS
] += CallFreq
;
234 if (BC
.MIB
->isIndirectCall(Instr
)) {
235 Stats
[DynoStats::INDIRECT_CALLS
] += CallFreq
;
236 } else if (const MCSymbol
*CallSymbol
= BC
.MIB
->getTargetSymbol(Instr
)) {
237 const BinaryFunction
*BF
= BC
.getFunctionForSymbol(CallSymbol
);
238 if (BF
&& BF
->isPLTFunction()) {
239 Stats
[DynoStats::PLT_CALLS
] += CallFreq
;
241 // We don't process PLT functions and hence have to adjust relevant
242 // dynostats here for:
244 // jmp *GOT_ENTRY(%rip)
246 // NOTE: this is arch-specific.
247 Stats
[DynoStats::FUNCTION_CALLS
] += CallFreq
;
248 Stats
[DynoStats::INDIRECT_CALLS
] += CallFreq
;
249 Stats
[DynoStats::LOADS
] += CallFreq
;
250 Stats
[DynoStats::INSTRUCTIONS
] += CallFreq
;
255 Stats
[DynoStats::INSTRUCTIONS
] += BB
->getNumNonPseudos() * BBExecutionCount
;
258 const MCInst
*LastInstr
= BB
->getLastNonPseudoInstr();
259 if (BC
.MIB
->getJumpTable(*LastInstr
)) {
260 Stats
[DynoStats::JUMP_TABLE_BRANCHES
] += BBExecutionCount
;
262 static uint64_t MostFrequentJT
;
263 if (BBExecutionCount
> MostFrequentJT
) {
264 MostFrequentJT
= BBExecutionCount
;
265 dbgs() << "BOLT-INFO: most frequently executed jump table is in "
266 << "function " << BF
<< " in basic block " << BB
->getName()
267 << " executed totally " << BBExecutionCount
<< " times.\n";
273 if (BC
.MIB
->isIndirectBranch(*LastInstr
) && !BC
.MIB
->isCall(*LastInstr
)) {
274 Stats
[DynoStats::UNKNOWN_INDIRECT_BRANCHES
] += BBExecutionCount
;
278 // Update stats for branches.
279 const MCSymbol
*TBB
= nullptr;
280 const MCSymbol
*FBB
= nullptr;
281 MCInst
*CondBranch
= nullptr;
282 MCInst
*UncondBranch
= nullptr;
283 if (!BB
->analyzeBranch(TBB
, FBB
, CondBranch
, UncondBranch
))
286 if (!CondBranch
&& !UncondBranch
)
289 // Simple unconditional branch.
291 Stats
[DynoStats::UNCOND_BRANCHES
] += BBExecutionCount
;
295 // CTCs: instruction annotations could be stripped, hence check the number
296 // of successors to identify conditional tail calls.
297 if (BB
->succ_size() == 1) {
298 if (BB
->branch_info_begin() != BB
->branch_info_end())
299 Stats
[DynoStats::UNCOND_BRANCHES
] += BB
->branch_info_begin()->Count
;
303 // Conditional branch that could be followed by an unconditional branch.
304 uint64_t TakenCount
= BB
->getTakenBranchInfo().Count
;
305 if (TakenCount
== BinaryBasicBlock::COUNT_NO_PROFILE
)
308 uint64_t NonTakenCount
= BB
->getFallthroughBranchInfo().Count
;
309 if (NonTakenCount
== BinaryBasicBlock::COUNT_NO_PROFILE
)
312 if (BF
.isForwardBranch(BB
, BB
->getConditionalSuccessor(true))) {
313 Stats
[DynoStats::FORWARD_COND_BRANCHES
] += BBExecutionCount
;
314 Stats
[DynoStats::FORWARD_COND_BRANCHES_TAKEN
] += TakenCount
;
316 Stats
[DynoStats::BACKWARD_COND_BRANCHES
] += BBExecutionCount
;
317 Stats
[DynoStats::BACKWARD_COND_BRANCHES_TAKEN
] += TakenCount
;
321 Stats
[DynoStats::UNCOND_BRANCHES
] += NonTakenCount
;