1 //===- ProfileSummaryInfo.cpp - Global profile summary information --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file contains a pass that provides access to the global profile summary
// information.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/ProfileSummaryInfo.h"
15 #include "llvm/Analysis/BlockFrequencyInfo.h"
16 #include "llvm/IR/BasicBlock.h"
17 #include "llvm/IR/CallSite.h"
18 #include "llvm/IR/Metadata.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/ProfileSummary.h"
// The following two parameters determine the threshold for a count to be
// considered hot/cold. These two parameters are percentile values (multiplied
// by 10000). If the counts are sorted in descending order, the minimum count to
// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
// threshold for determining cold count (everything <= this threshold is
// considered cold).
31 static cl::opt
<int> ProfileSummaryCutoffHot(
32 "profile-summary-cutoff-hot", cl::Hidden
, cl::init(990000), cl::ZeroOrMore
,
33 cl::desc("A count is hot if it exceeds the minimum count to"
34 " reach this percentile of total counts."));
36 static cl::opt
<int> ProfileSummaryCutoffCold(
37 "profile-summary-cutoff-cold", cl::Hidden
, cl::init(999999), cl::ZeroOrMore
,
38 cl::desc("A count is cold if it is below the minimum count"
39 " to reach this percentile of total counts."));
41 static cl::opt
<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
42 "profile-summary-huge-working-set-size-threshold", cl::Hidden
,
43 cl::init(15000), cl::ZeroOrMore
,
44 cl::desc("The code working set size is considered huge if the number of"
45 " blocks required to reach the -profile-summary-cutoff-hot"
46 " percentile exceeds this count."));
48 static cl::opt
<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
49 "profile-summary-large-working-set-size-threshold", cl::Hidden
,
50 cl::init(12500), cl::ZeroOrMore
,
51 cl::desc("The code working set size is considered large if the number of"
52 " blocks required to reach the -profile-summary-cutoff-hot"
53 " percentile exceeds this count."));
55 // The next two options override the counts derived from summary computation and
56 // are useful for debugging purposes.
57 static cl::opt
<int> ProfileSummaryHotCount(
58 "profile-summary-hot-count", cl::ReallyHidden
, cl::ZeroOrMore
,
59 cl::desc("A fixed hot count that overrides the count derived from"
60 " profile-summary-cutoff-hot"));
62 static cl::opt
<int> ProfileSummaryColdCount(
63 "profile-summary-cold-count", cl::ReallyHidden
, cl::ZeroOrMore
,
64 cl::desc("A fixed cold count that overrides the count derived from"
65 " profile-summary-cutoff-cold"));
67 // Find the summary entry for a desired percentile of counts.
68 static const ProfileSummaryEntry
&getEntryForPercentile(SummaryEntryVector
&DS
,
69 uint64_t Percentile
) {
70 auto It
= partition_point(DS
, [=](const ProfileSummaryEntry
&Entry
) {
71 return Entry
.Cutoff
< Percentile
;
73 // The required percentile has to be <= one of the percentiles in the
76 report_fatal_error("Desired percentile exceeds the maximum cutoff");
80 // The profile summary metadata may be attached either by the frontend or by
81 // any backend passes (IR level instrumentation, for example). This method
82 // checks if the Summary is null and if so checks if the summary metadata is now
83 // available in the module and parses it to get the Summary object. Returns true
84 // if a valid Summary is available.
85 bool ProfileSummaryInfo::computeSummary() {
88 // First try to get context sensitive ProfileSummary.
89 auto *SummaryMD
= M
.getProfileSummary(/* IsCS */ true);
91 Summary
.reset(ProfileSummary::getFromMD(SummaryMD
));
94 // This will actually return PSK_Instr or PSK_Sample summary.
95 SummaryMD
= M
.getProfileSummary(/* IsCS */ false);
98 Summary
.reset(ProfileSummary::getFromMD(SummaryMD
));
103 ProfileSummaryInfo::getProfileCount(const Instruction
*Inst
,
104 BlockFrequencyInfo
*BFI
,
105 bool AllowSynthetic
) {
108 assert((isa
<CallInst
>(Inst
) || isa
<InvokeInst
>(Inst
)) &&
109 "We can only get profile count for call/invoke instruction.");
110 if (hasSampleProfile()) {
111 // In sample PGO mode, check if there is a profile metadata on the
112 // instruction. If it is present, determine hotness solely based on that,
113 // since the sampled entry count may not be accurate. If there is no
114 // annotated on the instruction, return None.
116 if (Inst
->extractProfTotalWeight(TotalCount
))
121 return BFI
->getBlockProfileCount(Inst
->getParent(), AllowSynthetic
);
125 /// Returns true if the function's entry is hot. If it returns false, it
126 /// either means it is not hot or it is unknown whether it is hot or not (for
127 /// example, no profile data is available).
128 bool ProfileSummaryInfo::isFunctionEntryHot(const Function
*F
) {
129 if (!F
|| !computeSummary())
131 auto FunctionCount
= F
->getEntryCount();
132 // FIXME: The heuristic used below for determining hotness is based on
133 // preliminary SPEC tuning for inliner. This will eventually be a
134 // convenience method that calls isHotCount.
135 return FunctionCount
&& isHotCount(FunctionCount
.getCount());
138 /// Returns true if the function contains hot code. This can include a hot
139 /// function entry count, hot basic block, or (in the case of Sample PGO)
140 /// hot total call edge count.
141 /// If it returns false, it either means it is not hot or it is unknown
142 /// (for example, no profile data is available).
143 bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function
*F
,
144 BlockFrequencyInfo
&BFI
) {
145 if (!F
|| !computeSummary())
147 if (auto FunctionCount
= F
->getEntryCount())
148 if (isHotCount(FunctionCount
.getCount()))
151 if (hasSampleProfile()) {
152 uint64_t TotalCallCount
= 0;
153 for (const auto &BB
: *F
)
154 for (const auto &I
: BB
)
155 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
156 if (auto CallCount
= getProfileCount(&I
, nullptr))
157 TotalCallCount
+= CallCount
.getValue();
158 if (isHotCount(TotalCallCount
))
161 for (const auto &BB
: *F
)
162 if (isHotBlock(&BB
, &BFI
))
167 /// Returns true if the function only contains cold code. This means that
168 /// the function entry and blocks are all cold, and (in the case of Sample PGO)
169 /// the total call edge count is cold.
170 /// If it returns false, it either means it is not cold or it is unknown
171 /// (for example, no profile data is available).
172 bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function
*F
,
173 BlockFrequencyInfo
&BFI
) {
174 if (!F
|| !computeSummary())
176 if (auto FunctionCount
= F
->getEntryCount())
177 if (!isColdCount(FunctionCount
.getCount()))
180 if (hasSampleProfile()) {
181 uint64_t TotalCallCount
= 0;
182 for (const auto &BB
: *F
)
183 for (const auto &I
: BB
)
184 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
185 if (auto CallCount
= getProfileCount(&I
, nullptr))
186 TotalCallCount
+= CallCount
.getValue();
187 if (!isColdCount(TotalCallCount
))
190 for (const auto &BB
: *F
)
191 if (!isColdBlock(&BB
, &BFI
))
196 // Like isFunctionHotInCallGraph but for a given cutoff.
197 bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile(
198 int PercentileCutoff
, const Function
*F
, BlockFrequencyInfo
&BFI
) {
199 if (!F
|| !computeSummary())
201 if (auto FunctionCount
= F
->getEntryCount())
202 if (isHotCountNthPercentile(PercentileCutoff
, FunctionCount
.getCount()))
205 if (hasSampleProfile()) {
206 uint64_t TotalCallCount
= 0;
207 for (const auto &BB
: *F
)
208 for (const auto &I
: BB
)
209 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
210 if (auto CallCount
= getProfileCount(&I
, nullptr))
211 TotalCallCount
+= CallCount
.getValue();
212 if (isHotCountNthPercentile(PercentileCutoff
, TotalCallCount
))
215 for (const auto &BB
: *F
)
216 if (isHotBlockNthPercentile(PercentileCutoff
, &BB
, &BFI
))
221 /// Returns true if the function's entry is a cold. If it returns false, it
222 /// either means it is not cold or it is unknown whether it is cold or not (for
223 /// example, no profile data is available).
224 bool ProfileSummaryInfo::isFunctionEntryCold(const Function
*F
) {
227 if (F
->hasFnAttribute(Attribute::Cold
))
229 if (!computeSummary())
231 auto FunctionCount
= F
->getEntryCount();
232 // FIXME: The heuristic used below for determining coldness is based on
233 // preliminary SPEC tuning for inliner. This will eventually be a
234 // convenience method that calls isHotCount.
235 return FunctionCount
&& isColdCount(FunctionCount
.getCount());
238 /// Compute the hot and cold thresholds.
239 void ProfileSummaryInfo::computeThresholds() {
240 if (!computeSummary())
242 auto &DetailedSummary
= Summary
->getDetailedSummary();
244 getEntryForPercentile(DetailedSummary
, ProfileSummaryCutoffHot
);
245 HotCountThreshold
= HotEntry
.MinCount
;
246 if (ProfileSummaryHotCount
.getNumOccurrences() > 0)
247 HotCountThreshold
= ProfileSummaryHotCount
;
249 getEntryForPercentile(DetailedSummary
, ProfileSummaryCutoffCold
);
250 ColdCountThreshold
= ColdEntry
.MinCount
;
251 if (ProfileSummaryColdCount
.getNumOccurrences() > 0)
252 ColdCountThreshold
= ProfileSummaryColdCount
;
253 assert(ColdCountThreshold
<= HotCountThreshold
&&
254 "Cold count threshold cannot exceed hot count threshold!");
255 HasHugeWorkingSetSize
=
256 HotEntry
.NumCounts
> ProfileSummaryHugeWorkingSetSizeThreshold
;
257 HasLargeWorkingSetSize
=
258 HotEntry
.NumCounts
> ProfileSummaryLargeWorkingSetSizeThreshold
;
261 Optional
<uint64_t> ProfileSummaryInfo::computeThreshold(int PercentileCutoff
) {
262 if (!computeSummary())
264 auto iter
= ThresholdCache
.find(PercentileCutoff
);
265 if (iter
!= ThresholdCache
.end()) {
268 auto &DetailedSummary
= Summary
->getDetailedSummary();
270 getEntryForPercentile(DetailedSummary
, PercentileCutoff
);
271 uint64_t CountThreshold
= Entry
.MinCount
;
272 ThresholdCache
[PercentileCutoff
] = CountThreshold
;
273 return CountThreshold
;
276 bool ProfileSummaryInfo::hasHugeWorkingSetSize() {
277 if (!HasHugeWorkingSetSize
)
279 return HasHugeWorkingSetSize
&& HasHugeWorkingSetSize
.getValue();
282 bool ProfileSummaryInfo::hasLargeWorkingSetSize() {
283 if (!HasLargeWorkingSetSize
)
285 return HasLargeWorkingSetSize
&& HasLargeWorkingSetSize
.getValue();
288 bool ProfileSummaryInfo::isHotCount(uint64_t C
) {
289 if (!HotCountThreshold
)
291 return HotCountThreshold
&& C
>= HotCountThreshold
.getValue();
294 bool ProfileSummaryInfo::isColdCount(uint64_t C
) {
295 if (!ColdCountThreshold
)
297 return ColdCountThreshold
&& C
<= ColdCountThreshold
.getValue();
300 bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff
, uint64_t C
) {
301 auto CountThreshold
= computeThreshold(PercentileCutoff
);
302 return CountThreshold
&& C
>= CountThreshold
.getValue();
305 uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
306 if (!HotCountThreshold
)
308 return HotCountThreshold
? HotCountThreshold
.getValue() : UINT64_MAX
;
311 uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() {
312 if (!ColdCountThreshold
)
314 return ColdCountThreshold
? ColdCountThreshold
.getValue() : 0;
317 bool ProfileSummaryInfo::isHotBlock(const BasicBlock
*BB
, BlockFrequencyInfo
*BFI
) {
318 auto Count
= BFI
->getBlockProfileCount(BB
);
319 return Count
&& isHotCount(*Count
);
322 bool ProfileSummaryInfo::isColdBlock(const BasicBlock
*BB
,
323 BlockFrequencyInfo
*BFI
) {
324 auto Count
= BFI
->getBlockProfileCount(BB
);
325 return Count
&& isColdCount(*Count
);
328 bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff
,
329 const BasicBlock
*BB
,
330 BlockFrequencyInfo
*BFI
) {
331 auto Count
= BFI
->getBlockProfileCount(BB
);
332 return Count
&& isHotCountNthPercentile(PercentileCutoff
, *Count
);
335 bool ProfileSummaryInfo::isHotCallSite(const CallSite
&CS
,
336 BlockFrequencyInfo
*BFI
) {
337 auto C
= getProfileCount(CS
.getInstruction(), BFI
);
338 return C
&& isHotCount(*C
);
341 bool ProfileSummaryInfo::isColdCallSite(const CallSite
&CS
,
342 BlockFrequencyInfo
*BFI
) {
343 auto C
= getProfileCount(CS
.getInstruction(), BFI
);
345 return isColdCount(*C
);
347 // In SamplePGO, if the caller has been sampled, and there is no profile
348 // annotated on the callsite, we consider the callsite as cold.
349 return hasSampleProfile() && CS
.getCaller()->hasProfileData();
352 INITIALIZE_PASS(ProfileSummaryInfoWrapperPass
, "profile-summary-info",
353 "Profile summary info", false, true)
355 ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass()
356 : ImmutablePass(ID
) {
357 initializeProfileSummaryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
360 bool ProfileSummaryInfoWrapperPass::doInitialization(Module
&M
) {
361 PSI
.reset(new ProfileSummaryInfo(M
));
365 bool ProfileSummaryInfoWrapperPass::doFinalization(Module
&M
) {
370 AnalysisKey
ProfileSummaryAnalysis::Key
;
371 ProfileSummaryInfo
ProfileSummaryAnalysis::run(Module
&M
,
372 ModuleAnalysisManager
&) {
373 return ProfileSummaryInfo(M
);
376 PreservedAnalyses
ProfileSummaryPrinterPass::run(Module
&M
,
377 ModuleAnalysisManager
&AM
) {
378 ProfileSummaryInfo
&PSI
= AM
.getResult
<ProfileSummaryAnalysis
>(M
);
380 OS
<< "Functions in " << M
.getName() << " with hot/cold annotations: \n";
383 if (PSI
.isFunctionEntryHot(&F
))
384 OS
<< " :hot entry ";
385 else if (PSI
.isFunctionEntryCold(&F
))
386 OS
<< " :cold entry ";
389 return PreservedAnalyses::all();
392 char ProfileSummaryInfoWrapperPass::ID
= 0;