//=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains support for computing profile summary data.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/IR/Attributes.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/Metadata.h"
16 #include "llvm/IR/Type.h"
17 #include "llvm/ProfileData/InstrProf.h"
18 #include "llvm/ProfileData/ProfileCommon.h"
19 #include "llvm/ProfileData/SampleProf.h"
20 #include "llvm/Support/Casting.h"
21 #include "llvm/Support/CommandLine.h"
25 cl::opt
<bool> UseContextLessSummary(
26 "profile-summary-contextless", cl::Hidden
, cl::init(false), cl::ZeroOrMore
,
27 cl::desc("Merge context profiles before calculating thresholds."));
// The following two parameters determine the threshold for a count to be
// considered hot/cold. These two parameters are percentile values (multiplied
// by 10000). If the counts are sorted in descending order, the minimum count to
// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
// threshold for determining cold count (everything <= this threshold is
// considered cold).
36 cl::opt
<int> ProfileSummaryCutoffHot(
37 "profile-summary-cutoff-hot", cl::Hidden
, cl::init(990000), cl::ZeroOrMore
,
38 cl::desc("A count is hot if it exceeds the minimum count to"
39 " reach this percentile of total counts."));
41 cl::opt
<int> ProfileSummaryCutoffCold(
42 "profile-summary-cutoff-cold", cl::Hidden
, cl::init(999999), cl::ZeroOrMore
,
43 cl::desc("A count is cold if it is below the minimum count"
44 " to reach this percentile of total counts."));
46 cl::opt
<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
47 "profile-summary-huge-working-set-size-threshold", cl::Hidden
,
48 cl::init(15000), cl::ZeroOrMore
,
49 cl::desc("The code working set size is considered huge if the number of"
50 " blocks required to reach the -profile-summary-cutoff-hot"
51 " percentile exceeds this count."));
53 cl::opt
<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
54 "profile-summary-large-working-set-size-threshold", cl::Hidden
,
55 cl::init(12500), cl::ZeroOrMore
,
56 cl::desc("The code working set size is considered large if the number of"
57 " blocks required to reach the -profile-summary-cutoff-hot"
58 " percentile exceeds this count."));
60 // The next two options override the counts derived from summary computation and
61 // are useful for debugging purposes.
62 cl::opt
<int> ProfileSummaryHotCount(
63 "profile-summary-hot-count", cl::ReallyHidden
, cl::ZeroOrMore
,
64 cl::desc("A fixed hot count that overrides the count derived from"
65 " profile-summary-cutoff-hot"));
67 cl::opt
<int> ProfileSummaryColdCount(
68 "profile-summary-cold-count", cl::ReallyHidden
, cl::ZeroOrMore
,
69 cl::desc("A fixed cold count that overrides the count derived from"
70 " profile-summary-cutoff-cold"));
72 // A set of cutoff values. Each value, when divided by ProfileSummary::Scale
73 // (which is 1000000) is a desired percentile of total counts.
74 static const uint32_t DefaultCutoffsData
[] = {
77 200000, 300000, 400000, 500000, 600000, 700000, 800000,
78 900000, 950000, 990000, 999000, 999900, 999990, 999999};
79 const ArrayRef
<uint32_t> ProfileSummaryBuilder::DefaultCutoffs
=
82 const ProfileSummaryEntry
&
83 ProfileSummaryBuilder::getEntryForPercentile(SummaryEntryVector
&DS
,
84 uint64_t Percentile
) {
85 auto It
= partition_point(DS
, [=](const ProfileSummaryEntry
&Entry
) {
86 return Entry
.Cutoff
< Percentile
;
88 // The required percentile has to be <= one of the percentiles in the
91 report_fatal_error("Desired percentile exceeds the maximum cutoff");
95 void InstrProfSummaryBuilder::addRecord(const InstrProfRecord
&R
) {
96 // The first counter is not necessarily an entry count for IR
97 // instrumentation profiles.
98 // Eventually MaxFunctionCount will become obsolete and this can be
100 addEntryCount(R
.Counts
[0]);
101 for (size_t I
= 1, E
= R
.Counts
.size(); I
< E
; ++I
)
102 addInternalCount(R
.Counts
[I
]);
105 // To compute the detailed summary, we consider each line containing samples as
106 // equivalent to a block with a count in the instrumented profile.
107 void SampleProfileSummaryBuilder::addRecord(
108 const sampleprof::FunctionSamples
&FS
, bool isCallsiteSample
) {
109 if (!isCallsiteSample
) {
111 if (FS
.getHeadSamples() > MaxFunctionCount
)
112 MaxFunctionCount
= FS
.getHeadSamples();
114 for (const auto &I
: FS
.getBodySamples()) {
115 uint64_t Count
= I
.second
.getSamples();
118 for (const auto &I
: FS
.getCallsiteSamples())
119 for (const auto &CS
: I
.second
)
120 addRecord(CS
.second
, true);
// Computes the detailed summary: for each cutoff in DetailedSummaryCutoffs, a
// (Cutoff, MinCount, NumCounts) triplet is appended to DetailedSummary.
125 void ProfileSummaryBuilder::computeDetailedSummary() {
126 if (DetailedSummaryCutoffs
.empty())
128 llvm::sort(DetailedSummaryCutoffs
);
129 auto Iter
= CountFrequencies
.begin();
130 const auto End
= CountFrequencies
.end();
132 uint32_t CountsSeen
= 0;
133 uint64_t CurrSum
= 0, Count
= 0;
135 for (const uint32_t Cutoff
: DetailedSummaryCutoffs
) {
136 assert(Cutoff
<= 999999);
137 APInt
Temp(128, TotalCount
);
138 APInt
N(128, Cutoff
);
139 APInt
D(128, ProfileSummary::Scale
);
142 uint64_t DesiredCount
= Temp
.getZExtValue();
143 assert(DesiredCount
<= TotalCount
);
144 while (CurrSum
< DesiredCount
&& Iter
!= End
) {
146 uint32_t Freq
= Iter
->second
;
147 CurrSum
+= (Count
* Freq
);
151 assert(CurrSum
>= DesiredCount
);
152 ProfileSummaryEntry PSE
= {Cutoff
, Count
, CountsSeen
};
153 DetailedSummary
.push_back(PSE
);
157 uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector
&DS
) {
159 ProfileSummaryBuilder::getEntryForPercentile(DS
, ProfileSummaryCutoffHot
);
160 uint64_t HotCountThreshold
= HotEntry
.MinCount
;
161 if (ProfileSummaryHotCount
.getNumOccurrences() > 0)
162 HotCountThreshold
= ProfileSummaryHotCount
;
163 return HotCountThreshold
;
166 uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector
&DS
) {
167 auto &ColdEntry
= ProfileSummaryBuilder::getEntryForPercentile(
168 DS
, ProfileSummaryCutoffCold
);
169 uint64_t ColdCountThreshold
= ColdEntry
.MinCount
;
170 if (ProfileSummaryColdCount
.getNumOccurrences() > 0)
171 ColdCountThreshold
= ProfileSummaryColdCount
;
172 return ColdCountThreshold
;
175 std::unique_ptr
<ProfileSummary
> SampleProfileSummaryBuilder::getSummary() {
176 computeDetailedSummary();
177 return std::make_unique
<ProfileSummary
>(
178 ProfileSummary::PSK_Sample
, DetailedSummary
, TotalCount
, MaxCount
, 0,
179 MaxFunctionCount
, NumCounts
, NumFunctions
);
182 std::unique_ptr
<ProfileSummary
>
183 SampleProfileSummaryBuilder::computeSummaryForProfiles(
184 const StringMap
<sampleprof::FunctionSamples
> &Profiles
) {
185 assert(NumFunctions
== 0 &&
186 "This can only be called on an empty summary builder");
187 StringMap
<sampleprof::FunctionSamples
> ContextLessProfiles
;
188 const StringMap
<sampleprof::FunctionSamples
> *ProfilesToUse
= &Profiles
;
  // For CSSPGO, a context-sensitive profile effectively splits a function
  // profile into many copies, each representing the CFG profile of a particular
  // calling context. That makes the count distribution look flatter, as we now
  // have more function profiles each with lower counts, which in turn leads to
  // lower hot thresholds. To compensate for that, by default we merge context
  // profiles before computing the profile summary.
195 if (UseContextLessSummary
|| (sampleprof::FunctionSamples::ProfileIsCS
&&
196 !UseContextLessSummary
.getNumOccurrences())) {
197 for (const auto &I
: Profiles
) {
198 ContextLessProfiles
[I
.second
.getName()].merge(I
.second
);
200 ProfilesToUse
= &ContextLessProfiles
;
203 for (const auto &I
: *ProfilesToUse
) {
204 const sampleprof::FunctionSamples
&Profile
= I
.second
;
211 std::unique_ptr
<ProfileSummary
> InstrProfSummaryBuilder::getSummary() {
212 computeDetailedSummary();
213 return std::make_unique
<ProfileSummary
>(
214 ProfileSummary::PSK_Instr
, DetailedSummary
, TotalCount
, MaxCount
,
215 MaxInternalBlockCount
, MaxFunctionCount
, NumCounts
, NumFunctions
);
218 void InstrProfSummaryBuilder::addEntryCount(uint64_t Count
) {
221 // Skip invalid count.
222 if (Count
== (uint64_t)-1)
226 if (Count
> MaxFunctionCount
)
227 MaxFunctionCount
= Count
;
230 void InstrProfSummaryBuilder::addInternalCount(uint64_t Count
) {
231 // Skip invalid count.
232 if (Count
== (uint64_t)-1)
236 if (Count
> MaxInternalBlockCount
)
237 MaxInternalBlockCount
= Count
;