1 //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10 #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11 #include "CSPreInliner.h"
12 #include "ErrorHandling.h"
13 #include "PerfReader.h"
14 #include "ProfiledBinary.h"
15 #include "llvm/IR/DebugInfoMetadata.h"
16 #include "llvm/ProfileData/SampleProfWriter.h"
18 #include <unordered_set>
21 using namespace sampleprof
;
24 namespace sampleprof
{
26 using ProbeCounterMap
=
27 std::unordered_map
<const MCDecodedPseudoProbe
*, uint64_t>;
29 // This base class for profile generation of sample-based PGO. We reuse all
30 // structures relating to function profiles and profile writers as seen in
31 // /ProfileData/SampleProf.h.
32 class ProfileGeneratorBase
{
35 ProfileGeneratorBase(ProfiledBinary
*Binary
) : Binary(Binary
){};
36 ProfileGeneratorBase(ProfiledBinary
*Binary
,
37 const ContextSampleCounterMap
*Counters
)
38 : Binary(Binary
), SampleCounters(Counters
){};
39 ProfileGeneratorBase(ProfiledBinary
*Binary
,
40 const SampleProfileMap
&&Profiles
)
41 : Binary(Binary
), ProfileMap(std::move(Profiles
)){};
43 virtual ~ProfileGeneratorBase() = default;
44 static std::unique_ptr
<ProfileGeneratorBase
>
45 create(ProfiledBinary
*Binary
, const ContextSampleCounterMap
*Counters
,
47 static std::unique_ptr
<ProfileGeneratorBase
>
48 create(ProfiledBinary
*Binary
, SampleProfileMap
&ProfileMap
,
50 virtual void generateProfile() = 0;
54 getDuplicationFactor(unsigned Discriminator
,
55 bool UseFSD
= ProfileGeneratorBase::UseFSDiscriminator
) {
57 : llvm::DILocation::getDuplicationFactorFromDiscriminator(
62 getBaseDiscriminator(unsigned Discriminator
,
63 bool UseFSD
= ProfileGeneratorBase::UseFSDiscriminator
) {
64 return UseFSD
? Discriminator
65 : DILocation::getBaseDiscriminatorFromDiscriminator(
66 Discriminator
, /* IsFSDiscriminator */ false);
69 static bool UseFSDiscriminator
;
72 // Use SampleProfileWriter to serialize profile map
73 void write(std::unique_ptr
<SampleProfileWriter
> Writer
,
74 SampleProfileMap
&ProfileMap
);
76 For each region boundary point, mark if it is begin or end (or both) of
77 the region. Boundary points are inclusive. Log the sample count as well
78 so we can use it when we compute the sample count of each disjoint region
79 later. Note that there might be multiple ranges with different sample
80 count that share same begin/end point. We need to accumulate the sample
81 count for the boundary point for such case, because for the example
88 sample count for disjoint region [A,B] would be 300.
90 void findDisjointRanges(RangeSample
&DisjointRanges
,
91 const RangeSample
&Ranges
);
93 // Go through each address from range to extract the top frame probe by
94 // looking up in the Address2ProbeMap
95 void extractProbesFromRange(const RangeSample
&RangeCounter
,
96 ProbeCounterMap
&ProbeCounter
,
97 bool FindDisjointRanges
= true);
99 // Helper function for updating body sample for a leaf location in
101 void updateBodySamplesforFunctionProfile(FunctionSamples
&FunctionProfile
,
102 const SampleContextFrame
&LeafLoc
,
105 void updateFunctionSamples();
107 void updateTotalSamples();
109 void updateCallsiteSamples();
111 StringRef
getCalleeNameForAddress(uint64_t TargetAddress
);
113 void computeSummaryAndThreshold(SampleProfileMap
&ProfileMap
);
115 void calculateAndShowDensity(const SampleProfileMap
&Profiles
);
117 double calculateDensity(const SampleProfileMap
&Profiles
,
118 uint64_t HotCntThreshold
);
120 void showDensitySuggestion(double Density
);
122 void collectProfiledFunctions();
124 bool collectFunctionsFromRawProfile(
125 std::unordered_set
<const BinaryFunction
*> &ProfiledFunctions
);
127 // Collect profiled Functions for llvm sample profile input.
128 virtual bool collectFunctionsFromLLVMProfile(
129 std::unordered_set
<const BinaryFunction
*> &ProfiledFunctions
) = 0;
131 // Thresholds from profile summary to answer isHotCount/isColdCount queries.
132 uint64_t HotCountThreshold
;
134 uint64_t ColdCountThreshold
;
136 ProfiledBinary
*Binary
= nullptr;
138 std::unique_ptr
<ProfileSummary
> Summary
;
140 // Used by SampleProfileWriter
141 SampleProfileMap ProfileMap
;
143 const ContextSampleCounterMap
*SampleCounters
= nullptr;
146 class ProfileGenerator
: public ProfileGeneratorBase
{
149 ProfileGenerator(ProfiledBinary
*Binary
,
150 const ContextSampleCounterMap
*Counters
)
151 : ProfileGeneratorBase(Binary
, Counters
){};
152 ProfileGenerator(ProfiledBinary
*Binary
, const SampleProfileMap
&&Profiles
)
153 : ProfileGeneratorBase(Binary
, std::move(Profiles
)){};
154 void generateProfile() override
;
157 void generateLineNumBasedProfile();
158 void generateProbeBasedProfile();
159 RangeSample
preprocessRangeCounter(const RangeSample
&RangeCounter
);
160 FunctionSamples
&getTopLevelFunctionProfile(FunctionId FuncName
);
161 // Helper function to get the leaf frame's FunctionProfile by traversing the
162 // inline stack and meanwhile it adds the total samples for each frame's
165 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector
&FrameVec
,
167 void populateBodySamplesForAllFunctions(const RangeSample
&RangeCounter
);
169 populateBoundarySamplesForAllFunctions(const BranchSample
&BranchCounters
);
171 populateBodySamplesWithProbesForAllFunctions(const RangeSample
&RangeCounter
);
172 void populateBoundarySamplesWithProbesForAllFunctions(
173 const BranchSample
&BranchCounters
);
174 void postProcessProfiles();
175 void trimColdProfiles(const SampleProfileMap
&Profiles
,
176 uint64_t ColdCntThreshold
);
177 bool collectFunctionsFromLLVMProfile(
178 std::unordered_set
<const BinaryFunction
*> &ProfiledFunctions
) override
;
181 class CSProfileGenerator
: public ProfileGeneratorBase
{
183 CSProfileGenerator(ProfiledBinary
*Binary
,
184 const ContextSampleCounterMap
*Counters
)
185 : ProfileGeneratorBase(Binary
, Counters
){};
186 CSProfileGenerator(ProfiledBinary
*Binary
, SampleProfileMap
&Profiles
)
187 : ProfileGeneratorBase(Binary
), ContextTracker(Profiles
, nullptr){};
188 void generateProfile() override
;
190 // Trim the context stack at a given depth.
191 template <typename T
>
192 static void trimContext(SmallVectorImpl
<T
> &S
, int Depth
= MaxContextDepth
) {
193 if (Depth
< 0 || static_cast<size_t>(Depth
) >= S
.size())
195 std::copy(S
.begin() + S
.size() - static_cast<size_t>(Depth
), S
.end(),
200 // Remove adjacent repeated context sequences up to a given sequence length,
201 // -1 means no size limit. Note that repeated sequences are identified based
202 // on the exact call site, this is finer granularity than function recursion.
203 template <typename T
>
204 static void compressRecursionContext(SmallVectorImpl
<T
> &Context
,
205 int32_t CSize
= MaxCompressionSize
) {
207 uint32_t HS
= static_cast<uint32_t>(Context
.size() / 2);
208 uint32_t MaxDedupSize
=
209 CSize
== -1 ? HS
: std::min(static_cast<uint32_t>(CSize
), HS
);
210 auto BeginIter
= Context
.begin();
211 // Use an in-place algorithm to save memory copy
212 // End indicates the end location of current iteration's data
214 // Deduplicate from length 1 to the max possible size of a repeated
216 while (I
<= MaxDedupSize
) {
217 // This is a linear algorithm that deduplicates adjacent repeated
218 // sequences of size I. The deduplication detection runs on a sliding
219 // window whose size is 2*I and it keeps sliding the window to deduplicate
220 // the data inside. Once duplication is detected, deduplicate it by
221 // skipping the right half part of the window, otherwise just copy back
222 // the new one by appending them at the back of End pointer(for the next
226 // Input: [a1, a2, b1, b2]
227 // (Added index to distinguish the same char, the origin is [a, a, b,
228 // b], the size of the dedup window is 2(I = 1) at the beginning)
230 // 1) The initial status is a dummy window[null, a1], then just copy the
231 // right half of the window(End = 0), then slide the window.
232 // Result: [a1], a2, b1, b2 (End points to the element right before ],
233 // after ] is the data of the previous iteration)
235 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
236 // the window i.e the duplication happen. Only slide the window.
237 // Result: [a1], a2, b1, b2
239 // 3) Next window is [a2, b1], copy the right half of the window(b1 is
240 // new) to the End and slide the window.
241 // Result: [a1, b1], b1, b2
243 // 4) Next window is [b1, b2], same to 2), skip b2.
244 // Result: [a1, b1], b1, b2
245 // After resize, it will be [a, b]
247 // Use pointers like below to do comparison inside the window
250 // LeftBoundary Left Right Left+I Right+I
251 // A duplication found if Left < LeftBoundry.
253 int32_t Right
= I
- 1;
255 int32_t LeftBoundary
= 0;
256 while (Right
+ I
< Context
.size()) {
257 // To avoids scanning a part of a sequence repeatedly, it finds out
258 // the common suffix of two hald in the window. The common suffix will
259 // serve as the common prefix of next possible pair of duplicate
260 // sequences. The non-common part will be ignored and never scanned
264 // Input: [a, b1], c1, b2, c2
267 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
268 // part is 'c1', copy it and only slide the window 1 step.
269 // Result: [a, b1, c1], b2, c2
271 // 2) Next window is [b1, c1, b2, c2], so duplication happen.
272 // Result after resize: [a, b, c]
274 int32_t Left
= Right
;
275 while (Left
>= LeftBoundary
&& Context
[Left
] == Context
[Left
+ I
]) {
276 // Find the longest suffix inside the window. When stops, Left points
277 // at the diverging point in the current sequence.
281 bool DuplicationFound
= (Left
< LeftBoundary
);
282 // Don't need to recheck the data before Right
283 LeftBoundary
= Right
+ 1;
284 if (DuplicationFound
) {
285 // Duplication found, skip right half of the window.
288 // Copy the non-common-suffix part of the adjacent sequence.
289 std::copy(BeginIter
+ Right
+ 1, BeginIter
+ Left
+ I
+ 1,
291 End
+= Left
+ I
- Right
;
292 // Only slide the window by the size of non-common-suffix
296 // Don't forget the remaining part that's not scanned.
297 std::copy(BeginIter
+ Right
+ 1, Context
.end(), BeginIter
+ End
);
298 End
+= Context
.size() - Right
- 1;
301 MaxDedupSize
= std::min(static_cast<uint32_t>(End
/ 2), MaxDedupSize
);
306 void generateLineNumBasedProfile();
308 FunctionSamples
*getOrCreateFunctionSamples(ContextTrieNode
*ContextNode
,
309 bool WasLeafInlined
= false);
311 // Lookup or create ContextTrieNode for the context, FunctionSamples is
312 // created inside this function.
313 ContextTrieNode
*getOrCreateContextNode(const SampleContextFrames Context
,
314 bool WasLeafInlined
= false);
316 // For profiled only functions, on-demand compute their inline context
317 // function byte size which is used by the pre-inliner.
318 void computeSizeForProfiledFunctions();
319 // Post processing for profiles before writing out, such as mermining
320 // and trimming cold profiles, running preinliner on profiles.
321 void postProcessProfiles();
323 void populateBodySamplesForFunction(FunctionSamples
&FunctionProfile
,
324 const RangeSample
&RangeCounters
);
326 void populateBoundarySamplesForFunction(ContextTrieNode
*CallerNode
,
327 const BranchSample
&BranchCounters
);
329 void populateInferredFunctionSamples(ContextTrieNode
&Node
);
331 void updateFunctionSamples();
333 void generateProbeBasedProfile();
335 // Fill in function body samples from probes
336 void populateBodySamplesWithProbes(const RangeSample
&RangeCounter
,
337 const AddrBasedCtxKey
*CtxKey
);
338 // Fill in boundary samples for a call probe
339 void populateBoundarySamplesWithProbes(const BranchSample
&BranchCounter
,
340 const AddrBasedCtxKey
*CtxKey
);
343 getContextNodeForLeafProbe(const AddrBasedCtxKey
*CtxKey
,
344 const MCDecodedPseudoProbe
*LeafProbe
);
346 // Helper function to get FunctionSamples for the leaf probe
348 getFunctionProfileForLeafProbe(const AddrBasedCtxKey
*CtxKey
,
349 const MCDecodedPseudoProbe
*LeafProbe
);
351 void convertToProfileMap(ContextTrieNode
&Node
,
352 SampleContextFrameVector
&Context
);
354 void convertToProfileMap();
356 void computeSummaryAndThreshold();
358 bool collectFunctionsFromLLVMProfile(
359 std::unordered_set
<const BinaryFunction
*> &ProfiledFunctions
) override
;
361 void initializeMissingFrameInferrer();
363 // Given an input `Context`, output `NewContext` with inferred missing tail
365 void inferMissingFrames(const SmallVectorImpl
<uint64_t> &Context
,
366 SmallVectorImpl
<uint64_t> &NewContext
);
368 ContextTrieNode
&getRootContext() { return ContextTracker
.getRootContext(); };
370 // The container for holding the FunctionSamples used by context trie.
371 std::list
<FunctionSamples
> FSamplesList
;
373 // Underlying context table serves for sample profile writer.
374 std::unordered_set
<SampleContextFrameVector
, SampleContextFrameHash
> Contexts
;
376 SampleContextTracker ContextTracker
;
378 bool IsProfileValidOnTrie
= true;
381 // Deduplicate adjacent repeated context sequences up to a given sequence
382 // length. -1 means no size limit.
383 static int32_t MaxCompressionSize
;
384 static int MaxContextDepth
;
387 } // end namespace sampleprof
388 } // end namespace llvm