[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / tools / llvm-profgen / ProfileGenerator.h
blob88cf2dc0d49f37374eb0e6d22279a5b7667b99c9
1 //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10 #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11 #include "CSPreInliner.h"
12 #include "ErrorHandling.h"
13 #include "PerfReader.h"
14 #include "ProfiledBinary.h"
15 #include "llvm/IR/DebugInfoMetadata.h"
16 #include "llvm/ProfileData/SampleProfWriter.h"
17 #include <memory>
18 #include <unordered_set>
20 using namespace llvm;
21 using namespace sampleprof;
23 namespace llvm {
24 namespace sampleprof {
26 using ProbeCounterMap =
27 std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
29 // This base class for profile generation of sample-based PGO. We reuse all
30 // structures relating to function profiles and profile writers as seen in
31 // /ProfileData/SampleProf.h.
32 class ProfileGeneratorBase {
34 public:
35 ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){};
36 ProfileGeneratorBase(ProfiledBinary *Binary,
37 const ContextSampleCounterMap *Counters)
38 : Binary(Binary), SampleCounters(Counters){};
39 ProfileGeneratorBase(ProfiledBinary *Binary,
40 const SampleProfileMap &&Profiles)
41 : Binary(Binary), ProfileMap(std::move(Profiles)){};
43 virtual ~ProfileGeneratorBase() = default;
44 static std::unique_ptr<ProfileGeneratorBase>
45 create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
46 bool profileIsCS);
47 static std::unique_ptr<ProfileGeneratorBase>
48 create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap,
49 bool profileIsCS);
50 virtual void generateProfile() = 0;
51 void write();
53 static uint32_t
54 getDuplicationFactor(unsigned Discriminator,
55 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
56 return UseFSD ? 1
57 : llvm::DILocation::getDuplicationFactorFromDiscriminator(
58 Discriminator);
61 static uint32_t
62 getBaseDiscriminator(unsigned Discriminator,
63 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
64 return UseFSD ? Discriminator
65 : DILocation::getBaseDiscriminatorFromDiscriminator(
66 Discriminator, /* IsFSDiscriminator */ false);
69 static bool UseFSDiscriminator;
71 protected:
72 // Use SampleProfileWriter to serialize profile map
73 void write(std::unique_ptr<SampleProfileWriter> Writer,
74 SampleProfileMap &ProfileMap);
76 For each region boundary point, mark if it is begin or end (or both) of
77 the region. Boundary points are inclusive. Log the sample count as well
78 so we can use it when we compute the sample count of each disjoint region
79 later. Note that there might be multiple ranges with different sample
80 count that share same begin/end point. We need to accumulate the sample
81 count for the boundary point for such case, because for the example
82 below,
84 |<--100-->|
85 |<------200------>|
86 A B C
88 sample count for disjoint region [A,B] would be 300.
90 void findDisjointRanges(RangeSample &DisjointRanges,
91 const RangeSample &Ranges);
93 // Go through each address from range to extract the top frame probe by
94 // looking up in the Address2ProbeMap
95 void extractProbesFromRange(const RangeSample &RangeCounter,
96 ProbeCounterMap &ProbeCounter,
97 bool FindDisjointRanges = true);
99 // Helper function for updating body sample for a leaf location in
100 // FunctionProfile
101 void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
102 const SampleContextFrame &LeafLoc,
103 uint64_t Count);
105 void updateFunctionSamples();
107 void updateTotalSamples();
109 void updateCallsiteSamples();
111 StringRef getCalleeNameForAddress(uint64_t TargetAddress);
113 void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
115 void calculateAndShowDensity(const SampleProfileMap &Profiles);
117 double calculateDensity(const SampleProfileMap &Profiles,
118 uint64_t HotCntThreshold);
120 void showDensitySuggestion(double Density);
122 void collectProfiledFunctions();
124 bool collectFunctionsFromRawProfile(
125 std::unordered_set<const BinaryFunction *> &ProfiledFunctions);
127 // Collect profiled Functions for llvm sample profile input.
128 virtual bool collectFunctionsFromLLVMProfile(
129 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0;
131 // Thresholds from profile summary to answer isHotCount/isColdCount queries.
132 uint64_t HotCountThreshold;
134 uint64_t ColdCountThreshold;
136 ProfiledBinary *Binary = nullptr;
138 std::unique_ptr<ProfileSummary> Summary;
140 // Used by SampleProfileWriter
141 SampleProfileMap ProfileMap;
143 const ContextSampleCounterMap *SampleCounters = nullptr;
146 class ProfileGenerator : public ProfileGeneratorBase {
148 public:
149 ProfileGenerator(ProfiledBinary *Binary,
150 const ContextSampleCounterMap *Counters)
151 : ProfileGeneratorBase(Binary, Counters){};
152 ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
153 : ProfileGeneratorBase(Binary, std::move(Profiles)){};
154 void generateProfile() override;
156 private:
157 void generateLineNumBasedProfile();
158 void generateProbeBasedProfile();
159 RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
160 FunctionSamples &getTopLevelFunctionProfile(FunctionId FuncName);
161 // Helper function to get the leaf frame's FunctionProfile by traversing the
162 // inline stack and meanwhile it adds the total samples for each frame's
163 // function profile.
164 FunctionSamples &
165 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
166 uint64_t Count);
167 void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
168 void
169 populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
170 void
171 populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
172 void populateBoundarySamplesWithProbesForAllFunctions(
173 const BranchSample &BranchCounters);
174 void postProcessProfiles();
175 void trimColdProfiles(const SampleProfileMap &Profiles,
176 uint64_t ColdCntThreshold);
177 bool collectFunctionsFromLLVMProfile(
178 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
181 class CSProfileGenerator : public ProfileGeneratorBase {
182 public:
183 CSProfileGenerator(ProfiledBinary *Binary,
184 const ContextSampleCounterMap *Counters)
185 : ProfileGeneratorBase(Binary, Counters){};
186 CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles)
187 : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){};
188 void generateProfile() override;
190 // Trim the context stack at a given depth.
191 template <typename T>
192 static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
193 if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
194 return;
195 std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
196 S.begin());
197 S.resize(Depth);
200 // Remove adjacent repeated context sequences up to a given sequence length,
201 // -1 means no size limit. Note that repeated sequences are identified based
202 // on the exact call site, this is finer granularity than function recursion.
203 template <typename T>
204 static void compressRecursionContext(SmallVectorImpl<T> &Context,
205 int32_t CSize = MaxCompressionSize) {
206 uint32_t I = 1;
207 uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
208 uint32_t MaxDedupSize =
209 CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
210 auto BeginIter = Context.begin();
211 // Use an in-place algorithm to save memory copy
212 // End indicates the end location of current iteration's data
213 uint32_t End = 0;
214 // Deduplicate from length 1 to the max possible size of a repeated
215 // sequence.
216 while (I <= MaxDedupSize) {
217 // This is a linear algorithm that deduplicates adjacent repeated
218 // sequences of size I. The deduplication detection runs on a sliding
219 // window whose size is 2*I and it keeps sliding the window to deduplicate
220 // the data inside. Once duplication is detected, deduplicate it by
221 // skipping the right half part of the window, otherwise just copy back
222 // the new one by appending them at the back of End pointer(for the next
223 // iteration).
225 // For example:
226 // Input: [a1, a2, b1, b2]
227 // (Added index to distinguish the same char, the origin is [a, a, b,
228 // b], the size of the dedup window is 2(I = 1) at the beginning)
230 // 1) The initial status is a dummy window[null, a1], then just copy the
231 // right half of the window(End = 0), then slide the window.
232 // Result: [a1], a2, b1, b2 (End points to the element right before ],
233 // after ] is the data of the previous iteration)
235 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
236 // the window i.e the duplication happen. Only slide the window.
237 // Result: [a1], a2, b1, b2
239 // 3) Next window is [a2, b1], copy the right half of the window(b1 is
240 // new) to the End and slide the window.
241 // Result: [a1, b1], b1, b2
243 // 4) Next window is [b1, b2], same to 2), skip b2.
244 // Result: [a1, b1], b1, b2
245 // After resize, it will be [a, b]
247 // Use pointers like below to do comparison inside the window
248 // [a b c a b c]
249 // | | | | |
250 // LeftBoundary Left Right Left+I Right+I
251 // A duplication found if Left < LeftBoundry.
253 int32_t Right = I - 1;
254 End = I;
255 int32_t LeftBoundary = 0;
256 while (Right + I < Context.size()) {
257 // To avoids scanning a part of a sequence repeatedly, it finds out
258 // the common suffix of two hald in the window. The common suffix will
259 // serve as the common prefix of next possible pair of duplicate
260 // sequences. The non-common part will be ignored and never scanned
261 // again.
263 // For example.
264 // Input: [a, b1], c1, b2, c2
265 // I = 2
267 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
268 // part is 'c1', copy it and only slide the window 1 step.
269 // Result: [a, b1, c1], b2, c2
271 // 2) Next window is [b1, c1, b2, c2], so duplication happen.
272 // Result after resize: [a, b, c]
274 int32_t Left = Right;
275 while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
276 // Find the longest suffix inside the window. When stops, Left points
277 // at the diverging point in the current sequence.
278 Left--;
281 bool DuplicationFound = (Left < LeftBoundary);
282 // Don't need to recheck the data before Right
283 LeftBoundary = Right + 1;
284 if (DuplicationFound) {
285 // Duplication found, skip right half of the window.
286 Right += I;
287 } else {
288 // Copy the non-common-suffix part of the adjacent sequence.
289 std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
290 BeginIter + End);
291 End += Left + I - Right;
292 // Only slide the window by the size of non-common-suffix
293 Right = Left + I;
296 // Don't forget the remaining part that's not scanned.
297 std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
298 End += Context.size() - Right - 1;
299 I++;
300 Context.resize(End);
301 MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
305 private:
306 void generateLineNumBasedProfile();
308 FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
309 bool WasLeafInlined = false);
311 // Lookup or create ContextTrieNode for the context, FunctionSamples is
312 // created inside this function.
313 ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context,
314 bool WasLeafInlined = false);
316 // For profiled only functions, on-demand compute their inline context
317 // function byte size which is used by the pre-inliner.
318 void computeSizeForProfiledFunctions();
319 // Post processing for profiles before writing out, such as mermining
320 // and trimming cold profiles, running preinliner on profiles.
321 void postProcessProfiles();
323 void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
324 const RangeSample &RangeCounters);
326 void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode,
327 const BranchSample &BranchCounters);
329 void populateInferredFunctionSamples(ContextTrieNode &Node);
331 void updateFunctionSamples();
333 void generateProbeBasedProfile();
335 // Fill in function body samples from probes
336 void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
337 const AddrBasedCtxKey *CtxKey);
338 // Fill in boundary samples for a call probe
339 void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
340 const AddrBasedCtxKey *CtxKey);
342 ContextTrieNode *
343 getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey,
344 const MCDecodedPseudoProbe *LeafProbe);
346 // Helper function to get FunctionSamples for the leaf probe
347 FunctionSamples &
348 getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey,
349 const MCDecodedPseudoProbe *LeafProbe);
351 void convertToProfileMap(ContextTrieNode &Node,
352 SampleContextFrameVector &Context);
354 void convertToProfileMap();
356 void computeSummaryAndThreshold();
358 bool collectFunctionsFromLLVMProfile(
359 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
361 void initializeMissingFrameInferrer();
363 // Given an input `Context`, output `NewContext` with inferred missing tail
364 // call frames.
365 void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
366 SmallVectorImpl<uint64_t> &NewContext);
368 ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); };
370 // The container for holding the FunctionSamples used by context trie.
371 std::list<FunctionSamples> FSamplesList;
373 // Underlying context table serves for sample profile writer.
374 std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
376 SampleContextTracker ContextTracker;
378 bool IsProfileValidOnTrie = true;
380 public:
381 // Deduplicate adjacent repeated context sequences up to a given sequence
382 // length. -1 means no size limit.
383 static int32_t MaxCompressionSize;
384 static int MaxContextDepth;
387 } // end namespace sampleprof
388 } // end namespace llvm
390 #endif