1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "ProfileGenerator.h"
10 #include "ErrorHandling.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/ProfileData/ProfileCommon.h"
14 #include <unordered_set>
16 cl::opt
<std::string
> OutputFilename("output", cl::value_desc("output"),
18 cl::desc("Output profile file"));
19 static cl::alias
OutputA("o", cl::desc("Alias for --output"),
20 cl::aliasopt(OutputFilename
));
22 static cl::opt
<SampleProfileFormat
> OutputFormat(
23 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary
),
25 clEnumValN(SPF_Binary
, "binary", "Binary encoding (default)"),
26 clEnumValN(SPF_Compact_Binary
, "compbinary", "Compact binary encoding"),
27 clEnumValN(SPF_Ext_Binary
, "extbinary", "Extensible binary encoding"),
28 clEnumValN(SPF_Text
, "text", "Text encoding"),
29 clEnumValN(SPF_GCC
, "gcc",
30 "GCC encoding (only meaningful for -sample)")));
33 "use-md5", cl::init(false), cl::Hidden
,
34 cl::desc("Use md5 to represent function names in the output profile (only "
35 "meaningful for -extbinary)"));
37 static cl::opt
<bool> PopulateProfileSymbolList(
38 "populate-profile-symbol-list", cl::init(false), cl::Hidden
,
39 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
41 static cl::opt
<bool> FillZeroForAllFuncs(
42 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden
,
43 cl::desc("Attribute all functions' range with zero count "
44 "even it's not hit by any samples."));
46 static cl::opt
<int32_t, true> RecursionCompression(
48 cl::desc("Compressing recursion by deduplicating adjacent frame "
49 "sequences up to the specified size. -1 means no size limit."),
51 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize
));
54 TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore
,
55 cl::desc("If the total count of the profile is smaller "
56 "than threshold, it will be trimmed."));
58 static cl::opt
<bool> CSProfMergeColdContext(
59 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore
,
60 cl::desc("If the total count of context profile is smaller than "
61 "the threshold, it will be merged into context-less base "
64 static cl::opt
<uint32_t> CSProfMaxColdContextDepth(
65 "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore
,
66 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
67 "context-less base profile"));
69 static cl::opt
<int, true> CSProfMaxContextDepth(
70 "csprof-max-context-depth", cl::ZeroOrMore
,
71 cl::desc("Keep the last K contexts while merging profile. -1 means no "
73 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth
));
75 static cl::opt
<double> HotFunctionDensityThreshold(
76 "hot-function-density-threshold", llvm::cl::init(1000),
78 "specify density threshold for hot functions (default: 1000)"),
80 static cl::opt
<bool> ShowDensity("show-density", llvm::cl::init(false),
81 llvm::cl::desc("show profile density details"),
84 static cl::opt
<bool> UpdateTotalSamples(
85 "update-total-samples", llvm::cl::init(false),
87 "Update total samples by accumulating all its body samples."),
90 extern cl::opt
<int> ProfileSummaryCutoffHot
;
92 static cl::opt
<bool> GenCSNestedProfile(
93 "gen-cs-nested-profile", cl::Hidden
, cl::init(false),
94 cl::desc("Generate nested function profiles for CSSPGO"));
97 using namespace sampleprof
;
100 namespace sampleprof
{
102 // Initialize the MaxCompressionSize to -1 which means no size limit
103 int32_t CSProfileGenerator::MaxCompressionSize
= -1;
105 int CSProfileGenerator::MaxContextDepth
= -1;
107 bool ProfileGeneratorBase::UseFSDiscriminator
= false;
109 std::unique_ptr
<ProfileGeneratorBase
>
110 ProfileGeneratorBase::create(ProfiledBinary
*Binary
,
111 const ContextSampleCounterMap
&SampleCounters
,
112 bool ProfileIsCSFlat
) {
113 std::unique_ptr
<ProfileGeneratorBase
> Generator
;
114 if (ProfileIsCSFlat
) {
115 if (Binary
->useFSDiscriminator())
116 exitWithError("FS discriminator is not supported in CS profile.");
117 Generator
.reset(new CSProfileGenerator(Binary
, SampleCounters
));
119 Generator
.reset(new ProfileGenerator(Binary
, SampleCounters
));
121 ProfileGeneratorBase::UseFSDiscriminator
= Binary
->useFSDiscriminator();
122 FunctionSamples::ProfileIsFS
= Binary
->useFSDiscriminator();
127 void ProfileGeneratorBase::write(std::unique_ptr
<SampleProfileWriter
> Writer
,
128 SampleProfileMap
&ProfileMap
) {
129 // Populate profile symbol list if extended binary format is used.
130 ProfileSymbolList SymbolList
;
132 if (PopulateProfileSymbolList
&& OutputFormat
== SPF_Ext_Binary
) {
133 Binary
->populateSymbolListFromDWARF(SymbolList
);
134 Writer
->setProfileSymbolList(&SymbolList
);
137 if (std::error_code EC
= Writer
->write(ProfileMap
))
138 exitWithError(std::move(EC
));
141 void ProfileGeneratorBase::write() {
142 auto WriterOrErr
= SampleProfileWriter::create(OutputFilename
, OutputFormat
);
143 if (std::error_code EC
= WriterOrErr
.getError())
144 exitWithError(EC
, OutputFilename
);
147 if (OutputFormat
!= SPF_Ext_Binary
)
148 WithColor::warning() << "-use-md5 is ignored. Specify "
149 "--format=extbinary to enable it\n";
151 WriterOrErr
.get()->setUseMD5();
154 write(std::move(WriterOrErr
.get()), ProfileMap
);
157 void ProfileGeneratorBase::showDensitySuggestion(double Density
) {
159 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
160 "set too low. Please check your command.\n";
161 else if (Density
< HotFunctionDensityThreshold
)
163 << "AutoFDO is estimated to optimize better with "
164 << format("%.1f", HotFunctionDensityThreshold
/ Density
)
165 << "x more samples. Please consider increasing sampling rate or "
166 "profiling for longer duration to get more samples.\n";
169 outs() << "Minimum profile density for hot functions with top "
171 static_cast<double>(ProfileSummaryCutoffHot
.getValue()) /
173 << "% total samples: " << format("%.1f", Density
) << "\n";
176 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap
&Profiles
,
177 uint64_t HotCntThreshold
) {
178 double Density
= DBL_MAX
;
179 std::vector
<const FunctionSamples
*> HotFuncs
;
180 for (auto &I
: Profiles
) {
181 auto &FuncSamples
= I
.second
;
182 if (FuncSamples
.getTotalSamples() < HotCntThreshold
)
184 HotFuncs
.emplace_back(&FuncSamples
);
187 for (auto *FuncSamples
: HotFuncs
) {
188 auto *Func
= Binary
->getBinaryFunction(FuncSamples
->getName());
191 uint64_t FuncSize
= Func
->getFuncSize();
195 std::min(Density
, static_cast<double>(FuncSamples
->getTotalSamples()) /
199 return Density
== DBL_MAX
? 0.0 : Density
;
202 void ProfileGeneratorBase::findDisjointRanges(RangeSample
&DisjointRanges
,
203 const RangeSample
&Ranges
) {
206 Regions may overlap with each other. Using the boundary info, find all
207 disjoint ranges and their sample count. BoundaryPoint contains the count
208 multiple samples begin/end at this points.
211 |<------200------>| Sample2
214 In the example above,
215 Sample1 begins at A, ends at B, its value is 100.
216 Sample2 beings at A, ends at C, its value is 200.
217 For A, BeginCount is the sum of sample begins at A, which is 300 and no
218 samples ends at A, so EndCount is 0.
219 Then boundary points A, B, and C with begin/end counts are:
224 struct BoundaryPoint
{
225 // Sum of sample counts beginning at this point
226 uint64_t BeginCount
= UINT64_MAX
;
227 // Sum of sample counts ending at this point
228 uint64_t EndCount
= UINT64_MAX
;
229 // Is the begin point of a zero range.
230 bool IsZeroRangeBegin
= false;
231 // Is the end point of a zero range.
232 bool IsZeroRangeEnd
= false;
234 void addBeginCount(uint64_t Count
) {
235 if (BeginCount
== UINT64_MAX
)
240 void addEndCount(uint64_t Count
) {
241 if (EndCount
== UINT64_MAX
)
248 For the above example. With boundary points, follwing logic finds two
254 If there is a boundary point that both begin and end, the point itself
255 becomes a separate disjoint region. For example, if we have original
262 there are three boundary points with their begin/end counts of
268 the disjoint ranges would be
274 Example for zero value range:
278 |<--------------- 0 ----------------->|
287 std::map
<uint64_t, BoundaryPoint
> Boundaries
;
289 for (const auto &Item
: Ranges
) {
290 assert(Item
.first
.first
<= Item
.first
.second
&&
291 "Invalid instruction range");
292 auto &BeginPoint
= Boundaries
[Item
.first
.first
];
293 auto &EndPoint
= Boundaries
[Item
.first
.second
];
294 uint64_t Count
= Item
.second
;
296 BeginPoint
.addBeginCount(Count
);
297 EndPoint
.addEndCount(Count
);
299 BeginPoint
.IsZeroRangeBegin
= true;
300 EndPoint
.IsZeroRangeEnd
= true;
304 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
305 // and the next valid address
306 uint64_t BeginAddress
= UINT64_MAX
;
307 int ZeroRangeDepth
= 0;
309 for (const auto &Item
: Boundaries
) {
310 uint64_t Address
= Item
.first
;
311 const BoundaryPoint
&Point
= Item
.second
;
312 if (Point
.BeginCount
!= UINT64_MAX
) {
313 if (BeginAddress
!= UINT64_MAX
)
314 DisjointRanges
[{BeginAddress
, Address
- 1}] = Count
;
315 Count
+= Point
.BeginCount
;
316 BeginAddress
= Address
;
317 ZeroRangeDepth
+= Point
.IsZeroRangeBegin
;
319 if (Point
.EndCount
!= UINT64_MAX
) {
320 assert((BeginAddress
!= UINT64_MAX
) &&
321 "First boundary point cannot be 'end' point");
322 DisjointRanges
[{BeginAddress
, Address
}] = Count
;
323 assert(Count
>= Point
.EndCount
&& "Mismatched live ranges");
324 Count
-= Point
.EndCount
;
325 BeginAddress
= Address
+ 1;
326 ZeroRangeDepth
-= Point
.IsZeroRangeEnd
;
327 // If the remaining count is zero and it's no longer in a zero range, this
328 // means we consume all the ranges before, thus mark BeginAddress as
329 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
333 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
334 // have the [B+1, C-1] zero range.
335 if (Count
== 0 && ZeroRangeDepth
== 0)
336 BeginAddress
= UINT64_MAX
;
341 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
342 FunctionSamples
&FunctionProfile
, const SampleContextFrame
&LeafLoc
,
344 // Use the maximum count of samples with same line location
345 uint32_t Discriminator
= getBaseDiscriminator(LeafLoc
.Location
.Discriminator
);
347 // Use duplication factor to compensated for loop unroll/vectorization.
348 // Note that this is only needed when we're taking MAX of the counts at
349 // the location instead of SUM.
350 Count
*= getDuplicationFactor(LeafLoc
.Location
.Discriminator
);
352 ErrorOr
<uint64_t> R
=
353 FunctionProfile
.findSamplesAt(LeafLoc
.Location
.LineOffset
, Discriminator
);
355 uint64_t PreviousCount
= R
? R
.get() : 0;
356 if (PreviousCount
<= Count
) {
357 FunctionProfile
.addBodySamples(LeafLoc
.Location
.LineOffset
, Discriminator
,
358 Count
- PreviousCount
);
362 void ProfileGeneratorBase::updateTotalSamples() {
363 if (!UpdateTotalSamples
)
366 for (auto &Item
: ProfileMap
) {
367 FunctionSamples
&FunctionProfile
= Item
.second
;
368 FunctionProfile
.updateTotalSamples();
373 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName
) {
374 SampleContext
Context(FuncName
);
375 auto Ret
= ProfileMap
.emplace(Context
, FunctionSamples());
377 FunctionSamples
&FProfile
= Ret
.first
->second
;
378 FProfile
.setContext(Context
);
380 return Ret
.first
->second
;
383 void ProfileGenerator::generateProfile() {
384 if (Binary
->usePseudoProbes()) {
385 // TODO: Support probe based profile generation
386 exitWithError("Probe based profile generation not supported for AutoFDO, "
387 "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
389 generateLineNumBasedProfile();
391 postProcessProfiles();
394 void ProfileGenerator::postProcessProfiles() {
395 computeSummaryAndThreshold();
396 trimColdProfiles(ProfileMap
, ColdCountThreshold
);
397 calculateAndShowDensity(ProfileMap
);
400 void ProfileGenerator::trimColdProfiles(const SampleProfileMap
&Profiles
,
401 uint64_t ColdCntThreshold
) {
402 if (!TrimColdProfile
)
405 // Move cold profiles into a tmp container.
406 std::vector
<SampleContext
> ColdProfiles
;
407 for (const auto &I
: ProfileMap
) {
408 if (I
.second
.getTotalSamples() < ColdCntThreshold
)
409 ColdProfiles
.emplace_back(I
.first
);
412 // Remove the cold profile from ProfileMap.
413 for (const auto &I
: ColdProfiles
)
417 void ProfileGenerator::generateLineNumBasedProfile() {
418 assert(SampleCounters
.size() == 1 &&
419 "Must have one entry for profile generation.");
420 const SampleCounter
&SC
= SampleCounters
.begin()->second
;
421 // Fill in function body samples
422 populateBodySamplesForAllFunctions(SC
.RangeCounter
);
423 // Fill in boundary sample counts as well as call site samples for calls
424 populateBoundarySamplesForAllFunctions(SC
.BranchCounter
);
426 updateTotalSamples();
429 FunctionSamples
&ProfileGenerator::getLeafProfileAndAddTotalSamples(
430 const SampleContextFrameVector
&FrameVec
, uint64_t Count
) {
431 // Get top level profile
432 FunctionSamples
*FunctionProfile
=
433 &getTopLevelFunctionProfile(FrameVec
[0].FuncName
);
434 FunctionProfile
->addTotalSamples(Count
);
436 for (size_t I
= 1; I
< FrameVec
.size(); I
++) {
437 LineLocation
Callsite(
438 FrameVec
[I
- 1].Location
.LineOffset
,
439 getBaseDiscriminator(FrameVec
[I
- 1].Location
.Discriminator
));
440 FunctionSamplesMap
&SamplesMap
=
441 FunctionProfile
->functionSamplesAt(Callsite
);
443 SamplesMap
.emplace(FrameVec
[I
].FuncName
.str(), FunctionSamples());
445 SampleContext
Context(FrameVec
[I
].FuncName
);
446 Ret
.first
->second
.setContext(Context
);
448 FunctionProfile
= &Ret
.first
->second
;
449 FunctionProfile
->addTotalSamples(Count
);
452 return *FunctionProfile
;
456 ProfileGenerator::preprocessRangeCounter(const RangeSample
&RangeCounter
) {
457 RangeSample
Ranges(RangeCounter
.begin(), RangeCounter
.end());
458 if (FillZeroForAllFuncs
) {
459 for (auto &FuncI
: Binary
->getAllBinaryFunctions()) {
460 for (auto &R
: FuncI
.second
.Ranges
) {
461 Ranges
[{R
.first
, R
.second
- 1}] += 0;
465 // For each range, we search for all ranges of the function it belongs to
466 // and initialize it with zero count, so it remains zero if doesn't hit any
467 // samples. This is to be consistent with compiler that interpret zero count
468 // as unexecuted(cold).
469 for (const auto &I
: RangeCounter
) {
470 uint64_t StartOffset
= I
.first
.first
;
471 for (const auto &Range
: Binary
->getRangesForOffset(StartOffset
))
472 Ranges
[{Range
.first
, Range
.second
- 1}] += 0;
475 RangeSample DisjointRanges
;
476 findDisjointRanges(DisjointRanges
, Ranges
);
477 return DisjointRanges
;
480 void ProfileGenerator::populateBodySamplesForAllFunctions(
481 const RangeSample
&RangeCounter
) {
482 for (const auto &Range
: preprocessRangeCounter(RangeCounter
)) {
483 uint64_t RangeBegin
= Binary
->offsetToVirtualAddr(Range
.first
.first
);
484 uint64_t RangeEnd
= Binary
->offsetToVirtualAddr(Range
.first
.second
);
485 uint64_t Count
= Range
.second
;
487 InstructionPointer
IP(Binary
, RangeBegin
, true);
488 // Disjoint ranges may have range in the middle of two instr,
489 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
490 // can be Addr1+1 to Addr2-1. We should ignore such range.
491 if (IP
.Address
> RangeEnd
)
495 uint64_t Offset
= Binary
->virtualAddrToOffset(IP
.Address
);
496 const SampleContextFrameVector
&FrameVec
=
497 Binary
->getFrameLocationStack(Offset
);
498 if (!FrameVec
.empty()) {
499 // FIXME: As accumulating total count per instruction caused some
500 // regression, we changed to accumulate total count per byte as a
501 // workaround. Tuning hotness threshold on the compiler side might be
502 // necessary in the future.
503 FunctionSamples
&FunctionProfile
= getLeafProfileAndAddTotalSamples(
504 FrameVec
, Count
* Binary
->getInstSize(Offset
));
505 updateBodySamplesforFunctionProfile(FunctionProfile
, FrameVec
.back(),
508 } while (IP
.advance() && IP
.Address
<= RangeEnd
);
512 StringRef
ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset
) {
513 // Get the function range by branch target if it's a call branch.
514 auto *FRange
= Binary
->findFuncRangeForStartOffset(TargetOffset
);
516 // We won't accumulate sample count for a range whose start is not the real
517 // function entry such as outlined function or inner labels.
518 if (!FRange
|| !FRange
->IsFuncEntry
)
521 return FunctionSamples::getCanonicalFnName(FRange
->getFuncName());
524 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
525 const BranchSample
&BranchCounters
) {
526 for (const auto &Entry
: BranchCounters
) {
527 uint64_t SourceOffset
= Entry
.first
.first
;
528 uint64_t TargetOffset
= Entry
.first
.second
;
529 uint64_t Count
= Entry
.second
;
530 assert(Count
!= 0 && "Unexpected zero weight branch");
532 StringRef CalleeName
= getCalleeNameForOffset(TargetOffset
);
533 if (CalleeName
.size() == 0)
535 // Record called target sample and its count.
536 const SampleContextFrameVector
&FrameVec
=
537 Binary
->getFrameLocationStack(SourceOffset
);
538 if (!FrameVec
.empty()) {
539 FunctionSamples
&FunctionProfile
=
540 getLeafProfileAndAddTotalSamples(FrameVec
, 0);
541 FunctionProfile
.addCalledTargetSamples(
542 FrameVec
.back().Location
.LineOffset
,
543 getBaseDiscriminator(FrameVec
.back().Location
.Discriminator
),
546 // Add head samples for callee.
547 FunctionSamples
&CalleeProfile
= getTopLevelFunctionProfile(CalleeName
);
548 CalleeProfile
.addHeadSamples(Count
);
552 void ProfileGeneratorBase::calculateAndShowDensity(
553 const SampleProfileMap
&Profiles
) {
554 double Density
= calculateDensity(Profiles
, HotCountThreshold
);
555 showDensitySuggestion(Density
);
558 FunctionSamples
&CSProfileGenerator::getFunctionProfileForContext(
559 const SampleContextFrameVector
&Context
, bool WasLeafInlined
) {
560 auto I
= ProfileMap
.find(SampleContext(Context
));
561 if (I
== ProfileMap
.end()) {
562 // Save the new context for future references.
563 SampleContextFrames NewContext
= *Contexts
.insert(Context
).first
;
564 SampleContext
FContext(NewContext
, RawContext
);
565 auto Ret
= ProfileMap
.emplace(FContext
, FunctionSamples());
567 FContext
.setAttribute(ContextWasInlined
);
568 FunctionSamples
&FProfile
= Ret
.first
->second
;
569 FProfile
.setContext(FContext
);
570 return Ret
.first
->second
;
575 void CSProfileGenerator::generateProfile() {
576 FunctionSamples::ProfileIsCSFlat
= true;
578 if (Binary
->getTrackFuncContextSize())
579 computeSizeForProfiledFunctions();
581 if (Binary
->usePseudoProbes()) {
582 // Enable pseudo probe functionalities in SampleProf
583 FunctionSamples::ProfileIsProbeBased
= true;
584 generateProbeBasedProfile();
586 generateLineNumBasedProfile();
588 postProcessProfiles();
591 void CSProfileGenerator::computeSizeForProfiledFunctions() {
592 // Hash map to deduplicate the function range and the item is a pair of
593 // function start and end offset.
594 std::unordered_map
<uint64_t, uint64_t> AggregatedRanges
;
595 // Go through all the ranges in the CS counters, use the start of the range to
596 // look up the function it belongs and record the function range.
597 for (const auto &CI
: SampleCounters
) {
598 for (const auto &Item
: CI
.second
.RangeCounter
) {
599 // FIXME: Filter the bogus crossing function range.
600 uint64_t StartOffset
= Item
.first
.first
;
601 // Note that a function can be spilt into multiple ranges, so get all
602 // ranges of the function.
603 for (const auto &Range
: Binary
->getRangesForOffset(StartOffset
))
604 AggregatedRanges
[Range
.first
] = Range
.second
;
608 for (const auto &I
: AggregatedRanges
) {
609 uint64_t StartOffset
= I
.first
;
610 uint64_t EndOffset
= I
.second
;
611 Binary
->computeInlinedContextSizeForRange(StartOffset
, EndOffset
);
615 void CSProfileGenerator::generateLineNumBasedProfile() {
616 for (const auto &CI
: SampleCounters
) {
617 const auto *CtxKey
= cast
<StringBasedCtxKey
>(CI
.first
.getPtr());
619 // Get or create function profile for the range
620 FunctionSamples
&FunctionProfile
=
621 getFunctionProfileForContext(CtxKey
->Context
, CtxKey
->WasLeafInlined
);
623 // Fill in function body samples
624 populateBodySamplesForFunction(FunctionProfile
, CI
.second
.RangeCounter
);
625 // Fill in boundary sample counts as well as call site samples for calls
626 populateBoundarySamplesForFunction(CtxKey
->Context
, FunctionProfile
,
627 CI
.second
.BranchCounter
);
629 // Fill in call site value sample for inlined calls and also use context to
630 // infer missing samples. Since we don't have call count for inlined
631 // functions, we estimate it from inlinee's profile using the entry of the
633 populateInferredFunctionSamples();
635 updateTotalSamples();
638 void CSProfileGenerator::populateBodySamplesForFunction(
639 FunctionSamples
&FunctionProfile
, const RangeSample
&RangeCounter
) {
640 // Compute disjoint ranges first, so we can use MAX
641 // for calculating count for each location.
643 findDisjointRanges(Ranges
, RangeCounter
);
644 for (const auto &Range
: Ranges
) {
645 uint64_t RangeBegin
= Binary
->offsetToVirtualAddr(Range
.first
.first
);
646 uint64_t RangeEnd
= Binary
->offsetToVirtualAddr(Range
.first
.second
);
647 uint64_t Count
= Range
.second
;
648 // Disjoint ranges have introduce zero-filled gap that
649 // doesn't belong to current context, filter them out.
653 InstructionPointer
IP(Binary
, RangeBegin
, true);
654 // Disjoint ranges may have range in the middle of two instr,
655 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
656 // can be Addr1+1 to Addr2-1. We should ignore such range.
657 if (IP
.Address
> RangeEnd
)
661 uint64_t Offset
= Binary
->virtualAddrToOffset(IP
.Address
);
662 auto LeafLoc
= Binary
->getInlineLeafFrameLoc(Offset
);
663 if (LeafLoc
.hasValue()) {
664 // Recording body sample for this specific context
665 updateBodySamplesforFunctionProfile(FunctionProfile
, *LeafLoc
, Count
);
666 FunctionProfile
.addTotalSamples(Count
);
668 } while (IP
.advance() && IP
.Address
<= RangeEnd
);
672 void CSProfileGenerator::populateBoundarySamplesForFunction(
673 SampleContextFrames ContextId
, FunctionSamples
&FunctionProfile
,
674 const BranchSample
&BranchCounters
) {
676 for (const auto &Entry
: BranchCounters
) {
677 uint64_t SourceOffset
= Entry
.first
.first
;
678 uint64_t TargetOffset
= Entry
.first
.second
;
679 uint64_t Count
= Entry
.second
;
680 assert(Count
!= 0 && "Unexpected zero weight branch");
682 StringRef CalleeName
= getCalleeNameForOffset(TargetOffset
);
683 if (CalleeName
.size() == 0)
686 // Record called target sample and its count
687 auto LeafLoc
= Binary
->getInlineLeafFrameLoc(SourceOffset
);
688 if (!LeafLoc
.hasValue())
690 FunctionProfile
.addCalledTargetSamples(
691 LeafLoc
->Location
.LineOffset
,
692 getBaseDiscriminator(LeafLoc
->Location
.Discriminator
), CalleeName
,
695 // Record head sample for called target(callee)
696 SampleContextFrameVector
CalleeCtx(ContextId
.begin(), ContextId
.end());
697 assert(CalleeCtx
.back().FuncName
== LeafLoc
->FuncName
&&
698 "Leaf function name doesn't match");
699 CalleeCtx
.back() = *LeafLoc
;
700 CalleeCtx
.emplace_back(CalleeName
, LineLocation(0, 0));
701 FunctionSamples
&CalleeProfile
= getFunctionProfileForContext(CalleeCtx
);
702 CalleeProfile
.addHeadSamples(Count
);
706 static SampleContextFrame
707 getCallerContext(SampleContextFrames CalleeContext
,
708 SampleContextFrameVector
&CallerContext
) {
709 assert(CalleeContext
.size() > 1 && "Unexpected empty context");
710 CalleeContext
= CalleeContext
.drop_back();
711 CallerContext
.assign(CalleeContext
.begin(), CalleeContext
.end());
712 SampleContextFrame CallerFrame
= CallerContext
.back();
713 CallerContext
.back().Location
= LineLocation(0, 0);
717 void CSProfileGenerator::populateInferredFunctionSamples() {
718 for (const auto &Item
: ProfileMap
) {
719 const auto &CalleeContext
= Item
.first
;
720 const FunctionSamples
&CalleeProfile
= Item
.second
;
722 // If we already have head sample counts, we must have value profile
723 // for call sites added already. Skip to avoid double counting.
724 if (CalleeProfile
.getHeadSamples())
726 // If we don't have context, nothing to do for caller's call site.
727 // This could happen for entry point function.
728 if (CalleeContext
.isBaseContext())
731 // Infer Caller's frame loc and context ID through string splitting
732 SampleContextFrameVector CallerContextId
;
733 SampleContextFrame
&&CallerLeafFrameLoc
=
734 getCallerContext(CalleeContext
.getContextFrames(), CallerContextId
);
735 SampleContextFrames
CallerContext(CallerContextId
);
737 // It's possible that we haven't seen any sample directly in the caller,
738 // in which case CallerProfile will not exist. But we can't modify
739 // ProfileMap while iterating it.
740 // TODO: created function profile for those callers too
741 if (ProfileMap
.find(CallerContext
) == ProfileMap
.end())
743 FunctionSamples
&CallerProfile
= ProfileMap
[CallerContext
];
745 // Since we don't have call count for inlined functions, we
746 // estimate it from inlinee's profile using entry body sample.
747 uint64_t EstimatedCallCount
= CalleeProfile
.getEntrySamples();
748 // If we don't have samples with location, use 1 to indicate live.
749 if (!EstimatedCallCount
&& !CalleeProfile
.getBodySamples().size())
750 EstimatedCallCount
= 1;
751 CallerProfile
.addCalledTargetSamples(
752 CallerLeafFrameLoc
.Location
.LineOffset
,
753 CallerLeafFrameLoc
.Location
.Discriminator
,
754 CalleeProfile
.getContext().getName(), EstimatedCallCount
);
755 CallerProfile
.addBodySamples(CallerLeafFrameLoc
.Location
.LineOffset
,
756 CallerLeafFrameLoc
.Location
.Discriminator
,
758 CallerProfile
.addTotalSamples(EstimatedCallCount
);
762 void CSProfileGenerator::postProcessProfiles() {
763 // Compute hot/cold threshold based on profile. This will be used for cold
764 // context profile merging/trimming.
765 computeSummaryAndThreshold();
767 // Run global pre-inliner to adjust/merge context profile based on estimated
769 if (EnableCSPreInliner
) {
770 CSPreInliner(ProfileMap
, *Binary
, HotCountThreshold
, ColdCountThreshold
)
772 // Turn off the profile merger by default unless it is explicitly enabled.
773 if (!CSProfMergeColdContext
.getNumOccurrences())
774 CSProfMergeColdContext
= false;
777 // Trim and merge cold context profile using cold threshold above.
778 if (TrimColdProfile
|| CSProfMergeColdContext
) {
779 SampleContextTrimmer(ProfileMap
)
780 .trimAndMergeColdContextProfiles(
781 HotCountThreshold
, TrimColdProfile
, CSProfMergeColdContext
,
782 CSProfMaxColdContextDepth
, EnableCSPreInliner
);
785 // Merge function samples of CS profile to calculate profile density.
786 sampleprof::SampleProfileMap ContextLessProfiles
;
787 for (const auto &I
: ProfileMap
) {
788 ContextLessProfiles
[I
.second
.getName()].merge(I
.second
);
791 calculateAndShowDensity(ContextLessProfiles
);
792 if (GenCSNestedProfile
) {
793 CSProfileConverter
CSConverter(ProfileMap
);
794 CSConverter
.convertProfiles();
795 FunctionSamples::ProfileIsCSFlat
= false;
796 FunctionSamples::ProfileIsCSNested
= EnableCSPreInliner
;
800 void ProfileGeneratorBase::computeSummaryAndThreshold() {
801 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
802 auto Summary
= Builder
.computeSummaryForProfiles(ProfileMap
);
803 HotCountThreshold
= ProfileSummaryBuilder::getHotCountThreshold(
804 (Summary
->getDetailedSummary()));
805 ColdCountThreshold
= ProfileSummaryBuilder::getColdCountThreshold(
806 (Summary
->getDetailedSummary()));
809 // Helper function to extract context prefix string stack
810 // Extract context stack for reusing, leaf context stack will
811 // be added compressed while looking up function profile
812 static void extractPrefixContextStack(
813 SampleContextFrameVector
&ContextStack
,
814 const SmallVectorImpl
<const MCDecodedPseudoProbe
*> &Probes
,
815 ProfiledBinary
*Binary
) {
816 for (const auto *P
: Probes
) {
817 Binary
->getInlineContextForProbe(P
, ContextStack
, true);
821 void CSProfileGenerator::generateProbeBasedProfile() {
822 for (const auto &CI
: SampleCounters
) {
823 const ProbeBasedCtxKey
*CtxKey
=
824 dyn_cast
<ProbeBasedCtxKey
>(CI
.first
.getPtr());
825 SampleContextFrameVector ContextStack
;
826 extractPrefixContextStack(ContextStack
, CtxKey
->Probes
, Binary
);
827 // Fill in function body samples from probes, also infer caller's samples
828 // from callee's probe
829 populateBodySamplesWithProbes(CI
.second
.RangeCounter
, ContextStack
);
830 // Fill in boundary samples for a call probe
831 populateBoundarySamplesWithProbes(CI
.second
.BranchCounter
, ContextStack
);
835 void CSProfileGenerator::extractProbesFromRange(const RangeSample
&RangeCounter
,
836 ProbeCounterMap
&ProbeCounter
) {
838 findDisjointRanges(Ranges
, RangeCounter
);
839 for (const auto &Range
: Ranges
) {
840 uint64_t RangeBegin
= Binary
->offsetToVirtualAddr(Range
.first
.first
);
841 uint64_t RangeEnd
= Binary
->offsetToVirtualAddr(Range
.first
.second
);
842 uint64_t Count
= Range
.second
;
843 // Disjoint ranges have introduce zero-filled gap that
844 // doesn't belong to current context, filter them out.
848 InstructionPointer
IP(Binary
, RangeBegin
, true);
849 // Disjoint ranges may have range in the middle of two instr,
850 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
851 // can be Addr1+1 to Addr2-1. We should ignore such range.
852 if (IP
.Address
> RangeEnd
)
856 const AddressProbesMap
&Address2ProbesMap
=
857 Binary
->getAddress2ProbesMap();
858 auto It
= Address2ProbesMap
.find(IP
.Address
);
859 if (It
!= Address2ProbesMap
.end()) {
860 for (const auto &Probe
: It
->second
) {
861 if (!Probe
.isBlock())
863 ProbeCounter
[&Probe
] += Count
;
866 } while (IP
.advance() && IP
.Address
<= RangeEnd
);
870 void CSProfileGenerator::populateBodySamplesWithProbes(
871 const RangeSample
&RangeCounter
, SampleContextFrames ContextStack
) {
872 ProbeCounterMap ProbeCounter
;
873 // Extract the top frame probes by looking up each address among the range in
874 // the Address2ProbeMap
875 extractProbesFromRange(RangeCounter
, ProbeCounter
);
876 std::unordered_map
<MCDecodedPseudoProbeInlineTree
*,
877 std::unordered_set
<FunctionSamples
*>>
879 for (const auto &PI
: ProbeCounter
) {
880 const MCDecodedPseudoProbe
*Probe
= PI
.first
;
881 uint64_t Count
= PI
.second
;
882 FunctionSamples
&FunctionProfile
=
883 getFunctionProfileForLeafProbe(ContextStack
, Probe
);
884 // Record the current frame and FunctionProfile whenever samples are
885 // collected for non-danglie probes. This is for reporting all of the
886 // zero count probes of the frame later.
887 FrameSamples
[Probe
->getInlineTreeNode()].insert(&FunctionProfile
);
888 FunctionProfile
.addBodySamplesForProbe(Probe
->getIndex(), Count
);
889 FunctionProfile
.addTotalSamples(Count
);
890 if (Probe
->isEntry()) {
891 FunctionProfile
.addHeadSamples(Count
);
892 // Look up for the caller's function profile
893 const auto *InlinerDesc
= Binary
->getInlinerDescForProbe(Probe
);
894 SampleContextFrames CalleeContextId
=
895 FunctionProfile
.getContext().getContextFrames();
896 if (InlinerDesc
!= nullptr && CalleeContextId
.size() > 1) {
897 // Since the context id will be compressed, we have to use callee's
898 // context id to infer caller's context id to ensure they share the
899 // same context prefix.
900 SampleContextFrameVector CallerContextId
;
901 SampleContextFrame
&&CallerLeafFrameLoc
=
902 getCallerContext(CalleeContextId
, CallerContextId
);
903 uint64_t CallerIndex
= CallerLeafFrameLoc
.Location
.LineOffset
;
904 assert(CallerIndex
&&
905 "Inferred caller's location index shouldn't be zero!");
906 FunctionSamples
&CallerProfile
=
907 getFunctionProfileForContext(CallerContextId
);
908 CallerProfile
.setFunctionHash(InlinerDesc
->FuncHash
);
909 CallerProfile
.addBodySamples(CallerIndex
, 0, Count
);
910 CallerProfile
.addTotalSamples(Count
);
911 CallerProfile
.addCalledTargetSamples(
912 CallerIndex
, 0, FunctionProfile
.getContext().getName(), Count
);
917 // Assign zero count for remaining probes without sample hits to
918 // differentiate from probes optimized away, of which the counts are unknown
919 // and will be inferred by the compiler.
920 for (auto &I
: FrameSamples
) {
921 for (auto *FunctionProfile
: I
.second
) {
922 for (auto *Probe
: I
.first
->getProbes()) {
923 FunctionProfile
->addBodySamplesForProbe(Probe
->getIndex(), 0);
929 void CSProfileGenerator::populateBoundarySamplesWithProbes(
930 const BranchSample
&BranchCounter
, SampleContextFrames ContextStack
) {
931 for (const auto &BI
: BranchCounter
) {
932 uint64_t SourceOffset
= BI
.first
.first
;
933 uint64_t TargetOffset
= BI
.first
.second
;
934 uint64_t Count
= BI
.second
;
935 uint64_t SourceAddress
= Binary
->offsetToVirtualAddr(SourceOffset
);
936 const MCDecodedPseudoProbe
*CallProbe
=
937 Binary
->getCallProbeForAddr(SourceAddress
);
938 if (CallProbe
== nullptr)
940 FunctionSamples
&FunctionProfile
=
941 getFunctionProfileForLeafProbe(ContextStack
, CallProbe
);
942 FunctionProfile
.addBodySamples(CallProbe
->getIndex(), 0, Count
);
943 FunctionProfile
.addTotalSamples(Count
);
944 StringRef CalleeName
= getCalleeNameForOffset(TargetOffset
);
945 if (CalleeName
.size() == 0)
947 FunctionProfile
.addCalledTargetSamples(CallProbe
->getIndex(), 0, CalleeName
,
952 FunctionSamples
&CSProfileGenerator::getFunctionProfileForLeafProbe(
953 SampleContextFrames ContextStack
, const MCDecodedPseudoProbe
*LeafProbe
) {
955 // Explicitly copy the context for appending the leaf context
956 SampleContextFrameVector
NewContextStack(ContextStack
.begin(),
958 Binary
->getInlineContextForProbe(LeafProbe
, NewContextStack
, true);
959 // For leaf inlined context with the top frame, we should strip off the top
960 // frame's probe id, like:
961 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
962 auto LeafFrame
= NewContextStack
.back();
963 LeafFrame
.Location
= LineLocation(0, 0);
964 NewContextStack
.pop_back();
965 // Compress the context string except for the leaf frame
966 CSProfileGenerator::compressRecursionContext(NewContextStack
);
967 CSProfileGenerator::trimContext(NewContextStack
);
968 NewContextStack
.push_back(LeafFrame
);
970 const auto *FuncDesc
= Binary
->getFuncDescForGUID(LeafProbe
->getGuid());
971 bool WasLeafInlined
= LeafProbe
->getInlineTreeNode()->hasInlineSite();
972 FunctionSamples
&FunctionProile
=
973 getFunctionProfileForContext(NewContextStack
, WasLeafInlined
);
974 FunctionProile
.setFunctionHash(FuncDesc
->FuncHash
);
975 return FunctionProile
;
978 } // end namespace sampleprof
979 } // end namespace llvm