1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "MissingFrameInferrer.h"
11 #include "PerfReader.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14 #include "llvm/ProfileData/ProfileCommon.h"
17 #include <unordered_set>
20 cl::opt
<std::string
> OutputFilename("output", cl::value_desc("output"),
22 cl::desc("Output profile file"));
23 static cl::alias
OutputA("o", cl::desc("Alias for --output"),
24 cl::aliasopt(OutputFilename
));
26 static cl::opt
<SampleProfileFormat
> OutputFormat(
27 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary
),
29 clEnumValN(SPF_Binary
, "binary", "Binary encoding (default)"),
30 clEnumValN(SPF_Ext_Binary
, "extbinary", "Extensible binary encoding"),
31 clEnumValN(SPF_Text
, "text", "Text encoding"),
32 clEnumValN(SPF_GCC
, "gcc",
33 "GCC encoding (only meaningful for -sample)")));
35 static cl::opt
<bool> UseMD5(
36 "use-md5", cl::Hidden
,
37 cl::desc("Use md5 to represent function names in the output profile (only "
38 "meaningful for -extbinary)"));
40 static cl::opt
<bool> PopulateProfileSymbolList(
41 "populate-profile-symbol-list", cl::init(false), cl::Hidden
,
42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
44 static cl::opt
<bool> FillZeroForAllFuncs(
45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden
,
46 cl::desc("Attribute all functions' range with zero count "
47 "even it's not hit by any samples."));
49 static cl::opt
<int32_t, true> RecursionCompression(
51 cl::desc("Compressing recursion by deduplicating adjacent frame "
52 "sequences up to the specified size. -1 means no size limit."),
54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize
));
57 TrimColdProfile("trim-cold-profile",
58 cl::desc("If the total count of the profile is smaller "
59 "than threshold, it will be trimmed."));
61 static cl::opt
<bool> CSProfMergeColdContext(
62 "csprof-merge-cold-context", cl::init(true),
63 cl::desc("If the total count of context profile is smaller than "
64 "the threshold, it will be merged into context-less base "
67 static cl::opt
<uint32_t> CSProfMaxColdContextDepth(
68 "csprof-max-cold-context-depth", cl::init(1),
69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70 "context-less base profile"));
72 static cl::opt
<int, true> CSProfMaxContextDepth(
73 "csprof-max-context-depth",
74 cl::desc("Keep the last K contexts while merging profile. -1 means no "
76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth
));
78 static cl::opt
<double> HotFunctionDensityThreshold(
79 "hot-function-density-threshold", llvm::cl::init(1000),
81 "specify density threshold for hot functions (default: 1000)"),
83 static cl::opt
<bool> ShowDensity("show-density", llvm::cl::init(false),
84 llvm::cl::desc("show profile density details"),
87 static cl::opt
<bool> UpdateTotalSamples(
88 "update-total-samples", llvm::cl::init(false),
90 "Update total samples by accumulating all its body samples."),
93 static cl::opt
<bool> GenCSNestedProfile(
94 "gen-cs-nested-profile", cl::Hidden
, cl::init(true),
95 cl::desc("Generate nested function profiles for CSSPGO"));
97 cl::opt
<bool> InferMissingFrames(
98 "infer-missing-frames", llvm::cl::init(true),
100 "Infer missing call frames due to compiler tail call elimination."),
103 using namespace llvm
;
104 using namespace sampleprof
;
107 extern cl::opt
<int> ProfileSummaryCutoffHot
;
108 extern cl::opt
<bool> UseContextLessSummary
;
110 namespace sampleprof
{
112 // Initialize the MaxCompressionSize to -1 which means no size limit
113 int32_t CSProfileGenerator::MaxCompressionSize
= -1;
115 int CSProfileGenerator::MaxContextDepth
= -1;
117 bool ProfileGeneratorBase::UseFSDiscriminator
= false;
119 std::unique_ptr
<ProfileGeneratorBase
>
120 ProfileGeneratorBase::create(ProfiledBinary
*Binary
,
121 const ContextSampleCounterMap
*SampleCounters
,
123 std::unique_ptr
<ProfileGeneratorBase
> Generator
;
125 Generator
.reset(new CSProfileGenerator(Binary
, SampleCounters
));
127 Generator
.reset(new ProfileGenerator(Binary
, SampleCounters
));
129 ProfileGeneratorBase::UseFSDiscriminator
= Binary
->useFSDiscriminator();
130 FunctionSamples::ProfileIsFS
= Binary
->useFSDiscriminator();
135 std::unique_ptr
<ProfileGeneratorBase
>
136 ProfileGeneratorBase::create(ProfiledBinary
*Binary
, SampleProfileMap
&Profiles
,
138 std::unique_ptr
<ProfileGeneratorBase
> Generator
;
140 Generator
.reset(new CSProfileGenerator(Binary
, Profiles
));
142 Generator
.reset(new ProfileGenerator(Binary
, std::move(Profiles
)));
144 ProfileGeneratorBase::UseFSDiscriminator
= Binary
->useFSDiscriminator();
145 FunctionSamples::ProfileIsFS
= Binary
->useFSDiscriminator();
150 void ProfileGeneratorBase::write(std::unique_ptr
<SampleProfileWriter
> Writer
,
151 SampleProfileMap
&ProfileMap
) {
152 // Populate profile symbol list if extended binary format is used.
153 ProfileSymbolList SymbolList
;
155 if (PopulateProfileSymbolList
&& OutputFormat
== SPF_Ext_Binary
) {
156 Binary
->populateSymbolListFromDWARF(SymbolList
);
157 Writer
->setProfileSymbolList(&SymbolList
);
160 if (std::error_code EC
= Writer
->write(ProfileMap
))
161 exitWithError(std::move(EC
));
164 void ProfileGeneratorBase::write() {
165 auto WriterOrErr
= SampleProfileWriter::create(OutputFilename
, OutputFormat
);
166 if (std::error_code EC
= WriterOrErr
.getError())
167 exitWithError(EC
, OutputFilename
);
170 if (OutputFormat
!= SPF_Ext_Binary
)
171 WithColor::warning() << "-use-md5 is ignored. Specify "
172 "--format=extbinary to enable it\n";
174 WriterOrErr
.get()->setUseMD5();
177 write(std::move(WriterOrErr
.get()), ProfileMap
);
180 void ProfileGeneratorBase::showDensitySuggestion(double Density
) {
182 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
183 "set too low. Please check your command.\n";
184 else if (Density
< HotFunctionDensityThreshold
)
186 << "Sample PGO is estimated to optimize better with "
187 << format("%.1f", HotFunctionDensityThreshold
/ Density
)
188 << "x more samples. Please consider increasing sampling rate or "
189 "profiling for longer duration to get more samples.\n";
192 outs() << "Minimum profile density for hot functions with top "
194 static_cast<double>(ProfileSummaryCutoffHot
.getValue()) /
196 << "% total samples: " << format("%.1f", Density
) << "\n";
199 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap
&Profiles
,
200 uint64_t HotCntThreshold
) {
201 double Density
= DBL_MAX
;
202 std::vector
<const FunctionSamples
*> HotFuncs
;
203 for (auto &I
: Profiles
) {
204 auto &FuncSamples
= I
.second
;
205 if (FuncSamples
.getTotalSamples() < HotCntThreshold
)
207 HotFuncs
.emplace_back(&FuncSamples
);
210 for (auto *FuncSamples
: HotFuncs
) {
211 auto *Func
= Binary
->getBinaryFunction(FuncSamples
->getName());
214 uint64_t FuncSize
= Func
->getFuncSize();
218 std::min(Density
, static_cast<double>(FuncSamples
->getTotalSamples()) /
222 return Density
== DBL_MAX
? 0.0 : Density
;
225 void ProfileGeneratorBase::findDisjointRanges(RangeSample
&DisjointRanges
,
226 const RangeSample
&Ranges
) {
229 Regions may overlap with each other. Using the boundary info, find all
230 disjoint ranges and their sample count. BoundaryPoint contains the count
231 multiple samples begin/end at this points.
234 |<------200------>| Sample2
237 In the example above,
238 Sample1 begins at A, ends at B, its value is 100.
239 Sample2 beings at A, ends at C, its value is 200.
240 For A, BeginCount is the sum of sample begins at A, which is 300 and no
241 samples ends at A, so EndCount is 0.
242 Then boundary points A, B, and C with begin/end counts are:
247 struct BoundaryPoint
{
248 // Sum of sample counts beginning at this point
249 uint64_t BeginCount
= UINT64_MAX
;
250 // Sum of sample counts ending at this point
251 uint64_t EndCount
= UINT64_MAX
;
252 // Is the begin point of a zero range.
253 bool IsZeroRangeBegin
= false;
254 // Is the end point of a zero range.
255 bool IsZeroRangeEnd
= false;
257 void addBeginCount(uint64_t Count
) {
258 if (BeginCount
== UINT64_MAX
)
263 void addEndCount(uint64_t Count
) {
264 if (EndCount
== UINT64_MAX
)
271 For the above example. With boundary points, follwing logic finds two
277 If there is a boundary point that both begin and end, the point itself
278 becomes a separate disjoint region. For example, if we have original
285 there are three boundary points with their begin/end counts of
291 the disjoint ranges would be
297 Example for zero value range:
301 |<--------------- 0 ----------------->|
310 std::map
<uint64_t, BoundaryPoint
> Boundaries
;
312 for (const auto &Item
: Ranges
) {
313 assert(Item
.first
.first
<= Item
.first
.second
&&
314 "Invalid instruction range");
315 auto &BeginPoint
= Boundaries
[Item
.first
.first
];
316 auto &EndPoint
= Boundaries
[Item
.first
.second
];
317 uint64_t Count
= Item
.second
;
319 BeginPoint
.addBeginCount(Count
);
320 EndPoint
.addEndCount(Count
);
322 BeginPoint
.IsZeroRangeBegin
= true;
323 EndPoint
.IsZeroRangeEnd
= true;
327 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
328 // and the next valid address
329 uint64_t BeginAddress
= UINT64_MAX
;
330 int ZeroRangeDepth
= 0;
332 for (const auto &Item
: Boundaries
) {
333 uint64_t Address
= Item
.first
;
334 const BoundaryPoint
&Point
= Item
.second
;
335 if (Point
.BeginCount
!= UINT64_MAX
) {
336 if (BeginAddress
!= UINT64_MAX
)
337 DisjointRanges
[{BeginAddress
, Address
- 1}] = Count
;
338 Count
+= Point
.BeginCount
;
339 BeginAddress
= Address
;
340 ZeroRangeDepth
+= Point
.IsZeroRangeBegin
;
342 if (Point
.EndCount
!= UINT64_MAX
) {
343 assert((BeginAddress
!= UINT64_MAX
) &&
344 "First boundary point cannot be 'end' point");
345 DisjointRanges
[{BeginAddress
, Address
}] = Count
;
346 assert(Count
>= Point
.EndCount
&& "Mismatched live ranges");
347 Count
-= Point
.EndCount
;
348 BeginAddress
= Address
+ 1;
349 ZeroRangeDepth
-= Point
.IsZeroRangeEnd
;
350 // If the remaining count is zero and it's no longer in a zero range, this
351 // means we consume all the ranges before, thus mark BeginAddress as
352 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
356 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
357 // have the [B+1, C-1] zero range.
358 if (Count
== 0 && ZeroRangeDepth
== 0)
359 BeginAddress
= UINT64_MAX
;
364 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
365 FunctionSamples
&FunctionProfile
, const SampleContextFrame
&LeafLoc
,
367 // Use the maximum count of samples with same line location
368 uint32_t Discriminator
= getBaseDiscriminator(LeafLoc
.Location
.Discriminator
);
370 // Use duplication factor to compensated for loop unroll/vectorization.
371 // Note that this is only needed when we're taking MAX of the counts at
372 // the location instead of SUM.
373 Count
*= getDuplicationFactor(LeafLoc
.Location
.Discriminator
);
375 ErrorOr
<uint64_t> R
=
376 FunctionProfile
.findSamplesAt(LeafLoc
.Location
.LineOffset
, Discriminator
);
378 uint64_t PreviousCount
= R
? R
.get() : 0;
379 if (PreviousCount
<= Count
) {
380 FunctionProfile
.addBodySamples(LeafLoc
.Location
.LineOffset
, Discriminator
,
381 Count
- PreviousCount
);
385 void ProfileGeneratorBase::updateTotalSamples() {
386 for (auto &Item
: ProfileMap
) {
387 FunctionSamples
&FunctionProfile
= Item
.second
;
388 FunctionProfile
.updateTotalSamples();
392 void ProfileGeneratorBase::updateCallsiteSamples() {
393 for (auto &Item
: ProfileMap
) {
394 FunctionSamples
&FunctionProfile
= Item
.second
;
395 FunctionProfile
.updateCallsiteSamples();
399 void ProfileGeneratorBase::updateFunctionSamples() {
400 updateCallsiteSamples();
402 if (UpdateTotalSamples
)
403 updateTotalSamples();
406 void ProfileGeneratorBase::collectProfiledFunctions() {
407 std::unordered_set
<const BinaryFunction
*> ProfiledFunctions
;
408 if (collectFunctionsFromRawProfile(ProfiledFunctions
))
409 Binary
->setProfiledFunctions(ProfiledFunctions
);
410 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions
))
411 Binary
->setProfiledFunctions(ProfiledFunctions
);
413 llvm_unreachable("Unsupported input profile");
416 bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
417 std::unordered_set
<const BinaryFunction
*> &ProfiledFunctions
) {
420 // Go through all the stacks, ranges and branches in sample counters, use
421 // the start of the range to look up the function it belongs and record the
423 for (const auto &CI
: *SampleCounters
) {
424 if (const auto *CtxKey
= dyn_cast
<AddrBasedCtxKey
>(CI
.first
.getPtr())) {
425 for (auto StackAddr
: CtxKey
->Context
) {
426 if (FuncRange
*FRange
= Binary
->findFuncRange(StackAddr
))
427 ProfiledFunctions
.insert(FRange
->Func
);
431 for (auto Item
: CI
.second
.RangeCounter
) {
432 uint64_t StartAddress
= Item
.first
.first
;
433 if (FuncRange
*FRange
= Binary
->findFuncRange(StartAddress
))
434 ProfiledFunctions
.insert(FRange
->Func
);
437 for (auto Item
: CI
.second
.BranchCounter
) {
438 uint64_t SourceAddress
= Item
.first
.first
;
439 uint64_t TargetAddress
= Item
.first
.second
;
440 if (FuncRange
*FRange
= Binary
->findFuncRange(SourceAddress
))
441 ProfiledFunctions
.insert(FRange
->Func
);
442 if (FuncRange
*FRange
= Binary
->findFuncRange(TargetAddress
))
443 ProfiledFunctions
.insert(FRange
->Func
);
449 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
450 std::unordered_set
<const BinaryFunction
*> &ProfiledFunctions
) {
451 for (const auto &FS
: ProfileMap
) {
452 if (auto *Func
= Binary
->getBinaryFunction(FS
.second
.getName()))
453 ProfiledFunctions
.insert(Func
);
458 bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
459 std::unordered_set
<const BinaryFunction
*> &ProfiledFunctions
) {
460 for (auto *Node
: ContextTracker
) {
461 if (!Node
->getFuncName().empty())
462 if (auto *Func
= Binary
->getBinaryFunction(Node
->getFuncName()))
463 ProfiledFunctions
.insert(Func
);
469 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName
) {
470 SampleContext
Context(FuncName
);
471 return ProfileMap
.Create(Context
);
474 void ProfileGenerator::generateProfile() {
475 collectProfiledFunctions();
477 if (Binary
->usePseudoProbes())
478 Binary
->decodePseudoProbe();
480 if (SampleCounters
) {
481 if (Binary
->usePseudoProbes()) {
482 generateProbeBasedProfile();
484 generateLineNumBasedProfile();
488 postProcessProfiles();
491 void ProfileGenerator::postProcessProfiles() {
492 computeSummaryAndThreshold(ProfileMap
);
493 trimColdProfiles(ProfileMap
, ColdCountThreshold
);
494 calculateAndShowDensity(ProfileMap
);
497 void ProfileGenerator::trimColdProfiles(const SampleProfileMap
&Profiles
,
498 uint64_t ColdCntThreshold
) {
499 if (!TrimColdProfile
)
502 // Move cold profiles into a tmp container.
503 std::vector
<hash_code
> ColdProfileHashes
;
504 for (const auto &I
: ProfileMap
) {
505 if (I
.second
.getTotalSamples() < ColdCntThreshold
)
506 ColdProfileHashes
.emplace_back(I
.first
);
509 // Remove the cold profile from ProfileMap.
510 for (const auto &I
: ColdProfileHashes
)
514 void ProfileGenerator::generateLineNumBasedProfile() {
515 assert(SampleCounters
->size() == 1 &&
516 "Must have one entry for profile generation.");
517 const SampleCounter
&SC
= SampleCounters
->begin()->second
;
518 // Fill in function body samples
519 populateBodySamplesForAllFunctions(SC
.RangeCounter
);
520 // Fill in boundary sample counts as well as call site samples for calls
521 populateBoundarySamplesForAllFunctions(SC
.BranchCounter
);
523 updateFunctionSamples();
526 void ProfileGenerator::generateProbeBasedProfile() {
527 assert(SampleCounters
->size() == 1 &&
528 "Must have one entry for profile generation.");
529 // Enable pseudo probe functionalities in SampleProf
530 FunctionSamples::ProfileIsProbeBased
= true;
531 const SampleCounter
&SC
= SampleCounters
->begin()->second
;
532 // Fill in function body samples
533 populateBodySamplesWithProbesForAllFunctions(SC
.RangeCounter
);
534 // Fill in boundary sample counts as well as call site samples for calls
535 populateBoundarySamplesWithProbesForAllFunctions(SC
.BranchCounter
);
537 updateFunctionSamples();
540 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
541 const RangeSample
&RangeCounter
) {
542 ProbeCounterMap ProbeCounter
;
543 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
544 // inside extractProbesFromRange.
545 extractProbesFromRange(preprocessRangeCounter(RangeCounter
), ProbeCounter
,
548 for (const auto &PI
: ProbeCounter
) {
549 const MCDecodedPseudoProbe
*Probe
= PI
.first
;
550 uint64_t Count
= PI
.second
;
551 SampleContextFrameVector FrameVec
;
552 Binary
->getInlineContextForProbe(Probe
, FrameVec
, true);
553 FunctionSamples
&FunctionProfile
=
554 getLeafProfileAndAddTotalSamples(FrameVec
, Count
);
555 FunctionProfile
.addBodySamples(Probe
->getIndex(), Probe
->getDiscriminator(),
557 if (Probe
->isEntry())
558 FunctionProfile
.addHeadSamples(Count
);
562 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
563 const BranchSample
&BranchCounters
) {
564 for (const auto &Entry
: BranchCounters
) {
565 uint64_t SourceAddress
= Entry
.first
.first
;
566 uint64_t TargetAddress
= Entry
.first
.second
;
567 uint64_t Count
= Entry
.second
;
568 assert(Count
!= 0 && "Unexpected zero weight branch");
570 StringRef CalleeName
= getCalleeNameForAddress(TargetAddress
);
571 if (CalleeName
.size() == 0)
574 const MCDecodedPseudoProbe
*CallProbe
=
575 Binary
->getCallProbeForAddr(SourceAddress
);
576 if (CallProbe
== nullptr)
579 // Record called target sample and its count.
580 SampleContextFrameVector FrameVec
;
581 Binary
->getInlineContextForProbe(CallProbe
, FrameVec
, true);
583 if (!FrameVec
.empty()) {
584 FunctionSamples
&FunctionProfile
=
585 getLeafProfileAndAddTotalSamples(FrameVec
, 0);
586 FunctionProfile
.addCalledTargetSamples(
587 FrameVec
.back().Location
.LineOffset
,
588 FrameVec
.back().Location
.Discriminator
,
594 FunctionSamples
&ProfileGenerator::getLeafProfileAndAddTotalSamples(
595 const SampleContextFrameVector
&FrameVec
, uint64_t Count
) {
596 // Get top level profile
597 FunctionSamples
*FunctionProfile
=
598 &getTopLevelFunctionProfile(FrameVec
[0].FuncName
);
599 FunctionProfile
->addTotalSamples(Count
);
600 if (Binary
->usePseudoProbes()) {
601 const auto *FuncDesc
= Binary
->getFuncDescForGUID(
602 Function::getGUID(FunctionProfile
->getName()));
603 FunctionProfile
->setFunctionHash(FuncDesc
->FuncHash
);
606 for (size_t I
= 1; I
< FrameVec
.size(); I
++) {
607 LineLocation
Callsite(
608 FrameVec
[I
- 1].Location
.LineOffset
,
609 getBaseDiscriminator(FrameVec
[I
- 1].Location
.Discriminator
));
610 FunctionSamplesMap
&SamplesMap
=
611 FunctionProfile
->functionSamplesAt(Callsite
);
613 SamplesMap
.emplace(FrameVec
[I
].FuncName
.str(), FunctionSamples());
615 SampleContext
Context(FrameVec
[I
].FuncName
);
616 Ret
.first
->second
.setContext(Context
);
618 FunctionProfile
= &Ret
.first
->second
;
619 FunctionProfile
->addTotalSamples(Count
);
620 if (Binary
->usePseudoProbes()) {
621 const auto *FuncDesc
= Binary
->getFuncDescForGUID(
622 Function::getGUID(FunctionProfile
->getName()));
623 FunctionProfile
->setFunctionHash(FuncDesc
->FuncHash
);
627 return *FunctionProfile
;
631 ProfileGenerator::preprocessRangeCounter(const RangeSample
&RangeCounter
) {
632 RangeSample
Ranges(RangeCounter
.begin(), RangeCounter
.end());
633 if (FillZeroForAllFuncs
) {
634 for (auto &FuncI
: Binary
->getAllBinaryFunctions()) {
635 for (auto &R
: FuncI
.second
.Ranges
) {
636 Ranges
[{R
.first
, R
.second
- 1}] += 0;
640 // For each range, we search for all ranges of the function it belongs to
641 // and initialize it with zero count, so it remains zero if doesn't hit any
642 // samples. This is to be consistent with compiler that interpret zero count
643 // as unexecuted(cold).
644 for (const auto &I
: RangeCounter
) {
645 uint64_t StartAddress
= I
.first
.first
;
646 for (const auto &Range
: Binary
->getRanges(StartAddress
))
647 Ranges
[{Range
.first
, Range
.second
- 1}] += 0;
650 RangeSample DisjointRanges
;
651 findDisjointRanges(DisjointRanges
, Ranges
);
652 return DisjointRanges
;
655 void ProfileGenerator::populateBodySamplesForAllFunctions(
656 const RangeSample
&RangeCounter
) {
657 for (const auto &Range
: preprocessRangeCounter(RangeCounter
)) {
658 uint64_t RangeBegin
= Range
.first
.first
;
659 uint64_t RangeEnd
= Range
.first
.second
;
660 uint64_t Count
= Range
.second
;
662 InstructionPointer
IP(Binary
, RangeBegin
, true);
663 // Disjoint ranges may have range in the middle of two instr,
664 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
665 // can be Addr1+1 to Addr2-1. We should ignore such range.
666 if (IP
.Address
> RangeEnd
)
670 const SampleContextFrameVector FrameVec
=
671 Binary
->getFrameLocationStack(IP
.Address
);
672 if (!FrameVec
.empty()) {
673 // FIXME: As accumulating total count per instruction caused some
674 // regression, we changed to accumulate total count per byte as a
675 // workaround. Tuning hotness threshold on the compiler side might be
676 // necessary in the future.
677 FunctionSamples
&FunctionProfile
= getLeafProfileAndAddTotalSamples(
678 FrameVec
, Count
* Binary
->getInstSize(IP
.Address
));
679 updateBodySamplesforFunctionProfile(FunctionProfile
, FrameVec
.back(),
682 } while (IP
.advance() && IP
.Address
<= RangeEnd
);
687 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress
) {
688 // Get the function range by branch target if it's a call branch.
689 auto *FRange
= Binary
->findFuncRangeForStartAddr(TargetAddress
);
691 // We won't accumulate sample count for a range whose start is not the real
692 // function entry such as outlined function or inner labels.
693 if (!FRange
|| !FRange
->IsFuncEntry
)
696 return FunctionSamples::getCanonicalFnName(FRange
->getFuncName());
699 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
700 const BranchSample
&BranchCounters
) {
701 for (const auto &Entry
: BranchCounters
) {
702 uint64_t SourceAddress
= Entry
.first
.first
;
703 uint64_t TargetAddress
= Entry
.first
.second
;
704 uint64_t Count
= Entry
.second
;
705 assert(Count
!= 0 && "Unexpected zero weight branch");
707 StringRef CalleeName
= getCalleeNameForAddress(TargetAddress
);
708 if (CalleeName
.size() == 0)
710 // Record called target sample and its count.
711 const SampleContextFrameVector
&FrameVec
=
712 Binary
->getCachedFrameLocationStack(SourceAddress
);
713 if (!FrameVec
.empty()) {
714 FunctionSamples
&FunctionProfile
=
715 getLeafProfileAndAddTotalSamples(FrameVec
, 0);
716 FunctionProfile
.addCalledTargetSamples(
717 FrameVec
.back().Location
.LineOffset
,
718 getBaseDiscriminator(FrameVec
.back().Location
.Discriminator
),
721 // Add head samples for callee.
722 FunctionSamples
&CalleeProfile
= getTopLevelFunctionProfile(CalleeName
);
723 CalleeProfile
.addHeadSamples(Count
);
727 void ProfileGeneratorBase::calculateAndShowDensity(
728 const SampleProfileMap
&Profiles
) {
729 double Density
= calculateDensity(Profiles
, HotCountThreshold
);
730 showDensitySuggestion(Density
);
734 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode
*ContextNode
,
735 bool WasLeafInlined
) {
736 FunctionSamples
*FProfile
= ContextNode
->getFunctionSamples();
738 FSamplesList
.emplace_back();
739 FProfile
= &FSamplesList
.back();
740 FProfile
->setName(ContextNode
->getFuncName());
741 ContextNode
->setFunctionSamples(FProfile
);
743 // Update ContextWasInlined attribute for existing contexts.
744 // The current function can be called in two ways:
745 // - when processing a probe of the current frame
746 // - when processing the entry probe of an inlinee's frame, which
747 // is then used to update the callsite count of the current frame.
748 // The two can happen in any order, hence here we are making sure
749 // `ContextWasInlined` is always set as expected.
750 // TODO: Note that the former does not always happen if no probes of the
751 // current frame has samples, and if the latter happens, we could lose the
752 // attribute. This should be fixed.
754 FProfile
->getContext().setAttribute(ContextWasInlined
);
759 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context
,
760 bool WasLeafInlined
) {
761 ContextTrieNode
*ContextNode
=
762 ContextTracker
.getOrCreateContextPath(Context
, true);
763 getOrCreateFunctionSamples(ContextNode
, WasLeafInlined
);
767 void CSProfileGenerator::generateProfile() {
768 FunctionSamples::ProfileIsCS
= true;
770 collectProfiledFunctions();
772 if (Binary
->usePseudoProbes()) {
773 Binary
->decodePseudoProbe();
774 if (InferMissingFrames
)
775 initializeMissingFrameInferrer();
778 if (SampleCounters
) {
779 if (Binary
->usePseudoProbes()) {
780 generateProbeBasedProfile();
782 generateLineNumBasedProfile();
786 if (Binary
->getTrackFuncContextSize())
787 computeSizeForProfiledFunctions();
789 postProcessProfiles();
792 void CSProfileGenerator::initializeMissingFrameInferrer() {
793 Binary
->getMissingContextInferrer()->initialize(SampleCounters
);
796 void CSProfileGenerator::inferMissingFrames(
797 const SmallVectorImpl
<uint64_t> &Context
,
798 SmallVectorImpl
<uint64_t> &NewContext
) {
799 Binary
->inferMissingFrames(Context
, NewContext
);
802 void CSProfileGenerator::computeSizeForProfiledFunctions() {
803 for (auto *Func
: Binary
->getProfiledFunctions())
804 Binary
->computeInlinedContextSizeForFunc(Func
);
806 // Flush the symbolizer to save memory.
807 Binary
->flushSymbolizer();
810 void CSProfileGenerator::updateFunctionSamples() {
811 for (auto *Node
: ContextTracker
) {
812 FunctionSamples
*FSamples
= Node
->getFunctionSamples();
814 if (UpdateTotalSamples
)
815 FSamples
->updateTotalSamples();
816 FSamples
->updateCallsiteSamples();
821 void CSProfileGenerator::generateLineNumBasedProfile() {
822 for (const auto &CI
: *SampleCounters
) {
823 const auto *CtxKey
= cast
<StringBasedCtxKey
>(CI
.first
.getPtr());
825 ContextTrieNode
*ContextNode
= &getRootContext();
826 // Sample context will be empty if the jump is an external-to-internal call
827 // pattern, the head samples should be added for the internal function.
828 if (!CtxKey
->Context
.empty()) {
829 // Get or create function profile for the range
831 getOrCreateContextNode(CtxKey
->Context
, CtxKey
->WasLeafInlined
);
832 // Fill in function body samples
833 populateBodySamplesForFunction(*ContextNode
->getFunctionSamples(),
834 CI
.second
.RangeCounter
);
836 // Fill in boundary sample counts as well as call site samples for calls
837 populateBoundarySamplesForFunction(ContextNode
, CI
.second
.BranchCounter
);
839 // Fill in call site value sample for inlined calls and also use context to
840 // infer missing samples. Since we don't have call count for inlined
841 // functions, we estimate it from inlinee's profile using the entry of the
843 populateInferredFunctionSamples(getRootContext());
845 updateFunctionSamples();
848 void CSProfileGenerator::populateBodySamplesForFunction(
849 FunctionSamples
&FunctionProfile
, const RangeSample
&RangeCounter
) {
850 // Compute disjoint ranges first, so we can use MAX
851 // for calculating count for each location.
853 findDisjointRanges(Ranges
, RangeCounter
);
854 for (const auto &Range
: Ranges
) {
855 uint64_t RangeBegin
= Range
.first
.first
;
856 uint64_t RangeEnd
= Range
.first
.second
;
857 uint64_t Count
= Range
.second
;
858 // Disjoint ranges have introduce zero-filled gap that
859 // doesn't belong to current context, filter them out.
863 InstructionPointer
IP(Binary
, RangeBegin
, true);
864 // Disjoint ranges may have range in the middle of two instr,
865 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
866 // can be Addr1+1 to Addr2-1. We should ignore such range.
867 if (IP
.Address
> RangeEnd
)
871 auto LeafLoc
= Binary
->getInlineLeafFrameLoc(IP
.Address
);
873 // Recording body sample for this specific context
874 updateBodySamplesforFunctionProfile(FunctionProfile
, *LeafLoc
, Count
);
875 FunctionProfile
.addTotalSamples(Count
);
877 } while (IP
.advance() && IP
.Address
<= RangeEnd
);
881 void CSProfileGenerator::populateBoundarySamplesForFunction(
882 ContextTrieNode
*Node
, const BranchSample
&BranchCounters
) {
884 for (const auto &Entry
: BranchCounters
) {
885 uint64_t SourceAddress
= Entry
.first
.first
;
886 uint64_t TargetAddress
= Entry
.first
.second
;
887 uint64_t Count
= Entry
.second
;
888 assert(Count
!= 0 && "Unexpected zero weight branch");
890 StringRef CalleeName
= getCalleeNameForAddress(TargetAddress
);
891 if (CalleeName
.size() == 0)
894 ContextTrieNode
*CallerNode
= Node
;
895 LineLocation
CalleeCallSite(0, 0);
896 if (CallerNode
!= &getRootContext()) {
897 // Record called target sample and its count
898 auto LeafLoc
= Binary
->getInlineLeafFrameLoc(SourceAddress
);
900 CallerNode
->getFunctionSamples()->addCalledTargetSamples(
901 LeafLoc
->Location
.LineOffset
,
902 getBaseDiscriminator(LeafLoc
->Location
.Discriminator
), CalleeName
,
904 // Record head sample for called target(callee)
905 CalleeCallSite
= LeafLoc
->Location
;
909 ContextTrieNode
*CalleeNode
=
910 CallerNode
->getOrCreateChildContext(CalleeCallSite
, CalleeName
);
911 FunctionSamples
*CalleeProfile
= getOrCreateFunctionSamples(CalleeNode
);
912 CalleeProfile
->addHeadSamples(Count
);
916 void CSProfileGenerator::populateInferredFunctionSamples(
917 ContextTrieNode
&Node
) {
918 // There is no call jmp sample between the inliner and inlinee, we need to use
919 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
920 // sample depends on child(inlinee)'s sample, so traverse the tree in
922 for (auto &It
: Node
.getAllChildContext())
923 populateInferredFunctionSamples(It
.second
);
925 FunctionSamples
*CalleeProfile
= Node
.getFunctionSamples();
928 // If we already have head sample counts, we must have value profile
929 // for call sites added already. Skip to avoid double counting.
930 if (CalleeProfile
->getHeadSamples())
932 ContextTrieNode
*CallerNode
= Node
.getParentContext();
933 // If we don't have context, nothing to do for caller's call site.
934 // This could happen for entry point function.
935 if (CallerNode
== &getRootContext())
938 LineLocation CallerLeafFrameLoc
= Node
.getCallSiteLoc();
939 FunctionSamples
&CallerProfile
= *getOrCreateFunctionSamples(CallerNode
);
940 // Since we don't have call count for inlined functions, we
941 // estimate it from inlinee's profile using entry body sample.
942 uint64_t EstimatedCallCount
= CalleeProfile
->getHeadSamplesEstimate();
943 // If we don't have samples with location, use 1 to indicate live.
944 if (!EstimatedCallCount
&& !CalleeProfile
->getBodySamples().size())
945 EstimatedCallCount
= 1;
946 CallerProfile
.addCalledTargetSamples(CallerLeafFrameLoc
.LineOffset
,
947 CallerLeafFrameLoc
.Discriminator
,
948 Node
.getFuncName(), EstimatedCallCount
);
949 CallerProfile
.addBodySamples(CallerLeafFrameLoc
.LineOffset
,
950 CallerLeafFrameLoc
.Discriminator
,
952 CallerProfile
.addTotalSamples(EstimatedCallCount
);
955 void CSProfileGenerator::convertToProfileMap(
956 ContextTrieNode
&Node
, SampleContextFrameVector
&Context
) {
957 FunctionSamples
*FProfile
= Node
.getFunctionSamples();
959 Context
.emplace_back(Node
.getFuncName(), LineLocation(0, 0));
960 // Save the new context for future references.
961 SampleContextFrames NewContext
= *Contexts
.insert(Context
).first
;
962 auto Ret
= ProfileMap
.emplace(NewContext
, std::move(*FProfile
));
963 FunctionSamples
&NewProfile
= Ret
.first
->second
;
964 NewProfile
.getContext().setContext(NewContext
);
968 for (auto &It
: Node
.getAllChildContext()) {
969 ContextTrieNode
&ChildNode
= It
.second
;
970 Context
.emplace_back(Node
.getFuncName(), ChildNode
.getCallSiteLoc());
971 convertToProfileMap(ChildNode
, Context
);
976 void CSProfileGenerator::convertToProfileMap() {
977 assert(ProfileMap
.empty() &&
978 "ProfileMap should be empty before converting from the trie");
979 assert(IsProfileValidOnTrie
&&
980 "Do not convert the trie twice, it's already destroyed");
982 SampleContextFrameVector Context
;
983 for (auto &It
: getRootContext().getAllChildContext())
984 convertToProfileMap(It
.second
, Context
);
986 IsProfileValidOnTrie
= false;
989 void CSProfileGenerator::postProcessProfiles() {
990 // Compute hot/cold threshold based on profile. This will be used for cold
991 // context profile merging/trimming.
992 computeSummaryAndThreshold();
994 // Run global pre-inliner to adjust/merge context profile based on estimated
996 if (EnableCSPreInliner
) {
997 ContextTracker
.populateFuncToCtxtMap();
998 CSPreInliner(ContextTracker
, *Binary
, Summary
.get()).run();
999 // Turn off the profile merger by default unless it is explicitly enabled.
1000 if (!CSProfMergeColdContext
.getNumOccurrences())
1001 CSProfMergeColdContext
= false;
1004 convertToProfileMap();
1006 // Trim and merge cold context profile using cold threshold above.
1007 if (TrimColdProfile
|| CSProfMergeColdContext
) {
1008 SampleContextTrimmer(ProfileMap
)
1009 .trimAndMergeColdContextProfiles(
1010 HotCountThreshold
, TrimColdProfile
, CSProfMergeColdContext
,
1011 CSProfMaxColdContextDepth
, EnableCSPreInliner
);
1014 // Merge function samples of CS profile to calculate profile density.
1015 sampleprof::SampleProfileMap ContextLessProfiles
;
1016 ProfileConverter::flattenProfile(ProfileMap
, ContextLessProfiles
, true);
1018 calculateAndShowDensity(ContextLessProfiles
);
1019 if (GenCSNestedProfile
) {
1020 ProfileConverter
CSConverter(ProfileMap
);
1021 CSConverter
.convertCSProfiles();
1022 FunctionSamples::ProfileIsCS
= false;
1026 void ProfileGeneratorBase::computeSummaryAndThreshold(
1027 SampleProfileMap
&Profiles
) {
1028 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
1029 Summary
= Builder
.computeSummaryForProfiles(Profiles
);
1030 HotCountThreshold
= ProfileSummaryBuilder::getHotCountThreshold(
1031 (Summary
->getDetailedSummary()));
1032 ColdCountThreshold
= ProfileSummaryBuilder::getColdCountThreshold(
1033 (Summary
->getDetailedSummary()));
1036 void CSProfileGenerator::computeSummaryAndThreshold() {
1037 // Always merge and use context-less profile map to compute summary.
1038 SampleProfileMap ContextLessProfiles
;
1039 ContextTracker
.createContextLessProfileMap(ContextLessProfiles
);
1041 // Set the flag below to avoid merging the profile again in
1042 // computeSummaryAndThreshold
1043 FunctionSamples::ProfileIsCS
= false;
1045 (!UseContextLessSummary
.getNumOccurrences() || UseContextLessSummary
) &&
1046 "Don't set --profile-summary-contextless to false for profile "
1048 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles
);
1049 // Recover the old value.
1050 FunctionSamples::ProfileIsCS
= true;
1053 void ProfileGeneratorBase::extractProbesFromRange(
1054 const RangeSample
&RangeCounter
, ProbeCounterMap
&ProbeCounter
,
1055 bool FindDisjointRanges
) {
1056 const RangeSample
*PRanges
= &RangeCounter
;
1058 if (FindDisjointRanges
) {
1059 findDisjointRanges(Ranges
, RangeCounter
);
1063 for (const auto &Range
: *PRanges
) {
1064 uint64_t RangeBegin
= Range
.first
.first
;
1065 uint64_t RangeEnd
= Range
.first
.second
;
1066 uint64_t Count
= Range
.second
;
1068 InstructionPointer
IP(Binary
, RangeBegin
, true);
1069 // Disjoint ranges may have range in the middle of two instr,
1070 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1071 // can be Addr1+1 to Addr2-1. We should ignore such range.
1072 if (IP
.Address
> RangeEnd
)
1076 const AddressProbesMap
&Address2ProbesMap
=
1077 Binary
->getAddress2ProbesMap();
1078 auto It
= Address2ProbesMap
.find(IP
.Address
);
1079 if (It
!= Address2ProbesMap
.end()) {
1080 for (const auto &Probe
: It
->second
) {
1081 ProbeCounter
[&Probe
] += Count
;
1084 } while (IP
.advance() && IP
.Address
<= RangeEnd
);
1088 static void extractPrefixContextStack(SampleContextFrameVector
&ContextStack
,
1089 const SmallVectorImpl
<uint64_t> &AddrVec
,
1090 ProfiledBinary
*Binary
) {
1091 SmallVector
<const MCDecodedPseudoProbe
*, 16> Probes
;
1092 for (auto Address
: reverse(AddrVec
)) {
1093 const MCDecodedPseudoProbe
*CallProbe
=
1094 Binary
->getCallProbeForAddr(Address
);
1095 // These could be the cases when a probe is not found at a calliste. Cutting
1096 // off the context from here since the inliner will not know how to consume
1097 // a context with unknown callsites.
1098 // 1. for functions that are not sampled when
1099 // --decode-probe-for-profiled-functions-only is on.
1100 // 2. for a merged callsite. Callsite merging may cause the loss of original
1102 // 3. for an external callsite.
1105 Probes
.push_back(CallProbe
);
1108 std::reverse(Probes
.begin(), Probes
.end());
1110 // Extract context stack for reusing, leaf context stack will be added
1111 // compressed while looking up function profile.
1112 for (const auto *P
: Probes
) {
1113 Binary
->getInlineContextForProbe(P
, ContextStack
, true);
1117 void CSProfileGenerator::generateProbeBasedProfile() {
1118 // Enable pseudo probe functionalities in SampleProf
1119 FunctionSamples::ProfileIsProbeBased
= true;
1120 for (const auto &CI
: *SampleCounters
) {
1121 const AddrBasedCtxKey
*CtxKey
=
1122 dyn_cast
<AddrBasedCtxKey
>(CI
.first
.getPtr());
1123 // Fill in function body samples from probes, also infer caller's samples
1124 // from callee's probe
1125 populateBodySamplesWithProbes(CI
.second
.RangeCounter
, CtxKey
);
1126 // Fill in boundary samples for a call probe
1127 populateBoundarySamplesWithProbes(CI
.second
.BranchCounter
, CtxKey
);
1131 void CSProfileGenerator::populateBodySamplesWithProbes(
1132 const RangeSample
&RangeCounter
, const AddrBasedCtxKey
*CtxKey
) {
1133 ProbeCounterMap ProbeCounter
;
1134 // Extract the top frame probes by looking up each address among the range in
1135 // the Address2ProbeMap
1136 extractProbesFromRange(RangeCounter
, ProbeCounter
);
1137 std::unordered_map
<MCDecodedPseudoProbeInlineTree
*,
1138 std::unordered_set
<FunctionSamples
*>>
1140 for (const auto &PI
: ProbeCounter
) {
1141 const MCDecodedPseudoProbe
*Probe
= PI
.first
;
1142 uint64_t Count
= PI
.second
;
1143 // Disjoint ranges have introduce zero-filled gap that
1144 // doesn't belong to current context, filter them out.
1145 if (!Probe
->isBlock() || Count
== 0)
1148 ContextTrieNode
*ContextNode
= getContextNodeForLeafProbe(CtxKey
, Probe
);
1149 FunctionSamples
&FunctionProfile
= *ContextNode
->getFunctionSamples();
1150 // Record the current frame and FunctionProfile whenever samples are
1151 // collected for non-danglie probes. This is for reporting all of the
1152 // zero count probes of the frame later.
1153 FrameSamples
[Probe
->getInlineTreeNode()].insert(&FunctionProfile
);
1154 FunctionProfile
.addBodySamples(Probe
->getIndex(), Probe
->getDiscriminator(),
1156 FunctionProfile
.addTotalSamples(Count
);
1157 if (Probe
->isEntry()) {
1158 FunctionProfile
.addHeadSamples(Count
);
1159 // Look up for the caller's function profile
1160 const auto *InlinerDesc
= Binary
->getInlinerDescForProbe(Probe
);
1161 ContextTrieNode
*CallerNode
= ContextNode
->getParentContext();
1162 if (InlinerDesc
!= nullptr && CallerNode
!= &getRootContext()) {
1163 // Since the context id will be compressed, we have to use callee's
1164 // context id to infer caller's context id to ensure they share the
1165 // same context prefix.
1166 uint64_t CallerIndex
= ContextNode
->getCallSiteLoc().LineOffset
;
1167 uint64_t CallerDiscriminator
= ContextNode
->getCallSiteLoc().Discriminator
;
1168 assert(CallerIndex
&&
1169 "Inferred caller's location index shouldn't be zero!");
1170 assert(!CallerDiscriminator
&&
1171 "Callsite probe should not have a discriminator!");
1172 FunctionSamples
&CallerProfile
=
1173 *getOrCreateFunctionSamples(CallerNode
);
1174 CallerProfile
.setFunctionHash(InlinerDesc
->FuncHash
);
1175 CallerProfile
.addBodySamples(CallerIndex
, CallerDiscriminator
, Count
);
1176 CallerProfile
.addTotalSamples(Count
);
1177 CallerProfile
.addCalledTargetSamples(CallerIndex
, CallerDiscriminator
,
1178 ContextNode
->getFuncName(), Count
);
1183 // Assign zero count for remaining probes without sample hits to
1184 // differentiate from probes optimized away, of which the counts are unknown
1185 // and will be inferred by the compiler.
1186 for (auto &I
: FrameSamples
) {
1187 for (auto *FunctionProfile
: I
.second
) {
1188 for (auto *Probe
: I
.first
->getProbes()) {
1189 FunctionProfile
->addBodySamples(Probe
->getIndex(),
1190 Probe
->getDiscriminator(), 0);
1196 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1197 const BranchSample
&BranchCounter
, const AddrBasedCtxKey
*CtxKey
) {
1198 for (const auto &BI
: BranchCounter
) {
1199 uint64_t SourceAddress
= BI
.first
.first
;
1200 uint64_t TargetAddress
= BI
.first
.second
;
1201 uint64_t Count
= BI
.second
;
1202 const MCDecodedPseudoProbe
*CallProbe
=
1203 Binary
->getCallProbeForAddr(SourceAddress
);
1204 if (CallProbe
== nullptr)
1206 FunctionSamples
&FunctionProfile
=
1207 getFunctionProfileForLeafProbe(CtxKey
, CallProbe
);
1208 FunctionProfile
.addBodySamples(CallProbe
->getIndex(), 0, Count
);
1209 FunctionProfile
.addTotalSamples(Count
);
1210 StringRef CalleeName
= getCalleeNameForAddress(TargetAddress
);
1211 if (CalleeName
.size() == 0)
1213 FunctionProfile
.addCalledTargetSamples(CallProbe
->getIndex(),
1214 CallProbe
->getDiscriminator(),
1219 ContextTrieNode
*CSProfileGenerator::getContextNodeForLeafProbe(
1220 const AddrBasedCtxKey
*CtxKey
, const MCDecodedPseudoProbe
*LeafProbe
) {
1222 const SmallVectorImpl
<uint64_t> *PContext
= &CtxKey
->Context
;
1223 SmallVector
<uint64_t, 16> NewContext
;
1225 if (InferMissingFrames
) {
1226 SmallVector
<uint64_t, 16> Context
= CtxKey
->Context
;
1227 // Append leaf frame for a complete inference.
1228 Context
.push_back(LeafProbe
->getAddress());
1229 inferMissingFrames(Context
, NewContext
);
1230 // Pop out the leaf probe that was pushed in above.
1231 NewContext
.pop_back();
1232 PContext
= &NewContext
;
1235 SampleContextFrameVector ContextStack
;
1236 extractPrefixContextStack(ContextStack
, *PContext
, Binary
);
1238 // Explicitly copy the context for appending the leaf context
1239 SampleContextFrameVector
NewContextStack(ContextStack
.begin(),
1240 ContextStack
.end());
1241 Binary
->getInlineContextForProbe(LeafProbe
, NewContextStack
, true);
1242 // For leaf inlined context with the top frame, we should strip off the top
1243 // frame's probe id, like:
1244 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1245 auto LeafFrame
= NewContextStack
.back();
1246 LeafFrame
.Location
= LineLocation(0, 0);
1247 NewContextStack
.pop_back();
1248 // Compress the context string except for the leaf frame
1249 CSProfileGenerator::compressRecursionContext(NewContextStack
);
1250 CSProfileGenerator::trimContext(NewContextStack
);
1251 NewContextStack
.push_back(LeafFrame
);
1253 const auto *FuncDesc
= Binary
->getFuncDescForGUID(LeafProbe
->getGuid());
1254 bool WasLeafInlined
= LeafProbe
->getInlineTreeNode()->hasInlineSite();
1255 ContextTrieNode
*ContextNode
=
1256 getOrCreateContextNode(NewContextStack
, WasLeafInlined
);
1257 ContextNode
->getFunctionSamples()->setFunctionHash(FuncDesc
->FuncHash
);
1261 FunctionSamples
&CSProfileGenerator::getFunctionProfileForLeafProbe(
1262 const AddrBasedCtxKey
*CtxKey
, const MCDecodedPseudoProbe
*LeafProbe
) {
1263 return *getContextNodeForLeafProbe(CtxKey
, LeafProbe
)->getFunctionSamples();
1266 } // end namespace sampleprof
1267 } // end namespace llvm