[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / llvm / tools / llvm-profgen / ProfileGenerator.cpp
blobc4028e6b132871bf5d75cac62bb2a0fa851df747
1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "MissingFrameInferrer.h"
11 #include "PerfReader.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14 #include "llvm/ProfileData/ProfileCommon.h"
15 #include <algorithm>
16 #include <float.h>
17 #include <unordered_set>
18 #include <utility>
20 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
21 cl::Required,
22 cl::desc("Output profile file"));
23 static cl::alias OutputA("o", cl::desc("Alias for --output"),
24 cl::aliasopt(OutputFilename));
26 static cl::opt<SampleProfileFormat> OutputFormat(
27 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
28 cl::values(
29 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31 clEnumValN(SPF_Text, "text", "Text encoding"),
32 clEnumValN(SPF_GCC, "gcc",
33 "GCC encoding (only meaningful for -sample)")));
35 static cl::opt<bool> UseMD5(
36 "use-md5", cl::Hidden,
37 cl::desc("Use md5 to represent function names in the output profile (only "
38 "meaningful for -extbinary)"));
40 static cl::opt<bool> PopulateProfileSymbolList(
41 "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
44 static cl::opt<bool> FillZeroForAllFuncs(
45 "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46 cl::desc("Attribute all functions' range with zero count "
47 "even it's not hit by any samples."));
49 static cl::opt<int32_t, true> RecursionCompression(
50 "compress-recursion",
51 cl::desc("Compressing recursion by deduplicating adjacent frame "
52 "sequences up to the specified size. -1 means no size limit."),
53 cl::Hidden,
54 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
56 static cl::opt<bool>
57 TrimColdProfile("trim-cold-profile",
58 cl::desc("If the total count of the profile is smaller "
59 "than threshold, it will be trimmed."));
61 static cl::opt<bool> CSProfMergeColdContext(
62 "csprof-merge-cold-context", cl::init(true),
63 cl::desc("If the total count of context profile is smaller than "
64 "the threshold, it will be merged into context-less base "
65 "profile."));
67 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68 "csprof-max-cold-context-depth", cl::init(1),
69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70 "context-less base profile"));
72 static cl::opt<int, true> CSProfMaxContextDepth(
73 "csprof-max-context-depth",
74 cl::desc("Keep the last K contexts while merging profile. -1 means no "
75 "depth limit."),
76 cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
78 static cl::opt<double> HotFunctionDensityThreshold(
79 "hot-function-density-threshold", llvm::cl::init(1000),
80 llvm::cl::desc(
81 "specify density threshold for hot functions (default: 1000)"),
82 llvm::cl::Optional);
83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84 llvm::cl::desc("show profile density details"),
85 llvm::cl::Optional);
87 static cl::opt<bool> UpdateTotalSamples(
88 "update-total-samples", llvm::cl::init(false),
89 llvm::cl::desc(
90 "Update total samples by accumulating all its body samples."),
91 llvm::cl::Optional);
93 static cl::opt<bool> GenCSNestedProfile(
94 "gen-cs-nested-profile", cl::Hidden, cl::init(true),
95 cl::desc("Generate nested function profiles for CSSPGO"));
97 cl::opt<bool> InferMissingFrames(
98 "infer-missing-frames", llvm::cl::init(true),
99 llvm::cl::desc(
100 "Infer missing call frames due to compiler tail call elimination."),
101 llvm::cl::Optional);
103 using namespace llvm;
104 using namespace sampleprof;
106 namespace llvm {
107 extern cl::opt<int> ProfileSummaryCutoffHot;
108 extern cl::opt<bool> UseContextLessSummary;
110 namespace sampleprof {
112 // Initialize the MaxCompressionSize to -1 which means no size limit
113 int32_t CSProfileGenerator::MaxCompressionSize = -1;
115 int CSProfileGenerator::MaxContextDepth = -1;
117 bool ProfileGeneratorBase::UseFSDiscriminator = false;
119 std::unique_ptr<ProfileGeneratorBase>
120 ProfileGeneratorBase::create(ProfiledBinary *Binary,
121 const ContextSampleCounterMap *SampleCounters,
122 bool ProfileIsCS) {
123 std::unique_ptr<ProfileGeneratorBase> Generator;
124 if (ProfileIsCS) {
125 Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
126 } else {
127 Generator.reset(new ProfileGenerator(Binary, SampleCounters));
129 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
130 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
132 return Generator;
135 std::unique_ptr<ProfileGeneratorBase>
136 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
137 bool ProfileIsCS) {
138 std::unique_ptr<ProfileGeneratorBase> Generator;
139 if (ProfileIsCS) {
140 Generator.reset(new CSProfileGenerator(Binary, Profiles));
141 } else {
142 Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
144 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
145 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
147 return Generator;
150 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
151 SampleProfileMap &ProfileMap) {
152 // Populate profile symbol list if extended binary format is used.
153 ProfileSymbolList SymbolList;
155 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
156 Binary->populateSymbolListFromDWARF(SymbolList);
157 Writer->setProfileSymbolList(&SymbolList);
160 if (std::error_code EC = Writer->write(ProfileMap))
161 exitWithError(std::move(EC));
164 void ProfileGeneratorBase::write() {
165 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
166 if (std::error_code EC = WriterOrErr.getError())
167 exitWithError(EC, OutputFilename);
169 if (UseMD5) {
170 if (OutputFormat != SPF_Ext_Binary)
171 WithColor::warning() << "-use-md5 is ignored. Specify "
172 "--format=extbinary to enable it\n";
173 else
174 WriterOrErr.get()->setUseMD5();
177 write(std::move(WriterOrErr.get()), ProfileMap);
180 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
181 if (Density == 0.0)
182 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
183 "set too low. Please check your command.\n";
184 else if (Density < HotFunctionDensityThreshold)
185 WithColor::warning()
186 << "Sample PGO is estimated to optimize better with "
187 << format("%.1f", HotFunctionDensityThreshold / Density)
188 << "x more samples. Please consider increasing sampling rate or "
189 "profiling for longer duration to get more samples.\n";
191 if (ShowDensity)
192 outs() << "Minimum profile density for hot functions with top "
193 << format("%.2f",
194 static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
195 10000)
196 << "% total samples: " << format("%.1f", Density) << "\n";
199 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
200 uint64_t HotCntThreshold) {
201 double Density = DBL_MAX;
202 std::vector<const FunctionSamples *> HotFuncs;
203 for (auto &I : Profiles) {
204 auto &FuncSamples = I.second;
205 if (FuncSamples.getTotalSamples() < HotCntThreshold)
206 continue;
207 HotFuncs.emplace_back(&FuncSamples);
210 for (auto *FuncSamples : HotFuncs) {
211 auto *Func = Binary->getBinaryFunction(FuncSamples->getFunction());
212 if (!Func)
213 continue;
214 uint64_t FuncSize = Func->getFuncSize();
215 if (FuncSize == 0)
216 continue;
217 Density =
218 std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
219 FuncSize);
222 return Density == DBL_MAX ? 0.0 : Density;
225 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
226 const RangeSample &Ranges) {
229 Regions may overlap with each other. Using the boundary info, find all
230 disjoint ranges and their sample count. BoundaryPoint contains the count
231 multiple samples begin/end at this points.
233 |<--100-->| Sample1
234 |<------200------>| Sample2
235 A B C
237 In the example above,
238 Sample1 begins at A, ends at B, its value is 100.
239 Sample2 beings at A, ends at C, its value is 200.
240 For A, BeginCount is the sum of sample begins at A, which is 300 and no
241 samples ends at A, so EndCount is 0.
242 Then boundary points A, B, and C with begin/end counts are:
243 A: (300, 0)
244 B: (0, 100)
245 C: (0, 200)
247 struct BoundaryPoint {
248 // Sum of sample counts beginning at this point
249 uint64_t BeginCount = UINT64_MAX;
250 // Sum of sample counts ending at this point
251 uint64_t EndCount = UINT64_MAX;
252 // Is the begin point of a zero range.
253 bool IsZeroRangeBegin = false;
254 // Is the end point of a zero range.
255 bool IsZeroRangeEnd = false;
257 void addBeginCount(uint64_t Count) {
258 if (BeginCount == UINT64_MAX)
259 BeginCount = 0;
260 BeginCount += Count;
263 void addEndCount(uint64_t Count) {
264 if (EndCount == UINT64_MAX)
265 EndCount = 0;
266 EndCount += Count;
271 For the above example. With boundary points, follwing logic finds two
272 disjoint region of
274 [A,B]: 300
275 [B+1,C]: 200
277 If there is a boundary point that both begin and end, the point itself
278 becomes a separate disjoint region. For example, if we have original
279 ranges of
281 |<--- 100 --->|
282 |<--- 200 --->|
283 A B C
285 there are three boundary points with their begin/end counts of
287 A: (100, 0)
288 B: (200, 100)
289 C: (0, 200)
291 the disjoint ranges would be
293 [A, B-1]: 100
294 [B, B]: 300
295 [B+1, C]: 200.
297 Example for zero value range:
299 |<--- 100 --->|
300 |<--- 200 --->|
301 |<--------------- 0 ----------------->|
302 A B C D E F
304 [A, B-1] : 0
305 [B, C] : 100
306 [C+1, D-1]: 0
307 [D, E] : 200
308 [E+1, F] : 0
310 std::map<uint64_t, BoundaryPoint> Boundaries;
312 for (const auto &Item : Ranges) {
313 assert(Item.first.first <= Item.first.second &&
314 "Invalid instruction range");
315 auto &BeginPoint = Boundaries[Item.first.first];
316 auto &EndPoint = Boundaries[Item.first.second];
317 uint64_t Count = Item.second;
319 BeginPoint.addBeginCount(Count);
320 EndPoint.addEndCount(Count);
321 if (Count == 0) {
322 BeginPoint.IsZeroRangeBegin = true;
323 EndPoint.IsZeroRangeEnd = true;
327 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
328 // and the next valid address
329 uint64_t BeginAddress = UINT64_MAX;
330 int ZeroRangeDepth = 0;
331 uint64_t Count = 0;
332 for (const auto &Item : Boundaries) {
333 uint64_t Address = Item.first;
334 const BoundaryPoint &Point = Item.second;
335 if (Point.BeginCount != UINT64_MAX) {
336 if (BeginAddress != UINT64_MAX)
337 DisjointRanges[{BeginAddress, Address - 1}] = Count;
338 Count += Point.BeginCount;
339 BeginAddress = Address;
340 ZeroRangeDepth += Point.IsZeroRangeBegin;
342 if (Point.EndCount != UINT64_MAX) {
343 assert((BeginAddress != UINT64_MAX) &&
344 "First boundary point cannot be 'end' point");
345 DisjointRanges[{BeginAddress, Address}] = Count;
346 assert(Count >= Point.EndCount && "Mismatched live ranges");
347 Count -= Point.EndCount;
348 BeginAddress = Address + 1;
349 ZeroRangeDepth -= Point.IsZeroRangeEnd;
350 // If the remaining count is zero and it's no longer in a zero range, this
351 // means we consume all the ranges before, thus mark BeginAddress as
352 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
353 // [<---- 10 ---->]
354 // [<---- 20 ---->]
355 // A B C D
356 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
357 // have the [B+1, C-1] zero range.
358 if (Count == 0 && ZeroRangeDepth == 0)
359 BeginAddress = UINT64_MAX;
364 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
365 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
366 uint64_t Count) {
367 // Use the maximum count of samples with same line location
368 uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
370 // Use duplication factor to compensated for loop unroll/vectorization.
371 // Note that this is only needed when we're taking MAX of the counts at
372 // the location instead of SUM.
373 Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
375 ErrorOr<uint64_t> R =
376 FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
378 uint64_t PreviousCount = R ? R.get() : 0;
379 if (PreviousCount <= Count) {
380 FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
381 Count - PreviousCount);
385 void ProfileGeneratorBase::updateTotalSamples() {
386 for (auto &Item : ProfileMap) {
387 FunctionSamples &FunctionProfile = Item.second;
388 FunctionProfile.updateTotalSamples();
392 void ProfileGeneratorBase::updateCallsiteSamples() {
393 for (auto &Item : ProfileMap) {
394 FunctionSamples &FunctionProfile = Item.second;
395 FunctionProfile.updateCallsiteSamples();
399 void ProfileGeneratorBase::updateFunctionSamples() {
400 updateCallsiteSamples();
402 if (UpdateTotalSamples)
403 updateTotalSamples();
406 void ProfileGeneratorBase::collectProfiledFunctions() {
407 std::unordered_set<const BinaryFunction *> ProfiledFunctions;
408 if (collectFunctionsFromRawProfile(ProfiledFunctions))
409 Binary->setProfiledFunctions(ProfiledFunctions);
410 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
411 Binary->setProfiledFunctions(ProfiledFunctions);
412 else
413 llvm_unreachable("Unsupported input profile");
416 bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
417 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
418 if (!SampleCounters)
419 return false;
420 // Go through all the stacks, ranges and branches in sample counters, use
421 // the start of the range to look up the function it belongs and record the
422 // function.
423 for (const auto &CI : *SampleCounters) {
424 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
425 for (auto StackAddr : CtxKey->Context) {
426 if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
427 ProfiledFunctions.insert(FRange->Func);
431 for (auto Item : CI.second.RangeCounter) {
432 uint64_t StartAddress = Item.first.first;
433 if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
434 ProfiledFunctions.insert(FRange->Func);
437 for (auto Item : CI.second.BranchCounter) {
438 uint64_t SourceAddress = Item.first.first;
439 uint64_t TargetAddress = Item.first.second;
440 if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
441 ProfiledFunctions.insert(FRange->Func);
442 if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
443 ProfiledFunctions.insert(FRange->Func);
446 return true;
449 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
450 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
451 for (const auto &FS : ProfileMap) {
452 if (auto *Func = Binary->getBinaryFunction(FS.second.getFunction()))
453 ProfiledFunctions.insert(Func);
455 return true;
458 bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
459 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
460 for (auto *Node : ContextTracker) {
461 if (!Node->getFuncName().empty())
462 if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
463 ProfiledFunctions.insert(Func);
465 return true;
468 FunctionSamples &
469 ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
470 SampleContext Context(FuncName);
471 return ProfileMap.Create(Context);
474 void ProfileGenerator::generateProfile() {
475 collectProfiledFunctions();
477 if (Binary->usePseudoProbes())
478 Binary->decodePseudoProbe();
480 if (SampleCounters) {
481 if (Binary->usePseudoProbes()) {
482 generateProbeBasedProfile();
483 } else {
484 generateLineNumBasedProfile();
488 postProcessProfiles();
491 void ProfileGenerator::postProcessProfiles() {
492 computeSummaryAndThreshold(ProfileMap);
493 trimColdProfiles(ProfileMap, ColdCountThreshold);
494 calculateAndShowDensity(ProfileMap);
497 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
498 uint64_t ColdCntThreshold) {
499 if (!TrimColdProfile)
500 return;
502 // Move cold profiles into a tmp container.
503 std::vector<hash_code> ColdProfileHashes;
504 for (const auto &I : ProfileMap) {
505 if (I.second.getTotalSamples() < ColdCntThreshold)
506 ColdProfileHashes.emplace_back(I.first);
509 // Remove the cold profile from ProfileMap.
510 for (const auto &I : ColdProfileHashes)
511 ProfileMap.erase(I);
514 void ProfileGenerator::generateLineNumBasedProfile() {
515 assert(SampleCounters->size() == 1 &&
516 "Must have one entry for profile generation.");
517 const SampleCounter &SC = SampleCounters->begin()->second;
518 // Fill in function body samples
519 populateBodySamplesForAllFunctions(SC.RangeCounter);
520 // Fill in boundary sample counts as well as call site samples for calls
521 populateBoundarySamplesForAllFunctions(SC.BranchCounter);
523 updateFunctionSamples();
526 void ProfileGenerator::generateProbeBasedProfile() {
527 assert(SampleCounters->size() == 1 &&
528 "Must have one entry for profile generation.");
529 // Enable pseudo probe functionalities in SampleProf
530 FunctionSamples::ProfileIsProbeBased = true;
531 const SampleCounter &SC = SampleCounters->begin()->second;
532 // Fill in function body samples
533 populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
534 // Fill in boundary sample counts as well as call site samples for calls
535 populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
537 updateFunctionSamples();
540 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
541 const RangeSample &RangeCounter) {
542 ProbeCounterMap ProbeCounter;
543 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
544 // inside extractProbesFromRange.
545 extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
546 false);
548 for (const auto &PI : ProbeCounter) {
549 const MCDecodedPseudoProbe *Probe = PI.first;
550 uint64_t Count = PI.second;
551 SampleContextFrameVector FrameVec;
552 Binary->getInlineContextForProbe(Probe, FrameVec, true);
553 FunctionSamples &FunctionProfile =
554 getLeafProfileAndAddTotalSamples(FrameVec, Count);
555 FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
556 Count);
557 if (Probe->isEntry())
558 FunctionProfile.addHeadSamples(Count);
562 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
563 const BranchSample &BranchCounters) {
564 for (const auto &Entry : BranchCounters) {
565 uint64_t SourceAddress = Entry.first.first;
566 uint64_t TargetAddress = Entry.first.second;
567 uint64_t Count = Entry.second;
568 assert(Count != 0 && "Unexpected zero weight branch");
570 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
571 if (CalleeName.size() == 0)
572 continue;
574 const MCDecodedPseudoProbe *CallProbe =
575 Binary->getCallProbeForAddr(SourceAddress);
576 if (CallProbe == nullptr)
577 continue;
579 // Record called target sample and its count.
580 SampleContextFrameVector FrameVec;
581 Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
583 if (!FrameVec.empty()) {
584 FunctionSamples &FunctionProfile =
585 getLeafProfileAndAddTotalSamples(FrameVec, 0);
586 FunctionProfile.addCalledTargetSamples(
587 FrameVec.back().Location.LineOffset,
588 FrameVec.back().Location.Discriminator,
589 FunctionId(CalleeName), Count);
594 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
595 const SampleContextFrameVector &FrameVec, uint64_t Count) {
596 // Get top level profile
597 FunctionSamples *FunctionProfile =
598 &getTopLevelFunctionProfile(FrameVec[0].Func);
599 FunctionProfile->addTotalSamples(Count);
600 if (Binary->usePseudoProbes()) {
601 const auto *FuncDesc = Binary->getFuncDescForGUID(
602 FunctionProfile->getFunction().getHashCode());
603 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
606 for (size_t I = 1; I < FrameVec.size(); I++) {
607 LineLocation Callsite(
608 FrameVec[I - 1].Location.LineOffset,
609 getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
610 FunctionSamplesMap &SamplesMap =
611 FunctionProfile->functionSamplesAt(Callsite);
612 auto Ret =
613 SamplesMap.emplace(FrameVec[I].Func, FunctionSamples());
614 if (Ret.second) {
615 SampleContext Context(FrameVec[I].Func);
616 Ret.first->second.setContext(Context);
618 FunctionProfile = &Ret.first->second;
619 FunctionProfile->addTotalSamples(Count);
620 if (Binary->usePseudoProbes()) {
621 const auto *FuncDesc = Binary->getFuncDescForGUID(
622 FunctionProfile->getFunction().getHashCode());
623 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
627 return *FunctionProfile;
630 RangeSample
631 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
632 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
633 if (FillZeroForAllFuncs) {
634 for (auto &FuncI : Binary->getAllBinaryFunctions()) {
635 for (auto &R : FuncI.second.Ranges) {
636 Ranges[{R.first, R.second - 1}] += 0;
639 } else {
640 // For each range, we search for all ranges of the function it belongs to
641 // and initialize it with zero count, so it remains zero if doesn't hit any
642 // samples. This is to be consistent with compiler that interpret zero count
643 // as unexecuted(cold).
644 for (const auto &I : RangeCounter) {
645 uint64_t StartAddress = I.first.first;
646 for (const auto &Range : Binary->getRanges(StartAddress))
647 Ranges[{Range.first, Range.second - 1}] += 0;
650 RangeSample DisjointRanges;
651 findDisjointRanges(DisjointRanges, Ranges);
652 return DisjointRanges;
655 void ProfileGenerator::populateBodySamplesForAllFunctions(
656 const RangeSample &RangeCounter) {
657 for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
658 uint64_t RangeBegin = Range.first.first;
659 uint64_t RangeEnd = Range.first.second;
660 uint64_t Count = Range.second;
662 InstructionPointer IP(Binary, RangeBegin, true);
663 // Disjoint ranges may have range in the middle of two instr,
664 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
665 // can be Addr1+1 to Addr2-1. We should ignore such range.
666 if (IP.Address > RangeEnd)
667 continue;
669 do {
670 const SampleContextFrameVector FrameVec =
671 Binary->getFrameLocationStack(IP.Address);
672 if (!FrameVec.empty()) {
673 // FIXME: As accumulating total count per instruction caused some
674 // regression, we changed to accumulate total count per byte as a
675 // workaround. Tuning hotness threshold on the compiler side might be
676 // necessary in the future.
677 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
678 FrameVec, Count * Binary->getInstSize(IP.Address));
679 updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
680 Count);
682 } while (IP.advance() && IP.Address <= RangeEnd);
686 StringRef
687 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
688 // Get the function range by branch target if it's a call branch.
689 auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
691 // We won't accumulate sample count for a range whose start is not the real
692 // function entry such as outlined function or inner labels.
693 if (!FRange || !FRange->IsFuncEntry)
694 return StringRef();
696 return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
699 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
700 const BranchSample &BranchCounters) {
701 for (const auto &Entry : BranchCounters) {
702 uint64_t SourceAddress = Entry.first.first;
703 uint64_t TargetAddress = Entry.first.second;
704 uint64_t Count = Entry.second;
705 assert(Count != 0 && "Unexpected zero weight branch");
707 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
708 if (CalleeName.size() == 0)
709 continue;
710 // Record called target sample and its count.
711 const SampleContextFrameVector &FrameVec =
712 Binary->getCachedFrameLocationStack(SourceAddress);
713 if (!FrameVec.empty()) {
714 FunctionSamples &FunctionProfile =
715 getLeafProfileAndAddTotalSamples(FrameVec, 0);
716 FunctionProfile.addCalledTargetSamples(
717 FrameVec.back().Location.LineOffset,
718 getBaseDiscriminator(FrameVec.back().Location.Discriminator),
719 FunctionId(CalleeName), Count);
721 // Add head samples for callee.
722 FunctionSamples &CalleeProfile =
723 getTopLevelFunctionProfile(FunctionId(CalleeName));
724 CalleeProfile.addHeadSamples(Count);
728 void ProfileGeneratorBase::calculateAndShowDensity(
729 const SampleProfileMap &Profiles) {
730 double Density = calculateDensity(Profiles, HotCountThreshold);
731 showDensitySuggestion(Density);
734 FunctionSamples *
735 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
736 bool WasLeafInlined) {
737 FunctionSamples *FProfile = ContextNode->getFunctionSamples();
738 if (!FProfile) {
739 FSamplesList.emplace_back();
740 FProfile = &FSamplesList.back();
741 FProfile->setFunction(ContextNode->getFuncName());
742 ContextNode->setFunctionSamples(FProfile);
744 // Update ContextWasInlined attribute for existing contexts.
745 // The current function can be called in two ways:
746 // - when processing a probe of the current frame
747 // - when processing the entry probe of an inlinee's frame, which
748 // is then used to update the callsite count of the current frame.
749 // The two can happen in any order, hence here we are making sure
750 // `ContextWasInlined` is always set as expected.
751 // TODO: Note that the former does not always happen if no probes of the
752 // current frame has samples, and if the latter happens, we could lose the
753 // attribute. This should be fixed.
754 if (WasLeafInlined)
755 FProfile->getContext().setAttribute(ContextWasInlined);
756 return FProfile;
759 ContextTrieNode *
760 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
761 bool WasLeafInlined) {
762 ContextTrieNode *ContextNode =
763 ContextTracker.getOrCreateContextPath(Context, true);
764 getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
765 return ContextNode;
768 void CSProfileGenerator::generateProfile() {
769 FunctionSamples::ProfileIsCS = true;
771 collectProfiledFunctions();
773 if (Binary->usePseudoProbes()) {
774 Binary->decodePseudoProbe();
775 if (InferMissingFrames)
776 initializeMissingFrameInferrer();
779 if (SampleCounters) {
780 if (Binary->usePseudoProbes()) {
781 generateProbeBasedProfile();
782 } else {
783 generateLineNumBasedProfile();
787 if (Binary->getTrackFuncContextSize())
788 computeSizeForProfiledFunctions();
790 postProcessProfiles();
793 void CSProfileGenerator::initializeMissingFrameInferrer() {
794 Binary->getMissingContextInferrer()->initialize(SampleCounters);
797 void CSProfileGenerator::inferMissingFrames(
798 const SmallVectorImpl<uint64_t> &Context,
799 SmallVectorImpl<uint64_t> &NewContext) {
800 Binary->inferMissingFrames(Context, NewContext);
803 void CSProfileGenerator::computeSizeForProfiledFunctions() {
804 for (auto *Func : Binary->getProfiledFunctions())
805 Binary->computeInlinedContextSizeForFunc(Func);
807 // Flush the symbolizer to save memory.
808 Binary->flushSymbolizer();
811 void CSProfileGenerator::updateFunctionSamples() {
812 for (auto *Node : ContextTracker) {
813 FunctionSamples *FSamples = Node->getFunctionSamples();
814 if (FSamples) {
815 if (UpdateTotalSamples)
816 FSamples->updateTotalSamples();
817 FSamples->updateCallsiteSamples();
822 void CSProfileGenerator::generateLineNumBasedProfile() {
823 for (const auto &CI : *SampleCounters) {
824 const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
826 ContextTrieNode *ContextNode = &getRootContext();
827 // Sample context will be empty if the jump is an external-to-internal call
828 // pattern, the head samples should be added for the internal function.
829 if (!CtxKey->Context.empty()) {
830 // Get or create function profile for the range
831 ContextNode =
832 getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
833 // Fill in function body samples
834 populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
835 CI.second.RangeCounter);
837 // Fill in boundary sample counts as well as call site samples for calls
838 populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
840 // Fill in call site value sample for inlined calls and also use context to
841 // infer missing samples. Since we don't have call count for inlined
842 // functions, we estimate it from inlinee's profile using the entry of the
843 // body sample.
844 populateInferredFunctionSamples(getRootContext());
846 updateFunctionSamples();
849 void CSProfileGenerator::populateBodySamplesForFunction(
850 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
851 // Compute disjoint ranges first, so we can use MAX
852 // for calculating count for each location.
853 RangeSample Ranges;
854 findDisjointRanges(Ranges, RangeCounter);
855 for (const auto &Range : Ranges) {
856 uint64_t RangeBegin = Range.first.first;
857 uint64_t RangeEnd = Range.first.second;
858 uint64_t Count = Range.second;
859 // Disjoint ranges have introduce zero-filled gap that
860 // doesn't belong to current context, filter them out.
861 if (Count == 0)
862 continue;
864 InstructionPointer IP(Binary, RangeBegin, true);
865 // Disjoint ranges may have range in the middle of two instr,
866 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
867 // can be Addr1+1 to Addr2-1. We should ignore such range.
868 if (IP.Address > RangeEnd)
869 continue;
871 do {
872 auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
873 if (LeafLoc) {
874 // Recording body sample for this specific context
875 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
876 FunctionProfile.addTotalSamples(Count);
878 } while (IP.advance() && IP.Address <= RangeEnd);
882 void CSProfileGenerator::populateBoundarySamplesForFunction(
883 ContextTrieNode *Node, const BranchSample &BranchCounters) {
885 for (const auto &Entry : BranchCounters) {
886 uint64_t SourceAddress = Entry.first.first;
887 uint64_t TargetAddress = Entry.first.second;
888 uint64_t Count = Entry.second;
889 assert(Count != 0 && "Unexpected zero weight branch");
891 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
892 if (CalleeName.size() == 0)
893 continue;
895 ContextTrieNode *CallerNode = Node;
896 LineLocation CalleeCallSite(0, 0);
897 if (CallerNode != &getRootContext()) {
898 // Record called target sample and its count
899 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
900 if (LeafLoc) {
901 CallerNode->getFunctionSamples()->addCalledTargetSamples(
902 LeafLoc->Location.LineOffset,
903 getBaseDiscriminator(LeafLoc->Location.Discriminator),
904 FunctionId(CalleeName),
905 Count);
906 // Record head sample for called target(callee)
907 CalleeCallSite = LeafLoc->Location;
911 ContextTrieNode *CalleeNode =
912 CallerNode->getOrCreateChildContext(CalleeCallSite,
913 FunctionId(CalleeName));
914 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
915 CalleeProfile->addHeadSamples(Count);
919 void CSProfileGenerator::populateInferredFunctionSamples(
920 ContextTrieNode &Node) {
921 // There is no call jmp sample between the inliner and inlinee, we need to use
922 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
923 // sample depends on child(inlinee)'s sample, so traverse the tree in
924 // post-order.
925 for (auto &It : Node.getAllChildContext())
926 populateInferredFunctionSamples(It.second);
928 FunctionSamples *CalleeProfile = Node.getFunctionSamples();
929 if (!CalleeProfile)
930 return;
931 // If we already have head sample counts, we must have value profile
932 // for call sites added already. Skip to avoid double counting.
933 if (CalleeProfile->getHeadSamples())
934 return;
935 ContextTrieNode *CallerNode = Node.getParentContext();
936 // If we don't have context, nothing to do for caller's call site.
937 // This could happen for entry point function.
938 if (CallerNode == &getRootContext())
939 return;
941 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
942 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
943 // Since we don't have call count for inlined functions, we
944 // estimate it from inlinee's profile using entry body sample.
945 uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
946 // If we don't have samples with location, use 1 to indicate live.
947 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
948 EstimatedCallCount = 1;
949 CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
950 CallerLeafFrameLoc.Discriminator,
951 Node.getFuncName(), EstimatedCallCount);
952 CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
953 CallerLeafFrameLoc.Discriminator,
954 EstimatedCallCount);
955 CallerProfile.addTotalSamples(EstimatedCallCount);
958 void CSProfileGenerator::convertToProfileMap(
959 ContextTrieNode &Node, SampleContextFrameVector &Context) {
960 FunctionSamples *FProfile = Node.getFunctionSamples();
961 if (FProfile) {
962 Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
963 // Save the new context for future references.
964 SampleContextFrames NewContext = *Contexts.insert(Context).first;
965 auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
966 FunctionSamples &NewProfile = Ret.first->second;
967 NewProfile.getContext().setContext(NewContext);
968 Context.pop_back();
971 for (auto &It : Node.getAllChildContext()) {
972 ContextTrieNode &ChildNode = It.second;
973 Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
974 convertToProfileMap(ChildNode, Context);
975 Context.pop_back();
979 void CSProfileGenerator::convertToProfileMap() {
980 assert(ProfileMap.empty() &&
981 "ProfileMap should be empty before converting from the trie");
982 assert(IsProfileValidOnTrie &&
983 "Do not convert the trie twice, it's already destroyed");
985 SampleContextFrameVector Context;
986 for (auto &It : getRootContext().getAllChildContext())
987 convertToProfileMap(It.second, Context);
989 IsProfileValidOnTrie = false;
992 void CSProfileGenerator::postProcessProfiles() {
993 // Compute hot/cold threshold based on profile. This will be used for cold
994 // context profile merging/trimming.
995 computeSummaryAndThreshold();
997 // Run global pre-inliner to adjust/merge context profile based on estimated
998 // inline decisions.
999 if (EnableCSPreInliner) {
1000 ContextTracker.populateFuncToCtxtMap();
1001 CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
1002 // Turn off the profile merger by default unless it is explicitly enabled.
1003 if (!CSProfMergeColdContext.getNumOccurrences())
1004 CSProfMergeColdContext = false;
1007 convertToProfileMap();
1009 // Trim and merge cold context profile using cold threshold above.
1010 if (TrimColdProfile || CSProfMergeColdContext) {
1011 SampleContextTrimmer(ProfileMap)
1012 .trimAndMergeColdContextProfiles(
1013 HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1014 CSProfMaxColdContextDepth, EnableCSPreInliner);
1017 // Merge function samples of CS profile to calculate profile density.
1018 sampleprof::SampleProfileMap ContextLessProfiles;
1019 ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
1021 calculateAndShowDensity(ContextLessProfiles);
1022 if (GenCSNestedProfile) {
1023 ProfileConverter CSConverter(ProfileMap);
1024 CSConverter.convertCSProfiles();
1025 FunctionSamples::ProfileIsCS = false;
1029 void ProfileGeneratorBase::computeSummaryAndThreshold(
1030 SampleProfileMap &Profiles) {
1031 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1032 Summary = Builder.computeSummaryForProfiles(Profiles);
1033 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1034 (Summary->getDetailedSummary()));
1035 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1036 (Summary->getDetailedSummary()));
1039 void CSProfileGenerator::computeSummaryAndThreshold() {
1040 // Always merge and use context-less profile map to compute summary.
1041 SampleProfileMap ContextLessProfiles;
1042 ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1044 // Set the flag below to avoid merging the profile again in
1045 // computeSummaryAndThreshold
1046 FunctionSamples::ProfileIsCS = false;
1047 assert(
1048 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1049 "Don't set --profile-summary-contextless to false for profile "
1050 "generation");
1051 ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1052 // Recover the old value.
1053 FunctionSamples::ProfileIsCS = true;
1056 void ProfileGeneratorBase::extractProbesFromRange(
1057 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1058 bool FindDisjointRanges) {
1059 const RangeSample *PRanges = &RangeCounter;
1060 RangeSample Ranges;
1061 if (FindDisjointRanges) {
1062 findDisjointRanges(Ranges, RangeCounter);
1063 PRanges = &Ranges;
1066 for (const auto &Range : *PRanges) {
1067 uint64_t RangeBegin = Range.first.first;
1068 uint64_t RangeEnd = Range.first.second;
1069 uint64_t Count = Range.second;
1071 InstructionPointer IP(Binary, RangeBegin, true);
1072 // Disjoint ranges may have range in the middle of two instr,
1073 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1074 // can be Addr1+1 to Addr2-1. We should ignore such range.
1075 if (IP.Address > RangeEnd)
1076 continue;
1078 do {
1079 const AddressProbesMap &Address2ProbesMap =
1080 Binary->getAddress2ProbesMap();
1081 auto It = Address2ProbesMap.find(IP.Address);
1082 if (It != Address2ProbesMap.end()) {
1083 for (const auto &Probe : It->second) {
1084 ProbeCounter[&Probe] += Count;
1087 } while (IP.advance() && IP.Address <= RangeEnd);
1091 static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1092 const SmallVectorImpl<uint64_t> &AddrVec,
1093 ProfiledBinary *Binary) {
1094 SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1095 for (auto Address : reverse(AddrVec)) {
1096 const MCDecodedPseudoProbe *CallProbe =
1097 Binary->getCallProbeForAddr(Address);
1098 // These could be the cases when a probe is not found at a calliste. Cutting
1099 // off the context from here since the inliner will not know how to consume
1100 // a context with unknown callsites.
1101 // 1. for functions that are not sampled when
1102 // --decode-probe-for-profiled-functions-only is on.
1103 // 2. for a merged callsite. Callsite merging may cause the loss of original
1104 // probe IDs.
1105 // 3. for an external callsite.
1106 if (!CallProbe)
1107 break;
1108 Probes.push_back(CallProbe);
1111 std::reverse(Probes.begin(), Probes.end());
1113 // Extract context stack for reusing, leaf context stack will be added
1114 // compressed while looking up function profile.
1115 for (const auto *P : Probes) {
1116 Binary->getInlineContextForProbe(P, ContextStack, true);
1120 void CSProfileGenerator::generateProbeBasedProfile() {
1121 // Enable pseudo probe functionalities in SampleProf
1122 FunctionSamples::ProfileIsProbeBased = true;
1123 for (const auto &CI : *SampleCounters) {
1124 const AddrBasedCtxKey *CtxKey =
1125 dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1126 // Fill in function body samples from probes, also infer caller's samples
1127 // from callee's probe
1128 populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey);
1129 // Fill in boundary samples for a call probe
1130 populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey);
1134 void CSProfileGenerator::populateBodySamplesWithProbes(
1135 const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) {
1136 ProbeCounterMap ProbeCounter;
1137 // Extract the top frame probes by looking up each address among the range in
1138 // the Address2ProbeMap
1139 extractProbesFromRange(RangeCounter, ProbeCounter);
1140 std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1141 std::unordered_set<FunctionSamples *>>
1142 FrameSamples;
1143 for (const auto &PI : ProbeCounter) {
1144 const MCDecodedPseudoProbe *Probe = PI.first;
1145 uint64_t Count = PI.second;
1146 // Disjoint ranges have introduce zero-filled gap that
1147 // doesn't belong to current context, filter them out.
1148 if (!Probe->isBlock() || Count == 0)
1149 continue;
1151 ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe);
1152 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1153 // Record the current frame and FunctionProfile whenever samples are
1154 // collected for non-danglie probes. This is for reporting all of the
1155 // zero count probes of the frame later.
1156 FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1157 FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
1158 Count);
1159 FunctionProfile.addTotalSamples(Count);
1160 if (Probe->isEntry()) {
1161 FunctionProfile.addHeadSamples(Count);
1162 // Look up for the caller's function profile
1163 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1164 ContextTrieNode *CallerNode = ContextNode->getParentContext();
1165 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1166 // Since the context id will be compressed, we have to use callee's
1167 // context id to infer caller's context id to ensure they share the
1168 // same context prefix.
1169 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1170 uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
1171 assert(CallerIndex &&
1172 "Inferred caller's location index shouldn't be zero!");
1173 assert(!CallerDiscriminator &&
1174 "Callsite probe should not have a discriminator!");
1175 FunctionSamples &CallerProfile =
1176 *getOrCreateFunctionSamples(CallerNode);
1177 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1178 CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count);
1179 CallerProfile.addTotalSamples(Count);
1180 CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator,
1181 ContextNode->getFuncName(), Count);
1186 // Assign zero count for remaining probes without sample hits to
1187 // differentiate from probes optimized away, of which the counts are unknown
1188 // and will be inferred by the compiler.
1189 for (auto &I : FrameSamples) {
1190 for (auto *FunctionProfile : I.second) {
1191 for (auto *Probe : I.first->getProbes()) {
1192 FunctionProfile->addBodySamples(Probe->getIndex(),
1193 Probe->getDiscriminator(), 0);
1199 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1200 const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) {
1201 for (const auto &BI : BranchCounter) {
1202 uint64_t SourceAddress = BI.first.first;
1203 uint64_t TargetAddress = BI.first.second;
1204 uint64_t Count = BI.second;
1205 const MCDecodedPseudoProbe *CallProbe =
1206 Binary->getCallProbeForAddr(SourceAddress);
1207 if (CallProbe == nullptr)
1208 continue;
1209 FunctionSamples &FunctionProfile =
1210 getFunctionProfileForLeafProbe(CtxKey, CallProbe);
1211 FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1212 FunctionProfile.addTotalSamples(Count);
1213 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1214 if (CalleeName.size() == 0)
1215 continue;
1216 FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
1217 CallProbe->getDiscriminator(),
1218 FunctionId(CalleeName), Count);
1222 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1223 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1225 const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context;
1226 SmallVector<uint64_t, 16> NewContext;
1228 if (InferMissingFrames) {
1229 SmallVector<uint64_t, 16> Context = CtxKey->Context;
1230 // Append leaf frame for a complete inference.
1231 Context.push_back(LeafProbe->getAddress());
1232 inferMissingFrames(Context, NewContext);
1233 // Pop out the leaf probe that was pushed in above.
1234 NewContext.pop_back();
1235 PContext = &NewContext;
1238 SampleContextFrameVector ContextStack;
1239 extractPrefixContextStack(ContextStack, *PContext, Binary);
1241 // Explicitly copy the context for appending the leaf context
1242 SampleContextFrameVector NewContextStack(ContextStack.begin(),
1243 ContextStack.end());
1244 Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1245 // For leaf inlined context with the top frame, we should strip off the top
1246 // frame's probe id, like:
1247 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1248 auto LeafFrame = NewContextStack.back();
1249 LeafFrame.Location = LineLocation(0, 0);
1250 NewContextStack.pop_back();
1251 // Compress the context string except for the leaf frame
1252 CSProfileGenerator::compressRecursionContext(NewContextStack);
1253 CSProfileGenerator::trimContext(NewContextStack);
1254 NewContextStack.push_back(LeafFrame);
1256 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1257 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1258 ContextTrieNode *ContextNode =
1259 getOrCreateContextNode(NewContextStack, WasLeafInlined);
1260 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1261 return ContextNode;
1264 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1265 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1266 return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples();
1269 } // end namespace sampleprof
1270 } // end namespace llvm