1 //=-- SampleProf.cpp - Sample profiling format support --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ProfileData/SampleProf.h"
15 #include "llvm/Config/llvm-config.h"
16 #include "llvm/IR/DebugInfoMetadata.h"
17 #include "llvm/IR/PseudoProbe.h"
18 #include "llvm/ProfileData/SampleProfReader.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Compiler.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/LEB128.h"
25 #include "llvm/Support/ManagedStatic.h"
26 #include "llvm/Support/raw_ostream.h"
28 #include <system_error>
31 using namespace sampleprof
;
33 static cl::opt
<uint64_t> ProfileSymbolListCutOff(
34 "profile-symbol-list-cutoff", cl::Hidden
, cl::init(-1), cl::ZeroOrMore
,
35 cl::desc("Cutoff value about how many symbols in profile symbol list "
36 "will be used. This is very useful for performance debugging"));
38 cl::opt
<bool> GenerateMergedBaseProfiles(
39 "generate-merged-base-profiles", cl::init(true), cl::ZeroOrMore
,
40 cl::desc("When generating nested context-sensitive profiles, always "
41 "generate extra base profile for function with all its context "
42 "profiles merged into it."));
// Out-of-line definitions of the static data members of FunctionSamples.
45 namespace sampleprof
{
// Format is defined without an explicit initializer, unlike the flags below.
46 SampleProfileFormat
FunctionSamples::Format
;
47 bool FunctionSamples::ProfileIsProbeBased
= false;
48 bool FunctionSamples::ProfileIsCSFlat
= false;
49 bool FunctionSamples::ProfileIsCSNested
= false;
50 bool FunctionSamples::UseMD5
= false;
// HasUniqSuffix is the only boolean flag here that starts out true.
51 bool FunctionSamples::HasUniqSuffix
= true;
52 bool FunctionSamples::ProfileIsFS
= false;
53 } // namespace sampleprof
58 // FIXME: This class is only here to support the transition to llvm::Error. It
59 // will be removed once this transition is complete. Clients should prefer to
60 // deal with the Error value directly, rather than converting to error_code.
//
// std::error_category implementation that supplies the category name and the
// human-readable message text for sampleprof_error values.
61 class SampleProfErrorCategoryType
: public std::error_category
{
// Name identifying this error category.
62 const char *name() const noexcept override
{ return "llvm.sampleprof"; }
// Returns the message text for the sampleprof_error value encoded in IE.
64 std::string
message(int IE
) const override
{
// Convert the raw integer back to the enum so it can be matched by case.
65 sampleprof_error E
= static_cast<sampleprof_error
>(IE
);
67 case sampleprof_error::success
:
69 case sampleprof_error::bad_magic
:
70 return "Invalid sample profile data (bad magic)";
71 case sampleprof_error::unsupported_version
:
72 return "Unsupported sample profile format version";
73 case sampleprof_error::too_large
:
74 return "Too much profile data";
75 case sampleprof_error::truncated
:
76 return "Truncated profile data";
77 case sampleprof_error::malformed
:
78 return "Malformed sample profile data";
79 case sampleprof_error::unrecognized_format
:
80 return "Unrecognized sample profile encoding format";
81 case sampleprof_error::unsupported_writing_format
:
82 return "Profile encoding format unsupported for writing operations";
83 case sampleprof_error::truncated_name_table
:
84 return "Truncated function name table";
85 case sampleprof_error::not_implemented
:
86 return "Unimplemented feature";
87 case sampleprof_error::counter_overflow
:
88 return "Counter overflow";
89 case sampleprof_error::ostream_seek_unsupported
:
90 return "Ostream does not support seek";
91 case sampleprof_error::compress_failed
:
92 return "Compress failure";
93 case sampleprof_error::uncompress_failed
:
94 return "Uncompress failure";
95 case sampleprof_error::zlib_unavailable
:
96 return "Zlib is unavailable";
97 case sampleprof_error::hash_mismatch
:
98 return "Function hash mismatch";
// Every sampleprof_error value is expected to be matched by a case label.
100 llvm_unreachable("A value of sampleprof_error has no message.");
104 } // end anonymous namespace
106 static ManagedStatic
<SampleProfErrorCategoryType
> ErrorCategory
;
// Accessor for the sampleprof error category: returns a reference obtained by
// dereferencing the file-level ErrorCategory object.
108 const std::error_category
&llvm::sampleprof_category() {
109 return *ErrorCategory
;
/// Print this location to \p OS. A "." followed by the discriminator is
/// written only when the discriminator is greater than zero.
112 void LineLocation::print(raw_ostream
&OS
) const {
114 if (Discriminator
> 0)
115 OS
<< "." << Discriminator
;
/// Stream insertion operator taking a LineLocation \p Loc and a stream \p OS.
118 raw_ostream
&llvm::sampleprof::operator<<(raw_ostream
&OS
,
119 const LineLocation
&Loc
) {
124 /// Merge the samples in \p Other into this record.
125 /// Optionally scale sample counts by \p Weight.
126 sampleprof_error
SampleRecord::merge(const SampleRecord
&Other
,
128 sampleprof_error Result
;
// Start from the status of adding Other's sample count, scaled by Weight.
129 Result
= addSamples(Other
.getSamples(), Weight
);
// Add every call target of Other with the same Weight; each status is
// combined into Result through MergeResult.
130 for (const auto &I
: Other
.getCallTargets()) {
131 MergeResult(Result
, addCalledTarget(I
.first(), I
.second
, Weight
));
// Dump helper guarded by !NDEBUG or LLVM_ENABLE_DUMP: prints this location to
// the dbgs() stream.
136 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
137 LLVM_DUMP_METHOD
void LineLocation::dump() const { print(dbgs()); }
140 /// Print the sample record to the stream \p OS indented by \p Indent.
141 void SampleRecord::print(raw_ostream
&OS
, unsigned Indent
) const {
// Append each call target as " <first>:<second>", visiting them in the order
// produced by getSortedCallTargets().
145 for (const auto &I
: getSortedCallTargets())
146 OS
<< " " << I
.first
<< ":" << I
.second
;
// Dump helper guarded by !NDEBUG or LLVM_ENABLE_DUMP: prints this record to
// the dbgs() stream with an indent of 0.
151 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
152 LLVM_DUMP_METHOD
void SampleRecord::dump() const { print(dbgs(), 0); }
/// Stream insertion operator taking a SampleRecord \p Sample and a stream
/// \p OS.
155 raw_ostream
&llvm::sampleprof::operator<<(raw_ostream
&OS
,
156 const SampleRecord
&Sample
) {
161 /// Print the samples collected for a function on stream \p OS.
162 void FunctionSamples::print(raw_ostream
&OS
, unsigned Indent
) const {
// The CFG checksum line is written only when getFunctionHash() is non-zero.
163 if (getFunctionHash())
164 OS
<< "CFG checksum " << getFunctionHash() << "\n";
// Summary line: total samples, total head samples, and the number of entries
// in BodySamples.
166 OS
<< TotalSamples
<< ", " << TotalHeadSamples
<< ", " << BodySamples
.size()
167 << " sampled lines\n";
// Body samples are visited through a SampleSorter built over BodySamples;
// each entry is printed as "<location>: <record>" at Indent + 2.
170 if (!BodySamples
.empty()) {
171 OS
<< "Samples collected in the function's body {\n";
172 SampleSorter
<LineLocation
, SampleRecord
> SortedBodySamples(BodySamples
);
173 for (const auto &SI
: SortedBodySamples
.get()) {
174 OS
.indent(Indent
+ 2);
175 OS
<< SI
->first
<< ": " << SI
->second
;
180 OS
<< "No samples collected in the function's body\n";
// Inlined callsites: for every location and every callee profile recorded
// there, print a header at Indent + 2 and then the callee profile itself
// recursively at Indent + 4.
184 if (!CallsiteSamples
.empty()) {
185 OS
<< "Samples collected in inlined callsites {\n";
186 SampleSorter
<LineLocation
, FunctionSamplesMap
> SortedCallsiteSamples(
188 for (const auto &CS
: SortedCallsiteSamples
.get()) {
189 for (const auto &FS
: CS
->second
) {
190 OS
.indent(Indent
+ 2);
191 OS
<< CS
->first
<< ": inlined callee: " << FS
.second
.getName() << ": ";
192 FS
.second
.print(OS
, Indent
+ 4);
198 OS
<< "No inlined callsites in this function\n";
/// Stream insertion operator taking a FunctionSamples \p FS and a stream
/// \p OS.
202 raw_ostream
&llvm::sampleprof::operator<<(raw_ostream
&OS
,
203 const FunctionSamples
&FS
) {
/// Append a (context, pointer-to-profile) pair to \p SortedProfiles for every
/// entry of \p ProfileMap, then stable-sort \p SortedProfiles by descending
/// total sample count, breaking ties by ascending pair key.
208 void sampleprof::sortFuncProfiles(
209 const SampleProfileMap
&ProfileMap
,
210 std::vector
<NameFunctionSamples
> &SortedProfiles
) {
211 for (const auto &I
: ProfileMap
) {
// The map key must agree with the context stored inside the profile.
212 assert(I
.first
== I
.second
.getContext() && "Inconsistent profile map");
213 SortedProfiles
.push_back(std::make_pair(I
.second
.getContext(), &I
.second
));
215 llvm::stable_sort(SortedProfiles
, [](const NameFunctionSamples
&A
,
216 const NameFunctionSamples
&B
) {
// Equal totals: order by the pair's first member.
217 if (A
.second
->getTotalSamples() == B
.second
->getTotalSamples())
218 return A
.first
< B
.first
;
// Otherwise the profile with more total samples comes first.
219 return A
.second
->getTotalSamples() > B
.second
->getTotalSamples();
/// Compute the line of \p DIL relative to the line of the subprogram that
/// encloses its scope. The difference is then bitwise-ANDed with a mask.
223 unsigned FunctionSamples::getOffset(const DILocation
*DIL
) {
224 return (DIL
->getLine() - DIL
->getScope()->getSubprogram()->getLine()) &
/// Build the LineLocation that identifies the callsite at \p DIL. For
/// probe-based profiles it is built from the probe index extracted from the
/// discriminator; otherwise from the line offset and a discriminator.
228 LineLocation
FunctionSamples::getCallSiteIdentifier(const DILocation
*DIL
,
230 if (FunctionSamples::ProfileIsProbeBased
) {
231 // In a pseudo-probe based profile, a callsite is simply represented by the
232 // ID of the probe associated with the call instruction. The probe ID is
233 // encoded in the Discriminator field of the call instruction's debug
235 return LineLocation(PseudoProbeDwarfDiscriminator::extractProbeIndex(
236 DIL
->getDiscriminator()),
// Use the full discriminator when ProfileIsFS is true, otherwise only the
// base discriminator.
239 unsigned Discriminator
=
240 ProfileIsFS
? DIL
->getDiscriminator() : DIL
->getBaseDiscriminator();
241 return LineLocation(FunctionSamples::getOffset(DIL
), Discriminator
);
/// Combine a hash of \p CalleeName with the location \p Callsite into a
/// single 64-bit value.
245 uint64_t FunctionSamples::getCallSiteHash(StringRef CalleeName
,
246 const LineLocation
&Callsite
) {
// Hash the callee name with std::hash<std::string>.
247 uint64_t NameHash
= std::hash
<std::string
>{}(CalleeName
.str());
// Line offset shifted into the upper 32 bits, OR-ed with the discriminator.
249 (((uint64_t)Callsite
.LineOffset
) << 32) | Callsite
.Discriminator
;
// (LocId << 5) + LocId equals LocId * 33 in wrapping 64-bit arithmetic.
250 return NameHash
+ (LocId
<< 5) + LocId
;
/// Walk the inlined-at chain of \p DIL collecting (callsite, callee name)
/// pairs, then descend from this profile through findFunctionSamplesAt,
/// replaying the collected pairs from last to first. \p Remapper is forwarded
/// to each lookup.
253 const FunctionSamples
*FunctionSamples::findFunctionSamples(
254 const DILocation
*DIL
, SampleProfileReaderItaniumRemapper
*Remapper
) const {
// Stack of (callsite location, callee name) pairs, one per inlining level.
256 SmallVector
<std::pair
<LineLocation
, StringRef
>, 10> S
;
// PrevDIL names the callee for the callsite described by DIL.
258 const DILocation
*PrevDIL
= DIL
;
259 for (DIL
= DIL
->getInlinedAt(); DIL
; DIL
= DIL
->getInlinedAt()) {
260 // Use C++ linkage name if possible.
261 StringRef Name
= PrevDIL
->getScope()->getSubprogram()->getLinkageName();
// Plain subprogram name, the alternative to the linkage name.
263 Name
= PrevDIL
->getScope()->getSubprogram()->getName();
264 S
.emplace_back(FunctionSamples::getCallSiteIdentifier(
265 DIL
, FunctionSamples::ProfileIsFS
),
// Replay the frames in reverse insertion order; the loop stops as soon as a
// lookup yields nullptr.
272 const FunctionSamples
*FS
= this;
273 for (int i
= S
.size() - 1; i
>= 0 && FS
!= nullptr; i
--) {
274 FS
= FS
->findFunctionSamplesAt(S
[i
].first
, S
[i
].second
, Remapper
);
/// Insert into \p NameSet this function's name, the key of every call target
/// in the body samples, the name of every inlined callee profile, and,
/// recursively, all names found inside those callee profiles.
279 void FunctionSamples::findAllNames(DenseSet
<StringRef
> &NameSet
) const {
280 NameSet
.insert(getName());
// Call targets recorded on body sample records.
281 for (const auto &BS
: BodySamples
)
282 for (const auto &TS
: BS
.second
.getCallTargets())
283 NameSet
.insert(TS
.getKey());
// Inlined callee profiles: add the map key, then recurse into the profile.
285 for (const auto &CS
: CallsiteSamples
) {
286 for (const auto &NameFS
: CS
.second
) {
287 NameSet
.insert(NameFS
.first
);
288 NameFS
.second
.findAllNames(NameSet
);
/// Look up the inlined callee profile recorded at callsite \p Loc for
/// \p CalleeName. The name is tried directly first, then through the name
/// offered by \p Remapper; a scan over all callees at the callsite by total
/// sample count follows (see the comment before that scan).
293 const FunctionSamples
*FunctionSamples::findFunctionSamplesAt(
294 const LineLocation
&Loc
, StringRef CalleeName
,
295 SampleProfileReaderItaniumRemapper
*Remapper
) const {
// Canonicalize the callee name before any lookup.
296 CalleeName
= getCanonicalFnName(CalleeName
);
// Convert the name through getRepInFormat, passing UseMD5 and CalleeGUID.
298 std::string CalleeGUID
;
299 CalleeName
= getRepInFormat(CalleeName
, UseMD5
, CalleeGUID
);
// Locate the callsite entry for Loc.
301 auto iter
= CallsiteSamples
.find(Loc
);
302 if (iter
== CallsiteSamples
.end())
// First attempt: the callee name itself.
304 auto FS
= iter
->second
.find(CalleeName
);
305 if (FS
!= iter
->second
.end())
// Second attempt: the name the remapper reports for this callee, if any.
308 if (auto NameInProfile
= Remapper
->lookUpNameInProfile(CalleeName
)) {
309 auto FS
= iter
->second
.find(*NameInProfile
);
310 if (FS
!= iter
->second
.end())
314 // If we cannot find exact match of the callee name, return the FS with
315 // the max total count. Only do this when CalleeName is not provided,
316 // i.e., only for indirect calls.
317 if (!CalleeName
.empty())
// Track the largest total sample count seen so far; because the comparison
// is >=, a later profile with an equal count also updates the maximum.
319 uint64_t MaxTotalSamples
= 0;
320 const FunctionSamples
*R
= nullptr;
321 for (const auto &NameFS
: iter
->second
)
322 if (NameFS
.second
.getTotalSamples() >= MaxTotalSamples
) {
323 MaxTotalSamples
= NameFS
.second
.getTotalSamples();
// Dump helper guarded by !NDEBUG or LLVM_ENABLE_DUMP: prints this profile to
// the dbgs() stream with an indent of 0.
329 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
330 LLVM_DUMP_METHOD
void FunctionSamples::dump() const { print(dbgs(), 0); }
/// Read a sequence of NUL-terminated strings starting at \p Data. Returns
/// sampleprof_error::malformed if reading stops at an offset other than
/// ListSize without the cutoff having been reached, and
/// sampleprof_error::success otherwise.
333 std::error_code
ProfileSymbolList::read(const uint8_t *Data
,
// View the raw bytes as characters so each string can be wrapped in StringRef.
335 const char *ListStart
= reinterpret_cast<const char *>(Data
);
// Continue until ListSize bytes are covered or StrNum reaches the
// ProfileSymbolListCutOff option value.
338 while (Size
< ListSize
&& StrNum
< ProfileSymbolListCutOff
) {
339 StringRef
Str(ListStart
+ Size
);
// Advance past the string and its terminating NUL byte.
341 Size
+= Str
.size() + 1;
// Ending off the exact list boundary is an error unless the cutoff was hit.
344 if (Size
!= ListSize
&& StrNum
!= ProfileSymbolListCutOff
)
345 return sampleprof_error::malformed
;
346 return sampleprof_error::success
;
/// Collect the profiles in ProfileMap whose total samples are below
/// \p ColdCountThreshold, erase them from ProfileMap, and, when
/// \p MergeColdContext is set, merge them keyed by at most their last
/// \p ColdContextFrameLength context frames before adding the merged profiles
/// back into ProfileMap.
349 void SampleContextTrimmer::trimAndMergeColdContextProfiles(
350 uint64_t ColdCountThreshold
, bool TrimColdContext
, bool MergeColdContext
,
351 uint32_t ColdContextFrameLength
, bool TrimBaseProfileOnly
) {
352 if (!TrimColdContext
&& !MergeColdContext
)
355 // Nothing to merge if sample threshold is zero
356 if (ColdCountThreshold
== 0)
359 // Trimming base profiles only is mainly to honor the preinliner decision. When
360 // MergeColdContext is true preinliner decision is not honored anyway so turn
361 // off TrimBaseProfileOnly.
362 if (MergeColdContext
)
363 TrimBaseProfileOnly
= false;
365 // Filter the cold profiles from ProfileMap and move them into a tmp
367 std::vector
<std::pair
<SampleContext
, const FunctionSamples
*>> ColdProfiles
;
368 for (const auto &I
: ProfileMap
) {
369 const SampleContext
&Context
= I
.first
;
370 const FunctionSamples
&FunctionProfile
= I
.second
;
// Cold means below the threshold; with TrimBaseProfileOnly set, only base
// contexts qualify.
371 if (FunctionProfile
.getTotalSamples() < ColdCountThreshold
&&
372 (!TrimBaseProfileOnly
|| Context
.isBaseContext()))
373 ColdProfiles
.emplace_back(Context
, &I
.second
);
376 // Remove the cold profile from ProfileMap and merge them into
377 // MergedProfileMap by the last K frames of context
378 SampleProfileMap MergedProfileMap
;
379 for (const auto &I
: ColdProfiles
) {
380 if (MergeColdContext
) {
381 auto MergedContext
= I
.second
->getContext().getContextFrames();
// Keep only the trailing ColdContextFrameLength frames when the context is
// longer than that.
382 if (ColdContextFrameLength
< MergedContext
.size())
383 MergedContext
= MergedContext
.take_back(ColdContextFrameLength
);
384 auto Ret
= MergedProfileMap
.emplace(MergedContext
, FunctionSamples());
385 FunctionSamples
&MergedProfile
= Ret
.first
->second
;
386 MergedProfile
.merge(*I
.second
);
388 ProfileMap
.erase(I
.first
);
391 // Move the merged profiles into ProfileMap;
392 for (const auto &I
: MergedProfileMap
) {
393 // Filter the cold merged profile
394 if (TrimColdContext
&& I
.second
.getTotalSamples() < ColdCountThreshold
&&
395 ProfileMap
.find(I
.first
) == ProfileMap
.end())
397 // Merge the profile if the original profile exists, otherwise just insert
399 auto Ret
= ProfileMap
.emplace(I
.first
, FunctionSamples());
401 SampleContext
FContext(Ret
.first
->first
, RawContext
);
402 FunctionSamples
&FProfile
= Ret
.first
->second
;
403 FProfile
.setContext(FContext
);
405 FunctionSamples
&OrigProfile
= Ret
.first
->second
;
406 OrigProfile
.merge(I
.second
);
/// Bring the keys of ProfileMap back in sync with the contexts stored in the
/// FunctionSamples values: mismatching entries are cached in
/// ProfilesToBeAdded under their stored context and their old keys are queued
/// in ProfilesToBeRemoved before the map itself is updated.
410 void SampleContextTrimmer::canonicalizeContextProfiles() {
411 std::vector
<SampleContext
> ProfilesToBeRemoved
;
412 SampleProfileMap ProfilesToBeAdded
;
413 for (auto &I
: ProfileMap
) {
414 FunctionSamples
&FProfile
= I
.second
;
415 SampleContext
&Context
= FProfile
.getContext();
// Compare the map key with the context stored in the profile.
416 if (I
.first
== Context
)
419 // Use the context string from FunctionSamples to update the keys of
420 // ProfileMap. They can get out of sync after context profile promotion
421 // through pre-inliner.
422 // Duplicate the function profile for later insertion to avoid a conflict
423 // caused by a context that is both to be added and to be removed. This could
424 // happen when a context is promoted to another context which is also promoted
425 // to a third context. For example, given an original context A @ B @ C that
426 // is promoted to B @ C and the original context B @ C which is promoted to
427 // just C, adding B @ C to the profile map while removing same context (but
428 // with different profiles) from the map can cause a conflict if they are
429 // not handled in the right order. This can be solved by just caching the
430 // profiles to be added.
431 auto Ret
= ProfilesToBeAdded
.emplace(Context
, FProfile
);
// Two different entries must not canonicalize to the same context.
433 assert(Ret
.second
&& "Context conflict during canonicalization");
434 ProfilesToBeRemoved
.push_back(I
.first
);
// Walk the keys queued for removal.
437 for (auto &I
: ProfilesToBeRemoved
) {
// Insert the cached profiles under their stored contexts.
441 for (auto &I
: ProfilesToBeAdded
) {
442 ProfileMap
.emplace(I
.first
, I
.second
);
/// Serialize the symbol list: the symbols are copied into a vector, sorted,
/// and concatenated into OutputString with a NUL byte after each symbol.
/// Returns sampleprof_error::success.
446 std::error_code
ProfileSymbolList::write(raw_ostream
&OS
) {
447 // Sort the symbols before output. If compression is used,
448 // sorting makes the compression much more effective.
449 std::vector
<StringRef
> SortedList(Syms
.begin(), Syms
.end());
450 llvm::sort(SortedList
);
452 std::string OutputString
;
453 for (auto &Sym
: SortedList
) {
454 OutputString
.append(Sym
.str());
// Terminate each symbol with a single NUL byte.
455 OutputString
.append(1, '\0');
459 return sampleprof_error::success
;
/// Write a header line to \p OS, then iterate over a sorted copy of the
/// symbols.
462 void ProfileSymbolList::dump(raw_ostream
&OS
) const {
463 OS
<< "======== Dump profile symbol list ========\n";
// Sort a copy of the symbols before iterating over them.
464 std::vector
<StringRef
> SortedList(Syms
.begin(), Syms
.end());
465 llvm::sort(SortedList
);
467 for (auto &Sym
: SortedList
)
/// Find the child frame keyed by the hash of (\p CalleeName, \p CallSite) in
/// AllChildFrames, creating a new FrameNode under that hash when none exists.
471 CSProfileConverter::FrameNode
*
472 CSProfileConverter::FrameNode::getOrCreateChildFrame(
473 const LineLocation
&CallSite
, StringRef CalleeName
) {
474 uint64_t Hash
= FunctionSamples::getCallSiteHash(CalleeName
, CallSite
);
475 auto It
= AllChildFrames
.find(Hash
);
476 if (It
!= AllChildFrames
.end()) {
// An existing entry with a different function name means two children
// hashed to the same key.
477 assert(It
->second
.FuncName
== CalleeName
&&
478 "Hash collision for child context node");
// Store a new FrameNode (constructed with nullptr as its second argument)
// under the hash and return the address of the stored node.
482 AllChildFrames
[Hash
] = FrameNode(CalleeName
, nullptr, CallSite
);
483 return &AllChildFrames
[Hash
];
/// Bind ProfileMap to \p Profiles and attach each profile to the frame node
/// obtained from getOrCreateContextPath for that profile's context.
486 CSProfileConverter::CSProfileConverter(SampleProfileMap
&Profiles
)
487 : ProfileMap(Profiles
) {
488 for (auto &FuncSample
: Profiles
) {
489 FunctionSamples
*FSamples
= &FuncSample
.second
;
490 auto *NewNode
= getOrCreateContextPath(FSamples
->getContext());
// The node must not already carry a profile.
491 assert(!NewNode
->FuncSamples
&& "New node cannot have sample profile");
492 NewNode
->FuncSamples
= FSamples
;
/// Starting at RootFrame, follow (creating as needed) one child frame per
/// context frame of \p Context.
496 CSProfileConverter::FrameNode
*
497 CSProfileConverter::getOrCreateContextPath(const SampleContext
&Context
) {
498 auto Node
= &RootFrame
;
// The first frame is looked up with location (0, 0); each later frame is
// looked up with the Location recorded on the frame before it.
499 LineLocation
CallSiteLoc(0, 0);
500 for (auto &Callsite
: Context
.getContextFrames()) {
501 Node
= Node
->getOrCreateChildFrame(CallSiteLoc
, Callsite
.FuncName
);
502 CallSiteLoc
= Callsite
.Location
;
/// Recursively convert the profiles under \p Node. Each child subtree is
/// converted before the child's own profile is processed.
507 void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode
&Node
) {
508 // Process each child profile. Add each child profile to callsite profile map
509 // of the current node `Node` if `Node` comes with a profile. Otherwise
510 // promote the child profile to a standalone profile.
511 auto *NodeProfile
= Node
.FuncSamples
;
512 for (auto &It
: Node
.AllChildFrames
) {
513 auto &ChildNode
= It
.second
;
514 convertProfiles(ChildNode
);
515 auto *ChildProfile
= ChildNode
.FuncSamples
;
// Copy the original context first; the profile's context is modified next.
518 SampleContext OrigChildContext
= ChildProfile
->getContext();
519 // Reset the child context to be contextless.
520 ChildProfile
->getContext().setName(OrigChildContext
.getName());
522 // Add child profile to the callsite profile map.
523 auto &SamplesMap
= NodeProfile
->functionSamplesAt(ChildNode
.CallSiteLoc
);
524 SamplesMap
.emplace(OrigChildContext
.getName().str(), *ChildProfile
);
// The child's total samples are also added to the parent's total.
525 NodeProfile
->addTotalSamples(ChildProfile
->getTotalSamples());
528 // Separate child profile to be a standalone profile, if the current parent
529 // profile doesn't exist. This is a duplicating operation when the child
530 // profile is already incorporated into the parent which is still useful and
531 // thus done optionally. It is seen that duplicating context profiles into
532 // base profiles improves the code quality for thinlto build by allowing a
533 // profile in the prelink phase for to-be-fully-inlined functions.
534 if (!NodeProfile
|| GenerateMergedBaseProfiles
)
535 ProfileMap
[ChildProfile
->getContext()].merge(*ChildProfile
);
537 // Contexts coming with a `ContextShouldBeInlined` attribute indicate this
538 // is a preinliner-computed profile.
539 if (OrigChildContext
.hasAttribute(ContextShouldBeInlined
))
540 FunctionSamples::ProfileIsCSNested
= true;
542 // Remove the original child profile.
543 ProfileMap
.erase(OrigChildContext
);
547 void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame
); }