1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/ErrorOr.h"
27 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
41 const std::error_category
&sampleprof_category();
43 enum class sampleprof_error
{
51 unsupported_writing_format
,
55 ostream_seek_unsupported
,
61 inline std::error_code
make_error_code(sampleprof_error E
) {
62 return std::error_code(static_cast<int>(E
), sampleprof_category());
65 inline sampleprof_error
MergeResult(sampleprof_error
&Accumulator
,
66 sampleprof_error Result
) {
67 // Prefer first error encountered as later errors may be secondary effects of
68 // the initial problem.
69 if (Accumulator
== sampleprof_error::success
&&
70 Result
!= sampleprof_error::success
)
75 } // end namespace llvm
80 struct is_error_code_enum
<llvm::sampleprof_error
> : std::true_type
{};
82 } // end namespace std
85 namespace sampleprof
{
87 enum SampleProfileFormat
{
90 SPF_Compact_Binary
= 0x2,
96 static inline uint64_t SPMagic(SampleProfileFormat Format
= SPF_Binary
) {
97 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
98 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
99 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
100 uint64_t('2') << (64 - 56) | uint64_t(Format
);
103 // Get the proper representation of a string in the input Format.
104 static inline StringRef
getRepInFormat(StringRef Name
,
105 SampleProfileFormat Format
,
106 std::string
&GUIDBuf
) {
109 GUIDBuf
= std::to_string(Function::getGUID(Name
));
110 return (Format
== SPF_Compact_Binary
) ? StringRef(GUIDBuf
) : Name
;
/// Return the sample profile version number, currently 103.
static inline uint64_t SPVersion() {
  const uint64_t Version = 103;
  return Version;
}
115 // Section Type used by SampleProfileExtBinaryBaseReader and
116 // SampleProfileExtBinaryBaseWriter. Never change the existing
117 // value of enum. Only append new ones.
122 SecProfileSymbolList
= 3,
123 SecFuncOffsetTable
= 4,
124 // marker for the first type of profile.
125 SecFuncProfileFirst
= 32,
126 SecLBRProfile
= SecFuncProfileFirst
129 static inline std::string
getSecName(SecType Type
) {
132 return "InvalidSection";
134 return "ProfileSummarySection";
136 return "NameTableSection";
137 case SecProfileSymbolList
:
138 return "ProfileSymbolListSection";
139 case SecFuncOffsetTable
:
140 return "FuncOffsetTableSection";
142 return "LBRProfileSection";
144 llvm_unreachable("A SecType has no name for output");
147 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
148 // and SampleProfileExtBinaryBaseWriter.
149 struct SecHdrTableEntry
{
/// Bit flags tested and updated on a SecHdrTableEntry via addSecFlags,
/// removeSecFlags and hasSecFlag.
enum SecFlags {
  /// No flag bit set.
  SecFlagInValid = 0,
  /// Bit 0. NOTE(review): presumably marks a compressed section; confirm
  /// against the reader/writer.
  SecFlagCompress = 1 << 0
};
158 static inline void addSecFlags(SecHdrTableEntry
&Entry
, uint64_t Flags
) {
159 Entry
.Flags
|= Flags
;
162 static inline void removeSecFlags(SecHdrTableEntry
&Entry
, uint64_t Flags
) {
163 Entry
.Flags
&= ~Flags
;
166 static inline bool hasSecFlag(SecHdrTableEntry
&Entry
, SecFlags Flag
) {
167 return Entry
.Flags
& Flag
;
170 /// Represents the relative location of an instruction.
172 /// Instruction locations are specified by the line offset from the
173 /// beginning of the function (marked by the line where the function
174 /// header is) and the discriminator value within that line.
176 /// The discriminator value is useful to distinguish instructions
177 /// that are on the same line but belong to different basic blocks
178 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
179 struct LineLocation
{
180 LineLocation(uint32_t L
, uint32_t D
) : LineOffset(L
), Discriminator(D
) {}
182 void print(raw_ostream
&OS
) const;
185 bool operator<(const LineLocation
&O
) const {
186 return LineOffset
< O
.LineOffset
||
187 (LineOffset
== O
.LineOffset
&& Discriminator
< O
.Discriminator
);
191 uint32_t Discriminator
;
194 raw_ostream
&operator<<(raw_ostream
&OS
, const LineLocation
&Loc
);
196 /// Representation of a single sample record.
/// A sample record is represented by a positive integer value, which
/// indicates how frequently the associated line location was executed.
201 /// Additionally, if the associated location contains a function call,
202 /// the record will hold a list of all the possible called targets. For
203 /// direct calls, this will be the exact function being invoked. For
204 /// indirect calls (function pointers, virtual table dispatch), this
205 /// will be a list of one or more functions.
208 using CallTarget
= std::pair
<StringRef
, uint64_t>;
209 struct CallTargetComparator
{
210 bool operator()(const CallTarget
&LHS
, const CallTarget
&RHS
) const {
211 if (LHS
.second
!= RHS
.second
)
212 return LHS
.second
> RHS
.second
;
214 return LHS
.first
< RHS
.first
;
218 using SortedCallTargetSet
= std::set
<CallTarget
, CallTargetComparator
>;
219 using CallTargetMap
= StringMap
<uint64_t>;
220 SampleRecord() = default;
222 /// Increment the number of samples for this record by \p S.
223 /// Optionally scale sample count \p S by \p Weight.
225 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
226 /// around unsigned integers.
227 sampleprof_error
addSamples(uint64_t S
, uint64_t Weight
= 1) {
229 NumSamples
= SaturatingMultiplyAdd(S
, Weight
, NumSamples
, &Overflowed
);
230 return Overflowed
? sampleprof_error::counter_overflow
231 : sampleprof_error::success
;
234 /// Add called function \p F with samples \p S.
235 /// Optionally scale sample count \p S by \p Weight.
237 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
238 /// around unsigned integers.
239 sampleprof_error
addCalledTarget(StringRef F
, uint64_t S
,
240 uint64_t Weight
= 1) {
241 uint64_t &TargetSamples
= CallTargets
[F
];
244 SaturatingMultiplyAdd(S
, Weight
, TargetSamples
, &Overflowed
);
245 return Overflowed
? sampleprof_error::counter_overflow
246 : sampleprof_error::success
;
249 /// Return true if this sample record contains function calls.
250 bool hasCalls() const { return !CallTargets
.empty(); }
252 uint64_t getSamples() const { return NumSamples
; }
253 const CallTargetMap
&getCallTargets() const { return CallTargets
; }
254 const SortedCallTargetSet
getSortedCallTargets() const {
255 return SortCallTargets(CallTargets
);
258 /// Sort call targets in descending order of call frequency.
259 static const SortedCallTargetSet
SortCallTargets(const CallTargetMap
&Targets
) {
260 SortedCallTargetSet SortedTargets
;
261 for (const auto &I
: Targets
) {
262 SortedTargets
.emplace(I
.first(), I
.second
);
264 return SortedTargets
;
267 /// Merge the samples in \p Other into this record.
268 /// Optionally scale sample counts by \p Weight.
269 sampleprof_error
merge(const SampleRecord
&Other
, uint64_t Weight
= 1) {
270 sampleprof_error Result
= addSamples(Other
.getSamples(), Weight
);
271 for (const auto &I
: Other
.getCallTargets()) {
272 MergeResult(Result
, addCalledTarget(I
.first(), I
.second
, Weight
));
277 void print(raw_ostream
&OS
, unsigned Indent
) const;
281 uint64_t NumSamples
= 0;
282 CallTargetMap CallTargets
;
285 raw_ostream
&operator<<(raw_ostream
&OS
, const SampleRecord
&Sample
);
287 class FunctionSamples
;
289 using BodySampleMap
= std::map
<LineLocation
, SampleRecord
>;
290 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
291 // memory, which is *very* significant for large profiles.
292 using FunctionSamplesMap
= std::map
<std::string
, FunctionSamples
, std::less
<>>;
293 using CallsiteSampleMap
= std::map
<LineLocation
, FunctionSamplesMap
>;
295 /// Representation of the samples collected for a function.
297 /// This data structure contains all the collected samples for the body
298 /// of a function. Each sample corresponds to a LineLocation instance
299 /// within the body of the function.
300 class FunctionSamples
{
302 FunctionSamples() = default;
304 void print(raw_ostream
&OS
= dbgs(), unsigned Indent
= 0) const;
307 sampleprof_error
addTotalSamples(uint64_t Num
, uint64_t Weight
= 1) {
310 SaturatingMultiplyAdd(Num
, Weight
, TotalSamples
, &Overflowed
);
311 return Overflowed
? sampleprof_error::counter_overflow
312 : sampleprof_error::success
;
315 sampleprof_error
addHeadSamples(uint64_t Num
, uint64_t Weight
= 1) {
318 SaturatingMultiplyAdd(Num
, Weight
, TotalHeadSamples
, &Overflowed
);
319 return Overflowed
? sampleprof_error::counter_overflow
320 : sampleprof_error::success
;
323 sampleprof_error
addBodySamples(uint32_t LineOffset
, uint32_t Discriminator
,
324 uint64_t Num
, uint64_t Weight
= 1) {
325 return BodySamples
[LineLocation(LineOffset
, Discriminator
)].addSamples(
329 sampleprof_error
addCalledTargetSamples(uint32_t LineOffset
,
330 uint32_t Discriminator
,
331 StringRef FName
, uint64_t Num
,
332 uint64_t Weight
= 1) {
333 return BodySamples
[LineLocation(LineOffset
, Discriminator
)].addCalledTarget(
337 /// Return the number of samples collected at the given location.
338 /// Each location is specified by \p LineOffset and \p Discriminator.
339 /// If the location is not found in profile, return error.
340 ErrorOr
<uint64_t> findSamplesAt(uint32_t LineOffset
,
341 uint32_t Discriminator
) const {
342 const auto &ret
= BodySamples
.find(LineLocation(LineOffset
, Discriminator
));
343 if (ret
== BodySamples
.end())
344 return std::error_code();
346 return ret
->second
.getSamples();
349 /// Returns the call target map collected at a given location.
350 /// Each location is specified by \p LineOffset and \p Discriminator.
351 /// If the location is not found in profile, return error.
352 ErrorOr
<SampleRecord::CallTargetMap
>
353 findCallTargetMapAt(uint32_t LineOffset
, uint32_t Discriminator
) const {
354 const auto &ret
= BodySamples
.find(LineLocation(LineOffset
, Discriminator
));
355 if (ret
== BodySamples
.end())
356 return std::error_code();
357 return ret
->second
.getCallTargets();
360 /// Return the function samples at the given callsite location.
361 FunctionSamplesMap
&functionSamplesAt(const LineLocation
&Loc
) {
362 return CallsiteSamples
[Loc
];
365 /// Returns the FunctionSamplesMap at the given \p Loc.
366 const FunctionSamplesMap
*
367 findFunctionSamplesMapAt(const LineLocation
&Loc
) const {
368 auto iter
= CallsiteSamples
.find(Loc
);
369 if (iter
== CallsiteSamples
.end())
371 return &iter
->second
;
374 /// Returns a pointer to FunctionSamples at the given callsite location \p Loc
375 /// with callee \p CalleeName. If no callsite can be found, relax the
376 /// restriction to return the FunctionSamples at callsite location \p Loc
377 /// with the maximum total sample count.
378 const FunctionSamples
*findFunctionSamplesAt(const LineLocation
&Loc
,
379 StringRef CalleeName
) const {
380 std::string CalleeGUID
;
381 CalleeName
= getRepInFormat(CalleeName
, Format
, CalleeGUID
);
383 auto iter
= CallsiteSamples
.find(Loc
);
384 if (iter
== CallsiteSamples
.end())
386 auto FS
= iter
->second
.find(CalleeName
);
387 if (FS
!= iter
->second
.end())
389 // If we cannot find exact match of the callee name, return the FS with
390 // the max total count.
391 uint64_t MaxTotalSamples
= 0;
392 const FunctionSamples
*R
= nullptr;
393 for (const auto &NameFS
: iter
->second
)
394 if (NameFS
.second
.getTotalSamples() >= MaxTotalSamples
) {
395 MaxTotalSamples
= NameFS
.second
.getTotalSamples();
401 bool empty() const { return TotalSamples
== 0; }
403 /// Return the total number of samples collected inside the function.
404 uint64_t getTotalSamples() const { return TotalSamples
; }
406 /// Return the total number of branch samples that have the function as the
407 /// branch target. This should be equivalent to the sample of the first
408 /// instruction of the symbol. But as we directly get this info for raw
409 /// profile without referring to potentially inaccurate debug info, this
410 /// gives more accurate profile data and is preferred for standalone symbols.
411 uint64_t getHeadSamples() const { return TotalHeadSamples
; }
413 /// Return the sample count of the first instruction of the function.
414 /// The function can be either a standalone symbol or an inlined function.
415 uint64_t getEntrySamples() const {
416 // Use either BodySamples or CallsiteSamples which ever has the smaller
418 if (!BodySamples
.empty() &&
419 (CallsiteSamples
.empty() ||
420 BodySamples
.begin()->first
< CallsiteSamples
.begin()->first
))
421 return BodySamples
.begin()->second
.getSamples();
422 if (!CallsiteSamples
.empty()) {
424 // An indirect callsite may be promoted to several inlined direct calls.
425 // We need to get the sum of them.
426 for (const auto &N_FS
: CallsiteSamples
.begin()->second
)
427 T
+= N_FS
.second
.getEntrySamples();
433 /// Return all the samples collected in the body of the function.
434 const BodySampleMap
&getBodySamples() const { return BodySamples
; }
436 /// Return all the callsite samples collected in the body of the function.
437 const CallsiteSampleMap
&getCallsiteSamples() const {
438 return CallsiteSamples
;
441 /// Merge the samples in \p Other into this one.
442 /// Optionally scale samples by \p Weight.
443 sampleprof_error
merge(const FunctionSamples
&Other
, uint64_t Weight
= 1) {
444 sampleprof_error Result
= sampleprof_error::success
;
445 Name
= Other
.getName();
446 MergeResult(Result
, addTotalSamples(Other
.getTotalSamples(), Weight
));
447 MergeResult(Result
, addHeadSamples(Other
.getHeadSamples(), Weight
));
448 for (const auto &I
: Other
.getBodySamples()) {
449 const LineLocation
&Loc
= I
.first
;
450 const SampleRecord
&Rec
= I
.second
;
451 MergeResult(Result
, BodySamples
[Loc
].merge(Rec
, Weight
));
453 for (const auto &I
: Other
.getCallsiteSamples()) {
454 const LineLocation
&Loc
= I
.first
;
455 FunctionSamplesMap
&FSMap
= functionSamplesAt(Loc
);
456 for (const auto &Rec
: I
.second
)
457 MergeResult(Result
, FSMap
[Rec
.first
].merge(Rec
.second
, Weight
));
462 /// Recursively traverses all children, if the total sample count of the
463 /// corresponding function is no less than \p Threshold, add its corresponding
464 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
466 void findInlinedFunctions(DenseSet
<GlobalValue::GUID
> &S
, const Module
*M
,
467 uint64_t Threshold
) const {
468 if (TotalSamples
<= Threshold
)
470 S
.insert(getGUID(Name
));
471 // Import hot CallTargets, which may not be available in IR because full
472 // profile annotation cannot be done until backend compilation in ThinLTO.
473 for (const auto &BS
: BodySamples
)
474 for (const auto &TS
: BS
.second
.getCallTargets())
475 if (TS
.getValue() > Threshold
) {
476 const Function
*Callee
=
477 M
->getFunction(getNameInModule(TS
.getKey(), M
));
478 if (!Callee
|| !Callee
->getSubprogram())
479 S
.insert(getGUID(TS
.getKey()));
481 for (const auto &CS
: CallsiteSamples
)
482 for (const auto &NameFS
: CS
.second
)
483 NameFS
.second
.findInlinedFunctions(S
, M
, Threshold
);
486 /// Set the name of the function.
487 void setName(StringRef FunctionName
) { Name
= FunctionName
; }
489 /// Return the function name.
490 StringRef
getName() const { return Name
; }
492 /// Return the original function name if it exists in Module \p M.
493 StringRef
getFuncNameInModule(const Module
*M
) const {
494 return getNameInModule(Name
, M
);
497 /// Return the canonical name for a function, taking into account
498 /// suffix elision policy attributes.
499 static StringRef
getCanonicalFnName(const Function
&F
) {
500 static const char *knownSuffixes
[] = { ".llvm.", ".part." };
501 auto AttrName
= "sample-profile-suffix-elision-policy";
502 auto Attr
= F
.getFnAttribute(AttrName
).getValueAsString();
503 if (Attr
== "" || Attr
== "all") {
504 return F
.getName().split('.').first
;
505 } else if (Attr
== "selected") {
506 StringRef
Cand(F
.getName());
507 for (const auto &Suf
: knownSuffixes
) {
508 StringRef
Suffix(Suf
);
509 auto It
= Cand
.rfind(Suffix
);
510 if (It
== StringRef::npos
)
512 auto Dit
= Cand
.rfind('.');
513 if (Dit
== It
+ Suffix
.size() - 1)
514 Cand
= Cand
.substr(0, It
);
517 } else if (Attr
== "none") {
520 assert(false && "internal error: unknown suffix elision policy");
525 /// Translate \p Name into its original name in Module.
526 /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
527 /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
528 /// is actually GUID of the original function name. getNameInModule will
529 /// translate \p Name in current FunctionSamples into its original name.
530 /// If the original name doesn't exist in \p M, return empty StringRef.
531 StringRef
getNameInModule(StringRef Name
, const Module
*M
) const {
532 if (Format
!= SPF_Compact_Binary
)
535 assert(GUIDToFuncNameMap
&& "GUIDToFuncNameMap needs to be popluated first");
536 auto iter
= GUIDToFuncNameMap
->find(std::stoull(Name
.data()));
537 if (iter
== GUIDToFuncNameMap
->end())
542 /// Returns the line offset to the start line of the subprogram.
543 /// We assume that a single function will not exceed 65535 LOC.
544 static unsigned getOffset(const DILocation
*DIL
);
546 /// Get the FunctionSamples of the inline instance where DIL originates
549 /// The FunctionSamples of the instruction (Machine or IR) associated to
550 /// \p DIL is the inlined instance in which that instruction is coming from.
551 /// We traverse the inline stack of that instruction, and match it with the
552 /// tree nodes in the profile.
554 /// \returns the FunctionSamples pointer to the inlined instance.
555 const FunctionSamples
*findFunctionSamples(const DILocation
*DIL
) const;
557 static SampleProfileFormat Format
;
559 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
560 /// all the function symbols defined or declared in current module.
561 DenseMap
<uint64_t, StringRef
> *GUIDToFuncNameMap
= nullptr;
563 // Assume the input \p Name is a name coming from FunctionSamples itself.
564 // If the format is SPF_Compact_Binary, the name is already a GUID and we
565 // don't want to return the GUID of GUID.
566 static uint64_t getGUID(StringRef Name
) {
567 return (Format
== SPF_Compact_Binary
) ? std::stoull(Name
.data())
568 : Function::getGUID(Name
);
572 /// Mangled name of the function.
575 /// Total number of samples collected inside this function.
577 /// Samples are cumulative, they include all the samples collected
578 /// inside this function and all its inlined callees.
579 uint64_t TotalSamples
= 0;
581 /// Total number of samples collected at the head of the function.
582 /// This is an approximation of the number of calls made to this function
584 uint64_t TotalHeadSamples
= 0;
586 /// Map instruction locations to collected samples.
588 /// Each entry in this map contains the number of samples
589 /// collected at the corresponding line offset. All line locations
590 /// are an offset from the start of the function.
591 BodySampleMap BodySamples
;
593 /// Map call sites to collected samples for the called function.
595 /// Each entry in this map corresponds to all the samples
596 /// collected for the inlined function call at the given
597 /// location. For example, given:
605 /// If the bar() and baz() calls were inlined inside foo(), this
606 /// map will contain two entries. One for all the samples collected
607 /// in the call to bar() at line offset 1, the other for all the samples
608 /// collected in the call to baz() at line offset 8.
609 CallsiteSampleMap CallsiteSamples
;
612 raw_ostream
&operator<<(raw_ostream
&OS
, const FunctionSamples
&FS
);
614 /// Sort a LocationT->SampleT map by LocationT.
616 /// It produces a sorted list of <LocationT, SampleT> records by ascending
617 /// order of LocationT.
618 template <class LocationT
, class SampleT
> class SampleSorter
{
620 using SamplesWithLoc
= std::pair
<const LocationT
, SampleT
>;
621 using SamplesWithLocList
= SmallVector
<const SamplesWithLoc
*, 20>;
623 SampleSorter(const std::map
<LocationT
, SampleT
> &Samples
) {
624 for (const auto &I
: Samples
)
626 llvm::stable_sort(V
, [](const SamplesWithLoc
*A
, const SamplesWithLoc
*B
) {
627 return A
->first
< B
->first
;
631 const SamplesWithLocList
&get() const { return V
; }
634 SamplesWithLocList V
;
/// ProfileSymbolList records the list of function symbols that show up
/// in the binary used to generate the profile. It is useful for
/// distinguishing a function that is so cold that it does not show up
/// in the profile from a function that was newly added.
641 class ProfileSymbolList
{
643 /// copy indicates whether we need to copy the underlying memory
644 /// for the input Name.
645 void add(StringRef Name
, bool copy
= false) {
650 Syms
.insert(Name
.copy(Allocator
));
653 bool contains(StringRef Name
) { return Syms
.count(Name
); }
655 void merge(const ProfileSymbolList
&List
) {
656 for (auto Sym
: List
.Syms
)
660 unsigned size() { return Syms
.size(); }
662 void setToCompress(bool TC
) { ToCompress
= TC
; }
663 bool toCompress() { return ToCompress
; }
665 std::error_code
read(const uint8_t *Data
, uint64_t ListSize
);
666 std::error_code
write(raw_ostream
&OS
);
667 void dump(raw_ostream
&OS
= dbgs()) const;
670 // Determine whether or not to compress the symbol list when
671 // writing it into profile. The variable is unused when the symbol
672 // list is read from an existing profile.
673 bool ToCompress
= false;
674 DenseSet
<StringRef
> Syms
;
675 BumpPtrAllocator Allocator
;
678 } // end namespace sampleprof
679 } // end namespace llvm
681 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H