1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/ErrorOr.h"
27 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
41 const std::error_category
&sampleprof_category();
43 enum class sampleprof_error
{
51 unsupported_writing_format
,
55 ostream_seek_unsupported
,
61 inline std::error_code
make_error_code(sampleprof_error E
) {
62 return std::error_code(static_cast<int>(E
), sampleprof_category());
65 inline sampleprof_error
MergeResult(sampleprof_error
&Accumulator
,
66 sampleprof_error Result
) {
67 // Prefer first error encountered as later errors may be secondary effects of
68 // the initial problem.
69 if (Accumulator
== sampleprof_error::success
&&
70 Result
!= sampleprof_error::success
)
75 } // end namespace llvm
80 struct is_error_code_enum
<llvm::sampleprof_error
> : std::true_type
{};
82 } // end namespace std
85 namespace sampleprof
{
87 enum SampleProfileFormat
{
90 SPF_Compact_Binary
= 0x2,
96 static inline uint64_t SPMagic(SampleProfileFormat Format
= SPF_Binary
) {
97 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
98 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
99 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
100 uint64_t('2') << (64 - 56) | uint64_t(Format
);
103 // Get the proper representation of a string in the input Format.
104 static inline StringRef
getRepInFormat(StringRef Name
,
105 SampleProfileFormat Format
,
106 std::string
&GUIDBuf
) {
109 GUIDBuf
= std::to_string(Function::getGUID(Name
));
110 return (Format
== SPF_Compact_Binary
) ? StringRef(GUIDBuf
) : Name
;
/// Return the sample profile version constant (currently 103).
static inline uint64_t SPVersion() {
  constexpr uint64_t Version = 103;
  return Version;
}
// Section Type used by SampleProfileExtBinaryBaseReader and
// SampleProfileExtBinaryBaseWriter. Never change the value of an existing
// enumerator; only append new ones.
122 SecProfileSymbolList
= 3,
123 // marker for the first type of profile.
124 SecFuncProfileFirst
= 32,
125 SecLBRProfile
= SecFuncProfileFirst
128 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
129 // and SampleProfileExtBinaryBaseWriter.
130 struct SecHdrTableEntry
{
137 /// Represents the relative location of an instruction.
139 /// Instruction locations are specified by the line offset from the
140 /// beginning of the function (marked by the line where the function
141 /// header is) and the discriminator value within that line.
143 /// The discriminator value is useful to distinguish instructions
144 /// that are on the same line but belong to different basic blocks
145 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
146 struct LineLocation
{
147 LineLocation(uint32_t L
, uint32_t D
) : LineOffset(L
), Discriminator(D
) {}
149 void print(raw_ostream
&OS
) const;
152 bool operator<(const LineLocation
&O
) const {
153 return LineOffset
< O
.LineOffset
||
154 (LineOffset
== O
.LineOffset
&& Discriminator
< O
.Discriminator
);
158 uint32_t Discriminator
;
161 raw_ostream
&operator<<(raw_ostream
&OS
, const LineLocation
&Loc
);
163 /// Representation of a single sample record.
/// A sample record is represented by a positive integer value, which
/// indicates how frequently the associated line location was executed.
168 /// Additionally, if the associated location contains a function call,
169 /// the record will hold a list of all the possible called targets. For
170 /// direct calls, this will be the exact function being invoked. For
171 /// indirect calls (function pointers, virtual table dispatch), this
172 /// will be a list of one or more functions.
175 using CallTarget
= std::pair
<StringRef
, uint64_t>;
176 struct CallTargetComparator
{
177 bool operator()(const CallTarget
&LHS
, const CallTarget
&RHS
) const {
178 if (LHS
.second
!= RHS
.second
)
179 return LHS
.second
> RHS
.second
;
181 return LHS
.first
< RHS
.first
;
185 using SortedCallTargetSet
= std::set
<CallTarget
, CallTargetComparator
>;
186 using CallTargetMap
= StringMap
<uint64_t>;
187 SampleRecord() = default;
189 /// Increment the number of samples for this record by \p S.
190 /// Optionally scale sample count \p S by \p Weight.
192 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
193 /// around unsigned integers.
194 sampleprof_error
addSamples(uint64_t S
, uint64_t Weight
= 1) {
196 NumSamples
= SaturatingMultiplyAdd(S
, Weight
, NumSamples
, &Overflowed
);
197 return Overflowed
? sampleprof_error::counter_overflow
198 : sampleprof_error::success
;
201 /// Add called function \p F with samples \p S.
202 /// Optionally scale sample count \p S by \p Weight.
204 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
205 /// around unsigned integers.
206 sampleprof_error
addCalledTarget(StringRef F
, uint64_t S
,
207 uint64_t Weight
= 1) {
208 uint64_t &TargetSamples
= CallTargets
[F
];
211 SaturatingMultiplyAdd(S
, Weight
, TargetSamples
, &Overflowed
);
212 return Overflowed
? sampleprof_error::counter_overflow
213 : sampleprof_error::success
;
216 /// Return true if this sample record contains function calls.
217 bool hasCalls() const { return !CallTargets
.empty(); }
219 uint64_t getSamples() const { return NumSamples
; }
220 const CallTargetMap
&getCallTargets() const { return CallTargets
; }
221 const SortedCallTargetSet
getSortedCallTargets() const {
222 return SortCallTargets(CallTargets
);
225 /// Sort call targets in descending order of call frequency.
226 static const SortedCallTargetSet
SortCallTargets(const CallTargetMap
&Targets
) {
227 SortedCallTargetSet SortedTargets
;
228 for (const auto &I
: Targets
) {
229 SortedTargets
.emplace(I
.first(), I
.second
);
231 return SortedTargets
;
234 /// Merge the samples in \p Other into this record.
235 /// Optionally scale sample counts by \p Weight.
236 sampleprof_error
merge(const SampleRecord
&Other
, uint64_t Weight
= 1) {
237 sampleprof_error Result
= addSamples(Other
.getSamples(), Weight
);
238 for (const auto &I
: Other
.getCallTargets()) {
239 MergeResult(Result
, addCalledTarget(I
.first(), I
.second
, Weight
));
244 void print(raw_ostream
&OS
, unsigned Indent
) const;
248 uint64_t NumSamples
= 0;
249 CallTargetMap CallTargets
;
252 raw_ostream
&operator<<(raw_ostream
&OS
, const SampleRecord
&Sample
);
254 class FunctionSamples
;
256 using BodySampleMap
= std::map
<LineLocation
, SampleRecord
>;
257 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
258 // memory, which is *very* significant for large profiles.
259 using FunctionSamplesMap
= std::map
<std::string
, FunctionSamples
, std::less
<>>;
260 using CallsiteSampleMap
= std::map
<LineLocation
, FunctionSamplesMap
>;
262 /// Representation of the samples collected for a function.
264 /// This data structure contains all the collected samples for the body
265 /// of a function. Each sample corresponds to a LineLocation instance
266 /// within the body of the function.
267 class FunctionSamples
{
269 FunctionSamples() = default;
271 void print(raw_ostream
&OS
= dbgs(), unsigned Indent
= 0) const;
274 sampleprof_error
addTotalSamples(uint64_t Num
, uint64_t Weight
= 1) {
277 SaturatingMultiplyAdd(Num
, Weight
, TotalSamples
, &Overflowed
);
278 return Overflowed
? sampleprof_error::counter_overflow
279 : sampleprof_error::success
;
282 sampleprof_error
addHeadSamples(uint64_t Num
, uint64_t Weight
= 1) {
285 SaturatingMultiplyAdd(Num
, Weight
, TotalHeadSamples
, &Overflowed
);
286 return Overflowed
? sampleprof_error::counter_overflow
287 : sampleprof_error::success
;
290 sampleprof_error
addBodySamples(uint32_t LineOffset
, uint32_t Discriminator
,
291 uint64_t Num
, uint64_t Weight
= 1) {
292 return BodySamples
[LineLocation(LineOffset
, Discriminator
)].addSamples(
296 sampleprof_error
addCalledTargetSamples(uint32_t LineOffset
,
297 uint32_t Discriminator
,
298 StringRef FName
, uint64_t Num
,
299 uint64_t Weight
= 1) {
300 return BodySamples
[LineLocation(LineOffset
, Discriminator
)].addCalledTarget(
304 /// Return the number of samples collected at the given location.
305 /// Each location is specified by \p LineOffset and \p Discriminator.
306 /// If the location is not found in profile, return error.
307 ErrorOr
<uint64_t> findSamplesAt(uint32_t LineOffset
,
308 uint32_t Discriminator
) const {
309 const auto &ret
= BodySamples
.find(LineLocation(LineOffset
, Discriminator
));
310 if (ret
== BodySamples
.end())
311 return std::error_code();
313 return ret
->second
.getSamples();
316 /// Returns the call target map collected at a given location.
317 /// Each location is specified by \p LineOffset and \p Discriminator.
318 /// If the location is not found in profile, return error.
319 ErrorOr
<SampleRecord::CallTargetMap
>
320 findCallTargetMapAt(uint32_t LineOffset
, uint32_t Discriminator
) const {
321 const auto &ret
= BodySamples
.find(LineLocation(LineOffset
, Discriminator
));
322 if (ret
== BodySamples
.end())
323 return std::error_code();
324 return ret
->second
.getCallTargets();
327 /// Return the function samples at the given callsite location.
328 FunctionSamplesMap
&functionSamplesAt(const LineLocation
&Loc
) {
329 return CallsiteSamples
[Loc
];
332 /// Returns the FunctionSamplesMap at the given \p Loc.
333 const FunctionSamplesMap
*
334 findFunctionSamplesMapAt(const LineLocation
&Loc
) const {
335 auto iter
= CallsiteSamples
.find(Loc
);
336 if (iter
== CallsiteSamples
.end())
338 return &iter
->second
;
341 /// Returns a pointer to FunctionSamples at the given callsite location \p Loc
342 /// with callee \p CalleeName. If no callsite can be found, relax the
343 /// restriction to return the FunctionSamples at callsite location \p Loc
344 /// with the maximum total sample count.
345 const FunctionSamples
*findFunctionSamplesAt(const LineLocation
&Loc
,
346 StringRef CalleeName
) const {
347 std::string CalleeGUID
;
348 CalleeName
= getRepInFormat(CalleeName
, Format
, CalleeGUID
);
350 auto iter
= CallsiteSamples
.find(Loc
);
351 if (iter
== CallsiteSamples
.end())
353 auto FS
= iter
->second
.find(CalleeName
);
354 if (FS
!= iter
->second
.end())
356 // If we cannot find exact match of the callee name, return the FS with
357 // the max total count.
358 uint64_t MaxTotalSamples
= 0;
359 const FunctionSamples
*R
= nullptr;
360 for (const auto &NameFS
: iter
->second
)
361 if (NameFS
.second
.getTotalSamples() >= MaxTotalSamples
) {
362 MaxTotalSamples
= NameFS
.second
.getTotalSamples();
368 bool empty() const { return TotalSamples
== 0; }
370 /// Return the total number of samples collected inside the function.
371 uint64_t getTotalSamples() const { return TotalSamples
; }
373 /// Return the total number of branch samples that have the function as the
374 /// branch target. This should be equivalent to the sample of the first
375 /// instruction of the symbol. But as we directly get this info for raw
376 /// profile without referring to potentially inaccurate debug info, this
377 /// gives more accurate profile data and is preferred for standalone symbols.
378 uint64_t getHeadSamples() const { return TotalHeadSamples
; }
380 /// Return the sample count of the first instruction of the function.
381 /// The function can be either a standalone symbol or an inlined function.
382 uint64_t getEntrySamples() const {
383 // Use either BodySamples or CallsiteSamples which ever has the smaller
385 if (!BodySamples
.empty() &&
386 (CallsiteSamples
.empty() ||
387 BodySamples
.begin()->first
< CallsiteSamples
.begin()->first
))
388 return BodySamples
.begin()->second
.getSamples();
389 if (!CallsiteSamples
.empty()) {
391 // An indirect callsite may be promoted to several inlined direct calls.
392 // We need to get the sum of them.
393 for (const auto &N_FS
: CallsiteSamples
.begin()->second
)
394 T
+= N_FS
.second
.getEntrySamples();
400 /// Return all the samples collected in the body of the function.
401 const BodySampleMap
&getBodySamples() const { return BodySamples
; }
403 /// Return all the callsite samples collected in the body of the function.
404 const CallsiteSampleMap
&getCallsiteSamples() const {
405 return CallsiteSamples
;
408 /// Merge the samples in \p Other into this one.
409 /// Optionally scale samples by \p Weight.
410 sampleprof_error
merge(const FunctionSamples
&Other
, uint64_t Weight
= 1) {
411 sampleprof_error Result
= sampleprof_error::success
;
412 Name
= Other
.getName();
413 MergeResult(Result
, addTotalSamples(Other
.getTotalSamples(), Weight
));
414 MergeResult(Result
, addHeadSamples(Other
.getHeadSamples(), Weight
));
415 for (const auto &I
: Other
.getBodySamples()) {
416 const LineLocation
&Loc
= I
.first
;
417 const SampleRecord
&Rec
= I
.second
;
418 MergeResult(Result
, BodySamples
[Loc
].merge(Rec
, Weight
));
420 for (const auto &I
: Other
.getCallsiteSamples()) {
421 const LineLocation
&Loc
= I
.first
;
422 FunctionSamplesMap
&FSMap
= functionSamplesAt(Loc
);
423 for (const auto &Rec
: I
.second
)
424 MergeResult(Result
, FSMap
[Rec
.first
].merge(Rec
.second
, Weight
));
429 /// Recursively traverses all children, if the total sample count of the
430 /// corresponding function is no less than \p Threshold, add its corresponding
431 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
433 void findInlinedFunctions(DenseSet
<GlobalValue::GUID
> &S
, const Module
*M
,
434 uint64_t Threshold
) const {
435 if (TotalSamples
<= Threshold
)
437 S
.insert(getGUID(Name
));
438 // Import hot CallTargets, which may not be available in IR because full
439 // profile annotation cannot be done until backend compilation in ThinLTO.
440 for (const auto &BS
: BodySamples
)
441 for (const auto &TS
: BS
.second
.getCallTargets())
442 if (TS
.getValue() > Threshold
) {
443 const Function
*Callee
=
444 M
->getFunction(getNameInModule(TS
.getKey(), M
));
445 if (!Callee
|| !Callee
->getSubprogram())
446 S
.insert(getGUID(TS
.getKey()));
448 for (const auto &CS
: CallsiteSamples
)
449 for (const auto &NameFS
: CS
.second
)
450 NameFS
.second
.findInlinedFunctions(S
, M
, Threshold
);
453 /// Set the name of the function.
454 void setName(StringRef FunctionName
) { Name
= FunctionName
; }
456 /// Return the function name.
457 StringRef
getName() const { return Name
; }
459 /// Return the original function name if it exists in Module \p M.
460 StringRef
getFuncNameInModule(const Module
*M
) const {
461 return getNameInModule(Name
, M
);
464 /// Return the canonical name for a function, taking into account
465 /// suffix elision policy attributes.
466 static StringRef
getCanonicalFnName(const Function
&F
) {
467 static const char *knownSuffixes
[] = { ".llvm.", ".part." };
468 auto AttrName
= "sample-profile-suffix-elision-policy";
469 auto Attr
= F
.getFnAttribute(AttrName
).getValueAsString();
470 if (Attr
== "" || Attr
== "all") {
471 return F
.getName().split('.').first
;
472 } else if (Attr
== "selected") {
473 StringRef
Cand(F
.getName());
474 for (const auto &Suf
: knownSuffixes
) {
475 StringRef
Suffix(Suf
);
476 auto It
= Cand
.rfind(Suffix
);
477 if (It
== StringRef::npos
)
479 auto Dit
= Cand
.rfind('.');
480 if (Dit
== It
+ Suffix
.size() - 1)
481 Cand
= Cand
.substr(0, It
);
484 } else if (Attr
== "none") {
487 assert(false && "internal error: unknown suffix elision policy");
492 /// Translate \p Name into its original name in Module.
493 /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
494 /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
495 /// is actually GUID of the original function name. getNameInModule will
496 /// translate \p Name in current FunctionSamples into its original name.
497 /// If the original name doesn't exist in \p M, return empty StringRef.
498 StringRef
getNameInModule(StringRef Name
, const Module
*M
) const {
499 if (Format
!= SPF_Compact_Binary
)
502 assert(GUIDToFuncNameMap
&& "GUIDToFuncNameMap needs to be popluated first");
503 auto iter
= GUIDToFuncNameMap
->find(std::stoull(Name
.data()));
504 if (iter
== GUIDToFuncNameMap
->end())
509 /// Returns the line offset to the start line of the subprogram.
510 /// We assume that a single function will not exceed 65535 LOC.
511 static unsigned getOffset(const DILocation
*DIL
);
513 /// Get the FunctionSamples of the inline instance where DIL originates
516 /// The FunctionSamples of the instruction (Machine or IR) associated to
517 /// \p DIL is the inlined instance in which that instruction is coming from.
518 /// We traverse the inline stack of that instruction, and match it with the
519 /// tree nodes in the profile.
521 /// \returns the FunctionSamples pointer to the inlined instance.
522 const FunctionSamples
*findFunctionSamples(const DILocation
*DIL
) const;
524 static SampleProfileFormat Format
;
526 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
527 /// all the function symbols defined or declared in current module.
528 DenseMap
<uint64_t, StringRef
> *GUIDToFuncNameMap
= nullptr;
530 // Assume the input \p Name is a name coming from FunctionSamples itself.
531 // If the format is SPF_Compact_Binary, the name is already a GUID and we
532 // don't want to return the GUID of GUID.
533 static uint64_t getGUID(StringRef Name
) {
534 return (Format
== SPF_Compact_Binary
) ? std::stoull(Name
.data())
535 : Function::getGUID(Name
);
539 /// Mangled name of the function.
542 /// Total number of samples collected inside this function.
544 /// Samples are cumulative, they include all the samples collected
545 /// inside this function and all its inlined callees.
546 uint64_t TotalSamples
= 0;
548 /// Total number of samples collected at the head of the function.
549 /// This is an approximation of the number of calls made to this function
551 uint64_t TotalHeadSamples
= 0;
553 /// Map instruction locations to collected samples.
555 /// Each entry in this map contains the number of samples
556 /// collected at the corresponding line offset. All line locations
557 /// are an offset from the start of the function.
558 BodySampleMap BodySamples
;
560 /// Map call sites to collected samples for the called function.
562 /// Each entry in this map corresponds to all the samples
563 /// collected for the inlined function call at the given
564 /// location. For example, given:
572 /// If the bar() and baz() calls were inlined inside foo(), this
573 /// map will contain two entries. One for all the samples collected
574 /// in the call to bar() at line offset 1, the other for all the samples
575 /// collected in the call to baz() at line offset 8.
576 CallsiteSampleMap CallsiteSamples
;
579 raw_ostream
&operator<<(raw_ostream
&OS
, const FunctionSamples
&FS
);
581 /// Sort a LocationT->SampleT map by LocationT.
583 /// It produces a sorted list of <LocationT, SampleT> records by ascending
584 /// order of LocationT.
585 template <class LocationT
, class SampleT
> class SampleSorter
{
587 using SamplesWithLoc
= std::pair
<const LocationT
, SampleT
>;
588 using SamplesWithLocList
= SmallVector
<const SamplesWithLoc
*, 20>;
590 SampleSorter(const std::map
<LocationT
, SampleT
> &Samples
) {
591 for (const auto &I
: Samples
)
593 llvm::stable_sort(V
, [](const SamplesWithLoc
*A
, const SamplesWithLoc
*B
) {
594 return A
->first
< B
->first
;
598 const SamplesWithLocList
&get() const { return V
; }
601 SamplesWithLocList V
;
/// ProfileSymbolList records the list of function symbols that show up
/// in the binary used to generate the profile. It is useful for
/// distinguishing a function that is so cold that it does not show up
/// in the profile from a function that was newly added.
608 class ProfileSymbolList
{
610 /// copy indicates whether we need to copy the underlying memory
611 /// for the input Name.
612 void add(StringRef Name
, bool copy
= false) {
617 Syms
.insert(Name
.copy(Allocator
));
620 bool contains(StringRef Name
) { return Syms
.count(Name
); }
622 void merge(const ProfileSymbolList
&List
) {
623 for (auto Sym
: List
.Syms
)
627 unsigned size() { return Syms
.size(); }
629 void setToCompress(bool TC
) { ToCompress
= TC
; }
631 std::error_code
read(uint64_t CompressSize
, uint64_t UncompressSize
,
632 const uint8_t *Data
);
633 std::error_code
write(raw_ostream
&OS
);
634 void dump(raw_ostream
&OS
= dbgs()) const;
637 // Determine whether or not to compress the symbol list when
638 // writing it into profile. The variable is unused when the symbol
639 // list is read from an existing profile.
640 bool ToCompress
= false;
641 DenseSet
<StringRef
> Syms
;
642 BumpPtrAllocator Allocator
;
645 } // end namespace sampleprof
646 } // end namespace llvm
648 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H