1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/ErrorOr.h"
27 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
41 const std::error_category
&sampleprof_category();
43 enum class sampleprof_error
{
51 unsupported_writing_format
,
55 ostream_seek_unsupported
,
61 inline std::error_code
make_error_code(sampleprof_error E
) {
62 return std::error_code(static_cast<int>(E
), sampleprof_category());
65 inline sampleprof_error
MergeResult(sampleprof_error
&Accumulator
,
66 sampleprof_error Result
) {
67 // Prefer first error encountered as later errors may be secondary effects of
68 // the initial problem.
69 if (Accumulator
== sampleprof_error::success
&&
70 Result
!= sampleprof_error::success
)
75 } // end namespace llvm
80 struct is_error_code_enum
<llvm::sampleprof_error
> : std::true_type
{};
82 } // end namespace std
85 namespace sampleprof
{
87 enum SampleProfileFormat
{
90 SPF_Compact_Binary
= 0x2,
96 static inline uint64_t SPMagic(SampleProfileFormat Format
= SPF_Binary
) {
97 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
98 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
99 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
100 uint64_t('2') << (64 - 56) | uint64_t(Format
);
103 // Get the proper representation of a string in the input Format.
104 static inline StringRef
getRepInFormat(StringRef Name
,
105 SampleProfileFormat Format
,
106 std::string
&GUIDBuf
) {
109 GUIDBuf
= std::to_string(Function::getGUID(Name
));
110 return (Format
== SPF_Compact_Binary
) ? StringRef(GUIDBuf
) : Name
;
113 static inline uint64_t SPVersion() { return 103; }
115 // Section Type used by SampleProfileExtBinaryBaseReader and
116 // SampleProfileExtBinaryBaseWriter. Never change the existing
117 // value of enum. Only append new ones.
122 SecProfileSymbolList
= 3,
123 // marker for the first type of profile.
124 SecFuncProfileFirst
= 32,
125 SecLBRProfile
= SecFuncProfileFirst
128 static inline std::string
getSecName(SecType Type
) {
131 return "InvalidSection";
133 return "ProfileSummarySection";
135 return "NameTableSection";
136 case SecProfileSymbolList
:
137 return "ProfileSymbolListSection";
139 return "LBRProfileSection";
141 llvm_unreachable("A SecType has no name for output");
144 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
145 // and SampleProfileExtBinaryBaseWriter.
146 struct SecHdrTableEntry
{
153 /// Represents the relative location of an instruction.
155 /// Instruction locations are specified by the line offset from the
156 /// beginning of the function (marked by the line where the function
157 /// header is) and the discriminator value within that line.
159 /// The discriminator value is useful to distinguish instructions
160 /// that are on the same line but belong to different basic blocks
161 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
162 struct LineLocation
{
163 LineLocation(uint32_t L
, uint32_t D
) : LineOffset(L
), Discriminator(D
) {}
165 void print(raw_ostream
&OS
) const;
168 bool operator<(const LineLocation
&O
) const {
169 return LineOffset
< O
.LineOffset
||
170 (LineOffset
== O
.LineOffset
&& Discriminator
< O
.Discriminator
);
174 uint32_t Discriminator
;
177 raw_ostream
&operator<<(raw_ostream
&OS
, const LineLocation
&Loc
);
179 /// Representation of a single sample record.
181 /// A sample record is represented by a positive integer value, which
182 /// indicates how frequently was the associated line location executed.
184 /// Additionally, if the associated location contains a function call,
185 /// the record will hold a list of all the possible called targets. For
186 /// direct calls, this will be the exact function being invoked. For
187 /// indirect calls (function pointers, virtual table dispatch), this
188 /// will be a list of one or more functions.
191 using CallTarget
= std::pair
<StringRef
, uint64_t>;
192 struct CallTargetComparator
{
193 bool operator()(const CallTarget
&LHS
, const CallTarget
&RHS
) const {
194 if (LHS
.second
!= RHS
.second
)
195 return LHS
.second
> RHS
.second
;
197 return LHS
.first
< RHS
.first
;
201 using SortedCallTargetSet
= std::set
<CallTarget
, CallTargetComparator
>;
202 using CallTargetMap
= StringMap
<uint64_t>;
203 SampleRecord() = default;
205 /// Increment the number of samples for this record by \p S.
206 /// Optionally scale sample count \p S by \p Weight.
208 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
209 /// around unsigned integers.
210 sampleprof_error
addSamples(uint64_t S
, uint64_t Weight
= 1) {
212 NumSamples
= SaturatingMultiplyAdd(S
, Weight
, NumSamples
, &Overflowed
);
213 return Overflowed
? sampleprof_error::counter_overflow
214 : sampleprof_error::success
;
217 /// Add called function \p F with samples \p S.
218 /// Optionally scale sample count \p S by \p Weight.
220 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
221 /// around unsigned integers.
222 sampleprof_error
addCalledTarget(StringRef F
, uint64_t S
,
223 uint64_t Weight
= 1) {
224 uint64_t &TargetSamples
= CallTargets
[F
];
227 SaturatingMultiplyAdd(S
, Weight
, TargetSamples
, &Overflowed
);
228 return Overflowed
? sampleprof_error::counter_overflow
229 : sampleprof_error::success
;
232 /// Return true if this sample record contains function calls.
233 bool hasCalls() const { return !CallTargets
.empty(); }
235 uint64_t getSamples() const { return NumSamples
; }
236 const CallTargetMap
&getCallTargets() const { return CallTargets
; }
237 const SortedCallTargetSet
getSortedCallTargets() const {
238 return SortCallTargets(CallTargets
);
241 /// Sort call targets in descending order of call frequency.
242 static const SortedCallTargetSet
SortCallTargets(const CallTargetMap
&Targets
) {
243 SortedCallTargetSet SortedTargets
;
244 for (const auto &I
: Targets
) {
245 SortedTargets
.emplace(I
.first(), I
.second
);
247 return SortedTargets
;
250 /// Merge the samples in \p Other into this record.
251 /// Optionally scale sample counts by \p Weight.
252 sampleprof_error
merge(const SampleRecord
&Other
, uint64_t Weight
= 1) {
253 sampleprof_error Result
= addSamples(Other
.getSamples(), Weight
);
254 for (const auto &I
: Other
.getCallTargets()) {
255 MergeResult(Result
, addCalledTarget(I
.first(), I
.second
, Weight
));
260 void print(raw_ostream
&OS
, unsigned Indent
) const;
264 uint64_t NumSamples
= 0;
265 CallTargetMap CallTargets
;
268 raw_ostream
&operator<<(raw_ostream
&OS
, const SampleRecord
&Sample
);
270 class FunctionSamples
;
272 using BodySampleMap
= std::map
<LineLocation
, SampleRecord
>;
273 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
274 // memory, which is *very* significant for large profiles.
275 using FunctionSamplesMap
= std::map
<std::string
, FunctionSamples
, std::less
<>>;
276 using CallsiteSampleMap
= std::map
<LineLocation
, FunctionSamplesMap
>;
278 /// Representation of the samples collected for a function.
280 /// This data structure contains all the collected samples for the body
281 /// of a function. Each sample corresponds to a LineLocation instance
282 /// within the body of the function.
283 class FunctionSamples
{
285 FunctionSamples() = default;
287 void print(raw_ostream
&OS
= dbgs(), unsigned Indent
= 0) const;
290 sampleprof_error
addTotalSamples(uint64_t Num
, uint64_t Weight
= 1) {
293 SaturatingMultiplyAdd(Num
, Weight
, TotalSamples
, &Overflowed
);
294 return Overflowed
? sampleprof_error::counter_overflow
295 : sampleprof_error::success
;
298 sampleprof_error
addHeadSamples(uint64_t Num
, uint64_t Weight
= 1) {
301 SaturatingMultiplyAdd(Num
, Weight
, TotalHeadSamples
, &Overflowed
);
302 return Overflowed
? sampleprof_error::counter_overflow
303 : sampleprof_error::success
;
306 sampleprof_error
addBodySamples(uint32_t LineOffset
, uint32_t Discriminator
,
307 uint64_t Num
, uint64_t Weight
= 1) {
308 return BodySamples
[LineLocation(LineOffset
, Discriminator
)].addSamples(
312 sampleprof_error
addCalledTargetSamples(uint32_t LineOffset
,
313 uint32_t Discriminator
,
314 StringRef FName
, uint64_t Num
,
315 uint64_t Weight
= 1) {
316 return BodySamples
[LineLocation(LineOffset
, Discriminator
)].addCalledTarget(
320 /// Return the number of samples collected at the given location.
321 /// Each location is specified by \p LineOffset and \p Discriminator.
322 /// If the location is not found in profile, return error.
323 ErrorOr
<uint64_t> findSamplesAt(uint32_t LineOffset
,
324 uint32_t Discriminator
) const {
325 const auto &ret
= BodySamples
.find(LineLocation(LineOffset
, Discriminator
));
326 if (ret
== BodySamples
.end())
327 return std::error_code();
329 return ret
->second
.getSamples();
332 /// Returns the call target map collected at a given location.
333 /// Each location is specified by \p LineOffset and \p Discriminator.
334 /// If the location is not found in profile, return error.
335 ErrorOr
<SampleRecord::CallTargetMap
>
336 findCallTargetMapAt(uint32_t LineOffset
, uint32_t Discriminator
) const {
337 const auto &ret
= BodySamples
.find(LineLocation(LineOffset
, Discriminator
));
338 if (ret
== BodySamples
.end())
339 return std::error_code();
340 return ret
->second
.getCallTargets();
343 /// Return the function samples at the given callsite location.
344 FunctionSamplesMap
&functionSamplesAt(const LineLocation
&Loc
) {
345 return CallsiteSamples
[Loc
];
348 /// Returns the FunctionSamplesMap at the given \p Loc.
349 const FunctionSamplesMap
*
350 findFunctionSamplesMapAt(const LineLocation
&Loc
) const {
351 auto iter
= CallsiteSamples
.find(Loc
);
352 if (iter
== CallsiteSamples
.end())
354 return &iter
->second
;
357 /// Returns a pointer to FunctionSamples at the given callsite location \p Loc
358 /// with callee \p CalleeName. If no callsite can be found, relax the
359 /// restriction to return the FunctionSamples at callsite location \p Loc
360 /// with the maximum total sample count.
361 const FunctionSamples
*findFunctionSamplesAt(const LineLocation
&Loc
,
362 StringRef CalleeName
) const {
363 std::string CalleeGUID
;
364 CalleeName
= getRepInFormat(CalleeName
, Format
, CalleeGUID
);
366 auto iter
= CallsiteSamples
.find(Loc
);
367 if (iter
== CallsiteSamples
.end())
369 auto FS
= iter
->second
.find(CalleeName
);
370 if (FS
!= iter
->second
.end())
372 // If we cannot find exact match of the callee name, return the FS with
373 // the max total count.
374 uint64_t MaxTotalSamples
= 0;
375 const FunctionSamples
*R
= nullptr;
376 for (const auto &NameFS
: iter
->second
)
377 if (NameFS
.second
.getTotalSamples() >= MaxTotalSamples
) {
378 MaxTotalSamples
= NameFS
.second
.getTotalSamples();
384 bool empty() const { return TotalSamples
== 0; }
386 /// Return the total number of samples collected inside the function.
387 uint64_t getTotalSamples() const { return TotalSamples
; }
389 /// Return the total number of branch samples that have the function as the
390 /// branch target. This should be equivalent to the sample of the first
391 /// instruction of the symbol. But as we directly get this info for raw
392 /// profile without referring to potentially inaccurate debug info, this
393 /// gives more accurate profile data and is preferred for standalone symbols.
394 uint64_t getHeadSamples() const { return TotalHeadSamples
; }
396 /// Return the sample count of the first instruction of the function.
397 /// The function can be either a standalone symbol or an inlined function.
398 uint64_t getEntrySamples() const {
399 // Use either BodySamples or CallsiteSamples which ever has the smaller
401 if (!BodySamples
.empty() &&
402 (CallsiteSamples
.empty() ||
403 BodySamples
.begin()->first
< CallsiteSamples
.begin()->first
))
404 return BodySamples
.begin()->second
.getSamples();
405 if (!CallsiteSamples
.empty()) {
407 // An indirect callsite may be promoted to several inlined direct calls.
408 // We need to get the sum of them.
409 for (const auto &N_FS
: CallsiteSamples
.begin()->second
)
410 T
+= N_FS
.second
.getEntrySamples();
416 /// Return all the samples collected in the body of the function.
417 const BodySampleMap
&getBodySamples() const { return BodySamples
; }
419 /// Return all the callsite samples collected in the body of the function.
420 const CallsiteSampleMap
&getCallsiteSamples() const {
421 return CallsiteSamples
;
424 /// Merge the samples in \p Other into this one.
425 /// Optionally scale samples by \p Weight.
426 sampleprof_error
merge(const FunctionSamples
&Other
, uint64_t Weight
= 1) {
427 sampleprof_error Result
= sampleprof_error::success
;
428 Name
= Other
.getName();
429 MergeResult(Result
, addTotalSamples(Other
.getTotalSamples(), Weight
));
430 MergeResult(Result
, addHeadSamples(Other
.getHeadSamples(), Weight
));
431 for (const auto &I
: Other
.getBodySamples()) {
432 const LineLocation
&Loc
= I
.first
;
433 const SampleRecord
&Rec
= I
.second
;
434 MergeResult(Result
, BodySamples
[Loc
].merge(Rec
, Weight
));
436 for (const auto &I
: Other
.getCallsiteSamples()) {
437 const LineLocation
&Loc
= I
.first
;
438 FunctionSamplesMap
&FSMap
= functionSamplesAt(Loc
);
439 for (const auto &Rec
: I
.second
)
440 MergeResult(Result
, FSMap
[Rec
.first
].merge(Rec
.second
, Weight
));
445 /// Recursively traverses all children, if the total sample count of the
446 /// corresponding function is no less than \p Threshold, add its corresponding
447 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
449 void findInlinedFunctions(DenseSet
<GlobalValue::GUID
> &S
, const Module
*M
,
450 uint64_t Threshold
) const {
451 if (TotalSamples
<= Threshold
)
453 S
.insert(getGUID(Name
));
454 // Import hot CallTargets, which may not be available in IR because full
455 // profile annotation cannot be done until backend compilation in ThinLTO.
456 for (const auto &BS
: BodySamples
)
457 for (const auto &TS
: BS
.second
.getCallTargets())
458 if (TS
.getValue() > Threshold
) {
459 const Function
*Callee
=
460 M
->getFunction(getNameInModule(TS
.getKey(), M
));
461 if (!Callee
|| !Callee
->getSubprogram())
462 S
.insert(getGUID(TS
.getKey()));
464 for (const auto &CS
: CallsiteSamples
)
465 for (const auto &NameFS
: CS
.second
)
466 NameFS
.second
.findInlinedFunctions(S
, M
, Threshold
);
469 /// Set the name of the function.
470 void setName(StringRef FunctionName
) { Name
= FunctionName
; }
472 /// Return the function name.
473 StringRef
getName() const { return Name
; }
475 /// Return the original function name if it exists in Module \p M.
476 StringRef
getFuncNameInModule(const Module
*M
) const {
477 return getNameInModule(Name
, M
);
480 /// Return the canonical name for a function, taking into account
481 /// suffix elision policy attributes.
482 static StringRef
getCanonicalFnName(const Function
&F
) {
483 static const char *knownSuffixes
[] = { ".llvm.", ".part." };
484 auto AttrName
= "sample-profile-suffix-elision-policy";
485 auto Attr
= F
.getFnAttribute(AttrName
).getValueAsString();
486 if (Attr
== "" || Attr
== "all") {
487 return F
.getName().split('.').first
;
488 } else if (Attr
== "selected") {
489 StringRef
Cand(F
.getName());
490 for (const auto &Suf
: knownSuffixes
) {
491 StringRef
Suffix(Suf
);
492 auto It
= Cand
.rfind(Suffix
);
493 if (It
== StringRef::npos
)
495 auto Dit
= Cand
.rfind('.');
496 if (Dit
== It
+ Suffix
.size() - 1)
497 Cand
= Cand
.substr(0, It
);
500 } else if (Attr
== "none") {
503 assert(false && "internal error: unknown suffix elision policy");
508 /// Translate \p Name into its original name in Module.
509 /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
510 /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
511 /// is actually GUID of the original function name. getNameInModule will
512 /// translate \p Name in current FunctionSamples into its original name.
513 /// If the original name doesn't exist in \p M, return empty StringRef.
514 StringRef
getNameInModule(StringRef Name
, const Module
*M
) const {
515 if (Format
!= SPF_Compact_Binary
)
518 assert(GUIDToFuncNameMap
&& "GUIDToFuncNameMap needs to be popluated first");
519 auto iter
= GUIDToFuncNameMap
->find(std::stoull(Name
.data()));
520 if (iter
== GUIDToFuncNameMap
->end())
525 /// Returns the line offset to the start line of the subprogram.
526 /// We assume that a single function will not exceed 65535 LOC.
527 static unsigned getOffset(const DILocation
*DIL
);
529 /// Get the FunctionSamples of the inline instance where DIL originates
532 /// The FunctionSamples of the instruction (Machine or IR) associated to
533 /// \p DIL is the inlined instance in which that instruction is coming from.
534 /// We traverse the inline stack of that instruction, and match it with the
535 /// tree nodes in the profile.
537 /// \returns the FunctionSamples pointer to the inlined instance.
538 const FunctionSamples
*findFunctionSamples(const DILocation
*DIL
) const;
540 static SampleProfileFormat Format
;
542 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
543 /// all the function symbols defined or declared in current module.
544 DenseMap
<uint64_t, StringRef
> *GUIDToFuncNameMap
= nullptr;
546 // Assume the input \p Name is a name coming from FunctionSamples itself.
547 // If the format is SPF_Compact_Binary, the name is already a GUID and we
548 // don't want to return the GUID of GUID.
549 static uint64_t getGUID(StringRef Name
) {
550 return (Format
== SPF_Compact_Binary
) ? std::stoull(Name
.data())
551 : Function::getGUID(Name
);
555 /// Mangled name of the function.
558 /// Total number of samples collected inside this function.
560 /// Samples are cumulative, they include all the samples collected
561 /// inside this function and all its inlined callees.
562 uint64_t TotalSamples
= 0;
564 /// Total number of samples collected at the head of the function.
565 /// This is an approximation of the number of calls made to this function
567 uint64_t TotalHeadSamples
= 0;
569 /// Map instruction locations to collected samples.
571 /// Each entry in this map contains the number of samples
572 /// collected at the corresponding line offset. All line locations
573 /// are an offset from the start of the function.
574 BodySampleMap BodySamples
;
576 /// Map call sites to collected samples for the called function.
578 /// Each entry in this map corresponds to all the samples
579 /// collected for the inlined function call at the given
580 /// location. For example, given:
588 /// If the bar() and baz() calls were inlined inside foo(), this
589 /// map will contain two entries. One for all the samples collected
590 /// in the call to bar() at line offset 1, the other for all the samples
591 /// collected in the call to baz() at line offset 8.
592 CallsiteSampleMap CallsiteSamples
;
595 raw_ostream
&operator<<(raw_ostream
&OS
, const FunctionSamples
&FS
);
597 /// Sort a LocationT->SampleT map by LocationT.
599 /// It produces a sorted list of <LocationT, SampleT> records by ascending
600 /// order of LocationT.
601 template <class LocationT
, class SampleT
> class SampleSorter
{
603 using SamplesWithLoc
= std::pair
<const LocationT
, SampleT
>;
604 using SamplesWithLocList
= SmallVector
<const SamplesWithLoc
*, 20>;
606 SampleSorter(const std::map
<LocationT
, SampleT
> &Samples
) {
607 for (const auto &I
: Samples
)
609 llvm::stable_sort(V
, [](const SamplesWithLoc
*A
, const SamplesWithLoc
*B
) {
610 return A
->first
< B
->first
;
614 const SamplesWithLocList
&get() const { return V
; }
617 SamplesWithLocList V
;
620 /// ProfileSymbolList records the list of function symbols shown up
621 /// in the binary used to generate the profile. It is useful to
622 /// to discriminate a function being so cold as not to shown up
623 /// in the profile and a function newly added.
624 class ProfileSymbolList
{
626 /// copy indicates whether we need to copy the underlying memory
627 /// for the input Name.
628 void add(StringRef Name
, bool copy
= false) {
633 Syms
.insert(Name
.copy(Allocator
));
636 bool contains(StringRef Name
) { return Syms
.count(Name
); }
638 void merge(const ProfileSymbolList
&List
) {
639 for (auto Sym
: List
.Syms
)
643 unsigned size() { return Syms
.size(); }
645 void setToCompress(bool TC
) { ToCompress
= TC
; }
647 std::error_code
read(uint64_t CompressSize
, uint64_t UncompressSize
,
648 const uint8_t *Data
);
649 std::error_code
write(raw_ostream
&OS
);
650 void dump(raw_ostream
&OS
= dbgs()) const;
653 // Determine whether or not to compress the symbol list when
654 // writing it into profile. The variable is unused when the symbol
655 // list is read from an existing profile.
656 bool ToCompress
= false;
657 DenseSet
<StringRef
> Syms
;
658 BumpPtrAllocator Allocator
;
661 } // end namespace sampleprof
662 } // end namespace llvm
664 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H