1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
21 using namespace sampleprof
;
25 class CleanupInstaller
;
27 namespace sampleprof
{
29 // Stream based trace line iterator
31 std::string CurrentLine
;
34 uint64_t LineNumber
= 0;
37 TraceStream(StringRef Filename
) : Fin(Filename
.str()) {
39 exitWithError("Error read input perf script file", Filename
);
43 StringRef
getCurrentLine() {
44 assert(!IsAtEoF
&& "Line iterator reaches the End-of-File!");
48 uint64_t getLineNumber() { return LineNumber
; }
50 bool isAtEoF() { return IsAtEoF
; }
54 if (!std::getline(Fin
, CurrentLine
)) {
62 // The type of input format.
65 PerfData
= 1, // Raw linux perf.data.
66 PerfScript
= 2, // Perf script create by `perf script` command.
67 UnsymbolizedProfile
= 3, // Unsymbolized profile generated by llvm-profgen.
71 // The type of perfscript content.
74 LBR
= 1, // Only LBR sample.
75 LBRStack
= 2, // Hybrid sample including call stack and LBR stack.
78 struct PerfInputFile
{
79 std::string InputFile
;
80 PerfFormat Format
= PerfFormat::UnknownFormat
;
81 PerfContent Content
= PerfContent::UnknownContent
;
84 // The parsed LBR sample entry.
88 LBREntry(uint64_t S
, uint64_t T
) : Source(S
), Target(T
) {}
92 dbgs() << "from " << format("%#010x", Source
) << " to "
93 << format("%#010x", Target
);
99 static inline void printLBRStack(const SmallVectorImpl
<LBREntry
> &LBRStack
) {
100 for (size_t I
= 0; I
< LBRStack
.size(); I
++) {
101 dbgs() << "[" << I
<< "] ";
107 static inline void printCallStack(const SmallVectorImpl
<uint64_t> &CallStack
) {
108 for (size_t I
= 0; I
< CallStack
.size(); I
++) {
109 dbgs() << "[" << I
<< "] " << format("%#010x", CallStack
[I
]) << "\n";
114 // Hash interface for generic data of type T
115 // Data should implement a \fn getHashCode and a \fn isEqual
116 // Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
// i.e. we explicitly calculate the hash of the derived class and assign it to
// the base class's HashCode. This also provides the flexibility for
// calculating the hash code incrementally (like a rolling hash) during frame
// stack unwinding, since unwinding only changes the leaf of the frame stack.
// \fn isEqual is a virtual function, which will have perf overhead. In the
// future, if we design a better hash function, we can skip this or switch to
// a non-virtual function (e.g. just ignore the comparison if the probability
// of hash conflicts is low).
124 template <class T
> class Hashable
{
126 std::shared_ptr
<T
> Data
;
127 Hashable(const std::shared_ptr
<T
> &D
) : Data(D
) {}
129 // Hash code generation
131 uint64_t operator()(const Hashable
<T
> &Key
) const {
132 // Don't make it virtual for getHashCode
133 uint64_t Hash
= Key
.Data
->getHashCode();
134 assert(Hash
&& "Should generate HashCode for it!");
141 bool operator()(const Hashable
<T
> &LHS
, const Hashable
<T
> &RHS
) const {
142 // Precisely compare the data, vtable will have overhead.
143 return LHS
.Data
->isEqual(RHS
.Data
.get());
147 T
*getPtr() const { return Data
.get(); }
151 // LBR stack recorded in FIFO order.
152 SmallVector
<LBREntry
, 16> LBRStack
;
153 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
155 SmallVector
<uint64_t, 16> CallStack
;
157 virtual ~PerfSample() = default;
158 uint64_t getHashCode() const {
159 // Use simple DJB2 hash
160 auto HashCombine
= [](uint64_t H
, uint64_t V
) {
161 return ((H
<< 5) + H
) + V
;
163 uint64_t Hash
= 5381;
164 for (const auto &Value
: CallStack
) {
165 Hash
= HashCombine(Hash
, Value
);
167 for (const auto &Entry
: LBRStack
) {
168 Hash
= HashCombine(Hash
, Entry
.Source
);
169 Hash
= HashCombine(Hash
, Entry
.Target
);
174 bool isEqual(const PerfSample
*Other
) const {
175 const SmallVector
<uint64_t, 16> &OtherCallStack
= Other
->CallStack
;
176 const SmallVector
<LBREntry
, 16> &OtherLBRStack
= Other
->LBRStack
;
178 if (CallStack
.size() != OtherCallStack
.size() ||
179 LBRStack
.size() != OtherLBRStack
.size())
182 if (!std::equal(CallStack
.begin(), CallStack
.end(), OtherCallStack
.begin()))
185 for (size_t I
= 0; I
< OtherLBRStack
.size(); I
++) {
186 if (LBRStack
[I
].Source
!= OtherLBRStack
[I
].Source
||
187 LBRStack
[I
].Target
!= OtherLBRStack
[I
].Target
)
194 uint64_t Linenum
= 0;
197 dbgs() << "Line " << Linenum
<< "\n";
198 dbgs() << "LBR stack\n";
199 printLBRStack(LBRStack
);
200 dbgs() << "Call stack\n";
201 printCallStack(CallStack
);
205 // After parsing the sample, we record the samples by aggregating them
206 // into this counter. The key stores the sample data and the value is
207 // the sample repeat times.
208 using AggregatedCounter
=
209 std::unordered_map
<Hashable
<PerfSample
>, uint64_t,
210 Hashable
<PerfSample
>::Hash
, Hashable
<PerfSample
>::Equal
>;
212 using SampleVector
= SmallVector
<std::tuple
<uint64_t, uint64_t, uint64_t>, 16>;
214 inline bool isValidFallThroughRange(uint64_t Start
, uint64_t End
,
215 ProfiledBinary
*Binary
) {
// A Start bigger than End is considered invalid.
// LBR ranges that cross an unconditional jmp are also assumed invalid.
// It's found that perf data may contain duplicate LBR entries that could form
// a range that does not reflect real execution flow on some Intel targets,
// e.g. Skylake. Such ranges are usually very long. Exclude them since there
// cannot be a linear execution range that spans over an unconditional jmp.
222 return Start
<= End
&& !Binary
->rangeCrossUncondBranch(Start
, End
);
// The state for the unwinder. It doesn't hold the data but only keeps the
// pointer/index of the data. While unwinding, the CallStack is changed
// dynamically and will be recorded as the context of the sample.
229 // Profiled binary that current frame address belongs to
230 const ProfiledBinary
*Binary
;
231 // Call stack trie node
232 struct ProfiledFrame
{
233 const uint64_t Address
= DummyRoot
;
234 ProfiledFrame
*Parent
;
235 SampleVector RangeSamples
;
236 SampleVector BranchSamples
;
237 std::unordered_map
<uint64_t, std::unique_ptr
<ProfiledFrame
>> Children
;
239 ProfiledFrame(uint64_t Addr
= 0, ProfiledFrame
*P
= nullptr)
240 : Address(Addr
), Parent(P
) {}
241 ProfiledFrame
*getOrCreateChildFrame(uint64_t Address
) {
242 assert(Address
&& "Address can't be zero!");
243 auto Ret
= Children
.emplace(
244 Address
, std::make_unique
<ProfiledFrame
>(Address
, this));
245 return Ret
.first
->second
.get();
247 void recordRangeCount(uint64_t Start
, uint64_t End
, uint64_t Count
) {
248 RangeSamples
.emplace_back(std::make_tuple(Start
, End
, Count
));
250 void recordBranchCount(uint64_t Source
, uint64_t Target
, uint64_t Count
) {
251 BranchSamples
.emplace_back(std::make_tuple(Source
, Target
, Count
));
253 bool isDummyRoot() { return Address
== DummyRoot
; }
254 bool isExternalFrame() { return Address
== ExternalAddr
; }
255 bool isLeafFrame() { return Children
.empty(); }
258 ProfiledFrame DummyTrieRoot
;
259 ProfiledFrame
*CurrentLeafFrame
;
260 // Used to fall through the LBR stack
261 uint32_t LBRIndex
= 0;
262 // Reference to PerfSample.LBRStack
263 const SmallVector
<LBREntry
, 16> &LBRStack
;
264 // Used to iterate the address range
265 InstructionPointer InstPtr
;
266 // Indicate whether unwinding is currently in a bad state which requires to
267 // skip all subsequent unwinding.
268 bool Invalid
= false;
269 UnwindState(const PerfSample
*Sample
, const ProfiledBinary
*Binary
)
270 : Binary(Binary
), LBRStack(Sample
->LBRStack
),
271 InstPtr(Binary
, Sample
->CallStack
.front()) {
272 initFrameTrie(Sample
->CallStack
);
275 bool validateInitialState() {
276 uint64_t LBRLeaf
= LBRStack
[LBRIndex
].Target
;
277 uint64_t LeafAddr
= CurrentLeafFrame
->Address
;
278 assert((LBRLeaf
!= ExternalAddr
|| LBRLeaf
== LeafAddr
) &&
279 "External leading LBR should match the leaf frame.");
281 // When we take a stack sample, ideally the sampling distance between the
282 // leaf IP of stack and the last LBR target shouldn't be very large.
283 // Use a heuristic size (0x100) to filter out broken records.
284 if (LeafAddr
< LBRLeaf
|| LeafAddr
- LBRLeaf
>= 0x100) {
285 WithColor::warning() << "Bogus trace: stack tip = "
286 << format("%#010x", LeafAddr
)
287 << ", LBR tip = " << format("%#010x\n", LBRLeaf
);
293 void checkStateConsistency() {
294 assert(InstPtr
.Address
== CurrentLeafFrame
->Address
&&
295 "IP should align with context leaf");
298 void setInvalid() { Invalid
= true; }
299 bool hasNextLBR() const { return LBRIndex
< LBRStack
.size(); }
300 uint64_t getCurrentLBRSource() const { return LBRStack
[LBRIndex
].Source
; }
301 uint64_t getCurrentLBRTarget() const { return LBRStack
[LBRIndex
].Target
; }
302 const LBREntry
&getCurrentLBR() const { return LBRStack
[LBRIndex
]; }
303 bool IsLastLBR() const { return LBRIndex
== 0; }
304 bool getLBRStackSize() const { return LBRStack
.size(); }
305 void advanceLBR() { LBRIndex
++; }
306 ProfiledFrame
*getParentFrame() { return CurrentLeafFrame
->Parent
; }
308 void pushFrame(uint64_t Address
) {
309 CurrentLeafFrame
= CurrentLeafFrame
->getOrCreateChildFrame(Address
);
312 void switchToFrame(uint64_t Address
) {
313 if (CurrentLeafFrame
->Address
== Address
)
315 CurrentLeafFrame
= CurrentLeafFrame
->Parent
->getOrCreateChildFrame(Address
);
318 void popFrame() { CurrentLeafFrame
= CurrentLeafFrame
->Parent
; }
320 void clearCallStack() { CurrentLeafFrame
= &DummyTrieRoot
; }
322 void initFrameTrie(const SmallVectorImpl
<uint64_t> &CallStack
) {
323 ProfiledFrame
*Cur
= &DummyTrieRoot
;
324 for (auto Address
: reverse(CallStack
)) {
325 Cur
= Cur
->getOrCreateChildFrame(Address
);
327 CurrentLeafFrame
= Cur
;
330 ProfiledFrame
*getDummyRootPtr() { return &DummyTrieRoot
; }
333 // Base class for sample counter key with context
335 uint64_t HashCode
= 0;
336 virtual ~ContextKey() = default;
337 uint64_t getHashCode() {
342 virtual void genHashCode() = 0;
343 virtual bool isEqual(const ContextKey
*K
) const {
344 return HashCode
== K
->HashCode
;
347 // Utilities for LLVM-style RTTI
348 enum ContextKind
{ CK_StringBased
, CK_AddrBased
};
349 const ContextKind Kind
;
350 ContextKind
getKind() const { return Kind
; }
351 ContextKey(ContextKind K
) : Kind(K
){};
354 // String based context id
355 struct StringBasedCtxKey
: public ContextKey
{
356 SampleContextFrameVector Context
;
359 StringBasedCtxKey() : ContextKey(CK_StringBased
), WasLeafInlined(false){};
360 static bool classof(const ContextKey
*K
) {
361 return K
->getKind() == CK_StringBased
;
364 bool isEqual(const ContextKey
*K
) const override
{
365 const StringBasedCtxKey
*Other
= dyn_cast
<StringBasedCtxKey
>(K
);
366 return Context
== Other
->Context
;
369 void genHashCode() override
{
370 HashCode
= hash_value(SampleContextFrames(Context
));
374 // Address-based context id
375 struct AddrBasedCtxKey
: public ContextKey
{
376 SmallVector
<uint64_t, 16> Context
;
379 AddrBasedCtxKey() : ContextKey(CK_AddrBased
), WasLeafInlined(false){};
380 static bool classof(const ContextKey
*K
) {
381 return K
->getKind() == CK_AddrBased
;
384 bool isEqual(const ContextKey
*K
) const override
{
385 const AddrBasedCtxKey
*Other
= dyn_cast
<AddrBasedCtxKey
>(K
);
386 return Context
== Other
->Context
;
389 void genHashCode() override
{
390 HashCode
= hash_combine_range(Context
.begin(), Context
.end());
394 // The counter of branch samples for one function indexed by the branch,
395 // which is represented as the source and target offset pair.
396 using BranchSample
= std::map
<std::pair
<uint64_t, uint64_t>, uint64_t>;
397 // The counter of range samples for one function indexed by the range,
398 // which is represented as the start and end offset pair.
399 using RangeSample
= std::map
<std::pair
<uint64_t, uint64_t>, uint64_t>;
400 // Wrapper for sample counters including range counter and branch counter
401 struct SampleCounter
{
402 RangeSample RangeCounter
;
403 BranchSample BranchCounter
;
405 void recordRangeCount(uint64_t Start
, uint64_t End
, uint64_t Repeat
) {
406 assert(Start
<= End
&& "Invalid instruction range");
407 RangeCounter
[{Start
, End
}] += Repeat
;
409 void recordBranchCount(uint64_t Source
, uint64_t Target
, uint64_t Repeat
) {
410 BranchCounter
[{Source
, Target
}] += Repeat
;
414 // Sample counter with context to support context-sensitive profile
415 using ContextSampleCounterMap
=
416 std::unordered_map
<Hashable
<ContextKey
>, SampleCounter
,
417 Hashable
<ContextKey
>::Hash
, Hashable
<ContextKey
>::Equal
>;
420 SmallVector
<uint64_t, 16> Stack
;
421 ProfiledBinary
*Binary
;
422 FrameStack(ProfiledBinary
*B
) : Binary(B
) {}
423 bool pushFrame(UnwindState::ProfiledFrame
*Cur
) {
424 assert(!Cur
->isExternalFrame() &&
425 "External frame's not expected for context stack.");
426 Stack
.push_back(Cur
->Address
);
434 std::shared_ptr
<StringBasedCtxKey
> getContextKey();
437 struct AddressStack
{
438 SmallVector
<uint64_t, 16> Stack
;
439 ProfiledBinary
*Binary
;
440 AddressStack(ProfiledBinary
*B
) : Binary(B
) {}
441 bool pushFrame(UnwindState::ProfiledFrame
*Cur
) {
442 assert(!Cur
->isExternalFrame() &&
443 "External frame's not expected for context stack.");
444 Stack
.push_back(Cur
->Address
);
452 std::shared_ptr
<AddrBasedCtxKey
> getContextKey();
456 As in hybrid sample we have a group of LBRs and the most recent sampling call
457 stack, we can walk through those LBRs to infer more call stacks which would be
458 used as context for profile. VirtualUnwinder is the class to do the call stack
unwinding based on LBR state. Two types of unwinding are processed here:
460 1) LBR unwinding and 2) linear range unwinding.
461 Specifically, for each LBR entry(can be classified into call, return, regular
462 branch), LBR unwinding will replay the operation by pushing, popping or
463 switching leaf frame towards the call stack and since the initial call stack
464 is most recently sampled, the replay should be in anti-execution order, i.e. for
465 the regular case, pop the call stack when LBR is call, push frame on call stack
466 when LBR is return. After each LBR processed, it also needs to align with the
467 next LBR by going through instructions from previous LBR's target to current
468 LBR's source, which is the linear unwinding. As instruction from linear range
469 can come from different function by inlining, linear unwinding will do the range
470 splitting and record counters by the range with same inline context. Over those
471 unwinding process we will record each call stack as context id and LBR/linear
472 range as sample counter for further CS profile generation.
474 class VirtualUnwinder
{
476 VirtualUnwinder(ContextSampleCounterMap
*Counter
, ProfiledBinary
*B
)
477 : CtxCounterMap(Counter
), Binary(B
) {}
478 bool unwind(const PerfSample
*Sample
, uint64_t Repeat
);
479 std::set
<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites
; }
481 uint64_t NumTotalBranches
= 0;
482 uint64_t NumExtCallBranch
= 0;
483 uint64_t NumMissingExternalFrame
= 0;
484 uint64_t NumMismatchedProEpiBranch
= 0;
485 uint64_t NumMismatchedExtCallBranch
= 0;
486 uint64_t NumUnpairedExtAddr
= 0;
487 uint64_t NumPairedExtAddr
= 0;
490 bool isSourceExternal(UnwindState
&State
) const {
491 return State
.getCurrentLBRSource() == ExternalAddr
;
494 bool isTargetExternal(UnwindState
&State
) const {
495 return State
.getCurrentLBRTarget() == ExternalAddr
;
498 // Determine whether the return source is from external code by checking if
499 // the target's the next inst is a call inst.
500 bool isReturnFromExternal(UnwindState
&State
) const {
501 return isSourceExternal(State
) &&
502 (Binary
->getCallAddrFromFrameAddr(State
.getCurrentLBRTarget()) != 0);
505 // If the source is external address but it's not the `return` case, treat it
506 // as a call from external.
507 bool isCallFromExternal(UnwindState
&State
) const {
508 return isSourceExternal(State
) &&
509 Binary
->getCallAddrFromFrameAddr(State
.getCurrentLBRTarget()) == 0;
512 bool isCallState(UnwindState
&State
) const {
513 // The tail call frame is always missing here in stack sample, we will
514 // use a specific tail call tracker to infer it.
515 if (!isValidState(State
))
518 if (Binary
->addressIsCall(State
.getCurrentLBRSource()))
521 return isCallFromExternal(State
);
524 bool isReturnState(UnwindState
&State
) const {
525 if (!isValidState(State
))
528 // Simply check addressIsReturn, as ret is always reliable, both for
529 // regular call and tail call.
530 if (Binary
->addressIsReturn(State
.getCurrentLBRSource()))
533 return isReturnFromExternal(State
);
536 bool isValidState(UnwindState
&State
) const { return !State
.Invalid
; }
538 void unwindCall(UnwindState
&State
);
539 void unwindLinear(UnwindState
&State
, uint64_t Repeat
);
540 void unwindReturn(UnwindState
&State
);
541 void unwindBranch(UnwindState
&State
);
543 template <typename T
>
544 void collectSamplesFromFrame(UnwindState::ProfiledFrame
*Cur
, T
&Stack
);
545 // Collect each samples on trie node by DFS traversal
546 template <typename T
>
547 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame
*Cur
, T
&Stack
);
548 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame
*Cur
);
550 void recordRangeCount(uint64_t Start
, uint64_t End
, UnwindState
&State
,
552 void recordBranchCount(const LBREntry
&Branch
, UnwindState
&State
,
555 ContextSampleCounterMap
*CtxCounterMap
;
556 // Profiled binary that current frame address belongs to
557 ProfiledBinary
*Binary
;
558 // Keep track of all untracked callsites
559 std::set
<uint64_t> UntrackedCallsites
;
562 // Read perf trace to parse the events and samples.
563 class PerfReaderBase
{
565 PerfReaderBase(ProfiledBinary
*B
, StringRef PerfTrace
)
566 : Binary(B
), PerfTraceFile(PerfTrace
) {
567 // Initialize the base address to preferred address.
568 Binary
->setBaseAddress(Binary
->getPreferredBaseAddress());
570 virtual ~PerfReaderBase() = default;
571 static std::unique_ptr
<PerfReaderBase
>
572 create(ProfiledBinary
*Binary
, PerfInputFile
&PerfInput
,
573 std::optional
<int32_t> PIDFilter
);
575 // Entry of the reader to parse multiple perf traces
576 virtual void parsePerfTraces() = 0;
577 const ContextSampleCounterMap
&getSampleCounters() const {
578 return SampleCounters
;
580 bool profileIsCS() { return ProfileIsCS
; }
583 ProfiledBinary
*Binary
= nullptr;
584 StringRef PerfTraceFile
;
586 ContextSampleCounterMap SampleCounters
;
587 bool ProfileIsCS
= false;
589 uint64_t NumTotalSample
= 0;
590 uint64_t NumLeafExternalFrame
= 0;
591 uint64_t NumLeadingOutgoingLBR
= 0;
594 // Read perf script to parse the events and samples.
595 class PerfScriptReader
: public PerfReaderBase
{
597 PerfScriptReader(ProfiledBinary
*B
, StringRef PerfTrace
,
598 std::optional
<int32_t> PID
)
599 : PerfReaderBase(B
, PerfTrace
), PIDFilter(PID
) {};
601 // Entry of the reader to parse multiple perf traces
602 void parsePerfTraces() override
;
603 // Generate perf script from perf data
604 static PerfInputFile
convertPerfDataToTrace(ProfiledBinary
*Binary
,
605 bool SkipPID
, PerfInputFile
&File
,
606 std::optional
<int32_t> PIDFilter
);
// Extract perf script type by peeking at the input
608 static PerfContent
checkPerfScriptType(StringRef FileName
);
610 // Cleanup installers for temporary files created by perf script command.
611 // Those files will be automatically removed when running destructor or
612 // receiving signals.
613 static SmallVector
<CleanupInstaller
, 2> TempFileCleanups
;
616 // The parsed MMap event
619 uint64_t Address
= 0;
622 StringRef BinaryPath
;
625 // Check whether a given line is LBR sample
626 static bool isLBRSample(StringRef Line
);
627 // Check whether a given line is MMAP event
628 static bool isMMapEvent(StringRef Line
);
629 // Parse a single line of a PERF_RECORD_MMAP event looking for a
630 // mapping between the binary name and its memory layout.
631 static bool extractMMapEventForBinary(ProfiledBinary
*Binary
, StringRef Line
,
633 // Update base address based on mmap events
634 void updateBinaryAddress(const MMapEvent
&Event
);
635 // Parse mmap event and update binary address
636 void parseMMapEvent(TraceStream
&TraceIt
);
637 // Parse perf events/samples and do aggregation
638 void parseAndAggregateTrace();
639 // Parse either an MMAP event or a perf sample
640 void parseEventOrSample(TraceStream
&TraceIt
);
641 // Warn if the relevant mmap event is missing.
642 void warnIfMissingMMap();
// Emit accumulated warnings.
644 void warnTruncatedStack();
645 // Warn if range is invalid.
646 void warnInvalidRange();
647 // Extract call stack from the perf trace lines
648 bool extractCallstack(TraceStream
&TraceIt
,
649 SmallVectorImpl
<uint64_t> &CallStack
);
650 // Extract LBR stack from one perf trace line
651 bool extractLBRStack(TraceStream
&TraceIt
,
652 SmallVectorImpl
<LBREntry
> &LBRStack
);
653 uint64_t parseAggregatedCount(TraceStream
&TraceIt
);
654 // Parse one sample from multiple perf lines, override this for different
656 void parseSample(TraceStream
&TraceIt
);
657 // An aggregated count is given to indicate how many times the sample is
659 virtual void parseSample(TraceStream
&TraceIt
, uint64_t Count
){};
660 void computeCounterFromLBR(const PerfSample
*Sample
, uint64_t Repeat
);
661 // Post process the profile after trace aggregation, we will do simple range
662 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
663 virtual void generateUnsymbolizedProfile();
664 void writeUnsymbolizedProfile(StringRef Filename
);
665 void writeUnsymbolizedProfile(raw_fd_ostream
&OS
);
667 // Samples with the repeating time generated by the perf reader
668 AggregatedCounter AggregatedSamples
;
669 // Keep track of all invalid return addresses
670 std::set
<uint64_t> InvalidReturnAddresses
;
671 // PID for the process of interest
672 std::optional
<int32_t> PIDFilter
;
676 The reader of LBR only perf script.
677 A typical LBR sample is like:
678 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
679 ... 0x4005c8/0x4005dc/P/-/-/0
681 class LBRPerfReader
: public PerfScriptReader
{
683 LBRPerfReader(ProfiledBinary
*Binary
, StringRef PerfTrace
,
684 std::optional
<int32_t> PID
)
685 : PerfScriptReader(Binary
, PerfTrace
, PID
) {};
686 // Parse the LBR only sample.
687 void parseSample(TraceStream
&TraceIt
, uint64_t Count
) override
;
691 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
692 which is used to generate CS profile. An example of hybrid sample:
693 4005dc # call stack leaf
695 400684 # call stack root
696 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
697 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
699 class HybridPerfReader
: public PerfScriptReader
{
701 HybridPerfReader(ProfiledBinary
*Binary
, StringRef PerfTrace
,
702 std::optional
<int32_t> PID
)
703 : PerfScriptReader(Binary
, PerfTrace
, PID
) {};
704 // Parse the hybrid sample including the call and LBR line
705 void parseSample(TraceStream
&TraceIt
, uint64_t Count
) override
;
706 void generateUnsymbolizedProfile() override
;
// Unwind the hybrid samples after aggregation
710 void unwindSamples();
714 Format of unsymbolized profile:
716 [frame1 @ frame2 @ ...] # If it's a CS profile
717 number of entries in RangeCounter
722 number of entries in BranchCounter
727 [frame1 @ frame2 @ ...] # Next context
730 Note that non-CS profile doesn't have the empty `[]` context.
732 class UnsymbolizedProfileReader
: public PerfReaderBase
{
734 UnsymbolizedProfileReader(ProfiledBinary
*Binary
, StringRef PerfTrace
)
735 : PerfReaderBase(Binary
, PerfTrace
){};
736 void parsePerfTraces() override
;
739 void readSampleCounters(TraceStream
&TraceIt
, SampleCounter
&SCounters
);
740 void readUnsymbolizedProfile(StringRef Filename
);
742 std::unordered_set
<std::string
> ContextStrSet
;
745 } // end namespace sampleprof
746 } // end namespace llvm