[SCFToGPU] Convert scf.parallel+scf.reduce to gpu.all_reduce (#122782)
[llvm-project.git] / llvm / tools / llvm-profgen / PerfReader.h
bloba3bd7a0a6493e95cf1e92d6b7c68c0f1d6aef247
1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
16 #include <cstdint>
17 #include <fstream>
18 #include <map>
20 using namespace llvm;
21 using namespace sampleprof;
23 namespace llvm {
25 class CleanupInstaller;
27 namespace sampleprof {
29 // Stream based trace line iterator
30 class TraceStream {
31 std::string CurrentLine;
32 std::ifstream Fin;
33 bool IsAtEoF = false;
34 uint64_t LineNumber = 0;
36 public:
37 TraceStream(StringRef Filename) : Fin(Filename.str()) {
38 if (!Fin.good())
39 exitWithError("Error read input perf script file", Filename);
40 advance();
43 StringRef getCurrentLine() {
44 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
45 return CurrentLine;
48 uint64_t getLineNumber() { return LineNumber; }
50 bool isAtEoF() { return IsAtEoF; }
52 // Read the next line
53 void advance() {
54 if (!std::getline(Fin, CurrentLine)) {
55 IsAtEoF = true;
56 return;
58 LineNumber++;
// The type of input format.
enum PerfFormat {
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};
// The type of perfscript content.
enum PerfContent {
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};
78 struct PerfInputFile {
79 std::string InputFile;
80 PerfFormat Format = PerfFormat::UnknownFormat;
81 PerfContent Content = PerfContent::UnknownContent;
84 // The parsed LBR sample entry.
85 struct LBREntry {
86 uint64_t Source = 0;
87 uint64_t Target = 0;
88 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
90 #ifndef NDEBUG
91 void print() const {
92 dbgs() << "from " << format("%#010x", Source) << " to "
93 << format("%#010x", Target);
95 #endif
98 #ifndef NDEBUG
99 static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
100 for (size_t I = 0; I < LBRStack.size(); I++) {
101 dbgs() << "[" << I << "] ";
102 LBRStack[I].print();
103 dbgs() << "\n";
107 static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
108 for (size_t I = 0; I < CallStack.size(); I++) {
109 dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
112 #endif
// Hash interface for generic data of type T.
// T is expected to provide \fn getHashCode and \fn isEqual.
// getHashCode is deliberately called non-virtually: derived classes compute
// their own hash and store it in the base class's HashCode, which avoids a
// vtable call and leaves room for computing the hash incrementally (e.g. a
// rolling hash) while unwinding, since unwinding only changes the leaf of the
// frame stack. \fn isEqual may be virtual and therefore carries some
// overhead; with a better hash function the comparison could be skipped or
// made non-virtual in the future.
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hasher: forwards to the wrapped object's getHashCode().
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // Don't make it virtual for getHashCode
      const uint64_t Code = Key.Data->getHashCode();
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality predicate: forwards to the wrapped object's isEqual().
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Precisely compare the data, vtable will have overhead.
      auto *Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };

  // Raw, non-owning pointer to the wrapped object.
  T *getPtr() const { return Data.get(); }
};
150 struct PerfSample {
151 // LBR stack recorded in FIFO order.
152 SmallVector<LBREntry, 16> LBRStack;
153 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
154 // generation
155 SmallVector<uint64_t, 16> CallStack;
157 virtual ~PerfSample() = default;
158 uint64_t getHashCode() const {
159 // Use simple DJB2 hash
160 auto HashCombine = [](uint64_t H, uint64_t V) {
161 return ((H << 5) + H) + V;
163 uint64_t Hash = 5381;
164 for (const auto &Value : CallStack) {
165 Hash = HashCombine(Hash, Value);
167 for (const auto &Entry : LBRStack) {
168 Hash = HashCombine(Hash, Entry.Source);
169 Hash = HashCombine(Hash, Entry.Target);
171 return Hash;
174 bool isEqual(const PerfSample *Other) const {
175 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
176 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
178 if (CallStack.size() != OtherCallStack.size() ||
179 LBRStack.size() != OtherLBRStack.size())
180 return false;
182 if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
183 return false;
185 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
186 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
187 LBRStack[I].Target != OtherLBRStack[I].Target)
188 return false;
190 return true;
193 #ifndef NDEBUG
194 uint64_t Linenum = 0;
196 void print() const {
197 dbgs() << "Line " << Linenum << "\n";
198 dbgs() << "LBR stack\n";
199 printLBRStack(LBRStack);
200 dbgs() << "Call stack\n";
201 printCallStack(CallStack);
203 #endif
205 // After parsing the sample, we record the samples by aggregating them
206 // into this counter. The key stores the sample data and the value is
207 // the number of times that sample was repeated.
208 using AggregatedCounter =
209 std::unordered_map<Hashable<PerfSample>, uint64_t,
210 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
// Small vector of three-field uint64_t records. UnwindState::ProfiledFrame
// stores (Start, End, Count) range records and (Source, Target, Count) branch
// records in vectors of this type.
212 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
214 inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
215 ProfiledBinary *Binary) {
216 // Start bigger than End is considered invalid.
217 // LBR ranges cross the unconditional jmp are also assumed invalid.
218 // It's found that perf data may contain duplicate LBR entries that could form
219 // a range that does not reflect real execution flow on some Intel targets,
220 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
221 // cannot be a linear execution range that spans over unconditional jmp.
222 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
225 // The state for the unwinder, it doesn't hold the data but only keep the
226 // pointer/index of the data, While unwinding, the CallStack is changed
227 // dynamicially and will be recorded as the context of the sample
228 struct UnwindState {
229 // Profiled binary that current frame address belongs to
230 const ProfiledBinary *Binary;
231 // Call stack trie node
232 struct ProfiledFrame {
233 const uint64_t Address = DummyRoot;
234 ProfiledFrame *Parent;
235 SampleVector RangeSamples;
236 SampleVector BranchSamples;
237 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
239 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
240 : Address(Addr), Parent(P) {}
241 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
242 assert(Address && "Address can't be zero!");
243 auto Ret = Children.emplace(
244 Address, std::make_unique<ProfiledFrame>(Address, this));
245 return Ret.first->second.get();
247 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
248 RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
250 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
251 BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
253 bool isDummyRoot() { return Address == DummyRoot; }
254 bool isExternalFrame() { return Address == ExternalAddr; }
255 bool isLeafFrame() { return Children.empty(); }
258 ProfiledFrame DummyTrieRoot;
259 ProfiledFrame *CurrentLeafFrame;
260 // Used to fall through the LBR stack
261 uint32_t LBRIndex = 0;
262 // Reference to PerfSample.LBRStack
263 const SmallVector<LBREntry, 16> &LBRStack;
264 // Used to iterate the address range
265 InstructionPointer InstPtr;
266 // Indicate whether unwinding is currently in a bad state which requires to
267 // skip all subsequent unwinding.
268 bool Invalid = false;
269 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
270 : Binary(Binary), LBRStack(Sample->LBRStack),
271 InstPtr(Binary, Sample->CallStack.front()) {
272 initFrameTrie(Sample->CallStack);
275 bool validateInitialState() {
276 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
277 uint64_t LeafAddr = CurrentLeafFrame->Address;
278 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
279 "External leading LBR should match the leaf frame.");
281 // When we take a stack sample, ideally the sampling distance between the
282 // leaf IP of stack and the last LBR target shouldn't be very large.
283 // Use a heuristic size (0x100) to filter out broken records.
284 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
285 WithColor::warning() << "Bogus trace: stack tip = "
286 << format("%#010x", LeafAddr)
287 << ", LBR tip = " << format("%#010x\n", LBRLeaf);
288 return false;
290 return true;
293 void checkStateConsistency() {
294 assert(InstPtr.Address == CurrentLeafFrame->Address &&
295 "IP should align with context leaf");
298 void setInvalid() { Invalid = true; }
299 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
300 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
301 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
302 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
303 bool IsLastLBR() const { return LBRIndex == 0; }
304 bool getLBRStackSize() const { return LBRStack.size(); }
305 void advanceLBR() { LBRIndex++; }
306 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
308 void pushFrame(uint64_t Address) {
309 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
312 void switchToFrame(uint64_t Address) {
313 if (CurrentLeafFrame->Address == Address)
314 return;
315 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
318 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
320 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
322 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
323 ProfiledFrame *Cur = &DummyTrieRoot;
324 for (auto Address : reverse(CallStack)) {
325 Cur = Cur->getOrCreateChildFrame(Address);
327 CurrentLeafFrame = Cur;
330 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
// Base class for sample counter key with context
struct ContextKey {
  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };

  // Cached hash; zero means "not generated yet".
  uint64_t HashCode = 0;
  const ContextKind Kind;

  ContextKey(ContextKind K) : Kind(K) {}
  virtual ~ContextKey() = default;

  ContextKind getKind() const { return Kind; }

  // Return the cached hash, asking the derived class to generate it first if
  // it is still zero.
  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }

  virtual void genHashCode() = 0;

  // Default comparison: keys are equal when their cached hashes match.
  virtual bool isEqual(const ContextKey *K) const {
    const uint64_t OtherHash = K->HashCode;
    return HashCode == OtherHash;
  }
};
354 // String based context id
355 struct StringBasedCtxKey : public ContextKey {
356 SampleContextFrameVector Context;
358 bool WasLeafInlined;
359 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
360 static bool classof(const ContextKey *K) {
361 return K->getKind() == CK_StringBased;
364 bool isEqual(const ContextKey *K) const override {
365 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
366 return Context == Other->Context;
369 void genHashCode() override {
370 HashCode = hash_value(SampleContextFrames(Context));
374 // Address-based context id
375 struct AddrBasedCtxKey : public ContextKey {
376 SmallVector<uint64_t, 16> Context;
378 bool WasLeafInlined;
379 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
380 static bool classof(const ContextKey *K) {
381 return K->getKind() == CK_AddrBased;
384 bool isEqual(const ContextKey *K) const override {
385 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
386 return Context == Other->Context;
389 void genHashCode() override {
390 HashCode = hash_combine_range(Context.begin(), Context.end());
// The counter of branch samples for one function indexed by the branch,
// which is represented as the source and target offset pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Wrapper for sample counters including range counter and branch counter
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add \p Repeat to the count of the range [Start, End].
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    uint64_t &Count = RangeCounter[std::make_pair(Start, End)];
    Count += Repeat;
  }

  // Add \p Repeat to the count of the branch Source -> Target.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    uint64_t &Count = BranchCounter[std::make_pair(Source, Target)];
    Count += Repeat;
  }
};
414 // Sample counter with context to support context-sensitive profile.
// Maps a Hashable<ContextKey> to its SampleCounter, using the key type's own
// Hash and Equal functors for hashing and comparison.
415 using ContextSampleCounterMap =
416 std::unordered_map<Hashable<ContextKey>, SampleCounter,
417 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
419 struct FrameStack {
420 SmallVector<uint64_t, 16> Stack;
421 ProfiledBinary *Binary;
422 FrameStack(ProfiledBinary *B) : Binary(B) {}
423 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
424 assert(!Cur->isExternalFrame() &&
425 "External frame's not expected for context stack.");
426 Stack.push_back(Cur->Address);
427 return true;
430 void popFrame() {
431 if (!Stack.empty())
432 Stack.pop_back();
434 std::shared_ptr<StringBasedCtxKey> getContextKey();
437 struct AddressStack {
438 SmallVector<uint64_t, 16> Stack;
439 ProfiledBinary *Binary;
440 AddressStack(ProfiledBinary *B) : Binary(B) {}
441 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
442 assert(!Cur->isExternalFrame() &&
443 "External frame's not expected for context stack.");
444 Stack.push_back(Cur->Address);
445 return true;
448 void popFrame() {
449 if (!Stack.empty())
450 Stack.pop_back();
452 std::shared_ptr<AddrBasedCtxKey> getContextKey();
456 As in hybrid sample we have a group of LBRs and the most recent sampling call
457 stack, we can walk through those LBRs to infer more call stacks which would be
458 used as context for profile. VirtualUnwinder is the class to do the call stack
459 unwinding based on LBR state. Two types of unwinding are processed here:
460 1) LBR unwinding and 2) linear range unwinding.
461 Specifically, for each LBR entry(can be classified into call, return, regular
462 branch), LBR unwinding will replay the operation by pushing, popping or
463 switching leaf frame towards the call stack and since the initial call stack
464 is most recently sampled, the replay should be in anti-execution order, i.e. for
465 the regular case, pop the call stack when LBR is call, push frame on call stack
466 when LBR is return. After each LBR processed, it also needs to align with the
467 next LBR by going through instructions from previous LBR's target to current
468 LBR's source, which is the linear unwinding. As instruction from linear range
469 can come from different function by inlining, linear unwinding will do the range
470 splitting and record counters by the range with same inline context. Over those
471 unwinding process we will record each call stack as context id and LBR/linear
472 range as sample counter for further CS profile generation.
474 class VirtualUnwinder {
475 public:
476 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
477 : CtxCounterMap(Counter), Binary(B) {}
478 bool unwind(const PerfSample *Sample, uint64_t Repeat);
479 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
481 uint64_t NumTotalBranches = 0;
482 uint64_t NumExtCallBranch = 0;
483 uint64_t NumMissingExternalFrame = 0;
484 uint64_t NumMismatchedProEpiBranch = 0;
485 uint64_t NumMismatchedExtCallBranch = 0;
486 uint64_t NumUnpairedExtAddr = 0;
487 uint64_t NumPairedExtAddr = 0;
489 private:
490 bool isSourceExternal(UnwindState &State) const {
491 return State.getCurrentLBRSource() == ExternalAddr;
494 bool isTargetExternal(UnwindState &State) const {
495 return State.getCurrentLBRTarget() == ExternalAddr;
498 // Determine whether the return source is from external code by checking if
499 // the target's the next inst is a call inst.
500 bool isReturnFromExternal(UnwindState &State) const {
501 return isSourceExternal(State) &&
502 (Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0);
505 // If the source is external address but it's not the `return` case, treat it
506 // as a call from external.
507 bool isCallFromExternal(UnwindState &State) const {
508 return isSourceExternal(State) &&
509 Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) == 0;
512 bool isCallState(UnwindState &State) const {
513 // The tail call frame is always missing here in stack sample, we will
514 // use a specific tail call tracker to infer it.
515 if (!isValidState(State))
516 return false;
518 if (Binary->addressIsCall(State.getCurrentLBRSource()))
519 return true;
521 return isCallFromExternal(State);
524 bool isReturnState(UnwindState &State) const {
525 if (!isValidState(State))
526 return false;
528 // Simply check addressIsReturn, as ret is always reliable, both for
529 // regular call and tail call.
530 if (Binary->addressIsReturn(State.getCurrentLBRSource()))
531 return true;
533 return isReturnFromExternal(State);
536 bool isValidState(UnwindState &State) const { return !State.Invalid; }
538 void unwindCall(UnwindState &State);
539 void unwindLinear(UnwindState &State, uint64_t Repeat);
540 void unwindReturn(UnwindState &State);
541 void unwindBranch(UnwindState &State);
543 template <typename T>
544 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
545 // Collect each samples on trie node by DFS traversal
546 template <typename T>
547 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
548 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
550 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
551 uint64_t Repeat);
552 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
553 uint64_t Repeat);
555 ContextSampleCounterMap *CtxCounterMap;
556 // Profiled binary that current frame address belongs to
557 ProfiledBinary *Binary;
558 // Keep track of all untracked callsites
559 std::set<uint64_t> UntrackedCallsites;
562 // Read perf trace to parse the events and samples.
563 class PerfReaderBase {
564 public:
565 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
566 : Binary(B), PerfTraceFile(PerfTrace) {
567 // Initialize the base address to preferred address.
568 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
570 virtual ~PerfReaderBase() = default;
571 static std::unique_ptr<PerfReaderBase>
572 create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
573 std::optional<int32_t> PIDFilter);
575 // Entry of the reader to parse multiple perf traces
576 virtual void parsePerfTraces() = 0;
577 const ContextSampleCounterMap &getSampleCounters() const {
578 return SampleCounters;
580 bool profileIsCS() { return ProfileIsCS; }
582 protected:
583 ProfiledBinary *Binary = nullptr;
584 StringRef PerfTraceFile;
586 ContextSampleCounterMap SampleCounters;
587 bool ProfileIsCS = false;
589 uint64_t NumTotalSample = 0;
590 uint64_t NumLeafExternalFrame = 0;
591 uint64_t NumLeadingOutgoingLBR = 0;
594 // Read perf script to parse the events and samples.
// Common base of the perf-script readers declared in this header
// (LBRPerfReader, HybridPerfReader), which override the two-argument
// parseSample() for their sample layout.
595 class PerfScriptReader : public PerfReaderBase {
596 public:
// Forwards the binary and trace name to PerfReaderBase and stores the
// optional PID filter.
597 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
598 std::optional<int32_t> PID)
599 : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
601 // Entry of the reader to parse multiple perf traces
602 void parsePerfTraces() override;
603 // Generate perf script from perf data
604 static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
605 bool SkipPID, PerfInputFile &File,
606 std::optional<int32_t> PIDFilter);
607 // Extract perf script type by peeking at the input
608 static PerfContent checkPerfScriptType(StringRef FileName);
610 // Cleanup installers for temporary files created by perf script command.
611 // Those files will be automatically removed when running destructor or
612 // receiving signals.
613 static SmallVector<CleanupInstaller, 2> TempFileCleanups;
615 protected:
616 // The parsed MMap event
617 struct MMapEvent {
618 int64_t PID = 0;
619 uint64_t Address = 0;
620 uint64_t Size = 0;
621 uint64_t Offset = 0;
622 StringRef BinaryPath;
625 // Check whether a given line is LBR sample
626 static bool isLBRSample(StringRef Line);
627 // Check whether a given line is MMAP event
628 static bool isMMapEvent(StringRef Line);
629 // Parse a single line of a PERF_RECORD_MMAP event looking for a
630 // mapping between the binary name and its memory layout.
631 static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
632 MMapEvent &MMap);
633 // Update base address based on mmap events
634 void updateBinaryAddress(const MMapEvent &Event);
635 // Parse mmap event and update binary address
636 void parseMMapEvent(TraceStream &TraceIt);
637 // Parse perf events/samples and do aggregation
638 void parseAndAggregateTrace();
639 // Parse either an MMAP event or a perf sample
640 void parseEventOrSample(TraceStream &TraceIt);
641 // Warn if the relevant mmap event is missing.
642 void warnIfMissingMMap();
643 // Emit accumulated warnings.
644 void warnTruncatedStack();
645 // Warn if range is invalid.
646 void warnInvalidRange();
647 // Extract call stack from the perf trace lines
648 bool extractCallstack(TraceStream &TraceIt,
649 SmallVectorImpl<uint64_t> &CallStack);
650 // Extract LBR stack from one perf trace line
651 bool extractLBRStack(TraceStream &TraceIt,
652 SmallVectorImpl<LBREntry> &LBRStack);
653 uint64_t parseAggregatedCount(TraceStream &TraceIt);
654 // Parse one sample from multiple perf lines, override this for different
655 // sample type
656 void parseSample(TraceStream &TraceIt);
657 // An aggregated count is given to indicate how many times the sample is
658 // repeated. The default implementation is empty.
659 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
660 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
661 // Post process the profile after trace aggregation, we will do simple range
662 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
663 virtual void generateUnsymbolizedProfile();
664 void writeUnsymbolizedProfile(StringRef Filename);
665 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
667 // Samples with the repeating time generated by the perf reader
668 AggregatedCounter AggregatedSamples;
669 // Keep track of all invalid return addresses
670 std::set<uint64_t> InvalidReturnAddresses;
671 // PID for the process of interest
672 std::optional<int32_t> PIDFilter;
676 The reader of LBR only perf script.
677 A typical LBR sample is like:
678 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
679 ... 0x4005c8/0x4005dc/P/-/-/0
// Reader for LBR-only perf script. Construction is forwarded unchanged to
// PerfScriptReader; only the counted parseSample() is overridden.
681 class LBRPerfReader : public PerfScriptReader {
682 public:
683 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
684 std::optional<int32_t> PID)
685 : PerfScriptReader(Binary, PerfTrace, PID) {};
686 // Parse the LBR only sample.
687 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
691 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
692 which is used to generate CS profile. An example of hybrid sample:
693 4005dc # call stack leaf
694 400634
695 400684 # call stack root
696 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
697 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
// Reader for hybrid (call stack + LBR) perf script. Construction is forwarded
// unchanged to PerfScriptReader; sample parsing and unsymbolized profile
// generation are overridden.
699 class HybridPerfReader : public PerfScriptReader {
700 public:
701 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
702 std::optional<int32_t> PID)
703 : PerfScriptReader(Binary, PerfTrace, PID) {};
704 // Parse the hybrid sample including the call and LBR line
705 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
706 void generateUnsymbolizedProfile() override;
708 private:
709 // Unwind the hybrid samples after aggregation
710 void unwindSamples();
714 Format of unsymbolized profile:
716 [frame1 @ frame2 @ ...] # If it's a CS profile
717 number of entries in RangeCounter
718 from_1-to_1:count_1
719 from_2-to_2:count_2
720 ......
721 from_n-to_n:count_n
722 number of entries in BranchCounter
723 src_1->dst_1:count_1
724 src_2->dst_2:count_2
725 ......
726 src_n->dst_n:count_n
727 [frame1 @ frame2 @ ...] # Next context
728 ......
730 Note that non-CS profile doesn't have the empty `[]` context.
// Reader for an unsymbolized profile. Derives directly from PerfReaderBase
// and implements parsePerfTraces().
732 class UnsymbolizedProfileReader : public PerfReaderBase {
733 public:
734 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
735 : PerfReaderBase(Binary, PerfTrace){};
736 void parsePerfTraces() override;
738 private:
// Read one set of sample counters from the trace stream into SCounters.
739 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
740 void readUnsymbolizedProfile(StringRef Filename);
// Set of context strings. NOTE(review): presumably owns the strings that
// parsed context keys refer to; confirm against the implementation file.
742 std::unordered_set<std::string> ContextStrSet;
745 } // end namespace sampleprof
746 } // end namespace llvm
748 #endif