[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / llvm / tools / llvm-profgen / PerfReader.h
blobe9f619350bf970861f99190a8f0bdb50fdae77d1
1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
16 #include <cstdint>
17 #include <fstream>
18 #include <map>
20 using namespace llvm;
21 using namespace sampleprof;
23 namespace llvm {
24 namespace sampleprof {
26 // Stream based trace line iterator
27 class TraceStream {
28 std::string CurrentLine;
29 std::ifstream Fin;
30 bool IsAtEoF = false;
31 uint64_t LineNumber = 0;
33 public:
34 TraceStream(StringRef Filename) : Fin(Filename.str()) {
35 if (!Fin.good())
36 exitWithError("Error read input perf script file", Filename);
37 advance();
40 StringRef getCurrentLine() {
41 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
42 return CurrentLine;
45 uint64_t getLineNumber() { return LineNumber; }
47 bool isAtEoF() { return IsAtEoF; }
49 // Read the next line
50 void advance() {
51 if (!std::getline(Fin, CurrentLine)) {
52 IsAtEoF = true;
53 return;
55 LineNumber++;
// The type of input format.
enum PerfFormat {
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by the `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};
// The type of perfscript content.
enum PerfContent {
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};
75 struct PerfInputFile {
76 std::string InputFile;
77 PerfFormat Format = PerfFormat::UnknownFormat;
78 PerfContent Content = PerfContent::UnknownContent;
81 // The parsed LBR sample entry.
82 struct LBREntry {
83 uint64_t Source = 0;
84 uint64_t Target = 0;
85 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
87 #ifndef NDEBUG
88 void print() const {
89 dbgs() << "from " << format("%#010x", Source) << " to "
90 << format("%#010x", Target);
92 #endif
95 #ifndef NDEBUG
96 static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
97 for (size_t I = 0; I < LBRStack.size(); I++) {
98 dbgs() << "[" << I << "] ";
99 LBRStack[I].print();
100 dbgs() << "\n";
104 static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
105 for (size_t I = 0; I < CallStack.size(); I++) {
106 dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
109 #endif
// Hash adaptor for generic data of type T, which must provide a
// \fn getHashCode and a \fn isEqual.
// getHashCode is deliberately non-virtual to avoid the vtable call: the
// derived class computes its hash explicitly and stores it in the base class's
// HashCode. That also allows the hash to be computed incrementally (like a
// rolling hash) during frame stack unwinding, since unwinding only changes the
// leaf of the frame stack. \fn isEqual is virtual and therefore has some perf
// overhead; with a better hash function in the future it could be skipped or
// made non-virtual (e.g. ignore the comparison if the hash conflict
// probability is low).
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hasher functor: forwards to the wrapped object's getHashCode.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // getHashCode is intentionally a non-virtual call.
      const uint64_t Code = Key.Data->getHashCode();
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality functor: forwards to the wrapped object's isEqual.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Exact comparison of the data; goes through the vtable.
      const T *Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };

  // Raw, non-owning pointer to the wrapped data.
  T *getPtr() const { return Data.get(); }
};
147 struct PerfSample {
148 // LBR stack recorded in FIFO order.
149 SmallVector<LBREntry, 16> LBRStack;
150 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
151 // generation
152 SmallVector<uint64_t, 16> CallStack;
154 virtual ~PerfSample() = default;
155 uint64_t getHashCode() const {
156 // Use simple DJB2 hash
157 auto HashCombine = [](uint64_t H, uint64_t V) {
158 return ((H << 5) + H) + V;
160 uint64_t Hash = 5381;
161 for (const auto &Value : CallStack) {
162 Hash = HashCombine(Hash, Value);
164 for (const auto &Entry : LBRStack) {
165 Hash = HashCombine(Hash, Entry.Source);
166 Hash = HashCombine(Hash, Entry.Target);
168 return Hash;
171 bool isEqual(const PerfSample *Other) const {
172 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
173 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
175 if (CallStack.size() != OtherCallStack.size() ||
176 LBRStack.size() != OtherLBRStack.size())
177 return false;
179 if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
180 return false;
182 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
183 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
184 LBRStack[I].Target != OtherLBRStack[I].Target)
185 return false;
187 return true;
190 #ifndef NDEBUG
191 uint64_t Linenum = 0;
193 void print() const {
194 dbgs() << "Line " << Linenum << "\n";
195 dbgs() << "LBR stack\n";
196 printLBRStack(LBRStack);
197 dbgs() << "Call stack\n";
198 printCallStack(CallStack);
200 #endif
202 // After parsing the sample, we record the samples by aggregating them
203 // into this counter. The key stores the sample data and the value is
204 // the sample repeat times.
205 using AggregatedCounter =
206 std::unordered_map<Hashable<PerfSample>, uint64_t,
207 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
209 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
211 inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
212 ProfiledBinary *Binary) {
213 // Start bigger than End is considered invalid.
214 // LBR ranges cross the unconditional jmp are also assumed invalid.
215 // It's found that perf data may contain duplicate LBR entries that could form
216 // a range that does not reflect real execution flow on some Intel targets,
217 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
218 // cannot be a linear execution range that spans over unconditional jmp.
219 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
222 // The state for the unwinder, it doesn't hold the data but only keep the
223 // pointer/index of the data, While unwinding, the CallStack is changed
224 // dynamicially and will be recorded as the context of the sample
225 struct UnwindState {
226 // Profiled binary that current frame address belongs to
227 const ProfiledBinary *Binary;
228 // Call stack trie node
229 struct ProfiledFrame {
230 const uint64_t Address = DummyRoot;
231 ProfiledFrame *Parent;
232 SampleVector RangeSamples;
233 SampleVector BranchSamples;
234 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
236 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
237 : Address(Addr), Parent(P) {}
238 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
239 assert(Address && "Address can't be zero!");
240 auto Ret = Children.emplace(
241 Address, std::make_unique<ProfiledFrame>(Address, this));
242 return Ret.first->second.get();
244 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
245 RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
247 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
248 BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
250 bool isDummyRoot() { return Address == DummyRoot; }
251 bool isExternalFrame() { return Address == ExternalAddr; }
252 bool isLeafFrame() { return Children.empty(); }
255 ProfiledFrame DummyTrieRoot;
256 ProfiledFrame *CurrentLeafFrame;
257 // Used to fall through the LBR stack
258 uint32_t LBRIndex = 0;
259 // Reference to PerfSample.LBRStack
260 const SmallVector<LBREntry, 16> &LBRStack;
261 // Used to iterate the address range
262 InstructionPointer InstPtr;
263 // Indicate whether unwinding is currently in a bad state which requires to
264 // skip all subsequent unwinding.
265 bool Invalid = false;
266 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
267 : Binary(Binary), LBRStack(Sample->LBRStack),
268 InstPtr(Binary, Sample->CallStack.front()) {
269 initFrameTrie(Sample->CallStack);
272 bool validateInitialState() {
273 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
274 uint64_t LeafAddr = CurrentLeafFrame->Address;
275 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
276 "External leading LBR should match the leaf frame.");
278 // When we take a stack sample, ideally the sampling distance between the
279 // leaf IP of stack and the last LBR target shouldn't be very large.
280 // Use a heuristic size (0x100) to filter out broken records.
281 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
282 WithColor::warning() << "Bogus trace: stack tip = "
283 << format("%#010x", LeafAddr)
284 << ", LBR tip = " << format("%#010x\n", LBRLeaf);
285 return false;
287 return true;
290 void checkStateConsistency() {
291 assert(InstPtr.Address == CurrentLeafFrame->Address &&
292 "IP should align with context leaf");
295 void setInvalid() { Invalid = true; }
296 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
297 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
298 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
299 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
300 bool IsLastLBR() const { return LBRIndex == 0; }
301 bool getLBRStackSize() const { return LBRStack.size(); }
302 void advanceLBR() { LBRIndex++; }
303 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
305 void pushFrame(uint64_t Address) {
306 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
309 void switchToFrame(uint64_t Address) {
310 if (CurrentLeafFrame->Address == Address)
311 return;
312 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
315 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
317 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
319 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
320 ProfiledFrame *Cur = &DummyTrieRoot;
321 for (auto Address : reverse(CallStack)) {
322 Cur = Cur->getOrCreateChildFrame(Address);
324 CurrentLeafFrame = Cur;
327 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
// Base class for sample counter key with context
struct ContextKey {
  // Cached hash code; zero means it still has to be generated.
  uint64_t HashCode = 0;
  virtual ~ContextKey() = default;

  // Return the cached hash, asking the derived class to generate it first
  // while the cache is still zero.
  uint64_t getHashCode() {
    if (HashCode != 0)
      return HashCode;
    genHashCode();
    return HashCode;
  }

  // Implemented by derived keys to fill in HashCode.
  virtual void genHashCode() = 0;

  // Default equality: compares the cached hash codes only.
  virtual bool isEqual(const ContextKey *K) const {
    return K->HashCode == HashCode;
  }

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K) {}
};
351 // String based context id
352 struct StringBasedCtxKey : public ContextKey {
353 SampleContextFrameVector Context;
355 bool WasLeafInlined;
356 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
357 static bool classof(const ContextKey *K) {
358 return K->getKind() == CK_StringBased;
361 bool isEqual(const ContextKey *K) const override {
362 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
363 return Context == Other->Context;
366 void genHashCode() override {
367 HashCode = hash_value(SampleContextFrames(Context));
371 // Address-based context id
372 struct AddrBasedCtxKey : public ContextKey {
373 SmallVector<uint64_t, 16> Context;
375 bool WasLeafInlined;
376 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
377 static bool classof(const ContextKey *K) {
378 return K->getKind() == CK_AddrBased;
381 bool isEqual(const ContextKey *K) const override {
382 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
383 return Context == Other->Context;
386 void genHashCode() override {
387 HashCode = hash_combine_range(Context.begin(), Context.end());
// Branch sample counts for one function, keyed by the branch expressed as a
// (source offset, target offset) pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// Range sample counts for one function, keyed by the range expressed as a
// (start offset, end offset) pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Bundles the range counter and the branch counter.
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add Repeat to the count of range [Start, End]; Start must not exceed End.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    uint64_t &Count = RangeCounter[std::make_pair(Start, End)];
    Count += Repeat;
  }

  // Add Repeat to the count of the branch Source -> Target.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    uint64_t &Count = BranchCounter[std::make_pair(Source, Target)];
    Count += Repeat;
  }
};
411 // Sample counter with context to support context-sensitive profile
412 using ContextSampleCounterMap =
413 std::unordered_map<Hashable<ContextKey>, SampleCounter,
414 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
416 struct FrameStack {
417 SmallVector<uint64_t, 16> Stack;
418 ProfiledBinary *Binary;
419 FrameStack(ProfiledBinary *B) : Binary(B) {}
420 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
421 assert(!Cur->isExternalFrame() &&
422 "External frame's not expected for context stack.");
423 Stack.push_back(Cur->Address);
424 return true;
427 void popFrame() {
428 if (!Stack.empty())
429 Stack.pop_back();
431 std::shared_ptr<StringBasedCtxKey> getContextKey();
434 struct AddressStack {
435 SmallVector<uint64_t, 16> Stack;
436 ProfiledBinary *Binary;
437 AddressStack(ProfiledBinary *B) : Binary(B) {}
438 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
439 assert(!Cur->isExternalFrame() &&
440 "External frame's not expected for context stack.");
441 Stack.push_back(Cur->Address);
442 return true;
445 void popFrame() {
446 if (!Stack.empty())
447 Stack.pop_back();
449 std::shared_ptr<AddrBasedCtxKey> getContextKey();
453 As in hybrid sample we have a group of LBRs and the most recent sampling call
454 stack, we can walk through those LBRs to infer more call stacks which would be
455 used as context for profile. VirtualUnwinder is the class to do the call stack
456 unwinding based on LBR state. Two types of unwinding are processd here:
457 1) LBR unwinding and 2) linear range unwinding.
458 Specifically, for each LBR entry(can be classified into call, return, regular
459 branch), LBR unwinding will replay the operation by pushing, popping or
460 switching leaf frame towards the call stack and since the initial call stack
461 is most recently sampled, the replay should be in anti-execution order, i.e. for
462 the regular case, pop the call stack when LBR is call, push frame on call stack
463 when LBR is return. After each LBR processed, it also needs to align with the
464 next LBR by going through instructions from previous LBR's target to current
465 LBR's source, which is the linear unwinding. As instruction from linear range
466 can come from different function by inlining, linear unwinding will do the range
467 splitting and record counters by the range with same inline context. Over those
468 unwinding process we will record each call stack as context id and LBR/linear
469 range as sample counter for further CS profile generation.
471 class VirtualUnwinder {
472 public:
473 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
474 : CtxCounterMap(Counter), Binary(B) {}
475 bool unwind(const PerfSample *Sample, uint64_t Repeat);
476 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
478 uint64_t NumTotalBranches = 0;
479 uint64_t NumExtCallBranch = 0;
480 uint64_t NumMissingExternalFrame = 0;
481 uint64_t NumMismatchedProEpiBranch = 0;
482 uint64_t NumMismatchedExtCallBranch = 0;
483 uint64_t NumUnpairedExtAddr = 0;
484 uint64_t NumPairedExtAddr = 0;
486 private:
487 bool isSourceExternal(UnwindState &State) const {
488 return State.getCurrentLBRSource() == ExternalAddr;
491 bool isTargetExternal(UnwindState &State) const {
492 return State.getCurrentLBRTarget() == ExternalAddr;
495 // Determine whether the return source is from external code by checking if
496 // the target's the next inst is a call inst.
497 bool isReturnFromExternal(UnwindState &State) const {
498 return isSourceExternal(State) &&
499 (Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0);
502 // If the source is external address but it's not the `return` case, treat it
503 // as a call from external.
504 bool isCallFromExternal(UnwindState &State) const {
505 return isSourceExternal(State) &&
506 Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) == 0;
509 bool isCallState(UnwindState &State) const {
510 // The tail call frame is always missing here in stack sample, we will
511 // use a specific tail call tracker to infer it.
512 if (!isValidState(State))
513 return false;
515 if (Binary->addressIsCall(State.getCurrentLBRSource()))
516 return true;
518 return isCallFromExternal(State);
521 bool isReturnState(UnwindState &State) const {
522 if (!isValidState(State))
523 return false;
525 // Simply check addressIsReturn, as ret is always reliable, both for
526 // regular call and tail call.
527 if (Binary->addressIsReturn(State.getCurrentLBRSource()))
528 return true;
530 return isReturnFromExternal(State);
533 bool isValidState(UnwindState &State) const { return !State.Invalid; }
535 void unwindCall(UnwindState &State);
536 void unwindLinear(UnwindState &State, uint64_t Repeat);
537 void unwindReturn(UnwindState &State);
538 void unwindBranch(UnwindState &State);
540 template <typename T>
541 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
542 // Collect each samples on trie node by DFS traversal
543 template <typename T>
544 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
545 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
547 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
548 uint64_t Repeat);
549 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
550 uint64_t Repeat);
552 ContextSampleCounterMap *CtxCounterMap;
553 // Profiled binary that current frame address belongs to
554 ProfiledBinary *Binary;
555 // Keep track of all untracked callsites
556 std::set<uint64_t> UntrackedCallsites;
559 // Read perf trace to parse the events and samples.
560 class PerfReaderBase {
561 public:
562 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
563 : Binary(B), PerfTraceFile(PerfTrace) {
564 // Initialize the base address to preferred address.
565 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
567 virtual ~PerfReaderBase() = default;
568 static std::unique_ptr<PerfReaderBase>
569 create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
570 std::optional<uint32_t> PIDFilter);
572 // Entry of the reader to parse multiple perf traces
573 virtual void parsePerfTraces() = 0;
574 const ContextSampleCounterMap &getSampleCounters() const {
575 return SampleCounters;
577 bool profileIsCS() { return ProfileIsCS; }
579 protected:
580 ProfiledBinary *Binary = nullptr;
581 StringRef PerfTraceFile;
583 ContextSampleCounterMap SampleCounters;
584 bool ProfileIsCS = false;
586 uint64_t NumTotalSample = 0;
587 uint64_t NumLeafExternalFrame = 0;
588 uint64_t NumLeadingOutgoingLBR = 0;
591 // Read perf script to parse the events and samples.
592 class PerfScriptReader : public PerfReaderBase {
593 public:
594 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
595 std::optional<uint32_t> PID)
596 : PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
598 // Entry of the reader to parse multiple perf traces
599 void parsePerfTraces() override;
600 // Generate perf script from perf data
601 static PerfInputFile
602 convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
603 std::optional<uint32_t> PIDFilter);
604 // Extract perf script type by peaking at the input
605 static PerfContent checkPerfScriptType(StringRef FileName);
607 protected:
608 // The parsed MMap event
609 struct MMapEvent {
610 uint64_t PID = 0;
611 uint64_t Address = 0;
612 uint64_t Size = 0;
613 uint64_t Offset = 0;
614 StringRef BinaryPath;
617 // Check whether a given line is LBR sample
618 static bool isLBRSample(StringRef Line);
619 // Check whether a given line is MMAP event
620 static bool isMMap2Event(StringRef Line);
621 // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
622 // mapping between the binary name and its memory layout.
623 static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
624 MMapEvent &MMap);
625 // Update base address based on mmap events
626 void updateBinaryAddress(const MMapEvent &Event);
627 // Parse mmap event and update binary address
628 void parseMMap2Event(TraceStream &TraceIt);
629 // Parse perf events/samples and do aggregation
630 void parseAndAggregateTrace();
631 // Parse either an MMAP event or a perf sample
632 void parseEventOrSample(TraceStream &TraceIt);
633 // Warn if the relevant mmap event is missing.
634 void warnIfMissingMMap();
635 // Emit accumulate warnings.
636 void warnTruncatedStack();
637 // Warn if range is invalid.
638 void warnInvalidRange();
639 // Extract call stack from the perf trace lines
640 bool extractCallstack(TraceStream &TraceIt,
641 SmallVectorImpl<uint64_t> &CallStack);
642 // Extract LBR stack from one perf trace line
643 bool extractLBRStack(TraceStream &TraceIt,
644 SmallVectorImpl<LBREntry> &LBRStack);
645 uint64_t parseAggregatedCount(TraceStream &TraceIt);
646 // Parse one sample from multiple perf lines, override this for different
647 // sample type
648 void parseSample(TraceStream &TraceIt);
649 // An aggregated count is given to indicate how many times the sample is
650 // repeated.
651 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
652 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
653 // Post process the profile after trace aggregation, we will do simple range
654 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
655 virtual void generateUnsymbolizedProfile();
656 void writeUnsymbolizedProfile(StringRef Filename);
657 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
659 // Samples with the repeating time generated by the perf reader
660 AggregatedCounter AggregatedSamples;
661 // Keep track of all invalid return addresses
662 std::set<uint64_t> InvalidReturnAddresses;
663 // PID for the process of interest
664 std::optional<uint32_t> PIDFilter;
668 The reader of LBR only perf script.
669 A typical LBR sample is like:
670 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
671 ... 0x4005c8/0x4005dc/P/-/-/0
673 class LBRPerfReader : public PerfScriptReader {
674 public:
675 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
676 std::optional<uint32_t> PID)
677 : PerfScriptReader(Binary, PerfTrace, PID){};
678 // Parse the LBR only sample.
679 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
683 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
684 which is used to generate CS profile. An example of hybrid sample:
685 4005dc # call stack leaf
686 400634
687 400684 # call stack root
688 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
689 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
691 class HybridPerfReader : public PerfScriptReader {
692 public:
693 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
694 std::optional<uint32_t> PID)
695 : PerfScriptReader(Binary, PerfTrace, PID){};
696 // Parse the hybrid sample including the call and LBR line
697 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
698 void generateUnsymbolizedProfile() override;
700 private:
701 // Unwind the hybrid samples after aggregration
702 void unwindSamples();
706 Format of unsymbolized profile:
708 [frame1 @ frame2 @ ...] # If it's a CS profile
709 number of entries in RangeCounter
710 from_1-to_1:count_1
711 from_2-to_2:count_2
712 ......
713 from_n-to_n:count_n
714 number of entries in BranchCounter
715 src_1->dst_1:count_1
716 src_2->dst_2:count_2
717 ......
718 src_n->dst_n:count_n
719 [frame1 @ frame2 @ ...] # Next context
720 ......
722 Note that non-CS profile doesn't have the empty `[]` context.
724 class UnsymbolizedProfileReader : public PerfReaderBase {
725 public:
726 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
727 : PerfReaderBase(Binary, PerfTrace){};
728 void parsePerfTraces() override;
730 private:
731 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
732 void readUnsymbolizedProfile(StringRef Filename);
734 std::unordered_set<std::string> ContextStrSet;
737 } // end namespace sampleprof
738 } // end namespace llvm
740 #endif