[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / tools / llvm-profgen / PerfReader.h
blob14137e82572d77a34d9c07a884762e865e7b4592
1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
16 #include <cstdint>
17 #include <fstream>
18 #include <list>
19 #include <map>
20 #include <vector>
22 using namespace llvm;
23 using namespace sampleprof;
25 namespace llvm {
26 namespace sampleprof {
28 // Stream based trace line iterator
29 class TraceStream {
30 std::string CurrentLine;
31 std::ifstream Fin;
32 bool IsAtEoF = false;
33 uint64_t LineNumber = 0;
35 public:
36 TraceStream(StringRef Filename) : Fin(Filename.str()) {
37 if (!Fin.good())
38 exitWithError("Error read input perf script file", Filename);
39 advance();
42 StringRef getCurrentLine() {
43 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
44 return CurrentLine;
47 uint64_t getLineNumber() { return LineNumber; }
49 bool isAtEoF() { return IsAtEoF; }
51 // Read the next line
52 void advance() {
53 if (!std::getline(Fin, CurrentLine)) {
54 IsAtEoF = true;
55 return;
57 LineNumber++;
// The type of input format.
enum PerfFormat {
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by the `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};
// The type of perfscript content.
enum PerfContent {
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};
77 struct PerfInputFile {
78 std::string InputFile;
79 PerfFormat Format = PerfFormat::UnknownFormat;
80 PerfContent Content = PerfContent::UnknownContent;
83 // The parsed LBR sample entry.
84 struct LBREntry {
85 uint64_t Source = 0;
86 uint64_t Target = 0;
87 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
89 #ifndef NDEBUG
90 void print() const {
91 dbgs() << "from " << format("%#010x", Source) << " to "
92 << format("%#010x", Target);
94 #endif
97 #ifndef NDEBUG
98 static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
99 for (size_t I = 0; I < LBRStack.size(); I++) {
100 dbgs() << "[" << I << "] ";
101 LBRStack[I].print();
102 dbgs() << "\n";
106 static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
107 for (size_t I = 0; I < CallStack.size(); I++) {
108 dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
111 #endif
// Hash interface for generic data of type T.
// T must provide a \fn getHashCode and a \fn isEqual. The wrapper owns the
// payload through a shared_ptr and exposes the two functors (Hash, Equal) that
// an unordered container needs for a key.
// getHashCode is called non-virtually on T to avoid vtable overhead, i.e. a
// derived class computes its hash and stores it where the base class's
// getHashCode can return it. This also leaves room for computing the hash
// incrementally (like a rolling hash) during frame stack unwinding, since
// unwinding only changes the leaf of the frame stack. \fn isEqual may be
// virtual on T, which has a perf cost; with a better hash function the
// comparison could be skipped or made non-virtual in the future.
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;

  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Raw, non-owning access to the wrapped payload.
  T *getPtr() const { return Data.get(); }

  // Hasher: forwards to the payload's getHashCode (not virtual on purpose).
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      const uint64_t Code = Key.Data->getHashCode();
      // A zero code is treated as "hash was never generated".
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality: precise comparison delegated to the payload's isEqual.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      T *Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };
};
149 struct PerfSample {
150 // LBR stack recorded in FIFO order.
151 SmallVector<LBREntry, 16> LBRStack;
152 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
153 // generation
154 SmallVector<uint64_t, 16> CallStack;
156 virtual ~PerfSample() = default;
157 uint64_t getHashCode() const {
158 // Use simple DJB2 hash
159 auto HashCombine = [](uint64_t H, uint64_t V) {
160 return ((H << 5) + H) + V;
162 uint64_t Hash = 5381;
163 for (const auto &Value : CallStack) {
164 Hash = HashCombine(Hash, Value);
166 for (const auto &Entry : LBRStack) {
167 Hash = HashCombine(Hash, Entry.Source);
168 Hash = HashCombine(Hash, Entry.Target);
170 return Hash;
173 bool isEqual(const PerfSample *Other) const {
174 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
175 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
177 if (CallStack.size() != OtherCallStack.size() ||
178 LBRStack.size() != OtherLBRStack.size())
179 return false;
181 if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
182 return false;
184 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
185 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
186 LBRStack[I].Target != OtherLBRStack[I].Target)
187 return false;
189 return true;
192 #ifndef NDEBUG
193 uint64_t Linenum = 0;
195 void print() const {
196 dbgs() << "Line " << Linenum << "\n";
197 dbgs() << "LBR stack\n";
198 printLBRStack(LBRStack);
199 dbgs() << "Call stack\n";
200 printCallStack(CallStack);
202 #endif
204 // After parsing the sample, we record the samples by aggregating them
205 // into this counter. The key stores the sample data and the value is
206 // the sample repeat times.
207 using AggregatedCounter =
208 std::unordered_map<Hashable<PerfSample>, uint64_t,
209 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
211 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
213 inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
214 ProfiledBinary *Binary) {
215 // Start bigger than End is considered invalid.
216 // LBR ranges cross the unconditional jmp are also assumed invalid.
217 // It's found that perf data may contain duplicate LBR entries that could form
218 // a range that does not reflect real execution flow on some Intel targets,
219 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
220 // cannot be a linear execution range that spans over unconditional jmp.
221 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
224 // The state for the unwinder, it doesn't hold the data but only keep the
225 // pointer/index of the data, While unwinding, the CallStack is changed
226 // dynamicially and will be recorded as the context of the sample
227 struct UnwindState {
228 // Profiled binary that current frame address belongs to
229 const ProfiledBinary *Binary;
230 // Call stack trie node
231 struct ProfiledFrame {
232 const uint64_t Address = DummyRoot;
233 ProfiledFrame *Parent;
234 SampleVector RangeSamples;
235 SampleVector BranchSamples;
236 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
238 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
239 : Address(Addr), Parent(P) {}
240 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
241 assert(Address && "Address can't be zero!");
242 auto Ret = Children.emplace(
243 Address, std::make_unique<ProfiledFrame>(Address, this));
244 return Ret.first->second.get();
246 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
247 RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
249 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
250 BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
252 bool isDummyRoot() { return Address == DummyRoot; }
253 bool isExternalFrame() { return Address == ExternalAddr; }
254 bool isLeafFrame() { return Children.empty(); }
257 ProfiledFrame DummyTrieRoot;
258 ProfiledFrame *CurrentLeafFrame;
259 // Used to fall through the LBR stack
260 uint32_t LBRIndex = 0;
261 // Reference to PerfSample.LBRStack
262 const SmallVector<LBREntry, 16> &LBRStack;
263 // Used to iterate the address range
264 InstructionPointer InstPtr;
265 // Indicate whether unwinding is currently in a bad state which requires to
266 // skip all subsequent unwinding.
267 bool Invalid = false;
268 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
269 : Binary(Binary), LBRStack(Sample->LBRStack),
270 InstPtr(Binary, Sample->CallStack.front()) {
271 initFrameTrie(Sample->CallStack);
274 bool validateInitialState() {
275 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
276 uint64_t LeafAddr = CurrentLeafFrame->Address;
277 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
278 "External leading LBR should match the leaf frame.");
280 // When we take a stack sample, ideally the sampling distance between the
281 // leaf IP of stack and the last LBR target shouldn't be very large.
282 // Use a heuristic size (0x100) to filter out broken records.
283 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
284 WithColor::warning() << "Bogus trace: stack tip = "
285 << format("%#010x", LeafAddr)
286 << ", LBR tip = " << format("%#010x\n", LBRLeaf);
287 return false;
289 return true;
292 void checkStateConsistency() {
293 assert(InstPtr.Address == CurrentLeafFrame->Address &&
294 "IP should align with context leaf");
297 void setInvalid() { Invalid = true; }
298 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
299 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
300 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
301 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
302 bool IsLastLBR() const { return LBRIndex == 0; }
303 bool getLBRStackSize() const { return LBRStack.size(); }
304 void advanceLBR() { LBRIndex++; }
305 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
307 void pushFrame(uint64_t Address) {
308 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
311 void switchToFrame(uint64_t Address) {
312 if (CurrentLeafFrame->Address == Address)
313 return;
314 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
317 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
319 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
321 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
322 ProfiledFrame *Cur = &DummyTrieRoot;
323 for (auto Address : reverse(CallStack)) {
324 Cur = Cur->getOrCreateChildFrame(Address);
326 CurrentLeafFrame = Cur;
329 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
// Base class for sample counter key with context.
// Subclasses implement genHashCode() to fill in HashCode; getHashCode() calls
// it lazily while the cached value is still zero.
struct ContextKey {
  // Cached hash; zero means "not generated yet".
  uint64_t HashCode = 0;

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };
  const ContextKind Kind;

  ContextKey(ContextKind K) : Kind(K) {}
  virtual ~ContextKey() = default;

  ContextKind getKind() const { return Kind; }

  // Returns the cached hash, generating it first when it is still zero.
  uint64_t getHashCode() {
    if (HashCode != 0)
      return HashCode;
    genHashCode();
    return HashCode;
  }

  // Compute the hash and store it in HashCode.
  virtual void genHashCode() = 0;

  // Default equality only compares the cached hash codes.
  virtual bool isEqual(const ContextKey *K) const {
    return K->HashCode == HashCode;
  }
};
353 // String based context id
354 struct StringBasedCtxKey : public ContextKey {
355 SampleContextFrameVector Context;
357 bool WasLeafInlined;
358 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
359 static bool classof(const ContextKey *K) {
360 return K->getKind() == CK_StringBased;
363 bool isEqual(const ContextKey *K) const override {
364 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
365 return Context == Other->Context;
368 void genHashCode() override {
369 HashCode = hash_value(SampleContextFrames(Context));
373 // Address-based context id
374 struct AddrBasedCtxKey : public ContextKey {
375 SmallVector<uint64_t, 16> Context;
377 bool WasLeafInlined;
378 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
379 static bool classof(const ContextKey *K) {
380 return K->getKind() == CK_AddrBased;
383 bool isEqual(const ContextKey *K) const override {
384 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
385 return Context == Other->Context;
388 void genHashCode() override {
389 HashCode = hash_combine_range(Context.begin(), Context.end());
// The counter of branch samples for one function indexed by the branch,
// which is represented as the source and target offset pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Wrapper for sample counters including range counter and branch counter
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add \p Repeat to the count of the range [Start, End], creating the entry
  // when it does not exist yet. Start must not exceed End.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    uint64_t &Count = RangeCounter[std::make_pair(Start, End)];
    Count += Repeat;
  }

  // Add \p Repeat to the count of the branch Source -> Target, creating the
  // entry when it does not exist yet.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    uint64_t &Count = BranchCounter[std::make_pair(Source, Target)];
    Count += Repeat;
  }
};
413 // Sample counter with context to support context-sensitive profile
414 using ContextSampleCounterMap =
415 std::unordered_map<Hashable<ContextKey>, SampleCounter,
416 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
418 struct FrameStack {
419 SmallVector<uint64_t, 16> Stack;
420 ProfiledBinary *Binary;
421 FrameStack(ProfiledBinary *B) : Binary(B) {}
422 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
423 assert(!Cur->isExternalFrame() &&
424 "External frame's not expected for context stack.");
425 Stack.push_back(Cur->Address);
426 return true;
429 void popFrame() {
430 if (!Stack.empty())
431 Stack.pop_back();
433 std::shared_ptr<StringBasedCtxKey> getContextKey();
436 struct AddressStack {
437 SmallVector<uint64_t, 16> Stack;
438 ProfiledBinary *Binary;
439 AddressStack(ProfiledBinary *B) : Binary(B) {}
440 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
441 assert(!Cur->isExternalFrame() &&
442 "External frame's not expected for context stack.");
443 Stack.push_back(Cur->Address);
444 return true;
447 void popFrame() {
448 if (!Stack.empty())
449 Stack.pop_back();
451 std::shared_ptr<AddrBasedCtxKey> getContextKey();
455 As in hybrid sample we have a group of LBRs and the most recent sampling call
456 stack, we can walk through those LBRs to infer more call stacks which would be
457 used as context for profile. VirtualUnwinder is the class to do the call stack
458 unwinding based on LBR state. Two types of unwinding are processd here:
459 1) LBR unwinding and 2) linear range unwinding.
460 Specifically, for each LBR entry(can be classified into call, return, regular
461 branch), LBR unwinding will replay the operation by pushing, popping or
462 switching leaf frame towards the call stack and since the initial call stack
463 is most recently sampled, the replay should be in anti-execution order, i.e. for
464 the regular case, pop the call stack when LBR is call, push frame on call stack
465 when LBR is return. After each LBR processed, it also needs to align with the
466 next LBR by going through instructions from previous LBR's target to current
467 LBR's source, which is the linear unwinding. As instruction from linear range
468 can come from different function by inlining, linear unwinding will do the range
469 splitting and record counters by the range with same inline context. Over those
470 unwinding process we will record each call stack as context id and LBR/linear
471 range as sample counter for further CS profile generation.
473 class VirtualUnwinder {
474 public:
475 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
476 : CtxCounterMap(Counter), Binary(B) {}
477 bool unwind(const PerfSample *Sample, uint64_t Repeat);
478 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
480 uint64_t NumTotalBranches = 0;
481 uint64_t NumExtCallBranch = 0;
482 uint64_t NumMissingExternalFrame = 0;
483 uint64_t NumMismatchedProEpiBranch = 0;
484 uint64_t NumMismatchedExtCallBranch = 0;
485 uint64_t NumUnpairedExtAddr = 0;
486 uint64_t NumPairedExtAddr = 0;
488 private:
489 bool isSourceExternal(UnwindState &State) const {
490 return State.getCurrentLBRSource() == ExternalAddr;
493 bool isTargetExternal(UnwindState &State) const {
494 return State.getCurrentLBRTarget() == ExternalAddr;
497 // Determine whether the return source is from external code by checking if
498 // the target's the next inst is a call inst.
499 bool isReturnFromExternal(UnwindState &State) const {
500 return isSourceExternal(State) &&
501 (Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0);
504 // If the source is external address but it's not the `return` case, treat it
505 // as a call from external.
506 bool isCallFromExternal(UnwindState &State) const {
507 return isSourceExternal(State) &&
508 Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) == 0;
511 bool isCallState(UnwindState &State) const {
512 // The tail call frame is always missing here in stack sample, we will
513 // use a specific tail call tracker to infer it.
514 if (!isValidState(State))
515 return false;
517 if (Binary->addressIsCall(State.getCurrentLBRSource()))
518 return true;
520 return isCallFromExternal(State);
523 bool isReturnState(UnwindState &State) const {
524 if (!isValidState(State))
525 return false;
527 // Simply check addressIsReturn, as ret is always reliable, both for
528 // regular call and tail call.
529 if (Binary->addressIsReturn(State.getCurrentLBRSource()))
530 return true;
532 return isReturnFromExternal(State);
535 bool isValidState(UnwindState &State) const { return !State.Invalid; }
537 void unwindCall(UnwindState &State);
538 void unwindLinear(UnwindState &State, uint64_t Repeat);
539 void unwindReturn(UnwindState &State);
540 void unwindBranch(UnwindState &State);
542 template <typename T>
543 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
544 // Collect each samples on trie node by DFS traversal
545 template <typename T>
546 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
547 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
549 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
550 uint64_t Repeat);
551 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
552 uint64_t Repeat);
554 ContextSampleCounterMap *CtxCounterMap;
555 // Profiled binary that current frame address belongs to
556 ProfiledBinary *Binary;
557 // Keep track of all untracked callsites
558 std::set<uint64_t> UntrackedCallsites;
561 // Read perf trace to parse the events and samples.
562 class PerfReaderBase {
563 public:
564 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
565 : Binary(B), PerfTraceFile(PerfTrace) {
566 // Initialize the base address to preferred address.
567 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
569 virtual ~PerfReaderBase() = default;
570 static std::unique_ptr<PerfReaderBase>
571 create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
572 std::optional<uint32_t> PIDFilter);
574 // Entry of the reader to parse multiple perf traces
575 virtual void parsePerfTraces() = 0;
576 const ContextSampleCounterMap &getSampleCounters() const {
577 return SampleCounters;
579 bool profileIsCS() { return ProfileIsCS; }
581 protected:
582 ProfiledBinary *Binary = nullptr;
583 StringRef PerfTraceFile;
585 ContextSampleCounterMap SampleCounters;
586 bool ProfileIsCS = false;
588 uint64_t NumTotalSample = 0;
589 uint64_t NumLeafExternalFrame = 0;
590 uint64_t NumLeadingOutgoingLBR = 0;
593 // Read perf script to parse the events and samples.
594 class PerfScriptReader : public PerfReaderBase {
595 public:
596 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
597 std::optional<uint32_t> PID)
598 : PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
600 // Entry of the reader to parse multiple perf traces
601 void parsePerfTraces() override;
602 // Generate perf script from perf data
603 static PerfInputFile
604 convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
605 std::optional<uint32_t> PIDFilter);
606 // Extract perf script type by peaking at the input
607 static PerfContent checkPerfScriptType(StringRef FileName);
609 protected:
610 // The parsed MMap event
611 struct MMapEvent {
612 uint64_t PID = 0;
613 uint64_t Address = 0;
614 uint64_t Size = 0;
615 uint64_t Offset = 0;
616 StringRef BinaryPath;
619 // Check whether a given line is LBR sample
620 static bool isLBRSample(StringRef Line);
621 // Check whether a given line is MMAP event
622 static bool isMMap2Event(StringRef Line);
623 // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
624 // mapping between the binary name and its memory layout.
625 static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
626 MMapEvent &MMap);
627 // Update base address based on mmap events
628 void updateBinaryAddress(const MMapEvent &Event);
629 // Parse mmap event and update binary address
630 void parseMMap2Event(TraceStream &TraceIt);
631 // Parse perf events/samples and do aggregation
632 void parseAndAggregateTrace();
633 // Parse either an MMAP event or a perf sample
634 void parseEventOrSample(TraceStream &TraceIt);
635 // Warn if the relevant mmap event is missing.
636 void warnIfMissingMMap();
637 // Emit accumulate warnings.
638 void warnTruncatedStack();
639 // Warn if range is invalid.
640 void warnInvalidRange();
641 // Extract call stack from the perf trace lines
642 bool extractCallstack(TraceStream &TraceIt,
643 SmallVectorImpl<uint64_t> &CallStack);
644 // Extract LBR stack from one perf trace line
645 bool extractLBRStack(TraceStream &TraceIt,
646 SmallVectorImpl<LBREntry> &LBRStack);
647 uint64_t parseAggregatedCount(TraceStream &TraceIt);
648 // Parse one sample from multiple perf lines, override this for different
649 // sample type
650 void parseSample(TraceStream &TraceIt);
651 // An aggregated count is given to indicate how many times the sample is
652 // repeated.
653 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
654 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
655 // Post process the profile after trace aggregation, we will do simple range
656 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
657 virtual void generateUnsymbolizedProfile();
658 void writeUnsymbolizedProfile(StringRef Filename);
659 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
661 // Samples with the repeating time generated by the perf reader
662 AggregatedCounter AggregatedSamples;
663 // Keep track of all invalid return addresses
664 std::set<uint64_t> InvalidReturnAddresses;
665 // PID for the process of interest
666 std::optional<uint32_t> PIDFilter;
670 The reader of LBR only perf script.
671 A typical LBR sample is like:
672 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
673 ... 0x4005c8/0x4005dc/P/-/-/0
675 class LBRPerfReader : public PerfScriptReader {
676 public:
677 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
678 std::optional<uint32_t> PID)
679 : PerfScriptReader(Binary, PerfTrace, PID){};
680 // Parse the LBR only sample.
681 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
685 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
686 which is used to generate CS profile. An example of hybrid sample:
687 4005dc # call stack leaf
688 400634
689 400684 # call stack root
690 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
691 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
693 class HybridPerfReader : public PerfScriptReader {
694 public:
695 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
696 std::optional<uint32_t> PID)
697 : PerfScriptReader(Binary, PerfTrace, PID){};
698 // Parse the hybrid sample including the call and LBR line
699 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
700 void generateUnsymbolizedProfile() override;
702 private:
703 // Unwind the hybrid samples after aggregration
704 void unwindSamples();
708 Format of unsymbolized profile:
710 [frame1 @ frame2 @ ...] # If it's a CS profile
711 number of entries in RangeCounter
712 from_1-to_1:count_1
713 from_2-to_2:count_2
714 ......
715 from_n-to_n:count_n
716 number of entries in BranchCounter
717 src_1->dst_1:count_1
718 src_2->dst_2:count_2
719 ......
720 src_n->dst_n:count_n
721 [frame1 @ frame2 @ ...] # Next context
722 ......
724 Note that non-CS profile doesn't have the empty `[]` context.
726 class UnsymbolizedProfileReader : public PerfReaderBase {
727 public:
728 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
729 : PerfReaderBase(Binary, PerfTrace){};
730 void parsePerfTraces() override;
732 private:
733 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
734 void readUnsymbolizedProfile(StringRef Filename);
736 std::unordered_set<std::string> ContextStrSet;
739 } // end namespace sampleprof
740 } // end namespace llvm
742 #endif