[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / llvm / tools / llvm-profgen / PerfReader.h
blob9d84ad34bb336ab8da95549553b7be66d090ca13
1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
16 #include <cstdint>
17 #include <fstream>
18 #include <list>
19 #include <map>
20 #include <vector>
22 using namespace llvm;
23 using namespace sampleprof;
25 namespace llvm {
26 namespace sampleprof {
28 // Stream based trace line iterator
29 class TraceStream {
30 std::string CurrentLine;
31 std::ifstream Fin;
32 bool IsAtEoF = false;
33 uint64_t LineNumber = 0;
35 public:
36 TraceStream(StringRef Filename) : Fin(Filename.str()) {
37 if (!Fin.good())
38 exitWithError("Error read input perf script file", Filename);
39 advance();
42 StringRef getCurrentLine() {
43 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
44 return CurrentLine;
47 uint64_t getLineNumber() { return LineNumber; }
49 bool isAtEoF() { return IsAtEoF; }
51 // Read the next line
52 void advance() {
53 if (!std::getline(Fin, CurrentLine)) {
54 IsAtEoF = true;
55 return;
57 LineNumber++;
// The type of input format.
enum PerfFormat {
  UnknownFormat = 0,
  PerfData = 1,            // Raw linux perf.data.
  PerfScript = 2,          // Perf script created by `perf script` command.
  UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen.
};
// The type of perfscript content.
enum PerfContent {
  UnknownContent = 0,
  LBR = 1,      // Only LBR sample.
  LBRStack = 2, // Hybrid sample including call stack and LBR stack.
};
77 struct PerfInputFile {
78 std::string InputFile;
79 PerfFormat Format = PerfFormat::UnknownFormat;
80 PerfContent Content = PerfContent::UnknownContent;
83 // The parsed LBR sample entry.
84 struct LBREntry {
85 uint64_t Source = 0;
86 uint64_t Target = 0;
87 // An artificial branch stands for a series of consecutive branches starting
88 // from the current binary with a transition through external code and
89 // eventually landing back in the current binary.
90 bool IsArtificial = false;
91 LBREntry(uint64_t S, uint64_t T, bool I)
92 : Source(S), Target(T), IsArtificial(I) {}
94 #ifndef NDEBUG
95 void print() const {
96 dbgs() << "from " << format("%#010x", Source) << " to "
97 << format("%#010x", Target);
98 if (IsArtificial)
99 dbgs() << " Artificial";
101 #endif
104 #ifndef NDEBUG
105 static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
106 for (size_t I = 0; I < LBRStack.size(); I++) {
107 dbgs() << "[" << I << "] ";
108 LBRStack[I].print();
109 dbgs() << "\n";
113 static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
114 for (size_t I = 0; I < CallStack.size(); I++) {
115 dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
118 #endif
// Hash interface for generic data of type T
// Data should implement a \fn getHashCode and a \fn isEqual
// Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
// i.e we explicitly calculate hash of derived class, assign to base class's
// HashCode. This also provides the flexibility for calculating the hash code
// incrementally(like rolling hash) during frame stack unwinding since unwinding
// only changes the leaf of frame stack. \fn isEqual is a virtual function,
// which will have perf overhead. In the future, if we redesign a better hash
// function, then we can just skip this or switch to non-virtual function(like
// just ignore comparison if hash conflicts probabilities is low)
template <class T> class Hashable {
public:
  // Shared handle to the wrapped key data.
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hash code generation: forwards to T::getHashCode(). A hash of zero is
  // treated as "never generated" and trips the assertion.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // Don't make it virtual for getHashCode
      uint64_t HashCode = Key.Data->getHashCode();
      assert(HashCode && "Should generate HashCode for it!");
      return HashCode;
    }
  };

  // Hash equal: forwards to T::isEqual() on the wrapped objects.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Precisely compare the data, vtable will have overhead.
      return LHS.Data->isEqual(RHS.Data.get());
    }
  };

  // Return a non-owning pointer to the wrapped data.
  T *getPtr() const { return Data.get(); }
};
156 struct PerfSample {
157 // LBR stack recorded in FIFO order.
158 SmallVector<LBREntry, 16> LBRStack;
159 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
160 // generation
161 SmallVector<uint64_t, 16> CallStack;
163 virtual ~PerfSample() = default;
164 uint64_t getHashCode() const {
165 // Use simple DJB2 hash
166 auto HashCombine = [](uint64_t H, uint64_t V) {
167 return ((H << 5) + H) + V;
169 uint64_t Hash = 5381;
170 for (const auto &Value : CallStack) {
171 Hash = HashCombine(Hash, Value);
173 for (const auto &Entry : LBRStack) {
174 Hash = HashCombine(Hash, Entry.Source);
175 Hash = HashCombine(Hash, Entry.Target);
177 return Hash;
180 bool isEqual(const PerfSample *Other) const {
181 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
182 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
184 if (CallStack.size() != OtherCallStack.size() ||
185 LBRStack.size() != OtherLBRStack.size())
186 return false;
188 if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
189 return false;
191 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
192 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
193 LBRStack[I].Target != OtherLBRStack[I].Target)
194 return false;
196 return true;
199 #ifndef NDEBUG
200 void print() const {
201 dbgs() << "LBR stack\n";
202 printLBRStack(LBRStack);
203 dbgs() << "Call stack\n";
204 printCallStack(CallStack);
206 #endif
208 // After parsing the sample, we record the samples by aggregating them
209 // into this counter. The key stores the sample data and the value is
210 // the sample repeat times.
211 using AggregatedCounter =
212 std::unordered_map<Hashable<PerfSample>, uint64_t,
213 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
215 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
217 // The state for the unwinder, it doesn't hold the data but only keep the
218 // pointer/index of the data, While unwinding, the CallStack is changed
219 // dynamicially and will be recorded as the context of the sample
220 struct UnwindState {
221 // Profiled binary that current frame address belongs to
222 const ProfiledBinary *Binary;
223 // Call stack trie node
224 struct ProfiledFrame {
225 const uint64_t Address = DummyRoot;
226 ProfiledFrame *Parent;
227 SampleVector RangeSamples;
228 SampleVector BranchSamples;
229 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
231 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
232 : Address(Addr), Parent(P) {}
233 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
234 assert(Address && "Address can't be zero!");
235 auto Ret = Children.emplace(
236 Address, std::make_unique<ProfiledFrame>(Address, this));
237 return Ret.first->second.get();
239 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
240 RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
242 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
243 BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
245 bool isDummyRoot() { return Address == DummyRoot; }
246 bool isExternalFrame() { return Address == ExternalAddr; }
247 bool isLeafFrame() { return Children.empty(); }
250 ProfiledFrame DummyTrieRoot;
251 ProfiledFrame *CurrentLeafFrame;
252 // Used to fall through the LBR stack
253 uint32_t LBRIndex = 0;
254 // Reference to PerfSample.LBRStack
255 const SmallVector<LBREntry, 16> &LBRStack;
256 // Used to iterate the address range
257 InstructionPointer InstPtr;
258 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
259 : Binary(Binary), LBRStack(Sample->LBRStack),
260 InstPtr(Binary, Sample->CallStack.front()) {
261 initFrameTrie(Sample->CallStack);
264 bool validateInitialState() {
265 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
266 uint64_t LeafAddr = CurrentLeafFrame->Address;
267 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
268 "External leading LBR should match the leaf frame.");
270 // When we take a stack sample, ideally the sampling distance between the
271 // leaf IP of stack and the last LBR target shouldn't be very large.
272 // Use a heuristic size (0x100) to filter out broken records.
273 if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
274 WithColor::warning() << "Bogus trace: stack tip = "
275 << format("%#010x", LeafAddr)
276 << ", LBR tip = " << format("%#010x\n", LBRLeaf);
277 return false;
279 return true;
282 void checkStateConsistency() {
283 assert(InstPtr.Address == CurrentLeafFrame->Address &&
284 "IP should align with context leaf");
287 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
288 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
289 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
290 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
291 bool IsLastLBR() const { return LBRIndex == 0; }
292 bool getLBRStackSize() const { return LBRStack.size(); }
293 void advanceLBR() { LBRIndex++; }
294 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
296 void pushFrame(uint64_t Address) {
297 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
300 void switchToFrame(uint64_t Address) {
301 if (CurrentLeafFrame->Address == Address)
302 return;
303 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
306 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
308 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
310 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
311 ProfiledFrame *Cur = &DummyTrieRoot;
312 for (auto Address : reverse(CallStack)) {
313 Cur = Cur->getOrCreateChildFrame(Address);
315 CurrentLeafFrame = Cur;
318 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
// Base class for sample counter key with context
struct ContextKey {
  // Cached hash; zero means "not generated yet".
  uint64_t HashCode = 0;
  virtual ~ContextKey() = default;

  // Return the cached hash, generating it through genHashCode() while the
  // cached value is still zero.
  uint64_t getHashCode() {
    if (HashCode == 0)
      genHashCode();
    return HashCode;
  }

  // Compute the hash of the derived key and store it in HashCode.
  virtual void genHashCode() = 0;

  // Default equality compares only the cached hash codes; derived keys
  // override this with a precise comparison.
  virtual bool isEqual(const ContextKey *K) const {
    return HashCode == K->HashCode;
  }

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_ProbeBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K) {}
};
342 // String based context id
343 struct StringBasedCtxKey : public ContextKey {
344 SampleContextFrameVector Context;
346 bool WasLeafInlined;
347 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
348 static bool classof(const ContextKey *K) {
349 return K->getKind() == CK_StringBased;
352 bool isEqual(const ContextKey *K) const override {
353 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
354 return Context == Other->Context;
357 void genHashCode() override {
358 HashCode = hash_value(SampleContextFrames(Context));
362 // Probe based context key as the intermediate key of context
363 // String based context key will introduce redundant string handling
364 // since the callee context is inferred from the context string which
365 // need to be splitted by '@' to get the last location frame, so we
366 // can just use probe instead and generate the string in the end.
367 struct ProbeBasedCtxKey : public ContextKey {
368 SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
370 ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
371 static bool classof(const ContextKey *K) {
372 return K->getKind() == CK_ProbeBased;
375 bool isEqual(const ContextKey *K) const override {
376 const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
377 assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
378 return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
379 O->Probes.end());
382 void genHashCode() override {
383 for (const auto *P : Probes) {
384 HashCode = hash_combine(HashCode, P);
386 if (HashCode == 0) {
387 // Avoid zero value of HashCode when it's an empty list
388 HashCode = 1;
// The counter of branch samples for one function indexed by the branch,
// which is represented as the source and target offset pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Wrapper for sample counters including range counter and branch counter
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add \p Repeat to the counter of the range [\p Start, \p End]; the entry
  // is created on first use. \p Start must not exceed \p End.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    RangeCounter[{Start, End}] += Repeat;
  }

  // Add \p Repeat to the counter of the branch \p Source -> \p Target; the
  // entry is created on first use.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    BranchCounter[{Source, Target}] += Repeat;
  }
};
413 // Sample counter with context to support context-sensitive profile
414 using ContextSampleCounterMap =
415 std::unordered_map<Hashable<ContextKey>, SampleCounter,
416 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
418 struct FrameStack {
419 SmallVector<uint64_t, 16> Stack;
420 ProfiledBinary *Binary;
421 FrameStack(ProfiledBinary *B) : Binary(B) {}
422 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
423 assert(!Cur->isExternalFrame() &&
424 "External frame's not expected for context stack.");
425 Stack.push_back(Cur->Address);
426 return true;
429 void popFrame() {
430 if (!Stack.empty())
431 Stack.pop_back();
433 std::shared_ptr<StringBasedCtxKey> getContextKey();
436 struct ProbeStack {
437 SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
438 ProfiledBinary *Binary;
439 ProbeStack(ProfiledBinary *B) : Binary(B) {}
440 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
441 assert(!Cur->isExternalFrame() &&
442 "External frame's not expected for context stack.");
443 const MCDecodedPseudoProbe *CallProbe =
444 Binary->getCallProbeForAddr(Cur->Address);
445 // We may not find a probe for a merged or external callsite.
446 // Callsite merging may cause the loss of original probe IDs.
447 // Cutting off the context from here since the inliner will
448 // not know how to consume a context with unknown callsites.
449 if (!CallProbe)
450 return false;
451 Stack.push_back(CallProbe);
452 return true;
455 void popFrame() {
456 if (!Stack.empty())
457 Stack.pop_back();
459 // Use pseudo probe based context key to get the sample counter
460 // A context stands for a call path from 'main' to an uninlined
461 // callee with all inline frames recovered on that path. The probes
462 // belonging to that call path is the probes either originated from
463 // the callee or from any functions inlined into the callee. Since
464 // pseudo probes are organized in a tri-tree style after decoded,
465 // the tree path from the tri-tree root (which is the uninlined
466 // callee) to the probe node forms an inline context.
467 // Here we use a list of probe(pointer) as the context key to speed up
468 // aggregation and the final context string will be generate in
469 // ProfileGenerator
470 std::shared_ptr<ProbeBasedCtxKey> getContextKey();
/*
As in hybrid sample we have a group of LBRs and the most recent sampling call
stack, we can walk through those LBRs to infer more call stacks which would be
used as context for profile. VirtualUnwinder is the class to do the call stack
unwinding based on LBR state. Two types of unwinding are processed here:
1) LBR unwinding and 2) linear range unwinding.
Specifically, for each LBR entry(can be classified into call, return, regular
branch), LBR unwinding will replay the operation by pushing, popping or
switching leaf frame towards the call stack and since the initial call stack
is most recently sampled, the replay should be in anti-execution order, i.e. for
the regular case, pop the call stack when LBR is call, push frame on call stack
when LBR is return. After each LBR processed, it also needs to align with the
next LBR by going through instructions from previous LBR's target to current
LBR's source, which is the linear unwinding. As instruction from linear range
can come from different function by inlining, linear unwinding will do the range
splitting and record counters by the range with same inline context. Over those
unwinding process we will record each call stack as context id and LBR/linear
range as sample counter for further CS profile generation.
*/
492 class VirtualUnwinder {
493 public:
494 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
495 : CtxCounterMap(Counter), Binary(B) {}
496 bool unwind(const PerfSample *Sample, uint64_t Repeat);
497 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
499 uint64_t NumTotalBranches = 0;
500 uint64_t NumExtCallBranch = 0;
501 uint64_t NumMissingExternalFrame = 0;
502 uint64_t NumMismatchedProEpiBranch = 0;
503 uint64_t NumMismatchedExtCallBranch = 0;
505 private:
506 bool isCallState(UnwindState &State) const {
507 // The tail call frame is always missing here in stack sample, we will
508 // use a specific tail call tracker to infer it.
509 return Binary->addressIsCall(State.getCurrentLBRSource());
512 bool isReturnState(UnwindState &State) const {
513 // Simply check addressIsReturn, as ret is always reliable, both for
514 // regular call and tail call.
515 if (!Binary->addressIsReturn(State.getCurrentLBRSource()))
516 return false;
518 // In a callback case, a return from internal code, say A, to external
519 // runtime can happen. The external runtime can then call back to
520 // another internal routine, say B. Making an artificial branch that
521 // looks like a return from A to B can confuse the unwinder to treat
522 // the instruction before B as the call instruction. Here we detect this
523 // case if the return target is not the next inst of call inst, then we just
524 // do not treat it as a return.
525 uint64_t CallAddr =
526 Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget());
527 return (CallAddr != 0);
530 void unwindCall(UnwindState &State);
531 void unwindLinear(UnwindState &State, uint64_t Repeat);
532 void unwindReturn(UnwindState &State);
533 void unwindBranch(UnwindState &State);
535 template <typename T>
536 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
537 // Collect each samples on trie node by DFS traversal
538 template <typename T>
539 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
540 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
542 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
543 uint64_t Repeat);
544 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
545 uint64_t Repeat);
547 ContextSampleCounterMap *CtxCounterMap;
548 // Profiled binary that current frame address belongs to
549 ProfiledBinary *Binary;
550 // Keep track of all untracked callsites
551 std::set<uint64_t> UntrackedCallsites;
554 // Read perf trace to parse the events and samples.
555 class PerfReaderBase {
556 public:
557 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
558 : Binary(B), PerfTraceFile(PerfTrace) {
559 // Initialize the base address to preferred address.
560 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
562 virtual ~PerfReaderBase() = default;
563 static std::unique_ptr<PerfReaderBase> create(ProfiledBinary *Binary,
564 PerfInputFile &PerfInput);
566 // Entry of the reader to parse multiple perf traces
567 virtual void parsePerfTraces() = 0;
568 const ContextSampleCounterMap &getSampleCounters() const {
569 return SampleCounters;
571 bool profileIsCSFlat() { return ProfileIsCSFlat; }
573 protected:
574 ProfiledBinary *Binary = nullptr;
575 StringRef PerfTraceFile;
577 ContextSampleCounterMap SampleCounters;
578 bool ProfileIsCSFlat = false;
580 uint64_t NumTotalSample = 0;
581 uint64_t NumLeafExternalFrame = 0;
582 uint64_t NumLeadingOutgoingLBR = 0;
585 // Read perf script to parse the events and samples.
586 class PerfScriptReader : public PerfReaderBase {
587 public:
588 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace)
589 : PerfReaderBase(B, PerfTrace){};
591 // Entry of the reader to parse multiple perf traces
592 virtual void parsePerfTraces() override;
593 // Generate perf script from perf data
594 static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
595 PerfInputFile &File);
596 // Extract perf script type by peaking at the input
597 static PerfContent checkPerfScriptType(StringRef FileName);
599 protected:
600 // The parsed MMap event
601 struct MMapEvent {
602 uint64_t PID = 0;
603 uint64_t Address = 0;
604 uint64_t Size = 0;
605 uint64_t Offset = 0;
606 StringRef BinaryPath;
609 // Check whether a given line is LBR sample
610 static bool isLBRSample(StringRef Line);
611 // Check whether a given line is MMAP event
612 static bool isMMap2Event(StringRef Line);
613 // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
614 // mapping between the binary name and its memory layout.
615 static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
616 MMapEvent &MMap);
617 // Update base address based on mmap events
618 void updateBinaryAddress(const MMapEvent &Event);
619 // Parse mmap event and update binary address
620 void parseMMap2Event(TraceStream &TraceIt);
621 // Parse perf events/samples and do aggregation
622 void parseAndAggregateTrace();
623 // Parse either an MMAP event or a perf sample
624 void parseEventOrSample(TraceStream &TraceIt);
625 // Warn if the relevant mmap event is missing.
626 void warnIfMissingMMap();
627 // Emit accumulate warnings.
628 void warnTruncatedStack();
629 // Warn if range is invalid.
630 void warnInvalidRange();
631 // Extract call stack from the perf trace lines
632 bool extractCallstack(TraceStream &TraceIt,
633 SmallVectorImpl<uint64_t> &CallStack);
634 // Extract LBR stack from one perf trace line
635 bool extractLBRStack(TraceStream &TraceIt,
636 SmallVectorImpl<LBREntry> &LBRStack);
637 uint64_t parseAggregatedCount(TraceStream &TraceIt);
638 // Parse one sample from multiple perf lines, override this for different
639 // sample type
640 void parseSample(TraceStream &TraceIt);
641 // An aggregated count is given to indicate how many times the sample is
642 // repeated.
643 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
644 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
645 // Post process the profile after trace aggregation, we will do simple range
646 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
647 virtual void generateUnsymbolizedProfile();
648 void writeUnsymbolizedProfile(StringRef Filename);
649 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
651 // Samples with the repeating time generated by the perf reader
652 AggregatedCounter AggregatedSamples;
653 // Keep track of all invalid return addresses
654 std::set<uint64_t> InvalidReturnAddresses;
/*
  The reader of LBR only perf script.
  A typical LBR sample is like:
    40062f 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
          ... 0x4005c8/0x4005dc/P/-/-/0
*/
663 class LBRPerfReader : public PerfScriptReader {
664 public:
665 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
666 : PerfScriptReader(Binary, PerfTrace){};
667 // Parse the LBR only sample.
668 virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override;
/*
  Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
  which is used to generate CS profile. An example of hybrid sample:
    4005dc    # call stack leaf
    400634
    400684    # call stack root
    0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
*/
680 class HybridPerfReader : public PerfScriptReader {
681 public:
682 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
683 : PerfScriptReader(Binary, PerfTrace){};
684 // Parse the hybrid sample including the call and LBR line
685 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
686 void generateUnsymbolizedProfile() override;
688 private:
689 // Unwind the hybrid samples after aggregration
690 void unwindSamples();
/*
   Format of unsymbolized profile:

    [frame1 @ frame2 @ ...]  # If it's a CS profile
      number of entries in RangeCounter
      from_1-to_1:count_1
      from_2-to_2:count_2
      ......
      from_n-to_n:count_n
      number of entries in BranchCounter
      src_1->dst_1:count_1
      src_2->dst_2:count_2
      ......
      src_n->dst_n:count_n
    [frame1 @ frame2 @ ...]  # Next context
      ......

Note that non-CS profile doesn't have the empty `[]` context.
*/
712 class UnsymbolizedProfileReader : public PerfReaderBase {
713 public:
714 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
715 : PerfReaderBase(Binary, PerfTrace){};
716 void parsePerfTraces() override;
718 private:
719 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
720 void readUnsymbolizedProfile(StringRef Filename);
722 std::unordered_set<std::string> ContextStrSet;
725 } // end namespace sampleprof
726 } // end namespace llvm
728 #endif