Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / tools / llvm-xray / xray-account.h
blob38d3ec989577f304352a550c41b53b14534c6423
//===- xray-account.h - XRay Function Call Accounting ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interface for performing some basic function call
// accounting from an XRay trace.
//
//===----------------------------------------------------------------------===//
13 #ifndef LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H
14 #define LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H
16 #include <map>
17 #include <utility>
18 #include <vector>
20 #include "func-id-helper.h"
21 #include "llvm/ADT/Bitfields.h"
22 #include "llvm/Support/Program.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/XRay/XRayRecord.h"
26 namespace llvm {
27 namespace xray {
29 class LatencyAccountant {
30 public:
31 typedef llvm::DenseMap<int32_t, llvm::SmallVector<uint64_t, 0>>
32 FunctionLatencyMap;
33 typedef llvm::DenseMap<uint32_t, std::pair<uint64_t, uint64_t>>
34 PerThreadMinMaxTSCMap;
35 typedef llvm::DenseMap<uint8_t, std::pair<uint64_t, uint64_t>>
36 PerCPUMinMaxTSCMap;
37 struct FunctionStack {
38 llvm::SmallVector<std::pair<int32_t, uint64_t>, 32> Stack;
39 class RecursionStatus {
40 uint32_t Storage = 0;
41 using Depth = Bitfield::Element<int32_t, 0, 31>; // Low 31 bits.
42 using IsRecursive = Bitfield::Element<bool, 31, 1>; // Sign bit.
43 public:
44 RecursionStatus &operator++();
45 RecursionStatus &operator--();
46 bool isRecursive() const;
48 std::optional<llvm::DenseMap<int32_t, RecursionStatus>> RecursionDepth;
50 typedef llvm::DenseMap<uint32_t, FunctionStack> PerThreadFunctionStackMap;
52 private:
53 PerThreadFunctionStackMap PerThreadFunctionStack;
54 FunctionLatencyMap FunctionLatencies;
55 PerThreadMinMaxTSCMap PerThreadMinMaxTSC;
56 PerCPUMinMaxTSCMap PerCPUMinMaxTSC;
57 FuncIdConversionHelper &FuncIdHelper;
59 bool RecursiveCallsOnly = false;
60 bool DeduceSiblingCalls = false;
61 uint64_t CurrentMaxTSC = 0;
63 void recordLatency(int32_t FuncId, uint64_t Latency) {
64 FunctionLatencies[FuncId].push_back(Latency);
67 public:
68 explicit LatencyAccountant(FuncIdConversionHelper &FuncIdHelper,
69 bool RecursiveCallsOnly, bool DeduceSiblingCalls)
70 : FuncIdHelper(FuncIdHelper), RecursiveCallsOnly(RecursiveCallsOnly),
71 DeduceSiblingCalls(DeduceSiblingCalls) {}
73 const FunctionLatencyMap &getFunctionLatencies() const {
74 return FunctionLatencies;
77 const PerThreadMinMaxTSCMap &getPerThreadMinMaxTSC() const {
78 return PerThreadMinMaxTSC;
81 const PerCPUMinMaxTSCMap &getPerCPUMinMaxTSC() const {
82 return PerCPUMinMaxTSC;
85 /// Returns false in case we fail to account the provided record. This happens
86 /// in the following cases:
87 ///
88 /// - An exit record does not match any entry records for the same function.
89 /// If we've been set to deduce sibling calls, we try walking up the stack
90 /// and recording times for the higher level functions.
91 /// - A record has a TSC that's before the latest TSC that has been
92 /// recorded. We still record the TSC for the min-max.
93 ///
94 bool accountRecord(const XRayRecord &Record);
96 const PerThreadFunctionStackMap &getPerThreadFunctionStack() const {
97 return PerThreadFunctionStack;
100 // Output Functions
101 // ================
103 void exportStatsAsText(raw_ostream &OS, const XRayFileHeader &Header) const;
104 void exportStatsAsCSV(raw_ostream &OS, const XRayFileHeader &Header) const;
106 private:
107 // Internal helper to implement common parts of the exportStatsAs...
108 // functions.
109 template <class F> void exportStats(const XRayFileHeader &Header, F fn) const;
112 } // namespace xray
113 } // namespace llvm
115 #endif // LLVM_TOOLS_LLVM_XRAY_XRAY_ACCOUNT_H