//===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Latency.h"

#include "Assembler.h"
#include "BenchmarkRunner.h"
#include "MCInstrDescView.h"
#include "PerfHelper.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

namespace exegesis {
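
// An ExecutionClass pairs a set of ExecutionMode bits with a human-readable
// description. Classes are tried in the order listed below; code template
// generation stops at the first class that yields results (see
// generateCodeTemplates).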
struct ExecutionClass {
  ExecutionMode Mask;
  const char *Description;
} static const kExecutionClasses[] = {
    {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
         ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
     "Repeating a single implicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
     "Repeating a single explicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
         ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
     "Repeating two instructions"},
};

static constexpr size_t kMaxAliasingInstructions = 10;
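
// Returns a random sample of at most MaxAliasingInstructions non-memory
// instructions (other than Instr itself) whose registers can alias with
// those of Instr. The opcode space is visited in random order so repeated
// runs explore different aliasing partners.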
static std::vector<Instruction>
computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
                            size_t MaxAliasingInstructions) {
  // Randomly iterate the set of instructions.
  std::vector<unsigned> Opcodes;
  Opcodes.resize(State.getInstrInfo().getNumOpcodes());
  std::iota(Opcodes.begin(), Opcodes.end(), 0U);
  std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());

  std::vector<Instruction> AliasingInstructions;
  for (const unsigned OtherOpcode : Opcodes) {
    if (OtherOpcode == Instr.Description->getOpcode())
      continue;
    const Instruction OtherInstr(State, OtherOpcode);
    if (OtherInstr.hasMemoryOperands())
      continue;
    if (Instr.hasAliasingRegistersThrough(OtherInstr))
      AliasingInstructions.push_back(std::move(OtherInstr));
    if (AliasingInstructions.size() >= MaxAliasingInstructions)
      break;
  }
  return AliasingInstructions;
}
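
// Computes the set of ExecutionMode bits under which Instr can be made to
// execute serially: via always-aliasing implicit or tied registers, via an
// explicit register that is both read and written, or by chaining through a
// second (memory or non-memory) instruction.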
static ExecutionMode getExecutionModes(const Instruction &Instr) {
  ExecutionMode EM = ExecutionMode::UNKNOWN;
  if (Instr.hasAliasingImplicitRegisters())
    EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
  if (Instr.hasTiedRegisters())
    EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
  if (Instr.hasMemoryOperands())
    EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
  else {
    if (Instr.hasAliasingRegisters())
      EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
    if (Instr.hasOneUseOrOneDef())
      EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
  }
  return EM;
}
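
// Appends to CodeTemplates the code template(s) that serialize Instr under
// the single execution mode bit ExecutionModeBit. ExecutionClassDescription
// is recorded in each template's Info field.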
static void appendCodeTemplates(const LLVMState &State,
                                const Instruction &Instr,
                                ExecutionMode ExecutionModeBit,
                                llvm::StringRef ExecutionClassDescription,
                                std::vector<CodeTemplate> &CodeTemplates) {
  assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
  switch (ExecutionModeBit) {
  case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
    // Nothing to do, the instruction is always serial.
    LLVM_FALLTHROUGH;
  case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
    // Picking any value for the tied variable will make the instruction
    // serial.
    CodeTemplate CT;
    CT.Execution = ExecutionModeBit;
    CT.Info = ExecutionClassDescription;
    CT.Instructions.push_back(Instr);
    CodeTemplates.push_back(std::move(CT));
    return;
  }
  case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
    // Select back-to-back memory instructions.
    // TODO: Implement me.
    return;
  }
  case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
    // Make the execution of this instruction serial by selecting one def
    // register to alias with one use register.
    const AliasingConfigurations SelfAliasing(Instr, Instr);
    assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
           "Instr must alias itself explicitly");
    InstructionTemplate IT(Instr);
    // This is a self-aliasing instruction, so defs and uses come from the
    // same instance, hence IT is passed twice in the following call.
    setRandomAliasing(SelfAliasing, IT, IT);
    CodeTemplate CT;
    CT.Execution = ExecutionModeBit;
    CT.Info = ExecutionClassDescription;
    CT.Instructions.push_back(std::move(IT));
    CodeTemplates.push_back(std::move(CT));
    return;
  }
  case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
    // Select back-to-back non-memory instructions.
    for (const auto OtherInstr :
         computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
      const AliasingConfigurations Forward(Instr, OtherInstr);
      const AliasingConfigurations Back(OtherInstr, Instr);
      InstructionTemplate ThisIT(Instr);
      InstructionTemplate OtherIT(OtherInstr);
      if (!Forward.hasImplicitAliasing())
        setRandomAliasing(Forward, ThisIT, OtherIT);
      if (!Back.hasImplicitAliasing())
        setRandomAliasing(Back, OtherIT, ThisIT);
      CodeTemplate CT;
      CT.Execution = ExecutionModeBit;
      CT.Info = ExecutionClassDescription;
      CT.Instructions.push_back(std::move(ThisIT));
      CT.Instructions.push_back(std::move(OtherIT));
      CodeTemplates.push_back(std::move(CT));
    }
    return;
  }
  default:
    llvm_unreachable("Unhandled enum value");
  }
}

LatencySnippetGenerator::~LatencySnippetGenerator() = default;
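
// Generates one CodeTemplate per applicable execution mode bit, trying the
// execution classes in order and stopping at the first class that produces
// at least one template.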
llvm::Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
  std::vector<CodeTemplate> Results;
  const ExecutionMode EM = getExecutionModes(Instr);
  for (const auto EC : kExecutionClasses) {
    for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
      appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
                          Results);
    if (!Results.empty())
      break;
  }
  if (Results.empty())
    return llvm::make_error<BenchmarkFailure>(
        "No strategy found to make the execution serial");
  return std::move(Results);
}
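
// Returns the name of the cycle counter declared in the subtarget's extra
// processor info (PfmCounters), aborting if the scheduling model does not
// provide one.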
const char *LatencyBenchmarkRunner::getCounterName() const {
  if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
    llvm::report_fatal_error("sched model is missing extra processor info!");
  const char *CounterName = State.getSubtargetInfo()
                                .getSchedModel()
                                .getExtraProcessorInfo()
                                .PfmCounters.CycleCounter;
  if (!CounterName)
    llvm::report_fatal_error("sched model does not define a cycle counter");
  return CounterName;
}

LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
std::vector<BenchmarkMeasure>
LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
                                        ScratchSpace &Scratch) const {
  // Cycle measurements include some overhead from the kernel. Repeat the
  // measurement several times and take the minimum value.
  constexpr const int NumMeasurements = 30;
  int64_t MinLatency = std::numeric_limits<int64_t>::max();
  const char *CounterName = getCounterName();
  if (!CounterName)
    llvm::report_fatal_error("could not determine cycle counter name");
  const pfm::PerfEvent CyclesPerfEvent(CounterName);
  if (!CyclesPerfEvent.valid())
    llvm::report_fatal_error("invalid perf event");
  for (size_t I = 0; I < NumMeasurements; ++I) {
    pfm::Counter Counter(CyclesPerfEvent);
    Scratch.clear();
    Counter.start();
    Function(Scratch.ptr());
    Counter.stop();
    const int64_t Value = Counter.read();
    if (Value < MinLatency)
      MinLatency = Value;
  }
  return {BenchmarkMeasure::Create("latency", MinLatency)};
}

} // namespace exegesis