[llvm-exegesis] Reject x86 instructions that use non uniform memory accesses
[llvm-core.git] / tools / llvm-exegesis / lib / Latency.cpp
blob 7d68d60c48bd1c8f326d8cf5972e51c3d9abfb3c
1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
#include "Latency.h"

#include "Assembler.h"
#include "BenchmarkRunner.h"
#include "MCInstrDescView.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <numeric>
#include <vector>
20 namespace exegesis {
// A serialization strategy class: a set of ExecutionMode bits (Mask) plus a
// human-readable description recorded in the produced benchmark's Info field.
// The classes are tried in order by generateCodeTemplates: single-instruction
// strategies first, falling back to two-instruction chains last.
struct ExecutionClass {
  ExecutionMode Mask;
  const char *Description;
} static const kExecutionClasses[] = {
    {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
         ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
     "Repeating a single implicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
     "Repeating a single explicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
         ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
     "Repeating two instructions"},
};
// Upper bound on the number of candidates computeAliasingInstructions returns,
// which caps how many two-instruction templates SERIAL_VIA_NON_MEMORY_INSTR
// generates.
static constexpr size_t kMaxAliasingInstructions = 10;
38 static std::vector<Instruction>
39 computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
40 size_t MaxAliasingInstructions) {
41 // Randomly iterate the set of instructions.
42 std::vector<unsigned> Opcodes;
43 Opcodes.resize(State.getInstrInfo().getNumOpcodes());
44 std::iota(Opcodes.begin(), Opcodes.end(), 0U);
45 std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
47 std::vector<Instruction> AliasingInstructions;
48 for (const unsigned OtherOpcode : Opcodes) {
49 if (OtherOpcode == Instr.Description->getOpcode())
50 continue;
51 const Instruction OtherInstr(State, OtherOpcode);
52 if (OtherInstr.hasMemoryOperands())
53 continue;
54 if (Instr.hasAliasingRegistersThrough(OtherInstr))
55 AliasingInstructions.push_back(std::move(OtherInstr));
56 if (AliasingInstructions.size() >= MaxAliasingInstructions)
57 break;
59 return AliasingInstructions;
62 static ExecutionMode getExecutionModes(const Instruction &Instr) {
63 ExecutionMode EM = ExecutionMode::UNKNOWN;
64 if (Instr.hasAliasingImplicitRegisters())
65 EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
66 if (Instr.hasTiedRegisters())
67 EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
68 if (Instr.hasMemoryOperands())
69 EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
70 else {
71 if (Instr.hasAliasingRegisters())
72 EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
73 if (Instr.hasOneUseOrOneDef())
74 EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
76 return EM;
79 static void appendCodeTemplates(const LLVMState &State,
80 const Instruction &Instr,
81 ExecutionMode ExecutionModeBit,
82 llvm::StringRef ExecutionClassDescription,
83 std::vector<CodeTemplate> &CodeTemplates) {
84 assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
85 switch (ExecutionModeBit) {
86 case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
87 // Nothing to do, the instruction is always serial.
88 LLVM_FALLTHROUGH;
89 case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
90 // Picking whatever value for the tied variable will make the instruction
91 // serial.
92 CodeTemplate CT;
93 CT.Execution = ExecutionModeBit;
94 CT.Info = ExecutionClassDescription;
95 CT.Instructions.push_back(Instr);
96 CodeTemplates.push_back(std::move(CT));
97 return;
99 case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
100 // Select back-to-back memory instruction.
101 // TODO: Implement me.
102 return;
104 case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
105 // Making the execution of this instruction serial by selecting one def
106 // register to alias with one use register.
107 const AliasingConfigurations SelfAliasing(Instr, Instr);
108 assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
109 "Instr must alias itself explicitly");
110 InstructionTemplate IT(Instr);
111 // This is a self aliasing instruction so defs and uses are from the same
112 // instance, hence twice IT in the following call.
113 setRandomAliasing(SelfAliasing, IT, IT);
114 CodeTemplate CT;
115 CT.Execution = ExecutionModeBit;
116 CT.Info = ExecutionClassDescription;
117 CT.Instructions.push_back(std::move(IT));
118 CodeTemplates.push_back(std::move(CT));
119 return;
121 case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
122 // Select back-to-back non-memory instruction.
123 for (const auto OtherInstr :
124 computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
125 const AliasingConfigurations Forward(Instr, OtherInstr);
126 const AliasingConfigurations Back(OtherInstr, Instr);
127 InstructionTemplate ThisIT(Instr);
128 InstructionTemplate OtherIT(OtherInstr);
129 if (!Forward.hasImplicitAliasing())
130 setRandomAliasing(Forward, ThisIT, OtherIT);
131 if (!Back.hasImplicitAliasing())
132 setRandomAliasing(Back, OtherIT, ThisIT);
133 CodeTemplate CT;
134 CT.Execution = ExecutionModeBit;
135 CT.Info = ExecutionClassDescription;
136 CT.Instructions.push_back(std::move(ThisIT));
137 CT.Instructions.push_back(std::move(OtherIT));
138 CodeTemplates.push_back(std::move(CT));
140 return;
142 default:
143 llvm_unreachable("Unhandled enum value");
// Out-of-line defaulted destructor (class declared in the header).
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
149 llvm::Expected<std::vector<CodeTemplate>>
150 LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
151 std::vector<CodeTemplate> Results;
152 const ExecutionMode EM = getExecutionModes(Instr);
153 for (const auto EC : kExecutionClasses) {
154 for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
155 appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
156 Results);
157 if (!Results.empty())
158 break;
160 if (Results.empty())
161 return llvm::make_error<BenchmarkFailure>(
162 "No strategy found to make the execution serial");
163 return std::move(Results);
166 const char *LatencyBenchmarkRunner::getCounterName() const {
167 if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
168 llvm::report_fatal_error("sched model is missing extra processor info!");
169 const char *CounterName = State.getSubtargetInfo()
170 .getSchedModel()
171 .getExtraProcessorInfo()
172 .PfmCounters.CycleCounter;
173 if (!CounterName)
174 llvm::report_fatal_error("sched model does not define a cycle counter");
175 return CounterName;
// Out-of-line defaulted destructor (class declared in the header).
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
180 llvm::Expected<std::vector<BenchmarkMeasure>>
181 LatencyBenchmarkRunner::runMeasurements(
182 const FunctionExecutor &Executor) const {
183 // Cycle measurements include some overhead from the kernel. Repeat the
184 // measure several times and take the minimum value.
185 constexpr const int NumMeasurements = 30;
186 int64_t MinValue = std::numeric_limits<int64_t>::max();
187 const char *CounterName = getCounterName();
188 if (!CounterName)
189 llvm::report_fatal_error("could not determine cycle counter name");
190 for (size_t I = 0; I < NumMeasurements; ++I) {
191 auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
192 if (!ExpectedCounterValue)
193 return ExpectedCounterValue.takeError();
194 if (*ExpectedCounterValue < MinValue)
195 MinValue = *ExpectedCounterValue;
197 std::vector<BenchmarkMeasure> Result = {
198 BenchmarkMeasure::Create("latency", MinValue)};
199 return std::move(Result);
202 } // namespace exegesis