1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "Assembler.h"
12 #include "BenchmarkRunner.h"
13 #include "MCInstrDescView.h"
14 #include "PerfHelper.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstBuilder.h"
19 #include "llvm/Support/FormatVariadic.h"
24 struct ExecutionClass
{
26 const char *Description
;
27 } static const kExecutionClasses
[] = {
28 {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
|
29 ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
,
30 "Repeating a single implicitly serial instruction"},
31 {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
,
32 "Repeating a single explicitly serial instruction"},
33 {ExecutionMode::SERIAL_VIA_MEMORY_INSTR
|
34 ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
,
35 "Repeating two instructions"},
38 static constexpr size_t kMaxAliasingInstructions
= 10;
40 static std::vector
<Instruction
>
41 computeAliasingInstructions(const LLVMState
&State
, const Instruction
&Instr
,
42 size_t MaxAliasingInstructions
,
43 const BitVector
&ForbiddenRegisters
) {
44 // Randomly iterate the set of instructions.
45 std::vector
<unsigned> Opcodes
;
46 Opcodes
.resize(State
.getInstrInfo().getNumOpcodes());
47 std::iota(Opcodes
.begin(), Opcodes
.end(), 0U);
48 std::shuffle(Opcodes
.begin(), Opcodes
.end(), randomGenerator());
50 std::vector
<Instruction
> AliasingInstructions
;
51 for (const unsigned OtherOpcode
: Opcodes
) {
52 if (OtherOpcode
== Instr
.Description
->getOpcode())
54 const Instruction
&OtherInstr
= State
.getIC().getInstr(OtherOpcode
);
55 if (OtherInstr
.hasMemoryOperands())
57 if (Instr
.hasAliasingRegistersThrough(OtherInstr
, ForbiddenRegisters
))
58 AliasingInstructions
.push_back(OtherInstr
);
59 if (AliasingInstructions
.size() >= MaxAliasingInstructions
)
62 return AliasingInstructions
;
65 static ExecutionMode
getExecutionModes(const Instruction
&Instr
,
66 const BitVector
&ForbiddenRegisters
) {
67 ExecutionMode EM
= ExecutionMode::UNKNOWN
;
68 if (Instr
.hasAliasingImplicitRegisters())
69 EM
|= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
;
70 if (Instr
.hasTiedRegisters())
71 EM
|= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
;
72 if (Instr
.hasMemoryOperands())
73 EM
|= ExecutionMode::SERIAL_VIA_MEMORY_INSTR
;
75 if (Instr
.hasAliasingRegisters(ForbiddenRegisters
))
76 EM
|= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
;
77 if (Instr
.hasOneUseOrOneDef())
78 EM
|= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
;
83 static void appendCodeTemplates(const LLVMState
&State
,
84 const Instruction
&Instr
,
85 const BitVector
&ForbiddenRegisters
,
86 ExecutionMode ExecutionModeBit
,
87 StringRef ExecutionClassDescription
,
88 std::vector
<CodeTemplate
> &CodeTemplates
) {
89 assert(isEnumValue(ExecutionModeBit
) && "Bit must be a power of two");
90 switch (ExecutionModeBit
) {
91 case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
:
92 // Nothing to do, the instruction is always serial.
94 case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
: {
95 // Picking whatever value for the tied variable will make the instruction
98 CT
.Execution
= ExecutionModeBit
;
99 CT
.Info
= ExecutionClassDescription
;
100 CT
.Instructions
.push_back(Instr
);
101 CodeTemplates
.push_back(std::move(CT
));
104 case ExecutionMode::SERIAL_VIA_MEMORY_INSTR
: {
105 // Select back-to-back memory instruction.
106 // TODO: Implement me.
109 case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
: {
110 // Making the execution of this instruction serial by selecting one def
111 // register to alias with one use register.
112 const AliasingConfigurations
SelfAliasing(Instr
, Instr
);
113 assert(!SelfAliasing
.empty() && !SelfAliasing
.hasImplicitAliasing() &&
114 "Instr must alias itself explicitly");
115 InstructionTemplate
IT(Instr
);
116 // This is a self aliasing instruction so defs and uses are from the same
117 // instance, hence twice IT in the following call.
118 setRandomAliasing(SelfAliasing
, IT
, IT
);
120 CT
.Execution
= ExecutionModeBit
;
121 CT
.Info
= ExecutionClassDescription
;
122 CT
.Instructions
.push_back(std::move(IT
));
123 CodeTemplates
.push_back(std::move(CT
));
126 case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
: {
127 // Select back-to-back non-memory instruction.
128 for (const auto OtherInstr
: computeAliasingInstructions(
129 State
, Instr
, kMaxAliasingInstructions
, ForbiddenRegisters
)) {
130 const AliasingConfigurations
Forward(Instr
, OtherInstr
);
131 const AliasingConfigurations
Back(OtherInstr
, Instr
);
132 InstructionTemplate
ThisIT(Instr
);
133 InstructionTemplate
OtherIT(OtherInstr
);
134 if (!Forward
.hasImplicitAliasing())
135 setRandomAliasing(Forward
, ThisIT
, OtherIT
);
136 if (!Back
.hasImplicitAliasing())
137 setRandomAliasing(Back
, OtherIT
, ThisIT
);
139 CT
.Execution
= ExecutionModeBit
;
140 CT
.Info
= ExecutionClassDescription
;
141 CT
.Instructions
.push_back(std::move(ThisIT
));
142 CT
.Instructions
.push_back(std::move(OtherIT
));
143 CodeTemplates
.push_back(std::move(CT
));
148 llvm_unreachable("Unhandled enum value");
152 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
154 Expected
<std::vector
<CodeTemplate
>>
155 LatencySnippetGenerator::generateCodeTemplates(
156 const Instruction
&Instr
, const BitVector
&ForbiddenRegisters
) const {
157 std::vector
<CodeTemplate
> Results
;
158 const ExecutionMode EM
= getExecutionModes(Instr
, ForbiddenRegisters
);
159 for (const auto EC
: kExecutionClasses
) {
160 for (const auto ExecutionModeBit
: getExecutionModeBits(EM
& EC
.Mask
))
161 appendCodeTemplates(State
, Instr
, ForbiddenRegisters
, ExecutionModeBit
,
162 EC
.Description
, Results
);
163 if (!Results
.empty())
167 return make_error
<Failure
>(
168 "No strategy found to make the execution serial");
169 return std::move(Results
);
172 LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState
&State
,
173 InstructionBenchmark::ModeE Mode
)
174 : BenchmarkRunner(State
, Mode
) {
175 assert((Mode
== InstructionBenchmark::Latency
||
176 Mode
== InstructionBenchmark::InverseThroughput
) &&
180 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
182 Expected
<std::vector
<BenchmarkMeasure
>> LatencyBenchmarkRunner::runMeasurements(
183 const FunctionExecutor
&Executor
) const {
184 // Cycle measurements include some overhead from the kernel. Repeat the
185 // measure several times and take the minimum value.
186 constexpr const int NumMeasurements
= 30;
187 int64_t MinValue
= std::numeric_limits
<int64_t>::max();
188 const char *CounterName
= State
.getPfmCounters().CycleCounter
;
190 report_fatal_error("sched model does not define a cycle counter");
191 for (size_t I
= 0; I
< NumMeasurements
; ++I
) {
192 auto ExpectedCounterValue
= Executor
.runAndMeasure(CounterName
);
193 if (!ExpectedCounterValue
)
194 return ExpectedCounterValue
.takeError();
195 if (*ExpectedCounterValue
< MinValue
)
196 MinValue
= *ExpectedCounterValue
;
198 std::vector
<BenchmarkMeasure
> Result
;
200 case InstructionBenchmark::Latency
:
201 Result
= {BenchmarkMeasure::Create("latency", MinValue
)};
203 case InstructionBenchmark::InverseThroughput
:
204 Result
= {BenchmarkMeasure::Create("inverse_throughput", MinValue
)};
209 return std::move(Result
);
212 } // namespace exegesis