1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "Assembler.h"
12 #include "BenchmarkRunner.h"
13 #include "MCInstrDescView.h"
14 #include "PerfHelper.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstBuilder.h"
19 #include "llvm/Support/FormatVariadic.h"
24 struct ExecutionClass
{
26 const char *Description
;
27 } static const kExecutionClasses
[] = {
28 {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
|
29 ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
,
30 "Repeating a single implicitly serial instruction"},
31 {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
,
32 "Repeating a single explicitly serial instruction"},
33 {ExecutionMode::SERIAL_VIA_MEMORY_INSTR
|
34 ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
,
35 "Repeating two instructions"},
38 static constexpr size_t kMaxAliasingInstructions
= 10;
40 static std::vector
<Instruction
>
41 computeAliasingInstructions(const LLVMState
&State
, const Instruction
&Instr
,
42 size_t MaxAliasingInstructions
) {
43 // Randomly iterate the set of instructions.
44 std::vector
<unsigned> Opcodes
;
45 Opcodes
.resize(State
.getInstrInfo().getNumOpcodes());
46 std::iota(Opcodes
.begin(), Opcodes
.end(), 0U);
47 std::shuffle(Opcodes
.begin(), Opcodes
.end(), randomGenerator());
49 std::vector
<Instruction
> AliasingInstructions
;
50 for (const unsigned OtherOpcode
: Opcodes
) {
51 if (OtherOpcode
== Instr
.Description
->getOpcode())
53 const Instruction
&OtherInstr
= State
.getIC().getInstr(OtherOpcode
);
54 if (OtherInstr
.hasMemoryOperands())
56 if (Instr
.hasAliasingRegistersThrough(OtherInstr
))
57 AliasingInstructions
.push_back(std::move(OtherInstr
));
58 if (AliasingInstructions
.size() >= MaxAliasingInstructions
)
61 return AliasingInstructions
;
64 static ExecutionMode
getExecutionModes(const Instruction
&Instr
) {
65 ExecutionMode EM
= ExecutionMode::UNKNOWN
;
66 if (Instr
.hasAliasingImplicitRegisters())
67 EM
|= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
;
68 if (Instr
.hasTiedRegisters())
69 EM
|= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
;
70 if (Instr
.hasMemoryOperands())
71 EM
|= ExecutionMode::SERIAL_VIA_MEMORY_INSTR
;
73 if (Instr
.hasAliasingRegisters())
74 EM
|= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
;
75 if (Instr
.hasOneUseOrOneDef())
76 EM
|= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
;
81 static void appendCodeTemplates(const LLVMState
&State
,
82 const Instruction
&Instr
,
83 ExecutionMode ExecutionModeBit
,
84 llvm::StringRef ExecutionClassDescription
,
85 std::vector
<CodeTemplate
> &CodeTemplates
) {
86 assert(isEnumValue(ExecutionModeBit
) && "Bit must be a power of two");
87 switch (ExecutionModeBit
) {
88 case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
:
89 // Nothing to do, the instruction is always serial.
91 case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
: {
92 // Picking whatever value for the tied variable will make the instruction
95 CT
.Execution
= ExecutionModeBit
;
96 CT
.Info
= ExecutionClassDescription
;
97 CT
.Instructions
.push_back(Instr
);
98 CodeTemplates
.push_back(std::move(CT
));
101 case ExecutionMode::SERIAL_VIA_MEMORY_INSTR
: {
102 // Select back-to-back memory instruction.
103 // TODO: Implement me.
106 case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
: {
107 // Making the execution of this instruction serial by selecting one def
108 // register to alias with one use register.
109 const AliasingConfigurations
SelfAliasing(Instr
, Instr
);
110 assert(!SelfAliasing
.empty() && !SelfAliasing
.hasImplicitAliasing() &&
111 "Instr must alias itself explicitly");
112 InstructionTemplate
IT(Instr
);
113 // This is a self aliasing instruction so defs and uses are from the same
114 // instance, hence twice IT in the following call.
115 setRandomAliasing(SelfAliasing
, IT
, IT
);
117 CT
.Execution
= ExecutionModeBit
;
118 CT
.Info
= ExecutionClassDescription
;
119 CT
.Instructions
.push_back(std::move(IT
));
120 CodeTemplates
.push_back(std::move(CT
));
123 case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
: {
124 // Select back-to-back non-memory instruction.
125 for (const auto OtherInstr
:
126 computeAliasingInstructions(State
, Instr
, kMaxAliasingInstructions
)) {
127 const AliasingConfigurations
Forward(Instr
, OtherInstr
);
128 const AliasingConfigurations
Back(OtherInstr
, Instr
);
129 InstructionTemplate
ThisIT(Instr
);
130 InstructionTemplate
OtherIT(OtherInstr
);
131 if (!Forward
.hasImplicitAliasing())
132 setRandomAliasing(Forward
, ThisIT
, OtherIT
);
133 if (!Back
.hasImplicitAliasing())
134 setRandomAliasing(Back
, OtherIT
, ThisIT
);
136 CT
.Execution
= ExecutionModeBit
;
137 CT
.Info
= ExecutionClassDescription
;
138 CT
.Instructions
.push_back(std::move(ThisIT
));
139 CT
.Instructions
.push_back(std::move(OtherIT
));
140 CodeTemplates
.push_back(std::move(CT
));
145 llvm_unreachable("Unhandled enum value");
149 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
151 llvm::Expected
<std::vector
<CodeTemplate
>>
152 LatencySnippetGenerator::generateCodeTemplates(const Instruction
&Instr
) const {
153 std::vector
<CodeTemplate
> Results
;
154 const ExecutionMode EM
= getExecutionModes(Instr
);
155 for (const auto EC
: kExecutionClasses
) {
156 for (const auto ExecutionModeBit
: getExecutionModeBits(EM
& EC
.Mask
))
157 appendCodeTemplates(State
, Instr
, ExecutionModeBit
, EC
.Description
,
159 if (!Results
.empty())
163 return llvm::make_error
<BenchmarkFailure
>(
164 "No strategy found to make the execution serial");
165 return std::move(Results
);
168 LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState
&State
,
169 InstructionBenchmark::ModeE Mode
)
170 : BenchmarkRunner(State
, Mode
) {
171 assert((Mode
== InstructionBenchmark::Latency
||
172 Mode
== InstructionBenchmark::InverseThroughput
) &&
176 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
178 llvm::Expected
<std::vector
<BenchmarkMeasure
>>
179 LatencyBenchmarkRunner::runMeasurements(
180 const FunctionExecutor
&Executor
) const {
181 // Cycle measurements include some overhead from the kernel. Repeat the
182 // measure several times and take the minimum value.
183 constexpr const int NumMeasurements
= 30;
184 int64_t MinValue
= std::numeric_limits
<int64_t>::max();
185 const char *CounterName
= State
.getPfmCounters().CycleCounter
;
187 llvm::report_fatal_error("sched model does not define a cycle counter");
188 for (size_t I
= 0; I
< NumMeasurements
; ++I
) {
189 auto ExpectedCounterValue
= Executor
.runAndMeasure(CounterName
);
190 if (!ExpectedCounterValue
)
191 return ExpectedCounterValue
.takeError();
192 if (*ExpectedCounterValue
< MinValue
)
193 MinValue
= *ExpectedCounterValue
;
195 std::vector
<BenchmarkMeasure
> Result
;
197 case InstructionBenchmark::Latency
:
198 Result
= {BenchmarkMeasure::Create("latency", MinValue
)};
200 case InstructionBenchmark::InverseThroughput
:
201 Result
= {BenchmarkMeasure::Create("inverse_throughput", MinValue
)};
206 return std::move(Result
);
209 } // namespace exegesis