//===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
12 #include "Assembler.h"
13 #include "BenchmarkRunner.h"
14 #include "MCInstrDescView.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MC/MCInstBuilder.h"
18 #include "llvm/Support/FormatVariadic.h"
22 struct ExecutionClass
{
24 const char *Description
;
25 } static const kExecutionClasses
[] = {
26 {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
|
27 ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
,
28 "Repeating a single implicitly serial instruction"},
29 {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
,
30 "Repeating a single explicitly serial instruction"},
31 {ExecutionMode::SERIAL_VIA_MEMORY_INSTR
|
32 ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
,
33 "Repeating two instructions"},
// Upper bound on the number of aliasing instructions requested from
// computeAliasingInstructions() when pairing an instruction with another one.
static constexpr size_t kMaxAliasingInstructions{10};
38 static std::vector
<Instruction
>
39 computeAliasingInstructions(const LLVMState
&State
, const Instruction
&Instr
,
40 size_t MaxAliasingInstructions
) {
41 // Randomly iterate the set of instructions.
42 std::vector
<unsigned> Opcodes
;
43 Opcodes
.resize(State
.getInstrInfo().getNumOpcodes());
44 std::iota(Opcodes
.begin(), Opcodes
.end(), 0U);
45 std::shuffle(Opcodes
.begin(), Opcodes
.end(), randomGenerator());
47 std::vector
<Instruction
> AliasingInstructions
;
48 for (const unsigned OtherOpcode
: Opcodes
) {
49 if (OtherOpcode
== Instr
.Description
->getOpcode())
51 const Instruction
OtherInstr(State
, OtherOpcode
);
52 if (OtherInstr
.hasMemoryOperands())
54 if (Instr
.hasAliasingRegistersThrough(OtherInstr
))
55 AliasingInstructions
.push_back(std::move(OtherInstr
));
56 if (AliasingInstructions
.size() >= MaxAliasingInstructions
)
59 return AliasingInstructions
;
62 static ExecutionMode
getExecutionModes(const Instruction
&Instr
) {
63 ExecutionMode EM
= ExecutionMode::UNKNOWN
;
64 if (Instr
.hasAliasingImplicitRegisters())
65 EM
|= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
;
66 if (Instr
.hasTiedRegisters())
67 EM
|= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
;
68 if (Instr
.hasMemoryOperands())
69 EM
|= ExecutionMode::SERIAL_VIA_MEMORY_INSTR
;
71 if (Instr
.hasAliasingRegisters())
72 EM
|= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
;
73 if (Instr
.hasOneUseOrOneDef())
74 EM
|= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
;
79 static void appendCodeTemplates(const LLVMState
&State
,
80 const Instruction
&Instr
,
81 ExecutionMode ExecutionModeBit
,
82 llvm::StringRef ExecutionClassDescription
,
83 std::vector
<CodeTemplate
> &CodeTemplates
) {
84 assert(isEnumValue(ExecutionModeBit
) && "Bit must be a power of two");
85 switch (ExecutionModeBit
) {
86 case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
:
87 // Nothing to do, the instruction is always serial.
89 case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
: {
90 // Picking whatever value for the tied variable will make the instruction
93 CT
.Execution
= ExecutionModeBit
;
94 CT
.Info
= ExecutionClassDescription
;
95 CT
.Instructions
.push_back(Instr
);
96 CodeTemplates
.push_back(std::move(CT
));
99 case ExecutionMode::SERIAL_VIA_MEMORY_INSTR
: {
100 // Select back-to-back memory instruction.
101 // TODO: Implement me.
104 case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
: {
105 // Making the execution of this instruction serial by selecting one def
106 // register to alias with one use register.
107 const AliasingConfigurations
SelfAliasing(Instr
, Instr
);
108 assert(!SelfAliasing
.empty() && !SelfAliasing
.hasImplicitAliasing() &&
109 "Instr must alias itself explicitly");
110 InstructionTemplate
IT(Instr
);
111 // This is a self aliasing instruction so defs and uses are from the same
112 // instance, hence twice IT in the following call.
113 setRandomAliasing(SelfAliasing
, IT
, IT
);
115 CT
.Execution
= ExecutionModeBit
;
116 CT
.Info
= ExecutionClassDescription
;
117 CT
.Instructions
.push_back(std::move(IT
));
118 CodeTemplates
.push_back(std::move(CT
));
121 case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
: {
122 // Select back-to-back non-memory instruction.
123 for (const auto OtherInstr
:
124 computeAliasingInstructions(State
, Instr
, kMaxAliasingInstructions
)) {
125 const AliasingConfigurations
Forward(Instr
, OtherInstr
);
126 const AliasingConfigurations
Back(OtherInstr
, Instr
);
127 InstructionTemplate
ThisIT(Instr
);
128 InstructionTemplate
OtherIT(OtherInstr
);
129 if (!Forward
.hasImplicitAliasing())
130 setRandomAliasing(Forward
, ThisIT
, OtherIT
);
131 if (!Back
.hasImplicitAliasing())
132 setRandomAliasing(Back
, OtherIT
, ThisIT
);
134 CT
.Execution
= ExecutionModeBit
;
135 CT
.Info
= ExecutionClassDescription
;
136 CT
.Instructions
.push_back(std::move(ThisIT
));
137 CT
.Instructions
.push_back(std::move(OtherIT
));
138 CodeTemplates
.push_back(std::move(CT
));
143 llvm_unreachable("Unhandled enum value");
// Out-of-line defaulted destructor.
// NOTE(review): presumably defined here rather than in the header so the
// class has a single home translation unit -- confirm against Latency.h.
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
149 llvm::Expected
<std::vector
<CodeTemplate
>>
150 LatencySnippetGenerator::generateCodeTemplates(const Instruction
&Instr
) const {
151 std::vector
<CodeTemplate
> Results
;
152 const ExecutionMode EM
= getExecutionModes(Instr
);
153 for (const auto EC
: kExecutionClasses
) {
154 for (const auto ExecutionModeBit
: getExecutionModeBits(EM
& EC
.Mask
))
155 appendCodeTemplates(State
, Instr
, ExecutionModeBit
, EC
.Description
,
157 if (!Results
.empty())
161 return llvm::make_error
<BenchmarkFailure
>(
162 "No strategy found to make the execution serial");
163 return std::move(Results
);
166 const char *LatencyBenchmarkRunner::getCounterName() const {
167 if (!State
.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
168 llvm::report_fatal_error("sched model is missing extra processor info!");
169 const char *CounterName
= State
.getSubtargetInfo()
171 .getExtraProcessorInfo()
172 .PfmCounters
.CycleCounter
;
174 llvm::report_fatal_error("sched model does not define a cycle counter");
// Out-of-line defaulted destructor.
// NOTE(review): presumably defined here rather than in the header so the
// class has a single home translation unit -- confirm against Latency.h.
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
180 llvm::Expected
<std::vector
<BenchmarkMeasure
>>
181 LatencyBenchmarkRunner::runMeasurements(
182 const FunctionExecutor
&Executor
) const {
183 // Cycle measurements include some overhead from the kernel. Repeat the
184 // measure several times and take the minimum value.
185 constexpr const int NumMeasurements
= 30;
186 int64_t MinValue
= std::numeric_limits
<int64_t>::max();
187 const char *CounterName
= getCounterName();
189 llvm::report_fatal_error("could not determine cycle counter name");
190 for (size_t I
= 0; I
< NumMeasurements
; ++I
) {
191 auto ExpectedCounterValue
= Executor
.runAndMeasure(CounterName
);
192 if (!ExpectedCounterValue
)
193 return ExpectedCounterValue
.takeError();
194 if (*ExpectedCounterValue
< MinValue
)
195 MinValue
= *ExpectedCounterValue
;
197 std::vector
<BenchmarkMeasure
> Result
= {
198 BenchmarkMeasure::Create("latency", MinValue
)};
199 return std::move(Result
);
202 } // namespace exegesis