1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 #include "Assembler.h"
13 #include "BenchmarkRunner.h"
14 #include "MCInstrDescView.h"
15 #include "PerfHelper.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstBuilder.h"
19 #include "llvm/Support/FormatVariadic.h"
23 struct ExecutionClass
{
25 const char *Description
;
26 } static const kExecutionClasses
[] = {
27 {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
|
28 ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
,
29 "Repeating a single implicitly serial instruction"},
30 {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
,
31 "Repeating a single explicitly serial instruction"},
32 {ExecutionMode::SERIAL_VIA_MEMORY_INSTR
|
33 ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
,
34 "Repeating two instructions"},
/// Upper bound handed to computeAliasingInstructions() on the number of
/// aliasing instructions to collect when building two-instruction templates.
static constexpr size_t kMaxAliasingInstructions = 10;
39 static std::vector
<Instruction
>
40 computeAliasingInstructions(const LLVMState
&State
, const Instruction
&Instr
,
41 size_t MaxAliasingInstructions
) {
42 // Randomly iterate the set of instructions.
43 std::vector
<unsigned> Opcodes
;
44 Opcodes
.resize(State
.getInstrInfo().getNumOpcodes());
45 std::iota(Opcodes
.begin(), Opcodes
.end(), 0U);
46 std::shuffle(Opcodes
.begin(), Opcodes
.end(), randomGenerator());
48 std::vector
<Instruction
> AliasingInstructions
;
49 for (const unsigned OtherOpcode
: Opcodes
) {
50 if (OtherOpcode
== Instr
.Description
->getOpcode())
52 const Instruction
OtherInstr(State
, OtherOpcode
);
53 if (OtherInstr
.hasMemoryOperands())
55 if (Instr
.hasAliasingRegistersThrough(OtherInstr
))
56 AliasingInstructions
.push_back(std::move(OtherInstr
));
57 if (AliasingInstructions
.size() >= MaxAliasingInstructions
)
60 return AliasingInstructions
;
// Derives the execution modes available for Instr: each check below that holds
// ORs the matching ExecutionMode bit into EM.
//
// NOTE(review): this listing is damaged. The leading integers are residue of
// the original file's line numbers, and original lines 64, 71 and 76-78 are
// absent from this view. The declaration and initial value of EM, whatever
// separates the memory-operand check from the two checks that follow it, and
// the function's return statement and closing brace are therefore not visible
// here - restore them from a pristine copy rather than guessing.
63 static ExecutionMode
getExecutionModes(const Instruction
&Instr
) {
// Instr reports aliasing implicit registers.
65 if (Instr
.hasAliasingImplicitRegisters())
66 EM
|= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
;
// Instr reports tied registers.
67 if (Instr
.hasTiedRegisters())
68 EM
|= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
;
// Instr reports memory operands.
69 if (Instr
.hasMemoryOperands())
70 EM
|= ExecutionMode::SERIAL_VIA_MEMORY_INSTR
;
// NOTE(review): original line 71 is missing here; it may change how the two
// checks below relate to the memory-operand check above - TODO confirm.
// Instr reports aliasing registers.
72 if (Instr
.hasAliasingRegisters())
73 EM
|= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
;
// Instr reports exactly what hasOneUseOrOneDef() tests for.
74 if (Instr
.hasOneUseOrOneDef())
75 EM
|= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
;
// Appends to CodeTemplates the code template(s) for Instr under the single
// execution mode ExecutionModeBit. Every template built here gets its
// Execution field set to ExecutionModeBit and its Info field set to
// ExecutionClassDescription.
//
// State is only forwarded to computeAliasingInstructions() in the
// SERIAL_VIA_NON_MEMORY_INSTR case. ExecutionModeBit must satisfy
// isEnumValue(), which is asserted on entry.
//
// NOTE(review): this listing is damaged. The leading integers are residue of
// the original file's line numbers, and several original lines (89, 92-93,
// 98-99, 103-104, 115, 120-121, 134, 140-143, 145-146) are absent from this
// view. Not visible as a result: the declarations of CT, how each case ends
// (in particular whether the first case falls through into the second), and
// the closing braces. Restore them from a pristine copy before relying on
// this block.
80 static void appendCodeTemplates(const LLVMState
&State
,
81 const Instruction
&Instr
,
82 ExecutionMode ExecutionModeBit
,
83 llvm::StringRef ExecutionClassDescription
,
84 std::vector
<CodeTemplate
> &CodeTemplates
) {
// Exactly one mode is expected per call.
85 assert(isEnumValue(ExecutionModeBit
) && "Bit must be a power of two");
86 switch (ExecutionModeBit
) {
87 case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS
:
88 // Nothing to do, the instruction is always serial.
// NOTE(review): original line 89 is missing; what terminates this case is not
// visible here - TODO confirm.
90 case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS
: {
91 // Picking whatever value for the tied variable will make the instruction
// NOTE(review): the comment above is cut short and the declaration of CT
// (original lines 92-93) is missing.
// The template consists of Instr alone, pushed unmodified.
94 CT
.Execution
= ExecutionModeBit
;
95 CT
.Info
= ExecutionClassDescription
;
96 CT
.Instructions
.push_back(Instr
);
97 CodeTemplates
.push_back(std::move(CT
));
100 case ExecutionMode::SERIAL_VIA_MEMORY_INSTR
: {
101 // Select back-to-back memory instruction.
102 // TODO: Implement me.
105 case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS
: {
106 // Making the execution of this instruction serial by selecting one def
107 // register to alias with one use register.
108 const AliasingConfigurations
SelfAliasing(Instr
, Instr
);
// Precondition for this mode: Instr aliases itself, and not implicitly.
109 assert(!SelfAliasing
.empty() && !SelfAliasing
.hasImplicitAliasing() &&
110 "Instr must alias itself explicitly");
111 InstructionTemplate
IT(Instr
);
112 // This is a self aliasing instruction so defs and uses are from the same
113 // instance, hence twice IT in the following call.
114 setRandomAliasing(SelfAliasing
, IT
, IT
);
// NOTE(review): the declaration of CT (original line 115) is missing.
// The template consists of the single instruction template IT.
116 CT
.Execution
= ExecutionModeBit
;
117 CT
.Info
= ExecutionClassDescription
;
118 CT
.Instructions
.push_back(std::move(IT
));
119 CodeTemplates
.push_back(std::move(CT
));
122 case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR
: {
123 // Select back-to-back non-memory instruction.
// One two-instruction template is produced per instruction returned by
// computeAliasingInstructions().
// NOTE(review): `const auto OtherInstr` copies every element of the returned
// vector; binding by `const auto &` would avoid the copy.
124 for (const auto OtherInstr
:
125 computeAliasingInstructions(State
, Instr
, kMaxAliasingInstructions
)) {
// Aliasing is computed in both directions between Instr and OtherInstr.
126 const AliasingConfigurations
Forward(Instr
, OtherInstr
);
127 const AliasingConfigurations
Back(OtherInstr
, Instr
);
128 InstructionTemplate
ThisIT(Instr
);
129 InstructionTemplate
OtherIT(OtherInstr
);
// Random aliasing is only set for a direction that is not implicitly aliased.
130 if (!Forward
.hasImplicitAliasing())
131 setRandomAliasing(Forward
, ThisIT
, OtherIT
);
132 if (!Back
.hasImplicitAliasing())
133 setRandomAliasing(Back
, OtherIT
, ThisIT
);
// NOTE(review): the declaration of CT (original line 134) is missing.
// The template holds ThisIT followed by OtherIT, in that order.
135 CT
.Execution
= ExecutionModeBit
;
136 CT
.Info
= ExecutionClassDescription
;
137 CT
.Instructions
.push_back(std::move(ThisIT
));
138 CT
.Instructions
.push_back(std::move(OtherIT
));
139 CodeTemplates
.push_back(std::move(CT
));
// NOTE(review): original lines 140-143 are missing, so the label or position
// that guards the statement below is not visible here.
144 llvm_unreachable("Unhandled enum value");
148 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
150 llvm::Expected
<std::vector
<CodeTemplate
>>
151 LatencySnippetGenerator::generateCodeTemplates(const Instruction
&Instr
) const {
152 std::vector
<CodeTemplate
> Results
;
153 const ExecutionMode EM
= getExecutionModes(Instr
);
154 for (const auto EC
: kExecutionClasses
) {
155 for (const auto ExecutionModeBit
: getExecutionModeBits(EM
& EC
.Mask
))
156 appendCodeTemplates(State
, Instr
, ExecutionModeBit
, EC
.Description
,
158 if (!Results
.empty())
162 return llvm::make_error
<BenchmarkFailure
>(
163 "No strategy found to make the execution serial");
164 return std::move(Results
);
167 const char *LatencyBenchmarkRunner::getCounterName() const {
168 if (!State
.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
169 llvm::report_fatal_error("sched model is missing extra processor info!");
170 const char *CounterName
= State
.getSubtargetInfo()
172 .getExtraProcessorInfo()
173 .PfmCounters
.CycleCounter
;
175 llvm::report_fatal_error("sched model does not define a cycle counter");
179 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
// Runs Function NumMeasurements times against Scratch while reading a perf
// counter for the event named by getCounterName(), and returns a single
// BenchmarkMeasure labelled "latency" built from MinLatency.
//
// Calls llvm::report_fatal_error when the perf event built from the counter
// name is not valid.
//
// NOTE(review): this listing is damaged. The leading integers are residue of
// the original file's line numbers, and original lines 189, 196-197, 199,
// 202-203 and 205 are absent from this view. Not visible as a result: the
// condition guarding the first report_fatal_error, any statements surrounding
// the call to Function inside the loop, the body of the final `if`, and the
// closing braces. Restore them from a pristine copy before relying on this
// block.
181 std::vector
<BenchmarkMeasure
>
182 LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction
&Function
,
183 ScratchSpace
&Scratch
) const {
184 // Cycle measurements include some overhead from the kernel. Repeat the
185 // measure several times and take the minimum value.
186 constexpr const int NumMeasurements
= 30;
// Starts at the largest representable value so any reading compares lower.
187 int64_t MinLatency
= std::numeric_limits
<int64_t>::max();
188 const char *CounterName
= getCounterName();
// NOTE(review): the condition for the fatal error below (original line 189) is
// missing - presumably a null check on CounterName; TODO confirm.
190 llvm::report_fatal_error("could not determine cycle counter name");
191 const pfm::PerfEvent
CyclesPerfEvent(CounterName
);
192 if (!CyclesPerfEvent
.valid())
193 llvm::report_fatal_error("invalid perf event");
// NOTE(review): I is size_t while NumMeasurements is int, so the loop condition
// is a mixed signed/unsigned comparison.
194 for (size_t I
= 0; I
< NumMeasurements
; ++I
) {
// A fresh counter is constructed on every iteration.
195 pfm::Counter
Counter(CyclesPerfEvent
);
// NOTE(review): original lines 196-197 and 199 around this call are missing.
198 Function(Scratch
.ptr());
200 const int64_t Value
= Counter
.read();
// NOTE(review): the body of this comparison (original line 202) is missing -
// presumably it records Value as the new minimum; TODO confirm.
201 if (Value
< MinLatency
)
204 return {BenchmarkMeasure::Create("latency", MinLatency
)};
207 } // namespace exegesis