Added function to set a register to a particular value + tests.
llvm-core.git: tools/llvm-exegesis/lib/X86/Target.cpp
//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Latency.h"
#include "../Uops.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"

namespace exegesis {

namespace {

// Common code for X86 Uops and Latency runners.
template <typename Impl> class X86SnippetGenerator : public Impl {
  using Impl::Impl;

  llvm::Expected<CodeTemplate>
  generateCodeTemplate(unsigned Opcode) const override {
    // Test whether we can generate a snippet for this instruction.
    const auto &InstrInfo = this->State.getInstrInfo();
    const auto OpcodeName = InstrInfo.getName(Opcode);
    if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
        OpcodeName.startswith("ADJCALLSTACK")) {
      return llvm::make_error<BenchmarkFailure>(
          "Unsupported opcode: Push/Pop/AdjCallStack");
    }

    // Handle X87.
    const auto &InstrDesc = InstrInfo.get(Opcode);
    const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
    const Instruction Instr(InstrDesc, this->RATC);
    switch (FPInstClass) {
    case llvm::X86II::NotFP:
      break;
    case llvm::X86II::ZeroArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
    case llvm::X86II::OneArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP: {
      // These are instructions like
      //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the stack.
      // We generate the same code for latency and uops.
      return this->generateSelfAliasingCodeTemplate(Instr);
    }
    case llvm::X86II::CompareFP:
      return Impl::handleCompareFP(Instr);
    case llvm::X86II::CondMovFP:
      return Impl::handleCondMovFP(Instr);
    case llvm::X86II::SpecialFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
    default:
      llvm_unreachable("Unknown FP Type!");
    }

    // Fallback to generic implementation.
    return Impl::Base::generateCodeTemplate(Opcode);
  }
};

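// X86SnippetGenerator is instantiated below with X86LatencyImpl and
// X86UopsImpl (see createLatencySnippetGenerator / createUopsSnippetGenerator
// further down); `Impl::Base` then resolves to the generic
// LatencySnippetGenerator / UopsSnippetGenerator, which handles all non-x87
// instructions.
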
class X86LatencyImpl : public LatencySnippetGenerator {
protected:
  using Base = LatencySnippetGenerator;
  using Base::Base;
  llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
    return llvm::make_error<SnippetGeneratorFailure>(
        "Unsupported x87 CompareFP");
  }
  llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
    return llvm::make_error<SnippetGeneratorFailure>(
        "Unsupported x87 CondMovFP");
  }
};

class X86UopsImpl : public UopsSnippetGenerator {
protected:
  using Base = UopsSnippetGenerator;
  using Base::Base;
  // We can compute uops for any FP instruction that does not grow or shrink
  // the stack (i.e. that either does not touch the stack or pushes as much as
  // it pops).
  llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
    return generateUnconstrainedCodeTemplate(
        Instr, "instruction does not grow/shrink the FP stack");
  }
  llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
    return generateUnconstrainedCodeTemplate(
        Instr, "instruction does not grow/shrink the FP stack");
  }
};

static unsigned GetLoadImmediateOpcode(const llvm::APInt &Value) {
  switch (Value.getBitWidth()) {
  case 8:
    return llvm::X86::MOV8ri;
  case 16:
    return llvm::X86::MOV16ri;
  case 32:
    return llvm::X86::MOV32ri;
  case 64:
    return llvm::X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

static llvm::MCInst loadImmediate(unsigned Reg, const llvm::APInt &Value,
                                  unsigned MaxBitWidth) {
  assert(Value.getBitWidth() <= MaxBitWidth && "Value too big to fit register");
  return llvm::MCInstBuilder(GetLoadImmediateOpcode(Value))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}

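// For example (illustrative only), loadImmediate(llvm::X86::EAX,
// llvm::APInt(32, 42), 32) selects MOV32ri and builds `mov eax, 42`.
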
// Allocates scratch memory on the stack.
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                   uint64_t Imm) {
  return llvm::MCInstBuilder(MovOpcode)
      // Address = RSP + OffsetBytes
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(OffsetBytes)    // Disp
      .addReg(0)              // Segment
      // Immediate.
      .addImm(Imm);
}

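// As an illustration, fillStackSpace(llvm::X86::MOV32mi, 4, 0xff) encodes the
// 5-operand memory reference (RSP, 1, %noreg, 4, %noreg) plus the immediate,
// i.e. `mov dword ptr [rsp + 4], 0xff`.
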
// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return llvm::MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = RSP
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(0)              // Disp
      .addReg(0);             // Segment
}

// Releases scratch memory.
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

// Helper to materialize a constant through scratch stack space: the
// constructor allocates the space and fills it with the bytes of `Constant`;
// the *Finalize methods below consume the value and return the complete
// instruction sequence.
struct ConstantInliner {
  explicit ConstantInliner(const llvm::APInt &Constant)
      : StackSize(Constant.getBitWidth() / 8) {
    assert(Constant.getBitWidth() % 8 == 0 && "Must be a multiple of 8");
    add(allocateStackSpace(StackSize));
    size_t ByteOffset = 0;
    for (; StackSize - ByteOffset >= 4; ByteOffset += 4)
      add(fillStackSpace(
          llvm::X86::MOV32mi, ByteOffset,
          Constant.extractBits(32, ByteOffset * 8).getZExtValue()));
    if (StackSize - ByteOffset >= 2) {
      add(fillStackSpace(
          llvm::X86::MOV16mi, ByteOffset,
          Constant.extractBits(16, ByteOffset * 8).getZExtValue()));
      ByteOffset += 2;
    }
    if (StackSize - ByteOffset >= 1)
      add(fillStackSpace(
          llvm::X86::MOV8mi, ByteOffset,
          Constant.extractBits(8, ByteOffset * 8).getZExtValue()));
  }

  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned Opcode,
                                            unsigned BitWidth) {
    assert(StackSize * 8 == BitWidth && "Value does not have the correct size");
    add(loadToReg(Reg, Opcode));
    add(releaseStackSpace(StackSize));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> loadX87AndFinalize(unsigned Reg, unsigned Opcode,
                                               unsigned BitWidth) {
    assert(StackSize * 8 == BitWidth && "Value does not have the correct size");
    add(llvm::MCInstBuilder(Opcode)
            .addReg(llvm::X86::RSP) // BaseReg
            .addImm(1)              // ScaleAmt
            .addReg(0)              // IndexReg
            .addImm(0)              // Disp
            .addReg(0));            // Segment
    if (Reg != llvm::X86::ST0)
      add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
    add(releaseStackSpace(StackSize));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> popFlagAndFinalize() {
    assert(StackSize * 8 == 32 && "Value does not have the correct size");
    add(llvm::MCInstBuilder(llvm::X86::POPF64));
    return std::move(Instructions);
  }

private:
  ConstantInliner &add(const llvm::MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  const size_t StackSize;
  std::vector<llvm::MCInst> Instructions;
};

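// Illustrative usage (mirroring the SSE path of setRegTo below): for a
// 128-bit llvm::APInt `V`,
//   ConstantInliner CI(V);
//   std::vector<llvm::MCInst> Code =
//       CI.loadAndFinalize(llvm::X86::XMM0, llvm::X86::MOVDQUrm, 128);
// yields SUB64ri8 RSP, 16; four MOV32mi stores covering the 16 bytes;
// MOVDQUrm XMM0, [RSP]; ADD64ri8 RSP, 16.
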
class ExegesisX86Target : public ExegesisTarget {
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
    // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
    PM.add(llvm::createX86FloatingPointStackifierPass());
  }

  unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
    if (!TT.isArch64Bit()) {
      // FIXME: This would require popping from the stack, so we would have to
      // add some additional setup code.
      return 0;
    }
    return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
  }

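  // (RDI and RCX are, respectively, the first pointer-argument registers of
  // the SysV and Windows x86-64 calling conventions.)
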
  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
                          unsigned Offset) const override {
    // FIXME: For instructions that read AND write to memory, we use the same
    // value for input and output.
    for (size_t I = 0, E = IB.Instr.Operands.size(); I < E; ++I) {
      const Operand *Op = &IB.Instr.Operands[I];
      if (Op->IsExplicit && Op->IsMem) {
        // Case 1: 5-op memory.
        assert((I + 5 <= E) && "x86 memory references are always 5 ops");
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(Reg); // BaseReg
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createImm(1); // ScaleAmt
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // IndexReg
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createImm(Offset); // Disp
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // Segment
        // Case 2: segment:index addressing. We assume that ES is 0.
      }
    }
  }

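  // In other words, for an instruction with an explicit memory reference the
  // five operands end up as (BaseReg=Reg, ScaleAmt=1, IndexReg=0, Disp=Offset,
  // Segment=0), so every generated access is of the form [Reg + Offset].
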
  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
                                     const llvm::APInt &Value,
                                     unsigned Reg) const override {
    if (llvm::X86::GR8RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 8)};
    if (llvm::X86::GR16RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 16)};
    if (llvm::X86::GR32RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 32)};
    if (llvm::X86::GR64RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 64)};
    ConstantInliner CI(Value);
    if (llvm::X86::VR64RegClass.contains(Reg))
      return CI.loadAndFinalize(Reg, llvm::X86::MMX_MOVQ64rm, 64);
    if (llvm::X86::VR128XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Z128rm, 128);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQUrm, 128);
      return CI.loadAndFinalize(Reg, llvm::X86::MOVDQUrm, 128);
    }
    if (llvm::X86::VR256XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Z256rm, 256);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQUYrm, 256);
    }
    if (llvm::X86::VR512RegClass.contains(Reg))
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Zrm, 512);
    if (llvm::X86::RSTRegClass.contains(Reg)) {
      if (Value.getBitWidth() == 32)
        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F32m, 32);
      if (Value.getBitWidth() == 64)
        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F64m, 64);
      if (Value.getBitWidth() == 80)
        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F80m, 80);
    }
    if (Reg == llvm::X86::EFLAGS)
      return CI.popFlagAndFinalize();
    llvm_unreachable("Not yet implemented");
  }

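  // For instance, a GR32 register is set with a single MOV32ri, while a
  // 32-bit value destined for EFLAGS goes through the ConstantInliner:
  // SUB64ri8 RSP, 4; MOV32mi [RSP], Value; POPF64.
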
  std::unique_ptr<SnippetGenerator>
  createLatencySnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86SnippetGenerator<X86LatencyImpl>>(State);
  }

  std::unique_ptr<SnippetGenerator>
  createUopsSnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86SnippetGenerator<X86UopsImpl>>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }
};

} // namespace

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis