[llvm-exegesis] Mark x86 segment register instructions as unsupported.
tools/llvm-exegesis/lib/X86/Target.cpp
//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

11 #include "../Latency.h"
12 #include "../Uops.h"
13 #include "MCTargetDesc/X86BaseInfo.h"
14 #include "MCTargetDesc/X86MCTargetDesc.h"
15 #include "X86.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/MC/MCInstBuilder.h"
namespace exegesis {

namespace {

// A chunk of an instruction's operands that represents a single memory access.
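// In LLVM MC, an x86 memory reference is modeled as five consecutive operands;
// e.g. `movl 16(%rdi), %eax` encodes its memory access as {BaseReg=RDI,
// ScaleAmt=1, IndexReg=0 (none), Disp=16, Segment=0 (default)}.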
struct MemoryOperandRange {
  MemoryOperandRange(llvm::ArrayRef<Operand> Operands) : Ops(Operands) {}

  // Sets up the InstructionTemplate so that the memory access represented by
  // this object points to [Reg] + Offset.
  void fillOrDie(InstructionTemplate &IT, unsigned Reg, unsigned Offset) {
    switch (Ops.size()) {
    case 5:
      IT.getValueFor(Ops[0]) = llvm::MCOperand::createReg(Reg);    // BaseReg
      IT.getValueFor(Ops[1]) = llvm::MCOperand::createImm(1);      // ScaleAmt
      IT.getValueFor(Ops[2]) = llvm::MCOperand::createReg(0);      // IndexReg
      IT.getValueFor(Ops[3]) = llvm::MCOperand::createImm(Offset); // Disp
      IT.getValueFor(Ops[4]) = llvm::MCOperand::createReg(0);      // Segment
      break;
    default:
      llvm::errs() << Ops.size()
                   << "-operand memory accesses are not handled right now ("
                   << IT.Instr.Name << ")\n";
      llvm_unreachable("Invalid memory configuration");
    }
  }

  // Returns whether Range can be filled.
  static bool isValid(const MemoryOperandRange &Range) {
    return Range.Ops.size() == 5;
  }

  // Returns whether Op is a valid memory operand.
  static bool isMemoryOperand(const Operand &Op) {
    return Op.isMemory() && Op.isExplicit();
  }

  llvm::ArrayRef<Operand> Ops;
};

// X86 memory accesses involve a non-constant number of operands; this function
// extracts contiguous memory operands into MemoryOperandRanges so they are
// easier to check and fill.
static std::vector<MemoryOperandRange>
getMemoryOperandRanges(llvm::ArrayRef<Operand> Operands) {
  std::vector<MemoryOperandRange> Result;
  while (!Operands.empty()) {
    Operands = Operands.drop_until(MemoryOperandRange::isMemoryOperand);
    auto MemoryOps = Operands.take_while(MemoryOperandRange::isMemoryOperand);
    if (!MemoryOps.empty())
      Result.push_back(MemoryOps);
    Operands = Operands.drop_front(MemoryOps.size());
  }
  return Result;
}

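// As an illustrative sketch of the grouping above: an instruction such as
// ADD64rm, whose operand list is roughly {dst, src1, BaseReg, ScaleAmt,
// IndexReg, Disp, Segment}, yields a single MemoryOperandRange covering the
// last five operands.
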
static llvm::Error IsInvalidOpcode(const Instruction &Instr) {
  const auto OpcodeName = Instr.Name;
  if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
      OpcodeName.startswith("ADJCALLSTACK"))
    return llvm::make_error<BenchmarkFailure>(
        "unsupported opcode: Push/Pop/AdjCallStack");
  const bool ValidMemoryOperands = llvm::all_of(
      getMemoryOperandRanges(Instr.Operands), MemoryOperandRange::isValid);
  if (!ValidMemoryOperands)
    return llvm::make_error<BenchmarkFailure>(
        "unsupported opcode: non-uniform memory access");
  // We do not handle instructions with OPERAND_PCREL.
  for (const Operand &Op : Instr.Operands)
    if (Op.isExplicit() &&
        Op.getExplicitOperandInfo().OperandType == llvm::MCOI::OPERAND_PCREL)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported opcode: PC-relative operand");
  // We do not handle instructions that use segment registers.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass ==
            llvm::X86::SEGMENT_REGRegClassID)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported opcode: access segment memory");
  // We do not handle second-form X87 instructions. We only handle first-form
  // ones (_Fp), see comment in X86InstrFPStack.td.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass == llvm::X86::RSTRegClassID)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported second-form X87 instruction");
  return llvm::Error::success();
}

static unsigned GetX86FPFlags(const Instruction &Instr) {
  return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
}

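// A sketch of what the mask yields, based on the switches below (not an
// exhaustive list): NotFP for non-x87 instructions, OneArgFPRW for operations
// like `fsqrt` that rewrite ST(0) in place, and TwoArgFP for operations like
// `ST(0) = ST(0) + ST(i)`.
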
class X86LatencySnippetGenerator : public LatencySnippetGenerator {
public:
  using LatencySnippetGenerator::LatencySnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr) const override {
    if (auto E = IsInvalidOpcode(Instr))
      return std::move(E);

    switch (GetX86FPFlags(Instr)) {
    case llvm::X86II::NotFP:
      return LatencySnippetGenerator::generateCodeTemplates(Instr);
    case llvm::X86II::ZeroArgFP:
    case llvm::X86II::OneArgFP:
    case llvm::X86II::SpecialFP:
    case llvm::X86II::CompareFP:
    case llvm::X86II::CondMovFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP:
      // These are instructions like
      // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the
      // stack.
      return generateSelfAliasingCodeTemplates(Instr);
    default:
      llvm_unreachable("Unknown FP Type!");
    }
  }
};

class X86UopsSnippetGenerator : public UopsSnippetGenerator {
public:
  using UopsSnippetGenerator::UopsSnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr) const override {
    if (auto E = IsInvalidOpcode(Instr))
      return std::move(E);

    switch (GetX86FPFlags(Instr)) {
    case llvm::X86II::NotFP:
      return UopsSnippetGenerator::generateCodeTemplates(Instr);
    case llvm::X86II::ZeroArgFP:
    case llvm::X86II::OneArgFP:
    case llvm::X86II::SpecialFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP:
      // These are instructions like
      // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the
      // stack. We generate the same code for latency and uops.
      return generateSelfAliasingCodeTemplates(Instr);
    case llvm::X86II::CompareFP:
    case llvm::X86II::CondMovFP:
      // We can compute uops for any FP instruction that does not grow or
      // shrink the stack, i.e. one that either does not touch the stack or
      // pushes as much as it pops.
      return generateUnconstrainedCodeTemplates(
          Instr, "instruction does not grow/shrink the FP stack");
    default:
      llvm_unreachable("Unknown FP Type!");
    }
  }
};

static unsigned GetLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:
    return llvm::X86::MOV8ri;
  case 16:
    return llvm::X86::MOV16ri;
  case 32:
    return llvm::X86::MOV32ri;
  case 64:
    return llvm::X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

// Generates an instruction to load an immediate value into a register.
static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                                  const llvm::APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
  return llvm::MCInstBuilder(GetLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}

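// A usage sketch (not code from this file):
//   loadImmediate(llvm::X86::EAX, 32, llvm::APInt(32, 42))
// builds the MCInst for `movl $42, %eax` (MOV32ri).
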
// Allocates scratch memory on the stack (i.e. emits `sub rsp, Bytes`).
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                   uint64_t Imm) {
  return llvm::MCInstBuilder(MovOpcode)
      // Address = RSP + OffsetBytes.
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(OffsetBytes)    // Disp
      .addReg(0)              // Segment
      // Immediate.
      .addImm(Imm);
}

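// A usage sketch (not code from this file):
//   fillStackSpace(llvm::X86::MOV32mi, 4, 0xdeadbeef)
// builds the MCInst for `movl $0xdeadbeef, 4(%rsp)`.
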
// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return llvm::MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = RSP.
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(0)              // Disp
      .addReg(0);             // Segment
}

// Releases scratch memory (i.e. emits `add rsp, Bytes`).
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

// Reserves some space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
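// As a sketch of the emitted sequence, assuming a 128-bit constant loaded into
// XMM0 with MOVDQUrm via loadAndFinalize():
//   subq   $16, %rsp
//   movl   $<bits 0..31>,   (%rsp)
//   movl   $<bits 32..63>,  4(%rsp)
//   movl   $<bits 64..95>,  8(%rsp)
//   movl   $<bits 96..127>, 12(%rsp)
//   movdqu (%rsp), %xmm0
//   addq   $16, %rsp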
struct ConstantInliner {
  explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {}

  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
                                            unsigned Opcode) {
    assert((RegBitWidth & 7) == 0 &&
           "RegBitWidth must be a multiple of 8 bits");
    initStack(RegBitWidth / 8);
    add(loadToReg(Reg, Opcode));
    add(releaseStackSpace(RegBitWidth / 8));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg) {
    initStack(kF80Bytes);
    add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
            // Address = RSP.
            .addReg(llvm::X86::RSP) // BaseReg
            .addImm(1)              // ScaleAmt
            .addReg(0)              // IndexReg
            .addImm(0)              // Disp
            .addReg(0));            // Segment
    if (Reg != llvm::X86::ST0)
      add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
    add(releaseStackSpace(kF80Bytes));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> loadX87FPAndFinalize(unsigned Reg) {
    initStack(kF80Bytes);
    add(llvm::MCInstBuilder(llvm::X86::LD_Fp80m)
            .addReg(Reg)
            // Address = RSP.
            .addReg(llvm::X86::RSP) // BaseReg
            .addImm(1)              // ScaleAmt
            .addReg(0)              // IndexReg
            .addImm(0)              // Disp
            .addReg(0));            // Segment
    add(releaseStackSpace(kF80Bytes));
    return std::move(Instructions);
  }

  // Sets the flags register: materializes the constant on the stack, then pops
  // it into EFLAGS with POPF64.
  std::vector<llvm::MCInst> popFlagAndFinalize() {
    initStack(8);
    add(llvm::MCInstBuilder(llvm::X86::POPF64));
    return std::move(Instructions);
  }

private:
  static constexpr const unsigned kF80Bytes = 10; // 80 bits.

  ConstantInliner &add(const llvm::MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

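  // Materializes Constant_ in the allocated stack space using 4-, 2-, and
  // 1-byte immediate stores. E.g. the 10-byte (kF80Bytes) case emits two
  // 4-byte stores at offsets 0 and 4, then one 2-byte store at offset 8.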
  void initStack(unsigned Bytes) {
    assert(Constant_.getBitWidth() <= Bytes * 8 &&
           "Value does not have the correct size");
    const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
                                         ? Constant_.sext(Bytes * 8)
                                         : Constant_;
    add(allocateStackSpace(Bytes));
    size_t ByteOffset = 0;
    for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
      add(fillStackSpace(
          llvm::X86::MOV32mi, ByteOffset,
          WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
    if (Bytes - ByteOffset >= 2) {
      add(fillStackSpace(
          llvm::X86::MOV16mi, ByteOffset,
          WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
      ByteOffset += 2;
    }
    if (Bytes - ByteOffset >= 1)
      add(fillStackSpace(
          llvm::X86::MOV8mi, ByteOffset,
          WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
  }

  llvm::APInt Constant_;
  std::vector<llvm::MCInst> Instructions;
};

class ExegesisX86Target : public ExegesisTarget {
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
    // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
    PM.add(llvm::createX86FloatingPointStackifierPass());
  }

  unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
    if (!TT.isArch64Bit()) {
      // FIXME: This would require popping from the stack, so we would have to
      // add some additional setup code.
      return 0;
    }
    // The first integer argument register of the target's calling convention
    // (RCX on Win64, RDI on SysV).
    return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
  }

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
                          unsigned Offset) const override {
    // FIXME: For instructions that read AND write to memory, we use the same
    // value for input and output.
    for (auto &MemoryRange : getMemoryOperandRanges(IT.Instr.Operands))
      MemoryRange.fillOrDie(IT, Reg, Offset);
  }

  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
                                     unsigned Reg,
                                     const llvm::APInt &Value) const override {
    if (llvm::X86::GR8RegClass.contains(Reg))
      return {loadImmediate(Reg, 8, Value)};
    if (llvm::X86::GR16RegClass.contains(Reg))
      return {loadImmediate(Reg, 16, Value)};
    if (llvm::X86::GR32RegClass.contains(Reg))
      return {loadImmediate(Reg, 32, Value)};
    if (llvm::X86::GR64RegClass.contains(Reg))
      return {loadImmediate(Reg, 64, Value)};
    ConstantInliner CI(Value);
    if (llvm::X86::VR64RegClass.contains(Reg))
      return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
    if (llvm::X86::VR128XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
      return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
    }
    if (llvm::X86::VR256XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
    }
    if (llvm::X86::VR512RegClass.contains(Reg))
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
    if (llvm::X86::RSTRegClass.contains(Reg)) {
      return CI.loadX87STAndFinalize(Reg);
    }
    if (llvm::X86::RFP32RegClass.contains(Reg) ||
        llvm::X86::RFP64RegClass.contains(Reg) ||
        llvm::X86::RFP80RegClass.contains(Reg)) {
      return CI.loadX87FPAndFinalize(Reg);
    }
    if (Reg == llvm::X86::EFLAGS)
      return CI.popFlagAndFinalize();
    return {}; // Not yet implemented.
  }

  std::unique_ptr<SnippetGenerator>
  createLatencySnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86LatencySnippetGenerator>(State);
  }

  std::unique_ptr<SnippetGenerator>
  createUopsSnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86UopsSnippetGenerator>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }
};

} // namespace

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis