llvm/tools/llvm-exegesis/lib/X86/Target.cpp
//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Error.h"
#include "../ParallelSnippetGenerator.h"
#include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

namespace llvm {
namespace exegesis {

// Returns a non-null reason if we cannot handle the memory references in this
// instruction.
static const char *isInvalidMemoryInstr(const Instruction &Instr) {
  switch (Instr.Description.TSFlags & X86II::FormMask) {
  default:
    llvm_unreachable("Unknown FormMask value");
  // These have no memory access.
  case X86II::Pseudo:
  case X86II::RawFrm:
  case X86II::AddCCFrm:
  case X86II::MRMDestReg:
  case X86II::MRMSrcReg:
  case X86II::MRMSrcReg4VOp3:
  case X86II::MRMSrcRegOp4:
  case X86II::MRMSrcRegCC:
  case X86II::MRMXrCC:
  case X86II::MRMXr:
  case X86II::MRM0r:
  case X86II::MRM1r:
  case X86II::MRM2r:
  case X86II::MRM3r:
  case X86II::MRM4r:
  case X86II::MRM5r:
  case X86II::MRM6r:
  case X86II::MRM7r:
  case X86II::MRM_C0:
  case X86II::MRM_C1:
  case X86II::MRM_C2:
  case X86II::MRM_C3:
  case X86II::MRM_C4:
  case X86II::MRM_C5:
  case X86II::MRM_C6:
  case X86II::MRM_C7:
  case X86II::MRM_C8:
  case X86II::MRM_C9:
  case X86II::MRM_CA:
  case X86II::MRM_CB:
  case X86II::MRM_CC:
  case X86II::MRM_CD:
  case X86II::MRM_CE:
  case X86II::MRM_CF:
  case X86II::MRM_D0:
  case X86II::MRM_D1:
  case X86II::MRM_D2:
  case X86II::MRM_D3:
  case X86II::MRM_D4:
  case X86II::MRM_D5:
  case X86II::MRM_D6:
  case X86II::MRM_D7:
  case X86II::MRM_D8:
  case X86II::MRM_D9:
  case X86II::MRM_DA:
  case X86II::MRM_DB:
  case X86II::MRM_DC:
  case X86II::MRM_DD:
  case X86II::MRM_DE:
  case X86II::MRM_DF:
  case X86II::MRM_E0:
  case X86II::MRM_E1:
  case X86II::MRM_E2:
  case X86II::MRM_E3:
  case X86II::MRM_E4:
  case X86II::MRM_E5:
  case X86II::MRM_E6:
  case X86II::MRM_E7:
  case X86II::MRM_E8:
  case X86II::MRM_E9:
  case X86II::MRM_EA:
  case X86II::MRM_EB:
  case X86II::MRM_EC:
  case X86II::MRM_ED:
  case X86II::MRM_EE:
  case X86II::MRM_EF:
  case X86II::MRM_F0:
  case X86II::MRM_F1:
  case X86II::MRM_F2:
  case X86II::MRM_F3:
  case X86II::MRM_F4:
  case X86II::MRM_F5:
  case X86II::MRM_F6:
  case X86II::MRM_F7:
  case X86II::MRM_F8:
  case X86II::MRM_F9:
  case X86II::MRM_FA:
  case X86II::MRM_FB:
  case X86II::MRM_FC:
  case X86II::MRM_FD:
  case X86II::MRM_FE:
  case X86II::MRM_FF:
  case X86II::RawFrmImm8:
    return nullptr;
  case X86II::AddRegFrm:
    return (Instr.Description.Opcode == X86::POP16r ||
            Instr.Description.Opcode == X86::POP32r ||
            Instr.Description.Opcode == X86::PUSH16r ||
            Instr.Description.Opcode == X86::PUSH32r)
               ? "unsupported opcode: unsupported memory access"
               : nullptr;
  // These access memory and are handled.
  case X86II::MRMDestMem:
  case X86II::MRMSrcMem:
  case X86II::MRMSrcMem4VOp3:
  case X86II::MRMSrcMemOp4:
  case X86II::MRMSrcMemCC:
  case X86II::MRMXmCC:
  case X86II::MRMXm:
  case X86II::MRM0m:
  case X86II::MRM1m:
  case X86II::MRM2m:
  case X86II::MRM3m:
  case X86II::MRM4m:
  case X86II::MRM5m:
  case X86II::MRM6m:
  case X86II::MRM7m:
    return nullptr;
  // These access memory and are not handled yet.
  case X86II::RawFrmImm16:
  case X86II::RawFrmMemOffs:
  case X86II::RawFrmSrc:
  case X86II::RawFrmDst:
  case X86II::RawFrmDstSrc:
    return "unsupported opcode: non uniform memory access";
  }
}

// If the opcode is invalid, returns a pointer to a character literal
// indicating the reason. nullptr indicates a valid opcode.
static const char *isInvalidOpcode(const Instruction &Instr) {
  const auto OpcodeName = Instr.Name;
  if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return "unsupported opcode: pseudo instruction";
  if (OpcodeName.startswith("POP") || OpcodeName.startswith("PUSH") ||
      OpcodeName.startswith("ADJCALLSTACK") || OpcodeName.startswith("LEAVE"))
    return "unsupported opcode: Push/Pop/AdjCallStack/Leave";
  if (const auto reason = isInvalidMemoryInstr(Instr))
    return reason;
  // We do not handle instructions with OPERAND_PCREL.
  for (const Operand &Op : Instr.Operands)
    if (Op.isExplicit() &&
        Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL)
      return "unsupported opcode: PC relative operand";
  // We do not handle second-form X87 instructions. We only handle first-form
  // ones (_Fp), see comment in X86InstrFPStack.td.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID)
      return "unsupported second-form X87 instruction";
  return nullptr;
}

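// Returns the x87 floating-point format bits (X86II::NotFP, OneArgFP,
// TwoArgFP, ...) from the instruction's TSFlags.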
static unsigned getX86FPFlags(const Instruction &Instr) {
  return Instr.Description.TSFlags & X86II::FPTypeMask;
}

// Helper to fill a memory operand with a value.
static void setMemOp(InstructionTemplate &IT, int OpIdx,
                     const MCOperand &OpVal) {
  const auto Op = IT.getInstr().Operands[OpIdx];
  assert(Op.isExplicit() && "invalid memory pattern");
  IT.getValueFor(Op) = OpVal;
}

// Common (latency, uops) code for LEA templates. `RestrictDestRegs` is given
// the addressing base and index registers and restricts the set of candidate
// LEA destination registers.
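// Templates are generated for every combination of base register, index
// register, scale in {1, 2, 4, 8} and displacement in {0, 42}, up to
// Opts.MaxConfigsPerOpcode; each config string reads e.g. "42(%RAX, %RBX, 4)".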
static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
    const Instruction &Instr, const BitVector &ForbiddenRegisters,
    const LLVMState &State, const SnippetGenerator::Options &Opts,
    std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)>
        RestrictDestRegs) {
  assert(Instr.Operands.size() == 6 && "invalid LEA");
  assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 &&
         "invalid LEA");

  constexpr const int kDestOp = 0;
  constexpr const int kBaseOp = 1;
  constexpr const int kIndexOp = 3;
  auto PossibleDestRegs =
      Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
  remove(PossibleDestRegs, ForbiddenRegisters);
  auto PossibleBaseRegs =
      Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
  remove(PossibleBaseRegs, ForbiddenRegisters);
  auto PossibleIndexRegs =
      Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
  remove(PossibleIndexRegs, ForbiddenRegisters);

  const auto &RegInfo = State.getRegInfo();
  std::vector<CodeTemplate> Result;
  for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
    for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
      for (int LogScale = 0; LogScale <= 3; ++LogScale) {
        // FIXME: Add an option for controlling how we explore immediates.
        for (const int Disp : {0, 42}) {
          InstructionTemplate IT(&Instr);
          const int64_t Scale = 1ull << LogScale;
          setMemOp(IT, 1, MCOperand::createReg(BaseReg));
          setMemOp(IT, 2, MCOperand::createImm(Scale));
          setMemOp(IT, 3, MCOperand::createReg(IndexReg));
          setMemOp(IT, 4, MCOperand::createImm(Disp));
          // SegmentReg must be 0 for LEA.
          setMemOp(IT, 5, MCOperand::createReg(0));

          // Output reg candidates are selected by the caller.
          auto PossibleDestRegsNow = PossibleDestRegs;
          RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow);
          assert(PossibleDestRegsNow.set_bits().begin() !=
                     PossibleDestRegsNow.set_bits().end() &&
                 "no remaining registers");
          setMemOp(
              IT, 0,
              MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin()));

          CodeTemplate CT;
          CT.Instructions.push_back(std::move(IT));
          CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
                              RegInfo.getName(IndexReg), Scale, Disp)
                          .str();
          Result.push_back(std::move(CT));
          if (Result.size() >= Opts.MaxConfigsPerOpcode)
            return std::move(Result);
        }
      }
    }
  }
  return std::move(Result);
}

namespace {
class X86SerialSnippetGenerator : public SerialSnippetGenerator {
public:
  using SerialSnippetGenerator::SerialSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr,
                        const BitVector &ForbiddenRegisters) const override;
};
} // namespace

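// For serial (latency) measurements, the LEA destination is restricted to
// registers aliasing the base register, so repeated copies of the instruction
// form a dependency chain.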
Expected<std::vector<CodeTemplate>>
X86SerialSnippetGenerator::generateCodeTemplates(
    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // We just select a destination register that aliases the base
          // register.
          CandidateDestRegs &=
              State.getRATC().getRegister(BaseReg).aliasedBits();
        });
  }

  if (Instr.hasMemoryOperands())
    return make_error<Failure>(
        "unsupported memory operand in latency measurements");

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return SerialSnippetGenerator::generateCodeTemplates(Instr,
                                                         ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
  case X86II::CompareFP:
  case X86II::CondMovFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    return generateSelfAliasingCodeTemplates(Instr);
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

namespace {
class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
public:
  using ParallelSnippetGenerator::ParallelSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr,
                        const BitVector &ForbiddenRegisters) const override;
};
} // namespace

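// For parallel (uops) measurements, the LEA destination must not alias the
// base or index register, so repeated copies of the instruction are
// independent of each other.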
Expected<std::vector<CodeTemplate>>
X86ParallelSnippetGenerator::generateCodeTemplates(
    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // Any destination register that is not used for addressing is fine.
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(BaseReg).aliasedBits());
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(IndexReg).aliasedBits());
        });
  }

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return ParallelSnippetGenerator::generateCodeTemplates(Instr,
                                                           ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    // We generate the same code for latency and uops.
    return generateSelfAliasingCodeTemplates(Instr);
  case X86II::CompareFP:
  case X86II::CondMovFP:
    // We can compute uops for any FP instruction that does not grow or shrink
    // the stack (either do not touch the stack or push as much as they pop).
    return generateUnconstrainedCodeTemplates(
        Instr, "instruction does not grow/shrink the FP stack");
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

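// Returns the MOVri opcode that loads an immediate into a general-purpose
// register of the given bit width.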
static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:
    return X86::MOV8ri;
  case 16:
    return X86::MOV16ri;
  case 32:
    return X86::MOV32ri;
  case 64:
    return X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

// Generates instruction to load an immediate value into a register.
static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                            const APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
  return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}

// Allocates scratch memory on the stack.
static MCInst allocateStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::SUB64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                             uint64_t Imm) {
  return MCInstBuilder(MovOpcode)
      // Address = ESP
      .addReg(X86::RSP)    // BaseReg
      .addImm(1)           // ScaleAmt
      .addReg(0)           // IndexReg
      .addImm(OffsetBytes) // Disp
      .addReg(0)           // Segment
      // Immediate.
      .addImm(Imm);
}

// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = ESP
      .addReg(X86::RSP) // BaseReg
      .addImm(1)        // ScaleAmt
      .addReg(0)        // IndexReg
      .addImm(0)        // Disp
      .addReg(0);       // Segment
}

// Releases scratch memory.
static MCInst releaseStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::ADD64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}

// Reserves some space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
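// For example, loadAndFinalize(Reg, 128, X86::MOVDQUrm) emits:
//   SUB RSP, 16; four MOV32mi stores of the constant; MOVDQU Reg, [RSP];
//   ADD RSP, 16.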
namespace {
struct ConstantInliner {
  explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {}

  std::vector<MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
                                      unsigned Opcode);

  std::vector<MCInst> loadX87STAndFinalize(unsigned Reg);

  std::vector<MCInst> loadX87FPAndFinalize(unsigned Reg);

  std::vector<MCInst> popFlagAndFinalize();

  std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode,
                                                 unsigned Value);

private:
  ConstantInliner &add(const MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  void initStack(unsigned Bytes);

  static constexpr const unsigned kF80Bytes = 10; // 80 bits.

  APInt Constant_;
  std::vector<MCInst> Instructions;
};
} // namespace

std::vector<MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
                                                     unsigned RegBitWidth,
                                                     unsigned Opcode) {
  assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
  initStack(RegBitWidth / 8);
  add(loadToReg(Reg, Opcode));
  add(releaseStackSpace(RegBitWidth / 8));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_F80m)
          // Address = ESP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  if (Reg != X86::ST0)
    add(MCInstBuilder(X86::ST_Frr).addReg(Reg));
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_Fp80m)
          .addReg(Reg)
          // Address = ESP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}

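// Sets EFLAGS by writing the constant to the stack and popping it into the
// flags register; POPF both loads the flags and releases the stack slot.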
std::vector<MCInst> ConstantInliner::popFlagAndFinalize() {
  initStack(8);
  add(MCInstBuilder(X86::POPF64));
  return std::move(Instructions);
}

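// Loads a control register that has no move-immediate form (e.g. MXCSR, FPCW):
// stores `Value` on the stack and executes `Opcode` (LDMXCSR, FLDCW16m, ...)
// to read it from there.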
std::vector<MCInst>
ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) {
  add(allocateStackSpace(4));
  add(fillStackSpace(X86::MOV32mi, 0, Value)); // Mask all FP exceptions
  add(MCInstBuilder(Opcode)
          // Address = ESP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(4));
  return std::move(Instructions);
}

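// Allocates `Bytes` of stack space and writes the (sign-extended) constant
// into it in 4-, then 2-, then 1-byte chunks.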
void ConstantInliner::initStack(unsigned Bytes) {
  assert(Constant_.getBitWidth() <= Bytes * 8 &&
         "Value does not have the correct size");
  const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
                                 ? Constant_.sext(Bytes * 8)
                                 : Constant_;
  add(allocateStackSpace(Bytes));
  size_t ByteOffset = 0;
  for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
    add(fillStackSpace(
        X86::MOV32mi, ByteOffset,
        WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
  if (Bytes - ByteOffset >= 2) {
    add(fillStackSpace(
        X86::MOV16mi, ByteOffset,
        WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
    ByteOffset += 2;
  }
  if (Bytes - ByteOffset >= 1)
    add(fillStackSpace(
        X86::MOV8mi, ByteOffset,
        WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
}

#include "X86GenExegesis.inc"

namespace {
class ExegesisX86Target : public ExegesisTarget {
public:
  ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}

private:
  void addTargetSpecificPasses(PassManagerBase &PM) const override;

  unsigned getScratchMemoryRegister(const Triple &TT) const override;

  unsigned getLoopCounterRegister(const Triple &) const override;

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void randomizeMCOperand(const Instruction &Instr, const Variable &Var,
                          MCOperand &AssignedValue,
                          const BitVector &ForbiddenRegs) const override;

  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
                          unsigned Offset) const override;

  void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                   MachineBasicBlock &TargetMBB,
                                   const MCInstrInfo &MII) const override;

  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                               const APInt &Value) const override;

  ArrayRef<unsigned> getUnavailableRegisters() const override {
    return makeArrayRef(kUnavailableRegisters,
                        sizeof(kUnavailableRegisters) /
                            sizeof(kUnavailableRegisters[0]));
  }

  bool allowAsBackToBack(const Instruction &Instr) const override {
    const unsigned Opcode = Instr.Description.Opcode;
    return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r &&
           Opcode != X86::LEA64_32r && Opcode != X86::LEA16r;
  }

  std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
  }

  std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
  }

  bool matchesArch(Triple::ArchType Arch) const override {
    return Arch == Triple::x86_64 || Arch == Triple::x86;
  }

  static const unsigned kUnavailableRegisters[4];
};

// We disable a few registers that cannot be encoded on instructions with a REX
// prefix.
const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH,
                                                              X86::CH, X86::DH};

// We're using one of R8-R15 because these registers are never hardcoded in
// instructions (e.g. MOVS implicitly uses ESI and EDI), so they have fewer
// conflicts.
constexpr const unsigned kLoopCounterReg = X86::R8;

} // namespace

void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const {
  // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
  PM.add(createX86FloatingPointStackifierPass());
}

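// The scratch memory register is the first integer argument register of the
// 64-bit calling convention: RCX on Windows, RDI elsewhere.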
unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    // FIXME: This would require popping from the stack, so we would have to
    // add some additional setup code.
    return 0;
  }
  return TT.isOSWindows() ? X86::RCX : X86::RDI;
}

unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    return 0;
  }
  return kLoopCounterReg;
}

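// On top of the generic randomization, keeps special immediates within their
// valid ranges: rounding-control and condition-code operands get a random
// value from their respective enumerations.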
void ExegesisX86Target::randomizeMCOperand(
    const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
    const BitVector &ForbiddenRegs) const {
  ExegesisTarget::randomizeMCOperand(Instr, Var, AssignedValue, ForbiddenRegs);

  const Operand &Op = Instr.getPrimaryOperand(Var);
  switch (Op.getExplicitOperandInfo().OperandType) {
  case X86::OperandType::OPERAND_ROUNDING_CONTROL:
    AssignedValue =
        MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::NO_EXC));
    break;
  case X86::OperandType::OPERAND_COND_CODE:
    AssignedValue =
        MCOperand::createImm(randomIndex(X86::CondCode::LAST_VALID_COND));
    break;
  default:
    break;
  }
}

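// Fills the five memory sub-operands (base, scale, index, displacement,
// segment) so that the instruction accesses [Reg + Offset].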
void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
                                           unsigned Reg,
                                           unsigned Offset) const {
  assert(!isInvalidMemoryInstr(IT.getInstr()) &&
         "fillMemoryOperands requires a valid memory instruction");
  int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags);
  assert(MemOpIdx >= 0 && "invalid memory operand index");
  // getMemoryOperandNo() ignores tied operands, so we have to add them back.
  for (unsigned I = 0; I <= static_cast<unsigned>(MemOpIdx); ++I) {
    const auto &Op = IT.getInstr().Operands[I];
    if (Op.isTied() && Op.getTiedToIndex() < I) {
      ++MemOpIdx;
    }
  }
  setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
  setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
  setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
  setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
  setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0));      // Segment
}

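// Decrements the loop counter with `ADD $-1, %r8` and branches back to
// `TargetMBB` with `JNE` as long as the counter is not zero.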
void ExegesisX86Target::decrementLoopCounterAndJump(
    MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
    const MCInstrInfo &MII) const {
  BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
      .addDef(kLoopCounterReg)
      .addUse(kLoopCounterReg)
      .addImm(-1);
  BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
      .addMBB(&TargetMBB)
      .addImm(X86::COND_NE);
}

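// Returns a sequence of instructions that sets `Reg` to `Value`: GPRs use a
// direct move-immediate, while vector, x87, flags and control registers go
// through a ConstantInliner that spills the value to the stack and loads it
// back into the register.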
std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
                                                unsigned Reg,
                                                const APInt &Value) const {
  if (X86::GR8RegClass.contains(Reg))
    return {loadImmediate(Reg, 8, Value)};
  if (X86::GR16RegClass.contains(Reg))
    return {loadImmediate(Reg, 16, Value)};
  if (X86::GR32RegClass.contains(Reg))
    return {loadImmediate(Reg, 32, Value)};
  if (X86::GR64RegClass.contains(Reg))
    return {loadImmediate(Reg, 64, Value)};
  ConstantInliner CI(Value);
  if (X86::VR64RegClass.contains(Reg))
    return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
  if (X86::VR128XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
    return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
  }
  if (X86::VR256XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
  }
  if (X86::VR512RegClass.contains(Reg))
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm);
  if (X86::RSTRegClass.contains(Reg)) {
    return CI.loadX87STAndFinalize(Reg);
  }
  if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) ||
      X86::RFP80RegClass.contains(Reg)) {
    return CI.loadX87FPAndFinalize(Reg);
  }
  if (Reg == X86::EFLAGS)
    return CI.popFlagAndFinalize();
  if (Reg == X86::MXCSR)
    return CI.loadImplicitRegAndFinalize(
        STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR,
        0x1f80);
  if (Reg == X86::FPCW)
    return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f);
  return {}; // Not yet implemented.
}

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis
} // namespace llvm