//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../ParallelSnippetGenerator.h"
#include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

namespace llvm {
namespace exegesis {
// Returns a non-null reason if we cannot handle the memory references in this
// instruction.
static const char *isInvalidMemoryInstr(const Instruction &Instr) {
  switch (Instr.Description.TSFlags & X86II::FormMask) {
  default:
    llvm_unreachable("Unknown FormMask value");
  // These have no memory access.
  case X86II::MRMDestReg:
  case X86II::MRMSrcReg:
  case X86II::MRMSrcReg4VOp3:
  case X86II::MRMSrcRegOp4:
  case X86II::MRMSrcRegCC:
  case X86II::RawFrmImm8:
    return nullptr;
  case X86II::AddRegFrm:
    return (Instr.Description.Opcode == X86::POP16r ||
            Instr.Description.Opcode == X86::POP32r ||
            Instr.Description.Opcode == X86::PUSH16r ||
            Instr.Description.Opcode == X86::PUSH32r)
               ? "unsupported opcode: unsupported memory access"
               : nullptr;
  // These access memory and are handled.
  case X86II::MRMDestMem:
  case X86II::MRMSrcMem:
  case X86II::MRMSrcMem4VOp3:
  case X86II::MRMSrcMemOp4:
  case X86II::MRMSrcMemCC:
    return nullptr;
  // These access memory and are not handled yet.
  case X86II::RawFrmImm16:
  case X86II::RawFrmMemOffs:
  case X86II::RawFrmSrc:
  case X86II::RawFrmDst:
  case X86II::RawFrmDstSrc:
    return "unsupported opcode: non uniform memory access";
  }
}
// If the opcode is invalid, returns a pointer to a character literal
// indicating the reason. nullptr indicates a valid opcode.
static const char *isInvalidOpcode(const Instruction &Instr) {
  const auto OpcodeName = Instr.Name;
  if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return "unsupported opcode: pseudo instruction";
  if (OpcodeName.startswith("POP") || OpcodeName.startswith("PUSH") ||
      OpcodeName.startswith("ADJCALLSTACK") || OpcodeName.startswith("LEAVE"))
    return "unsupported opcode: Push/Pop/AdjCallStack/Leave";
  if (const auto reason = isInvalidMemoryInstr(Instr))
    return reason;
  // We do not handle instructions with OPERAND_PCREL.
  for (const Operand &Op : Instr.Operands)
    if (Op.isExplicit() &&
        Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL)
      return "unsupported opcode: PC relative operand";
  // We do not handle second-form X87 instructions. We only handle first-form
  // ones (_Fp), see comment in X86InstrFPStack.td.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID)
      return "unsupported second-form X87 instruction";
  return nullptr;
}
static unsigned getX86FPFlags(const Instruction &Instr) {
  return Instr.Description.TSFlags & X86II::FPTypeMask;
}
// Helper to fill a memory operand with a value.
static void setMemOp(InstructionTemplate &IT, int OpIdx,
                     const MCOperand &OpVal) {
  const auto Op = IT.getInstr().Operands[OpIdx];
  assert(Op.isExplicit() && "invalid memory pattern");
  IT.getValueFor(Op) = OpVal;
}
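// Note: X86 memory references are modeled as five consecutive explicit
// operands -- BaseReg, ScaleAmt, IndexReg, Disp, Segment -- encoding
// Segment:Disp(BaseReg, IndexReg, ScaleAmt). The setMemOp calls below fill
// these slots individually.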
// Common (latency, uops) code for LEA templates. `RestrictDestRegs` is called
// with the addressing base and index registers and restricts the set of
// candidate LEA destination registers.
static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
    const Instruction &Instr, const BitVector &ForbiddenRegisters,
    const LLVMState &State, const SnippetGenerator::Options &Opts,
    std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)>
        RestrictDestRegs) {
  assert(Instr.Operands.size() == 6 && "invalid LEA");
  assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 &&
         "invalid LEA");

  constexpr const int kDestOp = 0;
  constexpr const int kBaseOp = 1;
  constexpr const int kIndexOp = 3;
  auto PossibleDestRegs =
      Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
  remove(PossibleDestRegs, ForbiddenRegisters);
  auto PossibleBaseRegs =
      Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
  remove(PossibleBaseRegs, ForbiddenRegisters);
  auto PossibleIndexRegs =
      Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
  remove(PossibleIndexRegs, ForbiddenRegisters);

  const auto &RegInfo = State.getRegInfo();
  std::vector<CodeTemplate> Result;
  for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
    for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
      for (int LogScale = 0; LogScale <= 3; ++LogScale) {
        // FIXME: Add an option for controlling how we explore immediates.
        for (const int Disp : {0, 42}) {
          InstructionTemplate IT(&Instr);
          const int64_t Scale = 1ull << LogScale;
          setMemOp(IT, 1, MCOperand::createReg(BaseReg));
          setMemOp(IT, 2, MCOperand::createImm(Scale));
          setMemOp(IT, 3, MCOperand::createReg(IndexReg));
          setMemOp(IT, 4, MCOperand::createImm(Disp));
          // SegmentReg must be 0 for LEA.
          setMemOp(IT, 5, MCOperand::createReg(0));

          // Output reg candidates are selected by the caller.
          auto PossibleDestRegsNow = PossibleDestRegs;
          RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow);
          assert(PossibleDestRegsNow.set_bits().begin() !=
                     PossibleDestRegsNow.set_bits().end() &&
                 "no remaining registers");
          setMemOp(
              IT, 0,
              MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin()));

          CodeTemplate CT;
          CT.Instructions.push_back(std::move(IT));
          CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
                              RegInfo.getName(IndexReg), Scale, Disp)
                          .str();
          Result.push_back(std::move(CT));
          if (Result.size() >= Opts.MaxConfigsPerOpcode)
            return std::move(Result);
        }
      }
    }
  }
  return std::move(Result);
}
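// For example, with BaseReg = RAX, IndexReg = RDI, LogScale = 1 and Disp = 42,
// the resulting template's Config string reads "42(%RAX, %RDI, 2)".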
class X86SerialSnippetGenerator : public SerialSnippetGenerator {
public:
  using SerialSnippetGenerator::SerialSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr,
                        const BitVector &ForbiddenRegisters) const override;
};
Expected<std::vector<CodeTemplate>>
X86SerialSnippetGenerator::generateCodeTemplates(
    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // We just select a destination register that aliases the base
          // register, which creates a serial dependency chain.
          CandidateDestRegs &=
              State.getRATC().getRegister(BaseReg).aliasedBits();
        });
  }

  if (Instr.hasMemoryOperands())
    return make_error<Failure>(
        "unsupported memory operand in latency measurements");

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return SerialSnippetGenerator::generateCodeTemplates(Instr,
                                                         ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
  case X86II::CompareFP:
  case X86II::CondMovFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    return generateSelfAliasingCodeTemplates(Instr);
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}
class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
public:
  using ParallelSnippetGenerator::ParallelSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr,
                        const BitVector &ForbiddenRegisters) const override;
};
Expected<std::vector<CodeTemplate>>
X86ParallelSnippetGenerator::generateCodeTemplates(
    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // Any destination register that is not used for addressing is fine.
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(BaseReg).aliasedBits());
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(IndexReg).aliasedBits());
        });
  }

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return ParallelSnippetGenerator::generateCodeTemplates(Instr,
                                                           ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    // We generate the same code for latency and uops.
    return generateSelfAliasingCodeTemplates(Instr);
  case X86II::CompareFP:
  case X86II::CondMovFP:
    // We can compute uops for any FP instruction that does not grow or shrink
    // the stack (either do not touch the stack or push as much as they pop).
    return generateUnconstrainedCodeTemplates(
        Instr, "instruction does not grow/shrink the FP stack");
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}
static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:  return X86::MOV8ri;
  case 16: return X86::MOV16ri;
  case 32: return X86::MOV32ri;
  case 64: return X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}
// Generates instruction to load an immediate value into a register.
static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                            const APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
  return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}
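// For example, loadImmediate(X86::EAX, 32, APInt(32, 42)) builds a single
// MOV32ri, i.e. `movl $42, %eax` (assuming the opcode table above).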
// Allocates scratch memory on the stack.
static MCInst allocateStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::SUB64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}
// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                             uint64_t Imm) {
  return MCInstBuilder(MovOpcode)
      // Address = RSP + OffsetBytes.
      .addReg(X86::RSP)    // BaseReg
      .addImm(1)           // ScaleAmt
      .addReg(0)           // IndexReg
      .addImm(OffsetBytes) // Disp
      .addReg(0)           // Segment
      // Immediate to store.
      .addImm(Imm);
}
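// For example, fillStackSpace(X86::MOV32mi, 4, 0xff) encodes
// `movl $0xff, 4(%rsp)`.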
// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = RSP.
      .addReg(X86::RSP) // BaseReg
      .addImm(1)        // ScaleAmt
      .addReg(0)        // IndexReg
      .addImm(0)        // Disp
      .addReg(0);       // Segment
}
// Releases scratch memory.
static MCInst releaseStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::ADD64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}
// Reserves some space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
struct ConstantInliner {
  explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {}

  std::vector<MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
                                      unsigned Opcode);

  std::vector<MCInst> loadX87STAndFinalize(unsigned Reg);

  std::vector<MCInst> loadX87FPAndFinalize(unsigned Reg);

  std::vector<MCInst> popFlagAndFinalize();

  std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode,
                                                 unsigned Value);

private:
  ConstantInliner &add(const MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  void initStack(unsigned Bytes);

  static constexpr const unsigned kF80Bytes = 10; // 80 bits.

  APInt Constant_;
  std::vector<MCInst> Instructions;
};
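// Typical use (illustrative sketch): materialize a 128-bit constant into an
// XMM register through the scratch stack on an SSE-only subtarget:
//   ConstantInliner CI(APInt(128, 42));
//   std::vector<MCInst> Code =
//       CI.loadAndFinalize(X86::XMM0, 128, X86::MOVDQUrm);
// This expands to: sub $16, %rsp; four 4-byte stores of the constant;
// movdqu (%rsp), %xmm0; add $16, %rsp.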
std::vector<MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
                                                     unsigned RegBitWidth,
                                                     unsigned Opcode) {
  assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
  initStack(RegBitWidth / 8);
  add(loadToReg(Reg, Opcode));
  add(releaseStackSpace(RegBitWidth / 8));
  return std::move(Instructions);
}
std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_F80m)
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  if (Reg != X86::ST0)
    add(MCInstBuilder(X86::ST_Frr).addReg(Reg));
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}
std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_Fp80m)
          .addReg(Reg)
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}
std::vector<MCInst> ConstantInliner::popFlagAndFinalize() {
  initStack(8);
  add(MCInstBuilder(X86::POPF64));
  return std::move(Instructions);
}
std::vector<MCInst>
ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) {
  add(allocateStackSpace(4));
  add(fillStackSpace(X86::MOV32mi, 0, Value)); // e.g. mask all FP exceptions.
  add(MCInstBuilder(Opcode)
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(4));
  return std::move(Instructions);
}
void ConstantInliner::initStack(unsigned Bytes) {
  assert(Constant_.getBitWidth() <= Bytes * 8 &&
         "Value does not have the correct size");
  const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
                                 ? Constant_.sext(Bytes * 8)
                                 : Constant_;
  add(allocateStackSpace(Bytes));
  size_t ByteOffset = 0;
  for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
    add(fillStackSpace(
        X86::MOV32mi, ByteOffset,
        WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
  if (Bytes - ByteOffset >= 2) {
    add(fillStackSpace(
        X86::MOV16mi, ByteOffset,
        WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
    ByteOffset += 2;
  }
  if (Bytes - ByteOffset >= 1)
    add(fillStackSpace(
        X86::MOV8mi, ByteOffset,
        WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
}
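// For example, initStack(10) for an 80-bit constant emits `sub $10, %rsp`,
// two 4-byte MOV32mi stores (at offsets 0 and 4) and one 2-byte MOV16mi store
// (at offset 8).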
#include "X86GenExegesis.inc"
class ExegesisX86Target : public ExegesisTarget {
public:
  ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}

private:
  void addTargetSpecificPasses(PassManagerBase &PM) const override;

  unsigned getScratchMemoryRegister(const Triple &TT) const override;

  unsigned getLoopCounterRegister(const Triple &) const override;

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void randomizeMCOperand(const Instruction &Instr, const Variable &Var,
                          MCOperand &AssignedValue,
                          const BitVector &ForbiddenRegs) const override;

  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
                          unsigned Offset) const override;

  void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                   MachineBasicBlock &TargetMBB,
                                   const MCInstrInfo &MII) const override;

  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                               const APInt &Value) const override;

  ArrayRef<unsigned> getUnavailableRegisters() const override {
    return makeArrayRef(kUnavailableRegisters,
                        sizeof(kUnavailableRegisters) /
                            sizeof(kUnavailableRegisters[0]));
  }

  bool allowAsBackToBack(const Instruction &Instr) const override {
    const unsigned Opcode = Instr.Description.Opcode;
    return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r &&
           Opcode != X86::LEA64_32r && Opcode != X86::LEA16r;
  }

  std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
  }

  std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
  }

  bool matchesArch(Triple::ArchType Arch) const override {
    return Arch == Triple::x86_64 || Arch == Triple::x86;
  }

  static const unsigned kUnavailableRegisters[4];
};
// We disable a few registers that cannot be encoded on instructions with a REX
// prefix.
const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH,
                                                              X86::CH, X86::DH};

// We're using one of R8-R15 because these registers are never hardcoded in
// instructions (e.g. MOVS implicitly uses EDI and ESI), so they have fewer
// conflicts.
constexpr const unsigned kLoopCounterReg = X86::R8;
void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const {
  // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
  PM.add(createX86FloatingPointStackifierPass());
}
unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    // FIXME: This would require popping from the stack, so we would have to
    // add some additional setup code.
    return 0;
  }
  return TT.isOSWindows() ? X86::RCX : X86::RDI;
}
unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    return 0;
  }
  return kLoopCounterReg;
}
void ExegesisX86Target::randomizeMCOperand(
    const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
    const BitVector &ForbiddenRegs) const {
  ExegesisTarget::randomizeMCOperand(Instr, Var, AssignedValue, ForbiddenRegs);

  const Operand &Op = Instr.getPrimaryOperand(Var);
  switch (Op.getExplicitOperandInfo().OperandType) {
  case X86::OperandType::OPERAND_ROUNDING_CONTROL:
    AssignedValue =
        MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::NO_EXC));
    break;
  case X86::OperandType::OPERAND_COND_CODE:
    AssignedValue =
        MCOperand::createImm(randomIndex(X86::CondCode::LAST_VALID_COND));
    break;
  default:
    break;
  }
}
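// For example, the condition-code immediate of a CMOV variable ends up as a
// random value in [0, LAST_VALID_COND] (assuming randomIndex(Max) draws from
// that inclusive range).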
void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
                                           unsigned Reg,
                                           unsigned Offset) const {
  assert(!isInvalidMemoryInstr(IT.getInstr()) &&
         "fillMemoryOperands requires a valid memory instruction");
  int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags);
  assert(MemOpIdx >= 0 && "invalid memory operand index");
  // getMemoryOperandNo() ignores tied operands, so we have to add them back.
  for (unsigned I = 0; I <= static_cast<unsigned>(MemOpIdx); ++I) {
    const auto &Op = IT.getInstr().Operands[I];
    if (Op.isTied() && Op.getTiedToIndex() < I) {
      ++MemOpIdx;
    }
  }
  setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
  setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
  setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
  setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
  setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0));      // Segment
}
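// For example, filling MOV32rm with the scratch register RDI and Offset 16
// makes the instruction read from 16(%rdi) (scale 1, no index, no segment
// override).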
void ExegesisX86Target::decrementLoopCounterAndJump(
    MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
    const MCInstrInfo &MII) const {
  BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
      .addDef(kLoopCounterReg)
      .addUse(kLoopCounterReg)
      .addImm(-1);
  BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
      .addMBB(&TargetMBB)
      .addImm(X86::COND_NE);
}
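// The emitted sequence is roughly `addq $-1, %r8 ; jne <TargetMBB>`, with %r8
// being kLoopCounterReg.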
std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
                                                unsigned Reg,
                                                const APInt &Value) const {
  if (X86::GR8RegClass.contains(Reg))
    return {loadImmediate(Reg, 8, Value)};
  if (X86::GR16RegClass.contains(Reg))
    return {loadImmediate(Reg, 16, Value)};
  if (X86::GR32RegClass.contains(Reg))
    return {loadImmediate(Reg, 32, Value)};
  if (X86::GR64RegClass.contains(Reg))
    return {loadImmediate(Reg, 64, Value)};
  ConstantInliner CI(Value);
  if (X86::VR64RegClass.contains(Reg))
    return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
  if (X86::VR128XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
    return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
  }
  if (X86::VR256XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
  }
  if (X86::VR512RegClass.contains(Reg))
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm);
  if (X86::RSTRegClass.contains(Reg)) {
    return CI.loadX87STAndFinalize(Reg);
  }
  if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) ||
      X86::RFP80RegClass.contains(Reg)) {
    return CI.loadX87FPAndFinalize(Reg);
  }
  if (Reg == X86::EFLAGS)
    return CI.popFlagAndFinalize();
  if (Reg == X86::MXCSR)
    return CI.loadImplicitRegAndFinalize(
        STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR,
        0x1f80); // 0x1f80 is the default MXCSR value (all exceptions masked).
  if (Reg == X86::FPCW)
    return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f);
  return {}; // Not yet implemented.
}
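// For example, setting RAX to 42 yields a single `movabsq $42, %rax`, while
// setting EFLAGS goes through the ConstantInliner: the value is spilled to a
// scratch stack slot and reloaded with `popfq`.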
static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis
} // namespace llvm