//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "../Target.h"

#include "../Latency.h"
#include "../SnippetGenerator.h"
#include "../Uops.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"

namespace exegesis {

// Returns an error if we cannot handle the memory references in this
// instruction.
static Error isInvalidMemoryInstr(const Instruction &Instr) {
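  // The instruction's format (TSFlags & FormMask) tells us whether and how it
  // encodes a memory reference, so we can classify memory behavior without
  // per-opcode knowledge.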
  switch (Instr.Description->TSFlags & X86II::FormMask) {
  default:
    llvm_unreachable("Unknown FormMask value");
  // These have no memory access.
  case X86II::MRMDestReg:
  case X86II::MRMSrcReg:
  case X86II::MRMSrcReg4VOp3:
  case X86II::MRMSrcRegOp4:
  case X86II::MRMSrcRegCC:
  case X86II::RawFrmImm8:
    return Error::success();
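  // AddRegFrm encodes the register in the opcode itself; these are
  // register-only except for PUSH/POP, which implicitly access the stack.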
  case X86II::AddRegFrm:
    return (Instr.Description->Opcode == X86::POP16r ||
            Instr.Description->Opcode == X86::POP32r ||
            Instr.Description->Opcode == X86::PUSH16r ||
            Instr.Description->Opcode == X86::PUSH32r)
               ? make_error<BenchmarkFailure>(
                     "unsupported opcode: unsupported memory access")
               : Error::success();
  // These access memory and are handled.
  case X86II::MRMDestMem:
  case X86II::MRMSrcMem:
  case X86II::MRMSrcMem4VOp3:
  case X86II::MRMSrcMemOp4:
  case X86II::MRMSrcMemCC:
    return Error::success();
  // These access memory and are not handled yet.
  case X86II::RawFrmImm16:
  case X86II::RawFrmMemOffs:
  case X86II::RawFrmSrc:
  case X86II::RawFrmDst:
  case X86II::RawFrmDstSrc:
    return make_error<BenchmarkFailure>(
        "unsupported opcode: non uniform memory access");
  }
}

static llvm::Error IsInvalidOpcode(const Instruction &Instr) {
  const auto OpcodeName = Instr.Name;
  if ((Instr.Description->TSFlags & X86II::FormMask) == X86II::Pseudo)
    return llvm::make_error<BenchmarkFailure>(
        "unsupported opcode: pseudo instruction");
  if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
      OpcodeName.startswith("ADJCALLSTACK"))
    return llvm::make_error<BenchmarkFailure>(
        "unsupported opcode: Push/Pop/AdjCallStack");
  if (llvm::Error Error = isInvalidMemoryInstr(Instr))
    return Error;
  // We do not handle instructions with OPERAND_PCREL.
  for (const Operand &Op : Instr.Operands)
    if (Op.isExplicit() &&
        Op.getExplicitOperandInfo().OperandType == llvm::MCOI::OPERAND_PCREL)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported opcode: PC relative operand");
  // We do not handle second-form X87 instructions. We only handle first-form
  // ones (_Fp), see comment in X86InstrFPStack.td.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass == llvm::X86::RSTRegClassID)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported second-form X87 instruction");
  return llvm::Error::success();
}
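
// The FPTypeMask bits of TSFlags classify how an x87 instruction uses the FP
// stack (everything else is NotFP); the values are defined in X86BaseInfo.h.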
static unsigned getX86FPFlags(const Instruction &Instr) {
  return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
}

class X86LatencySnippetGenerator : public LatencySnippetGenerator {
public:
  using LatencySnippetGenerator::LatencySnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr) const override;
};

llvm::Expected<std::vector<CodeTemplate>>
X86LatencySnippetGenerator::generateCodeTemplates(
    const Instruction &Instr) const {
  if (auto E = IsInvalidOpcode(Instr))
    return std::move(E);

  switch (getX86FPFlags(Instr)) {
  case llvm::X86II::NotFP:
    return LatencySnippetGenerator::generateCodeTemplates(Instr);
  case llvm::X86II::ZeroArgFP:
  case llvm::X86II::OneArgFP:
  case llvm::X86II::SpecialFP:
  case llvm::X86II::CompareFP:
  case llvm::X86II::CondMovFP:
    return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
  case llvm::X86II::OneArgFPRW:
  case llvm::X86II::TwoArgFP:
    // These are instructions like
    //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    return generateSelfAliasingCodeTemplates(Instr);
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

class X86UopsSnippetGenerator : public UopsSnippetGenerator {
public:
  using UopsSnippetGenerator::UopsSnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr) const override;
};

llvm::Expected<std::vector<CodeTemplate>>
X86UopsSnippetGenerator::generateCodeTemplates(
    const Instruction &Instr) const {
  if (auto E = IsInvalidOpcode(Instr))
    return std::move(E);

  switch (getX86FPFlags(Instr)) {
  case llvm::X86II::NotFP:
    return UopsSnippetGenerator::generateCodeTemplates(Instr);
  case llvm::X86II::ZeroArgFP:
  case llvm::X86II::OneArgFP:
  case llvm::X86II::SpecialFP:
    return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
  case llvm::X86II::OneArgFPRW:
  case llvm::X86II::TwoArgFP:
    // These are instructions like
    //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    // We generate the same code for latency and uops.
    return generateSelfAliasingCodeTemplates(Instr);
  case llvm::X86II::CompareFP:
  case llvm::X86II::CondMovFP:
    // We can compute uops for any FP instruction that does not grow or shrink
    // the stack (either do not touch the stack or push as much as they pop).
    return generateUnconstrainedCodeTemplates(
        Instr, "instruction does not grow/shrink the FP stack");
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:
    return llvm::X86::MOV8ri;
  case 16:
    return llvm::X86::MOV16ri;
  case 32:
    return llvm::X86::MOV32ri;
  case 64:
    return llvm::X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

// Generates instruction to load an immediate value into a register.
static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                                  const llvm::APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
  return llvm::MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}
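
// For example, the sequence above for loadImmediate(llvm::X86::EAX, 32,
// llvm::APInt(32, 42)) is the equivalent of `mov eax, 42`.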

// Allocates scratch memory on the stack.
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}
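
// Note that SUB64ri8 above takes a sign-extended 8-bit immediate; the scratch
// areas allocated here are at most 64 bytes, so they always fit.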

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                   uint64_t Imm) {
  return llvm::MCInstBuilder(MovOpcode)
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(OffsetBytes)    // Disp
      .addReg(0)              // Segment
      .addImm(Imm);
}

// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return llvm::MCInstBuilder(RMOpcode)
      .addReg(Reg)
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(0)              // Disp
      .addReg(0);             // Segment
}

// Releases scratch memory.
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

// Reserves some space on the stack, fills it with the content of the provided
// constant and provides methods to load the stack value into a register.
struct ConstantInliner {
  explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {}

  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
                                            unsigned Opcode);

  std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg);

  std::vector<llvm::MCInst> loadX87FPAndFinalize(unsigned Reg);

  std::vector<llvm::MCInst> popFlagAndFinalize();

private:
  ConstantInliner &add(const llvm::MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  void initStack(unsigned Bytes);

  static constexpr const unsigned kF80Bytes = 10; // 80 bits.

  llvm::APInt Constant_;
  std::vector<llvm::MCInst> Instructions;
};
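
// A typical sequence produced by loadAndFinalize(Reg, 128, MOVDQUrm) is:
//   sub rsp, 16                        ; allocateStackSpace
//   mov dword ptr [rsp + i], imm32     ; fillStackSpace via initStack (x4)
//   movdqu Reg, [rsp]                  ; loadToReg
//   add rsp, 16                        ; releaseStackSpace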

std::vector<llvm::MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
                                                           unsigned RegBitWidth,
                                                           unsigned Opcode) {
  assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
  initStack(RegBitWidth / 8);
  add(loadToReg(Reg, Opcode));
  add(releaseStackSpace(RegBitWidth / 8));
  return std::move(Instructions);
}

std::vector<llvm::MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
          .addReg(llvm::X86::RSP) // BaseReg
          .addImm(1)              // ScaleAmt
          .addReg(0)              // IndexReg
          .addImm(0)              // Disp
          .addReg(0));            // Segment
  if (Reg != llvm::X86::ST0)
    add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}

std::vector<llvm::MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
  initStack(kF80Bytes);
  add(llvm::MCInstBuilder(llvm::X86::LD_Fp80m)
          .addReg(Reg)
          .addReg(llvm::X86::RSP) // BaseReg
          .addImm(1)              // ScaleAmt
          .addReg(0)              // IndexReg
          .addImm(0)              // Disp
          .addReg(0));            // Segment
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}
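
// Sets EFLAGS by materializing the constant on the stack and popping it
// directly into the flags register.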
std::vector<llvm::MCInst> ConstantInliner::popFlagAndFinalize() {
  initStack(8);
  add(llvm::MCInstBuilder(llvm::X86::POPF64));
  return std::move(Instructions);
}
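
// initStack writes the constant into the allocated stack slot using the
// widest available immediate stores: 4-byte chunks first, then a 2-byte and a
// 1-byte tail as needed.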
void ConstantInliner::initStack(unsigned Bytes) {
  assert(Constant_.getBitWidth() <= Bytes * 8 &&
         "Value does not have the correct size");
  const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
                                       ? Constant_.sext(Bytes * 8)
                                       : Constant_;
  add(allocateStackSpace(Bytes));
  size_t ByteOffset = 0;
  for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
    add(fillStackSpace(
        llvm::X86::MOV32mi, ByteOffset,
        WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
  if (Bytes - ByteOffset >= 2) {
    add(fillStackSpace(
        llvm::X86::MOV16mi, ByteOffset,
        WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
    ByteOffset += 2;
  }
  if (Bytes - ByteOffset >= 1)
    add(fillStackSpace(
        llvm::X86::MOV8mi, ByteOffset,
        WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
}

#include "X86GenExegesis.inc"

class ExegesisX86Target : public ExegesisTarget {
public:
  ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}

private:
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override;

  unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override;

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void randomizeMCOperand(const Instruction &Instr, const Variable &Var,
                          llvm::MCOperand &AssignedValue,
                          const llvm::BitVector &ForbiddenRegs) const override;

  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
                          unsigned Offset) const override;

  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
                                     unsigned Reg,
                                     const llvm::APInt &Value) const override;

  ArrayRef<unsigned> getUnavailableRegisters() const override {
    return makeArrayRef(kUnavailableRegisters,
                        sizeof(kUnavailableRegisters) /
                            sizeof(kUnavailableRegisters[0]));
  }

  std::unique_ptr<SnippetGenerator>
  createLatencySnippetGenerator(const LLVMState &State) const override {
    return std::make_unique<X86LatencySnippetGenerator>(State);
  }

  std::unique_ptr<SnippetGenerator>
  createUopsSnippetGenerator(const LLVMState &State) const override {
    return std::make_unique<X86UopsSnippetGenerator>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }

  static const unsigned kUnavailableRegisters[4];
};

// We disable a few registers that cannot be encoded on instructions with a REX
// prefix.
const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH,
                                                              X86::CH, X86::DH};

void ExegesisX86Target::addTargetSpecificPasses(
    llvm::PassManagerBase &PM) const {
  // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
  PM.add(llvm::createX86FloatingPointStackifierPass());
}

unsigned
ExegesisX86Target::getScratchMemoryRegister(const llvm::Triple &TT) const {
  if (!TT.isArch64Bit()) {
    // FIXME: This would require popping from the stack, so we would have to
    // add some additional setup code.
    return 0;
  }
  return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
}
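
// Note: RDI and RCX are the first pointer argument registers of the SysV and
// Windows x64 calling conventions, respectively.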

void ExegesisX86Target::randomizeMCOperand(
    const Instruction &Instr, const Variable &Var,
    llvm::MCOperand &AssignedValue,
    const llvm::BitVector &ForbiddenRegs) const {
  ExegesisTarget::randomizeMCOperand(Instr, Var, AssignedValue, ForbiddenRegs);

  const Operand &Op = Instr.getPrimaryOperand(Var);
  switch (Op.getExplicitOperandInfo().OperandType) {
  case llvm::X86::OperandType::OPERAND_COND_CODE:
    AssignedValue = llvm::MCOperand::createImm(
        randomIndex(llvm::X86::CondCode::LAST_VALID_COND));
    break;
  default:
    break;
  }
}

void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
                                           unsigned Reg,
                                           unsigned Offset) const {
  assert(!isInvalidMemoryInstr(IT.Instr) &&
         "fillMemoryOperands requires a valid memory instruction");
  int MemOpIdx = X86II::getMemoryOperandNo(IT.Instr.Description->TSFlags);
  assert(MemOpIdx >= 0 && "invalid memory operand index");
  // getMemoryOperandNo() ignores tied operands, so we have to add them back.
  for (unsigned I = 0; I <= static_cast<unsigned>(MemOpIdx); ++I) {
    const auto &Op = IT.Instr.Operands[I];
    if (Op.isTied() && Op.getTiedToIndex() < I) {
      ++MemOpIdx;
    }
  }
  // Now fill in the memory operands.
  const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) {
    const auto Op = IT.Instr.Operands[OpIdx];
    assert(Op.isMemory() && Op.isExplicit() && "invalid memory pattern");
    IT.getValueFor(Op) = OpVal;
  };
  SetOp(MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
  SetOp(MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
  SetOp(MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
  SetOp(MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
  SetOp(MemOpIdx + 4, MCOperand::createReg(0));      // Segment
}
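
// Only general purpose registers can be set with a move-immediate; all other
// register classes are materialized by staging the constant in scratch stack
// memory (ConstantInliner) and loading it from there.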
std::vector<llvm::MCInst>
ExegesisX86Target::setRegTo(const llvm::MCSubtargetInfo &STI, unsigned Reg,
                            const llvm::APInt &Value) const {
  if (llvm::X86::GR8RegClass.contains(Reg))
    return {loadImmediate(Reg, 8, Value)};
  if (llvm::X86::GR16RegClass.contains(Reg))
    return {loadImmediate(Reg, 16, Value)};
  if (llvm::X86::GR32RegClass.contains(Reg))
    return {loadImmediate(Reg, 32, Value)};
  if (llvm::X86::GR64RegClass.contains(Reg))
    return {loadImmediate(Reg, 64, Value)};
  ConstantInliner CI(Value);
  if (llvm::X86::VR64RegClass.contains(Reg))
    return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
  if (llvm::X86::VR128XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
    if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
    return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
  }
  if (llvm::X86::VR256XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
    if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
  }
  if (llvm::X86::VR512RegClass.contains(Reg))
    if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
  if (llvm::X86::RSTRegClass.contains(Reg)) {
    return CI.loadX87STAndFinalize(Reg);
  }
  if (llvm::X86::RFP32RegClass.contains(Reg) ||
      llvm::X86::RFP64RegClass.contains(Reg) ||
      llvm::X86::RFP80RegClass.contains(Reg)) {
    return CI.loadX87FPAndFinalize(Reg);
  }
  if (Reg == llvm::X86::EFLAGS)
    return CI.popFlagAndFinalize();
  return {}; // Not yet implemented.
}

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis