//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "../Target.h"

#include "../Latency.h"
#include "../Uops.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"

namespace exegesis {

namespace {

// Common code for X86 Uops and Latency runners.
template <typename Impl> class X86SnippetGenerator : public Impl {
  using Impl::Impl;

  llvm::Expected<CodeTemplate>
  generateCodeTemplate(unsigned Opcode) const override {
    // Test whether we can generate a snippet for this instruction.
    const auto &InstrInfo = this->State.getInstrInfo();
    const auto OpcodeName = InstrInfo.getName(Opcode);
    if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
        OpcodeName.startswith("ADJCALLSTACK")) {
      return llvm::make_error<BenchmarkFailure>(
          "Unsupported opcode: Push/Pop/AdjCallStack");
    }

    // Handle x87 instructions according to their FP type.
    const auto &InstrDesc = InstrInfo.get(Opcode);
    const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
    const Instruction Instr(InstrDesc, this->RATC);
    switch (FPInstClass) {
    case llvm::X86II::NotFP:
      break;
    case llvm::X86II::ZeroArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
    case llvm::X86II::OneArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP: {
      // These are instructions like
      //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the
      // stack. We generate the same code for latency and uops.
      return this->generateSelfAliasingCodeTemplate(Instr);
    }
    case llvm::X86II::CompareFP:
      return Impl::handleCompareFP(Instr);
    case llvm::X86II::CondMovFP:
      return Impl::handleCondMovFP(Instr);
    case llvm::X86II::SpecialFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
    default:
      llvm_unreachable("Unknown FP Type!");
    }

    // Fallback to the generic implementation.
    return Impl::Base::generateCodeTemplate(Opcode);
  }
};

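// For example, with the X86LatencyImpl below as Impl, an x87 FSQRT
// (OneArgFPRW) is benchmarked with a self-aliasing snippet, while
// PUSHF/POPF/ADJCALLSTACK opcodes are rejected with a BenchmarkFailure.
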
class X86LatencyImpl : public LatencySnippetGenerator {
protected:
  using Base = LatencySnippetGenerator;
  using Base::Base;

  llvm::Expected<CodeTemplate>
  handleCompareFP(const Instruction &Instr) const {
    return llvm::make_error<SnippetGeneratorFailure>(
        "Unsupported x87 CompareFP");
  }
  llvm::Expected<CodeTemplate>
  handleCondMovFP(const Instruction &Instr) const {
    return llvm::make_error<SnippetGeneratorFailure>(
        "Unsupported x87 CondMovFP");
  }
};

class X86UopsImpl : public UopsSnippetGenerator {
protected:
  using Base = UopsSnippetGenerator;
  using Base::Base;

  // We can compute uops for any FP instruction that does not grow or shrink
  // the stack, i.e. instructions that either do not touch the stack or push
  // as much as they pop.
  llvm::Expected<CodeTemplate>
  handleCompareFP(const Instruction &Instr) const {
    return generateUnconstrainedCodeTemplate(
        Instr, "instruction does not grow/shrink the FP stack");
  }
  llvm::Expected<CodeTemplate>
  handleCondMovFP(const Instruction &Instr) const {
    return generateUnconstrainedCodeTemplate(
        Instr, "instruction does not grow/shrink the FP stack");
  }
};

static unsigned GetLoadImmediateOpcode(const llvm::APInt &Value) {
  switch (Value.getBitWidth()) {
  case 8:
    return llvm::X86::MOV8ri;
  case 16:
    return llvm::X86::MOV16ri;
  case 32:
    return llvm::X86::MOV32ri;
  case 64:
    return llvm::X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

// Generates an instruction that loads an immediate value into a register.
static llvm::MCInst loadImmediate(unsigned Reg, const llvm::APInt &Value,
                                  unsigned MaxBitWidth) {
  assert(Value.getBitWidth() <= MaxBitWidth && "Value too big to fit register");
  return llvm::MCInstBuilder(GetLoadImmediateOpcode(Value))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}

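// For example, loadImmediate(llvm::X86::EAX, llvm::APInt(32, 42), 32) builds
// the MCInst `MOV32ri EAX, 42`.
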
// Allocates scratch memory on the stack.
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                   uint64_t Imm) {
  return llvm::MCInstBuilder(MovOpcode)
      // Address = RSP + OffsetBytes.
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(OffsetBytes)    // Disp
      .addReg(0)              // Segment
      // Immediate value to store.
      .addImm(Imm);
}

// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return llvm::MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = RSP.
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(0)              // Disp
      .addReg(0);             // Segment
}

// Releases scratch memory.
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

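// Together, these helpers bracket a scratch value on the stack. For a 4-byte
// value loaded with MOV32rm, the emitted sequence is roughly:
//   SUB64ri8  RSP, RSP, 4
//   MOV32mi   dword ptr [RSP], <imm>
//   MOV32rm   <reg>, dword ptr [RSP]
//   ADD64ri8  RSP, RSP, 4
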
// Reserves space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
struct ConstantInliner {
  explicit ConstantInliner(const llvm::APInt &Constant)
      : StackSize(Constant.getBitWidth() / 8) {
    assert(Constant.getBitWidth() % 8 == 0 && "Must be a multiple of 8");
    add(allocateStackSpace(StackSize));
    size_t ByteOffset = 0;
    for (; StackSize - ByteOffset >= 4; ByteOffset += 4)
      add(fillStackSpace(
          llvm::X86::MOV32mi, ByteOffset,
          Constant.extractBits(32, ByteOffset * 8).getZExtValue()));
    if (StackSize - ByteOffset >= 2) {
      add(fillStackSpace(
          llvm::X86::MOV16mi, ByteOffset,
          Constant.extractBits(16, ByteOffset * 8).getZExtValue()));
      ByteOffset += 2;
    }
    if (StackSize - ByteOffset >= 1)
      add(fillStackSpace(
          llvm::X86::MOV8mi, ByteOffset,
          Constant.extractBits(8, ByteOffset * 8).getZExtValue()));
  }

  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned Opcode,
                                            unsigned BitWidth) {
    assert(StackSize * 8 == BitWidth && "Value does not have the correct size");
    add(loadToReg(Reg, Opcode));
    add(releaseStackSpace(StackSize));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> loadX87AndFinalize(unsigned Reg, unsigned Opcode,
                                               unsigned BitWidth) {
    assert(StackSize * 8 == BitWidth && "Value does not have the correct size");
    add(llvm::MCInstBuilder(Opcode)
            .addReg(llvm::X86::RSP) // BaseReg
            .addImm(1)              // ScaleAmt
            .addReg(0)              // IndexReg
            .addImm(0)              // Disp
            .addReg(0));            // Segment
    if (Reg != llvm::X86::ST0)
      add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
    add(releaseStackSpace(StackSize));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> popFlagAndFinalize() {
    assert(StackSize * 8 == 32 && "Value does not have the correct size");
    add(llvm::MCInstBuilder(llvm::X86::POPF64));
    return std::move(Instructions);
  }

private:
  ConstantInliner &add(const llvm::MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  const size_t StackSize;
  std::vector<llvm::MCInst> Instructions;
};

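// For example, an 80-bit x87 constant (10 bytes) is spilled with two MOV32mi
// stores (offsets 0 and 4) followed by one MOV16mi store (offset 8), and can
// then be loaded with loadX87AndFinalize(Reg, llvm::X86::LD_F80m, 80).
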
class ExegesisX86Target : public ExegesisTarget {
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
    // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
    PM.add(llvm::createX86FloatingPointStackifierPass());
  }

  unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
    if (!TT.isArch64Bit()) {
      // FIXME: This would require popping from the stack, so we would have to
      // add some additional setup code.
      return 0;
    }
    // First integer argument register of the 64-bit calling convention:
    // RCX on Windows, RDI elsewhere.
    return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
  }

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
                          unsigned Offset) const override {
    // FIXME: For instructions that read AND write to memory, we use the same
    // value for input and output.
    for (size_t I = 0, E = IB.Instr.Operands.size(); I < E; ++I) {
      const Operand *Op = &IB.Instr.Operands[I];
      if (Op->IsExplicit && Op->IsMem) {
        // Case 1: 5-op memory.
        assert((I + 5 <= E) && "x86 memory references are always 5 ops");
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(Reg); // BaseReg
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createImm(1); // ScaleAmt
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // IndexReg
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createImm(Offset); // Disp
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // Segment
        // Case 2: segment:index addressing. We assume that ES is 0.
      }
    }
  }

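  // For example, for a reg-mem instruction such as MOV32rm, the five explicit
  // memory operands are set so that the reference is [Reg + 1*0 + Offset]
  // with no segment override.
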
  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
                                     const llvm::APInt &Value,
                                     unsigned Reg) const override {
    if (llvm::X86::GR8RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 8)};
    if (llvm::X86::GR16RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 16)};
    if (llvm::X86::GR32RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 32)};
    if (llvm::X86::GR64RegClass.contains(Reg))
      return {loadImmediate(Reg, Value, 64)};
    ConstantInliner CI(Value);
    if (llvm::X86::VR64RegClass.contains(Reg))
      return CI.loadAndFinalize(Reg, llvm::X86::MMX_MOVQ64rm, 64);
    if (llvm::X86::VR128XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Z128rm, 128);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQUrm, 128);
      return CI.loadAndFinalize(Reg, llvm::X86::MOVDQUrm, 128);
    }
    if (llvm::X86::VR256XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Z256rm, 256);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQUYrm, 256);
    }
    if (llvm::X86::VR512RegClass.contains(Reg))
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Zrm, 512);
    if (llvm::X86::RSTRegClass.contains(Reg)) {
      if (Value.getBitWidth() == 32)
        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F32m, 32);
      if (Value.getBitWidth() == 64)
        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F64m, 64);
      if (Value.getBitWidth() == 80)
        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F80m, 80);
    }
    if (Reg == llvm::X86::EFLAGS)
      return CI.popFlagAndFinalize();
    llvm_unreachable("Not yet implemented");
  }

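  // For example, setting XMM0 to a 128-bit constant on an AVX (non-AVX512)
  // target produces the ConstantInliner stack stores followed by
  // `VMOVDQUrm XMM0, [RSP]` and the stack release.
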
  std::unique_ptr<SnippetGenerator>
  createLatencySnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86SnippetGenerator<X86LatencyImpl>>(State);
  }

  std::unique_ptr<SnippetGenerator>
  createUopsSnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86SnippetGenerator<X86UopsImpl>>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }
};

} // namespace

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis