//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Latency.h"
#include "../Uops.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"

namespace exegesis {

namespace {

// A chunk of an instruction's operands that represents a single memory access.
struct MemoryOperandRange {
  MemoryOperandRange(llvm::ArrayRef<Operand> Operands) : Ops(Operands) {}

  // Sets up the InstructionTemplate so that the memory access represented by
  // this object points to [Reg] + Offset.
  void fillOrDie(InstructionTemplate &IT, unsigned Reg, unsigned Offset) {
    switch (Ops.size()) {
    case 5:
      IT.getValueFor(Ops[0]) = llvm::MCOperand::createReg(Reg);    // BaseReg
      IT.getValueFor(Ops[1]) = llvm::MCOperand::createImm(1);      // ScaleAmt
      IT.getValueFor(Ops[2]) = llvm::MCOperand::createReg(0);      // IndexReg
      IT.getValueFor(Ops[3]) = llvm::MCOperand::createImm(Offset); // Disp
      IT.getValueFor(Ops[4]) = llvm::MCOperand::createReg(0);      // Segment
      break;
    default:
      llvm::errs() << Ops.size() << "-op memory accesses are not handled yet ("
                   << IT.Instr.Name << ")\n";
      llvm_unreachable("Invalid memory configuration");
    }
  }

  // Returns whether Range can be filled.
  static bool isValid(const MemoryOperandRange &Range) {
    return Range.Ops.size() == 5;
  }

  // Returns whether Op is a valid memory operand.
  static bool isMemoryOperand(const Operand &Op) {
    return Op.isMemory() && Op.isExplicit();
  }

  llvm::ArrayRef<Operand> Ops;
};
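
// Note: the five operands filled above are how LLVM's MC layer models an X86
// memory reference: BaseReg + ScaleAmt * IndexReg + Disp, plus an optional
// Segment override (register 0 meaning "none"). fillOrDie thus rewrites the
// access into the simple form [Reg + Offset].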

// An X86 memory access involves a non-constant number of operands. This
// function extracts contiguous memory operands into MemoryOperandRanges so
// that they are easier to check and fill.
static std::vector<MemoryOperandRange>
getMemoryOperandRanges(llvm::ArrayRef<Operand> Operands) {
  std::vector<MemoryOperandRange> Result;
  while (!Operands.empty()) {
    Operands = Operands.drop_until(MemoryOperandRange::isMemoryOperand);
    auto MemoryOps = Operands.take_while(MemoryOperandRange::isMemoryOperand);
    if (!MemoryOps.empty())
      Result.push_back(MemoryOps);
    Operands = Operands.drop_front(MemoryOps.size());
  }
  return Result;
}
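
// For example, an instruction with (reg, reg, mem) operands yields a single
// MemoryOperandRange covering its five contiguous memory operands, while a
// register-only instruction yields an empty vector.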

// Returns an error if we cannot handle this instruction.
static llvm::Error IsInvalidOpcode(const Instruction &Instr) {
  const auto OpcodeName = Instr.Name;
  if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
      OpcodeName.startswith("ADJCALLSTACK"))
    return llvm::make_error<BenchmarkFailure>(
        "unsupported opcode: Push/Pop/AdjCallStack");
  const bool ValidMemoryOperands = llvm::all_of(
      getMemoryOperandRanges(Instr.Operands), MemoryOperandRange::isValid);
  if (!ValidMemoryOperands)
    return llvm::make_error<BenchmarkFailure>(
        "unsupported opcode: non uniform memory access");
  // We do not handle instructions with OPERAND_PCREL.
  for (const Operand &Op : Instr.Operands)
    if (Op.isExplicit() &&
        Op.getExplicitOperandInfo().OperandType == llvm::MCOI::OPERAND_PCREL)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported opcode: PC relative operand");
  // We do not handle instructions that access memory through a segment
  // register.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass ==
            llvm::X86::SEGMENT_REGRegClassID)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported opcode: access segment memory");
  // We do not handle second-form X87 instructions. We only handle first-form
  // ones (_Fp), see comment in X86InstrFPStack.td.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass == llvm::X86::RSTRegClassID)
      return llvm::make_error<BenchmarkFailure>(
          "unsupported second-form X87 instruction");
  return llvm::Error::success();
}
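
// An explanatory note on the first rejection: PUSHF/POPF move RSP on every
// execution (and POPF can also alter system flags), so repeating them in a
// measurement loop would unbalance the stack, while ADJCALLSTACK* are
// pseudo-instructions that never survive to encoding.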

static unsigned GetX86FPFlags(const Instruction &Instr) {
  return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
}
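
// FPTypeMask extracts the x87 "FP Type" field of the instruction's TSFlags
// (see X86BaseInfo.h): one of NotFP, ZeroArgFP, OneArgFP, OneArgFPRW,
// TwoArgFP, CompareFP, CondMovFP or SpecialFP. The snippet generators below
// dispatch on this value.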

class X86LatencySnippetGenerator : public LatencySnippetGenerator {
public:
  using LatencySnippetGenerator::LatencySnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr) const override {
    if (auto E = IsInvalidOpcode(Instr))
      return std::move(E);

    switch (GetX86FPFlags(Instr)) {
    case llvm::X86II::NotFP:
      return LatencySnippetGenerator::generateCodeTemplates(Instr);
    case llvm::X86II::ZeroArgFP:
    case llvm::X86II::OneArgFP:
    case llvm::X86II::SpecialFP:
    case llvm::X86II::CompareFP:
    case llvm::X86II::CondMovFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP:
      // These are instructions like:
      //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the
      // stack.
      return generateSelfAliasingCodeTemplates(Instr);
    default:
      llvm_unreachable("Unknown FP Type!");
    }
  }
};

class X86UopsSnippetGenerator : public UopsSnippetGenerator {
public:
  using UopsSnippetGenerator::UopsSnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(const Instruction &Instr) const override {
    if (auto E = IsInvalidOpcode(Instr))
      return std::move(E);

    switch (GetX86FPFlags(Instr)) {
    case llvm::X86II::NotFP:
      return UopsSnippetGenerator::generateCodeTemplates(Instr);
    case llvm::X86II::ZeroArgFP:
    case llvm::X86II::OneArgFP:
    case llvm::X86II::SpecialFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP:
      // These are instructions like:
      //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the
      // stack. We generate the same code for latency and uops.
      return generateSelfAliasingCodeTemplates(Instr);
    case llvm::X86II::CompareFP:
    case llvm::X86II::CondMovFP:
      // We can compute uops for any FP instruction that does not grow or
      // shrink the stack (i.e. it either does not touch the stack, or it
      // pushes as much as it pops).
      return generateUnconstrainedCodeTemplates(
          Instr, "instruction does not grow/shrink the FP stack");
    default:
      llvm_unreachable("Unknown FP Type!");
    }
  }
};
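
// Note the contrast with X86LatencySnippetGenerator above: CompareFP and
// CondMovFP are rejected there but handled here, since a uops measurement
// only requires the FP stack depth to stay constant, not a serial dependency
// chain through the x87 stack.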

static unsigned GetLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:
    return llvm::X86::MOV8ri;
  case 16:
    return llvm::X86::MOV16ri;
  case 32:
    return llvm::X86::MOV32ri;
  case 64:
    return llvm::X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

// Generates an instruction that loads an immediate value into a register.
static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                                  const llvm::APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
  return llvm::MCInstBuilder(GetLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}
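
// For example, loadImmediate(llvm::X86::RAX, 64, llvm::APInt(64, 42)) builds
// the MCInst `MOV64ri RAX, 42`, i.e. `movabsq $42, %rax`.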

// Allocates scratch memory on the stack by subtracting from RSP.
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                   uint64_t Imm) {
  return llvm::MCInstBuilder(MovOpcode)
      // Address = RSP + OffsetBytes.
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(OffsetBytes)    // Disp
      .addReg(0)              // Segment
      // Immediate to store.
      .addImm(Imm);
}

// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  return llvm::MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = RSP.
      .addReg(llvm::X86::RSP) // BaseReg
      .addImm(1)              // ScaleAmt
      .addReg(0)              // IndexReg
      .addImm(0)              // Disp
      .addReg(0);             // Segment
}

// Releases scratch memory by adding back to RSP.
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
  return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
      .addReg(llvm::X86::RSP)
      .addReg(llvm::X86::RSP)
      .addImm(Bytes);
}
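
// Note: SUB64ri8/ADD64ri8 encode `Bytes` as a sign-extended 8-bit immediate,
// so these helpers can move RSP by at most 127 bytes. The largest constant
// inlined below is 64 bytes (a 512-bit vector register), which fits.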

// Reserves some space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
struct ConstantInliner {
  explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {}

  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
                                            unsigned Opcode) {
    assert((RegBitWidth & 7) == 0 &&
           "RegBitWidth must be a multiple of 8 bits");
    initStack(RegBitWidth / 8);
    add(loadToReg(Reg, Opcode));
    add(releaseStackSpace(RegBitWidth / 8));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg) {
    initStack(kF80Bytes);
    add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
            // Address = RSP.
            .addReg(llvm::X86::RSP) // BaseReg
            .addImm(1)              // ScaleAmt
            .addReg(0)              // IndexReg
            .addImm(0)              // Disp
            .addReg(0));            // Segment
    if (Reg != llvm::X86::ST0)
      add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
    add(releaseStackSpace(kF80Bytes));
    return std::move(Instructions);
  }
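
  // Note: LD_F80m pushes the loaded 80-bit value onto the x87 stack, i.e.
  // into ST(0); the ST_Frr (FST ST(i)) above then copies ST(0) into the
  // requested register when that register is not ST(0) itself.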

  std::vector<llvm::MCInst> loadX87FPAndFinalize(unsigned Reg) {
    initStack(kF80Bytes);
    add(llvm::MCInstBuilder(llvm::X86::LD_Fp80m)
            .addReg(Reg)
            // Address = RSP.
            .addReg(llvm::X86::RSP) // BaseReg
            .addImm(1)              // ScaleAmt
            .addReg(0)              // IndexReg
            .addImm(0)              // Disp
            .addReg(0));            // Segment
    add(releaseStackSpace(kF80Bytes));
    return std::move(Instructions);
  }

  std::vector<llvm::MCInst> popFlagAndFinalize() {
    initStack(8);
    add(llvm::MCInstBuilder(llvm::X86::POPF64));
    return std::move(Instructions);
  }
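
  // Note: POPF64 itself pops the 8 bytes written by initStack(8), so no
  // explicit releaseStackSpace is needed here.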

private:
  static constexpr const unsigned kF80Bytes = 10; // 80 bits.

  ConstantInliner &add(const llvm::MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  void initStack(unsigned Bytes) {
    assert(Constant_.getBitWidth() <= Bytes * 8 &&
           "Value does not have the correct size");
    const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
                                         ? Constant_.sext(Bytes * 8)
                                         : Constant_;
    add(allocateStackSpace(Bytes));
    size_t ByteOffset = 0;
    for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
      add(fillStackSpace(
          llvm::X86::MOV32mi, ByteOffset,
          WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
    if (Bytes - ByteOffset >= 2) {
      add(fillStackSpace(
          llvm::X86::MOV16mi, ByteOffset,
          WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
      ByteOffset += 2;
    }
    if (Bytes - ByteOffset >= 1)
      add(fillStackSpace(
          llvm::X86::MOV8mi, ByteOffset,
          WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
  }
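
  // Note: initStack above writes the constant using the widest stores first
  // (4-, then 2-, then 1-byte chunks); e.g. the 10-byte x87 constant is
  // written as two MOV32mi stores followed by one MOV16mi.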

  llvm::APInt Constant_;
  std::vector<llvm::MCInst> Instructions;
};

class ExegesisX86Target : public ExegesisTarget {
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
    // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
    PM.add(llvm::createX86FloatingPointStackifierPass());
  }

  unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
    if (!TT.isArch64Bit()) {
      // FIXME: This would require popping from the stack, so we would have to
      // add some additional setup code.
      return 0;
    }
    return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
  }
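
  // RDI and RCX hold the first integer argument in the SysV and Windows x64
  // calling conventions respectively, which is where the benchmark harness
  // passes the address of the scratch memory region to the generated snippet.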

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
                          unsigned Offset) const override {
    // FIXME: For instructions that read AND write to memory, we use the same
    // value for input and output.
    for (auto &MemoryRange : getMemoryOperandRanges(IT.Instr.Operands))
      MemoryRange.fillOrDie(IT, Reg, Offset);
  }

  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
                                     unsigned Reg,
                                     const llvm::APInt &Value) const override {
    if (llvm::X86::GR8RegClass.contains(Reg))
      return {loadImmediate(Reg, 8, Value)};
    if (llvm::X86::GR16RegClass.contains(Reg))
      return {loadImmediate(Reg, 16, Value)};
    if (llvm::X86::GR32RegClass.contains(Reg))
      return {loadImmediate(Reg, 32, Value)};
    if (llvm::X86::GR64RegClass.contains(Reg))
      return {loadImmediate(Reg, 64, Value)};
    ConstantInliner CI(Value);
    if (llvm::X86::VR64RegClass.contains(Reg))
      return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
    if (llvm::X86::VR128XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
      return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
    }
    if (llvm::X86::VR256XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
    }
    if (llvm::X86::VR512RegClass.contains(Reg))
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
    if (llvm::X86::RSTRegClass.contains(Reg)) {
      return CI.loadX87STAndFinalize(Reg);
    }
    if (llvm::X86::RFP32RegClass.contains(Reg) ||
        llvm::X86::RFP64RegClass.contains(Reg) ||
        llvm::X86::RFP80RegClass.contains(Reg)) {
      return CI.loadX87FPAndFinalize(Reg);
    }
    if (Reg == llvm::X86::EFLAGS)
      return CI.popFlagAndFinalize();
    return {}; // Not yet implemented.
  }
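
  // Note on setRegTo: each vector path selects an unaligned move for the
  // widest available ISA (MOVDQU for SSE, VMOVDQU/VMOVDQUYrm for AVX,
  // VMOVDQU32Z*rm for AVX-512); the EVEX-encoded AVX-512 forms are also what
  // make the extended registers XMM16-XMM31 addressable.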

  std::unique_ptr<SnippetGenerator>
  createLatencySnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86LatencySnippetGenerator>(State);
  }

  std::unique_ptr<SnippetGenerator>
  createUopsSnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86UopsSnippetGenerator>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }
};

} // namespace

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis