//===-- X86FastISel.cpp - X86 FastISel implementation --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
  }

  bool fastSelectInstruction(const Instruction *I) override;

  /// The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction. If possible,
  /// try to fold the load as an operand to the instruction, returning true if
  /// possible.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          const DebugLoc &CurMIMD);

  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectSExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);
  bool X86SelectUIToFP(const Instruction *I);
  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
           (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16;
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);

  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, unsigned Op0,
                             unsigned Op1, unsigned Op2, unsigned Op3);
};

} // end anonymous namespace.
static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ: CC = 8;          break;
  case CmpInst::FCMP_ONE: CC = 12;         break;
  }

  return std::make_pair(CC, NeedSwap);
}
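
// Usage sketch (illustrative, not part of the original comments): the unsigned
// value is the immediate for the SSE/AVX CMPcc family (CMPSS/CMPSD/CMPPS/...),
// and NeedSwap asks the caller to commute the compare operands first, roughly:
//   unsigned CC;
//   bool NeedSwap;
//   std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
//   if (NeedSwap)
//     std::swap(CmpLHS, CmpRHS);
//   // ... then emit the SSE compare with CC as its immediate operand.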
/// Adds a complex addressing mode to the given machine instr builder.
/// Note, this will constrain the index register. If it's not possible to
/// constrain the given index register, then a new one will be created. The
/// IndexReg field of the addressing mode will be updated to match in this case.
const MachineInstrBuilder &
X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
                            X86AddressMode &AM) {
  // First constrain the index register. It needs to be a GR64_NOSP.
  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
                                         MIB->getNumOperands() +
                                             X86::AddrIndexReg);
  return ::addFullAddress(MIB, AM);
}
/// Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way.
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  // Make sure no potentially eflags clobbering phi moves can be inserted in
  // between.
  auto HasPhis = [](const BasicBlock *Succ) { return !Succ->phis().empty(); };
  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
    return false;

  // Make sure there are no potentially eflags clobbering constant
  // materializations in between.
  if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
    return false;

  CC = TmpCC;
  return true;
}
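
// For illustration, the pattern this fold recognizes corresponds to IR such as
// (value names hypothetical):
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// On success the user can consume EFLAGS (COND_O/COND_B) set by the arithmetic
// directly, provided nothing between the intrinsic and the user clobbers them.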
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !Subtarget->hasSSE2())
    return false;
  if (VT == MVT::f32 && !Subtarget->hasSSE1())
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE41 = Subtarget->hasSSE41();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX2 = Subtarget->hasAVX2();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
  if (VT == MVT::i1)
    VT = MVT::i8;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    break;
  case MVT::f32:
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
                    : X86::LD_Fp32m;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt
                    : X86::LD_Fp64m;
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  case MVT::v4f32:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPSZ128rm :
            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ128rm :
            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    break;
  case MVT::v2f64:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPDZ128rm :
            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ128rm :
            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
    break;
  case MVT::v8f32:
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
    break;
  case MVT::v4f64:
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
    break;
  case MVT::v16f32:
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
    break;
  case MVT::v8f64:
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the load isn't masked.
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
    break;
  }

  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    Register AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(X86::AND8ri), AndResult)
        .addReg(ValReg).addImm(1);
    ValReg = AndResult;
    [[fallthrough]]; // handle i1 as i8.
  }
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32:
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
    break;
  case MVT::f32:
    if (HasSSE1) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSS;
      else
        Opc = HasAVX512 ? X86::VMOVSSZmr :
              HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    } else
      Opc = X86::ST_Fp32m;
    break;
  case MVT::f64:
    if (HasSSE2) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSD;
      else
        Opc = HasAVX512 ? X86::VMOVSDZmr :
              HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    } else
      Opc = X86::ST_Fp64m;
    break;
  case MVT::x86mmx:
    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
    break;
  case MVT::v4f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ128mr :
              HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ128mr :
              HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ128mr :
            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ128mr :
              HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ128mr :
              HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ128mr :
            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ128mr :
              HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z128mr :
              HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  case MVT::v8f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
    break;
  case MVT::v4f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
    break;
  case MVT::v16f32:
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
    else
      Opc = X86::VMOVUPSZmr;
    break;
  case MVT::v8f64:
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
    else
      Opc = X86::VMOVUPDZmr;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the store isn't masked.
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
    else
      Opc = X86::VMOVDQU64Zmr;
    break;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Some of the instructions in the previous switch use FR128 instead
  // of FR32 for ValReg. Make sure the register we feed the instruction
  // matches its register class constraints.
  // Note: This is fine to do a copy from FR32 to FR128, this is the
  // same registers behind the scene and actually why it did not trigger
  // any bugs before.
  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc);
  addFullAddress(MIB, AM).addReg(ValReg);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:
      Signed = false;
      [[fallthrough]]; // Handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  Register ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
}
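
// Illustrative note: the constant path above folds small immediates directly
// into MOV8mi/MOV16mi/MOV32mi/MOV64mi32. The 64-bit form only encodes a 32-bit
// sign-extended immediate, which is why isInt<32>() gates it; wider constants
// fall back to materializing the value in a register first.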
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}
bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small &&
        TM.getCodeModel() != CodeModel::Medium)
      return false;

    // Can't handle large objects yet.
    if (TM.isLargeGlobalValue(GV))
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // Can't handle !absolute_symbol references yet.
    if (GV->isAbsoluteSymbolRef())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can. Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub. If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
      Register LoadReg;
      if (I != LocalValueMap.end() && I->second) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy(DL) == MVT::i64) {
          Opc = X86::MOV64rm;
          RC = &X86::GR64RegClass;
        } else {
          Opc = X86::MOV32rm;
          RC = &X86::GR32RegClass;
        }

        if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
            GVFlags == X86II::MO_GOTPCREL_NORELAX)
          StubAM.Base.Reg = X86::RIP;

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
            BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}
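
// Illustrative note: for globals that need a stub (e.g. GOT-based PIC), the
// code above first loads the global's address from the stub into LoadReg and
// then uses that register as the base of the final addressing mode, caching
// the loaded pointer in LocalValueMap so the same block doesn't reload it.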
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT();

    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = GTI.getSequentialElementStride(DL);
      while (true) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(PtrVT, Op);
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
            dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (const Value *I : reverse(GEPs))
      if (handleConstantAddresses(I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}
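
// For illustration, a GEP such as (names hypothetical)
//   %p = getelementptr { i32, [8 x i32] }, ptr %base, i32 0, i32 1, i32 %i
// can fold into a single addressing mode: the struct field becomes part of the
// constant displacement and the dynamic index becomes IndexReg with Scale = 4,
// as long as the scale is 1, 2, 4 or 8 and the displacement fits in 32 bits.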
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are alive across basic blocks. This ensures
  // that the values are consistently set across basic blocks, even
  // if different instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, things like hasOneUse could not be used accurately, if we
  // allow to reference values across basic blocks whereas they are not
  // alive across basic blocks initially.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small &&
        TM.getCodeModel() != CodeModel::Medium)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // Return a direct reference to the global. Fastisel can handle calls to
    // functions that require loads, such as dllimport and nonlazybind
    // functions.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can. Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else {
      AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    auto GetCallRegForValue = [this](const Value *V) {
      Register Reg = getRegForValue(V);

      // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
      if (Reg && Subtarget->isTarget64BitILP32()) {
        Register CopyReg = createResultReg(&X86::GR32RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32rr),
                CopyReg)
            .addReg(Reg);

        Register ExtReg = createResultReg(&X86::GR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
            .addImm(0)
            .addReg(CopyReg)
            .addImm(X86::sub_32bit);
        Reg = ExtReg;
      }

      return Reg;
    };

    if (AM.Base.Reg == 0) {
      AM.Base.Reg = GetCallRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = GetCallRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}
1125 bool X86FastISel::X86SelectStore(const Instruction
*I
) {
1126 // Atomic stores need special handling.
1127 const StoreInst
*S
= cast
<StoreInst
>(I
);
1132 const Value
*PtrV
= I
->getOperand(1);
1133 if (TLI
.supportSwiftError()) {
1134 // Swifterror values can come from either a function parameter with
1135 // swifterror attribute or an alloca with swifterror attribute.
1136 if (const Argument
*Arg
= dyn_cast
<Argument
>(PtrV
)) {
1137 if (Arg
->hasSwiftErrorAttr())
1141 if (const AllocaInst
*Alloca
= dyn_cast
<AllocaInst
>(PtrV
)) {
1142 if (Alloca
->isSwiftError())
1147 const Value
*Val
= S
->getValueOperand();
1148 const Value
*Ptr
= S
->getPointerOperand();
1151 if (!isTypeLegal(Val
->getType(), VT
, /*AllowI1=*/true))
1154 Align Alignment
= S
->getAlign();
1155 Align ABIAlignment
= DL
.getABITypeAlign(Val
->getType());
1156 bool Aligned
= Alignment
>= ABIAlignment
;
1159 if (!X86SelectAddress(Ptr
, AM
))
1162 return X86FastEmitStore(VT
, Val
, AM
, createMachineMemOperandFor(I
), Aligned
);
/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::Tail &&
      CC != CallingConv::SwiftTail &&
      CC != CallingConv::X86_FastCall &&
      CC != CallingConv::X86_StdCall &&
      CC != CallingConv::X86_ThisCall &&
      CC != CallingConv::X86_64_SysV &&
      CC != CallingConv::Win64)
    return false;

  // Don't handle popping bytes if they don't fit the ret's immediate.
  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
      CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    Register Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(DL, RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
        SrcVT = MVT::i8;
      }
      if (SrcVT != DstVT) {
        unsigned Op =
            Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
        SrcReg =
            fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg);
      }
    }

    // Make the copy.
    Register DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // Swift calling convention does not require we copy the sret argument
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.

  // All x86 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
      CC != CallingConv::SwiftTail) {
    Register Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB;
  if (X86MFInfo->getBytesToPopOnReturn()) {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
              .addImm(X86MFInfo->getBytesToPopOnReturn());
  } else {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
  }
  for (unsigned Reg : RetRegs)
    MIB.addReg(Reg, RegState::Implicit);
  return true;
}
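
// Illustrative note: callee-pops conventions (e.g. stdcall/fastcall/thiscall
// on 32-bit x86) return with "ret imm16", which is why getBytesToPopOnReturn()
// must fit in 16 bits and selects RETI32/RETI64 above instead of a plain RET.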
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);

  // Atomic loads need special handling.
  if (LI->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  MVT VT;
  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
    return false;

  const Value *Ptr = LI->getPointerOperand();

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
                       LI->getAlign().value()))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasAVX = Subtarget->hasAVX();
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return HasAVX512 ? X86::VUCOMISSZrr
           : HasAVX  ? X86::VUCOMISSrr
           : HasSSE1 ? X86::UCOMISSrr
                     : 0;
  case MVT::f64:
    return HasAVX512 ? X86::VUCOMISDZrr
           : HasAVX  ? X86::VUCOMISDrr
           : HasSSE2 ? X86::UCOMISDrr
                     : 0;
  }
}
/// If the RHS of the comparison is the constant RHSC, return an opcode that
/// can fold the immediate into the compare (e.g. CMP32ri), otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default:
    return 0;
  case MVT::i8:
    return X86::CMP8ri;
  case MVT::i16:
    return X86::CMP16ri;
  case MVT::i32:
    return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    return isInt<32>(RHSC->getSExtValue()) ? X86::CMP64ri32 : 0;
  }
}
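
// Illustrative note: CMP64ri32 sign-extends a 32-bit immediate, so comparing
// against e.g. 0x7FFFFFFF can be folded while 0x100000000 cannot; in that case
// 0 is returned and the caller falls back to the register-register compare.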
bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
                                     const DebugLoc &CurMIMD) {
  Register Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareImmOpc))
          .addReg(Op0Reg)
          .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  Register Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareOpc))
      .addReg(Op0Reg)
      .addReg(Op1Reg);

  return true;
}
bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Below code only works for scalars.
  if (VT.isVector())
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_FALSE: {
    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32r0),
            ResultReg);
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
    if (!ResultReg)
      return false;
    break;
  }
  case CmpInst::FCMP_TRUE: {
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
            ResultReg).addImm(1);
    break;
  }
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just use
  // %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
    if (RHSC && RHSC->isNullValue())
      RHS = LHS;
  }

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  static const uint16_t SETFOpcTable[2][3] = {
    { X86::COND_E,  X86::COND_NP, X86::AND8rr },
    { X86::COND_NE, X86::COND_P,  X86::OR8rr  }
  };
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
  }

  ResultReg = createResultReg(&X86::GR8RegClass);
  if (SETFOpc) {
    if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
      return false;

    Register FlagReg1 = createResultReg(&X86::GR8RegClass);
    Register FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
            FlagReg1).addImm(SETFOpc[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
            FlagReg2).addImm(SETFOpc[1]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(SETFOpc[2]),
            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
    updateValueMap(I, ResultReg);
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LHS, RHS);

  // Emit a compare of LHS/RHS.
  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
          ResultReg).addImm(CC);
  updateValueMap(I, ResultReg);
  return true;
}
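
// Illustrative lowering sketch for the FCMP_OEQ path above (assuming SSE,
// register names arbitrary):
//   ucomiss %xmm1, %xmm0
//   sete    %al          ; COND_E
//   setnp   %cl          ; COND_NP, i.e. not unordered
//   andb    %cl, %al     ; ordered-and-equal
// FCMP_UNE uses SETNE/SETP combined with OR8rr instead.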
bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  Register ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle zero-extension from i1 to i8, which is common.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
    SrcVT = MVT::i8;

    if (ResultReg == 0)
      return false;
  }

  if (DstVT == MVT::i64) {
    // Handle extension to 64-bits via sub-register shenanigans.
    unsigned MovInst;

    switch (SrcVT.SimpleTy) {
    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;
    default: llvm_unreachable("Unexpected zext to i64 source type");
    }

    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovInst), Result32)
        .addReg(ResultReg);

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
        .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
  } else if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
    // extend to 32-bits and then extract down to 16-bits.
    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVZX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::X86SelectSExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  Register ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle sign-extension from i1 to i8.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
    if (ZExtReg == 0)
      return false;

    // Negate the result to make an 8-bit sign extended value.
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::NEG8r),
            ResultReg).addReg(ZExtReg);

    SrcVT = MVT::i8;
  }

  if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
    // extend to 32-bits and then extract down to 16-bits.
    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVSX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
                           ResultReg);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.getMBB(BI->getSuccessor(0));
  MachineBasicBlock *FalseMBB = FuncInfo.getMBB(BI->getSuccessor(1));

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  X86::CondCode CC;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());

      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, MIMD.getDL()); return true;
      case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, MIMD.getDL()); return true;
      }

      const Value *CmpLHS = CI->getOperand(0);
      const Value *CmpRHS = CI->getOperand(1);

      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
      // We don't have to materialize a zero constant for this case and can just
      // use %x again on the RHS.
      if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
        const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
        if (CmpRHSC && CmpRHSC->isNullValue())
          CmpRHS = CmpLHS;
      }

      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
      // code check. Instead two branch instructions are required to check all
      // the flags. First we change the predicate to a supported condition code,
      // which will be the first branch. Later on we will emit the second
      // branch.
      bool NeedExtraBranch = false;
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        [[fallthrough]];
      case CmpInst::FCMP_UNE:
        NeedExtraBranch = true;
        Predicate = CmpInst::FCMP_ONE;
        break;
      }

      bool SwapArgs;
      std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
      assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

      if (SwapArgs)
        std::swap(CmpLHS, CmpRHS);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
          .addMBB(TrueMBB).addImm(CC);

      // X86 requires a second branch to handle UNE (and OEQ, which is mapped
      // to UNE above).
      if (NeedExtraBranch) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
            .addMBB(TrueMBB).addImm(X86::COND_P);
      }

      finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri; break;
      case MVT::i16: TestOpc = X86::TEST16ri; break;
      case MVT::i32: TestOpc = X86::TEST32ri; break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        Register OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TestOpc))
            .addReg(OpReg).addImm(1);

        unsigned JmpCond = X86::COND_NE;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpCond = X86::COND_E;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
            .addMBB(TrueMBB).addImm(JmpCond);

        finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
        return true;
      }
    }
  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    Register TmpReg = getRegForValue(BI->getCondition());
    if (TmpReg == 0)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
        .addMBB(TrueMBB).addImm(CC);
    finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
    return true;
  }

  // Otherwise do a clumsy setcc and re-test it.
  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
  // in an explicit cast, so make sure to handle that correctly.
  Register OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  // In case OpReg is a K register, COPY to a GPR.
  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
    unsigned KOpReg = OpReg;
    OpReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), OpReg)
        .addReg(KOpReg);
    OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
  }
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
      .addReg(OpReg)
      .addImm(1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
      .addMBB(TrueMBB).addImm(X86::COND_NE);
  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
  return true;
}
bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0;
  const TargetRegisterClass *RC = nullptr;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected shift opcode");
    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected shift opcode");
    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected shift opcode");
    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
    }
  } else {
    return false;
  }

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          CReg).addReg(Op1Reg);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::KILL), X86::CL)
        .addReg(CReg, RegState::Kill);

  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(OpReg), ResultReg)
      .addReg(Op0Reg);
  updateValueMap(I, ResultReg);
  return true;
}
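
// Illustrative note: x86 variable-count shifts only take the shift amount in
// CL, so the code above copies operand 1 into CL/CX/ECX/RCX and, when a
// super-register of CL was written, emits a KILL of CL so the liveness of the
// physical register stays precise for later passes.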
bool X86FastISel::X86SelectDivRem(const Instruction *I) {
  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 4;   // SDiv, SRem, UDiv, URem
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 DIV/IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended or zero-extended
  // into highreg. The exception is i8, where the dividend is defined
  // as a single register rather than a register pair, and we
  // therefore directly sign-extend or zero-extend the dividend into
  // lowreg, instead of copying, and ignore the highreg.
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    const TargetRegisterClass *RC;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned DivRemResultReg; // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
    { &X86::GR8RegClass,  X86::AX,  0, {
        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
      }
    },
    { &X86::GR16RegClass, X86::AX,  X86::DX, {
        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::AX,  U }, // UDiv
        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::DX,  U }, // URem
      }
    },
    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
      }
    },
    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
      }
    },
  };

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  unsigned TypeIndex, OpIndex;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:  TypeIndex = 0; break;
  case MVT::i16: TypeIndex = 1; break;
  case MVT::i32: TypeIndex = 2; break;
  case MVT::i64: TypeIndex = 3;
    if (!Subtarget->is64Bit())
      return false;
    break;
  }

  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected div/rem opcode");
  case Instruction::SDiv: OpIndex = 0; break;
  case Instruction::SRem: OpIndex = 1; break;
  case Instruction::UDiv: OpIndex = 2; break;
  case Instruction::URem: OpIndex = 3; break;
  }

  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0)
    return false;
  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0)
    return false;

  // Move op0 into low-order input register.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(X86::MOV32r0), Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (VT == MVT::i16) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(Copy), TypeEntry.HighInReg)
          .addReg(Zero32, 0, X86::sub_16bit);
      } else if (VT == MVT::i32) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(Copy), TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (VT == MVT::i64) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
      }
    }
  }
  // Generate the DIV/IDIV instruction.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  unsigned ResultReg = 0;
  if ((I->getOpcode() == Instruction::SRem ||
       I->getOpcode() == Instruction::URem) &&
      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
    Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
    Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(Copy), SourceSuperReg).addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SHR16ri),
            ResultSuperReg).addReg(SourceSuperReg).addImm(8);

    // Now reference the 8-bit subreg of the result.
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
                                           X86::sub_8bit);
  }
  // Copy the result out of the physreg if we haven't already.
  if (!ResultReg) {
    ResultReg = createResultReg(TypeEntry.RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Copy), ResultReg)
        .addReg(OpEntry.DivRemResultReg);
  }
  updateValueMap(I, ResultReg);

  return true;
}
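// Sketch of the CMOV-based lowering below: the condition is materialized in
// EFLAGS (either by re-emitting the feeding compare or by a TEST against 1)
// and a single CMOVcc then picks between the two select operands.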
/// Emit a conditional move instruction (if they are supported) to lower
/// the select.
bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
  // Check if the subtarget supports these instructions.
  if (!Subtarget->canUseCMOV())
    return false;

  // FIXME: Add support for i8.
  if (RetVT < MVT::i16 || RetVT > MVT::i64)
    return false;

  const Value *Cond = I->getOperand(0);
  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  bool NeedTest = true;
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
    static const uint16_t SETFOpcTable[2][3] = {
      { X86::COND_NP, X86::COND_E,  X86::TEST8rr },
      { X86::COND_P,  X86::COND_NE, X86::OR8rr   }
    };
    const uint16_t *SETFOpc = nullptr;
    switch (Predicate) {
    default: break;
    case CmpInst::FCMP_OEQ:
      SETFOpc = &SETFOpcTable[0][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    case CmpInst::FCMP_UNE:
      SETFOpc = &SETFOpcTable[1][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    }

    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
    assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);
    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    // Emit a compare of the LHS and RHS, setting the flags.
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;

    if (SETFOpc) {
      Register FlagReg1 = createResultReg(&X86::GR8RegClass);
      Register FlagReg2 = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
              FlagReg1).addImm(SETFOpc[0]);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
              FlagReg2).addImm(SETFOpc[1]);
      auto const &II = TII.get(SETFOpc[2]);
      if (II.getNumDefs()) {
        Register TmpReg = createResultReg(&X86::GR8RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, TmpReg)
          .addReg(FlagReg2).addReg(FlagReg1);
      } else {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
          .addReg(FlagReg2).addReg(FlagReg1);
      }
    }
    NeedTest = false;
  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    Register TmpReg = getRegForValue(Cond);
    if (TmpReg == 0)
      return false;

    NeedTest = false;
  }

  if (NeedTest) {
    // Selects operate on i1, however, CondReg is 8 bits wide and may contain
    // garbage. Indeed, only the least significant bit is supposed to be
    // accurate. If we read more than the lsb, we may see non-zero values
    // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
    // the select. This is achieved by performing TEST against 1.
    Register CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;

    // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg);
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
        .addReg(CondReg)
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register RHSReg = getRegForValue(RHS);
  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC) / 8, false,
                                    Subtarget->hasNDD());
  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);
  return true;
}
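// Without AVX, the scalar-FP select below is simulated with mask logic, e.g.
// for f32 roughly:
//   CMPSSrri  mask, cmp-lhs, cmp-rhs, cc
//   ANDPSrr   t <- mask, true-val
//   ANDNPSrr  f <- mask, false-val
//   ORPSrr    result <- f, t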
/// Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
  if (!CI || (CI->getParent() != I->getParent()))
    return false;

  if (I->getType() != CI->getOperand(0)->getType() ||
      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
    return false;

  const Value *CmpLHS = CI->getOperand(0);
  const Value *CmpRHS = CI->getOperand(1);
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
    if (CmpRHSC && CmpRHSC->isNullValue())
      CmpRHS = CmpLHS;
  }

  unsigned CC;
  bool NeedSwap;
  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
  if (CC > 7 && !Subtarget->hasAVX())
    return false;

  if (NeedSwap)
    std::swap(CmpLHS, CmpRHS);

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register LHSReg = getRegForValue(LHS);
  Register RHSReg = getRegForValue(RHS);
  Register CmpLHSReg = getRegForValue(CmpLHS);
  Register CmpRHSReg = getRegForValue(CmpRHS);
  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  unsigned ResultReg;

  if (Subtarget->hasAVX512()) {
    // If we have AVX512 we can use a mask compare and masked movss/sd.
    const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
    const TargetRegisterClass *VK1 = &X86::VK1RegClass;

    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSZrri : X86::VCMPSDZrri;
    Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
                                       CC);

    // Need an IMPLICIT_DEF for the input that is used to generate the upper
    // bits of the result register since it's not based on any of the inputs.
    Register ImplicitDefReg = createResultReg(VR128X);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

    // Place RHSReg as the passthru of the masked movss/sd operation and put
    // LHS in the input. The mask input comes from the compare.
    unsigned MovOpcode =
      (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
    unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
                                        ImplicitDefReg, LHSReg);

    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);

  } else if (Subtarget->hasAVX()) {
    const TargetRegisterClass *VR128 = &X86::VR128RegClass;

    // If we have AVX, create 1 blendv instead of 3 logic instructions.
    // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
    // uses XMM0 as the selection register. That may need just as many
    // instructions as the AND/ANDN/OR sequence due to register moves, so
    // don't bother.
    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSrri : X86::VCMPSDrri;
    unsigned BlendOpcode =
      (RetVT == MVT::f32) ? X86::VBLENDVPSrrr : X86::VBLENDVPDrrr;

    Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
                                       CC);
    Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
                                          CmpReg);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
  } else {
    // Choose the SSE instruction sequence based on data type (float or double).
    static const uint16_t OpcTable[2][4] = {
      { X86::CMPSSrri,  X86::ANDPSrr,  X86::ANDNPSrr,  X86::ORPSrr  },
      { X86::CMPSDrri,  X86::ANDPDrr,  X86::ANDNPDrr,  X86::ORPDrr  }
    };

    const uint16_t *Opc = nullptr;
    switch (RetVT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = &OpcTable[0][0]; break;
    case MVT::f64: Opc = &OpcTable[1][0]; break;
    }

    const TargetRegisterClass *VR128 = &X86::VR128RegClass;
    Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
    Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
    Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
    Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
  }
  updateValueMap(I, ResultReg);
  return true;
}
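// If neither a real CMOV nor the SSE mask sequence applies (e.g. for i8
// selects), the CMOV_* pseudo emitted below is used; it is expanded into an
// explicit branch diamond later in the backend.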
bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
  // These are pseudo CMOV instructions and will be later expanded into control-
  // flow.
  unsigned Opc;
  switch (RetVT.SimpleTy) {
  default: return false;
  case MVT::i8:  Opc = X86::CMOV_GR8;  break;
  case MVT::i16: Opc = X86::CMOV_GR16; break;
  case MVT::i32: Opc = X86::CMOV_GR32; break;
  case MVT::f16:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
  case MVT::f32:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
  case MVT::f64:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
  }

  const Value *Cond = I->getOperand(0);
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
    if (CC > X86::LAST_VALID_COND)
      return false;

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);

    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;
  } else {
    Register CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;

    // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg);
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
        .addReg(CondReg)
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register LHSReg = getRegForValue(LHS);
  Register RHSReg = getRegForValue(RHS);
  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);

  Register ResultReg =
    fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  // Check if we can fold the select.
  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    const Value *Opnd = nullptr;
    switch (Predicate) {
    default:                              break;
    case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
    case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
    }
    // No need for a select anymore - this is an unconditional move.
    if (Opnd) {
      Register OpReg = getRegForValue(Opnd);
      if (OpReg == 0)
        return false;
      const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(OpReg);
      updateValueMap(I, ResultReg);
      return true;
    }
  }

  // First try to use real conditional move instructions.
  if (X86FastEmitCMoveSelect(RetVT, I))
    return true;

  // Try to use a sequence of SSE instructions to simulate a conditional move.
  if (X86FastEmitSSESelect(RetVT, I))
    return true;

  // Fall-back to pseudo conditional move instructions, which will be later
  // converted to control-flow.
  if (X86FastEmitPseudoSelect(RetVT, I))
    return true;

  return false;
}
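// Example (assuming AVX): 'sitofp i32 %x to float' is selected as an
// IMPLICIT_DEF of the destination XMM register followed by VCVTSI2SSrr, so
// the upper bits of the vector register have a defined source.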
// Common code for X86SelectSIToFP and X86SelectUIToFP.
bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
  // The target-independent selection algorithm in FastISel already knows how
  // to select a SINT_TO_FP if the target is SSE but not AVX.
  // Early exit if the subtarget doesn't have AVX.
  // Unsigned conversion requires avx512.
  bool HasAVX512 = Subtarget->hasAVX512();
  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
    return false;

  // TODO: We could sign extend narrower types.
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  // Select integer to float/double conversion.
  Register OpReg = getRegForValue(I->getOperand(0));
  if (OpReg == 0)
    return false;

  unsigned Opcode;

  static const uint16_t SCvtOpc[2][2][2] = {
    { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
      { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
    { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
      { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
  };
  static const uint16_t UCvtOpc[2][2] = {
    { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
    { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
  };
  bool Is64Bit = SrcVT == MVT::i64;

  if (I->getType()->isDoubleTy()) {
    // s/uitofp int -> double
    Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
  } else if (I->getType()->isFloatTy()) {
    // s/uitofp int -> float
    Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
  } else
    return false;

  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
  Register ImplicitDefReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
  Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned*/true);
}

bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned*/false);
}
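// fpext and fptrunc share the helper below: it emits one CVTSS2SD/CVTSD2SS
// (or the VEX/EVEX variant), adding an IMPLICIT_DEF input when AVX is
// available, mirroring the int-to-FP conversions above.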
// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
                                          unsigned TargetOpc,
                                          const TargetRegisterClass *RC) {
  assert((I->getOpcode() == Instruction::FPExt ||
          I->getOpcode() == Instruction::FPTrunc) &&
         "Instruction must be an FPExt or FPTrunc!");
  bool HasAVX = Subtarget->hasAVX();

  Register OpReg = getRegForValue(I->getOperand(0));
  if (OpReg == 0)
    return false;

  unsigned ImplicitDefReg;
  if (HasAVX) {
    ImplicitDefReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
  }

  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpc),
                ResultReg);

  if (HasAVX)
    MIB.addReg(ImplicitDefReg);

  MIB.addReg(OpReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(const Instruction *I) {
  if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
      I->getOperand(0)->getType()->isFloatTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
    // fpext from float to double.
    unsigned Opc =
        HasAVX512 ? X86::VCVTSS2SDZrr
                  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
  if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
      I->getOperand(0)->getType()->isDoubleTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
    // fptrunc from double to float.
    unsigned Opc =
        HasAVX512 ? X86::VCVTSD2SSZrr
                  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
  }

  return false;
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(DL, I->getType());

  // This code only handles truncation to byte.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    return false;
  if (!TLI.isTypeLegal(SrcVT))
    return false;

  Register InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

  if (SrcVT == MVT::i8) {
    // Truncate from i8 to i1; no code needed.
    updateValueMap(I, InputReg);
    return true;
  }

  // Issue an extract_subreg.
  Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
                                                  X86::sub_8bit);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::IsMemcpySmall(uint64_t Len) {
  return Len <= (Subtarget->is64Bit() ? 32 : 16);
}

bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
                                     X86AddressMode SrcAM, uint64_t Len) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemcpySmall(Len))
    return false;

  bool i64Legal = Subtarget->is64Bit();

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 8 && i64Legal)
      VT = MVT::i64;
    else if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else
      VT = MVT::i8;

    unsigned Reg;
    bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
    RV &= X86FastEmitStore(VT, Reg, DestAM);
    assert(RV && "Failed to emit load or store??");
    (void)RV;

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    DestAM.Disp += Size;
    SrcAM.Disp += Size;
  }

  return true;
}
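// Example: on x86-64 a constant 11-byte llvm.memcpy that satisfies
// IsMemcpySmall is expanded inline by TryEmitSmallMemcpy into an i64, an i16
// and an i8 load/store pair rather than a call to memcpy.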
bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16: {
    if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
      return false;

    const Value *Op = II->getArgOperand(0);
    Register InputReg = getRegForValue(Op);
    if (InputReg == 0)
      return false;

    // F16C only allows converting from float to half and from half to float.
    bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
    if (IsFloatToHalf) {
      if (!Op->getType()->isFloatTy())
        return false;
    } else {
      if (!II->getType()->isFloatTy())
        return false;
    }

    unsigned ResultReg = 0;
    const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
    if (IsFloatToHalf) {
      // 'InputReg' is implicitly promoted from register class FR32 to
      // register class VR128 by method 'constrainOperandRegClass' which is
      // directly called by 'fastEmitInst_ri'.
      // Instruction VCVTPS2PHrr takes an extra immediate operand which is
      // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
      // It's consistent with the other FP instructions, which are usually
      // controlled by MXCSR.
      unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
                                         : X86::VCVTPS2PHrr;
      InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);

      // Move the lower 32-bits of ResultReg to another register of class GR32.
      Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
                                   : X86::VMOVPDI2DIrr;
      ResultReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
          .addReg(InputReg, RegState::Kill);

      // The result value is in the lower 16-bits of ResultReg.
      unsigned RegIdx = X86::sub_16bit;
      ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
    } else {
      assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
      // Explicitly zero-extend the input to 32-bit.
      InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);

      // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
      InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
                            InputReg);

      unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
                                         : X86::VCVTPH2PSrr;
      InputReg = fastEmitInst_r(Opc, RC, InputReg);

      // The result value is in the lower 32-bits of ResultReg.
      // Emit an explicit copy from register class VR128 to register class FR32.
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(InputReg, RegState::Kill);
    }

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::frameaddress: {
    MachineFunction *MF = FuncInfo.MF;
    if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Opc;
    const TargetRegisterClass *RC = nullptr;

    switch (VT.SimpleTy) {
    default: llvm_unreachable("Invalid result type for frameaddress.");
    case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
    case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
    }

    // This needs to be set before we call getPtrSizedFrameRegister, otherwise
    // we get the wrong frame register.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
    assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
            (FrameReg == X86::EBP && VT == MVT::i32)) &&
           "Invalid Frame Register!");

    // Always make a copy of the frame register to a vreg first, so that we
    // never directly reference the frame register (the TwoAddressInstruction-
    // Pass doesn't like that).
    Register SrcReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);

    // Now recursively load from the frame address.
    // movq (%rbp), %rax
    // movq (%rax), %rax
    // movq (%rax), %rax
    // ...
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      Register DestReg = createResultReg(RC);
      addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                           TII.get(Opc), DestReg), SrcReg);
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::memcpy: {
    const MemCpyInst *MCI = cast<MemCpyInst>(II);
    // Don't handle volatile or variable length memcpys.
    if (MCI->isVolatile())
      return false;

    if (isa<ConstantInt>(MCI->getLength())) {
      // Small memcpy's are common enough that we want to do them
      // without a call if possible.
      uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
      if (IsMemcpySmall(Len)) {
        X86AddressMode DestAM, SrcAM;
        if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
            !X86SelectAddress(MCI->getRawSource(), SrcAM))
          return false;
        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
        return true;
      }
    }

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memcpy", II->arg_size() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);

    if (MSI->isVolatile())
      return false;

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memset", II->arg_size() - 1);
  }
  case Intrinsic::stackprotector: {
    // Emit code to store the stack guard onto the stack.
    EVT PtrTy = TLI.getPointerTy(DL);

    const Value *Op1 = II->getArgOperand(0); // The guard's value.
    const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));

    MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);

    // Grab the frame index.
    X86AddressMode AM;
    if (!X86SelectAddress(Slot, AM)) return false;
    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
    X86AddressMode AM;
    assert(DI->getAddress() && "Null address should be checked earlier!");
    if (!X86SelectAddress(DI->getAddress(), AM))
      return false;
    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
           "Expected inlined-at fields to agree");
    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II), AM)
        .addImm(0)
        .addMetadata(DI->getVariable())
        .addMetadata(DI->getExpression());
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TRAP));
    return true;
  }
  case Intrinsic::sqrt: {
    if (!Subtarget->hasSSE1())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
    // is not generated by FastISel yet.
    // FIXME: Update this code once tablegen can handle it.
    static const uint16_t SqrtOpc[3][2] = {
      { X86::SQRTSSr,   X86::SQRTSDr },
      { X86::VSQRTSSr,  X86::VSQRTSDr },
      { X86::VSQRTSSZr, X86::VSQRTSDZr },
    };
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :
                                                 0;
    unsigned Opc;
    switch (VT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
    }

    const Value *SrcVal = II->getArgOperand(0);
    Register SrcReg = getRegForValue(SrcVal);
    if (SrcReg == 0)
      return false;

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned ImplicitDefReg = 0;
    if (AVXLevel > 0) {
      ImplicitDefReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
    }

    Register ResultReg = createResultReg(RC);
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                  ResultReg);

    if (ImplicitDefReg)
      MIB.addReg(ImplicitDefReg);

    MIB.addReg(SrcReg);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics
    // into add/sub/mul followed by either seto or setb.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);
    assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
           Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
           "Overflow value expected to be an i1");

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT < MVT::i8 || VT > MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);

    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
      std::swap(LHS, RHS);

    unsigned BaseOpc, CondCode;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
    case Intrinsic::uadd_with_overflow:
      BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
    case Intrinsic::ssub_with_overflow:
      BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
    case Intrinsic::usub_with_overflow:
      BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
    case Intrinsic::smul_with_overflow:
      BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
    case Intrinsic::umul_with_overflow:
      BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
    }

    Register LHSReg = getRegForValue(LHS);
    if (LHSReg == 0)
      return false;

    unsigned ResultReg = 0;
    // Check if we have an immediate version.
    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
      static const uint16_t Opc[2][4] = {
        { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
        { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
      };

      if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
          CondCode == X86::COND_O) {
        // We can use INC/DEC.
        ResultReg = createResultReg(TLI.getRegClassFor(VT));
        bool IsDec = BaseOpc == ISD::SUB;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
          .addReg(LHSReg);
      } else
        ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
    }

    unsigned RHSReg;
    if (!ResultReg) {
      RHSReg = getRegForValue(RHS);
      if (RHSReg == 0)
        return false;
      ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
    }

    // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
    // it manually.
    if (BaseOpc == X86ISD::UMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
      static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
      // First copy the first operand into RAX, which is an implicit input to
      // the X86::MUL*r instruction.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
        .addReg(LHSReg);
      ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
                                 TLI.getRegClassFor(VT), RHSReg);
    } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
      if (VT == MVT::i8) {
        // Copy the first operand into AL, which is an implicit input to the
        // X86::IMUL8r instruction.
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::COPY), X86::AL)
          .addReg(LHSReg);
        ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
      } else
        ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
                                    TLI.getRegClassFor(VT), LHSReg, RHSReg);
    }

    if (!ResultReg)
      return false;

    // Assign to a GPR since the overflow return value is lowered to a SETcc.
    Register ResultReg2 = createResultReg(&X86::GR8RegClass);
    assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
            ResultReg2).addImm(CondCode);

    updateValueMap(II, ResultReg, 2);
    return true;
  }
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    bool IsInputDouble;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::x86_sse_cvttss2si:
    case Intrinsic::x86_sse_cvttss2si64:
      if (!Subtarget->hasSSE1())
        return false;
      IsInputDouble = false;
      break;
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse2_cvttsd2si64:
      if (!Subtarget->hasSSE2())
        return false;
      IsInputDouble = true;
      break;
    }

    Type *RetTy = II->getCalledFunction()->getReturnType();
    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    static const uint16_t CvtOpc[3][2][2] = {
      { { X86::CVTTSS2SIrr,   X86::CVTTSS2SI64rr },
        { X86::CVTTSD2SIrr,   X86::CVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIrr,  X86::VCVTTSS2SI64rr },
        { X86::VCVTTSD2SIrr,  X86::VCVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
        { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
    };
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :
                                                 0;
    unsigned Opc;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected result type.");
    case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
    case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
    }

    // Check if we can fold insertelement instructions into the convert.
    const Value *Op = II->getArgOperand(0);
    while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
      const Value *Index = IE->getOperand(2);
      if (!isa<ConstantInt>(Index))
        break;
      unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();

      if (Idx == 0) {
        Op = IE->getOperand(1);
        break;
      }
      Op = IE->getOperand(0);
    }

    Register Reg = getRegForValue(Op);
    if (Reg == 0)
      return false;

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(Reg);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::x86_sse42_crc32_32_8:
  case Intrinsic::x86_sse42_crc32_32_16:
  case Intrinsic::x86_sse42_crc32_32_32:
  case Intrinsic::x86_sse42_crc32_64_64: {
    if (!Subtarget->hasCRC32())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Opc;
    const TargetRegisterClass *RC = nullptr;

    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic.");
#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
    case Intrinsic::x86_sse42_crc32_32_8:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
      RC = &X86::GR32RegClass;
      break;
    case Intrinsic::x86_sse42_crc32_32_16:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16);
      RC = &X86::GR32RegClass;
      break;
    case Intrinsic::x86_sse42_crc32_32_32:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32);
      RC = &X86::GR32RegClass;
      break;
    case Intrinsic::x86_sse42_crc32_64_64:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64);
      RC = &X86::GR64RegClass;
      break;
#undef GET_EGPR_IF_ENABLED
    }

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);

    Register LHSReg = getRegForValue(LHS);
    Register RHSReg = getRegForValue(RHS);
    if (!LHSReg || !RHSReg)
      return false;

    Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg);
    if (!ResultReg)
      return false;

    updateValueMap(II, ResultReg);
    return true;
  }
  }
}
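// fastLowerArguments below only handles the plain SysV x86-64 C convention:
// up to six i32/i64 arguments in EDI/ESI/EDX/ECX/R8D/R9D (or their 64-bit
// counterparts) and up to eight f32/f64 arguments in XMM0-XMM7; anything
// else falls back to SelectionDAG.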
bool X86FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;

  if (Subtarget->isCallingConvWin64(CC))
    return false;

  if (!Subtarget->is64Bit())
    return false;

  if (Subtarget->useSoftFloat())
    return false;

  // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i32:
    case MVT::i64:
      ++GPRCnt;
      break;
    case MVT::f32:
    case MVT::f64:
      if (!Subtarget->hasSSE1())
        return false;
      ++FPRCnt;
      break;
    }

    if (GPRCnt > 6)
      return false;

    if (FPRCnt > 8)
      return false;
  }

  static const MCPhysReg GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const MCPhysReg GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const MCPhysReg XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned SrcReg;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type.");
    case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
    case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
    case MVT::f32: [[fallthrough]];
    case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
    }
    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
                                                  CallingConv::ID CC,
                                                  const CallBase *CB) {
  if (Subtarget->is64Bit())
    return 0;
  if (Subtarget->getTargetTriple().isOSMSVCRT())
    return 0;
  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
      CC == CallingConv::HiPE || CC == CallingConv::Tail ||
      CC == CallingConv::SwiftTail)
    return 0;

  if (CB)
    if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
        CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
      return 0;

  return 4;
}
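// Call lowering below follows the usual frame protocol: CALLSEQ_START,
// copies/stores for each argument location, the call itself (direct or
// register-indirect), CALLSEQ_END, and finally copies of the returned
// physical registers into virtual registers.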
3206 bool X86FastISel::fastLowerCall(CallLoweringInfo
&CLI
) {
3207 auto &OutVals
= CLI
.OutVals
;
3208 auto &OutFlags
= CLI
.OutFlags
;
3209 auto &OutRegs
= CLI
.OutRegs
;
3210 auto &Ins
= CLI
.Ins
;
3211 auto &InRegs
= CLI
.InRegs
;
3212 CallingConv::ID CC
= CLI
.CallConv
;
3213 bool &IsTailCall
= CLI
.IsTailCall
;
3214 bool IsVarArg
= CLI
.IsVarArg
;
3215 const Value
*Callee
= CLI
.Callee
;
3216 MCSymbol
*Symbol
= CLI
.Symbol
;
3217 const auto *CB
= CLI
.CB
;
3219 bool Is64Bit
= Subtarget
->is64Bit();
3220 bool IsWin64
= Subtarget
->isCallingConvWin64(CC
);
3222 // Call / invoke instructions with NoCfCheck attribute require special
3224 if (CB
&& CB
->doesNoCfCheck())
3227 // Functions with no_caller_saved_registers that need special handling.
3228 if ((CB
&& isa
<CallInst
>(CB
) && CB
->hasFnAttr("no_caller_saved_registers")))
3231 // Functions with no_callee_saved_registers that need special handling.
3232 if ((CB
&& CB
->hasFnAttr("no_callee_saved_registers")))
3235 // Indirect calls with CFI checks need special handling.
3236 if (CB
&& CB
->isIndirectCall() && CB
->getOperandBundle(LLVMContext::OB_kcfi
))
3239 // Functions using thunks for indirect calls need to use SDISel.
3240 if (Subtarget
->useIndirectThunkCalls())
3243 // Handle only C and fastcc calling conventions for now.
3245 default: return false;
3246 case CallingConv::C
:
3247 case CallingConv::Fast
:
3248 case CallingConv::Tail
:
3249 case CallingConv::Swift
:
3250 case CallingConv::SwiftTail
:
3251 case CallingConv::X86_FastCall
:
3252 case CallingConv::X86_StdCall
:
3253 case CallingConv::X86_ThisCall
:
3254 case CallingConv::Win64
:
3255 case CallingConv::X86_64_SysV
:
3256 case CallingConv::CFGuard_Check
:
3260 // Allow SelectionDAG isel to handle tail calls.
3264 // fastcc with -tailcallopt is intended to provide a guaranteed
3265 // tail call optimization. Fastisel doesn't know how to do that.
3266 if ((CC
== CallingConv::Fast
&& TM
.Options
.GuaranteedTailCallOpt
) ||
3267 CC
== CallingConv::Tail
|| CC
== CallingConv::SwiftTail
)
3270 // Don't know how to handle Win64 varargs yet. Nothing special needed for
3271 // x86-32. Special handling for x86-64 is implemented.
3272 if (IsVarArg
&& IsWin64
)
3275 // Don't know about inalloca yet.
3276 if (CLI
.CB
&& CLI
.CB
->hasInAllocaArgument())
3279 for (auto Flag
: CLI
.OutFlags
)
3280 if (Flag
.isSwiftError() || Flag
.isPreallocated())
3283 SmallVector
<MVT
, 16> OutVTs
;
3284 SmallVector
<unsigned, 16> ArgRegs
;
3286 // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3287 // instruction. This is safe because it is common to all FastISel supported
3288 // calling conventions on x86.
3289 for (int i
= 0, e
= OutVals
.size(); i
!= e
; ++i
) {
3290 Value
*&Val
= OutVals
[i
];
3291 ISD::ArgFlagsTy Flags
= OutFlags
[i
];
3292 if (auto *CI
= dyn_cast
<ConstantInt
>(Val
)) {
3293 if (CI
->getBitWidth() < 32) {
3295 Val
= ConstantInt::get(CI
->getContext(), CI
->getValue().sext(32));
3297 Val
= ConstantInt::get(CI
->getContext(), CI
->getValue().zext(32));
3301 // Passing bools around ends up doing a trunc to i1 and passing it.
3302 // Codegen this as an argument + "and 1".
3304 auto *TI
= dyn_cast
<TruncInst
>(Val
);
3306 if (TI
&& TI
->getType()->isIntegerTy(1) && CLI
.CB
&&
3307 (TI
->getParent() == CLI
.CB
->getParent()) && TI
->hasOneUse()) {
3308 Value
*PrevVal
= TI
->getOperand(0);
3309 ResultReg
= getRegForValue(PrevVal
);
3314 if (!isTypeLegal(PrevVal
->getType(), VT
))
3317 ResultReg
= fastEmit_ri(VT
, VT
, ISD::AND
, ResultReg
, 1);
3319 if (!isTypeLegal(Val
->getType(), VT
) ||
3320 (VT
.isVector() && VT
.getVectorElementType() == MVT::i1
))
3322 ResultReg
= getRegForValue(Val
);
3328 ArgRegs
.push_back(ResultReg
);
3329 OutVTs
.push_back(VT
);
3332 // Analyze operands of the call, assigning locations to each operand.
3333 SmallVector
<CCValAssign
, 16> ArgLocs
;
3334 CCState
CCInfo(CC
, IsVarArg
, *FuncInfo
.MF
, ArgLocs
, CLI
.RetTy
->getContext());
3336 // Allocate shadow area for Win64
3338 CCInfo
.AllocateStack(32, Align(8));
3340 CCInfo
.AnalyzeCallOperands(OutVTs
, OutFlags
, CC_X86
);
3342 // Get a count of how many bytes are to be pushed on the stack.
3343 unsigned NumBytes
= CCInfo
.getAlignedCallFrameSize();
3345 // Issue CALLSEQ_START
3346 unsigned AdjStackDown
= TII
.getCallFrameSetupOpcode();
3347 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
, TII
.get(AdjStackDown
))
3348 .addImm(NumBytes
).addImm(0).addImm(0);
3350 // Walk the register/memloc assignments, inserting copies/loads.
3351 const X86RegisterInfo
*RegInfo
= Subtarget
->getRegisterInfo();
3352 for (const CCValAssign
&VA
: ArgLocs
) {
3353 const Value
*ArgVal
= OutVals
[VA
.getValNo()];
3354 MVT ArgVT
= OutVTs
[VA
.getValNo()];
3356 if (ArgVT
== MVT::x86mmx
)
3359 unsigned ArgReg
= ArgRegs
[VA
.getValNo()];
3361 // Promote the value if needed.
3362 switch (VA
.getLocInfo()) {
3363 case CCValAssign::Full
: break;
3364 case CCValAssign::SExt
: {
3365 assert(VA
.getLocVT().isInteger() && !VA
.getLocVT().isVector() &&
3366 "Unexpected extend");
3368 if (ArgVT
== MVT::i1
)
3371 bool Emitted
= X86FastEmitExtend(ISD::SIGN_EXTEND
, VA
.getLocVT(), ArgReg
,
3373 assert(Emitted
&& "Failed to emit a sext!"); (void)Emitted
;
3374 ArgVT
= VA
.getLocVT();
3377 case CCValAssign::ZExt
: {
3378 assert(VA
.getLocVT().isInteger() && !VA
.getLocVT().isVector() &&
3379 "Unexpected extend");
3381 // Handle zero-extension from i1 to i8, which is common.
3382 if (ArgVT
== MVT::i1
) {
3383 // Set the high bits to zero.
3384 ArgReg
= fastEmitZExtFromI1(MVT::i8
, ArgReg
);
3391 bool Emitted
= X86FastEmitExtend(ISD::ZERO_EXTEND
, VA
.getLocVT(), ArgReg
,
3393 assert(Emitted
&& "Failed to emit a zext!"); (void)Emitted
;
3394 ArgVT
= VA
.getLocVT();
3397 case CCValAssign::AExt
: {
3398 assert(VA
.getLocVT().isInteger() && !VA
.getLocVT().isVector() &&
3399 "Unexpected extend");
3400 bool Emitted
= X86FastEmitExtend(ISD::ANY_EXTEND
, VA
.getLocVT(), ArgReg
,
3403 Emitted
= X86FastEmitExtend(ISD::ZERO_EXTEND
, VA
.getLocVT(), ArgReg
,
3406 Emitted
= X86FastEmitExtend(ISD::SIGN_EXTEND
, VA
.getLocVT(), ArgReg
,
3409 assert(Emitted
&& "Failed to emit a aext!"); (void)Emitted
;
3410 ArgVT
= VA
.getLocVT();
3413 case CCValAssign::BCvt
: {
3414 ArgReg
= fastEmit_r(ArgVT
, VA
.getLocVT(), ISD::BITCAST
, ArgReg
);
3415 assert(ArgReg
&& "Failed to emit a bitcast!");
3416 ArgVT
= VA
.getLocVT();
3419 case CCValAssign::VExt
:
3420 // VExt has not been implemented, so this should be impossible to reach
3421 // for now. However, fallback to Selection DAG isel once implemented.
3423 case CCValAssign::AExtUpper
:
3424 case CCValAssign::SExtUpper
:
3425 case CCValAssign::ZExtUpper
:
3426 case CCValAssign::FPExt
:
3427 case CCValAssign::Trunc
:
3428 llvm_unreachable("Unexpected loc info!");
3429 case CCValAssign::Indirect
:
3430 // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3435 if (VA
.isRegLoc()) {
3436 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
,
3437 TII
.get(TargetOpcode::COPY
), VA
.getLocReg()).addReg(ArgReg
);
3438 OutRegs
.push_back(VA
.getLocReg());
3440 assert(VA
.isMemLoc() && "Unknown value location!");
3442 // Don't emit stores for undef values.
3443 if (isa
<UndefValue
>(ArgVal
))
3446 unsigned LocMemOffset
= VA
.getLocMemOffset();
3448 AM
.Base
.Reg
= RegInfo
->getStackRegister();
3449 AM
.Disp
= LocMemOffset
;
3450 ISD::ArgFlagsTy Flags
= OutFlags
[VA
.getValNo()];
3451 Align Alignment
= DL
.getABITypeAlign(ArgVal
->getType());
3452 MachineMemOperand
*MMO
= FuncInfo
.MF
->getMachineMemOperand(
3453 MachinePointerInfo::getStack(*FuncInfo
.MF
, LocMemOffset
),
3454 MachineMemOperand::MOStore
, ArgVT
.getStoreSize(), Alignment
);
3455 if (Flags
.isByVal()) {
3456 X86AddressMode SrcAM
;
3457 SrcAM
.Base
.Reg
= ArgReg
;
3458 if (!TryEmitSmallMemcpy(AM
, SrcAM
, Flags
.getByValSize()))
3460 } else if (isa
<ConstantInt
>(ArgVal
) || isa
<ConstantPointerNull
>(ArgVal
)) {
3461 // If this is a really simple value, emit this with the Value* version
3462 // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3463 // as it can cause us to reevaluate the argument.
3464 if (!X86FastEmitStore(ArgVT
, ArgVal
, AM
, MMO
))
3467 if (!X86FastEmitStore(ArgVT
, ArgReg
, AM
, MMO
))
3473 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3475 if (Subtarget
->isPICStyleGOT()) {
3476 unsigned Base
= getInstrInfo()->getGlobalBaseReg(FuncInfo
.MF
);
3477 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
,
3478 TII
.get(TargetOpcode::COPY
), X86::EBX
).addReg(Base
);
3481 if (Is64Bit
&& IsVarArg
&& !IsWin64
) {
3482 // From AMD64 ABI document:
3483 // For calls that may call functions that use varargs or stdargs
3484 // (prototype-less calls or calls to functions containing ellipsis (...) in
3485 // the declaration) %al is used as hidden argument to specify the number
3486 // of SSE registers used. The contents of %al do not need to match exactly
3487 // the number of registers, but must be an ubound on the number of SSE
3488 // registers used and is in the range 0 - 8 inclusive.
3490 // Count the number of XMM registers allocated.
3491 static const MCPhysReg XMMArgRegs
[] = {
3492 X86::XMM0
, X86::XMM1
, X86::XMM2
, X86::XMM3
,
3493 X86::XMM4
, X86::XMM5
, X86::XMM6
, X86::XMM7
3495 unsigned NumXMMRegs
= CCInfo
.getFirstUnallocated(XMMArgRegs
);
3496 assert((Subtarget
->hasSSE1() || !NumXMMRegs
)
3497 && "SSE registers cannot be used when SSE is disabled");
3498 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
, TII
.get(X86::MOV8ri
),
3499 X86::AL
).addImm(NumXMMRegs
);
3502 // Materialize callee address in a register. FIXME: GV address can be
3503 // handled with a CALLpcrel32 instead.
3504 X86AddressMode CalleeAM
;
3505 if (!X86SelectCallAddress(Callee
, CalleeAM
))
3508 unsigned CalleeOp
= 0;
3509 const GlobalValue
*GV
= nullptr;
3510 if (CalleeAM
.GV
!= nullptr) {
3512 } else if (CalleeAM
.Base
.Reg
!= 0) {
3513 CalleeOp
= CalleeAM
.Base
.Reg
;
3518 MachineInstrBuilder MIB
;
3520 // Register-indirect call.
3521 unsigned CallOpc
= Is64Bit
? X86::CALL64r
: X86::CALL32r
;
3522 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
, TII
.get(CallOpc
))
3526 assert(GV
&& "Not a direct call");
3527 // See if we need any target-specific flags on the GV operand.
3528 unsigned char OpFlags
= Subtarget
->classifyGlobalFunctionReference(GV
);
3529 if (OpFlags
== X86II::MO_PLT
&& !Is64Bit
&&
3530 TM
.getRelocationModel() == Reloc::Static
&& isa
<Function
>(GV
) &&
3531 cast
<Function
>(GV
)->isIntrinsic())
3532 OpFlags
= X86II::MO_NO_FLAG
;
3534 // This will be a direct call, or an indirect call through memory for
3535 // NonLazyBind calls or dllimport calls.
3536 bool NeedLoad
= OpFlags
== X86II::MO_DLLIMPORT
||
3537 OpFlags
== X86II::MO_GOTPCREL
||
3538 OpFlags
== X86II::MO_GOTPCREL_NORELAX
||
3539 OpFlags
== X86II::MO_COFFSTUB
;
3540 unsigned CallOpc
= NeedLoad
3541 ? (Is64Bit
? X86::CALL64m
: X86::CALL32m
)
3542 : (Is64Bit
? X86::CALL64pcrel32
: X86::CALLpcrel32
);
3544 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
, TII
.get(CallOpc
));
3546 MIB
.addReg(Is64Bit
? X86::RIP
: X86::NoRegister
).addImm(1).addReg(0);
3548 MIB
.addSym(Symbol
, OpFlags
);
3550 MIB
.addGlobalAddress(GV
, 0, OpFlags
);
3555 // Add a register mask operand representing the call-preserved registers.
3556 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3557 MIB
.addRegMask(TRI
.getCallPreservedMask(*FuncInfo
.MF
, CC
));
3559 // Add an implicit use GOT pointer in EBX.
3560 if (Subtarget
->isPICStyleGOT())
3561 MIB
.addReg(X86::EBX
, RegState::Implicit
);
3563 if (Is64Bit
&& IsVarArg
&& !IsWin64
)
3564 MIB
.addReg(X86::AL
, RegState::Implicit
);
3566 // Add implicit physical register uses to the call.
3567 for (auto Reg
: OutRegs
)
3568 MIB
.addReg(Reg
, RegState::Implicit
);
3570 // Issue CALLSEQ_END
3571 unsigned NumBytesForCalleeToPop
=
3572 X86::isCalleePop(CC
, Subtarget
->is64Bit(), IsVarArg
,
3573 TM
.Options
.GuaranteedTailCallOpt
)
3574 ? NumBytes
// Callee pops everything.
3575 : computeBytesPoppedByCalleeForSRet(Subtarget
, CC
, CLI
.CB
);
3576 unsigned AdjStackUp
= TII
.getCallFrameDestroyOpcode();
3577 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
, TII
.get(AdjStackUp
))
3578 .addImm(NumBytes
).addImm(NumBytesForCalleeToPop
);
3580 // Now handle call return values.
3581 SmallVector
<CCValAssign
, 16> RVLocs
;
3582 CCState
CCRetInfo(CC
, IsVarArg
, *FuncInfo
.MF
, RVLocs
,
3583 CLI
.RetTy
->getContext());
3584 CCRetInfo
.AnalyzeCallResult(Ins
, RetCC_X86
);
3586 // Copy all of the result registers out of their specified physreg.
3587 Register ResultReg
= FuncInfo
.CreateRegs(CLI
.RetTy
);
3588 for (unsigned i
= 0; i
!= RVLocs
.size(); ++i
) {
3589 CCValAssign
&VA
= RVLocs
[i
];
3590 EVT CopyVT
= VA
.getValVT();
3591 unsigned CopyReg
= ResultReg
+ i
;
3592 Register SrcReg
= VA
.getLocReg();
3594 // If this is x86-64, and we disabled SSE, we can't return FP values
3595 if ((CopyVT
== MVT::f32
|| CopyVT
== MVT::f64
) &&
3596 ((Is64Bit
|| Ins
[i
].Flags
.isInReg()) && !Subtarget
->hasSSE1())) {
3597 report_fatal_error("SSE register return with SSE disabled");
3600 // If we prefer to use the value in xmm registers, copy it out as f80 and
3601 // use a truncate to move it from fp stack reg to xmm reg.
3602 if ((SrcReg
== X86::FP0
|| SrcReg
== X86::FP1
) &&
3603 isScalarFPTypeInSSEReg(VA
.getValVT())) {
3605 CopyReg
= createResultReg(&X86::RFP80RegClass
);
3608 // Copy out the result.
3609 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, MIMD
,
3610 TII
.get(TargetOpcode::COPY
), CopyReg
).addReg(SrcReg
);
3611 InRegs
.push_back(VA
.getLocReg());

    // Round the f80 to the right size, which also moves it to the appropriate
    // xmm register. This is accomplished by storing the f80 value in memory
    // and then loading it back.
    if (CopyVT != VA.getValVT()) {
      EVT ResVT = VA.getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                TII.get(Opc)), FI)
        .addReg(CopyReg);
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                TII.get(Opc), ResultReg + i), FI);
    }
  }
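
  // The store/reload above is roughly equivalent to
  //   fstps (%rsp)          # round ST(0) down to the result width
  //   movss (%rsp), %xmm0   # reload it into an SSE register
  // except that a fresh fixed-size stack slot is used instead of (%rsp).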

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();
  CLI.Call = MIB;

  return true;
}

bool
X86FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::Ret:
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::SExt:
    return X86SelectSExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
    return X86SelectDivRem(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::SIToFP:
    return X86SelectSIToFP(I);
  case Instruction::UIToFP:
    return X86SelectUIToFP(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(DL, I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    Register Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    updateValueMap(I, Reg);
    return true;
  }
  case Instruction::BitCast: {
    // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
    if (!Subtarget->hasSSE2())
      return false;

    MVT SrcVT, DstVT;
    if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
        !isTypeLegal(I->getType(), DstVT))
      return false;

    // Only allow vectors that use xmm/ymm/zmm.
    if (!SrcVT.isVector() || !DstVT.isVector() ||
        SrcVT.getVectorElementType() == MVT::i1 ||
        DstVT.getVectorElementType() == MVT::i1)
      return false;

    Register Reg = getRegForValue(I->getOperand(0));
    if (!Reg)
      return false;

    // Emit a reg-reg copy so we don't propagate cached known bits information
    // with the wrong VT if we fall out of fast isel after selecting this.
    const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
    Register ResultReg = createResultReg(DstClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);

    updateValueMap(I, ResultReg);
    return true;
  }
  }

  return false;
}
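
// As a concrete example, an IR-level vector bitcast such as
//   %y = bitcast <4 x i32> %x to <2 x i64>
// is selected above as a plain register-to-register COPY between xmm-class
// virtual registers, since the bit pattern is unchanged.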

unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  uint64_t Imm = CI->getZExtValue();
  if (Imm == 0) {
    Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type");
    case MVT::i1:
    case MVT::i8:
      return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
    case MVT::i16:
      return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
    case MVT::i32:
      return SrcReg;
    case MVT::i64: {
      Register ResultReg = createResultReg(&X86::GR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
        .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
      return ResultReg;
    }
    }
  }
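
  // MOV32r0 is later expanded to a 32-bit xor of the register with itself,
  // which also clears the upper 32 bits, so wider zero constants only need
  // the SUBREG_TO_REG wrapper above rather than a 64-bit immediate move.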

  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type");
  case MVT::i1:
    VT = MVT::i8;
    [[fallthrough]];
  case MVT::i8:  Opc = X86::MOV8ri;  break;
  case MVT::i16: Opc = X86::MOV16ri; break;
  case MVT::i32: Opc = X86::MOV32ri; break;
  case MVT::i64: {
    if (isUInt<32>(Imm))
      Opc = X86::MOV32ri64;
    else if (isInt<32>(Imm))
      Opc = X86::MOV64ri32;
    else
      Opc = X86::MOV64ri;
    break;
  }
  }
  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
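
// For non-zero 64-bit constants the cheapest encoding is chosen above:
// MOV32ri64 relies on the implicit zero-extension of a 32-bit move,
// MOV64ri32 sign-extends a 32-bit immediate, and only genuinely 64-bit
// values fall back to the full movabs form (MOV64ri).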

unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Medium &&
      CM != CodeModel::Large)
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  switch (VT.SimpleTy) {
  default:
    return 0;
  case MVT::f32:
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
                    : X86::LD_Fp32m;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt
                    : X86::LD_Fp64m;
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  else if (OpFlag == X86II::MO_GOTOFF)
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  else if (Subtarget->is64Bit() && TM.getCodeModel() != CodeModel::Large)
    PICBase = X86::RIP;

  // Create the load from the constant pool.
  unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));

  // Large code model only applies to 64-bit mode.
  if (Subtarget->is64Bit() && CM == CodeModel::Large) {
    Register AddrReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
            AddrReg)
      .addConstantPoolIndex(CPI, 0, OpFlag);
    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                      TII.get(Opc), ResultReg);
    addRegReg(MIB, AddrReg, false, PICBase, false);
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getConstantPool(*FuncInfo.MF),
        MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
    MIB->addMemOperand(*FuncInfo.MF, MMO);
    return ResultReg;
  }

  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                   TII.get(Opc), ResultReg),
                           CPI, PICBase, OpFlag);
  return ResultReg;
}
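
// Under the small and medium code models the constant-pool load above is
// emitted RIP-relative, e.g.
//   movsd .LCPI0_0(%rip), %xmm0
// while the large code model first materializes the pool address with a
// 64-bit immediate move and then loads through that register.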

unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
  // Can't handle large GlobalValues yet.
  if (TM.getCodeModel() != CodeModel::Small &&
      TM.getCodeModel() != CodeModel::Medium)
    return 0;
  if (TM.isLargeGlobalValue(GV))
    return 0;

  // Materialize addresses with LEA/MOV instructions.
  X86AddressMode AM;
  if (X86SelectAddress(GV, AM)) {
    // If the expression is just a basereg, then we're done, otherwise we need
    // to emit an LEA.
    if (AM.BaseType == X86AddressMode::RegBase &&
        AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
      return AM.Base.Reg;

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    if (TM.getRelocationModel() == Reloc::Static &&
        TLI.getPointerTy(DL) == MVT::i64) {
      // The displacement code could be more than 32 bits away so we need to use
      // an instruction with a 64 bit immediate
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
              ResultReg)
        .addGlobalAddress(GV);
    } else {
      unsigned Opc =
          TLI.getPointerTy(DL) == MVT::i32
              ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
              : X86::LEA64r;
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                             TII.get(Opc), ResultReg), AM);
    }
    return ResultReg;
  }
  return 0;
}
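
// For example, with PIC on x86-64 a global address is typically materialized
// as a RIP-relative LEA such as
//   leaq gvar(%rip), %rax
// whereas the static-relocation/64-bit-pointer path above uses a movabs with
// the absolute address instead.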

unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return X86MaterializeInt(CI, VT);
  if (const auto *CFP = dyn_cast<ConstantFP>(C))
    return X86MaterializeFP(CFP, VT);
  if (const auto *GV = dyn_cast<GlobalValue>(C))
    return X86MaterializeGV(GV, VT);
  if (isa<UndefValue>(C)) {
    unsigned Opc = 0;
    switch (VT.SimpleTy) {
    default:
      break;
    case MVT::f32:
      if (!Subtarget->hasSSE1())
        Opc = X86::LD_Fp032;
      break;
    case MVT::f64:
      if (!Subtarget->hasSSE2())
        Opc = X86::LD_Fp064;
      break;
    case MVT::f80:
      Opc = X86::LD_Fp080;
      break;
    }

    if (Opc) {
      Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
              ResultReg);
      return ResultReg;
    }
  }

  return 0;
}

unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but targetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and targetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc =
      TLI.getPointerTy(DL) == MVT::i32
          ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
          : X86::LEA64r;
  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
  Register ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}
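
// A static alloca ends up as a frame-index LEA, e.g. roughly
//   leaq -24(%rbp), %rax
// with the actual offset filled in once frame lowering lays out the stack.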

unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return 0;

  // Get opcode and regclass for the given zero.
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasAVX512 = Subtarget->hasAVX512();
  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default:
    return 0;
  case MVT::f16:
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
    break;
  case MVT::f32:
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
          : HasSSE1 ? X86::FsFLD0SS
                    : X86::LD_Fp032;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
          : HasSSE2 ? X86::FsFLD0SD
                    : X86::LD_Fp064;
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
  return ResultReg;
}

bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  const Value *Ptr = LI->getPointerOperand();
  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  const X86InstrInfo &XII = (const X86InstrInfo &)TII;

  unsigned Size = DL.getTypeAllocSize(LI->getType());

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result = XII.foldMemoryOperandImpl(
      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
      /*AllowCommute=*/true);
  if (!Result)
    return false;

  // The index register could be in the wrong register class. Unfortunately,
  // foldMemoryOperandImpl could have commuted the instruction, so it's not
  // enough to just look at OpNo + the offset to the index reg. We actually
  // need to scan the instruction to find the index reg and see if it's in the
  // correct register class.
  unsigned OperandNo = 0;
  for (MachineInstr::mop_iterator I = Result->operands_begin(),
       E = Result->operands_end(); I != E; ++I, ++OperandNo) {
    MachineOperand &MO = *I;
    if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
      continue;
    // Found the index reg, now try to rewrite it.
    Register IndexReg = constrainOperandRegClass(Result->getDesc(),
                                                 MO.getReg(), OperandNo);
    if (IndexReg == MO.getReg())
      continue;
    MO.setReg(IndexReg);
  }

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}
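
// For example, a selected register add whose second input comes from an
// adjacent, single-use load can be rewritten here into a memory-operand
// form, roughly turning
//   movl (%rdi), %eax
//   addl %eax, %ecx
// into
//   addl (%rdi), %ecx
// which is what the memory-fold tables in X86InstrInfo describe.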

unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
                                        const TargetRegisterClass *RC,
                                        unsigned Op0, unsigned Op1,
                                        unsigned Op2, unsigned Op3) {
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  Register ResultReg = createResultReg(RC);
  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);

  if (II.getNumDefs() >= 1)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
        .addReg(Op0)
        .addReg(Op1)
        .addReg(Op2)
        .addReg(Op3);
  else {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
        .addReg(Op0)
        .addReg(Op1)
        .addReg(Op2)
        .addReg(Op3);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
            ResultReg)
        .addReg(II.implicit_defs()[0]);
  }
  return ResultReg;
}

FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo) {
  return new X86FastISel(funcInfo, libInfo);
}