//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "X86TargetMachine.h"
24 #include "llvm/Analysis/BranchProbabilityInfo.h"
25 #include "llvm/CodeGen/FastISel.h"
26 #include "llvm/CodeGen/FunctionLoweringInfo.h"
27 #include "llvm/CodeGen/MachineConstantPool.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GetElementPtrTypeIterator.h"
35 #include "llvm/IR/GlobalAlias.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/MC/MCAsmInfo.h"
41 #include "llvm/MC/MCSymbol.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Target/TargetOptions.h"
namespace {

class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  bool fastSelectInstruction(const Instruction *I) override;

  /// The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction.  If possible,
  /// try to fold the load as an operand to the instruction, returning true if
  /// possible.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          const DebugLoc &CurDbgLoc);

  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                        X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectSExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);
  bool X86SelectUIToFP(const Instruction *I);
  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);

  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, unsigned Op0,
                             bool Op0IsKill, unsigned Op1, bool Op1IsKill,
                             unsigned Op2, bool Op2IsKill, unsigned Op3,
                             bool Op3IsKill);
};

} // end anonymous namespace.
static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ: CC = 8;          break;
  case CmpInst::FCMP_ONE: CC = 12;         break;
  }

  return std::make_pair(CC, NeedSwap);
}

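// Illustrative note (added; not part of the original source): the (CC,
// NeedSwap) pair above feeds a CMPSS/CMPSD-style condition immediate. For
// example, FCMP_OGT maps to {CC = 1 ("less-than"), NeedSwap = true}, i.e.
// "a > b" is emitted as "b < a" after the caller swaps the operands.
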
/// Adds a complex addressing mode to the given machine instr builder.
/// Note, this will constrain the index register.  If it's not possible to
/// constrain the given index register, then a new one will be created. The
/// IndexReg field of the addressing mode will be updated to match in this case.
const MachineInstrBuilder &
X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
                            X86AddressMode &AM) {
  // First constrain the index register.  It needs to be a GR64_NOSP.
  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
                                         MIB->getNumOperands() +
                                         X86::AddrIndexReg);
  return ::addFullAddress(MIB, AM);
}

/// Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
    cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way.
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}

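// Illustrative IR pattern (an assumption about typical input, not taken from
// this file) that the folding above is meant to catch:
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %normal
// On success the caller can branch directly on the overflow flag
// (X86::COND_O / X86::COND_B) instead of materializing %ovf in a register.
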
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  bool HasSSE41 = Subtarget->hasSSE41();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX2 = Subtarget->hasAVX2();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = &X86::GR8RegClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = &X86::GR16RegClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = &X86::GR32RegClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = &X86::GR64RegClass;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
      RC  = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
      RC  = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  case MVT::v4f32:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPSZ128rm :
            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ128rm :
            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
    break;
  case MVT::v2f64:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPDZ128rm :
            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ128rm :
            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (IsNonTemporal && Alignment >= 16)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
    RC = &X86::VR512RegClass;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
    RC = &X86::VR512RegClass;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the load isn't masked.
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
    RC = &X86::VR512RegClass;
    break;
  }

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

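// Example of the opcode selection above (a sketch, assuming an AVX target
// without VLX): a 16-byte-aligned non-temporal v4f32 load picks
// X86::VMOVNTDQArm, an aligned regular load picks X86::VMOVAPSrm, and an
// unaligned one falls back to X86::VMOVUPSrm, all into the VR128 class.
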
/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(X86::AND8ri), AndResult)
      .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
    ValReg = AndResult;
    LLVM_FALLTHROUGH; // handle i1 as i8.
  }
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32:
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSS;
      else
        Opc = HasAVX512 ? X86::VMOVSSZmr :
              HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    } else
      Opc = X86::ST_Fp32m;
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSD;
      else
        Opc = HasAVX512 ? X86::VMOVSDZmr :
              HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    } else
      Opc = X86::ST_Fp64m;
    break;
  case MVT::x86mmx:
    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
    break;
  case MVT::v4f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ128mr :
              HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ128mr :
              HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ128mr :
            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ128mr :
              HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ128mr :
              HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ128mr :
            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ128mr :
              HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z128mr :
              HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
    else
      Opc = X86::VMOVUPSZmr;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
    else
      Opc = X86::VMOVUPDZmr;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the store isn't masked.
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
    else
      Opc = X86::VMOVDQU64Zmr;
    break;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Some of the instructions in the previous switch use FR128 instead
  // of FR32 for ValReg. Make sure the register we feed the instruction
  // matches its register class constraints.
  // Note: a copy from FR32 to FR128 is fine; these are the same registers
  // behind the scenes, which is why this did not trigger any bugs before.
  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:
      Signed = false;
      LLVM_FALLTHROUGH; // Handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  bool ValKill = hasTrivialKill(Val);
  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}

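// Example of the constant folding above (a sketch): for "store i32 42, i32* %p"
// the value operand is a ConstantInt, so a single X86::MOV32mi with immediate
// 42 is emitted instead of first materializing 42 in a register; storing a
// null pointer is handled the same way as storing 0.
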
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // Can't handle !absolute_symbol references yet.
    if (GV->isAbsoluteSymbolRef())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can. Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub. If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
      unsigned LoadReg;
      if (I != LocalValueMap.end() && I->second != 0) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy(DL) == MVT::i64) {
          Opc = X86::MOV64rm;
          RC = &X86::GR64RegClass;

          if (Subtarget->isPICStyleRIPRel())
            StubAM.Base.Reg = X86::RIP;
        } else {
          Opc = X86::MOV32rm;
          RC = &X86::GR32RegClass;
        }

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

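// Note (added for clarity, not in the original): when a global needs a stub
// load under PIC, the load is emitted in the local-value area and cached in
// LocalValueMap, so repeated references to the same global within one MBB
// reuse a single load of the stub entry rather than emitting one per use.
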
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
      while (true) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op).first;
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (const Value *I : reverse(GEPs))
      if (handleConstantAddresses(I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}

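// Worked example (illustrative, not from the original): for
//   %p = getelementptr inbounds [16 x i32], [16 x i32]* %arr, i64 0, i64 %i
// the constant index contributes 0 to Disp, the variable index %i becomes
// IndexReg with Scale = 4 (the i32 allocation size), and %arr supplies the
// base, yielding the addressing mode [base + IndexReg*4 + Disp].
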
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are alive across basic blocks. This ensures
  // that the values are consistently set across basic blocks, even
  // if different instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, things like hasOneUse could not be used accurately if we
  // allowed references to values across basic blocks when they are not
  // alive across basic blocks initially.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // Return a direct reference to the global. Fastisel can handle calls to
    // functions that require loads, such as dllimport and nonlazybind
    // functions.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can. Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else {
      AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  // Atomic stores need special handling.
  const StoreInst *S = cast<StoreInst>(I);

  if (S->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  const Value *Val = S->getValueOperand();
  const Value *Ptr = S->getPointerOperand();

  MVT VT;
  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
    return false;

  unsigned Alignment = S->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;
  bool Aligned = Alignment >= ABIAlignment;

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall &&
      CC != CallingConv::X86_StdCall &&
      CC != CallingConv::X86_ThisCall &&
      CC != CallingConv::X86_64_SysV &&
      CC != CallingConv::Win64)
    return false;

  // Don't handle popping bytes if they don't fit the ret's immediate.
  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // us anything about the FP stack, so bail out on FP0/FP1 returns.
    if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(DL, RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
                          SrcReg, /*TODO: Kill=*/false);
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // Swift calling convention does not require we copy the sret argument
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
  //
  // All x86 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
    unsigned Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB;
  if (X86MFInfo->getBytesToPopOnReturn()) {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
              .addImm(X86MFInfo->getBytesToPopOnReturn());
  } else {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
  }
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

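// Example of the callee-pops path above (a sketch): an x86-32 stdcall function
// that must pop 8 bytes of arguments has getBytesToPopOnReturn() == 8, so the
// return is emitted as RETIL 8; functions that pop nothing get a plain
// RETL/RETQ instead.
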
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);

  // Atomic loads need special handling.
  if (LI->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  MVT VT;
  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
    return false;

  const Value *Ptr = LI->getPointerOperand();

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  unsigned Alignment = LI->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
                       Alignment))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasAVX = Subtarget->hasAVX();
  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
  bool X86ScalarSSEf64 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return X86ScalarSSEf32
               ? (HasAVX512 ? X86::VUCOMISSZrr
                            : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
               : 0;
  case MVT::f64:
    return X86ScalarSSEf64
               ? (HasAVX512 ? X86::VUCOMISDZrr
                            : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
               : 0;
  }
}

/// If we have a comparison with RHS as the RHS of the comparison, return an
/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  int64_t Val = RHSC->getSExtValue();
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default:
    return 0;
  case MVT::i8:
    return X86::CMP8ri;
  case MVT::i16:
    if (isInt<8>(Val))
      return X86::CMP16ri8;
    return X86::CMP16ri;
  case MVT::i32:
    if (isInt<8>(Val))
      return X86::CMP32ri8;
    return X86::CMP32ri;
  case MVT::i64:
    if (isInt<8>(Val))
      return X86::CMP64ri8;
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if (isInt<32>(Val))
      return X86::CMP64ri32;
    return 0;
  }
}

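// Example (illustrative): comparing an i32 against 5 can use X86::CMP32ri8
// because 5 fits in a sign-extended 8-bit immediate, while comparing against
// 100000 needs X86::CMP32ri; an i64 immediate that does not fit in a
// sign-extended 32 bits cannot be folded at all, so the caller falls back to
// a register-register compare.
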
bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
                                     const DebugLoc &CurDbgLoc) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
          .addReg(Op0Reg)
          .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
      .addReg(Op0Reg)
      .addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_FALSE: {
    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
            ResultReg);
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
                                           X86::sub_8bit);
    if (!ResultReg)
      return false;
    break;
  }
  case CmpInst::FCMP_TRUE: {
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
            ResultReg).addImm(1);
    break;
  }
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just use
  // %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
    if (RHSC && RHSC->isNullValue())
      RHS = LHS;
  }

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  static const uint16_t SETFOpcTable[2][3] = {
    { X86::SETEr,  X86::SETNPr, X86::AND8rr },
    { X86::SETNEr, X86::SETPr,  X86::OR8rr }
  };
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
  }

  ResultReg = createResultReg(&X86::GR8RegClass);
  if (SETFOpc) {
    if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
      return false;

    unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
    unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
            FlagReg1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
            FlagReg2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
    updateValueMap(I, ResultReg);
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
  unsigned Opc = X86::getSETFromCond(CC);

  if (SwapArgs)
    std::swap(LHS, RHS);

  // Emit a compare of LHS/RHS.
  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  updateValueMap(I, ResultReg);
  return true;
}

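// Example of the two-flag case above: "fcmp oeq float %a, %b" cannot be
// tested with a single setcc because UCOMISS sets ZF both for equal and for
// unordered inputs, so the table emits SETEr and SETNPr and combines them
// with AND8rr; "fcmp une" uses SETNEr/SETPr combined with OR8rr instead.
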
bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle zero-extension from i1 to i8, which is common.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
    SrcVT = MVT::i8;

    if (ResultReg == 0)
      return false;
  }

  if (DstVT == MVT::i64) {
    // Handle extension to 64-bits via sub-register shenanigans.
    unsigned MovInst;

    switch (SrcVT.SimpleTy) {
    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;
    default: llvm_unreachable("Unexpected zext to i64 source type");
    }

    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
      .addReg(ResultReg);

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
      .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
  } else if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
    // extend to 32-bits and then extract down to 16-bits.
    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
                                           X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle sign-extension from i1 to i8.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
                                          /*TODO: Kill=*/false);
    if (ZExtReg == 0)
      return false;

    // Negate the result to make an 8-bit sign extended value.
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
            ResultReg).addReg(ZExtReg);

    SrcVT = MVT::i8;
  }

  if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
    // extend to 32-bits and then extract down to 16-bits.
    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
                                           X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  X86::CondCode CC;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());

      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
      case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, DbgLoc); return true;
      }

      const Value *CmpLHS = CI->getOperand(0);
      const Value *CmpRHS = CI->getOperand(1);

      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
      // 0.0.
      // We don't have to materialize a zero constant for this case and can just
      // use %x again on the RHS.
      if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
        const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
        if (CmpRHSC && CmpRHSC->isNullValue())
          CmpRHS = CmpLHS;
      }

      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
      // code check. Instead two branch instructions are required to check all
      // the flags. First we change the predicate to a supported condition code,
      // which will be the first branch. Later on we will emit the second
      // branch.
      bool NeedExtraBranch = false;
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        LLVM_FALLTHROUGH;
      case CmpInst::FCMP_UNE:
        NeedExtraBranch = true;
        Predicate = CmpInst::FCMP_ONE;
        break;
      }

      bool SwapArgs;
      unsigned BranchOpc;
      std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
      assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

      BranchOpc = X86::GetCondBranchFromCond(CC);
      if (SwapArgs)
        std::swap(CmpLHS, CmpRHS);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
        .addMBB(TrueMBB);

      // X86 requires a second branch to handle UNE (and OEQ, which is mapped
      // to UNE above).
      if (NeedExtraBranch) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
          .addMBB(TrueMBB);
      }

      finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri; break;
      case MVT::i16: TestOpc = X86::TEST16ri; break;
      case MVT::i32: TestOpc = X86::TEST32ri; break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        unsigned OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
          .addReg(OpReg).addImm(1);

        unsigned JmpOpc = X86::JNE_1;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpOpc = X86::JE_1;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
          .addMBB(TrueMBB);

        finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
        return true;
      }
    }
  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    unsigned TmpReg = getRegForValue(BI->getCondition());
    if (TmpReg == 0)
      return false;

    unsigned BranchOpc = X86::GetCondBranchFromCond(CC);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
      .addMBB(TrueMBB);
    finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
    return true;
  }

  // Otherwise do a clumsy setcc and re-test it.
  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
  // in an explicit cast, so make sure to handle that correctly.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  // In case OpReg is a K register, COPY to a GPR
  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
    unsigned KOpReg = OpReg;
    OpReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), OpReg)
        .addReg(KOpReg);
    OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
                                       X86::sub_8bit);
  }
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
      .addReg(OpReg)
      .addImm(1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
      .addMBB(TrueMBB);
  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
  return true;
}

bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0;
  const TargetRegisterClass *RC = nullptr;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected shift opcode");
    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected shift opcode");
    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected shift opcode");
    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
    }
  } else {
    return false;
  }

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          CReg).addReg(Op1Reg);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::KILL), X86::CL)
      .addReg(CReg, RegState::Kill);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
    .addReg(Op0Reg);
  updateValueMap(I, ResultReg);
  return true;
}

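// Note (added for clarity): variable shift amounts must live in CL, so the
// count is copied into CL first (with a KILL of the wider super-register when
// CX/ECX/RCX was used) and the shift itself is the rCL form, e.g. SHR32rCL
// for "lshr i32 %x, %n".
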
bool X86FastISel::X86SelectDivRem(const Instruction *I) {
  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 4;   // SDiv, SRem, UDiv, URem
  const static bool S = true;  // IsSigned
  const static bool U = false; // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 DIV/IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended or zero-extended
  // into highreg. The exception is i8, where the dividend is defined
  // as a single register rather than a register pair, and we
  // therefore directly sign-extend or zero-extend the dividend into
  // lowreg, instead of copying, and ignore the highreg.
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    const TargetRegisterClass *RC;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned DivRemResultReg; // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
    { &X86::GR8RegClass,  X86::AX,  0, {
        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
      }
    }, // i8
    { &X86::GR16RegClass, X86::AX,  X86::DX, {
        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::AX,  U }, // UDiv
        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::DX,  U }, // URem
      }
    }, // i16
    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
      }
    }, // i32
    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
      }
    }, // i64
  };

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  unsigned TypeIndex, OpIndex;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:  TypeIndex = 0; break;
  case MVT::i16: TypeIndex = 1; break;
  case MVT::i32: TypeIndex = 2; break;
  case MVT::i64: TypeIndex = 3;
    if (!Subtarget->is64Bit())
      return false;
    break;
  }

  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected div/rem opcode");
  case Instruction::SDiv: OpIndex = 0; break;
  case Instruction::SRem: OpIndex = 1; break;
  case Instruction::UDiv: OpIndex = 2; break;
  case Instruction::URem: OpIndex = 3; break;
  }

  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0)
    return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0)
    return false;

  // Move op0 into low-order input register.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(OpEntry.OpSignExtend));
    else {
      unsigned Zero32 = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(X86::MOV32r0), Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (VT == MVT::i16) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Copy), TypeEntry.HighInReg)
          .addReg(Zero32, 0, X86::sub_16bit);
      } else if (VT == MVT::i32) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Copy), TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (VT == MVT::i64) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
      }
    }
  }
  // Generate the DIV/IDIV instruction.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  unsigned ResultReg = 0;
  if ((I->getOpcode() == Instruction::SRem ||
       I->getOpcode() == Instruction::URem) &&
      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
    unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
    unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(Copy), SourceSuperReg).addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
            ResultSuperReg).addReg(SourceSuperReg).addImm(8);

    // Now reference the 8-bit subreg of the result.
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
                                           /*Kill=*/true, X86::sub_8bit);
  }
  // Copy the result out of the physreg if we haven't already.
  if (!ResultReg) {
    ResultReg = createResultReg(TypeEntry.RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
        .addReg(OpEntry.DivRemResultReg);
  }
  updateValueMap(I, ResultReg);

  return true;
}
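// Worked example (illustrative): "sdiv i32 %a, %b" hits the i32/SDiv row of
// the table, so the emitted sequence is approximately
//   COPY    EAX <- Op0Reg     ; OpCopy
//   CDQ                       ; OpSignExtend: EDX:EAX = sext(EAX)
//   IDIV32r Op1Reg            ; OpDivRem
// and the quotient is copied out of EAX (DivRemResultReg); an "srem" of the
// same type reads the remainder from EDX instead.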
/// Emit a conditional move instruction (if they are supported) to lower
/// the select.
bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
  // Check if the subtarget supports these instructions.
  if (!Subtarget->hasCMov())
    return false;

  // FIXME: Add support for i8.
  if (RetVT < MVT::i16 || RetVT > MVT::i64)
    return false;

  const Value *Cond = I->getOperand(0);
  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  bool NeedTest = true;
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
    static const uint16_t SETFOpcTable[2][3] = {
      { X86::SETNPr, X86::SETEr , X86::TEST8rr },
      { X86::SETPr,  X86::SETNEr, X86::OR8rr   }
    };
    const uint16_t *SETFOpc = nullptr;
    switch (Predicate) {
    default: break;
    case CmpInst::FCMP_OEQ:
      SETFOpc = &SETFOpcTable[0][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    case CmpInst::FCMP_UNE:
      SETFOpc = &SETFOpcTable[1][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    }

    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
    assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);
    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    // Emit a compare of the LHS and RHS, setting the flags.
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;

    if (SETFOpc) {
      unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
      unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
              FlagReg1);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
              FlagReg2);
      auto const &II = TII.get(SETFOpc[2]);
      if (II.getNumDefs()) {
        unsigned TmpReg = createResultReg(&X86::GR8RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
          .addReg(FlagReg2).addReg(FlagReg1);
      } else {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
          .addReg(FlagReg2).addReg(FlagReg1);
      }
    }
    NeedTest = false;
  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    unsigned TmpReg = getRegForValue(Cond);
    if (TmpReg == 0)
      return false;

    NeedTest = false;
  }

  if (NeedTest) {
    // Selects operate on i1, however, CondReg is 8 bits width and may contain
    // garbage. Indeed, only the less significant bit is supposed to be
    // accurate. If we read more than the lsb, we may see non-zero values
    // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
    // the select. This is achieved by performing TEST against 1.
    unsigned CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;
    bool CondIsKill = hasTrivialKill(Cond);

    // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg, getKillRegState(CondIsKill));
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
                                           X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
                                       LHSReg, LHSIsKill);
  updateValueMap(I, ResultReg);
  return true;
}
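// Note (illustrative): for a select whose condition is "fcmp oeq", a single
// SETcc is not enough because OEQ means ZF==1 && PF==0 after the compare, so
// the table above emits roughly
//   SETNP   FlagReg1
//   SETE    FlagReg2
//   TEST8rr FlagReg2, FlagReg1    ; ZF cleared only when both flags are set
// and the final CMOVNE then picks the true value; FCMP_UNE uses the
// complementary SETP/SETNE/OR8rr row.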
/// Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
  if (!CI || (CI->getParent() != I->getParent()))
    return false;

  if (I->getType() != CI->getOperand(0)->getType() ||
      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
    return false;

  const Value *CmpLHS = CI->getOperand(0);
  const Value *CmpRHS = CI->getOperand(1);
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
    if (CmpRHSC && CmpRHSC->isNullValue())
      CmpRHS = CmpLHS;
  }

  unsigned CC;
  bool NeedSwap;
  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
  if (CC > 7 && !Subtarget->hasAVX())
    return false;

  if (NeedSwap)
    std::swap(CmpLHS, CmpRHS);

  // Choose the SSE instruction sequence based on data type (float or double).
  static const uint16_t OpcTable[2][4] = {
    { X86::CMPSSrr,  X86::ANDPSrr,  X86::ANDNPSrr,  X86::ORPSrr  },
    { X86::CMPSDrr,  X86::ANDPDrr,  X86::ANDNPDrr,  X86::ORPDrr  }
  };

  const uint16_t *Opc = nullptr;
  switch (RetVT.SimpleTy) {
  default: return false;
  case MVT::f32: Opc = &OpcTable[0][0]; break;
  case MVT::f64: Opc = &OpcTable[1][0]; break;
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned CmpLHSReg = getRegForValue(CmpLHS);
  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);

  unsigned CmpRHSReg = getRegForValue(CmpRHS);
  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);

  if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  unsigned ResultReg;

  if (Subtarget->hasAVX512()) {
    // If we have AVX512 we can use a mask compare and masked movss/sd.
    const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
    const TargetRegisterClass *VK1 = &X86::VK1RegClass;

    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
    unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
                                       CmpRHSReg, CmpRHSIsKill, CC);

    // Need an IMPLICIT_DEF for the input that is used to generate the upper
    // bits of the result register since it's not based on any of the inputs.
    unsigned ImplicitDefReg = createResultReg(VR128X);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

    // Place RHSReg in the passthru of the masked movss/sd operation and put
    // LHS in the input. The mask input comes from the compare.
    unsigned MovOpcode =
      (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
    unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
                                        CmpReg, true, ImplicitDefReg, true,
                                        LHSReg, LHSIsKill);

    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);

  } else if (Subtarget->hasAVX()) {
    const TargetRegisterClass *VR128 = &X86::VR128RegClass;

    // If we have AVX, create 1 blendv instead of 3 logic instructions.
    // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
    // uses XMM0 as the selection register. That may need just as many
    // instructions as the AND/ANDN/OR sequence due to register moves, so
    // don't bother.
    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
    unsigned BlendOpcode =
      (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;

    unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
                                       CmpRHSReg, CmpRHSIsKill, CC);
    unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
                                          LHSReg, LHSIsKill, CmpReg, true);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
  } else {
    const TargetRegisterClass *VR128 = &X86::VR128RegClass;
    unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
                                       CmpRHSReg, CmpRHSIsKill, CC);
    unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
                                      LHSReg, LHSIsKill);
    unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
                                       RHSReg, RHSIsKill);
    unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
                                     AndReg, /*IsKill=*/true);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
  }
  updateValueMap(I, ResultReg);
  return true;
}
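// Note (illustrative): without AVX the f32 path above lowers the select into
// a branchless mask sequence, roughly
//   CMPSSrr  Mask, CmpLHS, CmpRHS, CC   ; produces all-ones or all-zeros
//   ANDPSrr  T1, Mask, LHS
//   ANDNPSrr T2, Mask, RHS
//   ORPSrr   Result, T2, T1
// With AVX this collapses into a single VBLENDVPS, and with AVX512 into a
// VCMPSSZrr mask compare plus a masked VMOVSSZrrk move.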
bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
  // These are pseudo CMOV instructions and will be later expanded into control-
  // flow.
  unsigned Opc;
  switch (RetVT.SimpleTy) {
  default: return false;
  case MVT::i8:  Opc = X86::CMOV_GR8;  break;
  case MVT::i16: Opc = X86::CMOV_GR16; break;
  case MVT::i32: Opc = X86::CMOV_GR32; break;
  case MVT::f32: Opc = X86::CMOV_FR32; break;
  case MVT::f64: Opc = X86::CMOV_FR64; break;
  }

  const Value *Cond = I->getOperand(0);
  X86::CondCode CC = X86::COND_NE;
  bool NeedTest = true;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
    if (CC > X86::LAST_VALID_COND)
      return false;

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);

    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;

    NeedTest = false;
  }

  if (NeedTest) {
    unsigned CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;
    bool CondIsKill = hasTrivialKill(Cond);

    // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg, getKillRegState(CondIsKill));
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
                                           X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);

  unsigned ResultReg =
    fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
  updateValueMap(I, ResultReg);
  return true;
}
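// Note (illustrative): CMOV_GR8/CMOV_FR32/... are pseudo instructions that the
// target later expands into an explicit compare-and-branch diamond, which is
// why this path is only the fallback when a real CMOV or an SSE mask sequence
// cannot be used.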
bool X86FastISel::X86SelectSelect(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  // Check if we can fold the select.
  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    const Value *Opnd = nullptr;
    switch (Predicate) {
    default:                              break;
    case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
    case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
    }
    // No need for a select anymore - this is an unconditional move.
    if (Opnd) {
      unsigned OpReg = getRegForValue(Opnd);
      if (OpReg == 0)
        return false;
      bool OpIsKill = hasTrivialKill(Opnd);
      const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(OpReg, getKillRegState(OpIsKill));
      updateValueMap(I, ResultReg);
      return true;
    }
  }

  // First try to use real conditional move instructions.
  if (X86FastEmitCMoveSelect(RetVT, I))
    return true;

  // Try to use a sequence of SSE instructions to simulate a conditional move.
  if (X86FastEmitSSESelect(RetVT, I))
    return true;

  // Fall-back to pseudo conditional move instructions, which will be later
  // converted to control-flow.
  if (X86FastEmitPseudoSelect(RetVT, I))
    return true;

  return false;
}
// Common code for X86SelectSIToFP and X86SelectUIToFP.
bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
  // The target-independent selection algorithm in FastISel already knows how
  // to select a SINT_TO_FP if the target is SSE but not AVX.
  // Early exit if the subtarget doesn't have AVX.
  // Unsigned conversion requires avx512.
  bool HasAVX512 = Subtarget->hasAVX512();
  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
    return false;

  // TODO: We could sign extend narrower types.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  // Select integer to float/double conversion.
  unsigned OpReg = getRegForValue(I->getOperand(0));
  if (OpReg == 0)
    return false;

  unsigned Opcode;

  static const uint16_t SCvtOpc[2][2][2] = {
    { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
      { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
    { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
      { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
  };
  static const uint16_t UCvtOpc[2][2] = {
    { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
    { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
  };
  bool Is64Bit = SrcVT == MVT::i64;

  if (I->getType()->isDoubleTy()) {
    // s/uitofp int -> double
    Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
  } else if (I->getType()->isFloatTy()) {
    // s/uitofp int -> float
    Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
  } else
    return false;

  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
  unsigned ImplicitDefReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
  unsigned ResultReg =
      fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned*/true);
}

bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned*/false);
}
// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
                                          unsigned TargetOpc,
                                          const TargetRegisterClass *RC) {
  assert((I->getOpcode() == Instruction::FPExt ||
          I->getOpcode() == Instruction::FPTrunc) &&
         "Instruction must be an FPExt or FPTrunc!");

  unsigned OpReg = getRegForValue(I->getOperand(0));
  if (OpReg == 0)
    return false;

  unsigned ImplicitDefReg;
  if (Subtarget->hasAVX()) {
    ImplicitDefReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
  }

  unsigned ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
                ResultReg);

  if (Subtarget->hasAVX())
    MIB.addReg(ImplicitDefReg);

  MIB.addReg(OpReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(const Instruction *I) {
  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
      I->getOperand(0)->getType()->isFloatTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
    // fpext from float to double.
    unsigned Opc =
        HasAVX512 ? X86::VCVTSS2SDZrr
                  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
    return X86SelectFPExtOrFPTrunc(
        I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
      I->getOperand(0)->getType()->isDoubleTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
    // fptrunc from double to float.
    unsigned Opc =
        HasAVX512 ? X86::VCVTSD2SSZrr
                  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
    return X86SelectFPExtOrFPTrunc(
        I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
  }

  return false;
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(DL, I->getType());

  // This code only handles truncation to byte.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    return false;
  if (!TLI.isTypeLegal(SrcVT))
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

  if (SrcVT == MVT::i8) {
    // Truncate from i8 to i1; no code needed.
    updateValueMap(I, InputReg);
    return true;
  }

  // Issue an extract_subreg.
  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
                                                  InputReg, /*Kill=*/false,
                                                  X86::sub_8bit);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::IsMemcpySmall(uint64_t Len) {
  return Len <= (Subtarget->is64Bit() ? 32 : 16);
}

bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
                                     X86AddressMode SrcAM, uint64_t Len) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemcpySmall(Len))
    return false;

  bool i64Legal = Subtarget->is64Bit();

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 8 && i64Legal)
      VT = MVT::i64;
    else if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else
      VT = MVT::i8;

    unsigned Reg;
    bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
    RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
    assert(RV && "Failed to emit load or store??");

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    DestAM.Disp += Size;
    SrcAM.Disp += Size;
  }

  return true;
}
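// Worked example (illustrative): on x86-64 an 11-byte constant-length memcpy
// is inlined as three load/store pairs -- an i64 access (8 bytes), then an
// i16 access (2 bytes), then an i8 access (1 byte) -- advancing both address
// modes' displacement by the access size after each pair.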
bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16: {
    if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
      return false;

    const Value *Op = II->getArgOperand(0);
    unsigned InputReg = getRegForValue(Op);
    if (InputReg == 0)
      return false;

    // F16C only allows converting from float to half and from half to float.
    bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
    if (IsFloatToHalf) {
      if (!Op->getType()->isFloatTy())
        return false;
    } else {
      if (!II->getType()->isFloatTy())
        return false;
    }

    unsigned ResultReg = 0;
    const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
    if (IsFloatToHalf) {
      // 'InputReg' is implicitly promoted from register class FR32 to
      // register class VR128 by method 'constrainOperandRegClass' which is
      // directly called by 'fastEmitInst_ri'.
      // Instruction VCVTPS2PHrr takes an extra immediate operand which is
      // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
      // It's consistent with the other FP instructions, which are usually
      // controlled by MXCSR.
      InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);

      // Move the lower 32-bits of ResultReg to another register of class GR32.
      ResultReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(X86::VMOVPDI2DIrr), ResultReg)
          .addReg(InputReg, RegState::Kill);

      // The result value is in the lower 16-bits of ResultReg.
      unsigned RegIdx = X86::sub_16bit;
      ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
    } else {
      assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
      // Explicitly sign-extend the input to 32-bit.
      InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
                            /*Kill=*/false);

      // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
      InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
                            InputReg, /*Kill=*/true);

      InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);

      // The result value is in the lower 32-bits of ResultReg.
      // Emit an explicit copy from register class VR128 to register class FR32.
      ResultReg = createResultReg(&X86::FR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(InputReg, RegState::Kill);
    }

    updateValueMap(II, ResultReg);
    return true;
  }
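  // Note (illustrative): for convert_to_fp16 the sequence above is essentially
  //   VCVTPS2PHrr  xmmTmp, xmmSrc, 4   ; imm 0b100 selects MXCSR rounding
  //   VMOVPDI2DIrr gr32Tmp, xmmTmp     ; move the low 32 bits to a GPR
  //   extract sub_16bit                ; the half-precision bits are the low 16
  // and convert_from_fp16 is the mirror image built around VCVTPH2PSrr.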
  case Intrinsic::frameaddress: {
    MachineFunction *MF = FuncInfo.MF;
    if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Opc;
    const TargetRegisterClass *RC = nullptr;

    switch (VT.SimpleTy) {
    default: llvm_unreachable("Invalid result type for frameaddress.");
    case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
    case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
    }

    // This needs to be set before we call getPtrSizedFrameRegister, otherwise
    // we get the wrong frame register.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
    assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
            (FrameReg == X86::EBP && VT == MVT::i32)) &&
           "Invalid Frame Register!");

    // Always make a copy of the frame register to a vreg first, so that we
    // never directly reference the frame register (the TwoAddressInstruction-
    // Pass doesn't like that).
    unsigned SrcReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);

    // Now recursively load from the frame address.
    // movq (%rbp), %rax
    // movq (%rax), %rax
    // movq (%rax), %rax
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                           TII.get(Opc), DestReg), SrcReg);
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::memcpy: {
    const MemCpyInst *MCI = cast<MemCpyInst>(II);
    // Don't handle volatile or variable length memcpys.
    if (MCI->isVolatile())
      return false;

    if (isa<ConstantInt>(MCI->getLength())) {
      // Small memcpy's are common enough that we want to do them
      // without a call if possible.
      uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
      if (IsMemcpySmall(Len)) {
        X86AddressMode DestAM, SrcAM;
        if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
            !X86SelectAddress(MCI->getRawSource(), SrcAM))
          return false;
        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
        return true;
      }
    }

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);

    if (MSI->isVolatile())
      return false;

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
  }
  case Intrinsic::stackprotector: {
    // Emit code to store the stack guard onto the stack.
    EVT PtrTy = TLI.getPointerTy(DL);

    const Value *Op1 = II->getArgOperand(0); // The guard's value.
    const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));

    MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);

    // Grab the frame index.
    X86AddressMode AM;
    if (!X86SelectAddress(Slot, AM)) return false;
    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
    X86AddressMode AM;
    assert(DI->getAddress() && "Null address should be checked earlier!");
    if (!X86SelectAddress(DI->getAddress(), AM))
      return false;
    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    // FIXME may need to add RegState::Debug to any registers produced,
    // although ESP/EBP should be the only ones at the moment.
    assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
           "Expected inlined-at fields to agree");
    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
        .addImm(0)
        .addMetadata(DI->getVariable())
        .addMetadata(DI->getExpression());
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
    return true;
  }
  case Intrinsic::sqrt: {
    if (!Subtarget->hasSSE1())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
    // is not generated by FastISel yet.
    // FIXME: Update this code once tablegen can handle it.
    static const uint16_t SqrtOpc[3][2] = {
      { X86::SQRTSSr,   X86::SQRTSDr },
      { X86::VSQRTSSr,  X86::VSQRTSDr },
      { X86::VSQRTSSZr, X86::VSQRTSDZr },
    };
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :
                                                 0;
    unsigned Opc;
    switch (VT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
    }

    const Value *SrcVal = II->getArgOperand(0);
    unsigned SrcReg = getRegForValue(SrcVal);
    if (SrcReg == 0)
      return false;

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned ImplicitDefReg = 0;
    if (AVXLevel > 0) {
      ImplicitDefReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
    }

    unsigned ResultReg = createResultReg(RC);
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                  ResultReg);

    if (ImplicitDefReg)
      MIB.addReg(ImplicitDefReg);

    MIB.addReg(SrcReg);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics
    // into add/sub/mul followed by either seto or setb.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);
    assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
           Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
           "Overflow value expected to be an i1");

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT < MVT::i8 || VT > MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);

    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
        isCommutativeIntrinsic(II))
      std::swap(LHS, RHS);

    bool UseIncDec = false;
    if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
      UseIncDec = true;

    unsigned BaseOpc, CondOpc;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
      CondOpc = X86::SETOr;
      break;
    case Intrinsic::uadd_with_overflow:
      BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
    case Intrinsic::ssub_with_overflow:
      BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
      CondOpc = X86::SETOr;
      break;
    case Intrinsic::usub_with_overflow:
      BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
    case Intrinsic::smul_with_overflow:
      BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
    case Intrinsic::umul_with_overflow:
      BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
    }

    unsigned LHSReg = getRegForValue(LHS);
    if (LHSReg == 0)
      return false;
    bool LHSIsKill = hasTrivialKill(LHS);

    unsigned ResultReg = 0;
    // Check if we have an immediate version.
    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
      static const uint16_t Opc[2][4] = {
        { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
        { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
      };

      if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
        ResultReg = createResultReg(TLI.getRegClassFor(VT));
        bool IsDec = BaseOpc == X86ISD::DEC;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
          .addReg(LHSReg, getKillRegState(LHSIsKill));
      } else
        ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
                                CI->getZExtValue());
    }

    unsigned RHSReg;
    bool RHSIsKill;
    if (!ResultReg) {
      RHSReg = getRegForValue(RHS);
      if (RHSReg == 0)
        return false;
      RHSIsKill = hasTrivialKill(RHS);
      ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
                              RHSIsKill);
    }

    // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
    // it manually.
    if (BaseOpc == X86ISD::UMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
      static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
      // First copy the first operand into RAX, which is an implicit input to
      // the X86::MUL*r instruction.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
        .addReg(LHSReg, getKillRegState(LHSIsKill));
      ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
                                 TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
    } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
      if (VT == MVT::i8) {
        // Copy the first operand into AL, which is an implicit input to the
        // X86::IMUL8r instruction.
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::COPY), X86::AL)
          .addReg(LHSReg, getKillRegState(LHSIsKill));
        ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
                                   RHSIsKill);
      } else
        ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
                                    TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
                                    RHSReg, RHSIsKill);
    }

    if (!ResultReg)
      return false;

    // Assign to a GPR since the overflow return value is lowered to a SETcc.
    unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
    assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
            ResultReg2);

    updateValueMap(II, ResultReg, 2);
    return true;
  }
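  // Worked example (illustrative): "llvm.sadd.with.overflow.i32(%a, %b)"
  // becomes roughly an ADD32rr producing the value result followed by a SETOr
  // producing the i1 overflow bit; the unsigned variants use SETBr (carry)
  // instead, and multiplies fall through to the MUL/IMUL forms emitted above.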
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    bool IsInputDouble;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::x86_sse_cvttss2si:
    case Intrinsic::x86_sse_cvttss2si64:
      if (!Subtarget->hasSSE1())
        return false;
      IsInputDouble = false;
      break;
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse2_cvttsd2si64:
      if (!Subtarget->hasSSE2())
        return false;
      IsInputDouble = true;
      break;
    }

    Type *RetTy = II->getCalledFunction()->getReturnType();
    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    static const uint16_t CvtOpc[3][2][2] = {
      { { X86::CVTTSS2SIrr,   X86::CVTTSS2SI64rr },
        { X86::CVTTSD2SIrr,   X86::CVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIrr,  X86::VCVTTSS2SI64rr },
        { X86::VCVTTSD2SIrr,  X86::VCVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
        { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
    };
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :
                                                 0;
    unsigned Opc;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected result type.");
    case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
    case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
    }

    // Check if we can fold insertelement instructions into the convert.
    const Value *Op = II->getArgOperand(0);
    while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
      const Value *Index = IE->getOperand(2);
      if (!isa<ConstantInt>(Index))
        break;
      unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();

      if (Idx == 0) {
        Op = IE->getOperand(1);
        break;
      }
      Op = IE->getOperand(0);
    }

    unsigned Reg = getRegForValue(Op);
    if (Reg == 0)
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(Reg);

    updateValueMap(II, ResultReg);
    return true;
  }
  }
}
bool X86FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;

  if (Subtarget->isCallingConvWin64(CC))
    return false;

  if (!Subtarget->is64Bit())
    return false;

  if (Subtarget->useSoftFloat())
    return false;

  // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i32:
    case MVT::i64:
      ++GPRCnt;
      break;
    case MVT::f32:
    case MVT::f64:
      if (!Subtarget->hasSSE1())
        return false;
      ++FPRCnt;
      break;
    }

    if (GPRCnt > 6)
      return false;

    if (FPRCnt > 8)
      return false;
  }

  static const MCPhysReg GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const MCPhysReg GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
  };
  static const MCPhysReg XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned SrcReg;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type.");
    case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
    case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
    case MVT::f32: LLVM_FALLTHROUGH;
    case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
    }
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
                                                  CallingConv::ID CC,
                                                  ImmutableCallSite *CS) {
  if (Subtarget->is64Bit())
    return 0;
  if (Subtarget->getTargetTriple().isOSMSVCRT())
    return 0;
  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
      CC == CallingConv::HiPE)
    return 0;

  if (CS)
    if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
        CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
      return 0;

  return 4;
}
bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  auto &OutVals       = CLI.OutVals;
  auto &OutFlags      = CLI.OutFlags;
  auto &OutRegs       = CLI.OutRegs;
  auto &Ins           = CLI.Ins;
  auto &InRegs        = CLI.InRegs;
  CallingConv::ID CC  = CLI.CallConv;
  bool &IsTailCall    = CLI.IsTailCall;
  bool IsVarArg       = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  MCSymbol *Symbol    = CLI.Symbol;

  bool Is64Bit        = Subtarget->is64Bit();
  bool IsWin64        = Subtarget->isCallingConvWin64(CC);

  const CallInst *CI =
      CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;

  // Call / invoke instructions with NoCfCheck attribute require special
  // handling.
  const auto *II =
      CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
    return false;

  // Functions with no_caller_saved_registers that need special handling.
  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
      (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
    return false;

  // Functions using retpoline for indirect calls need to use SDISel.
  if (Subtarget->useRetpolineIndirectCalls())
    return false;

  // Handle only C, fastcc, and webkit_js calling conventions for now.
  switch (CC) {
  default: return false;
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::WebKit_JS:
  case CallingConv::Swift:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_StdCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::Win64:
  case CallingConv::X86_64_SysV:
    break;
  }

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    return false;

  // Don't know how to handle Win64 varargs yet. Nothing special needed for
  // x86-32. Special handling for x86-64 is implemented.
  if (IsVarArg && IsWin64)
    return false;

  // Don't know about inalloca yet.
  if (CLI.CS && CLI.CS->hasInAllocaArgument())
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isSwiftError())
      return false;

  SmallVector<MVT, 16> OutVTs;
  SmallVector<unsigned, 16> ArgRegs;

  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
  // instruction. This is safe because it is common to all FastISel supported
  // calling conventions on x86.
  for (int i = 0, e = OutVals.size(); i != e; ++i) {
    Value *&Val = OutVals[i];
    ISD::ArgFlagsTy Flags = OutFlags[i];
    if (auto *CI = dyn_cast<ConstantInt>(Val)) {
      if (CI->getBitWidth() < 32) {
        if (Flags.isSExt())
          Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
        else
          Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
      }
    }

    // Passing bools around ends up doing a trunc to i1 and passing it.
    // Codegen this as an argument + "and 1".
    MVT VT;
    auto *TI = dyn_cast<TruncInst>(Val);
    unsigned ResultReg;
    if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
        (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
        TI->hasOneUse()) {
      Value *PrevVal = TI->getOperand(0);
      ResultReg = getRegForValue(PrevVal);

      if (!ResultReg)
        return false;

      if (!isTypeLegal(PrevVal->getType(), VT))
        return false;

      ResultReg =
        fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
    } else {
      if (!isTypeLegal(Val->getType(), VT))
        return false;
      ResultReg = getRegForValue(Val);
    }

    if (!ResultReg)
      return false;

    ArgRegs.push_back(ResultReg);
    OutVTs.push_back(VT);
  }
  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());

  // Allocate shadow area for Win64
  if (IsWin64)
    CCInfo.AllocateStack(32, 8);

  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    .addImm(NumBytes).addImm(0).addImm(0);

  // Walk the register/memloc assignments, inserting copies/loads.
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign const &VA = ArgLocs[i];
    const Value *ArgVal = OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    if (ArgVT == MVT::x86mmx)
      return false;

    unsigned ArgReg = ArgRegs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");

      if (ArgVT == MVT::i1)
        return false;

      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");

      // Handle zero-extension from i1 to i8, which is common.
      if (ArgVT == MVT::i1) {
        // Set the high bits to zero.
        ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
        ArgVT = MVT::i8;

        if (ArgReg == 0)
          return false;
      }

      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
                                    ArgVT, ArgReg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
                                    ArgVT, ArgReg);

      assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::BCvt: {
      ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
                          /*TODO: Kill=*/false);
      assert(ArgReg && "Failed to emit a bitcast!");
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::VExt:
      // VExt has not been implemented, so this should be impossible to reach
      // for now. However, fallback to Selection DAG isel once implemented.
      return false;
    case CCValAssign::AExtUpper:
    case CCValAssign::SExtUpper:
    case CCValAssign::ZExtUpper:
    case CCValAssign::FPExt:
      llvm_unreachable("Unexpected loc info!");
    case CCValAssign::Indirect:
      // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
      // support this.
      return false;
    }

    if (VA.isRegLoc()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      OutRegs.push_back(VA.getLocReg());
    } else {
      assert(VA.isMemLoc());

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = RegInfo->getStackRegister();
      AM.Disp = LocMemOffset;
      ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
      if (Flags.isByVal()) {
        X86AddressMode SrcAM;
        SrcAM.Base.Reg = ArgReg;
        if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
          return false;
      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
        // If this is a really simple value, emit this with the Value* version
        // of X86FastEmitStore. If it isn't simple, we don't want to do this,
        // as it can cause us to reevaluate the argument.
        if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
          return false;
      } else {
        bool ValIsKill = hasTrivialKill(ArgVal);
        if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
          return false;
      }
    }
  }
3461 if (Subtarget
->isPICStyleGOT()) {
3462 unsigned Base
= getInstrInfo()->getGlobalBaseReg(FuncInfo
.MF
);
3463 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
3464 TII
.get(TargetOpcode::COPY
), X86::EBX
).addReg(Base
);
3467 if (Is64Bit
&& IsVarArg
&& !IsWin64
) {
3468 // From AMD64 ABI document:
3469 // For calls that may call functions that use varargs or stdargs
3470 // (prototype-less calls or calls to functions containing ellipsis (...) in
3471 // the declaration) %al is used as hidden argument to specify the number
3472 // of SSE registers used. The contents of %al do not need to match exactly
3473 // the number of registers, but must be an ubound on the number of SSE
3474 // registers used and is in the range 0 - 8 inclusive.
3476 // Count the number of XMM registers allocated.
3477 static const MCPhysReg XMMArgRegs
[] = {
3478 X86::XMM0
, X86::XMM1
, X86::XMM2
, X86::XMM3
,
3479 X86::XMM4
, X86::XMM5
, X86::XMM6
, X86::XMM7
3481 unsigned NumXMMRegs
= CCInfo
.getFirstUnallocated(XMMArgRegs
);
3482 assert((Subtarget
->hasSSE1() || !NumXMMRegs
)
3483 && "SSE registers cannot be used when SSE is disabled");
3484 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(X86::MOV8ri
),
3485 X86::AL
).addImm(NumXMMRegs
);
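  // Illustrative example: for a varargs call such as printf("%f", x), where x
  // occupies one XMM argument register, the block above emits "MOV8ri AL, 1"
  // immediately before the call so the callee's va_arg machinery has an upper
  // bound on how many SSE registers may carry arguments.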
  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectCallAddress(Callee, CalleeAM))
    return false;

  unsigned CalleeOp = 0;
  const GlobalValue *GV = nullptr;
  if (CalleeAM.GV != nullptr) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
      .addReg(CalleeOp);
  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);

    // This will be a direct call, or an indirect call through memory for
    // NonLazyBind calls or dllimport calls.
    bool NeedLoad =
        OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
    unsigned CallOpc = NeedLoad
                           ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
                           : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
    if (NeedLoad)
      MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
    if (Symbol)
      MIB.addSym(Symbol, OpFlags);
    else
      MIB.addGlobalAddress(GV, 0, OpFlags);
    if (NeedLoad)
      MIB.addReg(0);
  }
3535 // Add a register mask operand representing the call-preserved registers.
3536 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3537 MIB
.addRegMask(TRI
.getCallPreservedMask(*FuncInfo
.MF
, CC
));
3539 // Add an implicit use GOT pointer in EBX.
3540 if (Subtarget
->isPICStyleGOT())
3541 MIB
.addReg(X86::EBX
, RegState::Implicit
);
3543 if (Is64Bit
&& IsVarArg
&& !IsWin64
)
3544 MIB
.addReg(X86::AL
, RegState::Implicit
);
3546 // Add implicit physical register uses to the call.
3547 for (auto Reg
: OutRegs
)
3548 MIB
.addReg(Reg
, RegState::Implicit
);
3550 // Issue CALLSEQ_END
3551 unsigned NumBytesForCalleeToPop
=
3552 X86::isCalleePop(CC
, Subtarget
->is64Bit(), IsVarArg
,
3553 TM
.Options
.GuaranteedTailCallOpt
)
3554 ? NumBytes
// Callee pops everything.
3555 : computeBytesPoppedByCalleeForSRet(Subtarget
, CC
, CLI
.CS
);
3556 unsigned AdjStackUp
= TII
.getCallFrameDestroyOpcode();
3557 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(AdjStackUp
))
3558 .addImm(NumBytes
).addImm(NumBytesForCalleeToPop
);
3560 // Now handle call return values.
3561 SmallVector
<CCValAssign
, 16> RVLocs
;
3562 CCState
CCRetInfo(CC
, IsVarArg
, *FuncInfo
.MF
, RVLocs
,
3563 CLI
.RetTy
->getContext());
3564 CCRetInfo
.AnalyzeCallResult(Ins
, RetCC_X86
);
  // Copy all of the result registers out of their specified physreg.
  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    EVT CopyVT = VA.getValVT();
    unsigned CopyReg = ResultReg + i;
    unsigned SrcReg = VA.getLocReg();

    // If this is x86-64, and we disabled SSE, we can't return FP values.
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
      report_fatal_error("SSE register return with SSE disabled");
    }

    // If we prefer to use the value in xmm registers, copy it out as f80 and
    // use a truncate to move it from fp stack reg to xmm reg.
    if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
      CopyVT = MVT::f80;
      CopyReg = createResultReg(&X86::RFP80RegClass);
    }

    // Copy out the result.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
    InRegs.push_back(VA.getLocReg());

    // Round the f80 to the right size, which also moves it to the appropriate
    // xmm register. This is accomplished by storing the f80 value in memory
    // and then loading it back.
    if (CopyVT != VA.getValVT()) {
      EVT ResVT = VA.getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                TII.get(Opc)), FI)
          .addReg(CopyReg);
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                TII.get(Opc), ResultReg + i), FI);
    }
  }

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();
  CLI.Call = MIB;

  return true;
}
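// fastSelectInstruction is the per-instruction FastISel entry point. Each
// case below returns true only if it fully selected the instruction;
// returning false lets the generic FastISel path (and ultimately SelectionDAG)
// take over for that instruction.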
bool
X86FastISel::fastSelectInstruction(const Instruction *I)  {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::Ret:
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::SExt:
    return X86SelectSExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
    return X86SelectDivRem(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::SIToFP:
    return X86SelectSIToFP(I);
  case Instruction::UIToFP:
    return X86SelectUIToFP(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(DL, I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    updateValueMap(I, Reg);
    return true;
  }
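  // Same-width int<->pointer casts (e.g. "%p = inttoptr i64 %x to i8*" on
  // x86-64) need no code at all: the operand's vreg is simply reused for the
  // cast's value above.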
  case Instruction::BitCast: {
    // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
    if (!Subtarget->hasSSE2())
      return false;

    EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(DL, I->getType());

    if (!SrcVT.isSimple() || !DstVT.isSimple())
      return false;

    MVT SVT = SrcVT.getSimpleVT();
    MVT DVT = DstVT.getSimpleVT();

    if (!SVT.is128BitVector() &&
        !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
        !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
          (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
                                   DVT.getScalarSizeInBits() >= 32))))
      return false;

    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0)
      return false;

    // No instruction is needed for conversion. Reuse the register used by
    // the first operand.
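    // e.g. "bitcast <4 x i32> %v to <2 x i64>" stays in the same XMM vreg.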
    updateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}
unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  uint64_t Imm = CI->getZExtValue();
  if (Imm == 0) {
    unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type");
    case MVT::i1:
    case MVT::i8:
      return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
                                        X86::sub_8bit);
    case MVT::i16:
      return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
                                        X86::sub_16bit);
    case MVT::i32:
      return SrcReg;
    case MVT::i64: {
      unsigned ResultReg = createResultReg(&X86::GR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
        .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
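      // MOV32r0 only defines a 32-bit register; on x86-64 a 32-bit write
      // zeroes the upper half, so SUBREG_TO_REG is enough to form the i64
      // zero without a separate 64-bit instruction.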
      return ResultReg;
    }
    }
  }

  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type");
  case MVT::i1:
    // TODO: Support this properly.
    if (Subtarget->hasAVX512())
      return 0;
    VT = MVT::i8;
    LLVM_FALLTHROUGH;
  case MVT::i8:  Opc = X86::MOV8ri;  break;
  case MVT::i16: Opc = X86::MOV16ri; break;
  case MVT::i32: Opc = X86::MOV32ri; break;
  case MVT::i64: {
    if (isUInt<32>(Imm))
      Opc = X86::MOV32ri64;
    else if (isInt<32>(Imm))
      Opc = X86::MOV64ri32;
    else
      Opc = X86::MOV64ri;
    break;
  }
  }

  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = Subtarget->hasAVX512()
                ? X86::VMOVSSZrm
                : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
      RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = Subtarget->hasAVX512()
                ? X86::VMOVSDZrm
                : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
      RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
    Align = DL.getTypeAllocSize(CFP->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  else if (OpFlag == X86II::MO_GOTOFF)
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
    PICBase = X86::RIP;
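  // On x86-64 with the small code model the constant pool is addressed
  // RIP-relatively, so the "PIC base" is simply RIP; 32-bit PIC instead uses
  // the global base register set up by getGlobalBaseReg().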
  // Create the load from the constant pool.
  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
  unsigned ResultReg = createResultReg(RC);

  if (CM == CodeModel::Large) {
    unsigned AddrReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
            AddrReg)
      .addConstantPoolIndex(CPI, 0, OpFlag);
    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                      TII.get(Opc), ResultReg);
    addDirectMem(MIB, AddrReg);
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getConstantPool(*FuncInfo.MF),
        MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
    MIB->addMemOperand(*FuncInfo.MF, MMO);
    return ResultReg;
  }
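  // Otherwise (small code model) a single PIC- or RIP-relative constant-pool
  // load is sufficient.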
  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                   TII.get(Opc), ResultReg),
                           CPI, PICBase, OpFlag);
  return ResultReg;
}
unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return 0;

  // Materialize addresses with LEA/MOV instructions.
  X86AddressMode AM;
  if (X86SelectAddress(GV, AM)) {
    // If the expression is just a basereg, then we're done, otherwise we need
    // to emit an LEA.
    if (AM.BaseType == X86AddressMode::RegBase &&
        AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
      return AM.Base.Reg;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    if (TM.getRelocationModel() == Reloc::Static &&
        TLI.getPointerTy(DL) == MVT::i64) {
      // The displacement could be more than 32 bits away, so we need an
      // instruction with a 64-bit immediate.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
              ResultReg)
        .addGlobalAddress(GV);
    } else {
      unsigned Opc =
          TLI.getPointerTy(DL) == MVT::i32
              ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
              : X86::LEA64r;
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                             TII.get(Opc), ResultReg), AM);
    }
    return ResultReg;
  }
  return 0;
}
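// fastMaterializeConstant dispatches on the kind of constant: integers, FP
// constants, and global addresses each get their own materializer above;
// anything else (e.g. constant expressions) is left to the generic FastISel
// handling by returning 0.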
unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return X86MaterializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return X86MaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return X86MaterializeGV(GV, VT);

  return 0;
}
unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but targetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and targetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc =
      TLI.getPointerTy(DL) == MVT::i32
          ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
          : X86::LEA64r;
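  // A static alloca lives at a fixed frame offset, so its address can be
  // produced with a single frame-relative LEA (e.g. "lea eax, [ebp - 8]" on
  // a 32-bit target with a frame pointer).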
  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}
unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return 0;

  // Get opcode and regclass for the given zero.
  bool HasAVX512 = Subtarget->hasAVX512();
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
      RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp032;
      RC = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
      RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp064;
      RC = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  return ResultReg;
}
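// tryToFoldLoadIntoMI attempts to fold a load directly into the instruction
// that consumes it, turning the loaded vreg into a memory operand, e.g.
// replacing "mov eax, [mem]; add ecx, eax" with "add ecx, [mem]" and deleting
// the now-dead load.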
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  const Value *Ptr = LI->getPointerOperand();
  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  const X86InstrInfo &XII = (const X86InstrInfo &)TII;

  unsigned Size = DL.getTypeAllocSize(LI->getType());
  unsigned Alignment = LI->getAlignment();

  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = DL.getABITypeAlignment(LI->getType());

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result = XII.foldMemoryOperandImpl(
      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
      /*AllowCommute=*/true);
  if (!Result)
    return false;

  // The index register could be in the wrong register class. Unfortunately,
  // foldMemoryOperandImpl could have commuted the instruction, so it's not
  // enough to just look at OpNo + the offset to the index reg. We actually
  // need to scan the instruction to find the index reg and see if it's in the
  // correct reg class.
  unsigned OperandNo = 0;
  for (MachineInstr::mop_iterator I = Result->operands_begin(),
       E = Result->operands_end(); I != E; ++I, ++OperandNo) {
    MachineOperand &MO = *I;
    if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
      continue;
    // Found the index reg, now try to rewrite it.
    unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
                                                 MO.getReg(), OperandNo);
    if (IndexReg == MO.getReg())
      continue;
    MO.setReg(IndexReg);
  }

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  MI->eraseFromParent();
  return true;
}
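// fastEmitInst_rrrr emits an instruction with four register use operands,
// constraining each operand to the register class required by its MCInstrDesc
// slot; it parallels the generic FastISel::fastEmitInst_rr style helpers.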
unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
                                        const TargetRegisterClass *RC,
                                        unsigned Op0, bool Op0IsKill,
                                        unsigned Op1, bool Op1IsKill,
                                        unsigned Op2, bool Op2IsKill,
                                        unsigned Op3, bool Op3IsKill) {
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  unsigned ResultReg = createResultReg(RC);
  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);

  if (II.getNumDefs() >= 1)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addReg(Op1, getKillRegState(Op1IsKill))
        .addReg(Op2, getKillRegState(Op2IsKill))
        .addReg(Op3, getKillRegState(Op3IsKill));
  else {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addReg(Op1, getKillRegState(Op1IsKill))
        .addReg(Op2, getKillRegState(Op2IsKill))
        .addReg(Op3, getKillRegState(Op3IsKill));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
  }
  return ResultReg;
}
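// Entry point for the rest of the backend: X86TargetLowering::createFastISel
// forwards here to construct the target-specific FastISel object.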
namespace llvm {
  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    return new X86FastISel(funcInfo, libInfo);
  }
}