//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {
class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(const Instruction *I);

  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction. If possible,
  /// try to fold the load as an operand to the instruction, returning true if
  /// possible.
  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI);

private:
#include "X86GenFastISel.inc"
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);

  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
  bool X86SelectCall(const Instruction *I);

  bool DoSelectCall(const Instruction *I, const char *MemIntName);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(const Constant *C);

  unsigned TargetMaterializeAlloca(const AllocaInst *C);

  unsigned TargetMaterializeFloatZero(const ConstantFP *CF);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available.
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available.
  }

  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);
};

} // end anonymous namespace.
bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;

  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
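// Illustrative note (not from the original source): for a simple i32 load such
// as "%v = load i32* %p", the switch below picks MOV32rm and a GR32 result
// class, and addFullAddress folds the X86AddressMode (base, scale, index,
// displacement) into the instruction's memory operands.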
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc), ResultReg), AM);
  return true;
}
/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr
/// and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
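// Illustrative note (not from the original source): i1 values live in 8-bit
// registers whose upper bits are undefined, so the i1 case below first masks
// the value with "and $1" (AND8ri) and then stores the resulting byte with
// MOV8mr, just like an ordinary i8 store.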
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    Val = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
    break;
  }

  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                             DL, TII.get(Opc)), AM)
        .addImm(Signed ? (uint64_t) CI->getSExtValue() :
                         CI->getZExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
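// Illustrative note (not from the original source): this routine tries to fold
// a pointer computation into a single X86AddressMode, i.e. the x86 form
// [BaseReg + IndexReg*Scale + Disp]. For example, a GEP such as
// "getelementptr i32* %p, i32 %i" can become base = %p, index = %i, scale = 4,
// and static allocas are expressed through a frame-index base.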
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
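      // Illustrative note (not from the original source): for a GEP index such
      // as "getelementptr i32* %p, i32 %i", S is 4, so a constant %i folds
      // into Disp while a dynamic %i can become IndexReg with Scale = 4.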
      uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (isa<AddOperator>(Op) &&
            (!isa<Instruction>(Op) ||
             FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
               == FuncInfo.MBB) &&
            isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
          // An add (in the same block) with a constant operand. Fold the
          // constant.
          const ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op).first;
          if (IndexReg == 0)
            return false;
          break;
        }
        // Otherwise, we can't handle this scale.
        goto unsupported_gep;
      }
    }
    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    if (X86SelectAddress(U->getOperand(0), AM))
      return true;

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;
    break;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models or TLS yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can. Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub. If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
      unsigned LoadReg = 0;
      if (I != LocalValueMap.end() && I->second != 0) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = NULL;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy() == MVT::i64) {
          Opc = X86::MOV64rm;
          RC  = X86::GR64RegisterClass;

          if (Subtarget->isPICStyleRIPRel())
            StubAM.Base.Reg = X86::RIP;
        } else {
          Opc = X86::MOV32rm;
          RC  = X86::GR32RegisterClass;
        }

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = 0;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectCallAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle DLLImport.
    if (GV->hasDLLImportLinkage())
      return false;

    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can. Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}
/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  if (Subtarget->isTargetWin64())
    return false;

  // Don't handle popping bytes on return for now.
  if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
        ->getBytesToPopOnReturn() != 0)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
                   I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
                          SrcReg, /*TODO: Kill=*/false);
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  // Now emit the RET.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
  return true;
}
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
  case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
  }
}
/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHSC as the RHS
/// of the comparison, return an opcode that can fold the immediate into the
/// compare (e.g. CMP32ri); otherwise return 0.
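// Illustrative note (not from the original source): for i64 the immediate form
// is only usable when the constant fits in a sign-extended 32-bit field,
// because CMP64ri32 sign-extends its 32-bit immediate to 64 bits.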
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8:  return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
        .addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
    .addReg(Op0Reg)
    .addReg(Op1Reg);

  return true;
}
bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
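    // Illustrative note (not from the original source): after UCOMISS/UCOMISD,
    // "unordered" is reported via PF=1 and "equal" via ZF=1, so ordered-equal
    // is computed below as SETE & SETNP (equal AND not-parity).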
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::SETNPr), NPReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr), ResultReg)
      .addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
  if (!I->getOperand(0)->getType()->isIntegerTy(1))
    return false;

  EVT DstVT = TLI.getValueType(I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Set the high bits to zero.
  ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
  if (ResultReg == 0)
    return false;

  if (DstVT != MVT::i8) {
    ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  UpdateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
      default:
        return false;
      }

      const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
        .addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
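        // Illustrative note (not from the original source): after UCOMISS/UCOMISD,
        // "unordered or not equal" is true when either ZF=0 (the JNE above) or
        // PF=1, so a second JP branch to the same target is emitted here.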
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
          .addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB, DL);
      FuncInfo.MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri;   break;
      case MVT::i16: TestOpc = X86::TEST16ri;  break;
      case MVT::i32: TestOpc = X86::TEST32ri;  break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        unsigned OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
          .addReg(OpReg).addImm(1);

        unsigned JmpOpc = X86::JNE_4;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpOpc = X86::JE_4;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
          .addMBB(TrueMBB);
        FastEmitBranch(FalseMBB, DL);
        FuncInfo.MBB->addSuccessor(TrueMBB);
        return true;
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
  // in an explicit cast, so make sure to handle that correctly.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
    .addReg(OpReg).addImm(1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
    .addMBB(TrueMBB);
  FastEmitBranch(FalseMBB, DL);
  FuncInfo.MBB->addSuccessor(TrueMBB);
  return true;
}
bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
    default: return false;
    }
  } else {
    return false;
  }

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
          CReg).addReg(Op1Reg);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
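  // Illustrative note (not from the original source): x86 variable-count shifts
  // take their count in CL, so for "%r = shl i32 %x, %n" the shift amount is
  // first copied into ECX (CL) above and the SHL32rCL emitted below reads it
  // implicitly.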
  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(TargetOpcode::KILL), X86::CL)
      .addReg(CReg, RegState::Kill);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
    .addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // We only use cmov here; if we don't have a cmov instruction, bail.
  if (!Subtarget->hasCMov()) return false;

  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  if (VT == MVT::i16) {
    Opc = X86::CMOVE16rr;
    RC = &X86::GR16RegClass;
  } else if (VT == MVT::i32) {
    Opc = X86::CMOVE32rr;
    RC = &X86::GR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = X86::CMOVE64rr;
    RC = &X86::GR64RegClass;
  } else {
    return false;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
    .addReg(Op0Reg).addReg(Op0Reg);
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
    .addReg(Op1Reg).addReg(Op2Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
  // fpext from float to double.
  if (Subtarget->hasSSE2() &&
      I->getType()->isDoubleTy()) {
    const Value *V = I->getOperand(0);
    if (V->getType()->isFloatTy()) {
      unsigned OpReg = getRegForValue(V);
      if (OpReg == 0) return false;
      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
              TII.get(X86::CVTSS2SDrr), ResultReg)
        .addReg(OpReg);
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }

  return false;
}
*I
) {
1229 if (Subtarget
->hasSSE2()) {
1230 if (I
->getType()->isFloatTy()) {
1231 const Value
*V
= I
->getOperand(0);
1232 if (V
->getType()->isDoubleTy()) {
1233 unsigned OpReg
= getRegForValue(V
);
1234 if (OpReg
== 0) return false;
1235 unsigned ResultReg
= createResultReg(X86::FR32RegisterClass
);
1236 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DL
,
1237 TII
.get(X86::CVTSD2SSrr
), ResultReg
)
1239 UpdateValueMap(I
, ResultReg
);
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(I->getType());

  // This code only handles truncation to byte.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    return false;
  if (!TLI.isTypeLegal(SrcVT))
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand. Halt "fast" selection and bail.
    return false;

  if (SrcVT == MVT::i8) {
    // Truncate from i8 to i1; no code needed.
    UpdateValueMap(I, InputReg);
    return true;
  }

  if (!Subtarget->is64Bit()) {
    // If we're on x86-32, we can't extract an i8 from a general register.
    // First issue a copy to GR16_ABCD or GR32_ABCD.
1271 // First issue a copy to GR16_ABCD or GR32_ABCD.
1272 const TargetRegisterClass
*CopyRC
= (SrcVT
== MVT::i16
)
1273 ? X86::GR16_ABCDRegisterClass
: X86::GR32_ABCDRegisterClass
;
1274 unsigned CopyReg
= createResultReg(CopyRC
);
1275 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DL
, TII
.get(TargetOpcode::COPY
),
1276 CopyReg
).addReg(InputReg
);
1280 // Issue an extract_subreg.
1281 unsigned ResultReg
= FastEmitInst_extractsubreg(MVT::i8
,
1282 InputReg
, /*Kill=*/true,
1287 UpdateValueMap(I
, ResultReg
);
bool X86FastISel::IsMemcpySmall(uint64_t Len) {
  return Len <= (Subtarget->is64Bit() ? 32 : 16);
}
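// Illustrative note (not from the original source): with this limit, a 16-byte
// constant-length memcpy on x86-64 is expanded below into two i64 load/store
// pairs instead of a call to the memcpy library routine.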
bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
                                     X86AddressMode SrcAM, uint64_t Len) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemcpySmall(Len))
    return false;

  bool i64Legal = Subtarget->is64Bit();

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 8 && i64Legal)
      VT = MVT::i64;
    else if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else
      VT = MVT::i8;

    unsigned Reg;
    bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
    RV &= X86FastEmitStore(VT, Reg, DestAM);
    assert(RV && "Failed to emit load or store??");

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    DestAM.Disp += Size;
    SrcAM.Disp += Size;
  }

  return true;
}
&I
) {
1333 // FIXME: Handle more intrinsics.
1334 switch (I
.getIntrinsicID()) {
1335 default: return false;
1336 case Intrinsic::memcpy
: {
1337 const MemCpyInst
&MCI
= cast
<MemCpyInst
>(I
);
1338 // Don't handle volatile or variable length memcpys.
1339 if (MCI
.isVolatile())
1342 if (isa
<ConstantInt
>(MCI
.getLength())) {
1343 // Small memcpy's are common enough that we want to do them
1344 // without a call if possible.
1345 uint64_t Len
= cast
<ConstantInt
>(MCI
.getLength())->getZExtValue();
1346 if (IsMemcpySmall(Len
)) {
1347 X86AddressMode DestAM
, SrcAM
;
1348 if (!X86SelectAddress(MCI
.getRawDest(), DestAM
) ||
1349 !X86SelectAddress(MCI
.getRawSource(), SrcAM
))
1351 TryEmitSmallMemcpy(DestAM
, SrcAM
, Len
);
1356 unsigned SizeWidth
= Subtarget
->is64Bit() ? 64 : 32;
1357 if (!MCI
.getLength()->getType()->isIntegerTy(SizeWidth
))
1360 if (MCI
.getSourceAddressSpace() > 255 || MCI
.getDestAddressSpace() > 255)
1363 return DoSelectCall(&I
, "memcpy");
1365 case Intrinsic::memset
: {
1366 const MemSetInst
&MSI
= cast
<MemSetInst
>(I
);
1368 unsigned SizeWidth
= Subtarget
->is64Bit() ? 64 : 32;
1369 if (!MSI
.getLength()->getType()->isIntegerTy(SizeWidth
))
1372 if (MSI
.getDestAddressSpace() > 255)
1375 return DoSelectCall(&I
, "memset");
1377 case Intrinsic::stackprotector
: {
1378 // Emit code inline code to store the stack guard onto the stack.
1379 EVT PtrTy
= TLI
.getPointerTy();
1381 const Value
*Op1
= I
.getArgOperand(0); // The guard's value.
1382 const AllocaInst
*Slot
= cast
<AllocaInst
>(I
.getArgOperand(1));
1384 // Grab the frame index.
1386 if (!X86SelectAddress(Slot
, AM
)) return false;
1387 if (!X86FastEmitStore(PtrTy
, Op1
, AM
)) return false;
1390 case Intrinsic::dbg_declare
: {
1391 const DbgDeclareInst
*DI
= cast
<DbgDeclareInst
>(&I
);
1393 assert(DI
->getAddress() && "Null address should be checked earlier!");
1394 if (!X86SelectAddress(DI
->getAddress(), AM
))
1396 const MCInstrDesc
&II
= TII
.get(TargetOpcode::DBG_VALUE
);
1397 // FIXME may need to add RegState::Debug to any registers produced,
1398 // although ESP/EBP should be the only ones at the moment.
1399 addFullAddress(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DL
, II
), AM
).
1400 addImm(0).addMetadata(DI
->getVariable());
1403 case Intrinsic::trap
: {
1404 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DL
, TII
.get(X86::TRAP
));
1407 case Intrinsic::sadd_with_overflow
:
1408 case Intrinsic::uadd_with_overflow
: {
1409 // FIXME: Should fold immediates.
1411 // Replace "add with overflow" intrinsics with an "add" instruction followed
1412 // by a seto/setc instruction.
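    // Illustrative note (not from the original source): for
    //   %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
    // this emits roughly an ADD32rr into the first result register and then a
    // SETBr (carry) into the second; the signed variant uses SETOr (overflow).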
    const Function *Callee = I.getCalledFunction();
    const Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    const Value *Op1 = I.getArgOperand(0);
    const Value *Op2 = I.getArgOperand(1);
    unsigned Reg1 = getRegForValue(Op1);
    unsigned Reg2 = getRegForValue(Op2);

    if (Reg1 == 0 || Reg2 == 0)
      // FIXME: Handle values *not* in registers.
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::ADD32rr;
    else if (VT == MVT::i64)
      OpC = X86::ADD64rr;
    else
      return false;

    // The call to CreateRegs builds two sequential registers, to store
    // both of the returned values.
    unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
      .addReg(Reg1).addReg(Reg2);

    unsigned Opc = X86::SETBr;
    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
      Opc = X86::SETOr;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);

    UpdateValueMap(&I, ResultReg, 2);
    return true;
  }
  }
}
bool X86FastISel::X86SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm yet.
  if (isa<InlineAsm>(Callee))
    return false;

  // Handle intrinsic calls.
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
    return X86VisitIntrinsicCall(*II);

  return DoSelectCall(I, 0);
}
// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Handle only C and fastcc calling conventions for now.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();
  if (CC != CallingConv::C && CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
    return false;

  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  bool isVarArg = FTy->isVarArg();

  // Don't know how to handle Win64 varargs yet. Nothing special needed for
  // x86-32. Special handling for x86-64 is implemented.
  if (isVarArg && Subtarget->isTargetWin64())
    return false;

  // Fast-isel doesn't know about callee-pop yet.
  if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
                       GuaranteedTailCallOpt))
    return false;

  // Check whether the function can return without sret-demotion.
  SmallVector<ISD::OutputArg, 4> Outs;
  SmallVector<uint64_t, 4> Offsets;
  GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
                Outs, TLI, &Offsets);
  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
                                           *FuncInfo.MF, FTy->isVarArg(),
                                           Outs, FTy->getContext());
  if (!CanLowerReturn)
    return false;

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectCallAddress(Callee, CalleeAM))
    return false;
  unsigned CalleeOp = 0;
  const GlobalValue *GV = 0;
  if (CalleeAM.GV != 0) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Deal with call operands first.
  SmallVector<const Value *, 8> ArgVals;
  SmallVector<unsigned, 8> Args;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgVals.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    // If we're lowering a mem intrinsic instead of a regular call, skip the
    // last two arguments, which should not be passed to the underlying function.
    if (MemIntName && e-i <= 2)
      break;

    const Value *ArgVal = *i;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
      const PointerType *Ty = cast<PointerType>(ArgVal->getType());
      const Type *ElementTy = Ty->getElementType();
      unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
      unsigned FrameAlign = CS.getParamAlignment(AttrInd);
      if (FrameAlign == 0)
        FrameAlign = TLI.getByValTypeAlignment(ElementTy);
      Flags.setByVal();
      Flags.setByValSize(FrameSize);
      Flags.setByValAlign(FrameAlign);
      if (!IsMemcpySmall(FrameSize))
        return false;
    }

    if (CS.paramHasAttr(AttrInd, Attribute::InReg))
      Flags.setInReg();
    if (CS.paramHasAttr(AttrInd, Attribute::Nest))
      Flags.setNest();

    // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
    // instruction. This is safe because it is common to all fastisel supported
    // calling conventions on x86.
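    // Illustrative note (not from the original source): a constant i8 argument
    // such as "i8 5" is rewritten here to the i32 constant 5 (sign- or
    // zero-extended according to the parameter attributes), so no separate
    // extend instruction is needed before the call.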
    if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
      if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
          CI->getBitWidth() == 16) {
        if (Flags.isSExt())
          ArgVal = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
        else
          ArgVal = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
      }
    }

    unsigned ArgReg;

    // Passing bools around ends up doing a trunc to i1 and passing it.
    // Codegen this as an argument + "and 1".
    if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
        cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
        ArgVal->hasOneUse()) {
      ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
      ArgReg = getRegForValue(ArgVal);
      if (ArgReg == 0) return false;

      MVT ArgVT;
      if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;

      ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
                           ArgVal->hasOneUse(), 1);
    } else {
      ArgReg = getRegForValue(ArgVal);
    }

    if (ArgReg == 0) return false;

    const Type *ArgTy = ArgVal->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    if (ArgVT == MVT::x86mmx)
      return false;

    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(ArgReg);
    ArgVals.push_back(ArgVal);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
                 I->getParent()->getContext());

  // Allocate shadow area for Win64
  if (Subtarget->isTargetWin64())
    CCInfo.AllocateStack(32, 8);

  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
    .addImm(NumBytes);

  // Process the arguments: walk the register/memloc assignments, inserting
  // copies / loads.
  SmallVector<unsigned, 4> RegArgs;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = Args[VA.getValNo()];
    EVT ArgVT = ArgVTs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);

      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::BCvt: {
      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
                               ISD::BITCAST, Arg, /*TODO: Kill=*/false);
      assert(BC != 0 && "Failed to emit a bitcast!");
      Arg = BC;
      ArgVT = VA.getLocVT();
      break;
    }
    }

    if (VA.isRegLoc()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else {
      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = StackPtr;
      AM.Disp = LocMemOffset;
      const Value *ArgVal = ArgVals[VA.getValNo()];
      ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];

      if (Flags.isByVal()) {
        X86AddressMode SrcAM;
        SrcAM.Base.Reg = Arg;
        bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
        assert(Res && "memcpy length already checked!"); (void)Res;
      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
        // If this is a really simple value, emit this with the Value* version
        // of X86FastEmitStore. If it isn't simple, we don't want to do this,
        // as it can cause us to reevaluate the argument.
        X86FastEmitStore(ArgVT, ArgVal, AM);
      } else {
        X86FastEmitStore(ArgVT, Arg, AM);
      }
    }
  }

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via PLT.
  if (Subtarget->isPICStyleGOT()) {
    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            X86::EBX).addReg(Base);
  }

  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
            X86::AL).addImm(NumXMMRegs);
  }

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc;
    if (Subtarget->isTargetWin64())
      CallOpc = X86::WINCALL64r;
    else if (Subtarget->is64Bit())
      CallOpc = X86::CALL64r;
    else
      CallOpc = X86::CALL32r;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
      .addReg(CalleeOp);
  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc;
    if (Subtarget->isTargetWin64())
      CallOpc = X86::WINCALL64pcrel32;
    else if (Subtarget->is64Bit())
      CallOpc = X86::CALL64pcrel32;
    else
      CallOpc = X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    // external symbols must go through the PLT in PIC mode. If the symbol
    // has hidden or protected visibility, or if it is static or local, then
    // we don't need to use the PLT - we can directly call it.
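    // Illustrative note (not from the original source): in that case the global
    // operand is tagged with X86II::MO_PLT below, so the assembler emits a
    // "call foo@PLT" relocation rather than a direct reference to the symbol.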
1772 if (Subtarget
->isTargetELF() &&
1773 TM
.getRelocationModel() == Reloc::PIC_
&&
1774 GV
->hasDefaultVisibility() && !GV
->hasLocalLinkage()) {
1775 OpFlags
= X86II::MO_PLT
;
1776 } else if (Subtarget
->isPICStyleStubAny() &&
1777 (GV
->isDeclaration() || GV
->isWeakForLinker()) &&
1778 (!Subtarget
->getTargetTriple().isMacOSX() ||
1779 Subtarget
->getTargetTriple().isMacOSXVersionLT(10, 5))) {
1780 // PC-relative references to external symbols should go through $stub,
1781 // unless we're building with the leopard linker or later, which
1782 // automatically synthesizes these stubs.
1783 OpFlags
= X86II::MO_DARWIN_STUB
;
1787 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DL
, TII
.get(CallOpc
));
1789 MIB
.addExternalSymbol(MemIntName
, OpFlags
);
1791 MIB
.addGlobalAddress(GV
, 0, OpFlags
);
1794 // Add an implicit use GOT pointer in EBX.
1795 if (Subtarget
->isPICStyleGOT())
1796 MIB
.addReg(X86::EBX
);
1798 if (Subtarget
->is64Bit() && isVarArg
&& !Subtarget
->isTargetWin64())
1799 MIB
.addReg(X86::AL
);
1801 // Add implicit physical register uses to the call.
1802 for (unsigned i
= 0, e
= RegArgs
.size(); i
!= e
; ++i
)
1803 MIB
.addReg(RegArgs
[i
]);
1805 // Issue CALLSEQ_END
1806 unsigned AdjStackUp
= TII
.getCallFrameDestroyOpcode();
1807 unsigned NumBytesCallee
= 0;
1808 if (!Subtarget
->is64Bit() && CS
.paramHasAttr(1, Attribute::StructRet
))
1810 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DL
, TII
.get(AdjStackUp
))
1811 .addImm(NumBytes
).addImm(NumBytesCallee
);
  // Build info for return calling conv lowering code.
  // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
  SmallVector<ISD::InputArg, 32> Ins;
  SmallVector<EVT, 4> RetTys;
  ComputeValueVTs(TLI, I->getType(), RetTys);
  for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
    EVT VT = RetTys[i];
    EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
    unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
    for (unsigned j = 0; j != NumRegs; ++j) {
      ISD::InputArg MyFlags;
      MyFlags.VT = RegisterVT.getSimpleVT();
      MyFlags.Used = !CS.getInstruction()->use_empty();
      if (CS.paramHasAttr(0, Attribute::SExt))
        MyFlags.Flags.setSExt();
      if (CS.paramHasAttr(0, Attribute::ZExt))
        MyFlags.Flags.setZExt();
      if (CS.paramHasAttr(0, Attribute::InReg))
        MyFlags.Flags.setInReg();
      Ins.push_back(MyFlags);
    }
  }
  // Now handle call return values.
  SmallVector<unsigned, 4> UsedRegs;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
                    I->getParent()->getContext());
  unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    unsigned CopyReg = ResultReg + i;

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if (RVLocs[i].getLocReg() == X86::ST0 ||
        RVLocs[i].getLocReg() == X86::ST1) {
      if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
        CopyVT = MVT::f80;
        CopyReg = createResultReg(X86::RFP80RegisterClass);
      }
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
              CopyReg);
    } else {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              CopyReg).addReg(RVLocs[i].getLocReg());
      UsedRegs.push_back(RVLocs[i].getLocReg());
    }
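    // (For ST0/ST1 the value must actually be popped off the x87 stack even if
    // the result is unused, which is why FpPOP_RETVAL is used instead of a
    // plain COPY that could later be deleted as dead.)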
    if (CopyVT != RVLocs[i].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      EVT ResVT = RVLocs[i].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(Opc)), FI).addReg(CopyReg);
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(Opc), ResultReg + i), FI);
    }
  }
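  // (The spill/reload pair above is roughly "fstps <slot>; movss <slot>, %xmm"
  // for f32 results, or the f64 equivalents; the narrowing store performs the
  // actual rounding from the 80-bit value.)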
  UpdateValueMap(I, ResultReg, RVLocs.size());

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
bool
X86FastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::Ret:
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::Call:
    return X86SelectCall(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
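  // Same-width int<->ptr casts need no code: the source register is simply
  // reused via UpdateValueMap. A wider destination is handled as a zext and a
  // narrower one as a trunc, as selected below.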
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    UpdateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
  MVT VT;
  if (!isTypeLegal(C->getType(), VT))
    return false;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }
  // Materialize addresses with LEA instructions.
  if (isa<GlobalValue>(C)) {
    X86AddressMode AM;
    if (X86SelectAddress(C, AM)) {
      // If the expression is just a basereg, then we're done, otherwise we
      // need to emit an LEA.
      if (AM.BaseType == X86AddressMode::RegBase &&
          AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
        return AM.Base.Reg;

      Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
      unsigned ResultReg = createResultReg(RC);
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(Opc), ResultReg), AM);
      return ResultReg;
    }
    return 0;
  }
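  // (Globals whose address is more than a plain base register are materialized
  // above with an LEA of the full address form computed by X86SelectAddress;
  // on x86-64 this is typically a RIP-relative lea.)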
  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
    Align = TD.getTypeAllocSize(C->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
  }
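  // (PICBase and OpFlag make the constant-pool reference below be encoded
  // relative to the PIC base register, relative to the GOT, or RIP-relative,
  // matching the addressing style chosen above.)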
  // Create the load from the constant pool.
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                   TII.get(Opc), ResultReg),
                           MCPOffset, PICBase, OpFlag);
  return ResultReg;
}
unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
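
  // The alloca's address is a frame index at this point; emitting an LEA of
  // the full address form yields a stack- or frame-pointer-relative lea once
  // frame indices are resolved.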
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}
unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return false;

  // Get opcode and regclass for the given zero.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::FsFLD0SS;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp032;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::FsFLD0SD;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp064;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }
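  // (FsFLD0SS/FsFLD0SD are pseudos that expand to a self-xor zeroing idiom
  // rather than a constant-pool load; the x87 fallbacks LD_Fp032/LD_Fp064
  // materialize +0.0 with fldz.)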
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
  return ResultReg;
}
/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// possible.
bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                                const LoadInst *LI) {
  X86AddressMode AM;
  if (!X86SelectAddress(LI->getOperand(0), AM))
    return false;

  X86InstrInfo &XII = (X86InstrInfo &)TII;

  unsigned Size = TD.getTypeAllocSize(LI->getType());
  unsigned Alignment = LI->getAlignment();

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);
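
  // If the target can fold the addressed memory directly into MI's operand
  // OpNo (e.g. rewriting a reg-reg instruction into its reg-mem form), a new
  // folded instruction is returned and the original load becomes redundant.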
  MachineInstr *Result =
    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
  if (Result == 0) return false;

  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
  MI->eraseFromParent();
  return true;
}
llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
  return new X86FastISel(funcInfo);
}