//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

namespace {

  // All possible address modes, plus some.
  struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType = RegBase;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset = 0;

    // Innocuous defaults for our address.
    Address() {
      Base.Reg = 0;
    }
  };

class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  Module &M;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo)
        : FastISel(funcInfo, libInfo),
          Subtarget(
              &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
          M(const_cast<Module &>(*funcInfo.Fn->getParent())),
          TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
          TLI(*Subtarget->getTargetLowering()) {
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.

    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);
    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned fastEmitInst_i(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            uint64_t Imm);

    // Backend specific FastISel code.

    bool fastSelectInstruction(const Instruction *I) override;
    unsigned fastMaterializeConstant(const Constant *C) override;
    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;

  private:
  #include "ARMGenFastISel.inc"

    // Instruction selection routines.

    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

    // Utility routines.

    bool isPositionIndependent() const;
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt, bool isEquality);
    bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                     unsigned Alignment = 0, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                      unsigned Alignment = 0);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                               unsigned Alignment);
    unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned ARMMaterializeInt(const Constant *C, MVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);
    unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);

    const TargetLowering *getTargetLowering() { return &TLI; }

    // Call handling routines.

    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                  bool Return,
                                  bool isVarArg);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool isVarArg);
    unsigned getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool isVarArg);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.

    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(MVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              MachineMemOperand::Flags Flags, bool useAM3);
};

} // end anonymous namespace

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
       AFI->isThumb2Function())
    return MI->isPredicable();

  for (const MCOperandInfo &opInfo : MCID.operands())
    if (opInfo.isPredicate())
      return true;

  return false;
}

// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (isARMNEONPred(MI))
    MIB.add(predOps(ARMCC::AL));

  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR))
    MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}

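// The fastEmitInst_* helpers below mirror the FastISel base-class versions,
// but funnel every emitted instruction through AddOptionalDefs so the default
// predicate and optional CC-out operands required by ARM/Thumb2 encodings are
// always present.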
unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
                            ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operands are sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  Op1 = constrainOperandRegClass(II, Op1, 2);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
            .addReg(Op0, Op0IsKill * RegState::Kill)
            .addReg(Op1, Op1IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
            .addReg(Op0, Op0IsKill * RegState::Kill)
            .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
                            ResultReg).addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

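// Register-bank moves: VMOVSR copies a core register into a single-precision
// VFP register and VMOVRS copies one back. Both helpers currently reject
// 64-bit types and return 0 so callers can bail out.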
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), DestReg).addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2Base()) return false;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = DL.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx)
          .addReg(0));
  return DestReg;
}

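// Materialize an integer constant: prefer a single MOVi16/MVN (or a movw/movt
// pair via fastEmit_i) when the subtarget allows it, and otherwise fall back
// to a constant-pool load for plain i32 values.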
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
      &ARM::GPRRegClass;
    unsigned ImmReg = createResultReg(RC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ImmReg)
                    .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                                 &ARM::GPRRegClass;
      unsigned ImmReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(Opc), ImmReg)
                      .addImm(Imm));
      return ImmReg;
    }
  }

  unsigned ResultReg = 0;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool.  For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = DL.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);
  ResultReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::t2LDRpci), ResultReg)
                    .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::LDRcp), ResultReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));
  }
  return ResultReg;
}

bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}

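// Materialize the address of a global value. Depending on the subtarget and
// relocation model this uses a movw/movt pair, a (possibly PIC-adjusted)
// constant-pool load, and an extra load for indirect references.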
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal()) return 0;

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return 0;

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible, it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    unsigned Align = DL.getPrefTypeAlignment(GV->getType());
    if (Align == 0) {
      // TODO: Figure out if this is correct.
      Align = DL.getTypeAllocSize(GV->getType());
    }

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, Align, VT);

    // Grab index.
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          DbgLoc, TII.get(Opc), NewDestReg)
                                      .addReg(DestReg)
                                      .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  if (IsIndirect) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::t2LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}

unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);

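// Static allocas are materialized as an ADD of their frame index with a zero
// offset; the frame index is rewritten to a real SP/FP offset later during
// frame lowering.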
unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);

    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          while (true) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported.
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

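// ARMSimplifyAddress folds an offset that the chosen addressing mode cannot
// encode back into the base register, so the later load/store only ever sees
// a representable immediate.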
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(Addr.Base.FI)
                    .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}

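// AddLoadStoreOperands appends the base (frame index or register) and the
// offset in whatever form the addressing mode expects (plus a machine memory
// operand for frame-index bases), then adds the optional predicate operands.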
void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       MachineMemOperand::Flags Flags,
                                       bool useAM3) {
  // addrmode5 output depends on the selection dag addressing dividing the
  // offset by 4 that it then later multiplies. Do this here as well.
  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}

bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned loads need special handling. Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load.  Now we must move from the GPR to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}

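// SelectLoad/SelectStore punt on atomic and swifterror accesses so the
// SelectionDAG path handles them; everything else goes through
// ARMComputeAddress and the ARMEmitLoad/ARMEmitStore helpers above.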
bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
                                              : &ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
      LLVM_FALLTHROUGH;
    }
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned stores need special handling. Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(StrOpc))
                                .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
    return false;
  return true;
}

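// Map an IR integer/FP comparison predicate onto the ARM condition code used
// by the branch and conditional-move emitters below. Predicates that would
// need two compares are reported as ARMCC::AL, which callers treat as
// "unhandled".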
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                      CI->isEquality()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
          .addReg(CmpReg)
          .addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}

bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0) return false;

  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  assert(isThumb2 || Subtarget->hasV4TOps());

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc)).addReg(AddrReg));

  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);

  return true;
}

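// ARMEmitCmp emits CMP/CMN (or the VCMP family for floating point), folding
// the second operand into an immediate when it is encodable, and adds the
// FMSTAT needed to copy the VFP flags into CPSR for floating-point compares.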
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt, bool isEquality) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple()) return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;

  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // then a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      // Equality comparisons shouldn't raise Invalid on uordered inputs.
      if (isEquality)
        CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
      else
        CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
      break;
    case MVT::f64:
      isICmp = false;
      // Equality comparisons shouldn't raise Invalid on uordered inputs.
      if (isEquality)
        CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
      else
        CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
      LLVM_FALLTHROUGH;
    case MVT::i32:
      if (isThumb2) {
        if (!UseImm)
          CmpOpc = ARM::t2CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
      } else {
        if (!UseImm)
          CmpOpc = ARM::CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
      }
      break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0) return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0) return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0) return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0) return false;
    }
  }

  const MCInstrDesc &II = TII.get(CmpOpc);
  SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
  if (!UseImm) {
    SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                    .addReg(SrcReg1).addReg(SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addReg(SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::FMSTAT)));
  return true;
}

bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  // Emit the compare.
  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                  CI->isEquality()))
    return false;

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = fastMaterializeConstant(Zero);
  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
          .addReg(ZeroReg).addImm(1)
          .addImm(ARMPred).addReg(ARM::CPSR);

  updateValueMap(I, DestReg);
  return true;
}

bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(&ARM::DPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  updateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(&ARM::SPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  updateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0) return false;

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
                           /*isZExt*/!isSigned);
    if (SrcReg == 0) return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg).addReg(FP));
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  else return false;

  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg).addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  updateValueMap(I, IntReg);
  return true;
}

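// SelectSelect lowers a 32-bit select into a TST of the condition followed by
// a predicated MOV/MVN, using the immediate form when the constant operand is
// encodable.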
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
    assert(VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      isNegativeImm = true;
      Imm = ~Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
  }

  unsigned Op2Reg = 0;
  if (!UseImm) {
    Op2Reg = getRegForValue(I->getOperand(2));
    if (Op2Reg == 0) return false;
  }

  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
          .addReg(CondReg)
          .addImm(1));

  unsigned MovCCOpc;
  const TargetRegisterClass *RC;
  if (!UseImm) {
    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
    if (!isNegativeImm)
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    else
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  }
  unsigned ResultReg = createResultReg(RC);
  if (!UseImm) {
    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
            ResultReg)
        .addReg(Op2Reg)
        .addReg(Op1Reg)
        .addImm(ARMCC::NE)
        .addReg(ARM::CPSR);
  } else {
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
            ResultReg)
        .addReg(Op1Reg)
        .addImm(Imm)
        .addImm(ARMCC::EQ)
        .addReg(ARM::CPSR);
  }
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivideInThumbMode())
    return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // Many ABIs do not provide a libcall for standalone remainder, so we need to
  // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
  // multi-reg returns, we'll have to bail out.
  if (!TLI.hasStandaloneRem(VT)) {
    return false;
  }

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
      break;
    case ISD::OR:
      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
      break;
    case ISD::SUB:
      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
      break;
  }

  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather than materializing the value in a register.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg)
                  .addReg(SrcReg1).addReg(SrcReg2));
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT FPVT = TLI.getValueType(DL, I->getType(), true);
  if (!FPVT.isSimple()) return false;
  MVT VT = FPVT.getSimpleVT();

  // FIXME: Support vector types where possible.
  if (VT == MVT::v2f64)
    return false;

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  updateValueMap(I, ResultReg);
  return true;
}

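// The routines below deal with calls: picking the calling-convention
// assignment function, checking that every argument can be lowered by
// FastISel before any code is emitted, and then materializing the argument
// copies around CALLSEQ_START.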

// Call Handling Code

// This is largely taken directly from CCAssignFnForNode
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool Return,
                                           bool isVarArg) {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2Base() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    LLVM_FALLTHROUGH;
  case CallingConv::C:
  case CallingConv::CXX_FAST_TLS:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2Base() &&
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else {
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    }
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fall through to soft float variant, variadic functions don't
    // use hard floating point ABI.
    LLVM_FALLTHROUGH;
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    if (Return)
      report_fatal_error("Can't return in GHC call convention");
    else
      return CC_ARM_APCS_GHC;
  }
}
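
// Example (illustrative): a non-variadic C call on an AAPCS target built for
// the hard-float ABI resolves to CC_ARM_AAPCS_VFP / RetCC_ARM_AAPCS_VFP above,
// so FP values travel in s/d registers; with a soft-float ABI the same call
// falls back to CC_ARM_AAPCS and passes everything in core registers or on
// the stack.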

bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
                             CCAssignFnForCall(CC, false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (ArgVT.SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      }
    }
  }

  // At this point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes).addImm(0));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = Args[VA.getValNo()];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
        assert(Arg != 0 && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
      // Intentional fall-through. Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
        assert(Arg != 0 && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                                 /*TODO: Kill=*/false);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      MVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      MVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    }
  }

  return true;
}
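
// Example (illustrative): a soft-float call returning double comes back in a
// register pair, and the two-location case above reassembles it with VMOVDRR
// into a DPR virtual register; an i8/i16 result is copied out of its 32-bit
// location register whole (CopyVT is widened to i32), since only the low bits
// are meaningful to the caller.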

bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
                                                 F.isVarArg()));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple()) return false;
    MVT RVVT = RVEVT.getSimpleVT();
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext. Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
        if (SrcReg == 0) return false;
      }
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Subtarget->getReturnOpcode()));
  AddOptionalDefs(MIB);
  for (unsigned R : RetRegs)
    MIB.addReg(R, RegState::Implicit);
  return true;
}
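
// Example (illustrative): for
//   define zeroext i8 @f(...) { ...; ret i8 %v }
// the return value is widened to i32 via ARMEmitIntExt, copied into the
// location register chosen by the calling convention (normally r0), and that
// register is attached to the return instruction as an implicit use so it
// stays live until the return.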

unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  if (UseReg)
    return isThumb2 ? ARM::tBLXr : ARM::BLX;
  else
    return isThumb2 ? ARM::tBL : ARM::BL;
}

unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  // Manually compute the global's type to avoid building it when unnecessary.
  Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
  EVT LCREVT = TLI.getValueType(DL, GVTy);
  if (!LCREVT.isSimple()) return 0;

  GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
                                       GlobalValue::ExternalLinkage, nullptr,
                                       Name);
  assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}

// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (Value *Op : I->operands()) {
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, false))
    return false;

  unsigned CalleeReg = 0;
  if (Subtarget->genLongCalls()) {
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DbgLoc, TII.get(CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(predOps(ARMCC::AL));
  if (Subtarget->genLongCalls())
    MIB.addReg(CalleeReg);
  else
    MIB.addExternalSymbol(TLI.getLibcallName(Call));

  // Add implicit physical register uses to the call.
  for (unsigned R : RegArgs)
    MIB.addReg(R, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
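
// Example (illustrative): SelectRem reaches this helper for a 64-bit srem,
// which has no single ARM instruction; the chosen RTLIB entry (e.g.
// RTLIB::SREM_I64, whose actual symbol name depends on how the target's
// runtime library calls are configured) is emitted as a plain BL/tBL to an
// external symbol, or through a register when long calls are requested.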

bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = nullptr) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Callee)) return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (CI->isTailCall()) return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  FunctionType *FTy = CS.getFunctionType();
  bool isVarArg = FTy->isVarArg();

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8 && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CS.arg_size();
  Args.reserve(arg_size);
  ArgRegs.reserve(arg_size);
  ArgVTs.reserve(arg_size);
  ArgFlags.reserve(arg_size);
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last argument, which shouldn't be passed to the underlying function.
    if (IntrMemName && e - i <= 1)
      break;

    ISD::ArgFlagsTy Flags;
    unsigned ArgIdx = i - CS.arg_begin();
    if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
        CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
        CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
        CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
        CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
        CS.paramHasAttr(ArgIdx, Attribute::ByVal))
      return false;

    Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg))
    return false;

  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || Subtarget->genLongCalls()) UseReg = true;

  unsigned CalleeReg = 0;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(IntrMemName);
    else
      CalleeReg = getRegForValue(Callee);

    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DbgLoc, TII.get(CallOpc));

  // ARM calls don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(predOps(ARMCC::AL));
  if (UseReg)
    MIB.addReg(CalleeReg);
  else if (!IntrMemName)
    MIB.addGlobalAddress(GV, 0, 0);
  else
    MIB.addExternalSymbol(IntrMemName, 0);

  // Add implicit physical register uses to the call.
  for (unsigned R : RegArgs)
    MIB.addReg(R, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  return Len <= 16;
}

bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
                                        uint64_t Len, unsigned Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  while (Len) {
    MVT VT;
    if (!Alignment || Alignment >= 4) {
      if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else {
        assert(Len == 1 && "Expected a length of 1!");
        VT = MVT::i8;
      }
    } else {
      // Bound based on alignment.
      if (Len >= 2 && Alignment == 2)
        VT = MVT::i16;
      else {
        VT = MVT::i8;
      }
    }

    bool RV;
    unsigned ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert(RV && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert(RV && "Should be able to handle this store.");
    (void)RV;

    unsigned Size = VT.getSizeInBits() / 8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}
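
// Example (illustrative): a constant-length memcpy of 6 bytes with 4-byte
// aligned operands is expanded inline by the loop above into an i32 load/store
// pair followed by an i16 load/store pair, avoiding a call to the runtime
// memcpy entirely.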

bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;

    const ARMBaseRegisterInfo *RegInfo =
        static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = FramePtr;

    // Recursively load frame address
    // ldr r0 [fp]
    // ldr r0 [r0]
    // ldr r0 [r0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(LdrOpc), DestReg)
                      .addReg(SrcReg).addImm(0));
      SrcReg = DestReg;
    }
    updateValueMap(&I, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        unsigned Alignment = MinAlign(MTI.getDestAlignment(),
                                      MTI.getSourceAlignment());
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    if (!MSI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(&I, "memset");
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
      Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
    return true;
  }
  }
}

bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be
  // undefined.
  Value *Op = I->getOperand(0);

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(DL, Op->getType(), true);
  DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // Because the high bits are undefined, a truncate doesn't generate
  // any code.
  updateValueMap(I, SrcReg);
  return true;
}

unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return 0;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
    return 0;

  // Table of which combinations can be emitted as a single instruction,
  // and which will require two.
  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
    //            ARM                     Thumb
    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
    //    ext:     s  z      s  z          s  z      s  z
    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  };

  // Target registers for:
  //  - For ARM can never be PC.
  //  - For 16-bit Thumb are restricted to lower 8 registers.
  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
  static const TargetRegisterClass *RCTbl[2][2] = {
    // Instructions: Two                     Single
    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
  };

  // Table governing the instruction(s) to be emitted.
  static const struct InstructionTable {
    uint32_t Opc   : 16;
    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
  } IT[2][2][3][2] = {
    { // Two instructions (first is left shift, second is in this table).
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  31 },
        /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  31 } },
        /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  24 },
        /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  24 } },
        /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  16 },
        /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  16 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  31 },
        /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  31 } },
        /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  24 },
        /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  24 } },
        /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  16 },
        /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  16 } }
      }
    },
    { // Single instruction.
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
        /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,   1 } },
        /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,   0 },
        /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
        /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,   0 },
        /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,   0 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
        /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,   1 } },
        /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,   0 },
        /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
        /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,   0 },
        /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,   0 } }
      }
    }
  };

  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DestVT.getSizeInBits();
  (void) DestBits;
  assert((SrcBits < DestBits) && "can only extend to larger types");
  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
         "other sizes unimplemented");
  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
         "other sizes unimplemented");

  bool hasV6Ops = Subtarget->hasV6Ops();
  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
  assert((Bitness < 3) && "sanity-check table bounds");

  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
  unsigned Opc = ITP->Opc;
  assert(ARM::KILL != Opc && "Invalid table entry");
  unsigned hasS = ITP->hasS;
  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
         "only MOVsi has shift operand addressing mode");
  unsigned Imm = ITP->Imm;

  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  bool setsCPSR = &ARM::tGPRRegClass == RC;
  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
  unsigned ResultReg;
  // MOVsi encodes shift and immediate in shift operand addressing mode.
  // The following condition has the same value when emitting two
  // instruction sequences: both are shifts.
  bool ImmIsSO = (Shift != ARM_AM::no_shift);

  // Either one or two instructions are emitted.
  // They're always of the form:
  //   dst = in OP imm
  // CPSR is set only by 16-bit Thumb instructions.
  // Predicate, if any, is AL.
  // S bit, if available, is always 0.
  // When two are emitted the first's result will feed as the second's input,
  // that value is then dead.
  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
    ResultReg = createResultReg(RC);
    bool isLsl = (0 == Instr) && !isSingleInstr;
    unsigned Opcode = isLsl ? LSLOpc : Opc;
    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
    bool isKill = 1 == Instr;
    MachineInstrBuilder MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
    if (setsCPSR)
      MIB.addReg(ARM::CPSR, RegState::Define);
    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
    MIB.addReg(SrcReg, isKill * RegState::Kill)
        .addImm(ImmEnc)
        .add(predOps(ARMCC::AL));
    if (hasS)
      MIB.add(condCodeOp());
    // Second instruction consumes the first's result.
    SrcReg = ResultReg;
  }

  return ResultReg;
}
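
// Example (illustrative): sign-extending an i8 on a pre-ARMv6 ARM target has
// no SXTB available, so the tables above pick the two-instruction shift form
// (roughly: mov rT, rS, lsl #24; mov rD, rT, asr #24), while with v6 ops a
// single SXTB is emitted; an 8-bit zero-extension in ARM mode is always a
// single and-immediate with mask 255.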

bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, SrcTy, true);
  DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple()) return false;
  if (!DestEVT.isSimple()) return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();
  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0) return false;
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectShift(const Instruction *I,
                              ARM_AM::ShiftOpc ShiftTy) {
  // We handle thumb2 mode by target independent selector
  // or SelectionDAG ISel.
  if (isThumb2)
    return false;

  // Only handle i32 now.
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  if (DestVT != MVT::i32)
    return false;

  unsigned Opc = ARM::MOVsr;
  unsigned ShiftImm;
  Value *Src2Value = I->getOperand(1);
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
    ShiftImm = CI->getZExtValue();

    // Fall back to selection DAG isel if the shift amount
    // is zero or greater than the width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
      return false;

    Opc = ARM::MOVsi;
  }

  Value *Src1Value = I->getOperand(0);
  unsigned Reg1 = getRegForValue(Src1Value);
  if (Reg1 == 0) return false;

  unsigned Reg2 = 0;
  if (Opc == ARM::MOVsr) {
    Reg2 = getRegForValue(Src2Value);
    if (Reg2 == 0) return false;
  }

  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  if (ResultReg == 0) return false;

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg)
                            .addReg(Reg1);

  if (Opc == ARM::MOVsi)
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
  else if (Opc == ARM::MOVsr) {
    MIB.addReg(Reg2);
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
  }

  AddOptionalDefs(MIB);
  updateValueMap(I, ResultReg);
  return true;
}
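
// Example (illustrative): in ARM (non-Thumb) mode
//   %r = shl i32 %x, 3
// becomes a MOVsi with the shift folded into the shifter operand
// (mov rD, rX, lsl #3), while a variable shift amount selects MOVsr and reads
// the amount from a second register.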

// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    case Instruction::Shl:
      return SelectShift(I, ARM_AM::lsl);
    case Instruction::LShr:
      return SelectShift(I, ARM_AM::lsr);
    case Instruction::AShr:
      return SelectShift(I, ARM_AM::asr);
    default: break;
  }
  return false;
}

// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // ARM, Thumb.
  uint8_t ExpectedImm;
  uint8_t isZExt     : 1;
  uint8_t ExpectedVT : 7;
} FoldableLoadExtends[] = {
  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
};

/// The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
    return false;
  const uint64_t Imm = MI->getOperand(2).getImm();

  bool Found = false;
  bool isZExt;
  for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
    if (FLE.Opc[isThumb2] == MI->getOpcode() &&
        (uint64_t)FLE.ExpectedImm == Imm &&
        MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
      Found = true;
      isZExt = FLE.isZExt;
    }
  }
  if (!Found) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  unsigned ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}
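
// Example (illustrative): for a pattern such as
//   %v = load i8, i8* %p
//   %z = zext i8 %v to i32
// the zext is first selected as an and-immediate with mask 255; the folding
// above recognizes that combination and replaces it with a single
// zero-extending byte load into the extend's result register, then deletes the
// now-dead extend instruction.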

unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
                                     unsigned Align, MVT VT) {
  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);

  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  unsigned ConstAlign =
      MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, 4);

  unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(0);
  MIB.add(predOps(ARMCC::AL));

  // Fix the address by adding pc.
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
            .addReg(TempReg)
            .addImm(ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(predOps(ARMCC::AL));

  if (UseGOT_PREL && Subtarget->isThumb()) {
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(ARM::t2LDRi12), NewDestReg)
              .addReg(DestReg)
              .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}

bool ARMFastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  default:
    return false;
  case CallingConv::Fast:
  case CallingConv::C:
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::Swift:
    break;
  }

  // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0 - r3.
  for (const Argument &Arg : F->args()) {
    if (Arg.getArgNo() >= 4)
      return false;

    if (Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::ByVal))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      break;
    default:
      return false;
    }
  }

  static const MCPhysReg GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };

  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
  for (const Argument &Arg : F->args()) {
    unsigned ArgNo = Arg.getArgNo();
    unsigned SrcReg = GPRArgRegs[ArgNo];
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY),
            ResultReg).addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }

  return true;
}
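
// Example (illustrative): for
//   define i32 @f(i32 %a, i32 %b) { ... }
// the loop above marks r0 and r1 as live-ins and copies each into a fresh
// virtual register so later FastISel code can use the arguments directly;
// functions with more than four scalar i8/i16/i32 arguments (or any other
// argument kinds) bail out to the normal argument lowering.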

namespace llvm {

  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
      return new ARMFastISel(funcInfo, libInfo);

    return nullptr;
  }

} // end namespace llvm