1 //===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the ARM-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // ARMGenFastISel.inc, which is #included here.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMISelLowering.h"
21 #include "ARMMachineFunctionInfo.h"
22 #include "ARMSubtarget.h"
23 #include "MCTargetDesc/ARMAddressingModes.h"
24 #include "MCTargetDesc/ARMBaseInfo.h"
25 #include "Utils/ARMBaseInfo.h"
26 #include "llvm/ADT/APFloat.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/CodeGen/CallingConvLower.h"
31 #include "llvm/CodeGen/FastISel.h"
32 #include "llvm/CodeGen/FunctionLoweringInfo.h"
33 #include "llvm/CodeGen/ISDOpcodes.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineConstantPool.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineInstr.h"
39 #include "llvm/CodeGen/MachineInstrBuilder.h"
40 #include "llvm/CodeGen/MachineMemOperand.h"
41 #include "llvm/CodeGen/MachineOperand.h"
42 #include "llvm/CodeGen/MachineRegisterInfo.h"
43 #include "llvm/CodeGen/RuntimeLibcalls.h"
44 #include "llvm/CodeGen/TargetInstrInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetOpcodes.h"
47 #include "llvm/CodeGen/TargetRegisterInfo.h"
48 #include "llvm/CodeGen/ValueTypes.h"
49 #include "llvm/IR/Argument.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/CallSite.h"
52 #include "llvm/IR/CallingConv.h"
53 #include "llvm/IR/Constant.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DerivedTypes.h"
57 #include "llvm/IR/Function.h"
58 #include "llvm/IR/GetElementPtrTypeIterator.h"
59 #include "llvm/IR/GlobalValue.h"
60 #include "llvm/IR/GlobalVariable.h"
61 #include "llvm/IR/InstrTypes.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/IR/IntrinsicInst.h"
65 #include "llvm/IR/Intrinsics.h"
66 #include "llvm/IR/Module.h"
67 #include "llvm/IR/Operator.h"
68 #include "llvm/IR/Type.h"
69 #include "llvm/IR/User.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCRegisterInfo.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/Compiler.h"
75 #include "llvm/Support/ErrorHandling.h"
76 #include "llvm/Support/MachineValueType.h"
77 #include "llvm/Support/MathExtras.h"
78 #include "llvm/Target/TargetMachine.h"
79 #include "llvm/Target/TargetOptions.h"
88 // All possible address modes, plus some.
102 // Innocuous defaults for our address.
108 class ARMFastISel final
: public FastISel
{
109 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
110 /// make the right decision when generating code for different targets.
111 const ARMSubtarget
*Subtarget
;
113 const TargetMachine
&TM
;
114 const TargetInstrInfo
&TII
;
115 const TargetLowering
&TLI
;
116 ARMFunctionInfo
*AFI
;
118 // Convenience variables to avoid some queries.
120 LLVMContext
*Context
;
123 explicit ARMFastISel(FunctionLoweringInfo
&funcInfo
,
124 const TargetLibraryInfo
*libInfo
)
125 : FastISel(funcInfo
, libInfo
),
127 &static_cast<const ARMSubtarget
&>(funcInfo
.MF
->getSubtarget())),
128 M(const_cast<Module
&>(*funcInfo
.Fn
->getParent())),
129 TM(funcInfo
.MF
->getTarget()), TII(*Subtarget
->getInstrInfo()),
130 TLI(*Subtarget
->getTargetLowering()) {
131 AFI
= funcInfo
.MF
->getInfo
<ARMFunctionInfo
>();
132 isThumb2
= AFI
->isThumbFunction();
133 Context
= &funcInfo
.Fn
->getContext();
137 // Code from FastISel.cpp.
139 unsigned fastEmitInst_r(unsigned MachineInstOpcode
,
140 const TargetRegisterClass
*RC
,
141 unsigned Op0
, bool Op0IsKill
);
142 unsigned fastEmitInst_rr(unsigned MachineInstOpcode
,
143 const TargetRegisterClass
*RC
,
144 unsigned Op0
, bool Op0IsKill
,
145 unsigned Op1
, bool Op1IsKill
);
146 unsigned fastEmitInst_ri(unsigned MachineInstOpcode
,
147 const TargetRegisterClass
*RC
,
148 unsigned Op0
, bool Op0IsKill
,
150 unsigned fastEmitInst_i(unsigned MachineInstOpcode
,
151 const TargetRegisterClass
*RC
,
154 // Backend specific FastISel code.
156 bool fastSelectInstruction(const Instruction
*I
) override
;
157 unsigned fastMaterializeConstant(const Constant
*C
) override
;
158 unsigned fastMaterializeAlloca(const AllocaInst
*AI
) override
;
159 bool tryToFoldLoadIntoMI(MachineInstr
*MI
, unsigned OpNo
,
160 const LoadInst
*LI
) override
;
161 bool fastLowerArguments() override
;
163 #include "ARMGenFastISel.inc"
165 // Instruction selection routines.
167 bool SelectLoad(const Instruction
*I
);
168 bool SelectStore(const Instruction
*I
);
169 bool SelectBranch(const Instruction
*I
);
170 bool SelectIndirectBr(const Instruction
*I
);
171 bool SelectCmp(const Instruction
*I
);
172 bool SelectFPExt(const Instruction
*I
);
173 bool SelectFPTrunc(const Instruction
*I
);
174 bool SelectBinaryIntOp(const Instruction
*I
, unsigned ISDOpcode
);
175 bool SelectBinaryFPOp(const Instruction
*I
, unsigned ISDOpcode
);
176 bool SelectIToFP(const Instruction
*I
, bool isSigned
);
177 bool SelectFPToI(const Instruction
*I
, bool isSigned
);
178 bool SelectDiv(const Instruction
*I
, bool isSigned
);
179 bool SelectRem(const Instruction
*I
, bool isSigned
);
180 bool SelectCall(const Instruction
*I
, const char *IntrMemName
);
181 bool SelectIntrinsicCall(const IntrinsicInst
&I
);
182 bool SelectSelect(const Instruction
*I
);
183 bool SelectRet(const Instruction
*I
);
184 bool SelectTrunc(const Instruction
*I
);
185 bool SelectIntExt(const Instruction
*I
);
186 bool SelectShift(const Instruction
*I
, ARM_AM::ShiftOpc ShiftTy
);
190 bool isPositionIndependent() const;
191 bool isTypeLegal(Type
*Ty
, MVT
&VT
);
192 bool isLoadTypeLegal(Type
*Ty
, MVT
&VT
);
193 bool ARMEmitCmp(const Value
*Src1Value
, const Value
*Src2Value
,
195 bool ARMEmitLoad(MVT VT
, Register
&ResultReg
, Address
&Addr
,
196 unsigned Alignment
= 0, bool isZExt
= true,
197 bool allocReg
= true);
198 bool ARMEmitStore(MVT VT
, unsigned SrcReg
, Address
&Addr
,
199 unsigned Alignment
= 0);
200 bool ARMComputeAddress(const Value
*Obj
, Address
&Addr
);
201 void ARMSimplifyAddress(Address
&Addr
, MVT VT
, bool useAM3
);
202 bool ARMIsMemCpySmall(uint64_t Len
);
203 bool ARMTryEmitSmallMemCpy(Address Dest
, Address Src
, uint64_t Len
,
205 unsigned ARMEmitIntExt(MVT SrcVT
, unsigned SrcReg
, MVT DestVT
, bool isZExt
);
206 unsigned ARMMaterializeFP(const ConstantFP
*CFP
, MVT VT
);
207 unsigned ARMMaterializeInt(const Constant
*C
, MVT VT
);
208 unsigned ARMMaterializeGV(const GlobalValue
*GV
, MVT VT
);
209 unsigned ARMMoveToFPReg(MVT VT
, unsigned SrcReg
);
210 unsigned ARMMoveToIntReg(MVT VT
, unsigned SrcReg
);
211 unsigned ARMSelectCallOp(bool UseReg
);
212 unsigned ARMLowerPICELF(const GlobalValue
*GV
, unsigned Align
, MVT VT
);
214 const TargetLowering
*getTargetLowering() { return &TLI
; }
216 // Call handling routines.
218 CCAssignFn
*CCAssignFnForCall(CallingConv::ID CC
,
221 bool ProcessCallArgs(SmallVectorImpl
<Value
*> &Args
,
222 SmallVectorImpl
<Register
> &ArgRegs
,
223 SmallVectorImpl
<MVT
> &ArgVTs
,
224 SmallVectorImpl
<ISD::ArgFlagsTy
> &ArgFlags
,
225 SmallVectorImpl
<Register
> &RegArgs
,
229 unsigned getLibcallReg(const Twine
&Name
);
230 bool FinishCall(MVT RetVT
, SmallVectorImpl
<Register
> &UsedRegs
,
231 const Instruction
*I
, CallingConv::ID CC
,
232 unsigned &NumBytes
, bool isVarArg
);
233 bool ARMEmitLibcall(const Instruction
*I
, RTLIB::Libcall Call
);
235 // OptionalDef handling routines.
237 bool isARMNEONPred(const MachineInstr
*MI
);
238 bool DefinesOptionalPredicate(MachineInstr
*MI
, bool *CPSR
);
239 const MachineInstrBuilder
&AddOptionalDefs(const MachineInstrBuilder
&MIB
);
240 void AddLoadStoreOperands(MVT VT
, Address
&Addr
,
241 const MachineInstrBuilder
&MIB
,
242 MachineMemOperand::Flags Flags
, bool useAM3
);
245 } // end anonymous namespace
247 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
248 // we don't care about implicit defs here, just places we'll need to add a
249 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
250 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr
*MI
, bool *CPSR
) {
251 if (!MI
->hasOptionalDef())
254 // Look to see if our OptionalDef is defining CPSR or CCR.
255 for (const MachineOperand
&MO
: MI
->operands()) {
256 if (!MO
.isReg() || !MO
.isDef()) continue;
257 if (MO
.getReg() == ARM::CPSR
)
263 bool ARMFastISel::isARMNEONPred(const MachineInstr
*MI
) {
264 const MCInstrDesc
&MCID
= MI
->getDesc();
266 // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
267 if ((MCID
.TSFlags
& ARMII::DomainMask
) != ARMII::DomainNEON
||
268 AFI
->isThumb2Function())
269 return MI
->isPredicable();
271 for (const MCOperandInfo
&opInfo
: MCID
.operands())
272 if (opInfo
.isPredicate())
278 // If the machine is predicable go ahead and add the predicate operands, if
279 // it needs default CC operands add those.
280 // TODO: If we want to support thumb1 then we'll need to deal with optional
281 // CPSR defs that need to be added before the remaining operands. See s_cc_out
282 // for descriptions why.
283 const MachineInstrBuilder
&
284 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder
&MIB
) {
285 MachineInstr
*MI
= &*MIB
;
287 // Do we use a predicate? or...
288 // Are we NEON in ARM mode and have a predicate operand? If so, I know
289 // we're not predicable but add it anyways.
290 if (isARMNEONPred(MI
))
291 MIB
.add(predOps(ARMCC::AL
));
293 // Do we optionally set a predicate? CPSR is set to true iff the optional
294 // def defines CPSR. All other OptionalDefines in ARM are the CCR register.
296 if (DefinesOptionalPredicate(MI
, &CPSR
))
297 MIB
.add(CPSR
? t1CondCodeOp() : condCodeOp());
301 unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode
,
302 const TargetRegisterClass
*RC
,
303 unsigned Op0
, bool Op0IsKill
) {
304 Register ResultReg
= createResultReg(RC
);
305 const MCInstrDesc
&II
= TII
.get(MachineInstOpcode
);
307 // Make sure the input operand is sufficiently constrained to be legal
308 // for this instruction.
309 Op0
= constrainOperandRegClass(II
, Op0
, 1);
310 if (II
.getNumDefs() >= 1) {
311 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
,
312 ResultReg
).addReg(Op0
, Op0IsKill
* RegState::Kill
));
314 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
)
315 .addReg(Op0
, Op0IsKill
* RegState::Kill
));
316 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
317 TII
.get(TargetOpcode::COPY
), ResultReg
)
318 .addReg(II
.ImplicitDefs
[0]));
323 unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode
,
324 const TargetRegisterClass
*RC
,
325 unsigned Op0
, bool Op0IsKill
,
326 unsigned Op1
, bool Op1IsKill
) {
327 unsigned ResultReg
= createResultReg(RC
);
328 const MCInstrDesc
&II
= TII
.get(MachineInstOpcode
);
330 // Make sure the input operands are sufficiently constrained to be legal
331 // for this instruction.
332 Op0
= constrainOperandRegClass(II
, Op0
, 1);
333 Op1
= constrainOperandRegClass(II
, Op1
, 2);
335 if (II
.getNumDefs() >= 1) {
337 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
, ResultReg
)
338 .addReg(Op0
, Op0IsKill
* RegState::Kill
)
339 .addReg(Op1
, Op1IsKill
* RegState::Kill
));
341 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
)
342 .addReg(Op0
, Op0IsKill
* RegState::Kill
)
343 .addReg(Op1
, Op1IsKill
* RegState::Kill
));
344 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
345 TII
.get(TargetOpcode::COPY
), ResultReg
)
346 .addReg(II
.ImplicitDefs
[0]));
351 unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode
,
352 const TargetRegisterClass
*RC
,
353 unsigned Op0
, bool Op0IsKill
,
355 unsigned ResultReg
= createResultReg(RC
);
356 const MCInstrDesc
&II
= TII
.get(MachineInstOpcode
);
358 // Make sure the input operand is sufficiently constrained to be legal
359 // for this instruction.
360 Op0
= constrainOperandRegClass(II
, Op0
, 1);
361 if (II
.getNumDefs() >= 1) {
363 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
, ResultReg
)
364 .addReg(Op0
, Op0IsKill
* RegState::Kill
)
367 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
)
368 .addReg(Op0
, Op0IsKill
* RegState::Kill
)
370 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
371 TII
.get(TargetOpcode::COPY
), ResultReg
)
372 .addReg(II
.ImplicitDefs
[0]));
377 unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode
,
378 const TargetRegisterClass
*RC
,
380 unsigned ResultReg
= createResultReg(RC
);
381 const MCInstrDesc
&II
= TII
.get(MachineInstOpcode
);
383 if (II
.getNumDefs() >= 1) {
384 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
,
385 ResultReg
).addImm(Imm
));
387 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
)
389 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
390 TII
.get(TargetOpcode::COPY
), ResultReg
)
391 .addReg(II
.ImplicitDefs
[0]));
396 // TODO: Don't worry about 64-bit now, but when this is fixed remove the
397 // checks from the various callers.
398 unsigned ARMFastISel::ARMMoveToFPReg(MVT VT
, unsigned SrcReg
) {
399 if (VT
== MVT::f64
) return 0;
401 unsigned MoveReg
= createResultReg(TLI
.getRegClassFor(VT
));
402 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
403 TII
.get(ARM::VMOVSR
), MoveReg
)
408 unsigned ARMFastISel::ARMMoveToIntReg(MVT VT
, unsigned SrcReg
) {
409 if (VT
== MVT::i64
) return 0;
411 unsigned MoveReg
= createResultReg(TLI
.getRegClassFor(VT
));
412 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
413 TII
.get(ARM::VMOVRS
), MoveReg
)
418 // For double width floating point we need to materialize two constants
419 // (the high and the low) into integer registers then use a move to get
420 // the combined constant into an FP reg.
421 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP
*CFP
, MVT VT
) {
422 const APFloat Val
= CFP
->getValueAPF();
423 bool is64bit
= VT
== MVT::f64
;
425 // This checks to see if we can use VFP3 instructions to materialize
426 // a constant, otherwise we have to go through the constant pool.
427 if (TLI
.isFPImmLegal(Val
, VT
)) {
431 Imm
= ARM_AM::getFP64Imm(Val
);
434 Imm
= ARM_AM::getFP32Imm(Val
);
437 unsigned DestReg
= createResultReg(TLI
.getRegClassFor(VT
));
438 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
439 TII
.get(Opc
), DestReg
).addImm(Imm
));
443 // Require VFP2 for loading fp constants.
444 if (!Subtarget
->hasVFP2Base()) return false;
446 // MachineConstantPool wants an explicit alignment.
447 unsigned Align
= DL
.getPrefTypeAlignment(CFP
->getType());
449 // TODO: Figure out if this is correct.
450 Align
= DL
.getTypeAllocSize(CFP
->getType());
452 unsigned Idx
= MCP
.getConstantPoolIndex(cast
<Constant
>(CFP
), Align
);
453 unsigned DestReg
= createResultReg(TLI
.getRegClassFor(VT
));
454 unsigned Opc
= is64bit
? ARM::VLDRD
: ARM::VLDRS
;
456 // The extra reg is for addrmode5.
458 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(Opc
), DestReg
)
459 .addConstantPoolIndex(Idx
)
464 unsigned ARMFastISel::ARMMaterializeInt(const Constant
*C
, MVT VT
) {
465 if (VT
!= MVT::i32
&& VT
!= MVT::i16
&& VT
!= MVT::i8
&& VT
!= MVT::i1
)
468 // If we can do this in a single instruction without a constant pool entry, do so now.
470 const ConstantInt
*CI
= cast
<ConstantInt
>(C
);
471 if (Subtarget
->hasV6T2Ops() && isUInt
<16>(CI
->getZExtValue())) {
472 unsigned Opc
= isThumb2
? ARM::t2MOVi16
: ARM::MOVi16
;
473 const TargetRegisterClass
*RC
= isThumb2
? &ARM::rGPRRegClass
:
475 unsigned ImmReg
= createResultReg(RC
);
476 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
477 TII
.get(Opc
), ImmReg
)
478 .addImm(CI
->getZExtValue()));
482 // Use MVN to emit negative constants.
483 if (VT
== MVT::i32
&& Subtarget
->hasV6T2Ops() && CI
->isNegative()) {
484 unsigned Imm
= (unsigned)~(CI
->getSExtValue());
485 bool UseImm
= isThumb2
? (ARM_AM::getT2SOImmVal(Imm
) != -1) :
486 (ARM_AM::getSOImmVal(Imm
) != -1);
488 unsigned Opc
= isThumb2
? ARM::t2MVNi
: ARM::MVNi
;
489 const TargetRegisterClass
*RC
= isThumb2
? &ARM::rGPRRegClass
:
491 unsigned ImmReg
= createResultReg(RC
);
492 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
493 TII
.get(Opc
), ImmReg
)
499 unsigned ResultReg
= 0;
500 if (Subtarget
->useMovt())
501 ResultReg
= fastEmit_i(VT
, VT
, ISD::Constant
, CI
->getZExtValue());
506 // Load from constant pool. For now 32-bit only.
510 // MachineConstantPool wants an explicit alignment.
511 unsigned Align
= DL
.getPrefTypeAlignment(C
->getType());
513 // TODO: Figure out if this is correct.
514 Align
= DL
.getTypeAllocSize(C
->getType());
516 unsigned Idx
= MCP
.getConstantPoolIndex(C
, Align
);
517 ResultReg
= createResultReg(TLI
.getRegClassFor(VT
));
519 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
520 TII
.get(ARM::t2LDRpci
), ResultReg
)
521 .addConstantPoolIndex(Idx
));
523 // The extra immediate is for addrmode2.
524 ResultReg
= constrainOperandRegClass(TII
.get(ARM::LDRcp
), ResultReg
, 0);
525 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
526 TII
.get(ARM::LDRcp
), ResultReg
)
527 .addConstantPoolIndex(Idx
)
533 bool ARMFastISel::isPositionIndependent() const {
534 return TLI
.isPositionIndependent();
537 unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue
*GV
, MVT VT
) {
538 // For now 32-bit only.
539 if (VT
!= MVT::i32
|| GV
->isThreadLocal()) return 0;
541 // ROPI/RWPI not currently supported.
542 if (Subtarget
->isROPI() || Subtarget
->isRWPI())
545 bool IsIndirect
= Subtarget
->isGVIndirectSymbol(GV
);
546 const TargetRegisterClass
*RC
= isThumb2
? &ARM::rGPRRegClass
548 unsigned DestReg
= createResultReg(RC
);
550 // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
551 const GlobalVariable
*GVar
= dyn_cast
<GlobalVariable
>(GV
);
552 bool IsThreadLocal
= GVar
&& GVar
->isThreadLocal();
553 if (!Subtarget
->isTargetMachO() && IsThreadLocal
) return 0;
555 bool IsPositionIndependent
= isPositionIndependent();
556 // Use movw+movt when possible, it avoids constant pool entries.
557 // Non-darwin targets only support static movt relocations in FastISel.
558 if (Subtarget
->useMovt() &&
559 (Subtarget
->isTargetMachO() || !IsPositionIndependent
)) {
561 unsigned char TF
= 0;
562 if (Subtarget
->isTargetMachO())
563 TF
= ARMII::MO_NONLAZY
;
565 if (IsPositionIndependent
)
566 Opc
= isThumb2
? ARM::t2MOV_ga_pcrel
: ARM::MOV_ga_pcrel
;
568 Opc
= isThumb2
? ARM::t2MOVi32imm
: ARM::MOVi32imm
;
569 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
570 TII
.get(Opc
), DestReg
).addGlobalAddress(GV
, 0, TF
));
572 // MachineConstantPool wants an explicit alignment.
573 unsigned Align
= DL
.getPrefTypeAlignment(GV
->getType());
575 // TODO: Figure out if this is correct.
576 Align
= DL
.getTypeAllocSize(GV
->getType());
579 if (Subtarget
->isTargetELF() && IsPositionIndependent
)
580 return ARMLowerPICELF(GV
, Align
, VT
);
583 unsigned PCAdj
= IsPositionIndependent
? (Subtarget
->isThumb() ? 4 : 8) : 0;
584 unsigned Id
= AFI
->createPICLabelUId();
585 ARMConstantPoolValue
*CPV
= ARMConstantPoolConstant::Create(GV
, Id
,
588 unsigned Idx
= MCP
.getConstantPoolIndex(CPV
, Align
);
591 MachineInstrBuilder MIB
;
593 unsigned Opc
= IsPositionIndependent
? ARM::t2LDRpci_pic
: ARM::t2LDRpci
;
594 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(Opc
),
595 DestReg
).addConstantPoolIndex(Idx
);
596 if (IsPositionIndependent
)
598 AddOptionalDefs(MIB
);
600 // The extra immediate is for addrmode2.
601 DestReg
= constrainOperandRegClass(TII
.get(ARM::LDRcp
), DestReg
, 0);
602 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
603 TII
.get(ARM::LDRcp
), DestReg
)
604 .addConstantPoolIndex(Idx
)
606 AddOptionalDefs(MIB
);
608 if (IsPositionIndependent
) {
609 unsigned Opc
= IsIndirect
? ARM::PICLDR
: ARM::PICADD
;
610 unsigned NewDestReg
= createResultReg(TLI
.getRegClassFor(VT
));
612 MachineInstrBuilder MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
,
613 DbgLoc
, TII
.get(Opc
), NewDestReg
)
616 AddOptionalDefs(MIB
);
623 MachineInstrBuilder MIB
;
624 unsigned NewDestReg
= createResultReg(TLI
.getRegClassFor(VT
));
626 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
627 TII
.get(ARM::t2LDRi12
), NewDestReg
)
631 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
632 TII
.get(ARM::LDRi12
), NewDestReg
)
635 DestReg
= NewDestReg
;
636 AddOptionalDefs(MIB
);
642 unsigned ARMFastISel::fastMaterializeConstant(const Constant
*C
) {
643 EVT CEVT
= TLI
.getValueType(DL
, C
->getType(), true);
645 // Only handle simple types.
646 if (!CEVT
.isSimple()) return 0;
647 MVT VT
= CEVT
.getSimpleVT();
649 if (const ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(C
))
650 return ARMMaterializeFP(CFP
, VT
);
651 else if (const GlobalValue
*GV
= dyn_cast
<GlobalValue
>(C
))
652 return ARMMaterializeGV(GV
, VT
);
653 else if (isa
<ConstantInt
>(C
))
654 return ARMMaterializeInt(C
, VT
);
659 // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
661 unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst
*AI
) {
662 // Don't handle dynamic allocas.
663 if (!FuncInfo
.StaticAllocaMap
.count(AI
)) return 0;
666 if (!isLoadTypeLegal(AI
->getType(), VT
)) return 0;
668 DenseMap
<const AllocaInst
*, int>::iterator SI
=
669 FuncInfo
.StaticAllocaMap
.find(AI
);
671 // This will get lowered later into the correct offsets and registers
672 // via rewriteXFrameIndex.
673 if (SI
!= FuncInfo
.StaticAllocaMap
.end()) {
674 unsigned Opc
= isThumb2
? ARM::t2ADDri
: ARM::ADDri
;
675 const TargetRegisterClass
* RC
= TLI
.getRegClassFor(VT
);
676 unsigned ResultReg
= createResultReg(RC
);
677 ResultReg
= constrainOperandRegClass(TII
.get(Opc
), ResultReg
, 0);
679 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
680 TII
.get(Opc
), ResultReg
)
681 .addFrameIndex(SI
->second
)
689 bool ARMFastISel::isTypeLegal(Type
*Ty
, MVT
&VT
) {
690 EVT evt
= TLI
.getValueType(DL
, Ty
, true);
692 // Only handle simple types.
693 if (evt
== MVT::Other
|| !evt
.isSimple()) return false;
694 VT
= evt
.getSimpleVT();
696 // Handle all legal types, i.e. a register that will directly hold this value.
698 return TLI
.isTypeLegal(VT
);
701 bool ARMFastISel::isLoadTypeLegal(Type
*Ty
, MVT
&VT
) {
702 if (isTypeLegal(Ty
, VT
)) return true;
704 // If this is a type that can be sign or zero-extended to a basic operation
705 // go ahead and accept it now.
706 if (VT
== MVT::i1
|| VT
== MVT::i8
|| VT
== MVT::i16
)
712 // Computes the address to get to an object.
713 bool ARMFastISel::ARMComputeAddress(const Value
*Obj
, Address
&Addr
) {
714 // Some boilerplate from the X86 FastISel.
715 const User
*U
= nullptr;
716 unsigned Opcode
= Instruction::UserOp1
;
717 if (const Instruction
*I
= dyn_cast
<Instruction
>(Obj
)) {
718 // Don't walk into other basic blocks unless the object is an alloca from
719 // another block, otherwise it may not have a virtual register assigned.
720 if (FuncInfo
.StaticAllocaMap
.count(static_cast<const AllocaInst
*>(Obj
)) ||
721 FuncInfo
.MBBMap
[I
->getParent()] == FuncInfo
.MBB
) {
722 Opcode
= I
->getOpcode();
725 } else if (const ConstantExpr
*C
= dyn_cast
<ConstantExpr
>(Obj
)) {
726 Opcode
= C
->getOpcode();
730 if (PointerType
*Ty
= dyn_cast
<PointerType
>(Obj
->getType()))
731 if (Ty
->getAddressSpace() > 255)
732 // Fast instruction selection doesn't support the special address spaces.
739 case Instruction::BitCast
:
740 // Look through bitcasts.
741 return ARMComputeAddress(U
->getOperand(0), Addr
);
742 case Instruction::IntToPtr
:
743 // Look past no-op inttoptrs.
744 if (TLI
.getValueType(DL
, U
->getOperand(0)->getType()) ==
745 TLI
.getPointerTy(DL
))
746 return ARMComputeAddress(U
->getOperand(0), Addr
);
748 case Instruction::PtrToInt
:
749 // Look past no-op ptrtoints.
750 if (TLI
.getValueType(DL
, U
->getType()) == TLI
.getPointerTy(DL
))
751 return ARMComputeAddress(U
->getOperand(0), Addr
);
753 case Instruction::GetElementPtr
: {
754 Address SavedAddr
= Addr
;
755 int TmpOffset
= Addr
.Offset
;
757 // Iterate through the GEP folding the constants into offsets where we can.
759 gep_type_iterator GTI
= gep_type_begin(U
);
760 for (User::const_op_iterator i
= U
->op_begin() + 1, e
= U
->op_end();
761 i
!= e
; ++i
, ++GTI
) {
762 const Value
*Op
= *i
;
763 if (StructType
*STy
= GTI
.getStructTypeOrNull()) {
764 const StructLayout
*SL
= DL
.getStructLayout(STy
);
765 unsigned Idx
= cast
<ConstantInt
>(Op
)->getZExtValue();
766 TmpOffset
+= SL
->getElementOffset(Idx
);
768 uint64_t S
= DL
.getTypeAllocSize(GTI
.getIndexedType());
770 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Op
)) {
771 // Constant-offset addressing.
772 TmpOffset
+= CI
->getSExtValue() * S
;
775 if (canFoldAddIntoGEP(U
, Op
)) {
776 // A compatible add with a constant operand. Fold the constant.
778 cast
<ConstantInt
>(cast
<AddOperator
>(Op
)->getOperand(1));
779 TmpOffset
+= CI
->getSExtValue() * S
;
780 // Iterate on the other operand.
781 Op
= cast
<AddOperator
>(Op
)->getOperand(0);
785 goto unsupported_gep
;
790 // Try to grab the base operand now.
791 Addr
.Offset
= TmpOffset
;
792 if (ARMComputeAddress(U
->getOperand(0), Addr
)) return true;
794 // We failed, restore everything and try the other options.
800 case Instruction::Alloca
: {
801 const AllocaInst
*AI
= cast
<AllocaInst
>(Obj
);
802 DenseMap
<const AllocaInst
*, int>::iterator SI
=
803 FuncInfo
.StaticAllocaMap
.find(AI
);
804 if (SI
!= FuncInfo
.StaticAllocaMap
.end()) {
805 Addr
.BaseType
= Address::FrameIndexBase
;
806 Addr
.Base
.FI
= SI
->second
;
813 // Try to get this in a register if nothing else has worked.
814 if (Addr
.Base
.Reg
== 0) Addr
.Base
.Reg
= getRegForValue(Obj
);
815 return Addr
.Base
.Reg
!= 0;
818 void ARMFastISel::ARMSimplifyAddress(Address
&Addr
, MVT VT
, bool useAM3
) {
819 bool needsLowering
= false;
820 switch (VT
.SimpleTy
) {
821 default: llvm_unreachable("Unhandled load/store type!");
827 // Integer loads/stores handle 12-bit offsets.
828 needsLowering
= ((Addr
.Offset
& 0xfff) != Addr
.Offset
);
829 // Handle negative offsets.
830 if (needsLowering
&& isThumb2
)
831 needsLowering
= !(Subtarget
->hasV6T2Ops() && Addr
.Offset
< 0 &&
834 // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
835 needsLowering
= (Addr
.Offset
> 255 || Addr
.Offset
< -255);
840 // Floating point operands handle 8-bit offsets.
841 needsLowering
= ((Addr
.Offset
& 0xff) != Addr
.Offset
);
845 // If this is a stack pointer and the offset needs to be simplified then
846 // put the alloca address into a register, set the base type back to
847 // register and continue. This should almost never happen.
848 if (needsLowering
&& Addr
.BaseType
== Address::FrameIndexBase
) {
849 const TargetRegisterClass
*RC
= isThumb2
? &ARM::tGPRRegClass
851 unsigned ResultReg
= createResultReg(RC
);
852 unsigned Opc
= isThumb2
? ARM::t2ADDri
: ARM::ADDri
;
853 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
854 TII
.get(Opc
), ResultReg
)
855 .addFrameIndex(Addr
.Base
.FI
)
857 Addr
.Base
.Reg
= ResultReg
;
858 Addr
.BaseType
= Address::RegBase
;
861 // Since the offset is too large for the load/store instruction
862 // get the reg+offset into a register.
864 Addr
.Base
.Reg
= fastEmit_ri_(MVT::i32
, ISD::ADD
, Addr
.Base
.Reg
,
865 /*Op0IsKill*/false, Addr
.Offset
, MVT::i32
);
870 void ARMFastISel::AddLoadStoreOperands(MVT VT
, Address
&Addr
,
871 const MachineInstrBuilder
&MIB
,
872 MachineMemOperand::Flags Flags
,
874 // addrmode5 output depends on the selection dag addressing dividing the
875 // offset by 4 that it then later multiplies. Do this here as well.
876 if (VT
.SimpleTy
== MVT::f32
|| VT
.SimpleTy
== MVT::f64
)
879 // Frame base works a bit differently. Handle it separately.
880 if (Addr
.BaseType
== Address::FrameIndexBase
) {
881 int FI
= Addr
.Base
.FI
;
882 int Offset
= Addr
.Offset
;
883 MachineMemOperand
*MMO
= FuncInfo
.MF
->getMachineMemOperand(
884 MachinePointerInfo::getFixedStack(*FuncInfo
.MF
, FI
, Offset
), Flags
,
885 MFI
.getObjectSize(FI
), MFI
.getObjectAlignment(FI
));
886 // Now add the rest of the operands.
887 MIB
.addFrameIndex(FI
);
889 // ARM halfword load/stores and signed byte loads need an additional operand.
892 int Imm
= (Addr
.Offset
< 0) ? (0x100 | -Addr
.Offset
) : Addr
.Offset
;
896 MIB
.addImm(Addr
.Offset
);
898 MIB
.addMemOperand(MMO
);
900 // Now add the rest of the operands.
901 MIB
.addReg(Addr
.Base
.Reg
);
903 // ARM halfword load/stores and signed byte loads need an additional operand.
906 int Imm
= (Addr
.Offset
< 0) ? (0x100 | -Addr
.Offset
) : Addr
.Offset
;
910 MIB
.addImm(Addr
.Offset
);
913 AddOptionalDefs(MIB
);
916 bool ARMFastISel::ARMEmitLoad(MVT VT
, Register
&ResultReg
, Address
&Addr
,
917 unsigned Alignment
, bool isZExt
, bool allocReg
) {
920 bool needVMOV
= false;
921 const TargetRegisterClass
*RC
;
922 switch (VT
.SimpleTy
) {
923 // This is mostly going to be Neon/vector support.
924 default: return false;
928 if (Addr
.Offset
< 0 && Addr
.Offset
> -256 && Subtarget
->hasV6T2Ops())
929 Opc
= isZExt
? ARM::t2LDRBi8
: ARM::t2LDRSBi8
;
931 Opc
= isZExt
? ARM::t2LDRBi12
: ARM::t2LDRSBi12
;
940 RC
= isThumb2
? &ARM::rGPRRegClass
: &ARM::GPRnopcRegClass
;
943 if (Alignment
&& Alignment
< 2 && !Subtarget
->allowsUnalignedMem())
947 if (Addr
.Offset
< 0 && Addr
.Offset
> -256 && Subtarget
->hasV6T2Ops())
948 Opc
= isZExt
? ARM::t2LDRHi8
: ARM::t2LDRSHi8
;
950 Opc
= isZExt
? ARM::t2LDRHi12
: ARM::t2LDRSHi12
;
952 Opc
= isZExt
? ARM::LDRH
: ARM::LDRSH
;
955 RC
= isThumb2
? &ARM::rGPRRegClass
: &ARM::GPRnopcRegClass
;
958 if (Alignment
&& Alignment
< 4 && !Subtarget
->allowsUnalignedMem())
962 if (Addr
.Offset
< 0 && Addr
.Offset
> -256 && Subtarget
->hasV6T2Ops())
969 RC
= isThumb2
? &ARM::rGPRRegClass
: &ARM::GPRnopcRegClass
;
972 if (!Subtarget
->hasVFP2Base()) return false;
973 // Unaligned loads need special handling. Floats require word-alignment.
974 if (Alignment
&& Alignment
< 4) {
977 Opc
= isThumb2
? ARM::t2LDRi12
: ARM::LDRi12
;
978 RC
= isThumb2
? &ARM::rGPRRegClass
: &ARM::GPRnopcRegClass
;
981 RC
= TLI
.getRegClassFor(VT
);
985 // Can load and store double precision even without FeatureFP64
986 if (!Subtarget
->hasVFP2Base()) return false;
987 // FIXME: Unaligned loads need special handling. Doublewords require word-alignment.
989 if (Alignment
&& Alignment
< 4)
993 RC
= TLI
.getRegClassFor(VT
);
996 // Simplify this down to something we can handle.
997 ARMSimplifyAddress(Addr
, VT
, useAM3
);
999 // Create the base instruction, then add the operands.
1001 ResultReg
= createResultReg(RC
);
1002 assert(ResultReg
> 255 && "Expected an allocated virtual register.");
1003 MachineInstrBuilder MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1004 TII
.get(Opc
), ResultReg
);
1005 AddLoadStoreOperands(VT
, Addr
, MIB
, MachineMemOperand::MOLoad
, useAM3
);
1007 // If we had an unaligned load of a float we've converted it to a regular
1008 // load. Now we must move from the GPR to the FP register.
1010 unsigned MoveReg
= createResultReg(TLI
.getRegClassFor(MVT::f32
));
1011 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1012 TII
.get(ARM::VMOVSR
), MoveReg
)
1013 .addReg(ResultReg
));
1014 ResultReg
= MoveReg
;
// Selects an IR load instruction (I is cast to LoadInst below). The visible
// steps are: test for an atomic load, test whether the pointer operand is a
// swifterror argument or alloca, check the loaded type with isLoadTypeLegal,
// compute the address with ARMComputeAddress, emit the load via ARMEmitLoad
// using the LoadInst's alignment, and map I to the result register.
1019 bool ARMFastISel::SelectLoad(const Instruction
*I
) {
1020 // Atomic loads need special handling.
1021 if (cast
<LoadInst
>(I
)->isAtomic())
// SV is operand 0 of the load, i.e. the same operand later passed to
// ARMComputeAddress as the address.
1024 const Value
*SV
= I
->getOperand(0);
1025 if (TLI
.supportSwiftError()) {
1026 // Swifterror values can come from either a function parameter with
1027 // swifterror attribute or an alloca with swifterror attribute.
1028 if (const Argument
*Arg
= dyn_cast
<Argument
>(SV
)) {
1029 if (Arg
->hasSwiftErrorAttr())
1033 if (const AllocaInst
*Alloca
= dyn_cast
<AllocaInst
>(SV
)) {
1034 if (Alloca
->isSwiftError())
1039 // Verify we have a legal type before going any further.
1041 if (!isLoadTypeLegal(I
->getType(), VT
))
1044 // See if we can handle this address.
1046 if (!ARMComputeAddress(I
->getOperand(0), Addr
)) return false;
// Emit the load itself, forwarding the alignment recorded on the LoadInst.
1049 if (!ARMEmitLoad(VT
, ResultReg
, Addr
, cast
<LoadInst
>(I
)->getAlignment()))
// Record ResultReg as the register that holds the value of I.
1051 updateValueMap(I
, ResultReg
);
// Emits a store of SrcReg (a value of type VT) to Addr. The switch on
// VT.SimpleTy picks the store opcode into StrOpc; value types without a case
// reach the default and return false. Alignment is compared against 2 and 4 in
// the cases below; an Alignment of 0 never trips those checks because each
// test also requires Alignment to be non-zero.
1055 bool ARMFastISel::ARMEmitStore(MVT VT
, unsigned SrcReg
, Address
&Addr
,
1056 unsigned Alignment
) {
// useAM3 is forwarded to ARMSimplifyAddress and AddLoadStoreOperands below.
1058 bool useAM3
= false;
1059 switch (VT
.SimpleTy
) {
1060 // This is mostly going to be Neon/vector support.
1061 default: return false;
// Build an AND of SrcReg with immediate 1 (t2ANDri for Thumb2, ANDri
// otherwise) after constraining SrcReg to that opcode's operand 1 class.
1063 unsigned Res
= createResultReg(isThumb2
? &ARM::tGPRRegClass
1064 : &ARM::GPRRegClass
);
1065 unsigned Opc
= isThumb2
? ARM::t2ANDri
: ARM::ANDri
;
1066 SrcReg
= constrainOperandRegClass(TII
.get(Opc
), SrcReg
, 1);
1067 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1069 .addReg(SrcReg
).addImm(1));
// Byte store opcodes. The i8 form is selected when the offset lies strictly
// between -256 and 0 and the subtarget has V6T2 ops.
1075 if (Addr
.Offset
< 0 && Addr
.Offset
> -256 && Subtarget
->hasV6T2Ops())
1076 StrOpc
= ARM::t2STRBi8
;
1078 StrOpc
= ARM::t2STRBi12
;
1080 StrOpc
= ARM::STRBi12
;
// Halfword store: guard on under-aligned (< 2) accesses when the subtarget
// does not allow unaligned memory operations.
1084 if (Alignment
&& Alignment
< 2 && !Subtarget
->allowsUnalignedMem())
1088 if (Addr
.Offset
< 0 && Addr
.Offset
> -256 && Subtarget
->hasV6T2Ops())
1089 StrOpc
= ARM::t2STRHi8
;
1091 StrOpc
= ARM::t2STRHi12
;
// Word store: same guard with a 4-byte alignment threshold.
1098 if (Alignment
&& Alignment
< 4 && !Subtarget
->allowsUnalignedMem())
1102 if (Addr
.Offset
< 0 && Addr
.Offset
> -256 && Subtarget
->hasV6T2Ops())
1103 StrOpc
= ARM::t2STRi8
;
1105 StrOpc
= ARM::t2STRi12
;
1107 StrOpc
= ARM::STRi12
;
1111 if (!Subtarget
->hasVFP2Base()) return false;
1112 // Unaligned stores need special handling. Floats require word-alignment.
1113 if (Alignment
&& Alignment
< 4) {
// Under-aligned float: build a VMOVRS into a register of the i32 class and
// use the integer word store opcode instead of VSTRS.
1114 unsigned MoveReg
= createResultReg(TLI
.getRegClassFor(MVT::i32
));
1115 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1116 TII
.get(ARM::VMOVRS
), MoveReg
)
1120 StrOpc
= isThumb2
? ARM::t2STRi12
: ARM::STRi12
;
1122 StrOpc
= ARM::VSTRS
;
1126 // Can load and store double precision even without FeatureFP64
1127 if (!Subtarget
->hasVFP2Base()) return false;
1128 // FIXME: Unaligned stores need special handling. Doublewords require
1130 if (Alignment
&& Alignment
< 4)
1133 StrOpc
= ARM::VSTRD
;
1136 // Simplify this down to something we can handle.
1137 ARMSimplifyAddress(Addr
, VT
, useAM3
);
1139 // Create the base instruction, then add the operands.
1140 SrcReg
= constrainOperandRegClass(TII
.get(StrOpc
), SrcReg
, 0);
1141 MachineInstrBuilder MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
// Append the address operands; the access is flagged as a store (MOStore).
1144 AddLoadStoreOperands(VT
, Addr
, MIB
, MachineMemOperand::MOStore
, useAM3
);
// Selects an IR store instruction (I is cast to StoreInst below). Operand 0 is
// the value to store (Op0) and operand 1 is the pointer (PtrV). The visible
// steps are: test for an atomic store, test whether the pointer is a
// swifterror argument or alloca, check the stored value's type, get the value
// into a register, compute the address with ARMComputeAddress, and emit the
// store via ARMEmitStore using the StoreInst's alignment.
1148 bool ARMFastISel::SelectStore(const Instruction
*I
) {
1149 Value
*Op0
= I
->getOperand(0);
1150 unsigned SrcReg
= 0;
1152 // Atomic stores need special handling.
1153 if (cast
<StoreInst
>(I
)->isAtomic())
1156 const Value
*PtrV
= I
->getOperand(1);
1157 if (TLI
.supportSwiftError()) {
1158 // Swifterror values can come from either a function parameter with
1159 // swifterror attribute or an alloca with swifterror attribute.
1160 if (const Argument
*Arg
= dyn_cast
<Argument
>(PtrV
)) {
1161 if (Arg
->hasSwiftErrorAttr())
1165 if (const AllocaInst
*Alloca
= dyn_cast
<AllocaInst
>(PtrV
)) {
1166 if (Alloca
->isSwiftError())
1171 // Verify we have a legal type before going any further.
// The type of the stored value (operand 0) is validated with isLoadTypeLegal.
1173 if (!isLoadTypeLegal(I
->getOperand(0)->getType(), VT
))
1176 // Get the value to be stored into a register.
1177 SrcReg
= getRegForValue(Op0
);
1178 if (SrcReg
== 0) return false;
1180 // See if we can handle this address.
1182 if (!ARMComputeAddress(I
->getOperand(1), Addr
))
// Emit the store itself, forwarding the alignment recorded on the StoreInst.
1185 if (!ARMEmitStore(VT
, SrcReg
, Addr
, cast
<StoreInst
>(I
)->getAlignment()))
// Maps an IR compare predicate (integer or floating point) to an ARM condition
// code. Case labels listed back-to-back share a single result. Callers in this
// file (SelectBranch, SelectCmp) treat a result of ARMCC::AL as "predicate not
// handled" and give up on the instruction.
1190 static ARMCC::CondCodes
getComparePred(CmpInst::Predicate Pred
) {
1192 // Needs two compares...
1193 case CmpInst::FCMP_ONE
:
1194 case CmpInst::FCMP_UEQ
:
1196 // AL is our "false" for now. The other two need more compares.
1198 case CmpInst::ICMP_EQ
:
1199 case CmpInst::FCMP_OEQ
:
1201 case CmpInst::ICMP_SGT
:
1202 case CmpInst::FCMP_OGT
:
1204 case CmpInst::ICMP_SGE
:
1205 case CmpInst::FCMP_OGE
:
1207 case CmpInst::ICMP_UGT
:
1208 case CmpInst::FCMP_UGT
:
1210 case CmpInst::FCMP_OLT
:
1212 case CmpInst::ICMP_ULE
:
1213 case CmpInst::FCMP_OLE
:
1215 case CmpInst::FCMP_ORD
:
1217 case CmpInst::FCMP_UNO
:
1219 case CmpInst::FCMP_UGE
:
1221 case CmpInst::ICMP_SLT
:
1222 case CmpInst::FCMP_ULT
:
1224 case CmpInst::ICMP_SLE
:
1225 case CmpInst::FCMP_ULE
:
1227 case CmpInst::FCMP_UNE
:
1228 case CmpInst::ICMP_NE
:
1230 case CmpInst::ICMP_UGE
:
1232 case CmpInst::ICMP_ULT
:
// Selects a branch instruction. TBB and FBB are the machine blocks mapped from
// successors 0 and 1. The condition is handled in one of four ways:
//  - a CmpInst with one use, located in the same block as the branch: the
//    compare is emitted here (ARMEmitCmp) and followed by a Bcc/t2Bcc on the
//    ARM predicate obtained from getComparePred;
//  - a TruncInst with one use in the same block and a legal source type: the
//    truncated operand is tested against immediate 1 with TST;
//  - a ConstantInt: fastEmitBranch to FBB when the value is 0, else to TBB;
//  - anything else: the condition's register is tested with TST.
// In the compare and TST paths, TBB and FBB are swapped when TBB is the layout
// successor of the current block.
1237 bool ARMFastISel::SelectBranch(const Instruction
*I
) {
1238 const BranchInst
*BI
= cast
<BranchInst
>(I
);
1239 MachineBasicBlock
*TBB
= FuncInfo
.MBBMap
[BI
->getSuccessor(0)];
1240 MachineBasicBlock
*FBB
= FuncInfo
.MBBMap
[BI
->getSuccessor(1)];
1242 // Simple branch support.
1244 // If we can, avoid recomputing the compare - redoing it could lead to wonky
1246 if (const CmpInst
*CI
= dyn_cast
<CmpInst
>(BI
->getCondition())) {
1247 if (CI
->hasOneUse() && (CI
->getParent() == I
->getParent())) {
1248 // Get the compare predicate.
1249 // Try to take advantage of fallthrough opportunities.
1250 CmpInst::Predicate Predicate
= CI
->getPredicate();
1251 if (FuncInfo
.MBB
->isLayoutSuccessor(TBB
)) {
// Swapping the targets requires branching on the inverse predicate.
1252 std::swap(TBB
, FBB
);
1253 Predicate
= CmpInst::getInversePredicate(Predicate
);
1256 ARMCC::CondCodes ARMPred
= getComparePred(Predicate
);
1258 // We may not handle every CC for now.
1259 if (ARMPred
== ARMCC::AL
) return false;
1261 // Emit the compare.
1262 if (!ARMEmitCmp(CI
->getOperand(0), CI
->getOperand(1), CI
->isUnsigned()))
// Conditional branch to TBB predicated on ARMPred, reading CPSR.
1265 unsigned BrOpc
= isThumb2
? ARM::t2Bcc
: ARM::Bcc
;
1266 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(BrOpc
))
1267 .addMBB(TBB
).addImm(ARMPred
).addReg(ARM::CPSR
);
1268 finishCondBranch(BI
->getParent(), TBB
, FBB
);
1271 } else if (TruncInst
*TI
= dyn_cast
<TruncInst
>(BI
->getCondition())) {
1273 if (TI
->hasOneUse() && TI
->getParent() == I
->getParent() &&
1274 (isLoadTypeLegal(TI
->getOperand(0)->getType(), SourceVT
))) {
// Test the un-truncated operand against immediate 1 with TST.
1275 unsigned TstOpc
= isThumb2
? ARM::t2TSTri
: ARM::TSTri
;
1276 unsigned OpReg
= getRegForValue(TI
->getOperand(0));
1277 OpReg
= constrainOperandRegClass(TII
.get(TstOpc
), OpReg
, 0);
1278 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1280 .addReg(OpReg
).addImm(1));
1282 unsigned CCMode
= ARMCC::NE
;
1283 if (FuncInfo
.MBB
->isLayoutSuccessor(TBB
)) {
1284 std::swap(TBB
, FBB
);
1288 unsigned BrOpc
= isThumb2
? ARM::t2Bcc
: ARM::Bcc
;
1289 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(BrOpc
))
1290 .addMBB(TBB
).addImm(CCMode
).addReg(ARM::CPSR
);
1292 finishCondBranch(BI
->getParent(), TBB
, FBB
);
1295 } else if (const ConstantInt
*CI
=
1296 dyn_cast
<ConstantInt
>(BI
->getCondition())) {
// Constant condition: the target is known statically.
1297 uint64_t Imm
= CI
->getZExtValue();
1298 MachineBasicBlock
*Target
= (Imm
== 0) ? FBB
: TBB
;
1299 fastEmitBranch(Target
, DbgLoc
);
1303 unsigned CmpReg
= getRegForValue(BI
->getCondition());
1304 if (CmpReg
== 0) return false;
1306 // We've been divorced from our compare! Our block was split, and
1307 // now our compare lives in a predecessor block. We mustn't
1308 // re-compare here, as the children of the compare aren't guaranteed
1309 // live across the block boundary (we *could* check for this).
1310 // Regardless, the compare has been done in the predecessor block,
1311 // and it left a value for us in a virtual register. Ergo, we test
1312 // the one-bit value left in the virtual register.
1313 unsigned TstOpc
= isThumb2
? ARM::t2TSTri
: ARM::TSTri
;
1314 CmpReg
= constrainOperandRegClass(TII
.get(TstOpc
), CmpReg
, 0);
1316 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(TstOpc
))
1320 unsigned CCMode
= ARMCC::NE
;
1321 if (FuncInfo
.MBB
->isLayoutSuccessor(TBB
)) {
1322 std::swap(TBB
, FBB
);
1326 unsigned BrOpc
= isThumb2
? ARM::t2Bcc
: ARM::Bcc
;
1327 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(BrOpc
))
1328 .addMBB(TBB
).addImm(CCMode
).addReg(ARM::CPSR
);
1329 finishCondBranch(BI
->getParent(), TBB
, FBB
);
// Selects an indirectbr instruction: gets the address operand into a register,
// emits tBRIND (Thumb2) or BX on that register, and adds the machine block of
// every IR successor as a successor of the current machine block.
1333 bool ARMFastISel::SelectIndirectBr(const Instruction
*I
) {
1334 unsigned AddrReg
= getRegForValue(I
->getOperand(0));
1335 if (AddrReg
== 0) return false;
1337 unsigned Opc
= isThumb2
? ARM::tBRIND
: ARM::BX
;
// The non-Thumb2 opcode (BX) is only expected on subtargets with V4T ops.
1338 assert(isThumb2
|| Subtarget
->hasV4TOps());
1340 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1341 TII
.get(Opc
)).addReg(AddrReg
));
// Register every possible destination as a CFG successor.
1343 const IndirectBrInst
*IB
= cast
<IndirectBrInst
>(I
);
1344 for (const BasicBlock
*SuccBB
: IB
->successors())
1345 FuncInfo
.MBB
->addSuccessor(FuncInfo
.MBBMap
[SuccBB
]);
// Emits a compare of Src1Value against Src2Value; the compared type is taken
// from Src1Value. isZExt selects zero- versus sign-extension, both when reading
// a constant second operand and when widening narrow integer operands to i32
// with ARMEmitIntExt.
// A ConstantInt second operand is checked for encodability as an immediate
// (getT2SOImmVal / getSOImmVal); negative values other than 0x80000000 set
// isNegativeImm, which picks the CMN opcode over CMP. A ConstantFP second
// operand is checked for being zero and not negative; the VCMPZS/VCMPZD opcodes
// are the UseImm forms. For float and double operands an FMSTAT is emitted
// after the compare.
1350 bool ARMFastISel::ARMEmitCmp(const Value
*Src1Value
, const Value
*Src2Value
,
1352 Type
*Ty
= Src1Value
->getType();
1353 EVT SrcEVT
= TLI
.getValueType(DL
, Ty
, true);
1354 if (!SrcEVT
.isSimple()) return false;
1355 MVT SrcVT
= SrcEVT
.getSimpleVT();
// Subtarget capability checks for float (VFP2 base) and double (VFP2 base
// and FP64) compares.
1357 if (Ty
->isFloatTy() && !Subtarget
->hasVFP2Base())
1360 if (Ty
->isDoubleTy() && (!Subtarget
->hasVFP2Base() || !Subtarget
->hasFP64()))
1363 // Check to see if the 2nd operand is a constant that we can encode directly
1366 bool UseImm
= false;
1367 bool isNegativeImm
= false;
1368 // FIXME: At -O0 we don't have anything that canonicalizes operand order.
1369 // Thus, Src1Value may be a ConstantInt, but we're missing it.
1370 if (const ConstantInt
*ConstInt
= dyn_cast
<ConstantInt
>(Src2Value
)) {
1371 if (SrcVT
== MVT::i32
|| SrcVT
== MVT::i16
|| SrcVT
== MVT::i8
||
1373 const APInt
&CIVal
= ConstInt
->getValue();
1374 Imm
= (isZExt
) ? (int)CIVal
.getZExtValue() : (int)CIVal
.getSExtValue();
1375 // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
1376 // than a cmn, because there is no way to represent 2147483648 as a
1377 // signed 32-bit int.
1378 if (Imm
< 0 && Imm
!= (int)0x80000000) {
1379 isNegativeImm
= true;
1382 UseImm
= isThumb2
? (ARM_AM::getT2SOImmVal(Imm
) != -1) :
1383 (ARM_AM::getSOImmVal(Imm
) != -1);
1385 } else if (const ConstantFP
*ConstFP
= dyn_cast
<ConstantFP
>(Src2Value
)) {
1386 if (SrcVT
== MVT::f32
|| SrcVT
== MVT::f64
)
1387 if (ConstFP
->isZero() && !ConstFP
->isNegative())
// needsExt marks the narrow integer cases that are widened to i32 further
// down (see the "i1, i8, or i16" comment).
1393 bool needsExt
= false;
1394 switch (SrcVT
.SimpleTy
) {
1395 default: return false;
1396 // TODO: Verify compares.
1399 CmpOpc
= UseImm
? ARM::VCMPZS
: ARM::VCMPS
;
1403 CmpOpc
= UseImm
? ARM::VCMPZD
: ARM::VCMPD
;
1413 CmpOpc
= ARM::t2CMPrr
;
1415 CmpOpc
= isNegativeImm
? ARM::t2CMNri
: ARM::t2CMPri
;
1418 CmpOpc
= ARM::CMPrr
;
1420 CmpOpc
= isNegativeImm
? ARM::CMNri
: ARM::CMPri
;
1425 unsigned SrcReg1
= getRegForValue(Src1Value
);
1426 if (SrcReg1
== 0) return false;
1428 unsigned SrcReg2
= 0;
1430 SrcReg2
= getRegForValue(Src2Value
);
1431 if (SrcReg2
== 0) return false;
1434 // We have i1, i8, or i16, we need to either zero extend or sign extend.
1436 SrcReg1
= ARMEmitIntExt(SrcVT
, SrcReg1
, MVT::i32
, isZExt
);
1437 if (SrcReg1
== 0) return false;
1439 SrcReg2
= ARMEmitIntExt(SrcVT
, SrcReg2
, MVT::i32
, isZExt
);
1440 if (SrcReg2
== 0) return false;
// Constrain the source registers to the operand classes of the chosen
// compare opcode before building it.
1444 const MCInstrDesc
&II
= TII
.get(CmpOpc
);
1445 SrcReg1
= constrainOperandRegClass(II
, SrcReg1
, 0);
1447 SrcReg2
= constrainOperandRegClass(II
, SrcReg2
, 1);
1448 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
)
1449 .addReg(SrcReg1
).addReg(SrcReg2
));
1451 MachineInstrBuilder MIB
;
1452 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, II
)
1455 // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1458 AddOptionalDefs(MIB
);
1461 // For floating point we need to move the result to a comparison register
1462 // that we can then use for branches.
1463 if (Ty
->isFloatTy() || Ty
->isDoubleTy())
1464 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1465 TII
.get(ARM::FMSTAT
)));
// Selects a compare whose result is needed in a register. The predicate is
// converted with getComparePred, the compare is emitted with ARMEmitCmp, and a
// MOVCCi/t2MOVCCi is then built into DestReg with a zero register operand,
// immediate 1, and the ARM predicate reading CPSR. I is mapped to DestReg.
1469 bool ARMFastISel::SelectCmp(const Instruction
*I
) {
1470 const CmpInst
*CI
= cast
<CmpInst
>(I
);
1472 // Get the compare predicate.
1473 ARMCC::CondCodes ARMPred
= getComparePred(CI
->getPredicate());
1475 // We may not handle every CC for now.
1476 if (ARMPred
== ARMCC::AL
) return false;
1478 // Emit the compare.
1479 if (!ARMEmitCmp(CI
->getOperand(0), CI
->getOperand(1), CI
->isUnsigned()))
1482 // Now set a register based on the comparison. Explicitly set the predicates
1484 unsigned MovCCOpc
= isThumb2
? ARM::t2MOVCCi
: ARM::MOVCCi
;
1485 const TargetRegisterClass
*RC
= isThumb2
? &ARM::rGPRRegClass
1486 : &ARM::GPRRegClass
;
1487 unsigned DestReg
= createResultReg(RC
);
// Materialize an i32 zero; it is the register operand of the conditional
// move built below.
1488 Constant
*Zero
= ConstantInt::get(Type::getInt32Ty(*Context
), 0);
1489 unsigned ZeroReg
= fastMaterializeConstant(Zero
);
1490 // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1491 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(MovCCOpc
), DestReg
)
1492 .addReg(ZeroReg
).addImm(1)
1493 .addImm(ARMPred
).addReg(ARM::CPSR
);
1495 updateValueMap(I
, DestReg
);
// Selects a floating-point extension from float to double. Requires the VFP2
// base and FP64 subtarget features; any other type pair returns false. The
// result is created in the DPR register class by a VCVTDS instruction and I
// is mapped to it.
1499 bool ARMFastISel::SelectFPExt(const Instruction
*I
) {
1500 // Make sure we have VFP and that we're extending float to double.
1501 if (!Subtarget
->hasVFP2Base() || !Subtarget
->hasFP64()) return false;
1503 Value
*V
= I
->getOperand(0);
1504 if (!I
->getType()->isDoubleTy() ||
1505 !V
->getType()->isFloatTy()) return false;
// Get the source operand into a register; 0 means it could not be obtained.
1507 unsigned Op
= getRegForValue(V
);
1508 if (Op
== 0) return false;
1510 unsigned Result
= createResultReg(&ARM::DPRRegClass
);
1511 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1512 TII
.get(ARM::VCVTDS
), Result
)
1514 updateValueMap(I
, Result
);
// Selects a floating-point truncation from double to float. Requires the VFP2
// base and FP64 subtarget features; any other type pair returns false. The
// result is created in the SPR register class by a VCVTSD instruction and I
// is mapped to it.
1518 bool ARMFastISel::SelectFPTrunc(const Instruction
*I
) {
1519 // Make sure we have VFP and that we're truncating double to float.
1520 if (!Subtarget
->hasVFP2Base() || !Subtarget
->hasFP64()) return false;
1522 Value
*V
= I
->getOperand(0);
1523 if (!(I
->getType()->isFloatTy() &&
1524 V
->getType()->isDoubleTy())) return false;
// Get the source operand into a register; 0 means it could not be obtained.
1526 unsigned Op
= getRegForValue(V
);
1527 if (Op
== 0) return false;
1529 unsigned Result
= createResultReg(&ARM::SPRRegClass
);
1530 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1531 TII
.get(ARM::VCVTSD
), Result
)
1533 updateValueMap(I
, Result
);
// Selects an integer-to-floating-point conversion. isSigned picks the signed
// (VSITOS/VSITOD) or unsigned (VUITOS/VUITOD) opcode. The source type is tested
// against i32, i16 and i8; i16 and i8 sources are first widened to i32. The
// integer is then moved into an FP register with ARMMoveToFPReg before the
// conversion instruction is built into a register of the destination class.
1537 bool ARMFastISel::SelectIToFP(const Instruction
*I
, bool isSigned
) {
1538 // Make sure we have VFP.
1539 if (!Subtarget
->hasVFP2Base()) return false;
1542 Type
*Ty
= I
->getType();
1543 if (!isTypeLegal(Ty
, DstVT
))
1546 Value
*Src
= I
->getOperand(0);
1547 EVT SrcEVT
= TLI
.getValueType(DL
, Src
->getType(), true);
1548 if (!SrcEVT
.isSimple())
1550 MVT SrcVT
= SrcEVT
.getSimpleVT();
1551 if (SrcVT
!= MVT::i32
&& SrcVT
!= MVT::i16
&& SrcVT
!= MVT::i8
)
1554 unsigned SrcReg
= getRegForValue(Src
);
1555 if (SrcReg
== 0) return false;
1557 // Widen i16/i8 sources to i32: sign-extend when isSigned, else zero-extend.
1558 if (SrcVT
== MVT::i16
|| SrcVT
== MVT::i8
) {
1559 SrcReg
= ARMEmitIntExt(SrcVT
, SrcReg
, MVT::i32
,
1560 /*isZExt*/!isSigned
);
1561 if (SrcReg
== 0) return false;
1564 // The conversion routine works on fp-reg to fp-reg and the operand above
1565 // was an integer, move it to the fp registers if possible.
1566 unsigned FP
= ARMMoveToFPReg(MVT::f32
, SrcReg
);
1567 if (FP
== 0) return false;
// Pick the conversion opcode from the result type; the double forms are only
// considered when the subtarget has FP64.
1570 if (Ty
->isFloatTy()) Opc
= isSigned
? ARM::VSITOS
: ARM::VUITOS
;
1571 else if (Ty
->isDoubleTy() && Subtarget
->hasFP64())
1572 Opc
= isSigned
? ARM::VSITOD
: ARM::VUITOD
;
1575 unsigned ResultReg
= createResultReg(TLI
.getRegClassFor(DstVT
));
1576 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1577 TII
.get(Opc
), ResultReg
).addReg(FP
));
1578 updateValueMap(I
, ResultReg
);
// Selects a floating-point-to-integer conversion. isSigned picks the signed
// (VTOSIZS/VTOSIZD) or unsigned (VTOUIZS/VTOUIZD) opcode, and the S/D form is
// chosen from the operand type. The conversion result is produced in a
// register of the f32 class and then moved to an integer register with
// ARMMoveToIntReg; I is mapped to that integer register.
1582 bool ARMFastISel::SelectFPToI(const Instruction
*I
, bool isSigned
) {
1583 // Make sure we have VFP.
1584 if (!Subtarget
->hasVFP2Base()) return false;
1587 Type
*RetTy
= I
->getType();
1588 if (!isTypeLegal(RetTy
, DstVT
))
1591 unsigned Op
= getRegForValue(I
->getOperand(0));
1592 if (Op
== 0) return false;
// Pick the conversion opcode from the operand type; the double forms are only
// considered when the subtarget has FP64.
1595 Type
*OpTy
= I
->getOperand(0)->getType();
1596 if (OpTy
->isFloatTy()) Opc
= isSigned
? ARM::VTOSIZS
: ARM::VTOUIZS
;
1597 else if (OpTy
->isDoubleTy() && Subtarget
->hasFP64())
1598 Opc
= isSigned
? ARM::VTOSIZD
: ARM::VTOUIZD
;
1601 // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
1602 unsigned ResultReg
= createResultReg(TLI
.getRegClassFor(MVT::f32
));
1603 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1604 TII
.get(Opc
), ResultReg
).addReg(Op
));
1606 // This result needs to be in an integer register, but the conversion only
1607 // takes place in fp-regs.
1608 unsigned IntReg
= ARMMoveToIntReg(DstVT
, ResultReg
);
1609 if (IntReg
== 0) return false;
1611 updateValueMap(I
, IntReg
);
// Selects an IR select whose result type is i32. Operand 0 is the condition;
// operands 1 and 2 are the two candidate values. Operand 2 is examined as a
// ConstantInt to decide whether an immediate form can be used (UseImm, checked
// with getT2SOImmVal / getSOImmVal). The condition register is tested with TST
// and a conditional move (MOVCCr, MOVCCi or MVNCCi, or their t2 forms)
// produces ResultReg, to which I is mapped.
1615 bool ARMFastISel::SelectSelect(const Instruction
*I
) {
1617 if (!isTypeLegal(I
->getType(), VT
))
1620 // Things need to be register sized for register moves.
1621 if (VT
!= MVT::i32
) return false;
1623 unsigned CondReg
= getRegForValue(I
->getOperand(0));
1624 if (CondReg
== 0) return false;
1625 unsigned Op1Reg
= getRegForValue(I
->getOperand(1));
1626 if (Op1Reg
== 0) return false;
1628 // Check to see if we can use an immediate in the conditional move.
1630 bool UseImm
= false;
1631 bool isNegativeImm
= false;
1632 if (const ConstantInt
*ConstInt
= dyn_cast
<ConstantInt
>(I
->getOperand(2))) {
1633 assert(VT
== MVT::i32
&& "Expecting an i32.");
1634 Imm
= (int)ConstInt
->getValue().getZExtValue();
1636 isNegativeImm
= true;
1639 UseImm
= isThumb2
? (ARM_AM::getT2SOImmVal(Imm
) != -1) :
1640 (ARM_AM::getSOImmVal(Imm
) != -1);
// Op2Reg stays 0 unless operand 2 is fetched into a register below.
1643 unsigned Op2Reg
= 0;
1645 Op2Reg
= getRegForValue(I
->getOperand(2));
1646 if (Op2Reg
== 0) return false;
// Test the condition register with TST.
1649 unsigned TstOpc
= isThumb2
? ARM::t2TSTri
: ARM::TSTri
;
1650 CondReg
= constrainOperandRegClass(TII
.get(TstOpc
), CondReg
, 0);
1652 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(TstOpc
))
// Choose the register class and conditional-move opcode: a register form
// (MOVCCr) or one of the immediate forms (MOVCCi / MVNCCi).
1657 const TargetRegisterClass
*RC
;
1659 RC
= isThumb2
? &ARM::tGPRRegClass
: &ARM::GPRRegClass
;
1660 MovCCOpc
= isThumb2
? ARM::t2MOVCCr
: ARM::MOVCCr
;
1662 RC
= isThumb2
? &ARM::rGPRRegClass
: &ARM::GPRRegClass
;
1664 MovCCOpc
= isThumb2
? ARM::t2MOVCCi
: ARM::MOVCCi
;
1666 MovCCOpc
= isThumb2
? ARM::t2MVNCCi
: ARM::MVNCCi
;
1668 unsigned ResultReg
= createResultReg(RC
);
1670 Op2Reg
= constrainOperandRegClass(TII
.get(MovCCOpc
), Op2Reg
, 1);
1671 Op1Reg
= constrainOperandRegClass(TII
.get(MovCCOpc
), Op1Reg
, 2);
1672 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(MovCCOpc
),
1679 Op1Reg
= constrainOperandRegClass(TII
.get(MovCCOpc
), Op1Reg
, 1);
1680 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(MovCCOpc
),
1687 updateValueMap(I
, ResultReg
);
// Selects an integer division by emitting a runtime library call. LC is chosen
// from the SDIV_* (isSigned) or UDIV_* libcalls according to VT, and the call
// is emitted through ARMEmitLibcall, whose result is returned.
1691 bool ARMFastISel::SelectDiv(const Instruction
*I
, bool isSigned
) {
1693 Type
*Ty
= I
->getType();
1694 if (!isTypeLegal(Ty
, VT
))
1697 // If we have integer div support we should have selected this automagically.
1698 // In case we have a real miss go ahead and return false and we'll pick
1700 if (Subtarget
->hasDivideInThumbMode())
1703 // Otherwise emit a libcall.
1704 RTLIB::Libcall LC
= RTLIB::UNKNOWN_LIBCALL
;
1706 LC
= isSigned
? RTLIB::SDIV_I8
: RTLIB::UDIV_I8
;
1707 else if (VT
== MVT::i16
)
1708 LC
= isSigned
? RTLIB::SDIV_I16
: RTLIB::UDIV_I16
;
1709 else if (VT
== MVT::i32
)
1710 LC
= isSigned
? RTLIB::SDIV_I32
: RTLIB::UDIV_I32
;
1711 else if (VT
== MVT::i64
)
1712 LC
= isSigned
? RTLIB::SDIV_I64
: RTLIB::UDIV_I64
;
1713 else if (VT
== MVT::i128
)
1714 LC
= isSigned
? RTLIB::SDIV_I128
: RTLIB::UDIV_I128
;
// NOTE(review): the assert text says SDIV even when isSigned is false.
1715 assert(LC
!= RTLIB::UNKNOWN_LIBCALL
&& "Unsupported SDIV!");
1717 return ARMEmitLibcall(I
, LC
);
// Selects an integer remainder by emitting a runtime library call. Targets
// without a standalone remainder libcall for VT take the hasStandaloneRem
// branch (see the comment below). Otherwise LC is chosen from the SREM_*
// (isSigned) or UREM_* libcalls according to VT and the call is emitted
// through ARMEmitLibcall, whose result is returned.
1720 bool ARMFastISel::SelectRem(const Instruction
*I
, bool isSigned
) {
1722 Type
*Ty
= I
->getType();
1723 if (!isTypeLegal(Ty
, VT
))
1726 // Many ABIs do not provide a libcall for standalone remainder, so we need to
1727 // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1728 // multi-reg returns, we'll have to bail out.
1729 if (!TLI
.hasStandaloneRem(VT
)) {
1733 RTLIB::Libcall LC
= RTLIB::UNKNOWN_LIBCALL
;
1735 LC
= isSigned
? RTLIB::SREM_I8
: RTLIB::UREM_I8
;
1736 else if (VT
== MVT::i16
)
1737 LC
= isSigned
? RTLIB::SREM_I16
: RTLIB::UREM_I16
;
1738 else if (VT
== MVT::i32
)
1739 LC
= isSigned
? RTLIB::SREM_I32
: RTLIB::UREM_I32
;
1740 else if (VT
== MVT::i64
)
1741 LC
= isSigned
? RTLIB::SREM_I64
: RTLIB::UREM_I64
;
1742 else if (VT
== MVT::i128
)
1743 LC
= isSigned
? RTLIB::SREM_I128
: RTLIB::UREM_I128
;
// NOTE(review): the assert text says SREM even when isSigned is false.
1744 assert(LC
!= RTLIB::UNKNOWN_LIBCALL
&& "Unsupported SREM!");
1746 return ARMEmitLibcall(I
, LC
);
// Selects an integer binary operation on a narrow type; the DestVT test below
// singles out i16, i8 and i1. ISDOpcode chooses among the register-register
// ADD, ORR and SUB opcodes (ARM or Thumb2 form); other opcodes reach the
// default and return false. Both operands are fetched into registers and the
// result is created in the GPRnopc register class, to which I is mapped.
1749 bool ARMFastISel::SelectBinaryIntOp(const Instruction
*I
, unsigned ISDOpcode
) {
1750 EVT DestVT
= TLI
.getValueType(DL
, I
->getType(), true);
1752 // We can get here in the case when we have a binary operation on a non-legal
1753 // type and the target independent selector doesn't know how to handle it.
1754 if (DestVT
!= MVT::i16
&& DestVT
!= MVT::i8
&& DestVT
!= MVT::i1
)
1758 switch (ISDOpcode
) {
1759 default: return false;
1761 Opc
= isThumb2
? ARM::t2ADDrr
: ARM::ADDrr
;
1764 Opc
= isThumb2
? ARM::t2ORRrr
: ARM::ORRrr
;
1767 Opc
= isThumb2
? ARM::t2SUBrr
: ARM::SUBrr
;
1771 unsigned SrcReg1
= getRegForValue(I
->getOperand(0));
1772 if (SrcReg1
== 0) return false;
1774 // TODO: Often the 2nd operand is an immediate, which can be encoded directly
1775 // in the instruction, rather than materializing the value in a register.
1776 unsigned SrcReg2
= getRegForValue(I
->getOperand(1));
1777 if (SrcReg2
== 0) return false;
// Constrain the sources to the opcode's operand classes (operands 1 and 2)
// and build the instruction.
1779 unsigned ResultReg
= createResultReg(&ARM::GPRnopcRegClass
);
1780 SrcReg1
= constrainOperandRegClass(TII
.get(Opc
), SrcReg1
, 1);
1781 SrcReg2
= constrainOperandRegClass(TII
.get(Opc
), SrcReg2
, 2);
1782 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1783 TII
.get(Opc
), ResultReg
)
1784 .addReg(SrcReg1
).addReg(SrcReg2
));
1785 updateValueMap(I
, ResultReg
);
// Selects a floating-point binary operation using VFP instructions. ISDOpcode
// chooses among the VADD, VSUB and VMUL opcodes, with the D (64-bit) or S form
// picked by is64bit; other opcodes reach the default and return false. Both
// operands are fetched into registers and the result is created in the
// register class for VT, to which I is mapped.
1789 bool ARMFastISel::SelectBinaryFPOp(const Instruction
*I
, unsigned ISDOpcode
) {
1790 EVT FPVT
= TLI
.getValueType(DL
, I
->getType(), true);
1791 if (!FPVT
.isSimple()) return false;
1792 MVT VT
= FPVT
.getSimpleVT();
1794 // FIXME: Support vector types where possible.
1798 // We can get here in the case when we want to use NEON for our fp
1799 // operations, but can't figure out how to. Just use the vfp instructions
1801 // FIXME: It'd be nice to use NEON instructions.
// Subtarget capability checks for float (VFP2 base) and double (VFP2 base
// and FP64) operations.
1802 Type
*Ty
= I
->getType();
1803 if (Ty
->isFloatTy() && !Subtarget
->hasVFP2Base())
1805 if (Ty
->isDoubleTy() && (!Subtarget
->hasVFP2Base() || !Subtarget
->hasFP64()))
1809 bool is64bit
= VT
== MVT::f64
|| VT
== MVT::i64
;
1810 switch (ISDOpcode
) {
1811 default: return false;
1813 Opc
= is64bit
? ARM::VADDD
: ARM::VADDS
;
1816 Opc
= is64bit
? ARM::VSUBD
: ARM::VSUBS
;
1819 Opc
= is64bit
? ARM::VMULD
: ARM::VMULS
;
1822 unsigned Op1
= getRegForValue(I
->getOperand(0));
1823 if (Op1
== 0) return false;
1825 unsigned Op2
= getRegForValue(I
->getOperand(1));
1826 if (Op2
== 0) return false;
1828 unsigned ResultReg
= createResultReg(TLI
.getRegClassFor(VT
.SimpleTy
));
1829 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1830 TII
.get(Opc
), ResultReg
)
1831 .addReg(Op1
).addReg(Op2
));
1832 updateValueMap(I
, ResultReg
);
1836 // Call Handling Code
1838 // This is largely taken directly from CCAssignFnForNode
1839 // TODO: We may not support all of this.
// Returns the calling-convention assignment function for CC. Return selects
// the Ret* variant of each pair. In the Fast and C/CXX_FAST_TLS cases the VFP
// variants are only chosen when the call is not variadic (!isVarArg) and the
// subtarget has the VFP2 base. The GHC convention reports a fatal error on the
// "Can't return" path and otherwise yields CC_ARM_APCS_GHC.
1840 CCAssignFn
*ARMFastISel::CCAssignFnForCall(CallingConv::ID CC
,
1845 report_fatal_error("Unsupported calling convention");
1846 case CallingConv::Fast
:
1847 if (Subtarget
->hasVFP2Base() && !isVarArg
) {
1848 if (!Subtarget
->isAAPCS_ABI())
1849 return (Return
? RetFastCC_ARM_APCS
: FastCC_ARM_APCS
);
1850 // For AAPCS ABI targets, just use VFP variant of the calling convention.
1851 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
1854 case CallingConv::C
:
1855 case CallingConv::CXX_FAST_TLS
:
1856 // Use target triple & subtarget features to do actual dispatch.
1857 if (Subtarget
->isAAPCS_ABI()) {
// The hard-float (VFP) variant additionally requires FloatABI::Hard.
1858 if (Subtarget
->hasVFP2Base() &&
1859 TM
.Options
.FloatABIType
== FloatABI::Hard
&& !isVarArg
)
1860 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
1862 return (Return
? RetCC_ARM_AAPCS
: CC_ARM_AAPCS
);
1864 return (Return
? RetCC_ARM_APCS
: CC_ARM_APCS
);
1866 case CallingConv::ARM_AAPCS_VFP
:
1867 case CallingConv::Swift
:
1869 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
1870 // Fall through to soft float variant, variadic functions don't
1871 // use hard floating point ABI.
1873 case CallingConv::ARM_AAPCS
:
1874 return (Return
? RetCC_ARM_AAPCS
: CC_ARM_AAPCS
);
1875 case CallingConv::ARM_APCS
:
1876 return (Return
? RetCC_ARM_APCS
: CC_ARM_APCS
);
1877 case CallingConv::GHC
:
1879 report_fatal_error("Can't return in GHC call convention");
1881 return CC_ARM_APCS_GHC
;
// Lowers the outgoing arguments of a call. Args, ArgRegs, ArgVTs and ArgFlags
// are parallel per-argument vectors indexed by VA.getValNo(); RegArgs receives
// every physical register an argument is placed in. The function works in two
// passes over the locations computed by CCState::AnalyzeCallOperands:
//  1. a checking pass that emits no code and screens out vector arguments,
//     arguments wider than 64 bits, and custom locations other than f64 split
//     across two registers;
//  2. an emitting pass, after CALLSEQ_START, that applies any required
//     extension or bitcast and then copies each argument to its register,
//     splits an f64 into a register pair with VMOVRRD, or stores it relative
//     to SP.
// NumBytes is set to the stack size reported by the calling-convention state.
1885 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl
<Value
*> &Args
,
1886 SmallVectorImpl
<Register
> &ArgRegs
,
1887 SmallVectorImpl
<MVT
> &ArgVTs
,
1888 SmallVectorImpl
<ISD::ArgFlagsTy
> &ArgFlags
,
1889 SmallVectorImpl
<Register
> &RegArgs
,
1893 SmallVector
<CCValAssign
, 16> ArgLocs
;
1894 CCState
CCInfo(CC
, isVarArg
, *FuncInfo
.MF
, ArgLocs
, *Context
);
1895 CCInfo
.AnalyzeCallOperands(ArgVTs
, ArgFlags
,
1896 CCAssignFnForCall(CC
, false, isVarArg
));
1898 // Check that we can handle all of the arguments. If we can't, then bail out
1899 // now before we add code to the MBB.
1900 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1901 CCValAssign
&VA
= ArgLocs
[i
];
1902 MVT ArgVT
= ArgVTs
[VA
.getValNo()];
1904 // We don't handle NEON/vector parameters yet.
1905 if (ArgVT
.isVector() || ArgVT
.getSizeInBits() > 64)
1908 // Now copy/store arg to correct locations.
1909 if (VA
.isRegLoc() && !VA
.needsCustom()) {
1911 } else if (VA
.needsCustom()) {
1912 // TODO: We need custom lowering for vector (v2f64) args.
1913 if (VA
.getLocVT() != MVT::f64
||
1914 // TODO: Only handle register args for now.
1915 !VA
.isRegLoc() || !ArgLocs
[++i
].isRegLoc())
1918 switch (ArgVT
.SimpleTy
) {
1927 if (!Subtarget
->hasVFP2Base())
1931 if (!Subtarget
->hasVFP2Base())
1938 // At this point, we are able to handle the call's arguments in fast isel.
1940 // Get a count of how many bytes are to be pushed on the stack.
1941 NumBytes
= CCInfo
.getNextStackOffset();
1943 // Issue CALLSEQ_START
1944 unsigned AdjStackDown
= TII
.getCallFrameSetupOpcode();
1945 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1946 TII
.get(AdjStackDown
))
1947 .addImm(NumBytes
).addImm(0));
1949 // Process the args.
1950 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
1951 CCValAssign
&VA
= ArgLocs
[i
];
1952 const Value
*ArgVal
= Args
[VA
.getValNo()];
1953 Register Arg
= ArgRegs
[VA
.getValNo()];
1954 MVT ArgVT
= ArgVTs
[VA
.getValNo()];
// Restates, as an assertion, the vector/size check of the first loop.
1956 assert((!ArgVT
.isVector() && ArgVT
.getSizeInBits() <= 64) &&
1957 "We don't handle NEON/vector parameters yet.");
1959 // Handle arg promotion, etc.
1960 switch (VA
.getLocInfo()) {
1961 case CCValAssign::Full
: break;
1962 case CCValAssign::SExt
: {
1963 MVT DestVT
= VA
.getLocVT();
1964 Arg
= ARMEmitIntExt(ArgVT
, Arg
, DestVT
, /*isZExt*/false);
1965 assert(Arg
!= 0 && "Failed to emit a sext");
1969 case CCValAssign::AExt
:
1970 // Intentional fall-through. Handle AExt and ZExt.
1971 case CCValAssign::ZExt
: {
1972 MVT DestVT
= VA
.getLocVT();
1973 Arg
= ARMEmitIntExt(ArgVT
, Arg
, DestVT
, /*isZExt*/true);
1974 assert(Arg
!= 0 && "Failed to emit a zext");
1978 case CCValAssign::BCvt
: {
1979 unsigned BC
= fastEmit_r(ArgVT
, VA
.getLocVT(), ISD::BITCAST
, Arg
,
1980 /*TODO: Kill=*/false);
1981 assert(BC
!= 0 && "Failed to emit a bitcast!");
1983 ArgVT
= VA
.getLocVT();
1986 default: llvm_unreachable("Unknown arg promotion!");
1989 // Now copy/store arg to correct locations.
1990 if (VA
.isRegLoc() && !VA
.needsCustom()) {
// Plain register argument: COPY into the assigned physical register.
1991 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1992 TII
.get(TargetOpcode::COPY
), VA
.getLocReg()).addReg(Arg
);
1993 RegArgs
.push_back(VA
.getLocReg());
1994 } else if (VA
.needsCustom()) {
1995 // TODO: We need custom lowering for vector (v2f64) args.
1996 assert(VA
.getLocVT() == MVT::f64
&&
1997 "Custom lowering for v2f64 args not available");
1999 // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
// The next location holds the second register; note that this advances i.
2000 CCValAssign
&NextVA
= ArgLocs
[++i
];
2002 assert(VA
.isRegLoc() && NextVA
.isRegLoc() &&
2003 "We only handle register args!");
2005 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2006 TII
.get(ARM::VMOVRRD
), VA
.getLocReg())
2007 .addReg(NextVA
.getLocReg(), RegState::Define
)
2009 RegArgs
.push_back(VA
.getLocReg());
2010 RegArgs
.push_back(NextVA
.getLocReg());
2012 assert(VA
.isMemLoc());
2013 // Need to store on the stack.
2015 // Don't emit stores for undef values.
2016 if (isa
<UndefValue
>(ArgVal
))
// Address the slot as SP plus the location's memory offset.
2020 Addr
.BaseType
= Address::RegBase
;
2021 Addr
.Base
.Reg
= ARM::SP
;
2022 Addr
.Offset
= VA
.getLocMemOffset();
2024 bool EmitRet
= ARMEmitStore(ArgVT
, Arg
, Addr
); (void)EmitRet
;
2025 assert(EmitRet
&& "Could not emit a store for argument!");
// Finishes a call sequence: emits CALLSEQ_END with NumBytes and, when RetVT is
// not void, copies the call's result out of its physical register(s) and maps
// I to the resulting virtual register. An f64 result assigned to two locations
// is reassembled with VMOVDRR; otherwise exactly one location is expected and
// copied with COPY. Every physical register read is appended to UsedRegs.
2032 bool ARMFastISel::FinishCall(MVT RetVT
, SmallVectorImpl
<Register
> &UsedRegs
,
2033 const Instruction
*I
, CallingConv::ID CC
,
2034 unsigned &NumBytes
, bool isVarArg
) {
2035 // Issue CALLSEQ_END
2036 unsigned AdjStackUp
= TII
.getCallFrameDestroyOpcode();
2037 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2038 TII
.get(AdjStackUp
))
2039 .addImm(NumBytes
).addImm(0));
2041 // Now the return value.
2042 if (RetVT
!= MVT::isVoid
) {
// Compute where the calling convention places the returned value.
2043 SmallVector
<CCValAssign
, 16> RVLocs
;
2044 CCState
CCInfo(CC
, isVarArg
, *FuncInfo
.MF
, RVLocs
, *Context
);
2045 CCInfo
.AnalyzeCallResult(RetVT
, CCAssignFnForCall(CC
, true, isVarArg
));
2047 // Copy all of the result registers out of their specified physreg.
2048 if (RVLocs
.size() == 2 && RetVT
== MVT::f64
) {
2049 // For this move we copy into two registers and then move into the
2050 // double fp reg we want.
2051 MVT DestVT
= RVLocs
[0].getValVT();
2052 const TargetRegisterClass
* DstRC
= TLI
.getRegClassFor(DestVT
);
2053 Register ResultReg
= createResultReg(DstRC
);
2054 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2055 TII
.get(ARM::VMOVDRR
), ResultReg
)
2056 .addReg(RVLocs
[0].getLocReg())
2057 .addReg(RVLocs
[1].getLocReg()));
2059 UsedRegs
.push_back(RVLocs
[0].getLocReg());
2060 UsedRegs
.push_back(RVLocs
[1].getLocReg());
2062 // Finally update the result.
2063 updateValueMap(I
, ResultReg
);
2065 assert(RVLocs
.size() == 1 &&"Can't handle non-double multi-reg retvals!");
2066 MVT CopyVT
= RVLocs
[0].getValVT();
2068 // Special handling for extended integers.
2069 if (RetVT
== MVT::i1
|| RetVT
== MVT::i8
|| RetVT
== MVT::i16
)
2072 const TargetRegisterClass
* DstRC
= TLI
.getRegClassFor(CopyVT
);
2074 Register ResultReg
= createResultReg(DstRC
);
2075 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2076 TII
.get(TargetOpcode::COPY
),
2077 ResultReg
).addReg(RVLocs
[0].getLocReg());
2078 UsedRegs
.push_back(RVLocs
[0].getLocReg());
2080 // Finally update the result.
2081 updateValueMap(I
, ResultReg
);
2088 bool ARMFastISel::SelectRet(const Instruction
*I
) {
2089 const ReturnInst
*Ret
= cast
<ReturnInst
>(I
);
2090 const Function
&F
= *I
->getParent()->getParent();
2092 if (!FuncInfo
.CanLowerReturn
)
2095 if (TLI
.supportSwiftError() &&
2096 F
.getAttributes().hasAttrSomewhere(Attribute::SwiftError
))
2099 if (TLI
.supportSplitCSR(FuncInfo
.MF
))
2102 // Build a list of return value registers.
2103 SmallVector
<unsigned, 4> RetRegs
;
2105 CallingConv::ID CC
= F
.getCallingConv();
2106 if (Ret
->getNumOperands() > 0) {
2107 SmallVector
<ISD::OutputArg
, 4> Outs
;
2108 GetReturnInfo(CC
, F
.getReturnType(), F
.getAttributes(), Outs
, TLI
, DL
);
2110 // Analyze operands of the call, assigning locations to each operand.
2111 SmallVector
<CCValAssign
, 16> ValLocs
;
2112 CCState
CCInfo(CC
, F
.isVarArg(), *FuncInfo
.MF
, ValLocs
, I
->getContext());
2113 CCInfo
.AnalyzeReturn(Outs
, CCAssignFnForCall(CC
, true /* is Ret */,
2116 const Value
*RV
= Ret
->getOperand(0);
2117 unsigned Reg
= getRegForValue(RV
);
2121 // Only handle a single return value for now.
2122 if (ValLocs
.size() != 1)
2125 CCValAssign
&VA
= ValLocs
[0];
2127 // Don't bother handling odd stuff for now.
2128 if (VA
.getLocInfo() != CCValAssign::Full
)
2130 // Only handle register returns for now.
2134 unsigned SrcReg
= Reg
+ VA
.getValNo();
2135 EVT RVEVT
= TLI
.getValueType(DL
, RV
->getType());
2136 if (!RVEVT
.isSimple()) return false;
2137 MVT RVVT
= RVEVT
.getSimpleVT();
2138 MVT DestVT
= VA
.getValVT();
2139 // Special handling for extended integers.
2140 if (RVVT
!= DestVT
) {
2141 if (RVVT
!= MVT::i1
&& RVVT
!= MVT::i8
&& RVVT
!= MVT::i16
)
2144 assert(DestVT
== MVT::i32
&& "ARM should always ext to i32");
2146 // Perform extension if flagged as either zext or sext. Otherwise, do
2148 if (Outs
[0].Flags
.isZExt() || Outs
[0].Flags
.isSExt()) {
2149 SrcReg
= ARMEmitIntExt(RVVT
, SrcReg
, DestVT
, Outs
[0].Flags
.isZExt());
2150 if (SrcReg
== 0) return false;
2155 Register DstReg
= VA
.getLocReg();
2156 const TargetRegisterClass
* SrcRC
= MRI
.getRegClass(SrcReg
);
2157 // Avoid a cross-class copy. This is very unlikely.
2158 if (!SrcRC
->contains(DstReg
))
2160 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2161 TII
.get(TargetOpcode::COPY
), DstReg
).addReg(SrcReg
);
2163 // Add register to return instruction.
2164 RetRegs
.push_back(VA
.getLocReg());
2167 MachineInstrBuilder MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2168 TII
.get(Subtarget
->getReturnOpcode()));
2169 AddOptionalDefs(MIB
);
2170 for (unsigned R
: RetRegs
)
2171 MIB
.addReg(R
, RegState::Implicit
);
2175 unsigned ARMFastISel::ARMSelectCallOp(bool UseReg
) {
2177 return isThumb2
? ARM::tBLXr
: ARM::BLX
;
2179 return isThumb2
? ARM::tBL
: ARM::BL
;
2182 unsigned ARMFastISel::getLibcallReg(const Twine
&Name
) {
2183 // Manually compute the global's type to avoid building it when unnecessary.
2184 Type
*GVTy
= Type::getInt32PtrTy(*Context
, /*AS=*/0);
2185 EVT LCREVT
= TLI
.getValueType(DL
, GVTy
);
2186 if (!LCREVT
.isSimple()) return 0;
2188 GlobalValue
*GV
= new GlobalVariable(M
, Type::getInt32Ty(*Context
), false,
2189 GlobalValue::ExternalLinkage
, nullptr,
2191 assert(GV
->getType() == GVTy
&& "We miscomputed the type for the global!");
2192 return ARMMaterializeGV(GV
, LCREVT
.getSimpleVT());
2195 // A quick function that will emit a call for a named libcall in F with the
2196 // vector of passed arguments for the Instruction in I. We can assume that we
2197 // can emit a call for any libcall we can produce. This is an abridged version
2198 // of the full call infrastructure since we won't need to worry about things
2199 // like computed function pointers or strange arguments at call sites.
2200 // TODO: Try to unify this and the normal call bits for ARM, then try to unify
2202 bool ARMFastISel::ARMEmitLibcall(const Instruction
*I
, RTLIB::Libcall Call
) {
2203 CallingConv::ID CC
= TLI
.getLibcallCallingConv(Call
);
2205 // Handle *simple* calls for now.
2206 Type
*RetTy
= I
->getType();
2208 if (RetTy
->isVoidTy())
2209 RetVT
= MVT::isVoid
;
2210 else if (!isTypeLegal(RetTy
, RetVT
))
2213 // Can't handle non-double multi-reg retvals.
2214 if (RetVT
!= MVT::isVoid
&& RetVT
!= MVT::i32
) {
2215 SmallVector
<CCValAssign
, 16> RVLocs
;
2216 CCState
CCInfo(CC
, false, *FuncInfo
.MF
, RVLocs
, *Context
);
2217 CCInfo
.AnalyzeCallResult(RetVT
, CCAssignFnForCall(CC
, true, false));
2218 if (RVLocs
.size() >= 2 && RetVT
!= MVT::f64
)
2222 // Set up the argument vectors.
2223 SmallVector
<Value
*, 8> Args
;
2224 SmallVector
<Register
, 8> ArgRegs
;
2225 SmallVector
<MVT
, 8> ArgVTs
;
2226 SmallVector
<ISD::ArgFlagsTy
, 8> ArgFlags
;
2227 Args
.reserve(I
->getNumOperands());
2228 ArgRegs
.reserve(I
->getNumOperands());
2229 ArgVTs
.reserve(I
->getNumOperands());
2230 ArgFlags
.reserve(I
->getNumOperands());
2231 for (Value
*Op
: I
->operands()) {
2232 unsigned Arg
= getRegForValue(Op
);
2233 if (Arg
== 0) return false;
2235 Type
*ArgTy
= Op
->getType();
2237 if (!isTypeLegal(ArgTy
, ArgVT
)) return false;
2239 ISD::ArgFlagsTy Flags
;
2240 Flags
.setOrigAlign(Align(DL
.getABITypeAlignment(ArgTy
)));
2243 ArgRegs
.push_back(Arg
);
2244 ArgVTs
.push_back(ArgVT
);
2245 ArgFlags
.push_back(Flags
);
2248 // Handle the arguments now that we've gotten them.
2249 SmallVector
<Register
, 4> RegArgs
;
2251 if (!ProcessCallArgs(Args
, ArgRegs
, ArgVTs
, ArgFlags
,
2252 RegArgs
, CC
, NumBytes
, false))
2256 if (Subtarget
->genLongCalls()) {
2257 CalleeReg
= getLibcallReg(TLI
.getLibcallName(Call
));
2258 if (CalleeReg
== 0) return false;
2262 unsigned CallOpc
= ARMSelectCallOp(Subtarget
->genLongCalls());
2263 MachineInstrBuilder MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
,
2264 DbgLoc
, TII
.get(CallOpc
));
2265 // BL / BLX don't take a predicate, but tBL / tBLX do.
2267 MIB
.add(predOps(ARMCC::AL
));
2268 if (Subtarget
->genLongCalls())
2269 MIB
.addReg(CalleeReg
);
2271 MIB
.addExternalSymbol(TLI
.getLibcallName(Call
));
2273 // Add implicit physical register uses to the call.
2274 for (Register R
: RegArgs
)
2275 MIB
.addReg(R
, RegState::Implicit
);
2277 // Add a register mask with the call-preserved registers.
2278 // Proper defs for return values will be added by setPhysRegsDeadExcept().
2279 MIB
.addRegMask(TRI
.getCallPreservedMask(*FuncInfo
.MF
, CC
));
2281 // Finish off the call including any return values.
2282 SmallVector
<Register
, 4> UsedRegs
;
2283 if (!FinishCall(RetVT
, UsedRegs
, I
, CC
, NumBytes
, false)) return false;
2285 // Set all unused physreg defs as dead.
2286 static_cast<MachineInstr
*>(MIB
)->setPhysRegsDeadExcept(UsedRegs
, TRI
);
2291 bool ARMFastISel::SelectCall(const Instruction
*I
,
2292 const char *IntrMemName
= nullptr) {
2293 const CallInst
*CI
= cast
<CallInst
>(I
);
2294 const Value
*Callee
= CI
->getCalledValue();
2296 // Can't handle inline asm.
2297 if (isa
<InlineAsm
>(Callee
)) return false;
2299 // Allow SelectionDAG isel to handle tail calls.
2300 if (CI
->isTailCall()) return false;
2302 // Check the calling convention.
2303 ImmutableCallSite
CS(CI
);
2304 CallingConv::ID CC
= CS
.getCallingConv();
2306 // TODO: Avoid some calling conventions?
2308 FunctionType
*FTy
= CS
.getFunctionType();
2309 bool isVarArg
= FTy
->isVarArg();
2311 // Handle *simple* calls for now.
2312 Type
*RetTy
= I
->getType();
2314 if (RetTy
->isVoidTy())
2315 RetVT
= MVT::isVoid
;
2316 else if (!isTypeLegal(RetTy
, RetVT
) && RetVT
!= MVT::i16
&&
2317 RetVT
!= MVT::i8
&& RetVT
!= MVT::i1
)
2320 // Can't handle non-double multi-reg retvals.
2321 if (RetVT
!= MVT::isVoid
&& RetVT
!= MVT::i1
&& RetVT
!= MVT::i8
&&
2322 RetVT
!= MVT::i16
&& RetVT
!= MVT::i32
) {
2323 SmallVector
<CCValAssign
, 16> RVLocs
;
2324 CCState
CCInfo(CC
, isVarArg
, *FuncInfo
.MF
, RVLocs
, *Context
);
2325 CCInfo
.AnalyzeCallResult(RetVT
, CCAssignFnForCall(CC
, true, isVarArg
));
2326 if (RVLocs
.size() >= 2 && RetVT
!= MVT::f64
)
2330 // Set up the argument vectors.
2331 SmallVector
<Value
*, 8> Args
;
2332 SmallVector
<Register
, 8> ArgRegs
;
2333 SmallVector
<MVT
, 8> ArgVTs
;
2334 SmallVector
<ISD::ArgFlagsTy
, 8> ArgFlags
;
2335 unsigned arg_size
= CS
.arg_size();
2336 Args
.reserve(arg_size
);
2337 ArgRegs
.reserve(arg_size
);
2338 ArgVTs
.reserve(arg_size
);
2339 ArgFlags
.reserve(arg_size
);
2340 for (ImmutableCallSite::arg_iterator i
= CS
.arg_begin(), e
= CS
.arg_end();
2342 // If we're lowering a memory intrinsic instead of a regular call, skip the
2343 // last argument, which shouldn't be passed to the underlying function.
2344 if (IntrMemName
&& e
- i
<= 1)
2347 ISD::ArgFlagsTy Flags
;
2348 unsigned ArgIdx
= i
- CS
.arg_begin();
2349 if (CS
.paramHasAttr(ArgIdx
, Attribute::SExt
))
2351 if (CS
.paramHasAttr(ArgIdx
, Attribute::ZExt
))
2354 // FIXME: Only handle *easy* calls for now.
2355 if (CS
.paramHasAttr(ArgIdx
, Attribute::InReg
) ||
2356 CS
.paramHasAttr(ArgIdx
, Attribute::StructRet
) ||
2357 CS
.paramHasAttr(ArgIdx
, Attribute::SwiftSelf
) ||
2358 CS
.paramHasAttr(ArgIdx
, Attribute::SwiftError
) ||
2359 CS
.paramHasAttr(ArgIdx
, Attribute::Nest
) ||
2360 CS
.paramHasAttr(ArgIdx
, Attribute::ByVal
))
2363 Type
*ArgTy
= (*i
)->getType();
2365 if (!isTypeLegal(ArgTy
, ArgVT
) && ArgVT
!= MVT::i16
&& ArgVT
!= MVT::i8
&&
2369 Register Arg
= getRegForValue(*i
);
2373 Flags
.setOrigAlign(Align(DL
.getABITypeAlignment(ArgTy
)));
2376 ArgRegs
.push_back(Arg
);
2377 ArgVTs
.push_back(ArgVT
);
2378 ArgFlags
.push_back(Flags
);
2381 // Handle the arguments now that we've gotten them.
2382 SmallVector
<Register
, 4> RegArgs
;
2384 if (!ProcessCallArgs(Args
, ArgRegs
, ArgVTs
, ArgFlags
,
2385 RegArgs
, CC
, NumBytes
, isVarArg
))
2388 bool UseReg
= false;
2389 const GlobalValue
*GV
= dyn_cast
<GlobalValue
>(Callee
);
2390 if (!GV
|| Subtarget
->genLongCalls()) UseReg
= true;
2395 CalleeReg
= getLibcallReg(IntrMemName
);
2397 CalleeReg
= getRegForValue(Callee
);
2399 if (CalleeReg
== 0) return false;
2403 unsigned CallOpc
= ARMSelectCallOp(UseReg
);
2404 MachineInstrBuilder MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
,
2405 DbgLoc
, TII
.get(CallOpc
));
2407 // ARM calls don't take a predicate, but tBL / tBLX do.
2409 MIB
.add(predOps(ARMCC::AL
));
2411 MIB
.addReg(CalleeReg
);
2412 else if (!IntrMemName
)
2413 MIB
.addGlobalAddress(GV
, 0, 0);
2415 MIB
.addExternalSymbol(IntrMemName
, 0);
2417 // Add implicit physical register uses to the call.
2418 for (Register R
: RegArgs
)
2419 MIB
.addReg(R
, RegState::Implicit
);
2421 // Add a register mask with the call-preserved registers.
2422 // Proper defs for return values will be added by setPhysRegsDeadExcept().
2423 MIB
.addRegMask(TRI
.getCallPreservedMask(*FuncInfo
.MF
, CC
));
2425 // Finish off the call including any return values.
2426 SmallVector
<Register
, 4> UsedRegs
;
2427 if (!FinishCall(RetVT
, UsedRegs
, I
, CC
, NumBytes
, isVarArg
))
2430 // Set all unused physreg defs as dead.
2431 static_cast<MachineInstr
*>(MIB
)->setPhysRegsDeadExcept(UsedRegs
, TRI
);
2436 bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len
) {
2440 bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest
, Address Src
,
2441 uint64_t Len
, unsigned Alignment
) {
2442 // Make sure we don't bloat code by inlining very large memcpy's.
2443 if (!ARMIsMemCpySmall(Len
))
2448 if (!Alignment
|| Alignment
>= 4) {
2454 assert(Len
== 1 && "Expected a length of 1!");
2458 // Bound based on alignment.
2459 if (Len
>= 2 && Alignment
== 2)
2468 RV
= ARMEmitLoad(VT
, ResultReg
, Src
);
2469 assert(RV
&& "Should be able to handle this load.");
2470 RV
= ARMEmitStore(VT
, ResultReg
, Dest
);
2471 assert(RV
&& "Should be able to handle this store.");
2474 unsigned Size
= VT
.getSizeInBits()/8;
2476 Dest
.Offset
+= Size
;
2483 bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst
&I
) {
2484 // FIXME: Handle more intrinsics.
2485 switch (I
.getIntrinsicID()) {
2486 default: return false;
2487 case Intrinsic::frameaddress
: {
2488 MachineFrameInfo
&MFI
= FuncInfo
.MF
->getFrameInfo();
2489 MFI
.setFrameAddressIsTaken(true);
2491 unsigned LdrOpc
= isThumb2
? ARM::t2LDRi12
: ARM::LDRi12
;
2492 const TargetRegisterClass
*RC
= isThumb2
? &ARM::tGPRRegClass
2493 : &ARM::GPRRegClass
;
2495 const ARMBaseRegisterInfo
*RegInfo
=
2496 static_cast<const ARMBaseRegisterInfo
*>(Subtarget
->getRegisterInfo());
2497 Register FramePtr
= RegInfo
->getFrameRegister(*(FuncInfo
.MF
));
2498 unsigned SrcReg
= FramePtr
;
2500 // Recursively load frame address
2506 unsigned Depth
= cast
<ConstantInt
>(I
.getOperand(0))->getZExtValue();
2508 DestReg
= createResultReg(RC
);
2509 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2510 TII
.get(LdrOpc
), DestReg
)
2511 .addReg(SrcReg
).addImm(0));
2514 updateValueMap(&I
, SrcReg
);
2517 case Intrinsic::memcpy
:
2518 case Intrinsic::memmove
: {
2519 const MemTransferInst
&MTI
= cast
<MemTransferInst
>(I
);
2520 // Don't handle volatile.
2521 if (MTI
.isVolatile())
2524 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
2525 // we would emit dead code because we don't currently handle memmoves.
2526 bool isMemCpy
= (I
.getIntrinsicID() == Intrinsic::memcpy
);
2527 if (isa
<ConstantInt
>(MTI
.getLength()) && isMemCpy
) {
2528 // Small memcpy's are common enough that we want to do them without a call
2530 uint64_t Len
= cast
<ConstantInt
>(MTI
.getLength())->getZExtValue();
2531 if (ARMIsMemCpySmall(Len
)) {
2533 if (!ARMComputeAddress(MTI
.getRawDest(), Dest
) ||
2534 !ARMComputeAddress(MTI
.getRawSource(), Src
))
2536 unsigned Alignment
= MinAlign(MTI
.getDestAlignment(),
2537 MTI
.getSourceAlignment());
2538 if (ARMTryEmitSmallMemCpy(Dest
, Src
, Len
, Alignment
))
2543 if (!MTI
.getLength()->getType()->isIntegerTy(32))
2546 if (MTI
.getSourceAddressSpace() > 255 || MTI
.getDestAddressSpace() > 255)
2549 const char *IntrMemName
= isa
<MemCpyInst
>(I
) ? "memcpy" : "memmove";
2550 return SelectCall(&I
, IntrMemName
);
2552 case Intrinsic::memset
: {
2553 const MemSetInst
&MSI
= cast
<MemSetInst
>(I
);
2554 // Don't handle volatile.
2555 if (MSI
.isVolatile())
2558 if (!MSI
.getLength()->getType()->isIntegerTy(32))
2561 if (MSI
.getDestAddressSpace() > 255)
2564 return SelectCall(&I
, "memset");
2566 case Intrinsic::trap
: {
2567 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(
2568 Subtarget
->useNaClTrap() ? ARM::TRAPNaCl
: ARM::TRAP
));
2574 bool ARMFastISel::SelectTrunc(const Instruction
*I
) {
2575 // The high bits for a type smaller than the register size are assumed to be
2577 Value
*Op
= I
->getOperand(0);
2580 SrcVT
= TLI
.getValueType(DL
, Op
->getType(), true);
2581 DestVT
= TLI
.getValueType(DL
, I
->getType(), true);
2583 if (SrcVT
!= MVT::i32
&& SrcVT
!= MVT::i16
&& SrcVT
!= MVT::i8
)
2585 if (DestVT
!= MVT::i16
&& DestVT
!= MVT::i8
&& DestVT
!= MVT::i1
)
2588 unsigned SrcReg
= getRegForValue(Op
);
2589 if (!SrcReg
) return false;
2591 // Because the high bits are undefined, a truncate doesn't generate
2593 updateValueMap(I
, SrcReg
);
2597 unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT
, unsigned SrcReg
, MVT DestVT
,
2599 if (DestVT
!= MVT::i32
&& DestVT
!= MVT::i16
&& DestVT
!= MVT::i8
)
2601 if (SrcVT
!= MVT::i16
&& SrcVT
!= MVT::i8
&& SrcVT
!= MVT::i1
)
2604 // Table of which combinations can be emitted as a single instruction,
2605 // and which will require two.
2606 static const uint8_t isSingleInstrTbl
[3][2][2][2] = {
2608 // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
2609 // ext: s z s z s z s z
2610 /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
2611 /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
2612 /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
2615 // Target registers for:
2616 // - For ARM can never be PC.
2617 // - For 16-bit Thumb are restricted to lower 8 registers.
2618 // - For 32-bit Thumb are restricted to non-SP and non-PC.
2619 static const TargetRegisterClass
*RCTbl
[2][2] = {
2620 // Instructions: Two Single
2621 /* ARM */ { &ARM::GPRnopcRegClass
, &ARM::GPRnopcRegClass
},
2622 /* Thumb */ { &ARM::tGPRRegClass
, &ARM::rGPRRegClass
}
2625 // Table governing the instruction(s) to be emitted.
2626 static const struct InstructionTable
{
2628 uint32_t hasS
: 1; // Some instructions have an S bit, always set it to 0.
2629 uint32_t Shift
: 7; // For shift operand addressing mode, used by MOVsi.
2630 uint32_t Imm
: 8; // All instructions have either a shift or a mask.
2631 } IT
[2][2][3][2] = {
2632 { // Two instructions (first is left shift, second is in this table).
2633 { // ARM Opc S Shift Imm
2634 /* 1 bit sext */ { { ARM::MOVsi
, 1, ARM_AM::asr
, 31 },
2635 /* 1 bit zext */ { ARM::MOVsi
, 1, ARM_AM::lsr
, 31 } },
2636 /* 8 bit sext */ { { ARM::MOVsi
, 1, ARM_AM::asr
, 24 },
2637 /* 8 bit zext */ { ARM::MOVsi
, 1, ARM_AM::lsr
, 24 } },
2638 /* 16 bit sext */ { { ARM::MOVsi
, 1, ARM_AM::asr
, 16 },
2639 /* 16 bit zext */ { ARM::MOVsi
, 1, ARM_AM::lsr
, 16 } }
2641 { // Thumb Opc S Shift Imm
2642 /* 1 bit sext */ { { ARM::tASRri
, 0, ARM_AM::no_shift
, 31 },
2643 /* 1 bit zext */ { ARM::tLSRri
, 0, ARM_AM::no_shift
, 31 } },
2644 /* 8 bit sext */ { { ARM::tASRri
, 0, ARM_AM::no_shift
, 24 },
2645 /* 8 bit zext */ { ARM::tLSRri
, 0, ARM_AM::no_shift
, 24 } },
2646 /* 16 bit sext */ { { ARM::tASRri
, 0, ARM_AM::no_shift
, 16 },
2647 /* 16 bit zext */ { ARM::tLSRri
, 0, ARM_AM::no_shift
, 16 } }
2650 { // Single instruction.
2651 { // ARM Opc S Shift Imm
2652 /* 1 bit sext */ { { ARM::KILL
, 0, ARM_AM::no_shift
, 0 },
2653 /* 1 bit zext */ { ARM::ANDri
, 1, ARM_AM::no_shift
, 1 } },
2654 /* 8 bit sext */ { { ARM::SXTB
, 0, ARM_AM::no_shift
, 0 },
2655 /* 8 bit zext */ { ARM::ANDri
, 1, ARM_AM::no_shift
, 255 } },
2656 /* 16 bit sext */ { { ARM::SXTH
, 0, ARM_AM::no_shift
, 0 },
2657 /* 16 bit zext */ { ARM::UXTH
, 0, ARM_AM::no_shift
, 0 } }
2659 { // Thumb Opc S Shift Imm
2660 /* 1 bit sext */ { { ARM::KILL
, 0, ARM_AM::no_shift
, 0 },
2661 /* 1 bit zext */ { ARM::t2ANDri
, 1, ARM_AM::no_shift
, 1 } },
2662 /* 8 bit sext */ { { ARM::t2SXTB
, 0, ARM_AM::no_shift
, 0 },
2663 /* 8 bit zext */ { ARM::t2ANDri
, 1, ARM_AM::no_shift
, 255 } },
2664 /* 16 bit sext */ { { ARM::t2SXTH
, 0, ARM_AM::no_shift
, 0 },
2665 /* 16 bit zext */ { ARM::t2UXTH
, 0, ARM_AM::no_shift
, 0 } }
2670 unsigned SrcBits
= SrcVT
.getSizeInBits();
2671 unsigned DestBits
= DestVT
.getSizeInBits();
2673 assert((SrcBits
< DestBits
) && "can only extend to larger types");
2674 assert((DestBits
== 32 || DestBits
== 16 || DestBits
== 8) &&
2675 "other sizes unimplemented");
2676 assert((SrcBits
== 16 || SrcBits
== 8 || SrcBits
== 1) &&
2677 "other sizes unimplemented");
2679 bool hasV6Ops
= Subtarget
->hasV6Ops();
2680 unsigned Bitness
= SrcBits
/ 8; // {1,8,16}=>{0,1,2}
2681 assert((Bitness
< 3) && "sanity-check table bounds");
2683 bool isSingleInstr
= isSingleInstrTbl
[Bitness
][isThumb2
][hasV6Ops
][isZExt
];
2684 const TargetRegisterClass
*RC
= RCTbl
[isThumb2
][isSingleInstr
];
2685 const InstructionTable
*ITP
= &IT
[isSingleInstr
][isThumb2
][Bitness
][isZExt
];
2686 unsigned Opc
= ITP
->Opc
;
2687 assert(ARM::KILL
!= Opc
&& "Invalid table entry");
2688 unsigned hasS
= ITP
->hasS
;
2689 ARM_AM::ShiftOpc Shift
= (ARM_AM::ShiftOpc
) ITP
->Shift
;
2690 assert(((Shift
== ARM_AM::no_shift
) == (Opc
!= ARM::MOVsi
)) &&
2691 "only MOVsi has shift operand addressing mode");
2692 unsigned Imm
= ITP
->Imm
;
2694 // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
2695 bool setsCPSR
= &ARM::tGPRRegClass
== RC
;
2696 unsigned LSLOpc
= isThumb2
? ARM::tLSLri
: ARM::MOVsi
;
2698 // MOVsi encodes shift and immediate in shift operand addressing mode.
2699 // The following condition has the same value when emitting two
2700 // instruction sequences: both are shifts.
2701 bool ImmIsSO
= (Shift
!= ARM_AM::no_shift
);
2703 // Either one or two instructions are emitted.
2704 // They're always of the form:
2706 // CPSR is set only by 16-bit Thumb instructions.
2707 // Predicate, if any, is AL.
2708 // S bit, if available, is always 0.
2709 // When two are emitted the first's result will feed as the second's input,
2710 // that value is then dead.
2711 unsigned NumInstrsEmitted
= isSingleInstr
? 1 : 2;
2712 for (unsigned Instr
= 0; Instr
!= NumInstrsEmitted
; ++Instr
) {
2713 ResultReg
= createResultReg(RC
);
2714 bool isLsl
= (0 == Instr
) && !isSingleInstr
;
2715 unsigned Opcode
= isLsl
? LSLOpc
: Opc
;
2716 ARM_AM::ShiftOpc ShiftAM
= isLsl
? ARM_AM::lsl
: Shift
;
2717 unsigned ImmEnc
= ImmIsSO
? ARM_AM::getSORegOpc(ShiftAM
, Imm
) : Imm
;
2718 bool isKill
= 1 == Instr
;
2719 MachineInstrBuilder MIB
= BuildMI(
2720 *FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(Opcode
), ResultReg
);
2722 MIB
.addReg(ARM::CPSR
, RegState::Define
);
2723 SrcReg
= constrainOperandRegClass(TII
.get(Opcode
), SrcReg
, 1 + setsCPSR
);
2724 MIB
.addReg(SrcReg
, isKill
* RegState::Kill
)
2726 .add(predOps(ARMCC::AL
));
2728 MIB
.add(condCodeOp());
2729 // Second instruction consumes the first's result.
2736 bool ARMFastISel::SelectIntExt(const Instruction
*I
) {
2737 // On ARM, in general, integer casts don't involve legal types; this code
2738 // handles promotable integers.
2739 Type
*DestTy
= I
->getType();
2740 Value
*Src
= I
->getOperand(0);
2741 Type
*SrcTy
= Src
->getType();
2743 bool isZExt
= isa
<ZExtInst
>(I
);
2744 unsigned SrcReg
= getRegForValue(Src
);
2745 if (!SrcReg
) return false;
2747 EVT SrcEVT
, DestEVT
;
2748 SrcEVT
= TLI
.getValueType(DL
, SrcTy
, true);
2749 DestEVT
= TLI
.getValueType(DL
, DestTy
, true);
2750 if (!SrcEVT
.isSimple()) return false;
2751 if (!DestEVT
.isSimple()) return false;
2753 MVT SrcVT
= SrcEVT
.getSimpleVT();
2754 MVT DestVT
= DestEVT
.getSimpleVT();
2755 unsigned ResultReg
= ARMEmitIntExt(SrcVT
, SrcReg
, DestVT
, isZExt
);
2756 if (ResultReg
== 0) return false;
2757 updateValueMap(I
, ResultReg
);
2761 bool ARMFastISel::SelectShift(const Instruction
*I
,
2762 ARM_AM::ShiftOpc ShiftTy
) {
2763 // We handle thumb2 mode by target independent selector
2764 // or SelectionDAG ISel.
2768 // Only handle i32 now.
2769 EVT DestVT
= TLI
.getValueType(DL
, I
->getType(), true);
2770 if (DestVT
!= MVT::i32
)
2773 unsigned Opc
= ARM::MOVsr
;
2775 Value
*Src2Value
= I
->getOperand(1);
2776 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Src2Value
)) {
2777 ShiftImm
= CI
->getZExtValue();
2779 // Fall back to selection DAG isel if the shift amount
2780 // is zero or greater than the width of the value type.
2781 if (ShiftImm
== 0 || ShiftImm
>=32)
2787 Value
*Src1Value
= I
->getOperand(0);
2788 unsigned Reg1
= getRegForValue(Src1Value
);
2789 if (Reg1
== 0) return false;
2792 if (Opc
== ARM::MOVsr
) {
2793 Reg2
= getRegForValue(Src2Value
);
2794 if (Reg2
== 0) return false;
2797 unsigned ResultReg
= createResultReg(&ARM::GPRnopcRegClass
);
2798 if(ResultReg
== 0) return false;
2800 MachineInstrBuilder MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2801 TII
.get(Opc
), ResultReg
)
2804 if (Opc
== ARM::MOVsi
)
2805 MIB
.addImm(ARM_AM::getSORegOpc(ShiftTy
, ShiftImm
));
2806 else if (Opc
== ARM::MOVsr
) {
2808 MIB
.addImm(ARM_AM::getSORegOpc(ShiftTy
, 0));
2811 AddOptionalDefs(MIB
);
2812 updateValueMap(I
, ResultReg
);
2816 // TODO: SoftFP support.
2817 bool ARMFastISel::fastSelectInstruction(const Instruction
*I
) {
2818 switch (I
->getOpcode()) {
2819 case Instruction::Load
:
2820 return SelectLoad(I
);
2821 case Instruction::Store
:
2822 return SelectStore(I
);
2823 case Instruction::Br
:
2824 return SelectBranch(I
);
2825 case Instruction::IndirectBr
:
2826 return SelectIndirectBr(I
);
2827 case Instruction::ICmp
:
2828 case Instruction::FCmp
:
2829 return SelectCmp(I
);
2830 case Instruction::FPExt
:
2831 return SelectFPExt(I
);
2832 case Instruction::FPTrunc
:
2833 return SelectFPTrunc(I
);
2834 case Instruction::SIToFP
:
2835 return SelectIToFP(I
, /*isSigned*/ true);
2836 case Instruction::UIToFP
:
2837 return SelectIToFP(I
, /*isSigned*/ false);
2838 case Instruction::FPToSI
:
2839 return SelectFPToI(I
, /*isSigned*/ true);
2840 case Instruction::FPToUI
:
2841 return SelectFPToI(I
, /*isSigned*/ false);
2842 case Instruction::Add
:
2843 return SelectBinaryIntOp(I
, ISD::ADD
);
2844 case Instruction::Or
:
2845 return SelectBinaryIntOp(I
, ISD::OR
);
2846 case Instruction::Sub
:
2847 return SelectBinaryIntOp(I
, ISD::SUB
);
2848 case Instruction::FAdd
:
2849 return SelectBinaryFPOp(I
, ISD::FADD
);
2850 case Instruction::FSub
:
2851 return SelectBinaryFPOp(I
, ISD::FSUB
);
2852 case Instruction::FMul
:
2853 return SelectBinaryFPOp(I
, ISD::FMUL
);
2854 case Instruction::SDiv
:
2855 return SelectDiv(I
, /*isSigned*/ true);
2856 case Instruction::UDiv
:
2857 return SelectDiv(I
, /*isSigned*/ false);
2858 case Instruction::SRem
:
2859 return SelectRem(I
, /*isSigned*/ true);
2860 case Instruction::URem
:
2861 return SelectRem(I
, /*isSigned*/ false);
2862 case Instruction::Call
:
2863 if (const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
))
2864 return SelectIntrinsicCall(*II
);
2865 return SelectCall(I
);
2866 case Instruction::Select
:
2867 return SelectSelect(I
);
2868 case Instruction::Ret
:
2869 return SelectRet(I
);
2870 case Instruction::Trunc
:
2871 return SelectTrunc(I
);
2872 case Instruction::ZExt
:
2873 case Instruction::SExt
:
2874 return SelectIntExt(I
);
2875 case Instruction::Shl
:
2876 return SelectShift(I
, ARM_AM::lsl
);
2877 case Instruction::LShr
:
2878 return SelectShift(I
, ARM_AM::lsr
);
2879 case Instruction::AShr
:
2880 return SelectShift(I
, ARM_AM::asr
);
2886 // This table describes sign- and zero-extend instructions which can be
2887 // folded into a preceding load. All of these extends have an immediate
2888 // (sometimes a mask and sometimes a shift) that's applied after
2890 static const struct FoldableLoadExtendsStruct
{
2891 uint16_t Opc
[2]; // ARM, Thumb.
2892 uint8_t ExpectedImm
;
2894 uint8_t ExpectedVT
: 7;
2895 } FoldableLoadExtends
[] = {
2896 { { ARM::SXTH
, ARM::t2SXTH
}, 0, 0, MVT::i16
},
2897 { { ARM::UXTH
, ARM::t2UXTH
}, 0, 1, MVT::i16
},
2898 { { ARM::ANDri
, ARM::t2ANDri
}, 255, 1, MVT::i8
},
2899 { { ARM::SXTB
, ARM::t2SXTB
}, 0, 0, MVT::i8
},
2900 { { ARM::UXTB
, ARM::t2UXTB
}, 0, 1, MVT::i8
}
2903 /// The specified machine instr operand is a vreg, and that
2904 /// vreg is being provided by the specified load instruction. If possible,
2905 /// try to fold the load as an operand to the instruction, returning true if
2907 bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr
*MI
, unsigned OpNo
,
2908 const LoadInst
*LI
) {
2909 // Verify we have a legal type before going any further.
2911 if (!isLoadTypeLegal(LI
->getType(), VT
))
2914 // Combine load followed by zero- or sign-extend.
2915 // ldrb r1, [r0] ldrb r1, [r0]
2917 // mov r3, r2 mov r3, r1
2918 if (MI
->getNumOperands() < 3 || !MI
->getOperand(2).isImm())
2920 const uint64_t Imm
= MI
->getOperand(2).getImm();
2924 for (const FoldableLoadExtendsStruct
&FLE
: FoldableLoadExtends
) {
2925 if (FLE
.Opc
[isThumb2
] == MI
->getOpcode() &&
2926 (uint64_t)FLE
.ExpectedImm
== Imm
&&
2927 MVT((MVT::SimpleValueType
)FLE
.ExpectedVT
) == VT
) {
2929 isZExt
= FLE
.isZExt
;
2932 if (!Found
) return false;
2934 // See if we can handle this address.
2936 if (!ARMComputeAddress(LI
->getOperand(0), Addr
)) return false;
2938 Register ResultReg
= MI
->getOperand(0).getReg();
2939 if (!ARMEmitLoad(VT
, ResultReg
, Addr
, LI
->getAlignment(), isZExt
, false))
2941 MachineBasicBlock::iterator
I(MI
);
2942 removeDeadCode(I
, std::next(I
));
2946 unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue
*GV
,
2947 unsigned Align
, MVT VT
) {
2948 bool UseGOT_PREL
= !TM
.shouldAssumeDSOLocal(*GV
->getParent(), GV
);
2950 LLVMContext
*Context
= &MF
->getFunction().getContext();
2951 unsigned ARMPCLabelIndex
= AFI
->createPICLabelUId();
2952 unsigned PCAdj
= Subtarget
->isThumb() ? 4 : 8;
2953 ARMConstantPoolValue
*CPV
= ARMConstantPoolConstant::Create(
2954 GV
, ARMPCLabelIndex
, ARMCP::CPValue
, PCAdj
,
2955 UseGOT_PREL
? ARMCP::GOT_PREL
: ARMCP::no_modifier
,
2956 /*AddCurrentAddress=*/UseGOT_PREL
);
2958 unsigned ConstAlign
=
2959 MF
->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context
));
2960 unsigned Idx
= MF
->getConstantPool()->getConstantPoolIndex(CPV
, ConstAlign
);
2961 MachineMemOperand
*CPMMO
=
2962 MF
->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF
),
2963 MachineMemOperand::MOLoad
, 4, 4);
2965 Register TempReg
= MF
->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass
);
2966 unsigned Opc
= isThumb2
? ARM::t2LDRpci
: ARM::LDRcp
;
2967 MachineInstrBuilder MIB
=
2968 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(Opc
), TempReg
)
2969 .addConstantPoolIndex(Idx
)
2970 .addMemOperand(CPMMO
);
2971 if (Opc
== ARM::LDRcp
)
2973 MIB
.add(predOps(ARMCC::AL
));
2975 // Fix the address by adding pc.
2976 unsigned DestReg
= createResultReg(TLI
.getRegClassFor(VT
));
2977 Opc
= Subtarget
->isThumb() ? ARM::tPICADD
: UseGOT_PREL
? ARM::PICLDR
2979 DestReg
= constrainOperandRegClass(TII
.get(Opc
), DestReg
, 0);
2980 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
, TII
.get(Opc
), DestReg
)
2982 .addImm(ARMPCLabelIndex
);
2984 if (!Subtarget
->isThumb())
2985 MIB
.add(predOps(ARMCC::AL
));
2987 if (UseGOT_PREL
&& Subtarget
->isThumb()) {
2988 unsigned NewDestReg
= createResultReg(TLI
.getRegClassFor(VT
));
2989 MIB
= BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
2990 TII
.get(ARM::t2LDRi12
), NewDestReg
)
2993 DestReg
= NewDestReg
;
2994 AddOptionalDefs(MIB
);
2999 bool ARMFastISel::fastLowerArguments() {
3000 if (!FuncInfo
.CanLowerReturn
)
3003 const Function
*F
= FuncInfo
.Fn
;
3007 CallingConv::ID CC
= F
->getCallingConv();
3011 case CallingConv::Fast
:
3012 case CallingConv::C
:
3013 case CallingConv::ARM_AAPCS_VFP
:
3014 case CallingConv::ARM_AAPCS
:
3015 case CallingConv::ARM_APCS
:
3016 case CallingConv::Swift
:
3020 // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
3021 // which are passed in r0 - r3.
3022 for (const Argument
&Arg
: F
->args()) {
3023 if (Arg
.getArgNo() >= 4)
3026 if (Arg
.hasAttribute(Attribute::InReg
) ||
3027 Arg
.hasAttribute(Attribute::StructRet
) ||
3028 Arg
.hasAttribute(Attribute::SwiftSelf
) ||
3029 Arg
.hasAttribute(Attribute::SwiftError
) ||
3030 Arg
.hasAttribute(Attribute::ByVal
))
3033 Type
*ArgTy
= Arg
.getType();
3034 if (ArgTy
->isStructTy() || ArgTy
->isArrayTy() || ArgTy
->isVectorTy())
3037 EVT ArgVT
= TLI
.getValueType(DL
, ArgTy
);
3038 if (!ArgVT
.isSimple()) return false;
3039 switch (ArgVT
.getSimpleVT().SimpleTy
) {
3049 static const MCPhysReg GPRArgRegs
[] = {
3050 ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
3053 const TargetRegisterClass
*RC
= &ARM::rGPRRegClass
;
3054 for (const Argument
&Arg
: F
->args()) {
3055 unsigned ArgNo
= Arg
.getArgNo();
3056 unsigned SrcReg
= GPRArgRegs
[ArgNo
];
3057 unsigned DstReg
= FuncInfo
.MF
->addLiveIn(SrcReg
, RC
);
3058 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3059 // Without this, EmitLiveInCopies may eliminate the livein if its only
3060 // use is a bitcast (which isn't turned into an instruction).
3061 unsigned ResultReg
= createResultReg(RC
);
3062 BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
3063 TII
.get(TargetOpcode::COPY
),
3064 ResultReg
).addReg(DstReg
, getKillRegState(true));
3065 updateValueMap(&Arg
, ResultReg
);
3073 FastISel
*ARM::createFastISel(FunctionLoweringInfo
&funcInfo
,
3074 const TargetLibraryInfo
*libInfo
) {
3075 if (funcInfo
.MF
->getSubtarget
<ARMSubtarget
>().useFastISel())
3076 return new ARMFastISel(funcInfo
, libInfo
);
3081 } // end namespace llvm