//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMISelLowering.h"
21 #include "ARMMachineFunctionInfo.h"
22 #include "ARMSubtarget.h"
23 #include "MCTargetDesc/ARMAddressingModes.h"
24 #include "MCTargetDesc/ARMBaseInfo.h"
25 #include "Utils/ARMBaseInfo.h"
26 #include "llvm/ADT/APFloat.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/CodeGen/CallingConvLower.h"
31 #include "llvm/CodeGen/FastISel.h"
32 #include "llvm/CodeGen/FunctionLoweringInfo.h"
33 #include "llvm/CodeGen/ISDOpcodes.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineConstantPool.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineInstr.h"
39 #include "llvm/CodeGen/MachineInstrBuilder.h"
40 #include "llvm/CodeGen/MachineMemOperand.h"
41 #include "llvm/CodeGen/MachineOperand.h"
42 #include "llvm/CodeGen/MachineRegisterInfo.h"
43 #include "llvm/CodeGen/TargetInstrInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetOpcodes.h"
46 #include "llvm/CodeGen/TargetRegisterInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/CodeGenTypes/MachineValueType.h"
49 #include "llvm/IR/Argument.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/CallingConv.h"
52 #include "llvm/IR/Constant.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/DataLayout.h"
55 #include "llvm/IR/DerivedTypes.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/GetElementPtrTypeIterator.h"
58 #include "llvm/IR/GlobalValue.h"
59 #include "llvm/IR/GlobalVariable.h"
60 #include "llvm/IR/InstrTypes.h"
61 #include "llvm/IR/Instruction.h"
62 #include "llvm/IR/Instructions.h"
63 #include "llvm/IR/IntrinsicInst.h"
64 #include "llvm/IR/Intrinsics.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/Operator.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/Compiler.h"
73 #include "llvm/Support/ErrorHandling.h"
74 #include "llvm/Support/MathExtras.h"
75 #include "llvm/Target/TargetMachine.h"
76 #include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

  // All possible address modes, plus some.
  struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType = RegBase;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset = 0;

    // Innocuous defaults for our address.
    Address() {
      Base.Reg = 0;
    }
  };
class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  Module &M;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

public:
  explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo),
        Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
        M(const_cast<Module &>(*funcInfo.Fn->getParent())),
        TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
        TLI(*Subtarget->getTargetLowering()) {
    AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
    isThumb2 = AFI->isThumbFunction();
    Context = &funcInfo.Fn->getContext();
  }
  // Code from FastISel.cpp.

  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC, unsigned Op0);
  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, unsigned Op1);
  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, uint64_t Imm);
  unsigned fastEmitInst_i(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC,
                          uint64_t Imm);
  // Backend specific FastISel code.

  bool fastSelectInstruction(const Instruction *I) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;
  bool fastLowerArguments() override;

  #include "ARMGenFastISel.inc"

  // Instruction selection routines.

  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectCmp(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectIToFP(const Instruction *I, bool isSigned);
  bool SelectFPToI(const Instruction *I, bool isSigned);
  bool SelectDiv(const Instruction *I, bool isSigned);
  bool SelectRem(const Instruction *I, bool isSigned);
  bool SelectCall(const Instruction *I, const char *IntrMemName);
  bool SelectIntrinsicCall(const IntrinsicInst &I);
  bool SelectSelect(const Instruction *I);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);
  bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
  // Utility routines.

  bool isPositionIndependent() const;
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadTypeLegal(Type *Ty, MVT &VT);
  bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                  bool isZExt);
  bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                   MaybeAlign Alignment = std::nullopt, bool isZExt = true,
                   bool allocReg = true);
  bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                    MaybeAlign Alignment = std::nullopt);
  bool ARMComputeAddress(const Value *Obj, Address &Addr);
  void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
  bool ARMIsMemCpySmall(uint64_t Len);
  bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                             MaybeAlign Alignment);
  unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned ARMMaterializeInt(const Constant *C, MVT VT);
  unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
  unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
  unsigned ARMSelectCallOp(bool UseReg);
  unsigned ARMLowerPICELF(const GlobalValue *GV, MVT VT);

  const TargetLowering *getTargetLowering() { return &TLI; }
  // Call handling routines.

  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                bool Return,
                                bool isVarArg);
  bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                       SmallVectorImpl<Register> &ArgRegs,
                       SmallVectorImpl<MVT> &ArgVTs,
                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                       SmallVectorImpl<Register> &RegArgs,
                       CallingConv::ID CC,
                       unsigned &NumBytes,
                       bool isVarArg);
  unsigned getLibcallReg(const Twine &Name);
  bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                  const Instruction *I, CallingConv::ID CC,
                  unsigned &NumBytes, bool isVarArg);
  bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

  // OptionalDef handling routines.

  bool isARMNEONPred(const MachineInstr *MI);
  bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
  const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
  void AddLoadStoreOperands(MVT VT, Address &Addr,
                            const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags, bool useAM3);
};

} // end anonymous namespace
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}
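
// isARMNEONPred - Return true if this instruction should still receive
// predicate operands: either it is formally predicable, or it is a
// NEON-domain instruction in ARM mode whose descriptor nevertheless carries
// a predicate operand.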
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return MI->isPredicable();

  for (const MCOperandInfo &opInfo : MCID.operands())
    if (opInfo.isPredicate())
      return true;

  return false;
}
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (isARMNEONPred(MI))
    MIB.add(predOps(ARMCC::AL));

  // Do we optionally set a predicate? Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR))
    MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}
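
// The fastEmitInst_* helpers below mirror the target-independent versions in
// FastISel.cpp, but route every emitted instruction through AddOptionalDefs
// so the default ARM predicate and optional CC operands get appended.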
unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
                            ResultReg).addReg(Op0));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, unsigned Op1) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operands are sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  Op1 = constrainOperandRegClass(II, Op1, 2);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
            .addReg(Op0)
            .addReg(Op1));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0)
                        .addReg(Op1));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, uint64_t Imm) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
            .addReg(Op0)
            .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0)
                        .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
                            ResultReg).addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VMOVSR), MoveReg)
                      .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VMOVRS), MoveReg)
                      .addReg(SrcReg));
  return MoveReg;
}
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    Register DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), DestReg).addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2Base()) return false;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx)
          .addReg(0));
  return DestReg;
}
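
// Materialize an integer constant: prefer a single MOVi16/MVN (or movw/movt
// via fastEmit_i) when the subtarget allows it, and fall back to a 32-bit
// constant-pool load otherwise.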
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
      &ARM::GPRRegClass;
    Register ImmReg = createResultReg(RC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ImmReg)
                        .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
        &ARM::GPRRegClass;
      Register ImmReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(Opc), ImmReg)
                          .addImm(Imm));
      return ImmReg;
    }
  }

  unsigned ResultReg = 0;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool. For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(C->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
  ResultReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::t2LDRpci), ResultReg)
                        .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::LDRcp), ResultReg)
                        .addConstantPoolIndex(Idx)
                        .addImm(0));
  }
  return ResultReg;
}
bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}
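
// Materialize the address of a global: use movw/movt where legal, otherwise
// go through a constant-pool entry, adding the PIC base and an extra load for
// indirect (GOT / non-lazy pointer) references.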
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal()) return 0;

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return 0;

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  Register DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible, it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    Align Alignment = DL.getPrefTypeAlign(GV->getType());

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, VT);

    // Grab index.
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Alignment);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          MIMD, TII.get(Opc), NewDestReg)
                                      .addReg(DestReg)
                                      .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
      (Subtarget->isTargetMachO() && IsIndirect)) {
    MachineInstrBuilder MIB;
    Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::t2LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    Register ResultReg = createResultReg(RC);
    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);

    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ResultReg)
                        .addFrameIndex(SI->second)
                        .addImm(0));
    return ResultReg;
  }

  return 0;
}
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}
bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}
// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = GTI.getSequentialElementStride(DL);
          while (true) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported.
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}
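
// Rewrite an address whose offset does not fit the addressing mode of the
// given type: rematerialize a frame-index base into a register if needed and
// fold an oversized offset with an explicit ADD.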
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;
    Register ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ResultReg)
                        .addFrameIndex(Addr.Base.FI)
                        .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}
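
// Append the addressing-mode operands (frame index or base register, the
// immediate offset, and the extra register required by addrmode3) plus a
// memory operand to the load/store instruction being built.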
void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       MachineMemOperand::Flags Flags,
                                       bool useAM3) {
  // addrmode5 output depends on the selection dag addressing dividing the
  // offset by 4 that it then later multiplies. Do this here as well.
  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}
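
// Emit a load of VT from Addr into ResultReg, picking the narrowest legal
// opcode and zero- or sign-extending as requested; unaligned f32 loads are
// lowered to an integer load followed by a VMOVSR.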
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              MaybeAlign Alignment, bool isZExt,
                              bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i16:
      if (Alignment && *Alignment < Align(2) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i32:
      if (Alignment && *Alignment < Align(4) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && *Alignment < Align(4)) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64.
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned loads need special handling. Doublewords require
      // word-alignment.
      if (Alignment && *Alignment < Align(4))
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load. Now we must move from the GPR to the FP register.
  if (needVMOV) {
    Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::VMOVSR), MoveReg)
                        .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  Register ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlign()))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                               MaybeAlign Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      Register Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
                                              : &ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(Opc), Res)
                          .addReg(SrcReg).addImm(1));
      SrcReg = Res;
      [[fallthrough]];
    }
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && *Alignment < Align(2) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && *Alignment < Align(4) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && *Alignment < Align(4)) {
        Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                TII.get(ARM::VMOVRS), MoveReg)
                            .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64.
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned stores need special handling. Doublewords require
      // word-alignment.
      if (Alignment && *Alignment < Align(4))
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(StrOpc))
                                .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}
bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlign()))
    return false;
  return true;
}
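
// Map an IR comparison predicate onto the ARM condition code tested after
// CMP/CMN or VCMP+FMSTAT; ARMCC::AL is returned for predicates that would
// need more than one compare.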
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}
bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
  MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))
          .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      Register OpReg = getRegForValue(TI->getOperand(0));
      OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(TstOpc))
                          .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))
          .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, MIMD.getDL());
    return true;
  }

  Register CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TstOpc))
          .addReg(CmpReg)
          .addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}
bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  Register AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0) return false;

  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  assert(isThumb2 || Subtarget->hasV4TOps());

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(Opc)).addReg(AddrReg));

  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));

  return true;
}
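
// Emit an integer CMP/CMN or a floating-point VCMP (followed by FMSTAT) for
// the two source values, using an immediate form when the right-hand constant
// can be encoded.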
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple()) return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;

  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // than a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
      break;
    case MVT::f64:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (isThumb2) {
        if (!UseImm)
          CmpOpc = ARM::t2CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
      } else {
        if (!UseImm)
          CmpOpc = ARM::CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
      }
      break;
  }

  Register SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0) return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0) return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0) return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0) return false;
    }
  }

  const MCInstrDesc &II = TII.get(CmpOpc);
  SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
  if (!UseImm) {
    SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(SrcReg1).addReg(SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
              .addReg(SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::FMSTAT)));
  return true;
}
bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  // Emit the compare.
  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  Register DestReg = createResultReg(RC);
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = fastMaterializeConstant(Zero);
  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc), DestReg)
      .addReg(ZeroReg).addImm(1)
      .addImm(ARMPred).addReg(ARM::CPSR);

  updateValueMap(I, DestReg);
  return true;
}
bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  Register Op = getRegForValue(V);
  if (Op == 0) return false;

  Register Result = createResultReg(&ARM::DPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VCVTDS), Result)
                      .addReg(Op));
  updateValueMap(I, Result);
  return true;
}
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  Register Op = getRegForValue(V);
  if (Op == 0) return false;

  Register Result = createResultReg(&ARM::SPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VCVTSD), Result)
                      .addReg(Op));
  updateValueMap(I, Result);
  return true;
}
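
// Convert a (possibly narrow) integer to f32/f64: extend to i32 if needed,
// transfer the value to an S register, then use VSITO*/VUITO*.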
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (SrcReg == 0) return false;

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
                           /*isZExt*/!isSigned);
    if (SrcReg == 0) return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  Register ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(Opc), ResultReg).addReg(FP));
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  Register Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  else return false;

  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  Register ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(Opc), ResultReg).addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  updateValueMap(I, IntReg);
  return true;
}
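
// Lower a 32-bit integer select as a TST of the condition followed by a
// predicated MOVCC/MVNCC move.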
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;

  Register CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
    assert(VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      isNegativeImm = true;
      Imm = ~Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
  }

  unsigned Op2Reg = 0;
  if (!UseImm) {
    Op2Reg = getRegForValue(I->getOperand(2));
    if (Op2Reg == 0) return false;
  }

  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TstOpc))
          .addReg(CondReg)
          .addImm(1));

  unsigned MovCCOpc;
  const TargetRegisterClass *RC;
  if (!UseImm) {
    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
    if (!isNegativeImm)
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    else
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  }
  Register ResultReg = createResultReg(RC);
  if (!UseImm) {
    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc),
            ResultReg)
        .addReg(Op2Reg)
        .addReg(Op1Reg)
        .addImm(ARMCC::NE)
        .addReg(ARM::CPSR);
  } else {
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc),
            ResultReg)
        .addReg(Op1Reg)
        .addImm(Imm)
        .addImm(ARMCC::EQ)
        .addReg(ARM::CPSR);
  }
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivideInThumbMode())
    return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // Many ABIs do not provide a libcall for standalone remainder, so we need to
  // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
  // multi-reg returns, we'll have to bail out.
  if (!TLI.hasStandaloneRem(VT)) {
    return false;
  }

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
      break;
    case ISD::OR:
      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
      break;
    case ISD::SUB:
      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
      break;
  }

  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather than materializing the value in a register.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(Opc), ResultReg)
                      .addReg(SrcReg1).addReg(SrcReg2));
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT FPVT = TLI.getValueType(DL, I->getType(), true);
  if (!FPVT.isSimple()) return false;
  MVT VT = FPVT.getSimpleVT();

  // FIXME: Support vector types where possible.
  if (VT.isVector())
    return false;

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  Register Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  Register Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(Opc), ResultReg)
                      .addReg(Op1).addReg(Op2));
  updateValueMap(I, ResultReg);
  return true;
}
1821 // Call Handling Code
1823 // This is largely taken directly from CCAssignFnForNode
1824 // TODO: We may not support all of this.
1825 CCAssignFn
*ARMFastISel::CCAssignFnForCall(CallingConv::ID CC
,
1830 report_fatal_error("Unsupported calling convention");
1831 case CallingConv::Fast
:
1832 if (Subtarget
->hasVFP2Base() && !isVarArg
) {
1833 if (!Subtarget
->isAAPCS_ABI())
1834 return (Return
? RetFastCC_ARM_APCS
: FastCC_ARM_APCS
);
1835 // For AAPCS ABI targets, just use VFP variant of the calling convention.
1836 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
1839 case CallingConv::C
:
1840 case CallingConv::CXX_FAST_TLS
:
1841 // Use target triple & subtarget features to do actual dispatch.
1842 if (Subtarget
->isAAPCS_ABI()) {
1843 if (Subtarget
->hasFPRegs() &&
1844 TM
.Options
.FloatABIType
== FloatABI::Hard
&& !isVarArg
)
1845 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
1847 return (Return
? RetCC_ARM_AAPCS
: CC_ARM_AAPCS
);
1849 return (Return
? RetCC_ARM_APCS
: CC_ARM_APCS
);
1851 case CallingConv::ARM_AAPCS_VFP
:
1852 case CallingConv::Swift
:
1853 case CallingConv::SwiftTail
:
1855 return (Return
? RetCC_ARM_AAPCS_VFP
: CC_ARM_AAPCS_VFP
);
1856 // Fall through to soft float variant, variadic functions don't
1857 // use hard floating point ABI.
1859 case CallingConv::ARM_AAPCS
:
1860 return (Return
? RetCC_ARM_AAPCS
: CC_ARM_AAPCS
);
1861 case CallingConv::ARM_APCS
:
1862 return (Return
? RetCC_ARM_APCS
: CC_ARM_APCS
);
1863 case CallingConv::GHC
:
1865 report_fatal_error("Can't return in GHC call convention");
1867 return CC_ARM_APCS_GHC
;
1868 case CallingConv::CFGuard_Check
:
1869 return (Return
? RetCC_ARM_AAPCS
: CC_ARM_Win32_CFGuard_Check
);
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<Register> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
                             CCAssignFnForCall(CC, false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (ArgVT.SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      }
    }
  }

  // At this point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getStackSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes).addImm(0));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = Args[VA.getValNo()];
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
        assert(Arg != 0 && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
      // Intentional fall-through. Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
        assert(Arg != 0 && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(-1ULL));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      MVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      Register ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      MVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      Register ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    }
  }

  return true;
}

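// Lower a return instruction. Only a single register return value is handled;
// i1/i8/i16 values are extended to i32 first when the return is flagged
// zeroext or signext.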
bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry");

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
                                                 F.isVarArg()));

    const Value *RV = Ret->getOperand(0);
    Register Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple()) return false;
    MVT RVVT = RVEVT.getSimpleVT();
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext. Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
        if (SrcReg == 0) return false;
      }
    }

    // Make the copy.
    Register DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  unsigned RetOpc;
  if (IsCmseNSEntry)
    if (isThumb2)
      RetOpc = ARM::tBXNS_RET;
    else
      llvm_unreachable("CMSE not valid for non-Thumb targets");
  else
    RetOpc = Subtarget->getReturnOpcode();

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(RetOpc));
  AddOptionalDefs(MIB);
  for (unsigned R : RetRegs)
    MIB.addReg(R, RegState::Implicit);
  return true;
}

unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  if (UseReg)
    return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
  else
    return isThumb2 ? ARM::tBL : ARM::BL;
}

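// Materialize the address of a named libcall so that long-call sequences can
// call through a register; an external i32 global is created on demand if the
// module does not already declare the symbol.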
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  // Manually compute the global's type to avoid building it when unnecessary.
  Type *GVTy = PointerType::get(*Context, /*AS=*/0);
  EVT LCREVT = TLI.getValueType(DL, GVTy);
  if (!LCREVT.isSimple()) return 0;

  GlobalValue *GV = M.getNamedGlobal(Name.str());
  if (!GV)
    GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
                            GlobalValue::ExternalLinkage, nullptr, Name);

  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}

// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (Value *Op : I->operands()) {
    Register Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, false))
    return false;

  Register CalleeReg;
  if (Subtarget->genLongCalls()) {
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    MIMD, TII.get(CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(predOps(ARMCC::AL));
  if (Subtarget->genLongCalls()) {
    CalleeReg =
        constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
    MIB.addReg(CalleeReg);
  } else
    MIB.addExternalSymbol(TLI.getLibcallName(Call));

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(R, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = nullptr) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledOperand();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Callee)) return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (CI->isTailCall()) return false;

  // Check the calling convention.
  CallingConv::ID CC = CI->getCallingConv();

  // TODO: Avoid some calling conventions?

  FunctionType *FTy = CI->getFunctionType();
  bool isVarArg = FTy->isVarArg();

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8 && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CI->arg_size();
  Args.reserve(arg_size);
  ArgRegs.reserve(arg_size);
  ArgVTs.reserve(arg_size);
  ArgFlags.reserve(arg_size);
  for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last argument, which shouldn't be passed to the underlying function.
    if (IntrMemName && ArgE - ArgI <= 1)
      break;

    ISD::ArgFlagsTy Flags;
    unsigned ArgIdx = ArgI - CI->arg_begin();
    if (CI->paramHasAttr(ArgIdx, Attribute::SExt))
      Flags.setSExt();
    if (CI->paramHasAttr(ArgIdx, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CI->paramHasAttr(ArgIdx, Attribute::InReg) ||
        CI->paramHasAttr(ArgIdx, Attribute::StructRet) ||
        CI->paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
        CI->paramHasAttr(ArgIdx, Attribute::SwiftError) ||
        CI->paramHasAttr(ArgIdx, Attribute::Nest) ||
        CI->paramHasAttr(ArgIdx, Attribute::ByVal))
      return false;

    Type *ArgTy = (*ArgI)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    Register Arg = getRegForValue(*ArgI);
    if (!Arg.isValid())
      return false;

    Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));

    Args.push_back(*ArgI);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg))
    return false;

  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || Subtarget->genLongCalls()) UseReg = true;

  Register CalleeReg;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(IntrMemName);
    else
      CalleeReg = getRegForValue(Callee);

    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    MIMD, TII.get(CallOpc));

  // ARM calls don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(predOps(ARMCC::AL));

  if (UseReg) {
    CalleeReg =
        constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
    MIB.addReg(CalleeReg);
  } else if (!IntrMemName)
    MIB.addGlobalAddress(GV, 0, 0);
  else
    MIB.addExternalSymbol(IntrMemName, 0);

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(R, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

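// Threshold test for inlining a constant-length memcpy instead of emitting a
// call. ARMTryEmitSmallMemCpy below expands qualifying copies into load/store
// pairs; e.g. with word alignment a 7-byte copy becomes one i32, one i16 and
// one i8 load/store pair.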
bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  return Len <= 16;
}

bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                                        MaybeAlign Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  while (Len) {
    MVT VT;
    if (!Alignment || *Alignment >= 4) {
      if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else {
        assert(Len == 1 && "Expected a length of 1!");
        VT = MVT::i8;
      }
    } else {
      assert(Alignment && "Alignment is set in this branch");
      // Bound based on alignment.
      if (Len >= 2 && *Alignment == 2)
        VT = MVT::i16;
      else
        VT = MVT::i8;
    }

    bool RV;
    Register ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert(RV && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert(RV && "Should be able to handle this store.");
    (void)RV;

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}

bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;

    const ARMBaseRegisterInfo *RegInfo =
        static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
    Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = FramePtr;

    // Recursively load frame address
    // ldr r0 [fp]
    // ldr r0 [r0]
    // ldr r0 [r0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(LdrOpc), DestReg)
                      .addReg(SrcReg).addImm(0));
      SrcReg = DestReg;
    }
    updateValueMap(&I, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        MaybeAlign Alignment;
        if (MTI.getDestAlign() || MTI.getSourceAlign())
          Alignment = std::min(MTI.getDestAlign().valueOrOne(),
                               MTI.getSourceAlign().valueOrOne());
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    if (!MSI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(&I, "memset");
  }
  case Intrinsic::trap: {
    unsigned Opcode;
    if (Subtarget->isThumb())
      Opcode = ARM::tTRAP;
    else
      Opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opcode));
    return true;
  }
  }
}

bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be
  // undefined.
  Value *Op = I->getOperand(0);

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(DL, Op->getType(), true);
  DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  Register SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // Because the high bits are undefined, a truncate doesn't generate
  // any code.
  updateValueMap(I, SrcReg);
  return true;
}

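// Emit an integer extension from SrcVT to DestVT. A single sxt*/uxt*/and
// instruction is used where the tables below allow it; otherwise the value is
// widened with a shift-left/shift-right pair, e.g. a sign extend from i8 on
// pre-v6 ARM becomes "lsl rT, rS, #24" followed by "asr rD, rT, #24".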
unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return 0;

  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
    return 0;

  // Table of which combinations can be emitted as a single instruction,
  // and which will require two.
  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
    //            ARM                     Thumb
    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
    //    ext:     s  z      s  z          s  z      s  z
    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  };

  // Target registers for:
  //  - For ARM can never be PC.
  //  - For 16-bit Thumb are restricted to lower 8 registers.
  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
  static const TargetRegisterClass *RCTbl[2][2] = {
    // Instructions: Two                     Single
    /* ARM   */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
    /* Thumb */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
  };

  // Table governing the instruction(s) to be emitted.
  static const struct InstructionTable {
    uint32_t Opc   : 16;
    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
  } IT[2][2][3][2] = {
    { // Two instructions (first is left shift, second is in this table).
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 31 },
        /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 31 } },
        /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 24 },
        /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 24 } },
        /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 16 },
        /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 16 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
        /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
        /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
        /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
        /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
        /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
      }
    },
    { // Single instruction.
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,  0 },
        /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,  1 } },
        /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,  0 },
        /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
        /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,  0 },
        /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,  0 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,  0 },
        /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,  1 } },
        /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,  0 },
        /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
        /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,  0 },
        /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,  0 } }
      }
    }
  };

  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DestVT.getSizeInBits();

  assert((SrcBits < DestBits) && "can only extend to larger types");
  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
         "other sizes unimplemented");
  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
         "other sizes unimplemented");

  bool hasV6Ops = Subtarget->hasV6Ops();
  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
  assert((Bitness < 3) && "sanity-check table bounds");

  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
  unsigned Opc = ITP->Opc;
  assert(ARM::KILL != Opc && "Invalid table entry");
  unsigned hasS = ITP->hasS;
  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
         "only MOVsi has shift operand addressing mode");
  unsigned Imm = ITP->Imm;

  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  bool setsCPSR = &ARM::tGPRRegClass == RC;
  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
  unsigned ResultReg;
  // MOVsi encodes shift and immediate in shift operand addressing mode.
  // The following condition has the same value when emitting two
  // instruction sequences: both are shifts.
  bool ImmIsSO = (Shift != ARM_AM::no_shift);

  // Either one or two instructions are emitted.
  // They're always of the form:
  //   dst = in OP imm
  // CPSR is set only by 16-bit Thumb instructions.
  // Predicate, if any, is AL.
  // S bit, if available, is always 0.
  // When two are emitted the first's result will feed as the second's input,
  // that value is then dead.
  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
    ResultReg = createResultReg(RC);
    bool isLsl = (0 == Instr) && !isSingleInstr;
    unsigned Opcode = isLsl ? LSLOpc : Opc;
    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
    bool isKill = 1 == Instr;
    MachineInstrBuilder MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opcode), ResultReg);
    if (setsCPSR)
      MIB.addReg(ARM::CPSR, RegState::Define);
    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
    MIB.addReg(SrcReg, isKill * RegState::Kill)
        .addImm(ImmEnc)
        .add(predOps(ARMCC::AL));
    if (hasS)
      MIB.add(condCodeOp());
    // Second instruction consumes the first's result.
    SrcReg = ResultReg;
  }

  return ResultReg;
}

bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool isZExt = isa<ZExtInst>(I);
  Register SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, SrcTy, true);
  DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple()) return false;
  if (!DestEVT.isSimple()) return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();
  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0) return false;
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectShift(const Instruction *I,
                              ARM_AM::ShiftOpc ShiftTy) {
  // We handle thumb2 mode by target independent selector
  // or SelectionDAG ISel.
  if (isThumb2)
    return false;

  // Only handle i32 now.
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  if (DestVT != MVT::i32)
    return false;

  unsigned Opc = ARM::MOVsr;
  unsigned ShiftImm;
  Value *Src2Value = I->getOperand(1);
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
    ShiftImm = CI->getZExtValue();

    // Fall back to selection DAG isel if the shift amount
    // is zero or greater than the width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
      return false;

    Opc = ARM::MOVsi;
  }

  Value *Src1Value = I->getOperand(0);
  Register Reg1 = getRegForValue(Src1Value);
  if (Reg1 == 0) return false;

  Register Reg2;
  if (Opc == ARM::MOVsr) {
    Reg2 = getRegForValue(Src2Value);
    if (Reg2 == 0) return false;
  }

  Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  if (ResultReg == 0) return false;

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg)
                            .addReg(Reg1);

  if (Opc == ARM::MOVsi)
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
  else if (Opc == ARM::MOVsr) {
    MIB.addReg(Reg2);
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
  }

  AddOptionalDefs(MIB);
  updateValueMap(I, ResultReg);
  return true;
}

// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    case Instruction::Shl:
      return SelectShift(I, ARM_AM::lsl);
    case Instruction::LShr:
      return SelectShift(I, ARM_AM::lsr);
    case Instruction::AShr:
      return SelectShift(I, ARM_AM::asr);
    default: break;
  }
  return false;
}

// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // ARM, Thumb.
  uint8_t ExpectedImm;
  uint8_t isZExt     : 1;
  uint8_t ExpectedVT : 7;
} FoldableLoadExtends[] = {
  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
};

/// The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
    return false;
  const uint64_t Imm = MI->getOperand(2).getImm();

  bool Found = false;
  bool isZExt;
  for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
    if (FLE.Opc[isThumb2] == MI->getOpcode() &&
        (uint64_t)FLE.ExpectedImm == Imm &&
        MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
      Found = true;
      isZExt = FLE.isZExt;
    }
  }
  if (!Found) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  Register ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlign(), isZExt, false))
    return false;
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

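// Materialize a global's address for PIC ELF: load a constant-pool entry and
// add the PC (tPICADD/PICADD/PICLDR). For globals that are not dso-local the
// entry is GOT_PREL, and on Thumb an extra load through the GOT slot is
// emitted.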
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
  bool UseGOT_PREL = !GV->isDSOLocal();
  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  Align ConstAlign =
      MF->getDataLayout().getPrefTypeAlign(PointerType::get(*Context, 0));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, Align(4));

  Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(0);
  MIB.add(predOps(ARMCC::AL));

  // Fix the address by adding pc.
  Register DestReg = createResultReg(TLI.getRegClassFor(VT));
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
            .addReg(TempReg)
            .addImm(ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(predOps(ARMCC::AL));

  if (UseGOT_PREL && Subtarget->isThumb()) {
    Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(ARM::t2LDRi12), NewDestReg)
              .addReg(DestReg)
              .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}

bool ARMFastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  default:
    return false;
  case CallingConv::Fast:
  case CallingConv::C:
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    break;
  }

  // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0 - r3.
  for (const Argument &Arg : F->args()) {
    if (Arg.getArgNo() >= 4)
      return false;

    if (Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::ByVal))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      break;
    default:
      return false;
    }
  }

  static const MCPhysReg GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };

  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
  for (const Argument &Arg : F->args()) {
    unsigned ArgNo = Arg.getArgNo();
    unsigned SrcReg = GPRArgRegs[ArgNo];
    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY),
            ResultReg).addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }

  return true;
}

namespace llvm {

  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
      return new ARMFastISel(funcInfo, libInfo);

    return nullptr;
  }

} // end namespace llvm