//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
namespace {

// All possible address modes, plus some.
// Innocuous defaults for our address.
class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  LLVMContext *Context;

  explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo),
        Subtarget(&static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
        M(const_cast<Module &>(*funcInfo.Fn->getParent())),
        TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
        TLI(*Subtarget->getTargetLowering()) {
    AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
    isThumb2 = AFI->isThumbFunction();
    Context = &funcInfo.Fn->getContext();
  }
  // Code from FastISel.cpp.

  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC, unsigned Op0);
  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, unsigned Op1);
  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, uint64_t Imm);
  unsigned fastEmitInst_i(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC,
                          uint64_t Imm);
  // Backend specific FastISel code.

  bool fastSelectInstruction(const Instruction *I) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;
  bool fastLowerArguments() override;

#include "ARMGenFastISel.inc"
  // Instruction selection routines.

  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectCmp(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectIToFP(const Instruction *I, bool isSigned);
  bool SelectFPToI(const Instruction *I, bool isSigned);
  bool SelectDiv(const Instruction *I, bool isSigned);
  bool SelectRem(const Instruction *I, bool isSigned);
  bool SelectCall(const Instruction *I, const char *IntrMemName);
  bool SelectIntrinsicCall(const IntrinsicInst &I);
  bool SelectSelect(const Instruction *I);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);
  bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
  // Utility routines.

  bool isPositionIndependent() const;
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadTypeLegal(Type *Ty, MVT &VT);
  bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                  bool isZExt);
  bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                   unsigned Alignment = 0, bool isZExt = true,
                   bool allocReg = true);
  bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                    unsigned Alignment = 0);
  bool ARMComputeAddress(const Value *Obj, Address &Addr);
  void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
  bool ARMIsMemCpySmall(uint64_t Len);
  bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                             unsigned Alignment);
  unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned ARMMaterializeInt(const Constant *C, MVT VT);
  unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
  unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
  unsigned ARMSelectCallOp(bool UseReg);
  unsigned ARMLowerPICELF(const GlobalValue *GV, MVT VT);

  const TargetLowering *getTargetLowering() { return &TLI; }
  // Call handling routines.

  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                bool Return,
                                bool isVarArg);
  bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
                       SmallVectorImpl<Register> &ArgRegs,
                       SmallVectorImpl<MVT> &ArgVTs,
                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                       SmallVectorImpl<Register> &RegArgs,
                       CallingConv::ID CC,
                       unsigned &NumBytes,
                       bool isVarArg);
  unsigned getLibcallReg(const Twine &Name);
  bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                  const Instruction *I, CallingConv::ID CC,
                  unsigned &NumBytes, bool isVarArg);
  bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
  // OptionalDef handling routines.

  bool isARMNEONPred(const MachineInstr *MI);
  bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
  const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
  void AddLoadStoreOperands(MVT VT, Address &Addr,
                            const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags, bool useAM3);
};

} // end anonymous namespace
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return MI->isPredicable();

  for (const MCOperandInfo &opInfo : MCID.operands())
    if (opInfo.isPredicate())
      return true;

  return false;
}
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (isARMNEONPred(MI))
    MIB.add(predOps(ARMCC::AL));

  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR))
    MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}
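
// The fastEmitInst_* helpers below mirror the generic FastISel emitters, but
// they route every instruction through AddOptionalDefs so the ARM predicate
// and optional CPSR/CCR operands are always filled in.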
unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
                            ResultReg).addReg(Op0));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                    .addReg(Op0));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, unsigned Op1) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operands are sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  Op1 = constrainOperandRegClass(II, Op1, 2);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
            .addReg(Op0)
            .addReg(Op1));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                    .addReg(Op0)
                    .addReg(Op1));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
            .addReg(Op0)
            .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                    .addReg(Op0)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
                            ResultReg).addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}
unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), DestReg).addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2Base()) return false;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx)
          .addReg(0));
  return DestReg;
}
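
// Materialize an integer constant: prefer a single MOVi16/MVN/MOVi32imm when
// the subtarget allows it, otherwise fall back to a constant-pool load
// (32-bit only).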
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                               &ARM::GPRRegClass;
    unsigned ImmReg = createResultReg(RC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ImmReg)
                    .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                                 &ARM::GPRRegClass;
      unsigned ImmReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(Opc), ImmReg)
                      .addImm(Imm));
      return ImmReg;
    }
  }

  unsigned ResultReg = 0;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool.  For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(C->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
  ResultReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::t2LDRpci), ResultReg)
                      .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::LDRcp), ResultReg)
                      .addConstantPoolIndex(Idx)
                      .addImm(0));
  }

  return ResultReg;
}
bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}
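
// Materialize the address of a global value, using movw/movt where that is
// legal and a constant-pool load (plus a PIC add/load) otherwise.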
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal()) return 0;

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return 0;

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible, it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    Align Alignment = DL.getPrefTypeAlign(GV->getType());

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, VT);

    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Alignment);

    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          DbgLoc, TII.get(Opc), NewDestReg)
                                      .addReg(DestReg)
                                      .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
      (Subtarget->isTargetMachO() && IsIndirect) ||
      Subtarget->genLongCalls()) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::t2LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);

    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(SI->second)
                            .addImm(0));
    return ResultReg;
  }

  return 0;
}
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}
bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type than can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}
// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          while (true) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported.
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}
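
// Fold an offset that does not fit the addressing mode back into the base
// register, so the final load/store immediate is always in range.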
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(Addr.Base.FI)
                            .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}
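
// Add the address operands for a load/store: either a frame index (with its
// MachineMemOperand) or a base register, plus the immediate or reg+imm8
// offset form the opcode expects.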
void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       MachineMemOperand::Flags Flags,
                                       bool useAM3) {
  // addrmode5 output depends on the selection dag addressing dividing the
  // offset by 4 that it then later multiplies. Do this here as well.
  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}
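
// Emit a load of VT from Addr into ResultReg, choosing the opcode and
// register class for the current subtarget and bouncing under-aligned f32
// loads through an integer load followed by VMOVSR.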
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned loads need special handling. Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;
      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to an regular
  // load.  Now we must move from the GRP to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  Register ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}
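
// Emit a store of SrcReg to Addr, masking i1 values down to a single bit and
// routing under-aligned f32 stores through an integer register via VMOVRS.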
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
                                              : &ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
      LLVM_FALLTHROUGH;
    }
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned stores need special handling. Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;
      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(StrOpc))
                            .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}
bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
    return false;
  return true;
}
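
// Map an IR integer or floating-point predicate onto the ARM condition code
// that a CMP/VCMP followed by a predicated instruction can test directly.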
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}
bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We musn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
          .addReg(CmpReg)
          .addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}
bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0) return false;

  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  assert(isThumb2 || Subtarget->hasV4TOps());

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc)).addReg(AddrReg));

  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);

  return true;
}
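
// Emit a CMP/CMN or VCMP of the two operands, sign- or zero-extending
// sub-word integers first; for FP compares the flags are transferred to CPSR
// with FMSTAT so branches and conditional moves can use them.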
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple()) return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;

  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // then a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
      break;
    case MVT::f64:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
      LLVM_FALLTHROUGH;
    case MVT::i32:
      if (isThumb2) {
        if (!UseImm)
          CmpOpc = ARM::t2CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
      } else {
        if (!UseImm)
          CmpOpc = ARM::CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
      }
      break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0) return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0) return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0) return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0) return false;
    }
  }

  const MCInstrDesc &II = TII.get(CmpOpc);
  SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
  if (!UseImm) {
    SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                    .addReg(SrcReg1).addReg(SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addReg(SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::FMSTAT)));
  return true;
}
bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  // Emit the compare.
  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = fastMaterializeConstant(Zero);
  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
          .addReg(ZeroReg).addImm(1)
          .addImm(ARMPred).addReg(ARM::CPSR);

  updateValueMap(I, DestReg);
  return true;
}
bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(&ARM::DPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  updateValueMap(I, Result);
  return true;
}
*I
) {
1503 // Make sure we have VFP and that we're truncating double to float.
1504 if (!Subtarget
->hasVFP2Base() || !Subtarget
->hasFP64()) return false;
1506 Value
*V
= I
->getOperand(0);
1507 if (!(I
->getType()->isFloatTy() &&
1508 V
->getType()->isDoubleTy())) return false;
1510 unsigned Op
= getRegForValue(V
);
1511 if (Op
== 0) return false;
1513 unsigned Result
= createResultReg(&ARM::SPRRegClass
);
1514 AddOptionalDefs(BuildMI(*FuncInfo
.MBB
, FuncInfo
.InsertPt
, DbgLoc
,
1515 TII
.get(ARM::VCVTSD
), Result
)
1517 updateValueMap(I
, Result
);
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0) return false;

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
                           /*isZExt*/!isSigned);
    if (SrcReg == 0) return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg).addReg(FP));
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  else return false;

  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg).addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  updateValueMap(I, IntReg);
  return true;
}
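
// Lower a select of i32 values into a TST of the condition followed by a
// predicated MOVCC/MVNCC move.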
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
    assert(VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      isNegativeImm = true;
      Imm = -Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
  }

  unsigned Op2Reg = 0;
  if (!UseImm) {
    Op2Reg = getRegForValue(I->getOperand(2));
    if (Op2Reg == 0) return false;
  }

  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
          .addReg(CondReg)
          .addImm(1));

  unsigned MovCCOpc;
  const TargetRegisterClass *RC;
  if (!UseImm) {
    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
    if (!isNegativeImm)
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    else
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  }
  unsigned ResultReg = createResultReg(RC);
  if (!UseImm) {
    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
            ResultReg)
        .addReg(Op2Reg)
        .addReg(Op1Reg)
        .addImm(ARMCC::NE)
        .addReg(ARM::CPSR);
  } else {
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
            ResultReg)
        .addReg(Op1Reg)
        .addImm(Imm)
        .addImm(ARMCC::EQ)
        .addReg(ARM::CPSR);
  }
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivideInThumbMode())
    return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // Many ABIs do not provide a libcall for standalone remainder, so we need to
  // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
  // multi-reg returns, we'll have to bail out.
  if (!TLI.hasStandaloneRem(VT)) {
    return false;
  }

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
      break;
    case ISD::OR:
      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
      break;
    case ISD::SUB:
      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
      break;
  }

  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather then materializing the value in a register.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg)
                  .addReg(SrcReg1).addReg(SrcReg2));
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT FPVT = TLI.getValueType(DL, I->getType(), true);
  if (!FPVT.isSimple()) return false;
  MVT VT = FPVT.getSimpleVT();

  // FIXME: Support vector types where possible.

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  updateValueMap(I, ResultReg);
  return true;
}
// Call Handling Code

// This is largely taken directly from CCAssignFnForNode
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool Return,
                                           bool isVarArg) {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2Base() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    LLVM_FALLTHROUGH;
  case CallingConv::C:
  case CallingConv::CXX_FAST_TLS:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2Base() &&
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else {
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    }
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fall through to soft float variant, variadic functions don't
    // use hard floating point ABI.
    LLVM_FALLTHROUGH;
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    if (Return)
      report_fatal_error("Can't return in GHC call convention");
    else
      return CC_ARM_APCS_GHC;
  case CallingConv::CFGuard_Check:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  }
}
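
// Check that every outgoing argument can be handled by fast-isel, then emit
// CALLSEQ_START and copy or promote each argument into its assigned location.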
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<Register> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
                             CCAssignFnForCall(CC, false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (ArgVT.SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      }
    }
  }

  // At this point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes).addImm(0));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = Args[VA.getValNo()];
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
        assert(Arg != 0 && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
      // Intentional fall-through. Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
        assert(Arg != 0 && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      MVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass *DstRC = TLI.getRegClassFor(DestVT);
      Register ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 &&
             "Can't handle non-double multi-reg retvals!");
      MVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass *DstRC = TLI.getRegClassFor(CopyVT);

      Register ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    }
  }

  return true;
}
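// For instance, with a soft-float calling convention an f64 result comes back
// split across two core registers (two RVLocs); the code above reassembles it
// into a double-precision VFP register with VMOVDRR and records both location
// registers in UsedRegs so setPhysRegsDeadExcept() keeps them live.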
bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry");

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
                                                 F.isVarArg()));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple()) return false;
    MVT RVVT = RVEVT.getSimpleVT();
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext. Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
        if (SrcReg == 0) return false;
      }
    }

    // Make the copy.
    Register DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  unsigned RetOpc;
  if (IsCmseNSEntry)
    if (isThumb2)
      RetOpc = ARM::tBXNS_RET;
    else
      llvm_unreachable("CMSE not valid for non-Thumb targets");
  else
    RetOpc = Subtarget->getReturnOpcode();

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(RetOpc));
  AddOptionalDefs(MIB);
  for (unsigned R : RetRegs)
    MIB.addReg(R, RegState::Implicit);
  return true;
}
unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  if (UseReg)
    return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
  else
    return isThumb2 ? ARM::tBL : ARM::BL;
}
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  // Manually compute the global's type to avoid building it when unnecessary.
  Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
  EVT LCREVT = TLI.getValueType(DL, GVTy);
  if (!LCREVT.isSimple()) return 0;

  GlobalValue *GV = M.getNamedGlobal(Name.str());
  if (!GV)
    GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
                            GlobalValue::ExternalLinkage, nullptr, Name);

  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
// A quick function that will emit a call for a named libcall, using the
// operands of the Instruction I as the passed arguments. We can assume that
// we can emit a call for any libcall we can produce. This is an abridged
// version of the full call infrastructure since we won't need to worry about
// things like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (Value *Op : I->operands()) {
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, false))
    return false;

  Register CalleeReg;
  if (Subtarget->genLongCalls()) {
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DbgLoc, TII.get(CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(predOps(ARMCC::AL));
  if (Subtarget->genLongCalls()) {
    CalleeReg =
        constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
    MIB.addReg(CalleeReg);
  } else
    MIB.addExternalSymbol(TLI.getLibcallName(Call));

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(R, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
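// Illustrative use (a sketch): integer division and remainder, which lack a
// single ARM instruction on older subtargets, are routed through here by
// SelectDiv/SelectRem with libcalls such as RTLIB::SDIV_I32; the routine then
// emits a BL/BLX to the name returned by TLI.getLibcallName(Call),
// marshalling the i32 operands through ProcessCallArgs and recovering the
// i32 result via FinishCall.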
bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = nullptr) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledOperand();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Callee)) return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (CI->isTailCall()) return false;

  // Check the calling convention.
  CallingConv::ID CC = CI->getCallingConv();

  // TODO: Avoid some calling conventions?

  FunctionType *FTy = CI->getFunctionType();
  bool isVarArg = FTy->isVarArg();

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8 && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CI->arg_size();
  Args.reserve(arg_size);
  ArgRegs.reserve(arg_size);
  ArgVTs.reserve(arg_size);
  ArgFlags.reserve(arg_size);
  for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE;
       ++ArgI) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last argument, which shouldn't be passed to the underlying function.
    if (IntrMemName && ArgE - ArgI <= 1)
      break;

    ISD::ArgFlagsTy Flags;
    unsigned ArgIdx = ArgI - CI->arg_begin();
    if (CI->paramHasAttr(ArgIdx, Attribute::SExt))
      Flags.setSExt();
    if (CI->paramHasAttr(ArgIdx, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CI->paramHasAttr(ArgIdx, Attribute::InReg) ||
        CI->paramHasAttr(ArgIdx, Attribute::StructRet) ||
        CI->paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
        CI->paramHasAttr(ArgIdx, Attribute::SwiftError) ||
        CI->paramHasAttr(ArgIdx, Attribute::Nest) ||
        CI->paramHasAttr(ArgIdx, Attribute::ByVal))
      return false;

    Type *ArgTy = (*ArgI)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    Register Arg = getRegForValue(*ArgI);
    if (!Arg.isValid())
      return false;

    Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));

    Args.push_back(*ArgI);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg))
    return false;

  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || Subtarget->genLongCalls()) UseReg = true;

  Register CalleeReg;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(IntrMemName);
    else
      CalleeReg = getRegForValue(Callee);

    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DbgLoc, TII.get(CallOpc));

  // ARM calls don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(predOps(ARMCC::AL));
  if (UseReg) {
    CalleeReg =
        constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
    MIB.addReg(CalleeReg);
  } else if (!IntrMemName)
    MIB.addGlobalAddress(GV, 0, 0);
  else
    MIB.addExternalSymbol(IntrMemName, 0);

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(R, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  return Len <= 16;
}

bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
                                        uint64_t Len, unsigned Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  while (Len) {
    MVT VT;
    if (!Alignment || Alignment >= 4) {
      if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else {
        assert(Len == 1 && "Expected a length of 1!");
        VT = MVT::i8;
      }
    } else {
      // Bound based on alignment.
      if (Len >= 2 && Alignment == 2)
        VT = MVT::i16;
      else {
        VT = MVT::i8;
      }
    }

    bool RV;
    Register ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert(RV && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert(RV && "Should be able to handle this store.");
    (void)RV;

    unsigned Size = VT.getSizeInBits() / 8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}
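// Sketch of the expansion: for a 4-byte-aligned copy of 7 bytes the loop above
// emits an i32 load/store pair, then an i16 pair, then an i8 pair, bumping
// Src.Offset and Dest.Offset by the access size after each step, so no call
// to the memcpy library routine is needed for such small constant lengths.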
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;

    const ARMBaseRegisterInfo *RegInfo =
        static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
    Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = FramePtr;

    // Recursively load frame address:
    //   ldr r0, [fp]
    //   ldr r0, [r0]
    //   ldr r0, [r0]
    //   ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(LdrOpc), DestReg)
                      .addReg(SrcReg).addImm(0));
      SrcReg = DestReg;
    }
    updateValueMap(&I, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a
      // call if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        unsigned Alignment = MinAlign(MTI.getDestAlignment(),
                                      MTI.getSourceAlignment());
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    if (!MSI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(&I, "memset");
  }
  case Intrinsic::trap: {
    unsigned Opcode;
    if (Subtarget->isThumb())
      Opcode = ARM::tTRAP;
    else
      Opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode));
    return true;
  }
  }
}
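// Summary of the memory-intrinsic path above: a memcpy with a small constant
// length is expanded inline via ARMTryEmitSmallMemCpy; anything else (large,
// variable-length, memmove, or memset) is forwarded to SelectCall with the
// library routine's name ("memcpy", "memmove" or "memset") so it becomes an
// ordinary call, dropping the trailing isvolatile operand of the intrinsic.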
bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be
  // undefined.
  Value *Op = I->getOperand(0);

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(DL, Op->getType(), true);
  DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // Because the high bits are undefined, a truncate doesn't generate
  // any code.
  updateValueMap(I, SrcReg);
  return true;
}
unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return 0;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
    return 0;

  // Table of which combinations can be emitted as a single instruction,
  // and which will require two.
  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
    //            ARM                     Thumb
    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
    //    ext:     s  z      s  z          s  z      s  z
    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  };

  // Target register classes:
  //  - for ARM, the target can never be PC;
  //  - for 16-bit Thumb, targets are restricted to the lower 8 registers;
  //  - for 32-bit Thumb, targets are restricted to non-SP and non-PC.
  static const TargetRegisterClass *RCTbl[2][2] = {
    // Instructions: Two                     Single
    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
  };

  // Table governing the instruction(s) to be emitted.
  static const struct InstructionTable {
    uint32_t Opc   : 16;
    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
  } IT[2][2][3][2] = {
    { // Two instructions (first is left shift, second is in this table).
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 31 },
        /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 31 } },
        /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 24 },
        /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 24 } },
        /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 16 },
        /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 16 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
        /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
        /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
        /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
        /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
        /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
      }
    },
    { // Single instruction.
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,  0 },
        /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,  1 } },
        /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,  0 },
        /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
        /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,  0 },
        /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,  0 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,  0 },
        /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,  1 } },
        /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,  0 },
        /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
        /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,  0 },
        /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,  0 } }
      }
    }
  };

  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DestVT.getSizeInBits();
  (void) DestBits;
  assert((SrcBits < DestBits) && "can only extend to larger types");
  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
         "other sizes unimplemented");
  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
         "other sizes unimplemented");

  bool hasV6Ops = Subtarget->hasV6Ops();
  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
  assert((Bitness < 3) && "sanity-check table bounds");

  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
  unsigned Opc = ITP->Opc;
  assert(ARM::KILL != Opc && "Invalid table entry");
  unsigned hasS = ITP->hasS;
  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
         "only MOVsi has shift operand addressing mode");
  unsigned Imm = ITP->Imm;

  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  bool setsCPSR = &ARM::tGPRRegClass == RC;
  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
  unsigned ResultReg;
  // MOVsi encodes shift and immediate in shift operand addressing mode.
  // The following condition has the same value when emitting two
  // instruction sequences: both are shifts.
  bool ImmIsSO = (Shift != ARM_AM::no_shift);

  // Either one or two instructions are emitted.
  // They're always of the form:
  //   dst = in OP imm
  // CPSR is set only by 16-bit Thumb instructions.
  // Predicate, if any, is AL.
  // S bit, if available, is always 0.
  // When two are emitted the first's result will feed as the second's input,
  // and that value is then dead.
  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
    ResultReg = createResultReg(RC);
    bool isLsl = (0 == Instr) && !isSingleInstr;
    unsigned Opcode = isLsl ? LSLOpc : Opc;
    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
    bool isKill = 1 == Instr;
    MachineInstrBuilder MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
    if (setsCPSR)
      MIB.addReg(ARM::CPSR, RegState::Define);
    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
    MIB.addReg(SrcReg, isKill * RegState::Kill)
        .addImm(ImmEnc)
        .add(predOps(ARMCC::AL));
    if (hasS)
      MIB.add(condCodeOp());
    // Second instruction consumes the first's result.
    SrcReg = ResultReg;
  }

  return ResultReg;
}
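// A worked example of the table-driven choice above: sign-extending an i1
// never has a single-instruction form, so it is emitted as a left shift by 31
// followed by an arithmetic shift right by 31 (MOVsi with lsl/asr operands in
// ARM mode, tLSLri/tASRri in Thumb). Zero-extending an i8 with the
// single-instruction form is an AND with 255 (ANDri/t2ANDri), and a 16-bit
// zero-extend uses UXTH/t2UXTH.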
bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, SrcTy, true);
  DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple()) return false;
  if (!DestEVT.isSimple()) return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();
  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0) return false;
  updateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectShift(const Instruction *I,
                              ARM_AM::ShiftOpc ShiftTy) {
  // We leave Thumb2 mode to the target-independent selector or to
  // SelectionDAG ISel.
  if (isThumb2)
    return false;

  // Only handle i32 now.
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  if (DestVT != MVT::i32)
    return false;

  unsigned Opc = ARM::MOVsr;
  unsigned ShiftImm;
  Value *Src2Value = I->getOperand(1);
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
    ShiftImm = CI->getZExtValue();

    // Fall back to SelectionDAG isel if the shift amount is zero or at least
    // the width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
      return false;

    Opc = ARM::MOVsi;
  }

  Value *Src1Value = I->getOperand(0);
  unsigned Reg1 = getRegForValue(Src1Value);
  if (Reg1 == 0) return false;

  unsigned Reg2 = 0;
  if (Opc == ARM::MOVsr) {
    Reg2 = getRegForValue(Src2Value);
    if (Reg2 == 0) return false;
  }

  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  if (ResultReg == 0) return false;

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg)
                            .addReg(Reg1);

  if (Opc == ARM::MOVsi)
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
  else if (Opc == ARM::MOVsr) {
    MIB.addReg(Reg2);
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
  }

  AddOptionalDefs(MIB);
  updateValueMap(I, ResultReg);
  return true;
}
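// For example, in ARM mode
//   %r = shl i32 %x, %y   is selected to MOVsr with an lsl-by-register operand,
//   %r = lshr i32 %x, 7   is selected to MOVsi with an lsr #7 shift-operand
// encoding, while a constant shift amount of 0 or >= 32 is deliberately left
// to SelectionDAG.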
// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    case Instruction::Shl:
      return SelectShift(I, ARM_AM::lsl);
    case Instruction::LShr:
      return SelectShift(I, ARM_AM::lsr);
    case Instruction::AShr:
      return SelectShift(I, ARM_AM::asr);
    default: break;
  }
  return false;
}
// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // ARM, Thumb.
  uint8_t ExpectedImm;
  uint8_t isZExt     : 1;
  uint8_t ExpectedVT : 7;
} FoldableLoadExtends[] = {
  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
};
/// The specified machine instr operand is a vreg, and that vreg is being
/// provided by the specified load instruction. If possible, try to fold the
/// load as an operand to the instruction, returning true if successful.
bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
    return false;
  const uint64_t Imm = MI->getOperand(2).getImm();

  bool Found = false;
  bool isZExt;
  for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
    if (FLE.Opc[isThumb2] == MI->getOpcode() &&
        (uint64_t)FLE.ExpectedImm == Imm &&
        MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
      Found = true;
      isZExt = FLE.isZExt;
    }
  }
  if (!Found) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  Register ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);

  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  Align ConstAlign =
      MF->getDataLayout().getPrefTypeAlign(Type::getInt32PtrTy(*Context));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, Align(4));

  Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(0);
  MIB.add(predOps(ARMCC::AL));

  // Fix the address by adding pc.
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
            .addReg(TempReg)
            .addImm(ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(predOps(ARMCC::AL));

  if (UseGOT_PREL && Subtarget->isThumb()) {
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(ARM::t2LDRi12), NewDestReg)
              .addReg(DestReg)
              .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}
bool ARMFastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  default:
    return false;
  case CallingConv::Fast:
  case CallingConv::C:
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    break;
  }

  // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments, which
  // are passed in r0 - r3.
  for (const Argument &Arg : F->args()) {
    if (Arg.getArgNo() >= 4)
      return false;

    if (Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::ByVal))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      break;
    default:
      return false;
    }
  }

  static const MCPhysReg GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };

  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
  for (const Argument &Arg : F->args()) {
    unsigned ArgNo = Arg.getArgNo();
    unsigned SrcReg = GPRArgRegs[ArgNo];
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY),
            ResultReg).addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }

  return true;
}
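// Sketch of the effect: for `define i32 @f(i32 %a, i32 %b)` this marks r0 and
// r1 as live-ins, emits one COPY per argument from the live-in vreg into a
// fresh rGPR virtual register, and maps %a and %b to those copies so later
// fast-isel code can use them directly.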
  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
      return new ARMFastISel(funcInfo, libInfo);

    return nullptr;
  }

} // end namespace llvm