[llvm-core.git] / lib / Target / AArch64 / AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
13 //===----------------------------------------------------------------------===//
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/FastISel.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/RuntimeLibcalls.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
62 #include "llvm/Support/AtomicOrdering.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/ErrorHandling.h"
67 #include "llvm/Support/MachineValueType.h"
68 #include "llvm/Support/MathExtras.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
75 using namespace llvm;
77 namespace {
79 class AArch64FastISel final : public FastISel {
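// Wrapper for an addressing mode: a register or frame-index base plus an
// optional extended/shifted offset register, an immediate offset, and an
// optional GlobalValue.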
80 class Address {
81 public:
82 using BaseKind = enum {
83 RegBase,
84 FrameIndexBase
87 private:
88 BaseKind Kind = RegBase;
89 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90 union {
91 unsigned Reg;
92 int FI;
93 } Base;
94 unsigned OffsetReg = 0;
95 unsigned Shift = 0;
96 int64_t Offset = 0;
97 const GlobalValue *GV = nullptr;
99 public:
100 Address() { Base.Reg = 0; }
102 void setKind(BaseKind K) { Kind = K; }
103 BaseKind getKind() const { return Kind; }
104 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106 bool isRegBase() const { return Kind == RegBase; }
107 bool isFIBase() const { return Kind == FrameIndexBase; }
109 void setReg(unsigned Reg) {
110 assert(isRegBase() && "Invalid base register access!");
111 Base.Reg = Reg;
114 unsigned getReg() const {
115 assert(isRegBase() && "Invalid base register access!");
116 return Base.Reg;
119 void setOffsetReg(unsigned Reg) {
120 OffsetReg = Reg;
123 unsigned getOffsetReg() const {
124 return OffsetReg;
127 void setFI(unsigned FI) {
128 assert(isFIBase() && "Invalid base frame index access!");
129 Base.FI = FI;
132 unsigned getFI() const {
133 assert(isFIBase() && "Invalid base frame index access!");
134 return Base.FI;
137 void setOffset(int64_t O) { Offset = O; }
138 int64_t getOffset() { return Offset; }
139 void setShift(unsigned S) { Shift = S; }
140 unsigned getShift() { return Shift; }
142 void setGlobalValue(const GlobalValue *G) { GV = G; }
143 const GlobalValue *getGlobalValue() { return GV; }
146 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147 /// make the right decision when generating code for different targets.
148 const AArch64Subtarget *Subtarget;
149 LLVMContext *Context;
151 bool fastLowerArguments() override;
152 bool fastLowerCall(CallLoweringInfo &CLI) override;
153 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 private:
156 // Selection routines.
157 bool selectAddSub(const Instruction *I);
158 bool selectLogicalOp(const Instruction *I);
159 bool selectLoad(const Instruction *I);
160 bool selectStore(const Instruction *I);
161 bool selectBranch(const Instruction *I);
162 bool selectIndirectBr(const Instruction *I);
163 bool selectCmp(const Instruction *I);
164 bool selectSelect(const Instruction *I);
165 bool selectFPExt(const Instruction *I);
166 bool selectFPTrunc(const Instruction *I);
167 bool selectFPToInt(const Instruction *I, bool Signed);
168 bool selectIntToFP(const Instruction *I, bool Signed);
169 bool selectRem(const Instruction *I, unsigned ISDOpcode);
170 bool selectRet(const Instruction *I);
171 bool selectTrunc(const Instruction *I);
172 bool selectIntExt(const Instruction *I);
173 bool selectMul(const Instruction *I);
174 bool selectShift(const Instruction *I);
175 bool selectBitCast(const Instruction *I);
176 bool selectFRem(const Instruction *I);
177 bool selectSDiv(const Instruction *I);
178 bool selectGetElementPtr(const Instruction *I);
179 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 // Utility helper routines.
182 bool isTypeLegal(Type *Ty, MVT &VT);
183 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184 bool isValueAvailable(const Value *V) const;
185 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186 bool computeCallAddress(const Value *V, Address &Addr);
187 bool simplifyAddress(Address &Addr, MVT VT);
188 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189 MachineMemOperand::Flags Flags,
190 unsigned ScaleFactor, MachineMemOperand *MMO);
191 bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193 unsigned Alignment);
194 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195 const Value *Cond);
196 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197 bool optimizeSelect(const SelectInst *SI);
198 std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200 // Emit helper routines.
201 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202 const Value *RHS, bool SetFlags = false,
203 bool WantResult = true, bool IsZExt = false);
204 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206 bool SetFlags = false, bool WantResult = true);
207 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208 bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209 bool WantResult = true);
210 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212 AArch64_AM::ShiftExtendType ShiftType,
213 uint64_t ShiftImm, bool SetFlags = false,
214 bool WantResult = true);
215 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
217 AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243 unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245 unsigned RHSReg, bool RHSIsKill,
246 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247 bool WantResult = true);
248 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249 const Value *RHS);
250 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251 bool LHSIsKill, uint64_t Imm);
252 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254 uint64_t ShiftImm);
255 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256 unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257 unsigned Op1, bool Op1IsKill);
258 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259 unsigned Op1, bool Op1IsKill);
260 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261 unsigned Op1, bool Op1IsKill);
262 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263 unsigned Op1Reg, bool Op1IsKill);
264 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265 uint64_t Imm, bool IsZExt = true);
266 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267 unsigned Op1Reg, bool Op1IsKill);
268 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269 uint64_t Imm, bool IsZExt = true);
270 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271 unsigned Op1Reg, bool Op1IsKill);
272 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273 uint64_t Imm, bool IsZExt = false);
275 unsigned materializeInt(const ConstantInt *CI, MVT VT);
276 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277 unsigned materializeGV(const GlobalValue *GV);
279 // Call handling routines.
280 private:
281 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283 unsigned &NumBytes);
284 bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286 public:
287 // Backend specific FastISel code.
288 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289 unsigned fastMaterializeConstant(const Constant *C) override;
290 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293 const TargetLibraryInfo *LibInfo)
294 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295 Subtarget =
296 &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297 Context = &FuncInfo.Fn->getContext();
300 bool fastSelectInstruction(const Instruction *I) override;
302 #include "AArch64GenFastISel.inc"
305 } // end anonymous namespace
307 /// Check if the sign-/zero-extend will be a noop.
308 static bool isIntExtFree(const Instruction *I) {
309 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310 "Unexpected integer extend instruction.");
311 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312 "Unexpected value type.");
313 bool IsZExt = isa<ZExtInst>(I);
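// A single-use load feeding the extend can be selected as an extending load,
// so the extend is considered free.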
315 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316 if (LI->hasOneUse())
317 return true;
319 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321 return true;
323 return false;
326 /// Determine the implicit scale factor that is applied by a memory
327 /// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) {
329 switch (VT.SimpleTy) {
330 default:
331 return 0; // invalid
332 case MVT::i1: // fall-through
333 case MVT::i8:
334 return 1;
335 case MVT::i16:
336 return 2;
337 case MVT::i32: // fall-through
338 case MVT::f32:
339 return 4;
340 case MVT::i64: // fall-through
341 case MVT::f64:
342 return 8;
346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347 if (CC == CallingConv::WebKit_JS)
348 return CC_AArch64_WebKit_JS;
349 if (CC == CallingConv::GHC)
350 return CC_AArch64_GHC;
351 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
354 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
355 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
356 "Alloca should always return a pointer.");
358 // Don't handle dynamic allocas.
359 if (!FuncInfo.StaticAllocaMap.count(AI))
360 return 0;
362 DenseMap<const AllocaInst *, int>::iterator SI =
363 FuncInfo.StaticAllocaMap.find(AI);
365 if (SI != FuncInfo.StaticAllocaMap.end()) {
366 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
367 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
368 ResultReg)
369 .addFrameIndex(SI->second)
370 .addImm(0)
371 .addImm(0);
372 return ResultReg;
375 return 0;
378 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
379 if (VT > MVT::i64)
380 return 0;
382 if (!CI->isZero())
383 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
385 // Create a copy from the zero register to materialize a "0" value.
386 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
387 : &AArch64::GPR32RegClass;
388 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
389 unsigned ResultReg = createResultReg(RC);
390 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
391 ResultReg).addReg(ZeroReg, getKillRegState(true));
392 return ResultReg;
395 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
396 // Positive zero (+0.0) has to be materialized with a fmov from the zero
397 // register, because the immediate version of fmov cannot encode zero.
398 if (CFP->isNullValue())
399 return fastMaterializeFloatZero(CFP);
401 if (VT != MVT::f32 && VT != MVT::f64)
402 return 0;
404 const APFloat Val = CFP->getValueAPF();
405 bool Is64Bit = (VT == MVT::f64);
406 // Check whether we can use an FMOV instruction to materialize the
407 // constant; otherwise we have to materialize it via the constant pool.
408 int Imm =
409 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
410 if (Imm != -1) {
411 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
412 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
415 // For the MachO large code model, materialize the FP constant in code.
416 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
417 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
418 const TargetRegisterClass *RC = Is64Bit ?
419 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
421 unsigned TmpReg = createResultReg(RC);
422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
423 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
425 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
427 TII.get(TargetOpcode::COPY), ResultReg)
428 .addReg(TmpReg, getKillRegState(true));
430 return ResultReg;
433 // Materialize via constant pool. MachineConstantPool wants an explicit
434 // alignment.
435 unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
436 if (Align == 0)
437 Align = DL.getTypeAllocSize(CFP->getType());
439 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
440 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
444 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447 .addReg(ADRPReg)
448 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
449 return ResultReg;
452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
453 // We can't handle thread-local variables quickly yet.
454 if (GV->isThreadLocal())
455 return 0;
457 // MachO still uses GOT for large code-model accesses, but ELF requires
458 // movz/movk sequences, which FastISel doesn't handle yet.
459 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
460 return 0;
462 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
464 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
465 if (!DestEVT.isSimple())
466 return 0;
468 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
469 unsigned ResultReg;
471 if (OpFlags & AArch64II::MO_GOT) {
472 // ADRP + LDRX
473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
474 ADRPReg)
475 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
477 ResultReg = createResultReg(&AArch64::GPR64RegClass);
478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
479 ResultReg)
480 .addReg(ADRPReg)
481 .addGlobalAddress(GV, 0,
482 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
483 } else {
484 // ADRP + ADDX
485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
486 ADRPReg)
487 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
489 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
490 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
491 ResultReg)
492 .addReg(ADRPReg)
493 .addGlobalAddress(GV, 0,
494 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
495 .addImm(0);
497 return ResultReg;
500 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
501 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
503 // Only handle simple types.
504 if (!CEVT.isSimple())
505 return 0;
506 MVT VT = CEVT.getSimpleVT();
508 if (const auto *CI = dyn_cast<ConstantInt>(C))
509 return materializeInt(CI, VT);
510 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
511 return materializeFP(CFP, VT);
512 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
513 return materializeGV(GV);
515 return 0;
518 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
519 assert(CFP->isNullValue() &&
520 "Floating-point constant is not a positive zero.");
521 MVT VT;
522 if (!isTypeLegal(CFP->getType(), VT))
523 return 0;
525 if (VT != MVT::f32 && VT != MVT::f64)
526 return 0;
528 bool Is64Bit = (VT == MVT::f64);
529 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
530 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
531 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
534 /// Check if the multiply is by a power-of-2 constant.
535 static bool isMulPowOf2(const Value *I) {
536 if (const auto *MI = dyn_cast<MulOperator>(I)) {
537 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
538 if (C->getValue().isPowerOf2())
539 return true;
540 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
541 if (C->getValue().isPowerOf2())
542 return true;
544 return false;
547 // Computes the address to get to an object.
548 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
550 const User *U = nullptr;
551 unsigned Opcode = Instruction::UserOp1;
552 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
553 // Don't walk into other basic blocks unless the object is an alloca from
554 // another block, otherwise it may not have a virtual register assigned.
555 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
556 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
557 Opcode = I->getOpcode();
558 U = I;
560 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
561 Opcode = C->getOpcode();
562 U = C;
565 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
566 if (Ty->getAddressSpace() > 255)
567 // Fast instruction selection doesn't support the special
568 // address spaces.
569 return false;
571 switch (Opcode) {
572 default:
573 break;
574 case Instruction::BitCast:
575 // Look through bitcasts.
576 return computeAddress(U->getOperand(0), Addr, Ty);
578 case Instruction::IntToPtr:
579 // Look past no-op inttoptrs.
580 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
581 TLI.getPointerTy(DL))
582 return computeAddress(U->getOperand(0), Addr, Ty);
583 break;
585 case Instruction::PtrToInt:
586 // Look past no-op ptrtoints.
587 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
588 return computeAddress(U->getOperand(0), Addr, Ty);
589 break;
591 case Instruction::GetElementPtr: {
592 Address SavedAddr = Addr;
593 uint64_t TmpOffset = Addr.getOffset();
595 // Iterate through the GEP folding the constants into offsets where
596 // we can.
597 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
598 GTI != E; ++GTI) {
599 const Value *Op = GTI.getOperand();
600 if (StructType *STy = GTI.getStructTypeOrNull()) {
601 const StructLayout *SL = DL.getStructLayout(STy);
602 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
603 TmpOffset += SL->getElementOffset(Idx);
604 } else {
605 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
606 while (true) {
607 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
608 // Constant-offset addressing.
609 TmpOffset += CI->getSExtValue() * S;
610 break;
612 if (canFoldAddIntoGEP(U, Op)) {
613 // A compatible add with a constant operand. Fold the constant.
614 ConstantInt *CI =
615 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
616 TmpOffset += CI->getSExtValue() * S;
617 // Iterate on the other operand.
618 Op = cast<AddOperator>(Op)->getOperand(0);
619 continue;
621 // Unsupported
622 goto unsupported_gep;
627 // Try to grab the base operand now.
628 Addr.setOffset(TmpOffset);
629 if (computeAddress(U->getOperand(0), Addr, Ty))
630 return true;
632 // We failed, restore everything and try the other options.
633 Addr = SavedAddr;
635 unsupported_gep:
636 break;
638 case Instruction::Alloca: {
639 const AllocaInst *AI = cast<AllocaInst>(Obj);
640 DenseMap<const AllocaInst *, int>::iterator SI =
641 FuncInfo.StaticAllocaMap.find(AI);
642 if (SI != FuncInfo.StaticAllocaMap.end()) {
643 Addr.setKind(Address::FrameIndexBase);
644 Addr.setFI(SI->second);
645 return true;
647 break;
649 case Instruction::Add: {
650 // Adds of constants are common and easy enough.
651 const Value *LHS = U->getOperand(0);
652 const Value *RHS = U->getOperand(1);
654 if (isa<ConstantInt>(LHS))
655 std::swap(LHS, RHS);
657 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
658 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
659 return computeAddress(LHS, Addr, Ty);
662 Address Backup = Addr;
663 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
664 return true;
665 Addr = Backup;
667 break;
669 case Instruction::Sub: {
670 // Subs of constants are common and easy enough.
671 const Value *LHS = U->getOperand(0);
672 const Value *RHS = U->getOperand(1);
674 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
676 return computeAddress(LHS, Addr, Ty);
678 break;
680 case Instruction::Shl: {
681 if (Addr.getOffsetReg())
682 break;
684 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
685 if (!CI)
686 break;
688 unsigned Val = CI->getZExtValue();
689 if (Val < 1 || Val > 3)
690 break;
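// The shift amount must match the width of the access so it can be folded
// as a scaled register offset.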
692 uint64_t NumBytes = 0;
693 if (Ty && Ty->isSized()) {
694 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
695 NumBytes = NumBits / 8;
696 if (!isPowerOf2_64(NumBits))
697 NumBytes = 0;
700 if (NumBytes != (1ULL << Val))
701 break;
703 Addr.setShift(Val);
704 Addr.setExtendType(AArch64_AM::LSL);
706 const Value *Src = U->getOperand(0);
707 if (const auto *I = dyn_cast<Instruction>(Src)) {
708 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
709 // Fold the zext or sext when it won't become a noop.
710 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
711 if (!isIntExtFree(ZE) &&
712 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
713 Addr.setExtendType(AArch64_AM::UXTW);
714 Src = ZE->getOperand(0);
716 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
717 if (!isIntExtFree(SE) &&
718 SE->getOperand(0)->getType()->isIntegerTy(32)) {
719 Addr.setExtendType(AArch64_AM::SXTW);
720 Src = SE->getOperand(0);
726 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
727 if (AI->getOpcode() == Instruction::And) {
728 const Value *LHS = AI->getOperand(0);
729 const Value *RHS = AI->getOperand(1);
731 if (const auto *C = dyn_cast<ConstantInt>(LHS))
732 if (C->getValue() == 0xffffffff)
733 std::swap(LHS, RHS);
735 if (const auto *C = dyn_cast<ConstantInt>(RHS))
736 if (C->getValue() == 0xffffffff) {
737 Addr.setExtendType(AArch64_AM::UXTW);
738 unsigned Reg = getRegForValue(LHS);
739 if (!Reg)
740 return false;
741 bool RegIsKill = hasTrivialKill(LHS);
742 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
743 AArch64::sub_32);
744 Addr.setOffsetReg(Reg);
745 return true;
749 unsigned Reg = getRegForValue(Src);
750 if (!Reg)
751 return false;
752 Addr.setOffsetReg(Reg);
753 return true;
755 case Instruction::Mul: {
756 if (Addr.getOffsetReg())
757 break;
759 if (!isMulPowOf2(U))
760 break;
762 const Value *LHS = U->getOperand(0);
763 const Value *RHS = U->getOperand(1);
765 // Canonicalize power-of-2 value to the RHS.
766 if (const auto *C = dyn_cast<ConstantInt>(LHS))
767 if (C->getValue().isPowerOf2())
768 std::swap(LHS, RHS);
770 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
771 const auto *C = cast<ConstantInt>(RHS);
772 unsigned Val = C->getValue().logBase2();
773 if (Val < 1 || Val > 3)
774 break;
776 uint64_t NumBytes = 0;
777 if (Ty && Ty->isSized()) {
778 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
779 NumBytes = NumBits / 8;
780 if (!isPowerOf2_64(NumBits))
781 NumBytes = 0;
784 if (NumBytes != (1ULL << Val))
785 break;
787 Addr.setShift(Val);
788 Addr.setExtendType(AArch64_AM::LSL);
790 const Value *Src = LHS;
791 if (const auto *I = dyn_cast<Instruction>(Src)) {
792 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
793 // Fold the zext or sext when it won't become a noop.
794 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
795 if (!isIntExtFree(ZE) &&
796 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
797 Addr.setExtendType(AArch64_AM::UXTW);
798 Src = ZE->getOperand(0);
800 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
801 if (!isIntExtFree(SE) &&
802 SE->getOperand(0)->getType()->isIntegerTy(32)) {
803 Addr.setExtendType(AArch64_AM::SXTW);
804 Src = SE->getOperand(0);
810 unsigned Reg = getRegForValue(Src);
811 if (!Reg)
812 return false;
813 Addr.setOffsetReg(Reg);
814 return true;
816 case Instruction::And: {
817 if (Addr.getOffsetReg())
818 break;
820 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
821 break;
823 const Value *LHS = U->getOperand(0);
824 const Value *RHS = U->getOperand(1);
826 if (const auto *C = dyn_cast<ConstantInt>(LHS))
827 if (C->getValue() == 0xffffffff)
828 std::swap(LHS, RHS);
830 if (const auto *C = dyn_cast<ConstantInt>(RHS))
831 if (C->getValue() == 0xffffffff) {
832 Addr.setShift(0);
833 Addr.setExtendType(AArch64_AM::LSL);
834 Addr.setExtendType(AArch64_AM::UXTW);
836 unsigned Reg = getRegForValue(LHS);
837 if (!Reg)
838 return false;
839 bool RegIsKill = hasTrivialKill(LHS);
840 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
841 AArch64::sub_32);
842 Addr.setOffsetReg(Reg);
843 return true;
845 break;
847 case Instruction::SExt:
848 case Instruction::ZExt: {
849 if (!Addr.getReg() || Addr.getOffsetReg())
850 break;
852 const Value *Src = nullptr;
853 // Fold the zext or sext when it won't become a noop.
854 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
855 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
856 Addr.setExtendType(AArch64_AM::UXTW);
857 Src = ZE->getOperand(0);
859 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
860 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
861 Addr.setExtendType(AArch64_AM::SXTW);
862 Src = SE->getOperand(0);
866 if (!Src)
867 break;
869 Addr.setShift(0);
870 unsigned Reg = getRegForValue(Src);
871 if (!Reg)
872 return false;
873 Addr.setOffsetReg(Reg);
874 return true;
876 } // end switch
878 if (Addr.isRegBase() && !Addr.getReg()) {
879 unsigned Reg = getRegForValue(Obj);
880 if (!Reg)
881 return false;
882 Addr.setReg(Reg);
883 return true;
886 if (!Addr.getOffsetReg()) {
887 unsigned Reg = getRegForValue(Obj);
888 if (!Reg)
889 return false;
890 Addr.setOffsetReg(Reg);
891 return true;
894 return false;
897 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
898 const User *U = nullptr;
899 unsigned Opcode = Instruction::UserOp1;
900 bool InMBB = true;
902 if (const auto *I = dyn_cast<Instruction>(V)) {
903 Opcode = I->getOpcode();
904 U = I;
905 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
906 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
907 Opcode = C->getOpcode();
908 U = C;
911 switch (Opcode) {
912 default: break;
913 case Instruction::BitCast:
914 // Look past bitcasts if the operand is in the same BB.
915 if (InMBB)
916 return computeCallAddress(U->getOperand(0), Addr);
917 break;
918 case Instruction::IntToPtr:
919 // Look past no-op inttoptrs if the operand is in the same BB.
920 if (InMBB &&
921 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
922 TLI.getPointerTy(DL))
923 return computeCallAddress(U->getOperand(0), Addr);
924 break;
925 case Instruction::PtrToInt:
926 // Look past no-op ptrtoints if the operand is in the same BB.
927 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
928 return computeCallAddress(U->getOperand(0), Addr);
929 break;
932 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
933 Addr.setGlobalValue(GV);
934 return true;
937 // If all else fails, try to materialize the value in a register.
938 if (!Addr.getGlobalValue()) {
939 Addr.setReg(getRegForValue(V));
940 return Addr.getReg() != 0;
943 return false;
946 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
947 EVT evt = TLI.getValueType(DL, Ty, true);
949 // Only handle simple types.
950 if (evt == MVT::Other || !evt.isSimple())
951 return false;
952 VT = evt.getSimpleVT();
954 // This is a legal type, but it's not something we handle in fast-isel.
955 if (VT == MVT::f128)
956 return false;
958 // Handle all other legal types, i.e. a register that will directly hold this
959 // value.
960 return TLI.isTypeLegal(VT);
963 /// Determine if the value type is supported by FastISel.
965 /// FastISel for AArch64 can handle more value types than are legal. This adds
966 /// simple value types such as i1, i8, and i16.
967 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
968 if (Ty->isVectorTy() && !IsVectorAllowed)
969 return false;
971 if (isTypeLegal(Ty, VT))
972 return true;
974 // If this is a type that can be sign- or zero-extended to a basic operation,
975 // go ahead and accept it now.
976 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
977 return true;
979 return false;
982 bool AArch64FastISel::isValueAvailable(const Value *V) const {
983 if (!isa<Instruction>(V))
984 return true;
986 const auto *I = cast<Instruction>(V);
987 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
990 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
991 unsigned ScaleFactor = getImplicitScaleFactor(VT);
992 if (!ScaleFactor)
993 return false;
995 bool ImmediateOffsetNeedsLowering = false;
996 bool RegisterOffsetNeedsLowering = false;
997 int64_t Offset = Addr.getOffset();
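// Unscaled (negative or misaligned) offsets must fit in a signed 9-bit
// immediate; scaled offsets must fit in an unsigned 12-bit immediate.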
998 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
999 ImmediateOffsetNeedsLowering = true;
1000 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1001 !isUInt<12>(Offset / ScaleFactor))
1002 ImmediateOffsetNeedsLowering = true;
1004 // Cannot encode an offset register and an immediate offset in the same
1005 // instruction. Fold the immediate offset into the load/store instruction and
1006 // emit an additional add to take care of the offset register.
1007 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1008 RegisterOffsetNeedsLowering = true;
1010 // Cannot encode zero register as base.
1011 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1012 RegisterOffsetNeedsLowering = true;
1014 // If this is a stack pointer and the offset needs to be simplified then put
1015 // the alloca address into a register, set the base type back to register and
1016 // continue. This should almost never happen.
1017 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1019 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1021 ResultReg)
1022 .addFrameIndex(Addr.getFI())
1023 .addImm(0)
1024 .addImm(0);
1025 Addr.setKind(Address::RegBase);
1026 Addr.setReg(ResultReg);
1029 if (RegisterOffsetNeedsLowering) {
1030 unsigned ResultReg = 0;
1031 if (Addr.getReg()) {
1032 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1033 Addr.getExtendType() == AArch64_AM::UXTW )
1034 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1035 /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1036 /*TODO:IsKill=*/false, Addr.getExtendType(),
1037 Addr.getShift());
1038 else
1039 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1040 /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1041 /*TODO:IsKill=*/false, AArch64_AM::LSL,
1042 Addr.getShift());
1043 } else {
1044 if (Addr.getExtendType() == AArch64_AM::UXTW)
1045 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1046 /*Op0IsKill=*/false, Addr.getShift(),
1047 /*IsZExt=*/true);
1048 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1049 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050 /*Op0IsKill=*/false, Addr.getShift(),
1051 /*IsZExt=*/false);
1052 else
1053 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1054 /*Op0IsKill=*/false, Addr.getShift());
1056 if (!ResultReg)
1057 return false;
1059 Addr.setReg(ResultReg);
1060 Addr.setOffsetReg(0);
1061 Addr.setShift(0);
1062 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1065 // Since the offset is too large for the load/store instruction, get the
1066 // reg+offset into a register.
1067 if (ImmediateOffsetNeedsLowering) {
1068 unsigned ResultReg;
1069 if (Addr.getReg())
1070 // Try to fold the immediate into the add instruction.
1071 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1072 else
1073 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1075 if (!ResultReg)
1076 return false;
1077 Addr.setReg(ResultReg);
1078 Addr.setOffset(0);
1080 return true;
1083 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1084 const MachineInstrBuilder &MIB,
1085 MachineMemOperand::Flags Flags,
1086 unsigned ScaleFactor,
1087 MachineMemOperand *MMO) {
1088 int64_t Offset = Addr.getOffset() / ScaleFactor;
1089 // Frame base works a bit differently. Handle it separately.
1090 if (Addr.isFIBase()) {
1091 int FI = Addr.getFI();
1092 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1093 // and alignment should be based on the VT.
1094 MMO = FuncInfo.MF->getMachineMemOperand(
1095 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1096 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1097 // Now add the rest of the operands.
1098 MIB.addFrameIndex(FI).addImm(Offset);
1099 } else {
1100 assert(Addr.isRegBase() && "Unexpected address kind.");
1101 const MCInstrDesc &II = MIB->getDesc();
1102 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1103 Addr.setReg(
1104 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1105 Addr.setOffsetReg(
1106 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1107 if (Addr.getOffsetReg()) {
1108 assert(Addr.getOffset() == 0 && "Unexpected offset");
1109 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1110 Addr.getExtendType() == AArch64_AM::SXTX;
1111 MIB.addReg(Addr.getReg());
1112 MIB.addReg(Addr.getOffsetReg());
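// The last two immediate operands encode whether the offset extend is
// signed and whether the offset register is shifted.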
1113 MIB.addImm(IsSigned);
1114 MIB.addImm(Addr.getShift() != 0);
1115 } else
1116 MIB.addReg(Addr.getReg()).addImm(Offset);
1119 if (MMO)
1120 MIB.addMemOperand(MMO);
1123 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1124 const Value *RHS, bool SetFlags,
1125 bool WantResult, bool IsZExt) {
1126 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1127 bool NeedExtend = false;
1128 switch (RetVT.SimpleTy) {
1129 default:
1130 return 0;
1131 case MVT::i1:
1132 NeedExtend = true;
1133 break;
1134 case MVT::i8:
1135 NeedExtend = true;
1136 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1137 break;
1138 case MVT::i16:
1139 NeedExtend = true;
1140 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1141 break;
1142 case MVT::i32: // fall-through
1143 case MVT::i64:
1144 break;
1146 MVT SrcVT = RetVT;
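// Operations on i1/i8/i16 are performed in a 32-bit register; SrcVT keeps
// the original type for the extend below.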
1147 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1149 // Canonicalize immediates to the RHS first.
1150 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1151 std::swap(LHS, RHS);
1153 // Canonicalize mul by power of 2 to the RHS.
1154 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1155 if (isMulPowOf2(LHS))
1156 std::swap(LHS, RHS);
1158 // Canonicalize shift immediate to the RHS.
1159 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1160 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1161 if (isa<ConstantInt>(SI->getOperand(1)))
1162 if (SI->getOpcode() == Instruction::Shl ||
1163 SI->getOpcode() == Instruction::LShr ||
1164 SI->getOpcode() == Instruction::AShr )
1165 std::swap(LHS, RHS);
1167 unsigned LHSReg = getRegForValue(LHS);
1168 if (!LHSReg)
1169 return 0;
1170 bool LHSIsKill = hasTrivialKill(LHS);
1172 if (NeedExtend)
1173 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1175 unsigned ResultReg = 0;
1176 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1177 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1178 if (C->isNegative())
1179 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1180 SetFlags, WantResult);
1181 else
1182 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1183 WantResult);
1184 } else if (const auto *C = dyn_cast<Constant>(RHS))
1185 if (C->isNullValue())
1186 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1187 WantResult);
1189 if (ResultReg)
1190 return ResultReg;
1192 // Only extend the RHS within the instruction if there is a valid extend type.
1193 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1194 isValueAvailable(RHS)) {
1195 if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1196 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1197 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1198 unsigned RHSReg = getRegForValue(SI->getOperand(0));
1199 if (!RHSReg)
1200 return 0;
1201 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1202 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1203 RHSIsKill, ExtendType, C->getZExtValue(),
1204 SetFlags, WantResult);
1206 unsigned RHSReg = getRegForValue(RHS);
1207 if (!RHSReg)
1208 return 0;
1209 bool RHSIsKill = hasTrivialKill(RHS);
1210 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1211 ExtendType, 0, SetFlags, WantResult);
1214 // Check if the mul can be folded into the instruction.
1215 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1216 if (isMulPowOf2(RHS)) {
1217 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1218 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1220 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1221 if (C->getValue().isPowerOf2())
1222 std::swap(MulLHS, MulRHS);
1224 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1225 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1226 unsigned RHSReg = getRegForValue(MulLHS);
1227 if (!RHSReg)
1228 return 0;
1229 bool RHSIsKill = hasTrivialKill(MulLHS);
1230 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1231 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1232 WantResult);
1233 if (ResultReg)
1234 return ResultReg;
1238 // Check if the shift can be folded into the instruction.
1239 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1240 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1241 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1242 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1243 switch (SI->getOpcode()) {
1244 default: break;
1245 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1246 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1247 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1249 uint64_t ShiftVal = C->getZExtValue();
1250 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1251 unsigned RHSReg = getRegForValue(SI->getOperand(0));
1252 if (!RHSReg)
1253 return 0;
1254 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1255 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1256 RHSIsKill, ShiftType, ShiftVal, SetFlags,
1257 WantResult);
1258 if (ResultReg)
1259 return ResultReg;
1265 unsigned RHSReg = getRegForValue(RHS);
1266 if (!RHSReg)
1267 return 0;
1268 bool RHSIsKill = hasTrivialKill(RHS);
1270 if (NeedExtend)
1271 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1273 return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1274 SetFlags, WantResult);
1277 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1278 bool LHSIsKill, unsigned RHSReg,
1279 bool RHSIsKill, bool SetFlags,
1280 bool WantResult) {
1281 assert(LHSReg && RHSReg && "Invalid register number.");
1283 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1284 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1285 return 0;
1287 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1288 return 0;
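// Opcode table indexed by [SetFlags][UseAdd][Is64Bit].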
1290 static const unsigned OpcTable[2][2][2] = {
1291 { { AArch64::SUBWrr, AArch64::SUBXrr },
1292 { AArch64::ADDWrr, AArch64::ADDXrr } },
1293 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1294 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1296 bool Is64Bit = RetVT == MVT::i64;
1297 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1298 const TargetRegisterClass *RC =
1299 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1300 unsigned ResultReg;
1301 if (WantResult)
1302 ResultReg = createResultReg(RC);
1303 else
1304 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1306 const MCInstrDesc &II = TII.get(Opc);
1307 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1308 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1309 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1310 .addReg(LHSReg, getKillRegState(LHSIsKill))
1311 .addReg(RHSReg, getKillRegState(RHSIsKill));
1312 return ResultReg;
1315 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1316 bool LHSIsKill, uint64_t Imm,
1317 bool SetFlags, bool WantResult) {
1318 assert(LHSReg && "Invalid register number.");
1320 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1321 return 0;
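// ADD/SUB immediates are 12 bits wide and may optionally be shifted left by 12.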
1323 unsigned ShiftImm;
1324 if (isUInt<12>(Imm))
1325 ShiftImm = 0;
1326 else if ((Imm & 0xfff000) == Imm) {
1327 ShiftImm = 12;
1328 Imm >>= 12;
1329 } else
1330 return 0;
1332 static const unsigned OpcTable[2][2][2] = {
1333 { { AArch64::SUBWri, AArch64::SUBXri },
1334 { AArch64::ADDWri, AArch64::ADDXri } },
1335 { { AArch64::SUBSWri, AArch64::SUBSXri },
1336 { AArch64::ADDSWri, AArch64::ADDSXri } }
1338 bool Is64Bit = RetVT == MVT::i64;
1339 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1340 const TargetRegisterClass *RC;
1341 if (SetFlags)
1342 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1343 else
1344 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1345 unsigned ResultReg;
1346 if (WantResult)
1347 ResultReg = createResultReg(RC);
1348 else
1349 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1351 const MCInstrDesc &II = TII.get(Opc);
1352 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1353 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1354 .addReg(LHSReg, getKillRegState(LHSIsKill))
1355 .addImm(Imm)
1356 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1357 return ResultReg;
1360 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1361 bool LHSIsKill, unsigned RHSReg,
1362 bool RHSIsKill,
1363 AArch64_AM::ShiftExtendType ShiftType,
1364 uint64_t ShiftImm, bool SetFlags,
1365 bool WantResult) {
1366 assert(LHSReg && RHSReg && "Invalid register number.");
1367 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1368 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1370 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1371 return 0;
1373 // Don't deal with undefined shifts.
1374 if (ShiftImm >= RetVT.getSizeInBits())
1375 return 0;
1377 static const unsigned OpcTable[2][2][2] = {
1378 { { AArch64::SUBWrs, AArch64::SUBXrs },
1379 { AArch64::ADDWrs, AArch64::ADDXrs } },
1380 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1381 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1383 bool Is64Bit = RetVT == MVT::i64;
1384 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1385 const TargetRegisterClass *RC =
1386 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1387 unsigned ResultReg;
1388 if (WantResult)
1389 ResultReg = createResultReg(RC);
1390 else
1391 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1393 const MCInstrDesc &II = TII.get(Opc);
1394 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1395 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1396 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1397 .addReg(LHSReg, getKillRegState(LHSIsKill))
1398 .addReg(RHSReg, getKillRegState(RHSIsKill))
1399 .addImm(getShifterImm(ShiftType, ShiftImm));
1400 return ResultReg;
1403 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1404 bool LHSIsKill, unsigned RHSReg,
1405 bool RHSIsKill,
1406 AArch64_AM::ShiftExtendType ExtType,
1407 uint64_t ShiftImm, bool SetFlags,
1408 bool WantResult) {
1409 assert(LHSReg && RHSReg && "Invalid register number.");
1410 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1413 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414 return 0;
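// Only extend shift amounts of 0-3 are handled here.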
1416 if (ShiftImm >= 4)
1417 return 0;
1419 static const unsigned OpcTable[2][2][2] = {
1420 { { AArch64::SUBWrx, AArch64::SUBXrx },
1421 { AArch64::ADDWrx, AArch64::ADDXrx } },
1422 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1425 bool Is64Bit = RetVT == MVT::i64;
1426 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427 const TargetRegisterClass *RC = nullptr;
1428 if (SetFlags)
1429 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430 else
1431 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432 unsigned ResultReg;
1433 if (WantResult)
1434 ResultReg = createResultReg(RC);
1435 else
1436 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1438 const MCInstrDesc &II = TII.get(Opc);
1439 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442 .addReg(LHSReg, getKillRegState(LHSIsKill))
1443 .addReg(RHSReg, getKillRegState(RHSIsKill))
1444 .addImm(getArithExtendImm(ExtType, ShiftImm));
1445 return ResultReg;
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449 Type *Ty = LHS->getType();
1450 EVT EVT = TLI.getValueType(DL, Ty, true);
1451 if (!EVT.isSimple())
1452 return false;
1453 MVT VT = EVT.getSimpleVT();
1455 switch (VT.SimpleTy) {
1456 default:
1457 return false;
1458 case MVT::i1:
1459 case MVT::i8:
1460 case MVT::i16:
1461 case MVT::i32:
1462 case MVT::i64:
1463 return emitICmp(VT, LHS, RHS, IsZExt);
1464 case MVT::f32:
1465 case MVT::f64:
1466 return emitFCmp(VT, LHS, RHS);
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471 bool IsZExt) {
1472 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473 IsZExt) != 0;
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1477 uint64_t Imm) {
1478 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1479 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484 return false;
1486 // Check to see if the 2nd operand is a constant that we can encode directly
1487 // in the compare.
1488 bool UseImm = false;
1489 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490 if (CFP->isZero() && !CFP->isNegative())
1491 UseImm = true;
1493 unsigned LHSReg = getRegForValue(LHS);
1494 if (!LHSReg)
1495 return false;
1496 bool LHSIsKill = hasTrivialKill(LHS);
1498 if (UseImm) {
1499 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1500 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1501 .addReg(LHSReg, getKillRegState(LHSIsKill));
1502 return true;
1505 unsigned RHSReg = getRegForValue(RHS);
1506 if (!RHSReg)
1507 return false;
1508 bool RHSIsKill = hasTrivialKill(RHS);
1510 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1512 .addReg(LHSReg, getKillRegState(LHSIsKill))
1513 .addReg(RHSReg, getKillRegState(RHSIsKill));
1514 return true;
1517 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1518 bool SetFlags, bool WantResult, bool IsZExt) {
1519 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1520 IsZExt);
1523 /// This method is a wrapper to simplify add emission.
1525 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1526 /// that fails, then try to materialize the immediate into a register and use
1527 /// emitAddSub_rr instead.
1528 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1529 int64_t Imm) {
1530 unsigned ResultReg;
1531 if (Imm < 0)
1532 ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1533 else
1534 ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1536 if (ResultReg)
1537 return ResultReg;
1539 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1540 if (!CReg)
1541 return 0;
1543 ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1544 return ResultReg;
1547 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1548 bool SetFlags, bool WantResult, bool IsZExt) {
1549 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1550 IsZExt);
1553 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1554 bool LHSIsKill, unsigned RHSReg,
1555 bool RHSIsKill, bool WantResult) {
1556 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1557 RHSIsKill, /*SetFlags=*/true, WantResult);
1560 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1561 bool LHSIsKill, unsigned RHSReg,
1562 bool RHSIsKill,
1563 AArch64_AM::ShiftExtendType ShiftType,
1564 uint64_t ShiftImm, bool WantResult) {
1565 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1566 RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1567 WantResult);
1570 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1571 const Value *LHS, const Value *RHS) {
1572 // Canonicalize immediates to the RHS first.
1573 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1574 std::swap(LHS, RHS);
1576 // Canonicalize mul by power-of-2 to the RHS.
1577 if (LHS->hasOneUse() && isValueAvailable(LHS))
1578 if (isMulPowOf2(LHS))
1579 std::swap(LHS, RHS);
1581 // Canonicalize shift immediate to the RHS.
1582 if (LHS->hasOneUse() && isValueAvailable(LHS))
1583 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1584 if (isa<ConstantInt>(SI->getOperand(1)))
1585 std::swap(LHS, RHS);
1587 unsigned LHSReg = getRegForValue(LHS);
1588 if (!LHSReg)
1589 return 0;
1590 bool LHSIsKill = hasTrivialKill(LHS);
1592 unsigned ResultReg = 0;
1593 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1594 uint64_t Imm = C->getZExtValue();
1595 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1597 if (ResultReg)
1598 return ResultReg;
1600 // Check if the mul can be folded into the instruction.
1601 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1602 if (isMulPowOf2(RHS)) {
1603 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1604 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1606 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1607 if (C->getValue().isPowerOf2())
1608 std::swap(MulLHS, MulRHS);
1610 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1611 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1613 unsigned RHSReg = getRegForValue(MulLHS);
1614 if (!RHSReg)
1615 return 0;
1616 bool RHSIsKill = hasTrivialKill(MulLHS);
1617 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1618 RHSIsKill, ShiftVal);
1619 if (ResultReg)
1620 return ResultReg;
1624 // Check if the shift can be folded into the instruction.
1625 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1626 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1627 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1628 uint64_t ShiftVal = C->getZExtValue();
1629 unsigned RHSReg = getRegForValue(SI->getOperand(0));
1630 if (!RHSReg)
1631 return 0;
1632 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1633 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1634 RHSIsKill, ShiftVal);
1635 if (ResultReg)
1636 return ResultReg;
1640 unsigned RHSReg = getRegForValue(RHS);
1641 if (!RHSReg)
1642 return 0;
1643 bool RHSIsKill = hasTrivialKill(RHS);
1645 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1646 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
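// i8/i16 results are produced in a 32-bit register; mask the result back
// down to the original width.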
1647 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1648 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1649 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1651 return ResultReg;
1654 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1655 unsigned LHSReg, bool LHSIsKill,
1656 uint64_t Imm) {
1657 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658 "ISD nodes are not consecutive!");
1659 static const unsigned OpcTable[3][2] = {
1660 { AArch64::ANDWri, AArch64::ANDXri },
1661 { AArch64::ORRWri, AArch64::ORRXri },
1662 { AArch64::EORWri, AArch64::EORXri }
1664 const TargetRegisterClass *RC;
1665 unsigned Opc;
1666 unsigned RegSize;
1667 switch (RetVT.SimpleTy) {
1668 default:
1669 return 0;
1670 case MVT::i1:
1671 case MVT::i8:
1672 case MVT::i16:
1673 case MVT::i32: {
1674 unsigned Idx = ISDOpc - ISD::AND;
1675 Opc = OpcTable[Idx][0];
1676 RC = &AArch64::GPR32spRegClass;
1677 RegSize = 32;
1678 break;
1680 case MVT::i64:
1681 Opc = OpcTable[ISDOpc - ISD::AND][1];
1682 RC = &AArch64::GPR64spRegClass;
1683 RegSize = 64;
1684 break;
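// AArch64 logical-immediate forms only accept "bitmask" immediates (a
// replicated, rotated run of set bits), so e.g. 0xff encodes directly while an
// arbitrary constant such as 0x123 does not; reject anything unencodable here
// and let the caller fall back to the register form.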
1687 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1688 return 0;
1690 unsigned ResultReg =
1691 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1692 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1693 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1697 return ResultReg;
1700 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701 unsigned LHSReg, bool LHSIsKill,
1702 unsigned RHSReg, bool RHSIsKill,
1703 uint64_t ShiftImm) {
1704 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705 "ISD nodes are not consecutive!");
1706 static const unsigned OpcTable[3][2] = {
1707 { AArch64::ANDWrs, AArch64::ANDXrs },
1708 { AArch64::ORRWrs, AArch64::ORRXrs },
1709 { AArch64::EORWrs, AArch64::EORXrs }
1712 // Don't deal with undefined shifts.
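// (A shift amount of the type width or more is poison in the IR and cannot be
// encoded in the shifted-register form anyway.)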
1713 if (ShiftImm >= RetVT.getSizeInBits())
1714 return 0;
1716 const TargetRegisterClass *RC;
1717 unsigned Opc;
1718 switch (RetVT.SimpleTy) {
1719 default:
1720 return 0;
1721 case MVT::i1:
1722 case MVT::i8:
1723 case MVT::i16:
1724 case MVT::i32:
1725 Opc = OpcTable[ISDOpc - ISD::AND][0];
1726 RC = &AArch64::GPR32RegClass;
1727 break;
1728 case MVT::i64:
1729 Opc = OpcTable[ISDOpc - ISD::AND][1];
1730 RC = &AArch64::GPR64RegClass;
1731 break;
1733 unsigned ResultReg =
1734 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1735 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1736 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1740 return ResultReg;
1743 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1744 uint64_t Imm) {
1745 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1748 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749 bool WantZExt, MachineMemOperand *MMO) {
1750 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751 return 0;
1753 // Simplify this down to something we can handle.
1754 if (!simplifyAddress(Addr, VT))
1755 return 0;
1757 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758 if (!ScaleFactor)
1759 llvm_unreachable("Unexpected value type.");
1761 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
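// For example, an i32 load at offset 8 can use LDRWui with an immediate of 2
// (the offset divided by the 4-byte scale), while offset -4 must use LDURWi.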
1763 bool UseScaled = true;
1764 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765 UseScaled = false;
1766 ScaleFactor = 1;
1769 static const unsigned GPOpcTable[2][8][4] = {
1770 // Sign-extend.
1771 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1772 AArch64::LDURXi },
1773 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1774 AArch64::LDURXi },
1775 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1776 AArch64::LDRXui },
1777 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1778 AArch64::LDRXui },
1779 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780 AArch64::LDRXroX },
1781 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782 AArch64::LDRXroX },
1783 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784 AArch64::LDRXroW },
1785 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786 AArch64::LDRXroW }
1788 // Zero-extend.
1789 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1790 AArch64::LDURXi },
1791 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1792 AArch64::LDURXi },
1793 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1794 AArch64::LDRXui },
1795 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1796 AArch64::LDRXui },
1797 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1798 AArch64::LDRXroX },
1799 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1800 AArch64::LDRXroX },
1801 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1802 AArch64::LDRXroW },
1803 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1804 AArch64::LDRXroW }
1808 static const unsigned FPOpcTable[4][2] = {
1809 { AArch64::LDURSi, AArch64::LDURDi },
1810 { AArch64::LDRSui, AArch64::LDRDui },
1811 { AArch64::LDRSroX, AArch64::LDRDroX },
1812 { AArch64::LDRSroW, AArch64::LDRDroW }
1815 unsigned Opc;
1816 const TargetRegisterClass *RC;
1817 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818 Addr.getOffsetReg();
1819 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821 Addr.getExtendType() == AArch64_AM::SXTW)
1822 Idx++;
1824 bool IsRet64Bit = RetVT == MVT::i64;
1825 switch (VT.SimpleTy) {
1826 default:
1827 llvm_unreachable("Unexpected value type.");
1828 case MVT::i1: // Intentional fall-through.
1829 case MVT::i8:
1830 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831 RC = (IsRet64Bit && !WantZExt) ?
1832 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1833 break;
1834 case MVT::i16:
1835 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836 RC = (IsRet64Bit && !WantZExt) ?
1837 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1838 break;
1839 case MVT::i32:
1840 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841 RC = (IsRet64Bit && !WantZExt) ?
1842 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843 break;
1844 case MVT::i64:
1845 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846 RC = &AArch64::GPR64RegClass;
1847 break;
1848 case MVT::f32:
1849 Opc = FPOpcTable[Idx][0];
1850 RC = &AArch64::FPR32RegClass;
1851 break;
1852 case MVT::f64:
1853 Opc = FPOpcTable[Idx][1];
1854 RC = &AArch64::FPR64RegClass;
1855 break;
1858 // Create the base instruction, then add the operands.
1859 unsigned ResultReg = createResultReg(RC);
1860 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861 TII.get(Opc), ResultReg);
1862 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1864 // Loading an i1 requires special handling.
1865 if (VT == MVT::i1) {
1866 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1867 assert(ANDReg && "Unexpected AND instruction emission failure.");
1868 ResultReg = ANDReg;
1871 // For zero-extending loads to 64 bits we emit a 32-bit load and then convert
1872 // the 32-bit reg to a 64-bit reg.
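// A write to a W register already zeroes bits [63:32], so the SUBREG_TO_REG
// below is just a retag of the 32-bit result as a 64-bit value; no extra
// zero-extension instruction is needed.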
1873 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1876 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877 .addImm(0)
1878 .addReg(ResultReg, getKillRegState(true))
1879 .addImm(AArch64::sub_32);
1880 ResultReg = Reg64;
1882 return ResultReg;
1885 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886 MVT VT;
1887 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888 return false;
1890 if (VT.isVector())
1891 return selectOperator(I, I->getOpcode());
1893 unsigned ResultReg;
1894 switch (I->getOpcode()) {
1895 default:
1896 llvm_unreachable("Unexpected instruction.");
1897 case Instruction::Add:
1898 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899 break;
1900 case Instruction::Sub:
1901 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902 break;
1904 if (!ResultReg)
1905 return false;
1907 updateValueMap(I, ResultReg);
1908 return true;
1911 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912 MVT VT;
1913 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914 return false;
1916 if (VT.isVector())
1917 return selectOperator(I, I->getOpcode());
1919 unsigned ResultReg;
1920 switch (I->getOpcode()) {
1921 default:
1922 llvm_unreachable("Unexpected instruction.");
1923 case Instruction::And:
1924 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925 break;
1926 case Instruction::Or:
1927 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 case Instruction::Xor:
1930 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931 break;
1933 if (!ResultReg)
1934 return false;
1936 updateValueMap(I, ResultReg);
1937 return true;
1940 bool AArch64FastISel::selectLoad(const Instruction *I) {
1941 MVT VT;
1942 // Verify we have a legal type before going any further. Currently, we handle
1943 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946 cast<LoadInst>(I)->isAtomic())
1947 return false;
1949 const Value *SV = I->getOperand(0);
1950 if (TLI.supportSwiftError()) {
1951 // Swifterror values can come from either a function parameter with
1952 // swifterror attribute or an alloca with swifterror attribute.
1953 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954 if (Arg->hasSwiftErrorAttr())
1955 return false;
1958 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959 if (Alloca->isSwiftError())
1960 return false;
1964 // See if we can handle this address.
1965 Address Addr;
1966 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967 return false;
1969 // Fold the following sign-/zero-extend into the load instruction.
1970 bool WantZExt = true;
1971 MVT RetVT = VT;
1972 const Value *IntExtVal = nullptr;
1973 if (I->hasOneUse()) {
1974 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975 if (isTypeSupported(ZE->getType(), RetVT))
1976 IntExtVal = ZE;
1977 else
1978 RetVT = VT;
1979 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980 if (isTypeSupported(SE->getType(), RetVT))
1981 IntExtVal = SE;
1982 else
1983 RetVT = VT;
1984 WantZExt = false;
1988 unsigned ResultReg =
1989 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990 if (!ResultReg)
1991 return false;
1993 // There are a few different cases we have to handle, because the load or the
1994 // sign-/zero-extend might not be selected by FastISel if we fall back to
1995 // SelectionDAG. There is also an ordering issue when both instructions are in
1996 // different basic blocks.
1997 // 1.) The load instruction is selected by FastISel, but the integer extend
1998 // not. This usually happens when the integer extend is in a different
1999 // basic block and SelectionDAG took over for that basic block.
2000 // 2.) The load instruction is selected before the integer extend. This only
2001 // happens when the integer extend is in a different basic block.
2002 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003 // by FastISel. This happens if there are instructions between the load
2004 // and the integer extend that couldn't be selected by FastISel.
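// The cases are distinguished below by whether the extend already has a
// machine definition (MRI.getUniqueVRegDef): if it does not, we keep (or
// adjust) the load's result; if it does, the extend's instructions are
// removed and the load result is used in their place.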
2005 if (IntExtVal) {
2006 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008 // it when it selects the integer extend.
2009 unsigned Reg = lookUpRegForValue(IntExtVal);
2010 auto *MI = MRI.getUniqueVRegDef(Reg);
2011 if (!MI) {
2012 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013 if (WantZExt) {
2014 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016 ResultReg = std::prev(I)->getOperand(0).getReg();
2017 removeDeadCode(I, std::next(I));
2018 } else
2019 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020 /*IsKill=*/true,
2021 AArch64::sub_32);
2023 updateValueMap(I, ResultReg);
2024 return true;
2027 // The integer extend has already been emitted - delete all the instructions
2028 // that have been emitted by the integer extend lowering code and use the
2029 // result from the load instruction directly.
2030 while (MI) {
2031 Reg = 0;
2032 for (auto &Opnd : MI->uses()) {
2033 if (Opnd.isReg()) {
2034 Reg = Opnd.getReg();
2035 break;
2038 MachineBasicBlock::iterator I(MI);
2039 removeDeadCode(I, std::next(I));
2040 MI = nullptr;
2041 if (Reg)
2042 MI = MRI.getUniqueVRegDef(Reg);
2044 updateValueMap(IntExtVal, ResultReg);
2045 return true;
2048 updateValueMap(I, ResultReg);
2049 return true;
2052 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2053 unsigned AddrReg,
2054 MachineMemOperand *MMO) {
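// STLR{B,H,W,X} carries release semantics directly, which is also how LLVM
// maps seq_cst stores on AArch64, so no separate barrier is emitted here.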
2055 unsigned Opc;
2056 switch (VT.SimpleTy) {
2057 default: return false;
2058 case MVT::i8: Opc = AArch64::STLRB; break;
2059 case MVT::i16: Opc = AArch64::STLRH; break;
2060 case MVT::i32: Opc = AArch64::STLRW; break;
2061 case MVT::i64: Opc = AArch64::STLRX; break;
2064 const MCInstrDesc &II = TII.get(Opc);
2065 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2066 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2068 .addReg(SrcReg)
2069 .addReg(AddrReg)
2070 .addMemOperand(MMO);
2071 return true;
2074 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2075 MachineMemOperand *MMO) {
2076 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2077 return false;
2079 // Simplify this down to something we can handle.
2080 if (!simplifyAddress(Addr, VT))
2081 return false;
2083 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2084 if (!ScaleFactor)
2085 llvm_unreachable("Unexpected value type.");
2087 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2088 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2089 bool UseScaled = true;
2090 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2091 UseScaled = false;
2092 ScaleFactor = 1;
2095 static const unsigned OpcTable[4][6] = {
2096 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2097 AArch64::STURSi, AArch64::STURDi },
2098 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2099 AArch64::STRSui, AArch64::STRDui },
2100 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2101 AArch64::STRSroX, AArch64::STRDroX },
2102 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2103 AArch64::STRSroW, AArch64::STRDroW }
2106 unsigned Opc;
2107 bool VTIsi1 = false;
2108 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2109 Addr.getOffsetReg();
2110 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2111 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2112 Addr.getExtendType() == AArch64_AM::SXTW)
2113 Idx++;
2115 switch (VT.SimpleTy) {
2116 default: llvm_unreachable("Unexpected value type.");
2117 case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2118 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2119 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2120 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2121 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2122 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2123 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126 // Storing an i1 requires special handling.
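// The i1 lives in a 32-bit register whose upper bits are not guaranteed to be
// zero, so mask it down to bit 0 before the byte store (unless we are storing
// the zero register, which is already all zeros).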
2127 if (VTIsi1 && SrcReg != AArch64::WZR) {
2128 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2129 assert(ANDReg && "Unexpected AND instruction emission failure.");
2130 SrcReg = ANDReg;
2132 // Create the base instruction, then add the operands.
2133 const MCInstrDesc &II = TII.get(Opc);
2134 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2135 MachineInstrBuilder MIB =
2136 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2137 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2139 return true;
2142 bool AArch64FastISel::selectStore(const Instruction *I) {
2143 MVT VT;
2144 const Value *Op0 = I->getOperand(0);
2145 // Verify we have a legal type before going any further. Currently, we handle
2146 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2147 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2148 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2149 return false;
2151 const Value *PtrV = I->getOperand(1);
2152 if (TLI.supportSwiftError()) {
2153 // Swifterror values can come from either a function parameter with
2154 // swifterror attribute or an alloca with swifterror attribute.
2155 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2156 if (Arg->hasSwiftErrorAttr())
2157 return false;
2160 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2161 if (Alloca->isSwiftError())
2162 return false;
2166 // Get the value to be stored into a register. Use the zero register directly
2167 // when possible to avoid an unnecessary copy and a wasted register.
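// For example, "store i32 0, i32* %p" becomes "str wzr, [x0]" with no
// materializing mov; a positive floating-point zero is likewise stored by
// reinterpreting it as an integer zero and using wzr/xzr.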
2168 unsigned SrcReg = 0;
2169 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2170 if (CI->isZero())
2171 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2173 if (CF->isZero() && !CF->isNegative()) {
2174 VT = MVT::getIntegerVT(VT.getSizeInBits());
2175 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2179 if (!SrcReg)
2180 SrcReg = getRegForValue(Op0);
2182 if (!SrcReg)
2183 return false;
2185 auto *SI = cast<StoreInst>(I);
2187 // Try to emit a STLR for seq_cst/release.
2188 if (SI->isAtomic()) {
2189 AtomicOrdering Ord = SI->getOrdering();
2190 // The non-atomic instructions are sufficient for relaxed stores.
2191 if (isReleaseOrStronger(Ord)) {
2192 // The STLR addressing mode only supports a base reg; pass that directly.
2193 unsigned AddrReg = getRegForValue(PtrV);
2194 return emitStoreRelease(VT, SrcReg, AddrReg,
2195 createMachineMemOperandFor(I));
2199 // See if we can handle this address.
2200 Address Addr;
2201 if (!computeAddress(PtrV, Addr, Op0->getType()))
2202 return false;
2204 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205 return false;
2206 return true;
2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210 switch (Pred) {
2211 case CmpInst::FCMP_ONE:
2212 case CmpInst::FCMP_UEQ:
2213 default:
2214 // AL is our "false" for now. The other two need more compares.
2215 return AArch64CC::AL;
2216 case CmpInst::ICMP_EQ:
2217 case CmpInst::FCMP_OEQ:
2218 return AArch64CC::EQ;
2219 case CmpInst::ICMP_SGT:
2220 case CmpInst::FCMP_OGT:
2221 return AArch64CC::GT;
2222 case CmpInst::ICMP_SGE:
2223 case CmpInst::FCMP_OGE:
2224 return AArch64CC::GE;
2225 case CmpInst::ICMP_UGT:
2226 case CmpInst::FCMP_UGT:
2227 return AArch64CC::HI;
2228 case CmpInst::FCMP_OLT:
2229 return AArch64CC::MI;
2230 case CmpInst::ICMP_ULE:
2231 case CmpInst::FCMP_OLE:
2232 return AArch64CC::LS;
2233 case CmpInst::FCMP_ORD:
2234 return AArch64CC::VC;
2235 case CmpInst::FCMP_UNO:
2236 return AArch64CC::VS;
2237 case CmpInst::FCMP_UGE:
2238 return AArch64CC::PL;
2239 case CmpInst::ICMP_SLT:
2240 case CmpInst::FCMP_ULT:
2241 return AArch64CC::LT;
2242 case CmpInst::ICMP_SLE:
2243 case CmpInst::FCMP_ULE:
2244 return AArch64CC::LE;
2245 case CmpInst::FCMP_UNE:
2246 case CmpInst::ICMP_NE:
2247 return AArch64CC::NE;
2248 case CmpInst::ICMP_UGE:
2249 return AArch64CC::HS;
2250 case CmpInst::ICMP_ULT:
2251 return AArch64CC::LO;
2255 /// Try to emit a combined compare-and-branch instruction.
2256 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258 // will not be produced, as they are conditional branch instructions that do
2259 // not set flags.
2260 if (FuncInfo.MF->getFunction().hasFnAttribute(
2261 Attribute::SpeculativeLoadHardening))
2262 return false;
2264 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2266 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2268 const Value *LHS = CI->getOperand(0);
2269 const Value *RHS = CI->getOperand(1);
2271 MVT VT;
2272 if (!isTypeSupported(LHS->getType(), VT))
2273 return false;
2275 unsigned BW = VT.getSizeInBits();
2276 if (BW > 64)
2277 return false;
2279 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2280 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2282 // Try to take advantage of fallthrough opportunities.
2283 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2284 std::swap(TBB, FBB);
2285 Predicate = CmpInst::getInversePredicate(Predicate);
2288 int TestBit = -1;
2289 bool IsCmpNE;
2290 switch (Predicate) {
2291 default:
2292 return false;
2293 case CmpInst::ICMP_EQ:
2294 case CmpInst::ICMP_NE:
2295 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2296 std::swap(LHS, RHS);
2298 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2299 return false;
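// An (and X, 2^N) compared against zero maps onto a single test-bit branch.
// For example, "br (icmp ne (and i32 %x, 8), 0), ..." becomes
// "tbnz w0, #3, <target>" instead of an and+cmp+b.ne sequence.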
2301 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2302 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2303 const Value *AndLHS = AI->getOperand(0);
2304 const Value *AndRHS = AI->getOperand(1);
2306 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2307 if (C->getValue().isPowerOf2())
2308 std::swap(AndLHS, AndRHS);
2310 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2311 if (C->getValue().isPowerOf2()) {
2312 TestBit = C->getValue().logBase2();
2313 LHS = AndLHS;
2317 if (VT == MVT::i1)
2318 TestBit = 0;
2320 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321 break;
2322 case CmpInst::ICMP_SLT:
2323 case CmpInst::ICMP_SGE:
2324 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2325 return false;
2327 TestBit = BW - 1;
2328 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329 break;
2330 case CmpInst::ICMP_SGT:
2331 case CmpInst::ICMP_SLE:
2332 if (!isa<ConstantInt>(RHS))
2333 return false;
2335 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2336 return false;
2338 TestBit = BW - 1;
2339 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340 break;
2341 } // end switch
2343 static const unsigned OpcTable[2][2][2] = {
2344 { {AArch64::CBZW, AArch64::CBZX },
2345 {AArch64::CBNZW, AArch64::CBNZX} },
2346 { {AArch64::TBZW, AArch64::TBZX },
2347 {AArch64::TBNZW, AArch64::TBNZX} }
2350 bool IsBitTest = TestBit != -1;
2351 bool Is64Bit = BW == 64;
2352 if (TestBit < 32 && TestBit >= 0)
2353 Is64Bit = false;
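// A bit below 32 can be tested with the W-register form of TB(N)Z even when
// the source value is 64 bits wide; in that case the sub_32 extract below
// grabs the low half of the register.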
2355 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356 const MCInstrDesc &II = TII.get(Opc);
2358 unsigned SrcReg = getRegForValue(LHS);
2359 if (!SrcReg)
2360 return false;
2361 bool SrcIsKill = hasTrivialKill(LHS);
2363 if (BW == 64 && !Is64Bit)
2364 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2365 AArch64::sub_32);
2367 if ((BW < 32) && !IsBitTest)
2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2370 // Emit the combined compare and branch instruction.
2371 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372 MachineInstrBuilder MIB =
2373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2374 .addReg(SrcReg, getKillRegState(SrcIsKill));
2375 if (IsBitTest)
2376 MIB.addImm(TestBit);
2377 MIB.addMBB(TBB);
2379 finishCondBranch(BI->getParent(), TBB, FBB);
2380 return true;
2383 bool AArch64FastISel::selectBranch(const Instruction *I) {
2384 const BranchInst *BI = cast<BranchInst>(I);
2385 if (BI->isUnconditional()) {
2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387 fastEmitBranch(MSucc, BI->getDebugLoc());
2388 return true;
2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2394 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395 if (CI->hasOneUse() && isValueAvailable(CI)) {
2396 // Try to optimize or fold the cmp.
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398 switch (Predicate) {
2399 default:
2400 break;
2401 case CmpInst::FCMP_FALSE:
2402 fastEmitBranch(FBB, DbgLoc);
2403 return true;
2404 case CmpInst::FCMP_TRUE:
2405 fastEmitBranch(TBB, DbgLoc);
2406 return true;
2409 // Try to emit a combined compare-and-branch first.
2410 if (emitCompareAndBranch(BI))
2411 return true;
2413 // Try to take advantage of fallthrough opportunities.
2414 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415 std::swap(TBB, FBB);
2416 Predicate = CmpInst::getInversePredicate(Predicate);
2419 // Emit the cmp.
2420 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421 return false;
2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424 // instruction.
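// They are lowered as two conditional branches to the true block: one on
// ExtraCC and one on CC (see below).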
2425 AArch64CC::CondCode CC = getCompareCC(Predicate);
2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427 switch (Predicate) {
2428 default:
2429 break;
2430 case CmpInst::FCMP_UEQ:
2431 ExtraCC = AArch64CC::EQ;
2432 CC = AArch64CC::VS;
2433 break;
2434 case CmpInst::FCMP_ONE:
2435 ExtraCC = AArch64CC::MI;
2436 CC = AArch64CC::GT;
2437 break;
2439 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442 if (ExtraCC != AArch64CC::AL) {
2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444 .addImm(ExtraCC)
2445 .addMBB(TBB);
2448 // Emit the branch.
2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2450 .addImm(CC)
2451 .addMBB(TBB);
2453 finishCondBranch(BI->getParent(), TBB, FBB);
2454 return true;
2456 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457 uint64_t Imm = CI->getZExtValue();
2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2460 .addMBB(Target);
2462 // Obtain the branch probability and add the target to the successor list.
2463 if (FuncInfo.BPI) {
2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465 BI->getParent(), Target->getBasicBlock());
2466 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467 } else
2468 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469 return true;
2470 } else {
2471 AArch64CC::CondCode CC = AArch64CC::NE;
2472 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473 // Fake-request the condition; otherwise the intrinsic might be completely
2474 // optimized away.
2475 unsigned CondReg = getRegForValue(BI->getCondition());
2476 if (!CondReg)
2477 return false;
2479 // Emit the branch.
2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2481 .addImm(CC)
2482 .addMBB(TBB);
2484 finishCondBranch(BI->getParent(), TBB, FBB);
2485 return true;
2489 unsigned CondReg = getRegForValue(BI->getCondition());
2490 if (CondReg == 0)
2491 return false;
2492 bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2494 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
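// For example, this emits "tbnz w0, #0, <TBB>", or "tbz" when the true block
// is the layout successor and the branch sense is inverted.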
2495 unsigned Opcode = AArch64::TBNZW;
2496 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497 std::swap(TBB, FBB);
2498 Opcode = AArch64::TBZW;
2501 const MCInstrDesc &II = TII.get(Opcode);
2502 unsigned ConstrainedCondReg
2503 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2505 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2506 .addImm(0)
2507 .addMBB(TBB);
2509 finishCondBranch(BI->getParent(), TBB, FBB);
2510 return true;
2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515 unsigned AddrReg = getRegForValue(BI->getOperand(0));
2516 if (AddrReg == 0)
2517 return false;
2519 // Emit the indirect branch.
2520 const MCInstrDesc &II = TII.get(AArch64::BR);
2521 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2522 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2524 // Make sure the CFG is up-to-date.
2525 for (auto *Succ : BI->successors())
2526 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2528 return true;
2531 bool AArch64FastISel::selectCmp(const Instruction *I) {
2532 const CmpInst *CI = cast<CmpInst>(I);
2534 // Vectors of i1 are weird: bail out.
2535 if (CI->getType()->isVectorTy())
2536 return false;
2538 // Try to optimize or fold the cmp.
2539 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2540 unsigned ResultReg = 0;
2541 switch (Predicate) {
2542 default:
2543 break;
2544 case CmpInst::FCMP_FALSE:
2545 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2547 TII.get(TargetOpcode::COPY), ResultReg)
2548 .addReg(AArch64::WZR, getKillRegState(true));
2549 break;
2550 case CmpInst::FCMP_TRUE:
2551 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2552 break;
2555 if (ResultReg) {
2556 updateValueMap(I, ResultReg);
2557 return true;
2560 // Emit the cmp.
2561 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2562 return false;
2564 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2566 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2567 // condition codes are inverted, because they are used by CSINC.
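// CSINC Wd, WZR, WZR, cond yields 0 when cond holds and 1 otherwise, so
// passing the inverted condition produces the desired 0/1 result.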
2568 static unsigned CondCodeTable[2][2] = {
2569 { AArch64CC::NE, AArch64CC::VC },
2570 { AArch64CC::PL, AArch64CC::LE }
2572 unsigned *CondCodes = nullptr;
2573 switch (Predicate) {
2574 default:
2575 break;
2576 case CmpInst::FCMP_UEQ:
2577 CondCodes = &CondCodeTable[0][0];
2578 break;
2579 case CmpInst::FCMP_ONE:
2580 CondCodes = &CondCodeTable[1][0];
2581 break;
2584 if (CondCodes) {
2585 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2586 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587 TmpReg1)
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addReg(AArch64::WZR, getKillRegState(true))
2590 .addImm(CondCodes[0]);
2591 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2592 ResultReg)
2593 .addReg(TmpReg1, getKillRegState(true))
2594 .addReg(AArch64::WZR, getKillRegState(true))
2595 .addImm(CondCodes[1]);
2597 updateValueMap(I, ResultReg);
2598 return true;
2601 // Now set a register based on the comparison.
2602 AArch64CC::CondCode CC = getCompareCC(Predicate);
2603 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2604 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2605 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2606 ResultReg)
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addReg(AArch64::WZR, getKillRegState(true))
2609 .addImm(invertedCC);
2611 updateValueMap(I, ResultReg);
2612 return true;
2615 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2616 /// value.
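/// The foldable forms are: select c, 1, f -> orr c, f; select c, 0, f ->
/// bic f, c; select c, t, 1 -> orr (eor c, 1), t; and select c, t, 0 ->
/// and c, t.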
2617 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2618 if (!SI->getType()->isIntegerTy(1))
2619 return false;
2621 const Value *Src1Val, *Src2Val;
2622 unsigned Opc = 0;
2623 bool NeedExtraOp = false;
2624 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2625 if (CI->isOne()) {
2626 Src1Val = SI->getCondition();
2627 Src2Val = SI->getFalseValue();
2628 Opc = AArch64::ORRWrr;
2629 } else {
2630 assert(CI->isZero());
2631 Src1Val = SI->getFalseValue();
2632 Src2Val = SI->getCondition();
2633 Opc = AArch64::BICWrr;
2635 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2636 if (CI->isOne()) {
2637 Src1Val = SI->getCondition();
2638 Src2Val = SI->getTrueValue();
2639 Opc = AArch64::ORRWrr;
2640 NeedExtraOp = true;
2641 } else {
2642 assert(CI->isZero());
2643 Src1Val = SI->getCondition();
2644 Src2Val = SI->getTrueValue();
2645 Opc = AArch64::ANDWrr;
2649 if (!Opc)
2650 return false;
2652 unsigned Src1Reg = getRegForValue(Src1Val);
2653 if (!Src1Reg)
2654 return false;
2655 bool Src1IsKill = hasTrivialKill(Src1Val);
2657 unsigned Src2Reg = getRegForValue(Src2Val);
2658 if (!Src2Reg)
2659 return false;
2660 bool Src2IsKill = hasTrivialKill(Src2Val);
2662 if (NeedExtraOp) {
2663 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2664 Src1IsKill = true;
2666 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2667 Src1IsKill, Src2Reg, Src2IsKill);
2668 updateValueMap(SI, ResultReg);
2669 return true;
2672 bool AArch64FastISel::selectSelect(const Instruction *I) {
2673 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2674 MVT VT;
2675 if (!isTypeSupported(I->getType(), VT))
2676 return false;
2678 unsigned Opc;
2679 const TargetRegisterClass *RC;
2680 switch (VT.SimpleTy) {
2681 default:
2682 return false;
2683 case MVT::i1:
2684 case MVT::i8:
2685 case MVT::i16:
2686 case MVT::i32:
2687 Opc = AArch64::CSELWr;
2688 RC = &AArch64::GPR32RegClass;
2689 break;
2690 case MVT::i64:
2691 Opc = AArch64::CSELXr;
2692 RC = &AArch64::GPR64RegClass;
2693 break;
2694 case MVT::f32:
2695 Opc = AArch64::FCSELSrrr;
2696 RC = &AArch64::FPR32RegClass;
2697 break;
2698 case MVT::f64:
2699 Opc = AArch64::FCSELDrrr;
2700 RC = &AArch64::FPR64RegClass;
2701 break;
2704 const SelectInst *SI = cast<SelectInst>(I);
2705 const Value *Cond = SI->getCondition();
2706 AArch64CC::CondCode CC = AArch64CC::NE;
2707 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2709 if (optimizeSelect(SI))
2710 return true;
2712 // Try to pick up the flags, so we don't have to emit another compare.
2713 if (foldXALUIntrinsic(CC, I, Cond)) {
2714 // Fake request the condition to force emission of the XALU intrinsic.
2715 unsigned CondReg = getRegForValue(Cond);
2716 if (!CondReg)
2717 return false;
2718 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2719 isValueAvailable(Cond)) {
2720 const auto *Cmp = cast<CmpInst>(Cond);
2721 // Try to optimize or fold the cmp.
2722 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2723 const Value *FoldSelect = nullptr;
2724 switch (Predicate) {
2725 default:
2726 break;
2727 case CmpInst::FCMP_FALSE:
2728 FoldSelect = SI->getFalseValue();
2729 break;
2730 case CmpInst::FCMP_TRUE:
2731 FoldSelect = SI->getTrueValue();
2732 break;
2735 if (FoldSelect) {
2736 unsigned SrcReg = getRegForValue(FoldSelect);
2737 if (!SrcReg)
2738 return false;
2739 unsigned UseReg = lookUpRegForValue(SI);
2740 if (UseReg)
2741 MRI.clearKillFlags(UseReg);
2743 updateValueMap(I, SrcReg);
2744 return true;
2747 // Emit the cmp.
2748 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749 return false;
2751 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752 CC = getCompareCC(Predicate);
2753 switch (Predicate) {
2754 default:
2755 break;
2756 case CmpInst::FCMP_UEQ:
2757 ExtraCC = AArch64CC::EQ;
2758 CC = AArch64CC::VS;
2759 break;
2760 case CmpInst::FCMP_ONE:
2761 ExtraCC = AArch64CC::MI;
2762 CC = AArch64CC::GT;
2763 break;
2765 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766 } else {
2767 unsigned CondReg = getRegForValue(Cond);
2768 if (!CondReg)
2769 return false;
2770 bool CondIsKill = hasTrivialKill(Cond);
2772 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2773 CondReg = constrainOperandRegClass(II, CondReg, 1);
2775 // Emit a TST instruction (ANDS wzr, reg, #imm).
2776 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2777 AArch64::WZR)
2778 .addReg(CondReg, getKillRegState(CondIsKill))
2779 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
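// The CSEL/FCSEL emitted below then selects on NE, i.e. it picks the true
// value whenever the low bit of the condition register is set.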
2782 unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2783 bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2785 unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2786 bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2788 if (!Src1Reg || !Src2Reg)
2789 return false;
2791 if (ExtraCC != AArch64CC::AL) {
2792 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2793 Src2IsKill, ExtraCC);
2794 Src2IsKill = true;
2796 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797 Src2IsKill, CC);
2798 updateValueMap(I, ResultReg);
2799 return true;
2802 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2803 Value *V = I->getOperand(0);
2804 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2805 return false;
2807 unsigned Op = getRegForValue(V);
2808 if (Op == 0)
2809 return false;
2811 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2812 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2813 ResultReg).addReg(Op);
2814 updateValueMap(I, ResultReg);
2815 return true;
2818 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2819 Value *V = I->getOperand(0);
2820 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2821 return false;
2823 unsigned Op = getRegForValue(V);
2824 if (Op == 0)
2825 return false;
2827 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2828 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2829 ResultReg).addReg(Op);
2830 updateValueMap(I, ResultReg);
2831 return true;
2834 // FPToUI and FPToSI
2835 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2836 MVT DestVT;
2837 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2838 return false;
2840 unsigned SrcReg = getRegForValue(I->getOperand(0));
2841 if (SrcReg == 0)
2842 return false;
2844 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2845 if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2846 return false;
2848 unsigned Opc;
2849 if (SrcVT == MVT::f64) {
2850 if (Signed)
2851 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2852 else
2853 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2854 } else {
2855 if (Signed)
2856 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2857 else
2858 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2860 unsigned ResultReg = createResultReg(
2861 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2862 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2863 .addReg(SrcReg);
2864 updateValueMap(I, ResultReg);
2865 return true;
2868 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2869 MVT DestVT;
2870 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2871 return false;
2872 // Let regular ISel handle FP16.
2873 if (DestVT == MVT::f16)
2874 return false;
2876 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2877 "Unexpected value type.");
2879 unsigned SrcReg = getRegForValue(I->getOperand(0));
2880 if (!SrcReg)
2881 return false;
2882 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2884 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2886 // Handle sign-extension.
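// SCVTF/UCVTF only take 32- or 64-bit GPR sources, so i1/i8/i16 inputs are
// widened to i32 first (zero- or sign-extended to match the conversion).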
2887 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2888 SrcReg =
2889 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2890 if (!SrcReg)
2891 return false;
2892 SrcIsKill = true;
2895 unsigned Opc;
2896 if (SrcVT == MVT::i64) {
2897 if (Signed)
2898 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2899 else
2900 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2901 } else {
2902 if (Signed)
2903 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2904 else
2905 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2908 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2909 SrcIsKill);
2910 updateValueMap(I, ResultReg);
2911 return true;
2914 bool AArch64FastISel::fastLowerArguments() {
2915 if (!FuncInfo.CanLowerReturn)
2916 return false;
2918 const Function *F = FuncInfo.Fn;
2919 if (F->isVarArg())
2920 return false;
2922 CallingConv::ID CC = F->getCallingConv();
2923 if (CC != CallingConv::C && CC != CallingConv::Swift)
2924 return false;
2926 if (Subtarget->hasCustomCallingConv())
2927 return false;
2929 // Only handle simple cases of up to 8 GPR and FPR each.
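// Per AAPCS64, the first eight integer arguments are passed in X0-X7 (W0-W7
// for 32-bit values) and the first eight FP/SIMD arguments in V0-V7 (seen
// here as H0-H7, S0-S7, D0-D7 or Q0-Q7 depending on width).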
2930 unsigned GPRCnt = 0;
2931 unsigned FPRCnt = 0;
2932 for (auto const &Arg : F->args()) {
2933 if (Arg.hasAttribute(Attribute::ByVal) ||
2934 Arg.hasAttribute(Attribute::InReg) ||
2935 Arg.hasAttribute(Attribute::StructRet) ||
2936 Arg.hasAttribute(Attribute::SwiftSelf) ||
2937 Arg.hasAttribute(Attribute::SwiftError) ||
2938 Arg.hasAttribute(Attribute::Nest))
2939 return false;
2941 Type *ArgTy = Arg.getType();
2942 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2943 return false;
2945 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2946 if (!ArgVT.isSimple())
2947 return false;
2949 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2950 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2951 return false;
2953 if (VT.isVector() &&
2954 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2955 return false;
2957 if (VT >= MVT::i1 && VT <= MVT::i64)
2958 ++GPRCnt;
2959 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2960 VT.is128BitVector())
2961 ++FPRCnt;
2962 else
2963 return false;
2965 if (GPRCnt > 8 || FPRCnt > 8)
2966 return false;
2969 static const MCPhysReg Registers[6][8] = {
2970 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2971 AArch64::W5, AArch64::W6, AArch64::W7 },
2972 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2973 AArch64::X5, AArch64::X6, AArch64::X7 },
2974 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2975 AArch64::H5, AArch64::H6, AArch64::H7 },
2976 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2977 AArch64::S5, AArch64::S6, AArch64::S7 },
2978 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2979 AArch64::D5, AArch64::D6, AArch64::D7 },
2980 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2981 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2984 unsigned GPRIdx = 0;
2985 unsigned FPRIdx = 0;
2986 for (auto const &Arg : F->args()) {
2987 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2988 unsigned SrcReg;
2989 const TargetRegisterClass *RC;
2990 if (VT >= MVT::i1 && VT <= MVT::i32) {
2991 SrcReg = Registers[0][GPRIdx++];
2992 RC = &AArch64::GPR32RegClass;
2993 VT = MVT::i32;
2994 } else if (VT == MVT::i64) {
2995 SrcReg = Registers[1][GPRIdx++];
2996 RC = &AArch64::GPR64RegClass;
2997 } else if (VT == MVT::f16) {
2998 SrcReg = Registers[2][FPRIdx++];
2999 RC = &AArch64::FPR16RegClass;
3000 } else if (VT == MVT::f32) {
3001 SrcReg = Registers[3][FPRIdx++];
3002 RC = &AArch64::FPR32RegClass;
3003 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3004 SrcReg = Registers[4][FPRIdx++];
3005 RC = &AArch64::FPR64RegClass;
3006 } else if (VT.is128BitVector()) {
3007 SrcReg = Registers[5][FPRIdx++];
3008 RC = &AArch64::FPR128RegClass;
3009 } else
3010 llvm_unreachable("Unexpected value type.");
3012 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3013 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3014 // Without this, EmitLiveInCopies may eliminate the livein if its only
3015 // use is a bitcast (which isn't turned into an instruction).
3016 unsigned ResultReg = createResultReg(RC);
3017 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3018 TII.get(TargetOpcode::COPY), ResultReg)
3019 .addReg(DstReg, getKillRegState(true));
3020 updateValueMap(&Arg, ResultReg);
3022 return true;
3025 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3026 SmallVectorImpl<MVT> &OutVTs,
3027 unsigned &NumBytes) {
3028 CallingConv::ID CC = CLI.CallConv;
3029 SmallVector<CCValAssign, 16> ArgLocs;
3030 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3031 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3033 // Get a count of how many bytes are to be pushed on the stack.
3034 NumBytes = CCInfo.getNextStackOffset();
3036 // Issue CALLSEQ_START
3037 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3038 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3039 .addImm(NumBytes).addImm(0);
3041 // Process the args.
3042 for (CCValAssign &VA : ArgLocs) {
3043 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3044 MVT ArgVT = OutVTs[VA.getValNo()];
3046 unsigned ArgReg = getRegForValue(ArgVal);
3047 if (!ArgReg)
3048 return false;
3050 // Handle arg promotion: SExt, ZExt, AExt.
3051 switch (VA.getLocInfo()) {
3052 case CCValAssign::Full:
3053 break;
3054 case CCValAssign::SExt: {
3055 MVT DestVT = VA.getLocVT();
3056 MVT SrcVT = ArgVT;
3057 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3058 if (!ArgReg)
3059 return false;
3060 break;
3062 case CCValAssign::AExt:
3063 // Intentional fall-through.
3064 case CCValAssign::ZExt: {
3065 MVT DestVT = VA.getLocVT();
3066 MVT SrcVT = ArgVT;
3067 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3068 if (!ArgReg)
3069 return false;
3070 break;
3072 default:
3073 llvm_unreachable("Unknown arg promotion!");
3076 // Now copy/store arg to correct locations.
3077 if (VA.isRegLoc() && !VA.needsCustom()) {
3078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3079 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3080 CLI.OutRegs.push_back(VA.getLocReg());
3081 } else if (VA.needsCustom()) {
3082 // FIXME: Handle custom args.
3083 return false;
3084 } else {
3085 assert(VA.isMemLoc() && "Assuming store on stack.");
3087 // Don't emit stores for undef values.
3088 if (isa<UndefValue>(ArgVal))
3089 continue;
3091 // Need to store on the stack.
3092 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
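// On big-endian targets an argument smaller than 8 bytes lives at the high
// end of its 8-byte stack slot, so bump the store offset by the padding.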
3094 unsigned BEAlign = 0;
3095 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3096 BEAlign = 8 - ArgSize;
3098 Address Addr;
3099 Addr.setKind(Address::RegBase);
3100 Addr.setReg(AArch64::SP);
3101 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3103 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3104 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3105 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3106 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3108 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3109 return false;
3112 return true;
3115 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3116 unsigned NumBytes) {
3117 CallingConv::ID CC = CLI.CallConv;
3119 // Issue CALLSEQ_END
3120 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3122 .addImm(NumBytes).addImm(0);
3124 // Now the return value.
3125 if (RetVT != MVT::isVoid) {
3126 SmallVector<CCValAssign, 16> RVLocs;
3127 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3128 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3130 // Only handle a single return value.
3131 if (RVLocs.size() != 1)
3132 return false;
3134 // Copy all of the result registers out of their specified physreg.
3135 MVT CopyVT = RVLocs[0].getValVT();
3137 // TODO: Handle big-endian results
3138 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3139 return false;
3141 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3142 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3143 TII.get(TargetOpcode::COPY), ResultReg)
3144 .addReg(RVLocs[0].getLocReg());
3145 CLI.InRegs.push_back(RVLocs[0].getLocReg());
3147 CLI.ResultReg = ResultReg;
3148 CLI.NumResultRegs = 1;
3151 return true;
3154 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155 CallingConv::ID CC = CLI.CallConv;
3156 bool IsTailCall = CLI.IsTailCall;
3157 bool IsVarArg = CLI.IsVarArg;
3158 const Value *Callee = CLI.Callee;
3159 MCSymbol *Symbol = CLI.Symbol;
3161 if (!Callee && !Symbol)
3162 return false;
3164 // Allow SelectionDAG isel to handle tail calls.
3165 if (IsTailCall)
3166 return false;
3168 CodeModel::Model CM = TM.getCodeModel();
3169 // Only support the small-addressing and large code models.
3170 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3171 return false;
3173 // FIXME: Add large code model support for ELF.
3174 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3175 return false;
3177 // Let SDISel handle vararg functions.
3178 if (IsVarArg)
3179 return false;
3181 // FIXME: Only handle *simple* calls for now.
3182 MVT RetVT;
3183 if (CLI.RetTy->isVoidTy())
3184 RetVT = MVT::isVoid;
3185 else if (!isTypeLegal(CLI.RetTy, RetVT))
3186 return false;
3188 for (auto Flag : CLI.OutFlags)
3189 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3190 Flag.isSwiftSelf() || Flag.isSwiftError())
3191 return false;
3193 // Set up the argument vectors.
3194 SmallVector<MVT, 16> OutVTs;
3195 OutVTs.reserve(CLI.OutVals.size());
3197 for (auto *Val : CLI.OutVals) {
3198 MVT VT;
3199 if (!isTypeLegal(Val->getType(), VT) &&
3200 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3201 return false;
3203 // We don't handle vector parameters yet.
3204 if (VT.isVector() || VT.getSizeInBits() > 64)
3205 return false;
3207 OutVTs.push_back(VT);
3210 Address Addr;
3211 if (Callee && !computeCallAddress(Callee, Addr))
3212 return false;
3214 // Handle the arguments now that we've gotten them.
3215 unsigned NumBytes;
3216 if (!processCallArgs(CLI, OutVTs, NumBytes))
3217 return false;
3219 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3220 if (RegInfo->isAnyArgRegReserved(*MF))
3221 RegInfo->emitReservedArgRegCallError(*MF);
3223 // Issue the call.
3224 MachineInstrBuilder MIB;
3225 if (Subtarget->useSmallAddressing()) {
3226 const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3227 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3228 if (Symbol)
3229 MIB.addSym(Symbol, 0);
3230 else if (Addr.getGlobalValue())
3231 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3232 else if (Addr.getReg()) {
3233 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3234 MIB.addReg(Reg);
3235 } else
3236 return false;
3237 } else {
3238 unsigned CallReg = 0;
3239 if (Symbol) {
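// Large code model (MachO only here): materialize the callee's address from
// its GOT entry with ADRP + LDR and call through BLR.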
3240 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3241 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3242 ADRPReg)
3243 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3245 CallReg = createResultReg(&AArch64::GPR64RegClass);
3246 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3247 TII.get(AArch64::LDRXui), CallReg)
3248 .addReg(ADRPReg)
3249 .addSym(Symbol,
3250 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3251 } else if (Addr.getGlobalValue())
3252 CallReg = materializeGV(Addr.getGlobalValue());
3253 else if (Addr.getReg())
3254 CallReg = Addr.getReg();
3256 if (!CallReg)
3257 return false;
3259 const MCInstrDesc &II = TII.get(AArch64::BLR);
3260 CallReg = constrainOperandRegClass(II, CallReg, 0);
3261 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3264 // Add implicit physical register uses to the call.
3265 for (auto Reg : CLI.OutRegs)
3266 MIB.addReg(Reg, RegState::Implicit);
3268 // Add a register mask with the call-preserved registers.
3269 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3270 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3272 CLI.Call = MIB;
3274 // Finish off the call including any return values.
3275 return finishCall(CLI, RetVT, NumBytes);
3278 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
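// With a known alignment, inline at most four naturally aligned transfers;
// with unknown alignment, cap the inlined copy at 31 bytes.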
3279 if (Alignment)
3280 return Len / Alignment <= 4;
3281 else
3282 return Len < 32;
3285 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3286 uint64_t Len, unsigned Alignment) {
3287 // Make sure we don't bloat code by inlining very large memcpys.
3288 if (!isMemCpySmall(Len, Alignment))
3289 return false;
3291 int64_t UnscaledOffset = 0;
3292 Address OrigDest = Dest;
3293 Address OrigSrc = Src;
3295 while (Len) {
3296 MVT VT;
3297 if (!Alignment || Alignment >= 8) {
3298 if (Len >= 8)
3299 VT = MVT::i64;
3300 else if (Len >= 4)
3301 VT = MVT::i32;
3302 else if (Len >= 2)
3303 VT = MVT::i16;
3304 else {
3305 VT = MVT::i8;
3307 } else {
3308 // Bound based on alignment.
3309 if (Len >= 4 && Alignment == 4)
3310 VT = MVT::i32;
3311 else if (Len >= 2 && Alignment == 2)
3312 VT = MVT::i16;
3313 else {
3314 VT = MVT::i8;
3318 unsigned ResultReg = emitLoad(VT, VT, Src);
3319 if (!ResultReg)
3320 return false;
3322 if (!emitStore(VT, ResultReg, Dest))
3323 return false;
3325 int64_t Size = VT.getSizeInBits() / 8;
3326 Len -= Size;
3327 UnscaledOffset += Size;
3329 // We need to recompute the unscaled offset for each iteration.
3330 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3331 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3334 return true;
3337 /// Check if it is possible to fold the condition from the XALU intrinsic
3338 /// into the user. The condition code will only be updated on success.
3339 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3340 const Instruction *I,
3341 const Value *Cond) {
3342 if (!isa<ExtractValueInst>(Cond))
3343 return false;
3345 const auto *EV = cast<ExtractValueInst>(Cond);
3346 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3347 return false;
3349 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3350 MVT RetVT;
3351 const Function *Callee = II->getCalledFunction();
3352 Type *RetTy =
3353 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3354 if (!isTypeLegal(RetTy, RetVT))
3355 return false;
3357 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3358 return false;
3360 const Value *LHS = II->getArgOperand(0);
3361 const Value *RHS = II->getArgOperand(1);
3363 // Canonicalize immediate to the RHS.
3364 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3365 isCommutativeIntrinsic(II))
3366 std::swap(LHS, RHS);
3368 // Simplify multiplies.
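// A multiply by 2 overflows exactly when x + x does, so treat it as the
// corresponding add-with-overflow and test that add's condition (VS for
// signed, HS for unsigned) instead of the generic NE check; see the TmpCC
// switch below.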
3369 Intrinsic::ID IID = II->getIntrinsicID();
3370 switch (IID) {
3371 default:
3372 break;
3373 case Intrinsic::smul_with_overflow:
3374 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375 if (C->getValue() == 2)
3376 IID = Intrinsic::sadd_with_overflow;
3377 break;
3378 case Intrinsic::umul_with_overflow:
3379 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3380 if (C->getValue() == 2)
3381 IID = Intrinsic::uadd_with_overflow;
3382 break;
3385 AArch64CC::CondCode TmpCC;
3386 switch (IID) {
3387 default:
3388 return false;
3389 case Intrinsic::sadd_with_overflow:
3390 case Intrinsic::ssub_with_overflow:
3391 TmpCC = AArch64CC::VS;
3392 break;
3393 case Intrinsic::uadd_with_overflow:
3394 TmpCC = AArch64CC::HS;
3395 break;
3396 case Intrinsic::usub_with_overflow:
3397 TmpCC = AArch64CC::LO;
3398 break;
3399 case Intrinsic::smul_with_overflow:
3400 case Intrinsic::umul_with_overflow:
3401 TmpCC = AArch64CC::NE;
3402 break;
3405 // Check if both instructions are in the same basic block.
3406 if (!isValueAvailable(II))
3407 return false;
3409 // Make sure nothing is in the way
3410 BasicBlock::const_iterator Start(I);
3411 BasicBlock::const_iterator End(II);
3412 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3413 // We only expect extractvalue instructions between the intrinsic and the
3414 // instruction to be selected.
3415 if (!isa<ExtractValueInst>(Itr))
3416 return false;
3418 // Check that the extractvalue operand comes from the intrinsic.
3419 const auto *EVI = cast<ExtractValueInst>(Itr);
3420 if (EVI->getAggregateOperand() != II)
3421 return false;
3424 CC = TmpCC;
3425 return true;
3428 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3429 // FIXME: Handle more intrinsics.
3430 switch (II->getIntrinsicID()) {
3431 default: return false;
3432 case Intrinsic::frameaddress: {
3433 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3434 MFI.setFrameAddressIsTaken(true);
3436 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3437 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3438 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3440 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3441 // Recursively load frame address
3442 // ldr x0, [fp]
3443 // ldr x0, [x0]
3444 // ldr x0, [x0]
3445 // ...
3446 unsigned DestReg;
3447 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3448 while (Depth--) {
3449 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3450 SrcReg, /*IsKill=*/true, 0);
3451 assert(DestReg && "Unexpected LDR instruction emission failure.");
3452 SrcReg = DestReg;
3455 updateValueMap(II, SrcReg);
3456 return true;
3458 case Intrinsic::sponentry: {
3459 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3461 // SP = FP + Fixed Object + 16
3462 int FI = MFI.CreateFixedObject(4, 0, false);
3463 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3464 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3465 TII.get(AArch64::ADDXri), ResultReg)
3466 .addFrameIndex(FI)
3467 .addImm(0)
3468 .addImm(0);
3470 updateValueMap(II, ResultReg);
3471 return true;
3473 case Intrinsic::memcpy:
3474 case Intrinsic::memmove: {
3475 const auto *MTI = cast<MemTransferInst>(II);
3476 // Don't handle volatile.
3477 if (MTI->isVolatile())
3478 return false;
3480 // Disable inlining for memmove before calls to computeAddress. Otherwise,
3481 // we would emit dead code because we don't currently handle memmoves.
3482 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3483 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3484 // Small memcpy's are common enough that we want to do them without a call
3485 // if possible.
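// tryEmitSmallMemCpy expands the copy inline as a short sequence of loads
// and stores when the length and alignment allow it.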
3486 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3487 unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3488 MTI->getSourceAlignment());
3489 if (isMemCpySmall(Len, Alignment)) {
3490 Address Dest, Src;
3491 if (!computeAddress(MTI->getRawDest(), Dest) ||
3492 !computeAddress(MTI->getRawSource(), Src))
3493 return false;
3494 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3495 return true;
3499 if (!MTI->getLength()->getType()->isIntegerTy(64))
3500 return false;
3502 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3503 // Fast instruction selection doesn't support the special
3504 // address spaces.
3505 return false;
3507 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3508 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3510 case Intrinsic::memset: {
3511 const MemSetInst *MSI = cast<MemSetInst>(II);
3512 // Don't handle volatile.
3513 if (MSI->isVolatile())
3514 return false;
3516 if (!MSI->getLength()->getType()->isIntegerTy(64))
3517 return false;
3519 if (MSI->getDestAddressSpace() > 255)
3520 // Fast instruction selection doesn't support the special
3521 // address spaces.
3522 return false;
3524 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3526 case Intrinsic::sin:
3527 case Intrinsic::cos:
3528 case Intrinsic::pow: {
3529 MVT RetVT;
3530 if (!isTypeLegal(II->getType(), RetVT))
3531 return false;
3533 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3534 return false;
3536 static const RTLIB::Libcall LibCallTable[3][2] = {
3537 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3538 { RTLIB::COS_F32, RTLIB::COS_F64 },
3539 { RTLIB::POW_F32, RTLIB::POW_F64 }
3541 RTLIB::Libcall LC;
3542 bool Is64Bit = RetVT == MVT::f64;
3543 switch (II->getIntrinsicID()) {
3544 default:
3545 llvm_unreachable("Unexpected intrinsic.");
3546 case Intrinsic::sin:
3547 LC = LibCallTable[0][Is64Bit];
3548 break;
3549 case Intrinsic::cos:
3550 LC = LibCallTable[1][Is64Bit];
3551 break;
3552 case Intrinsic::pow:
3553 LC = LibCallTable[2][Is64Bit];
3554 break;
3557 ArgListTy Args;
3558 Args.reserve(II->getNumArgOperands());
3560 // Populate the argument list.
3561 for (auto &Arg : II->arg_operands()) {
3562 ArgListEntry Entry;
3563 Entry.Val = Arg;
3564 Entry.Ty = Arg->getType();
3565 Args.push_back(Entry);
3568 CallLoweringInfo CLI;
3569 MCContext &Ctx = MF->getContext();
3570 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3571 TLI.getLibcallName(LC), std::move(Args));
3572 if (!lowerCallTo(CLI))
3573 return false;
3574 updateValueMap(II, CLI.ResultReg);
3575 return true;
3577 case Intrinsic::fabs: {
3578 MVT VT;
3579 if (!isTypeLegal(II->getType(), VT))
3580 return false;
3582 unsigned Opc;
3583 switch (VT.SimpleTy) {
3584 default:
3585 return false;
3586 case MVT::f32:
3587 Opc = AArch64::FABSSr;
3588 break;
3589 case MVT::f64:
3590 Opc = AArch64::FABSDr;
3591 break;
3593 unsigned SrcReg = getRegForValue(II->getOperand(0));
3594 if (!SrcReg)
3595 return false;
3596 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3597 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3598 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3599 .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3600 updateValueMap(II, ResultReg);
3601 return true;
3603 case Intrinsic::trap:
3604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3605 .addImm(1);
3606 return true;
3607 case Intrinsic::debugtrap: {
3608 if (Subtarget->isTargetWindows()) {
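// On Windows a debug break is BRK #0xF000; all other configurations are
// left to the default lowering.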
3609 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3610 .addImm(0xF000);
3611 return true;
3613 break;
3616 case Intrinsic::sqrt: {
3617 Type *RetTy = II->getCalledFunction()->getReturnType();
3619 MVT VT;
3620 if (!isTypeLegal(RetTy, VT))
3621 return false;
3623 unsigned Op0Reg = getRegForValue(II->getOperand(0));
3624 if (!Op0Reg)
3625 return false;
3626 bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3628 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3629 if (!ResultReg)
3630 return false;
3632 updateValueMap(II, ResultReg);
3633 return true;
3635 case Intrinsic::sadd_with_overflow:
3636 case Intrinsic::uadd_with_overflow:
3637 case Intrinsic::ssub_with_overflow:
3638 case Intrinsic::usub_with_overflow:
3639 case Intrinsic::smul_with_overflow:
3640 case Intrinsic::umul_with_overflow: {
3641 // This implements the basic lowering of the xalu with overflow intrinsics.
3642 const Function *Callee = II->getCalledFunction();
3643 auto *Ty = cast<StructType>(Callee->getReturnType());
3644 Type *RetTy = Ty->getTypeAtIndex(0U);
3646 MVT VT;
3647 if (!isTypeLegal(RetTy, VT))
3648 return false;
3650 if (VT != MVT::i32 && VT != MVT::i64)
3651 return false;
3653 const Value *LHS = II->getArgOperand(0);
3654 const Value *RHS = II->getArgOperand(1);
3655 // Canonicalize immediate to the RHS.
3656 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3657 isCommutativeIntrinsic(II))
3658 std::swap(LHS, RHS);
3660 // Simplify multiplies.
3661 Intrinsic::ID IID = II->getIntrinsicID();
3662 switch (IID) {
3663 default:
3664 break;
3665 case Intrinsic::smul_with_overflow:
3666 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3667 if (C->getValue() == 2) {
3668 IID = Intrinsic::sadd_with_overflow;
3669 RHS = LHS;
3671 break;
3672 case Intrinsic::umul_with_overflow:
3673 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3674 if (C->getValue() == 2) {
3675 IID = Intrinsic::uadd_with_overflow;
3676 RHS = LHS;
3678 break;
3681 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3682 AArch64CC::CondCode CC = AArch64CC::Invalid;
3683 switch (IID) {
3684 default: llvm_unreachable("Unexpected intrinsic!");
3685 case Intrinsic::sadd_with_overflow:
3686 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687 CC = AArch64CC::VS;
3688 break;
3689 case Intrinsic::uadd_with_overflow:
3690 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3691 CC = AArch64CC::HS;
3692 break;
3693 case Intrinsic::ssub_with_overflow:
3694 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695 CC = AArch64CC::VS;
3696 break;
3697 case Intrinsic::usub_with_overflow:
3698 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3699 CC = AArch64CC::LO;
3700 break;
3701 case Intrinsic::smul_with_overflow: {
3702 CC = AArch64CC::NE;
3703 unsigned LHSReg = getRegForValue(LHS);
3704 if (!LHSReg)
3705 return false;
3706 bool LHSIsKill = hasTrivialKill(LHS);
3708 unsigned RHSReg = getRegForValue(RHS);
3709 if (!RHSReg)
3710 return false;
3711 bool RHSIsKill = hasTrivialKill(RHS);
3713 if (VT == MVT::i32) {
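// For i32, widen to a 64-bit signed multiply and flag overflow when the high
// 32 bits of the product differ from the sign extension of the low 32 bits.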
3714 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3715 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3716 /*IsKill=*/false, 32);
3717 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3718 AArch64::sub_32);
3719 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3720 AArch64::sub_32);
3721 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3722 AArch64_AM::ASR, 31, /*WantResult=*/false);
3723 } else {
3724 assert(VT == MVT::i64 && "Unexpected value type.");
3725 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3726 // reused in the next instruction.
3727 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3728 /*IsKill=*/false);
3729 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3730 RHSReg, RHSIsKill);
3731 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3732 AArch64_AM::ASR, 63, /*WantResult=*/false);
3734 break;
3736 case Intrinsic::umul_with_overflow: {
3737 CC = AArch64CC::NE;
3738 unsigned LHSReg = getRegForValue(LHS);
3739 if (!LHSReg)
3740 return false;
3741 bool LHSIsKill = hasTrivialKill(LHS);
3743 unsigned RHSReg = getRegForValue(RHS);
3744 if (!RHSReg)
3745 return false;
3746 bool RHSIsKill = hasTrivialKill(RHS);
3748 if (VT == MVT::i32) {
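// For i32, widen to a 64-bit unsigned multiply; any nonzero bit in the high
// 32 bits of the product indicates overflow.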
3749 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3750 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3751 /*IsKill=*/false, AArch64_AM::LSR, 32,
3752 /*WantResult=*/false);
3753 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3754 AArch64::sub_32);
3755 } else {
3756 assert(VT == MVT::i64 && "Unexpected value type.");
3757 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3758 // reused in the next instruction.
3759 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3760 /*IsKill=*/false);
3761 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3762 RHSReg, RHSIsKill);
3763 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3764 /*IsKill=*/false, /*WantResult=*/false);
3766 break;
3770 if (MulReg) {
3771 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3772 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3773 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3776 if (!ResultReg1)
3777 return false;
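// Materialize the overflow bit: a CSINC of WZR and WZR under the inverted
// condition yields 1 exactly when CC holds and 0 otherwise.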
3779 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3780 AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3781 /*IsKill=*/true, getInvertedCondCode(CC));
3782 (void)ResultReg2;
3783 assert((ResultReg1 + 1) == ResultReg2 &&
3784 "Nonconsecutive result registers.");
3785 updateValueMap(II, ResultReg1, 2);
3786 return true;
3789 return false;
3792 bool AArch64FastISel::selectRet(const Instruction *I) {
3793 const ReturnInst *Ret = cast<ReturnInst>(I);
3794 const Function &F = *I->getParent()->getParent();
3796 if (!FuncInfo.CanLowerReturn)
3797 return false;
3799 if (F.isVarArg())
3800 return false;
3802 if (TLI.supportSwiftError() &&
3803 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3804 return false;
3806 if (TLI.supportSplitCSR(FuncInfo.MF))
3807 return false;
3809 // Build a list of return value registers.
3810 SmallVector<unsigned, 4> RetRegs;
3812 if (Ret->getNumOperands() > 0) {
3813 CallingConv::ID CC = F.getCallingConv();
3814 SmallVector<ISD::OutputArg, 4> Outs;
3815 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3817 // Analyze operands of the call, assigning locations to each operand.
3818 SmallVector<CCValAssign, 16> ValLocs;
3819 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3820 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3821 : RetCC_AArch64_AAPCS;
3822 CCInfo.AnalyzeReturn(Outs, RetCC);
3824 // Only handle a single return value for now.
3825 if (ValLocs.size() != 1)
3826 return false;
3828 CCValAssign &VA = ValLocs[0];
3829 const Value *RV = Ret->getOperand(0);
3831 // Don't bother handling odd stuff for now.
3832 if ((VA.getLocInfo() != CCValAssign::Full) &&
3833 (VA.getLocInfo() != CCValAssign::BCvt))
3834 return false;
3836 // Only handle register returns for now.
3837 if (!VA.isRegLoc())
3838 return false;
3840 unsigned Reg = getRegForValue(RV);
3841 if (Reg == 0)
3842 return false;
3844 unsigned SrcReg = Reg + VA.getValNo();
3845 Register DestReg = VA.getLocReg();
3846 // Avoid a cross-class copy. This is very unlikely.
3847 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3848 return false;
3850 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3851 if (!RVEVT.isSimple())
3852 return false;
3854 // Vectors (of > 1 lane) in big endian need tricky handling.
3855 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3856 !Subtarget->isLittleEndian())
3857 return false;
3859 MVT RVVT = RVEVT.getSimpleVT();
3860 if (RVVT == MVT::f128)
3861 return false;
3863 MVT DestVT = VA.getValVT();
3864 // Special handling for extended integers.
3865 if (RVVT != DestVT) {
3866 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3867 return false;
3869 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3870 return false;
3872 bool IsZExt = Outs[0].Flags.isZExt();
3873 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3874 if (SrcReg == 0)
3875 return false;
3878 // Make the copy.
3879 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3880 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3882 // Add register to return instruction.
3883 RetRegs.push_back(VA.getLocReg());
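// Emit the return and list every return-value register as an implicit use so
// the value stays live up to the RET.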
3886 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3887 TII.get(AArch64::RET_ReallyLR));
3888 for (unsigned RetReg : RetRegs)
3889 MIB.addReg(RetReg, RegState::Implicit);
3890 return true;
3893 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3894 Type *DestTy = I->getType();
3895 Value *Op = I->getOperand(0);
3896 Type *SrcTy = Op->getType();
3898 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3899 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3900 if (!SrcEVT.isSimple())
3901 return false;
3902 if (!DestEVT.isSimple())
3903 return false;
3905 MVT SrcVT = SrcEVT.getSimpleVT();
3906 MVT DestVT = DestEVT.getSimpleVT();
3908 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3909 SrcVT != MVT::i8)
3910 return false;
3911 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3912 DestVT != MVT::i1)
3913 return false;
3915 unsigned SrcReg = getRegForValue(Op);
3916 if (!SrcReg)
3917 return false;
3918 bool SrcIsKill = hasTrivialKill(Op);
3920 // If we're truncating from i64 to a smaller non-legal type, then generate an
3921 // AND. Otherwise, we know the high bits are undefined and a truncate only
3922 // generates a COPY. We cannot mark the source register also as the result
3923 // register, because this can incorrectly transfer the kill flag onto the
3924 // source register.
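// For example (illustrative), an i64 -> i8 truncate becomes an EXTRACT_SUBREG
// to sub_32 followed by an ANDWri with the mask 0xff.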
3925 unsigned ResultReg;
3926 if (SrcVT == MVT::i64) {
3927 uint64_t Mask = 0;
3928 switch (DestVT.SimpleTy) {
3929 default:
3930 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3931 return false;
3932 case MVT::i1:
3933 Mask = 0x1;
3934 break;
3935 case MVT::i8:
3936 Mask = 0xff;
3937 break;
3938 case MVT::i16:
3939 Mask = 0xffff;
3940 break;
3942 // Issue an extract_subreg to get the lower 32-bits.
3943 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3944 AArch64::sub_32);
3945 // Create the AND instruction which performs the actual truncation.
3946 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3947 assert(ResultReg && "Unexpected AND instruction emission failure.");
3948 } else {
3949 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3950 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3951 TII.get(TargetOpcode::COPY), ResultReg)
3952 .addReg(SrcReg, getKillRegState(SrcIsKill));
3955 updateValueMap(I, ResultReg);
3956 return true;
3959 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3960 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3961 DestVT == MVT::i64) &&
3962 "Unexpected value type.");
3963 // Handle i8 and i16 as i32.
3964 if (DestVT == MVT::i8 || DestVT == MVT::i16)
3965 DestVT = MVT::i32;
3967 if (IsZExt) {
3968 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3969 assert(ResultReg && "Unexpected AND instruction emission failure.");
3970 if (DestVT == MVT::i64) {
3971 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3972 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3973 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3974 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3975 TII.get(AArch64::SUBREG_TO_REG), Reg64)
3976 .addImm(0)
3977 .addReg(ResultReg)
3978 .addImm(AArch64::sub_32);
3979 ResultReg = Reg64;
3981 return ResultReg;
3982 } else {
3983 if (DestVT == MVT::i64) {
3984 // FIXME: We're SExt i1 to i64.
3985 return 0;
3987 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3988 /*TODO:IsKill=*/false, 0, 0);
3992 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3993 unsigned Op1, bool Op1IsKill) {
3994 unsigned Opc, ZReg;
3995 switch (RetVT.SimpleTy) {
3996 default: return 0;
3997 case MVT::i8:
3998 case MVT::i16:
3999 case MVT::i32:
4000 RetVT = MVT::i32;
4001 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4002 case MVT::i64:
4003 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4006 const TargetRegisterClass *RC =
4007 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4008 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4009 ZReg, /*IsKill=*/true);
4012 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4013 unsigned Op1, bool Op1IsKill) {
4014 if (RetVT != MVT::i64)
4015 return 0;
4017 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4018 Op0, Op0IsKill, Op1, Op1IsKill,
4019 AArch64::XZR, /*IsKill=*/true);
4022 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4023 unsigned Op1, bool Op1IsKill) {
4024 if (RetVT != MVT::i64)
4025 return 0;
4027 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028 Op0, Op0IsKill, Op1, Op1IsKill,
4029 AArch64::XZR, /*IsKill=*/true);
4032 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4033 unsigned Op1Reg, bool Op1IsKill) {
4034 unsigned Opc = 0;
4035 bool NeedTrunc = false;
4036 uint64_t Mask = 0;
4037 switch (RetVT.SimpleTy) {
4038 default: return 0;
4039 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4040 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4041 case MVT::i32: Opc = AArch64::LSLVWr; break;
4042 case MVT::i64: Opc = AArch64::LSLVXr; break;
4045 const TargetRegisterClass *RC =
4046 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4047 if (NeedTrunc) {
4048 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4049 Op1IsKill = true;
4051 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4052 Op1IsKill);
4053 if (NeedTrunc)
4054 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4055 return ResultReg;
4058 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4059 bool Op0IsKill, uint64_t Shift,
4060 bool IsZExt) {
4061 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4062 "Unexpected source/return type pair.");
4063 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4064 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4065 "Unexpected source value type.");
4066 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4067 RetVT == MVT::i64) && "Unexpected return value type.");
4069 bool Is64Bit = (RetVT == MVT::i64);
4070 unsigned RegSize = Is64Bit ? 64 : 32;
4071 unsigned DstBits = RetVT.getSizeInBits();
4072 unsigned SrcBits = SrcVT.getSizeInBits();
4073 const TargetRegisterClass *RC =
4074 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4076 // Just emit a copy for "zero" shifts.
4077 if (Shift == 0) {
4078 if (RetVT == SrcVT) {
4079 unsigned ResultReg = createResultReg(RC);
4080 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4081 TII.get(TargetOpcode::COPY), ResultReg)
4082 .addReg(Op0, getKillRegState(Op0IsKill));
4083 return ResultReg;
4084 } else
4085 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4088 // Don't deal with undefined shifts.
4089 if (Shift >= DstBits)
4090 return 0;
4092 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4093 // {S|U}BFM Wd, Wn, #r, #s
4094 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4096 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4097 // %2 = shl i16 %1, 4
4098 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4099 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4100 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4101 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4103 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4104 // %2 = shl i16 %1, 8
4105 // Wd<32+7-24,32-24> = Wn<7:0>
4106 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4107 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4108 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4110 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4111 // %2 = shl i16 %1, 12
4112 // Wd<32+3-20,32-20> = Wn<3:0>
4113 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4114 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4115 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4117 unsigned ImmR = RegSize - Shift;
4118 // Limit the width to the length of the source type.
4119 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
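// ImmR rotates the shifted field into position and ImmS limits the width to
// the bits actually defined by the source, matching the {S|U}BFM encoding of
// a left shift sketched above.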
4120 static const unsigned OpcTable[2][2] = {
4121 {AArch64::SBFMWri, AArch64::SBFMXri},
4122 {AArch64::UBFMWri, AArch64::UBFMXri}
4124 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4125 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4126 Register TmpReg = MRI.createVirtualRegister(RC);
4127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4128 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4129 .addImm(0)
4130 .addReg(Op0, getKillRegState(Op0IsKill))
4131 .addImm(AArch64::sub_32);
4132 Op0 = TmpReg;
4133 Op0IsKill = true;
4135 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4138 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4139 unsigned Op1Reg, bool Op1IsKill) {
4140 unsigned Opc = 0;
4141 bool NeedTrunc = false;
4142 uint64_t Mask = 0;
4143 switch (RetVT.SimpleTy) {
4144 default: return 0;
4145 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4146 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4147 case MVT::i32: Opc = AArch64::LSRVWr; break;
4148 case MVT::i64: Opc = AArch64::LSRVXr; break;
4151 const TargetRegisterClass *RC =
4152 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4153 if (NeedTrunc) {
4154 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4155 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4156 Op0IsKill = Op1IsKill = true;
4158 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4159 Op1IsKill);
4160 if (NeedTrunc)
4161 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4162 return ResultReg;
4165 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4166 bool Op0IsKill, uint64_t Shift,
4167 bool IsZExt) {
4168 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4169 "Unexpected source/return type pair.");
4170 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4171 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4172 "Unexpected source value type.");
4173 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4174 RetVT == MVT::i64) && "Unexpected return value type.");
4176 bool Is64Bit = (RetVT == MVT::i64);
4177 unsigned RegSize = Is64Bit ? 64 : 32;
4178 unsigned DstBits = RetVT.getSizeInBits();
4179 unsigned SrcBits = SrcVT.getSizeInBits();
4180 const TargetRegisterClass *RC =
4181 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4183 // Just emit a copy for "zero" shifts.
4184 if (Shift == 0) {
4185 if (RetVT == SrcVT) {
4186 unsigned ResultReg = createResultReg(RC);
4187 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4188 TII.get(TargetOpcode::COPY), ResultReg)
4189 .addReg(Op0, getKillRegState(Op0IsKill));
4190 return ResultReg;
4191 } else
4192 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4195 // Don't deal with undefined shifts.
4196 if (Shift >= DstBits)
4197 return 0;
4199 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4200 // {S|U}BFM Wd, Wn, #r, #s
4201 // Wd<s-r:0> = Wn<s:r> when r <= s
4203 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4204 // %2 = lshr i16 %1, 4
4205 // Wd<7-4:0> = Wn<7:4>
4206 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4207 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4208 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4210 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4211 // %2 = lshr i16 %1, 8
4212 // Wd<7-7,0> = Wn<7:7>
4213 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4214 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4215 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4217 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4218 // %2 = lshr i16 %1, 12
4219 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4220 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4221 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4222 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4224 if (Shift >= SrcBits && IsZExt)
4225 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4227 // It is not possible to fold a sign-extend into the LShr instruction. In this
4228 // case emit a sign-extend.
4229 if (!IsZExt) {
4230 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4231 if (!Op0)
4232 return 0;
4233 Op0IsKill = true;
4234 SrcVT = RetVT;
4235 SrcBits = SrcVT.getSizeInBits();
4236 IsZExt = true;
4239 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4240 unsigned ImmS = SrcBits - 1;
4241 static const unsigned OpcTable[2][2] = {
4242 {AArch64::SBFMWri, AArch64::SBFMXri},
4243 {AArch64::UBFMWri, AArch64::UBFMXri}
4245 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4246 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4247 Register TmpReg = MRI.createVirtualRegister(RC);
4248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4249 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4250 .addImm(0)
4251 .addReg(Op0, getKillRegState(Op0IsKill))
4252 .addImm(AArch64::sub_32);
4253 Op0 = TmpReg;
4254 Op0IsKill = true;
4256 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4259 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4260 unsigned Op1Reg, bool Op1IsKill) {
4261 unsigned Opc = 0;
4262 bool NeedTrunc = false;
4263 uint64_t Mask = 0;
4264 switch (RetVT.SimpleTy) {
4265 default: return 0;
4266 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4267 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4268 case MVT::i32: Opc = AArch64::ASRVWr; break;
4269 case MVT::i64: Opc = AArch64::ASRVXr; break;
4272 const TargetRegisterClass *RC =
4273 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4274 if (NeedTrunc) {
4275 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4276 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4277 Op0IsKill = Op1IsKill = true;
4279 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4280 Op1IsKill);
4281 if (NeedTrunc)
4282 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4283 return ResultReg;
4286 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4287 bool Op0IsKill, uint64_t Shift,
4288 bool IsZExt) {
4289 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4290 "Unexpected source/return type pair.");
4291 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4292 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4293 "Unexpected source value type.");
4294 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4295 RetVT == MVT::i64) && "Unexpected return value type.");
4297 bool Is64Bit = (RetVT == MVT::i64);
4298 unsigned RegSize = Is64Bit ? 64 : 32;
4299 unsigned DstBits = RetVT.getSizeInBits();
4300 unsigned SrcBits = SrcVT.getSizeInBits();
4301 const TargetRegisterClass *RC =
4302 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4304 // Just emit a copy for "zero" shifts.
4305 if (Shift == 0) {
4306 if (RetVT == SrcVT) {
4307 unsigned ResultReg = createResultReg(RC);
4308 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4309 TII.get(TargetOpcode::COPY), ResultReg)
4310 .addReg(Op0, getKillRegState(Op0IsKill));
4311 return ResultReg;
4312 } else
4313 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4316 // Don't deal with undefined shifts.
4317 if (Shift >= DstBits)
4318 return 0;
4320 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4321 // {S|U}BFM Wd, Wn, #r, #s
4322 // Wd<s-r:0> = Wn<s:r> when r <= s
4324 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4325 // %2 = ashr i16 %1, 4
4326 // Wd<7-4:0> = Wn<7:4>
4327 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4328 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4329 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4331 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4332 // %2 = ashr i16 %1, 8
4333 // Wd<7-7,0> = Wn<7:7>
4334 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4335 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4336 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4338 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4339 // %2 = ashr i16 %1, 12
4340 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4341 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4342 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4343 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4345 if (Shift >= SrcBits && IsZExt)
4346 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4348 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4349 unsigned ImmS = SrcBits - 1;
4350 static const unsigned OpcTable[2][2] = {
4351 {AArch64::SBFMWri, AArch64::SBFMXri},
4352 {AArch64::UBFMWri, AArch64::UBFMXri}
4354 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4355 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4356 Register TmpReg = MRI.createVirtualRegister(RC);
4357 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4358 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4359 .addImm(0)
4360 .addReg(Op0, getKillRegState(Op0IsKill))
4361 .addImm(AArch64::sub_32);
4362 Op0 = TmpReg;
4363 Op0IsKill = true;
4365 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4368 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4369 bool IsZExt) {
4370 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4372 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4373 // DestVT are odd things, so test to make sure that they are both types we can
4374 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4375 // bail out to SelectionDAG.
4376 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4377 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4378 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4379 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4380 return 0;
4382 unsigned Opc;
4383 unsigned Imm = 0;
4385 switch (SrcVT.SimpleTy) {
4386 default:
4387 return 0;
4388 case MVT::i1:
4389 return emiti1Ext(SrcReg, DestVT, IsZExt);
4390 case MVT::i8:
4391 if (DestVT == MVT::i64)
4392 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393 else
4394 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4395 Imm = 7;
4396 break;
4397 case MVT::i16:
4398 if (DestVT == MVT::i64)
4399 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4400 else
4401 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4402 Imm = 15;
4403 break;
4404 case MVT::i32:
4405 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4406 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4407 Imm = 31;
4408 break;
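// The extension itself is a bitfield move: {U|S}BFM dst, src, #0, #Imm
// extracts the low Imm+1 bits and zero-/sign-extends them into the result.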
4411 // Handle i8 and i16 as i32.
4412 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4413 DestVT = MVT::i32;
4414 else if (DestVT == MVT::i64) {
4415 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4416 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4417 TII.get(AArch64::SUBREG_TO_REG), Src64)
4418 .addImm(0)
4419 .addReg(SrcReg)
4420 .addImm(AArch64::sub_32);
4421 SrcReg = Src64;
4424 const TargetRegisterClass *RC =
4425 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4426 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4429 static bool isZExtLoad(const MachineInstr *LI) {
4430 switch (LI->getOpcode()) {
4431 default:
4432 return false;
4433 case AArch64::LDURBBi:
4434 case AArch64::LDURHHi:
4435 case AArch64::LDURWi:
4436 case AArch64::LDRBBui:
4437 case AArch64::LDRHHui:
4438 case AArch64::LDRWui:
4439 case AArch64::LDRBBroX:
4440 case AArch64::LDRHHroX:
4441 case AArch64::LDRWroX:
4442 case AArch64::LDRBBroW:
4443 case AArch64::LDRHHroW:
4444 case AArch64::LDRWroW:
4445 return true;
4449 static bool isSExtLoad(const MachineInstr *LI) {
4450 switch (LI->getOpcode()) {
4451 default:
4452 return false;
4453 case AArch64::LDURSBWi:
4454 case AArch64::LDURSHWi:
4455 case AArch64::LDURSBXi:
4456 case AArch64::LDURSHXi:
4457 case AArch64::LDURSWi:
4458 case AArch64::LDRSBWui:
4459 case AArch64::LDRSHWui:
4460 case AArch64::LDRSBXui:
4461 case AArch64::LDRSHXui:
4462 case AArch64::LDRSWui:
4463 case AArch64::LDRSBWroX:
4464 case AArch64::LDRSHWroX:
4465 case AArch64::LDRSBXroX:
4466 case AArch64::LDRSHXroX:
4467 case AArch64::LDRSWroX:
4468 case AArch64::LDRSBWroW:
4469 case AArch64::LDRSHWroW:
4470 case AArch64::LDRSBXroW:
4471 case AArch64::LDRSHXroW:
4472 case AArch64::LDRSWroW:
4473 return true;
4477 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4478 MVT SrcVT) {
4479 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4480 if (!LI || !LI->hasOneUse())
4481 return false;
4483 // Check if the load instruction has already been selected.
4484 unsigned Reg = lookUpRegForValue(LI);
4485 if (!Reg)
4486 return false;
4488 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4489 if (!MI)
4490 return false;
4492 // Check if the correct load instruction has been emitted - SelectionDAG might
4493 // have emitted a zero-extending load, but we need a sign-extending load.
4494 bool IsZExt = isa<ZExtInst>(I);
4495 const auto *LoadMI = MI;
4496 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4497 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4498 Register LoadReg = MI->getOperand(1).getReg();
4499 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4500 assert(LoadMI && "Expected valid instruction");
4502 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4503 return false;
4505 // Nothing to be done.
4506 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4507 updateValueMap(I, Reg);
4508 return true;
4511 if (IsZExt) {
4512 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4513 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4514 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4515 .addImm(0)
4516 .addReg(Reg, getKillRegState(true))
4517 .addImm(AArch64::sub_32);
4518 Reg = Reg64;
4519 } else {
4520 assert((MI->getOpcode() == TargetOpcode::COPY &&
4521 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4522 "Expected copy instruction");
4523 Reg = MI->getOperand(1).getReg();
4524 MachineBasicBlock::iterator I(MI);
4525 removeDeadCode(I, std::next(I));
4527 updateValueMap(I, Reg);
4528 return true;
4531 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4532 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4533 "Unexpected integer extend instruction.");
4534 MVT RetVT;
4535 MVT SrcVT;
4536 if (!isTypeSupported(I->getType(), RetVT))
4537 return false;
4539 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4540 return false;
4542 // Try to optimize already sign-/zero-extended values from load instructions.
4543 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4544 return true;
4546 unsigned SrcReg = getRegForValue(I->getOperand(0));
4547 if (!SrcReg)
4548 return false;
4549 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4551 // Try to optimize already sign-/zero-extended values from function arguments.
4552 bool IsZExt = isa<ZExtInst>(I);
4553 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4554 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4555 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4556 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4557 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4558 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4559 .addImm(0)
4560 .addReg(SrcReg, getKillRegState(SrcIsKill))
4561 .addImm(AArch64::sub_32);
4562 SrcReg = ResultReg;
4564 // Conservatively clear all kill flags from all uses, because we are
4565 // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4566 // level. The result of the instruction at IR level might have been
4567 // trivially dead, which is now no longer true.
4568 unsigned UseReg = lookUpRegForValue(I);
4569 if (UseReg)
4570 MRI.clearKillFlags(UseReg);
4572 updateValueMap(I, SrcReg);
4573 return true;
4577 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4578 if (!ResultReg)
4579 return false;
4581 updateValueMap(I, ResultReg);
4582 return true;
4585 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4586 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4587 if (!DestEVT.isSimple())
4588 return false;
4590 MVT DestVT = DestEVT.getSimpleVT();
4591 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4592 return false;
4594 unsigned DivOpc;
4595 bool Is64bit = (DestVT == MVT::i64);
4596 switch (ISDOpcode) {
4597 default:
4598 return false;
4599 case ISD::SREM:
4600 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4601 break;
4602 case ISD::UREM:
4603 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4604 break;
4606 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4607 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4608 if (!Src0Reg)
4609 return false;
4610 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4612 unsigned Src1Reg = getRegForValue(I->getOperand(1));
4613 if (!Src1Reg)
4614 return false;
4615 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4617 const TargetRegisterClass *RC =
4618 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4619 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4620 Src1Reg, /*IsKill=*/false);
4621 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4622 // The remainder is computed as numerator - (quotient * denominator) using the
4623 // MSUB instruction.
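// e.g. for a 32-bit srem this is roughly "sdiv w8, w0, w1; msub w0, w8, w1,
// w0" (register choices are illustrative).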
4624 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4625 Src1Reg, Src1IsKill, Src0Reg,
4626 Src0IsKill);
4627 updateValueMap(I, ResultReg);
4628 return true;
4631 bool AArch64FastISel::selectMul(const Instruction *I) {
4632 MVT VT;
4633 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4634 return false;
4636 if (VT.isVector())
4637 return selectBinaryOp(I, ISD::MUL);
4639 const Value *Src0 = I->getOperand(0);
4640 const Value *Src1 = I->getOperand(1);
4641 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4642 if (C->getValue().isPowerOf2())
4643 std::swap(Src0, Src1);
4645 // Try to simplify to a shift instruction.
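// A multiply by 2^k is emitted as a left shift by k; a free zero- or
// sign-extend of the non-constant operand can be folded into that shift.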
4646 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4647 if (C->getValue().isPowerOf2()) {
4648 uint64_t ShiftVal = C->getValue().logBase2();
4649 MVT SrcVT = VT;
4650 bool IsZExt = true;
4651 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4652 if (!isIntExtFree(ZExt)) {
4653 MVT VT;
4654 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4655 SrcVT = VT;
4656 IsZExt = true;
4657 Src0 = ZExt->getOperand(0);
4660 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4661 if (!isIntExtFree(SExt)) {
4662 MVT VT;
4663 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4664 SrcVT = VT;
4665 IsZExt = false;
4666 Src0 = SExt->getOperand(0);
4671 unsigned Src0Reg = getRegForValue(Src0);
4672 if (!Src0Reg)
4673 return false;
4674 bool Src0IsKill = hasTrivialKill(Src0);
4676 unsigned ResultReg =
4677 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4679 if (ResultReg) {
4680 updateValueMap(I, ResultReg);
4681 return true;
4685 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4686 if (!Src0Reg)
4687 return false;
4688 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4690 unsigned Src1Reg = getRegForValue(I->getOperand(1));
4691 if (!Src1Reg)
4692 return false;
4693 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4695 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4697 if (!ResultReg)
4698 return false;
4700 updateValueMap(I, ResultReg);
4701 return true;
4704 bool AArch64FastISel::selectShift(const Instruction *I) {
4705 MVT RetVT;
4706 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4707 return false;
4709 if (RetVT.isVector())
4710 return selectOperator(I, I->getOpcode());
4712 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4713 unsigned ResultReg = 0;
4714 uint64_t ShiftVal = C->getZExtValue();
4715 MVT SrcVT = RetVT;
4716 bool IsZExt = I->getOpcode() != Instruction::AShr;
4717 const Value *Op0 = I->getOperand(0);
4718 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4719 if (!isIntExtFree(ZExt)) {
4720 MVT TmpVT;
4721 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4722 SrcVT = TmpVT;
4723 IsZExt = true;
4724 Op0 = ZExt->getOperand(0);
4727 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4728 if (!isIntExtFree(SExt)) {
4729 MVT TmpVT;
4730 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4731 SrcVT = TmpVT;
4732 IsZExt = false;
4733 Op0 = SExt->getOperand(0);
4738 unsigned Op0Reg = getRegForValue(Op0);
4739 if (!Op0Reg)
4740 return false;
4741 bool Op0IsKill = hasTrivialKill(Op0);
4743 switch (I->getOpcode()) {
4744 default: llvm_unreachable("Unexpected instruction.");
4745 case Instruction::Shl:
4746 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4747 break;
4748 case Instruction::AShr:
4749 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4750 break;
4751 case Instruction::LShr:
4752 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4753 break;
4755 if (!ResultReg)
4756 return false;
4758 updateValueMap(I, ResultReg);
4759 return true;
4762 unsigned Op0Reg = getRegForValue(I->getOperand(0));
4763 if (!Op0Reg)
4764 return false;
4765 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4767 unsigned Op1Reg = getRegForValue(I->getOperand(1));
4768 if (!Op1Reg)
4769 return false;
4770 bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4772 unsigned ResultReg = 0;
4773 switch (I->getOpcode()) {
4774 default: llvm_unreachable("Unexpected instruction.");
4775 case Instruction::Shl:
4776 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4777 break;
4778 case Instruction::AShr:
4779 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4780 break;
4781 case Instruction::LShr:
4782 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4783 break;
4786 if (!ResultReg)
4787 return false;
4789 updateValueMap(I, ResultReg);
4790 return true;
4793 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4794 MVT RetVT, SrcVT;
4796 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4797 return false;
4798 if (!isTypeLegal(I->getType(), RetVT))
4799 return false;
4801 unsigned Opc;
4802 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4803 Opc = AArch64::FMOVWSr;
4804 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4805 Opc = AArch64::FMOVXDr;
4806 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4807 Opc = AArch64::FMOVSWr;
4808 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4809 Opc = AArch64::FMOVDXr;
4810 else
4811 return false;
4813 const TargetRegisterClass *RC = nullptr;
4814 switch (RetVT.SimpleTy) {
4815 default: llvm_unreachable("Unexpected value type.");
4816 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4817 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4818 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4819 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4821 unsigned Op0Reg = getRegForValue(I->getOperand(0));
4822 if (!Op0Reg)
4823 return false;
4824 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4825 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4827 if (!ResultReg)
4828 return false;
4830 updateValueMap(I, ResultReg);
4831 return true;
4834 bool AArch64FastISel::selectFRem(const Instruction *I) {
4835 MVT RetVT;
4836 if (!isTypeLegal(I->getType(), RetVT))
4837 return false;
4839 RTLIB::Libcall LC;
4840 switch (RetVT.SimpleTy) {
4841 default:
4842 return false;
4843 case MVT::f32:
4844 LC = RTLIB::REM_F32;
4845 break;
4846 case MVT::f64:
4847 LC = RTLIB::REM_F64;
4848 break;
4851 ArgListTy Args;
4852 Args.reserve(I->getNumOperands());
4854 // Populate the argument list.
4855 for (auto &Arg : I->operands()) {
4856 ArgListEntry Entry;
4857 Entry.Val = Arg;
4858 Entry.Ty = Arg->getType();
4859 Args.push_back(Entry);
4862 CallLoweringInfo CLI;
4863 MCContext &Ctx = MF->getContext();
4864 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4865 TLI.getLibcallName(LC), std::move(Args));
4866 if (!lowerCallTo(CLI))
4867 return false;
4868 updateValueMap(I, CLI.ResultReg);
4869 return true;
4872 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4873 MVT VT;
4874 if (!isTypeLegal(I->getType(), VT))
4875 return false;
4877 if (!isa<ConstantInt>(I->getOperand(1)))
4878 return selectBinaryOp(I, ISD::SDIV);
4880 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4881 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4882 !(C.isPowerOf2() || (-C).isPowerOf2()))
4883 return selectBinaryOp(I, ISD::SDIV);
4885 unsigned Lg2 = C.countTrailingZeros();
4886 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4887 if (!Src0Reg)
4888 return false;
4889 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4891 if (cast<BinaryOperator>(I)->isExact()) {
4892 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4893 if (!ResultReg)
4894 return false;
4895 updateValueMap(I, ResultReg);
4896 return true;
4899 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
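// Signed division truncates toward zero, so a negative dividend is biased by
// 2^k - 1 before the arithmetic shift; the CSEL below applies that bias only
// when Src0 is negative.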
4900 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4901 if (!AddReg)
4902 return false;
4904 // (Src0 < 0) ? Pow2 - 1 : 0;
4905 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4906 return false;
4908 unsigned SelectOpc;
4909 const TargetRegisterClass *RC;
4910 if (VT == MVT::i64) {
4911 SelectOpc = AArch64::CSELXr;
4912 RC = &AArch64::GPR64RegClass;
4913 } else {
4914 SelectOpc = AArch64::CSELWr;
4915 RC = &AArch64::GPR32RegClass;
4917 unsigned SelectReg =
4918 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4919 Src0IsKill, AArch64CC::LT);
4920 if (!SelectReg)
4921 return false;
4923 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4924 // negate the result.
4925 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4926 unsigned ResultReg;
4927 if (C.isNegative())
4928 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4929 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4930 else
4931 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4933 if (!ResultReg)
4934 return false;
4936 updateValueMap(I, ResultReg);
4937 return true;
4940 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4941 /// have to duplicate it for AArch64, because otherwise we would fail during the
4942 /// sign-extend emission.
4943 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4944 unsigned IdxN = getRegForValue(Idx);
4945 if (IdxN == 0)
4946 // Unhandled operand. Halt "fast" selection and bail.
4947 return std::pair<unsigned, bool>(0, false);
4949 bool IdxNIsKill = hasTrivialKill(Idx);
4951 // If the index is smaller or larger than intptr_t, truncate or extend it.
4952 MVT PtrVT = TLI.getPointerTy(DL);
4953 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4954 if (IdxVT.bitsLT(PtrVT)) {
4955 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4956 IdxNIsKill = true;
4957 } else if (IdxVT.bitsGT(PtrVT))
4958 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4959 return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4962 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4963 /// duplicate it for AArch64, because otherwise we would bail out even for
4964 /// simple cases. This is because the standard fastEmit functions don't cover
4965 /// MUL at all and ADD is lowered very inefficiently.
4966 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4967 unsigned N = getRegForValue(I->getOperand(0));
4968 if (!N)
4969 return false;
4970 bool NIsKill = hasTrivialKill(I->getOperand(0));
4972 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4973 // into a single N = N + TotalOffset.
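// e.g. a GEP into field 1 of {i64, i32} simply adds the element offset of 8
// bytes to the running total instead of emitting an add per index.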
4974 uint64_t TotalOffs = 0;
4975 MVT VT = TLI.getPointerTy(DL);
4976 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4977 GTI != E; ++GTI) {
4978 const Value *Idx = GTI.getOperand();
4979 if (auto *StTy = GTI.getStructTypeOrNull()) {
4980 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4981 // N = N + Offset
4982 if (Field)
4983 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4984 } else {
4985 Type *Ty = GTI.getIndexedType();
4987 // If this is a constant subscript, handle it quickly.
4988 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4989 if (CI->isZero())
4990 continue;
4991 // N = N + Offset
4992 TotalOffs +=
4993 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4994 continue;
4996 if (TotalOffs) {
4997 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4998 if (!N)
4999 return false;
5000 NIsKill = true;
5001 TotalOffs = 0;
5004 // N = N + Idx * ElementSize;
5005 uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5006 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5007 unsigned IdxN = Pair.first;
5008 bool IdxNIsKill = Pair.second;
5009 if (!IdxN)
5010 return false;
5012 if (ElementSize != 1) {
5013 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5014 if (!C)
5015 return false;
5016 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5017 if (!IdxN)
5018 return false;
5019 IdxNIsKill = true;
5021 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5022 if (!N)
5023 return false;
5026 if (TotalOffs) {
5027 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5028 if (!N)
5029 return false;
5031 updateValueMap(I, N);
5032 return true;
5035 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5036 assert(TM.getOptLevel() == CodeGenOpt::None &&
5037 "cmpxchg survived AtomicExpand at optlevel > -O0");
5039 auto *RetPairTy = cast<StructType>(I->getType());
5040 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5041 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5042 "cmpxchg has a non-i1 status result");
5044 MVT VT;
5045 if (!isTypeLegal(RetTy, VT))
5046 return false;
5048 const TargetRegisterClass *ResRC;
5049 unsigned Opc, CmpOpc;
5050 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5051 // extractvalue selection doesn't support that.
5052 if (VT == MVT::i32) {
5053 Opc = AArch64::CMP_SWAP_32;
5054 CmpOpc = AArch64::SUBSWrs;
5055 ResRC = &AArch64::GPR32RegClass;
5056 } else if (VT == MVT::i64) {
5057 Opc = AArch64::CMP_SWAP_64;
5058 CmpOpc = AArch64::SUBSXrs;
5059 ResRC = &AArch64::GPR64RegClass;
5060 } else {
5061 return false;
5064 const MCInstrDesc &II = TII.get(Opc);
5066 const unsigned AddrReg = constrainOperandRegClass(
5067 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5068 const unsigned DesiredReg = constrainOperandRegClass(
5069 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5070 const unsigned NewReg = constrainOperandRegClass(
5071 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5073 const unsigned ResultReg1 = createResultReg(ResRC);
5074 const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5075 const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5077 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5079 .addDef(ResultReg1)
5080 .addDef(ScratchReg)
5081 .addUse(AddrReg)
5082 .addUse(DesiredReg)
5083 .addUse(NewReg);
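// CMP_SWAP_32/64 is a pseudo that is expanded after FastISel into the actual
// exclusive-load/store (or CAS) loop; the SUBS/CSINC emitted below recompute
// the i1 success flag from the value that was loaded.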
5085 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5086 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5087 .addUse(ResultReg1)
5088 .addUse(DesiredReg)
5089 .addImm(0);
5091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5092 .addDef(ResultReg2)
5093 .addUse(AArch64::WZR)
5094 .addUse(AArch64::WZR)
5095 .addImm(AArch64CC::NE);
5097 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5098 updateValueMap(I, ResultReg1, 2);
5099 return true;
5102 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5103 switch (I->getOpcode()) {
5104 default:
5105 break;
5106 case Instruction::Add:
5107 case Instruction::Sub:
5108 return selectAddSub(I);
5109 case Instruction::Mul:
5110 return selectMul(I);
5111 case Instruction::SDiv:
5112 return selectSDiv(I);
5113 case Instruction::SRem:
5114 if (!selectBinaryOp(I, ISD::SREM))
5115 return selectRem(I, ISD::SREM);
5116 return true;
5117 case Instruction::URem:
5118 if (!selectBinaryOp(I, ISD::UREM))
5119 return selectRem(I, ISD::UREM);
5120 return true;
5121 case Instruction::Shl:
5122 case Instruction::LShr:
5123 case Instruction::AShr:
5124 return selectShift(I);
5125 case Instruction::And:
5126 case Instruction::Or:
5127 case Instruction::Xor:
5128 return selectLogicalOp(I);
5129 case Instruction::Br:
5130 return selectBranch(I);
5131 case Instruction::IndirectBr:
5132 return selectIndirectBr(I);
5133 case Instruction::BitCast:
5134 if (!FastISel::selectBitCast(I))
5135 return selectBitCast(I);
5136 return true;
5137 case Instruction::FPToSI:
5138 if (!selectCast(I, ISD::FP_TO_SINT))
5139 return selectFPToInt(I, /*Signed=*/true);
5140 return true;
5141 case Instruction::FPToUI:
5142 return selectFPToInt(I, /*Signed=*/false);
5143 case Instruction::ZExt:
5144 case Instruction::SExt:
5145 return selectIntExt(I);
5146 case Instruction::Trunc:
5147 if (!selectCast(I, ISD::TRUNCATE))
5148 return selectTrunc(I);
5149 return true;
5150 case Instruction::FPExt:
5151 return selectFPExt(I);
5152 case Instruction::FPTrunc:
5153 return selectFPTrunc(I);
5154 case Instruction::SIToFP:
5155 if (!selectCast(I, ISD::SINT_TO_FP))
5156 return selectIntToFP(I, /*Signed=*/true);
5157 return true;
5158 case Instruction::UIToFP:
5159 return selectIntToFP(I, /*Signed=*/false);
5160 case Instruction::Load:
5161 return selectLoad(I);
5162 case Instruction::Store:
5163 return selectStore(I);
5164 case Instruction::FCmp:
5165 case Instruction::ICmp:
5166 return selectCmp(I);
5167 case Instruction::Select:
5168 return selectSelect(I);
5169 case Instruction::Ret:
5170 return selectRet(I);
5171 case Instruction::FRem:
5172 return selectFRem(I);
5173 case Instruction::GetElementPtr:
5174 return selectGetElementPtr(I);
5175 case Instruction::AtomicCmpXchg:
5176 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5179 // Fall back to target-independent instruction selection.
5180 return selectOperator(I, I->getOpcode());
5183 namespace llvm {
5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186 const TargetLibraryInfo *LibInfo) {
5187 return new AArch64FastISel(FuncInfo, LibInfo);
5190 } // end namespace llvm