lib/Target/AArch64/AArch64CallLowering.cpp
//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
    : CallLowering(&TLI) {}

namespace {
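/// Base handler for values flowing into the current code: formal arguments
/// and values returned by calls. Register-assigned values are copied (and
/// truncated if the location type was extended) from their physical register
/// into the destination vreg; stack-assigned values are loaded from a fixed
/// frame object.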
struct IncomingArgHandler : public CallLowering::ValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);
    switch (VA.getLocInfo()) {
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  bool isArgumentHandler() const override { return true; }

  uint64_t StackUsed;
};

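/// Incoming-value handler for formal arguments: each assigned physical
/// register is added as a basic-block live-in.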
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

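/// Incoming-value handler for call results: each assigned physical register
/// becomes an implicit def of the call instruction being built (MIB).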
struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

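/// Handler for outgoing values (call arguments and return values). Register
/// assignments become implicit uses of MIB, stack assignments are stored
/// relative to SP, and assignArg tracks the total stack space the assignments
/// consume.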
struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn,
                     CCAssignFn *AssignFnVarArg)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
        AssignFnVarArg(AssignFnVarArg), StackSize(0) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);
    Register SPReg = MRI.createGenericVirtualRegister(p0);
    MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));

    Register OffsetReg = MRI.createGenericVirtualRegister(s64);
    MIRBuilder.buildConstant(OffsetReg, Offset);

    Register AddrReg = MRI.createGenericVirtualRegister(p0);
    MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) {
      Size = VA.getLocVT().getSizeInBits() / 8;
      ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(Size * 8), ValVReg)
                    ->getOperand(0)
                    .getReg();
    }
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOStore, Size, 1);
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    bool Res;
    if (Info.IsFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);

    StackSize = State.getNextStackOffset();
    return Res;
  }

  MachineInstrBuilder MIB;
  CCAssignFn *AssignFnVarArg;
  uint64_t StackSize;
};
} // namespace

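/// Expand OrigArg into one ArgInfo per value type reported by ComputeValueVTs,
/// reusing the original flags and marking blocks of arguments that the calling
/// convention requires to live in consecutive registers.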
void AArch64CallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI,
    CallingConv::ID CallConv) const {
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  if (SplitVTs.size() == 1) {
    // No splitting to do, but we want to replace the original type (e.g. [1 x
    // double] -> double).
    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                           OrigArg.Flags, OrigArg.IsFixed);
    return;
  }

  // Create one ArgInfo for each virtual register in the original ArgInfo.
  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
      OrigArg.Ty, CallConv, false);
  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags,
                           OrigArg.IsFixed);
    if (NeedsRegBlock)
      SplitArgs.back().Flags.setInConsecutiveRegs();
  }

  SplitArgs.back().Flags.setInConsecutiveRegsLast();
}

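/// Lower an IR return into a RET_ReallyLR. Each returned value is extended or
/// padded as the calling convention requires, then assigned to the physical
/// return registers, which become implicit operands of the return instruction.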
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
        LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
        return false;
      }

      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                             Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                                  Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but can easily be done in future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
                        .getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty");
              return false;
            }
          } else {
            // A scalar extend.
            CurVReg =
                MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
    }

    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
    Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

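/// Lower incoming formal arguments: split each IR argument into legal pieces,
/// assign them to registers or stack slots via FormalArgHandler, and set up
/// the va_list stack area for Darwin vararg functions (other vararg ABIs are
/// not handled here yet).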
bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    ArgInfo OrigArg{VRegs[i], Arg.getType()};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;

  if (F.isVarArg()) {
    if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment.
    uint64_t StackOffset = alignTo(Handler.StackUsed, 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

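/// Lower an IR call: marshal the outgoing arguments, emit a BL/BLR bracketed
/// by ADJCALLSTACKDOWN/ADJCALLSTACKUP, and copy any returned values (and the
/// swifterror result, if present) back into virtual registers.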
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallingConv::ID CallConv,
                                    const MachineOperand &Callee,
                                    const ArgInfo &OrigRet,
                                    ArrayRef<ArgInfo> OrigArgs,
                                    Register SwiftErrorVReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  for (auto &OrigArg : OrigArgs) {
    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1))
      SplitArgs.back().Flags.setZExt();
  }

  // Find out which ABI gets to decide where things go.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFnFixed =
      TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
  CCAssignFn *AssignFnVarArg =
      TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true);

  auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
                                                          : AArch64::BL);
  MIB.add(Callee);

  // Tell the call which registers are clobbered.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Do the actual argument marshalling.
  SmallVector<unsigned, 8> PhysRegs;
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
                             AssignFnVarArg);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Callee.isReg())
    MIB->getOperand(0).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
        *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0));

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
  if (!OrigRet.Ty->isVoidTy()) {
    SplitArgs.clear();

    splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv());

    CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
    if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
      return false;
  }

  if (SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(SwiftErrorVReg, Register(AArch64::X21));
  }

  CallSeqStart.addImm(Handler.StackSize).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Handler.StackSize)
      .addImm(0);

  return true;
}