lib/Target/AArch64/AArch64CallLowering.cpp

   1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file implements the lowering of LLVM calls to machine code calls for
  11 /// GlobalISel.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AArch64CallLowering.h"
  16 #include "AArch64ISelLowering.h"
  17 #include "AArch64MachineFunctionInfo.h"
  18 #include "AArch64Subtarget.h"
  19 #include "llvm/ADT/ArrayRef.h"
  20 #include "llvm/ADT/SmallVector.h"
  21 #include "llvm/CodeGen/Analysis.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  24 #include "llvm/CodeGen/GlobalISel/Utils.h"
  25 #include "llvm/CodeGen/LowLevelType.h"
  26 #include "llvm/CodeGen/MachineBasicBlock.h"
  27 #include "llvm/CodeGen/MachineFrameInfo.h"
  28 #include "llvm/CodeGen/MachineFunction.h"
  29 #include "llvm/CodeGen/MachineInstrBuilder.h"
  30 #include "llvm/CodeGen/MachineMemOperand.h"
  31 #include "llvm/CodeGen/MachineOperand.h"
  32 #include "llvm/CodeGen/MachineRegisterInfo.h"
  33 #include "llvm/CodeGen/TargetRegisterInfo.h"
  34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  35 #include "llvm/CodeGen/ValueTypes.h"
  36 #include "llvm/IR/Argument.h"
  37 #include "llvm/IR/Attributes.h"
  38 #include "llvm/IR/Function.h"
  39 #include "llvm/IR/Type.h"
  40 #include "llvm/IR/Value.h"
  41 #include "llvm/Support/MachineValueType.h"
  42 #include <algorithm>
  43 #include <cassert>
  44 #include <cstdint>
  45 #include <iterator>
  46
  47 #define DEBUG_TYPE "aarch64-call-lowering"
  48
  49 using namespace llvm;
  50
  51 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
  52   : CallLowering(&TLI) {}
  53
  54 namespace {
  55 struct IncomingArgHandler : public CallLowering::ValueHandler {
  56   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
  57                      CCAssignFn *AssignFn)
  58       : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
  59
  60   Register getStackAddress(uint64_t Size, int64_t Offset,
  61                            MachinePointerInfo &MPO) override {
  62     auto &MFI = MIRBuilder.getMF().getFrameInfo();
  63     int FI = MFI.CreateFixedObject(Size, Offset, true);
  64     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
  65     Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
  66     MIRBuilder.buildFrameIndex(AddrReg, FI);
  67     StackUsed = std::max(StackUsed, Size + Offset);
  68     return AddrReg;
  69   }
  70
  71   void assignValueToReg(Register ValVReg, Register PhysReg,
  72                         CCValAssign &VA) override {
  73     markPhysRegUsed(PhysReg);
  74     switch (VA.getLocInfo()) {
  75     default:
  76       MIRBuilder.buildCopy(ValVReg, PhysReg);
  77       break;
  78     case CCValAssign::LocInfo::SExt:
  79     case CCValAssign::LocInfo::ZExt:
  80     case CCValAssign::LocInfo::AExt: {
  81       auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
  82       MIRBuilder.buildTrunc(ValVReg, Copy);
  83       break;
  84     }
  85     }
  86   }
  87
  88   void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
  89                             MachinePointerInfo &MPO, CCValAssign &VA) override {
  90     // FIXME: Get alignment
  91     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
  92         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
  93         1);
  94     MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  95   }
  96
  97   /// How the physical register gets marked varies between formal
  98   /// parameters (it's a basic-block live-in), and a call instruction
  99   /// (it's an implicit-def of the BL).
 100   virtual void markPhysRegUsed(unsigned PhysReg) = 0;
 101
 102   bool isIncomingArgumentHandler() const override { return true; }
 103
 104   uint64_t StackUsed;
 105 };
 106
 107 struct FormalArgHandler : public IncomingArgHandler {
 108   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 109                    CCAssignFn *AssignFn)
 110     : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
 111
 112   void markPhysRegUsed(unsigned PhysReg) override {
 113     MIRBuilder.getMRI()->addLiveIn(PhysReg);
 114     MIRBuilder.getMBB().addLiveIn(PhysReg);
 115   }
 116 };
 117
 118 struct CallReturnHandler : public IncomingArgHandler {
 119   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 120                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
 121     : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
 122
 123   void markPhysRegUsed(unsigned PhysReg) override {
 124     MIB.addDef(PhysReg, RegState::Implicit);
 125   }
 126
 127   MachineInstrBuilder MIB;
 128 };
 129
 130 struct OutgoingArgHandler : public CallLowering::ValueHandler {
 131   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 132                      MachineInstrBuilder MIB, CCAssignFn *AssignFn,
 133                      CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
 134                      int FPDiff = 0)
 135       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
 136         AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
 137         StackSize(0) {}
 138
 139   Register getStackAddress(uint64_t Size, int64_t Offset,
 140                            MachinePointerInfo &MPO) override {
 141     MachineFunction &MF = MIRBuilder.getMF();
 142     LLT p0 = LLT::pointer(0, 64);
 143     LLT s64 = LLT::scalar(64);
 144
 145     if (IsTailCall) {
 146       Offset += FPDiff;
 147       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
 148       Register FIReg = MRI.createGenericVirtualRegister(p0);
 149       MIRBuilder.buildFrameIndex(FIReg, FI);
 150       MPO = MachinePointerInfo::getFixedStack(MF, FI);
 151       return FIReg;
 152     }
 153
 154     Register SPReg = MRI.createGenericVirtualRegister(p0);
 155     MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
 156
 157     Register OffsetReg = MRI.createGenericVirtualRegister(s64);
 158     MIRBuilder.buildConstant(OffsetReg, Offset);
 159
 160     Register AddrReg = MRI.createGenericVirtualRegister(p0);
 161     MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
 162
 163     MPO = MachinePointerInfo::getStack(MF, Offset);
 164     return AddrReg;
 165   }
 166
 167   void assignValueToReg(Register ValVReg, Register PhysReg,
 168                         CCValAssign &VA) override {
 169     MIB.addUse(PhysReg, RegState::Implicit);
 170     Register ExtReg = extendRegister(ValVReg, VA);
 171     MIRBuilder.buildCopy(PhysReg, ExtReg);
 172   }
 173
 174   void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
 175                             MachinePointerInfo &MPO, CCValAssign &VA) override {
 176     if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) {
 177       Size = VA.getLocVT().getSizeInBits() / 8;
 178       ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(Size * 8), ValVReg)
 179                     ->getOperand(0)
 180                     .getReg();
 181     }
 182     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
 183         MPO, MachineMemOperand::MOStore, Size, 1);
 184     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
 185   }
 186
 187   bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
 188                  CCValAssign::LocInfo LocInfo,
 189                  const CallLowering::ArgInfo &Info,
 190                  ISD::ArgFlagsTy Flags,
 191                  CCState &State) override {
 192     bool Res;
 193     if (Info.IsFixed)
 194       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
 195     else
 196       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
 197
 198     StackSize = State.getNextStackOffset();
 199     return Res;
 200   }
 201
 202   MachineInstrBuilder MIB;
 203   CCAssignFn *AssignFnVarArg;
 204   bool IsTailCall;
 205
 206   /// For tail calls, the byte offset of the call's argument area from the
 207   /// callee's. Unused elsewhere.
 208   int FPDiff;
 209   uint64_t StackSize;
 210 };
 211 } // namespace
 212
 213 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
 214   return CallConv == CallingConv::Fast && TailCallOpt;
 215 }
 216
 217 void AArch64CallLowering::splitToValueTypes(
 218     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
 219     const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
 220   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 221   LLVMContext &Ctx = OrigArg.Ty->getContext();
 222
 223   if (OrigArg.Ty->isVoidTy())
 224     return;
 225
 226   SmallVector<EVT, 4> SplitVTs;
 227   SmallVector<uint64_t, 4> Offsets;
 228   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
 229
 230   if (SplitVTs.size() == 1) {
 231     // No splitting to do, but we want to replace the original type (e.g. [1 x
 232     // double] -> double).
 233     SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
 234                            OrigArg.Flags[0], OrigArg.IsFixed);
 235     return;
 236   }
 237
 238   // Create one ArgInfo for each virtual register in the original ArgInfo.
 239   assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
 240
 241   bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
 242       OrigArg.Ty, CallConv, false);
 243   for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
 244     Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
 245     SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
 246                            OrigArg.IsFixed);
 247     if (NeedsRegBlock)
 248       SplitArgs.back().Flags[0].setInConsecutiveRegs();
 249   }
 250
 251   SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
 252 }
 253
 254 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
 255                                       const Value *Val,
 256                                       ArrayRef<Register> VRegs,
 257                                       Register SwiftErrorVReg) const {
       // Emit a RET_ReallyLR for a return of Val. VRegs holds one virtual
       // register per value type that Val's type is made of (asserted below);
       // Val may be null, in which case VRegs must be empty. Returns false when
       // the returned value has a shape this code does not handle, otherwise
       // the result of handleAssignments.
       //
       // The return instruction is built without being inserted and is only
       // inserted at the very end, after everything else here has been
       // emitted.
       //
       // NOTE(review): the early `return false` paths below leave MIB built but
       // never inserted.
 258   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
 259   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
 260          "Return value without a vreg");
 261
 262   bool Success = true;
 263   if (!VRegs.empty()) {
 264     MachineFunction &MF = MIRBuilder.getMF();
 265     const Function &F = MF.getFunction();
 266
 267     MachineRegisterInfo &MRI = MF.getRegInfo();
 268     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 269     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
 270     auto &DL = F.getParent()->getDataLayout();
 271     LLVMContext &Ctx = Val->getType()->getContext();
 272
 273     SmallVector<EVT, 4> SplitEVTs;
 274     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
 275     assert(VRegs.size() == SplitEVTs.size() &&
 276            "For each split Type there should be exactly one VReg.");
 277
 278     SmallVector<ArgInfo, 8> SplitArgs;
 279     CallingConv::ID CC = F.getCallingConv();
 280
         // Handle each piece of the return value on its own: widen or pad its
         // vreg to the register type the calling convention asks for, then
         // queue it in SplitArgs.
 281     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
 282       if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
 283         LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
 284         return false;
 285       }
 286
 287       Register CurVReg = VRegs[i];
 288       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
 289       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 290
 291       // i1 is a special case because SDAG i1 true is naturally zero extended
 292       // when widened using ANYEXT. We need to do it explicitly here.
 293       if (MRI.getType(CurVReg).getSizeInBits() == 1) {
 294         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
 295       } else {
 296         // Some types will need extending as specified by the CC.
 297         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
 298         if (EVT(NewVT) != SplitEVTs[i]) {
               // Any-extend by default; the function's signext / zeroext
               // return attribute, checked in that order, overrides it.
 299           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
 300           if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
 301                                              Attribute::SExt))
 302             ExtendOp = TargetOpcode::G_SEXT;
 303           else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
 304                                                   Attribute::ZExt))
 305             ExtendOp = TargetOpcode::G_ZEXT;
 306
 307           LLT NewLLT(NewVT);
 308           LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
 309           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
 310           // Instead of an extend, we might have a vector type which needs
 311           // padding with more elements, e.g. <2 x half> -> <4 x half>.
 312           if (NewVT.isVector()) {
 313             if (OldLLT.isVector()) {
 314               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
 315                 // We don't handle VA types which are not exactly twice the
 316                 // size, but can easily be done in future.
 317                 if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
 318                   LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
 319                   return false;
 320                 }
                     // Pad to twice the width by merging with an undef value
                     // of the old type.
 321                 auto Undef = MIRBuilder.buildUndef({OldLLT});
 322                 CurVReg =
 323                     MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
 324                         .getReg(0);
 325               } else {
 326                 // Just do a vector extend.
 327                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
 328                               .getReg(0);
 329               }
 330             } else if (NewLLT.getNumElements() == 2) {
 331               // We need to pad a <1 x S> type to <2 x S>. Since we don't have
 332               // <1 x S> vector types in GISel we use a build_vector instead
 333               // of a vector merge/concat.
 334               auto Undef = MIRBuilder.buildUndef({OldLLT});
 335               CurVReg =
 336                   MIRBuilder
 337                       .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
 338                       .getReg(0);
 339             } else {
 340               LLVM_DEBUG(dbgs() << "Could not handle ret ty");
 341               return false;
 342             }
 343           } else {
 344             // A scalar extend.
 345             CurVReg =
 346                 MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
 347           }
 348         }
 349       }
 350       if (CurVReg != CurArgInfo.Regs[0]) {
 351         CurArgInfo.Regs[0] = CurVReg;
 352         // Reset the arg flags after modifying CurVReg.
 353         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 354       }
 355      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
 356     }
 357
         // The return assignment function is passed for both the fixed and the
         // variadic slot of the handler; its remaining parameters keep their
         // defaults.
 358     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
 359     Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
 360   }
 361
       // When a swifterror vreg is supplied, copy it into X21 and make X21 an
       // implicit use of the return instruction.
 362   if (SwiftErrorVReg) {
 363     MIB.addUse(AArch64::X21, RegState::Implicit);
 364     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
 365   }
 366
 367   MIRBuilder.insertInstr(MIB);
 368   return Success;
 369 }
 370
 371 bool AArch64CallLowering::lowerFormalArguments(
 372     MachineIRBuilder &MIRBuilder, const Function &F,
 373     ArrayRef<ArrayRef<Register>> VRegs) const {
       // Lower the incoming arguments of F. VRegs[i] holds the virtual
       // registers of the i-th argument whose type has a non-zero store size.
       // Returns false when handleAssignments fails, or when F is variadic and
       // the target is not Darwin. On success the stack-argument bookkeeping
       // in AArch64FunctionInfo is updated as well.
 374   MachineFunction &MF = MIRBuilder.getMF();
 375   MachineBasicBlock &MBB = MIRBuilder.getMBB();
 376   MachineRegisterInfo &MRI = MF.getRegInfo();
 377   auto &DL = F.getParent()->getDataLayout();
 378
 379   SmallVector<ArgInfo, 8> SplitArgs;
 380   unsigned i = 0;
 381   for (auto &Arg : F.args()) {
         // Zero-sized arguments are skipped without advancing i.
         // NOTE(review): after such a skip, i + FirstArgIndex below no longer
         // equals Arg's own position in the attribute list -- confirm that
         // this is intended.
 382     if (DL.getTypeStoreSize(Arg.getType()) == 0)
 383       continue;
 384
 385     ArgInfo OrigArg{VRegs[i], Arg.getType()};
 386     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
 387
 388     splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
 389     ++i;
 390   }
 391
       // If the block already contains instructions, position the builder at
       // the first one before emitting the argument handling code.
 392   if (!MBB.empty())
 393     MIRBuilder.setInstr(*MBB.begin());
 394
 395   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 396   CCAssignFn *AssignFn =
 397       TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
 398
 399   FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
 400   if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
 401     return false;
 402
 403   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
       // Start from the number of stack bytes the handler saw being used.
 404   uint64_t StackOffset = Handler.StackUsed;
 405   if (F.isVarArg()) {
 406     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 407     if (!Subtarget.isTargetDarwin()) {
 408         // FIXME: we need to reimplement saveVarArgsRegisters from
 409       // AArch64ISelLowering.
 410       return false;
 411     }
 412
 413     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
 414     StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
 415
         // Record a fixed object at the aligned offset as the varargs stack
         // index.
 416     auto &MFI = MIRBuilder.getMF().getFrameInfo();
 417     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
 418   }
 419
 420   if (doesCalleeRestoreStack(F.getCallingConv(),
 421                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
 422     // We have a non-standard ABI, so why not make full use of the stack that
 423     // we're going to pop? It must be aligned to 16 B in any case.
 424     StackOffset = alignTo(StackOffset, 16);
 425
 426     // If we're expected to restore the stack (e.g. fastcc), then we'll be
 427     // adding a multiple of 16.
 428     FuncInfo->setArgumentStackToRestore(StackOffset);
 429
 430     // Our own callers will guarantee that the space is free by giving an
 431     // aligned value to CALLSEQ_START.
 432   }
 433
 434   // When we tail call, we need to check if the callee's arguments
 435   // will fit on the caller's stack. So, whenever we lower formal arguments,
 436   // we should keep track of this information, since we might lower a tail call
 437   // in this function later.
 438   FuncInfo->setBytesInStackArgArea(StackOffset);
 439
 440   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 441   if (Subtarget.hasCustomCallingConv())
 442     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
 443
 444   // Move back to the end of the basic block.
 445   MIRBuilder.setMBB(MBB);
 446
 447   return true;
 448 }
 449
 450 /// Return true if the calling convention is one that we can guarantee TCO for.
 451 static bool canGuaranteeTCO(CallingConv::ID CC) {
 452   return CC == CallingConv::Fast;
 453 }
 454
 455 /// Return true if we might ever do TCO for calls with this calling convention.
 456 static bool mayTailCallThisCC(CallingConv::ID CC) {
 457   switch (CC) {
 458   case CallingConv::C:
 459   case CallingConv::PreserveMost:
 460   case CallingConv::Swift:
 461     return true;
 462   default:
 463     return canGuaranteeTCO(CC);
 464   }
 465 }
 466
 467 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
 468     CallLoweringInfo &Info, MachineFunction &MF,
 469     SmallVectorImpl<ArgInfo> &InArgs) const {
 470   const Function &CallerF = MF.getFunction();
 471   CallingConv::ID CalleeCC = Info.CallConv;
 472   CallingConv::ID CallerCC = CallerF.getCallingConv();
 473
 474   // If the calling conventions match, then everything must be the same.
 475   if (CalleeCC == CallerCC)
 476     return true;
 477
 478   // Check if the caller and callee will handle arguments in the same way.
 479   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 480   CCAssignFn *CalleeAssignFn = TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg);
 481   CCAssignFn *CallerAssignFn =
 482       TLI.CCAssignFnForCall(CallerCC, CallerF.isVarArg());
 483
 484   if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFn, *CallerAssignFn))
 485     return false;
 486
 487   // Make sure that the caller and callee preserve all of the same registers.
 488   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 489   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
 490   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
 491   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
 492     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
 493     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
 494   }
 495
 496   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
 497 }
 498
 499 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
 500     CallLoweringInfo &Info, MachineFunction &MF,
 501     SmallVectorImpl<ArgInfo> &OutArgs) const {
 502   // If there are no outgoing arguments, then we are done.
 503   if (OutArgs.empty())
 504     return true;
 505
 506   const Function &CallerF = MF.getFunction();
 507   CallingConv::ID CalleeCC = Info.CallConv;
 508   CallingConv::ID CallerCC = CallerF.getCallingConv();
 509   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 510
 511   // We have outgoing arguments. Make sure that we can tail call with them.
 512   SmallVector<CCValAssign, 16> OutLocs;
 513   CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
 514
 515   if (!analyzeArgInfo(OutInfo, OutArgs,
 516                       *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) {
 517     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
 518     return false;
 519   }
 520
 521   // Make sure that they can fit on the caller's stack.
 522   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 523   if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
 524     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
 525     return false;
 526   }
 527
 528   // Verify that the parameters in callee-saved registers match.
 529   // TODO: Port this over to CallLowering as general code once swiftself is
 530   // supported.
 531   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 532   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
 533   MachineRegisterInfo &MRI = MF.getRegInfo();
 534
 535   for (unsigned i = 0; i < OutLocs.size(); ++i) {
 536     auto &ArgLoc = OutLocs[i];
 537     // If it's not a register, it's fine.
 538     if (!ArgLoc.isRegLoc()) {
 539       if (Info.IsVarArg) {
 540         // Be conservative and disallow variadic memory operands to match SDAG's
 541         // behaviour.
 542         // FIXME: If the caller's calling convention is C, then we can
 543         // potentially use its argument area. However, for cases like fastcc,
 544         // we can't do anything.
 545         LLVM_DEBUG(
 546             dbgs()
 547             << "... Cannot tail call vararg function with stack arguments\n");
 548         return false;
 549       }
 550       continue;
 551     }
 552
 553     Register Reg = ArgLoc.getLocReg();
 554
 555     // Only look at callee-saved registers.
 556     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
 557       continue;
 558
 559     LLVM_DEBUG(
 560         dbgs()
 561         << "... Call has an argument passed in a callee-saved register.\n");
 562
 563     // Check if it was copied from.
 564     ArgInfo &OutInfo = OutArgs[i];
 565
 566     if (OutInfo.Regs.size() > 1) {
 567       LLVM_DEBUG(
 568           dbgs() << "... Cannot handle arguments in multiple registers.\n");
 569       return false;
 570     }
 571
 572     // Check if we copy the register, walking through copies from virtual
 573     // registers. Note that getDefIgnoringCopies does not ignore copies from
 574     // physical registers.
 575     MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
 576     if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
 577       LLVM_DEBUG(
 578           dbgs()
 579           << "... Parameter was not copied into a VReg, cannot tail call.\n");
 580       return false;
 581     }
 582
 583     // Got a copy. Verify that it's the same as the register we want.
 584     Register CopyRHS = RegDef->getOperand(1).getReg();
 585     if (CopyRHS != Reg) {
 586       LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
 587                            "VReg, cannot tail call.\n");
 588       return false;
 589     }
 590   }
 591
 592   return true;
 593 }
 594
 595 bool AArch64CallLowering::isEligibleForTailCallOptimization(
 596     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
 597     SmallVectorImpl<ArgInfo> &InArgs,
 598     SmallVectorImpl<ArgInfo> &OutArgs) const {
 599
 600   // Must pass all target-independent checks in order to tail call optimize.
 601   if (!Info.IsTailCall)
 602     return false;
 603
 604   CallingConv::ID CalleeCC = Info.CallConv;
 605   MachineFunction &MF = MIRBuilder.getMF();
 606   const Function &CallerF = MF.getFunction();
 607
 608   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
 609
 610   if (Info.SwiftErrorVReg) {
 611     // TODO: We should handle this.
 612     // Note that this is also handled by the check for no outgoing arguments.
 613     // Proactively disabling this though, because the swifterror handling in
 614     // lowerCall inserts a COPY *after* the location of the call.
 615     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
 616     return false;
 617   }
 618
 619   if (!mayTailCallThisCC(CalleeCC)) {
 620     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
 621     return false;
 622   }
 623
 624   // Byval parameters hand the function a pointer directly into the stack area
 625   // we want to reuse during a tail call. Working around this *is* possible (see
 626   // X86).
 627   //
 628   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
 629   // it?
 630   //
 631   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
 632   // In this case, it is necessary to save/restore X0 in the callee. Tail
 633   // call opt interferes with this. So we disable tail call opt when the
 634   // caller has an argument with "inreg" attribute.
 635   //
 636   // FIXME: Check whether the callee also has an "inreg" argument.
 637   //
 638   // When the caller has a swifterror argument, we don't want to tail call
 639   // because would have to move into the swifterror register before the
 640   // tail call.
 641   if (any_of(CallerF.args(), [](const Argument &A) {
 642         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
 643       })) {
 644     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
 645                          "inreg, or swifterror arguments\n");
 646     return false;
 647   }
 648
 649   // Externally-defined functions with weak linkage should not be
 650   // tail-called on AArch64 when the OS does not support dynamic
 651   // pre-emption of symbols, as the AAELF spec requires normal calls
 652   // to undefined weak functions to be replaced with a NOP or jump to the
 653   // next instruction. The behaviour of branch instructions in this
 654   // situation (as used for tail calls) is implementation-defined, so we
 655   // cannot rely on the linker replacing the tail call with a return.
 656   if (Info.Callee.isGlobal()) {
 657     const GlobalValue *GV = Info.Callee.getGlobal();
 658     const Triple &TT = MF.getTarget().getTargetTriple();
 659     if (GV->hasExternalWeakLinkage() &&
 660         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
 661          TT.isOSBinFormatMachO())) {
 662       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
 663                            "with weak linkage for this OS.\n");
 664       return false;
 665     }
 666   }
 667
 668   // If we have -tailcallopt, then we're done.
 669   if (MF.getTarget().Options.GuaranteedTailCallOpt)
 670     return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
 671
 672   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
 673   // Try to find cases where we can do that.
 674
 675   // I want anyone implementing a new calling convention to think long and hard
 676   // about this assert.
 677   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
 678          "Unexpected variadic calling convention");
 679
 680   // Before we can musttail varargs, we need to forward parameters like in
 681   // r345641. Make sure that we don't enable musttail with varargs without
 682   // addressing that!
 683   if (Info.IsVarArg && Info.IsMustTailCall) {
 684     LLVM_DEBUG(
 685         dbgs()
 686         << "... Cannot handle vararg musttail functions yet.\n");
 687     return false;
 688   }
 689
 690   // Verify that the incoming and outgoing arguments from the callee are
 691   // safe to tail call.
 692   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
 693     LLVM_DEBUG(
 694         dbgs()
 695         << "... Caller and callee have incompatible calling conventions.\n");
 696     return false;
 697   }
 698
 699   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
 700     return false;
 701
 702   LLVM_DEBUG(
 703       dbgs() << "... Call is eligible for tail call optimization.\n");
 704   return true;
 705 }
 706
 707 static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
 708                               bool IsTailCall) {
 709   if (!IsTailCall)
 710     return IsIndirect ? AArch64::BLR : AArch64::BL;
 711
 712   if (!IsIndirect)
 713     return AArch64::TCRETURNdi;
 714
 715   // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
 716   // x16 or x17.
 717   if (CallerF.hasFnAttribute("branch-target-enforcement"))
 718     return AArch64::TCRETURNriBTI;
 719
 720   return AArch64::TCRETURNri;
 721 }
 722
 723 bool AArch64CallLowering::lowerTailCall(
 724     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
 725     SmallVectorImpl<ArgInfo> &OutArgs) const {
 726   MachineFunction &MF = MIRBuilder.getMF();
 727   const Function &F = MF.getFunction();
 728   MachineRegisterInfo &MRI = MF.getRegInfo();
 729   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 730
 731   // True when we're tail calling, but without -tailcallopt.
 732   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
 733
 734   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
 735   // register class. Until we can do that, we should fall back here.
 736   if (F.hasFnAttribute("branch-target-enforcement")) {
 737     LLVM_DEBUG(
 738         dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
 739     return false;
 740   }
 741
 742   // Find out which ABI gets to decide where things go.
 743   CallingConv::ID CalleeCC = Info.CallConv;
 744   CCAssignFn *AssignFnFixed =
 745       TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
 746   CCAssignFn *AssignFnVarArg =
 747       TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true);
 748
 749   MachineInstrBuilder CallSeqStart;
 750   if (!IsSibCall)
 751     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 752
 753   unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
 754   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
 755   MIB.add(Info.Callee);
 756
 757   // Byte offset for the tail call. When we are sibcalling, this will always
 758   // be 0.
 759   MIB.addImm(0);
 760
 761   // Tell the call which registers are clobbered.
 762   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 763   const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
 764   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
 765     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
 766   MIB.addRegMask(Mask);
 767
 768   if (TRI->isAnyArgRegReserved(MF))
 769     TRI->emitReservedArgRegCallError(MF);
 770
 771   // FPDiff is the byte offset of the call's argument area from the callee's.
 772   // Stores to callee stack arguments will be placed in FixedStackSlots offset
 773   // by this amount for a tail call. In a sibling call it must be 0 because the
 774   // caller will deallocate the entire stack and the callee still expects its
 775   // arguments to begin at SP+0.
 776   int FPDiff = 0;
 777
 778   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
 779   // by -tailcallopt. For sibcalls, the memory operands for the call are
 780   // already available in the caller's incoming argument space.
 781   unsigned NumBytes = 0;
 782   if (!IsSibCall) {
 783     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
 784     // before handling assignments, because FPDiff must be known for memory
 785     // arguments.
 786     AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 787     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
 788     SmallVector<CCValAssign, 16> OutLocs;
 789     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
 790     analyzeArgInfo(OutInfo, OutArgs,
 791                    *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg));
 792
 793     // The callee will pop the argument stack as a tail call. Thus, we must
 794     // keep it 16-byte aligned.
 795     NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
 796
 797     // FPDiff will be negative if this tail call requires more space than we
 798     // would automatically have in our incoming argument space. Positive if we
 799     // actually shrink the stack.
 800     FPDiff = NumReusableBytes - NumBytes;
 801
 802     // The stack pointer must be 16-byte aligned at all times it's used for a
 803     // memory operation, which in practice means at *all* times and in
 804     // particular across call boundaries. Therefore our own arguments started at
 805     // a 16-byte aligned SP and the delta applied for the tail call should
 806     // satisfy the same constraint.
 807     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
 808   }
 809
 810   // Do the actual argument marshalling.
 811   SmallVector<unsigned, 8> PhysRegs;
 812   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
 813                              AssignFnVarArg, true, FPDiff);
 814   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
 815     return false;
 816
 817   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
 818   // sequence start and end here.
 819   if (!IsSibCall) {
 820     MIB->getOperand(1).setImm(FPDiff);
 821     CallSeqStart.addImm(NumBytes).addImm(0);
 822     // End the call sequence *before* emitting the call. Normally, we would
 823     // tidy the frame up after the call. However, here, we've laid out the
 824     // parameters so that when SP is reset, they will be in the correct
 825     // location.
 826     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
 827   }
 828
 829   // Now we can add the actual call instruction to the correct basic block.
 830   MIRBuilder.insertInstr(MIB);
 831
 832   // If Callee is a reg, since it is used by a target specific instruction,
 833   // it must have a register class matching the constraint of that instruction.
 834   if (Info.Callee.isReg())
 835     MIB->getOperand(0).setReg(constrainOperandRegClass(
 836         MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
 837         *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
 838         0));
 839
 840   MF.getFrameInfo().setHasTailCall();
 841   Info.LoweredTailCall = true;
 842   return true;
 843 }
 844
 845 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 846                                     CallLoweringInfo &Info) const {
 847   MachineFunction &MF = MIRBuilder.getMF();
 848   const Function &F = MF.getFunction();
 849   MachineRegisterInfo &MRI = MF.getRegInfo();
 850   auto &DL = F.getParent()->getDataLayout();
 851   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 852
 853   SmallVector<ArgInfo, 8> OutArgs;
 854   for (auto &OrigArg : Info.OrigArgs) {
 855     splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
 856     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
 857     if (OrigArg.Ty->isIntegerTy(1))
 858       OutArgs.back().Flags[0].setZExt();
 859   }
 860
 861   SmallVector<ArgInfo, 8> InArgs;
 862   if (!Info.OrigRet.Ty->isVoidTy())
 863     splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
 864
 865   // If we can lower as a tail call, do that instead.
 866   bool CanTailCallOpt =
 867       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
 868
 869   // We must emit a tail call if we have musttail.
 870   if (Info.IsMustTailCall && !CanTailCallOpt) {
 871     // There are types of incoming/outgoing arguments we can't handle yet, so
 872     // it doesn't make sense to actually die here like in ISelLowering. Instead,
 873     // fall back to SelectionDAG and let it try to handle this.
 874     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
 875     return false;
 876   }
 877
 878   if (CanTailCallOpt)
 879     return lowerTailCall(MIRBuilder, Info, OutArgs);
 880
 881   // Find out which ABI gets to decide where things go.
 882   CCAssignFn *AssignFnFixed =
 883       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
 884   CCAssignFn *AssignFnVarArg =
 885       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
 886
 887   MachineInstrBuilder CallSeqStart;
 888   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 889
 890   // Create a temporarily-floating call instruction so we can add the implicit
 891   // uses of arg registers.
 892   unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false);
 893
 894   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
 895   MIB.add(Info.Callee);
 896
 897   // Tell the call which registers are clobbered.
 898   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 899   const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
 900   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
 901     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
 902   MIB.addRegMask(Mask);
 903
 904   if (TRI->isAnyArgRegReserved(MF))
 905     TRI->emitReservedArgRegCallError(MF);
 906
 907   // Do the actual argument marshalling.
 908   SmallVector<unsigned, 8> PhysRegs;
 909   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
 910                              AssignFnVarArg, false);
 911   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
 912     return false;
 913
 914   // Now we can add the actual call instruction to the correct basic block.
 915   MIRBuilder.insertInstr(MIB);
 916
 917   // If Callee is a reg, since it is used by a target specific
 918   // instruction, it must have a register class matching the
 919   // constraint of that instruction.
 920   if (Info.Callee.isReg())
 921     MIB->getOperand(0).setReg(constrainOperandRegClass(
 922         MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
 923         *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
 924         0));
 925
 926   // Finally we can copy the returned value back into its virtual-register. In
 927   // symmetry with the arugments, the physical register must be an
 928   // implicit-define of the call instruction.
 929   if (!Info.OrigRet.Ty->isVoidTy()) {
 930     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
 931     CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
 932     if (!handleAssignments(MIRBuilder, InArgs, Handler))
 933       return false;
 934   }
 935
 936   if (Info.SwiftErrorVReg) {
 937     MIB.addDef(AArch64::X21, RegState::Implicit);
 938     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
 939   }
 940
 941   uint64_t CalleePopBytes =
 942       doesCalleeRestoreStack(Info.CallConv,
 943                              MF.getTarget().Options.GuaranteedTailCallOpt)
 944           ? alignTo(Handler.StackSize, 16)
 945           : 0;
 946
 947   CallSeqStart.addImm(Handler.StackSize).addImm(0);
 948   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
 949       .addImm(Handler.StackSize)
 950       .addImm(CalleePopBytes);
 951
 952   return true;
 953 }