llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

   1 //===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file contains the AArch64 implementation of the TargetRegisterInfo
  10 // class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AArch64RegisterInfo.h"
  15 #include "AArch64FrameLowering.h"
  16 #include "AArch64InstrInfo.h"
  17 #include "AArch64MachineFunctionInfo.h"
  18 #include "AArch64Subtarget.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/ADT/BitVector.h"
  21 #include "llvm/ADT/Triple.h"
  22 #include "llvm/CodeGen/MachineFrameInfo.h"
  23 #include "llvm/CodeGen/MachineInstrBuilder.h"
  24 #include "llvm/CodeGen/MachineRegisterInfo.h"
  25 #include "llvm/CodeGen/RegisterScavenging.h"
  26 #include "llvm/CodeGen/TargetFrameLowering.h"
  27 #include "llvm/IR/DebugInfoMetadata.h"
  28 #include "llvm/IR/DiagnosticInfo.h"
  29 #include "llvm/IR/Function.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 #include "llvm/Target/TargetOptions.h"
  32
  33 using namespace llvm;
  34
  35 #define GET_REGINFO_TARGET_DESC
  36 #include "AArch64GenRegisterInfo.inc"
  37
  38 AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
  39     : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {
  40   AArch64_MC::initLLVMToCVRegMapping(this);
  41 }
  42
  43 /// Return whether the register needs a CFI entry. Not all unwinders may know
  44 /// about SVE registers, so we assume the lowest common denominator, i.e. the
  45 /// callee-saves required by the base ABI. For the SVE registers z8-z15 only the
  46 /// lower 64-bits (d8-d15) need to be saved. The lower 64-bits subreg is
  47 /// returned in \p RegToUseForCFI.
  48 bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg,
  49                                       unsigned &RegToUseForCFI) const {
  50   if (AArch64::PPRRegClass.contains(Reg))
  51     return false;
  52
  53   if (AArch64::ZPRRegClass.contains(Reg)) {
  54     RegToUseForCFI = getSubReg(Reg, AArch64::dsub);
  55     for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) {
  56       if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI)
  57         return true;
  58     }
  59     return false;
  60   }
  61
  62   RegToUseForCFI = Reg;
  63   return true;
  64 }
  65
  66 bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
  67   const Function &F = MF->getFunction();
  68   return isa<ScalableVectorType>(F.getReturnType()) ||
  69          any_of(F.args(), [](const Argument &Arg) {
  70            return isa<ScalableVectorType>(Arg.getType());
  71          });
  72 }
  73
  74 const MCPhysReg *
  75 AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  76   assert(MF && "Invalid MachineFunction pointer.");
  77
  78   if (MF->getFunction().getCallingConv() == CallingConv::GHC)
  79     // GHC set of callee saved regs is empty as all those regs are
  80     // used for passing STG regs around
  81     return CSR_AArch64_NoRegs_SaveList;
  82   if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
  83     return CSR_AArch64_AllRegs_SaveList;
  84
  85   // Darwin has its own CSR_AArch64_AAPCS_SaveList, which means most CSR save
  86   // lists depending on that will need to have their Darwin variant as well.
  87   if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin())
  88     return getDarwinCalleeSavedRegs(MF);
  89
  90   if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
  91     return CSR_Win_AArch64_CFGuard_Check_SaveList;
  92   if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
  93     return CSR_Win_AArch64_AAPCS_SaveList;
  94   if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
  95     return CSR_AArch64_AAVPCS_SaveList;
  96   if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
  97     return CSR_AArch64_SVE_AAPCS_SaveList;
  98   if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
  99           ->supportSwiftError() &&
 100       MF->getFunction().getAttributes().hasAttrSomewhere(
 101           Attribute::SwiftError))
 102     return CSR_AArch64_AAPCS_SwiftError_SaveList;
 103   if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
 104     return CSR_AArch64_AAPCS_SwiftTail_SaveList;
 105   if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
 106     return CSR_AArch64_RT_MostRegs_SaveList;
 107   if (MF->getFunction().getCallingConv() == CallingConv::Win64)
 108     // This is for OSes other than Windows; Windows is a separate case further
 109     // above.
 110     return CSR_AArch64_AAPCS_X18_SaveList;
 111   if (hasSVEArgsOrReturn(MF))
 112     return CSR_AArch64_SVE_AAPCS_SaveList;
 113   return CSR_AArch64_AAPCS_SaveList;
 114 }
 115
 116 const MCPhysReg *
 117 AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
 118   assert(MF && "Invalid MachineFunction pointer.");
 119   assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
 120          "Invalid subtarget for getDarwinCalleeSavedRegs");
 121
 122   if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
 123     report_fatal_error(
 124         "Calling convention CFGuard_Check is unsupported on Darwin.");
 125   if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
 126     return CSR_Darwin_AArch64_AAVPCS_SaveList;
 127   if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
 128     report_fatal_error(
 129         "Calling convention SVE_VectorCall is unsupported on Darwin.");
 130   if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
 131     return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR()
 132                ? CSR_Darwin_AArch64_CXX_TLS_PE_SaveList
 133                : CSR_Darwin_AArch64_CXX_TLS_SaveList;
 134   if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
 135           ->supportSwiftError() &&
 136       MF->getFunction().getAttributes().hasAttrSomewhere(
 137           Attribute::SwiftError))
 138     return CSR_Darwin_AArch64_AAPCS_SwiftError_SaveList;
 139   if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
 140     return CSR_Darwin_AArch64_AAPCS_SwiftTail_SaveList;
 141   if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
 142     return CSR_Darwin_AArch64_RT_MostRegs_SaveList;
 143   return CSR_Darwin_AArch64_AAPCS_SaveList;
 144 }
 145
 146 const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
 147     const MachineFunction *MF) const {
 148   assert(MF && "Invalid MachineFunction pointer.");
 149   if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
 150       MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
 151     return CSR_Darwin_AArch64_CXX_TLS_ViaCopy_SaveList;
 152   return nullptr;
 153 }
 154
 155 void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs(
 156     MachineFunction &MF) const {
 157   const MCPhysReg *CSRs = getCalleeSavedRegs(&MF);
 158   SmallVector<MCPhysReg, 32> UpdatedCSRs;
 159   for (const MCPhysReg *I = CSRs; *I; ++I)
 160     UpdatedCSRs.push_back(*I);
 161
 162   for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
 163     if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
 164       UpdatedCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(i));
 165     }
 166   }
 167   // Register lists are zero-terminated.
 168   UpdatedCSRs.push_back(0);
 169   MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs);
 170 }
 171
 172 const TargetRegisterClass *
 173 AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
 174                                        unsigned Idx) const {
 175   // edge case for GPR/FPR register classes
 176   if (RC == &AArch64::GPR32allRegClass && Idx == AArch64::hsub)
 177     return &AArch64::FPR32RegClass;
 178   else if (RC == &AArch64::GPR64allRegClass && Idx == AArch64::hsub)
 179     return &AArch64::FPR64RegClass;
 180
 181   // Forward to TableGen's default version.
 182   return AArch64GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
 183 }
 184
 185 const uint32_t *
 186 AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
 187                                                 CallingConv::ID CC) const {
 188   assert(MF.getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
 189          "Invalid subtarget for getDarwinCallPreservedMask");
 190
 191   if (CC == CallingConv::CXX_FAST_TLS)
 192     return CSR_Darwin_AArch64_CXX_TLS_RegMask;
 193   if (CC == CallingConv::AArch64_VectorCall)
 194     return CSR_Darwin_AArch64_AAVPCS_RegMask;
 195   if (CC == CallingConv::AArch64_SVE_VectorCall)
 196     report_fatal_error(
 197         "Calling convention SVE_VectorCall is unsupported on Darwin.");
 198   if (CC == CallingConv::CFGuard_Check)
 199     report_fatal_error(
 200         "Calling convention CFGuard_Check is unsupported on Darwin.");
 201   if (MF.getSubtarget<AArch64Subtarget>()
 202           .getTargetLowering()
 203           ->supportSwiftError() &&
 204       MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
 205     return CSR_Darwin_AArch64_AAPCS_SwiftError_RegMask;
 206   if (CC == CallingConv::SwiftTail)
 207     return CSR_Darwin_AArch64_AAPCS_SwiftTail_RegMask;
 208   if (CC == CallingConv::PreserveMost)
 209     return CSR_Darwin_AArch64_RT_MostRegs_RegMask;
 210   return CSR_Darwin_AArch64_AAPCS_RegMask;
 211 }
 212
 213 const uint32_t *
 214 AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
 215                                           CallingConv::ID CC) const {
 216   bool SCS = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
 217   if (CC == CallingConv::GHC)
 218     // This is academic because all GHC calls are (supposed to be) tail calls
 219     return SCS ? CSR_AArch64_NoRegs_SCS_RegMask : CSR_AArch64_NoRegs_RegMask;
 220   if (CC == CallingConv::AnyReg)
 221     return SCS ? CSR_AArch64_AllRegs_SCS_RegMask : CSR_AArch64_AllRegs_RegMask;
 222
 223   // All the following calling conventions are handled differently on Darwin.
 224   if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
 225     if (SCS)
 226       report_fatal_error("ShadowCallStack attribute not supported on Darwin.");
 227     return getDarwinCallPreservedMask(MF, CC);
 228   }
 229
 230   if (CC == CallingConv::AArch64_VectorCall)
 231     return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
 232   if (CC == CallingConv::AArch64_SVE_VectorCall)
 233     return SCS ? CSR_AArch64_SVE_AAPCS_SCS_RegMask
 234                : CSR_AArch64_SVE_AAPCS_RegMask;
 235   if (CC == CallingConv::CFGuard_Check)
 236     return CSR_Win_AArch64_CFGuard_Check_RegMask;
 237   if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
 238           ->supportSwiftError() &&
 239       MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
 240     return SCS ? CSR_AArch64_AAPCS_SwiftError_SCS_RegMask
 241                : CSR_AArch64_AAPCS_SwiftError_RegMask;
 242   if (CC == CallingConv::SwiftTail) {
 243     if (SCS)
 244       report_fatal_error("ShadowCallStack attribute not supported with swifttail");
 245     return CSR_AArch64_AAPCS_SwiftTail_RegMask;
 246   }
 247   if (CC == CallingConv::PreserveMost)
 248     return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask
 249                : CSR_AArch64_RT_MostRegs_RegMask;
 250   else
 251     return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
 252 }
 253
 254 const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
 255     const MachineFunction &MF) const {
 256   if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux())
 257     return CSR_AArch64_AAPCS_RegMask;
 258
 259   return nullptr;
 260 }
 261
 262 const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
 263   if (TT.isOSDarwin())
 264     return CSR_Darwin_AArch64_TLS_RegMask;
 265
 266   assert(TT.isOSBinFormatELF() && "Invalid target");
 267   return CSR_AArch64_TLS_ELF_RegMask;
 268 }
 269
 270 void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
 271                                                  const uint32_t **Mask) const {
 272   uint32_t *UpdatedMask = MF.allocateRegMask();
 273   unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs());
 274   memcpy(UpdatedMask, *Mask, sizeof(UpdatedMask[0]) * RegMaskSize);
 275
 276   for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
 277     if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
 278       for (MCSubRegIterator SubReg(AArch64::GPR64commonRegClass.getRegister(i),
 279                                    this, true);
 280            SubReg.isValid(); ++SubReg) {
 281         // See TargetRegisterInfo::getCallPreservedMask for how to interpret the
 282         // register mask.
 283         UpdatedMask[*SubReg / 32] |= 1u << (*SubReg % 32);
 284       }
 285     }
 286   }
 287   *Mask = UpdatedMask;
 288 }
 289
 290 const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const {
 291   return CSR_AArch64_NoRegs_RegMask;
 292 }
 293
 294 const uint32_t *
 295 AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
 296                                                 CallingConv::ID CC) const {
 297   // This should return a register mask that is the same as that returned by
 298   // getCallPreservedMask but that additionally preserves the register used for
 299   // the first i64 argument (which must also be the register used to return a
 300   // single i64 return value)
 301   //
 302   // In case that the calling convention does not use the same register for
 303   // both, the function should return NULL (does not currently apply)
 304   assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
 305   if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin())
 306     return CSR_Darwin_AArch64_AAPCS_ThisReturn_RegMask;
 307   return CSR_AArch64_AAPCS_ThisReturn_RegMask;
 308 }
 309
 310 const uint32_t *AArch64RegisterInfo::getWindowsStackProbePreservedMask() const {
 311   return CSR_AArch64_StackProbe_Windows_RegMask;
 312 }
 313
 314 BitVector
 315 AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 316   const AArch64FrameLowering *TFI = getFrameLowering(MF);
 317
 318   // FIXME: avoid re-calculating this every time.
 319   BitVector Reserved(getNumRegs());
 320   markSuperRegs(Reserved, AArch64::WSP);
 321   markSuperRegs(Reserved, AArch64::WZR);
 322
 323   if (TFI->hasFP(MF) || TT.isOSDarwin())
 324     markSuperRegs(Reserved, AArch64::W29);
 325
 326   for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
 327     if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
 328       markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
 329   }
 330
 331   if (hasBasePointer(MF))
 332     markSuperRegs(Reserved, AArch64::W19);
 333
 334   // SLH uses register W16/X16 as the taint register.
 335   if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
 336     markSuperRegs(Reserved, AArch64::W16);
 337
 338   assert(checkAllSuperRegsMarked(Reserved));
 339   return Reserved;
 340 }
 341
 342 bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
 343                                         MCRegister Reg) const {
 344   return getReservedRegs(MF)[Reg];
 345 }
 346
 347 bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
 348   return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) {
 349     return isReservedReg(MF, r);
 350   });
 351 }
 352
 353 void AArch64RegisterInfo::emitReservedArgRegCallError(
 354     const MachineFunction &MF) const {
 355   const Function &F = MF.getFunction();
 356   F.getContext().diagnose(DiagnosticInfoUnsupported{F, ("AArch64 doesn't support"
 357     " function calls if any of the argument registers is reserved.")});
 358 }
 359
 360 bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
 361                                           MCRegister PhysReg) const {
 362   return !isReservedReg(MF, PhysReg);
 363 }
 364
 365 bool AArch64RegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
 366   return PhysReg == AArch64::WZR || PhysReg == AArch64::XZR;
 367 }
 368
 369 const TargetRegisterClass *
 370 AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
 371                                       unsigned Kind) const {
 372   return &AArch64::GPR64spRegClass;
 373 }
 374
 375 const TargetRegisterClass *
 376 AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
 377   if (RC == &AArch64::CCRRegClass)
 378     return &AArch64::GPR64RegClass; // Only MSR & MRS copy NZCV.
 379   return RC;
 380 }
 381
 382 unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
 383
 384 bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
 385   const MachineFrameInfo &MFI = MF.getFrameInfo();
 386
 387   // In the presence of variable sized objects or funclets, if the fixed stack
 388   // size is large enough that referencing from the FP won't result in things
 389   // being in range relatively often, we can use a base pointer to allow access
 390   // from the other direction like the SP normally works.
 391   //
 392   // Furthermore, if both variable sized objects are present, and the
 393   // stack needs to be dynamically re-aligned, the base pointer is the only
 394   // reliable way to reference the locals.
 395   if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
 396     if (hasStackRealignment(MF))
 397       return true;
 398
 399     if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
 400       const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
 401       // Frames that have variable sized objects and scalable SVE objects,
 402       // should always use a basepointer.
 403       if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
 404         return true;
 405     }
 406
 407     // Conservatively estimate whether the negative offset from the frame
 408     // pointer will be sufficient to reach. If a function has a smallish
 409     // frame, it's less likely to have lots of spills and callee saved
 410     // space, so it's all more likely to be within range of the frame pointer.
 411     // If it's wrong, we'll materialize the constant and still get to the
 412     // object; it's just suboptimal. Negative offsets use the unscaled
 413     // load/store instructions, which have a 9-bit signed immediate.
 414     return MFI.getLocalFrameSize() >= 256;
 415   }
 416
 417   return false;
 418 }
 419
 420 Register
 421 AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
 422   const AArch64FrameLowering *TFI = getFrameLowering(MF);
 423   return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
 424 }
 425
 426 bool AArch64RegisterInfo::requiresRegisterScavenging(
 427     const MachineFunction &MF) const {
 428   return true;
 429 }
 430
 431 bool AArch64RegisterInfo::requiresVirtualBaseRegisters(
 432     const MachineFunction &MF) const {
 433   return true;
 434 }
 435
 436 bool
 437 AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
 438   // This function indicates whether the emergency spillslot should be placed
 439   // close to the beginning of the stackframe (closer to FP) or the end
 440   // (closer to SP).
 441   //
 442   // The beginning works most reliably if we have a frame pointer.
 443   // In the presence of any non-constant space between FP and locals,
 444   // (e.g. in case of stack realignment or a scalable SVE area), it is
 445   // better to use SP or BP.
 446   const AArch64FrameLowering &TFI = *getFrameLowering(MF);
 447   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
 448   assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
 449           AFI->hasCalculatedStackSizeSVE()) &&
 450          "Expected SVE area to be calculated by this point");
 451   return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE();
 452 }
 453
 454 bool AArch64RegisterInfo::requiresFrameIndexScavenging(
 455     const MachineFunction &MF) const {
 456   return true;
 457 }
 458
 459 bool
 460 AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
 461   const MachineFrameInfo &MFI = MF.getFrameInfo();
 462   if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack())
 463     return true;
 464   return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken();
 465 }
 466
 467 /// needsFrameBaseReg - Returns true if the instruction's frame index
 468 /// reference would be better served by a base register other than FP
 469 /// or SP. Used by LocalStackFrameAllocation to determine which frame index
 470 /// references it should create new base registers for.
 471 bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
 472                                             int64_t Offset) const {
 473   for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
 474     assert(i < MI->getNumOperands() &&
 475            "Instr doesn't have FrameIndex operand!");
 476
 477   // It's the load/store FI references that cause issues, as it can be difficult
 478   // to materialize the offset if it won't fit in the literal field. Estimate
 479   // based on the size of the local frame and some conservative assumptions
 480   // about the rest of the stack frame (note, this is pre-regalloc, so
 481   // we don't know everything for certain yet) whether this offset is likely
 482   // to be out of range of the immediate. Return true if so.
 483
 484   // We only generate virtual base registers for loads and stores, so
 485   // return false for everything else.
 486   if (!MI->mayLoad() && !MI->mayStore())
 487     return false;
 488
 489   // Without a virtual base register, if the function has variable sized
 490   // objects, all fixed-size local references will be via the frame pointer,
 491   // Approximate the offset and see if it's legal for the instruction.
 492   // Note that the incoming offset is based on the SP value at function entry,
 493   // so it'll be negative.
 494   MachineFunction &MF = *MI->getParent()->getParent();
 495   const AArch64FrameLowering *TFI = getFrameLowering(MF);
 496   MachineFrameInfo &MFI = MF.getFrameInfo();
 497
 498   // Estimate an offset from the frame pointer.
 499   // Conservatively assume all GPR callee-saved registers get pushed.
 500   // FP, LR, X19-X28, D8-D15. 64-bits each.
 501   int64_t FPOffset = Offset - 16 * 20;
 502   // Estimate an offset from the stack pointer.
 503   // The incoming offset is relating to the SP at the start of the function,
 504   // but when we access the local it'll be relative to the SP after local
 505   // allocation, so adjust our SP-relative offset by that allocation size.
 506   Offset += MFI.getLocalFrameSize();
 507   // Assume that we'll have at least some spill slots allocated.
 508   // FIXME: This is a total SWAG number. We should run some statistics
 509   //        and pick a real one.
 510   Offset += 128; // 128 bytes of spill slots
 511
 512   // If there is a frame pointer, try using it.
 513   // The FP is only available if there is no dynamic realignment. We
 514   // don't know for sure yet whether we'll need that, so we guess based
 515   // on whether there are any local variables that would trigger it.
 516   if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
 517     return false;
 518
 519   // If we can reference via the stack pointer or base pointer, try that.
 520   // FIXME: This (and the code that resolves the references) can be improved
 521   //        to only disallow SP relative references in the live range of
 522   //        the VLA(s). In practice, it's unclear how much difference that
 523   //        would make, but it may be worth doing.
 524   if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
 525     return false;
 526
 527   // If even offset 0 is illegal, we don't want a virtual base register.
 528   if (!isFrameOffsetLegal(MI, AArch64::SP, 0))
 529     return false;
 530
 531   // The offset likely isn't legal; we want to allocate a virtual base register.
 532   return true;
 533 }
 534
 535 bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
 536                                              Register BaseReg,
 537                                              int64_t Offset) const {
 538   assert(MI && "Unable to get the legal offset for nil instruction.");
 539   StackOffset SaveOffset = StackOffset::getFixed(Offset);
 540   return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
 541 }
 542
 543 /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
 544 /// at the beginning of the basic block.
 545 Register
 546 AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 547                                                   int FrameIdx,
 548                                                   int64_t Offset) const {
 549   MachineBasicBlock::iterator Ins = MBB->begin();
 550   DebugLoc DL; // Defaults to "unknown"
 551   if (Ins != MBB->end())
 552     DL = Ins->getDebugLoc();
 553   const MachineFunction &MF = *MBB->getParent();
 554   const AArch64InstrInfo *TII =
 555       MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
 556   const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
 557   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
 558   Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
 559   MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
 560   unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
 561
 562   BuildMI(*MBB, Ins, DL, MCID, BaseReg)
 563       .addFrameIndex(FrameIdx)
 564       .addImm(Offset)
 565       .addImm(Shifter);
 566
 567   return BaseReg;
 568 }
 569
 570 void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
 571                                             int64_t Offset) const {
 572   // ARM doesn't need the general 64-bit offsets
 573   StackOffset Off = StackOffset::getFixed(Offset);
 574
 575   unsigned i = 0;
 576   while (!MI.getOperand(i).isFI()) {
 577     ++i;
 578     assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
 579   }
 580
 581   const MachineFunction *MF = MI.getParent()->getParent();
 582   const AArch64InstrInfo *TII =
 583       MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
 584   bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
 585   assert(Done && "Unable to resolve frame index!");
 586   (void)Done;
 587 }
 588
 589 // Create a scratch register for the frame index elimination in an instruction.
 590 // This function has special handling of stack tagging loop pseudos, in which
 591 // case it can also change the instruction opcode (but not the operands).
 592 static Register
 593 createScratchRegisterForInstruction(MachineInstr &MI,
 594                                     const AArch64InstrInfo *TII) {
 595   // ST*Gloop have a reserved scratch register in operand 1. Use it, and also
 596   // replace the instruction with the writeback variant because it will now
 597   // satisfy the operand constraints for it.
 598   if (MI.getOpcode() == AArch64::STGloop) {
 599     MI.setDesc(TII->get(AArch64::STGloop_wback));
 600     return MI.getOperand(1).getReg();
 601   } else if (MI.getOpcode() == AArch64::STZGloop) {
 602     MI.setDesc(TII->get(AArch64::STZGloop_wback));
 603     return MI.getOperand(1).getReg();
 604   } else {
 605     return MI.getMF()->getRegInfo().createVirtualRegister(
 606         &AArch64::GPR64RegClass);
 607   }
 608 }
 609
 610 void AArch64RegisterInfo::getOffsetOpcodes(
 611     const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
 612   // The smallest scalable element supported by scaled SVE addressing
 613   // modes are predicates, which are 2 scalable bytes in size. So the scalable
 614   // byte offset must always be a multiple of 2.
 615   assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
 616
 617   // Add fixed-sized offset using existing DIExpression interface.
 618   DIExpression::appendOffset(Ops, Offset.getFixed());
 619
 620   unsigned VG = getDwarfRegNum(AArch64::VG, true);
 621   int64_t VGSized = Offset.getScalable() / 2;
 622   if (VGSized > 0) {
 623     Ops.push_back(dwarf::DW_OP_constu);
 624     Ops.push_back(VGSized);
 625     Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
 626     Ops.push_back(dwarf::DW_OP_mul);
 627     Ops.push_back(dwarf::DW_OP_plus);
 628   } else if (VGSized < 0) {
 629     Ops.push_back(dwarf::DW_OP_constu);
 630     Ops.push_back(-VGSized);
 631     Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
 632     Ops.push_back(dwarf::DW_OP_mul);
 633     Ops.push_back(dwarf::DW_OP_minus);
 634   }
 635 }
 636
 637 void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 638                                               int SPAdj, unsigned FIOperandNum,
 639                                               RegScavenger *RS) const {
 640   assert(SPAdj == 0 && "Unexpected");
 641
 642   MachineInstr &MI = *II;
 643   MachineBasicBlock &MBB = *MI.getParent();
 644   MachineFunction &MF = *MBB.getParent();
 645   const MachineFrameInfo &MFI = MF.getFrameInfo();
 646   const AArch64InstrInfo *TII =
 647       MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
 648   const AArch64FrameLowering *TFI = getFrameLowering(MF);
 649   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
 650   bool Tagged =
 651       MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
 652   Register FrameReg;
 653
 654   // Special handling of dbg_value, stackmap patchpoint statepoint instructions.
 655   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
 656       MI.getOpcode() == TargetOpcode::PATCHPOINT ||
 657       MI.getOpcode() == TargetOpcode::STATEPOINT) {
 658     StackOffset Offset =
 659         TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
 660                                         /*PreferFP=*/true,
 661                                         /*ForSimm=*/false);
 662     Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
 663     MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
 664     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
 665     return;
 666   }
 667
 668   if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
 669     MachineOperand &FI = MI.getOperand(FIOperandNum);
 670     StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
 671     assert(!Offset.getScalable() &&
 672            "Frame offsets with a scalable component are not supported");
 673     FI.ChangeToImmediate(Offset.getFixed());
 674     return;
 675   }
 676
 677   StackOffset Offset;
 678   if (MI.getOpcode() == AArch64::TAGPstack) {
 679     // TAGPstack must use the virtual frame register in its 3rd operand.
 680     const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
 681     FrameReg = MI.getOperand(3).getReg();
 682     Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
 683                                       AFI->getTaggedBasePointerOffset());
 684   } else if (Tagged) {
 685     StackOffset SPOffset = StackOffset::getFixed(
 686         MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize());
 687     if (MFI.hasVarSizedObjects() ||
 688         isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
 689             (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
 690       // Can't update to SP + offset in place. Precalculate the tagged pointer
 691       // in a scratch register.
 692       Offset = TFI->resolveFrameIndexReference(
 693           MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
 694       Register ScratchReg =
 695           MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
 696       emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
 697                       TII);
 698       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
 699           .addReg(ScratchReg)
 700           .addReg(ScratchReg)
 701           .addImm(0);
 702       MI.getOperand(FIOperandNum)
 703           .ChangeToRegister(ScratchReg, false, false, true);
 704       return;
 705     }
 706     FrameReg = AArch64::SP;
 707     Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
 708                                    (int64_t)MFI.getStackSize());
 709   } else {
 710     Offset = TFI->resolveFrameIndexReference(
 711         MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
 712   }
 713
 714   // Modify MI as necessary to handle as much of 'Offset' as possible
 715   if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
 716     return;
 717
 718   assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
 719          "Emergency spill slot is out of reach");
 720
 721   // If we get here, the immediate doesn't fit into the instruction.  We folded
 722   // as much as possible above.  Handle the rest, providing a register that is
 723   // SP+LargeImm.
 724   Register ScratchReg = createScratchRegisterForInstruction(MI, TII);
 725   emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
 726   MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
 727 }
 728
 729 unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 730                                                   MachineFunction &MF) const {
 731   const AArch64FrameLowering *TFI = getFrameLowering(MF);
 732
 733   switch (RC->getID()) {
 734   default:
 735     return 0;
 736   case AArch64::GPR32RegClassID:
 737   case AArch64::GPR32spRegClassID:
 738   case AArch64::GPR32allRegClassID:
 739   case AArch64::GPR64spRegClassID:
 740   case AArch64::GPR64allRegClassID:
 741   case AArch64::GPR64RegClassID:
 742   case AArch64::GPR32commonRegClassID:
 743   case AArch64::GPR64commonRegClassID:
 744     return 32 - 1                                   // XZR/SP
 745               - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
 746               - MF.getSubtarget<AArch64Subtarget>().getNumXRegisterReserved()
 747               - hasBasePointer(MF);  // X19
 748   case AArch64::FPR8RegClassID:
 749   case AArch64::FPR16RegClassID:
 750   case AArch64::FPR32RegClassID:
 751   case AArch64::FPR64RegClassID:
 752   case AArch64::FPR128RegClassID:
 753     return 32;
 754
 755   case AArch64::MatrixIndexGPR32_12_15RegClassID:
 756     return 4;
 757
 758   case AArch64::DDRegClassID:
 759   case AArch64::DDDRegClassID:
 760   case AArch64::DDDDRegClassID:
 761   case AArch64::QQRegClassID:
 762   case AArch64::QQQRegClassID:
 763   case AArch64::QQQQRegClassID:
 764     return 32;
 765
 766   case AArch64::FPR128_loRegClassID:
 767   case AArch64::FPR64_loRegClassID:
 768   case AArch64::FPR16_loRegClassID:
 769     return 16;
 770   }
 771 }
 772
 773 unsigned AArch64RegisterInfo::getLocalAddressRegister(
 774   const MachineFunction &MF) const {
 775   const auto &MFI = MF.getFrameInfo();
 776   if (!MF.hasEHFunclets() && !MFI.hasVarSizedObjects())
 777     return AArch64::SP;
 778   else if (hasStackRealignment(MF))
 779     return getBaseRegister();
 780   return getFrameRegister(MF);
 781 }
 782
 783 /// SrcRC and DstRC will be morphed into NewRC if this returns true
 784 bool AArch64RegisterInfo::shouldCoalesce(
 785     MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
 786     const TargetRegisterClass *DstRC, unsigned DstSubReg,
 787     const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
 788   if (MI->isCopy() &&
 789       ((DstRC->getID() == AArch64::GPR64RegClassID) ||
 790        (DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
 791       MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg())
 792     // Do not coalesce in the case of a 32-bit subregister copy
 793     // which implements a 32 to 64 bit zero extension
 794     // which relies on the upper 32 bits being zeroed.
 795     return false;
 796   return true;
 797 }