lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

   1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "SIMachineFunctionInfo.h"
  10 #include "AMDGPUArgumentUsageInfo.h"
  11 #include "AMDGPUSubtarget.h"
  12 #include "SIRegisterInfo.h"
  13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  14 #include "Utils/AMDGPUBaseInfo.h"
  15 #include "llvm/ADT/Optional.h"
  16 #include "llvm/CodeGen/MachineBasicBlock.h"
  17 #include "llvm/CodeGen/MachineFrameInfo.h"
  18 #include "llvm/CodeGen/MachineFunction.h"
  19 #include "llvm/CodeGen/MachineRegisterInfo.h"
  20 #include "llvm/IR/CallingConv.h"
  21 #include "llvm/IR/Function.h"
  22 #include <cassert>
  23 #include <vector>
  24
  25 #define MAX_LANES 64
  26
  27 using namespace llvm;
  28
  29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  30   : AMDGPUMachineFunction(MF),
  31     PrivateSegmentBuffer(false),
  32     DispatchPtr(false),
  33     QueuePtr(false),
  34     KernargSegmentPtr(false),
  35     DispatchID(false),
  36     FlatScratchInit(false),
  37     WorkGroupIDX(false),
  38     WorkGroupIDY(false),
  39     WorkGroupIDZ(false),
  40     WorkGroupInfo(false),
  41     PrivateSegmentWaveByteOffset(false),
  42     WorkItemIDX(false),
  43     WorkItemIDY(false),
  44     WorkItemIDZ(false),
  45     ImplicitBufferPtr(false),
  46     ImplicitArgPtr(false),
  47     GITPtrHigh(0xffffffff),
  48     HighBitsOf32BitAddress(0) {
  49   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  50   const Function &F = MF.getFunction();
  51   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  52   WavesPerEU = ST.getWavesPerEU(F);
  53
  54   Occupancy = getMaxWavesPerEU();
  55   limitOccupancy(MF);
  56   CallingConv::ID CC = F.getCallingConv();
  57
  58   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
  59     if (!F.arg_empty())
  60       KernargSegmentPtr = true;
  61     WorkGroupIDX = true;
  62     WorkItemIDX = true;
  63   } else if (CC == CallingConv::AMDGPU_PS) {
  64     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  65   }
  66
  67   if (!isEntryFunction()) {
  68     // Non-entry functions have no special inputs for now, other registers
  69     // required for scratch access.
  70     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
  71     ScratchWaveOffsetReg = AMDGPU::SGPR4;
  72     FrameOffsetReg = AMDGPU::SGPR5;
  73     StackPtrOffsetReg = AMDGPU::SGPR32;
  74
  75     ArgInfo.PrivateSegmentBuffer =
  76       ArgDescriptor::createRegister(ScratchRSrcReg);
  77     ArgInfo.PrivateSegmentWaveByteOffset =
  78       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
  79
  80     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  81       ImplicitArgPtr = true;
  82   } else {
  83     if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
  84       KernargSegmentPtr = true;
  85       MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
  86                                  MaxKernArgAlign);
  87     }
  88   }
  89
  90   if (ST.debuggerEmitPrologue()) {
  91     // Enable everything.
  92     WorkGroupIDX = true;
  93     WorkGroupIDY = true;
  94     WorkGroupIDZ = true;
  95     WorkItemIDX = true;
  96     WorkItemIDY = true;
  97     WorkItemIDZ = true;
  98   } else {
  99     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
 100       WorkGroupIDX = true;
 101
 102     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
 103       WorkGroupIDY = true;
 104
 105     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
 106       WorkGroupIDZ = true;
 107
 108     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
 109       WorkItemIDX = true;
 110
 111     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
 112       WorkItemIDY = true;
 113
 114     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
 115       WorkItemIDZ = true;
 116   }
 117
 118   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 119   bool HasStackObjects = FrameInfo.hasStackObjects();
 120
 121   if (isEntryFunction()) {
 122     // X, XY, and XYZ are the only supported combinations, so make sure Y is
 123     // enabled if Z is.
 124     if (WorkItemIDZ)
 125       WorkItemIDY = true;
 126
 127     PrivateSegmentWaveByteOffset = true;
 128
 129     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
 130     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
 131         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
 132       ArgInfo.PrivateSegmentWaveByteOffset =
 133           ArgDescriptor::createRegister(AMDGPU::SGPR5);
 134   }
 135
 136   bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
 137   if (isAmdHsaOrMesa) {
 138     PrivateSegmentBuffer = true;
 139
 140     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
 141       DispatchPtr = true;
 142
 143     if (F.hasFnAttribute("amdgpu-queue-ptr"))
 144       QueuePtr = true;
 145
 146     if (F.hasFnAttribute("amdgpu-dispatch-id"))
 147       DispatchID = true;
 148   } else if (ST.isMesaGfxShader(F)) {
 149     ImplicitBufferPtr = true;
 150   }
 151
 152   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
 153     KernargSegmentPtr = true;
 154
 155   if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
 156     // TODO: This could be refined a lot. The attribute is a poor way of
 157     // detecting calls that may require it before argument lowering.
 158     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
 159       FlatScratchInit = true;
 160   }
 161
 162   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
 163   StringRef S = A.getValueAsString();
 164   if (!S.empty())
 165     S.consumeInteger(0, GITPtrHigh);
 166
 167   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
 168   S = A.getValueAsString();
 169   if (!S.empty())
 170     S.consumeInteger(0, HighBitsOf32BitAddress);
 171 }
 172
 173 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
 174   limitOccupancy(getMaxWavesPerEU());
 175   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
 176   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
 177                  MF.getFunction()));
 178 }
 179
 180 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
 181   const SIRegisterInfo &TRI) {
 182   ArgInfo.PrivateSegmentBuffer =
 183     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 184     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
 185   NumUserSGPRs += 4;
 186   return ArgInfo.PrivateSegmentBuffer.getRegister();
 187 }
 188
 189 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
 190   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 191     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 192   NumUserSGPRs += 2;
 193   return ArgInfo.DispatchPtr.getRegister();
 194 }
 195
 196 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
 197   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 198     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 199   NumUserSGPRs += 2;
 200   return ArgInfo.QueuePtr.getRegister();
 201 }
 202
 203 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
 204   ArgInfo.KernargSegmentPtr
 205     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 206     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 207   NumUserSGPRs += 2;
 208   return ArgInfo.KernargSegmentPtr.getRegister();
 209 }
 210
 211 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
 212   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 213     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 214   NumUserSGPRs += 2;
 215   return ArgInfo.DispatchID.getRegister();
 216 }
 217
 218 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
 219   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 220     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 221   NumUserSGPRs += 2;
 222   return ArgInfo.FlatScratchInit.getRegister();
 223 }
 224
 225 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
 226   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 227     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 228   NumUserSGPRs += 2;
 229   return ArgInfo.ImplicitBufferPtr.getRegister();
 230 }
 231
 232 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
 233   for (unsigned I = 0; CSRegs[I]; ++I) {
 234     if (CSRegs[I] == Reg)
 235       return true;
 236   }
 237
 238   return false;
 239 }
 240
 241 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
 242 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
 243                                                     int FI) {
 244   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
 245
 246   // This has already been allocated.
 247   if (!SpillLanes.empty())
 248     return true;
 249
 250   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 251   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 252   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 253   MachineRegisterInfo &MRI = MF.getRegInfo();
 254   unsigned WaveSize = ST.getWavefrontSize();
 255
 256   unsigned Size = FrameInfo.getObjectSize(FI);
 257   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
 258   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
 259
 260   int NumLanes = Size / 4;
 261
 262   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 263
 264   // Make sure to handle the case where a wide SGPR spill may span between two
 265   // VGPRs.
 266   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
 267     unsigned LaneVGPR;
 268     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
 269
 270     if (VGPRIndex == 0) {
 271       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
 272       if (LaneVGPR == AMDGPU::NoRegister) {
 273         // We have no VGPRs left for spilling SGPRs. Reset because we will not
 274         // partially spill the SGPR to VGPRs.
 275         SGPRToVGPRSpills.erase(FI);
 276         NumVGPRSpillLanes -= I;
 277         return false;
 278       }
 279
 280       Optional<int> CSRSpillFI;
 281       if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
 282           isCalleeSavedReg(CSRegs, LaneVGPR)) {
 283         CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
 284       }
 285
 286       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
 287
 288       // Add this register as live-in to all blocks to avoid machine verifer
 289       // complaining about use of an undefined physical register.
 290       for (MachineBasicBlock &BB : MF)
 291         BB.addLiveIn(LaneVGPR);
 292     } else {
 293       LaneVGPR = SpillVGPRs.back().VGPR;
 294     }
 295
 296     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
 297   }
 298
 299   return true;
 300 }
 301
 302 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
 303   for (auto &R : SGPRToVGPRSpills)
 304     MFI.RemoveStackObject(R.first);
 305 }
 306
 307
 308 /// \returns VGPR used for \p Dim' work item ID.
 309 unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
 310   switch (Dim) {
 311   case 0:
 312     assert(hasWorkItemIDX());
 313     return AMDGPU::VGPR0;
 314   case 1:
 315     assert(hasWorkItemIDY());
 316     return AMDGPU::VGPR1;
 317   case 2:
 318     assert(hasWorkItemIDZ());
 319     return AMDGPU::VGPR2;
 320   }
 321   llvm_unreachable("unexpected dimension");
 322 }
 323
 324 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
 325   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
 326   return AMDGPU::SGPR0 + NumUserSGPRs;
 327 }
 328
 329 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
 330   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
 331 }