[AMDGPU] Select VGPR versions of MFMA if possible
llvm-project.git: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);
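
    // If this kernel's VGPR budget fits within the architectural VGPR file
    // and nothing in the IR can require AGPRs, drop the AGPR requirement so
    // MFMA instructions can be selected with VGPR operands (see mayUseAGPRs).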
    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}
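
/// Cap the cached occupancy estimate at both the maximum waves per EU for
/// this function and the occupancy achievable with its LDS usage.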
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}
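
// Each of the add* helpers below reserves the next unallocated user SGPR(s)
// for a preloaded kernel argument, records the assignment in ArgInfo, and
// returns the chosen register.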
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
        getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}
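
/// Return true if \p Reg appears in the null-terminated list of callee-saved
/// registers \p CSRegs.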
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \return true if \p NumNeed spill lanes are available in the VGPRs already
/// used for SGPR spilling.
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them have not been inserted yet. Also remove the frame indices
  // from the `SGPRToVGPRSpills` data structure; otherwise a later pass such as
  // "stack slot coloring" could re-map the freed frame indices and cause
  // unexpected bugs.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}
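
/// Return the frame index reserved for register scavenger spills, creating it
/// on first use: a fixed object at offset 0 for entry functions, or an
/// ordinary stack object otherwise.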
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
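
/// Return the SGPR holding the low 32 bits of the GIT (global information
/// table) pointer on amdpal, or an invalid register on other OSes.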
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}
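
/// Render \p Reg as a register name string for MIR YAML serialization.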
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}
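
/// Convert the in-memory argument descriptors in \p ArgInfo to their MIR YAML
/// representation. Returns None when no argument is set.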
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
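
/// Initialize this MachineFunctionInfo from its parsed MIR YAML form.
/// Returns true and sets \p Error / \p SourceRange if the YAML is invalid.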
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the invalid frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}
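
/// Conservatively check the IR for anything that may require AGPRs: inline
/// asm that references an 'a' (AGPR) register constraint, or any call that is
/// not to an intrinsic (including indirect calls).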
bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }

        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}
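
/// Return whether this function actually uses AGPRs, caching the result where
/// it is final. Non-entry functions and functions with calls are conservatively
/// assumed to use them.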
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}