llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

   1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  15
  16 #include "AMDGPUArgumentUsageInfo.h"
  17 #include "AMDGPUMachineFunction.h"
  18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  19 #include "SIInstrInfo.h"
  20 #include "llvm/ADT/MapVector.h"
  21 #include "llvm/CodeGen/MIRYamlMapping.h"
  22 #include "llvm/CodeGen/PseudoSourceValue.h"
  23 #include "llvm/Support/raw_ostream.h"
  24
  25 namespace llvm {
  26
  27 class MachineFrameInfo;
  28 class MachineFunction;
  29 class SIMachineFunctionInfo;
  30 class SIRegisterInfo;
  31 class TargetRegisterClass;
  32
  33 class AMDGPUPseudoSourceValue : public PseudoSourceValue {
  34 public:
  35   enum AMDGPUPSVKind : unsigned {
  36     PSVBuffer = PseudoSourceValue::TargetCustom,
  37     PSVImage,
  38     GWSResource
  39   };
  40
  41 protected:
  42   AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
  43       : PseudoSourceValue(Kind, TII) {}
  44
  45 public:
  46   bool isConstant(const MachineFrameInfo *) const override {
  47     // This should probably be true for most images, but we will start by being
  48     // conservative.
  49     return false;
  50   }
  51
  52   bool isAliased(const MachineFrameInfo *) const override {
  53     return true;
  54   }
  55
  56   bool mayAlias(const MachineFrameInfo *) const override {
  57     return true;
  58   }
  59 };
  60
  61 class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
  62 public:
  63   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
  64       : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
  65
  66   static bool classof(const PseudoSourceValue *V) {
  67     return V->kind() == PSVBuffer;
  68   }
  69
  70   void printCustom(raw_ostream &OS) const override { OS << "BufferResource"; }
  71 };
  72
  73 class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  74 public:
  75   // TODO: Is the img rsrc useful?
  76   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
  77       : AMDGPUPseudoSourceValue(PSVImage, TII) {}
  78
  79   static bool classof(const PseudoSourceValue *V) {
  80     return V->kind() == PSVImage;
  81   }
  82
  83   void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; }
  84 };
  85
  86 class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  87 public:
  88   explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
  89       : AMDGPUPseudoSourceValue(GWSResource, TII) {}
  90
  91   static bool classof(const PseudoSourceValue *V) {
  92     return V->kind() == GWSResource;
  93   }
  94
  95   // These are inaccessible memory from IR.
  96   bool isAliased(const MachineFrameInfo *) const override {
  97     return false;
  98   }
  99
 100   // These are inaccessible memory from IR.
 101   bool mayAlias(const MachineFrameInfo *) const override {
 102     return false;
 103   }
 104
 105   void printCustom(raw_ostream &OS) const override {
 106     OS << "GWSResource";
 107   }
 108 };
 109
 110 namespace yaml {
 111
 112 struct SIArgument {
 113   bool IsRegister;
 114   union {
 115     StringValue RegisterName;
 116     unsigned StackOffset;
 117   };
 118   Optional<unsigned> Mask;
 119
 120   // Default constructor, which creates a stack argument.
 121   SIArgument() : IsRegister(false), StackOffset(0) {}
 122   SIArgument(const SIArgument &Other) {
 123     IsRegister = Other.IsRegister;
 124     if (IsRegister) {
 125       ::new ((void *)std::addressof(RegisterName))
 126           StringValue(Other.RegisterName);
 127     } else
 128       StackOffset = Other.StackOffset;
 129     Mask = Other.Mask;
 130   }
 131   SIArgument &operator=(const SIArgument &Other) {
 132     IsRegister = Other.IsRegister;
 133     if (IsRegister) {
 134       ::new ((void *)std::addressof(RegisterName))
 135           StringValue(Other.RegisterName);
 136     } else
 137       StackOffset = Other.StackOffset;
 138     Mask = Other.Mask;
 139     return *this;
 140   }
 141   ~SIArgument() {
 142     if (IsRegister)
 143       RegisterName.~StringValue();
 144   }
 145
 146   // Helper to create a register or stack argument.
 147   static inline SIArgument createArgument(bool IsReg) {
 148     if (IsReg)
 149       return SIArgument(IsReg);
 150     return SIArgument();
 151   }
 152
 153 private:
 154   // Construct a register argument.
 155   SIArgument(bool) : IsRegister(true), RegisterName() {}
 156 };
 157
 158 template <> struct MappingTraits<SIArgument> {
 159   static void mapping(IO &YamlIO, SIArgument &A) {
 160     if (YamlIO.outputting()) {
 161       if (A.IsRegister)
 162         YamlIO.mapRequired("reg", A.RegisterName);
 163       else
 164         YamlIO.mapRequired("offset", A.StackOffset);
 165     } else {
 166       auto Keys = YamlIO.keys();
 167       if (is_contained(Keys, "reg")) {
 168         A = SIArgument::createArgument(true);
 169         YamlIO.mapRequired("reg", A.RegisterName);
 170       } else if (is_contained(Keys, "offset"))
 171         YamlIO.mapRequired("offset", A.StackOffset);
 172       else
 173         YamlIO.setError("missing required key 'reg' or 'offset'");
 174     }
 175     YamlIO.mapOptional("mask", A.Mask);
 176   }
 177   static const bool flow = true;
 178 };
 179
 180 struct SIArgumentInfo {
 181   Optional<SIArgument> PrivateSegmentBuffer;
 182   Optional<SIArgument> DispatchPtr;
 183   Optional<SIArgument> QueuePtr;
 184   Optional<SIArgument> KernargSegmentPtr;
 185   Optional<SIArgument> DispatchID;
 186   Optional<SIArgument> FlatScratchInit;
 187   Optional<SIArgument> PrivateSegmentSize;
 188
 189   Optional<SIArgument> WorkGroupIDX;
 190   Optional<SIArgument> WorkGroupIDY;
 191   Optional<SIArgument> WorkGroupIDZ;
 192   Optional<SIArgument> WorkGroupInfo;
 193   Optional<SIArgument> PrivateSegmentWaveByteOffset;
 194
 195   Optional<SIArgument> ImplicitArgPtr;
 196   Optional<SIArgument> ImplicitBufferPtr;
 197
 198   Optional<SIArgument> WorkItemIDX;
 199   Optional<SIArgument> WorkItemIDY;
 200   Optional<SIArgument> WorkItemIDZ;
 201 };
 202
 203 template <> struct MappingTraits<SIArgumentInfo> {
 204   static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
 205     YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
 206     YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
 207     YamlIO.mapOptional("queuePtr", AI.QueuePtr);
 208     YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
 209     YamlIO.mapOptional("dispatchID", AI.DispatchID);
 210     YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
 211     YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
 212
 213     YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
 214     YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
 215     YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
 216     YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
 217     YamlIO.mapOptional("privateSegmentWaveByteOffset",
 218                        AI.PrivateSegmentWaveByteOffset);
 219
 220     YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
 221     YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
 222
 223     YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
 224     YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
 225     YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
 226   }
 227 };
 228
 229 // Default to default mode for default calling convention.
 230 struct SIMode {
 231   bool IEEE = true;
 232   bool DX10Clamp = true;
 233   bool FP32InputDenormals = true;
 234   bool FP32OutputDenormals = true;
 235   bool FP64FP16InputDenormals = true;
 236   bool FP64FP16OutputDenormals = true;
 237
 238   SIMode() = default;
 239
 240   SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
 241     IEEE = Mode.IEEE;
 242     DX10Clamp = Mode.DX10Clamp;
 243     FP32InputDenormals = Mode.FP32InputDenormals;
 244     FP32OutputDenormals = Mode.FP32OutputDenormals;
 245     FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
 246     FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
 247   }
 248
 249   bool operator ==(const SIMode Other) const {
 250     return IEEE == Other.IEEE &&
 251            DX10Clamp == Other.DX10Clamp &&
 252            FP32InputDenormals == Other.FP32InputDenormals &&
 253            FP32OutputDenormals == Other.FP32OutputDenormals &&
 254            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
 255            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
 256   }
 257 };
 258
 259 template <> struct MappingTraits<SIMode> {
 260   static void mapping(IO &YamlIO, SIMode &Mode) {
 261     YamlIO.mapOptional("ieee", Mode.IEEE, true);
 262     YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
 263     YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
 264     YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
 265     YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
 266     YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
 267   }
 268 };
 269
 270 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
 271   uint64_t ExplicitKernArgSize = 0;
 272   unsigned MaxKernArgAlign = 0;
 273   unsigned LDSSize = 0;
 274   Align DynLDSAlign;
 275   bool IsEntryFunction = false;
 276   bool NoSignedZerosFPMath = false;
 277   bool MemoryBound = false;
 278   bool WaveLimiter = false;
 279   bool HasSpilledSGPRs = false;
 280   bool HasSpilledVGPRs = false;
 281   uint32_t HighBitsOf32BitAddress = 0;
 282
 283   // TODO: 10 may be a better default since it's the maximum.
 284   unsigned Occupancy = 0;
 285
 286   StringValue ScratchRSrcReg = "$private_rsrc_reg";
 287   StringValue FrameOffsetReg = "$fp_reg";
 288   StringValue StackPtrOffsetReg = "$sp_reg";
 289
 290   Optional<SIArgumentInfo> ArgInfo;
 291   SIMode Mode;
 292   Optional<FrameIndex> ScavengeFI;
 293
 294   SIMachineFunctionInfo() = default;
 295   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
 296                         const TargetRegisterInfo &TRI,
 297                         const llvm::MachineFunction &MF);
 298
 299   void mappingImpl(yaml::IO &YamlIO) override;
 300   ~SIMachineFunctionInfo() = default;
 301 };
 302
 303 template <> struct MappingTraits<SIMachineFunctionInfo> {
 304   static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
 305     YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
 306                        UINT64_C(0));
 307     YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
 308     YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
 309     YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
 310     YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
 311     YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
 312     YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
 313     YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
 314     YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
 315     YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
 316     YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
 317                        StringValue("$private_rsrc_reg"));
 318     YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
 319                        StringValue("$fp_reg"));
 320     YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
 321                        StringValue("$sp_reg"));
 322     YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
 323     YamlIO.mapOptional("mode", MFI.Mode, SIMode());
 324     YamlIO.mapOptional("highBitsOf32BitAddress",
 325                        MFI.HighBitsOf32BitAddress, 0u);
 326     YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
 327     YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
 328   }
 329 };
 330
 331 } // end namespace yaml
 332
 333 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 334 /// tells the hardware which interpolation parameters to load.
 335 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
 336   friend class GCNTargetMachine;
 337
 338   Register TIDReg = AMDGPU::NoRegister;
 339
 340   // Registers that may be reserved for spilling purposes. These may be the same
 341   // as the input registers.
 342   Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
 343
 344   // This is the the unswizzled offset from the current dispatch's scratch wave
 345   // base to the beginning of the current function's frame.
 346   Register FrameOffsetReg = AMDGPU::FP_REG;
 347
 348   // This is an ABI register used in the non-entry calling convention to
 349   // communicate the unswizzled offset from the current dispatch's scratch wave
 350   // base to the beginning of the new function's frame.
 351   Register StackPtrOffsetReg = AMDGPU::SP_REG;
 352
 353   AMDGPUFunctionArgInfo ArgInfo;
 354
 355   // Graphics info.
 356   unsigned PSInputAddr = 0;
 357   unsigned PSInputEnable = 0;
 358
 359   /// Number of bytes of arguments this function has on the stack. If the callee
 360   /// is expected to restore the argument stack this should be a multiple of 16,
 361   /// all usable during a tail call.
 362   ///
 363   /// The alternative would forbid tail call optimisation in some cases: if we
 364   /// want to transfer control from a function with 8-bytes of stack-argument
 365   /// space to a function with 16-bytes then misalignment of this value would
 366   /// make a stack adjustment necessary, which could not be undone by the
 367   /// callee.
 368   unsigned BytesInStackArgArea = 0;
 369
 370   bool ReturnsVoid = true;
 371
 372   // A pair of default/requested minimum/maximum flat work group sizes.
 373   // Minimum - first, maximum - second.
 374   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
 375
 376   // A pair of default/requested minimum/maximum number of waves per execution
 377   // unit. Minimum - first, maximum - second.
 378   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 379
 380   std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
 381   std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
 382   std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
 383
 384 private:
 385   unsigned LDSWaveSpillSize = 0;
 386   unsigned NumUserSGPRs = 0;
 387   unsigned NumSystemSGPRs = 0;
 388
 389   bool HasSpilledSGPRs = false;
 390   bool HasSpilledVGPRs = false;
 391   bool HasNonSpillStackObjects = false;
 392   bool IsStackRealigned = false;
 393
 394   unsigned NumSpilledSGPRs = 0;
 395   unsigned NumSpilledVGPRs = 0;
 396
 397   // Feature bits required for inputs passed in user SGPRs.
 398   bool PrivateSegmentBuffer : 1;
 399   bool DispatchPtr : 1;
 400   bool QueuePtr : 1;
 401   bool KernargSegmentPtr : 1;
 402   bool DispatchID : 1;
 403   bool FlatScratchInit : 1;
 404
 405   // Feature bits required for inputs passed in system SGPRs.
 406   bool WorkGroupIDX : 1; // Always initialized.
 407   bool WorkGroupIDY : 1;
 408   bool WorkGroupIDZ : 1;
 409   bool WorkGroupInfo : 1;
 410   bool PrivateSegmentWaveByteOffset : 1;
 411
 412   bool WorkItemIDX : 1; // Always initialized.
 413   bool WorkItemIDY : 1;
 414   bool WorkItemIDZ : 1;
 415
 416   // Private memory buffer
 417   // Compute directly in sgpr[0:1]
 418   // Other shaders indirect 64-bits at sgpr[0:1]
 419   bool ImplicitBufferPtr : 1;
 420
 421   // Pointer to where the ABI inserts special kernel arguments separate from the
 422   // user arguments. This is an offset from the KernargSegmentPtr.
 423   bool ImplicitArgPtr : 1;
 424
 425   bool MayNeedAGPRs : 1;
 426
 427   // The hard-wired high half of the address of the global information table
 428   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
 429   // current hardware only allows a 16 bit value.
 430   unsigned GITPtrHigh;
 431
 432   unsigned HighBitsOf32BitAddress;
 433   unsigned GDSSize;
 434
 435   // Current recorded maximum possible occupancy.
 436   unsigned Occupancy;
 437
 438   mutable Optional<bool> UsesAGPRs;
 439
 440   MCPhysReg getNextUserSGPR() const;
 441
 442   MCPhysReg getNextSystemSGPR() const;
 443
 444 public:
 445   struct SpilledReg {
 446     Register VGPR;
 447     int Lane = -1;
 448
 449     SpilledReg() = default;
 450     SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
 451
 452     bool hasLane() { return Lane != -1;}
 453     bool hasReg() { return VGPR != 0;}
 454   };
 455
 456   struct SGPRSpillVGPR {
 457     // VGPR used for SGPR spills
 458     Register VGPR;
 459
 460     // If the VGPR is is used for SGPR spills in a non-entrypoint function, the
 461     // stack slot used to save/restore it in the prolog/epilog.
 462     Optional<int> FI;
 463
 464     SGPRSpillVGPR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
 465   };
 466
 467   struct VGPRSpillToAGPR {
 468     SmallVector<MCPhysReg, 32> Lanes;
 469     bool FullyAllocated = false;
 470     bool IsDead = false;
 471   };
 472
 473   // Map WWM VGPR to a stack slot that is used to save/restore it in the
 474   // prolog/epilog.
 475   MapVector<Register, Optional<int>> WWMReservedRegs;
 476
 477 private:
 478   // Track VGPR + wave index for each subregister of the SGPR spilled to
 479   // frameindex key.
 480   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
 481   unsigned NumVGPRSpillLanes = 0;
 482   SmallVector<SGPRSpillVGPR, 2> SpillVGPRs;
 483
 484   DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
 485
 486   // AGPRs used for VGPR spills.
 487   SmallVector<MCPhysReg, 32> SpillAGPR;
 488
 489   // VGPRs used for AGPR spills.
 490   SmallVector<MCPhysReg, 32> SpillVGPR;
 491
 492   // Emergency stack slot. Sometimes, we create this before finalizing the stack
 493   // frame, so save it here and add it to the RegScavenger later.
 494   Optional<int> ScavengeFI;
 495
 496 public: // FIXME
 497   /// If this is set, an SGPR used for save/restore of the register used for the
 498   /// frame pointer.
 499   Register SGPRForFPSaveRestoreCopy;
 500   Optional<int> FramePointerSaveIndex;
 501
 502   /// If this is set, an SGPR used for save/restore of the register used for the
 503   /// base pointer.
 504   Register SGPRForBPSaveRestoreCopy;
 505   Optional<int> BasePointerSaveIndex;
 506
 507   bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
 508
 509 public:
 510   SIMachineFunctionInfo(const MachineFunction &MF);
 511
 512   bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
 513                                 const MachineFunction &MF,
 514                                 PerFunctionMIParsingState &PFS,
 515                                 SMDiagnostic &Error, SMRange &SourceRange);
 516
 517   void reserveWWMRegister(Register Reg, Optional<int> FI) {
 518     WWMReservedRegs.insert(std::make_pair(Reg, FI));
 519   }
 520
 521   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
 522     auto I = SGPRToVGPRSpills.find(FrameIndex);
 523     return (I == SGPRToVGPRSpills.end()) ?
 524       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
 525   }
 526
 527   ArrayRef<SGPRSpillVGPR> getSGPRSpillVGPRs() const { return SpillVGPRs; }
 528
 529   void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
 530     SpillVGPRs[Index].VGPR = NewVGPR;
 531     SpillVGPRs[Index].FI = newFI;
 532   }
 533
 534   bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
 535
 536   ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
 537     return SpillAGPR;
 538   }
 539
 540   ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
 541     return SpillVGPR;
 542   }
 543
 544   MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
 545     auto I = VGPRToAGPRSpills.find(FrameIndex);
 546     return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
 547                                          : I->second.Lanes[Lane];
 548   }
 549
 550   void setVGPRToAGPRSpillDead(int FrameIndex) {
 551     auto I = VGPRToAGPRSpills.find(FrameIndex);
 552     if (I != VGPRToAGPRSpills.end())
 553       I->second.IsDead = true;
 554   }
 555
 556   bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
 557                                  unsigned NumLane) const;
 558   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
 559   bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
 560
 561   /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
 562   /// to the default stack.
 563   bool removeDeadFrameIndices(MachineFrameInfo &MFI,
 564                               bool ResetSGPRSpillStackIDs);
 565
 566   int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
 567   Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
 568
 569   bool hasCalculatedTID() const { return TIDReg != 0; };
 570   Register getTIDReg() const { return TIDReg; };
 571   void setTIDReg(Register Reg) { TIDReg = Reg; }
 572
 573   unsigned getBytesInStackArgArea() const {
 574     return BytesInStackArgArea;
 575   }
 576
 577   void setBytesInStackArgArea(unsigned Bytes) {
 578     BytesInStackArgArea = Bytes;
 579   }
 580
 581   // Add user SGPRs.
 582   Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
 583   Register addDispatchPtr(const SIRegisterInfo &TRI);
 584   Register addQueuePtr(const SIRegisterInfo &TRI);
 585   Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
 586   Register addDispatchID(const SIRegisterInfo &TRI);
 587   Register addFlatScratchInit(const SIRegisterInfo &TRI);
 588   Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
 589
 590   // Add system SGPRs.
 591   Register addWorkGroupIDX() {
 592     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
 593     NumSystemSGPRs += 1;
 594     return ArgInfo.WorkGroupIDX.getRegister();
 595   }
 596
 597   Register addWorkGroupIDY() {
 598     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
 599     NumSystemSGPRs += 1;
 600     return ArgInfo.WorkGroupIDY.getRegister();
 601   }
 602
 603   Register addWorkGroupIDZ() {
 604     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
 605     NumSystemSGPRs += 1;
 606     return ArgInfo.WorkGroupIDZ.getRegister();
 607   }
 608
 609   Register addWorkGroupInfo() {
 610     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
 611     NumSystemSGPRs += 1;
 612     return ArgInfo.WorkGroupInfo.getRegister();
 613   }
 614
 615   // Add special VGPR inputs
 616   void setWorkItemIDX(ArgDescriptor Arg) {
 617     ArgInfo.WorkItemIDX = Arg;
 618   }
 619
 620   void setWorkItemIDY(ArgDescriptor Arg) {
 621     ArgInfo.WorkItemIDY = Arg;
 622   }
 623
 624   void setWorkItemIDZ(ArgDescriptor Arg) {
 625     ArgInfo.WorkItemIDZ = Arg;
 626   }
 627
 628   Register addPrivateSegmentWaveByteOffset() {
 629     ArgInfo.PrivateSegmentWaveByteOffset
 630       = ArgDescriptor::createRegister(getNextSystemSGPR());
 631     NumSystemSGPRs += 1;
 632     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 633   }
 634
 635   void setPrivateSegmentWaveByteOffset(Register Reg) {
 636     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
 637   }
 638
 639   bool hasPrivateSegmentBuffer() const {
 640     return PrivateSegmentBuffer;
 641   }
 642
 643   bool hasDispatchPtr() const {
 644     return DispatchPtr;
 645   }
 646
 647   bool hasQueuePtr() const {
 648     return QueuePtr;
 649   }
 650
 651   bool hasKernargSegmentPtr() const {
 652     return KernargSegmentPtr;
 653   }
 654
 655   bool hasDispatchID() const {
 656     return DispatchID;
 657   }
 658
 659   bool hasFlatScratchInit() const {
 660     return FlatScratchInit;
 661   }
 662
 663   bool hasWorkGroupIDX() const {
 664     return WorkGroupIDX;
 665   }
 666
 667   bool hasWorkGroupIDY() const {
 668     return WorkGroupIDY;
 669   }
 670
 671   bool hasWorkGroupIDZ() const {
 672     return WorkGroupIDZ;
 673   }
 674
 675   bool hasWorkGroupInfo() const {
 676     return WorkGroupInfo;
 677   }
 678
 679   bool hasPrivateSegmentWaveByteOffset() const {
 680     return PrivateSegmentWaveByteOffset;
 681   }
 682
 683   bool hasWorkItemIDX() const {
 684     return WorkItemIDX;
 685   }
 686
 687   bool hasWorkItemIDY() const {
 688     return WorkItemIDY;
 689   }
 690
 691   bool hasWorkItemIDZ() const {
 692     return WorkItemIDZ;
 693   }
 694
 695   bool hasImplicitArgPtr() const {
 696     return ImplicitArgPtr;
 697   }
 698
 699   bool hasImplicitBufferPtr() const {
 700     return ImplicitBufferPtr;
 701   }
 702
 703   AMDGPUFunctionArgInfo &getArgInfo() {
 704     return ArgInfo;
 705   }
 706
 707   const AMDGPUFunctionArgInfo &getArgInfo() const {
 708     return ArgInfo;
 709   }
 710
 711   std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
 712   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 713     return ArgInfo.getPreloadedValue(Value);
 714   }
 715
 716   MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 717     auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
 718     return Arg ? Arg->getRegister() : MCRegister();
 719   }
 720
 721   unsigned getGITPtrHigh() const {
 722     return GITPtrHigh;
 723   }
 724
 725   Register getGITPtrLoReg(const MachineFunction &MF) const;
 726
 727   uint32_t get32BitAddressHighBits() const {
 728     return HighBitsOf32BitAddress;
 729   }
 730
 731   unsigned getGDSSize() const {
 732     return GDSSize;
 733   }
 734
 735   unsigned getNumUserSGPRs() const {
 736     return NumUserSGPRs;
 737   }
 738
 739   unsigned getNumPreloadedSGPRs() const {
 740     return NumUserSGPRs + NumSystemSGPRs;
 741   }
 742
 743   Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
 744     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 745   }
 746
 747   /// Returns the physical register reserved for use as the resource
 748   /// descriptor for scratch accesses.
 749   Register getScratchRSrcReg() const {
 750     return ScratchRSrcReg;
 751   }
 752
 753   void setScratchRSrcReg(Register Reg) {
 754     assert(Reg != 0 && "Should never be unset");
 755     ScratchRSrcReg = Reg;
 756   }
 757
 758   Register getFrameOffsetReg() const {
 759     return FrameOffsetReg;
 760   }
 761
 762   void setFrameOffsetReg(Register Reg) {
 763     assert(Reg != 0 && "Should never be unset");
 764     FrameOffsetReg = Reg;
 765   }
 766
 767   void setStackPtrOffsetReg(Register Reg) {
 768     assert(Reg != 0 && "Should never be unset");
 769     StackPtrOffsetReg = Reg;
 770   }
 771
 772   // Note the unset value for this is AMDGPU::SP_REG rather than
 773   // NoRegister. This is mostly a workaround for MIR tests where state that
 774   // can't be directly computed from the function is not preserved in serialized
 775   // MIR.
 776   Register getStackPtrOffsetReg() const {
 777     return StackPtrOffsetReg;
 778   }
 779
 780   Register getQueuePtrUserSGPR() const {
 781     return ArgInfo.QueuePtr.getRegister();
 782   }
 783
 784   Register getImplicitBufferPtrUserSGPR() const {
 785     return ArgInfo.ImplicitBufferPtr.getRegister();
 786   }
 787
 788   bool hasSpilledSGPRs() const {
 789     return HasSpilledSGPRs;
 790   }
 791
 792   void setHasSpilledSGPRs(bool Spill = true) {
 793     HasSpilledSGPRs = Spill;
 794   }
 795
 796   bool hasSpilledVGPRs() const {
 797     return HasSpilledVGPRs;
 798   }
 799
 800   void setHasSpilledVGPRs(bool Spill = true) {
 801     HasSpilledVGPRs = Spill;
 802   }
 803
 804   bool hasNonSpillStackObjects() const {
 805     return HasNonSpillStackObjects;
 806   }
 807
 808   void setHasNonSpillStackObjects(bool StackObject = true) {
 809     HasNonSpillStackObjects = StackObject;
 810   }
 811
 812   bool isStackRealigned() const {
 813     return IsStackRealigned;
 814   }
 815
 816   void setIsStackRealigned(bool Realigned = true) {
 817     IsStackRealigned = Realigned;
 818   }
 819
 820   unsigned getNumSpilledSGPRs() const {
 821     return NumSpilledSGPRs;
 822   }
 823
 824   unsigned getNumSpilledVGPRs() const {
 825     return NumSpilledVGPRs;
 826   }
 827
 828   void addToSpilledSGPRs(unsigned num) {
 829     NumSpilledSGPRs += num;
 830   }
 831
 832   void addToSpilledVGPRs(unsigned num) {
 833     NumSpilledVGPRs += num;
 834   }
 835
 836   unsigned getPSInputAddr() const {
 837     return PSInputAddr;
 838   }
 839
 840   unsigned getPSInputEnable() const {
 841     return PSInputEnable;
 842   }
 843
 844   bool isPSInputAllocated(unsigned Index) const {
 845     return PSInputAddr & (1 << Index);
 846   }
 847
 848   void markPSInputAllocated(unsigned Index) {
 849     PSInputAddr |= 1 << Index;
 850   }
 851
 852   void markPSInputEnabled(unsigned Index) {
 853     PSInputEnable |= 1 << Index;
 854   }
 855
 856   bool returnsVoid() const {
 857     return ReturnsVoid;
 858   }
 859
 860   void setIfReturnsVoid(bool Value) {
 861     ReturnsVoid = Value;
 862   }
 863
 864   /// \returns A pair of default/requested minimum/maximum flat work group sizes
 865   /// for this function.
 866   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
 867     return FlatWorkGroupSizes;
 868   }
 869
 870   /// \returns Default/requested minimum flat work group size for this function.
 871   unsigned getMinFlatWorkGroupSize() const {
 872     return FlatWorkGroupSizes.first;
 873   }
 874
 875   /// \returns Default/requested maximum flat work group size for this function.
 876   unsigned getMaxFlatWorkGroupSize() const {
 877     return FlatWorkGroupSizes.second;
 878   }
 879
 880   /// \returns A pair of default/requested minimum/maximum number of waves per
 881   /// execution unit.
 882   std::pair<unsigned, unsigned> getWavesPerEU() const {
 883     return WavesPerEU;
 884   }
 885
 886   /// \returns Default/requested minimum number of waves per execution unit.
 887   unsigned getMinWavesPerEU() const {
 888     return WavesPerEU.first;
 889   }
 890
 891   /// \returns Default/requested maximum number of waves per execution unit.
 892   unsigned getMaxWavesPerEU() const {
 893     return WavesPerEU.second;
 894   }
 895
 896   /// \returns SGPR used for \p Dim's work group ID.
 897   Register getWorkGroupIDSGPR(unsigned Dim) const {
 898     switch (Dim) {
 899     case 0:
 900       assert(hasWorkGroupIDX());
 901       return ArgInfo.WorkGroupIDX.getRegister();
 902     case 1:
 903       assert(hasWorkGroupIDY());
 904       return ArgInfo.WorkGroupIDY.getRegister();
 905     case 2:
 906       assert(hasWorkGroupIDZ());
 907       return ArgInfo.WorkGroupIDZ.getRegister();
 908     }
 909     llvm_unreachable("unexpected dimension");
 910   }
 911
 912   unsigned getLDSWaveSpillSize() const {
 913     return LDSWaveSpillSize;
 914   }
 915
 916   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) {
 917     if (!BufferPSV)
 918       BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(TII);
 919
 920     return BufferPSV.get();
 921   }
 922
 923   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) {
 924     if (!ImagePSV)
 925       ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(TII);
 926
 927     return ImagePSV.get();
 928   }
 929
 930   const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
 931     if (!GWSResourcePSV) {
 932       GWSResourcePSV =
 933           std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
 934     }
 935
 936     return GWSResourcePSV.get();
 937   }
 938
 939   unsigned getOccupancy() const {
 940     return Occupancy;
 941   }
 942
 943   unsigned getMinAllowedOccupancy() const {
 944     if (!isMemoryBound() && !needsWaveLimiter())
 945       return Occupancy;
 946     return (Occupancy < 4) ? Occupancy : 4;
 947   }
 948
 949   void limitOccupancy(const MachineFunction &MF);
 950
 951   void limitOccupancy(unsigned Limit) {
 952     if (Occupancy > Limit)
 953       Occupancy = Limit;
 954   }
 955
 956   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
 957     if (Occupancy < Limit)
 958       Occupancy = Limit;
 959     limitOccupancy(MF);
 960   }
 961
 962   bool mayNeedAGPRs() const {
 963     return MayNeedAGPRs;
 964   }
 965
 966   // \returns true if a function has a use of AGPRs via inline asm or
 967   // has a call which may use it.
 968   bool mayUseAGPRs(const MachineFunction &MF) const;
 969
 970   // \returns true if a function needs or may need AGPRs.
 971   bool usesAGPRs(const MachineFunction &MF) const;
 972 };
 973
 974 } // end namespace llvm
 975
 976 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H