lib/Target/AMDGPU/SIMachineFunctionInfo.h

   1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
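/// \file
/// Declares the SIMachineFunctionInfo class, its MIR YAML counterpart and
/// mapping traits, and the AMDGPU-specific pseudo source values.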
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  15
  16 #include "AMDGPUArgumentUsageInfo.h"
  17 #include "AMDGPUMachineFunction.h"
  18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  19 #include "SIInstrInfo.h"
  20 #include "SIRegisterInfo.h"
  21 #include "llvm/ADT/ArrayRef.h"
  22 #include "llvm/ADT/DenseMap.h"
  23 #include "llvm/ADT/Optional.h"
  24 #include "llvm/ADT/STLExtras.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/ADT/SparseBitVector.h"
  27 #include "llvm/CodeGen/MIRYamlMapping.h"
  28 #include "llvm/CodeGen/PseudoSourceValue.h"
  29 #include "llvm/CodeGen/TargetInstrInfo.h"
  30 #include "llvm/MC/MCRegisterInfo.h"
  31 #include "llvm/Support/ErrorHandling.h"
  32 #include <array>
  33 #include <cassert>
  34 #include <utility>
  35 #include <vector>
  36
  37 namespace llvm {
  38
  39 class MachineFrameInfo;
  40 class MachineFunction;
  41 class TargetRegisterClass;
  42
  43 class AMDGPUPseudoSourceValue : public PseudoSourceValue {
  44 public:
  45   enum AMDGPUPSVKind : unsigned {
  46     PSVBuffer = PseudoSourceValue::TargetCustom,
  47     PSVImage,
  48     GWSResource
  49   };
  50
  51 protected:
  52   AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
  53       : PseudoSourceValue(Kind, TII) {}
  54
  55 public:
  56   bool isConstant(const MachineFrameInfo *) const override {
  57     // This should probably be true for most images, but we will start by being
  58     // conservative.
  59     return false;
  60   }
  61
  62   bool isAliased(const MachineFrameInfo *) const override {
  63     return true;
  64   }
  65
  66   bool mayAlias(const MachineFrameInfo *) const override {
  67     return true;
  68   }
  69 };
  70
  71 class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
  72 public:
  73   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
  74       : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
  75
  76   static bool classof(const PseudoSourceValue *V) {
  77     return V->kind() == PSVBuffer;
  78   }
  79 };
  80
  81 class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  82 public:
  83   // TODO: Is the img rsrc useful?
  84   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
  85       : AMDGPUPseudoSourceValue(PSVImage, TII) {}
  86
  87   static bool classof(const PseudoSourceValue *V) {
  88     return V->kind() == PSVImage;
  89   }
  90 };
  91
  92 class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  93 public:
  94   explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
  95       : AMDGPUPseudoSourceValue(GWSResource, TII) {}
  96
  97   static bool classof(const PseudoSourceValue *V) {
  98     return V->kind() == GWSResource;
  99   }
 100
 101   // These are inaccessible memory from IR.
 102   bool isAliased(const MachineFrameInfo *) const override {
 103     return false;
 104   }
 105
 106   // These are inaccessible memory from IR.
 107   bool mayAlias(const MachineFrameInfo *) const override {
 108     return false;
 109   }
 110
 111   void printCustom(raw_ostream &OS) const override {
 112     OS << "GWSResource";
 113   }
 114 };
 115
 116 namespace yaml {
 117
 118 struct SIArgument {
 119   bool IsRegister;
 120   union {
 121     StringValue RegisterName;
 122     unsigned StackOffset;
 123   };
 124   Optional<unsigned> Mask;
 125
 126   // Default constructor, which creates a stack argument.
 127   SIArgument() : IsRegister(false), StackOffset(0) {}
 128   SIArgument(const SIArgument &Other) {
 129     IsRegister = Other.IsRegister;
 130     if (IsRegister) {
 131       ::new ((void *)std::addressof(RegisterName))
 132           StringValue(Other.RegisterName);
 133     } else
 134       StackOffset = Other.StackOffset;
 135     Mask = Other.Mask;
 136   }
 137   SIArgument &operator=(const SIArgument &Other) {
 138     IsRegister = Other.IsRegister;
 139     if (IsRegister) {
 140       ::new ((void *)std::addressof(RegisterName))
 141           StringValue(Other.RegisterName);
 142     } else
 143       StackOffset = Other.StackOffset;
 144     Mask = Other.Mask;
 145     return *this;
 146   }
 147   ~SIArgument() {
 148     if (IsRegister)
 149       RegisterName.~StringValue();
 150   }
 151
 152   // Helper to create a register or stack argument.
 153   static inline SIArgument createArgument(bool IsReg) {
 154     if (IsReg)
 155       return SIArgument(IsReg);
 156     return SIArgument();
 157   }
 158
 159 private:
 160   // Construct a register argument.
 161   SIArgument(bool) : IsRegister(true), RegisterName() {}
 162 };
 163
 164 template <> struct MappingTraits<SIArgument> {
 165   static void mapping(IO &YamlIO, SIArgument &A) {
 166     if (YamlIO.outputting()) {
 167       if (A.IsRegister)
 168         YamlIO.mapRequired("reg", A.RegisterName);
 169       else
 170         YamlIO.mapRequired("offset", A.StackOffset);
 171     } else {
 172       auto Keys = YamlIO.keys();
 173       if (is_contained(Keys, "reg")) {
 174         A = SIArgument::createArgument(true);
 175         YamlIO.mapRequired("reg", A.RegisterName);
 176       } else if (is_contained(Keys, "offset"))
 177         YamlIO.mapRequired("offset", A.StackOffset);
 178       else
 179         YamlIO.setError("missing required key 'reg' or 'offset'");
 180     }
 181     YamlIO.mapOptional("mask", A.Mask);
 182   }
 183   static const bool flow = true;
 184 };
 185
/// YAML-side collection of per-function argument descriptions. Every entry is
/// optional; the MIR key used for each field is given by
/// MappingTraits<SIArgumentInfo>.
struct SIArgumentInfo {
  Optional<SIArgument> PrivateSegmentBuffer;
  Optional<SIArgument> DispatchPtr;
  Optional<SIArgument> QueuePtr;
  Optional<SIArgument> KernargSegmentPtr;
  Optional<SIArgument> DispatchID;
  Optional<SIArgument> FlatScratchInit;
  Optional<SIArgument> PrivateSegmentSize;

  // Work-group related entries.
  Optional<SIArgument> WorkGroupIDX;
  Optional<SIArgument> WorkGroupIDY;
  Optional<SIArgument> WorkGroupIDZ;
  Optional<SIArgument> WorkGroupInfo;
  Optional<SIArgument> PrivateSegmentWaveByteOffset;

  // Implicit pointer entries.
  Optional<SIArgument> ImplicitArgPtr;
  Optional<SIArgument> ImplicitBufferPtr;

  // Work-item ID entries.
  Optional<SIArgument> WorkItemIDX;
  Optional<SIArgument> WorkItemIDY;
  Optional<SIArgument> WorkItemIDZ;
};
 208
 209 template <> struct MappingTraits<SIArgumentInfo> {
 210   static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
 211     YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
 212     YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
 213     YamlIO.mapOptional("queuePtr", AI.QueuePtr);
 214     YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
 215     YamlIO.mapOptional("dispatchID", AI.DispatchID);
 216     YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
 217     YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
 218
 219     YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
 220     YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
 221     YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
 222     YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
 223     YamlIO.mapOptional("privateSegmentWaveByteOffset",
 224                        AI.PrivateSegmentWaveByteOffset);
 225
 226     YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
 227     YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
 228
 229     YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
 230     YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
 231     YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
 232   }
 233 };
 234
 235 // Default to default mode for default calling convention.
 236 struct SIMode {
 237   bool IEEE = true;
 238   bool DX10Clamp = true;
 239
 240   SIMode() = default;
 241
 242
 243   SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
 244     IEEE = Mode.IEEE;
 245     DX10Clamp = Mode.DX10Clamp;
 246   }
 247
 248   bool operator ==(const SIMode Other) const {
 249     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
 250   }
 251 };
 252
 253 template <> struct MappingTraits<SIMode> {
 254   static void mapping(IO &YamlIO, SIMode &Mode) {
 255     YamlIO.mapOptional("ieee", Mode.IEEE, true);
 256     YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
 257   }
 258 };
 259
 260 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
 261   uint64_t ExplicitKernArgSize = 0;
 262   unsigned MaxKernArgAlign = 0;
 263   unsigned LDSSize = 0;
 264   bool IsEntryFunction = false;
 265   bool NoSignedZerosFPMath = false;
 266   bool MemoryBound = false;
 267   bool WaveLimiter = false;
 268   uint32_t HighBitsOf32BitAddress = 0;
 269
 270   StringValue ScratchRSrcReg = "$private_rsrc_reg";
 271   StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
 272   StringValue FrameOffsetReg = "$fp_reg";
 273   StringValue StackPtrOffsetReg = "$sp_reg";
 274
 275   Optional<SIArgumentInfo> ArgInfo;
 276   SIMode Mode;
 277
 278   SIMachineFunctionInfo() = default;
 279   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
 280                         const TargetRegisterInfo &TRI);
 281
 282   void mappingImpl(yaml::IO &YamlIO) override;
 283   ~SIMachineFunctionInfo() = default;
 284 };
 285
/// YAML mapping for yaml::SIMachineFunctionInfo. Every key is optional; the
/// defaults given here are the same values the struct's member initializers
/// use.
template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    // Scalar properties.
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    // Register names, defaulting to the placeholder register names.
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
                       StringValue("$scratch_wave_offset_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    // Nested mappings; argumentInfo has no default and may be absent.
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
  }
};
 310
 311 } // end namespace yaml
 312
 313 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 314 /// tells the hardware which interpolation parameters to load.
 315 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
 316   friend class GCNTargetMachine;
 317
 318   unsigned TIDReg = AMDGPU::NoRegister;
 319
 320   // Registers that may be reserved for spilling purposes. These may be the same
 321   // as the input registers.
 322   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
 323   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
 324
 325   // This is the current function's incremented size from the kernel's scratch
 326   // wave offset register. For an entry function, this is exactly the same as
 327   // the ScratchWaveOffsetReg.
 328   unsigned FrameOffsetReg = AMDGPU::FP_REG;
 329
 330   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
 331   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
 332
 333   AMDGPUFunctionArgInfo ArgInfo;
 334
 335   // State of MODE register, assumed FP mode.
 336   AMDGPU::SIModeRegisterDefaults Mode;
 337
 338   // Graphics info.
 339   unsigned PSInputAddr = 0;
 340   unsigned PSInputEnable = 0;
 341
 342   /// Number of bytes of arguments this function has on the stack. If the callee
 343   /// is expected to restore the argument stack this should be a multiple of 16,
 344   /// all usable during a tail call.
 345   ///
 346   /// The alternative would forbid tail call optimisation in some cases: if we
 347   /// want to transfer control from a function with 8-bytes of stack-argument
 348   /// space to a function with 16-bytes then misalignment of this value would
 349   /// make a stack adjustment necessary, which could not be undone by the
 350   /// callee.
 351   unsigned BytesInStackArgArea = 0;
 352
 353   bool ReturnsVoid = true;
 354
 355   // A pair of default/requested minimum/maximum flat work group sizes.
 356   // Minimum - first, maximum - second.
 357   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
 358
 359   // A pair of default/requested minimum/maximum number of waves per execution
 360   // unit. Minimum - first, maximum - second.
 361   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 362
 363   DenseMap<const Value *,
 364            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
 365   DenseMap<const Value *,
 366            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
 367   std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
 368
 369 private:
 370   unsigned LDSWaveSpillSize = 0;
 371   unsigned NumUserSGPRs = 0;
 372   unsigned NumSystemSGPRs = 0;
 373
 374   bool HasSpilledSGPRs = false;
 375   bool HasSpilledVGPRs = false;
 376   bool HasNonSpillStackObjects = false;
 377   bool IsStackRealigned = false;
 378
 379   unsigned NumSpilledSGPRs = 0;
 380   unsigned NumSpilledVGPRs = 0;
 381
 382   // Feature bits required for inputs passed in user SGPRs.
 383   bool PrivateSegmentBuffer : 1;
 384   bool DispatchPtr : 1;
 385   bool QueuePtr : 1;
 386   bool KernargSegmentPtr : 1;
 387   bool DispatchID : 1;
 388   bool FlatScratchInit : 1;
 389
 390   // Feature bits required for inputs passed in system SGPRs.
 391   bool WorkGroupIDX : 1; // Always initialized.
 392   bool WorkGroupIDY : 1;
 393   bool WorkGroupIDZ : 1;
 394   bool WorkGroupInfo : 1;
 395   bool PrivateSegmentWaveByteOffset : 1;
 396
 397   bool WorkItemIDX : 1; // Always initialized.
 398   bool WorkItemIDY : 1;
 399   bool WorkItemIDZ : 1;
 400
 401   // Private memory buffer
 402   // Compute directly in sgpr[0:1]
 403   // Other shaders indirect 64-bits at sgpr[0:1]
 404   bool ImplicitBufferPtr : 1;
 405
 406   // Pointer to where the ABI inserts special kernel arguments separate from the
 407   // user arguments. This is an offset from the KernargSegmentPtr.
 408   bool ImplicitArgPtr : 1;
 409
 410   // The hard-wired high half of the address of the global information table
 411   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
 412   // current hardware only allows a 16 bit value.
 413   unsigned GITPtrHigh;
 414
 415   unsigned HighBitsOf32BitAddress;
 416   unsigned GDSSize;
 417
 418   // Current recorded maximum possible occupancy.
 419   unsigned Occupancy;
 420
 421   MCPhysReg getNextUserSGPR() const;
 422
 423   MCPhysReg getNextSystemSGPR() const;
 424
 425 public:
 426   struct SpilledReg {
 427     unsigned VGPR = 0;
 428     int Lane = -1;
 429
 430     SpilledReg() = default;
 431     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
 432
 433     bool hasLane() { return Lane != -1;}
 434     bool hasReg() { return VGPR != 0;}
 435   };
 436
  /// Pairs a VGPR used for SGPR spills with an optional stack slot.
  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    /// Stores \p V as the VGPR and \p F as the optional frame index.
    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };
 447
  /// Per-frame-index record stored in VGPRToAGPRSpills.
  struct VGPRSpillToAGPR {
    // One physical register per lane; indexed by lane in getVGPRToAGPRSpill.
    SmallVector<MCPhysReg, 32> Lanes;
    // NOTE(review): presumably set once every lane has a register; the code
    // that sets it is not in this header.
    bool FullyAllocated = false;
  };
 452
 453   SparseBitVector<> WWMReservedRegs;
 454
  /// Marks \p reg in the WWMReservedRegs bit vector.
  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
 456
 457 private:
 458   // SGPR->VGPR spilling support.
 459   using SpillRegMask = std::pair<unsigned, unsigned>;
 460
 461   // Track VGPR + wave index for each subregister of the SGPR spilled to
 462   // frameindex key.
 463   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
 464   unsigned NumVGPRSpillLanes = 0;
 465   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
 466
 467   DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
 468
 469   // AGPRs used for VGPR spills.
 470   SmallVector<MCPhysReg, 32> SpillAGPR;
 471
 472   // VGPRs used for AGPR spills.
 473   SmallVector<MCPhysReg, 32> SpillVGPR;
 474
 475 public: // FIXME
 476   /// If this is set, an SGPR used for save/restore of the register used for the
 477   /// frame pointer.
 478   unsigned SGPRForFPSaveRestoreCopy = 0;
 479   Optional<int> FramePointerSaveIndex;
 480
 481 public:
 482   SIMachineFunctionInfo(const MachineFunction &MF);
 483
 484   bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
 485
 486   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
 487     auto I = SGPRToVGPRSpills.find(FrameIndex);
 488     return (I == SGPRToVGPRSpills.end()) ?
 489       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
 490   }
 491
 492   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
 493     return SpillVGPRs;
 494   }
 495
 496   ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
 497     return SpillAGPR;
 498   }
 499
 500   ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
 501     return SpillVGPR;
 502   }
 503
 504   MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
 505     auto I = VGPRToAGPRSpills.find(FrameIndex);
 506     return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
 507                                          : I->second.Lanes[Lane];
 508   }
 509
 510   AMDGPU::SIModeRegisterDefaults getMode() const {
 511     return Mode;
 512   }
 513
 514   bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
 515                                  unsigned NumLane) const;
 516   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
 517   bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
 518   void removeDeadFrameIndices(MachineFrameInfo &MFI);
 519
 520   bool hasCalculatedTID() const { return TIDReg != 0; };
 521   unsigned getTIDReg() const { return TIDReg; };
 522   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
 523
 524   unsigned getBytesInStackArgArea() const {
 525     return BytesInStackArgArea;
 526   }
 527
 528   void setBytesInStackArgArea(unsigned Bytes) {
 529     BytesInStackArgArea = Bytes;
 530   }
 531
 532   // Add user SGPRs.
 533   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
 534   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
 535   unsigned addQueuePtr(const SIRegisterInfo &TRI);
 536   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
 537   unsigned addDispatchID(const SIRegisterInfo &TRI);
 538   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
 539   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
 540
 541   // Add system SGPRs.
 542   unsigned addWorkGroupIDX() {
 543     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
 544     NumSystemSGPRs += 1;
 545     return ArgInfo.WorkGroupIDX.getRegister();
 546   }
 547
 548   unsigned addWorkGroupIDY() {
 549     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
 550     NumSystemSGPRs += 1;
 551     return ArgInfo.WorkGroupIDY.getRegister();
 552   }
 553
 554   unsigned addWorkGroupIDZ() {
 555     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
 556     NumSystemSGPRs += 1;
 557     return ArgInfo.WorkGroupIDZ.getRegister();
 558   }
 559
 560   unsigned addWorkGroupInfo() {
 561     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
 562     NumSystemSGPRs += 1;
 563     return ArgInfo.WorkGroupInfo.getRegister();
 564   }
 565
 566   // Add special VGPR inputs
 567   void setWorkItemIDX(ArgDescriptor Arg) {
 568     ArgInfo.WorkItemIDX = Arg;
 569   }
 570
 571   void setWorkItemIDY(ArgDescriptor Arg) {
 572     ArgInfo.WorkItemIDY = Arg;
 573   }
 574
 575   void setWorkItemIDZ(ArgDescriptor Arg) {
 576     ArgInfo.WorkItemIDZ = Arg;
 577   }
 578
 579   unsigned addPrivateSegmentWaveByteOffset() {
 580     ArgInfo.PrivateSegmentWaveByteOffset
 581       = ArgDescriptor::createRegister(getNextSystemSGPR());
 582     NumSystemSGPRs += 1;
 583     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 584   }
 585
 586   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
 587     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
 588   }
 589
 590   bool hasPrivateSegmentBuffer() const {
 591     return PrivateSegmentBuffer;
 592   }
 593
 594   bool hasDispatchPtr() const {
 595     return DispatchPtr;
 596   }
 597
 598   bool hasQueuePtr() const {
 599     return QueuePtr;
 600   }
 601
 602   bool hasKernargSegmentPtr() const {
 603     return KernargSegmentPtr;
 604   }
 605
 606   bool hasDispatchID() const {
 607     return DispatchID;
 608   }
 609
 610   bool hasFlatScratchInit() const {
 611     return FlatScratchInit;
 612   }
 613
 614   bool hasWorkGroupIDX() const {
 615     return WorkGroupIDX;
 616   }
 617
 618   bool hasWorkGroupIDY() const {
 619     return WorkGroupIDY;
 620   }
 621
 622   bool hasWorkGroupIDZ() const {
 623     return WorkGroupIDZ;
 624   }
 625
 626   bool hasWorkGroupInfo() const {
 627     return WorkGroupInfo;
 628   }
 629
 630   bool hasPrivateSegmentWaveByteOffset() const {
 631     return PrivateSegmentWaveByteOffset;
 632   }
 633
 634   bool hasWorkItemIDX() const {
 635     return WorkItemIDX;
 636   }
 637
 638   bool hasWorkItemIDY() const {
 639     return WorkItemIDY;
 640   }
 641
 642   bool hasWorkItemIDZ() const {
 643     return WorkItemIDZ;
 644   }
 645
 646   bool hasImplicitArgPtr() const {
 647     return ImplicitArgPtr;
 648   }
 649
 650   bool hasImplicitBufferPtr() const {
 651     return ImplicitBufferPtr;
 652   }
 653
 654   AMDGPUFunctionArgInfo &getArgInfo() {
 655     return ArgInfo;
 656   }
 657
 658   const AMDGPUFunctionArgInfo &getArgInfo() const {
 659     return ArgInfo;
 660   }
 661
 662   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
 663   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 664     return ArgInfo.getPreloadedValue(Value);
 665   }
 666
 667   Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 668     auto Arg = ArgInfo.getPreloadedValue(Value).first;
 669     return Arg ? Arg->getRegister() : Register();
 670   }
 671
 672   unsigned getGITPtrHigh() const {
 673     return GITPtrHigh;
 674   }
 675
 676   uint32_t get32BitAddressHighBits() const {
 677     return HighBitsOf32BitAddress;
 678   }
 679
 680   unsigned getGDSSize() const {
 681     return GDSSize;
 682   }
 683
 684   unsigned getNumUserSGPRs() const {
 685     return NumUserSGPRs;
 686   }
 687
 688   unsigned getNumPreloadedSGPRs() const {
 689     return NumUserSGPRs + NumSystemSGPRs;
 690   }
 691
 692   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
 693     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 694   }
 695
 696   /// Returns the physical register reserved for use as the resource
 697   /// descriptor for scratch accesses.
 698   unsigned getScratchRSrcReg() const {
 699     return ScratchRSrcReg;
 700   }
 701
 702   void setScratchRSrcReg(unsigned Reg) {
 703     assert(Reg != 0 && "Should never be unset");
 704     ScratchRSrcReg = Reg;
 705   }
 706
 707   unsigned getScratchWaveOffsetReg() const {
 708     return ScratchWaveOffsetReg;
 709   }
 710
 711   unsigned getFrameOffsetReg() const {
 712     return FrameOffsetReg;
 713   }
 714
 715   void setFrameOffsetReg(unsigned Reg) {
 716     assert(Reg != 0 && "Should never be unset");
 717     FrameOffsetReg = Reg;
 718   }
 719
 720   void setStackPtrOffsetReg(unsigned Reg) {
 721     assert(Reg != 0 && "Should never be unset");
 722     StackPtrOffsetReg = Reg;
 723   }
 724
 725   // Note the unset value for this is AMDGPU::SP_REG rather than
 726   // NoRegister. This is mostly a workaround for MIR tests where state that
 727   // can't be directly computed from the function is not preserved in serialized
 728   // MIR.
 729   unsigned getStackPtrOffsetReg() const {
 730     return StackPtrOffsetReg;
 731   }
 732
 733   void setScratchWaveOffsetReg(unsigned Reg) {
 734     assert(Reg != 0 && "Should never be unset");
 735     ScratchWaveOffsetReg = Reg;
 736   }
 737
 738   unsigned getQueuePtrUserSGPR() const {
 739     return ArgInfo.QueuePtr.getRegister();
 740   }
 741
 742   unsigned getImplicitBufferPtrUserSGPR() const {
 743     return ArgInfo.ImplicitBufferPtr.getRegister();
 744   }
 745
 746   bool hasSpilledSGPRs() const {
 747     return HasSpilledSGPRs;
 748   }
 749
 750   void setHasSpilledSGPRs(bool Spill = true) {
 751     HasSpilledSGPRs = Spill;
 752   }
 753
 754   bool hasSpilledVGPRs() const {
 755     return HasSpilledVGPRs;
 756   }
 757
 758   void setHasSpilledVGPRs(bool Spill = true) {
 759     HasSpilledVGPRs = Spill;
 760   }
 761
 762   bool hasNonSpillStackObjects() const {
 763     return HasNonSpillStackObjects;
 764   }
 765
 766   void setHasNonSpillStackObjects(bool StackObject = true) {
 767     HasNonSpillStackObjects = StackObject;
 768   }
 769
 770   bool isStackRealigned() const {
 771     return IsStackRealigned;
 772   }
 773
 774   void setIsStackRealigned(bool Realigned = true) {
 775     IsStackRealigned = Realigned;
 776   }
 777
 778   unsigned getNumSpilledSGPRs() const {
 779     return NumSpilledSGPRs;
 780   }
 781
 782   unsigned getNumSpilledVGPRs() const {
 783     return NumSpilledVGPRs;
 784   }
 785
 786   void addToSpilledSGPRs(unsigned num) {
 787     NumSpilledSGPRs += num;
 788   }
 789
 790   void addToSpilledVGPRs(unsigned num) {
 791     NumSpilledVGPRs += num;
 792   }
 793
 794   unsigned getPSInputAddr() const {
 795     return PSInputAddr;
 796   }
 797
 798   unsigned getPSInputEnable() const {
 799     return PSInputEnable;
 800   }
 801
 802   bool isPSInputAllocated(unsigned Index) const {
 803     return PSInputAddr & (1 << Index);
 804   }
 805
 806   void markPSInputAllocated(unsigned Index) {
 807     PSInputAddr |= 1 << Index;
 808   }
 809
 810   void markPSInputEnabled(unsigned Index) {
 811     PSInputEnable |= 1 << Index;
 812   }
 813
 814   bool returnsVoid() const {
 815     return ReturnsVoid;
 816   }
 817
 818   void setIfReturnsVoid(bool Value) {
 819     ReturnsVoid = Value;
 820   }
 821
 822   /// \returns A pair of default/requested minimum/maximum flat work group sizes
 823   /// for this function.
 824   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
 825     return FlatWorkGroupSizes;
 826   }
 827
 828   /// \returns Default/requested minimum flat work group size for this function.
 829   unsigned getMinFlatWorkGroupSize() const {
 830     return FlatWorkGroupSizes.first;
 831   }
 832
 833   /// \returns Default/requested maximum flat work group size for this function.
 834   unsigned getMaxFlatWorkGroupSize() const {
 835     return FlatWorkGroupSizes.second;
 836   }
 837
 838   /// \returns A pair of default/requested minimum/maximum number of waves per
 839   /// execution unit.
 840   std::pair<unsigned, unsigned> getWavesPerEU() const {
 841     return WavesPerEU;
 842   }
 843
 844   /// \returns Default/requested minimum number of waves per execution unit.
 845   unsigned getMinWavesPerEU() const {
 846     return WavesPerEU.first;
 847   }
 848
 849   /// \returns Default/requested maximum number of waves per execution unit.
 850   unsigned getMaxWavesPerEU() const {
 851     return WavesPerEU.second;
 852   }
 853
 854   /// \returns SGPR used for \p Dim's work group ID.
 855   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
 856     switch (Dim) {
 857     case 0:
 858       assert(hasWorkGroupIDX());
 859       return ArgInfo.WorkGroupIDX.getRegister();
 860     case 1:
 861       assert(hasWorkGroupIDY());
 862       return ArgInfo.WorkGroupIDY.getRegister();
 863     case 2:
 864       assert(hasWorkGroupIDZ());
 865       return ArgInfo.WorkGroupIDZ.getRegister();
 866     }
 867     llvm_unreachable("unexpected dimension");
 868   }
 869
 870   unsigned getLDSWaveSpillSize() const {
 871     return LDSWaveSpillSize;
 872   }
 873
 874   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
 875                                                     const Value *BufferRsrc) {
 876     assert(BufferRsrc);
 877     auto PSV = BufferPSVs.try_emplace(
 878       BufferRsrc,
 879       std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
 880     return PSV.first->second.get();
 881   }
 882
 883   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
 884                                                   const Value *ImgRsrc) {
 885     assert(ImgRsrc);
 886     auto PSV = ImagePSVs.try_emplace(
 887       ImgRsrc,
 888       std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
 889     return PSV.first->second.get();
 890   }
 891
 892   const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
 893     if (!GWSResourcePSV) {
 894       GWSResourcePSV =
 895           std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
 896     }
 897
 898     return GWSResourcePSV.get();
 899   }
 900
 901   unsigned getOccupancy() const {
 902     return Occupancy;
 903   }
 904
 905   unsigned getMinAllowedOccupancy() const {
 906     if (!isMemoryBound() && !needsWaveLimiter())
 907       return Occupancy;
 908     return (Occupancy < 4) ? Occupancy : 4;
 909   }
 910
 911   void limitOccupancy(const MachineFunction &MF);
 912
 913   void limitOccupancy(unsigned Limit) {
 914     if (Occupancy > Limit)
 915       Occupancy = Limit;
 916   }
 917
 918   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
 919     if (Occupancy < Limit)
 920       Occupancy = Limit;
 921     limitOccupancy(MF);
 922   }
 923 };
 924
 925 } // end namespace llvm
 926
 927 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H