llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

   1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  15
  16 #include "AMDGPUArgumentUsageInfo.h"
  17 #include "AMDGPUMachineFunction.h"
  18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  19 #include "SIInstrInfo.h"
  20 #include "llvm/ADT/MapVector.h"
  21 #include "llvm/CodeGen/MIRYamlMapping.h"
  22 #include "llvm/CodeGen/PseudoSourceValue.h"
  23 #include "llvm/Support/raw_ostream.h"
  24
  25 namespace llvm {
  26
  27 class MachineFrameInfo;
  28 class MachineFunction;
  29 class TargetRegisterClass;
  30 class SIMachineFunctionInfo;
  31 class SIRegisterInfo;
  32
  33 class AMDGPUPseudoSourceValue : public PseudoSourceValue {
  34 public:
  35   enum AMDGPUPSVKind : unsigned {
  36     PSVBuffer = PseudoSourceValue::TargetCustom,
  37     PSVImage,
  38     GWSResource
  39   };
  40
  41 protected:
  42   AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
  43       : PseudoSourceValue(Kind, TII) {}
  44
  45 public:
  46   bool isConstant(const MachineFrameInfo *) const override {
  47     // This should probably be true for most images, but we will start by being
  48     // conservative.
  49     return false;
  50   }
  51
  52   bool isAliased(const MachineFrameInfo *) const override {
  53     return true;
  54   }
  55
  56   bool mayAlias(const MachineFrameInfo *) const override {
  57     return true;
  58   }
  59 };
  60
  61 class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
  62 public:
  63   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
  64       : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
  65
  66   static bool classof(const PseudoSourceValue *V) {
  67     return V->kind() == PSVBuffer;
  68   }
  69
  70   void printCustom(raw_ostream &OS) const override { OS << "BufferResource"; }
  71 };
  72
  73 class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  74 public:
  75   // TODO: Is the img rsrc useful?
  76   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
  77       : AMDGPUPseudoSourceValue(PSVImage, TII) {}
  78
  79   static bool classof(const PseudoSourceValue *V) {
  80     return V->kind() == PSVImage;
  81   }
  82
  83   void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; }
  84 };
  85
  86 class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  87 public:
  88   explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
  89       : AMDGPUPseudoSourceValue(GWSResource, TII) {}
  90
  91   static bool classof(const PseudoSourceValue *V) {
  92     return V->kind() == GWSResource;
  93   }
  94
  95   // These are inaccessible memory from IR.
  96   bool isAliased(const MachineFrameInfo *) const override {
  97     return false;
  98   }
  99
 100   // These are inaccessible memory from IR.
 101   bool mayAlias(const MachineFrameInfo *) const override {
 102     return false;
 103   }
 104
 105   void printCustom(raw_ostream &OS) const override {
 106     OS << "GWSResource";
 107   }
 108 };
 109
 110 namespace yaml {
 111
 112 struct SIArgument {
 113   bool IsRegister;
 114   union {
 115     StringValue RegisterName;
 116     unsigned StackOffset;
 117   };
 118   Optional<unsigned> Mask;
 119
 120   // Default constructor, which creates a stack argument.
 121   SIArgument() : IsRegister(false), StackOffset(0) {}
 122   SIArgument(const SIArgument &Other) {
 123     IsRegister = Other.IsRegister;
 124     if (IsRegister) {
 125       ::new ((void *)std::addressof(RegisterName))
 126           StringValue(Other.RegisterName);
 127     } else
 128       StackOffset = Other.StackOffset;
 129     Mask = Other.Mask;
 130   }
 131   SIArgument &operator=(const SIArgument &Other) {
 132     IsRegister = Other.IsRegister;
 133     if (IsRegister) {
 134       ::new ((void *)std::addressof(RegisterName))
 135           StringValue(Other.RegisterName);
 136     } else
 137       StackOffset = Other.StackOffset;
 138     Mask = Other.Mask;
 139     return *this;
 140   }
 141   ~SIArgument() {
 142     if (IsRegister)
 143       RegisterName.~StringValue();
 144   }
 145
 146   // Helper to create a register or stack argument.
 147   static inline SIArgument createArgument(bool IsReg) {
 148     if (IsReg)
 149       return SIArgument(IsReg);
 150     return SIArgument();
 151   }
 152
 153 private:
 154   // Construct a register argument.
 155   SIArgument(bool) : IsRegister(true), RegisterName() {}
 156 };
 157
 158 template <> struct MappingTraits<SIArgument> {
 159   static void mapping(IO &YamlIO, SIArgument &A) {
 160     if (YamlIO.outputting()) {
 161       if (A.IsRegister)
 162         YamlIO.mapRequired("reg", A.RegisterName);
 163       else
 164         YamlIO.mapRequired("offset", A.StackOffset);
 165     } else {
 166       auto Keys = YamlIO.keys();
 167       if (is_contained(Keys, "reg")) {
 168         A = SIArgument::createArgument(true);
 169         YamlIO.mapRequired("reg", A.RegisterName);
 170       } else if (is_contained(Keys, "offset"))
 171         YamlIO.mapRequired("offset", A.StackOffset);
 172       else
 173         YamlIO.setError("missing required key 'reg' or 'offset'");
 174     }
 175     YamlIO.mapOptional("mask", A.Mask);
 176   }
 177   static const bool flow = true;
 178 };
 179
 180 struct SIArgumentInfo {
 181   Optional<SIArgument> PrivateSegmentBuffer;
 182   Optional<SIArgument> DispatchPtr;
 183   Optional<SIArgument> QueuePtr;
 184   Optional<SIArgument> KernargSegmentPtr;
 185   Optional<SIArgument> DispatchID;
 186   Optional<SIArgument> FlatScratchInit;
 187   Optional<SIArgument> PrivateSegmentSize;
 188
 189   Optional<SIArgument> WorkGroupIDX;
 190   Optional<SIArgument> WorkGroupIDY;
 191   Optional<SIArgument> WorkGroupIDZ;
 192   Optional<SIArgument> WorkGroupInfo;
 193   Optional<SIArgument> PrivateSegmentWaveByteOffset;
 194
 195   Optional<SIArgument> ImplicitArgPtr;
 196   Optional<SIArgument> ImplicitBufferPtr;
 197
 198   Optional<SIArgument> WorkItemIDX;
 199   Optional<SIArgument> WorkItemIDY;
 200   Optional<SIArgument> WorkItemIDZ;
 201 };
 202
 203 template <> struct MappingTraits<SIArgumentInfo> {
 204   static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
 205     YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
 206     YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
 207     YamlIO.mapOptional("queuePtr", AI.QueuePtr);
 208     YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
 209     YamlIO.mapOptional("dispatchID", AI.DispatchID);
 210     YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
 211     YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
 212
 213     YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
 214     YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
 215     YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
 216     YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
 217     YamlIO.mapOptional("privateSegmentWaveByteOffset",
 218                        AI.PrivateSegmentWaveByteOffset);
 219
 220     YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
 221     YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
 222
 223     YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
 224     YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
 225     YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
 226   }
 227 };
 228
 229 // Default to default mode for default calling convention.
 230 struct SIMode {
 231   bool IEEE = true;
 232   bool DX10Clamp = true;
 233   bool FP32InputDenormals = true;
 234   bool FP32OutputDenormals = true;
 235   bool FP64FP16InputDenormals = true;
 236   bool FP64FP16OutputDenormals = true;
 237
 238   SIMode() = default;
 239
 240   SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
 241     IEEE = Mode.IEEE;
 242     DX10Clamp = Mode.DX10Clamp;
 243     FP32InputDenormals = Mode.FP32InputDenormals;
 244     FP32OutputDenormals = Mode.FP32OutputDenormals;
 245     FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
 246     FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
 247   }
 248
 249   bool operator ==(const SIMode Other) const {
 250     return IEEE == Other.IEEE &&
 251            DX10Clamp == Other.DX10Clamp &&
 252            FP32InputDenormals == Other.FP32InputDenormals &&
 253            FP32OutputDenormals == Other.FP32OutputDenormals &&
 254            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
 255            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
 256   }
 257 };
 258
 259 template <> struct MappingTraits<SIMode> {
 260   static void mapping(IO &YamlIO, SIMode &Mode) {
 261     YamlIO.mapOptional("ieee", Mode.IEEE, true);
 262     YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
 263     YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
 264     YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
 265     YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
 266     YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
 267   }
 268 };
 269
 270 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
 271   uint64_t ExplicitKernArgSize = 0;
 272   unsigned MaxKernArgAlign = 0;
 273   unsigned LDSSize = 0;
 274   Align DynLDSAlign;
 275   bool IsEntryFunction = false;
 276   bool NoSignedZerosFPMath = false;
 277   bool MemoryBound = false;
 278   bool WaveLimiter = false;
 279   bool HasSpilledSGPRs = false;
 280   bool HasSpilledVGPRs = false;
 281   uint32_t HighBitsOf32BitAddress = 0;
 282
 283   // TODO: 10 may be a better default since it's the maximum.
 284   unsigned Occupancy = 0;
 285
 286   StringValue ScratchRSrcReg = "$private_rsrc_reg";
 287   StringValue FrameOffsetReg = "$fp_reg";
 288   StringValue StackPtrOffsetReg = "$sp_reg";
 289
 290   Optional<SIArgumentInfo> ArgInfo;
 291   SIMode Mode;
 292   Optional<FrameIndex> ScavengeFI;
 293
 294   SIMachineFunctionInfo() = default;
 295   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
 296                         const TargetRegisterInfo &TRI,
 297                         const llvm::MachineFunction &MF);
 298
 299   void mappingImpl(yaml::IO &YamlIO) override;
 300   ~SIMachineFunctionInfo() = default;
 301 };
 302
 303 template <> struct MappingTraits<SIMachineFunctionInfo> {
 304   static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
 305     YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
 306                        UINT64_C(0));
 307     YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
 308     YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
 309     YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
 310     YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
 311     YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
 312     YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
 313     YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
 314     YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
 315     YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
 316     YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
 317                        StringValue("$private_rsrc_reg"));
 318     YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
 319                        StringValue("$fp_reg"));
 320     YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
 321                        StringValue("$sp_reg"));
 322     YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
 323     YamlIO.mapOptional("mode", MFI.Mode, SIMode());
 324     YamlIO.mapOptional("highBitsOf32BitAddress",
 325                        MFI.HighBitsOf32BitAddress, 0u);
 326     YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
 327     YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
 328   }
 329 };
 330
 331 } // end namespace yaml
 332
 333 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 334 /// tells the hardware which interpolation parameters to load.
 335 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
 336   friend class GCNTargetMachine;
 337
 338   Register TIDReg = AMDGPU::NoRegister;
 339
 340   // Registers that may be reserved for spilling purposes. These may be the same
 341   // as the input registers.
 342   Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
 343
 344   // This is the the unswizzled offset from the current dispatch's scratch wave
 345   // base to the beginning of the current function's frame.
 346   Register FrameOffsetReg = AMDGPU::FP_REG;
 347
 348   // This is an ABI register used in the non-entry calling convention to
 349   // communicate the unswizzled offset from the current dispatch's scratch wave
 350   // base to the beginning of the new function's frame.
 351   Register StackPtrOffsetReg = AMDGPU::SP_REG;
 352
 353   AMDGPUFunctionArgInfo ArgInfo;
 354
 355   // Graphics info.
 356   unsigned PSInputAddr = 0;
 357   unsigned PSInputEnable = 0;
 358
 359   /// Number of bytes of arguments this function has on the stack. If the callee
 360   /// is expected to restore the argument stack this should be a multiple of 16,
 361   /// all usable during a tail call.
 362   ///
 363   /// The alternative would forbid tail call optimisation in some cases: if we
 364   /// want to transfer control from a function with 8-bytes of stack-argument
 365   /// space to a function with 16-bytes then misalignment of this value would
 366   /// make a stack adjustment necessary, which could not be undone by the
 367   /// callee.
 368   unsigned BytesInStackArgArea = 0;
 369
 370   bool ReturnsVoid = true;
 371
 372   // A pair of default/requested minimum/maximum flat work group sizes.
 373   // Minimum - first, maximum - second.
 374   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
 375
 376   // A pair of default/requested minimum/maximum number of waves per execution
 377   // unit. Minimum - first, maximum - second.
 378   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 379
 380   std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
 381   std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
 382   std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
 383
 384 private:
 385   unsigned LDSWaveSpillSize = 0;
 386   unsigned NumUserSGPRs = 0;
 387   unsigned NumSystemSGPRs = 0;
 388
 389   bool HasSpilledSGPRs = false;
 390   bool HasSpilledVGPRs = false;
 391   bool HasNonSpillStackObjects = false;
 392   bool IsStackRealigned = false;
 393
 394   unsigned NumSpilledSGPRs = 0;
 395   unsigned NumSpilledVGPRs = 0;
 396
 397   // Feature bits required for inputs passed in user SGPRs.
 398   bool PrivateSegmentBuffer : 1;
 399   bool DispatchPtr : 1;
 400   bool QueuePtr : 1;
 401   bool KernargSegmentPtr : 1;
 402   bool DispatchID : 1;
 403   bool FlatScratchInit : 1;
 404
 405   // Feature bits required for inputs passed in system SGPRs.
 406   bool WorkGroupIDX : 1; // Always initialized.
 407   bool WorkGroupIDY : 1;
 408   bool WorkGroupIDZ : 1;
 409   bool WorkGroupInfo : 1;
 410   bool PrivateSegmentWaveByteOffset : 1;
 411
 412   bool WorkItemIDX : 1; // Always initialized.
 413   bool WorkItemIDY : 1;
 414   bool WorkItemIDZ : 1;
 415
 416   // Private memory buffer
 417   // Compute directly in sgpr[0:1]
 418   // Other shaders indirect 64-bits at sgpr[0:1]
 419   bool ImplicitBufferPtr : 1;
 420
 421   // Pointer to where the ABI inserts special kernel arguments separate from the
 422   // user arguments. This is an offset from the KernargSegmentPtr.
 423   bool ImplicitArgPtr : 1;
 424
 425   // The hard-wired high half of the address of the global information table
 426   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
 427   // current hardware only allows a 16 bit value.
 428   unsigned GITPtrHigh;
 429
 430   unsigned HighBitsOf32BitAddress;
 431   unsigned GDSSize;
 432
 433   // Current recorded maximum possible occupancy.
 434   unsigned Occupancy;
 435
 436   MCPhysReg getNextUserSGPR() const;
 437
 438   MCPhysReg getNextSystemSGPR() const;
 439
 440 public:
 441   struct SpilledReg {
 442     Register VGPR;
 443     int Lane = -1;
 444
 445     SpilledReg() = default;
 446     SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
 447
 448     bool hasLane() { return Lane != -1;}
 449     bool hasReg() { return VGPR != 0;}
 450   };
 451
 452   struct SGPRSpillVGPR {
 453     // VGPR used for SGPR spills
 454     Register VGPR;
 455
 456     // If the VGPR is is used for SGPR spills in a non-entrypoint function, the
 457     // stack slot used to save/restore it in the prolog/epilog.
 458     Optional<int> FI;
 459
 460     SGPRSpillVGPR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
 461   };
 462
 463   struct VGPRSpillToAGPR {
 464     SmallVector<MCPhysReg, 32> Lanes;
 465     bool FullyAllocated = false;
 466   };
 467
 468   // Map WWM VGPR to a stack slot that is used to save/restore it in the
 469   // prolog/epilog.
 470   MapVector<Register, Optional<int>> WWMReservedRegs;
 471
 472 private:
 473   // Track VGPR + wave index for each subregister of the SGPR spilled to
 474   // frameindex key.
 475   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
 476   unsigned NumVGPRSpillLanes = 0;
 477   SmallVector<SGPRSpillVGPR, 2> SpillVGPRs;
 478
 479   DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
 480
 481   // AGPRs used for VGPR spills.
 482   SmallVector<MCPhysReg, 32> SpillAGPR;
 483
 484   // VGPRs used for AGPR spills.
 485   SmallVector<MCPhysReg, 32> SpillVGPR;
 486
 487   // Emergency stack slot. Sometimes, we create this before finalizing the stack
 488   // frame, so save it here and add it to the RegScavenger later.
 489   Optional<int> ScavengeFI;
 490
 491 public: // FIXME
 492   /// If this is set, an SGPR used for save/restore of the register used for the
 493   /// frame pointer.
 494   Register SGPRForFPSaveRestoreCopy;
 495   Optional<int> FramePointerSaveIndex;
 496
 497   /// If this is set, an SGPR used for save/restore of the register used for the
 498   /// base pointer.
 499   Register SGPRForBPSaveRestoreCopy;
 500   Optional<int> BasePointerSaveIndex;
 501
 502   Register VGPRReservedForSGPRSpill;
 503   bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
 504
 505 public:
 506   SIMachineFunctionInfo(const MachineFunction &MF);
 507
 508   bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
 509                                 const MachineFunction &MF,
 510                                 PerFunctionMIParsingState &PFS,
 511                                 SMDiagnostic &Error, SMRange &SourceRange);
 512
 513   void reserveWWMRegister(Register Reg, Optional<int> FI) {
 514     WWMReservedRegs.insert(std::make_pair(Reg, FI));
 515   }
 516
 517   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
 518     auto I = SGPRToVGPRSpills.find(FrameIndex);
 519     return (I == SGPRToVGPRSpills.end()) ?
 520       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
 521   }
 522
 523   ArrayRef<SGPRSpillVGPR> getSGPRSpillVGPRs() const { return SpillVGPRs; }
 524
 525   void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
 526     SpillVGPRs[Index].VGPR = NewVGPR;
 527     SpillVGPRs[Index].FI = newFI;
 528     VGPRReservedForSGPRSpill = NewVGPR;
 529   }
 530
 531   bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
 532
 533   ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
 534     return SpillAGPR;
 535   }
 536
 537   ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
 538     return SpillVGPR;
 539   }
 540
 541   MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
 542     auto I = VGPRToAGPRSpills.find(FrameIndex);
 543     return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
 544                                          : I->second.Lanes[Lane];
 545   }
 546
 547   bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
 548                                  unsigned NumLane) const;
 549   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
 550   bool reserveVGPRforSGPRSpills(MachineFunction &MF);
 551   bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
 552   void removeDeadFrameIndices(MachineFrameInfo &MFI);
 553
 554   int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
 555   Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
 556
 557   bool hasCalculatedTID() const { return TIDReg != 0; };
 558   Register getTIDReg() const { return TIDReg; };
 559   void setTIDReg(Register Reg) { TIDReg = Reg; }
 560
 561   unsigned getBytesInStackArgArea() const {
 562     return BytesInStackArgArea;
 563   }
 564
 565   void setBytesInStackArgArea(unsigned Bytes) {
 566     BytesInStackArgArea = Bytes;
 567   }
 568
 569   // Add user SGPRs.
 570   Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
 571   Register addDispatchPtr(const SIRegisterInfo &TRI);
 572   Register addQueuePtr(const SIRegisterInfo &TRI);
 573   Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
 574   Register addDispatchID(const SIRegisterInfo &TRI);
 575   Register addFlatScratchInit(const SIRegisterInfo &TRI);
 576   Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
 577
 578   // Add system SGPRs.
 579   Register addWorkGroupIDX() {
 580     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
 581     NumSystemSGPRs += 1;
 582     return ArgInfo.WorkGroupIDX.getRegister();
 583   }
 584
 585   Register addWorkGroupIDY() {
 586     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
 587     NumSystemSGPRs += 1;
 588     return ArgInfo.WorkGroupIDY.getRegister();
 589   }
 590
 591   Register addWorkGroupIDZ() {
 592     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
 593     NumSystemSGPRs += 1;
 594     return ArgInfo.WorkGroupIDZ.getRegister();
 595   }
 596
 597   Register addWorkGroupInfo() {
 598     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
 599     NumSystemSGPRs += 1;
 600     return ArgInfo.WorkGroupInfo.getRegister();
 601   }
 602
 603   // Add special VGPR inputs
 604   void setWorkItemIDX(ArgDescriptor Arg) {
 605     ArgInfo.WorkItemIDX = Arg;
 606   }
 607
 608   void setWorkItemIDY(ArgDescriptor Arg) {
 609     ArgInfo.WorkItemIDY = Arg;
 610   }
 611
 612   void setWorkItemIDZ(ArgDescriptor Arg) {
 613     ArgInfo.WorkItemIDZ = Arg;
 614   }
 615
 616   Register addPrivateSegmentWaveByteOffset() {
 617     ArgInfo.PrivateSegmentWaveByteOffset
 618       = ArgDescriptor::createRegister(getNextSystemSGPR());
 619     NumSystemSGPRs += 1;
 620     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 621   }
 622
 623   void setPrivateSegmentWaveByteOffset(Register Reg) {
 624     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
 625   }
 626
 627   bool hasPrivateSegmentBuffer() const {
 628     return PrivateSegmentBuffer;
 629   }
 630
 631   bool hasDispatchPtr() const {
 632     return DispatchPtr;
 633   }
 634
 635   bool hasQueuePtr() const {
 636     return QueuePtr;
 637   }
 638
 639   bool hasKernargSegmentPtr() const {
 640     return KernargSegmentPtr;
 641   }
 642
 643   bool hasDispatchID() const {
 644     return DispatchID;
 645   }
 646
 647   bool hasFlatScratchInit() const {
 648     return FlatScratchInit;
 649   }
 650
 651   bool hasWorkGroupIDX() const {
 652     return WorkGroupIDX;
 653   }
 654
 655   bool hasWorkGroupIDY() const {
 656     return WorkGroupIDY;
 657   }
 658
 659   bool hasWorkGroupIDZ() const {
 660     return WorkGroupIDZ;
 661   }
 662
 663   bool hasWorkGroupInfo() const {
 664     return WorkGroupInfo;
 665   }
 666
 667   bool hasPrivateSegmentWaveByteOffset() const {
 668     return PrivateSegmentWaveByteOffset;
 669   }
 670
 671   bool hasWorkItemIDX() const {
 672     return WorkItemIDX;
 673   }
 674
 675   bool hasWorkItemIDY() const {
 676     return WorkItemIDY;
 677   }
 678
 679   bool hasWorkItemIDZ() const {
 680     return WorkItemIDZ;
 681   }
 682
 683   bool hasImplicitArgPtr() const {
 684     return ImplicitArgPtr;
 685   }
 686
 687   bool hasImplicitBufferPtr() const {
 688     return ImplicitBufferPtr;
 689   }
 690
 691   AMDGPUFunctionArgInfo &getArgInfo() {
 692     return ArgInfo;
 693   }
 694
 695   const AMDGPUFunctionArgInfo &getArgInfo() const {
 696     return ArgInfo;
 697   }
 698
 699   std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
 700   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 701     return ArgInfo.getPreloadedValue(Value);
 702   }
 703
 704   MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 705     auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
 706     return Arg ? Arg->getRegister() : MCRegister();
 707   }
 708
 709   unsigned getGITPtrHigh() const {
 710     return GITPtrHigh;
 711   }
 712
 713   Register getGITPtrLoReg(const MachineFunction &MF) const;
 714
 715   uint32_t get32BitAddressHighBits() const {
 716     return HighBitsOf32BitAddress;
 717   }
 718
 719   unsigned getGDSSize() const {
 720     return GDSSize;
 721   }
 722
 723   unsigned getNumUserSGPRs() const {
 724     return NumUserSGPRs;
 725   }
 726
 727   unsigned getNumPreloadedSGPRs() const {
 728     return NumUserSGPRs + NumSystemSGPRs;
 729   }
 730
 731   Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
 732     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 733   }
 734
 735   /// Returns the physical register reserved for use as the resource
 736   /// descriptor for scratch accesses.
 737   Register getScratchRSrcReg() const {
 738     return ScratchRSrcReg;
 739   }
 740
 741   void setScratchRSrcReg(Register Reg) {
 742     assert(Reg != 0 && "Should never be unset");
 743     ScratchRSrcReg = Reg;
 744   }
 745
 746   Register getFrameOffsetReg() const {
 747     return FrameOffsetReg;
 748   }
 749
 750   void setFrameOffsetReg(Register Reg) {
 751     assert(Reg != 0 && "Should never be unset");
 752     FrameOffsetReg = Reg;
 753   }
 754
 755   void setStackPtrOffsetReg(Register Reg) {
 756     assert(Reg != 0 && "Should never be unset");
 757     StackPtrOffsetReg = Reg;
 758   }
 759
 760   // Note the unset value for this is AMDGPU::SP_REG rather than
 761   // NoRegister. This is mostly a workaround for MIR tests where state that
 762   // can't be directly computed from the function is not preserved in serialized
 763   // MIR.
 764   Register getStackPtrOffsetReg() const {
 765     return StackPtrOffsetReg;
 766   }
 767
 768   Register getQueuePtrUserSGPR() const {
 769     return ArgInfo.QueuePtr.getRegister();
 770   }
 771
 772   Register getImplicitBufferPtrUserSGPR() const {
 773     return ArgInfo.ImplicitBufferPtr.getRegister();
 774   }
 775
 776   bool hasSpilledSGPRs() const {
 777     return HasSpilledSGPRs;
 778   }
 779
 780   void setHasSpilledSGPRs(bool Spill = true) {
 781     HasSpilledSGPRs = Spill;
 782   }
 783
 784   bool hasSpilledVGPRs() const {
 785     return HasSpilledVGPRs;
 786   }
 787
 788   void setHasSpilledVGPRs(bool Spill = true) {
 789     HasSpilledVGPRs = Spill;
 790   }
 791
 792   bool hasNonSpillStackObjects() const {
 793     return HasNonSpillStackObjects;
 794   }
 795
 796   void setHasNonSpillStackObjects(bool StackObject = true) {
 797     HasNonSpillStackObjects = StackObject;
 798   }
 799
 800   bool isStackRealigned() const {
 801     return IsStackRealigned;
 802   }
 803
 804   void setIsStackRealigned(bool Realigned = true) {
 805     IsStackRealigned = Realigned;
 806   }
 807
 808   unsigned getNumSpilledSGPRs() const {
 809     return NumSpilledSGPRs;
 810   }
 811
 812   unsigned getNumSpilledVGPRs() const {
 813     return NumSpilledVGPRs;
 814   }
 815
 816   void addToSpilledSGPRs(unsigned num) {
 817     NumSpilledSGPRs += num;
 818   }
 819
 820   void addToSpilledVGPRs(unsigned num) {
 821     NumSpilledVGPRs += num;
 822   }
 823
 824   unsigned getPSInputAddr() const {
 825     return PSInputAddr;
 826   }
 827
 828   unsigned getPSInputEnable() const {
 829     return PSInputEnable;
 830   }
 831
 832   bool isPSInputAllocated(unsigned Index) const {
 833     return PSInputAddr & (1 << Index);
 834   }
 835
 836   void markPSInputAllocated(unsigned Index) {
 837     PSInputAddr |= 1 << Index;
 838   }
 839
 840   void markPSInputEnabled(unsigned Index) {
 841     PSInputEnable |= 1 << Index;
 842   }
 843
 844   bool returnsVoid() const {
 845     return ReturnsVoid;
 846   }
 847
 848   void setIfReturnsVoid(bool Value) {
 849     ReturnsVoid = Value;
 850   }
 851
 852   /// \returns A pair of default/requested minimum/maximum flat work group sizes
 853   /// for this function.
 854   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
 855     return FlatWorkGroupSizes;
 856   }
 857
 858   /// \returns Default/requested minimum flat work group size for this function.
 859   unsigned getMinFlatWorkGroupSize() const {
 860     return FlatWorkGroupSizes.first;
 861   }
 862
 863   /// \returns Default/requested maximum flat work group size for this function.
 864   unsigned getMaxFlatWorkGroupSize() const {
 865     return FlatWorkGroupSizes.second;
 866   }
 867
 868   /// \returns A pair of default/requested minimum/maximum number of waves per
 869   /// execution unit.
 870   std::pair<unsigned, unsigned> getWavesPerEU() const {
 871     return WavesPerEU;
 872   }
 873
 874   /// \returns Default/requested minimum number of waves per execution unit.
 875   unsigned getMinWavesPerEU() const {
 876     return WavesPerEU.first;
 877   }
 878
 879   /// \returns Default/requested maximum number of waves per execution unit.
 880   unsigned getMaxWavesPerEU() const {
 881     return WavesPerEU.second;
 882   }
 883
 884   /// \returns SGPR used for \p Dim's work group ID.
 885   Register getWorkGroupIDSGPR(unsigned Dim) const {
 886     switch (Dim) {
 887     case 0:
 888       assert(hasWorkGroupIDX());
 889       return ArgInfo.WorkGroupIDX.getRegister();
 890     case 1:
 891       assert(hasWorkGroupIDY());
 892       return ArgInfo.WorkGroupIDY.getRegister();
 893     case 2:
 894       assert(hasWorkGroupIDZ());
 895       return ArgInfo.WorkGroupIDZ.getRegister();
 896     }
 897     llvm_unreachable("unexpected dimension");
 898   }
 899
 900   unsigned getLDSWaveSpillSize() const {
 901     return LDSWaveSpillSize;
 902   }
 903
 904   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) {
 905     if (!BufferPSV)
 906       BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(TII);
 907
 908     return BufferPSV.get();
 909   }
 910
 911   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) {
 912     if (!ImagePSV)
 913       ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(TII);
 914
 915     return ImagePSV.get();
 916   }
 917
 918   const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
 919     if (!GWSResourcePSV) {
 920       GWSResourcePSV =
 921           std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
 922     }
 923
 924     return GWSResourcePSV.get();
 925   }
 926
 927   unsigned getOccupancy() const {
 928     return Occupancy;
 929   }
 930
 931   unsigned getMinAllowedOccupancy() const {
 932     if (!isMemoryBound() && !needsWaveLimiter())
 933       return Occupancy;
 934     return (Occupancy < 4) ? Occupancy : 4;
 935   }
 936
 937   void limitOccupancy(const MachineFunction &MF);
 938
 939   void limitOccupancy(unsigned Limit) {
 940     if (Occupancy > Limit)
 941       Occupancy = Limit;
 942   }
 943
 944   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
 945     if (Occupancy < Limit)
 946       Occupancy = Limit;
 947     limitOccupancy(MF);
 948   }
 949 };
 950
 951 } // end namespace llvm
 952
 953 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H