llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

   1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //==-----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// Base class for AMDGPU specific classes of TargetSubtarget.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  16
  17 #include "llvm/IR/CallingConv.h"
  18 #include "llvm/Support/Alignment.h"
  19 #include "llvm/TargetParser/Triple.h"
  20
  21 namespace llvm {
  22
  23 enum AMDGPUDwarfFlavour : unsigned;
  24 class Function;
  25 class Instruction;
  26 class MachineFunction;
  27 class TargetMachine;
  28
  29 class AMDGPUSubtarget {
  30 public:
  31   enum Generation {
  32     INVALID = 0,
  33     R600 = 1,
  34     R700 = 2,
  35     EVERGREEN = 3,
  36     NORTHERN_ISLANDS = 4,
  37     SOUTHERN_ISLANDS = 5,
  38     SEA_ISLANDS = 6,
  39     VOLCANIC_ISLANDS = 7,
  40     GFX9 = 8,
  41     GFX10 = 9,
  42     GFX11 = 10,
  43     GFX12 = 11,
  44   };
  45
  46 private:
  47   Triple TargetTriple;
  48
  49 protected:
  50   bool GCN3Encoding = false;
  51   bool Has16BitInsts = false;
  52   bool HasTrue16BitInsts = false;
  53   bool EnableRealTrue16Insts = false;
  54   bool HasMadMixInsts = false;
  55   bool HasMadMacF32Insts = false;
  56   bool HasDsSrc2Insts = false;
  57   bool HasSDWA = false;
  58   bool HasVOP3PInsts = false;
  59   bool HasMulI24 = true;
  60   bool HasMulU24 = true;
  61   bool HasSMulHi = false;
  62   bool HasInv2PiInlineImm = false;
  63   bool HasFminFmaxLegacy = true;
  64   bool EnablePromoteAlloca = false;
  65   bool HasTrigReducedRange = false;
  66   bool FastFMAF32 = false;
  67   unsigned EUsPerCU = 4;
  68   unsigned MaxWavesPerEU = 10;
  69   unsigned LocalMemorySize = 0;
  70   unsigned AddressableLocalMemorySize = 0;
  71   char WavefrontSizeLog2 = 0;
  72
  73 public:
  74   AMDGPUSubtarget(Triple TT);
  75
  76   static const AMDGPUSubtarget &get(const MachineFunction &MF);
  77   static const AMDGPUSubtarget &get(const TargetMachine &TM,
  78                                     const Function &F);
  79
  80   /// \returns Default range flat work group size for a calling convention.
  81   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
  82
  83   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
  84   /// for function \p F, or minimum/maximum flat work group sizes explicitly
  85   /// requested using "amdgpu-flat-work-group-size" attribute attached to
  86   /// function \p F.
  87   ///
  88   /// \returns Subtarget's default values if explicitly requested values cannot
  89   /// be converted to integer, or violate subtarget's specifications.
  90   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
  91
  92   /// \returns Subtarget's default pair of minimum/maximum number of waves per
  93   /// execution unit for function \p F, or minimum/maximum number of waves per
  94   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
  95   /// attached to function \p F.
  96   ///
  97   /// \returns Subtarget's default values if explicitly requested values cannot
  98   /// be converted to integer, violate subtarget's specifications, or are not
  99   /// compatible with minimum/maximum number of waves limited by flat work group
 100   /// size, register usage, and/or lds usage.
 101   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
 102     // Default/requested minimum/maximum flat work group sizes.
 103     std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
 104     return getWavesPerEU(F, FlatWorkGroupSizes);
 105   }
 106
 107   /// Overload which uses the specified values for the flat work group sizes,
 108   /// rather than querying the function itself. \p FlatWorkGroupSizes Should
 109   /// correspond to the function's value for getFlatWorkGroupSizes.
 110   std::pair<unsigned, unsigned>
 111   getWavesPerEU(const Function &F,
 112                 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
 113   std::pair<unsigned, unsigned> getEffectiveWavesPerEU(
 114       std::pair<unsigned, unsigned> WavesPerEU,
 115       std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
 116
 117   /// Return the amount of LDS that can be used that will not restrict the
 118   /// occupancy lower than WaveCount.
 119   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
 120                                            const Function &) const;
 121
 122   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
 123   /// the given LDS memory size is the only constraint.
 124   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
 125
 126   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
 127
 128   bool isAmdHsaOS() const {
 129     return TargetTriple.getOS() == Triple::AMDHSA;
 130   }
 131
 132   bool isAmdPalOS() const {
 133     return TargetTriple.getOS() == Triple::AMDPAL;
 134   }
 135
 136   bool isMesa3DOS() const {
 137     return TargetTriple.getOS() == Triple::Mesa3D;
 138   }
 139
 140   bool isMesaKernel(const Function &F) const;
 141
 142   bool isAmdHsaOrMesa(const Function &F) const {
 143     return isAmdHsaOS() || isMesaKernel(F);
 144   }
 145
 146   bool isGCN() const {
 147     return TargetTriple.getArch() == Triple::amdgcn;
 148   }
 149
 150   bool isGCN3Encoding() const {
 151     return GCN3Encoding;
 152   }
 153
 154   bool has16BitInsts() const {
 155     return Has16BitInsts;
 156   }
 157
 158   /// Return true if the subtarget supports True16 instructions.
 159   bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
 160
 161   /// Return true if real (non-fake) variants of True16 instructions using
 162   /// 16-bit registers should be code-generated. Fake True16 instructions are
 163   /// identical to non-fake ones except that they take 32-bit registers as
 164   /// operands and always use their low halves.
 165   // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
 166   // supported and the support for fake True16 instructions is removed.
 167   bool useRealTrue16Insts() const;
 168
 169   bool hasMadMixInsts() const {
 170     return HasMadMixInsts;
 171   }
 172
 173   bool hasMadMacF32Insts() const {
 174     return HasMadMacF32Insts || !isGCN();
 175   }
 176
 177   bool hasDsSrc2Insts() const {
 178     return HasDsSrc2Insts;
 179   }
 180
 181   bool hasSDWA() const {
 182     return HasSDWA;
 183   }
 184
 185   bool hasVOP3PInsts() const {
 186     return HasVOP3PInsts;
 187   }
 188
 189   bool hasMulI24() const {
 190     return HasMulI24;
 191   }
 192
 193   bool hasMulU24() const {
 194     return HasMulU24;
 195   }
 196
 197   bool hasSMulHi() const {
 198     return HasSMulHi;
 199   }
 200
 201   bool hasInv2PiInlineImm() const {
 202     return HasInv2PiInlineImm;
 203   }
 204
 205   bool hasFminFmaxLegacy() const {
 206     return HasFminFmaxLegacy;
 207   }
 208
 209   bool hasTrigReducedRange() const {
 210     return HasTrigReducedRange;
 211   }
 212
 213   bool hasFastFMAF32() const {
 214     return FastFMAF32;
 215   }
 216
 217   bool isPromoteAllocaEnabled() const {
 218     return EnablePromoteAlloca;
 219   }
 220
 221   unsigned getWavefrontSize() const {
 222     return 1 << WavefrontSizeLog2;
 223   }
 224
 225   unsigned getWavefrontSizeLog2() const {
 226     return WavefrontSizeLog2;
 227   }
 228
 229   unsigned getLocalMemorySize() const {
 230     return LocalMemorySize;
 231   }
 232
 233   unsigned getAddressableLocalMemorySize() const {
 234     return AddressableLocalMemorySize;
 235   }
 236
 237   /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
 238   /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
 239   /// CU mode into account.
 240   unsigned getEUsPerCU() const { return EUsPerCU; }
 241
 242   Align getAlignmentForImplicitArgPtr() const {
 243     return isAmdHsaOS() ? Align(8) : Align(4);
 244   }
 245
 246   /// Returns the offset in bytes from the start of the input buffer
 247   ///        of the first explicit kernel argument.
 248   unsigned getExplicitKernelArgOffset() const {
 249     switch (TargetTriple.getOS()) {
 250     case Triple::AMDHSA:
 251     case Triple::AMDPAL:
 252     case Triple::Mesa3D:
 253       return 0;
 254     case Triple::UnknownOS:
 255     default:
 256       // For legacy reasons unknown/other is treated as a different version of
 257       // mesa.
 258       return 36;
 259     }
 260
 261     llvm_unreachable("invalid triple OS");
 262   }
 263
 264   /// \returns Maximum number of work groups per compute unit supported by the
 265   /// subtarget and limited by given \p FlatWorkGroupSize.
 266   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
 267
 268   /// \returns Minimum flat work group size supported by the subtarget.
 269   virtual unsigned getMinFlatWorkGroupSize() const = 0;
 270
 271   /// \returns Maximum flat work group size supported by the subtarget.
 272   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
 273
 274   /// \returns Number of waves per execution unit required to support the given
 275   /// \p FlatWorkGroupSize.
 276   virtual unsigned
 277   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
 278
 279   /// \returns Minimum number of waves per execution unit supported by the
 280   /// subtarget.
 281   virtual unsigned getMinWavesPerEU() const = 0;
 282
 283   /// \returns Maximum number of waves per execution unit supported by the
 284   /// subtarget without any kind of limitation.
 285   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
 286
 287   /// Return the maximum workitem ID value in the function, for the given (0, 1,
 288   /// 2) dimension.
 289   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
 290
 291   /// Return the number of work groups for the function.
 292   SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) const;
 293
 294   /// Return true if only a single workitem can be active in a wave.
 295   bool isSingleLaneExecution(const Function &Kernel) const;
 296
 297   /// Creates value range metadata on an workitemid.* intrinsic call or load.
 298   bool makeLIDRangeMetadata(Instruction *I) const;
 299
 300   /// \returns Number of bytes of arguments that are passed to a shader or
 301   /// kernel in addition to the explicit ones declared for the function.
 302   unsigned getImplicitArgNumBytes(const Function &F) const;
 303   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
 304   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
 305
 306   /// \returns Corresponding DWARF register number mapping flavour for the
 307   /// \p WavefrontSize.
 308   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
 309
 310   virtual ~AMDGPUSubtarget() = default;
 311 };
 312
 313 } // end namespace llvm
 314
 315 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H