1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
13 #include "AMDKernelCodeT.h"
14 #include "SIDefines.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/IR/CallingConv.h"
17 #include "llvm/MC/MCInstrDesc.h"
18 #include "llvm/Support/AMDHSAKernelDescriptor.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/TargetParser.h"
// Forward declarations.
// NOTE(review): the extraction dropped neighboring lines here (including,
// presumably, more forward declarations and the `namespace llvm {` opener) —
// verify against upstream that none are missing.
class AMDGPUSubtarget;
class MCRegisterClass;
class MCSubtargetInfo;
class MachineMemOperand;
44 #define GET_MIMGBaseOpcode_DECL
45 #define GET_MIMGDim_DECL
46 #define GET_MIMGEncoding_DECL
47 #define GET_MIMGLZMapping_DECL
48 #include "AMDGPUGenSearchableTables.inc"
53 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
54 // doesn't spill SGPRs as much as when 80 is set.
55 FIXED_NUM_SGPRS_FOR_INIT_BUG
= 96,
59 /// Streams isa version string for given subtarget \p STI into \p Stream.
60 void streamIsaVersion(const MCSubtargetInfo
*STI
, raw_ostream
&Stream
);
62 /// \returns True if given subtarget \p STI supports code object version 3,
64 bool hasCodeObjectV3(const MCSubtargetInfo
*STI
);
66 /// \returns Wavefront size for given subtarget \p STI.
67 unsigned getWavefrontSize(const MCSubtargetInfo
*STI
);
69 /// \returns Local memory size in bytes for given subtarget \p STI.
70 unsigned getLocalMemorySize(const MCSubtargetInfo
*STI
);
72 /// \returns Number of execution units per compute unit for given subtarget \p
74 unsigned getEUsPerCU(const MCSubtargetInfo
*STI
);
76 /// \returns Maximum number of work groups per compute unit for given subtarget
77 /// \p STI and limited by given \p FlatWorkGroupSize.
78 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo
*STI
,
79 unsigned FlatWorkGroupSize
);
81 /// \returns Maximum number of waves per compute unit for given subtarget \p
82 /// STI without any kind of limitation.
83 unsigned getMaxWavesPerCU(const MCSubtargetInfo
*STI
);
85 /// \returns Maximum number of waves per compute unit for given subtarget \p
86 /// STI and limited by given \p FlatWorkGroupSize.
87 unsigned getMaxWavesPerCU(const MCSubtargetInfo
*STI
,
88 unsigned FlatWorkGroupSize
);
90 /// \returns Minimum number of waves per execution unit for given subtarget \p
92 unsigned getMinWavesPerEU(const MCSubtargetInfo
*STI
);
94 /// \returns Maximum number of waves per execution unit for given subtarget \p
95 /// STI without any kind of limitation.
96 unsigned getMaxWavesPerEU();
98 /// \returns Maximum number of waves per execution unit for given subtarget \p
99 /// STI and limited by given \p FlatWorkGroupSize.
100 unsigned getMaxWavesPerEU(const MCSubtargetInfo
*STI
,
101 unsigned FlatWorkGroupSize
);
103 /// \returns Minimum flat work group size for given subtarget \p STI.
104 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo
*STI
);
106 /// \returns Maximum flat work group size for given subtarget \p STI.
107 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo
*STI
);
109 /// \returns Number of waves per work group for given subtarget \p STI and
110 /// limited by given \p FlatWorkGroupSize.
111 unsigned getWavesPerWorkGroup(const MCSubtargetInfo
*STI
,
112 unsigned FlatWorkGroupSize
);
114 /// \returns SGPR allocation granularity for given subtarget \p STI.
115 unsigned getSGPRAllocGranule(const MCSubtargetInfo
*STI
);
117 /// \returns SGPR encoding granularity for given subtarget \p STI.
118 unsigned getSGPREncodingGranule(const MCSubtargetInfo
*STI
);
120 /// \returns Total number of SGPRs for given subtarget \p STI.
121 unsigned getTotalNumSGPRs(const MCSubtargetInfo
*STI
);
123 /// \returns Addressable number of SGPRs for given subtarget \p STI.
124 unsigned getAddressableNumSGPRs(const MCSubtargetInfo
*STI
);
126 /// \returns Minimum number of SGPRs that meets the given number of waves per
127 /// execution unit requirement for given subtarget \p STI.
128 unsigned getMinNumSGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
130 /// \returns Maximum number of SGPRs that meets the given number of waves per
131 /// execution unit requirement for given subtarget \p STI.
132 unsigned getMaxNumSGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
,
135 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
136 /// STI when the given special registers are used.
137 unsigned getNumExtraSGPRs(const MCSubtargetInfo
*STI
, bool VCCUsed
,
138 bool FlatScrUsed
, bool XNACKUsed
);
140 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
141 /// STI when the given special registers are used. XNACK is inferred from
143 unsigned getNumExtraSGPRs(const MCSubtargetInfo
*STI
, bool VCCUsed
,
146 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
147 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
149 unsigned getNumSGPRBlocks(const MCSubtargetInfo
*STI
, unsigned NumSGPRs
);
151 /// \returns VGPR allocation granularity for given subtarget \p STI.
152 unsigned getVGPRAllocGranule(const MCSubtargetInfo
*STI
);
154 /// \returns VGPR encoding granularity for given subtarget \p STI.
155 unsigned getVGPREncodingGranule(const MCSubtargetInfo
*STI
);
157 /// \returns Total number of VGPRs for given subtarget \p STI.
158 unsigned getTotalNumVGPRs(const MCSubtargetInfo
*STI
);
160 /// \returns Addressable number of VGPRs for given subtarget \p STI.
161 unsigned getAddressableNumVGPRs(const MCSubtargetInfo
*STI
);
163 /// \returns Minimum number of VGPRs that meets given number of waves per
164 /// execution unit requirement for given subtarget \p STI.
165 unsigned getMinNumVGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
167 /// \returns Maximum number of VGPRs that meets given number of waves per
168 /// execution unit requirement for given subtarget \p STI.
169 unsigned getMaxNumVGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
171 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
172 /// \p NumVGPRs are used.
173 unsigned getNumVGPRBlocks(const MCSubtargetInfo
*STI
, unsigned NumSGPRs
);
175 } // end namespace IsaInfo
/// \returns Index of the operand named \p NamedIdx in instruction \p Opcode.
// NOTE(review): TableGen-generated lookup; presumably returns -1 when the
// operand is absent — confirm against the generated implementation.
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
180 struct MIMGBaseOpcodeInfo
{
181 MIMGBaseOpcode BaseOpcode
;
188 uint8_t NumExtraArgs
;
191 bool LodOrClampOrMip
;
196 const MIMGBaseOpcodeInfo
*getMIMGBaseOpcodeInfo(unsigned BaseOpcode
);
201 uint8_t NumGradients
;
206 const MIMGDimInfo
*getMIMGDimInfo(unsigned Dim
);
208 struct MIMGLZMappingInfo
{
214 const MIMGLZMappingInfo
*getMIMGLZMappingInfo(unsigned L
);
217 int getMIMGOpcode(unsigned BaseOpcode
, unsigned MIMGEncoding
,
218 unsigned VDataDwords
, unsigned VAddrDwords
);
221 int getMaskedMIMGOp(unsigned Opc
, unsigned NewChannels
);
// MUBUF searchable-table queries (backed by AMDGPUGenSearchableTables.inc).

int getMUBUFBaseOpcode(unsigned Opc);

int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);

int getMUBUFDwords(unsigned Opc);

bool getMUBUFHasVAddr(unsigned Opc);

bool getMUBUFHasSrsrc(unsigned Opc);

bool getMUBUFHasSoffset(unsigned Opc);

/// \returns The MC opcode for pseudo opcode \p Opcode on generation \p Gen.
int getMCOpcode(uint16_t Opcode, unsigned Gen);
244 void initDefaultAMDKernelCodeT(amd_kernel_code_t
&Header
,
245 const MCSubtargetInfo
*STI
);
247 amdhsa::kernel_descriptor_t
getDefaultAmdhsaKernelDescriptor();
249 bool isGroupSegment(const GlobalValue
*GV
);
250 bool isGlobalSegment(const GlobalValue
*GV
);
251 bool isReadOnlySegment(const GlobalValue
*GV
);
253 /// \returns True if constants should be emitted to .text section for given
254 /// target triple \p TT, false otherwise.
255 bool shouldEmitConstantsToTextSection(const Triple
&TT
);
257 /// \returns Integer value requested using \p F's \p Name attribute.
259 /// \returns \p Default if attribute is not present.
261 /// \returns \p Default and emits error if requested value cannot be converted
263 int getIntegerAttribute(const Function
&F
, StringRef Name
, int Default
);
265 /// \returns A pair of integer values requested using \p F's \p Name attribute
266 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
269 /// \returns \p Default if attribute is not present.
271 /// \returns \p Default and emits error if one of the requested values cannot be
272 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
274 std::pair
<int, int> getIntegerPairAttribute(const Function
&F
,
276 std::pair
<int, int> Default
,
277 bool OnlyFirstRequired
= false);
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
// NOTE(review): the struct opener and default constructor were lost in
// extraction; restored here — confirm against upstream.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}

  /// \returns a Waitcnt that waits until all three counters reach zero.
  static Waitcnt allZero() { return Waitcnt(0, 0, 0); }

  /// \returns true if every bound in *this is at least as strict as the
  /// corresponding bound in \p Other (component-wise <=).
  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt;
  }

  /// \returns the strictest combination of *this and \p Other, i.e. the
  /// component-wise minimum of the counter bounds.
  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt));
  }
};
305 /// \returns Vmcnt bit mask for given isa \p Version.
306 unsigned getVmcntBitMask(const IsaVersion
&Version
);
308 /// \returns Expcnt bit mask for given isa \p Version.
309 unsigned getExpcntBitMask(const IsaVersion
&Version
);
311 /// \returns Lgkmcnt bit mask for given isa \p Version.
312 unsigned getLgkmcntBitMask(const IsaVersion
&Version
);
314 /// \returns Waitcnt bit mask for given isa \p Version.
315 unsigned getWaitcntBitMask(const IsaVersion
&Version
);
317 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
318 unsigned decodeVmcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
320 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
321 unsigned decodeExpcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
323 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
324 unsigned decodeLgkmcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
326 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
327 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
328 /// \p Lgkmcnt respectively.
330 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
331 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
332 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
333 /// \p Expcnt = \p Waitcnt[6:4]
334 /// \p Lgkmcnt = \p Waitcnt[11:8]
335 void decodeWaitcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
336 unsigned &Vmcnt
, unsigned &Expcnt
, unsigned &Lgkmcnt
);
338 Waitcnt
decodeWaitcnt(const IsaVersion
&Version
, unsigned Encoded
);
340 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
341 unsigned encodeVmcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
344 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
345 unsigned encodeExpcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
348 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
349 unsigned encodeLgkmcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
352 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
355 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
356 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
357 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
358 /// Waitcnt[6:4] = \p Expcnt
359 /// Waitcnt[11:8] = \p Lgkmcnt
360 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
362 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
364 unsigned encodeWaitcnt(const IsaVersion
&Version
,
365 unsigned Vmcnt
, unsigned Expcnt
, unsigned Lgkmcnt
);
367 unsigned encodeWaitcnt(const IsaVersion
&Version
, const Waitcnt
&Decoded
);
369 unsigned getInitialPSInputAddr(const Function
&F
);
372 bool isShader(CallingConv::ID CC
);
375 bool isCompute(CallingConv::ID CC
);
378 bool isEntryFunctionCC(CallingConv::ID CC
);
380 // FIXME: Remove this when calling conventions cleaned up
382 inline bool isKernel(CallingConv::ID CC
) {
384 case CallingConv::AMDGPU_KERNEL
:
385 case CallingConv::SPIR_KERNEL
:
392 bool hasXNACK(const MCSubtargetInfo
&STI
);
393 bool hasSRAMECC(const MCSubtargetInfo
&STI
);
394 bool hasMIMG_R128(const MCSubtargetInfo
&STI
);
395 bool hasPackedD16(const MCSubtargetInfo
&STI
);
397 bool isSI(const MCSubtargetInfo
&STI
);
398 bool isCI(const MCSubtargetInfo
&STI
);
399 bool isVI(const MCSubtargetInfo
&STI
);
400 bool isGFX9(const MCSubtargetInfo
&STI
);
402 /// Is Reg - scalar register
403 bool isSGPR(unsigned Reg
, const MCRegisterInfo
* TRI
);
405 /// Is there any intersection between registers
406 bool isRegIntersect(unsigned Reg0
, unsigned Reg1
, const MCRegisterInfo
* TRI
);
408 /// If \p Reg is a pseudo reg, return the correct hardware register given
409 /// \p STI otherwise return \p Reg.
410 unsigned getMCReg(unsigned Reg
, const MCSubtargetInfo
&STI
);
412 /// Convert hardware register \p Reg to a pseudo register
414 unsigned mc2PseudoReg(unsigned Reg
);
416 /// Can this operand also contain immediate values?
417 bool isSISrcOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
419 /// Is this floating-point operand?
420 bool isSISrcFPOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
422 /// Does this opearnd support only inlinable literals?
423 bool isSISrcInlinableOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
425 /// Get the size in bits of a register from the register class \p RC.
426 unsigned getRegBitWidth(unsigned RCID
);
428 /// Get the size in bits of a register from the register class \p RC.
429 unsigned getRegBitWidth(const MCRegisterClass
&RC
);
431 /// Get size of register operand
432 unsigned getRegOperandSize(const MCRegisterInfo
*MRI
, const MCInstrDesc
&Desc
,
436 inline unsigned getOperandSize(const MCOperandInfo
&OpInfo
) {
437 switch (OpInfo
.OperandType
) {
438 case AMDGPU::OPERAND_REG_IMM_INT32
:
439 case AMDGPU::OPERAND_REG_IMM_FP32
:
440 case AMDGPU::OPERAND_REG_INLINE_C_INT32
:
441 case AMDGPU::OPERAND_REG_INLINE_C_FP32
:
444 case AMDGPU::OPERAND_REG_IMM_INT64
:
445 case AMDGPU::OPERAND_REG_IMM_FP64
:
446 case AMDGPU::OPERAND_REG_INLINE_C_INT64
:
447 case AMDGPU::OPERAND_REG_INLINE_C_FP64
:
450 case AMDGPU::OPERAND_REG_IMM_INT16
:
451 case AMDGPU::OPERAND_REG_IMM_FP16
:
452 case AMDGPU::OPERAND_REG_INLINE_C_INT16
:
453 case AMDGPU::OPERAND_REG_INLINE_C_FP16
:
454 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16
:
455 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16
:
459 llvm_unreachable("unhandled operand type");
464 inline unsigned getOperandSize(const MCInstrDesc
&Desc
, unsigned OpNo
) {
465 return getOperandSize(Desc
.OpInfo
[OpNo
]);
/// Is this literal inlinable
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
481 bool isArgPassedInSGPR(const Argument
*Arg
);
483 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
485 int64_t getSMRDEncodedOffset(const MCSubtargetInfo
&ST
, int64_t ByteOffset
);
487 /// \returns true if this offset is small enough to fit in the SMRD
488 /// offset field. \p ByteOffset should be the offset in bytes and
489 /// not the encoded offset.
490 bool isLegalSMRDImmOffset(const MCSubtargetInfo
&ST
, int64_t ByteOffset
);
492 bool splitMUBUFOffset(uint32_t Imm
, uint32_t &SOffset
, uint32_t &ImmOffset
,
493 const GCNSubtarget
*Subtarget
, uint32_t Align
= 4);
495 /// \returns true if the intrinsic is divergent
496 bool isIntrinsicSourceOfDivergence(unsigned IntrID
);
498 } // end namespace AMDGPU
499 } // end namespace llvm
501 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H