1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
13 #include "AMDKernelCodeT.h"
14 #include "SIDefines.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/IR/CallingConv.h"
17 #include "llvm/MC/MCInstrDesc.h"
18 #include "llvm/Support/AMDHSAKernelDescriptor.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/TargetParser.h"
// Forward declarations.
// NOTE(review): the extraction dropped neighboring lines here (including,
// presumably, more forward declarations and the `namespace llvm {` opener) —
// verify against upstream that none are missing.
class AMDGPUSubtarget;
class MCRegisterClass;
class MCSubtargetInfo;
class MachineMemOperand;
44 #define GET_MIMGBaseOpcode_DECL
45 #define GET_MIMGDim_DECL
46 #define GET_MIMGEncoding_DECL
47 #define GET_MIMGLZMapping_DECL
48 #include "AMDGPUGenSearchableTables.inc"
53 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
54 // doesn't spill SGPRs as much as when 80 is set.
55 FIXED_NUM_SGPRS_FOR_INIT_BUG
= 96,
59 /// Streams isa version string for given subtarget \p STI into \p Stream.
60 void streamIsaVersion(const MCSubtargetInfo
*STI
, raw_ostream
&Stream
);
62 /// \returns True if given subtarget \p STI supports code object version 3,
64 bool hasCodeObjectV3(const MCSubtargetInfo
*STI
);
66 /// \returns Wavefront size for given subtarget \p STI.
67 unsigned getWavefrontSize(const MCSubtargetInfo
*STI
);
69 /// \returns Local memory size in bytes for given subtarget \p STI.
70 unsigned getLocalMemorySize(const MCSubtargetInfo
*STI
);
72 /// \returns Number of execution units per compute unit for given subtarget \p
74 unsigned getEUsPerCU(const MCSubtargetInfo
*STI
);
76 /// \returns Maximum number of work groups per compute unit for given subtarget
77 /// \p STI and limited by given \p FlatWorkGroupSize.
78 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo
*STI
,
79 unsigned FlatWorkGroupSize
);
81 /// \returns Maximum number of waves per compute unit for given subtarget \p
82 /// STI without any kind of limitation.
83 unsigned getMaxWavesPerCU(const MCSubtargetInfo
*STI
);
85 /// \returns Maximum number of waves per compute unit for given subtarget \p
86 /// STI and limited by given \p FlatWorkGroupSize.
87 unsigned getMaxWavesPerCU(const MCSubtargetInfo
*STI
,
88 unsigned FlatWorkGroupSize
);
90 /// \returns Minimum number of waves per execution unit for given subtarget \p
92 unsigned getMinWavesPerEU(const MCSubtargetInfo
*STI
);
94 /// \returns Maximum number of waves per execution unit for given subtarget \p
95 /// STI without any kind of limitation.
96 unsigned getMaxWavesPerEU();
98 /// \returns Maximum number of waves per execution unit for given subtarget \p
99 /// STI and limited by given \p FlatWorkGroupSize.
100 unsigned getMaxWavesPerEU(const MCSubtargetInfo
*STI
,
101 unsigned FlatWorkGroupSize
);
103 /// \returns Minimum flat work group size for given subtarget \p STI.
104 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo
*STI
);
106 /// \returns Maximum flat work group size for given subtarget \p STI.
107 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo
*STI
);
109 /// \returns Number of waves per work group for given subtarget \p STI and
110 /// limited by given \p FlatWorkGroupSize.
111 unsigned getWavesPerWorkGroup(const MCSubtargetInfo
*STI
,
112 unsigned FlatWorkGroupSize
);
114 /// \returns SGPR allocation granularity for given subtarget \p STI.
115 unsigned getSGPRAllocGranule(const MCSubtargetInfo
*STI
);
117 /// \returns SGPR encoding granularity for given subtarget \p STI.
118 unsigned getSGPREncodingGranule(const MCSubtargetInfo
*STI
);
120 /// \returns Total number of SGPRs for given subtarget \p STI.
121 unsigned getTotalNumSGPRs(const MCSubtargetInfo
*STI
);
123 /// \returns Addressable number of SGPRs for given subtarget \p STI.
124 unsigned getAddressableNumSGPRs(const MCSubtargetInfo
*STI
);
126 /// \returns Minimum number of SGPRs that meets the given number of waves per
127 /// execution unit requirement for given subtarget \p STI.
128 unsigned getMinNumSGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
130 /// \returns Maximum number of SGPRs that meets the given number of waves per
131 /// execution unit requirement for given subtarget \p STI.
132 unsigned getMaxNumSGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
,
135 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
136 /// STI when the given special registers are used.
137 unsigned getNumExtraSGPRs(const MCSubtargetInfo
*STI
, bool VCCUsed
,
138 bool FlatScrUsed
, bool XNACKUsed
);
140 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
141 /// STI when the given special registers are used. XNACK is inferred from
143 unsigned getNumExtraSGPRs(const MCSubtargetInfo
*STI
, bool VCCUsed
,
146 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
147 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
149 unsigned getNumSGPRBlocks(const MCSubtargetInfo
*STI
, unsigned NumSGPRs
);
151 /// \returns VGPR allocation granularity for given subtarget \p STI.
152 unsigned getVGPRAllocGranule(const MCSubtargetInfo
*STI
);
154 /// \returns VGPR encoding granularity for given subtarget \p STI.
155 unsigned getVGPREncodingGranule(const MCSubtargetInfo
*STI
);
157 /// \returns Total number of VGPRs for given subtarget \p STI.
158 unsigned getTotalNumVGPRs(const MCSubtargetInfo
*STI
);
160 /// \returns Addressable number of VGPRs for given subtarget \p STI.
161 unsigned getAddressableNumVGPRs(const MCSubtargetInfo
*STI
);
163 /// \returns Minimum number of VGPRs that meets given number of waves per
164 /// execution unit requirement for given subtarget \p STI.
165 unsigned getMinNumVGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
167 /// \returns Maximum number of VGPRs that meets given number of waves per
168 /// execution unit requirement for given subtarget \p STI.
169 unsigned getMaxNumVGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
171 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
172 /// \p NumVGPRs are used.
173 unsigned getNumVGPRBlocks(const MCSubtargetInfo
*STI
, unsigned NumSGPRs
);
175 } // end namespace IsaInfo
/// \returns Index of the operand named \p NamedIdx in instruction \p Opcode.
// NOTE(review): TableGen-generated lookup; presumably returns -1 when the
// operand is absent — confirm against the generated implementation.
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
180 struct MIMGBaseOpcodeInfo
{
181 MIMGBaseOpcode BaseOpcode
;
188 uint8_t NumExtraArgs
;
191 bool LodOrClampOrMip
;
196 const MIMGBaseOpcodeInfo
*getMIMGBaseOpcodeInfo(unsigned BaseOpcode
);
201 uint8_t NumGradients
;
206 const MIMGDimInfo
*getMIMGDimInfo(unsigned Dim
);
208 struct MIMGLZMappingInfo
{
214 const MIMGLZMappingInfo
*getMIMGLZMappingInfo(unsigned L
);
217 int getMIMGOpcode(unsigned BaseOpcode
, unsigned MIMGEncoding
,
218 unsigned VDataDwords
, unsigned VAddrDwords
);
221 int getMaskedMIMGOp(unsigned Opc
, unsigned NewChannels
);
// MUBUF searchable-table queries (backed by AMDGPUGenSearchableTables.inc).

int getMUBUFBaseOpcode(unsigned Opc);

int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);

int getMUBUFDwords(unsigned Opc);

bool getMUBUFHasVAddr(unsigned Opc);

bool getMUBUFHasSrsrc(unsigned Opc);

bool getMUBUFHasSoffset(unsigned Opc);

/// \returns The MC opcode for pseudo opcode \p Opcode on generation \p Gen.
int getMCOpcode(uint16_t Opcode, unsigned Gen);
244 void initDefaultAMDKernelCodeT(amd_kernel_code_t
&Header
,
245 const MCSubtargetInfo
*STI
);
247 amdhsa::kernel_descriptor_t
getDefaultAmdhsaKernelDescriptor();
249 bool isGroupSegment(const GlobalValue
*GV
);
250 bool isGlobalSegment(const GlobalValue
*GV
);
251 bool isReadOnlySegment(const GlobalValue
*GV
);
253 /// \returns True if constants should be emitted to .text section for given
254 /// target triple \p TT, false otherwise.
255 bool shouldEmitConstantsToTextSection(const Triple
&TT
);
257 /// \returns Integer value requested using \p F's \p Name attribute.
259 /// \returns \p Default if attribute is not present.
261 /// \returns \p Default and emits error if requested value cannot be converted
263 int getIntegerAttribute(const Function
&F
, StringRef Name
, int Default
);
265 /// \returns A pair of integer values requested using \p F's \p Name attribute
266 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
269 /// \returns \p Default if attribute is not present.
271 /// \returns \p Default and emits error if one of the requested values cannot be
272 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
274 std::pair
<int, int> getIntegerPairAttribute(const Function
&F
,
276 std::pair
<int, int> Default
,
277 bool OnlyFirstRequired
= false);
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
// NOTE(review): the struct opener and default constructor were lost in
// extraction; restored here — confirm against upstream.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}

  /// \returns a Waitcnt that waits until all three counters reach zero.
  static Waitcnt allZero() { return Waitcnt(0, 0, 0); }

  /// \returns true if every bound in *this is at least as strict as the
  /// corresponding bound in \p Other (component-wise <=).
  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt;
  }

  /// \returns the strictest combination of *this and \p Other, i.e. the
  /// component-wise minimum of the counter bounds.
  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt));
  }
};
305 /// \returns Vmcnt bit mask for given isa \p Version.
306 unsigned getVmcntBitMask(const IsaVersion
&Version
);
308 /// \returns Expcnt bit mask for given isa \p Version.
309 unsigned getExpcntBitMask(const IsaVersion
&Version
);
311 /// \returns Lgkmcnt bit mask for given isa \p Version.
312 unsigned getLgkmcntBitMask(const IsaVersion
&Version
);
314 /// \returns Waitcnt bit mask for given isa \p Version.
315 unsigned getWaitcntBitMask(const IsaVersion
&Version
);
317 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
318 unsigned decodeVmcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
320 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
321 unsigned decodeExpcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
323 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
324 unsigned decodeLgkmcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
326 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
327 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
328 /// \p Lgkmcnt respectively.
330 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
331 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
332 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
333 /// \p Expcnt = \p Waitcnt[6:4]
334 /// \p Lgkmcnt = \p Waitcnt[11:8]
335 void decodeWaitcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
336 unsigned &Vmcnt
, unsigned &Expcnt
, unsigned &Lgkmcnt
);
338 Waitcnt
decodeWaitcnt(const IsaVersion
&Version
, unsigned Encoded
);
340 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
341 unsigned encodeVmcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
344 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
345 unsigned encodeExpcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
348 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
349 unsigned encodeLgkmcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
352 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
355 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
356 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
357 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
358 /// Waitcnt[6:4] = \p Expcnt
359 /// Waitcnt[11:8] = \p Lgkmcnt
360 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
362 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
364 unsigned encodeWaitcnt(const IsaVersion
&Version
,
365 unsigned Vmcnt
, unsigned Expcnt
, unsigned Lgkmcnt
);
367 unsigned encodeWaitcnt(const IsaVersion
&Version
, const Waitcnt
&Decoded
);
369 unsigned getInitialPSInputAddr(const Function
&F
);
372 bool isShader(CallingConv::ID CC
);
375 bool isCompute(CallingConv::ID CC
);
378 bool isEntryFunctionCC(CallingConv::ID CC
);
380 // FIXME: Remove this when calling conventions cleaned up
382 inline bool isKernel(CallingConv::ID CC
) {
384 case CallingConv::AMDGPU_KERNEL
:
385 case CallingConv::SPIR_KERNEL
:
392 bool hasXNACK(const MCSubtargetInfo
&STI
);
393 bool hasSRAMECC(const MCSubtargetInfo
&STI
);
394 bool hasMIMG_R128(const MCSubtargetInfo
&STI
);
395 bool hasPackedD16(const MCSubtargetInfo
&STI
);
397 bool isSI(const MCSubtargetInfo
&STI
);
398 bool isCI(const MCSubtargetInfo
&STI
);
399 bool isVI(const MCSubtargetInfo
&STI
);
400 bool isGFX9(const MCSubtargetInfo
&STI
);
402 /// Is Reg - scalar register
403 bool isSGPR(unsigned Reg
, const MCRegisterInfo
* TRI
);
405 /// Is there any intersection between registers
406 bool isRegIntersect(unsigned Reg0
, unsigned Reg1
, const MCRegisterInfo
* TRI
);
408 /// If \p Reg is a pseudo reg, return the correct hardware register given
409 /// \p STI otherwise return \p Reg.
410 unsigned getMCReg(unsigned Reg
, const MCSubtargetInfo
&STI
);
412 /// Convert hardware register \p Reg to a pseudo register
414 unsigned mc2PseudoReg(unsigned Reg
);
416 /// Can this operand also contain immediate values?
417 bool isSISrcOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
419 /// Is this floating-point operand?
420 bool isSISrcFPOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
422 /// Does this opearnd support only inlinable literals?
423 bool isSISrcInlinableOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
425 /// Get the size in bits of a register from the register class \p RC.
426 unsigned getRegBitWidth(unsigned RCID
);
428 /// Get the size in bits of a register from the register class \p RC.
429 unsigned getRegBitWidth(const MCRegisterClass
&RC
);
431 /// Get size of register operand
432 unsigned getRegOperandSize(const MCRegisterInfo
*MRI
, const MCInstrDesc
&Desc
,
436 inline unsigned getOperandSize(const MCOperandInfo
&OpInfo
) {
437 switch (OpInfo
.OperandType
) {
438 case AMDGPU::OPERAND_REG_IMM_INT32
:
439 case AMDGPU::OPERAND_REG_IMM_FP32
:
440 case AMDGPU::OPERAND_REG_INLINE_C_INT32
:
441 case AMDGPU::OPERAND_REG_INLINE_C_FP32
:
444 case AMDGPU::OPERAND_REG_IMM_INT64
:
445 case AMDGPU::OPERAND_REG_IMM_FP64
:
446 case AMDGPU::OPERAND_REG_INLINE_C_INT64
:
447 case AMDGPU::OPERAND_REG_INLINE_C_FP64
:
450 case AMDGPU::OPERAND_REG_IMM_INT16
:
451 case AMDGPU::OPERAND_REG_IMM_FP16
:
452 case AMDGPU::OPERAND_REG_INLINE_C_INT16
:
453 case AMDGPU::OPERAND_REG_INLINE_C_FP16
:
454 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16
:
455 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16
:
459 llvm_unreachable("unhandled operand type");
464 inline unsigned getOperandSize(const MCInstrDesc
&Desc
, unsigned OpNo
) {
465 return getOperandSize(Desc
.OpInfo
[OpNo
]);
/// Is this literal inlinable
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
481 bool isArgPassedInSGPR(const Argument
*Arg
);
483 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
485 int64_t getSMRDEncodedOffset(const MCSubtargetInfo
&ST
, int64_t ByteOffset
);
487 /// \returns true if this offset is small enough to fit in the SMRD
488 /// offset field. \p ByteOffset should be the offset in bytes and
489 /// not the encoded offset.
490 bool isLegalSMRDImmOffset(const MCSubtargetInfo
&ST
, int64_t ByteOffset
);
492 bool splitMUBUFOffset(uint32_t Imm
, uint32_t &SOffset
, uint32_t &ImmOffset
,
493 const GCNSubtarget
*Subtarget
, uint32_t Align
= 4);
495 /// \returns true if the intrinsic is divergent
496 bool isIntrinsicSourceOfDivergence(unsigned IntrID
);
498 } // end namespace AMDGPU
499 } // end namespace llvm
501 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H