//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class AMDGPUSubtarget;
class FeatureBitset;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
class MachineMemOperand;
class Triple;

namespace AMDGPU {

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit without any kind of
/// limitation.
unsigned getMaxWavesPerEU();

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns VGPR encoding granularity for given subtarget \p STI.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);

LLVM_READONLY
int getMUBUFDwords(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired
/// is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
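
// Example (illustrative): a function attribute such as
//   "amdgpu-flat-work-group-size"="128,256"
// parses to the pair {128, 256}. With OnlyFirstRequired set, "128" alone is
// accepted and the second element falls back to Default.second.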

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}

  static Waitcnt allZero() { return Waitcnt(0, 0, 0); }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt));
  }
};
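
// Usage sketch (follows directly from the definitions above): combined() takes
// the element-wise minimum, so the result waits at least as hard as either
// input and therefore dominates both.
//
//   Waitcnt A(0, ~0u, ~0u);    // wait only for vmcnt == 0
//   Waitcnt B(~0u, 0, ~0u);    // wait only for expcnt == 0
//   Waitcnt C = A.combined(B); // C == Waitcnt(0, 0, ~0u)
//   assert(C.dominates(A) && C.dominates(B));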

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
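
// Worked example (illustrative, pre-gfx9 layout): for Encoded = 0x0127,
//   Vmcnt   = Encoded[3:0]  = 7
//   Expcnt  = Encoded[6:4]  = 2
//   Lgkmcnt = Encoded[11:8] = 1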

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
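
// Worked example (illustrative, pre-gfx9 layout, assuming the bits outside the
// three fields come out clear):
//   encodeWaitcnt(Version, /*Vmcnt=*/7, /*Expcnt=*/2, /*Lgkmcnt=*/1)
//     == (1 << 8) | (2 << 4) | 7 == 0x0127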

unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);

/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// Do registers \p Reg0 and \p Reg1 intersect?
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
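
// Background (summarized from the AMDGPU ISA docs, for reference): the inline
// constants are the integers -16..64 and the FP values 0.0, +/-0.5, +/-1.0,
// +/-2.0 and +/-4.0, plus 1/(2*pi) on targets where HasInv2Pi is true.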

bool isArgPassedInSGPR(const Argument *Arg);

/// \returns The encoding that will be used for \p ByteOffset in the SMRD
/// offset field.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
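
// Note (an assumption about the ISA generations, stated tentatively): SI/CI
// encode the SMRD immediate offset in dwords, so a ByteOffset of 16 would
// encode as 4, while VI+ encode the byte offset directly.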

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align = 4);

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H