//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
13 #include "AMDKernelCodeT.h"
14 #include "SIDefines.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/IR/CallingConv.h"
17 #include "llvm/MC/MCInstrDesc.h"
18 #include "llvm/Support/AMDHSAKernelDescriptor.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/TargetParser.h"
// Forward declarations for types referenced by the declarations below.
// The extra names (Argument, Function, GCNSubtarget, GlobalValue,
// MCRegisterInfo, Triple, raw_ostream) are all used later in this header.
class AMDGPUSubtarget;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class MachineMemOperand;
class Triple;
class raw_ostream;
44 #define GET_MIMGBaseOpcode_DECL
45 #define GET_MIMGDim_DECL
46 #define GET_MIMGEncoding_DECL
47 #define GET_MIMGLZMapping_DECL
48 #define GET_MIMGMIPMapping_DECL
49 #include "AMDGPUGenSearchableTables.inc"
54 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
55 // doesn't spill SGPRs as much as when 80 is set.
56 FIXED_NUM_SGPRS_FOR_INIT_BUG
= 96,
60 /// Streams isa version string for given subtarget \p STI into \p Stream.
61 void streamIsaVersion(const MCSubtargetInfo
*STI
, raw_ostream
&Stream
);
63 /// \returns True if given subtarget \p STI supports code object version 3,
65 bool hasCodeObjectV3(const MCSubtargetInfo
*STI
);
67 /// \returns Wavefront size for given subtarget \p STI.
68 unsigned getWavefrontSize(const MCSubtargetInfo
*STI
);
70 /// \returns Local memory size in bytes for given subtarget \p STI.
71 unsigned getLocalMemorySize(const MCSubtargetInfo
*STI
);
73 /// \returns Number of execution units per compute unit for given subtarget \p
75 unsigned getEUsPerCU(const MCSubtargetInfo
*STI
);
77 /// \returns Maximum number of work groups per compute unit for given subtarget
78 /// \p STI and limited by given \p FlatWorkGroupSize.
79 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo
*STI
,
80 unsigned FlatWorkGroupSize
);
82 /// \returns Maximum number of waves per compute unit for given subtarget \p
83 /// STI without any kind of limitation.
84 unsigned getMaxWavesPerCU(const MCSubtargetInfo
*STI
);
86 /// \returns Maximum number of waves per compute unit for given subtarget \p
87 /// STI and limited by given \p FlatWorkGroupSize.
88 unsigned getMaxWavesPerCU(const MCSubtargetInfo
*STI
,
89 unsigned FlatWorkGroupSize
);
91 /// \returns Minimum number of waves per execution unit for given subtarget \p
93 unsigned getMinWavesPerEU(const MCSubtargetInfo
*STI
);
95 /// \returns Maximum number of waves per execution unit for given subtarget \p
96 /// STI without any kind of limitation.
97 unsigned getMaxWavesPerEU(const MCSubtargetInfo
*STI
);
99 /// \returns Maximum number of waves per execution unit for given subtarget \p
100 /// STI and limited by given \p FlatWorkGroupSize.
101 unsigned getMaxWavesPerEU(const MCSubtargetInfo
*STI
,
102 unsigned FlatWorkGroupSize
);
104 /// \returns Minimum flat work group size for given subtarget \p STI.
105 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo
*STI
);
107 /// \returns Maximum flat work group size for given subtarget \p STI.
108 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo
*STI
);
110 /// \returns Number of waves per work group for given subtarget \p STI and
111 /// limited by given \p FlatWorkGroupSize.
112 unsigned getWavesPerWorkGroup(const MCSubtargetInfo
*STI
,
113 unsigned FlatWorkGroupSize
);
115 /// \returns SGPR allocation granularity for given subtarget \p STI.
116 unsigned getSGPRAllocGranule(const MCSubtargetInfo
*STI
);
118 /// \returns SGPR encoding granularity for given subtarget \p STI.
119 unsigned getSGPREncodingGranule(const MCSubtargetInfo
*STI
);
121 /// \returns Total number of SGPRs for given subtarget \p STI.
122 unsigned getTotalNumSGPRs(const MCSubtargetInfo
*STI
);
124 /// \returns Addressable number of SGPRs for given subtarget \p STI.
125 unsigned getAddressableNumSGPRs(const MCSubtargetInfo
*STI
);
127 /// \returns Minimum number of SGPRs that meets the given number of waves per
128 /// execution unit requirement for given subtarget \p STI.
129 unsigned getMinNumSGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
131 /// \returns Maximum number of SGPRs that meets the given number of waves per
132 /// execution unit requirement for given subtarget \p STI.
133 unsigned getMaxNumSGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
,
136 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
137 /// STI when the given special registers are used.
138 unsigned getNumExtraSGPRs(const MCSubtargetInfo
*STI
, bool VCCUsed
,
139 bool FlatScrUsed
, bool XNACKUsed
);
141 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
142 /// STI when the given special registers are used. XNACK is inferred from
144 unsigned getNumExtraSGPRs(const MCSubtargetInfo
*STI
, bool VCCUsed
,
147 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
148 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
150 unsigned getNumSGPRBlocks(const MCSubtargetInfo
*STI
, unsigned NumSGPRs
);
152 /// \returns VGPR allocation granularity for given subtarget \p STI.
154 /// For subtargets which support it, \p EnableWavefrontSize32 should match
155 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
156 unsigned getVGPRAllocGranule(const MCSubtargetInfo
*STI
,
157 Optional
<bool> EnableWavefrontSize32
= None
);
159 /// \returns VGPR encoding granularity for given subtarget \p STI.
161 /// For subtargets which support it, \p EnableWavefrontSize32 should match
162 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
163 unsigned getVGPREncodingGranule(const MCSubtargetInfo
*STI
,
164 Optional
<bool> EnableWavefrontSize32
= None
);
166 /// \returns Total number of VGPRs for given subtarget \p STI.
167 unsigned getTotalNumVGPRs(const MCSubtargetInfo
*STI
);
169 /// \returns Addressable number of VGPRs for given subtarget \p STI.
170 unsigned getAddressableNumVGPRs(const MCSubtargetInfo
*STI
);
172 /// \returns Minimum number of VGPRs that meets given number of waves per
173 /// execution unit requirement for given subtarget \p STI.
174 unsigned getMinNumVGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
176 /// \returns Maximum number of VGPRs that meets given number of waves per
177 /// execution unit requirement for given subtarget \p STI.
178 unsigned getMaxNumVGPRs(const MCSubtargetInfo
*STI
, unsigned WavesPerEU
);
180 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
181 /// \p NumVGPRs are used.
183 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
184 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
185 unsigned getNumVGPRBlocks(const MCSubtargetInfo
*STI
, unsigned NumSGPRs
,
186 Optional
<bool> EnableWavefrontSize32
= None
);
188 } // end namespace IsaInfo
/// \returns the index of the named machine operand \p NamedIdx within the
/// instruction \p Opcode, or -1 if the operand is not present.
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

/// \returns the relaxed (long-branch) SOPP variant of \p Opcode, or -1 if
/// there is none.
int getSOPPWithRelaxation(uint16_t Opcode);
196 struct MIMGBaseOpcodeInfo
{
197 MIMGBaseOpcode BaseOpcode
;
204 uint8_t NumExtraArgs
;
207 bool LodOrClampOrMip
;
212 const MIMGBaseOpcodeInfo
*getMIMGBaseOpcodeInfo(unsigned BaseOpcode
);
217 uint8_t NumGradients
;
220 const char *AsmSuffix
;
224 const MIMGDimInfo
*getMIMGDimInfo(unsigned DimEnum
);
227 const MIMGDimInfo
*getMIMGDimInfoByEncoding(uint8_t DimEnc
);
230 const MIMGDimInfo
*getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix
);
232 struct MIMGLZMappingInfo
{
237 struct MIMGMIPMappingInfo
{
239 MIMGBaseOpcode NONMIP
;
243 const MIMGLZMappingInfo
*getMIMGLZMappingInfo(unsigned L
);
246 const MIMGMIPMappingInfo
*getMIMGMIPMappingInfo(unsigned L
);
/// \returns the concrete MIMG instruction for \p BaseOpcode with the given
/// encoding and dword counts, or -1 if no such instruction exists.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

/// \returns the variant of MIMG instruction \p Opc with \p NewChannels
/// data channels, or -1 if no such variant exists.
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
// NOTE(review): upstream declares additional fields (opcode, base opcode,
// dword counts) in this struct; only the field visible in this chunk is
// reproduced — confirm against the TableGen table definition.
struct MIMGInfo {
  uint8_t MIMGEncoding;
};

/// \returns the MIMG table entry for instruction \p Opc, or nullptr.
const MIMGInfo *getMIMGInfo(unsigned Opc);
// Lookup helpers over the TableGen-generated MTBUF/MUBUF instruction tables.

int getMTBUFBaseOpcode(unsigned Opc);

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

int getMTBUFElements(unsigned Opc);

bool getMTBUFHasVAddr(unsigned Opc);

bool getMTBUFHasSrsrc(unsigned Opc);

bool getMTBUFHasSoffset(unsigned Opc);

int getMUBUFBaseOpcode(unsigned Opc);

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

int getMUBUFElements(unsigned Opc);

bool getMUBUFHasVAddr(unsigned Opc);

bool getMUBUFHasSrsrc(unsigned Opc);

bool getMUBUFHasSoffset(unsigned Opc);
303 int getMCOpcode(uint16_t Opcode
, unsigned Gen
);
305 void initDefaultAMDKernelCodeT(amd_kernel_code_t
&Header
,
306 const MCSubtargetInfo
*STI
);
308 amdhsa::kernel_descriptor_t
getDefaultAmdhsaKernelDescriptor(
309 const MCSubtargetInfo
*STI
);
311 bool isGroupSegment(const GlobalValue
*GV
);
312 bool isGlobalSegment(const GlobalValue
*GV
);
313 bool isReadOnlySegment(const GlobalValue
*GV
);
315 /// \returns True if constants should be emitted to .text section for given
316 /// target triple \p TT, false otherwise.
317 bool shouldEmitConstantsToTextSection(const Triple
&TT
);
319 /// \returns Integer value requested using \p F's \p Name attribute.
321 /// \returns \p Default if attribute is not present.
323 /// \returns \p Default and emits error if requested value cannot be converted
325 int getIntegerAttribute(const Function
&F
, StringRef Name
, int Default
);
327 /// \returns A pair of integer values requested using \p F's \p Name attribute
328 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
331 /// \returns \p Default if attribute is not present.
333 /// \returns \p Default and emits error if one of the requested values cannot be
334 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
336 std::pair
<int, int> getIntegerPairAttribute(const Function
&F
,
338 std::pair
<int, int> Default
,
339 bool OnlyFirstRequired
= false);
341 /// Represents the counter values to wait for in an s_waitcnt instruction.
343 /// Large values (including the maximum possible integer) can be used to
344 /// represent "don't care" waits.
346 unsigned VmCnt
= ~0u;
347 unsigned ExpCnt
= ~0u;
348 unsigned LgkmCnt
= ~0u;
349 unsigned VsCnt
= ~0u;
352 Waitcnt(unsigned VmCnt
, unsigned ExpCnt
, unsigned LgkmCnt
, unsigned VsCnt
)
353 : VmCnt(VmCnt
), ExpCnt(ExpCnt
), LgkmCnt(LgkmCnt
), VsCnt(VsCnt
) {}
355 static Waitcnt
allZero(const IsaVersion
&Version
) {
356 return Waitcnt(0, 0, 0, Version
.Major
>= 10 ? 0 : ~0u);
358 static Waitcnt
allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
360 bool hasWait() const {
361 return VmCnt
!= ~0u || ExpCnt
!= ~0u || LgkmCnt
!= ~0u || VsCnt
!= ~0u;
364 bool dominates(const Waitcnt
&Other
) const {
365 return VmCnt
<= Other
.VmCnt
&& ExpCnt
<= Other
.ExpCnt
&&
366 LgkmCnt
<= Other
.LgkmCnt
&& VsCnt
<= Other
.VsCnt
;
369 Waitcnt
combined(const Waitcnt
&Other
) const {
370 return Waitcnt(std::min(VmCnt
, Other
.VmCnt
), std::min(ExpCnt
, Other
.ExpCnt
),
371 std::min(LgkmCnt
, Other
.LgkmCnt
),
372 std::min(VsCnt
, Other
.VsCnt
));
376 /// \returns Vmcnt bit mask for given isa \p Version.
377 unsigned getVmcntBitMask(const IsaVersion
&Version
);
379 /// \returns Expcnt bit mask for given isa \p Version.
380 unsigned getExpcntBitMask(const IsaVersion
&Version
);
382 /// \returns Lgkmcnt bit mask for given isa \p Version.
383 unsigned getLgkmcntBitMask(const IsaVersion
&Version
);
385 /// \returns Waitcnt bit mask for given isa \p Version.
386 unsigned getWaitcntBitMask(const IsaVersion
&Version
);
388 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
389 unsigned decodeVmcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
391 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
392 unsigned decodeExpcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
394 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
395 unsigned decodeLgkmcnt(const IsaVersion
&Version
, unsigned Waitcnt
);
397 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
398 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
399 /// \p Lgkmcnt respectively.
401 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
402 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
403 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
404 /// \p Expcnt = \p Waitcnt[6:4]
405 /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only)
406 /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only)
407 void decodeWaitcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
408 unsigned &Vmcnt
, unsigned &Expcnt
, unsigned &Lgkmcnt
);
410 Waitcnt
decodeWaitcnt(const IsaVersion
&Version
, unsigned Encoded
);
412 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
413 unsigned encodeVmcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
416 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
417 unsigned encodeExpcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
420 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
421 unsigned encodeLgkmcnt(const IsaVersion
&Version
, unsigned Waitcnt
,
424 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
427 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
428 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
429 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
430 /// Waitcnt[6:4] = \p Expcnt
431 /// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only)
432 /// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only)
433 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
435 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
437 unsigned encodeWaitcnt(const IsaVersion
&Version
,
438 unsigned Vmcnt
, unsigned Expcnt
, unsigned Lgkmcnt
);
440 unsigned encodeWaitcnt(const IsaVersion
&Version
, const Waitcnt
&Decoded
);
445 int64_t getHwregId(const StringRef Name
);
448 bool isValidHwreg(int64_t Id
, const MCSubtargetInfo
&STI
);
451 bool isValidHwreg(int64_t Id
);
454 bool isValidHwregOffset(int64_t Offset
);
457 bool isValidHwregWidth(int64_t Width
);
460 uint64_t encodeHwreg(uint64_t Id
, uint64_t Offset
, uint64_t Width
);
463 StringRef
getHwreg(unsigned Id
, const MCSubtargetInfo
&STI
);
465 void decodeHwreg(unsigned Val
, unsigned &Id
, unsigned &Offset
, unsigned &Width
);
472 int64_t getMsgId(const StringRef Name
);
475 int64_t getMsgOpId(int64_t MsgId
, const StringRef Name
);
478 StringRef
getMsgName(int64_t MsgId
);
481 StringRef
getMsgOpName(int64_t MsgId
, int64_t OpId
);
484 bool isValidMsgId(int64_t MsgId
, const MCSubtargetInfo
&STI
, bool Strict
= true);
487 bool isValidMsgOp(int64_t MsgId
, int64_t OpId
, bool Strict
= true);
490 bool isValidMsgStream(int64_t MsgId
, int64_t OpId
, int64_t StreamId
, bool Strict
= true);
493 bool msgRequiresOp(int64_t MsgId
);
496 bool msgSupportsStream(int64_t MsgId
, int64_t OpId
);
498 void decodeMsg(unsigned Val
,
504 uint64_t encodeMsg(uint64_t MsgId
,
508 } // namespace SendMsg
511 unsigned getInitialPSInputAddr(const Function
&F
);
514 bool isShader(CallingConv::ID CC
);
517 bool isCompute(CallingConv::ID CC
);
520 bool isEntryFunctionCC(CallingConv::ID CC
);
522 // FIXME: Remove this when calling conventions cleaned up
524 inline bool isKernel(CallingConv::ID CC
) {
526 case CallingConv::AMDGPU_KERNEL
:
527 case CallingConv::SPIR_KERNEL
:
534 bool hasXNACK(const MCSubtargetInfo
&STI
);
535 bool hasSRAMECC(const MCSubtargetInfo
&STI
);
536 bool hasMIMG_R128(const MCSubtargetInfo
&STI
);
537 bool hasPackedD16(const MCSubtargetInfo
&STI
);
539 bool isSI(const MCSubtargetInfo
&STI
);
540 bool isCI(const MCSubtargetInfo
&STI
);
541 bool isVI(const MCSubtargetInfo
&STI
);
542 bool isGFX9(const MCSubtargetInfo
&STI
);
543 bool isGFX10(const MCSubtargetInfo
&STI
);
545 /// Is Reg - scalar register
546 bool isSGPR(unsigned Reg
, const MCRegisterInfo
* TRI
);
548 /// Is there any intersection between registers
549 bool isRegIntersect(unsigned Reg0
, unsigned Reg1
, const MCRegisterInfo
* TRI
);
551 /// If \p Reg is a pseudo reg, return the correct hardware register given
552 /// \p STI otherwise return \p Reg.
553 unsigned getMCReg(unsigned Reg
, const MCSubtargetInfo
&STI
);
555 /// Convert hardware register \p Reg to a pseudo register
557 unsigned mc2PseudoReg(unsigned Reg
);
559 /// Can this operand also contain immediate values?
560 bool isSISrcOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
562 /// Is this floating-point operand?
563 bool isSISrcFPOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
565 /// Does this opearnd support only inlinable literals?
566 bool isSISrcInlinableOperand(const MCInstrDesc
&Desc
, unsigned OpNo
);
568 /// Get the size in bits of a register from the register class \p RC.
569 unsigned getRegBitWidth(unsigned RCID
);
571 /// Get the size in bits of a register from the register class \p RC.
572 unsigned getRegBitWidth(const MCRegisterClass
&RC
);
574 /// Get size of register operand
575 unsigned getRegOperandSize(const MCRegisterInfo
*MRI
, const MCInstrDesc
&Desc
,
579 inline unsigned getOperandSize(const MCOperandInfo
&OpInfo
) {
580 switch (OpInfo
.OperandType
) {
581 case AMDGPU::OPERAND_REG_IMM_INT32
:
582 case AMDGPU::OPERAND_REG_IMM_FP32
:
583 case AMDGPU::OPERAND_REG_INLINE_C_INT32
:
584 case AMDGPU::OPERAND_REG_INLINE_C_FP32
:
585 case AMDGPU::OPERAND_REG_INLINE_AC_INT32
:
586 case AMDGPU::OPERAND_REG_INLINE_AC_FP32
:
589 case AMDGPU::OPERAND_REG_IMM_INT64
:
590 case AMDGPU::OPERAND_REG_IMM_FP64
:
591 case AMDGPU::OPERAND_REG_INLINE_C_INT64
:
592 case AMDGPU::OPERAND_REG_INLINE_C_FP64
:
595 case AMDGPU::OPERAND_REG_IMM_INT16
:
596 case AMDGPU::OPERAND_REG_IMM_FP16
:
597 case AMDGPU::OPERAND_REG_INLINE_C_INT16
:
598 case AMDGPU::OPERAND_REG_INLINE_C_FP16
:
599 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16
:
600 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16
:
601 case AMDGPU::OPERAND_REG_INLINE_AC_INT16
:
602 case AMDGPU::OPERAND_REG_INLINE_AC_FP16
:
603 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16
:
604 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16
:
605 case AMDGPU::OPERAND_REG_IMM_V2INT16
:
606 case AMDGPU::OPERAND_REG_IMM_V2FP16
:
610 llvm_unreachable("unhandled operand type");
615 inline unsigned getOperandSize(const MCInstrDesc
&Desc
, unsigned OpNo
) {
616 return getOperandSize(Desc
.OpInfo
[OpNo
]);
619 /// Is this literal inlinable
621 bool isInlinableLiteral64(int64_t Literal
, bool HasInv2Pi
);
624 bool isInlinableLiteral32(int32_t Literal
, bool HasInv2Pi
);
627 bool isInlinableLiteral16(int16_t Literal
, bool HasInv2Pi
);
630 bool isInlinableLiteralV216(int32_t Literal
, bool HasInv2Pi
);
632 bool isArgPassedInSGPR(const Argument
*Arg
);
634 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
636 int64_t getSMRDEncodedOffset(const MCSubtargetInfo
&ST
, int64_t ByteOffset
);
638 /// \returns true if this offset is small enough to fit in the SMRD
639 /// offset field. \p ByteOffset should be the offset in bytes and
640 /// not the encoded offset.
641 bool isLegalSMRDImmOffset(const MCSubtargetInfo
&ST
, int64_t ByteOffset
);
643 bool splitMUBUFOffset(uint32_t Imm
, uint32_t &SOffset
, uint32_t &ImmOffset
,
644 const GCNSubtarget
*Subtarget
, uint32_t Align
= 4);
646 /// \returns true if the intrinsic is divergent
647 bool isIntrinsicSourceOfDivergence(unsigned IntrID
);
650 // Track defaults for fields in the MODE registser.
651 struct SIModeRegisterDefaults
{
652 /// Floating point opcodes that support exception flag gathering quiet and
653 /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
654 /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
658 /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
659 /// clamp NaN to zero; otherwise, pass NaN through.
662 // TODO: FP mode fields
664 SIModeRegisterDefaults() :
668 SIModeRegisterDefaults(const Function
&F
);
670 static SIModeRegisterDefaults
getDefaultForCallingConv(CallingConv::ID CC
) {
671 SIModeRegisterDefaults Mode
;
672 Mode
.DX10Clamp
= true;
673 Mode
.IEEE
= AMDGPU::isCompute(CC
);
677 bool operator ==(const SIModeRegisterDefaults Other
) const {
678 return IEEE
== Other
.IEEE
&& DX10Clamp
== Other
.DX10Clamp
;
681 // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
682 // be able to override.
683 bool isInlineCompatible(SIModeRegisterDefaults CalleeMode
) const {
684 return *this == CalleeMode
;
688 } // end namespace AMDGPU
689 } // end namespace llvm
691 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H