1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
11 /// AMDGPU specific subclass of TargetSubtarget.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "Utils/AMDGPUBaseInfo.h"
27 #include "llvm/ADT/Triple.h"
28 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
29 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
30 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
33 #include "llvm/MC/MCInstrItineraries.h"
34 #include "llvm/Support/MathExtras.h"
40 #define GET_SUBTARGETINFO_HEADER
41 #include "AMDGPUGenSubtargetInfo.inc"
42 #define GET_SUBTARGETINFO_HEADER
43 #include "R600GenSubtargetInfo.inc"
49 class AMDGPUSubtarget
{
74 bool HasInv2PiInlineImm
;
75 bool HasFminFmaxLegacy
;
76 bool EnablePromoteAlloca
;
77 bool HasTrigReducedRange
;
79 unsigned WavefrontSize
;
82 AMDGPUSubtarget(const Triple
&TT
);
84 static const AMDGPUSubtarget
&get(const MachineFunction
&MF
);
85 static const AMDGPUSubtarget
&get(const TargetMachine
&TM
,
88 /// \returns Default range flat work group size for a calling convention.
89 std::pair
<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC
) const;
91 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92 /// for function \p F, or minimum/maximum flat work group sizes explicitly
93 /// requested using "amdgpu-flat-work-group-size" attribute attached to
96 /// \returns Subtarget's default values if explicitly requested values cannot
97 /// be converted to integer, or violate subtarget's specifications.
98 std::pair
<unsigned, unsigned> getFlatWorkGroupSizes(const Function
&F
) const;
100 /// \returns Subtarget's default pair of minimum/maximum number of waves per
101 /// execution unit for function \p F, or minimum/maximum number of waves per
102 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103 /// attached to function \p F.
105 /// \returns Subtarget's default values if explicitly requested values cannot
106 /// be converted to integer, violate subtarget's specifications, or are not
107 /// compatible with minimum/maximum number of waves limited by flat work group
108 /// size, register usage, and/or lds usage.
109 std::pair
<unsigned, unsigned> getWavesPerEU(const Function
&F
) const;
111 /// Return the amount of LDS that can be used that will not restrict the
112 /// occupancy lower than WaveCount.
113 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount
,
114 const Function
&) const;
116 /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
117 /// the given LDS memory size is the only constraint.
118 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes
, const Function
&) const;
120 unsigned getOccupancyWithLocalMemSize(const MachineFunction
&MF
) const;
122 bool isAmdHsaOS() const {
123 return TargetTriple
.getOS() == Triple::AMDHSA
;
126 bool isAmdPalOS() const {
127 return TargetTriple
.getOS() == Triple::AMDPAL
;
130 bool isMesa3DOS() const {
131 return TargetTriple
.getOS() == Triple::Mesa3D
;
134 bool isMesaKernel(const Function
&F
) const {
135 return isMesa3DOS() && !AMDGPU::isShader(F
.getCallingConv());
138 bool isAmdCodeObjectV2(const Function
&F
) const {
139 return isAmdHsaOS() || isMesaKernel(F
);
142 bool has16BitInsts() const {
143 return Has16BitInsts
;
146 bool hasMadMixInsts() const {
147 return HasMadMixInsts
;
150 bool hasFP32Denormals() const {
151 return FP32Denormals
;
154 bool hasFPExceptions() const {
158 bool hasSDWA() const {
162 bool hasVOP3PInsts() const {
163 return HasVOP3PInsts
;
166 bool hasMulI24() const {
170 bool hasMulU24() const {
174 bool hasInv2PiInlineImm() const {
175 return HasInv2PiInlineImm
;
178 bool hasFminFmaxLegacy() const {
179 return HasFminFmaxLegacy
;
182 bool hasTrigReducedRange() const {
183 return HasTrigReducedRange
;
186 bool isPromoteAllocaEnabled() const {
187 return EnablePromoteAlloca
;
190 unsigned getWavefrontSize() const {
191 return WavefrontSize
;
194 int getLocalMemorySize() const {
195 return LocalMemorySize
;
198 unsigned getAlignmentForImplicitArgPtr() const {
199 return isAmdHsaOS() ? 8 : 4;
202 /// Returns the offset in bytes from the start of the input buffer
203 /// of the first explicit kernel argument.
204 unsigned getExplicitKernelArgOffset(const Function
&F
) const {
205 return isAmdCodeObjectV2(F
) ? 0 : 36;
208 /// \returns Maximum number of work groups per compute unit supported by the
209 /// subtarget and limited by given \p FlatWorkGroupSize.
210 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize
) const = 0;
212 /// \returns Minimum flat work group size supported by the subtarget.
213 virtual unsigned getMinFlatWorkGroupSize() const = 0;
215 /// \returns Maximum flat work group size supported by the subtarget.
216 virtual unsigned getMaxFlatWorkGroupSize() const = 0;
218 /// \returns Maximum number of waves per execution unit supported by the
219 /// subtarget and limited by given \p FlatWorkGroupSize.
220 virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize
) const = 0;
222 /// \returns Minimum number of waves per execution unit supported by the
224 virtual unsigned getMinWavesPerEU() const = 0;
/// \returns The constant 10. Unlike the pure virtual overload declared above,
/// this overload takes no \p FlatWorkGroupSize argument.
226 unsigned getMaxWavesPerEU() const { return 10; }
228 /// Creates value range metadata on a workitemid.* intrinsic call or load.
229 bool makeLIDRangeMetadata(Instruction
*I
) const;
231 /// \returns Number of bytes of arguments that are passed to a shader or
232 /// kernel in addition to the explicit ones declared for the function.
233 unsigned getImplicitArgNumBytes(const Function
&F
) const {
236 return AMDGPU::getIntegerAttribute(F
, "amdgpu-implicitarg-num-bytes", 0);
238 uint64_t getExplicitKernArgSize(const Function
&F
,
239 unsigned &MaxAlign
) const;
240 unsigned getKernArgSegmentSize(const Function
&F
,
241 unsigned &MaxAlign
) const;
/// Virtual destructor with an empty body; this class declares pure virtual
/// members and is used as a base of GCNSubtarget and R600Subtarget.
243 virtual ~AMDGPUSubtarget() {}
246 class GCNSubtarget
: public AMDGPUGenSubtargetInfo
,
247 public AMDGPUSubtarget
{
268 enum TrapHandlerAbi
{
269 TrapHandlerAbiNone
= 0,
270 TrapHandlerAbiHsa
= 1
274 TrapIDHardwareReserved
= 0,
275 TrapIDHSADebugTrap
= 1,
277 TrapIDLLVMDebugTrap
= 3,
278 TrapIDDebugBreakpoint
= 7,
279 TrapIDDebugReserved8
= 8,
280 TrapIDDebugReservedFE
= 0xfe,
281 TrapIDDebugReservedFF
= 0xff
285 LLVMTrapHandlerRegValue
= 1
289 /// GlobalISel related APIs.
290 std::unique_ptr
<AMDGPUCallLowering
> CallLoweringInfo
;
291 std::unique_ptr
<InstructionSelector
> InstSelector
;
292 std::unique_ptr
<LegalizerInfo
> Legalizer
;
293 std::unique_ptr
<RegisterBankInfo
> RegBankInfo
;
296 // Basic subtarget description.
300 InstrItineraryData InstrItins
;
302 unsigned MaxPrivateElementSize
;
304 // Possibly statically set by tablegen, but may want to be overridden.
308 // Dynamically set bits that enable features.
309 bool FP64FP16Denormals
;
312 bool AutoWaitcntBeforeBarrier
;
314 bool UnalignedScratchAccess
;
315 bool UnalignedBufferAccess
;
316 bool HasApertureRegs
;
319 bool DebuggerInsertNops
;
320 bool DebuggerEmitPrologue
;
323 bool EnableHugePrivateBuffer
;
324 bool EnableVGPRSpilling
;
325 bool EnableLoadStoreOpt
;
326 bool EnableUnsafeDSOffsetFolding
;
327 bool EnableSIScheduler
;
331 // Subtarget properties statically set by tablegen
341 bool HasSMemRealTime
;
345 bool HasVGPRIndexMode
;
346 bool HasScalarStores
;
347 bool HasScalarAtomics
;
352 bool HasSDWAOutModsVOPC
;
356 bool D16PreservesUnusedBits
;
357 bool FlatAddressSpace
;
358 bool FlatInstOffsets
;
359 bool FlatGlobalInsts
;
360 bool FlatScratchInsts
;
361 bool AddNoCarryInsts
;
362 bool HasUnpackedD16VMem
;
367 short TexVTXClauseSize
;
368 bool ScalarizeGlobal
;
370 // Dummy feature to use for assembler in tablegen.
373 SelectionDAGTargetInfo TSInfo
;
375 SIInstrInfo InstrInfo
;
376 SITargetLowering TLInfo
;
377 SIFrameLowering FrameLowering
;
380 GCNSubtarget(const Triple
&TT
, StringRef GPU
, StringRef FS
,
381 const GCNTargetMachine
&TM
);
382 ~GCNSubtarget() override
;
384 GCNSubtarget
&initializeSubtargetDependencies(const Triple
&TT
,
385 StringRef GPU
, StringRef FS
);
387 const SIInstrInfo
*getInstrInfo() const override
{
391 const SIFrameLowering
*getFrameLowering() const override
{
392 return &FrameLowering
;
395 const SITargetLowering
*getTargetLowering() const override
{
399 const SIRegisterInfo
*getRegisterInfo() const override
{
400 return &InstrInfo
.getRegisterInfo();
403 const CallLowering
*getCallLowering() const override
{
404 return CallLoweringInfo
.get();
407 const InstructionSelector
*getInstructionSelector() const override
{
408 return InstSelector
.get();
411 const LegalizerInfo
*getLegalizerInfo() const override
{
412 return Legalizer
.get();
415 const RegisterBankInfo
*getRegBankInfo() const override
{
416 return RegBankInfo
.get();
419 // Nothing implemented, just prevent crashes on use.
420 const SelectionDAGTargetInfo
*getSelectionDAGInfo() const override
{
424 const InstrItineraryData
*getInstrItineraryData() const override
{
428 void ParseSubtargetFeatures(StringRef CPU
, StringRef FS
);
430 Generation
getGeneration() const {
431 return (Generation
)Gen
;
434 unsigned getWavefrontSizeLog2() const {
435 return Log2_32(WavefrontSize
);
438 int getLDSBankCount() const {
442 unsigned getMaxPrivateElementSize() const {
443 return MaxPrivateElementSize
;
446 bool hasIntClamp() const {
450 bool hasFP64() const {
454 bool hasMIMG_R128() const {
458 bool hasHWFP64() const {
462 bool hasFastFMAF32() const {
466 bool hasHalfRate64Ops() const {
467 return HalfRate64Ops
;
470 bool hasAddr64() const {
471 return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS
);
474 bool hasBFE() const {
478 bool hasBFI() const {
482 bool hasBFM() const {
486 bool hasBCNT(unsigned Size
) const {
490 bool hasFFBL() const {
494 bool hasFFBH() const {
498 bool hasMed3_16() const {
499 return getGeneration() >= AMDGPUSubtarget::GFX9
;
502 bool hasMin3Max3_16() const {
503 return getGeneration() >= AMDGPUSubtarget::GFX9
;
506 bool hasFmaMixInsts() const {
507 return HasFmaMixInsts
;
510 bool hasCARRY() const {
514 bool hasFMA() const {
518 TrapHandlerAbi
getTrapHandlerAbi() const {
519 return isAmdHsaOS() ? TrapHandlerAbiHsa
: TrapHandlerAbiNone
;
522 bool enableHugePrivateBuffer() const {
523 return EnableHugePrivateBuffer
;
526 bool unsafeDSOffsetFoldingEnabled() const {
527 return EnableUnsafeDSOffsetFolding
;
530 bool dumpCode() const {
534 /// Return the amount of LDS that can be used that will not restrict the
535 /// occupancy lower than WaveCount.
536 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount
,
537 const Function
&) const;
539 bool hasFP16Denormals() const {
540 return FP64FP16Denormals
;
543 bool hasFP64Denormals() const {
544 return FP64FP16Denormals
;
547 bool supportsMinMaxDenormModes() const {
548 return getGeneration() >= AMDGPUSubtarget::GFX9
;
551 bool enableDX10Clamp() const {
555 bool enableIEEEBit(const MachineFunction
&MF
) const {
556 return AMDGPU::isCompute(MF
.getFunction().getCallingConv());
559 bool useFlatForGlobal() const {
560 return FlatForGlobal
;
563 /// \returns If target supports ds_read/write_b128 and user enables generation
564 /// of ds_read/write_b128.
565 bool useDS128() const {
566 return CIInsts
&& EnableDS128
;
569 /// \returns If MUBUF instructions always perform range checking, even for
570 /// buffer resources used for private memory access.
571 bool privateMemoryResourceIsRangeChecked() const {
572 return getGeneration() < AMDGPUSubtarget::GFX9
;
575 bool hasAutoWaitcntBeforeBarrier() const {
576 return AutoWaitcntBeforeBarrier
;
579 bool hasCodeObjectV3() const {
583 bool hasUnalignedBufferAccess() const {
584 return UnalignedBufferAccess
;
587 bool hasUnalignedScratchAccess() const {
588 return UnalignedScratchAccess
;
591 bool hasApertureRegs() const {
592 return HasApertureRegs
;
595 bool isTrapHandlerEnabled() const {
599 bool isXNACKEnabled() const {
603 bool hasFlatAddressSpace() const {
604 return FlatAddressSpace
;
607 bool hasFlatInstOffsets() const {
608 return FlatInstOffsets
;
611 bool hasFlatGlobalInsts() const {
612 return FlatGlobalInsts
;
615 bool hasFlatScratchInsts() const {
616 return FlatScratchInsts
;
619 bool hasFlatLgkmVMemCountInOrder() const {
620 return getGeneration() > GFX9
;
623 bool hasD16LoadStore() const {
624 return getGeneration() >= GFX9
;
627 /// Return if most LDS instructions have an m0 use that requires m0 to be
629 bool ldsRequiresM0Init() const {
630 return getGeneration() < GFX9
;
633 bool hasAddNoCarry() const {
634 return AddNoCarryInsts
;
637 bool hasUnpackedD16VMem() const {
638 return HasUnpackedD16VMem
;
641 // Covers VS/PS/CS graphics shaders
642 bool isMesaGfxShader(const Function
&F
) const {
643 return isMesa3DOS() && AMDGPU::isShader(F
.getCallingConv());
646 bool hasMad64_32() const {
647 return getGeneration() >= SEA_ISLANDS
;
650 bool hasSDWAOmod() const {
654 bool hasSDWAScalar() const {
655 return HasSDWAScalar
;
658 bool hasSDWASdst() const {
662 bool hasSDWAMac() const {
666 bool hasSDWAOutModsVOPC() const {
667 return HasSDWAOutModsVOPC
;
670 bool vmemWriteNeedsExpWaitcnt() const {
671 return getGeneration() < SEA_ISLANDS
;
674 bool hasDLInsts() const {
678 bool d16PreservesUnusedBits() const {
679 return D16PreservesUnusedBits
;
682 // Scratch is allocated in 256 dword per wave blocks for the entire
683 // wavefront. When viewed from the perspective of an arbitrary workitem, this
684 // is 4-byte aligned.
686 // Only 4-byte alignment is really needed to access anything. Transformations
687 // on the pointer value itself may rely on the alignment / known low bits of
688 // the pointer. Set this to something above the minimum to avoid needing
689 // dynamic realignment in common cases.
690 unsigned getStackAlignment() const {
694 bool enableMachineScheduler() const override
{
698 bool enableSubRegLiveness() const override
{
/// Overwrites the ScalarizeGlobal flag with \p b.
702 void setScalarizeGlobalBehavior(bool b
) { ScalarizeGlobal
= b
; }
/// \returns The current value of the ScalarizeGlobal flag.
703 bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal
; }
705 /// \returns Number of execution units per compute unit supported by the
707 unsigned getEUsPerCU() const {
708 return AMDGPU::IsaInfo::getEUsPerCU(this);
711 /// \returns Maximum number of waves per compute unit supported by the
712 /// subtarget without any kind of limitation.
713 unsigned getMaxWavesPerCU() const {
714 return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
717 /// \returns Maximum number of waves per compute unit supported by the
718 /// subtarget and limited by given \p FlatWorkGroupSize.
719 unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize
) const {
720 return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize
);
723 /// \returns Maximum number of waves per execution unit supported by the
724 /// subtarget without any kind of limitation.
725 unsigned getMaxWavesPerEU() const {
726 return AMDGPU::IsaInfo::getMaxWavesPerEU();
729 /// \returns Number of waves per work group supported by the subtarget and
730 /// limited by given \p FlatWorkGroupSize.
731 unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize
) const {
732 return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize
);
736 static bool hasHalfRate64Ops(const TargetSubtargetInfo
&STI
);
738 // XXX - Why is this here if it isn't in the default pass set?
739 bool enableEarlyIfConversion() const override
{
743 void overrideSchedPolicy(MachineSchedPolicy
&Policy
,
744 unsigned NumRegionInstrs
) const override
;
746 bool isVGPRSpillingEnabled(const Function
&F
) const;
748 unsigned getMaxNumUserSGPRs() const {
752 bool hasSMemRealTime() const {
753 return HasSMemRealTime
;
756 bool hasMovrel() const {
760 bool hasVGPRIndexMode() const {
761 return HasVGPRIndexMode
;
764 bool useVGPRIndexMode(bool UserEnable
) const {
765 return !hasMovrel() || (UserEnable
&& hasVGPRIndexMode());
768 bool hasScalarCompareEq64() const {
769 return getGeneration() >= VOLCANIC_ISLANDS
;
772 bool hasScalarStores() const {
773 return HasScalarStores
;
776 bool hasScalarAtomics() const {
777 return HasScalarAtomics
;
781 bool hasDPP() const {
785 bool hasR128A16() const {
789 bool enableSIScheduler() const {
790 return EnableSIScheduler
;
793 bool debuggerSupported() const {
794 return debuggerInsertNops() && debuggerEmitPrologue();
797 bool debuggerInsertNops() const {
798 return DebuggerInsertNops
;
801 bool debuggerEmitPrologue() const {
802 return DebuggerEmitPrologue
;
805 bool loadStoreOptEnabled() const {
806 return EnableLoadStoreOpt
;
809 bool hasSGPRInitBug() const {
813 bool has12DWordStoreHazard() const {
814 return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS
;
817 bool hasSMovFedHazard() const {
818 return getGeneration() >= AMDGPUSubtarget::GFX9
;
821 bool hasReadM0MovRelInterpHazard() const {
822 return getGeneration() >= AMDGPUSubtarget::GFX9
;
825 bool hasReadM0SendMsgHazard() const {
826 return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS
;
829 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
831 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs
) const;
833 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
835 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs
) const;
837 /// \returns true if the flat_scratch register should be initialized with the
838 /// pointer to the wave's scratch memory rather than a size and offset.
839 bool flatScratchIsPointer() const {
840 return getGeneration() >= AMDGPUSubtarget::GFX9
;
843 /// \returns true if the machine has merged shaders in which s0-s7 are
844 /// reserved by the hardware and user SGPRs start at s8
845 bool hasMergedShaders() const {
846 return getGeneration() >= GFX9
;
849 /// \returns SGPR allocation granularity supported by the subtarget.
850 unsigned getSGPRAllocGranule() const {
851 return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
854 /// \returns SGPR encoding granularity supported by the subtarget.
855 unsigned getSGPREncodingGranule() const {
856 return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
859 /// \returns Total number of SGPRs supported by the subtarget.
860 unsigned getTotalNumSGPRs() const {
861 return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
864 /// \returns Addressable number of SGPRs supported by the subtarget.
865 unsigned getAddressableNumSGPRs() const {
866 return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
869 /// \returns Minimum number of SGPRs that meets the given number of waves per
870 /// execution unit requirement supported by the subtarget.
871 unsigned getMinNumSGPRs(unsigned WavesPerEU
) const {
872 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU
);
875 /// \returns Maximum number of SGPRs that meets the given number of waves per
876 /// execution unit requirement supported by the subtarget.
877 unsigned getMaxNumSGPRs(unsigned WavesPerEU
, bool Addressable
) const {
878 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU
, Addressable
);
881 /// \returns Reserved number of SGPRs for given function \p MF.
882 unsigned getReservedNumSGPRs(const MachineFunction
&MF
) const;
884 /// \returns Maximum number of SGPRs that meets number of waves per execution
885 /// unit requirement for function \p MF, or number of SGPRs explicitly
886 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
888 /// \returns Value that meets number of waves per execution unit requirement
889 /// if explicitly requested value cannot be converted to integer, violates
890 /// subtarget's specifications, or does not meet number of waves per execution
891 /// unit requirement.
892 unsigned getMaxNumSGPRs(const MachineFunction
&MF
) const;
894 /// \returns VGPR allocation granularity supported by the subtarget.
895 unsigned getVGPRAllocGranule() const {
896 return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
899 /// \returns VGPR encoding granularity supported by the subtarget.
900 unsigned getVGPREncodingGranule() const {
901 return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
904 /// \returns Total number of VGPRs supported by the subtarget.
905 unsigned getTotalNumVGPRs() const {
906 return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
909 /// \returns Addressable number of VGPRs supported by the subtarget.
910 unsigned getAddressableNumVGPRs() const {
911 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
914 /// \returns Minimum number of VGPRs that meets given number of waves per
915 /// execution unit requirement supported by the subtarget.
916 unsigned getMinNumVGPRs(unsigned WavesPerEU
) const {
917 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU
);
920 /// \returns Maximum number of VGPRs that meets given number of waves per
921 /// execution unit requirement supported by the subtarget.
922 unsigned getMaxNumVGPRs(unsigned WavesPerEU
) const {
923 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU
);
926 /// \returns Maximum number of VGPRs that meets number of waves per execution
927 /// unit requirement for function \p MF, or number of VGPRs explicitly
928 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
930 /// \returns Value that meets number of waves per execution unit requirement
931 /// if explicitly requested value cannot be converted to integer, violates
932 /// subtarget's specifications, or does not meet number of waves per execution
933 /// unit requirement.
934 unsigned getMaxNumVGPRs(const MachineFunction
&MF
) const;
936 void getPostRAMutations(
937 std::vector
<std::unique_ptr
<ScheduleDAGMutation
>> &Mutations
)
940 /// \returns Maximum number of work groups per compute unit supported by the
941 /// subtarget and limited by given \p FlatWorkGroupSize.
942 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize
) const override
{
943 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize
);
946 /// \returns Minimum flat work group size supported by the subtarget.
947 unsigned getMinFlatWorkGroupSize() const override
{
948 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
951 /// \returns Maximum flat work group size supported by the subtarget.
952 unsigned getMaxFlatWorkGroupSize() const override
{
953 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
956 /// \returns Maximum number of waves per execution unit supported by the
957 /// subtarget and limited by given \p FlatWorkGroupSize.
958 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize
) const override
{
959 return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize
);
962 /// \returns Minimum number of waves per execution unit supported by the
964 unsigned getMinWavesPerEU() const override
{
965 return AMDGPU::IsaInfo::getMinWavesPerEU(this);
969 class R600Subtarget final
: public R600GenSubtargetInfo
,
970 public AMDGPUSubtarget
{
972 R600InstrInfo InstrInfo
;
973 R600FrameLowering FrameLowering
;
981 short TexVTXClauseSize
;
983 R600TargetLowering TLInfo
;
984 InstrItineraryData InstrItins
;
985 SelectionDAGTargetInfo TSInfo
;
988 R600Subtarget(const Triple
&TT
, StringRef CPU
, StringRef FS
,
989 const TargetMachine
&TM
);
/// \returns Address of the R600InstrInfo member owned by this subtarget.
991 const R600InstrInfo
*getInstrInfo() const override
{ return &InstrInfo
; }
993 const R600FrameLowering
*getFrameLowering() const override
{
994 return &FrameLowering
;
997 const R600TargetLowering
*getTargetLowering() const override
{
1001 const R600RegisterInfo
*getRegisterInfo() const override
{
1002 return &InstrInfo
.getRegisterInfo();
1005 const InstrItineraryData
*getInstrItineraryData() const override
{
1009 // Nothing implemented, just prevent crashes on use.
1010 const SelectionDAGTargetInfo
*getSelectionDAGInfo() const override
{
1014 void ParseSubtargetFeatures(StringRef CPU
, StringRef FS
);
1016 Generation
getGeneration() const {
1020 unsigned getStackAlignment() const {
1024 R600Subtarget
&initializeSubtargetDependencies(const Triple
&TT
,
1025 StringRef GPU
, StringRef FS
);
1027 bool hasBFE() const {
1028 return (getGeneration() >= EVERGREEN
);
1031 bool hasBFI() const {
1032 return (getGeneration() >= EVERGREEN
);
1035 bool hasBCNT(unsigned Size
) const {
1037 return (getGeneration() >= EVERGREEN
);
1042 bool hasBORROW() const {
1043 return (getGeneration() >= EVERGREEN
);
1046 bool hasCARRY() const {
1047 return (getGeneration() >= EVERGREEN
);
1050 bool hasCaymanISA() const {
1054 bool hasFFBL() const {
1055 return (getGeneration() >= EVERGREEN
);
1058 bool hasFFBH() const {
1059 return (getGeneration() >= EVERGREEN
);
/// \returns The value of FMA (its declaration is not visible in this excerpt).
1062 bool hasFMA() const { return FMA
; }
/// \returns The value of CFALUBug (its declaration is not visible in this
/// excerpt).
1064 bool hasCFAluBug() const { return CFALUBug
; }
/// \returns The value of HasVertexCache (its declaration is not visible in
/// this excerpt).
1066 bool hasVertexCache() const { return HasVertexCache
; }
/// \returns The value of the TexVTXClauseSize member.
1068 short getTexVTXClauseSize() const { return TexVTXClauseSize
; }
1070 bool enableMachineScheduler() const override
{
1074 bool enableSubRegLiveness() const override
{
1078 /// \returns Maximum number of work groups per compute unit supported by the
1079 /// subtarget and limited by given \p FlatWorkGroupSize.
1080 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize
) const override
{
1081 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize
);
1084 /// \returns Minimum flat work group size supported by the subtarget.
1085 unsigned getMinFlatWorkGroupSize() const override
{
1086 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
1089 /// \returns Maximum flat work group size supported by the subtarget.
1090 unsigned getMaxFlatWorkGroupSize() const override
{
1091 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
1094 /// \returns Maximum number of waves per execution unit supported by the
1095 /// subtarget and limited by given \p FlatWorkGroupSize.
1096 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize
) const override
{
1097 return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize
);
1100 /// \returns Minimum number of waves per execution unit supported by the
1102 unsigned getMinWavesPerEU() const override
{
1103 return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1107 } // end namespace llvm
1109 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H