//===- SIMachineFunctionInfo.cpp - SI Machine Function Info --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

using namespace llvm;
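// SIMachineFunctionInfo carries the per-function machine state for the GCN
// backend: which preloaded SGPR/VGPR arguments the function requires, which
// registers are reserved for scratch access, and how SGPR spills are mapped
// onto VGPR lanes.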
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = getMaxWavesPerEU();
  limitOccupancy(MF);
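  // The calling convention decides the baseline implicit inputs: compute
  // kernels get the kernarg segment pointer plus the X work-group and
  // work-item IDs, while pixel shaders read their input enable mask from the
  // shader metadata.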
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }
  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now; these registers are
    // still required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }
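  // Under the debugger every work-group and work-item ID must be available;
  // otherwise only the IDs requested via function attributes are enabled.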
  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();
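  // Entry functions own their scratch setup and always receive the wave byte
  // offset; non-entry functions inherit scratch state from their caller
  // through the fixed registers chosen above.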
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }
  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;
  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }
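  // Both values below arrive as string function attributes; when an attribute
  // is absent or empty, the defaults from the initializer list are kept.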
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);
}
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}
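// Each addFoo() helper below hands the next unallocated user SGPRs to a
// preloaded argument: 64-bit pointers take an aligned SGPR pair, and the
// 128-bit private segment buffer descriptor takes four SGPRs.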
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}
unsigned SIMachineFunctionInfo::addKernargSegmentPtr(
  const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(
  const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}
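// SGPR spills are not written to memory; each spilled 32-bit SGPR instead
// occupies one lane of a reserved VGPR (via v_writelane/v_readlane), so a
// single VGPR provides one spill slot per lane of the wavefront.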
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
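  // NumVGPRSpillLanes counts the lanes handed out across all spill slots so
  // far; the modulo below wraps to lane 0 exactly when the current VGPR is
  // full and a fresh one must be allocated.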
  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
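// Once SGPR spills have been lowered onto VGPR lanes, the original SGPR spill
// stack slots are dead and can be dropped from the frame.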
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(
  MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}
/// \returns VGPR used for \p Dim's work item ID.
unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
  switch (Dim) {
  case 0:
    assert(hasWorkItemIDX());
    return AMDGPU::VGPR0;
  case 1:
    assert(hasWorkItemIDY());
    return AMDGPU::VGPR1;
  case 2:
    assert(hasWorkItemIDZ());
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected dimension");
}
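// User SGPRs are allocated upward from SGPR0, with system SGPRs placed
// immediately after them, so the next free register in each group is a fixed
// offset from SGPR0.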
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}