//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    Mode(MF.getFunction()),
    PrivateSegmentBuffer(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    PrivateSegmentWaveByteOffset(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = getMaxWavesPerEU();

  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    KernargSegmentPtr = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR33;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR34;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    auto hasNonSpillStackObjects = [&]() {
      // Avoid expensive checking if there are no stack objects.
      if (!HasStackObjects)
        return false;
      for (auto OI = FrameInfo.getObjectIndexBegin(),
                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
        if (!FrameInfo.isSpillSlotObjectIndex(OI))
          return true;
      // All stack objects are spill slots.
      return false;
    };
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}

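// The add* helpers below reserve user SGPRs for preloaded kernel inputs: each
// one records an ArgDescriptor for the corresponding input, advances the
// user-SGPR counter past the registers it consumed, and returns the first
// register of the allocated tuple.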
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  // The buffer resource descriptor occupies four user SGPRs.
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Returns true if \p NumNeed lanes are available in VGPRs already used for
/// SGPR spilling.
// FIXME: This only works after processFunctionBeforeFrameFinalized.
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

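// SGPR spills are packed one 32-bit lane per spilled SGPR into spill VGPRs;
// once all wavefront-size lanes of the current VGPR are in use, the allocation
// below reserves a fresh VGPR.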
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

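// Drop frame indices whose contents were fully assigned to registers by the
// spill allocation above, so they no longer occupy stack space.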
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP spill hasn't been inserted yet, so keep it around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

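// Helpers for MIR (YAML) serialization of the target-specific function info:
// registers are printed into strings and argument descriptors are converted
// into their yaml::SIArgument form.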
static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    // Scope the stream so it is flushed into Dest.Value before returning.
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo &MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;