//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"

using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    Mode(MF.getFunction()),
    PrivateSegmentBuffer(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    PrivateSegmentWaveByteOffset(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(MF, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    KernargSegmentPtr = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now; other registers
    // are required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR33;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR34;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
        ArgDescriptor::createRegister(ScratchWaveOffsetReg);
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    auto hasNonSpillStackObjects = [&]() {
      // Avoid expensive checking if there are no stack objects.
      if (!HasStackObjects)
        return false;
      for (auto OI = FrameInfo.getObjectIndexBegin(),
                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
        if (!FrameInfo.isSpillSlotObjectIndex(OI))
          return true;
      // All stack objects are spill slots.
      return false;
    };
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}

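// Each of the add* helpers below reserves the next available user SGPRs (see
// getNextUserSGPR) for one preloaded input, records the assignment in
// ArgInfo, and returns the first register of the allocation.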
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed lanes are still available in the VGPRs already
/// used for SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
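  // SGPR spills are packed into 4-byte lanes of reserved VGPRs; whenever
  // NumVGPRSpillLanes reaches a multiple of the wavefront size, the current
  // VGPR is full and a fresh one must be found for the following lanes.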
  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;
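  // Track registers that are already unavailable for this spill: anything in
  // the callee-saved mask and any register previously handed out for AGPR or
  // VGPR spilling, so the scan below skips them.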
  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP spill hasn't been inserted yet, so keep it around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the
  // stack ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

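// Convert the in-memory argument descriptors to their MIR YAML representation.
// Only arguments that were actually assigned are emitted; if none were, the
// returned Optional stays empty.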
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo &MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;