//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"

enum { MAX_LANES = 64 };

const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}
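
// Set up the per-function state from the IR function and subtarget: which ABI
// inputs (workgroup/workitem IDs, implicit argument pointer, etc.) are needed,
// which registers are used for the stack/frame pointers and the scratch buffer
// descriptor in non-entry functions, and the initial occupancy estimate.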
SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
      UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
      WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
      WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);
  MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
  assert(MaxNumWorkGroups.size() == 3);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  VRegFlags.reserve(1024);

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (AMDGPU::isChainCC(CC)) {
    // Chain functions don't receive an SP from their caller, but are free to
    // set one up. For now, we can use s32 to match what amdgpu_gfx functions
    // would use if called, but this can be revisited.
    // FIXME: Only reserve this if we actually need it.
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);

    ImplicitArgPtr = false;
  } else if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);
  }

  if (ST.hasGFX90AInsts() &&
      ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
      !mayUseAGPRs(F))
    MayNeedAGPRs = false; // We will select all MAI with VGPR operands.

  if (!AMDGPU::isGraphics(CC) ||
      ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_Gfx) &&
       ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;
  }

  if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
    LDSKernelId = true;

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}
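
// Clamp the cached occupancy to both the waves-per-EU limit and the occupancy
// achievable with this function's LDS usage.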
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}
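
// The add* helpers below hand out the next user SGPR(s) for a specific ABI
// input and record the assignment in ArgInfo.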
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
  return ArgInfo.PrivateSegmentSize.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  return ArgInfo.LDSKernelId.getRegister();
}
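
// Record which SGPRs will hold a preloaded kernel argument. A matching SGPR
// tuple is used when the alignment works out; otherwise the argument is split
// across individual user SGPRs.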
SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
    const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
    unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
  assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
         "Preload kernel argument allocated twice.");
  NumUserSGPRs += PaddingSGPRs;
  // If the available register tuples are aligned with the kernarg to be
  // preloaded use that register, otherwise we need to use a set of SGPRs and
  // merge them.
  Register PreloadReg =
      TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
  if (PreloadReg &&
      (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
    ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
    NumUserSGPRs += AllocSizeDWord;
  } else {
    for (unsigned I = 0; I < AllocSizeDWord; ++I) {
      ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
      NumUserSGPRs++;
    }
  }

  // Track the actual number of SGPRs that HW will preload to.
  UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
  return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
}
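
// Create a spill slot for saving the inactive lanes of a WWM register. Entry
// functions and certain chain-function registers (see the conditions below)
// never need this.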
void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  // Skip if this is a function with the amdgpu_cs_chain or
  // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
  // We never need to allocate a spill for these because we don't even need to
  // restore the inactive lanes for them (they're scratchier than the usual
  // scratch registers). We only need to do this if we have calls to
  // llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since
  // chain functions do not return) and the function did not contain a call to
  // llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes
  // when entering the function).
  if (isChainFunction() &&
      (SIRegisterInfo::isChainScratchRegister(VGPR) ||
       !MF.getFrameInfo().hasTailCall() || hasInitWholeWave()))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}
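
// After register allocation, try to move each WWM VGPR down to the lowest
// unused VGPR and update the bookkeeping (reserved registers, spill tables,
// block live-ins) accordingly.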
void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(
    MachineFunction &MF, SmallVectorImpl<Register> &WWMVGPRs,
    BitVector &SavedVGPRs) {
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned I = 0, E = WWMVGPRs.size(); I < E; ++I) {
    Register Reg = WWMVGPRs[I];
    Register NewReg =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (!NewReg || NewReg >= Reg)
      break;

    MRI.replaceRegWith(Reg, NewReg);

    // Update various tables with the new VGPR.
    WWMVGPRs[I] = NewReg;
    WWMReservedRegs.remove(Reg);
    WWMReservedRegs.insert(NewReg);
    MRI.reserveReg(NewReg, TRI);

    // Replace the register in SpillPhysVGPRs. This is needed to look for free
    // lanes while spilling special SGPRs like FP, BP, etc. during PEI.
    auto *RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg);
    if (RegItr != SpillPhysVGPRs.end()) {
      unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);
      SpillPhysVGPRs[Idx] = NewReg;
    }

    // The generic `determineCalleeSaves` might have set the old register if it
    // is in the CSR range.
    SavedVGPRs.reset(Reg);

    for (MachineBasicBlock &MBB : MF) {
      MBB.removeLiveIn(Reg);
      MBB.sortUniqueLiveIns();
    }
  }
}
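
// Assign one lane of a virtual VGPR to the SGPR spill for frame index FI; a
// fresh virtual VGPR is created when lane 0 is requested.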
bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    SpillVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
  return true;
}
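
// Same as above, but the spill lane lives in a physical VGPR, which is
// reserved (or WWM-spilled when called from prolog/epilog lowering) and added
// to every block's live-ins.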
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    // Find the highest available register if called before RA to ensure the
    // lowest registers are available for allocation. The LaneVGPR, in that
    // case, will be shifted back to the lowest range after VGPR allocation.
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
                                       !IsPrologEpilog);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    if (IsPrologEpilog)
      allocateWWMSpill(MF, LaneVGPR);
    else
      reserveWWMRegister(LaneVGPR);

    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
    SpillPhysVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillPhysVGPRs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
  return true;
}
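
// Allocate one VGPR lane per 4-byte word of the spill slot FI; e.g. a 16-byte
// SGPR spill slot occupies four lanes.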
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
    MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
    bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
                          : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
                                                : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = SpillToPhysVGPRLane
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
                                                             IsPrologEpilog)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
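
// Returns true if at least one SGPR spill slot was moved back to the default
// stack, i.e. that SGPR will be spilled to memory.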
bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them haven't been inserted yet. Also make sure to remove the
  // frame indices from the `SGPRSpillsToVirtualVGPRLanes` data structure;
  // otherwise a later re-mapping of freed frame indices by passes like "stack
  // slot coloring" could cause unexpected side effects and bugs.
  for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillsToVirtualVGPRLanes.erase(R.first);
  }

  // Remove the dead frame indices of CSR SGPRs which are spilled to physical
  // VGPR lanes during SILowerSGPRSpills pass.
  if (!ResetSGPRSpillStackIDs) {
    for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
      MFI.RemoveStackObject(R.first);
      SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;

  ScavengeFI =
      MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
                            TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
  VRegFlags.grow(Reg);
}

void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
                                                         Register SrcReg) {
  VRegFlags.grow(NewReg);
  VRegFlags[NewReg] = VRegFlags[SrcReg];
}
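
// Return the SGPR holding the low half of the GIT (global information table)
// address on amdpal: s0, or s8 for merged HS/GS shaders on gfx9+.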
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}
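
// Render a register as its MIR name (e.g. "$sgpr32") for YAML serialization.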
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  raw_string_ostream OS(Dest.Value);
  OS << printReg(Reg, &TRI);
  return Dest;
}
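
// Convert the in-memory argument descriptors into their YAML form, returning
// std::nullopt when no argument is set.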
static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  // TODO: Need to serialize kernarg preloads.
  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()),
      PSInputEnable(MFI.getPSInputEnable()),
      Mode(MFI.getMode()) {
  for (Register Reg : MFI.getSGPRSpillPhysVGPRs())
    SpillPhysVGPRS.push_back(regToString(Reg, TRI));

  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
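
// Initialize this MachineFunctionInfo from its parsed YAML counterpart.
// Returns true and reports Error/SourceRange if the scavenge frame index
// cannot be resolved.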
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  PSInputAddr = YamlMFI.PSInputAddr;
  PSInputEnable = YamlMFI.PSInputEnable;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", std::nullopt, std::nullopt);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}
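
// mayUseAGPRs is a conservative attribute-based check; usesAGPRs below
// inspects the function's actual virtual and physical register usage and
// caches the result where possible.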
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  return !F.hasFnAttribute("amdgpu-no-agpr");
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    }
    if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {