//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

enum { MAX_LANES = 64 };

using namespace llvm;

const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
      UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
      WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
      WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);
  MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
  assert(MaxNumWorkGroups.size() == 3);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  VRegFlags.reserve(1024);

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (AMDGPU::isChainCC(CC)) {
    // Chain functions don't receive an SP from their caller, but are free to
    // set one up. For now, we can use s32 to match what amdgpu_gfx functions
    // would use if called, but this can be revisited.
    // FIXME: Only reserve this if we actually need it.
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);

    ImplicitArgPtr = false;
  } else if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  if (!AMDGPU::isGraphics(CC) ||
      ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_Gfx) &&
       ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}
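
// Illustrative only (not from the upstream file): the two hints parsed above
// arrive as plain IR string attributes on the function, e.g. a frontend could
// emit something like
//   attributes #0 = { "amdgpu-git-ptr-high"="16"
//                     "amdgpu-32bit-address-high-bits"="0xffff8000" }
// where the attribute values shown are made-up examples.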

MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}
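
// The add*() helpers below hand out user SGPRs in call order: each call binds
// the next unallocated user SGPR (or an aligned SGPR tuple) to one preloaded
// kernel input and advances NumUserSGPRs by the number of SGPRs consumed,
// e.g. four for the private segment buffer and two for each 64-bit pointer.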

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.PrivateSegmentSize.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
    const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
    unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
  assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
         "Preload kernel argument allocated twice.");
  NumUserSGPRs += PaddingSGPRs;
  // If the available register tuples are aligned with the kernarg to be
  // preloaded use that register, otherwise we need to use a set of SGPRs and
  // merge them.
  Register PreloadReg =
      TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
  if (PreloadReg &&
      (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
    ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
    NumUserSGPRs += AllocSizeDWord;
  } else {
    for (unsigned I = 0; I < AllocSizeDWord; ++I) {
      ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
      NumUserSGPRs++;
    }
  }

  // Track the actual number of SGPRs that HW will preload to.
  UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
  return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
}
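
// Example of the two paths above: a 64-bit kernarg (AllocSizeDWord == 2) whose
// next user SGPR starts an aligned SReg_64 pair is recorded as that single
// pair; otherwise the two SGPRs are recorded individually and merged later.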

void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  // Skip if this is a function with the amdgpu_cs_chain or
  // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
  // We never need to allocate a spill for these because we don't even need to
  // restore the inactive lanes for them (they're scratchier than the usual
  // scratch registers).
  if (isChainFunction() && SIRegisterInfo::isChainScratchRegister(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
    MachineFunction &MF) {
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (Register &Reg : SpillPhysVGPRs) {
    Register NewReg =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (!NewReg || NewReg >= Reg)
      break;

    MRI.replaceRegWith(Reg, NewReg);

    // Update various tables with the new VGPR.
    WWMReservedRegs.remove(Reg);
    WWMReservedRegs.insert(NewReg);
    WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
    WWMSpills.erase(Reg);

    for (MachineBasicBlock &MBB : MF) {
      MBB.removeLiveIn(Reg);
      MBB.sortUniqueLiveIns();
    }

    Reg = NewReg;
  }
}
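
// Note: the function above runs after register allocation. Each spill VGPR
// that was deliberately picked from the top of the register file before RA is
// remapped to the lowest VGPR that is still unused, and the WWM-reserved set,
// spill map, and block live-in lists are updated to the new register.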

bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    SpillVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
  return true;
}

bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    // Find the highest available register if called before RA to ensure the
    // lowest registers are available for allocation. The LaneVGPR, in that
    // case, will be shifted back to the lowest range after VGPR allocation.
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
                                       !IsPrologEpilog);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
    reserveWWMRegister(LaneVGPR);
    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
    SpillPhysVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillPhysVGPRs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
  return true;
}
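
// Compared with allocateVirtualVGPRForSGPRSpills() above, the physical variant
// must also make the chosen VGPR safe to use across the whole function: its
// inactive lanes get a WWM spill slot, the register is reserved as a WWM
// register, and it is added to every block's live-in list.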

bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
    MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
    bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
                          : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
                                                : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = SpillToPhysVGPRLane
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
                                                             IsPrologEpilog)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}
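
// Lane accounting example for the function above: a 16-byte SGPR spill slot
// gives NumLanes = 16 / 4 = 4, so four consecutive VGPR lanes are claimed.
// NumSpillLanes wraps modulo the wavefront size (e.g. 64), so one spill VGPR
// holds at most a full wave's worth of spilled SGPRs before a new VGPR is
// allocated at lane index 0.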

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
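
// Lane-to-register example for the function above: a 128-byte VGPR spill slot
// covers 32 lanes; the loop walks those lanes from the highest index down,
// scanning forward through the opposite register file (AGPRs for a VGPR
// spill, VGPRs for an AGPR spill) for registers that are allocatable, unused,
// and not already claimed by an earlier spill.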

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them haven't been inserted yet. Also remove the frame indices
  // from the `SGPRSpillsToVirtualVGPRLanes` data structure, otherwise any
  // re-mapping of freed frame indices by later passes (e.g. "stack slot
  // coloring") could cause unexpected side effects.
  for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillsToVirtualVGPRLanes.erase(R.first);
  }

  // Remove the dead frame indices of CSR SGPRs which are spilled to physical
  // VGPR lanes during SILowerSGPRSpills pass.
  if (!ResetSGPRSpillStackIDs) {
    for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
      MFI.RemoveStackObject(R.first);
      SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
    }
  }
  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;

  ScavengeFI =
      MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
                            TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
  VRegFlags.grow(Reg);
}

void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
                                                         Register SrcReg) {
  VRegFlags.grow(NewReg);
  VRegFlags[NewReg] = VRegFlags[SrcReg];
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  // TODO: Need to serialize kernarg preloads.
  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}
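
// Each argument that is present is serialized either as the printed name of
// the physical register carrying it or as its stack offset, plus an optional
// lane mask; arguments that are absent are simply omitted from the YAML.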

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()),
      PSInputEnable(MFI.getPSInputEnable()),
      Mode(MFI.getMode()) {
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  PSInputAddr = YamlMFI.PSInputAddr;
  PSInputEnable = YamlMFI.PSInputEnable;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", std::nullopt, std::nullopt);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}
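
// Illustrative only: these fields round-trip through the MIR function's
// machineFunctionInfo block; the authoritative key set is the YAML mapping in
// SIMachineFunctionInfo.h. A block might look roughly like
//   machineFunctionInfo:
//     isEntryFunction: true
//     scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
//     stackPtrOffsetReg: '$sgpr32'
//     occupancy:       8
// where the values shown are made-up examples.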

bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  return !F.hasFnAttribute("amdgpu-no-agpr");
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    }
    if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not yet have been regbank
      // selected.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {