//===- SIMachineFunctionInfo.cpp - SI Machine Function Info --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    PrivateSegmentWaveByteOffset(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}
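
// Note on the constructor above: the "amdgpu-*" strings it checks are plain
// string function attributes on the IR function. An illustrative, hand-written
// example of how they might appear in IR (values are hypothetical):
//
//   attributes #0 = { "amdgpu-calls" "amdgpu-git-ptr-high"="16"
//                     "amdgpu-gds-size"="128" }
//
// consumeInteger() fails without modifying its result on a malformed string,
// so the preexisting defaults (e.g. GITPtrHigh = 0xffffffff) are preserved in
// that case.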

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}
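
// The add* helpers below each reserve the next user SGPR (or SGPR tuple) for a
// preloaded kernel argument, record the chosen register in ArgInfo, and bump
// NumUserSGPRs by the width of the allocated register class (4 for the 128-bit
// private segment buffer, 2 for the 64-bit pointers).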

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}
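
// isCalleeSavedReg() below walks a null-terminated callee-saved register list
// (the format produced by getCalleeSavedRegs()) looking for Reg.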

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}
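
// SGPR-to-VGPR spilling below packs one 32-bit SGPR per VGPR lane, so each
// spill VGPR holds a wave-size worth of lanes (e.g. 64 on wave64). As a worked
// example, a 16-byte SGPR tuple needs 16 / 4 == 4 lanes.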

/// \returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
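
// On subtargets with MAI instructions (e.g. gfx908/gfx90a), VGPR spills can be
// parked in otherwise-unused AGPRs (and AGPR spills in unused VGPRs) instead of
// going to scratch memory. The allocator below picks concrete 32-bit registers
// lane by lane and reports whether every lane could be covered.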

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
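
// removeDeadFrameIndices() below is intended to run once SGPR spills have been
// lowered: it drops the now-dead spill stack objects (keeping the FP/BP save
// slots) and, if requested, resets leftover SGPRSpill stack IDs so those slots
// are laid out on the default stack. It returns true if some SGPR spill still
// has to go to memory.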

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them haven't been inserted yet. Also make sure to remove the
  // frame indices from the `SGPRToVGPRSpills` data structure, otherwise it
  // could result in an unexpected side effect and bug if later pass(es) like
  // "stack slot coloring" re-map the freed frame indices.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}
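
// getScavengeFI() below creates (at most once) a 4-byte slot the register
// scavenger can use for an emergency spill: a fixed object at offset 0 for
// entry functions, an ordinary aligned stack object otherwise.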

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
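
// GIT = global information table, used on amdpal to reach pipeline state such
// as the scratch resource descriptor. getGITPtrLoReg() below returns the SGPR
// carrying the low 32 bits of that pointer, or no register on other OS types.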

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}
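
// MIR (YAML) serialization support follows. regToString and convertArgumentInfo
// translate the in-memory state into the yaml::SIMachineFunctionInfo mirror used
// by the MIR printer, and initializeBaseYamlFields performs the reverse mapping
// for the MIR parser. A rough, hand-written sketch of how this tends to look in
// a .mir file (illustrative only; exact fields and spellings may differ):
//
//   machineFunctionInfo:
//     isEntryFunction: true
//     scratchRSrcReg:    '$sgpr0_sgpr1_sgpr2_sgpr3'
//     frameOffsetReg:    '$sgpr33'
//     stackPtrOffsetReg: '$sgpr32'
//     argumentInfo:
//       privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
//       workGroupIDX:         { reg: '$sgpr6' }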

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}
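
// The yaml::SIMachineFunctionInfo constructor below snapshots the codegen-time
// state into its YAML mirror for the MIR printer.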

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}
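
// mayUseAGPRs() below is a conservative IR-level scan: the function may end up
// touching AGPRs if any inline asm constraint names an "a" register class, or
// if it calls something that is not a recognized intrinsic (whose register
// usage cannot be seen from here).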

bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }

        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}
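
// usesAGPRs() below answers the same question post-ISel: it inspects virtual
// register classes, still-unassigned generic virtual registers, and physical
// AGPR uses, caching the verdict in UsesAGPRs once it is final.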

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}