[AMDGPU] Select VGPR versions of MFMA if possible
llvm-project.git: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);
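
    // If this kernel's VGPR budget fits within the architectural VGPR file
    // and nothing in the IR can require AGPRs, drop the AGPR requirement so
    // MFMA instructions can be selected with VGPR operands (see mayUseAGPRs).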
    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}
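
/// Cap the cached occupancy estimate at both the maximum waves per EU for
/// this function and the occupancy achievable with its LDS usage.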
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}
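
// Each of the add* helpers below reserves the next unallocated user SGPR(s)
// for a preloaded kernel argument, records the assignment in ArgInfo, and
// returns the chosen register.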
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
        getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}
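
/// Return true if \p Reg appears in the null-terminated list of callee-saved
/// registers \p CSRegs.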
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \return true if \p NumNeed spill lanes are available in the VGPRs already
/// used for SGPR spilling.
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them have not been inserted yet. Also remove the frame indices
  // from the `SGPRToVGPRSpills` data structure; otherwise a later pass such as
  // "stack slot coloring" could re-map the freed frame indices and cause
  // unexpected bugs.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}
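
/// Return the frame index reserved for register scavenger spills, creating it
/// on first use: a fixed object at offset 0 for entry functions, or an
/// ordinary stack object otherwise.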
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
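
/// Return the SGPR holding the low 32 bits of the GIT (global information
/// table) pointer on amdpal, or an invalid register on other OSes.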
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}
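
/// Render \p Reg as a register name string for MIR YAML serialization.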
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}
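
/// Convert the in-memory argument descriptors in \p ArgInfo to their MIR YAML
/// representation. Returns None when no argument is set.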
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
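
/// Initialize this MachineFunctionInfo from its parsed MIR YAML form.
/// Returns true and sets \p Error / \p SourceRange if the YAML is invalid.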
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the invalid frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}
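
/// Conservatively check the IR for anything that may require AGPRs: inline
/// asm that references an 'a' (AGPR) register constraint, or any call that is
/// not to an intrinsic (including indirect calls).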
bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
  for (const BasicBlock &BB : MF.getFunction()) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }

        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}
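
/// Return whether this function actually uses AGPRs, caching the result where
/// it is final. Non-entry functions and functions with calls are conservatively
/// assumed to use them.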
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}