//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {
class MachineFrameInfo;
class MachineFunction;
class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;
class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVImage = PseudoSourceValue::TargetCustom,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
      : PseudoSourceValue(Kind, TM) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override { return true; }

  bool mayAlias(const MachineFrameInfo *) const override { return true; }
};
class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
      : AMDGPUPseudoSourceValue(GWSResource, TM) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // These are inaccessible memory from IR.
  bool isAliased(const MachineFrameInfo *) const override { return false; }

  // These are inaccessible memory from IR.
  bool mayAlias(const MachineFrameInfo *) const override { return false; }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};
namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  std::optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister)
      new (&RegisterName) StringValue(Other.RegisterName);
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    // Default-construct or destruct the old RegisterName in case of switching
    // union members.
    if (IsRegister != Other.IsRegister) {
      if (Other.IsRegister)
        new (&RegisterName) StringValue();
      else
        RegisterName.~StringValue();
    }
    IsRegister = Other.IsRegister;
    if (IsRegister)
      RegisterName = Other.RegisterName;
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};
template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};
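// For orientation, a sketch of the two flow-mapped forms the traits above
// produce (register names and values here are illustrative only, not from
// the source): a register argument serializes as
//   { reg: '$sgpr8_sgpr9', mask: 255 }
// while a stack argument serializes as
//   { offset: 16 }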
struct SIArgumentInfo {
  std::optional<SIArgument> PrivateSegmentBuffer;
  std::optional<SIArgument> DispatchPtr;
  std::optional<SIArgument> QueuePtr;
  std::optional<SIArgument> KernargSegmentPtr;
  std::optional<SIArgument> DispatchID;
  std::optional<SIArgument> FlatScratchInit;
  std::optional<SIArgument> PrivateSegmentSize;

  std::optional<SIArgument> WorkGroupIDX;
  std::optional<SIArgument> WorkGroupIDY;
  std::optional<SIArgument> WorkGroupIDZ;
  std::optional<SIArgument> WorkGroupInfo;
  std::optional<SIArgument> LDSKernelId;
  std::optional<SIArgument> PrivateSegmentWaveByteOffset;

  std::optional<SIArgument> ImplicitArgPtr;
  std::optional<SIArgument> ImplicitBufferPtr;

  std::optional<SIArgument> WorkItemIDX;
  std::optional<SIArgument> WorkItemIDY;
  std::optional<SIArgument> WorkItemIDZ;
};
template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};
// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;
  bool FP32InputDenormals = true;
  bool FP32OutputDenormals = true;
  bool FP64FP16InputDenormals = true;
  bool FP64FP16OutputDenormals = true;

  SIMode() = default;

  SIMode(const SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
    FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
    FP32OutputDenormals =
        Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
    FP64FP16InputDenormals =
        Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
    FP64FP16OutputDenormals =
        Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
  }

  bool operator==(const SIMode Other) const {
    return IEEE == Other.IEEE &&
           DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }
};
template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals,
                       true);
    YamlIO.mapOptional("fp64-fp16-output-denormals",
                       Mode.FP64FP16OutputDenormals, true);
  }
};
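// Since every key above has a default, a serialized mode block only contains
// what diverges from the defaults; for example (illustrative values only):
//   mode:
//     ieee: false
//     fp32-input-denormals: false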
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  Align MaxKernArgAlign;
  uint32_t LDSSize = 0;
  uint32_t GDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool IsChainFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  SmallVector<StringValue, 2> SpillPhysVGPRS;
  SmallVector<StringValue> WWMReservedRegs;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  unsigned BytesInStackArgArea = 0;
  bool ReturnsVoid = true;

  std::optional<SIArgumentInfo> ArgInfo;

  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  SIMode Mode;
  std::optional<FrameIndex> ScavengeFI;
  StringValue VGPRForAGPRCopy;
  StringValue SGPRForEXECCopy;
  StringValue LongBranchReservedReg;

  bool HasInitWholeWave = false;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI,
                        const llvm::MachineFunction &MF);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};
template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
    YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("isChainFunction", MFI.IsChainFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
    YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u);
    YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
    YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    YamlIO.mapOptional("spillPhysVGPRs", MFI.SpillPhysVGPRS);
    YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
    YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
    YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("sgprForEXECCopy", MFI.SGPRForEXECCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg,
                       StringValue());
    YamlIO.mapOptional("hasInitWholeWave", MFI.HasInitWholeWave, false);
  }
};

} // end namespace yaml
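// Putting the traits together, a machineFunctionInfo block in a serialized
// MIR file might look like the following sketch (all values are illustrative
// only; the keys are the ones mapped above):
//
//   machineFunctionInfo:
//     isEntryFunction: true
//     ldsSize:         2048
//     stackPtrOffsetReg: '$sgpr32'
//     argumentInfo:
//       kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
//       workItemIDX:       { reg: '$vgpr0' }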
// A CSR SGPR value can be preserved inside a callee using one of the following
// methods:
//   1. Copy to an unused scratch SGPR.
//   2. Spill to a VGPR lane.
//   3. Spill to memory via a scratch VGPR.
// class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method
// used for an SGPR at function prolog/epilog.
enum class SGPRSaveKind : uint8_t {
  COPY_TO_SCRATCH_SGPR,
  SPILL_TO_VGPR_LANE,
  SPILL_TO_MEM
};
class PrologEpilogSGPRSaveRestoreInfo {
  SGPRSaveKind Kind;
  union {
    int Index;
    Register Reg;
  };

public:
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
      : Kind(K), Reg(R) {}
  Register getReg() const { return Reg; }
  int getIndex() const { return Index; }
  SGPRSaveKind getKind() const { return Kind; }
};
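// A minimal sketch of how frame lowering might record one of these save
// methods (the register pair and frame index are hypothetical, not from the
// source):
//
//   // Save a CSR SGPR pair by spilling it to a VGPR lane at frame index FI.
//   MFI->addToPrologEpilogSGPRSpills(
//       AMDGPU::SGPR30_SGPR31,
//       PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));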
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
                                    private MachineRegisterInfo::Delegate {
  friend class GCNTargetMachine;

  // State of MODE register, assumed FP mode.
  SIModeRegisterDefaults Mode;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  // Registers that may be reserved when RA doesn't allocate enough
  // registers to plan for the case where an indirect branch ends up
  // being needed during branch relaxation.
  Register LongBranchReservedReg;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8 bytes of stack-argument
  /// space to a function with 16 bytes, then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;

  // Default/requested number of work groups for the function.
  SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};

private:
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Tracks information about user SGPRs that will be set up by hardware and
  // that apply to all wavefronts of the grid.
  GCNUserSGPRUsageInfo UserSGPRInfo;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool LDSKernelId : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  bool MayNeedAGPRs : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Flags associated with the virtual registers.
  IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  mutable std::optional<bool> UsesAGPRs;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

  // MachineRegisterInfo callback functions to notify events.
  void MRI_NoteNewVirtualRegister(Register Reg) override;
  void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;

public:
  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
    bool IsDead = false;
  };
private:
  // To track the virtual VGPR + lane index for each subregister of an SGPR
  // spilled to a frame index key during the SILowerSGPRSpills pass.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToVirtualVGPRLanes;
  // To track the physical VGPR + lane index for CSR SGPR spills and special
  // SGPRs like the Frame Pointer, identified during PrologEpilogInserter.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToPhysicalVGPRLanes;
  unsigned NumVirtualVGPRSpillLanes = 0;
  unsigned NumPhysicalVGPRSpillLanes = 0;
  SmallVector<Register, 2> SpillVGPRs;
  SmallVector<Register, 2> SpillPhysVGPRs;
  using WWMSpillsMap = MapVector<Register, int>;
  // To track the registers used in instructions that can potentially modify
  // the inactive lanes. The WWM instructions and the writelane instructions
  // for spilling SGPRs to VGPRs fall into this category of operations. The
  // VGPRs modified by them should be spilled/restored at function
  // prolog/epilog to avoid any undesired outcome. Each entry in this map
  // holds a pair of values, the VGPR and its stack slot index.
  WWMSpillsMap WWMSpills;

  // Before allocation, the VGPR registers are partitioned into two distinct
  // sets, the first one for WWM values and the second set for non-WWM values.
  // The latter set should be reserved during WWM regalloc.
  BitVector NonWWMRegMask;

  using ReservedRegSet = SmallSetVector<Register, 8>;
  // To track the VGPRs reserved for WWM instructions. They get stack slots
  // later during PrologEpilogInserter and get added into the superset
  // WWMSpills for actual spilling. A separate set makes both the register
  // reservation and the serialization easier.
  ReservedRegSet WWMReservedRegs;

  using PrologEpilogSGPRSpill =
      std::pair<Register, PrologEpilogSGPRSaveRestoreInfo>;
  // To track the SGPR spill method used for a CSR SGPR register during
  // frame lowering. Even though the SGPR spills are handled during the
  // SILowerSGPRSpills pass, some special handling is needed later during
  // PrologEpilogInserter.
  SmallVector<PrologEpilogSGPRSpill, 3> PrologEpilogSGPRSpills;

  // To save/restore EXEC MASK around WWM spills and copies.
  Register SGPRForEXECCopy;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

  // Emergency stack slot. Sometimes, we create this before finalizing the
  // stack frame, so save it here and add it to the RegScavenger later.
  std::optional<int> ScavengeFI;

private:
  Register VGPRForAGPRCopy;

  bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                        unsigned LaneIndex);
  bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                         unsigned LaneIndex,
                                         bool IsPrologEpilog);
public:
  Register getVGPRForAGPRCopy() const {
    return VGPRForAGPRCopy;
  }

  void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
    VGPRForAGPRCopy = NewVGPRForAGPRCopy;
  }

  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const;

public:
  SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
  SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);

  MachineFunctionInfo *
  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
      const override;

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
                                const MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                SMDiagnostic &Error, SMRange &SourceRange);

  void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
  bool isWWMReg(Register Reg) const {
    return Reg.isVirtual() ? checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)
                           : WWMReservedRegs.contains(Reg);
  }

  void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
  BitVector getNonWWMRegMask() const { return NonWWMRegMask; }
  void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); }

  SIModeRegisterDefaults getMode() const { return Mode; }
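  // Illustrative interplay of the WWM queries above (the register is
  // hypothetical): a virtual register counts as a WWM reg once its flag is
  // set,
  //   MFI->setFlag(VReg, AMDGPU::VirtRegFlag::WWM_REG);
  //   MFI->isWWMReg(VReg); // now true
  // while for a physical VGPR the same query consults WWMReservedRegs, so
  // reserveWWMRegister(PhysReg) is what makes isWWMReg(PhysReg) return true.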
  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToVirtualVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
  ArrayRef<Register> getSGPRSpillPhysVGPRs() const { return SpillPhysVGPRs; }
  const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
  const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }

  ArrayRef<PrologEpilogSGPRSpill> getPrologEpilogSGPRSpills() const {
    assert(is_sorted(PrologEpilogSGPRSpills, llvm::less_first()));
    return PrologEpilogSGPRSpills;
  }

  GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }

  const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }
  void addToPrologEpilogSGPRSpills(Register Reg,
                                   PrologEpilogSGPRSaveRestoreInfo SI) {
    assert(!hasPrologEpilogSGPRSpillEntry(Reg));

    // Insert a new entry in the right place to keep the vector in sorted
    // order. This should be cheap since the vector is expected to be very
    // short.
    PrologEpilogSGPRSpills.insert(
        upper_bound(
            PrologEpilogSGPRSpills, Reg,
            [](const auto &LHS, const auto &RHS) { return LHS < RHS.first; }),
        std::make_pair(Reg, SI));
  }
  // Check if an entry has been created for \p Reg in PrologEpilogSGPRSpills.
  // Return true on success and false otherwise.
  bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    return I != PrologEpilogSGPRSpills.end();
  }
  // Get the scratch SGPR if allocated to save/restore \p Reg.
  Register getScratchSGPRCopyDstReg(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    if (I != PrologEpilogSGPRSpills.end() &&
        I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
      return I->second.getReg();

    return AMDGPU::NoRegister;
  }
  // Get all scratch SGPRs allocated to copy/restore the SGPR spills.
  void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
    for (const auto &SI : PrologEpilogSGPRSpills) {
      if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
        Regs.push_back(SI.second.getReg());
    }
  }
  // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
  bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
    return find_if(PrologEpilogSGPRSpills,
                   [FI](const std::pair<Register,
                                        PrologEpilogSGPRSaveRestoreInfo> &SI) {
                     return SI.second.getKind() ==
                                SGPRSaveKind::SPILL_TO_VGPR_LANE &&
                            SI.second.getIndex() == FI;
                   }) != PrologEpilogSGPRSpills.end();
  }
  const PrologEpilogSGPRSaveRestoreInfo &
  getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    assert(I != PrologEpilogSGPRSpills.end());

    return I->second;
  }
  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToPhysicalVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }
  void setFlag(Register Reg, uint8_t Flag) {
    assert(Reg.isVirtual());
    if (VRegFlags.inBounds(Reg))
      VRegFlags[Reg] |= Flag;
  }

  bool checkFlag(Register Reg, uint8_t Flag) const {
    if (Reg.isPhysical())
      return false;

    return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag;
  }

  bool hasVRegFlags() { return VRegFlags.size(); }
  void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
                        Align Alignment = Align(4));

  void splitWWMSpillRegisters(
      MachineFunction &MF,
      SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
      SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillVGPR;
  }

  Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }

  void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillAGPR;
  }
  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  void setVGPRToAGPRSpillDead(int FrameIndex) {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    if (I != VGPRToAGPRSpills.end())
      I->second.IsDead = true;
  }
  // To bring the allocated WWM registers in \p WWMVGPRs to the lowest
  // available range.
  void shiftWwmVGPRsToLowestRange(MachineFunction &MF,
                                  SmallVectorImpl<Register> &WWMVGPRs,
                                  BitVector &SavedVGPRs);

  bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
                                   bool SpillToPhysVGPRLane = false,
                                   bool IsPrologEpilog = false);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
  /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
  /// to the default stack.
  bool removeDeadFrameIndices(MachineFrameInfo &MFI,
                              bool ResetSGPRSpillStackIDs);

  int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
  std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }
  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
  Register addLDSKernelId();
  SmallVectorImpl<MCRegister> *
  addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
                      unsigned AllocSizeDWord, int KernArgIdx,
                      int PaddingSGPRs);

  /// Increment user SGPRs used for padding the argument list only.
  Register addReservedUserSGPR() {
    Register Next = getNextUserSGPR();
    ++NumUserSGPRs;
    return Next;
  }
  // Add system SGPRs.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  bool hasLDSKernelId() const { return LDSKernelId; }
  // Add special VGPR inputs.
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }
  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset =
        ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }
  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }
  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  MCRegister
  getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    const auto *Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }
  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }
  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getNumKernargPreloadedSGPRs() const {
    return UserSGPRInfo.getNumKernargPreloadSGPRs();
  }
  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }
  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getLongBranchReservedReg() const { return LongBranchReservedReg; }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }
  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }
  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
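  // Worked example (not from the source): after markPSInputAllocated(0) and
  // markPSInputAllocated(2), PSInputAddr is 0b101, so isPSInputAllocated(2)
  // returns true while isPSInputAllocated(1) returns false.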
  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }
  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this
  /// function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this
  /// function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }
  const AMDGPUGWSResourcePseudoSourceValue *
  getGWSPSV(const AMDGPUTargetMachine &TM) {
    return &GWSResourcePSV;
  }
  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;

    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
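  // Usage sketch (the limits are hypothetical): a pass that determines only
  // 6 waves can run concurrently could call MFI->limitOccupancy(6); a later
  // MFI->increaseOccupancy(MF, 8) would raise the value again, but it
  // immediately re-clamps via limitOccupancy(MF), so the function-wide bound
  // still wins.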
  bool mayNeedAGPRs() const {
    return MayNeedAGPRs;
  }

  // \returns true if a function has a use of AGPRs via inline asm or
  // has a call which may use them.
  bool mayUseAGPRs(const Function &F) const;

  // \returns true if a function needs or may need AGPRs.
  bool usesAGPRs(const MachineFunction &MF) const;

  /// \returns Default/requested number of work groups for this function.
  SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }

  unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
  unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
  unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
};
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H