1 //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
/// Pass to temporarily raise the wave priority from the start of the shader
/// function until its last VMEM instructions, allowing younger waves to issue
/// their VMEM instructions as well.
14 //===----------------------------------------------------------------------===//
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIInstrInfo.h"
20 #include "llvm/ADT/PostOrderIterator.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/InitializePasses.h"
23 #include "llvm/Support/Allocator.h"
27 #define DEBUG_TYPE "amdgpu-set-wave-priority"
29 static cl::opt
<unsigned> DefaultVALUInstsThreshold(
30 "amdgpu-set-wave-priority-valu-insts-threshold",
31 cl::desc("VALU instruction count threshold for adjusting wave priority"),
32 cl::init(100), cl::Hidden
);
38 unsigned NumVALUInstsAtStart
= 0;
39 bool MayReachVMEMLoad
= false;
40 MachineInstr
*LastVMEMLoad
= nullptr;
43 using MBBInfoSet
= DenseMap
<const MachineBasicBlock
*, MBBInfo
>;
45 class AMDGPUSetWavePriority
: public MachineFunctionPass
{
49 AMDGPUSetWavePriority() : MachineFunctionPass(ID
) {}
51 StringRef
getPassName() const override
{ return "Set wave priority"; }
53 bool runOnMachineFunction(MachineFunction
&MF
) override
;
56 MachineInstr
*BuildSetprioMI(MachineBasicBlock
&MBB
,
57 MachineBasicBlock::iterator I
,
58 unsigned priority
) const;
60 const SIInstrInfo
*TII
;
63 } // End anonymous namespace.
65 INITIALIZE_PASS(AMDGPUSetWavePriority
, DEBUG_TYPE
, "Set wave priority", false,
68 char AMDGPUSetWavePriority::ID
= 0;
70 FunctionPass
*llvm::createAMDGPUSetWavePriorityPass() {
71 return new AMDGPUSetWavePriority();
75 AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock
&MBB
,
76 MachineBasicBlock::iterator I
,
77 unsigned priority
) const {
78 return BuildMI(MBB
, I
, DebugLoc(), TII
->get(AMDGPU::S_SETPRIO
))
82 // Checks that for every predecessor Pred that can reach a VMEM load,
83 // none of Pred's successors can reach a VMEM load.
84 static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock
&MBB
,
85 MBBInfoSet
&MBBInfos
) {
86 for (const MachineBasicBlock
*Pred
: MBB
.predecessors()) {
87 if (!MBBInfos
[Pred
].MayReachVMEMLoad
)
89 for (const MachineBasicBlock
*Succ
: Pred
->successors()) {
90 if (MBBInfos
[Succ
].MayReachVMEMLoad
)
97 static bool isVMEMLoad(const MachineInstr
&MI
) {
98 return SIInstrInfo::isVMEM(MI
) && MI
.mayLoad();
101 bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction
&MF
) {
102 const unsigned HighPriority
= 3;
103 const unsigned LowPriority
= 0;
105 Function
&F
= MF
.getFunction();
106 if (skipFunction(F
) || !AMDGPU::isEntryFunctionCC(F
.getCallingConv()))
109 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
110 TII
= ST
.getInstrInfo();
112 unsigned VALUInstsThreshold
= DefaultVALUInstsThreshold
;
113 Attribute A
= F
.getFnAttribute("amdgpu-wave-priority-threshold");
115 A
.getValueAsString().getAsInteger(0, VALUInstsThreshold
);
117 // Find VMEM loads that may be executed before long-enough sequences of
118 // VALU instructions. We currently assume that backedges/loops, branch
119 // probabilities and other details can be ignored, so we essentially
120 // determine the largest number of VALU instructions along every
121 // possible path from the start of the function that may potentially be
122 // executed provided no backedge is ever taken.
124 for (MachineBasicBlock
*MBB
: post_order(&MF
)) {
126 unsigned MaxNumVALUInstsInMiddle
= 0;
127 unsigned NumVALUInstsAtEnd
= 0;
128 for (MachineInstr
&MI
: *MBB
) {
129 if (isVMEMLoad(MI
)) {
131 MBBInfo
&Info
= MBBInfos
[MBB
];
132 Info
.NumVALUInstsAtStart
= 0;
133 MaxNumVALUInstsInMiddle
= 0;
134 NumVALUInstsAtEnd
= 0;
135 Info
.LastVMEMLoad
= &MI
;
136 } else if (SIInstrInfo::isDS(MI
)) {
138 MaxNumVALUInstsInMiddle
=
139 std::max(MaxNumVALUInstsInMiddle
, NumVALUInstsAtEnd
);
140 NumVALUInstsAtEnd
= 0;
141 } else if (SIInstrInfo::isVALU(MI
)) {
143 ++MBBInfos
[MBB
].NumVALUInstsAtStart
;
148 bool SuccsMayReachVMEMLoad
= false;
149 unsigned NumFollowingVALUInsts
= 0;
150 for (const MachineBasicBlock
*Succ
: MBB
->successors()) {
151 SuccsMayReachVMEMLoad
|= MBBInfos
[Succ
].MayReachVMEMLoad
;
152 NumFollowingVALUInsts
=
153 std::max(NumFollowingVALUInsts
, MBBInfos
[Succ
].NumVALUInstsAtStart
);
155 MBBInfo
&Info
= MBBInfos
[MBB
];
157 Info
.NumVALUInstsAtStart
+= NumFollowingVALUInsts
;
158 NumVALUInstsAtEnd
+= NumFollowingVALUInsts
;
160 unsigned MaxNumVALUInsts
=
161 std::max(MaxNumVALUInstsInMiddle
, NumVALUInstsAtEnd
);
162 Info
.MayReachVMEMLoad
=
163 SuccsMayReachVMEMLoad
||
164 (Info
.LastVMEMLoad
&& MaxNumVALUInsts
>= VALUInstsThreshold
);
167 MachineBasicBlock
&Entry
= MF
.front();
168 if (!MBBInfos
[&Entry
].MayReachVMEMLoad
)
171 // Raise the priority at the beginning of the shader.
172 MachineBasicBlock::iterator I
= Entry
.begin(), E
= Entry
.end();
173 while (I
!= E
&& !SIInstrInfo::isVALU(*I
) && !I
->isTerminator())
175 BuildSetprioMI(Entry
, I
, HighPriority
);
177 // Lower the priority on edges where control leaves blocks from which
178 // the VMEM loads are reachable.
179 SmallSet
<MachineBasicBlock
*, 16> PriorityLoweringBlocks
;
180 for (MachineBasicBlock
&MBB
: MF
) {
181 if (MBBInfos
[&MBB
].MayReachVMEMLoad
) {
182 if (MBB
.succ_empty())
183 PriorityLoweringBlocks
.insert(&MBB
);
187 if (CanLowerPriorityDirectlyInPredecessors(MBB
, MBBInfos
)) {
188 for (MachineBasicBlock
*Pred
: MBB
.predecessors()) {
189 if (MBBInfos
[Pred
].MayReachVMEMLoad
)
190 PriorityLoweringBlocks
.insert(Pred
);
195 // Where lowering the priority in predecessors is not possible, the
196 // block receiving control either was not part of a loop in the first
197 // place or the loop simplification/canonicalization pass should have
198 // already tried to split the edge and insert a preheader, and if for
199 // whatever reason it failed to do so, then this leaves us with the
200 // only option of lowering the priority within the loop.
201 PriorityLoweringBlocks
.insert(&MBB
);
204 for (MachineBasicBlock
*MBB
: PriorityLoweringBlocks
) {
207 MBBInfos
[MBB
].LastVMEMLoad
208 ? std::next(MachineBasicBlock::iterator(MBBInfos
[MBB
].LastVMEMLoad
))