1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Pass to pre-allocate WWM registers
12 //===----------------------------------------------------------------------===//
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/CodeGen/LiveIntervals.h"
20 #include "llvm/CodeGen/LiveRegMatrix.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/RegisterClassInfo.h"
24 #include "llvm/CodeGen/VirtRegMap.h"
25 #include "llvm/InitializePasses.h"
29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
/// Command-line option "amdgpu-prealloc-sgpr-spill-vgprs": marked cl::Hidden
/// and initialised to false. runOnMachineFunction ORs it with the function
/// attribute of the same name, and tests the combined value when it meets an
/// SI_SPILL_S32_TO_VGPR instruction.
32 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
33 cl::init(false), cl::Hidden
);
/// MachineFunctionPass that pre-allocates WWM registers. processDef() assigns
/// a physical register to a definition through LiveRegMatrix, and
/// rewriteRegs() reserves the chosen registers as WWM registers in
/// SIMachineFunctionInfo.
37 class SIPreAllocateWWMRegs
: public MachineFunctionPass
{
// Target instruction info; set from the GCNSubtarget in runOnMachineFunction.
39 const SIInstrInfo
*TII
;
// Target register info; obtained from TII in runOnMachineFunction.
40 const SIRegisterInfo
*TRI
;
// Register info of the function currently being processed.
41 MachineRegisterInfo
*MRI
;
// LiveRegMatrix analysis result; used for interference checks and assignment.
43 LiveRegMatrix
*Matrix
;
// Recomputed for each function via RegClassInfo.runOnMachineFunction(MF);
// supplies the candidate register order used by processDef().
45 RegisterClassInfo RegClassInfo
;
// Virtual registers that processDef() assigned; consumed and cleared by
// rewriteRegs().
47 std::vector
<unsigned> RegsToRewrite
;
// Debug helper: prints a description of a strict WWM/WQM marker instruction.
49 void printWWMInfo(const MachineInstr
&MI
);
// Registers this pass with the global PassRegistry on construction.
55 SIPreAllocateWWMRegs() : MachineFunctionPass(ID
) {
56 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
// Pass entry point; defined out of line.
59 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Declares LiveIntervalsWrapperPass, VirtRegMap and LiveRegMatrix as required
// analyses, then forwards to MachineFunctionPass::getAnalysisUsage.
61 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
62 AU
.addRequired
<LiveIntervalsWrapperPass
>();
63 AU
.addRequired
<VirtRegMap
>();
64 AU
.addRequired
<LiveRegMatrix
>();
66 MachineFunctionPass::getAnalysisUsage(AU
);
// Assigns a physical register to the register defined by MO; the bool result
// is accumulated into RegsAssigned by runOnMachineFunction.
70 bool processDef(MachineOperand
&MO
);
// Visits the operands of MF and reserves the registers recorded in
// RegsToRewrite as WWM registers.
71 void rewriteRegs(MachineFunction
&MF
);
74 } // End anonymous namespace.
// Pass registration for SIPreAllocateWWMRegs under the DEBUG_TYPE name with
// the description "SI Pre-allocate WWM Registers". The dependencies listed
// here are the same three analyses that getAnalysisUsage() marks as required.
76 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs
, DEBUG_TYPE
,
77 "SI Pre-allocate WWM Registers", false, false)
78 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass
)
79 INITIALIZE_PASS_DEPENDENCY(VirtRegMap
)
80 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix
)
81 INITIALIZE_PASS_END(SIPreAllocateWWMRegs
, DEBUG_TYPE
,
82 "SI Pre-allocate WWM Registers", false, false)
// Out-of-class definition of the pass's ID member (the value the constructor
// hands to MachineFunctionPass), initialised to 0.
84 char SIPreAllocateWWMRegs::ID
= 0;
// llvm::SIPreAllocateWWMRegsID is a reference bound to that same ID object.
86 char &llvm::SIPreAllocateWWMRegsID
= SIPreAllocateWWMRegs::ID
;
/// Factory for the pass: returns a newly allocated SIPreAllocateWWMRegs as a
/// FunctionPass pointer.
/// NOTE(review): the object is created with a raw `new`; presumably the caller
/// (e.g. a pass manager) takes ownership — confirm.
88 FunctionPass
*llvm::createSIPreAllocateWWMRegsPass() {
89 return new SIPreAllocateWWMRegs();
/// Tries to bind the register defined by \p MO to a physical register.
///
/// The register is read from \p MO and tested with TRI->isVGPR() and
/// VRM->hasPhys(). Candidate physical registers come from
/// RegClassInfo.getOrder() for the register's class. A candidate that MRI
/// reports as unused and that LiveRegMatrix reports as IK_Free for the
/// register's live interval is assigned via Matrix->assign(), and the virtual
/// register is appended to RegsToRewrite for rewriteRegs().
///
/// Failing to find any candidate is treated as impossible (llvm_unreachable).
///
/// \param MO operand whose register should receive a physical register.
/// \returns a flag that runOnMachineFunction ORs into RegsAssigned.
92 bool SIPreAllocateWWMRegs::processDef(MachineOperand
&MO
) {
93 Register Reg
= MO
.getReg();
// Guard: is Reg a VGPR?
// NOTE(review): presumably non-VGPR registers are rejected here — confirm.
97 if (!TRI
->isVGPR(*MRI
, Reg
))
// Guard: does VirtRegMap already hold a physical register for Reg?
100 if (VRM
->hasPhys(Reg
))
// Live interval of Reg; needed for the interference query and the assignment.
103 LiveInterval
&LI
= LIS
->getInterval(Reg
);
// Walk the candidate physical registers for Reg's register class.
105 for (MCRegister PhysReg
: RegClassInfo
.getOrder(MRI
->getRegClass(Reg
))) {
// A candidate qualifies when it is not yet used (queried with the regmask
// test skipped) and LiveRegMatrix reports no interference with LI.
106 if (!MRI
->isPhysRegUsed(PhysReg
, /*SkipRegMaskTest=*/true) &&
107 Matrix
->checkInterference(LI
, PhysReg
) == LiveRegMatrix::IK_Free
) {
108 Matrix
->assign(LI
, PhysReg
);
// Sanity check: the assigned register must not be the zero register value.
109 assert(PhysReg
!= 0);
// Record the virtual register so rewriteRegs() can process it later.
110 RegsToRewrite
.push_back(Reg
);
115 llvm_unreachable("physreg not found for WWM expression");
/// Post-processing step for the registers assigned by processDef().
///
/// First walks every operand of every instruction in \p MF: the operand's
/// register is tested for being physical and for having a VirtRegMap entry,
/// the mapped physical register is looked up (and narrowed with
/// TRI->getSubReg() using the operand's sub-register index), and the
/// operand's renamable flag is cleared.
///
/// Then, for each register in RegsToRewrite, its live interval is removed
/// from LiveIntervals and its physical register is reserved as a WWM register
/// in SIMachineFunctionInfo. Finally RegsToRewrite is cleared and
/// MRI->freezeReservedRegs() is called.
///
/// \param MF function whose operands are visited.
118 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction
&MF
) {
119 for (MachineBasicBlock
&MBB
: MF
) {
120 for (MachineInstr
&MI
: MBB
) {
121 for (MachineOperand
&MO
: MI
.operands()) {
125 const Register VirtReg
= MO
.getReg();
// Guard: is the operand's register already physical?
126 if (VirtReg
.isPhysical())
// Guard: does VirtRegMap hold an assignment for this virtual register?
129 if (!VRM
->hasPhys(VirtReg
))
// Physical register that VirtRegMap associates with VirtReg.
132 Register PhysReg
= VRM
->getPhys(VirtReg
);
133 const unsigned SubReg
= MO
.getSubReg();
// Narrow to the sub-register selected by the operand's sub-register index.
135 PhysReg
= TRI
->getSubReg(PhysReg
, SubReg
);
140 MO
.setIsRenamable(false);
// Per-function AMDGPU info object that records the reserved WWM registers.
145 SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
147 for (unsigned Reg
: RegsToRewrite
) {
// Drop the live interval of the virtual register.
148 LIS
->removeInterval(Reg
);
150 const Register PhysReg
= VRM
->getPhys(Reg
);
151 assert(PhysReg
!= 0);
// Record the physical register as a reserved WWM register.
153 MFI
->reserveWWMRegister(PhysReg
);
// The list only describes the function just processed; empty it.
156 RegsToRewrite
.clear();
158 // Update the set of reserved registers to include WWM ones.
159 MRI
->freezeReservedRegs();
/// Debug helper that writes a description of a strict WWM/WQM region marker
/// to dbgs().
///
/// The output is built from "Entering " or "Exiting ", then "Strict WWM " or
/// "Strict WQM ", then "region: " followed by the instruction itself. The
/// asserts require the opcode to be one of ENTER_STRICT_WWM, ENTER_STRICT_WQM,
/// EXIT_STRICT_WWM or EXIT_STRICT_WQM.
///
/// \param MI the marker instruction to describe.
163 LLVM_DUMP_METHOD
void
164 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr
&MI
) {
166 unsigned Opc
= MI
.getOpcode();
// Direction: entering or exiting a region.
168 if (Opc
== AMDGPU::ENTER_STRICT_WWM
|| Opc
== AMDGPU::ENTER_STRICT_WQM
) {
169 dbgs() << "Entering ";
// Any opcode that is not an ENTER marker must be one of the EXIT markers.
171 assert(Opc
== AMDGPU::EXIT_STRICT_WWM
|| Opc
== AMDGPU::EXIT_STRICT_WQM
);
172 dbgs() << "Exiting ";
// Region kind: strict WWM or strict WQM.
175 if (Opc
== AMDGPU::ENTER_STRICT_WWM
|| Opc
== AMDGPU::EXIT_STRICT_WWM
) {
176 dbgs() << "Strict WWM ";
// Any opcode that is not a WWM marker must be one of the WQM markers.
178 assert(Opc
== AMDGPU::ENTER_STRICT_WQM
|| Opc
== AMDGPU::EXIT_STRICT_WQM
);
179 dbgs() << "Strict WQM ";
// Finish with the instruction itself.
182 dbgs() << "region: " << MI
;
187 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction
&MF
) {
188 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF
.getName() << "\n");
190 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
192 TII
= ST
.getInstrInfo();
193 TRI
= &TII
->getRegisterInfo();
194 MRI
= &MF
.getRegInfo();
196 LIS
= &getAnalysis
<LiveIntervalsWrapperPass
>().getLIS();
197 Matrix
= &getAnalysis
<LiveRegMatrix
>();
198 VRM
= &getAnalysis
<VirtRegMap
>();
200 RegClassInfo
.runOnMachineFunction(MF
);
202 bool PreallocateSGPRSpillVGPRs
=
203 EnablePreallocateSGPRSpillVGPRs
||
204 MF
.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
206 bool RegsAssigned
= false;
208 // We use a reverse post-order traversal of the control-flow graph to
209 // guarantee that we visit definitions in dominance order. Since WWM
210 // expressions are guaranteed to never involve phi nodes, and we can only
211 // escape WWM through the special WWM instruction, this means that this is a
212 // perfect elimination order, so we can never do any better.
213 ReversePostOrderTraversal
<MachineFunction
*> RPOT(&MF
);
215 for (MachineBasicBlock
*MBB
: RPOT
) {
217 for (MachineInstr
&MI
: *MBB
) {
218 if (MI
.getOpcode() == AMDGPU::V_SET_INACTIVE_B32
||
219 MI
.getOpcode() == AMDGPU::V_SET_INACTIVE_B64
)
220 RegsAssigned
|= processDef(MI
.getOperand(0));
222 if (MI
.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR
) {
223 if (!PreallocateSGPRSpillVGPRs
)
225 RegsAssigned
|= processDef(MI
.getOperand(0));
228 if (MI
.getOpcode() == AMDGPU::ENTER_STRICT_WWM
||
229 MI
.getOpcode() == AMDGPU::ENTER_STRICT_WQM
) {
230 LLVM_DEBUG(printWWMInfo(MI
));
235 if (MI
.getOpcode() == AMDGPU::EXIT_STRICT_WWM
||
236 MI
.getOpcode() == AMDGPU::EXIT_STRICT_WQM
) {
237 LLVM_DEBUG(printWWMInfo(MI
));
244 LLVM_DEBUG(dbgs() << "Processing " << MI
);
246 for (MachineOperand
&DefOpnd
: MI
.defs()) {
247 RegsAssigned
|= processDef(DefOpnd
);