Revert " [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432)"
[llvm-project.git] / llvm / lib / Target / AMDGPU / SIPreAllocateWWMRegs.cpp
blob29fef49ee70954b78ffc76a0f78e166e643f0ebb
//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Pass to pre-allocate WWM registers
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
27 using namespace llvm;
29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
31 static cl::opt<bool>
32 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
33 cl::init(false), cl::Hidden);
35 namespace {
37 class SIPreAllocateWWMRegs : public MachineFunctionPass {
38 private:
39 const SIInstrInfo *TII;
40 const SIRegisterInfo *TRI;
41 MachineRegisterInfo *MRI;
42 LiveIntervals *LIS;
43 LiveRegMatrix *Matrix;
44 VirtRegMap *VRM;
45 RegisterClassInfo RegClassInfo;
47 std::vector<unsigned> RegsToRewrite;
48 #ifndef NDEBUG
49 void printWWMInfo(const MachineInstr &MI);
50 #endif
52 public:
53 static char ID;
55 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
56 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
59 bool runOnMachineFunction(MachineFunction &MF) override;
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
62 AU.addRequired<LiveIntervalsWrapperPass>();
63 AU.addRequired<VirtRegMap>();
64 AU.addRequired<LiveRegMatrix>();
65 AU.setPreservesAll();
66 MachineFunctionPass::getAnalysisUsage(AU);
69 private:
70 bool processDef(MachineOperand &MO);
71 void rewriteRegs(MachineFunction &MF);
74 } // End anonymous namespace.
76 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
77 "SI Pre-allocate WWM Registers", false, false)
78 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
79 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
80 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
81 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
82 "SI Pre-allocate WWM Registers", false, false)
84 char SIPreAllocateWWMRegs::ID = 0;
86 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
88 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
89 return new SIPreAllocateWWMRegs();
92 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
93 Register Reg = MO.getReg();
94 if (Reg.isPhysical())
95 return false;
97 if (!TRI->isVGPR(*MRI, Reg))
98 return false;
100 if (VRM->hasPhys(Reg))
101 return false;
103 LiveInterval &LI = LIS->getInterval(Reg);
105 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
106 if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
107 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
108 Matrix->assign(LI, PhysReg);
109 assert(PhysReg != 0);
110 RegsToRewrite.push_back(Reg);
111 return true;
115 llvm_unreachable("physreg not found for WWM expression");
118 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
119 for (MachineBasicBlock &MBB : MF) {
120 for (MachineInstr &MI : MBB) {
121 for (MachineOperand &MO : MI.operands()) {
122 if (!MO.isReg())
123 continue;
125 const Register VirtReg = MO.getReg();
126 if (VirtReg.isPhysical())
127 continue;
129 if (!VRM->hasPhys(VirtReg))
130 continue;
132 Register PhysReg = VRM->getPhys(VirtReg);
133 const unsigned SubReg = MO.getSubReg();
134 if (SubReg != 0) {
135 PhysReg = TRI->getSubReg(PhysReg, SubReg);
136 MO.setSubReg(0);
139 MO.setReg(PhysReg);
140 MO.setIsRenamable(false);
145 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
147 for (unsigned Reg : RegsToRewrite) {
148 LIS->removeInterval(Reg);
150 const Register PhysReg = VRM->getPhys(Reg);
151 assert(PhysReg != 0);
153 MFI->reserveWWMRegister(PhysReg);
156 RegsToRewrite.clear();
158 // Update the set of reserved registers to include WWM ones.
159 MRI->freezeReservedRegs();
162 #ifndef NDEBUG
163 LLVM_DUMP_METHOD void
164 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
166 unsigned Opc = MI.getOpcode();
168 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
169 dbgs() << "Entering ";
170 } else {
171 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
172 dbgs() << "Exiting ";
175 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
176 dbgs() << "Strict WWM ";
177 } else {
178 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
179 dbgs() << "Strict WQM ";
182 dbgs() << "region: " << MI;
185 #endif
187 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
188 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
190 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
192 TII = ST.getInstrInfo();
193 TRI = &TII->getRegisterInfo();
194 MRI = &MF.getRegInfo();
196 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
197 Matrix = &getAnalysis<LiveRegMatrix>();
198 VRM = &getAnalysis<VirtRegMap>();
200 RegClassInfo.runOnMachineFunction(MF);
202 bool PreallocateSGPRSpillVGPRs =
203 EnablePreallocateSGPRSpillVGPRs ||
204 MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
206 bool RegsAssigned = false;
208 // We use a reverse post-order traversal of the control-flow graph to
209 // guarantee that we visit definitions in dominance order. Since WWM
210 // expressions are guaranteed to never involve phi nodes, and we can only
211 // escape WWM through the special WWM instruction, this means that this is a
212 // perfect elimination order, so we can never do any better.
213 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
215 for (MachineBasicBlock *MBB : RPOT) {
216 bool InWWM = false;
217 for (MachineInstr &MI : *MBB) {
218 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
219 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
220 RegsAssigned |= processDef(MI.getOperand(0));
222 if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
223 if (!PreallocateSGPRSpillVGPRs)
224 continue;
225 RegsAssigned |= processDef(MI.getOperand(0));
228 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
229 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
230 LLVM_DEBUG(printWWMInfo(MI));
231 InWWM = true;
232 continue;
235 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
236 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
237 LLVM_DEBUG(printWWMInfo(MI));
238 InWWM = false;
241 if (!InWWM)
242 continue;
244 LLVM_DEBUG(dbgs() << "Processing " << MI);
246 for (MachineOperand &DefOpnd : MI.defs()) {
247 RegsAssigned |= processDef(DefOpnd);
252 if (!RegsAssigned)
253 return false;
255 rewriteRegs(MF);
256 return true;