//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
/// in NSA image instructions. Later the SIShrinkInstructions pass will replace
/// NSA with sequential versions where possible.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-nsa-reassign"

STATISTIC(NumNSAInstructions,
          "Number of NSA instructions with non-sequential address found");
STATISTIC(NumNSAConverted,
          "Number of NSA instructions changed to sequential");

namespace {

class GCNNSAReassign : public MachineFunctionPass {
public:
  static char ID;

  GCNNSAReassign() : MachineFunctionPass(ID) {
    initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN NSA Reassign"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.addRequired<VirtRegMap>();
    AU.addRequired<LiveRegMatrix>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  typedef enum {
    NOT_NSA,        // Not an NSA instruction
    FIXED,          // NSA which we cannot modify
    NON_CONTIGUOUS, // NSA with non-sequential address which we can try
                    // to optimize
    CONTIGUOUS      // NSA with all sequential address registers
  } NSA_Status;

  const GCNSubtarget *ST;

  const MachineRegisterInfo *MRI;

  const SIRegisterInfo *TRI;

  VirtRegMap *VRM;

  LiveRegMatrix *LRM;

  LiveIntervals *LIS;

  unsigned MaxNumVGPRs;

  const MCPhysReg *CSRegs;

  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                          unsigned StartReg) const;

  bool canAssign(unsigned StartReg, unsigned NumRegs) const;

  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                    false, false)

char GCNNSAReassign::ID = 0;

char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;
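
// Attempt to place all Intervals into the contiguous physical range starting
// at StartReg: unassign any physical registers currently held, bail out if any
// interval would interfere with its target register, then assign the intervals
// sequentially.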
bool
GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                                   unsigned StartReg) const {
  unsigned NumRegs = Intervals.size();

  for (unsigned N = 0; N < NumRegs; ++N)
    if (VRM->hasPhys(Intervals[N]->reg))
      LRM->unassign(*Intervals[N]);

  for (unsigned N = 0; N < NumRegs; ++N)
    if (LRM->checkInterference(*Intervals[N], StartReg + N))
      return false;

  for (unsigned N = 0; N < NumRegs; ++N)
    LRM->assign(*Intervals[N], StartReg + N);

  return true;
}
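
// Check whether a contiguous run of NumRegs physical registers starting at
// StartReg is usable: every register must be allocatable, and no otherwise
// unused callee-saved register may be touched, since this pass cannot arrange
// for it to be saved and restored.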
bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
  for (unsigned N = 0; N < NumRegs; ++N) {
    unsigned Reg = StartReg + N;
    if (!MRI->isAllocatable(Reg))
      return false;

    for (unsigned I = 0; CSRegs[I]; ++I)
      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
          !LRM->isPhysRegUsed(CSRegs[I]))
        return false;
  }

  return true;
}
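
// Linearly scan the VGPR file for a contiguous block large enough to hold all
// Intervals and try to reassign them there. Returns true on success; on
// failure the intervals may have been left unassigned by tryAssignRegisters.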
bool
GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
  unsigned NumRegs = Intervals.size();

  if (NumRegs > MaxNumVGPRs)
    return false;
  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
    if (!canAssign(Reg, NumRegs))
      continue;

    if (tryAssignRegisters(Intervals, Reg))
      return true;
  }

  return false;
}
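
// Classify an instruction: not an NSA encoding at all (NOT_NSA), an NSA whose
// address registers cannot be changed (FIXED), an NSA with a non-contiguous
// but potentially fixable address (NON_CONTIGUOUS), or one whose address
// registers are already sequential (CONTIGUOUS). With Fast set, only the
// current physical assignment is inspected and the more expensive legality
// checks are skipped.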
GCNNSAReassign::NSA_Status
GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
    return NSA_Status::NOT_NSA;

  int VAddr0Idx =
    AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

  unsigned VgprBase = 0;
  bool NSA = false;
  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
    Register Reg = Op.getReg();
    if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
      return NSA_Status::FIXED;

    Register PhysReg = VRM->getPhys(Reg);

    if (!Fast) {
      if (!PhysReg)
        return NSA_Status::FIXED;

      // Bail if the address is not a VGPR32. It should be possible to extend
      // the optimization to work with subregs of wider register tuples, but
      // the logic to find free registers would be much more complicated with
      // much less chance of success. It seems reasonable to assume that in
      // most cases a tuple is used because a vector variable contains
      // different parts of an address and it is either already consecutive or
      // cannot be reassigned if not. If needed, it is better to rely on the
      // register coalescer to process such address tuples.
      if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
        return NSA_Status::FIXED;

      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

      if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
        return NSA_Status::FIXED;

      for (auto U : MRI->use_nodbg_operands(Reg)) {
        if (U.isImplicit())
          return NSA_Status::FIXED;
        const MachineInstr *UseInst = U.getParent();
        if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
          return NSA_Status::FIXED;
      }

      if (!LIS->hasInterval(Reg))
        return NSA_Status::FIXED;
    }

    if (I == 0)
      VgprBase = PhysReg;
    else if (VgprBase + I != PhysReg)
      NSA = true;
  }

  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
}
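
// Top-level driver: collect NSA candidates, then try to rewrite the register
// assignment of each non-contiguous candidate so its address operands land in
// consecutive VGPRs, rolling back whenever reallocation fails or would break
// an already-contiguous candidate.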
bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (ST->getGeneration() < GCNSubtarget::GFX10)
    return false;

  MRI = &MF.getRegInfo();
  TRI = ST->getRegisterInfo();
  VRM = &getAnalysis<VirtRegMap>();
  LRM = &getAnalysis<LiveRegMatrix>();
  LIS = &getAnalysis<LiveIntervals>();

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
  CSRegs = MRI->getCalleeSavedRegs();
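
  // Collect every NSA instruction in the function, remembering whether its
  // address operands are already contiguous (true) or still need fixing
  // (false).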
  using Candidate = std::pair<const MachineInstr *, bool>;
  SmallVector<Candidate, 32> Candidates;
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      switch (CheckNSA(MI)) {
      default:
        continue;
      case NSA_Status::CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, true));
        break;
      case NSA_Status::NON_CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, false));
        ++NumNSAInstructions;
        break;
      }
    }
  }
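
  // Try to reallocate the address registers of each non-contiguous candidate.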
  bool Changed = false;
  for (auto &C : Candidates) {
    if (C.second)
      continue;

    const MachineInstr *MI = C.first;
    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
      // Already happens to be fixed.
      C.second = true;
      ++NumNSAConverted;
      continue;
    }

    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
    int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

    SmallVector<LiveInterval *, 16> Intervals;
    SmallVector<unsigned, 16> OrigRegs;
    SlotIndex MinInd, MaxInd;
    for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
      Register Reg = Op.getReg();
      LiveInterval *LI = &LIS->getInterval(Reg);
      if (llvm::find(Intervals, LI) != Intervals.end()) {
        // Same register used, unable to make sequential
        Intervals.clear();
        break;
      }
      Intervals.push_back(LI);
      OrigRegs.push_back(VRM->getPhys(Reg));
      MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
      MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
    }

    if (Intervals.empty())
      continue;

    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
                      << "\tOriginal allocation:\t";
               for (auto *LI : Intervals)
                 dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
               dbgs() << '\n');

    bool Success = scavengeRegs(Intervals);
    if (!Success) {
      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
      if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
        continue;
    } else {
      // Check we did not make it worse for other instructions.
      auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
                                [this](const Candidate &C, SlotIndex I) {
                                  return LIS->getInstructionIndex(*C.first) < I;
                                });
      for (auto E = Candidates.end(); Success && I != E &&
           LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
        if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
          Success = false;
          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
        }
      }
    }

    if (!Success) {
      // Restore the original allocation before moving on.
      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        if (VRM->hasPhys(Intervals[I]->reg))
          LRM->unassign(*Intervals[I]);

      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        LRM->assign(*Intervals[I], OrigRegs[I]);

      continue;
    }

    C.second = true;
    ++NumNSAConverted;
    LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
                << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
                << " : "
                << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
                << "]\n");
    Changed = true;
  }

  return Changed;
}