llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp

   1 //===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
/// \file This file contains the AMDGPU DAG scheduling
/// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "GCNVOPDUtils.h"
  16 #include "AMDGPUSubtarget.h"
  17 #include "GCNSubtarget.h"
  18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  19 #include "SIInstrInfo.h"
  20 #include "Utils/AMDGPUBaseInfo.h"
  21 #include "llvm/ADT/STLExtras.h"
  22 #include "llvm/ADT/SmallVector.h"
  23 #include "llvm/CodeGen/MachineBasicBlock.h"
  24 #include "llvm/CodeGen/MachineInstr.h"
  25 #include "llvm/CodeGen/MachineOperand.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/MacroFusion.h"
  28 #include "llvm/CodeGen/ScheduleDAG.h"
  29 #include "llvm/CodeGen/ScheduleDAGMutation.h"
  30 #include "llvm/CodeGen/TargetInstrInfo.h"
  31 #include "llvm/MC/MCInst.h"
  32
  33 using namespace llvm;
  34
  35 #define DEBUG_TYPE "gcn-vopd-utils"
  36
  37 bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
  38                                    const MachineInstr &FirstMI,
  39                                    const MachineInstr &SecondMI) {
  40   namespace VOPD = AMDGPU::VOPD;
  41
  42   const MachineFunction *MF = FirstMI.getMF();
  43   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  44   const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  45   const MachineRegisterInfo &MRI = MF->getRegInfo();
  46   // Literals also count against scalar bus limit
  47   SmallVector<const MachineOperand *> UniqueLiterals;
  48   auto addLiteral = [&](const MachineOperand &Op) {
  49     for (auto &Literal : UniqueLiterals) {
  50       if (Literal->isIdenticalTo(Op))
  51         return;
  52     }
  53     UniqueLiterals.push_back(&Op);
  54   };
  55   SmallVector<Register> UniqueScalarRegs;
  56   assert([&]() -> bool {
  57     for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
  58          MII != FirstMI.getParent()->instr_end(); ++MII) {
  59       if (&*MII == &SecondMI)
  60         return true;
  61     }
  62     return false;
  63   }() && "Expected FirstMI to precede SecondMI");
  64   // Cannot pair dependent instructions
  65   for (const auto &Use : SecondMI.uses())
  66     if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
  67       return false;
  68
  69   auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
  70     const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
  71     const MachineOperand &Operand = MI.getOperand(OperandIdx);
  72     if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
  73       return Operand.getReg();
  74     return Register();
  75   };
  76
  77   auto InstInfo =
  78       AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());
  79
  80   for (auto CompIdx : VOPD::COMPONENTS) {
  81     const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
  82
  83     const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
  84     if (Src0.isReg()) {
  85       if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
  86         if (!is_contained(UniqueScalarRegs, Src0.getReg()))
  87           UniqueScalarRegs.push_back(Src0.getReg());
  88       }
  89     } else {
  90       if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
  91         addLiteral(Src0);
  92     }
  93
  94     if (InstInfo[CompIdx].hasMandatoryLiteral()) {
  95       auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
  96       addLiteral(MI.getOperand(CompOprIdx));
  97     }
  98     if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
  99       UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
 100   }
 101
 102   if (UniqueLiterals.size() > 1)
 103     return false;
 104   if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
 105     return false;
 106
 107   // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
 108   bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
 109                  FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
 110                  SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
 111
 112   if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
 113     return false;
 114
 115   LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
 116                     << "\n\tY: " << SecondMI << "\n");
 117   return true;
 118 }
 119
 120 /// Check if the instr pair, FirstMI and SecondMI, should be scheduled
 121 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 122 /// SecondMI may be part of a fused pair at all.
 123 static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
 124                                        const TargetSubtargetInfo &TSI,
 125                                        const MachineInstr *FirstMI,
 126                                        const MachineInstr &SecondMI) {
 127   const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
 128   unsigned Opc2 = SecondMI.getOpcode();
 129   auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
 130
 131   // One instruction case
 132   if (!FirstMI)
 133     return SecondCanBeVOPD.Y;
 134
 135   unsigned Opc = FirstMI->getOpcode();
 136   auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
 137
 138   if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
 139         (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
 140     return false;
 141
 142   return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
 143 }
 144
 145 namespace {
 146 /// Adapts design from MacroFusion
 147 /// Puts valid candidate instructions back-to-back so they can easily
 148 /// be turned into VOPD instructions
 149 /// Greedily pairs instruction candidates. O(n^2) algorithm.
 150 struct VOPDPairingMutation : ScheduleDAGMutation {
 151   MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
 152
 153   VOPDPairingMutation(
 154       MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
 155       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
 156
 157   void apply(ScheduleDAGInstrs *DAG) override {
 158     const TargetInstrInfo &TII = *DAG->TII;
 159     const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
 160     if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
 161       LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
 162       return;
 163     }
 164
 165     std::vector<SUnit>::iterator ISUI, JSUI;
 166     for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
 167       const MachineInstr *IMI = ISUI->getInstr();
 168       if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
 169         continue;
 170       if (!hasLessThanNumFused(*ISUI, 2))
 171         continue;
 172
 173       for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
 174         if (JSUI->isBoundaryNode())
 175           continue;
 176         const MachineInstr *JMI = JSUI->getInstr();
 177         if (!hasLessThanNumFused(*JSUI, 2) ||
 178             !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
 179           continue;
 180         if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
 181           break;
 182       }
 183     }
 184     LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
 185   }
 186 };
 187 } // namespace
 188
 189 std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
 190   return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
 191 }