1 //===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// \file This file contains the AMDGPU DAG scheduling
10 /// mutation to pair VOPD instructions back to back. It also contains
11 // subroutines useful in the creation of VOPD instructions
13 //===----------------------------------------------------------------------===//
#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

#define DEBUG_TYPE "gcn-vopd-utils"
37 bool llvm::checkVOPDRegConstraints(const SIInstrInfo
&TII
,
38 const MachineInstr
&FirstMI
,
39 const MachineInstr
&SecondMI
) {
40 namespace VOPD
= AMDGPU::VOPD
;
42 const MachineFunction
*MF
= FirstMI
.getMF();
43 const GCNSubtarget
&ST
= MF
->getSubtarget
<GCNSubtarget
>();
44 const SIRegisterInfo
*TRI
= dyn_cast
<SIRegisterInfo
>(ST
.getRegisterInfo());
45 const MachineRegisterInfo
&MRI
= MF
->getRegInfo();
46 // Literals also count against scalar bus limit
47 SmallVector
<const MachineOperand
*> UniqueLiterals
;
48 auto addLiteral
= [&](const MachineOperand
&Op
) {
49 for (auto &Literal
: UniqueLiterals
) {
50 if (Literal
->isIdenticalTo(Op
))
53 UniqueLiterals
.push_back(&Op
);
55 SmallVector
<Register
> UniqueScalarRegs
;
56 assert([&]() -> bool {
57 for (auto MII
= MachineBasicBlock::const_iterator(&FirstMI
);
58 MII
!= FirstMI
.getParent()->instr_end(); ++MII
) {
59 if (&*MII
== &SecondMI
)
63 }() && "Expected FirstMI to precede SecondMI");
64 // Cannot pair dependent instructions
65 for (const auto &Use
: SecondMI
.uses())
66 if (Use
.isReg() && FirstMI
.modifiesRegister(Use
.getReg(), TRI
))
69 auto getVRegIdx
= [&](unsigned OpcodeIdx
, unsigned OperandIdx
) {
70 const MachineInstr
&MI
= (OpcodeIdx
== VOPD::X
) ? FirstMI
: SecondMI
;
71 const MachineOperand
&Operand
= MI
.getOperand(OperandIdx
);
72 if (Operand
.isReg() && TRI
->isVectorRegister(MRI
, Operand
.getReg()))
73 return Operand
.getReg();
78 AMDGPU::getVOPDInstInfo(FirstMI
.getDesc(), SecondMI
.getDesc());
80 for (auto CompIdx
: VOPD::COMPONENTS
) {
81 const MachineInstr
&MI
= (CompIdx
== VOPD::X
) ? FirstMI
: SecondMI
;
83 const MachineOperand
&Src0
= MI
.getOperand(VOPD::Component::SRC0
);
85 if (!TRI
->isVectorRegister(MRI
, Src0
.getReg())) {
86 if (!is_contained(UniqueScalarRegs
, Src0
.getReg()))
87 UniqueScalarRegs
.push_back(Src0
.getReg());
90 if (!TII
.isInlineConstant(MI
, VOPD::Component::SRC0
))
94 if (InstInfo
[CompIdx
].hasMandatoryLiteral()) {
95 auto CompOprIdx
= InstInfo
[CompIdx
].getMandatoryLiteralCompOperandIndex();
96 addLiteral(MI
.getOperand(CompOprIdx
));
98 if (MI
.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC
))
99 UniqueScalarRegs
.push_back(AMDGPU::VCC_LO
);
102 if (UniqueLiterals
.size() > 1)
104 if ((UniqueLiterals
.size() + UniqueScalarRegs
.size()) > 2)
107 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
108 bool SkipSrc
= ST
.getGeneration() >= AMDGPUSubtarget::GFX12
&&
109 FirstMI
.getOpcode() == AMDGPU::V_MOV_B32_e32
&&
110 SecondMI
.getOpcode() == AMDGPU::V_MOV_B32_e32
;
112 if (InstInfo
.hasInvalidOperand(getVRegIdx
, SkipSrc
))
115 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
116 << "\n\tY: " << SecondMI
<< "\n");
120 /// Check if the instr pair, FirstMI and SecondMI, should be scheduled
121 /// together. Given SecondMI, when FirstMI is unspecified, then check if
122 /// SecondMI may be part of a fused pair at all.
123 static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo
&TII
,
124 const TargetSubtargetInfo
&TSI
,
125 const MachineInstr
*FirstMI
,
126 const MachineInstr
&SecondMI
) {
127 const SIInstrInfo
&STII
= static_cast<const SIInstrInfo
&>(TII
);
128 unsigned Opc2
= SecondMI
.getOpcode();
129 auto SecondCanBeVOPD
= AMDGPU::getCanBeVOPD(Opc2
);
131 // One instruction case
133 return SecondCanBeVOPD
.Y
;
135 unsigned Opc
= FirstMI
->getOpcode();
136 auto FirstCanBeVOPD
= AMDGPU::getCanBeVOPD(Opc
);
138 if (!((FirstCanBeVOPD
.X
&& SecondCanBeVOPD
.Y
) ||
139 (FirstCanBeVOPD
.Y
&& SecondCanBeVOPD
.X
)))
142 return checkVOPDRegConstraints(STII
, *FirstMI
, SecondMI
);
146 /// Adapts design from MacroFusion
147 /// Puts valid candidate instructions back-to-back so they can easily
148 /// be turned into VOPD instructions
149 /// Greedily pairs instruction candidates. O(n^2) algorithm.
150 struct VOPDPairingMutation
: ScheduleDAGMutation
{
151 MacroFusionPredTy shouldScheduleAdjacent
; // NOLINT: function pointer
154 MacroFusionPredTy shouldScheduleAdjacent
) // NOLINT: function pointer
155 : shouldScheduleAdjacent(shouldScheduleAdjacent
) {}
157 void apply(ScheduleDAGInstrs
*DAG
) override
{
158 const TargetInstrInfo
&TII
= *DAG
->TII
;
159 const GCNSubtarget
&ST
= DAG
->MF
.getSubtarget
<GCNSubtarget
>();
160 if (!AMDGPU::hasVOPD(ST
) || !ST
.isWave32()) {
161 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
165 std::vector
<SUnit
>::iterator ISUI
, JSUI
;
166 for (ISUI
= DAG
->SUnits
.begin(); ISUI
!= DAG
->SUnits
.end(); ++ISUI
) {
167 const MachineInstr
*IMI
= ISUI
->getInstr();
168 if (!shouldScheduleAdjacent(TII
, ST
, nullptr, *IMI
))
170 if (!hasLessThanNumFused(*ISUI
, 2))
173 for (JSUI
= ISUI
+ 1; JSUI
!= DAG
->SUnits
.end(); ++JSUI
) {
174 if (JSUI
->isBoundaryNode())
176 const MachineInstr
*JMI
= JSUI
->getInstr();
177 if (!hasLessThanNumFused(*JSUI
, 2) ||
178 !shouldScheduleAdjacent(TII
, ST
, IMI
, *JMI
))
180 if (fuseInstructionPair(*DAG
, *ISUI
, *JSUI
))
184 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
189 std::unique_ptr
<ScheduleDAGMutation
> llvm::createVOPDPairingMutation() {
190 return std::make_unique
<VOPDPairingMutation
>(shouldScheduleVOPDAdjacent
);