1 //===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass combines split register tuple initialization into a single pseudo:
12 /// undef %0.sub1:sreg_64 = S_MOV_B32 1
13 /// %0.sub0:sreg_64 = S_MOV_B32 2
15 /// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
17 /// This is to allow rematerialization of a value instead of spilling. It is
18 /// supposed to be done after register coalescer to allow it to do its job and
19 /// before actual register allocation to allow rematerialization.
21 /// Right now the pass only handles 64 bit SGPRs with immediate initializers,
22 /// although the same shall be possible with other register classes and
23 /// instructions if necessary.
25 //===----------------------------------------------------------------------===//
28 #include "GCNSubtarget.h"
29 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include "llvm/InitializePasses.h"
36 #define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
/// Machine function pass for the AMDGPU pre-RA optimizations. It keeps
/// pointers to the instruction info, register info and machine register info
/// of the function being processed, and declares LiveIntervalsWrapperPass as
/// a required analysis.
40 class GCNPreRAOptimizations
: public MachineFunctionPass
{
// Per-function pointers; all three are assigned at the start of
// runOnMachineFunction() before any register is processed.
42 const SIInstrInfo
*TII
;
43 const SIRegisterInfo
*TRI
;
44 MachineRegisterInfo
*MRI
;
// Worker invoked once per candidate virtual register; its bool result is
// OR-ed into the Changed flag of runOnMachineFunction().
47 bool processReg(Register Reg
);
// Constructor: forwards ID to the MachineFunctionPass base and calls the
// pass initializer with the global pass registry.
52 GCNPreRAOptimizations() : MachineFunctionPass(ID
) {
53 initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
// Pass entry point; defined out of line further down in this file.
56 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Returns the human-readable name of this pass.
58 StringRef
getPassName() const override
{
59 return "AMDGPU Pre-RA optimizations";
// Declares LiveIntervalsWrapperPass as required, then forwards to the
// MachineFunctionPass implementation.
// NOTE(review): any statement between these two calls is not visible in this
// excerpt.
62 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
63 AU
.addRequired
<LiveIntervalsWrapperPass
>();
65 MachineFunctionPass::getAnalysisUsage(AU
);
69 } // End anonymous namespace.
// Pass registration: registers GCNPreRAOptimizations under DEBUG_TYPE and
// lists LiveIntervalsWrapperPass as a dependency.
// NOTE(review): the description string given to INITIALIZE_PASS_BEGIN differs
// from the one given to INITIALIZE_PASS_END (the latter lacks the AMDGPU
// prefix) - confirm which spelling is intended and make them agree.
71 INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations
, DEBUG_TYPE
,
72 "AMDGPU Pre-RA optimizations", false, false)
73 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass
)
74 INITIALIZE_PASS_END(GCNPreRAOptimizations
, DEBUG_TYPE
, "Pre-RA optimizations",
// Definition of the pass ID object, initialized to 0.
77 char GCNPreRAOptimizations::ID
= 0;
// Reference in the llvm namespace bound to the ID object above, so the pass
// can be referred to without naming the class.
79 char &llvm::GCNPreRAOptimizationsID
= GCNPreRAOptimizations::ID
;
/// Factory for the pass: returns a newly allocated GCNPreRAOptimizations
/// instance as a FunctionPass pointer.
81 FunctionPass
*llvm::createGCNPreRAOptimizationsPass() {
82 return new GCNPreRAOptimizations();
/// Walks the instructions returned by MRI->def_instructions(Reg) and applies
/// the two rewrites visible in this function:
///
///  - AGPR path: operand 1 of a defining instruction is redirected to the
///    virtual-register source of the V_ACCVGPR_WRITE_B32_e64 that defines the
///    matching sub-register of its current source. Every register touched
///    this way is collected in ModifiedRegs and has its live interval
///    recomputed after the scan.
///  - SGPR path: the immediates of S_MOV_B32 definitions of \p Reg are
///    accumulated into Init, and a pair of such definitions (Def0, Def1) is
///    replaced by one S_MOV_B64_IMM_PSEUDO, after which the live interval of
///    \p Reg is recomputed.
///
/// NOTE(review): this excerpt omits several statements (early exits, case
/// labels, the assignments to Def0 and Def1, the declarations of Init and
/// LIS, and the final return). Comments below describe only what is visible.
///
/// \param Reg register whose defining instructions are inspected.
/// \returns bool; runOnMachineFunction() ORs it into its Changed flag.
85 bool GCNPreRAOptimizations::processReg(Register Reg
) {
// Candidate instructions for the merge at the end of the function; where
// they get assigned is not visible in this excerpt.
86 MachineInstr
*Def0
= nullptr;
87 MachineInstr
*Def1
= nullptr;
// Registers whose uses were rewritten by the AGPR path; their live intervals
// are recomputed once the scan is finished.
90 SmallSet
<Register
, 32> ModifiedRegs
;
// True when the register class of Reg is an AGPR class.
91 bool IsAGPRDst
= TRI
->isAGPRClass(MRI
->getRegClass(Reg
));
// Scan every instruction that defines Reg and dispatch on its opcode.
93 for (MachineInstr
&I
: MRI
->def_instructions(Reg
)) {
94 switch (I
.getOpcode()) {
97 case AMDGPU::V_ACCVGPR_WRITE_B32_e64
:
// NOTE(review): the comment below talks about a copy; the case label that
// owns the following statements is presumably on a line not visible here.
100 // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
101 // intermediate temporary VGPR register. Try to find the defining
102 // accvgpr_write to avoid temporary registers.
107 Register SrcReg
= I
.getOperand(1).getReg();
// Only virtual source registers are considered; the statement guarded by
// this condition is not visible in this excerpt.
109 if (!SrcReg
.isVirtual())
112 // Check if source of copy is from another AGPR.
113 bool IsAGPRSrc
= TRI
->isAGPRClass(MRI
->getRegClass(SrcReg
));
117 // def_instructions() does not look at subregs so it may give us a
118 // different instruction that defines the same vreg but different subreg
119 // so we have to manually check subreg.
// NOTE(review): the value returned by getSubReg() is stored in a variable of
// type Register although it is compared against another getSubReg() result,
// i.e. it is used as a sub-register index - confirm the intended type.
120 Register SrcSubReg
= I
.getOperand(1).getSubReg();
121 for (auto &Def
: MRI
->def_instructions(SrcReg
)) {
// Compares the sub-register read by I with the sub-register written by Def;
// the statement guarded by the mismatch is not visible in this excerpt.
122 if (SrcSubReg
!= Def
.getOperand(0).getSubReg())
125 if (Def
.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64
) {
// DefSrcMO is a by-value copy of operand 1 of Def, taken before operand 1
// of I is modified below.
126 MachineOperand DefSrcMO
= Def
.getOperand(1);
128 // Immediates are not an issue and can be propagated in
129 // postrapseudos pass. Only handle cases where defining
130 // accvgpr_write source is a vreg.
131 if (DefSrcMO
.isReg() && DefSrcMO
.getReg().isVirtual()) {
132 // Propagate source reg of accvgpr write to this copy instruction
133 I
.getOperand(1).setReg(DefSrcMO
.getReg());
134 I
.getOperand(1).setSubReg(DefSrcMO
.getSubReg());
136 // Reg uses were changed, collect unique set of registers to update
137 // live intervals at the end.
138 ModifiedRegs
.insert(DefSrcMO
.getReg());
139 ModifiedRegs
.insert(SrcReg
);
144 // Found the defining accvgpr_write, stop looking any further.
150 case AMDGPU::S_MOV_B32
:
// The condition is true when operand 0 is not Reg, operand 1 is not an
// immediate, or the instruction does not have exactly two operands. The
// statement it guards is not visible in this excerpt.
151 if (I
.getOperand(0).getReg() != Reg
|| !I
.getOperand(1).isImm() ||
152 I
.getNumOperands() != 2)
// Dispatch on the sub-register of Reg written by this S_MOV_B32; the case
// labels are not visible in this excerpt.
155 switch (I
.getOperand(0).getSubReg()) {
// ORs the low 32 bits of the immediate into Init.
162 Init
|= I
.getOperand(1).getImm() & 0xffffffff;
// ORs the immediate, converted to uint64_t and shifted left by 32, into Init.
168 Init
|= static_cast<uint64_t>(I
.getOperand(1).getImm()) << 32;
175 // For AGPR reg, check if live intervals need to be updated.
// Each modified register gets its interval removed and recomputed from
// scratch.
178 for (Register RegToUpdate
: ModifiedRegs
) {
179 LIS
->removeInterval(RegToUpdate
);
180 LIS
->createAndComputeVirtRegInterval(RegToUpdate
);
187 // For SGPR reg, check if we can combine instructions.
// The condition is true when either candidate is missing or the two
// candidates have different parents; the statement it guards is not visible
// in this excerpt.
188 if (!Def0
|| !Def1
|| Def0
->getParent() != Def1
->getParent())
191 LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0
<< " " << *Def1
// If Def1 has the earlier slot index, swap the pointers so that Def0 names
// the earlier of the two instructions.
194 if (SlotIndex::isEarlierInstr(LIS
->getInstructionIndex(*Def1
),
195 LIS
->getInstructionIndex(*Def0
)))
196 std::swap(Def0
, Def1
);
// Replacement sequence: unmap both old instructions from LiveIntervals,
// build the S_MOV_B64_IMM_PSEUDO defining Reg with Def0 as the insertion
// point and the debug location of Def0, erase the old pair, map the new
// instruction, and finally recompute the live interval of Reg.
198 LIS
->RemoveMachineInstrFromMaps(*Def0
);
199 LIS
->RemoveMachineInstrFromMaps(*Def1
);
200 auto NewI
= BuildMI(*Def0
->getParent(), *Def0
, Def0
->getDebugLoc(),
201 TII
->get(AMDGPU::S_MOV_B64_IMM_PSEUDO
), Reg
)
204 Def0
->eraseFromParent();
205 Def1
->eraseFromParent();
206 LIS
->InsertMachineInstrInMaps(*NewI
);
207 LIS
->removeInterval(Reg
);
208 LIS
->createAndComputeVirtRegInterval(Reg
);
210 LLVM_DEBUG(dbgs() << " " << *NewI
);
215 bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction
&MF
) {
216 if (skipFunction(MF
.getFunction()))
219 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
220 TII
= ST
.getInstrInfo();
221 MRI
= &MF
.getRegInfo();
222 LIS
= &getAnalysis
<LiveIntervalsWrapperPass
>().getLIS();
223 TRI
= ST
.getRegisterInfo();
225 bool Changed
= false;
227 for (unsigned I
= 0, E
= MRI
->getNumVirtRegs(); I
!= E
; ++I
) {
228 Register Reg
= Register::index2VirtReg(I
);
229 if (!LIS
->hasInterval(Reg
))
231 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
232 if ((RC
->MC
->getSizeInBits() != 64 || !TRI
->isSGPRClass(RC
)) &&
233 (ST
.hasGFX90AInsts() || !TRI
->isAGPRClass(RC
)))
236 Changed
|= processReg(Reg
);