//===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass mainly lowers early terminate pseudo instructions.
//
//===----------------------------------------------------------------------===//
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineDominators.h"
19 #include "llvm/InitializePasses.h"
23 #define DEBUG_TYPE "si-late-branch-lowering"
// NOTE(review): this listing carries a stray source-line number at the start of
// many lines and appears to have lost some lines: no closing braces are visible
// for this class or its inline methods, and no declaration of ID, MovOpc or
// ExecReg appears here although the constructor and runOnMachineFunction() use
// them. The code tokens below are left untouched; restore the missing pieces
// from the upstream file before building.
//
/// Machine function pass that declares earlyTerm() and runOnMachineFunction()
/// (both defined out of line) and that requires and preserves
/// MachineDominatorTree.
27 class SILateBranchLowering
: public MachineFunctionPass
{
// Per-function state. All three pointers start out null and are assigned at
// the top of runOnMachineFunction().
29 const SIRegisterInfo
*TRI
= nullptr;
30 const SIInstrInfo
*TII
= nullptr;
31 MachineDominatorTree
*MDT
= nullptr;
// Helper applied to each collected early-terminate instruction; see the
// out-of-line definition.
33 void earlyTerm(MachineInstr
&MI
, MachineBasicBlock
*EarlyExitBlock
);
// Forwards the pass identifier ID to the MachineFunctionPass base.
41 SILateBranchLowering() : MachineFunctionPass(ID
) {}
// Pass entry point, defined out of line.
43 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Name reported for this pass.
45 StringRef
getPassName() const override
{
46 return "SI Final Branch Preparation";
// Declares MachineDominatorTree as both required and preserved, then lets the
// base class add its own analysis requirements.
49 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
50 AU
.addRequired
<MachineDominatorTree
>();
51 AU
.addPreserved
<MachineDominatorTree
>();
52 MachineFunctionPass::getAnalysisUsage(AU
);
56 } // end anonymous namespace
58 char SILateBranchLowering::ID
= 0;
60 INITIALIZE_PASS_BEGIN(SILateBranchLowering
, DEBUG_TYPE
,
61 "SI insert s_cbranch_execz instructions", false, false)
62 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
63 INITIALIZE_PASS_END(SILateBranchLowering
, DEBUG_TYPE
,
64 "SI insert s_cbranch_execz instructions", false, false)
66 char &llvm::SILateBranchLoweringPassID
= SILateBranchLowering::ID
;
/// Build the program-ending sequence at insertion point I of MBB: an EXP_DONE
/// "null export" when IsPS && (HasExports || MustExport) holds, and then an
/// S_ENDPGM with immediate operand 0.
///
/// DL is the debug location attached to the built instructions, TII supplies
/// the instruction descriptions and the subtarget, and MF supplies the
/// function whose calling convention and export settings are inspected.
//
// NOTE(review): this listing has stray source-line numbers fused onto the
// front of lines and appears to have lost some lines: the declaration of
// HasExports (presumably initialized from the getHasColorExport /
// getHasDepthExport expression below), the end of the EXP_DONE builder chain,
// and the closing braces are not visible. The code tokens are left untouched;
// restore the missing lines from the upstream file before building.
68 static void generateEndPgm(MachineBasicBlock
&MBB
,
69 MachineBasicBlock::iterator I
, DebugLoc DL
,
70 const SIInstrInfo
*TII
, MachineFunction
&MF
) {
71 const Function
&F
= MF
.getFunction();
// True only for the AMDGPU_PS calling convention.
72 bool IsPS
= F
.getCallingConv() == CallingConv::AMDGPU_PS
;
74 // Check if hardware has been configured to expect color or depth exports.
76 AMDGPU::getHasColorExport(F
) || AMDGPU::getHasDepthExport(F
);
78 // Prior to GFX10, hardware always expects at least one export for PS.
79 bool MustExport
= !AMDGPU::isGFX10Plus(TII
->getSubtarget());
81 if (IsPS
&& (HasExports
|| MustExport
)) {
82 // Generate "null export" if hardware is expecting PS to export.
// Target operand is ET_NULL; the four register operands are all VGPR0 marked
// Undef.
83 BuildMI(MBB
, I
, DL
, TII
->get(AMDGPU::EXP_DONE
))
84 .addImm(AMDGPU::Exp::ET_NULL
)
85 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
86 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
87 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
88 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
// End the program; S_ENDPGM takes a single immediate operand of 0 here.
95 BuildMI(MBB
, I
, DL
, TII
->get(AMDGPU::S_ENDPGM
)).addImm(0);
98 static void splitBlock(MachineBasicBlock
&MBB
, MachineInstr
&MI
,
99 MachineDominatorTree
*MDT
) {
100 MachineBasicBlock
*SplitBB
= MBB
.splitAt(MI
, /*UpdateLiveIns*/ true);
102 // Update dominator tree
103 using DomTreeT
= DomTreeBase
<MachineBasicBlock
>;
104 SmallVector
<DomTreeT::UpdateType
, 16> DTUpdates
;
105 for (MachineBasicBlock
*Succ
: SplitBB
->successors()) {
106 DTUpdates
.push_back({DomTreeT::Insert
, SplitBB
, Succ
});
107 DTUpdates
.push_back({DomTreeT::Delete
, &MBB
, Succ
});
109 DTUpdates
.push_back({DomTreeT::Insert
, &MBB
, SplitBB
});
110 MDT
->getBase().applyUpdates(DTUpdates
);
113 void SILateBranchLowering::earlyTerm(MachineInstr
&MI
,
114 MachineBasicBlock
*EarlyExitBlock
) {
115 MachineBasicBlock
&MBB
= *MI
.getParent();
116 const DebugLoc DL
= MI
.getDebugLoc();
118 auto BranchMI
= BuildMI(MBB
, MI
, DL
, TII
->get(AMDGPU::S_CBRANCH_SCC0
))
119 .addMBB(EarlyExitBlock
);
120 auto Next
= std::next(MI
.getIterator());
122 if (Next
!= MBB
.end() && !Next
->isTerminator())
123 splitBlock(MBB
, *BranchMI
, MDT
);
125 MBB
.addSuccessor(EarlyExitBlock
);
126 MDT
->getBase().insertEdge(&MBB
, EarlyExitBlock
);
/// Pass entry point. Caches TII, TRI and MDT, picks MovOpc and ExecReg by wave
/// size, scans every instruction of MF, and then lowers the collected
/// SI_EARLY_TERMINATE_SCC0 and SI_RETURN_TO_EPILOG instructions.
//
// NOTE(review): this listing has stray source-line numbers fused onto the
// front of lines and appears to have lost some lines. In the visible text Next
// is never assigned, DL is used without a declaration, MadeChange is never
// updated or returned, the switch cases have no break, the
// "MBB == &MF.back() && MI == &MBB->back()" test has no body, one BuildMI call
// is cut off after TII->get(MovOpc), and no closing braces appear. The code
// tokens are left untouched; restore the missing lines from the upstream file
// before building.
129 bool SILateBranchLowering::runOnMachineFunction(MachineFunction
&MF
) {
130 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
131 TII
= ST
.getInstrInfo();
132 TRI
= &TII
->getRegisterInfo();
133 MDT
= &getAnalysis
<MachineDominatorTree
>();
// Wave32 selects S_MOV_B32 / EXEC_LO; otherwise S_MOV_B64 / EXEC.
135 MovOpc
= ST
.isWave32() ? AMDGPU::S_MOV_B32
: AMDGPU::S_MOV_B64
;
136 ExecReg
= ST
.isWave32() ? AMDGPU::EXEC_LO
: AMDGPU::EXEC
;
// Work lists filled by the scan below and consumed by the lowering phases.
138 SmallVector
<MachineInstr
*, 4> EarlyTermInstrs
;
139 SmallVector
<MachineInstr
*, 1> EpilogInstrs
;
140 bool MadeChange
= false;
// Scan phase: visit every instruction of every block.
142 for (MachineBasicBlock
&MBB
: MF
) {
143 MachineBasicBlock::iterator I
, Next
;
// The loop advances through Next rather than ++I; the current instruction may
// be erased inside the body.
144 for (I
= MBB
.begin(); I
!= MBB
.end(); I
= Next
) {
146 MachineInstr
&MI
= *I
;
148 switch (MI
.getOpcode()) {
149 case AMDGPU::S_BRANCH
:
150 // Optimize out branches to the next block.
151 // This only occurs in -O0 when BranchFolding is not executed.
152 if (MBB
.isLayoutSuccessor(MI
.getOperand(0).getMBB())) {
153 assert(&MI
== &MBB
.back());
154 MI
.eraseFromParent();
// Early-terminate instructions are only recorded here; they are lowered after
// the scan.
159 case AMDGPU::SI_EARLY_TERMINATE_SCC0
:
160 EarlyTermInstrs
.push_back(&MI
);
// Return-to-epilog instructions are likewise only recorded here.
163 case AMDGPU::SI_RETURN_TO_EPILOG
:
164 EpilogInstrs
.push_back(&MI
);
173 // Lower any early exit branches first
174 if (!EarlyTermInstrs
.empty()) {
// One shared exit block is created and appended at the end of the function.
175 MachineBasicBlock
*EarlyExitBlock
= MF
.CreateMachineBasicBlock();
178 MF
.insert(MF
.end(), EarlyExitBlock
);
// The exit block receives a MovOpc instruction followed by the sequence built
// by generateEndPgm().
179 BuildMI(*EarlyExitBlock
, EarlyExitBlock
->end(), DL
, TII
->get(MovOpc
),
182 generateEndPgm(*EarlyExitBlock
, EarlyExitBlock
->end(), DL
, TII
, MF
);
184 for (MachineInstr
*Instr
: EarlyTermInstrs
) {
185 // Early termination in GS does nothing
186 if (MF
.getFunction().getCallingConv() != CallingConv::AMDGPU_GS
)
187 earlyTerm(*Instr
, EarlyExitBlock
);
// The early-terminate instruction is erased after the check above.
188 Instr
->eraseFromParent();
191 EarlyTermInstrs
.clear();
195 // Now check return to epilog instructions occur at function end
196 if (!EpilogInstrs
.empty()) {
// Created lazily: stays null until a block at the function end is needed.
197 MachineBasicBlock
*EmptyMBBAtEnd
= nullptr;
198 assert(!MF
.getInfo
<SIMachineFunctionInfo
>()->returnsVoid());
200 // If there are multiple returns to epilog then all will
201 // become jumps to new empty end block.
202 if (EpilogInstrs
.size() > 1) {
203 EmptyMBBAtEnd
= MF
.CreateMachineBasicBlock();
204 MF
.insert(MF
.end(), EmptyMBBAtEnd
);
207 for (auto MI
: EpilogInstrs
) {
208 auto MBB
= MI
->getParent();
// Tests whether MI is the last instruction of the last block of the function.
209 if (MBB
== &MF
.back() && MI
== &MBB
->back())
212 // SI_RETURN_TO_EPILOG is not the last instruction.
213 // Jump to empty block at function end.
214 if (!EmptyMBBAtEnd
) {
215 EmptyMBBAtEnd
= MF
.CreateMachineBasicBlock();
216 MF
.insert(MF
.end(), EmptyMBBAtEnd
);
// Record the new CFG edge and dominator-tree edge, then replace MI with an
// S_BRANCH to the end block.
219 MBB
->addSuccessor(EmptyMBBAtEnd
);
220 MDT
->getBase().insertEdge(MBB
, EmptyMBBAtEnd
);
221 BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(AMDGPU::S_BRANCH
))
222 .addMBB(EmptyMBBAtEnd
);
223 MI
->eraseFromParent();
227 EpilogInstrs
.clear();