1 //===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass mainly lowers early terminate pseudo instructions.
12 //===----------------------------------------------------------------------===//
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineDominators.h"
19 #include "llvm/InitializePasses.h"
23 #define DEBUG_TYPE "si-late-branch-lowering"
// Machine function pass defined by this file; the file header describes its
// main job as lowering early terminate pseudo instructions.
//
// NOTE(review): the embedded source numbering jumps inside this class
// (34 -> 42, 47 -> 50, 53 -> 57), so this listing appears to have lost lines.
// Presumably those are the access specifiers, the closing braces and the
// declarations of ID, MovOpc and ExecReg, all of which are used elsewhere in
// this file. Confirm against the upstream source before editing.
27 class SILateBranchLowering
: public MachineFunctionPass
{
// Cached pointers; all three are assigned at the start of
// runOnMachineFunction and start out as nullptr.
29 const SIRegisterInfo
*TRI
= nullptr;
30 const SIInstrInfo
*TII
= nullptr;
31 MachineDominatorTree
*MDT
= nullptr;
// Rewrites a chain tail-call pseudo into an EXEC write plus SI_TCRETURN.
33 void expandChainCall(MachineInstr
&MI
);
// Inserts a conditional branch to EarlyExitBlock in front of MI and keeps the
// CFG and dominator tree in sync. MI itself is left in place.
34 void earlyTerm(MachineInstr
&MI
, MachineBasicBlock
*EarlyExitBlock
);
// Registers the pass with its ID; ID's declaration is not visible in this
// listing.
42 SILateBranchLowering() : MachineFunctionPass(ID
) {}
// Pass entry point; defined out of line below.
44 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Human-readable pass name.
46 StringRef
getPassName() const override
{
47 return "SI Final Branch Preparation";
// Declares the machine dominator tree as both required and preserved, then
// defers to the base class for the remaining analysis usage.
50 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
51 AU
.addRequired
<MachineDominatorTreeWrapperPass
>();
52 AU
.addPreserved
<MachineDominatorTreeWrapperPass
>();
53 MachineFunctionPass::getAnalysisUsage(AU
);
57 } // end anonymous namespace
// Definition of the pass identifier that the constructor passes to
// MachineFunctionPass.
59 char SILateBranchLowering::ID
= 0;
// Pass registration through the INITIALIZE_PASS_* macros, keyed by DEBUG_TYPE
// and declaring a dependency on MachineDominatorTreeWrapperPass (the same
// analysis requested in getAnalysisUsage).
// NOTE(review): the description string used here differs from the one
// returned by getPassName() ("SI Final Branch Preparation") and looks stale;
// confirm whether anything depends on it before changing it.
61 INITIALIZE_PASS_BEGIN(SILateBranchLowering
, DEBUG_TYPE
,
62 "SI insert s_cbranch_execz instructions", false, false)
63 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass
)
64 INITIALIZE_PASS_END(SILateBranchLowering
, DEBUG_TYPE
,
65 "SI insert s_cbranch_execz instructions", false, false)
// Exposes the pass ID under the llvm namespace as a reference to
// SILateBranchLowering::ID.
67 char &llvm::SILateBranchLoweringPassID
= SILateBranchLowering::ID
;
// Emits the end-of-program sequence at insertion point I in MBB: an optional
// EXP_DONE export followed by S_ENDPGM with an immediate operand of 0.
//
// MBB - block receiving the instructions; its parent function supplies the
//       GCNSubtarget consulted for hasNullExportTarget().
// I   - insertion point handed to BuildMI.
// DL  - debug location attached to the built instructions.
// TII - instruction info used to fetch instruction descriptors and the
//       subtarget for the GFX10+ check.
// MF  - function whose calling convention and export settings are queried.
//
// NOTE(review): the embedded source numbering skips 86, 91 and 96-101 inside
// this function, so the listing appears to have lost lines (presumably the
// declaration initialized by the conditional expression below, further
// EXP_DONE operands and closing braces). Confirm against the upstream file.
69 static void generateEndPgm(MachineBasicBlock
&MBB
,
70 MachineBasicBlock::iterator I
, DebugLoc DL
,
71 const SIInstrInfo
*TII
, MachineFunction
&MF
) {
72 const Function
&F
= MF
.getFunction();
73 bool IsPS
= F
.getCallingConv() == CallingConv::AMDGPU_PS
;
75 // Check if hardware has been configured to expect color or depth exports.
76 bool HasColorExports
= AMDGPU::getHasColorExport(F
);
77 bool HasDepthExports
= AMDGPU::getHasDepthExport(F
);
78 bool HasExports
= HasColorExports
|| HasDepthExports
;
80 // Prior to GFX10, hardware always expects at least one export for PS.
81 bool MustExport
= !AMDGPU::isGFX10Plus(TII
->getSubtarget());
// The export is only built for the AMDGPU_PS calling convention, and only
// when the function reports color/depth exports or the target is not GFX10+.
83 if (IsPS
&& (HasExports
|| MustExport
)) {
84 // Generate "null export" if hardware is expecting PS to export.
85 const GCNSubtarget
&ST
= MBB
.getParent()->getSubtarget
<GCNSubtarget
>();
// Export target selection: ET_NULL when the subtarget has a null export
// target, otherwise ET_MRT0 if color exports are present, else ET_MRTZ.
// NOTE(review): the left-hand side of this expression is not visible here.
87 ST
.hasNullExportTarget()
88 ? AMDGPU::Exp::ET_NULL
89 : (HasColorExports
? AMDGPU::Exp::ET_MRT0
: AMDGPU::Exp::ET_MRTZ
);
// The four register operands are all VGPR0 marked undef.
90 BuildMI(MBB
, I
, DL
, TII
->get(AMDGPU::EXP_DONE
))
92 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
93 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
94 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
95 .addReg(AMDGPU::VGPR0
, RegState::Undef
)
// Program end instruction, built with the same insertion point and debug
// location. NOTE(review): presumably emitted unconditionally, outside the
// `if` above — the closing brace is not visible in this listing; confirm.
102 BuildMI(MBB
, I
, DL
, TII
->get(AMDGPU::S_ENDPGM
)).addImm(0);
// Splits MBB at MI via MachineBasicBlock::splitAt (with live-in updating
// enabled) and applies the matching edge updates to the dominator tree.
//
// MBB - block to split.
// MI  - instruction passed to splitAt as the split point.
// MDT - dominator tree whose base tree receives the batched updates.
//
// NOTE(review): the closing braces of the loop and of the function are not
// visible in this listing (embedded numbering skips 115 and 118).
105 static void splitBlock(MachineBasicBlock
&MBB
, MachineInstr
&MI
,
106 MachineDominatorTree
*MDT
) {
107 MachineBasicBlock
*SplitBB
= MBB
.splitAt(MI
, /*UpdateLiveIns*/ true);
109 // Update dominator tree
110 using DomTreeT
= DomTreeBase
<MachineBasicBlock
>;
111 SmallVector
<DomTreeT::UpdateType
, 16> DTUpdates
;
// For every successor of the new block, queue an insertion of the edge
// SplitBB -> Succ and a deletion of the edge MBB -> Succ.
112 for (MachineBasicBlock
*Succ
: SplitBB
->successors()) {
113 DTUpdates
.push_back({DomTreeT::Insert
, SplitBB
, Succ
});
114 DTUpdates
.push_back({DomTreeT::Delete
, &MBB
, Succ
});
// Queue the new edge MBB -> SplitBB. NOTE(review): presumably this sits after
// the loop rather than inside it — confirm against the upstream file.
116 DTUpdates
.push_back({DomTreeT::Insert
, &MBB
, SplitBB
});
// Apply all queued updates in a single batch.
117 MDT
->getBase().applyUpdates(DTUpdates
);
// Expands a chain tail-call pseudo in place: builds a MovOpc instruction that
// defines ExecReg in front of MI, moves MI's operand at index ExecIdx onto
// that new instruction, and then changes MI's descriptor to SI_TCRETURN.
//
// MI - the pseudo instruction to rewrite; it is modified, not erased.
//
// MovOpc and ExecReg are assigned in runOnMachineFunction based on wave size.
120 void SILateBranchLowering::expandChainCall(MachineInstr
&MI
) {
121 // This is a tail call that needs to be expanded into at least
122 // 2 instructions, one for setting EXEC and one for the actual tail call.
// Index of the MI operand that is copied onto the new instruction.
123 constexpr unsigned ExecIdx
= 3;
// The operand is copied onto the new instruction first and only afterwards
// removed from MI.
125 BuildMI(*MI
.getParent(), MI
, MI
.getDebugLoc(), TII
->get(MovOpc
), ExecReg
)
126 ->addOperand(MI
.getOperand(ExecIdx
));
127 MI
.removeOperand(ExecIdx
);
129 MI
.setDesc(TII
->get(AMDGPU::SI_TCRETURN
));
// Lowers one early-terminate point: inserts an S_CBRANCH_SCC0 to
// EarlyExitBlock in front of MI and registers the new edge in both the CFG
// and the dominator tree.
//
// MI             - instruction marking the termination point; it is not
//                  erased here (runOnMachineFunction erases it afterwards).
// EarlyExitBlock - branch target, added as a successor of MI's block.
132 void SILateBranchLowering::earlyTerm(MachineInstr
&MI
,
133 MachineBasicBlock
*EarlyExitBlock
) {
134 MachineBasicBlock
&MBB
= *MI
.getParent();
135 const DebugLoc DL
= MI
.getDebugLoc();
137 auto BranchMI
= BuildMI(MBB
, MI
, DL
, TII
->get(AMDGPU::S_CBRANCH_SCC0
))
138 .addMBB(EarlyExitBlock
);
139 auto Next
= std::next(MI
.getIterator());
// Split only when MI is followed by an instruction that is not a terminator;
// the split point handed to splitBlock is the new branch.
141 if (Next
!= MBB
.end() && !Next
->isTerminator())
142 splitBlock(MBB
, *BranchMI
, MDT
);
// MBB still names the original block here, which holds the new branch.
144 MBB
.addSuccessor(EarlyExitBlock
);
145 MDT
->getBase().insertEdge(&MBB
, EarlyExitBlock
);
// Pass entry point. Visible steps: cache target info and the dominator tree,
// scan every instruction once (erasing S_BRANCH to the layout successor and
// collecting early-terminate and return-to-epilog pseudos), then lower the
// collected early terminations and finally redirect return-to-epilog
// instructions that are not at the very end of the function.
//
// MF - the machine function being processed.
//
// NOTE(review): the embedded source numbering skips many lines in this
// function (e.g. 170-173, 176-179, 182-183, 186-193, 197-198, 201-202,
// 231-232, 245-247) and the listing stops before the function ends. Statements
// such as the remaining switch-case bodies, the declaration of DL, the rest of
// one BuildMI call, the updates to MadeChange and the return are therefore not
// visible. Confirm against the upstream file before changing any logic here.
148 bool SILateBranchLowering::runOnMachineFunction(MachineFunction
&MF
) {
149 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
150 TII
= ST
.getInstrInfo();
151 TRI
= &TII
->getRegisterInfo();
152 MDT
= &getAnalysis
<MachineDominatorTreeWrapperPass
>().getDomTree();
// Wave-size dependent choices: wave32 uses S_MOV_B32 and EXEC_LO, otherwise
// S_MOV_B64 and EXEC.
154 MovOpc
= ST
.isWave32() ? AMDGPU::S_MOV_B32
: AMDGPU::S_MOV_B64
;
155 ExecReg
= ST
.isWave32() ? AMDGPU::EXEC_LO
: AMDGPU::EXEC
;
// Work lists filled during the scan and processed afterwards.
157 SmallVector
<MachineInstr
*, 4> EarlyTermInstrs
;
158 SmallVector
<MachineInstr
*, 1> EpilogInstrs
;
159 bool MadeChange
= false;
// make_early_inc_range is used because the loop body may erase the current
// instruction (see the S_BRANCH case).
161 for (MachineBasicBlock
&MBB
: MF
) {
162 for (MachineInstr
&MI
: llvm::make_early_inc_range(MBB
)) {
163 switch (MI
.getOpcode()) {
164 case AMDGPU::S_BRANCH
:
165 // Optimize out branches to the next block.
166 // This only occurs in -O0 when BranchFolding is not executed.
167 if (MBB
.isLayoutSuccessor(MI
.getOperand(0).getMBB())) {
168 assert(&MI
== &MBB
.back());
169 MI
.eraseFromParent();
// NOTE(review): the body of the two chain tail-call cases is not visible in
// this listing; presumably it calls expandChainCall(MI) — confirm.
174 case AMDGPU::SI_CS_CHAIN_TC_W32
:
175 case AMDGPU::SI_CS_CHAIN_TC_W64
:
// Early-terminate pseudos are only recorded here; they are lowered below.
180 case AMDGPU::SI_EARLY_TERMINATE_SCC0
:
181 EarlyTermInstrs
.push_back(&MI
);
// Return-to-epilog pseudos are likewise only recorded here.
184 case AMDGPU::SI_RETURN_TO_EPILOG
:
185 EpilogInstrs
.push_back(&MI
);
194 // Lower any early exit branches first
195 if (!EarlyTermInstrs
.empty()) {
// One shared exit block is created and appended at the end of the function.
196 MachineBasicBlock
*EarlyExitBlock
= MF
.CreateMachineBasicBlock();
199 MF
.insert(MF
.end(), EarlyExitBlock
);
// The exit block starts with a MovOpc instruction followed by the sequence
// produced by generateEndPgm. NOTE(review): the declaration of DL and the
// remaining arguments/operands of this BuildMI call are missing here.
200 BuildMI(*EarlyExitBlock
, EarlyExitBlock
->end(), DL
, TII
->get(MovOpc
),
203 generateEndPgm(*EarlyExitBlock
, EarlyExitBlock
->end(), DL
, TII
, MF
);
// Every recorded pseudo is erased; the branch to the exit block is only
// inserted when the calling convention is not AMDGPU_GS.
205 for (MachineInstr
*Instr
: EarlyTermInstrs
) {
206 // Early termination in GS does nothing
207 if (MF
.getFunction().getCallingConv() != CallingConv::AMDGPU_GS
)
208 earlyTerm(*Instr
, EarlyExitBlock
);
209 Instr
->eraseFromParent();
212 EarlyTermInstrs
.clear();
216 // Now check return to epilog instructions occur at function end
217 if (!EpilogInstrs
.empty()) {
// Created lazily below unless more than one return-to-epilog exists.
218 MachineBasicBlock
*EmptyMBBAtEnd
= nullptr;
219 assert(!MF
.getInfo
<SIMachineFunctionInfo
>()->returnsVoid());
221 // If there are multiple returns to epilog then all will
222 // become jumps to new empty end block.
223 if (EpilogInstrs
.size() > 1) {
224 EmptyMBBAtEnd
= MF
.CreateMachineBasicBlock();
225 MF
.insert(MF
.end(), EmptyMBBAtEnd
);
228 for (auto *MI
: EpilogInstrs
) {
229 auto MBB
= MI
->getParent();
// True when MI is already the last instruction of the last block.
// NOTE(review): the statement guarded by this condition is missing from the
// listing; presumably it skips this instruction — confirm.
230 if (MBB
== &MF
.back() && MI
== &MBB
->back())
233 // SI_RETURN_TO_EPILOG is not the last instruction.
234 // Jump to empty block at function end.
235 if (!EmptyMBBAtEnd
) {
236 EmptyMBBAtEnd
= MF
.CreateMachineBasicBlock();
237 MF
.insert(MF
.end(), EmptyMBBAtEnd
);
// Wire up the new edge in the CFG and the dominator tree, then replace the
// pseudo with an S_BRANCH to the end block.
240 MBB
->addSuccessor(EmptyMBBAtEnd
);
241 MDT
->getBase().insertEdge(MBB
, EmptyMBBAtEnd
);
242 BuildMI(*MBB
, MI
, MI
->getDebugLoc(), TII
->get(AMDGPU::S_BRANCH
))
243 .addMBB(EmptyMBBAtEnd
);
244 MI
->eraseFromParent();
248 EpilogInstrs
.clear();