1 //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Computations in WWM can overwrite values in inactive channels for
11 /// variables that the register allocator thinks are dead. This pass adds fake
12 /// uses of those variables to their def(s) to make sure that they aren't
15 /// As an example, consider this snippet:
16 /// %vgpr0 = V_MOV_B32_e32 0.0
19 /// %vgpr2 = WWM killed %vgpr1
20 /// ... = killed %vgpr2
21 /// %vgpr0 = V_MOV_B32_e32 1.0
25 /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
26 /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
27 /// writing %vgpr1 would only write to channels that would be clobbered by the
28 /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
29 /// it would clobber even the inactive channels for which the if-condition is
30 /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
31 /// of %vgpr0 to its def to make sure they aren't allocated to the
34 /// In general, we need to figure out what registers might have their inactive
35 /// channels which are eventually used accidentally clobbered by a WWM
36 /// instruction. We do that by spotting three separate cases of registers:
38 /// 1. A "then phi": the value resulting from phi elimination of a phi node at
39 /// the end of an if..endif. If there is WWM code in the "then", then we
40 /// make the def at the end of the "then" branch a partial def by adding an
41 /// implicit use of the register.
43 /// 2. A "loop exit register": a value written inside a loop but used outside the
44 /// loop, where there is WWM code inside the loop (the case in the example
45 /// above). We add an implicit_def of the register in the loop pre-header,
46 /// and make the original def a partial def by adding an implicit use of the
49 /// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
50 /// in a loop header. If there is WWM code inside the loop, then we make all
51 /// defs inside the loop partial defs by adding an implicit use of the
52 /// register on each one.
54 /// Note that we do not need to consider an if..else..endif phi. We only need to
55 /// consider non-uniform control flow, and control flow structurization would
56 /// have transformed a non-uniform if..else..endif into two if..endifs.
58 /// The analysis to detect these cases relies on a property of the MIR
59 /// arising from this pass running straight after PHIElimination and before any
60 /// coalescing: that any virtual register with more than one definition must be
61 /// the new register added to lower a phi node by PHIElimination.
63 /// FIXME: We should detect whether a register in one of the above categories is
64 /// already live at the WWM code before deciding to add the implicit uses to
65 /// synthesize its liveness.
67 /// FIXME: I believe this whole scheme may be flawed due to the possibility of
68 /// the register allocator doing live interval splitting.
70 //===----------------------------------------------------------------------===//
73 #include "AMDGPUSubtarget.h"
74 #include "SIInstrInfo.h"
75 #include "SIRegisterInfo.h"
76 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77 #include "llvm/ADT/DepthFirstIterator.h"
78 #include "llvm/ADT/SparseBitVector.h"
79 #include "llvm/CodeGen/LiveIntervals.h"
80 #include "llvm/CodeGen/MachineDominators.h"
81 #include "llvm/CodeGen/MachineFunctionPass.h"
82 #include "llvm/CodeGen/MachineLoopInfo.h"
83 #include "llvm/CodeGen/Passes.h"
84 #include "llvm/CodeGen/TargetRegisterInfo.h"
88 #define DEBUG_TYPE "si-fix-wwm-liveness"
92 class SIFixWWMLiveness
: public MachineFunctionPass
{
94 MachineDominatorTree
*DomTree
;
95 MachineLoopInfo
*LoopInfo
;
96 LiveIntervals
*LIS
= nullptr;
97 const SIInstrInfo
*TII
;
98 const SIRegisterInfo
*TRI
;
99 MachineRegisterInfo
*MRI
;
101 std::vector
<MachineInstr
*> WWMs
;
102 std::vector
<MachineOperand
*> ThenDefs
;
103 std::vector
<std::pair
<MachineOperand
*, MachineLoop
*>> LoopExitDefs
;
104 std::vector
<std::pair
<MachineOperand
*, MachineLoop
*>> LoopPhiDefs
;
109 SIFixWWMLiveness() : MachineFunctionPass(ID
) {
110 initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
113 bool runOnMachineFunction(MachineFunction
&MF
) override
;
115 StringRef
getPassName() const override
{ return "SI Fix WWM Liveness"; }
117 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
118 AU
.addRequiredID(MachineDominatorsID
);
119 AU
.addRequiredID(MachineLoopInfoID
);
120 // Should preserve the same set that TwoAddressInstructions does.
121 AU
.addPreserved
<SlotIndexes
>();
122 AU
.addPreserved
<LiveIntervals
>();
123 AU
.addPreservedID(LiveVariablesID
);
124 AU
.addPreservedID(MachineLoopInfoID
);
125 AU
.addPreservedID(MachineDominatorsID
);
126 AU
.setPreservesCFG();
127 MachineFunctionPass::getAnalysisUsage(AU
);
131 void processDef(MachineOperand
&DefOpnd
);
132 bool processThenDef(MachineOperand
*DefOpnd
);
133 bool processLoopExitDef(MachineOperand
*DefOpnd
, MachineLoop
*Loop
);
134 bool processLoopPhiDef(MachineOperand
*DefOpnd
, MachineLoop
*Loop
);
137 } // End anonymous namespace.
139 INITIALIZE_PASS_BEGIN(SIFixWWMLiveness
, DEBUG_TYPE
,
140 "SI fix WWM liveness", false, false)
141 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
142 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
143 INITIALIZE_PASS_END(SIFixWWMLiveness
, DEBUG_TYPE
,
144 "SI fix WWM liveness", false, false)
146 char SIFixWWMLiveness::ID
= 0;
148 char &llvm::SIFixWWMLivenessID
= SIFixWWMLiveness::ID
;
150 FunctionPass
*llvm::createSIFixWWMLivenessPass() {
151 return new SIFixWWMLiveness();
154 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction
&MF
) {
155 LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF
.getName() << "\n");
156 bool Modified
= false;
158 // This doesn't actually need LiveIntervals, but we can preserve them.
159 LIS
= getAnalysisIfAvailable
<LiveIntervals
>();
161 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
163 TII
= ST
.getInstrInfo();
164 TRI
= &TII
->getRegisterInfo();
165 MRI
= &MF
.getRegInfo();
167 DomTree
= &getAnalysis
<MachineDominatorTree
>();
168 LoopInfo
= &getAnalysis
<MachineLoopInfo
>();
170 // Scan the function to find the WWM sections and the candidate registers for
171 // having liveness modified.
172 for (MachineBasicBlock
&MBB
: MF
) {
173 for (MachineInstr
&MI
: MBB
) {
174 if (MI
.getOpcode() == AMDGPU::EXIT_WWM
)
177 for (MachineOperand
&DefOpnd
: MI
.defs()) {
178 if (DefOpnd
.isReg()) {
179 unsigned Reg
= DefOpnd
.getReg();
180 if (TRI
->isVGPR(*MRI
, Reg
))
188 // Synthesize liveness over WWM sections as required.
189 for (auto ThenDef
: ThenDefs
)
190 Modified
|= processThenDef(ThenDef
);
191 for (auto LoopExitDef
: LoopExitDefs
)
192 Modified
|= processLoopExitDef(LoopExitDef
.first
, LoopExitDef
.second
);
193 for (auto LoopPhiDef
: LoopPhiDefs
)
194 Modified
|= processLoopPhiDef(LoopPhiDef
.first
, LoopPhiDef
.second
);
199 LoopExitDefs
.clear();
205 // During the function scan, process an operand that defines a VGPR.
206 // This categorizes the register and puts it in the appropriate list for later
207 // use when processing a WWM section.
208 void SIFixWWMLiveness::processDef(MachineOperand
&DefOpnd
) {
209 unsigned Reg
= DefOpnd
.getReg();
210 // Get all the defining instructions. For convenience, make Defs[0] the def
212 SmallVector
<const MachineInstr
*, 4> Defs
;
213 Defs
.push_back(DefOpnd
.getParent());
214 for (auto &MI
: MRI
->def_instructions(Reg
)) {
215 if (&MI
!= DefOpnd
.getParent())
218 // Check whether this def dominates all the others. If not, ignore this def.
219 // Either it is going to be processed when the scan encounters its other def
220 // that dominates all defs, or there is no def that dominates all others.
221 // The latter case is an eliminated phi from an if..else..endif or similar,
222 // which must be for uniform control flow so can be ignored.
223 // Because this pass runs shortly after PHIElimination, we assume that any
224 // multi-def register is a lowered phi, and thus has each def in a separate
226 for (unsigned I
= 1; I
!= Defs
.size(); ++I
) {
227 if (!DomTree
->dominates(Defs
[0]->getParent(), Defs
[I
]->getParent()))
230 // Check for the case of an if..endif lowered phi: It has two defs, one
231 // dominates the other, and there is a single use in a successor of the
233 // Later we will spot any WWM code inside
234 // the "then" clause and turn the second def into a partial def so its
235 // liveness goes through the WWM code in the "then" clause.
236 if (Defs
.size() == 2) {
237 auto DomDefBlock
= Defs
[0]->getParent();
238 if (DomDefBlock
->succ_size() == 2 && MRI
->hasOneUse(Reg
)) {
239 auto UseBlock
= MRI
->use_begin(Reg
)->getParent()->getParent();
240 for (auto Succ
: DomDefBlock
->successors()) {
241 if (Succ
== UseBlock
) {
242 LLVM_DEBUG(dbgs() << printReg(Reg
, TRI
) << " is a then phi reg\n");
243 ThenDefs
.push_back(&DefOpnd
);
249 // Check for the case of a non-lowered-phi register (single def) that exits
250 // a loop, that is, it has a use that is outside a loop that the def is
251 // inside. We find the outermost loop that the def is inside but a use is
252 // outside. Later we will spot any WWM code inside that loop and then make
253 // the def a partial def so its liveness goes round the loop and through the
255 if (Defs
.size() == 1) {
256 auto Loop
= LoopInfo
->getLoopFor(Defs
[0]->getParent());
259 bool IsLoopExit
= false;
260 for (auto &Use
: MRI
->use_instructions(Reg
)) {
261 auto UseBlock
= Use
.getParent();
262 if (Loop
->contains(UseBlock
))
265 while (auto Parent
= Loop
->getParentLoop()) {
266 if (Parent
->contains(UseBlock
))
273 LLVM_DEBUG(dbgs() << printReg(Reg
, TRI
)
274 << " is a loop exit reg with loop header at "
275 << "bb." << Loop
->getHeader()->getNumber() << "\n");
276 LoopExitDefs
.push_back(std::pair
<MachineOperand
*, MachineLoop
*>(
280 // Check for the case of a lowered single-preheader-loop phi, that is, a
281 // multi-def register where the dominating def is in the loop pre-header and
282 // all other defs are in backedges. Later we will spot any WWM code inside
283 // that loop and then make the backedge defs partial defs so the liveness
284 // goes through the WWM code.
285 // Note that we are ignoring multi-preheader loops on the basis that the
286 // structurizer does not allow that for non-uniform loops.
287 // There must be a single use in the loop header.
288 if (!MRI
->hasOneUse(Reg
))
290 auto UseBlock
= MRI
->use_begin(Reg
)->getParent()->getParent();
291 auto Loop
= LoopInfo
->getLoopFor(UseBlock
);
292 if (!Loop
|| Loop
->getHeader() != UseBlock
293 || Loop
->contains(Defs
[0]->getParent())) {
294 LLVM_DEBUG(dbgs() << printReg(Reg
, TRI
)
295 << " is multi-def but single use not in loop header\n");
298 for (unsigned I
= 1; I
!= Defs
.size(); ++I
) {
299 if (!Loop
->contains(Defs
[I
]->getParent()))
302 LLVM_DEBUG(dbgs() << printReg(Reg
, TRI
)
303 << " is a loop phi reg with loop header at "
304 << "bb." << Loop
->getHeader()->getNumber() << "\n");
305 LoopPhiDefs
.push_back(
306 std::pair
<MachineOperand
*, MachineLoop
*>(&DefOpnd
, Loop
));
309 // Process a then phi def: It has two defs, one dominates the other, and there
310 // is a single use in a successor of the dominant def. Here we spot any WWM
311 // code inside the "then" clause and turn the second def into a partial def so
312 // its liveness goes through the WWM code in the "then" clause.
313 bool SIFixWWMLiveness::processThenDef(MachineOperand
*DefOpnd
) {
314 LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd
->getParent());
315 if (DefOpnd
->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF
) {
316 // Ignore if dominating def is undef.
317 LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
320 unsigned Reg
= DefOpnd
->getReg();
321 // Get the use block, which is the endif block.
322 auto UseBlock
= MRI
->use_instr_begin(Reg
)->getParent();
323 // Check whether there is WWM code inside the then branch. The WWM code must
324 // be dominated by the if but not dominated by the endif.
325 bool ContainsWWM
= false;
326 for (auto WWM
: WWMs
) {
327 if (DomTree
->dominates(DefOpnd
->getParent()->getParent(), WWM
->getParent())
328 && !DomTree
->dominates(UseBlock
, WWM
->getParent())) {
329 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM
);
336 // Get the other def.
337 MachineInstr
*OtherDef
= nullptr;
338 for (auto &MI
: MRI
->def_instructions(Reg
)) {
339 if (&MI
!= DefOpnd
->getParent())
342 // Make it a partial def.
343 OtherDef
->addOperand(MachineOperand::CreateReg(Reg
, false, /*isImp=*/true));
344 LLVM_DEBUG(dbgs() << *OtherDef
);
348 // Process a loop exit def, that is, a register with a single use in a loop
349 // that has a use outside the loop. Here we spot any WWM code inside that loop
350 // and then make the def a partial def so its liveness goes round the loop and
351 // through the WWM code.
352 bool SIFixWWMLiveness::processLoopExitDef(MachineOperand
*DefOpnd
,
354 LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd
->getParent());
355 // Check whether there is WWM code inside the loop.
356 bool ContainsWWM
= false;
357 for (auto WWM
: WWMs
) {
358 if (Loop
->contains(WWM
->getParent())) {
359 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM
);
366 unsigned Reg
= DefOpnd
->getReg();
367 // Add a new implicit_def in loop preheader(s).
368 for (auto Pred
: Loop
->getHeader()->predecessors()) {
369 if (!Loop
->contains(Pred
)) {
370 auto ImplicitDef
= BuildMI(*Pred
, Pred
->getFirstTerminator(), DebugLoc(),
371 TII
->get(TargetOpcode::IMPLICIT_DEF
), Reg
);
372 LLVM_DEBUG(dbgs() << *ImplicitDef
);
376 // Make the original def partial.
377 DefOpnd
->getParent()->addOperand(MachineOperand::CreateReg(
378 Reg
, false, /*isImp=*/true));
379 LLVM_DEBUG(dbgs() << *DefOpnd
->getParent());
383 // Process a loop phi def, that is, a multi-def register where the dominating
384 // def is in the loop pre-header and all other defs are in backedges. Here we
385 // spot any WWM code inside that loop and then make the backedge defs partial
386 // defs so the liveness goes through the WWM code.
387 bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand
*DefOpnd
,
389 LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd
->getParent());
390 // Check whether there is WWM code inside the loop.
391 bool ContainsWWM
= false;
392 for (auto WWM
: WWMs
) {
393 if (Loop
->contains(WWM
->getParent())) {
394 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM
);
401 unsigned Reg
= DefOpnd
->getReg();
402 // Remove kill mark from uses.
403 for (auto &Use
: MRI
->use_operands(Reg
))
404 Use
.setIsKill(false);
405 // Make all defs except the dominating one partial defs.
406 SmallVector
<MachineInstr
*, 4> Defs
;
407 for (auto &Def
: MRI
->def_instructions(Reg
))
408 Defs
.push_back(&Def
);
409 for (auto Def
: Defs
) {
410 if (DefOpnd
->getParent() == Def
)
412 Def
->addOperand(MachineOperand::CreateReg(Reg
, false, /*isImp=*/true));
413 LLVM_DEBUG(dbgs() << *Def
);