//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass lowers the pseudo control flow instructions to real
/// machine instructions.
///
/// All control flow is handled using predicated instructions and
/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
/// by writing to the 64-bit EXEC register (each bit corresponds to a
/// single vector ALU). Typically, for predicates, a vector ALU will write
/// to its bit of the VCC register (like EXEC, VCC is 64 bits, one for each
/// Vector ALU) and then the Scalar ALU will AND the VCC register with
/// EXEC to update the predicates.
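///
/// (On wave32 subtargets the same scheme is applied with 32-bit masks and
/// EXEC_LO; see the opcode selection in runOnMachineFunction below.)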
///
/// For example:
/// %vcc = V_CMP_GT_F32 %vgpr1, %vgpr2
/// %sgpr0 = SI_IF %vcc
///   %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0
/// %sgpr0 = SI_ELSE %sgpr0
///   %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0
/// SI_END_CF %sgpr0
///
/// becomes:
///
/// %sgpr0 = S_AND_SAVEEXEC_B64 %vcc  // Save and update the exec mask
/// %sgpr0 = S_XOR_B64 %sgpr0, %exec  // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label0            // This instruction is an optional
///                                   // optimization which allows us to
///                                   // branch if all the bits of
///                                   // EXEC are zero.
/// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
///
/// label0:
/// %sgpr0 = S_OR_SAVEEXEC_B64 %exec  // Restore the exec mask for the ELSE block
/// %exec = S_XOR_B64 %sgpr0, %exec   // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label1            // Use our branch optimization
///                                   // instruction again.
/// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0 // Do the ELSE block
/// label1:
/// %exec = S_OR_B64 %exec, %sgpr0    // Re-enable saved exec mask bits
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include <cassert>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "si-lower-control-flow"

namespace {

class SILowerControlFlow : public MachineFunctionPass {
private:
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  LiveIntervals *LIS = nullptr;
  MachineDominatorTree *DT = nullptr;
  MachineLoopInfo *MLI = nullptr;

  const TargetRegisterClass *BoolRC = nullptr;
  unsigned AndOpc;
  unsigned OrOpc;
  unsigned OrTermOpc;
  unsigned XorOpc;
  unsigned MovTermOpc;
  unsigned Andn2TermOpc;
  unsigned XorTermrOpc;
  unsigned OrSaveExecOpc;
  unsigned Exec;

  void emitIf(MachineInstr &MI);
  void emitElse(MachineInstr &MI);
  void emitIfBreak(MachineInstr &MI);
  void emitLoop(MachineInstr &MI);
  void emitEndCf(MachineInstr &MI);

  void findMaskOperands(MachineInstr &MI, unsigned OpNo,
                        SmallVectorImpl<MachineOperand> &Src) const;

  void combineMasks(MachineInstr &MI);

public:
  static char ID;

  SILowerControlFlow() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI Lower control flow pseudo instructions";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Should preserve the same set that TwoAddressInstructions does.
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreservedID(LiveVariablesID);
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);

    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char SILowerControlFlow::ID = 0;

INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
                "SI lower control flow", false, false)
static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
  MachineOperand &ImpDefSCC = MI.getOperand(3);
  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());

  ImpDefSCC.setIsDead(IsDead);
}

char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
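
// An SI_IF is "simple" when the only user of its saved-exec result is the
// matching SI_END_CF, so the full saved exec mask can be returned instead of
// just the cleared bits (see emitIf).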
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
                       const SIInstrInfo *TII) {
  unsigned SaveExecReg = MI.getOperand(0).getReg();
  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);

  if (U == MRI->use_instr_nodbg_end() ||
      std::next(U) != MRI->use_instr_nodbg_end() ||
      U->getOpcode() != AMDGPU::SI_END_CF)
    return false;

  // Check for SI_KILL_*_TERMINATOR on the path from the if to the endif.
  // If there is any such terminator, simplifications are not safe.
  auto SMBB = MI.getParent();
  auto EMBB = U->getParent();
  DenseSet<const MachineBasicBlock *> Visited;
  SmallVector<MachineBasicBlock *, 4> Worklist(SMBB->succ_begin(),
                                               SMBB->succ_end());

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();

    if (MBB == EMBB || !Visited.insert(MBB).second)
      continue;
    for (auto &Term : MBB->terminators())
      if (TII->isKillTerminator(Term.getOpcode()))
        return false;

    Worklist.append(MBB->succ_begin(), MBB->succ_end());
  }

  return true;
}

void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I(&MI);

  MachineOperand &SaveExec = MI.getOperand(0);
  MachineOperand &Cond = MI.getOperand(1);
  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
         Cond.getSubReg() == AMDGPU::NoSubRegister);

  Register SaveExecReg = SaveExec.getReg();

  MachineOperand &ImpDefSCC = MI.getOperand(4);
  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
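
  // Emitted sequence, roughly (wave64 opcodes shown; wave32 subtargets use
  // the _B32 forms selected in runOnMachineFunction):
  //   %copy = COPY $exec (implicit-def $exec)
  //   %tmp = S_AND_B64 %copy, %cond
  //   %saveexec = S_XOR_B64 %tmp, %copy   ; skipped for a "simple" if
  //   $exec = S_MOV_B64_term %tmp
  //   SI_MASK_BRANCH <endif block>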

  // If there is only one use of save exec register and that use is SI_END_CF,
  // we can optimize SI_IF by returning the full saved exec mask instead of
  // just cleared bits.
  bool SimpleIf = isSimpleIf(MI, MRI, TII);

  // Add an implicit def of exec to discourage scheduling VALU after this which
  // will interfere with trying to form s_and_saveexec_b64 later.
  Register CopyReg = SimpleIf ? SaveExecReg
                              : MRI->createVirtualRegister(BoolRC);
  MachineInstr *CopyExec =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
    .addReg(Exec)
    .addReg(Exec, RegState::ImplicitDefine);

  unsigned Tmp = MRI->createVirtualRegister(BoolRC);

  MachineInstr *And =
    BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
    .addReg(CopyReg)
    .add(Cond);

  setImpSCCDefDead(*And, true);

  MachineInstr *Xor = nullptr;
  if (!SimpleIf) {
    Xor =
      BuildMI(MBB, I, DL, TII->get(XorOpc), SaveExecReg)
      .addReg(Tmp)
      .addReg(CopyReg);
    setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
  }

  // Use a copy that is a terminator to get correct spill code placement with
  // fast regalloc.
  MachineInstr *SetExec =
    BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
    .addReg(Tmp, RegState::Kill);

  // Insert a pseudo terminator to help keep the verifier happy. This will also
  // be used later when inserting skips.
  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
                            .add(MI.getOperand(2));

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->InsertMachineInstrInMaps(*CopyExec);

  // Replace with the AND so we don't need to fix the live interval for the
  // condition register.
  LIS->ReplaceMachineInstrInMaps(MI, *And);

  if (!SimpleIf)
    LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*SetExec);
  LIS->InsertMachineInstrInMaps(*NewBr);

  LIS->removeAllRegUnitsForPhysReg(Exec);
  MI.eraseFromParent();

  // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
  // hard to add another def here but I'm not sure how to correctly update the
  // valno.
  LIS->removeInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(Tmp);
  if (!SimpleIf)
    LIS->createAndComputeVirtRegInterval(CopyReg);
}

void SILowerControlFlow::emitElse(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  Register DstReg = MI.getOperand(0).getReg();
  assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);

  bool ExecModified = MI.getOperand(3).getImm() != 0;
  MachineBasicBlock::iterator Start = MBB.begin();
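
  // Emitted sequence, roughly (wave64 shown):
  //   %copy = COPY <saved exec>            ; at the block start
  //   %save = S_OR_SAVEEXEC_B64 %copy      ; at the block start
  //   %dst = S_AND_B64 $exec, %save        ; only if exec was modified
  //   $exec = S_XOR_B64_term $exec, %dst
  //   SI_MASK_BRANCH <dest block>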

  // We are running before TwoAddressInstructions, and si_else's operands are
  // tied. In order to correctly tie the registers, split this into a copy of
  // the src like it does.
  Register CopyReg = MRI->createVirtualRegister(BoolRC);
  MachineInstr *CopyExec =
    BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
      .add(MI.getOperand(1)); // Saved EXEC

  // This must be inserted before phis and any spill code inserted before the
  // else.
  Register SaveReg = ExecModified ?
    MRI->createVirtualRegister(BoolRC) : DstReg;
  MachineInstr *OrSaveExec =
    BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
    .addReg(CopyReg);

  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();

  MachineBasicBlock::iterator ElsePt(MI);

  if (ExecModified) {
    MachineInstr *And =
      BuildMI(MBB, ElsePt, DL, TII->get(AndOpc), DstReg)
      .addReg(Exec)
      .addReg(SaveReg);

    if (LIS)
      LIS->InsertMachineInstrInMaps(*And);
  }

  MachineInstr *Xor =
    BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
    .addReg(Exec)
    .addReg(DstReg);

  MachineInstr *Branch =
    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
    .addMBB(DestBB);

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();

  LIS->InsertMachineInstrInMaps(*CopyExec);
  LIS->InsertMachineInstrInMaps(*OrSaveExec);

  LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*Branch);

  // src reg is tied to dst reg.
  LIS->removeInterval(DstReg);
  LIS->createAndComputeVirtRegInterval(DstReg);
  LIS->createAndComputeVirtRegInterval(CopyReg);
  if (ExecModified)
    LIS->createAndComputeVirtRegInterval(SaveReg);

  // Let this be recomputed.
  LIS->removeAllRegUnitsForPhysReg(Exec);
}

void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  auto Dst = MI.getOperand(0).getReg();
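
  // Emitted shape, roughly: Dst = (BreakCond & exec) | LoopExitMask, where
  // the AND is omitted when the condition is already masked by exec.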

  // Skip ANDing with exec if the break condition is already masked by exec
  // because it is a V_CMP in the same basic block. (We know the break
  // condition operand was an i1 in IR, so if it is a VALU instruction it must
  // be one with a carry-out.)
  bool SkipAnding = false;
  if (MI.getOperand(1).isReg()) {
    if (MachineInstr *Def = MRI->getUniqueVRegDef(MI.getOperand(1).getReg())) {
      SkipAnding = Def->getParent() == MI.getParent()
          && SIInstrInfo::isVALU(*Def);
    }
  }

  // AND the break condition operand with exec, then OR that into the "loop
  // exit" mask.
  MachineInstr *And = nullptr, *Or = nullptr;
  if (!SkipAnding) {
    And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst)
             .addReg(Exec)
             .add(MI.getOperand(1));
    Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
             .addReg(Dst)
             .add(MI.getOperand(2));
  } else
    Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
             .add(MI.getOperand(1))
             .add(MI.getOperand(2));

  if (LIS) {
    if (And)
      LIS->InsertMachineInstrInMaps(*And);
    LIS->ReplaceMachineInstrInMaps(MI, *Or);
  }

  MI.eraseFromParent();
}

void SILowerControlFlow::emitLoop(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
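
  // Emitted sequence, roughly (wave64 shown):
  //   $exec = S_ANDN2_B64_term $exec, <loop exit mask>
  //   S_CBRANCH_EXECNZ <loop header>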

  MachineInstr *AndN2 =
      BuildMI(MBB, &MI, DL, TII->get(Andn2TermOpc), Exec)
          .addReg(Exec)
          .add(MI.getOperand(0));

  MachineInstr *Branch =
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
          .add(MI.getOperand(1));

  if (LIS) {
    LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
    LIS->InsertMachineInstrInMaps(*Branch);
  }

  MI.eraseFromParent();
}

// Insert \p NewMI (which modifies exec) in place of \p MI in \p MBB, such that
// \p MBB is split as necessary to keep the exec modification in its own block.
static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
                                                        MachineInstr &MI,
                                                        MachineInstr *NewMI,
                                                        MachineDominatorTree *DT,
                                                        LiveIntervals *LIS,
                                                        MachineLoopInfo *MLI) {
  assert(NewMI->isTerminator());

  MachineBasicBlock::iterator InsPt = MI.getIterator();
  if (std::next(MI.getIterator()) == MBB.end()) {
    // Don't bother with a new block.
    MBB.insert(InsPt, NewMI);
    if (LIS)
      LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
    MI.eraseFromParent();
    return &MBB;
  }

  MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *SplitMBB
    = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
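
  // From here on the block really is split: MBB keeps only the new
  // exec-modifying terminator, and SplitMBB receives every other instruction
  // along with MBB's successors.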

  // FIXME: This is working around a MachineDominatorTree API defect.
  //
  // If a previous pass split a critical edge, it may not have been applied to
  // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
  // the CFG of the given block. Make sure to call a dominator tree method that
  // will flush this cache before touching the successors of the block.
  MachineDomTreeNode *NodeMBB = nullptr;
  if (DT)
    NodeMBB = DT->getNode(&MBB);

  // Move everything to the new block, except the end_cf pseudo.
  SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());

  SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(SplitMBB, BranchProbability::getOne());

  MBB.insert(MBB.end(), NewMI);

  if (DT) {
    std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
    DT->addNewBlock(SplitMBB, &MBB);

    // Reparent all of the children to the new block body.
    auto *SplitNode = DT->getNode(SplitMBB);
    for (auto *Child : Children)
      DT->changeImmediateDominator(Child, SplitNode);
  }

  if (MLI) {
    if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
      Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
  }

  if (LIS) {
    LIS->insertMBBInMaps(SplitMBB);
    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
  }

  // All live-ins are forwarded.
  for (auto &LiveIn : MBB.liveins())
    SplitMBB->addLiveIn(LiveIn);

  MI.eraseFromParent();
  return SplitMBB;
}

void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
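
  // The pseudo is hoisted to the top of its block and becomes, roughly
  // (wave64 shown):
  //   $exec = S_OR_B64_term $exec, <saved mask>
  // with the block split so the exec change ends the original block.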

  MachineBasicBlock::iterator InsPt = MBB.begin();

  // First, move the instruction. It's unnecessarily difficult to update
  // LiveIntervals when there's a change in control flow, so move the
  // instruction before changing the blocks.
  MBB.splice(InsPt, &MBB, MI.getIterator());
  if (LIS)
    LIS->handleMove(MI);

  MachineFunction *MF = MBB.getParent();

  // Create instruction without inserting it yet.
  MachineInstr *NewMI
    = BuildMI(*MF, DL, TII->get(OrTermOpc), Exec)
      .addReg(Exec)
      .add(MI.getOperand(0));
  insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
}

// Returns replacement operands for a logical operation: either a single
// result for exec, or two operands if the source was another equivalent
// operation.
void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
       SmallVectorImpl<MachineOperand> &Src) const {
  MachineOperand &Op = MI.getOperand(OpNo);
  if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) {
    Src.push_back(Op);
    return;
  }

  MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
  if (!Def || Def->getParent() != MI.getParent() ||
      !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
    return;

  // Make sure we do not modify exec between def and use.
  // A copy with an implicitly defined exec inserted earlier is an exclusion;
  // it does not really modify exec.
  for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
    if (I->modifiesRegister(Exec, TRI) &&
        !(I->isCopy() && I->getOperand(0).getReg() != Exec))
      return;

  for (const auto &SrcOp : Def->explicit_operands())
    if (SrcOp.isReg() && SrcOp.isUse() &&
        (Register::isVirtualRegister(SrcOp.getReg()) || SrcOp.getReg() == Exec))
      Src.push_back(SrcOp);
}

// Search and combine pairs of equivalent instructions, like
//   S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
//   S_OR_B64  x, (S_OR_B64  x, y) => S_OR_B64  x, y
// One of the operands is the exec mask.
void SILowerControlFlow::combineMasks(MachineInstr &MI) {
  assert(MI.getNumExplicitOperands() == 3);
  SmallVector<MachineOperand, 4> Ops;
  unsigned OpToReplace = 1;
  findMaskOperands(MI, 1, Ops);
  if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
  findMaskOperands(MI, 2, Ops);
  if (Ops.size() != 3) return;

  unsigned UniqueOpndIdx;
  if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
  else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
  else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
  else return;

  unsigned Reg = MI.getOperand(OpToReplace).getReg();
  MI.RemoveOperand(OpToReplace);
  MI.addOperand(Ops[UniqueOpndIdx]);
  if (MRI->use_empty(Reg))
    MRI->getUniqueVRegDef(Reg)->eraseFromParent();
}

bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  // This doesn't actually need LiveIntervals, but we can preserve them.
  LIS = getAnalysisIfAvailable<LiveIntervals>();
  DT = getAnalysisIfAvailable<MachineDominatorTree>();
  MLI = getAnalysisIfAvailable<MachineLoopInfo>();

  MRI = &MF.getRegInfo();
  BoolRC = TRI->getBoolRC();
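
  // Pick the 32-bit mask opcodes and EXEC_LO on wave32 subtargets, and the
  // 64-bit forms otherwise; all of the lowering above goes through these
  // fields.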
  if (ST.isWave32()) {
    AndOpc = AMDGPU::S_AND_B32;
    OrOpc = AMDGPU::S_OR_B32;
    OrTermOpc = AMDGPU::S_OR_B32_term;
    XorOpc = AMDGPU::S_XOR_B32;
    MovTermOpc = AMDGPU::S_MOV_B32_term;
    Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
    XorTermrOpc = AMDGPU::S_XOR_B32_term;
    OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
    Exec = AMDGPU::EXEC_LO;
  } else {
    AndOpc = AMDGPU::S_AND_B64;
    OrOpc = AMDGPU::S_OR_B64;
    OrTermOpc = AMDGPU::S_OR_B64_term;
    XorOpc = AMDGPU::S_XOR_B64;
    MovTermOpc = AMDGPU::S_MOV_B64_term;
    Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
    XorTermrOpc = AMDGPU::S_XOR_B64_term;
    OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
    Exec = AMDGPU::EXEC;
  }

  MachineFunction::iterator NextBB;
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; BI = NextBB) {
    NextBB = std::next(BI);
    MachineBasicBlock *MBB = &*BI;

    MachineBasicBlock::iterator I, Next, Last;

    for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      case AMDGPU::SI_IF:
        emitIf(MI);
        break;

      case AMDGPU::SI_ELSE:
        emitElse(MI);
        break;

      case AMDGPU::SI_IF_BREAK:
        emitIfBreak(MI);
        break;

      case AMDGPU::SI_LOOP:
        emitLoop(MI);
        break;

      case AMDGPU::SI_END_CF: {
        MachineInstr *NextMI = nullptr;

        if (Next != MBB->end())
          NextMI = &*Next;

        emitEndCf(MI);

        if (NextMI) {
          MBB = NextMI->getParent();
          Next = NextMI->getIterator();
          Last = MBB->end();
        }

        NextBB = std::next(MBB->getIterator());
        BE = MF.end();
        break;
      }
      case AMDGPU::S_AND_B64:
      case AMDGPU::S_OR_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_OR_B32:
        // Cleanup bit manipulations on exec mask.
        combineMasks(MI);
        Last = I;
        continue;

      default:
        Last = I;
        continue;
      }

      // Replay newly inserted code to combine masks.
      Next = (Last == MBB->end()) ? MBB->begin() : Last;
    }
  }

  return true;
}