//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "structcfg"

#define DEFAULT_VEC_SLOTS 8
//===----------------------------------------------------------------------===//
//
// Statistics for CFGStructurizer.
//
//===----------------------------------------------------------------------===//

STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
                                    "matched");
STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
                                    "matched");
STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
namespace llvm {

void initializeAMDGPUCFGStructurizerPass(PassRegistry &);

} // end namespace llvm
namespace {

//===----------------------------------------------------------------------===//
//
// Miscellaneous utility for CFGStructurizer.
//
//===----------------------------------------------------------------------===//

#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");

#define SHOWNEWBLK(b, msg)                                                     \
  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
             dbgs() << "\n";);

#define SHOWBLK_DETAIL(b, msg)                                                 \
  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
             b->print(dbgs());                                                 \
             dbgs() << "\n";);

#define INVALIDSCCNUM -1
//===----------------------------------------------------------------------===//
//
// supporting data structure for CFGStructurizer
//
//===----------------------------------------------------------------------===//

class BlockInformation {
public:
  bool IsRetired = false;
  int SccNum = INVALIDSCCNUM;

  BlockInformation() = default;
};
//===----------------------------------------------------------------------===//
//
// CFGStructurizer
//
//===----------------------------------------------------------------------===//

class AMDGPUCFGStructurizer : public MachineFunctionPass {
public:
  using MBBVector = SmallVector<MachineBasicBlock *, 32>;
  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;

  enum PathToKind {
    Not_SinglePath = 0,
    SinglePath_InPath = 1,
    SinglePath_NotInPath = 2
  };

  static char ID;

  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
  }
  StringRef getPassName() const override {
    return "AMDGPU Control Flow Graph structurizer Pass";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  /// Perform the CFG structurization
  bool run();

  /// Perform the CFG preparation
  /// This step will remove every unconditional/dead jump instruction and make
  /// sure all loops have an exit block
  bool prepare();
  bool runOnMachineFunction(MachineFunction &MF) override {
    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
    TRI = &TII->getRegisterInfo();
    LLVM_DEBUG(MF.dump(););
    OrderedBlks.clear();
    Visited.clear();
    FuncRep = &MF;
    MLI = &getAnalysis<MachineLoopInfo>();
    LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
    MDT = &getAnalysis<MachineDominatorTree>();
    LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
    PDT = &getAnalysis<MachinePostDominatorTree>();
    LLVM_DEBUG(PDT->print(dbgs()););
    prepare();
    run();
    LLVM_DEBUG(MF.dump(););
    return true;
  }
protected:
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *PDT;
  MachineLoopInfo *MLI;
  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;
  /// Print the ordered Blocks.
  void printOrderedBlocks() const {
    size_t i = 0;
    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
         iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd;
         ++iterBlk, ++i) {
      dbgs() << "BB" << (*iterBlk)->getNumber();
      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
      if (i != 0 && i % 10 == 0) {
        dbgs() << "\n";
      }
    }
  }
  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
    for (MachineLoop::iterator iter = LoopInfo.begin(),
         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
      (*iter)->print(dbgs(), 0);
    }
  }
  int getSCCNum(MachineBasicBlock *MBB) const;
  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
  bool hasBackEdge(MachineBasicBlock *MBB) const;
  bool isRetiredBlock(MachineBasicBlock *MBB) const;
  bool isActiveLoophead(MachineBasicBlock *MBB) const;
  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
                          bool AllowSideEntry = true) const;
  int countActiveBlock(MBBVector::const_iterator It,
                       MBBVector::const_iterator E) const;
  bool needMigrateBlock(MachineBasicBlock *MBB) const;
  void reversePredicateSetter(MachineBasicBlock::iterator I,
                              MachineBasicBlock &MBB);

  /// Compute the reversed DFS post order of Blocks
  void orderBlocks(MachineFunction *MF);
  // Function originally from CFGStructTraits
  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
                      const DebugLoc &DL = DebugLoc());
  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
                                  const DebugLoc &DL = DebugLoc());
  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
                              const DebugLoc &DL);
  void insertCondBranchBefore(MachineBasicBlock *MBB,
                              MachineBasicBlock::iterator I, int NewOpcode,
                              int RegNum, const DebugLoc &DL);
  static int getBranchNzeroOpcode(int OldOpcode);
  static int getBranchZeroOpcode(int OldOpcode);
  static int getContinueNzeroOpcode(int OldOpcode);
  static int getContinueZeroOpcode(int OldOpcode);
  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
                                           MachineInstr *MI);
  static bool isCondBranch(MachineInstr *MI);
  static bool isUncondBranch(MachineInstr *MI);
  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
  ///
  /// BB with backward-edge could have move instructions after the branch
  /// instruction.  Such move instructions "belong to" the loop backward-edge.
  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
  static bool isReturnBlock(MachineBasicBlock *MBB);
  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
                                 MachineBasicBlock *SrcMBB);
  static MachineBasicBlock *clone(MachineBasicBlock *MBB);

  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
  /// because the AMDGPU instruction is not recognized as a terminator; fix
  /// this and retire this routine.
  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
                                  MachineBasicBlock *OldMBB,
                                  MachineBasicBlock *NewBlk);

  static void wrapup(MachineBasicBlock *MBB);
  int patternMatch(MachineBasicBlock *MBB);
  int patternMatchGroup(MachineBasicBlock *MBB);
  int serialPatternMatch(MachineBasicBlock *MBB);
  int ifPatternMatch(MachineBasicBlock *MBB);
  int loopendPatternMatch();
  int mergeLoop(MachineLoop *LoopRep);
  /// Return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
  /// the same loop with LoopLandInfo; without explicitly keeping track of
  /// loopContBlks and loopBreakBlks, this is a way to get that information.
  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
                                   MachineBasicBlock *Src2MBB);
  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
                       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
                          MachineBasicBlock *TrueMBB,
                          MachineBasicBlock *FalseMBB);
  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
                              MachineBasicBlock *TrueMBB,
                              MachineBasicBlock *FalseMBB,
                              MachineBasicBlock **LandMBBPtr);
  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
                                   MachineBasicBlock *TrueMBB,
                                   MachineBasicBlock *FalseMBB,
                                   MachineBasicBlock *LandMBB,
                                   bool Detail = false);
  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
                         MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
  void mergeSerialBlock(MachineBasicBlock *DstMBB,
                        MachineBasicBlock *SrcMBB);
  void mergeIfthenelseBlock(MachineInstr *BranchMI,
                            MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
                            MachineBasicBlock *FalseMBB,
                            MachineBasicBlock *LandMBB);
  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
                          MachineBasicBlock *LandMBB);
  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
                           MachineBasicBlock *LandMBB);
  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
                           MachineBasicBlock *ContMBB);
  /// normalizeInfiniteLoopExit change
  ///   B1:
  ///        uncond_br LoopHeader
  ///
  /// to
  ///   B1:
  ///        cond_br 1 LoopHeader dummyExit
  /// and return the newly added dummy exit block
  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
  void removeUnconditionalBranch(MachineBasicBlock *MBB);

  /// Remove duplicate branch instructions in a block. For instance
  ///   B0:
  ///     cond_br X B1 B2
  ///     cond_br X B1 B2
  /// is transformed to
  ///   B0:
  ///     cond_br X B1 B2
  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
  void removeSuccessor(MachineBasicBlock *MBB);
  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
                                              MachineBasicBlock *PredMBB);
  void migrateInstruction(MachineBasicBlock *SrcMBB,
                          MachineBasicBlock *DstMBB,
                          MachineBasicBlock::iterator I);
  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
  void retireBlock(MachineBasicBlock *MBB);
private:
  MBBInfoMap BlockInfoMap;
  LoopLandInfoMap LLInfoMap;
  std::map<MachineLoop *, bool> Visited;
  MachineFunction *FuncRep;
  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
};

} // end anonymous namespace

char AMDGPUCFGStructurizer::ID = 0;
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
  if (It == BlockInfoMap.end())
    return INVALIDSCCNUM;
  return (*It).second->SccNum;
}
MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
    const {
  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
  if (It == LLInfoMap.end())
    return nullptr;
  return (*It).second;
}
bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
  if (!LoopRep)
    return false;
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  return MBB->isSuccessor(LoopHeader);
}
bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
  if (It == BlockInfoMap.end())
    return false;
  return (*It).second->IsRetired;
}
bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
  while (LoopRep && LoopRep->getHeader() == MBB) {
    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
    if (!LoopLand)
      return true;
    if (!isRetiredBlock(LoopLand))
      return true;
    LoopRep = LoopRep->getParentLoop();
  }
  return false;
}
AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
    bool AllowSideEntry) const {
  assert(DstMBB);
  if (SrcMBB == DstMBB)
    return SinglePath_InPath;
  while (SrcMBB && SrcMBB->succ_size() == 1) {
    SrcMBB = *SrcMBB->succ_begin();
    if (SrcMBB == DstMBB)
      return SinglePath_InPath;
    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
      return Not_SinglePath;
  }
  if (SrcMBB && SrcMBB->succ_size() == 0)
    return SinglePath_NotInPath;
  return Not_SinglePath;
}
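
// Illustrative examples for singlePathTo (block names are hypothetical, not
// taken from any particular function):
//
//   A -> B -> C -> D       singlePathTo(A, D) == SinglePath_InPath
//   A -> B -> exit         singlePathTo(A, D) == SinglePath_NotInPath
//   A -> {B, C}            singlePathTo(A, D) == Not_SinglePath (A already
//                          has two successors, so the walk stops)
//
// With AllowSideEntry == false the walk also gives up as soon as an
// intermediate block has more than one predecessor.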
int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
                                            MBBVector::const_iterator E) const {
  int Count = 0;
  while (It != E) {
    if (!isRetiredBlock(*It))
      ++Count;
    ++It;
  }
  return Count;
}
bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
  unsigned BlockSizeThreshold = 30;
  unsigned CloneInstrThreshold = 100;
  bool MultiplePreds = MBB && (MBB->pred_size() > 1);

  if (!MultiplePreds)
    return false;
  unsigned BlkSize = MBB->size();
  return ((BlkSize > BlockSizeThreshold) &&
          (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
}
void AMDGPUCFGStructurizer::reversePredicateSetter(
    MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
  assert(I.isValid() && "Expected valid iterator");
  for (;; --I) {
    if (I == MBB.end())
      continue;
    if (I->getOpcode() == R600::PRED_X) {
      switch (I->getOperand(2).getImm()) {
      case R600::PRED_SETE_INT:
        I->getOperand(2).setImm(R600::PRED_SETNE_INT);
        return;
      case R600::PRED_SETNE_INT:
        I->getOperand(2).setImm(R600::PRED_SETE_INT);
        return;
      case R600::PRED_SETE:
        I->getOperand(2).setImm(R600::PRED_SETNE);
        return;
      case R600::PRED_SETNE:
        I->getOperand(2).setImm(R600::PRED_SETE);
        return;
      default:
        llvm_unreachable("PRED_X Opcode invalid!");
      }
    }
  }
}
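
// Illustrative effect of reversePredicateSetter (operand spelling is
// hypothetical, only the comparison in operand 2 matters):
//
//   before:  %p = PRED_X PRED_SETE_INT  %a, %b
//   after:   %p = PRED_X PRED_SETNE_INT %a, %b
//
// Only the comparison opcode stored in operand 2 is flipped; the rest of the
// PRED_X instruction is left untouched.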
void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
                                           int NewOpcode, const DebugLoc &DL) {
  MachineInstr *MI =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
  MBB->push_back(MI);
  //assume the instruction doesn't take any reg operand ...
  SHOWNEWINSTR(MI);
}
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
                                                       int NewOpcode,
                                                       const DebugLoc &DL) {
  MachineInstr *MI =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
  if (MBB->begin() != MBB->end())
    MBB->insert(MBB->begin(), MI);
  else
    MBB->push_back(MI);
  //assume the instruction doesn't take any reg operand ...
  SHOWNEWINSTR(MI);
  return MI;
}
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
    MachineBasicBlock::iterator I, int NewOpcode) {
  MachineInstr *OldMI = &(*I);
  MachineBasicBlock *MBB = OldMI->getParent();
  MachineInstr *NewMBB =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
  MBB->insert(I, NewMBB);
  //assume the instruction doesn't take any reg operand ...
  SHOWNEWINSTR(NewMBB);
  return NewMBB;
}
void AMDGPUCFGStructurizer::insertCondBranchBefore(
    MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
  MachineInstr *OldMI = &(*I);
  MachineBasicBlock *MBB = OldMI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
  MBB->insert(I, NewMI);
  MachineInstrBuilder MIB(*MF, NewMI);
  MIB.addReg(OldMI->getOperand(1).getReg(), false);
  SHOWNEWINSTR(NewMI);
  //erase later oldInstr->eraseFromParent();
}
void AMDGPUCFGStructurizer::insertCondBranchBefore(
    MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
    int RegNum, const DebugLoc &DL) {
  MachineFunction *MF = blk->getParent();
  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
  //insert before
  blk->insert(I, NewInstr);
  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
  SHOWNEWINSTR(NewInstr);
}
int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case R600::JUMP_COND:
  case R600::JUMP: return R600::IF_PREDICATE_SET;
  case R600::BRANCH_COND_i32:
  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}
int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case R600::JUMP_COND:
  case R600::JUMP: return R600::IF_PREDICATE_SET;
  case R600::BRANCH_COND_i32:
  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}
int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case R600::JUMP_COND:
  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}
int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case R600::JUMP_COND:
  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}
MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
  return MI->getOperand(0).getMBB();
}
void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
                                          MachineBasicBlock *MBB) {
  MI->getOperand(0).setMBB(MBB);
}
MachineBasicBlock *
AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
                                      MachineInstr *MI) {
  assert(MBB->succ_size() == 2);
  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
  MachineBasicBlock::succ_iterator Next = It;
  ++Next;
  return (*It == TrueBranch) ? *Next : *It;
}
bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case R600::JUMP_COND:
  case R600::BRANCH_COND_i32:
  case R600::BRANCH_COND_f32: return true;
  default:
    return false;
  }
  return false;
}
bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case R600::JUMP:
  case R600::BRANCH:
    return true;
  default:
    return false;
  }
  return false;
}
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
  //get DebugLoc from the first MachineBasicBlock instruction with debug info
  DebugLoc DL;
  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
       ++It) {
    MachineInstr *instr = &(*It);
    if (instr->getDebugLoc())
      DL = instr->getDebugLoc();
  }
  return DL;
}
MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
    MachineBasicBlock *MBB) {
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
  MachineInstr *MI = &*It;
  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
    return MI;
  return nullptr;
}
*AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
617 MachineBasicBlock
*MBB
) {
618 for (MachineBasicBlock::reverse_iterator It
= MBB
->rbegin(), E
= MBB
->rend();
621 MachineInstr
*MI
= &*It
;
623 if (isCondBranch(MI
) || isUncondBranch(MI
))
625 else if (!TII
->isMov(MI
->getOpcode()))
632 MachineInstr
*AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock
*MBB
) {
633 MachineBasicBlock::reverse_iterator It
= MBB
->rbegin();
634 if (It
!= MBB
->rend()) {
635 MachineInstr
*instr
= &(*It
);
636 if (instr
->getOpcode() == R600::RETURN
)
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
  MachineInstr *MI = getReturnInstr(MBB);
  bool IsReturn = (MBB->succ_size() == 0);
  if (MI)
    assert(IsReturn);
  else if (IsReturn)
    LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
                      << " is return block without RETURN instr\n";);
  return IsReturn;
}
void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
                                               MachineBasicBlock *SrcMBB) {
  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
}
MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
  MachineFunction *Func = MBB->getParent();
  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
  Func->push_back(NewMBB);  //insert to function
  for (const MachineInstr &It : *MBB)
    NewMBB->push_back(Func->CloneMachineInstr(&It));
  return NewMBB;
}
void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
    MachineBasicBlock *NewBlk) {
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
  if (BranchMI && isCondBranch(BranchMI) &&
      getTrueBranch(BranchMI) == OldMBB)
    setTrueBranch(BranchMI, NewBlk);
}
void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
  assert((!MBB->getParent()->getJumpTableInfo()
          || MBB->getParent()->getJumpTableInfo()->isEmpty())
         && "found a jump table");

  //collect continue right before endloop
  SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
  MachineBasicBlock::iterator Pre = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  MachineBasicBlock::iterator It = Pre;
  while (It != E) {
    if (Pre->getOpcode() == R600::CONTINUE
        && It->getOpcode() == R600::ENDLOOP)
      ContInstr.push_back(&*Pre);
    Pre = It;
    ++It;
  }

  //delete continue right before endloop
  for (unsigned i = 0; i < ContInstr.size(); ++i)
    ContInstr[i]->eraseFromParent();

  // TODO to fix up jump table so later phase won't be confused. if
  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
  // there isn't such an interface yet. alternatively, replace all the other
  // blocks in the jump table with the entryBlk //}
}
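
// Illustrative cleanup performed by wrapup (pseudo instruction sequence):
//
//   before:  ... ; CONTINUE ; ENDLOOP        after:  ... ; ENDLOOP
//
// A CONTINUE that immediately precedes an ENDLOOP is a no-op, so it is
// deleted to keep the emitted control-flow instructions minimal.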
bool AMDGPUCFGStructurizer::prepare() {
  bool Changed = false;

  //FIXME: if not reducible flow graph, make it so ???

  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);

  orderBlocks(FuncRep);

  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;

  // Add an ExitBlk to loops that don't have one
  for (MachineLoopInfo::iterator It = MLI->begin(),
       E = MLI->end(); It != E; ++It) {
    MachineLoop *LoopRep = (*It);
    MBBVector ExitingMBBs;
    LoopRep->getExitingBlocks(ExitingMBBs);

    if (ExitingMBBs.size() == 0) {
      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
      if (DummyExitBlk)
        RetBlks.push_back(DummyExitBlk);
    }
  }

  // Remove unconditional branch instr.
  // Add dummy exit block iff there are multiple returns.
  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
    MachineBasicBlock *MBB = *It;
    removeUnconditionalBranch(MBB);
    removeRedundantConditionalBranch(MBB);
    if (isReturnBlock(MBB)) {
      RetBlks.push_back(MBB);
    }
    assert(MBB->succ_size() <= 2);
  }

  if (RetBlks.size() >= 2) {
    addDummyExitBlock(RetBlks);
    Changed = true;
  }

  return Changed;
}
bool AMDGPUCFGStructurizer::run() {
  //Assume reducible CFG...
  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");

#ifdef STRESSTEST
  //Use the worse block ordering to test the algorithm.
  ReverseVector(orderedBlks);
#endif

  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
  int NumIter = 0;
  bool Finish = false;
  MachineBasicBlock *MBB;
  bool MakeProgress = false;
  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
                                        OrderedBlks.end());

  do {
    ++NumIter;
    LLVM_DEBUG(dbgs() << "numIter = " << NumIter
                      << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);

    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
        OrderedBlks.begin();
    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
        OrderedBlks.end();

    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = It;
    MachineBasicBlock *SccBeginMBB = nullptr;
    int SccNumBlk = 0;  // The number of active blocks, init to a
                        // maximum possible number.
    int SccNumIter;     // Number of iteration in this SCC.

    while (It != E) {
      MBB = *It;

      if (!SccBeginMBB) {
        SccBeginIter = It;
        SccBeginMBB = MBB;
        SccNumIter = 0;
        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
        LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
                   dbgs() << "\n";);
      }

      if (!isRetiredBlock(MBB))
        patternMatch(MBB);

      ++It;

      bool ContNextScc = true;
      if (It == E
          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
        // Just finish one scc.
        ++SccNumIter;
        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
          LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
                            << ", sccNumIter = " << SccNumIter;
                     dbgs() << "doesn't make any progress\n";);
          ContNextScc = true;
        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
          SccNumBlk = sccRemainedNumBlk;
          It = SccBeginIter;
          ContNextScc = false;
          LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
                            << "sccNumIter = " << SccNumIter << '\n';);
        } else {
          // Finish the current scc.
          ContNextScc = true;
        }
      } else {
        // Continue on next component in the current scc.
        ContNextScc = false;
      }
      if (ContNextScc)
        SccBeginMBB = nullptr;
    } //while, "one iteration" over the function.

    MachineBasicBlock *EntryMBB =
        *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
    if (EntryMBB->succ_size() == 0) {
      Finish = true;
      LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
    } else {
      int NewnumRemainedBlk
        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
      // consider cloned blocks ??
      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
        MakeProgress = true;
        NumRemainedBlk = NewnumRemainedBlk;
      } else {
        MakeProgress = false;
        LLVM_DEBUG(dbgs() << "No progress\n";);
      }
    }
  } while (!Finish && MakeProgress);

  // Misc wrap up to maintain the consistency of the Function representation.
  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));

  // Detach retired Block, release memory.
  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
       It != E; ++It) {
    if ((*It).second && (*It).second->IsRetired) {
      assert(((*It).first)->getNumber() != -1);
      LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
      (*It).first->eraseFromParent();  //Remove from the parent Function.
    }
    delete (*It).second;
  }
  BlockInfoMap.clear();
  LLInfoMap.clear();

  if (!Finish) {
    LLVM_DEBUG(FuncRep->viewCFG());
    report_fatal_error("IRREDUCIBLE_CFG");
  }

  return true;
}
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
  int SccNum = 0;
  MachineBasicBlock *MBB;
  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
       ++It, ++SccNum) {
    const std::vector<MachineBasicBlock *> &SccNext = *It;
    for (std::vector<MachineBasicBlock *>::const_iterator
         blockIter = SccNext.begin(), blockEnd = SccNext.end();
         blockIter != blockEnd; ++blockIter) {
      MBB = *blockIter;
      OrderedBlks.push_back(MBB);
      recordSccnum(MBB, SccNum);
    }
  }

  // walk through all the blocks in func to check for unreachable
  for (auto *MBB : nodes(MF)) {
    SccNum = getSCCNum(MBB);
    if (SccNum == INVALIDSCCNUM)
      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
  }
}
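
// Illustrative note on the ordering (block numbers hypothetical):
// scc_iterator enumerates SCCs in reverse topological order, so for a CFG
// like
//
//   BB0 -> BB1 <-> BB2,  BB1 -> BB3
//
// OrderedBlks ends up roughly as BB3, {BB1, BB2}, BB0 -- exit-side blocks
// first, the entry block last.  Blocks never reached by the SCC walk keep
// SccNum == INVALIDSCCNUM and are reported as unreachable above.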
int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  int CurMatch;

  LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);

  while ((CurMatch = patternMatchGroup(MBB)) > 0)
    NumMatch += CurMatch;

  LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
                    << ", numMatch = " << NumMatch << "\n";);

  return NumMatch;
}
int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  NumMatch += loopendPatternMatch();
  NumMatch += serialPatternMatch(MBB);
  NumMatch += ifPatternMatch(MBB);
  return NumMatch;
}
int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
  if (MBB->succ_size() != 1)
    return 0;

  MachineBasicBlock *childBlk = *MBB->succ_begin();
  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
    return 0;

  mergeSerialBlock(MBB, childBlk);
  ++numSerialPatternMatch;
  return 1;
}
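
// Illustrative serial pattern (block numbers hypothetical):
//
//   BB0 --(only succ)--> BB1, where BB1 has no other predecessor
//
// serialPatternMatch splices BB1's instructions onto the end of BB0 and
// retires BB1, so the pair collapses into a single block.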
int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
  //two edges
  if (MBB->succ_size() != 2)
    return 0;
  if (hasBackEdge(MBB))
    return 0;
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  if (!BranchMI)
    return 0;

  assert(isCondBranch(BranchMI));
  int NumMatch = 0;

  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
  NumMatch += serialPatternMatch(TrueMBB);
  NumMatch += ifPatternMatch(TrueMBB);
  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
  NumMatch += serialPatternMatch(FalseMBB);
  NumMatch += ifPatternMatch(FalseMBB);
  MachineBasicBlock *LandBlk;
  int Cloned = 0;

  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());

  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
      && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
    // Diamond pattern
    LandBlk = *TrueMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
    // Triangle pattern, false is empty
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && *FalseMBB->succ_begin() == TrueMBB) {
    // Triangle pattern, true is empty
    // We reverse the predicate to make a triangle, empty false pattern;
    std::swap(TrueMBB, FalseMBB);
    reversePredicateSetter(MBB->end(), *MBB);
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
    LandBlk = *FalseMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1
             && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
    LandBlk = *TrueMBB->succ_begin();
  } else {
    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
  }

  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
  // new BB created for landBlk==NULL may introduce new challenge to the
  // reduction process.
  if (LandBlk &&
      ((TrueMBB && TrueMBB->pred_size() > 1)
       || (FalseMBB && FalseMBB->pred_size() > 1))) {
    Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
  }

  if (TrueMBB && TrueMBB->pred_size() > 1) {
    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
    ++Cloned;
  }

  if (FalseMBB && FalseMBB->pred_size() > 1) {
    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
    ++Cloned;
  }

  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);

  ++numIfPatternMatch;

  numClonedBlock += Cloned;

  return 1 + Cloned + NumMatch;
}
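
// Illustrative shapes accepted by ifPatternMatch (block numbers hypothetical):
//
//   diamond:              triangle (false side empty):
//       BB0                      BB0
//      /   \                    /   \
//    BB1   BB2                BB1    |
//      \   /                    \    |
//       BB3                      BB3
//
// In the triangle case where only the true side is empty, the predicate is
// reversed first so the empty side always ends up as the "false" arm.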
int AMDGPUCFGStructurizer::loopendPatternMatch() {
  std::deque<MachineLoop *> NestedLoops;
  for (auto &It : *MLI)
    for (MachineLoop *ML : depth_first(It))
      NestedLoops.push_front(ML);

  if (NestedLoops.empty())
    return 0;

  // Process nested loops outside->inside (we did push_front),
  // so a "continue" to an outside loop won't be mistaken as a "break"
  // of the current loop.
  int Num = 0;
  for (MachineLoop *ExaminedLoop : NestedLoops) {
    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
      continue;
    LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
    int NumBreak = mergeLoop(ExaminedLoop);
    if (NumBreak == -1)
      break;
    Num += NumBreak;
  }
  return Num;
}
int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  MBBVector ExitingMBBs;
  LoopRep->getExitingBlocks(ExitingMBBs);
  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
  LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
                    << " exiting blocks\n";);
  // We assume a single ExitBlk
  MBBVector ExitBlks;
  LoopRep->getExitBlocks(ExitBlks);
  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
    ExitBlkSet.insert(ExitBlks[i]);
  assert(ExitBlkSet.size() == 1);
  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
  assert(ExitBlk && "Loop has several exit blocks");
  MBBVector LatchBlks;
  for (auto *LB : inverse_children<MachineBasicBlock *>(LoopHeader))
    if (LoopRep->contains(LB))
      LatchBlks.push_back(LB);

  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
    settleLoopcontBlock(LatchBlks[i], LoopHeader);
  int Match = 0;
  do {
    Match = 0;
    Match += serialPatternMatch(LoopHeader);
    Match += ifPatternMatch(LoopHeader);
  } while (Match > 0);
  mergeLooplandBlock(LoopHeader, ExitBlk);
  MachineLoop *ParentLoop = LoopRep->getParentLoop();
  if (ParentLoop)
    MLI->changeLoopFor(LoopHeader, ParentLoop);
  else
    MLI->removeBlock(LoopHeader);
  Visited[LoopRep] = true;
  return 1;
}
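
// Rough shape of the code produced for a structurized loop (pseudo
// instructions, illustrative only):
//
//   WHILELOOP
//     <loop header / body>
//     IF_PREDICATE_SET ... ; BREAK ; ENDIF      (from mergeLoopbreakBlock)
//     CONTINUE / CONTINUE_LOGICAL*              (from settleLoopcontBlock)
//   ENDLOOP
//
// mergeLoop stitches these together: every exiting block gets a BREAK toward
// the single exit block, every latch gets a CONTINUE toward the header, and
// the header is finally wrapped in WHILELOOP/ENDLOOP by mergeLooplandBlock.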
bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
  if (Src1MBB->succ_size() == 0) {
    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
    if (LoopRep && LoopRep == MLI->getLoopFor(Src2MBB)) {
      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
      if (TheEntry) {
        LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
                          << Src1MBB->getNumber() << " src2 = BB"
                          << Src2MBB->getNumber() << "\n";);
        return true;
      }
    }
  }
  return false;
}
int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
                                            MachineBasicBlock *TrueMBB,
                                            MachineBasicBlock *FalseMBB) {
  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
  if (Num == 0) {
    LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
                      << "\n";);
    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
  }
  return Num;
}
int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
                                               MachineBasicBlock *TrueMBB,
                                               MachineBasicBlock *FalseMBB) {
  int Num = 0;
  MachineBasicBlock *DownBlk;

  //trueBlk could be the common post dominator
  DownBlk = TrueMBB;

  LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
                    << " true = BB" << TrueMBB->getNumber()
                    << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
                    << FalseMBB->getNumber() << "\n";);

  while (DownBlk) {
    LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););

    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
      LLVM_DEBUG(dbgs() << " working\n";);

      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);

      numClonedBlock += Num;
      Num += serialPatternMatch(*HeadMBB->succ_begin());
      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
      Num += ifPatternMatch(HeadMBB);
      break;
    }
    LLVM_DEBUG(dbgs() << " not working\n";);
    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
  } // walk down the postDomTree

  return Num;
}
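
// Illustrative "jump into if" shape handled above (block names hypothetical):
//
//        HeadMBB
//        /     \
//   TrueMBB   FalseMBB   <- FalseMBB is also entered from outside the if
//        \     /
//        DownBlk
//
// Starting from TrueMBB, DownBlk is walked down its single-successor chain
// until FalseMBB reaches it by a single path; side entries along both arms
// are then removed with cloneOnSideEntryTo so a normal if pattern remains.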
void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
  dbgs() << "head = BB" << HeadMBB->getNumber()
         << " size = " << HeadMBB->size();
  if (Detail) {
    dbgs() << "\n";
    HeadMBB->print(dbgs());
    dbgs() << "\n";
  }

  if (TrueMBB) {
    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
    if (Detail) {
      dbgs() << "\n";
      TrueMBB->print(dbgs());
      dbgs() << "\n";
    }
  }
  if (FalseMBB) {
    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
    if (Detail) {
      dbgs() << "\n";
      FalseMBB->print(dbgs());
      dbgs() << "\n";
    }
  }
  if (LandMBB) {
    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
    if (Detail) {
      dbgs() << "\n";
      LandMBB->print(dbgs());
      dbgs() << "\n";
    }
  }

  dbgs() << "\n";
}
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    MachineBasicBlock **LandMBBPtr) {
  bool MigrateTrue = false;
  bool MigrateFalse = false;

  MachineBasicBlock *LandBlk = *LandMBBPtr;

  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
         && (!FalseMBB || FalseMBB->succ_size() <= 1));

  if (TrueMBB == FalseMBB)
    return 0;

  MigrateTrue = needMigrateBlock(TrueMBB);
  MigrateFalse = needMigrateBlock(FalseMBB);

  if (!MigrateTrue && !MigrateFalse)
    return 0;

  // If we need to migrate either trueBlk or falseBlk, migrate the rest that
  // have more than one predecessor.  Without doing this, its predecessor
  // rather than headBlk will have an undefined value in initReg.
  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
    MigrateTrue = true;
  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
    MigrateFalse = true;

  LLVM_DEBUG(
      dbgs() << "before improveSimpleJumpintoIf: ";
      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););

  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
  //
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
  //      {initReg = 0; org falseBlk branch }
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
  //
  // if landBlk->pred_size() > 2, put the above if-else inside
  // if (initReg !=2) {...}
  //
  // add initReg = initVal to headBlk

  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  if (!MigrateTrue || !MigrateFalse) {
    // XXX: We have an opportunity here to optimize the "branch into if" case
    // here.  Branch into if looks like this:
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //             /      \           |
    // diamond_false        diamond_true
    //             \      /
    //               done
    //
    // The diamond_head block begins the "if" and the diamond_true block
    // is the block being "branched into".
    //
    // If MigrateTrue is true, then TrueBB is the block being "branched into"
    // and if MigrateFalse is true, then FalseBB is the block being
    // "branched into"
    //
    // Here is the pseudo code for how I think the optimization should work:
    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
    // 3. Move the branch instruction from diamond_head into its own basic
    //    block (new_block).
    // 4. Add an unconditional branch from diamond_head to new_block
    // 5. Replace the branch instruction in branch_from with an unconditional
    //    branch to new_block.  If branch_from has multiple predecessors, then
    //    we need to replace the True/False block in the branch
    //    instruction instead of replacing it.
    // 6. Change the condition of the branch instruction in new_block from
    //    COND to (COND || GPR0)
    //
    // In order to insert these MOV instructions, we will need to use the
    // RegisterScavenger.  Usually liveness stops being tracked during
    // the late machine optimization passes, however if we implement
    // bool TargetRegisterInfo::requiresRegisterScavenging(
    //                                                const MachineFunction &MF)
    // and have it return true, liveness will be tracked correctly
    // by generic optimization passes.  We will also need to make sure that
    // all of our target-specific passes that run after regalloc and before
    // the CFGStructurizer track liveness and we will need to modify this pass
    // to correctly track liveness.
    //
    // After the above changes, the new CFG should look like this:
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //                       \     |
    //                        new_block
    //                      /        |
    //         diamond_false        diamond_true
    //                      \        |
    //                        done
    //
    // Without this optimization, we are forced to duplicate the diamond_true
    // block and we will end up with a CFG like this:
    //
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //             /      \                   |
    // diamond_false        diamond_true      diamond_true (duplicate)
    //             \      /                   |
    //               done --------------------|
    //
    // Duplicating diamond_true can be very costly especially if it has a
    // lot of instructions.
    return 0;
  }

  int NumNewBlk = 0;

  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);

  //insert R600::ENDIF to avoid special case "input landBlk == NULL"
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);

  if (LandBlkHasOtherPred) {
    report_fatal_error("Extra register needed to handle CFG");
    Register CmpResReg =
        HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
    report_fatal_error("Extra compare instruction needed to handle CFG");
    insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
                           CmpResReg, DebugLoc());
  }

  // XXX: We are running this after RA, so creating virtual registers will
  // cause an assertion failure in the PostRA scheduling pass.
  Register InitReg =
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
                         DebugLoc());

  if (MigrateTrue) {
    migrateInstruction(TrueMBB, LandBlk, I);
    // need to unconditionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 1).
    report_fatal_error("Extra register needed to handle CFG");
  }
  insertInstrBefore(I, R600::ELSE);

  if (MigrateFalse) {
    migrateInstruction(FalseMBB, LandBlk, I);
    // need to unconditionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 0)
    report_fatal_error("Extra register needed to handle CFG");
  }

  if (LandBlkHasOtherPred) {
    // add endif
    insertInstrBefore(I, R600::ENDIF);

    // put initReg = 2 to other predecessors of landBlk
    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
         PE = LandBlk->pred_end(); PI != PE; ++PI) {
      MachineBasicBlock *MBB = *PI;
      if (MBB != TrueMBB && MBB != FalseMBB)
        report_fatal_error("Extra register needed to handle CFG");
    }
  }
  LLVM_DEBUG(
      dbgs() << "result from improveSimpleJumpintoIf: ";
      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););

  // update landBlk
  *LandMBBPtr = LandBlk;

  return NumNewBlk;
}
void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
                                             MachineBasicBlock *SrcMBB) {
  LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
                    << SrcMBB->getNumber() << "\n";);
  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());

  DstMBB->removeSuccessor(SrcMBB, true);
  cloneSuccessorList(DstMBB, SrcMBB);

  removeSuccessor(SrcMBB);
  MLI->removeBlock(SrcMBB);
  retireBlock(SrcMBB);
}
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
  LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{  ";
             if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
             << "  } else ";
             dbgs() << "{  "; if (FalseMBB) {
               dbgs() << "BB" << FalseMBB->getNumber();
             } dbgs() << "  }\n ";
             dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
               dbgs() << "BB" << LandMBB->getNumber();
             } dbgs() << "\n";);

  int OldOpcode = BranchMI->getOpcode();
  DebugLoc BranchDL = BranchMI->getDebugLoc();

  MachineBasicBlock::iterator I = BranchMI;
  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
                         BranchDL);

  if (TrueMBB) {
    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
    MBB->removeSuccessor(TrueMBB, true);
    if (LandMBB && TrueMBB->succ_size() != 0)
      TrueMBB->removeSuccessor(LandMBB, true);
    retireBlock(TrueMBB);
    MLI->removeBlock(TrueMBB);
  }

  if (FalseMBB) {
    insertInstrBefore(I, R600::ELSE);
    MBB->splice(I, FalseMBB, FalseMBB->begin(),
                FalseMBB->end());
    MBB->removeSuccessor(FalseMBB, true);
    if (LandMBB && FalseMBB->succ_size() != 0)
      FalseMBB->removeSuccessor(LandMBB, true);
    retireBlock(FalseMBB);
    MLI->removeBlock(FalseMBB);
  }
  insertInstrBefore(I, R600::ENDIF);

  BranchMI->eraseFromParent();

  if (LandMBB && TrueMBB && FalseMBB)
    MBB->addSuccessor(LandMBB);
}
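
// Resulting layout after mergeIfthenelseBlock (illustrative):
//
//   <head block instructions>
//   IF_*   (predicate taken from the original conditional branch)
//     <instructions spliced from TrueMBB>
//   ELSE
//     <instructions spliced from FalseMBB>
//   ENDIF
//   <falls through to LandMBB>
//
// The original conditional branch is erased once the IF/ELSE/ENDIF pseudo
// instructions are in place.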
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
                                               MachineBasicBlock *LandMBB) {
  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
                    << " land = BB" << LandMBB->getNumber() << "\n";);

  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
  DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
                                                MachineBasicBlock *LandMBB) {
  LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
                    << ExitingMBB->getNumber() << " land = BB"
                    << LandMBB->getNumber() << "\n";);
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
  assert(BranchMI && isCondBranch(BranchMI));
  DebugLoc DL = BranchMI->getDebugLoc();
  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
  MachineBasicBlock::iterator I = BranchMI;
  if (TrueBranch != LandMBB)
    reversePredicateSetter(I, *I->getParent());
  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET,
                         R600::PREDICATE_BIT, DL);
  insertInstrBefore(I, R600::BREAK);
  insertInstrBefore(I, R600::ENDIF);
  //now branchInst can be erased safely
  BranchMI->eraseFromParent();
  //now take care of successors, retire blocks
  ExitingMBB->removeSuccessor(LandMBB, true);
}
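
// Illustrative lowering of a loop-break edge (pseudo instructions):
//
//   before:  cond_br %p, <stay in loop>, <LandMBB>
//   after:   IF_PREDICATE_SET PREDICATE_BIT
//              BREAK
//            ENDIF
//
// If the branch's true target was not LandMBB, the predicate setter is
// reversed first so that the BREAK is taken exactly when the loop is exited.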
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
                                                MachineBasicBlock *ContMBB) {
  LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
                    << ContingMBB->getNumber() << ", cont = BB"
                    << ContMBB->getNumber() << "\n";);

  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
  if (MI) {
    assert(isCondBranch(MI));
    MachineBasicBlock::iterator I = MI;
    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
    int OldOpcode = MI->getOpcode();
    DebugLoc DL = MI->getDebugLoc();

    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);

    if (!UseContinueLogical) {
      int BranchOpcode =
          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
                                  getBranchZeroOpcode(OldOpcode);
      insertCondBranchBefore(I, BranchOpcode, DL);
      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
      insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
      insertInstrEnd(ContingMBB, R600::ENDIF, DL);
    } else {
      int BranchOpcode =
          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
                                  getContinueZeroOpcode(OldOpcode);
      insertCondBranchBefore(I, BranchOpcode, DL);
    }

    MI->eraseFromParent();
  } else {
    // if we've arrived here then we've already erased the branch instruction;
    // travel back up the basic block to see the last reference of our debug
    // location we've just inserted that reference here so it should be
    // representative insertEnd to ensure phi-moves, if exist, go before the
    // continue-instr.
    insertInstrEnd(ContingMBB, R600::CONTINUE,
                   getLastDebugLocInBB(ContingMBB));
  }
}
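
// Illustrative lowering of a loop-continue edge (pseudo instructions):
//
//   when the branch is not the last instruction of the block:
//     IF_* ... ; CONTINUE ; ENDIF   (CONTINUE/ENDIF appended after phi-moves)
//   when the branch is the last instruction of the block:
//     CONTINUE_LOGICALNZ_* / CONTINUE_LOGICALZ_*
//
// The polarity (NZ vs Z) is chosen so that the continue fires exactly when
// the original branch targeted the loop header (ContMBB).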
int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
  int Cloned = 0;
  assert(PreMBB->isSuccessor(SrcMBB));
  while (SrcMBB && SrcMBB != DstMBB) {
    assert(SrcMBB->succ_size() == 1);
    if (SrcMBB->pred_size() > 1) {
      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
      ++Cloned;
    }

    PreMBB = SrcMBB;
    SrcMBB = *SrcMBB->succ_begin();
  }

  return Cloned;
}
MachineBasicBlock *
AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
                                                MachineBasicBlock *PredMBB) {
  assert(PredMBB->isSuccessor(MBB) &&
         "succBlk is not a predecessor of curBlk");

  MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
  //srcBlk, oldBlk, newBlk

  PredMBB->replaceSuccessor(MBB, CloneMBB);

  // add all successor to cloneBlk
  cloneSuccessorList(CloneMBB, MBB);

  numClonedInstr += MBB->size();

  LLVM_DEBUG(dbgs() << "Cloned block: "
                    << "BB" << MBB->getNumber() << "size " << MBB->size()
                    << "\n";);

  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");

  return CloneMBB;
}
void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator SpliceEnd;
  //look for the input branchinstr, not the AMDGPU branchinstr
  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
  if (!BranchMI) {
    LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
    SpliceEnd = SrcMBB->end();
  } else {
    LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
    SpliceEnd = BranchMI;
  }
  LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
                    << DstMBB->size() << " srcSize = " << SrcMBB->size()
                    << "\n";);

  //splice insert before insertPos
  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);

  LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
                    << DstMBB->size() << " srcSize = " << SrcMBB->size()
                    << '\n';);
}
MachineBasicBlock *
AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();

  if (!LoopHeader || !LoopLatch)
    return nullptr;

  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
  // Is LoopRep an infinite loop ?
  if (!BranchMI || !isUncondBranch(BranchMI))
    return nullptr;

  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
  FuncRep->push_back(DummyExitBlk);  //insert to function
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
  LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
  LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
  Ctx.emitError("Extra register needed to handle CFG");
  return nullptr;
}
void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
  MachineInstr *BranchMI;

  // I saw two unconditional branches in one basic block in the example
  // test_fc_do_while_or.c; need to fix the upstream on this to remove the loop.
  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
          && isUncondBranch(BranchMI)) {
    LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
    BranchMI->eraseFromParent();
  }
}
void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
    MachineBasicBlock *MBB) {
  if (MBB->succ_size() != 2)
    return;
  MachineBasicBlock *MBB1 = *MBB->succ_begin();
  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
  if (MBB1 != MBB2)
    return;

  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  assert(BranchMI && isCondBranch(BranchMI));
  LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
  BranchMI->eraseFromParent();
  SHOWNEWBLK(MBB1, "Removing redundant successor");
  MBB->removeSuccessor(MBB1, true);
}
void AMDGPUCFGStructurizer::addDummyExitBlock(
    SmallVectorImpl<MachineBasicBlock *> &RetMBB) {
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
  FuncRep->push_back(DummyExitBlk);  //insert to function
  insertInstrEnd(DummyExitBlk, R600::RETURN);

  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
       E = RetMBB.end(); It != E; ++It) {
    MachineBasicBlock *MBB = *It;
    MachineInstr *MI = getReturnInstr(MBB);
    if (MI)
      MI->eraseFromParent();
    MBB->addSuccessor(DummyExitBlk);
    LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
                      << " successors\n";);
  }
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
}
void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
  while (MBB->succ_size())
    MBB->removeSuccessor(*MBB->succ_begin());
}
void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
                                         int SccNum) {
  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
  if (!srcBlkInfo)
    srcBlkInfo = new BlockInformation();
  srcBlkInfo->SccNum = SccNum;
}
void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);

  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];

  if (!SrcBlkInfo)
    SrcBlkInfo = new BlockInformation();

  SrcBlkInfo->IsRetired = true;
  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
         && "can't retire block yet");
}
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
                      "AMDGPU CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
                    "AMDGPU CFG Structurizer", false, false)
FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
  return new AMDGPUCFGStructurizer();
}