1 //===- StructurizeCFG.cpp -------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/SCCIterator.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/Analysis/InstructionSimplify.h"
18 #include "llvm/Analysis/RegionInfo.h"
19 #include "llvm/Analysis/RegionIterator.h"
20 #include "llvm/Analysis/RegionPass.h"
21 #include "llvm/Analysis/UniformityAnalysis.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/PassManager.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Use.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/IR/ValueHandle.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Scalar.h"
44 #include "llvm/Transforms/Utils.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/Local.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
53 using namespace llvm::PatternMatch
;
55 #define DEBUG_TYPE "structurizecfg"
57 // The name for newly created blocks.
58 const char FlowBlockName
[] = "Flow";
62 static cl::opt
<bool> ForceSkipUniformRegions(
63 "structurizecfg-skip-uniform-regions",
65 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
69 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden
,
70 cl::desc("Allow relaxed uniform region checks"),
73 // Definition of the complex types used in this pass.
75 using BBValuePair
= std::pair
<BasicBlock
*, Value
*>;
77 using RNVector
= SmallVector
<RegionNode
*, 8>;
78 using BBVector
= SmallVector
<BasicBlock
*, 8>;
79 using BranchVector
= SmallVector
<BranchInst
*, 8>;
80 using BBValueVector
= SmallVector
<BBValuePair
, 2>;
82 using BBSet
= SmallPtrSet
<BasicBlock
*, 8>;
84 using PhiMap
= MapVector
<PHINode
*, BBValueVector
>;
85 using BB2BBVecMap
= MapVector
<BasicBlock
*, BBVector
>;
87 using BBPhiMap
= DenseMap
<BasicBlock
*, PhiMap
>;
88 using BBPredicates
= DenseMap
<BasicBlock
*, Value
*>;
89 using PredMap
= DenseMap
<BasicBlock
*, BBPredicates
>;
90 using BB2BBMap
= DenseMap
<BasicBlock
*, BasicBlock
*>;
92 using BranchDebugLocMap
= DenseMap
<BasicBlock
*, DebugLoc
>;
94 // A traits type that is intended to be used in graph algorithms. The graph
95 // traits starts at an entry node, and traverses the RegionNodes that are in
97 struct SubGraphTraits
{
98 using NodeRef
= std::pair
<RegionNode
*, SmallDenseSet
<RegionNode
*> *>;
99 using BaseSuccIterator
= GraphTraits
<RegionNode
*>::ChildIteratorType
;
101 // This wraps a set of Nodes into the iterator, so we know which edges to
103 class WrappedSuccIterator
104 : public iterator_adaptor_base
<
105 WrappedSuccIterator
, BaseSuccIterator
,
106 typename
std::iterator_traits
<BaseSuccIterator
>::iterator_category
,
107 NodeRef
, std::ptrdiff_t, NodeRef
*, NodeRef
> {
108 SmallDenseSet
<RegionNode
*> *Nodes
;
111 WrappedSuccIterator(BaseSuccIterator It
, SmallDenseSet
<RegionNode
*> *Nodes
)
112 : iterator_adaptor_base(It
), Nodes(Nodes
) {}
114 NodeRef
operator*() const { return {*I
, Nodes
}; }
117 static bool filterAll(const NodeRef
&N
) { return true; }
118 static bool filterSet(const NodeRef
&N
) { return N
.second
->count(N
.first
); }
120 using ChildIteratorType
=
121 filter_iterator
<WrappedSuccIterator
, bool (*)(const NodeRef
&)>;
123 static NodeRef
getEntryNode(Region
*R
) {
124 return {GraphTraits
<Region
*>::getEntryNode(R
), nullptr};
127 static NodeRef
getEntryNode(NodeRef N
) { return N
; }
129 static iterator_range
<ChildIteratorType
> children(const NodeRef
&N
) {
130 auto *filter
= N
.second
? &filterSet
: &filterAll
;
131 return make_filter_range(
132 make_range
<WrappedSuccIterator
>(
133 {GraphTraits
<RegionNode
*>::child_begin(N
.first
), N
.second
},
134 {GraphTraits
<RegionNode
*>::child_end(N
.first
), N
.second
}),
138 static ChildIteratorType
child_begin(const NodeRef
&N
) {
139 return children(N
).begin();
142 static ChildIteratorType
child_end(const NodeRef
&N
) {
143 return children(N
).end();
147 /// Finds the nearest common dominator of a set of BasicBlocks.
149 /// For every BB you add to the set, you can specify whether we "remember" the
150 /// block. When you get the common dominator, you can also ask whether it's one
151 /// of the blocks we remembered.
152 class NearestCommonDominator
{
154 BasicBlock
*Result
= nullptr;
155 bool ResultIsRemembered
= false;
157 /// Add BB to the resulting dominator.
158 void addBlock(BasicBlock
*BB
, bool Remember
) {
161 ResultIsRemembered
= Remember
;
165 BasicBlock
*NewResult
= DT
->findNearestCommonDominator(Result
, BB
);
166 if (NewResult
!= Result
)
167 ResultIsRemembered
= false;
169 ResultIsRemembered
|= Remember
;
174 explicit NearestCommonDominator(DominatorTree
*DomTree
) : DT(DomTree
) {}
176 void addBlock(BasicBlock
*BB
) {
177 addBlock(BB
, /* Remember = */ false);
180 void addAndRememberBlock(BasicBlock
*BB
) {
181 addBlock(BB
, /* Remember = */ true);
184 /// Get the nearest common dominator of all the BBs added via addBlock() and
185 /// addAndRememberBlock().
186 BasicBlock
*result() { return Result
; }
188 /// Is the BB returned by getResult() one of the blocks we added to the set
189 /// with addAndRememberBlock()?
190 bool resultIsRememberedBlock() { return ResultIsRemembered
; }
193 /// Transforms the control flow graph on one single entry/exit region
196 /// After the transform all "If"/"Then"/"Else" style control flow looks like
208 /// | | 1 = "If" block, calculates the condition
209 /// 4 | 2 = "Then" subregion, runs if the condition is true
210 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
211 /// |/ 4 = "Else" optional subregion, runs if the condition is false
212 /// 5 5 = "End" block, also rejoins the control flow
215 /// Control flow is expressed as a branch where the true exit goes into the
216 /// "Then"/"Else" region, while the false exit skips the region
217 /// The condition for the optional "Else" region is expressed as a PHI node.
218 /// The incoming values of the PHI node are true for the "If" edge and false
219 /// for the "Then" edge.
221 /// Additionally to that even complicated loops look like this:
228 /// | / 1 = "Entry" block
229 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
230 /// 3 3 = "Flow" block, with back edge to entry block
234 /// The back edge of the "Flow" block is always on the false side of the branch
235 /// while the true side continues the general flow. So the loop condition
236 /// consist of a network of PHI nodes where the true incoming values expresses
237 /// breaks and the false values expresses continue states.
239 class StructurizeCFG
{
241 ConstantInt
*BoolTrue
;
242 ConstantInt
*BoolFalse
;
246 Region
*ParentRegion
;
248 UniformityInfo
*UA
= nullptr;
251 SmallVector
<RegionNode
*, 8> Order
;
255 SmallVector
<WeakVH
, 8> AffectedPhis
;
256 BBPhiMap DeletedPhis
;
257 BB2BBVecMap AddedPhis
;
260 BranchVector Conditions
;
264 BranchVector LoopConds
;
266 BranchDebugLocMap TermDL
;
268 RegionNode
*PrevNode
;
272 void analyzeLoops(RegionNode
*N
);
274 Value
*buildCondition(BranchInst
*Term
, unsigned Idx
, bool Invert
);
276 void gatherPredicates(RegionNode
*N
);
280 void insertConditions(bool Loops
);
282 void simplifyConditions();
284 void delPhiValues(BasicBlock
*From
, BasicBlock
*To
);
286 void addPhiValues(BasicBlock
*From
, BasicBlock
*To
);
288 void findUndefBlocks(BasicBlock
*PHIBlock
,
289 const SmallSet
<BasicBlock
*, 8> &Incomings
,
290 SmallVector
<BasicBlock
*> &UndefBlks
) const;
293 void simplifyAffectedPhis();
295 void killTerminator(BasicBlock
*BB
);
297 void changeExit(RegionNode
*Node
, BasicBlock
*NewExit
,
298 bool IncludeDominator
);
300 BasicBlock
*getNextFlow(BasicBlock
*Dominator
);
302 BasicBlock
*needPrefix(bool NeedEmpty
);
304 BasicBlock
*needPostfix(BasicBlock
*Flow
, bool ExitUseAllowed
);
306 void setPrevNode(BasicBlock
*BB
);
308 bool dominatesPredicates(BasicBlock
*BB
, RegionNode
*Node
);
310 bool isPredictableTrue(RegionNode
*Node
);
312 void wireFlow(bool ExitUseAllowed
, BasicBlock
*LoopEnd
);
314 void handleLoops(bool ExitUseAllowed
, BasicBlock
*LoopEnd
);
321 void init(Region
*R
);
322 bool run(Region
*R
, DominatorTree
*DT
);
323 bool makeUniformRegion(Region
*R
, UniformityInfo
&UA
);
326 class StructurizeCFGLegacyPass
: public RegionPass
{
327 bool SkipUniformRegions
;
332 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_
= false)
333 : RegionPass(ID
), SkipUniformRegions(SkipUniformRegions_
) {
334 if (ForceSkipUniformRegions
.getNumOccurrences())
335 SkipUniformRegions
= ForceSkipUniformRegions
.getValue();
336 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
339 bool runOnRegion(Region
*R
, RGPassManager
&RGM
) override
{
342 if (SkipUniformRegions
) {
344 getAnalysis
<UniformityInfoWrapperPass
>().getUniformityInfo();
345 if (SCFG
.makeUniformRegion(R
, UA
))
348 DominatorTree
*DT
= &getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
349 return SCFG
.run(R
, DT
);
352 StringRef
getPassName() const override
{ return "Structurize control flow"; }
354 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
355 if (SkipUniformRegions
)
356 AU
.addRequired
<UniformityInfoWrapperPass
>();
357 AU
.addRequired
<DominatorTreeWrapperPass
>();
359 AU
.addPreserved
<DominatorTreeWrapperPass
>();
360 RegionPass::getAnalysisUsage(AU
);
364 } // end anonymous namespace
366 char StructurizeCFGLegacyPass::ID
= 0;
368 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass
, "structurizecfg",
369 "Structurize the CFG", false, false)
370 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass
)
371 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
372 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
)
373 INITIALIZE_PASS_END(StructurizeCFGLegacyPass
, "structurizecfg",
374 "Structurize the CFG", false, false)
376 /// Build up the general order of nodes, by performing a topological sort of the
377 /// parent region's nodes, while ensuring that there is no outer cycle node
378 /// between any two inner cycle nodes.
379 void StructurizeCFG::orderNodes() {
380 Order
.resize(std::distance(GraphTraits
<Region
*>::nodes_begin(ParentRegion
),
381 GraphTraits
<Region
*>::nodes_end(ParentRegion
)));
385 SmallDenseSet
<RegionNode
*> Nodes
;
386 auto EntryNode
= SubGraphTraits::getEntryNode(ParentRegion
);
388 // A list of range indices of SCCs in Order, to be processed.
389 SmallVector
<std::pair
<unsigned, unsigned>, 8> WorkList
;
390 unsigned I
= 0, E
= Order
.size();
392 // Run through all the SCCs in the subgraph starting with Entry.
394 scc_iterator
<SubGraphTraits::NodeRef
, SubGraphTraits
>::begin(
396 !SCCI
.isAtEnd(); ++SCCI
) {
399 // An SCC up to the size of 2, can be reduced to an entry (the last node),
400 // and a possible additional node. Therefore, it is already in order, and
401 // there is no need to add it to the work-list.
402 unsigned Size
= SCC
.size();
404 WorkList
.emplace_back(I
, I
+ Size
);
406 // Add the SCC nodes to the Order array.
407 for (const auto &N
: SCC
) {
408 assert(I
< E
&& "SCC size mismatch!");
409 Order
[I
++] = N
.first
;
412 assert(I
== E
&& "SCC size mismatch!");
414 // If there are no more SCCs to order, then we are done.
415 if (WorkList
.empty())
418 std::tie(I
, E
) = WorkList
.pop_back_val();
420 // Collect the set of nodes in the SCC's subgraph. These are only the
421 // possible child nodes; we do not add the entry (last node) otherwise we
422 // will have the same exact SCC all over again.
424 Nodes
.insert(Order
.begin() + I
, Order
.begin() + E
- 1);
426 // Update the entry node.
427 EntryNode
.first
= Order
[E
- 1];
428 EntryNode
.second
= &Nodes
;
432 /// Determine the end of the loops
433 void StructurizeCFG::analyzeLoops(RegionNode
*N
) {
434 if (N
->isSubRegion()) {
435 // Test for exit as back edge
436 BasicBlock
*Exit
= N
->getNodeAs
<Region
>()->getExit();
437 if (Visited
.count(Exit
))
438 Loops
[Exit
] = N
->getEntry();
441 // Test for successors as back edge
442 BasicBlock
*BB
= N
->getNodeAs
<BasicBlock
>();
443 BranchInst
*Term
= cast
<BranchInst
>(BB
->getTerminator());
445 for (BasicBlock
*Succ
: Term
->successors())
446 if (Visited
.count(Succ
))
451 /// Build the condition for one edge
452 Value
*StructurizeCFG::buildCondition(BranchInst
*Term
, unsigned Idx
,
454 Value
*Cond
= Invert
? BoolFalse
: BoolTrue
;
455 if (Term
->isConditional()) {
456 Cond
= Term
->getCondition();
458 if (Idx
!= (unsigned)Invert
)
459 Cond
= invertCondition(Cond
);
464 /// Analyze the predecessors of each block and build up predicates
465 void StructurizeCFG::gatherPredicates(RegionNode
*N
) {
466 RegionInfo
*RI
= ParentRegion
->getRegionInfo();
467 BasicBlock
*BB
= N
->getEntry();
468 BBPredicates
&Pred
= Predicates
[BB
];
469 BBPredicates
&LPred
= LoopPreds
[BB
];
471 for (BasicBlock
*P
: predecessors(BB
)) {
472 // Ignore it if it's a branch from outside into our region entry
473 if (!ParentRegion
->contains(P
))
476 Region
*R
= RI
->getRegionFor(P
);
477 if (R
== ParentRegion
) {
478 // It's a top level block in our region
479 BranchInst
*Term
= cast
<BranchInst
>(P
->getTerminator());
480 for (unsigned i
= 0, e
= Term
->getNumSuccessors(); i
!= e
; ++i
) {
481 BasicBlock
*Succ
= Term
->getSuccessor(i
);
485 if (Visited
.count(P
)) {
486 // Normal forward edge
487 if (Term
->isConditional()) {
488 // Try to treat it like an ELSE block
489 BasicBlock
*Other
= Term
->getSuccessor(!i
);
490 if (Visited
.count(Other
) && !Loops
.count(Other
) &&
491 !Pred
.count(Other
) && !Pred
.count(P
)) {
493 Pred
[Other
] = BoolFalse
;
498 Pred
[P
] = buildCondition(Term
, i
, false);
501 LPred
[P
] = buildCondition(Term
, i
, true);
505 // It's an exit from a sub region
506 while (R
->getParent() != ParentRegion
)
509 // Edge from inside a subregion to its entry, ignore it
513 BasicBlock
*Entry
= R
->getEntry();
514 if (Visited
.count(Entry
))
515 Pred
[Entry
] = BoolTrue
;
517 LPred
[Entry
] = BoolFalse
;
522 /// Collect various loop and predicate infos
523 void StructurizeCFG::collectInfos() {
531 // Reset the visited nodes
534 for (RegionNode
*RN
: reverse(Order
)) {
535 LLVM_DEBUG(dbgs() << "Visiting: "
536 << (RN
->isSubRegion() ? "SubRegion with entry: " : "")
537 << RN
->getEntry()->getName() << "\n");
539 // Analyze all the conditions leading to a node
540 gatherPredicates(RN
);
542 // Remember that we've seen this node
543 Visited
.insert(RN
->getEntry());
545 // Find the last back edges
549 // Reset the collected term debug locations
552 for (BasicBlock
&BB
: *Func
) {
553 if (const DebugLoc
&DL
= BB
.getTerminator()->getDebugLoc())
558 /// Insert the missing branch conditions
559 void StructurizeCFG::insertConditions(bool Loops
) {
560 BranchVector
&Conds
= Loops
? LoopConds
: Conditions
;
561 Value
*Default
= Loops
? BoolTrue
: BoolFalse
;
562 SSAUpdater PhiInserter
;
564 for (BranchInst
*Term
: Conds
) {
565 assert(Term
->isConditional());
567 BasicBlock
*Parent
= Term
->getParent();
568 BasicBlock
*SuccTrue
= Term
->getSuccessor(0);
569 BasicBlock
*SuccFalse
= Term
->getSuccessor(1);
571 PhiInserter
.Initialize(Boolean
, "");
572 PhiInserter
.AddAvailableValue(&Func
->getEntryBlock(), Default
);
573 PhiInserter
.AddAvailableValue(Loops
? SuccFalse
: Parent
, Default
);
575 BBPredicates
&Preds
= Loops
? LoopPreds
[SuccFalse
] : Predicates
[SuccTrue
];
577 NearestCommonDominator
Dominator(DT
);
578 Dominator
.addBlock(Parent
);
580 Value
*ParentValue
= nullptr;
581 for (std::pair
<BasicBlock
*, Value
*> BBAndPred
: Preds
) {
582 BasicBlock
*BB
= BBAndPred
.first
;
583 Value
*Pred
= BBAndPred
.second
;
589 PhiInserter
.AddAvailableValue(BB
, Pred
);
590 Dominator
.addAndRememberBlock(BB
);
594 Term
->setCondition(ParentValue
);
596 if (!Dominator
.resultIsRememberedBlock())
597 PhiInserter
.AddAvailableValue(Dominator
.result(), Default
);
599 Term
->setCondition(PhiInserter
.GetValueInMiddleOfBlock(Parent
));
604 /// Simplify any inverted conditions that were built by buildConditions.
605 void StructurizeCFG::simplifyConditions() {
606 SmallVector
<Instruction
*> InstToErase
;
607 for (auto &I
: concat
<PredMap::value_type
>(Predicates
, LoopPreds
)) {
608 auto &Preds
= I
.second
;
609 for (auto &J
: Preds
) {
610 auto &Cond
= J
.second
;
611 Instruction
*Inverted
;
612 if (match(Cond
, m_Not(m_OneUse(m_Instruction(Inverted
)))) &&
613 !Cond
->use_empty()) {
614 if (auto *InvertedCmp
= dyn_cast
<CmpInst
>(Inverted
)) {
615 InvertedCmp
->setPredicate(InvertedCmp
->getInversePredicate());
616 Cond
->replaceAllUsesWith(InvertedCmp
);
617 InstToErase
.push_back(cast
<Instruction
>(Cond
));
622 for (auto *I
: InstToErase
)
623 I
->eraseFromParent();
626 /// Remove all PHI values coming from "From" into "To" and remember
627 /// them in DeletedPhis
628 void StructurizeCFG::delPhiValues(BasicBlock
*From
, BasicBlock
*To
) {
629 PhiMap
&Map
= DeletedPhis
[To
];
630 for (PHINode
&Phi
: To
->phis()) {
631 bool Recorded
= false;
632 while (Phi
.getBasicBlockIndex(From
) != -1) {
633 Value
*Deleted
= Phi
.removeIncomingValue(From
, false);
634 Map
[&Phi
].push_back(std::make_pair(From
, Deleted
));
636 AffectedPhis
.push_back(&Phi
);
643 /// Add a dummy PHI value as soon as we knew the new predecessor
644 void StructurizeCFG::addPhiValues(BasicBlock
*From
, BasicBlock
*To
) {
645 for (PHINode
&Phi
: To
->phis()) {
646 Value
*Undef
= UndefValue::get(Phi
.getType());
647 Phi
.addIncoming(Undef
, From
);
649 AddedPhis
[To
].push_back(From
);
652 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values
653 /// from predecessors \p Incomings, we have a chance to mark the available value
654 /// from some blocks as undefined. The function will find out all such blocks
655 /// and return in \p UndefBlks.
656 void StructurizeCFG::findUndefBlocks(
657 BasicBlock
*PHIBlock
, const SmallSet
<BasicBlock
*, 8> &Incomings
,
658 SmallVector
<BasicBlock
*> &UndefBlks
) const {
659 // We may get a post-structured CFG like below:
675 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
676 // of B before structurization. F1/F2/F3 are flow blocks inserted during
677 // structurization process. Block N is not a predecessor of B before
678 // structurization, but are placed between the predecessors(P1/P2) of B after
679 // structurization. This usually means that threads went to N never take the
680 // path N->F2->F3->B. For example, the threads take the branch F1->N may
681 // always take the branch F2->P2. So, when we are reconstructing a PHI
682 // originally in B, we can safely say the incoming value from N is undefined.
683 SmallSet
<BasicBlock
*, 8> VisitedBlock
;
684 SmallVector
<BasicBlock
*, 8> Stack
;
685 if (PHIBlock
== ParentRegion
->getExit()) {
686 for (auto P
: predecessors(PHIBlock
)) {
687 if (ParentRegion
->contains(P
))
691 append_range(Stack
, predecessors(PHIBlock
));
694 // Do a backward traversal over the CFG, and stop further searching if
695 // the block is not a Flow. If a block is neither flow block nor the
696 // incoming predecessor, then the incoming value from the block is
697 // undefined value for the PHI being reconstructed.
698 while (!Stack
.empty()) {
699 BasicBlock
*Current
= Stack
.pop_back_val();
700 if (VisitedBlock
.contains(Current
))
703 VisitedBlock
.insert(Current
);
704 if (FlowSet
.contains(Current
)) {
705 for (auto P
: predecessors(Current
))
707 } else if (!Incomings
.contains(Current
)) {
708 UndefBlks
.push_back(Current
);
713 /// Add the real PHI value as soon as everything is set up
714 void StructurizeCFG::setPhiValues() {
715 SmallVector
<PHINode
*, 8> InsertedPhis
;
716 SSAUpdater
Updater(&InsertedPhis
);
717 for (const auto &AddedPhi
: AddedPhis
) {
718 BasicBlock
*To
= AddedPhi
.first
;
719 const BBVector
&From
= AddedPhi
.second
;
721 if (!DeletedPhis
.count(To
))
724 SmallVector
<BasicBlock
*> UndefBlks
;
725 bool CachedUndefs
= false;
726 PhiMap
&Map
= DeletedPhis
[To
];
727 for (const auto &PI
: Map
) {
728 PHINode
*Phi
= PI
.first
;
729 Value
*Undef
= UndefValue::get(Phi
->getType());
730 Updater
.Initialize(Phi
->getType(), "");
731 Updater
.AddAvailableValue(&Func
->getEntryBlock(), Undef
);
732 Updater
.AddAvailableValue(To
, Undef
);
734 SmallSet
<BasicBlock
*, 8> Incomings
;
735 SmallVector
<BasicBlock
*> ConstantPreds
;
736 for (const auto &VI
: PI
.second
) {
737 Incomings
.insert(VI
.first
);
738 Updater
.AddAvailableValue(VI
.first
, VI
.second
);
739 if (isa
<Constant
>(VI
.second
))
740 ConstantPreds
.push_back(VI
.first
);
744 findUndefBlocks(To
, Incomings
, UndefBlks
);
748 for (auto UB
: UndefBlks
) {
749 // If this undef block is dominated by any predecessor(before
750 // structurization) of reconstructed PHI with constant incoming value,
751 // don't mark the available value as undefined. Setting undef to such
752 // block will stop us from getting optimal phi insertion.
753 if (any_of(ConstantPreds
,
754 [&](BasicBlock
*CP
) { return DT
->dominates(CP
, UB
); }))
756 Updater
.AddAvailableValue(UB
, Undef
);
759 for (BasicBlock
*FI
: From
)
760 Phi
->setIncomingValueForBlock(FI
, Updater
.GetValueAtEndOfBlock(FI
));
761 AffectedPhis
.push_back(Phi
);
764 DeletedPhis
.erase(To
);
766 assert(DeletedPhis
.empty());
768 AffectedPhis
.append(InsertedPhis
.begin(), InsertedPhis
.end());
771 void StructurizeCFG::simplifyAffectedPhis() {
775 SimplifyQuery
Q(Func
->getParent()->getDataLayout());
777 // Setting CanUseUndef to true might extend value liveness, set it to false
778 // to achieve better register pressure.
779 Q
.CanUseUndef
= false;
780 for (WeakVH VH
: AffectedPhis
) {
781 if (auto Phi
= dyn_cast_or_null
<PHINode
>(VH
)) {
782 if (auto NewValue
= simplifyInstruction(Phi
, Q
)) {
783 Phi
->replaceAllUsesWith(NewValue
);
784 Phi
->eraseFromParent();
792 /// Remove phi values from all successors and then remove the terminator.
793 void StructurizeCFG::killTerminator(BasicBlock
*BB
) {
794 Instruction
*Term
= BB
->getTerminator();
798 for (BasicBlock
*Succ
: successors(BB
))
799 delPhiValues(BB
, Succ
);
801 Term
->eraseFromParent();
804 /// Let node exit(s) point to NewExit
805 void StructurizeCFG::changeExit(RegionNode
*Node
, BasicBlock
*NewExit
,
806 bool IncludeDominator
) {
807 if (Node
->isSubRegion()) {
808 Region
*SubRegion
= Node
->getNodeAs
<Region
>();
809 BasicBlock
*OldExit
= SubRegion
->getExit();
810 BasicBlock
*Dominator
= nullptr;
812 // Find all the edges from the sub region to the exit.
813 // We use make_early_inc_range here because we modify BB's terminator.
814 for (BasicBlock
*BB
: llvm::make_early_inc_range(predecessors(OldExit
))) {
815 if (!SubRegion
->contains(BB
))
818 // Modify the edges to point to the new exit
819 delPhiValues(BB
, OldExit
);
820 BB
->getTerminator()->replaceUsesOfWith(OldExit
, NewExit
);
821 addPhiValues(BB
, NewExit
);
823 // Find the new dominator (if requested)
824 if (IncludeDominator
) {
828 Dominator
= DT
->findNearestCommonDominator(Dominator
, BB
);
832 // Change the dominator (if requested)
834 DT
->changeImmediateDominator(NewExit
, Dominator
);
836 // Update the region info
837 SubRegion
->replaceExit(NewExit
);
839 BasicBlock
*BB
= Node
->getNodeAs
<BasicBlock
>();
841 BranchInst
*Br
= BranchInst::Create(NewExit
, BB
);
842 Br
->setDebugLoc(TermDL
[BB
]);
843 addPhiValues(BB
, NewExit
);
844 if (IncludeDominator
)
845 DT
->changeImmediateDominator(NewExit
, BB
);
849 /// Create a new flow node and update dominator tree and region info
850 BasicBlock
*StructurizeCFG::getNextFlow(BasicBlock
*Dominator
) {
851 LLVMContext
&Context
= Func
->getContext();
852 BasicBlock
*Insert
= Order
.empty() ? ParentRegion
->getExit() :
853 Order
.back()->getEntry();
854 BasicBlock
*Flow
= BasicBlock::Create(Context
, FlowBlockName
,
856 FlowSet
.insert(Flow
);
858 // use a temporary variable to avoid a use-after-free if the map's storage is
860 DebugLoc DL
= TermDL
[Dominator
];
861 TermDL
[Flow
] = std::move(DL
);
863 DT
->addNewBlock(Flow
, Dominator
);
864 ParentRegion
->getRegionInfo()->setRegionFor(Flow
, ParentRegion
);
868 /// Create a new or reuse the previous node as flow node
869 BasicBlock
*StructurizeCFG::needPrefix(bool NeedEmpty
) {
870 BasicBlock
*Entry
= PrevNode
->getEntry();
872 if (!PrevNode
->isSubRegion()) {
873 killTerminator(Entry
);
874 if (!NeedEmpty
|| Entry
->getFirstInsertionPt() == Entry
->end())
878 // create a new flow node
879 BasicBlock
*Flow
= getNextFlow(Entry
);
882 changeExit(PrevNode
, Flow
, true);
883 PrevNode
= ParentRegion
->getBBNode(Flow
);
887 /// Returns the region exit if possible, otherwise just a new flow node
888 BasicBlock
*StructurizeCFG::needPostfix(BasicBlock
*Flow
,
889 bool ExitUseAllowed
) {
890 if (!Order
.empty() || !ExitUseAllowed
)
891 return getNextFlow(Flow
);
893 BasicBlock
*Exit
= ParentRegion
->getExit();
894 DT
->changeImmediateDominator(Exit
, Flow
);
895 addPhiValues(Flow
, Exit
);
899 /// Set the previous node
900 void StructurizeCFG::setPrevNode(BasicBlock
*BB
) {
901 PrevNode
= ParentRegion
->contains(BB
) ? ParentRegion
->getBBNode(BB
)
905 /// Does BB dominate all the predicates of Node?
906 bool StructurizeCFG::dominatesPredicates(BasicBlock
*BB
, RegionNode
*Node
) {
907 BBPredicates
&Preds
= Predicates
[Node
->getEntry()];
908 return llvm::all_of(Preds
, [&](std::pair
<BasicBlock
*, Value
*> Pred
) {
909 return DT
->dominates(BB
, Pred
.first
);
913 /// Can we predict that this node will always be called?
914 bool StructurizeCFG::isPredictableTrue(RegionNode
*Node
) {
915 BBPredicates
&Preds
= Predicates
[Node
->getEntry()];
916 bool Dominated
= false;
918 // Regionentry is always true
922 for (std::pair
<BasicBlock
*, Value
*> Pred
: Preds
) {
923 BasicBlock
*BB
= Pred
.first
;
924 Value
*V
= Pred
.second
;
929 if (!Dominated
&& DT
->dominates(BB
, PrevNode
->getEntry()))
933 // TODO: The dominator check is too strict
937 /// Take one node from the order vector and wire it up
938 void StructurizeCFG::wireFlow(bool ExitUseAllowed
,
939 BasicBlock
*LoopEnd
) {
940 RegionNode
*Node
= Order
.pop_back_val();
941 Visited
.insert(Node
->getEntry());
943 if (isPredictableTrue(Node
)) {
944 // Just a linear flow
946 changeExit(PrevNode
, Node
->getEntry(), true);
950 // Insert extra prefix node (or reuse last one)
951 BasicBlock
*Flow
= needPrefix(false);
953 // Insert extra postfix node (or use exit instead)
954 BasicBlock
*Entry
= Node
->getEntry();
955 BasicBlock
*Next
= needPostfix(Flow
, ExitUseAllowed
);
957 // let it point to entry and next block
958 BranchInst
*Br
= BranchInst::Create(Entry
, Next
, BoolPoison
, Flow
);
959 Br
->setDebugLoc(TermDL
[Flow
]);
960 Conditions
.push_back(Br
);
961 addPhiValues(Flow
, Entry
);
962 DT
->changeImmediateDominator(Entry
, Flow
);
965 while (!Order
.empty() && !Visited
.count(LoopEnd
) &&
966 dominatesPredicates(Entry
, Order
.back())) {
967 handleLoops(false, LoopEnd
);
970 changeExit(PrevNode
, Next
, false);
975 void StructurizeCFG::handleLoops(bool ExitUseAllowed
,
976 BasicBlock
*LoopEnd
) {
977 RegionNode
*Node
= Order
.back();
978 BasicBlock
*LoopStart
= Node
->getEntry();
980 if (!Loops
.count(LoopStart
)) {
981 wireFlow(ExitUseAllowed
, LoopEnd
);
985 if (!isPredictableTrue(Node
))
986 LoopStart
= needPrefix(true);
988 LoopEnd
= Loops
[Node
->getEntry()];
989 wireFlow(false, LoopEnd
);
990 while (!Visited
.count(LoopEnd
)) {
991 handleLoops(false, LoopEnd
);
994 assert(LoopStart
!= &LoopStart
->getParent()->getEntryBlock());
996 // Create an extra loop end node
997 LoopEnd
= needPrefix(false);
998 BasicBlock
*Next
= needPostfix(LoopEnd
, ExitUseAllowed
);
999 BranchInst
*Br
= BranchInst::Create(Next
, LoopStart
, BoolPoison
, LoopEnd
);
1000 Br
->setDebugLoc(TermDL
[LoopEnd
]);
1001 LoopConds
.push_back(Br
);
1002 addPhiValues(LoopEnd
, LoopStart
);
1006 /// After this function control flow looks like it should be, but
1007 /// branches and PHI nodes only have undefined conditions.
1008 void StructurizeCFG::createFlow() {
1009 BasicBlock
*Exit
= ParentRegion
->getExit();
1010 bool EntryDominatesExit
= DT
->dominates(ParentRegion
->getEntry(), Exit
);
1012 AffectedPhis
.clear();
1013 DeletedPhis
.clear();
1021 while (!Order
.empty()) {
1022 handleLoops(EntryDominatesExit
, nullptr);
1026 changeExit(PrevNode
, Exit
, EntryDominatesExit
);
1028 assert(EntryDominatesExit
);
1031 /// Handle a rare case where the disintegrated nodes instructions
1032 /// no longer dominate all their uses. Not sure if this is really necessary
1033 void StructurizeCFG::rebuildSSA() {
1035 for (BasicBlock
*BB
: ParentRegion
->blocks())
1036 for (Instruction
&I
: *BB
) {
1037 bool Initialized
= false;
1038 // We may modify the use list as we iterate over it, so we use
1039 // make_early_inc_range.
1040 for (Use
&U
: llvm::make_early_inc_range(I
.uses())) {
1041 Instruction
*User
= cast
<Instruction
>(U
.getUser());
1042 if (User
->getParent() == BB
) {
1044 } else if (PHINode
*UserPN
= dyn_cast
<PHINode
>(User
)) {
1045 if (UserPN
->getIncomingBlock(U
) == BB
)
1049 if (DT
->dominates(&I
, User
))
1053 Value
*Undef
= UndefValue::get(I
.getType());
1054 Updater
.Initialize(I
.getType(), "");
1055 Updater
.AddAvailableValue(&Func
->getEntryBlock(), Undef
);
1056 Updater
.AddAvailableValue(BB
, &I
);
1059 Updater
.RewriteUseAfterInsertions(U
);
1064 static bool hasOnlyUniformBranches(Region
*R
, unsigned UniformMDKindID
,
1065 const UniformityInfo
&UA
) {
1066 // Bool for if all sub-regions are uniform.
1067 bool SubRegionsAreUniform
= true;
1068 // Count of how many direct children are conditional.
1069 unsigned ConditionalDirectChildren
= 0;
1071 for (auto *E
: R
->elements()) {
1072 if (!E
->isSubRegion()) {
1073 auto Br
= dyn_cast
<BranchInst
>(E
->getEntry()->getTerminator());
1074 if (!Br
|| !Br
->isConditional())
1077 if (!UA
.isUniform(Br
))
1080 // One of our direct children is conditional.
1081 ConditionalDirectChildren
++;
1083 LLVM_DEBUG(dbgs() << "BB: " << Br
->getParent()->getName()
1084 << " has uniform terminator\n");
1086 // Explicitly refuse to treat regions as uniform if they have non-uniform
1087 // subregions. We cannot rely on UniformityAnalysis for branches in
1088 // subregions because those branches may have been removed and re-created,
1089 // so we look for our metadata instead.
1091 // Warning: It would be nice to treat regions as uniform based only on
1092 // their direct child basic blocks' terminators, regardless of whether
1093 // subregions are uniform or not. However, this requires a very careful
1094 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1095 for (auto *BB
: E
->getNodeAs
<Region
>()->blocks()) {
1096 auto Br
= dyn_cast
<BranchInst
>(BB
->getTerminator());
1097 if (!Br
|| !Br
->isConditional())
1100 if (!Br
->getMetadata(UniformMDKindID
)) {
1101 // Early exit if we cannot have relaxed uniform regions.
1102 if (!RelaxedUniformRegions
)
1105 SubRegionsAreUniform
= false;
1112 // Our region is uniform if:
1113 // 1. All conditional branches that are direct children are uniform (checked
1116 // a. All sub-regions are uniform.
1117 // b. There is one or less conditional branches among the direct children.
1118 return SubRegionsAreUniform
|| (ConditionalDirectChildren
<= 1);
1121 void StructurizeCFG::init(Region
*R
) {
1122 LLVMContext
&Context
= R
->getEntry()->getContext();
1124 Boolean
= Type::getInt1Ty(Context
);
1125 BoolTrue
= ConstantInt::getTrue(Context
);
1126 BoolFalse
= ConstantInt::getFalse(Context
);
1127 BoolPoison
= PoisonValue::get(Boolean
);
1132 bool StructurizeCFG::makeUniformRegion(Region
*R
, UniformityInfo
&UA
) {
1133 if (R
->isTopLevelRegion())
1138 // TODO: We could probably be smarter here with how we handle sub-regions.
1139 // We currently rely on the fact that metadata is set by earlier invocations
1140 // of the pass on sub-regions, and that this metadata doesn't get lost --
1141 // but we shouldn't rely on metadata for correctness!
1142 unsigned UniformMDKindID
=
1143 R
->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1145 if (hasOnlyUniformBranches(R
, UniformMDKindID
, UA
)) {
1146 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1149 // Mark all direct child block terminators as having been treated as
1150 // uniform. To account for a possible future in which non-uniform
1151 // sub-regions are treated more cleverly, indirect children are not
1152 // marked as uniform.
1153 MDNode
*MD
= MDNode::get(R
->getEntry()->getParent()->getContext(), {});
1154 for (RegionNode
*E
: R
->elements()) {
1155 if (E
->isSubRegion())
1158 if (Instruction
*Term
= E
->getEntry()->getTerminator())
1159 Term
->setMetadata(UniformMDKindID
, MD
);
1167 /// Run the transformation for each region found
1168 bool StructurizeCFG::run(Region
*R
, DominatorTree
*DT
) {
1169 if (R
->isTopLevelRegion())
1174 Func
= R
->getEntry()->getParent();
1175 assert(hasOnlySimpleTerminator(*Func
) && "Unsupported block terminator.");
1182 insertConditions(false);
1183 insertConditions(true);
1185 simplifyConditions();
1186 simplifyAffectedPhis();
1192 DeletedPhis
.clear();
1205 Pass
*llvm::createStructurizeCFGPass(bool SkipUniformRegions
) {
1206 return new StructurizeCFGLegacyPass(SkipUniformRegions
);
1209 static void addRegionIntoQueue(Region
&R
, std::vector
<Region
*> &Regions
) {
1210 Regions
.push_back(&R
);
1211 for (const auto &E
: R
)
1212 addRegionIntoQueue(*E
, Regions
);
1215 PreservedAnalyses
StructurizeCFGPass::run(Function
&F
,
1216 FunctionAnalysisManager
&AM
) {
1218 bool Changed
= false;
1219 DominatorTree
*DT
= &AM
.getResult
<DominatorTreeAnalysis
>(F
);
1220 auto &RI
= AM
.getResult
<RegionInfoAnalysis
>(F
);
1221 std::vector
<Region
*> Regions
;
1222 addRegionIntoQueue(*RI
.getTopLevelRegion(), Regions
);
1223 while (!Regions
.empty()) {
1224 Region
*R
= Regions
.back();
1225 StructurizeCFG SCFG
;
1227 Changed
|= SCFG
.run(R
, DT
);
1231 return PreservedAnalyses::all();
1232 PreservedAnalyses PA
;
1233 PA
.preserve
<DominatorTreeAnalysis
>();