[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / Transforms / Scalar / StructurizeCFG.cpp
blob7d96a3478858bd8d9380d88908b98703f33ebe75
1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/SCCIterator.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/Analysis/InstructionSimplify.h"
18 #include "llvm/Analysis/RegionInfo.h"
19 #include "llvm/Analysis/RegionIterator.h"
20 #include "llvm/Analysis/RegionPass.h"
21 #include "llvm/Analysis/UniformityAnalysis.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/PassManager.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Use.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/IR/ValueHandle.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Scalar.h"
44 #include "llvm/Transforms/Utils.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/Local.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <utility>
52 using namespace llvm;
53 using namespace llvm::PatternMatch;
55 #define DEBUG_TYPE "structurizecfg"
57 // The name for newly created blocks.
58 const char FlowBlockName[] = "Flow";
60 namespace {
62 static cl::opt<bool> ForceSkipUniformRegions(
63 "structurizecfg-skip-uniform-regions",
64 cl::Hidden,
65 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
66 cl::init(false));
68 static cl::opt<bool>
69 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
70 cl::desc("Allow relaxed uniform region checks"),
71 cl::init(true));
73 // Definition of the complex types used in this pass.
75 using BBValuePair = std::pair<BasicBlock *, Value *>;
77 using RNVector = SmallVector<RegionNode *, 8>;
78 using BBVector = SmallVector<BasicBlock *, 8>;
79 using BranchVector = SmallVector<BranchInst *, 8>;
80 using BBValueVector = SmallVector<BBValuePair, 2>;
82 using BBSet = SmallPtrSet<BasicBlock *, 8>;
84 using PhiMap = MapVector<PHINode *, BBValueVector>;
85 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
87 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
88 using BBPredicates = DenseMap<BasicBlock *, Value *>;
89 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
90 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
92 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>;
94 // A traits type that is intended to be used in graph algorithms. The graph
95 // traits starts at an entry node, and traverses the RegionNodes that are in
96 // the Nodes set.
97 struct SubGraphTraits {
98 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
99 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
101 // This wraps a set of Nodes into the iterator, so we know which edges to
102 // filter out.
103 class WrappedSuccIterator
104 : public iterator_adaptor_base<
105 WrappedSuccIterator, BaseSuccIterator,
106 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
107 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
108 SmallDenseSet<RegionNode *> *Nodes;
110 public:
111 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
112 : iterator_adaptor_base(It), Nodes(Nodes) {}
114 NodeRef operator*() const { return {*I, Nodes}; }
117 static bool filterAll(const NodeRef &N) { return true; }
118 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
120 using ChildIteratorType =
121 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
123 static NodeRef getEntryNode(Region *R) {
124 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
127 static NodeRef getEntryNode(NodeRef N) { return N; }
129 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
130 auto *filter = N.second ? &filterSet : &filterAll;
131 return make_filter_range(
132 make_range<WrappedSuccIterator>(
133 {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
134 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
135 filter);
138 static ChildIteratorType child_begin(const NodeRef &N) {
139 return children(N).begin();
142 static ChildIteratorType child_end(const NodeRef &N) {
143 return children(N).end();
147 /// Finds the nearest common dominator of a set of BasicBlocks.
149 /// For every BB you add to the set, you can specify whether we "remember" the
150 /// block. When you get the common dominator, you can also ask whether it's one
151 /// of the blocks we remembered.
152 class NearestCommonDominator {
153 DominatorTree *DT;
154 BasicBlock *Result = nullptr;
155 bool ResultIsRemembered = false;
157 /// Add BB to the resulting dominator.
158 void addBlock(BasicBlock *BB, bool Remember) {
159 if (!Result) {
160 Result = BB;
161 ResultIsRemembered = Remember;
162 return;
165 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
166 if (NewResult != Result)
167 ResultIsRemembered = false;
168 if (NewResult == BB)
169 ResultIsRemembered |= Remember;
170 Result = NewResult;
173 public:
174 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
176 void addBlock(BasicBlock *BB) {
177 addBlock(BB, /* Remember = */ false);
180 void addAndRememberBlock(BasicBlock *BB) {
181 addBlock(BB, /* Remember = */ true);
184 /// Get the nearest common dominator of all the BBs added via addBlock() and
185 /// addAndRememberBlock().
186 BasicBlock *result() { return Result; }
188 /// Is the BB returned by getResult() one of the blocks we added to the set
189 /// with addAndRememberBlock()?
190 bool resultIsRememberedBlock() { return ResultIsRemembered; }
193 /// Transforms the control flow graph on one single entry/exit region
194 /// at a time.
196 /// After the transform all "If"/"Then"/"Else" style control flow looks like
197 /// this:
199 /// \verbatim
200 /// 1
201 /// ||
202 /// | |
203 /// 2 |
204 /// | /
205 /// |/
206 /// 3
207 /// || Where:
208 /// | | 1 = "If" block, calculates the condition
209 /// 4 | 2 = "Then" subregion, runs if the condition is true
210 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
211 /// |/ 4 = "Else" optional subregion, runs if the condition is false
212 /// 5 5 = "End" block, also rejoins the control flow
213 /// \endverbatim
215 /// Control flow is expressed as a branch where the true exit goes into the
216 /// "Then"/"Else" region, while the false exit skips the region
217 /// The condition for the optional "Else" region is expressed as a PHI node.
218 /// The incoming values of the PHI node are true for the "If" edge and false
219 /// for the "Then" edge.
221 /// Additionally to that even complicated loops look like this:
223 /// \verbatim
224 /// 1
225 /// ||
226 /// | |
227 /// 2 ^ Where:
228 /// | / 1 = "Entry" block
229 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
230 /// 3 3 = "Flow" block, with back edge to entry block
231 /// |
232 /// \endverbatim
234 /// The back edge of the "Flow" block is always on the false side of the branch
235 /// while the true side continues the general flow. So the loop condition
236 /// consist of a network of PHI nodes where the true incoming values expresses
237 /// breaks and the false values expresses continue states.
239 class StructurizeCFG {
240 Type *Boolean;
241 ConstantInt *BoolTrue;
242 ConstantInt *BoolFalse;
243 Value *BoolPoison;
245 Function *Func;
246 Region *ParentRegion;
248 UniformityInfo *UA = nullptr;
249 DominatorTree *DT;
251 SmallVector<RegionNode *, 8> Order;
252 BBSet Visited;
253 BBSet FlowSet;
255 SmallVector<WeakVH, 8> AffectedPhis;
256 BBPhiMap DeletedPhis;
257 BB2BBVecMap AddedPhis;
259 PredMap Predicates;
260 BranchVector Conditions;
262 BB2BBMap Loops;
263 PredMap LoopPreds;
264 BranchVector LoopConds;
266 BranchDebugLocMap TermDL;
268 RegionNode *PrevNode;
270 void orderNodes();
272 void analyzeLoops(RegionNode *N);
274 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
276 void gatherPredicates(RegionNode *N);
278 void collectInfos();
280 void insertConditions(bool Loops);
282 void simplifyConditions();
284 void delPhiValues(BasicBlock *From, BasicBlock *To);
286 void addPhiValues(BasicBlock *From, BasicBlock *To);
288 void findUndefBlocks(BasicBlock *PHIBlock,
289 const SmallSet<BasicBlock *, 8> &Incomings,
290 SmallVector<BasicBlock *> &UndefBlks) const;
291 void setPhiValues();
293 void simplifyAffectedPhis();
295 void killTerminator(BasicBlock *BB);
297 void changeExit(RegionNode *Node, BasicBlock *NewExit,
298 bool IncludeDominator);
300 BasicBlock *getNextFlow(BasicBlock *Dominator);
302 BasicBlock *needPrefix(bool NeedEmpty);
304 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
306 void setPrevNode(BasicBlock *BB);
308 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
310 bool isPredictableTrue(RegionNode *Node);
312 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
314 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
316 void createFlow();
318 void rebuildSSA();
320 public:
321 void init(Region *R);
322 bool run(Region *R, DominatorTree *DT);
323 bool makeUniformRegion(Region *R, UniformityInfo &UA);
326 class StructurizeCFGLegacyPass : public RegionPass {
327 bool SkipUniformRegions;
329 public:
330 static char ID;
332 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
333 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
334 if (ForceSkipUniformRegions.getNumOccurrences())
335 SkipUniformRegions = ForceSkipUniformRegions.getValue();
336 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
339 bool runOnRegion(Region *R, RGPassManager &RGM) override {
340 StructurizeCFG SCFG;
341 SCFG.init(R);
342 if (SkipUniformRegions) {
343 UniformityInfo &UA =
344 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
345 if (SCFG.makeUniformRegion(R, UA))
346 return false;
348 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
349 return SCFG.run(R, DT);
352 StringRef getPassName() const override { return "Structurize control flow"; }
354 void getAnalysisUsage(AnalysisUsage &AU) const override {
355 if (SkipUniformRegions)
356 AU.addRequired<UniformityInfoWrapperPass>();
357 AU.addRequired<DominatorTreeWrapperPass>();
359 AU.addPreserved<DominatorTreeWrapperPass>();
360 RegionPass::getAnalysisUsage(AU);
364 } // end anonymous namespace
366 char StructurizeCFGLegacyPass::ID = 0;
368 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
369 "Structurize the CFG", false, false)
370 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
371 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
372 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
373 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
374 "Structurize the CFG", false, false)
376 /// Build up the general order of nodes, by performing a topological sort of the
377 /// parent region's nodes, while ensuring that there is no outer cycle node
378 /// between any two inner cycle nodes.
379 void StructurizeCFG::orderNodes() {
380 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
381 GraphTraits<Region *>::nodes_end(ParentRegion)));
382 if (Order.empty())
383 return;
385 SmallDenseSet<RegionNode *> Nodes;
386 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
388 // A list of range indices of SCCs in Order, to be processed.
389 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
390 unsigned I = 0, E = Order.size();
391 while (true) {
392 // Run through all the SCCs in the subgraph starting with Entry.
393 for (auto SCCI =
394 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
395 EntryNode);
396 !SCCI.isAtEnd(); ++SCCI) {
397 auto &SCC = *SCCI;
399 // An SCC up to the size of 2, can be reduced to an entry (the last node),
400 // and a possible additional node. Therefore, it is already in order, and
401 // there is no need to add it to the work-list.
402 unsigned Size = SCC.size();
403 if (Size > 2)
404 WorkList.emplace_back(I, I + Size);
406 // Add the SCC nodes to the Order array.
407 for (const auto &N : SCC) {
408 assert(I < E && "SCC size mismatch!");
409 Order[I++] = N.first;
412 assert(I == E && "SCC size mismatch!");
414 // If there are no more SCCs to order, then we are done.
415 if (WorkList.empty())
416 break;
418 std::tie(I, E) = WorkList.pop_back_val();
420 // Collect the set of nodes in the SCC's subgraph. These are only the
421 // possible child nodes; we do not add the entry (last node) otherwise we
422 // will have the same exact SCC all over again.
423 Nodes.clear();
424 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
426 // Update the entry node.
427 EntryNode.first = Order[E - 1];
428 EntryNode.second = &Nodes;
432 /// Determine the end of the loops
433 void StructurizeCFG::analyzeLoops(RegionNode *N) {
434 if (N->isSubRegion()) {
435 // Test for exit as back edge
436 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
437 if (Visited.count(Exit))
438 Loops[Exit] = N->getEntry();
440 } else {
441 // Test for successors as back edge
442 BasicBlock *BB = N->getNodeAs<BasicBlock>();
443 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
445 for (BasicBlock *Succ : Term->successors())
446 if (Visited.count(Succ))
447 Loops[Succ] = BB;
451 /// Build the condition for one edge
452 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
453 bool Invert) {
454 Value *Cond = Invert ? BoolFalse : BoolTrue;
455 if (Term->isConditional()) {
456 Cond = Term->getCondition();
458 if (Idx != (unsigned)Invert)
459 Cond = invertCondition(Cond);
461 return Cond;
464 /// Analyze the predecessors of each block and build up predicates
465 void StructurizeCFG::gatherPredicates(RegionNode *N) {
466 RegionInfo *RI = ParentRegion->getRegionInfo();
467 BasicBlock *BB = N->getEntry();
468 BBPredicates &Pred = Predicates[BB];
469 BBPredicates &LPred = LoopPreds[BB];
471 for (BasicBlock *P : predecessors(BB)) {
472 // Ignore it if it's a branch from outside into our region entry
473 if (!ParentRegion->contains(P))
474 continue;
476 Region *R = RI->getRegionFor(P);
477 if (R == ParentRegion) {
478 // It's a top level block in our region
479 BranchInst *Term = cast<BranchInst>(P->getTerminator());
480 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
481 BasicBlock *Succ = Term->getSuccessor(i);
482 if (Succ != BB)
483 continue;
485 if (Visited.count(P)) {
486 // Normal forward edge
487 if (Term->isConditional()) {
488 // Try to treat it like an ELSE block
489 BasicBlock *Other = Term->getSuccessor(!i);
490 if (Visited.count(Other) && !Loops.count(Other) &&
491 !Pred.count(Other) && !Pred.count(P)) {
493 Pred[Other] = BoolFalse;
494 Pred[P] = BoolTrue;
495 continue;
498 Pred[P] = buildCondition(Term, i, false);
499 } else {
500 // Back edge
501 LPred[P] = buildCondition(Term, i, true);
504 } else {
505 // It's an exit from a sub region
506 while (R->getParent() != ParentRegion)
507 R = R->getParent();
509 // Edge from inside a subregion to its entry, ignore it
510 if (*R == *N)
511 continue;
513 BasicBlock *Entry = R->getEntry();
514 if (Visited.count(Entry))
515 Pred[Entry] = BoolTrue;
516 else
517 LPred[Entry] = BoolFalse;
522 /// Collect various loop and predicate infos
523 void StructurizeCFG::collectInfos() {
524 // Reset predicate
525 Predicates.clear();
527 // and loop infos
528 Loops.clear();
529 LoopPreds.clear();
531 // Reset the visited nodes
532 Visited.clear();
534 for (RegionNode *RN : reverse(Order)) {
535 LLVM_DEBUG(dbgs() << "Visiting: "
536 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
537 << RN->getEntry()->getName() << "\n");
539 // Analyze all the conditions leading to a node
540 gatherPredicates(RN);
542 // Remember that we've seen this node
543 Visited.insert(RN->getEntry());
545 // Find the last back edges
546 analyzeLoops(RN);
549 // Reset the collected term debug locations
550 TermDL.clear();
552 for (BasicBlock &BB : *Func) {
553 if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
554 TermDL[&BB] = DL;
558 /// Insert the missing branch conditions
559 void StructurizeCFG::insertConditions(bool Loops) {
560 BranchVector &Conds = Loops ? LoopConds : Conditions;
561 Value *Default = Loops ? BoolTrue : BoolFalse;
562 SSAUpdater PhiInserter;
564 for (BranchInst *Term : Conds) {
565 assert(Term->isConditional());
567 BasicBlock *Parent = Term->getParent();
568 BasicBlock *SuccTrue = Term->getSuccessor(0);
569 BasicBlock *SuccFalse = Term->getSuccessor(1);
571 PhiInserter.Initialize(Boolean, "");
572 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
573 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
575 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
577 NearestCommonDominator Dominator(DT);
578 Dominator.addBlock(Parent);
580 Value *ParentValue = nullptr;
581 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
582 BasicBlock *BB = BBAndPred.first;
583 Value *Pred = BBAndPred.second;
585 if (BB == Parent) {
586 ParentValue = Pred;
587 break;
589 PhiInserter.AddAvailableValue(BB, Pred);
590 Dominator.addAndRememberBlock(BB);
593 if (ParentValue) {
594 Term->setCondition(ParentValue);
595 } else {
596 if (!Dominator.resultIsRememberedBlock())
597 PhiInserter.AddAvailableValue(Dominator.result(), Default);
599 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
604 /// Simplify any inverted conditions that were built by buildConditions.
605 void StructurizeCFG::simplifyConditions() {
606 SmallVector<Instruction *> InstToErase;
607 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
608 auto &Preds = I.second;
609 for (auto &J : Preds) {
610 auto &Cond = J.second;
611 Instruction *Inverted;
612 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
613 !Cond->use_empty()) {
614 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
615 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
616 Cond->replaceAllUsesWith(InvertedCmp);
617 InstToErase.push_back(cast<Instruction>(Cond));
622 for (auto *I : InstToErase)
623 I->eraseFromParent();
626 /// Remove all PHI values coming from "From" into "To" and remember
627 /// them in DeletedPhis
628 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
629 PhiMap &Map = DeletedPhis[To];
630 for (PHINode &Phi : To->phis()) {
631 bool Recorded = false;
632 while (Phi.getBasicBlockIndex(From) != -1) {
633 Value *Deleted = Phi.removeIncomingValue(From, false);
634 Map[&Phi].push_back(std::make_pair(From, Deleted));
635 if (!Recorded) {
636 AffectedPhis.push_back(&Phi);
637 Recorded = true;
643 /// Add a dummy PHI value as soon as we knew the new predecessor
644 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
645 for (PHINode &Phi : To->phis()) {
646 Value *Undef = UndefValue::get(Phi.getType());
647 Phi.addIncoming(Undef, From);
649 AddedPhis[To].push_back(From);
652 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values
653 /// from predecessors \p Incomings, we have a chance to mark the available value
654 /// from some blocks as undefined. The function will find out all such blocks
655 /// and return in \p UndefBlks.
656 void StructurizeCFG::findUndefBlocks(
657 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
658 SmallVector<BasicBlock *> &UndefBlks) const {
659 // We may get a post-structured CFG like below:
661 // | P1
662 // |/
663 // F1
664 // |\
665 // | N
666 // |/
667 // F2
668 // |\
669 // | P2
670 // |/
671 // F3
672 // |\
673 // B
675 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
676 // of B before structurization. F1/F2/F3 are flow blocks inserted during
677 // structurization process. Block N is not a predecessor of B before
678 // structurization, but are placed between the predecessors(P1/P2) of B after
679 // structurization. This usually means that threads went to N never take the
680 // path N->F2->F3->B. For example, the threads take the branch F1->N may
681 // always take the branch F2->P2. So, when we are reconstructing a PHI
682 // originally in B, we can safely say the incoming value from N is undefined.
683 SmallSet<BasicBlock *, 8> VisitedBlock;
684 SmallVector<BasicBlock *, 8> Stack;
685 if (PHIBlock == ParentRegion->getExit()) {
686 for (auto P : predecessors(PHIBlock)) {
687 if (ParentRegion->contains(P))
688 Stack.push_back(P);
690 } else {
691 append_range(Stack, predecessors(PHIBlock));
694 // Do a backward traversal over the CFG, and stop further searching if
695 // the block is not a Flow. If a block is neither flow block nor the
696 // incoming predecessor, then the incoming value from the block is
697 // undefined value for the PHI being reconstructed.
698 while (!Stack.empty()) {
699 BasicBlock *Current = Stack.pop_back_val();
700 if (VisitedBlock.contains(Current))
701 continue;
703 VisitedBlock.insert(Current);
704 if (FlowSet.contains(Current)) {
705 for (auto P : predecessors(Current))
706 Stack.push_back(P);
707 } else if (!Incomings.contains(Current)) {
708 UndefBlks.push_back(Current);
713 /// Add the real PHI value as soon as everything is set up
714 void StructurizeCFG::setPhiValues() {
715 SmallVector<PHINode *, 8> InsertedPhis;
716 SSAUpdater Updater(&InsertedPhis);
717 for (const auto &AddedPhi : AddedPhis) {
718 BasicBlock *To = AddedPhi.first;
719 const BBVector &From = AddedPhi.second;
721 if (!DeletedPhis.count(To))
722 continue;
724 SmallVector<BasicBlock *> UndefBlks;
725 bool CachedUndefs = false;
726 PhiMap &Map = DeletedPhis[To];
727 for (const auto &PI : Map) {
728 PHINode *Phi = PI.first;
729 Value *Undef = UndefValue::get(Phi->getType());
730 Updater.Initialize(Phi->getType(), "");
731 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
732 Updater.AddAvailableValue(To, Undef);
734 SmallSet<BasicBlock *, 8> Incomings;
735 SmallVector<BasicBlock *> ConstantPreds;
736 for (const auto &VI : PI.second) {
737 Incomings.insert(VI.first);
738 Updater.AddAvailableValue(VI.first, VI.second);
739 if (isa<Constant>(VI.second))
740 ConstantPreds.push_back(VI.first);
743 if (!CachedUndefs) {
744 findUndefBlocks(To, Incomings, UndefBlks);
745 CachedUndefs = true;
748 for (auto UB : UndefBlks) {
749 // If this undef block is dominated by any predecessor(before
750 // structurization) of reconstructed PHI with constant incoming value,
751 // don't mark the available value as undefined. Setting undef to such
752 // block will stop us from getting optimal phi insertion.
753 if (any_of(ConstantPreds,
754 [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
755 continue;
756 Updater.AddAvailableValue(UB, Undef);
759 for (BasicBlock *FI : From)
760 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
761 AffectedPhis.push_back(Phi);
764 DeletedPhis.erase(To);
766 assert(DeletedPhis.empty());
768 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
771 void StructurizeCFG::simplifyAffectedPhis() {
772 bool Changed;
773 do {
774 Changed = false;
775 SimplifyQuery Q(Func->getParent()->getDataLayout());
776 Q.DT = DT;
777 // Setting CanUseUndef to true might extend value liveness, set it to false
778 // to achieve better register pressure.
779 Q.CanUseUndef = false;
780 for (WeakVH VH : AffectedPhis) {
781 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
782 if (auto NewValue = simplifyInstruction(Phi, Q)) {
783 Phi->replaceAllUsesWith(NewValue);
784 Phi->eraseFromParent();
785 Changed = true;
789 } while (Changed);
792 /// Remove phi values from all successors and then remove the terminator.
793 void StructurizeCFG::killTerminator(BasicBlock *BB) {
794 Instruction *Term = BB->getTerminator();
795 if (!Term)
796 return;
798 for (BasicBlock *Succ : successors(BB))
799 delPhiValues(BB, Succ);
801 Term->eraseFromParent();
804 /// Let node exit(s) point to NewExit
805 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
806 bool IncludeDominator) {
807 if (Node->isSubRegion()) {
808 Region *SubRegion = Node->getNodeAs<Region>();
809 BasicBlock *OldExit = SubRegion->getExit();
810 BasicBlock *Dominator = nullptr;
812 // Find all the edges from the sub region to the exit.
813 // We use make_early_inc_range here because we modify BB's terminator.
814 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
815 if (!SubRegion->contains(BB))
816 continue;
818 // Modify the edges to point to the new exit
819 delPhiValues(BB, OldExit);
820 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
821 addPhiValues(BB, NewExit);
823 // Find the new dominator (if requested)
824 if (IncludeDominator) {
825 if (!Dominator)
826 Dominator = BB;
827 else
828 Dominator = DT->findNearestCommonDominator(Dominator, BB);
832 // Change the dominator (if requested)
833 if (Dominator)
834 DT->changeImmediateDominator(NewExit, Dominator);
836 // Update the region info
837 SubRegion->replaceExit(NewExit);
838 } else {
839 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
840 killTerminator(BB);
841 BranchInst *Br = BranchInst::Create(NewExit, BB);
842 Br->setDebugLoc(TermDL[BB]);
843 addPhiValues(BB, NewExit);
844 if (IncludeDominator)
845 DT->changeImmediateDominator(NewExit, BB);
849 /// Create a new flow node and update dominator tree and region info
850 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
851 LLVMContext &Context = Func->getContext();
852 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
853 Order.back()->getEntry();
854 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
855 Func, Insert);
856 FlowSet.insert(Flow);
858 // use a temporary variable to avoid a use-after-free if the map's storage is
859 // reallocated
860 DebugLoc DL = TermDL[Dominator];
861 TermDL[Flow] = std::move(DL);
863 DT->addNewBlock(Flow, Dominator);
864 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
865 return Flow;
868 /// Create a new or reuse the previous node as flow node
869 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
870 BasicBlock *Entry = PrevNode->getEntry();
872 if (!PrevNode->isSubRegion()) {
873 killTerminator(Entry);
874 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
875 return Entry;
878 // create a new flow node
879 BasicBlock *Flow = getNextFlow(Entry);
881 // and wire it up
882 changeExit(PrevNode, Flow, true);
883 PrevNode = ParentRegion->getBBNode(Flow);
884 return Flow;
887 /// Returns the region exit if possible, otherwise just a new flow node
888 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
889 bool ExitUseAllowed) {
890 if (!Order.empty() || !ExitUseAllowed)
891 return getNextFlow(Flow);
893 BasicBlock *Exit = ParentRegion->getExit();
894 DT->changeImmediateDominator(Exit, Flow);
895 addPhiValues(Flow, Exit);
896 return Exit;
899 /// Set the previous node
900 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
901 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
902 : nullptr;
905 /// Does BB dominate all the predicates of Node?
906 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
907 BBPredicates &Preds = Predicates[Node->getEntry()];
908 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
909 return DT->dominates(BB, Pred.first);
913 /// Can we predict that this node will always be called?
914 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
915 BBPredicates &Preds = Predicates[Node->getEntry()];
916 bool Dominated = false;
918 // Regionentry is always true
919 if (!PrevNode)
920 return true;
922 for (std::pair<BasicBlock*, Value*> Pred : Preds) {
923 BasicBlock *BB = Pred.first;
924 Value *V = Pred.second;
926 if (V != BoolTrue)
927 return false;
929 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
930 Dominated = true;
933 // TODO: The dominator check is too strict
934 return Dominated;
937 /// Take one node from the order vector and wire it up
938 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
939 BasicBlock *LoopEnd) {
940 RegionNode *Node = Order.pop_back_val();
941 Visited.insert(Node->getEntry());
943 if (isPredictableTrue(Node)) {
944 // Just a linear flow
945 if (PrevNode) {
946 changeExit(PrevNode, Node->getEntry(), true);
948 PrevNode = Node;
949 } else {
950 // Insert extra prefix node (or reuse last one)
951 BasicBlock *Flow = needPrefix(false);
953 // Insert extra postfix node (or use exit instead)
954 BasicBlock *Entry = Node->getEntry();
955 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
957 // let it point to entry and next block
958 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
959 Br->setDebugLoc(TermDL[Flow]);
960 Conditions.push_back(Br);
961 addPhiValues(Flow, Entry);
962 DT->changeImmediateDominator(Entry, Flow);
964 PrevNode = Node;
965 while (!Order.empty() && !Visited.count(LoopEnd) &&
966 dominatesPredicates(Entry, Order.back())) {
967 handleLoops(false, LoopEnd);
970 changeExit(PrevNode, Next, false);
971 setPrevNode(Next);
975 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
976 BasicBlock *LoopEnd) {
977 RegionNode *Node = Order.back();
978 BasicBlock *LoopStart = Node->getEntry();
980 if (!Loops.count(LoopStart)) {
981 wireFlow(ExitUseAllowed, LoopEnd);
982 return;
985 if (!isPredictableTrue(Node))
986 LoopStart = needPrefix(true);
988 LoopEnd = Loops[Node->getEntry()];
989 wireFlow(false, LoopEnd);
990 while (!Visited.count(LoopEnd)) {
991 handleLoops(false, LoopEnd);
994 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
996 // Create an extra loop end node
997 LoopEnd = needPrefix(false);
998 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
999 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1000 Br->setDebugLoc(TermDL[LoopEnd]);
1001 LoopConds.push_back(Br);
1002 addPhiValues(LoopEnd, LoopStart);
1003 setPrevNode(Next);
1006 /// After this function control flow looks like it should be, but
1007 /// branches and PHI nodes only have undefined conditions.
1008 void StructurizeCFG::createFlow() {
1009 BasicBlock *Exit = ParentRegion->getExit();
1010 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1012 AffectedPhis.clear();
1013 DeletedPhis.clear();
1014 AddedPhis.clear();
1015 Conditions.clear();
1016 LoopConds.clear();
1018 PrevNode = nullptr;
1019 Visited.clear();
1021 while (!Order.empty()) {
1022 handleLoops(EntryDominatesExit, nullptr);
1025 if (PrevNode)
1026 changeExit(PrevNode, Exit, EntryDominatesExit);
1027 else
1028 assert(EntryDominatesExit);
1031 /// Handle a rare case where the disintegrated nodes instructions
1032 /// no longer dominate all their uses. Not sure if this is really necessary
1033 void StructurizeCFG::rebuildSSA() {
1034 SSAUpdater Updater;
1035 for (BasicBlock *BB : ParentRegion->blocks())
1036 for (Instruction &I : *BB) {
1037 bool Initialized = false;
1038 // We may modify the use list as we iterate over it, so we use
1039 // make_early_inc_range.
1040 for (Use &U : llvm::make_early_inc_range(I.uses())) {
1041 Instruction *User = cast<Instruction>(U.getUser());
1042 if (User->getParent() == BB) {
1043 continue;
1044 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1045 if (UserPN->getIncomingBlock(U) == BB)
1046 continue;
1049 if (DT->dominates(&I, User))
1050 continue;
1052 if (!Initialized) {
1053 Value *Undef = UndefValue::get(I.getType());
1054 Updater.Initialize(I.getType(), "");
1055 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
1056 Updater.AddAvailableValue(BB, &I);
1057 Initialized = true;
1059 Updater.RewriteUseAfterInsertions(U);
1064 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1065 const UniformityInfo &UA) {
1066 // Bool for if all sub-regions are uniform.
1067 bool SubRegionsAreUniform = true;
1068 // Count of how many direct children are conditional.
1069 unsigned ConditionalDirectChildren = 0;
1071 for (auto *E : R->elements()) {
1072 if (!E->isSubRegion()) {
1073 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1074 if (!Br || !Br->isConditional())
1075 continue;
1077 if (!UA.isUniform(Br))
1078 return false;
1080 // One of our direct children is conditional.
1081 ConditionalDirectChildren++;
1083 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1084 << " has uniform terminator\n");
1085 } else {
1086 // Explicitly refuse to treat regions as uniform if they have non-uniform
1087 // subregions. We cannot rely on UniformityAnalysis for branches in
1088 // subregions because those branches may have been removed and re-created,
1089 // so we look for our metadata instead.
1091 // Warning: It would be nice to treat regions as uniform based only on
1092 // their direct child basic blocks' terminators, regardless of whether
1093 // subregions are uniform or not. However, this requires a very careful
1094 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1095 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1096 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1097 if (!Br || !Br->isConditional())
1098 continue;
1100 if (!Br->getMetadata(UniformMDKindID)) {
1101 // Early exit if we cannot have relaxed uniform regions.
1102 if (!RelaxedUniformRegions)
1103 return false;
1105 SubRegionsAreUniform = false;
1106 break;
1112 // Our region is uniform if:
1113 // 1. All conditional branches that are direct children are uniform (checked
1114 // above).
1115 // 2. And either:
1116 // a. All sub-regions are uniform.
1117 // b. There is one or less conditional branches among the direct children.
1118 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1121 void StructurizeCFG::init(Region *R) {
1122 LLVMContext &Context = R->getEntry()->getContext();
1124 Boolean = Type::getInt1Ty(Context);
1125 BoolTrue = ConstantInt::getTrue(Context);
1126 BoolFalse = ConstantInt::getFalse(Context);
1127 BoolPoison = PoisonValue::get(Boolean);
1129 this->UA = nullptr;
1132 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1133 if (R->isTopLevelRegion())
1134 return false;
1136 this->UA = &UA;
1138 // TODO: We could probably be smarter here with how we handle sub-regions.
1139 // We currently rely on the fact that metadata is set by earlier invocations
1140 // of the pass on sub-regions, and that this metadata doesn't get lost --
1141 // but we shouldn't rely on metadata for correctness!
1142 unsigned UniformMDKindID =
1143 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1145 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1146 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1147 << '\n');
1149 // Mark all direct child block terminators as having been treated as
1150 // uniform. To account for a possible future in which non-uniform
1151 // sub-regions are treated more cleverly, indirect children are not
1152 // marked as uniform.
1153 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1154 for (RegionNode *E : R->elements()) {
1155 if (E->isSubRegion())
1156 continue;
1158 if (Instruction *Term = E->getEntry()->getTerminator())
1159 Term->setMetadata(UniformMDKindID, MD);
1162 return true;
1164 return false;
1167 /// Run the transformation for each region found
1168 bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1169 if (R->isTopLevelRegion())
1170 return false;
1172 this->DT = DT;
1174 Func = R->getEntry()->getParent();
1175 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1177 ParentRegion = R;
1179 orderNodes();
1180 collectInfos();
1181 createFlow();
1182 insertConditions(false);
1183 insertConditions(true);
1184 setPhiValues();
1185 simplifyConditions();
1186 simplifyAffectedPhis();
1187 rebuildSSA();
1189 // Cleanup
1190 Order.clear();
1191 Visited.clear();
1192 DeletedPhis.clear();
1193 AddedPhis.clear();
1194 Predicates.clear();
1195 Conditions.clear();
1196 Loops.clear();
1197 LoopPreds.clear();
1198 LoopConds.clear();
1199 FlowSet.clear();
1200 TermDL.clear();
1202 return true;
1205 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1206 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1209 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1210 Regions.push_back(&R);
1211 for (const auto &E : R)
1212 addRegionIntoQueue(*E, Regions);
1215 PreservedAnalyses StructurizeCFGPass::run(Function &F,
1216 FunctionAnalysisManager &AM) {
1218 bool Changed = false;
1219 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1220 auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1221 std::vector<Region *> Regions;
1222 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1223 while (!Regions.empty()) {
1224 Region *R = Regions.back();
1225 StructurizeCFG SCFG;
1226 SCFG.init(R);
1227 Changed |= SCFG.run(R, DT);
1228 Regions.pop_back();
1230 if (!Changed)
1231 return PreservedAnalyses::all();
1232 PreservedAnalyses PA;
1233 PA.preserve<DominatorTreeAnalysis>();
1234 return PA;