[CostModel][X86] Attempt to match v4f32 shuffles that map to MOVSS/INSERTPS instruction
[llvm-project.git] / llvm / lib / Transforms / Utils / ControlFlowUtils.cpp
blob5ba626fa213ad257a0371358adb393f0e880750a
1 //===- ControlFlowUtils.cpp - Control Flow Utilities -----------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Utilities to manipulate the CFG and restore SSA for the new control flow.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Transforms/Utils/ControlFlowUtils.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallSet.h"
16 #include "llvm/Analysis/DomTreeUpdater.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/ValueHandle.h"
20 #include "llvm/Transforms/Utils/Local.h"
22 #define DEBUG_TYPE "control-flow-hub"
24 using namespace llvm;
26 using BBPredicates = DenseMap<BasicBlock *, Instruction *>;
27 using EdgeDescriptor = ControlFlowHub::BranchDescriptor;
29 // Redirects the terminator of the incoming block to the first guard block in
30 // the hub. Returns the branch condition from `BB` if it exits.
31 // - If only one of Succ0 or Succ1 is not null, the corresponding branch
32 // successor is redirected to the FirstGuardBlock.
33 // - Else both are not null, and branch is replaced with an unconditional
34 // branch to the FirstGuardBlock.
35 static Value *redirectToHub(BasicBlock *BB, BasicBlock *Succ0,
36 BasicBlock *Succ1, BasicBlock *FirstGuardBlock) {
37 assert(isa<BranchInst>(BB->getTerminator()) &&
38 "Only support branch terminator.");
39 auto *Branch = cast<BranchInst>(BB->getTerminator());
40 auto *Condition = Branch->isConditional() ? Branch->getCondition() : nullptr;
42 assert(Succ0 || Succ1);
44 if (Branch->isUnconditional()) {
45 assert(Succ0 == Branch->getSuccessor(0));
46 assert(!Succ1);
47 Branch->setSuccessor(0, FirstGuardBlock);
48 } else {
49 assert(!Succ1 || Succ1 == Branch->getSuccessor(1));
50 if (Succ0 && !Succ1) {
51 Branch->setSuccessor(0, FirstGuardBlock);
52 } else if (Succ1 && !Succ0) {
53 Branch->setSuccessor(1, FirstGuardBlock);
54 } else {
55 Branch->eraseFromParent();
56 BranchInst::Create(FirstGuardBlock, BB);
60 return Condition;
63 // Setup the branch instructions for guard blocks.
65 // Each guard block terminates in a conditional branch that transfers
66 // control to the corresponding outgoing block or the next guard
67 // block. The last guard block has two outgoing blocks as successors.
68 static void setupBranchForGuard(ArrayRef<BasicBlock *> GuardBlocks,
69 ArrayRef<BasicBlock *> Outgoing,
70 BBPredicates &GuardPredicates) {
71 assert(Outgoing.size() > 1);
72 assert(GuardBlocks.size() == Outgoing.size() - 1);
73 int I = 0;
74 for (int E = GuardBlocks.size() - 1; I != E; ++I) {
75 BasicBlock *Out = Outgoing[I];
76 BranchInst::Create(Out, GuardBlocks[I + 1], GuardPredicates[Out],
77 GuardBlocks[I]);
79 BasicBlock *Out = Outgoing[I];
80 BranchInst::Create(Out, Outgoing[I + 1], GuardPredicates[Out],
81 GuardBlocks[I]);
84 // Assign an index to each outgoing block. At the corresponding guard
85 // block, compute the branch condition by comparing this index.
86 static void calcPredicateUsingInteger(ArrayRef<EdgeDescriptor> Branches,
87 ArrayRef<BasicBlock *> Outgoing,
88 ArrayRef<BasicBlock *> GuardBlocks,
89 BBPredicates &GuardPredicates) {
90 LLVMContext &Context = GuardBlocks.front()->getContext();
91 BasicBlock *FirstGuardBlock = GuardBlocks.front();
92 Type *Int32Ty = Type::getInt32Ty(Context);
94 auto *Phi = PHINode::Create(Int32Ty, Branches.size(), "merged.bb.idx",
95 FirstGuardBlock);
97 for (auto [BB, Succ0, Succ1] : Branches) {
98 Value *Condition = redirectToHub(BB, Succ0, Succ1, FirstGuardBlock);
99 Value *IncomingId = nullptr;
100 if (Succ0 && Succ1) {
101 auto Succ0Iter = find(Outgoing, Succ0);
102 auto Succ1Iter = find(Outgoing, Succ1);
103 Value *Id0 =
104 ConstantInt::get(Int32Ty, std::distance(Outgoing.begin(), Succ0Iter));
105 Value *Id1 =
106 ConstantInt::get(Int32Ty, std::distance(Outgoing.begin(), Succ1Iter));
107 IncomingId = SelectInst::Create(Condition, Id0, Id1, "target.bb.idx",
108 BB->getTerminator()->getIterator());
109 } else {
110 // Get the index of the non-null successor.
111 auto SuccIter = Succ0 ? find(Outgoing, Succ0) : find(Outgoing, Succ1);
112 IncomingId =
113 ConstantInt::get(Int32Ty, std::distance(Outgoing.begin(), SuccIter));
115 Phi->addIncoming(IncomingId, BB);
118 for (int I = 0, E = Outgoing.size() - 1; I != E; ++I) {
119 BasicBlock *Out = Outgoing[I];
120 LLVM_DEBUG(dbgs() << "Creating integer guard for " << Out->getName()
121 << "\n");
122 auto *Cmp = ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, Phi,
123 ConstantInt::get(Int32Ty, I),
124 Out->getName() + ".predicate", GuardBlocks[I]);
125 GuardPredicates[Out] = Cmp;
129 // Determine the branch condition to be used at each guard block from the
130 // original boolean values.
131 static void calcPredicateUsingBooleans(
132 ArrayRef<EdgeDescriptor> Branches, ArrayRef<BasicBlock *> Outgoing,
133 SmallVectorImpl<BasicBlock *> &GuardBlocks, BBPredicates &GuardPredicates,
134 SmallVectorImpl<WeakVH> &DeletionCandidates) {
135 LLVMContext &Context = GuardBlocks.front()->getContext();
136 auto *BoolTrue = ConstantInt::getTrue(Context);
137 auto *BoolFalse = ConstantInt::getFalse(Context);
138 BasicBlock *FirstGuardBlock = GuardBlocks.front();
140 // The predicate for the last outgoing is trivially true, and so we
141 // process only the first N-1 successors.
142 for (int I = 0, E = Outgoing.size() - 1; I != E; ++I) {
143 BasicBlock *Out = Outgoing[I];
144 LLVM_DEBUG(dbgs() << "Creating boolean guard for " << Out->getName()
145 << "\n");
147 auto *Phi =
148 PHINode::Create(Type::getInt1Ty(Context), Branches.size(),
149 StringRef("Guard.") + Out->getName(), FirstGuardBlock);
150 GuardPredicates[Out] = Phi;
153 for (auto [BB, Succ0, Succ1] : Branches) {
154 Value *Condition = redirectToHub(BB, Succ0, Succ1, FirstGuardBlock);
156 // Optimization: Consider an incoming block A with both successors
157 // Succ0 and Succ1 in the set of outgoing blocks. The predicates
158 // for Succ0 and Succ1 complement each other. If Succ0 is visited
159 // first in the loop below, control will branch to Succ0 using the
160 // corresponding predicate. But if that branch is not taken, then
161 // control must reach Succ1, which means that the incoming value of
162 // the predicate from `BB` is true for Succ1.
163 bool OneSuccessorDone = false;
164 for (int I = 0, E = Outgoing.size() - 1; I != E; ++I) {
165 BasicBlock *Out = Outgoing[I];
166 PHINode *Phi = cast<PHINode>(GuardPredicates[Out]);
167 if (Out != Succ0 && Out != Succ1) {
168 Phi->addIncoming(BoolFalse, BB);
169 } else if (!Succ0 || !Succ1 || OneSuccessorDone) {
170 // Optimization: When only one successor is an outgoing block,
171 // the incoming predicate from `BB` is always true.
172 Phi->addIncoming(BoolTrue, BB);
173 } else {
174 assert(Succ0 && Succ1);
175 if (Out == Succ0) {
176 Phi->addIncoming(Condition, BB);
177 } else {
178 Value *Inverted = invertCondition(Condition);
179 DeletionCandidates.push_back(Condition);
180 Phi->addIncoming(Inverted, BB);
182 OneSuccessorDone = true;
188 // Capture the existing control flow as guard predicates, and redirect
189 // control flow from \p Incoming block through the \p GuardBlocks to the
190 // \p Outgoing blocks.
192 // There is one guard predicate for each outgoing block OutBB. The
193 // predicate represents whether the hub should transfer control flow
194 // to OutBB. These predicates are NOT ORTHOGONAL. The Hub evaluates
195 // them in the same order as the Outgoing set-vector, and control
196 // branches to the first outgoing block whose predicate evaluates to true.
198 // The last guard block has two outgoing blocks as successors since the
199 // condition for the final outgoing block is trivially true. So we create one
200 // less block (including the first guard block) than the number of outgoing
201 // blocks.
202 static void convertToGuardPredicates(
203 ArrayRef<EdgeDescriptor> Branches, ArrayRef<BasicBlock *> Outgoing,
204 SmallVectorImpl<BasicBlock *> &GuardBlocks,
205 SmallVectorImpl<WeakVH> &DeletionCandidates, const StringRef Prefix,
206 std::optional<unsigned> MaxControlFlowBooleans) {
207 BBPredicates GuardPredicates;
208 Function *F = Outgoing.front()->getParent();
210 for (int I = 0, E = Outgoing.size() - 1; I != E; ++I)
211 GuardBlocks.push_back(
212 BasicBlock::Create(F->getContext(), Prefix + ".guard", F));
214 // When we are using an integer to record which target block to jump to, we
215 // are creating less live values, actually we are using one single integer to
216 // store the index of the target block. When we are using booleans to store
217 // the branching information, we need (N-1) boolean values, where N is the
218 // number of outgoing block.
219 if (!MaxControlFlowBooleans || Outgoing.size() <= *MaxControlFlowBooleans)
220 calcPredicateUsingBooleans(Branches, Outgoing, GuardBlocks, GuardPredicates,
221 DeletionCandidates);
222 else
223 calcPredicateUsingInteger(Branches, Outgoing, GuardBlocks, GuardPredicates);
225 setupBranchForGuard(GuardBlocks, Outgoing, GuardPredicates);
228 // After creating a control flow hub, the operands of PHINodes in an outgoing
229 // block Out no longer match the predecessors of that block. Predecessors of Out
230 // that are incoming blocks to the hub are now replaced by just one edge from
231 // the hub. To match this new control flow, the corresponding values from each
232 // PHINode must now be moved a new PHINode in the first guard block of the hub.
234 // This operation cannot be performed with SSAUpdater, because it involves one
235 // new use: If the block Out is in the list of Incoming blocks, then the newly
236 // created PHI in the Hub will use itself along that edge from Out to Hub.
237 static void reconnectPhis(BasicBlock *Out, BasicBlock *GuardBlock,
238 ArrayRef<EdgeDescriptor> Incoming,
239 BasicBlock *FirstGuardBlock) {
240 auto I = Out->begin();
241 while (I != Out->end() && isa<PHINode>(I)) {
242 auto *Phi = cast<PHINode>(I);
243 auto *NewPhi =
244 PHINode::Create(Phi->getType(), Incoming.size(),
245 Phi->getName() + ".moved", FirstGuardBlock->begin());
246 bool AllUndef = true;
247 for (auto [BB, Succ0, Succ1] : Incoming) {
248 Value *V = PoisonValue::get(Phi->getType());
249 if (BB == Out) {
250 V = NewPhi;
251 } else if (Phi->getBasicBlockIndex(BB) != -1) {
252 V = Phi->removeIncomingValue(BB, false);
253 AllUndef &= isa<UndefValue>(V);
255 NewPhi->addIncoming(V, BB);
257 assert(NewPhi->getNumIncomingValues() == Incoming.size());
258 Value *NewV = NewPhi;
259 if (AllUndef) {
260 NewPhi->eraseFromParent();
261 NewV = PoisonValue::get(Phi->getType());
263 if (Phi->getNumOperands() == 0) {
264 Phi->replaceAllUsesWith(NewV);
265 I = Phi->eraseFromParent();
266 continue;
268 Phi->addIncoming(NewV, GuardBlock);
269 ++I;
273 BasicBlock *ControlFlowHub::finalize(
274 DomTreeUpdater *DTU, SmallVectorImpl<BasicBlock *> &GuardBlocks,
275 const StringRef Prefix, std::optional<unsigned> MaxControlFlowBooleans) {
276 #ifndef NDEBUG
277 SmallSet<BasicBlock *, 8> Incoming;
278 #endif
279 SetVector<BasicBlock *> Outgoing;
281 for (auto [BB, Succ0, Succ1] : Branches) {
282 #ifndef NDEBUG
283 assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
284 #endif
285 if (Succ0)
286 Outgoing.insert(Succ0);
287 if (Succ1)
288 Outgoing.insert(Succ1);
291 if (Outgoing.size() < 2)
292 return Outgoing.front();
294 SmallVector<DominatorTree::UpdateType, 16> Updates;
295 if (DTU) {
296 for (auto [BB, Succ0, Succ1] : Branches) {
297 if (Succ0)
298 Updates.push_back({DominatorTree::Delete, BB, Succ0});
299 if (Succ1)
300 Updates.push_back({DominatorTree::Delete, BB, Succ1});
304 SmallVector<WeakVH, 8> DeletionCandidates;
305 convertToGuardPredicates(Branches, Outgoing.getArrayRef(), GuardBlocks,
306 DeletionCandidates, Prefix, MaxControlFlowBooleans);
307 BasicBlock *FirstGuardBlock = GuardBlocks.front();
309 // Update the PHINodes in each outgoing block to match the new control flow.
310 for (int I = 0, E = GuardBlocks.size(); I != E; ++I)
311 reconnectPhis(Outgoing[I], GuardBlocks[I], Branches, FirstGuardBlock);
312 // Process the Nth (last) outgoing block with the (N-1)th (last) guard block.
313 reconnectPhis(Outgoing.back(), GuardBlocks.back(), Branches, FirstGuardBlock);
315 if (DTU) {
316 int NumGuards = GuardBlocks.size();
318 for (auto [BB, Succ0, Succ1] : Branches)
319 Updates.push_back({DominatorTree::Insert, BB, FirstGuardBlock});
321 for (int I = 0; I != NumGuards - 1; ++I) {
322 Updates.push_back({DominatorTree::Insert, GuardBlocks[I], Outgoing[I]});
323 Updates.push_back(
324 {DominatorTree::Insert, GuardBlocks[I], GuardBlocks[I + 1]});
326 // The second successor of the last guard block is an outgoing block instead
327 // of having a "next" guard block.
328 Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1],
329 Outgoing[NumGuards - 1]});
330 Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1],
331 Outgoing[NumGuards]});
332 DTU->applyUpdates(Updates);
335 for (auto I : DeletionCandidates) {
336 if (I->use_empty())
337 if (auto *Inst = dyn_cast_or_null<Instruction>(I))
338 Inst->eraseFromParent();
341 return FirstGuardBlock;