[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Transforms / Utils / SimplifyCFG.cpp
blobba3129f5581fec5517a0e5d2f90c8617ed8f8ee5
1 //===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Peephole optimize the CFG.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/Sequence.h"
21 #include "llvm/ADT/SetOperations.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Analysis/AssumptionCache.h"
28 #include "llvm/Analysis/CaptureTracking.h"
29 #include "llvm/Analysis/ConstantFolding.h"
30 #include "llvm/Analysis/EHPersonalities.h"
31 #include "llvm/Analysis/GuardUtils.h"
32 #include "llvm/Analysis/InstructionSimplify.h"
33 #include "llvm/Analysis/MemorySSA.h"
34 #include "llvm/Analysis/MemorySSAUpdater.h"
35 #include "llvm/Analysis/TargetTransformInfo.h"
36 #include "llvm/Analysis/ValueTracking.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/CFG.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/GlobalValue.h"
47 #include "llvm/IR/GlobalVariable.h"
48 #include "llvm/IR/IRBuilder.h"
49 #include "llvm/IR/InstrTypes.h"
50 #include "llvm/IR/Instruction.h"
51 #include "llvm/IR/Instructions.h"
52 #include "llvm/IR/IntrinsicInst.h"
53 #include "llvm/IR/Intrinsics.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/MDBuilder.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/NoFolder.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PatternMatch.h"
61 #include "llvm/IR/PseudoProbe.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/Support/BranchProbability.h"
68 #include "llvm/Support/Casting.h"
69 #include "llvm/Support/CommandLine.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/ErrorHandling.h"
72 #include "llvm/Support/KnownBits.h"
73 #include "llvm/Support/MathExtras.h"
74 #include "llvm/Support/raw_ostream.h"
75 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
76 #include "llvm/Transforms/Utils/Local.h"
77 #include "llvm/Transforms/Utils/SSAUpdater.h"
78 #include "llvm/Transforms/Utils/ValueMapper.h"
79 #include <algorithm>
80 #include <cassert>
81 #include <climits>
82 #include <cstddef>
83 #include <cstdint>
84 #include <iterator>
85 #include <map>
86 #include <set>
87 #include <tuple>
88 #include <utility>
89 #include <vector>
91 using namespace llvm;
92 using namespace PatternMatch;
94 #define DEBUG_TYPE "simplifycfg"
96 cl::opt<bool> llvm::RequireAndPreserveDomTree(
97 "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore,
98 cl::init(false),
99 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
100 "into preserving DomTree,"));
102 // Chosen as 2 so as to be cheap, but still to have enough power to fold
103 // a select, so the "clamp" idiom (of a min followed by a max) will be caught.
104 // To catch this, we need to fold a compare and a select, hence '2' being the
105 // minimum reasonable default.
106 static cl::opt<unsigned> PHINodeFoldingThreshold(
107 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
108 cl::desc(
109 "Control the amount of phi node folding to perform (default = 2)"));
111 static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
112 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
113 cl::desc("Control the maximal total instruction cost that we are willing "
114 "to speculatively execute to fold a 2-entry PHI node into a "
115 "select (default = 4)"));
117 static cl::opt<bool>
118 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
119 cl::desc("Hoist common instructions up to the parent block"));
121 static cl::opt<bool>
122 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
123 cl::desc("Sink common instructions down to the end block"));
125 static cl::opt<bool> HoistCondStores(
126 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
127 cl::desc("Hoist conditional stores if an unconditional store precedes"));
129 static cl::opt<bool> MergeCondStores(
130 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
131 cl::desc("Hoist conditional stores even if an unconditional store does not "
132 "precede - hoist multiple conditional stores into a single "
133 "predicated store"));
135 static cl::opt<bool> MergeCondStoresAggressively(
136 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
137 cl::desc("When merging conditional stores, do so even if the resultant "
138 "basic blocks are unlikely to be if-converted as a result"));
140 static cl::opt<bool> SpeculateOneExpensiveInst(
141 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
142 cl::desc("Allow exactly one expensive instruction to be speculatively "
143 "executed"));
145 static cl::opt<unsigned> MaxSpeculationDepth(
146 "max-speculation-depth", cl::Hidden, cl::init(10),
147 cl::desc("Limit maximum recursion depth when calculating costs of "
148 "speculatively executed instructions"));
150 static cl::opt<int>
151 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
152 cl::init(10),
153 cl::desc("Max size of a block which is still considered "
154 "small enough to thread through"));
156 // Two is chosen to allow one negation and a logical combine.
157 static cl::opt<unsigned>
158 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
159 cl::init(2),
160 cl::desc("Maximum cost of combining conditions when "
161 "folding branches"));
163 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
164 STATISTIC(NumLinearMaps,
165 "Number of switch instructions turned into linear mapping");
166 STATISTIC(NumLookupTables,
167 "Number of switch instructions turned into lookup tables");
168 STATISTIC(
169 NumLookupTablesHoles,
170 "Number of switch instructions turned into lookup tables (holes checked)");
171 STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
172 STATISTIC(NumFoldValueComparisonIntoPredecessors,
173 "Number of value comparisons folded into predecessor basic blocks");
174 STATISTIC(NumFoldBranchToCommonDest,
175 "Number of branches folded into predecessor basic block");
176 STATISTIC(
177 NumHoistCommonCode,
178 "Number of common instruction 'blocks' hoisted up to the begin block");
179 STATISTIC(NumHoistCommonInstrs,
180 "Number of common instructions hoisted up to the begin block");
181 STATISTIC(NumSinkCommonCode,
182 "Number of common instruction 'blocks' sunk down to the end block");
183 STATISTIC(NumSinkCommonInstrs,
184 "Number of common instructions sunk down to the end block");
185 STATISTIC(NumSpeculations, "Number of speculative executed instructions");
186 STATISTIC(NumInvokes,
187 "Number of invokes with empty resume blocks simplified into calls");
189 namespace {
191 // The first field contains the value that the switch produces when a certain
192 // case group is selected, and the second field is a vector containing the
193 // cases composing the case group.
194 using SwitchCaseResultVectorTy =
195 SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
197 // The first field contains the phi node that generates a result of the switch
198 // and the second field contains the value generated for a certain case in the
199 // switch for that PHI.
200 using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
202 /// ValueEqualityComparisonCase - Represents a case of a switch.
203 struct ValueEqualityComparisonCase {
204 ConstantInt *Value;
205 BasicBlock *Dest;
207 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
208 : Value(Value), Dest(Dest) {}
210 bool operator<(ValueEqualityComparisonCase RHS) const {
211 // Comparing pointers is ok as we only rely on the order for uniquing.
212 return Value < RHS.Value;
215 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
218 class SimplifyCFGOpt {
219 const TargetTransformInfo &TTI;
220 DomTreeUpdater *DTU;
221 const DataLayout &DL;
222 ArrayRef<WeakVH> LoopHeaders;
223 const SimplifyCFGOptions &Options;
224 bool Resimplify;
226 Value *isValueEqualityComparison(Instruction *TI);
227 BasicBlock *GetValueEqualityComparisonCases(
228 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
229 bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
230 BasicBlock *Pred,
231 IRBuilder<> &Builder);
232 bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
233 Instruction *PTI,
234 IRBuilder<> &Builder);
235 bool FoldValueComparisonIntoPredecessors(Instruction *TI,
236 IRBuilder<> &Builder);
238 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
239 bool simplifySingleResume(ResumeInst *RI);
240 bool simplifyCommonResume(ResumeInst *RI);
241 bool simplifyCleanupReturn(CleanupReturnInst *RI);
242 bool simplifyUnreachable(UnreachableInst *UI);
243 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
244 bool simplifyIndirectBr(IndirectBrInst *IBI);
245 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
246 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
247 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
249 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
250 IRBuilder<> &Builder);
252 bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
253 bool EqTermsOnly);
254 bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
255 const TargetTransformInfo &TTI);
256 bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
257 BasicBlock *TrueBB, BasicBlock *FalseBB,
258 uint32_t TrueWeight, uint32_t FalseWeight);
259 bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
260 const DataLayout &DL);
261 bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
262 bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
263 bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
265 public:
266 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
267 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
268 const SimplifyCFGOptions &Opts)
269 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
270 assert((!DTU || !DTU->hasPostDomTree()) &&
271 "SimplifyCFG is not yet capable of maintaining validity of a "
272 "PostDomTree, so don't ask for it.");
275 bool simplifyOnce(BasicBlock *BB);
276 bool simplifyOnceImpl(BasicBlock *BB);
277 bool run(BasicBlock *BB);
279 // Helper to set Resimplify and return change indication.
280 bool requestResimplify() {
281 Resimplify = true;
282 return true;
286 } // end anonymous namespace
288 /// Return true if it is safe to merge these two
289 /// terminator instructions together.
290 static bool
291 SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
292 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
293 if (SI1 == SI2)
294 return false; // Can't merge with self!
296 // It is not safe to merge these two switch instructions if they have a common
297 // successor, and if that successor has a PHI node, and if *that* PHI node has
298 // conflicting incoming values from the two switch blocks.
299 BasicBlock *SI1BB = SI1->getParent();
300 BasicBlock *SI2BB = SI2->getParent();
302 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
303 bool Fail = false;
304 for (BasicBlock *Succ : successors(SI2BB))
305 if (SI1Succs.count(Succ))
306 for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
307 PHINode *PN = cast<PHINode>(BBI);
308 if (PN->getIncomingValueForBlock(SI1BB) !=
309 PN->getIncomingValueForBlock(SI2BB)) {
310 if (FailBlocks)
311 FailBlocks->insert(Succ);
312 Fail = true;
316 return !Fail;
319 /// Update PHI nodes in Succ to indicate that there will now be entries in it
320 /// from the 'NewPred' block. The values that will be flowing into the PHI nodes
321 /// will be the same as those coming in from ExistPred, an existing predecessor
322 /// of Succ.
323 static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
324 BasicBlock *ExistPred,
325 MemorySSAUpdater *MSSAU = nullptr) {
326 for (PHINode &PN : Succ->phis())
327 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
328 if (MSSAU)
329 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
330 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
333 /// Compute an abstract "cost" of speculating the given instruction,
334 /// which is assumed to be safe to speculate. TCC_Free means cheap,
335 /// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
336 /// expensive.
337 static InstructionCost computeSpeculationCost(const User *I,
338 const TargetTransformInfo &TTI) {
339 assert(isSafeToSpeculativelyExecute(I) &&
340 "Instruction is not safe to speculatively execute!");
341 return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
344 /// If we have a merge point of an "if condition" as accepted above,
345 /// return true if the specified value dominates the block. We
346 /// don't handle the true generality of domination here, just a special case
347 /// which works well enough for us.
349 /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
350 /// see if V (which must be an instruction) and its recursive operands
351 /// that do not dominate BB have a combined cost lower than Budget and
352 /// are non-trapping. If both are true, the instruction is inserted into the
353 /// set and true is returned.
355 /// The cost for most non-trapping instructions is defined as 1 except for
356 /// Select whose cost is 2.
358 /// After this function returns, Cost is increased by the cost of
359 /// V plus its non-dominating operands. If that cost is greater than
360 /// Budget, false is returned and Cost is undefined.
361 static bool dominatesMergePoint(Value *V, BasicBlock *BB,
362 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
363 InstructionCost &Cost,
364 InstructionCost Budget,
365 const TargetTransformInfo &TTI,
366 unsigned Depth = 0) {
367 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
368 // so limit the recursion depth.
369 // TODO: While this recursion limit does prevent pathological behavior, it
370 // would be better to track visited instructions to avoid cycles.
371 if (Depth == MaxSpeculationDepth)
372 return false;
374 Instruction *I = dyn_cast<Instruction>(V);
375 if (!I) {
376 // Non-instructions all dominate instructions, but not all constantexprs
377 // can be executed unconditionally.
378 if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
379 if (C->canTrap())
380 return false;
381 return true;
383 BasicBlock *PBB = I->getParent();
385 // We don't want to allow weird loops that might have the "if condition" in
386 // the bottom of this block.
387 if (PBB == BB)
388 return false;
390 // If this instruction is defined in a block that contains an unconditional
391 // branch to BB, then it must be in the 'conditional' part of the "if
392 // statement". If not, it definitely dominates the region.
393 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
394 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
395 return true;
397 // If we have seen this instruction before, don't count it again.
398 if (AggressiveInsts.count(I))
399 return true;
401 // Okay, it looks like the instruction IS in the "condition". Check to
402 // see if it's a cheap instruction to unconditionally compute, and if it
403 // only uses stuff defined outside of the condition. If so, hoist it out.
404 if (!isSafeToSpeculativelyExecute(I))
405 return false;
407 Cost += computeSpeculationCost(I, TTI);
409 // Allow exactly one instruction to be speculated regardless of its cost
410 // (as long as it is safe to do so).
411 // This is intended to flatten the CFG even if the instruction is a division
412 // or other expensive operation. The speculation of an expensive instruction
413 // is expected to be undone in CodeGenPrepare if the speculation has not
414 // enabled further IR optimizations.
415 if (Cost > Budget &&
416 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
417 !Cost.isValid()))
418 return false;
420 // Okay, we can only really hoist these out if their operands do
421 // not take us over the cost threshold.
422 for (Use &Op : I->operands())
423 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
424 Depth + 1))
425 return false;
426 // Okay, it's safe to do this! Remember this instruction.
427 AggressiveInsts.insert(I);
428 return true;
431 /// Extract ConstantInt from value, looking through IntToPtr
432 /// and PointerNullValue. Return NULL if value is not a constant int.
433 static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
434 // Normal constant int.
435 ConstantInt *CI = dyn_cast<ConstantInt>(V);
436 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
437 return CI;
439 // This is some kind of pointer constant. Turn it into a pointer-sized
440 // ConstantInt if possible.
441 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
443 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
444 if (isa<ConstantPointerNull>(V))
445 return ConstantInt::get(PtrTy, 0);
447 // IntToPtr const int.
448 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
449 if (CE->getOpcode() == Instruction::IntToPtr)
450 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
451 // The constant is very likely to have the right type already.
452 if (CI->getType() == PtrTy)
453 return CI;
454 else
455 return cast<ConstantInt>(
456 ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
458 return nullptr;
461 namespace {
463 /// Given a chain of or (||) or and (&&) comparison of a value against a
464 /// constant, this will try to recover the information required for a switch
465 /// structure.
466 /// It will depth-first traverse the chain of comparison, seeking for patterns
467 /// like %a == 12 or %a < 4 and combine them to produce a set of integer
468 /// representing the different cases for the switch.
469 /// Note that if the chain is composed of '||' it will build the set of elements
470 /// that matches the comparisons (i.e. any of this value validate the chain)
471 /// while for a chain of '&&' it will build the set elements that make the test
472 /// fail.
473 struct ConstantComparesGatherer {
474 const DataLayout &DL;
476 /// Value found for the switch comparison
477 Value *CompValue = nullptr;
479 /// Extra clause to be checked before the switch
480 Value *Extra = nullptr;
482 /// Set of integers to match in switch
483 SmallVector<ConstantInt *, 8> Vals;
485 /// Number of comparisons matched in the and/or chain
486 unsigned UsedICmps = 0;
488 /// Construct and compute the result for the comparison instruction Cond
489 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
490 gather(Cond);
493 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
494 ConstantComparesGatherer &
495 operator=(const ConstantComparesGatherer &) = delete;
497 private:
498 /// Try to set the current value used for the comparison, it succeeds only if
499 /// it wasn't set before or if the new value is the same as the old one
500 bool setValueOnce(Value *NewVal) {
501 if (CompValue && CompValue != NewVal)
502 return false;
503 CompValue = NewVal;
504 return (CompValue != nullptr);
507 /// Try to match Instruction "I" as a comparison against a constant and
508 /// populates the array Vals with the set of values that match (or do not
509 /// match depending on isEQ).
510 /// Return false on failure. On success, the Value the comparison matched
511 /// against is placed in CompValue.
512 /// If CompValue is already set, the function is expected to fail if a match
513 /// is found but the value compared to is different.
514 bool matchInstruction(Instruction *I, bool isEQ) {
515 // If this is an icmp against a constant, handle this as one of the cases.
516 ICmpInst *ICI;
517 ConstantInt *C;
518 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
519 (C = GetConstantInt(I->getOperand(1), DL)))) {
520 return false;
523 Value *RHSVal;
524 const APInt *RHSC;
526 // Pattern match a special case
527 // (x & ~2^z) == y --> x == y || x == y|2^z
528 // This undoes a transformation done by instcombine to fuse 2 compares.
529 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
530 // It's a little bit hard to see why the following transformations are
531 // correct. Here is a CVC3 program to verify them for 64-bit values:
534 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
535 x : BITVECTOR(64);
536 y : BITVECTOR(64);
537 z : BITVECTOR(64);
538 mask : BITVECTOR(64) = BVSHL(ONE, z);
539 QUERY( (y & ~mask = y) =>
540 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
542 QUERY( (y | mask = y) =>
543 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
547 // Please note that each pattern must be a dual implication (<--> or
548 // iff). One directional implication can create spurious matches. If the
549 // implication is only one-way, an unsatisfiable condition on the left
550 // side can imply a satisfiable condition on the right side. Dual
551 // implication ensures that satisfiable conditions are transformed to
552 // other satisfiable conditions and unsatisfiable conditions are
553 // transformed to other unsatisfiable conditions.
555 // Here is a concrete example of a unsatisfiable condition on the left
556 // implying a satisfiable condition on the right:
558 // mask = (1 << z)
559 // (x & ~mask) == y --> (x == y || x == (y | mask))
561 // Substituting y = 3, z = 0 yields:
562 // (x & -2) == 3 --> (x == 3 || x == 2)
564 // Pattern match a special case:
566 QUERY( (y & ~mask = y) =>
567 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
570 if (match(ICI->getOperand(0),
571 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
572 APInt Mask = ~*RHSC;
573 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
574 // If we already have a value for the switch, it has to match!
575 if (!setValueOnce(RHSVal))
576 return false;
578 Vals.push_back(C);
579 Vals.push_back(
580 ConstantInt::get(C->getContext(),
581 C->getValue() | Mask));
582 UsedICmps++;
583 return true;
587 // Pattern match a special case:
589 QUERY( (y | mask = y) =>
590 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
593 if (match(ICI->getOperand(0),
594 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
595 APInt Mask = *RHSC;
596 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
597 // If we already have a value for the switch, it has to match!
598 if (!setValueOnce(RHSVal))
599 return false;
601 Vals.push_back(C);
602 Vals.push_back(ConstantInt::get(C->getContext(),
603 C->getValue() & ~Mask));
604 UsedICmps++;
605 return true;
609 // If we already have a value for the switch, it has to match!
610 if (!setValueOnce(ICI->getOperand(0)))
611 return false;
613 UsedICmps++;
614 Vals.push_back(C);
615 return ICI->getOperand(0);
618 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
619 ConstantRange Span =
620 ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue());
622 // Shift the range if the compare is fed by an add. This is the range
623 // compare idiom as emitted by instcombine.
624 Value *CandidateVal = I->getOperand(0);
625 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
626 Span = Span.subtract(*RHSC);
627 CandidateVal = RHSVal;
630 // If this is an and/!= check, then we are looking to build the set of
631 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
632 // x != 0 && x != 1.
633 if (!isEQ)
634 Span = Span.inverse();
636 // If there are a ton of values, we don't want to make a ginormous switch.
637 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
638 return false;
641 // If we already have a value for the switch, it has to match!
642 if (!setValueOnce(CandidateVal))
643 return false;
645 // Add all values from the range to the set
646 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
647 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
649 UsedICmps++;
650 return true;
653 /// Given a potentially 'or'd or 'and'd together collection of icmp
654 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
655 /// the value being compared, and stick the list constants into the Vals
656 /// vector.
657 /// One "Extra" case is allowed to differ from the other.
658 void gather(Value *V) {
659 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
661 // Keep a stack (SmallVector for efficiency) for depth-first traversal
662 SmallVector<Value *, 8> DFT;
663 SmallPtrSet<Value *, 8> Visited;
665 // Initialize
666 Visited.insert(V);
667 DFT.push_back(V);
669 while (!DFT.empty()) {
670 V = DFT.pop_back_val();
672 if (Instruction *I = dyn_cast<Instruction>(V)) {
673 // If it is a || (or && depending on isEQ), process the operands.
674 Value *Op0, *Op1;
675 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
676 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
677 if (Visited.insert(Op1).second)
678 DFT.push_back(Op1);
679 if (Visited.insert(Op0).second)
680 DFT.push_back(Op0);
682 continue;
685 // Try to match the current instruction
686 if (matchInstruction(I, isEQ))
687 // Match succeed, continue the loop
688 continue;
691 // One element of the sequence of || (or &&) could not be match as a
692 // comparison against the same value as the others.
693 // We allow only one "Extra" case to be checked before the switch
694 if (!Extra) {
695 Extra = V;
696 continue;
698 // Failed to parse a proper sequence, abort now
699 CompValue = nullptr;
700 break;
705 } // end anonymous namespace
707 static void EraseTerminatorAndDCECond(Instruction *TI,
708 MemorySSAUpdater *MSSAU = nullptr) {
709 Instruction *Cond = nullptr;
710 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
711 Cond = dyn_cast<Instruction>(SI->getCondition());
712 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
713 if (BI->isConditional())
714 Cond = dyn_cast<Instruction>(BI->getCondition());
715 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
716 Cond = dyn_cast<Instruction>(IBI->getAddress());
719 TI->eraseFromParent();
720 if (Cond)
721 RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
724 /// Return true if the specified terminator checks
725 /// to see if a value is equal to constant integer value.
726 Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
727 Value *CV = nullptr;
728 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
729 // Do not permit merging of large switch instructions into their
730 // predecessors unless there is only one predecessor.
731 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
732 CV = SI->getCondition();
733 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
734 if (BI->isConditional() && BI->getCondition()->hasOneUse())
735 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
736 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
737 CV = ICI->getOperand(0);
740 // Unwrap any lossless ptrtoint cast.
741 if (CV) {
742 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
743 Value *Ptr = PTII->getPointerOperand();
744 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
745 CV = Ptr;
748 return CV;
751 /// Given a value comparison instruction,
752 /// decode all of the 'cases' that it represents and return the 'default' block.
753 BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
754 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
755 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
756 Cases.reserve(SI->getNumCases());
757 for (auto Case : SI->cases())
758 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
759 Case.getCaseSuccessor()));
760 return SI->getDefaultDest();
763 BranchInst *BI = cast<BranchInst>(TI);
764 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
765 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
766 Cases.push_back(ValueEqualityComparisonCase(
767 GetConstantInt(ICI->getOperand(1), DL), Succ));
768 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
771 /// Given a vector of bb/value pairs, remove any entries
772 /// in the list that match the specified block.
773 static void
774 EliminateBlockCases(BasicBlock *BB,
775 std::vector<ValueEqualityComparisonCase> &Cases) {
776 llvm::erase_value(Cases, BB);
779 /// Return true if there are any keys in C1 that exist in C2 as well.
780 static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
781 std::vector<ValueEqualityComparisonCase> &C2) {
782 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
784 // Make V1 be smaller than V2.
785 if (V1->size() > V2->size())
786 std::swap(V1, V2);
788 if (V1->empty())
789 return false;
790 if (V1->size() == 1) {
791 // Just scan V2.
792 ConstantInt *TheVal = (*V1)[0].Value;
793 for (unsigned i = 0, e = V2->size(); i != e; ++i)
794 if (TheVal == (*V2)[i].Value)
795 return true;
798 // Otherwise, just sort both lists and compare element by element.
799 array_pod_sort(V1->begin(), V1->end());
800 array_pod_sort(V2->begin(), V2->end());
801 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
802 while (i1 != e1 && i2 != e2) {
803 if ((*V1)[i1].Value == (*V2)[i2].Value)
804 return true;
805 if ((*V1)[i1].Value < (*V2)[i2].Value)
806 ++i1;
807 else
808 ++i2;
810 return false;
813 // Set branch weights on SwitchInst. This sets the metadata if there is at
814 // least one non-zero weight.
815 static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
816 // Check that there is at least one non-zero weight. Otherwise, pass
817 // nullptr to setMetadata which will erase the existing metadata.
818 MDNode *N = nullptr;
819 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
820 N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
821 SI->setMetadata(LLVMContext::MD_prof, N);
824 // Similar to the above, but for branch and select instructions that take
825 // exactly 2 weights.
826 static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
827 uint32_t FalseWeight) {
828 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
829 // Check that there is at least one non-zero weight. Otherwise, pass
830 // nullptr to setMetadata which will erase the existing metadata.
831 MDNode *N = nullptr;
832 if (TrueWeight || FalseWeight)
833 N = MDBuilder(I->getParent()->getContext())
834 .createBranchWeights(TrueWeight, FalseWeight);
835 I->setMetadata(LLVMContext::MD_prof, N);
838 /// If TI is known to be a terminator instruction and its block is known to
839 /// only have a single predecessor block, check to see if that predecessor is
840 /// also a value comparison with the same value, and if that comparison
841 /// determines the outcome of this comparison. If so, simplify TI. This does a
842 /// very limited form of jump threading.
843 bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
844 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
845 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
846 if (!PredVal)
847 return false; // Not a value comparison in predecessor.
849 Value *ThisVal = isValueEqualityComparison(TI);
850 assert(ThisVal && "This isn't a value comparison!!");
851 if (ThisVal != PredVal)
852 return false; // Different predicates.
854 // TODO: Preserve branch weight metadata, similarly to how
855 // FoldValueComparisonIntoPredecessors preserves it.
857 // Find out information about when control will move from Pred to TI's block.
858 std::vector<ValueEqualityComparisonCase> PredCases;
859 BasicBlock *PredDef =
860 GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
861 EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
863 // Find information about how control leaves this block.
864 std::vector<ValueEqualityComparisonCase> ThisCases;
865 BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
866 EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
868 // If TI's block is the default block from Pred's comparison, potentially
869 // simplify TI based on this knowledge.
870 if (PredDef == TI->getParent()) {
871 // If we are here, we know that the value is none of those cases listed in
872 // PredCases. If there are any cases in ThisCases that are in PredCases, we
873 // can simplify TI.
874 if (!ValuesOverlap(PredCases, ThisCases))
875 return false;
877 if (isa<BranchInst>(TI)) {
878 // Okay, one of the successors of this condbr is dead. Convert it to a
879 // uncond br.
880 assert(ThisCases.size() == 1 && "Branch can only have one case!");
881 // Insert the new branch.
882 Instruction *NI = Builder.CreateBr(ThisDef);
883 (void)NI;
885 // Remove PHI node entries for the dead edge.
886 ThisCases[0].Dest->removePredecessor(PredDef);
888 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
889 << "Through successor TI: " << *TI << "Leaving: " << *NI
890 << "\n");
892 EraseTerminatorAndDCECond(TI);
894 if (DTU)
895 DTU->applyUpdates(
896 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
898 return true;
901 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
902 // Okay, TI has cases that are statically dead, prune them away.
903 SmallPtrSet<Constant *, 16> DeadCases;
904 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
905 DeadCases.insert(PredCases[i].Value);
907 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
908 << "Through successor TI: " << *TI);
910 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
911 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
912 --i;
913 auto *Successor = i->getCaseSuccessor();
914 if (DTU)
915 ++NumPerSuccessorCases[Successor];
916 if (DeadCases.count(i->getCaseValue())) {
917 Successor->removePredecessor(PredDef);
918 SI.removeCase(i);
919 if (DTU)
920 --NumPerSuccessorCases[Successor];
924 if (DTU) {
925 std::vector<DominatorTree::UpdateType> Updates;
926 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
927 if (I.second == 0)
928 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
929 DTU->applyUpdates(Updates);
932 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
933 return true;
936 // Otherwise, TI's block must correspond to some matched value. Find out
937 // which value (or set of values) this is.
938 ConstantInt *TIV = nullptr;
939 BasicBlock *TIBB = TI->getParent();
940 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
941 if (PredCases[i].Dest == TIBB) {
942 if (TIV)
943 return false; // Cannot handle multiple values coming to this block.
944 TIV = PredCases[i].Value;
946 assert(TIV && "No edge from pred to succ?");
948 // Okay, we found the one constant that our value can be if we get into TI's
949 // BB. Find out which successor will unconditionally be branched to.
950 BasicBlock *TheRealDest = nullptr;
951 for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
952 if (ThisCases[i].Value == TIV) {
953 TheRealDest = ThisCases[i].Dest;
954 break;
957 // If not handled by any explicit cases, it is handled by the default case.
958 if (!TheRealDest)
959 TheRealDest = ThisDef;
961 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
963 // Remove PHI node entries for dead edges.
964 BasicBlock *CheckEdge = TheRealDest;
965 for (BasicBlock *Succ : successors(TIBB))
966 if (Succ != CheckEdge) {
967 if (Succ != TheRealDest)
968 RemovedSuccs.insert(Succ);
969 Succ->removePredecessor(TIBB);
970 } else
971 CheckEdge = nullptr;
973 // Insert the new branch.
974 Instruction *NI = Builder.CreateBr(TheRealDest);
975 (void)NI;
977 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
978 << "Through successor TI: " << *TI << "Leaving: " << *NI
979 << "\n");
981 EraseTerminatorAndDCECond(TI);
982 if (DTU) {
983 SmallVector<DominatorTree::UpdateType, 2> Updates;
984 Updates.reserve(RemovedSuccs.size());
985 for (auto *RemovedSucc : RemovedSuccs)
986 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
987 DTU->applyUpdates(Updates);
989 return true;
992 namespace {
994 /// This class implements a stable ordering of constant
995 /// integers that does not depend on their address. This is important for
996 /// applications that sort ConstantInt's to ensure uniqueness.
997 struct ConstantIntOrdering {
998 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
999 return LHS->getValue().ult(RHS->getValue());
1003 } // end anonymous namespace
1005 static int ConstantIntSortPredicate(ConstantInt *const *P1,
1006 ConstantInt *const *P2) {
1007 const ConstantInt *LHS = *P1;
1008 const ConstantInt *RHS = *P2;
1009 if (LHS == RHS)
1010 return 0;
1011 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1014 static inline bool HasBranchWeights(const Instruction *I) {
1015 MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
1016 if (ProfMD && ProfMD->getOperand(0))
1017 if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
1018 return MDS->getString().equals("branch_weights");
1020 return false;
1023 /// Get Weights of a given terminator, the default weight is at the front
1024 /// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1025 /// metadata.
1026 static void GetBranchWeights(Instruction *TI,
1027 SmallVectorImpl<uint64_t> &Weights) {
1028 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1029 assert(MD);
1030 for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
1031 ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
1032 Weights.push_back(CI->getValue().getZExtValue());
1035 // If TI is a conditional eq, the default case is the false case,
1036 // and the corresponding branch-weight data is at index 2. We swap the
1037 // default weight to be the first entry.
1038 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1039 assert(Weights.size() == 2);
1040 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1041 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1042 std::swap(Weights.front(), Weights.back());
1046 /// Keep halving the weights until all can fit in uint32_t.
1047 static void FitWeights(MutableArrayRef<uint64_t> Weights) {
1048 uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
1049 if (Max > UINT_MAX) {
1050 unsigned Offset = 32 - countLeadingZeros(Max);
1051 for (uint64_t &I : Weights)
1052 I >>= Offset;
1056 static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
1057 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1058 Instruction *PTI = PredBlock->getTerminator();
1060 // If we have bonus instructions, clone them into the predecessor block.
1061 // Note that there may be multiple predecessor blocks, so we cannot move
1062 // bonus instructions to a predecessor block.
1063 for (Instruction &BonusInst : *BB) {
1064 if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
1065 continue;
1067 Instruction *NewBonusInst = BonusInst.clone();
1069 if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1070 // Unless the instruction has the same !dbg location as the original
1071 // branch, drop it. When we fold the bonus instructions we want to make
1072 // sure we reset their debug locations in order to avoid stepping on
1073 // dead code caused by folding dead branches.
1074 NewBonusInst->setDebugLoc(DebugLoc());
1077 RemapInstruction(NewBonusInst, VMap,
1078 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1079 VMap[&BonusInst] = NewBonusInst;
1081 // If we moved a load, we cannot any longer claim any knowledge about
1082 // its potential value. The previous information might have been valid
1083 // only given the branch precondition.
1084 // For an analogous reason, we must also drop all the metadata whose
1085 // semantics we don't understand. We *can* preserve !annotation, because
1086 // it is tied to the instruction itself, not the value or position.
1087 // Similarly strip attributes on call parameters that may cause UB in
1088 // location the call is moved to.
1089 NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata(
1090 LLVMContext::MD_annotation);
1092 PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst);
1093 NewBonusInst->takeName(&BonusInst);
1094 BonusInst.setName(NewBonusInst->getName() + ".old");
1096 // Update (liveout) uses of bonus instructions,
1097 // now that the bonus instruction has been cloned into predecessor.
1098 SSAUpdater SSAUpdate;
1099 SSAUpdate.Initialize(BonusInst.getType(),
1100 (NewBonusInst->getName() + ".merge").str());
1101 SSAUpdate.AddAvailableValue(BB, &BonusInst);
1102 SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
1103 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1104 auto *UI = cast<Instruction>(U.getUser());
1105 if (UI->getParent() != PredBlock)
1106 SSAUpdate.RewriteUseAfterInsertions(U);
1107 else // Use is in the same block as, and comes before, NewBonusInst.
1108 SSAUpdate.RewriteUse(U);
1113 bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
1114 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1115 BasicBlock *BB = TI->getParent();
1116 BasicBlock *Pred = PTI->getParent();
1118 SmallVector<DominatorTree::UpdateType, 32> Updates;
1120 // Figure out which 'cases' to copy from SI to PSI.
1121 std::vector<ValueEqualityComparisonCase> BBCases;
1122 BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1124 std::vector<ValueEqualityComparisonCase> PredCases;
1125 BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1127 // Based on whether the default edge from PTI goes to BB or not, fill in
1128 // PredCases and PredDefault with the new switch cases we would like to
1129 // build.
1130 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1132 // Update the branch weight metadata along the way
1133 SmallVector<uint64_t, 8> Weights;
1134 bool PredHasWeights = HasBranchWeights(PTI);
1135 bool SuccHasWeights = HasBranchWeights(TI);
1137 if (PredHasWeights) {
1138 GetBranchWeights(PTI, Weights);
1139 // branch-weight metadata is inconsistent here.
1140 if (Weights.size() != 1 + PredCases.size())
1141 PredHasWeights = SuccHasWeights = false;
1142 } else if (SuccHasWeights)
1143 // If there are no predecessor weights but there are successor weights,
1144 // populate Weights with 1, which will later be scaled to the sum of
1145 // successor's weights
1146 Weights.assign(1 + PredCases.size(), 1);
1148 SmallVector<uint64_t, 8> SuccWeights;
1149 if (SuccHasWeights) {
1150 GetBranchWeights(TI, SuccWeights);
1151 // branch-weight metadata is inconsistent here.
1152 if (SuccWeights.size() != 1 + BBCases.size())
1153 PredHasWeights = SuccHasWeights = false;
1154 } else if (PredHasWeights)
1155 SuccWeights.assign(1 + BBCases.size(), 1);
1157 if (PredDefault == BB) {
1158 // If this is the default destination from PTI, only the edges in TI
1159 // that don't occur in PTI, or that branch to BB will be activated.
1160 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1161 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1162 if (PredCases[i].Dest != BB)
1163 PTIHandled.insert(PredCases[i].Value);
1164 else {
1165 // The default destination is BB, we don't need explicit targets.
1166 std::swap(PredCases[i], PredCases.back());
1168 if (PredHasWeights || SuccHasWeights) {
1169 // Increase weight for the default case.
1170 Weights[0] += Weights[i + 1];
1171 std::swap(Weights[i + 1], Weights.back());
1172 Weights.pop_back();
1175 PredCases.pop_back();
1176 --i;
1177 --e;
1180 // Reconstruct the new switch statement we will be building.
1181 if (PredDefault != BBDefault) {
1182 PredDefault->removePredecessor(Pred);
1183 if (DTU && PredDefault != BB)
1184 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1185 PredDefault = BBDefault;
1186 ++NewSuccessors[BBDefault];
1189 unsigned CasesFromPred = Weights.size();
1190 uint64_t ValidTotalSuccWeight = 0;
1191 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1192 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1193 PredCases.push_back(BBCases[i]);
1194 ++NewSuccessors[BBCases[i].Dest];
1195 if (SuccHasWeights || PredHasWeights) {
1196 // The default weight is at index 0, so weight for the ith case
1197 // should be at index i+1. Scale the cases from successor by
1198 // PredDefaultWeight (Weights[0]).
1199 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1200 ValidTotalSuccWeight += SuccWeights[i + 1];
1204 if (SuccHasWeights || PredHasWeights) {
1205 ValidTotalSuccWeight += SuccWeights[0];
1206 // Scale the cases from predecessor by ValidTotalSuccWeight.
1207 for (unsigned i = 1; i < CasesFromPred; ++i)
1208 Weights[i] *= ValidTotalSuccWeight;
1209 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1210 Weights[0] *= SuccWeights[0];
1212 } else {
1213 // If this is not the default destination from PSI, only the edges
1214 // in SI that occur in PSI with a destination of BB will be
1215 // activated.
1216 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1217 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1218 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1219 if (PredCases[i].Dest == BB) {
1220 PTIHandled.insert(PredCases[i].Value);
1222 if (PredHasWeights || SuccHasWeights) {
1223 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1224 std::swap(Weights[i + 1], Weights.back());
1225 Weights.pop_back();
1228 std::swap(PredCases[i], PredCases.back());
1229 PredCases.pop_back();
1230 --i;
1231 --e;
1234 // Okay, now we know which constants were sent to BB from the
1235 // predecessor. Figure out where they will all go now.
1236 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1237 if (PTIHandled.count(BBCases[i].Value)) {
1238 // If this is one we are capable of getting...
1239 if (PredHasWeights || SuccHasWeights)
1240 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1241 PredCases.push_back(BBCases[i]);
1242 ++NewSuccessors[BBCases[i].Dest];
1243 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1246 // If there are any constants vectored to BB that TI doesn't handle,
1247 // they must go to the default destination of TI.
1248 for (ConstantInt *I : PTIHandled) {
1249 if (PredHasWeights || SuccHasWeights)
1250 Weights.push_back(WeightsForHandled[I]);
1251 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1252 ++NewSuccessors[BBDefault];
1256 // Okay, at this point, we know which new successor Pred will get. Make
1257 // sure we update the number of entries in the PHI nodes for these
1258 // successors.
1259 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1260 if (DTU) {
1261 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1262 Updates.reserve(Updates.size() + NewSuccessors.size());
1264 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1265 NewSuccessors) {
1266 for (auto I : seq(0, NewSuccessor.second)) {
1267 (void)I;
1268 AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
1270 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1271 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1274 Builder.SetInsertPoint(PTI);
1275 // Convert pointer to int before we switch.
1276 if (CV->getType()->isPointerTy()) {
1277 CV =
1278 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1281 // Now that the successors are updated, create the new Switch instruction.
1282 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1283 NewSI->setDebugLoc(PTI->getDebugLoc());
1284 for (ValueEqualityComparisonCase &V : PredCases)
1285 NewSI->addCase(V.Value, V.Dest);
1287 if (PredHasWeights || SuccHasWeights) {
1288 // Halve the weights if any of them cannot fit in an uint32_t
1289 FitWeights(Weights);
1291 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1293 setBranchWeights(NewSI, MDWeights);
1296 EraseTerminatorAndDCECond(PTI);
1298 // Okay, last check. If BB is still a successor of PSI, then we must
1299 // have an infinite loop case. If so, add an infinitely looping block
1300 // to handle the case to preserve the behavior of the code.
1301 BasicBlock *InfLoopBlock = nullptr;
1302 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1303 if (NewSI->getSuccessor(i) == BB) {
1304 if (!InfLoopBlock) {
1305 // Insert it at the end of the function, because it's either code,
1306 // or it won't matter if it's hot. :)
1307 InfLoopBlock =
1308 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1309 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1310 if (DTU)
1311 Updates.push_back(
1312 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1314 NewSI->setSuccessor(i, InfLoopBlock);
1317 if (DTU) {
1318 if (InfLoopBlock)
1319 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1321 Updates.push_back({DominatorTree::Delete, Pred, BB});
1323 DTU->applyUpdates(Updates);
1326 ++NumFoldValueComparisonIntoPredecessors;
1327 return true;
1330 /// The specified terminator is a value equality comparison instruction
1331 /// (either a switch or a branch on "X == c").
1332 /// See if any of the predecessors of the terminator block are value comparisons
1333 /// on the same value. If so, and if safe to do so, fold them together.
1334 bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1335 IRBuilder<> &Builder) {
1336 BasicBlock *BB = TI->getParent();
1337 Value *CV = isValueEqualityComparison(TI); // CondVal
1338 assert(CV && "Not a comparison?");
1340 bool Changed = false;
1342 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1343 while (!Preds.empty()) {
1344 BasicBlock *Pred = Preds.pop_back_val();
1345 Instruction *PTI = Pred->getTerminator();
1347 // Don't try to fold into itself.
1348 if (Pred == BB)
1349 continue;
1351 // See if the predecessor is a comparison with the same value.
1352 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1353 if (PCV != CV)
1354 continue;
1356 SmallSetVector<BasicBlock *, 4> FailBlocks;
1357 if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1358 for (auto *Succ : FailBlocks) {
1359 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1360 return false;
1364 PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1365 Changed = true;
1367 return Changed;
1370 // If we would need to insert a select that uses the value of this invoke
1371 // (comments in HoistThenElseCodeToIf explain why we would need to do this), we
1372 // can't hoist the invoke, as there is nowhere to put the select in this case.
1373 static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1374 Instruction *I1, Instruction *I2) {
1375 for (BasicBlock *Succ : successors(BB1)) {
1376 for (const PHINode &PN : Succ->phis()) {
1377 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1378 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1379 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1380 return false;
1384 return true;
1387 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1389 /// Given a conditional branch that goes to BB1 and BB2, hoist any common code
1390 /// in the two blocks up into the branch block. The caller of this function
1391 /// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
1392 /// only perform hoisting in case both blocks only contain a terminator. In that
1393 /// case, only the original BI will be replaced and selects for PHIs are added.
1394 bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
1395 const TargetTransformInfo &TTI,
1396 bool EqTermsOnly) {
1397 // This does very trivial matching, with limited scanning, to find identical
1398 // instructions in the two blocks. In particular, we don't want to get into
1399 // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
1400 // such, we currently just scan for obviously identical instructions in an
1401 // identical order.
1402 BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
1403 BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
1405 // If either of the blocks has it's address taken, then we can't do this fold,
1406 // because the code we'd hoist would no longer run when we jump into the block
1407 // by it's address.
1408 if (BB1->hasAddressTaken() || BB2->hasAddressTaken())
1409 return false;
1411 BasicBlock::iterator BB1_Itr = BB1->begin();
1412 BasicBlock::iterator BB2_Itr = BB2->begin();
1414 Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
1415 // Skip debug info if it is not identical.
1416 DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1417 DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1418 if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
1419 while (isa<DbgInfoIntrinsic>(I1))
1420 I1 = &*BB1_Itr++;
1421 while (isa<DbgInfoIntrinsic>(I2))
1422 I2 = &*BB2_Itr++;
1424 // FIXME: Can we define a safety predicate for CallBr?
1425 if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
1426 (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
1427 isa<CallBrInst>(I1))
1428 return false;
1430 BasicBlock *BIParent = BI->getParent();
1432 bool Changed = false;
1434 auto _ = make_scope_exit([&]() {
1435 if (Changed)
1436 ++NumHoistCommonCode;
1439 // Check if only hoisting terminators is allowed. This does not add new
1440 // instructions to the hoist location.
1441 if (EqTermsOnly) {
1442 // Skip any debug intrinsics, as they are free to hoist.
1443 auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
1444 auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
1445 if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
1446 return false;
1447 if (!I1NonDbg->isTerminator())
1448 return false;
1449 // Now we know that we only need to hoist debug instrinsics and the
1450 // terminator. Let the loop below handle those 2 cases.
1453 do {
1454 // If we are hoisting the terminator instruction, don't move one (making a
1455 // broken BB), instead clone it, and remove BI.
1456 if (I1->isTerminator())
1457 goto HoistTerminator;
1459 // If we're going to hoist a call, make sure that the two instructions we're
1460 // commoning/hoisting are both marked with musttail, or neither of them is
1461 // marked as such. Otherwise, we might end up in a situation where we hoist
1462 // from a block where the terminator is a `ret` to a block where the terminator
1463 // is a `br`, and `musttail` calls expect to be followed by a return.
1464 auto *C1 = dyn_cast<CallInst>(I1);
1465 auto *C2 = dyn_cast<CallInst>(I2);
1466 if (C1 && C2)
1467 if (C1->isMustTailCall() != C2->isMustTailCall())
1468 return Changed;
1470 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1471 return Changed;
1473 // If any of the two call sites has nomerge attribute, stop hoisting.
1474 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1475 if (CB1->cannotMerge())
1476 return Changed;
1477 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1478 if (CB2->cannotMerge())
1479 return Changed;
1481 if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
1482 assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
1483 // The debug location is an integral part of a debug info intrinsic
1484 // and can't be separated from it or replaced. Instead of attempting
1485 // to merge locations, simply hoist both copies of the intrinsic.
1486 BIParent->getInstList().splice(BI->getIterator(),
1487 BB1->getInstList(), I1);
1488 BIParent->getInstList().splice(BI->getIterator(),
1489 BB2->getInstList(), I2);
1490 Changed = true;
1491 } else {
1492 // For a normal instruction, we just move one to right before the branch,
1493 // then replace all uses of the other with the first. Finally, we remove
1494 // the now redundant second instruction.
1495 BIParent->getInstList().splice(BI->getIterator(),
1496 BB1->getInstList(), I1);
1497 if (!I2->use_empty())
1498 I2->replaceAllUsesWith(I1);
1499 I1->andIRFlags(I2);
1500 unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
1501 LLVMContext::MD_range,
1502 LLVMContext::MD_fpmath,
1503 LLVMContext::MD_invariant_load,
1504 LLVMContext::MD_nonnull,
1505 LLVMContext::MD_invariant_group,
1506 LLVMContext::MD_align,
1507 LLVMContext::MD_dereferenceable,
1508 LLVMContext::MD_dereferenceable_or_null,
1509 LLVMContext::MD_mem_parallel_loop_access,
1510 LLVMContext::MD_access_group,
1511 LLVMContext::MD_preserve_access_index};
1512 combineMetadata(I1, I2, KnownIDs, true);
1514 // I1 and I2 are being combined into a single instruction. Its debug
1515 // location is the merged locations of the original instructions.
1516 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1518 I2->eraseFromParent();
1519 Changed = true;
1521 ++NumHoistCommonInstrs;
1523 I1 = &*BB1_Itr++;
1524 I2 = &*BB2_Itr++;
1525 // Skip debug info if it is not identical.
1526 DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1527 DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1528 if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
1529 while (isa<DbgInfoIntrinsic>(I1))
1530 I1 = &*BB1_Itr++;
1531 while (isa<DbgInfoIntrinsic>(I2))
1532 I2 = &*BB2_Itr++;
1534 } while (I1->isIdenticalToWhenDefined(I2));
1536 return true;
1538 HoistTerminator:
1539 // It may not be possible to hoist an invoke.
1540 // FIXME: Can we define a safety predicate for CallBr?
1541 if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
1542 return Changed;
1544 // TODO: callbr hoisting currently disabled pending further study.
1545 if (isa<CallBrInst>(I1))
1546 return Changed;
1548 for (BasicBlock *Succ : successors(BB1)) {
1549 for (PHINode &PN : Succ->phis()) {
1550 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1551 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1552 if (BB1V == BB2V)
1553 continue;
1555 // Check for passingValueIsAlwaysUndefined here because we would rather
1556 // eliminate undefined control flow then converting it to a select.
1557 if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
1558 passingValueIsAlwaysUndefined(BB2V, &PN))
1559 return Changed;
1561 if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
1562 return Changed;
1563 if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
1564 return Changed;
1568 // Okay, it is safe to hoist the terminator.
1569 Instruction *NT = I1->clone();
1570 BIParent->getInstList().insert(BI->getIterator(), NT);
1571 if (!NT->getType()->isVoidTy()) {
1572 I1->replaceAllUsesWith(NT);
1573 I2->replaceAllUsesWith(NT);
1574 NT->takeName(I1);
1576 Changed = true;
1577 ++NumHoistCommonInstrs;
1579 // Ensure terminator gets a debug location, even an unknown one, in case
1580 // it involves inlinable calls.
1581 NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1583 // PHIs created below will adopt NT's merged DebugLoc.
1584 IRBuilder<NoFolder> Builder(NT);
1586 // Hoisting one of the terminators from our successor is a great thing.
1587 // Unfortunately, the successors of the if/else blocks may have PHI nodes in
1588 // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
1589 // nodes, so we insert select instruction to compute the final result.
1590 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1591 for (BasicBlock *Succ : successors(BB1)) {
1592 for (PHINode &PN : Succ->phis()) {
1593 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1594 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1595 if (BB1V == BB2V)
1596 continue;
1598 // These values do not agree. Insert a select instruction before NT
1599 // that determines the right value.
1600 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1601 if (!SI) {
1602 // Propagate fast-math-flags from phi node to its replacement select.
1603 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1604 if (isa<FPMathOperator>(PN))
1605 Builder.setFastMathFlags(PN.getFastMathFlags());
1607 SI = cast<SelectInst>(
1608 Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
1609 BB1V->getName() + "." + BB2V->getName(), BI));
1612 // Make the PHI node use the select for all incoming values for BB1/BB2
1613 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1614 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1615 PN.setIncomingValue(i, SI);
1619 SmallVector<DominatorTree::UpdateType, 4> Updates;
1621 // Update any PHI nodes in our new successors.
1622 for (BasicBlock *Succ : successors(BB1)) {
1623 AddPredecessorToBlock(Succ, BIParent, BB1);
1624 if (DTU)
1625 Updates.push_back({DominatorTree::Insert, BIParent, Succ});
1628 if (DTU)
1629 for (BasicBlock *Succ : successors(BI))
1630 Updates.push_back({DominatorTree::Delete, BIParent, Succ});
1632 EraseTerminatorAndDCECond(BI);
1633 if (DTU)
1634 DTU->applyUpdates(Updates);
1635 return Changed;
1638 // Check lifetime markers.
1639 static bool isLifeTimeMarker(const Instruction *I) {
1640 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1641 switch (II->getIntrinsicID()) {
1642 default:
1643 break;
1644 case Intrinsic::lifetime_start:
1645 case Intrinsic::lifetime_end:
1646 return true;
1649 return false;
1652 // TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1653 // into variables.
1654 static bool replacingOperandWithVariableIsCheap(const Instruction *I,
1655 int OpIdx) {
1656 return !isa<IntrinsicInst>(I);
1659 // All instructions in Insts belong to different blocks that all unconditionally
1660 // branch to a common successor. Analyze each instruction and return true if it
1661 // would be possible to sink them into their successor, creating one common
1662 // instruction instead. For every value that would be required to be provided by
1663 // PHI node (because an operand varies in each input block), add to PHIOperands.
1664 static bool canSinkInstructions(
1665 ArrayRef<Instruction *> Insts,
1666 DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1667 // Prune out obviously bad instructions to move. Each instruction must have
1668 // exactly zero or one use, and we check later that use is by a single, common
1669 // PHI instruction in the successor.
1670 bool HasUse = !Insts.front()->user_empty();
1671 for (auto *I : Insts) {
1672 // These instructions may change or break semantics if moved.
1673 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1674 I->getType()->isTokenTy())
1675 return false;
1677 // Do not try to sink an instruction in an infinite loop - it can cause
1678 // this algorithm to infinite loop.
1679 if (I->getParent()->getSingleSuccessor() == I->getParent())
1680 return false;
1682 // Conservatively return false if I is an inline-asm instruction. Sinking
1683 // and merging inline-asm instructions can potentially create arguments
1684 // that cannot satisfy the inline-asm constraints.
1685 // If the instruction has nomerge attribute, return false.
1686 if (const auto *C = dyn_cast<CallBase>(I))
1687 if (C->isInlineAsm() || C->cannotMerge())
1688 return false;
1690 // Each instruction must have zero or one use.
1691 if (HasUse && !I->hasOneUse())
1692 return false;
1693 if (!HasUse && !I->user_empty())
1694 return false;
1697 const Instruction *I0 = Insts.front();
1698 for (auto *I : Insts)
1699 if (!I->isSameOperationAs(I0))
1700 return false;
1702 // All instructions in Insts are known to be the same opcode. If they have a
1703 // use, check that the only user is a PHI or in the same block as the
1704 // instruction, because if a user is in the same block as an instruction we're
1705 // contemplating sinking, it must already be determined to be sinkable.
1706 if (HasUse) {
1707 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1708 auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1709 if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1710 auto *U = cast<Instruction>(*I->user_begin());
1711 return (PNUse &&
1712 PNUse->getParent() == Succ &&
1713 PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1714 U->getParent() == I->getParent();
1716 return false;
1719 // Because SROA can't handle speculating stores of selects, try not to sink
1720 // loads, stores or lifetime markers of allocas when we'd have to create a
1721 // PHI for the address operand. Also, because it is likely that loads or
1722 // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
1723 // them.
1724 // This can cause code churn which can have unintended consequences down
1725 // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
1726 // FIXME: This is a workaround for a deficiency in SROA - see
1727 // https://llvm.org/bugs/show_bug.cgi?id=30188
1728 if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
1729 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1731 return false;
1732 if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
1733 return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
1735 return false;
1736 if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
1737 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1739 return false;
1741 // For calls to be sinkable, they must all be indirect, or have same callee.
1742 // I.e. if we have two direct calls to different callees, we don't want to
1743 // turn that into an indirect call. Likewise, if we have an indirect call,
1744 // and a direct call, we don't actually want to have a single indirect call.
1745 if (isa<CallBase>(I0)) {
1746 auto IsIndirectCall = [](const Instruction *I) {
1747 return cast<CallBase>(I)->isIndirectCall();
1749 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
1750 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
1751 if (HaveIndirectCalls) {
1752 if (!AllCallsAreIndirect)
1753 return false;
1754 } else {
1755 // All callees must be identical.
1756 Value *Callee = nullptr;
1757 for (const Instruction *I : Insts) {
1758 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
1759 if (!Callee)
1760 Callee = CurrCallee;
1761 else if (Callee != CurrCallee)
1762 return false;
1767 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
1768 Value *Op = I0->getOperand(OI);
1769 if (Op->getType()->isTokenTy())
1770 // Don't touch any operand of token type.
1771 return false;
1773 auto SameAsI0 = [&I0, OI](const Instruction *I) {
1774 assert(I->getNumOperands() == I0->getNumOperands());
1775 return I->getOperand(OI) == I0->getOperand(OI);
1777 if (!all_of(Insts, SameAsI0)) {
1778 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
1779 !canReplaceOperandWithVariable(I0, OI))
1780 // We can't create a PHI from this GEP.
1781 return false;
1782 for (auto *I : Insts)
1783 PHIOperands[I].push_back(I->getOperand(OI));
1786 return true;
1789 // Assuming canSinkInstructions(Blocks) has returned true, sink the last
1790 // instruction of every block in Blocks to their common successor, commoning
1791 // into one instruction.
1792 static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
1793 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
1795 // canSinkInstructions returning true guarantees that every block has at
1796 // least one non-terminator instruction.
1797 SmallVector<Instruction*,4> Insts;
1798 for (auto *BB : Blocks) {
1799 Instruction *I = BB->getTerminator();
1800 do {
1801 I = I->getPrevNode();
1802 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
1803 if (!isa<DbgInfoIntrinsic>(I))
1804 Insts.push_back(I);
1807 // The only checking we need to do now is that all users of all instructions
1808 // are the same PHI node. canSinkInstructions should have checked this but
1809 // it is slightly over-aggressive - it gets confused by commutative
1810 // instructions so double-check it here.
1811 Instruction *I0 = Insts.front();
1812 if (!I0->user_empty()) {
1813 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1814 if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
1815 auto *U = cast<Instruction>(*I->user_begin());
1816 return U == PNUse;
1818 return false;
1821 // We don't need to do any more checking here; canSinkInstructions should
1822 // have done it all for us.
1823 SmallVector<Value*, 4> NewOperands;
1824 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
1825 // This check is different to that in canSinkInstructions. There, we
1826 // cared about the global view once simplifycfg (and instcombine) have
1827 // completed - it takes into account PHIs that become trivially
1828 // simplifiable. However here we need a more local view; if an operand
1829 // differs we create a PHI and rely on instcombine to clean up the very
1830 // small mess we may make.
1831 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
1832 return I->getOperand(O) != I0->getOperand(O);
1834 if (!NeedPHI) {
1835 NewOperands.push_back(I0->getOperand(O));
1836 continue;
1839 // Create a new PHI in the successor block and populate it.
1840 auto *Op = I0->getOperand(O);
1841 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
1842 auto *PN = PHINode::Create(Op->getType(), Insts.size(),
1843 Op->getName() + ".sink", &BBEnd->front());
1844 for (auto *I : Insts)
1845 PN->addIncoming(I->getOperand(O), I->getParent());
1846 NewOperands.push_back(PN);
1849 // Arbitrarily use I0 as the new "common" instruction; remap its operands
1850 // and move it to the start of the successor block.
1851 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
1852 I0->getOperandUse(O).set(NewOperands[O]);
1853 I0->moveBefore(&*BBEnd->getFirstInsertionPt());
1855 // Update metadata and IR flags, and merge debug locations.
1856 for (auto *I : Insts)
1857 if (I != I0) {
1858 // The debug location for the "common" instruction is the merged locations
1859 // of all the commoned instructions. We start with the original location
1860 // of the "common" instruction and iteratively merge each location in the
1861 // loop below.
1862 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
1863 // However, as N-way merge for CallInst is rare, so we use simplified API
1864 // instead of using complex API for N-way merge.
1865 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
1866 combineMetadataForCSE(I0, I, true);
1867 I0->andIRFlags(I);
1870 if (!I0->user_empty()) {
1871 // canSinkLastInstruction checked that all instructions were used by
1872 // one and only one PHI node. Find that now, RAUW it to our common
1873 // instruction and nuke it.
1874 auto *PN = cast<PHINode>(*I0->user_begin());
1875 PN->replaceAllUsesWith(I0);
1876 PN->eraseFromParent();
1879 // Finally nuke all instructions apart from the common instruction.
1880 for (auto *I : Insts)
1881 if (I != I0)
1882 I->eraseFromParent();
1884 return true;
1887 namespace {
1889 // LockstepReverseIterator - Iterates through instructions
1890 // in a set of blocks in reverse order from the first non-terminator.
1891 // For example (assume all blocks have size n):
1892 // LockstepReverseIterator I([B1, B2, B3]);
1893 // *I-- = [B1[n], B2[n], B3[n]];
1894 // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1895 // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1896 // ...
1897 class LockstepReverseIterator {
1898 ArrayRef<BasicBlock*> Blocks;
1899 SmallVector<Instruction*,4> Insts;
1900 bool Fail;
1902 public:
1903 LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
1904 reset();
1907 void reset() {
1908 Fail = false;
1909 Insts.clear();
1910 for (auto *BB : Blocks) {
1911 Instruction *Inst = BB->getTerminator();
1912 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1913 Inst = Inst->getPrevNode();
1914 if (!Inst) {
1915 // Block wasn't big enough.
1916 Fail = true;
1917 return;
1919 Insts.push_back(Inst);
1923 bool isValid() const {
1924 return !Fail;
1927 void operator--() {
1928 if (Fail)
1929 return;
1930 for (auto *&Inst : Insts) {
1931 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1932 Inst = Inst->getPrevNode();
1933 // Already at beginning of block.
1934 if (!Inst) {
1935 Fail = true;
1936 return;
1941 void operator++() {
1942 if (Fail)
1943 return;
1944 for (auto *&Inst : Insts) {
1945 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1946 Inst = Inst->getNextNode();
1947 // Already at end of block.
1948 if (!Inst) {
1949 Fail = true;
1950 return;
1955 ArrayRef<Instruction*> operator * () const {
1956 return Insts;
1960 } // end anonymous namespace
1962 /// Check whether BB's predecessors end with unconditional branches. If it is
1963 /// true, sink any common code from the predecessors to BB.
1964 static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
1965 DomTreeUpdater *DTU) {
1966 // We support two situations:
1967 // (1) all incoming arcs are unconditional
1968 // (2) there are non-unconditional incoming arcs
1970 // (2) is very common in switch defaults and
1971 // else-if patterns;
1973 // if (a) f(1);
1974 // else if (b) f(2);
1976 // produces:
1978 // [if]
1979 // / \
1980 // [f(1)] [if]
1981 // | | \
1982 // | | |
1983 // | [f(2)]|
1984 // \ | /
1985 // [ end ]
1987 // [end] has two unconditional predecessor arcs and one conditional. The
1988 // conditional refers to the implicit empty 'else' arc. This conditional
1989 // arc can also be caused by an empty default block in a switch.
1991 // In this case, we attempt to sink code from all *unconditional* arcs.
1992 // If we can sink instructions from these arcs (determined during the scan
1993 // phase below) we insert a common successor for all unconditional arcs and
1994 // connect that to [end], to enable sinking:
1996 // [if]
1997 // / \
1998 // [x(1)] [if]
1999 // | | \
2000 // | | \
2001 // | [x(2)] |
2002 // \ / |
2003 // [sink.split] |
2004 // \ /
2005 // [ end ]
2007 SmallVector<BasicBlock*,4> UnconditionalPreds;
2008 bool HaveNonUnconditionalPredecessors = false;
2009 for (auto *PredBB : predecessors(BB)) {
2010 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2011 if (PredBr && PredBr->isUnconditional())
2012 UnconditionalPreds.push_back(PredBB);
2013 else
2014 HaveNonUnconditionalPredecessors = true;
2016 if (UnconditionalPreds.size() < 2)
2017 return false;
2019 // We take a two-step approach to tail sinking. First we scan from the end of
2020 // each block upwards in lockstep. If the n'th instruction from the end of each
2021 // block can be sunk, those instructions are added to ValuesToSink and we
2022 // carry on. If we can sink an instruction but need to PHI-merge some operands
2023 // (because they're not identical in each instruction) we add these to
2024 // PHIOperands.
2025 int ScanIdx = 0;
2026 SmallPtrSet<Value*,4> InstructionsToSink;
2027 DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
2028 LockstepReverseIterator LRI(UnconditionalPreds);
2029 while (LRI.isValid() &&
2030 canSinkInstructions(*LRI, PHIOperands)) {
2031 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2032 << "\n");
2033 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2034 ++ScanIdx;
2035 --LRI;
2038 // If no instructions can be sunk, early-return.
2039 if (ScanIdx == 0)
2040 return false;
2042 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2043 // actually sink before encountering instruction that is unprofitable to sink?
2044 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2045 unsigned NumPHIdValues = 0;
2046 for (auto *I : *LRI)
2047 for (auto *V : PHIOperands[I]) {
2048 if (InstructionsToSink.count(V) == 0)
2049 ++NumPHIdValues;
2050 // FIXME: this check is overly optimistic. We may end up not sinking
2051 // said instruction, due to the very same profitability check.
2052 // See @creating_too_many_phis in sink-common-code.ll.
2054 LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2055 unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2056 if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2057 NumPHIInsts++;
2059 return NumPHIInsts <= 1;
2062 // We've determined that we are going to sink last ScanIdx instructions,
2063 // and recorded them in InstructionsToSink. Now, some instructions may be
2064 // unprofitable to sink. But that determination depends on the instructions
2065 // that we are going to sink.
2067 // First, forward scan: find the first instruction unprofitable to sink,
2068 // recording all the ones that are profitable to sink.
2069 // FIXME: would it be better, after we detect that not all are profitable.
2070 // to either record the profitable ones, or erase the unprofitable ones?
2071 // Maybe we need to choose (at runtime) the one that will touch least instrs?
2072 LRI.reset();
2073 int Idx = 0;
2074 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2075 while (Idx < ScanIdx) {
2076 if (!ProfitableToSinkInstruction(LRI)) {
2077 // Too many PHIs would be created.
2078 LLVM_DEBUG(
2079 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2080 break;
2082 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2083 --LRI;
2084 ++Idx;
2087 // If no instructions can be sunk, early-return.
2088 if (Idx == 0)
2089 return false;
2091 // Did we determine that (only) some instructions are unprofitable to sink?
2092 if (Idx < ScanIdx) {
2093 // Okay, some instructions are unprofitable.
2094 ScanIdx = Idx;
2095 InstructionsToSink = InstructionsProfitableToSink;
2097 // But, that may make other instructions unprofitable, too.
2098 // So, do a backward scan, do any earlier instructions become unprofitable?
2099 assert(!ProfitableToSinkInstruction(LRI) &&
2100 "We already know that the last instruction is unprofitable to sink");
2101 ++LRI;
2102 --Idx;
2103 while (Idx >= 0) {
2104 // If we detect that an instruction becomes unprofitable to sink,
2105 // all earlier instructions won't be sunk either,
2106 // so preemptively keep InstructionsProfitableToSink in sync.
2107 // FIXME: is this the most performant approach?
2108 for (auto *I : *LRI)
2109 InstructionsProfitableToSink.erase(I);
2110 if (!ProfitableToSinkInstruction(LRI)) {
2111 // Everything starting with this instruction won't be sunk.
2112 ScanIdx = Idx;
2113 InstructionsToSink = InstructionsProfitableToSink;
2115 ++LRI;
2116 --Idx;
2120 // If no instructions can be sunk, early-return.
2121 if (ScanIdx == 0)
2122 return false;
2124 bool Changed = false;
2126 if (HaveNonUnconditionalPredecessors) {
2127 // It is always legal to sink common instructions from unconditional
2128 // predecessors. However, if not all predecessors are unconditional,
2129 // this transformation might be pessimizing. So as a rule of thumb,
2130 // don't do it unless we'd sink at least one non-speculatable instruction.
2131 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2132 LRI.reset();
2133 int Idx = 0;
2134 bool Profitable = false;
2135 while (Idx < ScanIdx) {
2136 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2137 Profitable = true;
2138 break;
2140 --LRI;
2141 ++Idx;
2143 if (!Profitable)
2144 return false;
2146 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2147 // We have a conditional edge and we're going to sink some instructions.
2148 // Insert a new block postdominating all blocks we're going to sink from.
2149 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2150 // Edges couldn't be split.
2151 return false;
2152 Changed = true;
2155 // Now that we've analyzed all potential sinking candidates, perform the
2156 // actual sink. We iteratively sink the last non-terminator of the source
2157 // blocks into their common successor unless doing so would require too
2158 // many PHI instructions to be generated (currently only one PHI is allowed
2159 // per sunk instruction).
2161 // We can use InstructionsToSink to discount values needing PHI-merging that will
2162 // actually be sunk in a later iteration. This allows us to be more
2163 // aggressive in what we sink. This does allow a false positive where we
2164 // sink presuming a later value will also be sunk, but stop half way through
2165 // and never actually sink it which means we produce more PHIs than intended.
2166 // This is unlikely in practice though.
2167 int SinkIdx = 0;
2168 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2169 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2170 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2171 << "\n");
2173 // Because we've sunk every instruction in turn, the current instruction to
2174 // sink is always at index 0.
2175 LRI.reset();
2177 if (!sinkLastInstruction(UnconditionalPreds)) {
2178 LLVM_DEBUG(
2179 dbgs()
2180 << "SINK: stopping here, failed to actually sink instruction!\n");
2181 break;
2184 NumSinkCommonInstrs++;
2185 Changed = true;
2187 if (SinkIdx != 0)
2188 ++NumSinkCommonCode;
2189 return Changed;
2192 /// Determine if we can hoist sink a sole store instruction out of a
2193 /// conditional block.
2195 /// We are looking for code like the following:
2196 /// BrBB:
2197 /// store i32 %add, i32* %arrayidx2
2198 /// ... // No other stores or function calls (we could be calling a memory
2199 /// ... // function).
2200 /// %cmp = icmp ult %x, %y
2201 /// br i1 %cmp, label %EndBB, label %ThenBB
2202 /// ThenBB:
2203 /// store i32 %add5, i32* %arrayidx2
2204 /// br label EndBB
2205 /// EndBB:
2206 /// ...
2207 /// We are going to transform this into:
2208 /// BrBB:
2209 /// store i32 %add, i32* %arrayidx2
2210 /// ... //
2211 /// %cmp = icmp ult %x, %y
2212 /// %add.add5 = select i1 %cmp, i32 %add, %add5
2213 /// store i32 %add.add5, i32* %arrayidx2
2214 /// ...
2216 /// \return The pointer to the value of the previous store if the store can be
2217 /// hoisted into the predecessor block. 0 otherwise.
2218 static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
2219 BasicBlock *StoreBB, BasicBlock *EndBB) {
2220 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2221 if (!StoreToHoist)
2222 return nullptr;
2224 // Volatile or atomic.
2225 if (!StoreToHoist->isSimple())
2226 return nullptr;
2228 Value *StorePtr = StoreToHoist->getPointerOperand();
2229 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2231 // Look for a store to the same pointer in BrBB.
2232 unsigned MaxNumInstToLookAt = 9;
2233 // Skip pseudo probe intrinsic calls which are not really killing any memory
2234 // accesses.
2235 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2236 if (!MaxNumInstToLookAt)
2237 break;
2238 --MaxNumInstToLookAt;
2240 // Could be calling an instruction that affects memory like free().
2241 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2242 return nullptr;
2244 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2245 // Found the previous store to same location and type. Make sure it is
2246 // simple, to avoid introducing a spurious non-atomic write after an
2247 // atomic write.
2248 if (SI->getPointerOperand() == StorePtr &&
2249 SI->getValueOperand()->getType() == StoreTy && SI->isSimple())
2250 // Found the previous store, return its value operand.
2251 return SI->getValueOperand();
2252 return nullptr; // Unknown store.
2255 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2256 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2257 LI->isSimple()) {
2258 // Local objects (created by an `alloca` instruction) are always
2259 // writable, so once we are past a read from a location it is valid to
2260 // also write to that same location.
2261 // If the address of the local object never escapes the function, that
2262 // means it's never concurrently read or written, hence moving the store
2263 // from under the condition will not introduce a data race.
2264 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2265 if (AI && !PointerMayBeCaptured(AI, false, true))
2266 // Found a previous load, return it.
2267 return LI;
2269 // The load didn't work out, but we may still find a store.
2273 return nullptr;
2276 /// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2277 /// converted to selects.
2278 static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
2279 BasicBlock *EndBB,
2280 unsigned &SpeculatedInstructions,
2281 InstructionCost &Cost,
2282 const TargetTransformInfo &TTI) {
2283 TargetTransformInfo::TargetCostKind CostKind =
2284 BB->getParent()->hasMinSize()
2285 ? TargetTransformInfo::TCK_CodeSize
2286 : TargetTransformInfo::TCK_SizeAndLatency;
2288 bool HaveRewritablePHIs = false;
2289 for (PHINode &PN : EndBB->phis()) {
2290 Value *OrigV = PN.getIncomingValueForBlock(BB);
2291 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2293 // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
2294 // Skip PHIs which are trivial.
2295 if (ThenV == OrigV)
2296 continue;
2298 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2299 CmpInst::BAD_ICMP_PREDICATE, CostKind);
2301 // Don't convert to selects if we could remove undefined behavior instead.
2302 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2303 passingValueIsAlwaysUndefined(ThenV, &PN))
2304 return false;
2306 HaveRewritablePHIs = true;
2307 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2308 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2309 if (!OrigCE && !ThenCE)
2310 continue; // Known safe and cheap.
2312 if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
2313 (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
2314 return false;
2315 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2316 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2317 InstructionCost MaxCost =
2318 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2319 if (OrigCost + ThenCost > MaxCost)
2320 return false;
2322 // Account for the cost of an unfolded ConstantExpr which could end up
2323 // getting expanded into Instructions.
2324 // FIXME: This doesn't account for how many operations are combined in the
2325 // constant expression.
2326 ++SpeculatedInstructions;
2327 if (SpeculatedInstructions > 1)
2328 return false;
2331 return HaveRewritablePHIs;
2334 /// Speculate a conditional basic block flattening the CFG.
2336 /// Note that this is a very risky transform currently. Speculating
2337 /// instructions like this is most often not desirable. Instead, there is an MI
2338 /// pass which can do it with full awareness of the resource constraints.
2339 /// However, some cases are "obvious" and we should do directly. An example of
2340 /// this is speculating a single, reasonably cheap instruction.
2342 /// There is only one distinct advantage to flattening the CFG at the IR level:
2343 /// it makes very common but simplistic optimizations such as are common in
2344 /// instcombine and the DAG combiner more powerful by removing CFG edges and
2345 /// modeling their effects with easier to reason about SSA value graphs.
2348 /// An illustration of this transform is turning this IR:
2349 /// \code
2350 /// BB:
2351 /// %cmp = icmp ult %x, %y
2352 /// br i1 %cmp, label %EndBB, label %ThenBB
2353 /// ThenBB:
2354 /// %sub = sub %x, %y
2355 /// br label BB2
2356 /// EndBB:
2357 /// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
2358 /// ...
2359 /// \endcode
2361 /// Into this IR:
2362 /// \code
2363 /// BB:
2364 /// %cmp = icmp ult %x, %y
2365 /// %sub = sub %x, %y
2366 /// %cond = select i1 %cmp, 0, %sub
2367 /// ...
2368 /// \endcode
2370 /// \returns true if the conditional block is removed.
2371 bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
2372 const TargetTransformInfo &TTI) {
2373 // Be conservative for now. FP select instruction can often be expensive.
2374 Value *BrCond = BI->getCondition();
2375 if (isa<FCmpInst>(BrCond))
2376 return false;
2378 BasicBlock *BB = BI->getParent();
2379 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
2380 InstructionCost Budget =
2381 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2383 // If ThenBB is actually on the false edge of the conditional branch, remember
2384 // to swap the select operands later.
2385 bool Invert = false;
2386 if (ThenBB != BI->getSuccessor(0)) {
2387 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
2388 Invert = true;
2390 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
2392 // If the branch is non-unpredictable, and is predicted to *not* branch to
2393 // the `then` block, then avoid speculating it.
2394 if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
2395 uint64_t TWeight, FWeight;
2396 if (BI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) {
2397 uint64_t EndWeight = Invert ? TWeight : FWeight;
2398 BranchProbability BIEndProb =
2399 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
2400 BranchProbability Likely = TTI.getPredictableBranchThreshold();
2401 if (BIEndProb >= Likely)
2402 return false;
2406 // Keep a count of how many times instructions are used within ThenBB when
2407 // they are candidates for sinking into ThenBB. Specifically:
2408 // - They are defined in BB, and
2409 // - They have no side effects, and
2410 // - All of their uses are in ThenBB.
2411 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
2413 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
2415 unsigned SpeculatedInstructions = 0;
2416 Value *SpeculatedStoreValue = nullptr;
2417 StoreInst *SpeculatedStore = nullptr;
2418 for (BasicBlock::iterator BBI = ThenBB->begin(),
2419 BBE = std::prev(ThenBB->end());
2420 BBI != BBE; ++BBI) {
2421 Instruction *I = &*BBI;
2422 // Skip debug info.
2423 if (isa<DbgInfoIntrinsic>(I)) {
2424 SpeculatedDbgIntrinsics.push_back(I);
2425 continue;
2428 // Skip pseudo probes. The consequence is we lose track of the branch
2429 // probability for ThenBB, which is fine since the optimization here takes
2430 // place regardless of the branch probability.
2431 if (isa<PseudoProbeInst>(I)) {
2432 // The probe should be deleted so that it will not be over-counted when
2433 // the samples collected on the non-conditional path are counted towards
2434 // the conditional path. We leave it for the counts inference algorithm to
2435 // figure out a proper count for an unknown probe.
2436 SpeculatedDbgIntrinsics.push_back(I);
2437 continue;
2440 // Only speculatively execute a single instruction (not counting the
2441 // terminator) for now.
2442 ++SpeculatedInstructions;
2443 if (SpeculatedInstructions > 1)
2444 return false;
2446 // Don't hoist the instruction if it's unsafe or expensive.
2447 if (!isSafeToSpeculativelyExecute(I) &&
2448 !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
2449 I, BB, ThenBB, EndBB))))
2450 return false;
2451 if (!SpeculatedStoreValue &&
2452 computeSpeculationCost(I, TTI) >
2453 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
2454 return false;
2456 // Store the store speculation candidate.
2457 if (SpeculatedStoreValue)
2458 SpeculatedStore = cast<StoreInst>(I);
2460 // Do not hoist the instruction if any of its operands are defined but not
2461 // used in BB. The transformation will prevent the operand from
2462 // being sunk into the use block.
2463 for (Use &Op : I->operands()) {
2464 Instruction *OpI = dyn_cast<Instruction>(Op);
2465 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
2466 continue; // Not a candidate for sinking.
2468 ++SinkCandidateUseCounts[OpI];
2472 // Consider any sink candidates which are only used in ThenBB as costs for
2473 // speculation. Note, while we iterate over a DenseMap here, we are summing
2474 // and so iteration order isn't significant.
2475 for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
2476 I = SinkCandidateUseCounts.begin(),
2477 E = SinkCandidateUseCounts.end();
2478 I != E; ++I)
2479 if (I->first->hasNUses(I->second)) {
2480 ++SpeculatedInstructions;
2481 if (SpeculatedInstructions > 1)
2482 return false;
2485 // Check that we can insert the selects and that it's not too expensive to do
2486 // so.
2487 bool Convert = SpeculatedStore != nullptr;
2488 InstructionCost Cost = 0;
2489 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
2490 SpeculatedInstructions,
2491 Cost, TTI);
2492 if (!Convert || Cost > Budget)
2493 return false;
2495 // If we get here, we can hoist the instruction and if-convert.
2496 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
2498 // Insert a select of the value of the speculated store.
2499 if (SpeculatedStoreValue) {
2500 IRBuilder<NoFolder> Builder(BI);
2501 Value *TrueV = SpeculatedStore->getValueOperand();
2502 Value *FalseV = SpeculatedStoreValue;
2503 if (Invert)
2504 std::swap(TrueV, FalseV);
2505 Value *S = Builder.CreateSelect(
2506 BrCond, TrueV, FalseV, "spec.store.select", BI);
2507 SpeculatedStore->setOperand(0, S);
2508 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
2509 SpeculatedStore->getDebugLoc());
2512 // Metadata can be dependent on the condition we are hoisting above.
2513 // Conservatively strip all metadata on the instruction. Drop the debug loc
2514 // to avoid making it appear as if the condition is a constant, which would
2515 // be misleading while debugging.
2516 // Similarly strip attributes that maybe dependent on condition we are
2517 // hoisting above.
2518 for (auto &I : *ThenBB) {
2519 if (!SpeculatedStoreValue || &I != SpeculatedStore)
2520 I.setDebugLoc(DebugLoc());
2521 I.dropUndefImplyingAttrsAndUnknownMetadata();
2524 // Hoist the instructions.
2525 BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
2526 ThenBB->begin(), std::prev(ThenBB->end()));
2528 // Insert selects and rewrite the PHI operands.
2529 IRBuilder<NoFolder> Builder(BI);
2530 for (PHINode &PN : EndBB->phis()) {
2531 unsigned OrigI = PN.getBasicBlockIndex(BB);
2532 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
2533 Value *OrigV = PN.getIncomingValue(OrigI);
2534 Value *ThenV = PN.getIncomingValue(ThenI);
2536 // Skip PHIs which are trivial.
2537 if (OrigV == ThenV)
2538 continue;
2540 // Create a select whose true value is the speculatively executed value and
2541 // false value is the pre-existing value. Swap them if the branch
2542 // destinations were inverted.
2543 Value *TrueV = ThenV, *FalseV = OrigV;
2544 if (Invert)
2545 std::swap(TrueV, FalseV);
2546 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
2547 PN.setIncomingValue(OrigI, V);
2548 PN.setIncomingValue(ThenI, V);
2551 // Remove speculated dbg intrinsics.
2552 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
2553 // dbg value for the different flows and inserting it after the select.
2554 for (Instruction *I : SpeculatedDbgIntrinsics)
2555 I->eraseFromParent();
2557 ++NumSpeculations;
2558 return true;
2561 /// Return true if we can thread a branch across this block.
2562 static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
2563 int Size = 0;
2565 SmallPtrSet<const Value *, 32> EphValues;
2566 auto IsEphemeral = [&](const Value *V) {
2567 if (isa<AssumeInst>(V))
2568 return true;
2569 return isSafeToSpeculativelyExecute(V) &&
2570 all_of(V->users(),
2571 [&](const User *U) { return EphValues.count(U); });
2574 // Walk the loop in reverse so that we can identify ephemeral values properly
2575 // (values only feeding assumes).
2576 for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
2577 // Can't fold blocks that contain noduplicate or convergent calls.
2578 if (CallInst *CI = dyn_cast<CallInst>(&I))
2579 if (CI->cannotDuplicate() || CI->isConvergent())
2580 return false;
2582 // Ignore ephemeral values which are deleted during codegen.
2583 if (IsEphemeral(&I))
2584 EphValues.insert(&I);
2585 // We will delete Phis while threading, so Phis should not be accounted in
2586 // block's size.
2587 else if (!isa<PHINode>(I)) {
2588 if (Size++ > MaxSmallBlockSize)
2589 return false; // Don't clone large BB's.
2592 // We can only support instructions that do not define values that are
2593 // live outside of the current basic block.
2594 for (User *U : I.users()) {
2595 Instruction *UI = cast<Instruction>(U);
2596 if (UI->getParent() != BB || isa<PHINode>(UI))
2597 return false;
2600 // Looks ok, continue checking.
2603 return true;
2606 /// If we have a conditional branch on a PHI node value that is defined in the
2607 /// same block as the branch and if any PHI entries are constants, thread edges
2608 /// corresponding to that entry to be branches to their ultimate destination.
2609 static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
2610 DomTreeUpdater *DTU,
2611 const DataLayout &DL,
2612 AssumptionCache *AC) {
2613 BasicBlock *BB = BI->getParent();
2614 PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
2615 // NOTE: we currently cannot transform this case if the PHI node is used
2616 // outside of the block.
2617 if (!PN || PN->getParent() != BB || !PN->hasOneUse())
2618 return false;
2620 // Degenerate case of a single entry PHI.
2621 if (PN->getNumIncomingValues() == 1) {
2622 FoldSingleEntryPHINodes(PN->getParent());
2623 return true;
2626 // Now we know that this block has multiple preds and two succs.
2627 if (!BlockIsSimpleEnoughToThreadThrough(BB))
2628 return false;
2630 // Okay, this is a simple enough basic block. See if any phi values are
2631 // constants.
2632 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
2633 ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
2634 if (!CB || !CB->getType()->isIntegerTy(1))
2635 continue;
2637 // Okay, we now know that all edges from PredBB should be revectored to
2638 // branch to RealDest.
2639 BasicBlock *PredBB = PN->getIncomingBlock(i);
2640 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
2642 if (RealDest == BB)
2643 continue; // Skip self loops.
2644 // Skip if the predecessor's terminator is an indirect branch.
2645 if (isa<IndirectBrInst>(PredBB->getTerminator()))
2646 continue;
2648 SmallVector<DominatorTree::UpdateType, 3> Updates;
2650 // The dest block might have PHI nodes, other predecessors and other
2651 // difficult cases. Instead of being smart about this, just insert a new
2652 // block that jumps to the destination block, effectively splitting
2653 // the edge we are about to create.
2654 BasicBlock *EdgeBB =
2655 BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
2656 RealDest->getParent(), RealDest);
2657 BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
2658 if (DTU)
2659 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
2660 CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
2662 // Update PHI nodes.
2663 AddPredecessorToBlock(RealDest, EdgeBB, BB);
2665 // BB may have instructions that are being threaded over. Clone these
2666 // instructions into EdgeBB. We know that there will be no uses of the
2667 // cloned instructions outside of EdgeBB.
2668 BasicBlock::iterator InsertPt = EdgeBB->begin();
2669 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
2670 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
2671 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
2672 TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
2673 continue;
2675 // Clone the instruction.
2676 Instruction *N = BBI->clone();
2677 if (BBI->hasName())
2678 N->setName(BBI->getName() + ".c");
2680 // Update operands due to translation.
2681 for (Use &Op : N->operands()) {
2682 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
2683 if (PI != TranslateMap.end())
2684 Op = PI->second;
2687 // Check for trivial simplification.
2688 if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
2689 if (!BBI->use_empty())
2690 TranslateMap[&*BBI] = V;
2691 if (!N->mayHaveSideEffects()) {
2692 N->deleteValue(); // Instruction folded away, don't need actual inst
2693 N = nullptr;
2695 } else {
2696 if (!BBI->use_empty())
2697 TranslateMap[&*BBI] = N;
2699 if (N) {
2700 // Insert the new instruction into its new home.
2701 EdgeBB->getInstList().insert(InsertPt, N);
2703 // Register the new instruction with the assumption cache if necessary.
2704 if (auto *Assume = dyn_cast<AssumeInst>(N))
2705 if (AC)
2706 AC->registerAssumption(Assume);
2710 // Loop over all of the edges from PredBB to BB, changing them to branch
2711 // to EdgeBB instead.
2712 Instruction *PredBBTI = PredBB->getTerminator();
2713 for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
2714 if (PredBBTI->getSuccessor(i) == BB) {
2715 BB->removePredecessor(PredBB);
2716 PredBBTI->setSuccessor(i, EdgeBB);
2719 if (DTU) {
2720 Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
2721 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2723 DTU->applyUpdates(Updates);
2726 // Signal repeat, simplifying any other constants.
2727 return None;
2730 return false;
2733 static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
2734 const DataLayout &DL, AssumptionCache *AC) {
2735 Optional<bool> Result;
2736 bool EverChanged = false;
2737 do {
2738 // Note that None means "we changed things, but recurse further."
2739 Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC);
2740 EverChanged |= Result == None || *Result;
2741 } while (Result == None);
2742 return EverChanged;
2745 /// Given a BB that starts with the specified two-entry PHI node,
2746 /// see if we can eliminate it.
2747 static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
2748 DomTreeUpdater *DTU, const DataLayout &DL) {
2749 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
2750 // statement", which has a very simple dominance structure. Basically, we
2751 // are trying to find the condition that is being branched on, which
2752 // subsequently causes this merge to happen. We really want control
2753 // dependence information for this check, but simplifycfg can't keep it up
2754 // to date, and this catches most of the cases we care about anyway.
2755 BasicBlock *BB = PN->getParent();
2757 BasicBlock *IfTrue, *IfFalse;
2758 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
2759 if (!DomBI)
2760 return false;
2761 Value *IfCond = DomBI->getCondition();
2762 // Don't bother if the branch will be constant folded trivially.
2763 if (isa<ConstantInt>(IfCond))
2764 return false;
2766 BasicBlock *DomBlock = DomBI->getParent();
2767 SmallVector<BasicBlock *, 2> IfBlocks;
2768 llvm::copy_if(
2769 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
2770 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
2772 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
2773 "Will have either one or two blocks to speculate.");
2775 // If the branch is non-unpredictable, see if we either predictably jump to
2776 // the merge bb (if we have only a single 'then' block), or if we predictably
2777 // jump to one specific 'then' block (if we have two of them).
2778 // It isn't beneficial to speculatively execute the code
2779 // from the block that we know is predictably not entered.
2780 if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
2781 uint64_t TWeight, FWeight;
2782 if (DomBI->extractProfMetadata(TWeight, FWeight) &&
2783 (TWeight + FWeight) != 0) {
2784 BranchProbability BITrueProb =
2785 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
2786 BranchProbability Likely = TTI.getPredictableBranchThreshold();
2787 BranchProbability BIFalseProb = BITrueProb.getCompl();
2788 if (IfBlocks.size() == 1) {
2789 BranchProbability BIBBProb =
2790 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
2791 if (BIBBProb >= Likely)
2792 return false;
2793 } else {
2794 if (BITrueProb >= Likely || BIFalseProb >= Likely)
2795 return false;
2800 // Don't try to fold an unreachable block. For example, the phi node itself
2801 // can't be the candidate if-condition for a select that we want to form.
2802 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
2803 if (IfCondPhiInst->getParent() == BB)
2804 return false;
2806 // Okay, we found that we can merge this two-entry phi node into a select.
2807 // Doing so would require us to fold *all* two entry phi nodes in this block.
2808 // At some point this becomes non-profitable (particularly if the target
2809 // doesn't support cmov's). Only do this transformation if there are two or
2810 // fewer PHI nodes in this block.
2811 unsigned NumPhis = 0;
2812 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
2813 if (NumPhis > 2)
2814 return false;
2816 // Loop over the PHI's seeing if we can promote them all to select
2817 // instructions. While we are at it, keep track of the instructions
2818 // that need to be moved to the dominating block.
2819 SmallPtrSet<Instruction *, 4> AggressiveInsts;
2820 InstructionCost Cost = 0;
2821 InstructionCost Budget =
2822 TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2824 bool Changed = false;
2825 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
2826 PHINode *PN = cast<PHINode>(II++);
2827 if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
2828 PN->replaceAllUsesWith(V);
2829 PN->eraseFromParent();
2830 Changed = true;
2831 continue;
2834 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
2835 Cost, Budget, TTI) ||
2836 !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
2837 Cost, Budget, TTI))
2838 return Changed;
2841 // If we folded the first phi, PN dangles at this point. Refresh it. If
2842 // we ran out of PHIs then we simplified them all.
2843 PN = dyn_cast<PHINode>(BB->begin());
2844 if (!PN)
2845 return true;
2847 // Return true if at least one of these is a 'not', and another is either
2848 // a 'not' too, or a constant.
2849 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
2850 if (!match(V0, m_Not(m_Value())))
2851 std::swap(V0, V1);
2852 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
2853 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
2856 // Don't fold i1 branches on PHIs which contain binary operators or
2857 // (possibly inverted) select form of or/ands, unless one of
2858 // the incoming values is an 'not' and another one is freely invertible.
2859 // These can often be turned into switches and other things.
2860 auto IsBinOpOrAnd = [](Value *V) {
2861 return match(
2862 V, m_CombineOr(
2863 m_BinOp(),
2864 m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()),
2865 m_Select(m_Value(), m_Value(), m_ImmConstant()))));
2867 if (PN->getType()->isIntegerTy(1) &&
2868 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
2869 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
2870 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
2871 PN->getIncomingValue(1)))
2872 return Changed;
2874 // If all PHI nodes are promotable, check to make sure that all instructions
2875 // in the predecessor blocks can be promoted as well. If not, we won't be able
2876 // to get rid of the control flow, so it's not worth promoting to select
2877 // instructions.
2878 for (BasicBlock *IfBlock : IfBlocks)
2879 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
2880 if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
2881 !isa<PseudoProbeInst>(I)) {
2882 // This is not an aggressive instruction that we can promote.
2883 // Because of this, we won't be able to get rid of the control flow, so
2884 // the xform is not worth it.
2885 return Changed;
2888 // If either of the blocks has it's address taken, we can't do this fold.
2889 if (any_of(IfBlocks,
2890 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
2891 return Changed;
2893 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
2894 << " T: " << IfTrue->getName()
2895 << " F: " << IfFalse->getName() << "\n");
2897 // If we can still promote the PHI nodes after this gauntlet of tests,
2898 // do all of the PHI's now.
2900 // Move all 'aggressive' instructions, which are defined in the
2901 // conditional parts of the if's up to the dominating block.
2902 for (BasicBlock *IfBlock : IfBlocks)
2903 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
2905 IRBuilder<NoFolder> Builder(DomBI);
2906 // Propagate fast-math-flags from phi nodes to replacement selects.
2907 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2908 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
2909 if (isa<FPMathOperator>(PN))
2910 Builder.setFastMathFlags(PN->getFastMathFlags());
2912 // Change the PHI node into a select instruction.
2913 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
2914 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
2916 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
2917 PN->replaceAllUsesWith(Sel);
2918 Sel->takeName(PN);
2919 PN->eraseFromParent();
2922 // At this point, all IfBlocks are empty, so our if statement
2923 // has been flattened. Change DomBlock to jump directly to our new block to
2924 // avoid other simplifycfg's kicking in on the diamond.
2925 Builder.CreateBr(BB);
2927 SmallVector<DominatorTree::UpdateType, 3> Updates;
2928 if (DTU) {
2929 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
2930 for (auto *Successor : successors(DomBlock))
2931 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
2934 DomBI->eraseFromParent();
2935 if (DTU)
2936 DTU->applyUpdates(Updates);
2938 return true;
2941 static Value *createLogicalOp(IRBuilderBase &Builder,
2942 Instruction::BinaryOps Opc, Value *LHS,
2943 Value *RHS, const Twine &Name = "") {
2944 // Try to relax logical op to binary op.
2945 if (impliesPoison(RHS, LHS))
2946 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
2947 if (Opc == Instruction::And)
2948 return Builder.CreateLogicalAnd(LHS, RHS, Name);
2949 if (Opc == Instruction::Or)
2950 return Builder.CreateLogicalOr(LHS, RHS, Name);
2951 llvm_unreachable("Invalid logical opcode");
2954 /// Return true if either PBI or BI has branch weight available, and store
2955 /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
2956 /// not have branch weight, use 1:1 as its weight.
2957 static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
2958 uint64_t &PredTrueWeight,
2959 uint64_t &PredFalseWeight,
2960 uint64_t &SuccTrueWeight,
2961 uint64_t &SuccFalseWeight) {
2962 bool PredHasWeights =
2963 PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
2964 bool SuccHasWeights =
2965 BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
2966 if (PredHasWeights || SuccHasWeights) {
2967 if (!PredHasWeights)
2968 PredTrueWeight = PredFalseWeight = 1;
2969 if (!SuccHasWeights)
2970 SuccTrueWeight = SuccFalseWeight = 1;
2971 return true;
2972 } else {
2973 return false;
2977 /// Determine if the two branches share a common destination and deduce a glue
2978 /// that joins the branches' conditions to arrive at the common destination if
2979 /// that would be profitable.
2980 static Optional<std::pair<Instruction::BinaryOps, bool>>
2981 shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
2982 const TargetTransformInfo *TTI) {
2983 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
2984 "Both blocks must end with a conditional branches.");
2985 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
2986 "PredBB must be a predecessor of BB.");
2988 // We have the potential to fold the conditions together, but if the
2989 // predecessor branch is predictable, we may not want to merge them.
2990 uint64_t PTWeight, PFWeight;
2991 BranchProbability PBITrueProb, Likely;
2992 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
2993 PBI->extractProfMetadata(PTWeight, PFWeight) &&
2994 (PTWeight + PFWeight) != 0) {
2995 PBITrueProb =
2996 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
2997 Likely = TTI->getPredictableBranchThreshold();
3000 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3001 // Speculate the 2nd condition unless the 1st is probably true.
3002 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3003 return {{Instruction::Or, false}};
3004 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3005 // Speculate the 2nd condition unless the 1st is probably false.
3006 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3007 return {{Instruction::And, false}};
3008 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3009 // Speculate the 2nd condition unless the 1st is probably true.
3010 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3011 return {{Instruction::And, true}};
3012 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3013 // Speculate the 2nd condition unless the 1st is probably false.
3014 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3015 return {{Instruction::Or, true}};
3017 return None;
3020 static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
3021 DomTreeUpdater *DTU,
3022 MemorySSAUpdater *MSSAU,
3023 const TargetTransformInfo *TTI) {
3024 BasicBlock *BB = BI->getParent();
3025 BasicBlock *PredBlock = PBI->getParent();
3027 // Determine if the two branches share a common destination.
3028 Instruction::BinaryOps Opc;
3029 bool InvertPredCond;
3030 std::tie(Opc, InvertPredCond) =
3031 *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
3033 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3035 IRBuilder<> Builder(PBI);
3036 // The builder is used to create instructions to eliminate the branch in BB.
3037 // If BB's terminator has !annotation metadata, add it to the new
3038 // instructions.
3039 Builder.CollectMetadataToCopy(BB->getTerminator(),
3040 {LLVMContext::MD_annotation});
3042 // If we need to invert the condition in the pred block to match, do so now.
3043 if (InvertPredCond) {
3044 Value *NewCond = PBI->getCondition();
3045 if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
3046 CmpInst *CI = cast<CmpInst>(NewCond);
3047 CI->setPredicate(CI->getInversePredicate());
3048 } else {
3049 NewCond =
3050 Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
3053 PBI->setCondition(NewCond);
3054 PBI->swapSuccessors();
3057 BasicBlock *UniqueSucc =
3058 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3060 // Before cloning instructions, notify the successor basic block that it
3061 // is about to have a new predecessor. This will update PHI nodes,
3062 // which will allow us to update live-out uses of bonus instructions.
3063 AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3065 // Try to update branch weights.
3066 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3067 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3068 SuccTrueWeight, SuccFalseWeight)) {
3069 SmallVector<uint64_t, 8> NewWeights;
3071 if (PBI->getSuccessor(0) == BB) {
3072 // PBI: br i1 %x, BB, FalseDest
3073 // BI: br i1 %y, UniqueSucc, FalseDest
3074 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3075 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3076 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3077 // TrueWeight for PBI * FalseWeight for BI.
3078 // We assume that total weights of a BranchInst can fit into 32 bits.
3079 // Therefore, we will not have overflow using 64-bit arithmetic.
3080 NewWeights.push_back(PredFalseWeight *
3081 (SuccFalseWeight + SuccTrueWeight) +
3082 PredTrueWeight * SuccFalseWeight);
3083 } else {
3084 // PBI: br i1 %x, TrueDest, BB
3085 // BI: br i1 %y, TrueDest, UniqueSucc
3086 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3087 // FalseWeight for PBI * TrueWeight for BI.
3088 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3089 PredFalseWeight * SuccTrueWeight);
3090 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3091 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3094 // Halve the weights if any of them cannot fit in an uint32_t
3095 FitWeights(NewWeights);
3097 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3098 setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
3100 // TODO: If BB is reachable from all paths through PredBlock, then we
3101 // could replace PBI's branch probabilities with BI's.
3102 } else
3103 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3105 // Now, update the CFG.
3106 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3108 if (DTU)
3109 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3110 {DominatorTree::Delete, PredBlock, BB}});
3112 // If BI was a loop latch, it may have had associated loop metadata.
3113 // We need to copy it to the new latch, that is, PBI.
3114 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3115 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3117 ValueToValueMapTy VMap; // maps original values to cloned values
3118 CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
3120 // Now that the Cond was cloned into the predecessor basic block,
3121 // or/and the two conditions together.
3122 Value *BICond = VMap[BI->getCondition()];
3123 PBI->setCondition(
3124 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3126 // Copy any debug value intrinsics into the end of PredBlock.
3127 for (Instruction &I : *BB) {
3128 if (isa<DbgInfoIntrinsic>(I)) {
3129 Instruction *NewI = I.clone();
3130 RemapInstruction(NewI, VMap,
3131 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
3132 NewI->insertBefore(PBI);
3136 ++NumFoldBranchToCommonDest;
3137 return true;
3140 /// If this basic block is simple enough, and if a predecessor branches to us
3141 /// and one of our successors, fold the block into the predecessor and use
3142 /// logical operations to pick the right destination.
3143 bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
3144 MemorySSAUpdater *MSSAU,
3145 const TargetTransformInfo *TTI,
3146 unsigned BonusInstThreshold) {
3147 // If this block ends with an unconditional branch,
3148 // let SpeculativelyExecuteBB() deal with it.
3149 if (!BI->isConditional())
3150 return false;
3152 BasicBlock *BB = BI->getParent();
3153 TargetTransformInfo::TargetCostKind CostKind =
3154 BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
3155 : TargetTransformInfo::TCK_SizeAndLatency;
3157 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3159 if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
3160 Cond->getParent() != BB || !Cond->hasOneUse())
3161 return false;
3163 // Cond is known to be a compare or binary operator. Check to make sure that
3164 // neither operand is a potentially-trapping constant expression.
3165 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
3166 if (CE->canTrap())
3167 return false;
3168 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
3169 if (CE->canTrap())
3170 return false;
3172 // Finally, don't infinitely unroll conditional loops.
3173 if (is_contained(successors(BB), BB))
3174 return false;
3176 // With which predecessors will we want to deal with?
3177 SmallVector<BasicBlock *, 8> Preds;
3178 for (BasicBlock *PredBlock : predecessors(BB)) {
3179 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3181 // Check that we have two conditional branches. If there is a PHI node in
3182 // the common successor, verify that the same value flows in from both
3183 // blocks.
3184 if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3185 continue;
3187 // Determine if the two branches share a common destination.
3188 Instruction::BinaryOps Opc;
3189 bool InvertPredCond;
3190 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3191 std::tie(Opc, InvertPredCond) = *Recipe;
3192 else
3193 continue;
3195 // Check the cost of inserting the necessary logic before performing the
3196 // transformation.
3197 if (TTI) {
3198 Type *Ty = BI->getCondition()->getType();
3199 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
3200 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3201 !isa<CmpInst>(PBI->getCondition())))
3202 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3204 if (Cost > BranchFoldThreshold)
3205 continue;
3208 // Ok, we do want to deal with this predecessor. Record it.
3209 Preds.emplace_back(PredBlock);
3212 // If there aren't any predecessors into which we can fold,
3213 // don't bother checking the cost.
3214 if (Preds.empty())
3215 return false;
3217 // Only allow this transformation if computing the condition doesn't involve
3218 // too many instructions and these involved instructions can be executed
3219 // unconditionally. We denote all involved instructions except the condition
3220 // as "bonus instructions", and only allow this transformation when the
3221 // number of the bonus instructions we'll need to create when cloning into
3222 // each predecessor does not exceed a certain threshold.
3223 unsigned NumBonusInsts = 0;
3224 const unsigned PredCount = Preds.size();
3225 for (Instruction &I : *BB) {
3226 // Don't check the branch condition comparison itself.
3227 if (&I == Cond)
3228 continue;
3229 // Ignore dbg intrinsics, and the terminator.
3230 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3231 continue;
3232 // I must be safe to execute unconditionally.
3233 if (!isSafeToSpeculativelyExecute(&I))
3234 return false;
3236 // Account for the cost of duplicating this instruction into each
3237 // predecessor.
3238 NumBonusInsts += PredCount;
3239 // Early exits once we reach the limit.
3240 if (NumBonusInsts > BonusInstThreshold)
3241 return false;
3244 // Ok, we have the budget. Perform the transformation.
3245 for (BasicBlock *PredBlock : Preds) {
3246 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
3247 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
3249 return false;
3252 // If there is only one store in BB1 and BB2, return it, otherwise return
3253 // nullptr.
3254 static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
3255 StoreInst *S = nullptr;
3256 for (auto *BB : {BB1, BB2}) {
3257 if (!BB)
3258 continue;
3259 for (auto &I : *BB)
3260 if (auto *SI = dyn_cast<StoreInst>(&I)) {
3261 if (S)
3262 // Multiple stores seen.
3263 return nullptr;
3264 else
3265 S = SI;
3268 return S;
3271 static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
3272 Value *AlternativeV = nullptr) {
3273 // PHI is going to be a PHI node that allows the value V that is defined in
3274 // BB to be referenced in BB's only successor.
3276 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
3277 // doesn't matter to us what the other operand is (it'll never get used). We
3278 // could just create a new PHI with an undef incoming value, but that could
3279 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
3280 // other PHI. So here we directly look for some PHI in BB's successor with V
3281 // as an incoming operand. If we find one, we use it, else we create a new
3282 // one.
3284 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
3285 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
3286 // where OtherBB is the single other predecessor of BB's only successor.
3287 PHINode *PHI = nullptr;
3288 BasicBlock *Succ = BB->getSingleSuccessor();
3290 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
3291 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
3292 PHI = cast<PHINode>(I);
3293 if (!AlternativeV)
3294 break;
3296 assert(Succ->hasNPredecessors(2));
3297 auto PredI = pred_begin(Succ);
3298 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
3299 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
3300 break;
3301 PHI = nullptr;
3303 if (PHI)
3304 return PHI;
3306 // If V is not an instruction defined in BB, just return it.
3307 if (!AlternativeV &&
3308 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
3309 return V;
3311 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
3312 PHI->addIncoming(V, BB);
3313 for (BasicBlock *PredBB : predecessors(Succ))
3314 if (PredBB != BB)
3315 PHI->addIncoming(
3316 AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
3317 return PHI;
3320 static bool mergeConditionalStoreToAddress(
3321 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
3322 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
3323 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
3324 // For every pointer, there must be exactly two stores, one coming from
3325 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
3326 // store (to any address) in PTB,PFB or QTB,QFB.
3327 // FIXME: We could relax this restriction with a bit more work and performance
3328 // testing.
3329 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
3330 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
3331 if (!PStore || !QStore)
3332 return false;
3334 // Now check the stores are compatible.
3335 if (!QStore->isUnordered() || !PStore->isUnordered())
3336 return false;
3338 // Check that sinking the store won't cause program behavior changes. Sinking
3339 // the store out of the Q blocks won't change any behavior as we're sinking
3340 // from a block to its unconditional successor. But we're moving a store from
3341 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
3342 // So we need to check that there are no aliasing loads or stores in
3343 // QBI, QTB and QFB. We also need to check there are no conflicting memory
3344 // operations between PStore and the end of its parent block.
3346 // The ideal way to do this is to query AliasAnalysis, but we don't
3347 // preserve AA currently so that is dangerous. Be super safe and just
3348 // check there are no other memory operations at all.
3349 for (auto &I : *QFB->getSinglePredecessor())
3350 if (I.mayReadOrWriteMemory())
3351 return false;
3352 for (auto &I : *QFB)
3353 if (&I != QStore && I.mayReadOrWriteMemory())
3354 return false;
3355 if (QTB)
3356 for (auto &I : *QTB)
3357 if (&I != QStore && I.mayReadOrWriteMemory())
3358 return false;
3359 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
3360 I != E; ++I)
3361 if (&*I != PStore && I->mayReadOrWriteMemory())
3362 return false;
3364 // If we're not in aggressive mode, we only optimize if we have some
3365 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
3366 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
3367 if (!BB)
3368 return true;
3369 // Heuristic: if the block can be if-converted/phi-folded and the
3370 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
3371 // thread this store.
3372 InstructionCost Cost = 0;
3373 InstructionCost Budget =
3374 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3375 for (auto &I : BB->instructionsWithoutDebug()) {
3376 // Consider terminator instruction to be free.
3377 if (I.isTerminator())
3378 continue;
3379 // If this is one the stores that we want to speculate out of this BB,
3380 // then don't count it's cost, consider it to be free.
3381 if (auto *S = dyn_cast<StoreInst>(&I))
3382 if (llvm::find(FreeStores, S))
3383 continue;
3384 // Else, we have a white-list of instructions that we are ak speculating.
3385 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
3386 return false; // Not in white-list - not worthwhile folding.
3387 // And finally, if this is a non-free instruction that we are okay
3388 // speculating, ensure that we consider the speculation budget.
3389 Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
3390 if (Cost > Budget)
3391 return false; // Eagerly refuse to fold as soon as we're out of budget.
3393 assert(Cost <= Budget &&
3394 "When we run out of budget we will eagerly return from within the "
3395 "per-instruction loop.");
3396 return true;
3399 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
3400 if (!MergeCondStoresAggressively &&
3401 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
3402 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
3403 return false;
3405 // If PostBB has more than two predecessors, we need to split it so we can
3406 // sink the store.
3407 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
3408 // We know that QFB's only successor is PostBB. And QFB has a single
3409 // predecessor. If QTB exists, then its only successor is also PostBB.
3410 // If QTB does not exist, then QFB's only predecessor has a conditional
3411 // branch to QFB and PostBB.
3412 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
3413 BasicBlock *NewBB =
3414 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
3415 if (!NewBB)
3416 return false;
3417 PostBB = NewBB;
3420 // OK, we're going to sink the stores to PostBB. The store has to be
3421 // conditional though, so first create the predicate.
3422 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
3423 ->getCondition();
3424 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
3425 ->getCondition();
3427 Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
3428 PStore->getParent());
3429 Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
3430 QStore->getParent(), PPHI);
3432 IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
3434 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
3435 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
3437 if (InvertPCond)
3438 PPred = QB.CreateNot(PPred);
3439 if (InvertQCond)
3440 QPred = QB.CreateNot(QPred);
3441 Value *CombinedPred = QB.CreateOr(PPred, QPred);
3443 auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
3444 /*Unreachable=*/false,
3445 /*BranchWeights=*/nullptr, DTU);
3446 QB.SetInsertPoint(T);
3447 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
3448 AAMDNodes AAMD;
3449 PStore->getAAMetadata(AAMD, /*Merge=*/false);
3450 PStore->getAAMetadata(AAMD, /*Merge=*/true);
3451 SI->setAAMetadata(AAMD);
3452 // Choose the minimum alignment. If we could prove both stores execute, we
3453 // could use biggest one. In this case, though, we only know that one of the
3454 // stores executes. And we don't know it's safe to take the alignment from a
3455 // store that doesn't execute.
3456 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
3458 QStore->eraseFromParent();
3459 PStore->eraseFromParent();
3461 return true;
3464 static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
3465 DomTreeUpdater *DTU, const DataLayout &DL,
3466 const TargetTransformInfo &TTI) {
3467 // The intention here is to find diamonds or triangles (see below) where each
3468 // conditional block contains a store to the same address. Both of these
3469 // stores are conditional, so they can't be unconditionally sunk. But it may
3470 // be profitable to speculatively sink the stores into one merged store at the
3471 // end, and predicate the merged store on the union of the two conditions of
3472 // PBI and QBI.
3474 // This can reduce the number of stores executed if both of the conditions are
3475 // true, and can allow the blocks to become small enough to be if-converted.
3476 // This optimization will also chain, so that ladders of test-and-set
3477 // sequences can be if-converted away.
3479 // We only deal with simple diamonds or triangles:
3481 // PBI or PBI or a combination of the two
3482 // / \ | \
3483 // PTB PFB | PFB
3484 // \ / | /
3485 // QBI QBI
3486 // / \ | \
3487 // QTB QFB | QFB
3488 // \ / | /
3489 // PostBB PostBB
3491 // We model triangles as a type of diamond with a nullptr "true" block.
3492 // Triangles are canonicalized so that the fallthrough edge is represented by
3493 // a true condition, as in the diagram above.
3494 BasicBlock *PTB = PBI->getSuccessor(0);
3495 BasicBlock *PFB = PBI->getSuccessor(1);
3496 BasicBlock *QTB = QBI->getSuccessor(0);
3497 BasicBlock *QFB = QBI->getSuccessor(1);
3498 BasicBlock *PostBB = QFB->getSingleSuccessor();
3500 // Make sure we have a good guess for PostBB. If QTB's only successor is
3501 // QFB, then QFB is a better PostBB.
3502 if (QTB->getSingleSuccessor() == QFB)
3503 PostBB = QFB;
3505 // If we couldn't find a good PostBB, stop.
3506 if (!PostBB)
3507 return false;
3509 bool InvertPCond = false, InvertQCond = false;
3510 // Canonicalize fallthroughs to the true branches.
3511 if (PFB == QBI->getParent()) {
3512 std::swap(PFB, PTB);
3513 InvertPCond = true;
3515 if (QFB == PostBB) {
3516 std::swap(QFB, QTB);
3517 InvertQCond = true;
3520 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
3521 // and QFB may not. Model fallthroughs as a nullptr block.
3522 if (PTB == QBI->getParent())
3523 PTB = nullptr;
3524 if (QTB == PostBB)
3525 QTB = nullptr;
3527 // Legality bailouts. We must have at least the non-fallthrough blocks and
3528 // the post-dominating block, and the non-fallthroughs must only have one
3529 // predecessor.
3530 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
3531 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
3533 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
3534 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
3535 return false;
3536 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
3537 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
3538 return false;
3539 if (!QBI->getParent()->hasNUses(2))
3540 return false;
3542 // OK, this is a sequence of two diamonds or triangles.
3543 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
3544 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
3545 for (auto *BB : {PTB, PFB}) {
3546 if (!BB)
3547 continue;
3548 for (auto &I : *BB)
3549 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3550 PStoreAddresses.insert(SI->getPointerOperand());
3552 for (auto *BB : {QTB, QFB}) {
3553 if (!BB)
3554 continue;
3555 for (auto &I : *BB)
3556 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3557 QStoreAddresses.insert(SI->getPointerOperand());
3560 set_intersect(PStoreAddresses, QStoreAddresses);
3561 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
3562 // clear what it contains.
3563 auto &CommonAddresses = PStoreAddresses;
3565 bool Changed = false;
3566 for (auto *Address : CommonAddresses)
3567 Changed |=
3568 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
3569 InvertPCond, InvertQCond, DTU, DL, TTI);
3570 return Changed;
3573 /// If the previous block ended with a widenable branch, determine if reusing
3574 /// the target block is profitable and legal. This will have the effect of
3575 /// "widening" PBI, but doesn't require us to reason about hosting safety.
3576 static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
3577 DomTreeUpdater *DTU) {
3578 // TODO: This can be generalized in two important ways:
3579 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
3580 // values from the PBI edge.
3581 // 2) We can sink side effecting instructions into BI's fallthrough
3582 // successor provided they doesn't contribute to computation of
3583 // BI's condition.
3584 Value *CondWB, *WC;
3585 BasicBlock *IfTrueBB, *IfFalseBB;
3586 if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
3587 IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
3588 return false;
3589 if (!IfFalseBB->phis().empty())
3590 return false; // TODO
3591 // Use lambda to lazily compute expensive condition after cheap ones.
3592 auto NoSideEffects = [](BasicBlock &BB) {
3593 return !llvm::any_of(BB, [](const Instruction &I) {
3594 return I.mayWriteToMemory() || I.mayHaveSideEffects();
3597 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
3598 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
3599 NoSideEffects(*BI->getParent())) {
3600 auto *OldSuccessor = BI->getSuccessor(1);
3601 OldSuccessor->removePredecessor(BI->getParent());
3602 BI->setSuccessor(1, IfFalseBB);
3603 if (DTU)
3604 DTU->applyUpdates(
3605 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
3606 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
3607 return true;
3609 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
3610 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
3611 NoSideEffects(*BI->getParent())) {
3612 auto *OldSuccessor = BI->getSuccessor(0);
3613 OldSuccessor->removePredecessor(BI->getParent());
3614 BI->setSuccessor(0, IfFalseBB);
3615 if (DTU)
3616 DTU->applyUpdates(
3617 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
3618 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
3619 return true;
3621 return false;
3624 /// If we have a conditional branch as a predecessor of another block,
3625 /// this function tries to simplify it. We know
3626 /// that PBI and BI are both conditional branches, and BI is in one of the
3627 /// successor blocks of PBI - PBI branches to BI.
3628 static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
3629 DomTreeUpdater *DTU,
3630 const DataLayout &DL,
3631 const TargetTransformInfo &TTI) {
3632 assert(PBI->isConditional() && BI->isConditional());
3633 BasicBlock *BB = BI->getParent();
3635 // If this block ends with a branch instruction, and if there is a
3636 // predecessor that ends on a branch of the same condition, make
3637 // this conditional branch redundant.
3638 if (PBI->getCondition() == BI->getCondition() &&
3639 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
3640 // Okay, the outcome of this conditional branch is statically
3641 // knowable. If this block had a single pred, handle specially.
3642 if (BB->getSinglePredecessor()) {
3643 // Turn this into a branch on constant.
3644 bool CondIsTrue = PBI->getSuccessor(0) == BB;
3645 BI->setCondition(
3646 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
3647 return true; // Nuke the branch on constant.
3650 // Otherwise, if there are multiple predecessors, insert a PHI that merges
3651 // in the constant and simplify the block result. Subsequent passes of
3652 // simplifycfg will thread the block.
3653 if (BlockIsSimpleEnoughToThreadThrough(BB)) {
3654 pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
3655 PHINode *NewPN = PHINode::Create(
3656 Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
3657 BI->getCondition()->getName() + ".pr", &BB->front());
3658 // Okay, we're going to insert the PHI node. Since PBI is not the only
3659 // predecessor, compute the PHI'd conditional value for all of the preds.
3660 // Any predecessor where the condition is not computable we keep symbolic.
3661 for (pred_iterator PI = PB; PI != PE; ++PI) {
3662 BasicBlock *P = *PI;
3663 if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
3664 PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
3665 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
3666 bool CondIsTrue = PBI->getSuccessor(0) == BB;
3667 NewPN->addIncoming(
3668 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
3670 } else {
3671 NewPN->addIncoming(BI->getCondition(), P);
3675 BI->setCondition(NewPN);
3676 return true;
3680 // If the previous block ended with a widenable branch, determine if reusing
3681 // the target block is profitable and legal. This will have the effect of
3682 // "widening" PBI, but doesn't require us to reason about hosting safety.
3683 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
3684 return true;
3686 if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
3687 if (CE->canTrap())
3688 return false;
3690 // If both branches are conditional and both contain stores to the same
3691 // address, remove the stores from the conditionals and create a conditional
3692 // merged store at the end.
3693 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
3694 return true;
3696 // If this is a conditional branch in an empty block, and if any
3697 // predecessors are a conditional branch to one of our destinations,
3698 // fold the conditions into logical ops and one cond br.
3700 // Ignore dbg intrinsics.
3701 if (&*BB->instructionsWithoutDebug().begin() != BI)
3702 return false;
3704 int PBIOp, BIOp;
3705 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3706 PBIOp = 0;
3707 BIOp = 0;
3708 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3709 PBIOp = 0;
3710 BIOp = 1;
3711 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3712 PBIOp = 1;
3713 BIOp = 0;
3714 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3715 PBIOp = 1;
3716 BIOp = 1;
3717 } else {
3718 return false;
3721 // Check to make sure that the other destination of this branch
3722 // isn't BB itself. If so, this is an infinite loop that will
3723 // keep getting unwound.
3724 if (PBI->getSuccessor(PBIOp) == BB)
3725 return false;
3727 // Do not perform this transformation if it would require
3728 // insertion of a large number of select instructions. For targets
3729 // without predication/cmovs, this is a big pessimization.
3731 // Also do not perform this transformation if any phi node in the common
3732 // destination block can trap when reached by BB or PBB (PR17073). In that
3733 // case, it would be unsafe to hoist the operation into a select instruction.
3735 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
3736 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
3737 unsigned NumPhis = 0;
3738 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
3739 ++II, ++NumPhis) {
3740 if (NumPhis > 2) // Disable this xform.
3741 return false;
3743 PHINode *PN = cast<PHINode>(II);
3744 Value *BIV = PN->getIncomingValueForBlock(BB);
3745 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
3746 if (CE->canTrap())
3747 return false;
3749 unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
3750 Value *PBIV = PN->getIncomingValue(PBBIdx);
3751 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
3752 if (CE->canTrap())
3753 return false;
3756 // Finally, if everything is ok, fold the branches to logical ops.
3757 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
3759 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
3760 << "AND: " << *BI->getParent());
3762 SmallVector<DominatorTree::UpdateType, 5> Updates;
3764 // If OtherDest *is* BB, then BB is a basic block with a single conditional
3765 // branch in it, where one edge (OtherDest) goes back to itself but the other
3766 // exits. We don't *know* that the program avoids the infinite loop
3767 // (even though that seems likely). If we do this xform naively, we'll end up
3768 // recursively unpeeling the loop. Since we know that (after the xform is
3769 // done) that the block *is* infinite if reached, we just make it an obviously
3770 // infinite loop with no cond branch.
3771 if (OtherDest == BB) {
3772 // Insert it at the end of the function, because it's either code,
3773 // or it won't matter if it's hot. :)
3774 BasicBlock *InfLoopBlock =
3775 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
3776 BranchInst::Create(InfLoopBlock, InfLoopBlock);
3777 if (DTU)
3778 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
3779 OtherDest = InfLoopBlock;
3782 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3784 // BI may have other predecessors. Because of this, we leave
3785 // it alone, but modify PBI.
3787 // Make sure we get to CommonDest on True&True directions.
3788 Value *PBICond = PBI->getCondition();
3789 IRBuilder<NoFolder> Builder(PBI);
3790 if (PBIOp)
3791 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
3793 Value *BICond = BI->getCondition();
3794 if (BIOp)
3795 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
3797 // Merge the conditions.
3798 Value *Cond =
3799 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
3801 // Modify PBI to branch on the new condition to the new dests.
3802 PBI->setCondition(Cond);
3803 PBI->setSuccessor(0, CommonDest);
3804 PBI->setSuccessor(1, OtherDest);
3806 if (DTU) {
3807 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
3808 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
3810 DTU->applyUpdates(Updates);
3813 // Update branch weight for PBI.
3814 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3815 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
3816 bool HasWeights =
3817 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3818 SuccTrueWeight, SuccFalseWeight);
3819 if (HasWeights) {
3820 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
3821 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
3822 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
3823 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
3824 // The weight to CommonDest should be PredCommon * SuccTotal +
3825 // PredOther * SuccCommon.
3826 // The weight to OtherDest should be PredOther * SuccOther.
3827 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
3828 PredOther * SuccCommon,
3829 PredOther * SuccOther};
3830 // Halve the weights if any of them cannot fit in an uint32_t
3831 FitWeights(NewWeights);
3833 setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
3836 // OtherDest may have phi nodes. If so, add an entry from PBI's
3837 // block that are identical to the entries for BI's block.
3838 AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
3840 // We know that the CommonDest already had an edge from PBI to
3841 // it. If it has PHIs though, the PHIs may have different
3842 // entries for BB and PBI's BB. If so, insert a select to make
3843 // them agree.
3844 for (PHINode &PN : CommonDest->phis()) {
3845 Value *BIV = PN.getIncomingValueForBlock(BB);
3846 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
3847 Value *PBIV = PN.getIncomingValue(PBBIdx);
3848 if (BIV != PBIV) {
3849 // Insert a select in PBI to pick the right value.
3850 SelectInst *NV = cast<SelectInst>(
3851 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
3852 PN.setIncomingValue(PBBIdx, NV);
3853 // Although the select has the same condition as PBI, the original branch
3854 // weights for PBI do not apply to the new select because the select's
3855 // 'logical' edges are incoming edges of the phi that is eliminated, not
3856 // the outgoing edges of PBI.
3857 if (HasWeights) {
3858 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
3859 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
3860 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
3861 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
3862 // The weight to PredCommonDest should be PredCommon * SuccTotal.
3863 // The weight to PredOtherDest should be PredOther * SuccCommon.
3864 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
3865 PredOther * SuccCommon};
3867 FitWeights(NewWeights);
3869 setBranchWeights(NV, NewWeights[0], NewWeights[1]);
3874 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
3875 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3877 // This basic block is probably dead. We know it has at least
3878 // one fewer predecessor.
3879 return true;
3882 // Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
3883 // true or to FalseBB if Cond is false.
3884 // Takes care of updating the successors and removing the old terminator.
3885 // Also makes sure not to introduce new successors by assuming that edges to
3886 // non-successor TrueBBs and FalseBBs aren't reachable.
3887 bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
3888 Value *Cond, BasicBlock *TrueBB,
3889 BasicBlock *FalseBB,
3890 uint32_t TrueWeight,
3891 uint32_t FalseWeight) {
3892 auto *BB = OldTerm->getParent();
3893 // Remove any superfluous successor edges from the CFG.
3894 // First, figure out which successors to preserve.
3895 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
3896 // successor.
3897 BasicBlock *KeepEdge1 = TrueBB;
3898 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
3900 SmallPtrSet<BasicBlock *, 2> RemovedSuccessors;
3902 // Then remove the rest.
3903 for (BasicBlock *Succ : successors(OldTerm)) {
3904 // Make sure only to keep exactly one copy of each edge.
3905 if (Succ == KeepEdge1)
3906 KeepEdge1 = nullptr;
3907 else if (Succ == KeepEdge2)
3908 KeepEdge2 = nullptr;
3909 else {
3910 Succ->removePredecessor(BB,
3911 /*KeepOneInputPHIs=*/true);
3913 if (Succ != TrueBB && Succ != FalseBB)
3914 RemovedSuccessors.insert(Succ);
3918 IRBuilder<> Builder(OldTerm);
3919 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
3921 // Insert an appropriate new terminator.
3922 if (!KeepEdge1 && !KeepEdge2) {
3923 if (TrueBB == FalseBB) {
3924 // We were only looking for one successor, and it was present.
3925 // Create an unconditional branch to it.
3926 Builder.CreateBr(TrueBB);
3927 } else {
3928 // We found both of the successors we were looking for.
3929 // Create a conditional branch sharing the condition of the select.
3930 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
3931 if (TrueWeight != FalseWeight)
3932 setBranchWeights(NewBI, TrueWeight, FalseWeight);
3934 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
3935 // Neither of the selected blocks were successors, so this
3936 // terminator must be unreachable.
3937 new UnreachableInst(OldTerm->getContext(), OldTerm);
3938 } else {
3939 // One of the selected values was a successor, but the other wasn't.
3940 // Insert an unconditional branch to the one that was found;
3941 // the edge to the one that wasn't must be unreachable.
3942 if (!KeepEdge1) {
3943 // Only TrueBB was found.
3944 Builder.CreateBr(TrueBB);
3945 } else {
3946 // Only FalseBB was found.
3947 Builder.CreateBr(FalseBB);
3951 EraseTerminatorAndDCECond(OldTerm);
3953 if (DTU) {
3954 SmallVector<DominatorTree::UpdateType, 2> Updates;
3955 Updates.reserve(RemovedSuccessors.size());
3956 for (auto *RemovedSuccessor : RemovedSuccessors)
3957 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
3958 DTU->applyUpdates(Updates);
3961 return true;
3964 // Replaces
3965 // (switch (select cond, X, Y)) on constant X, Y
3966 // with a branch - conditional if X and Y lead to distinct BBs,
3967 // unconditional otherwise.
3968 bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
3969 SelectInst *Select) {
3970 // Check for constant integer values in the select.
3971 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
3972 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
3973 if (!TrueVal || !FalseVal)
3974 return false;
3976 // Find the relevant condition and destinations.
3977 Value *Condition = Select->getCondition();
3978 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
3979 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
3981 // Get weight for TrueBB and FalseBB.
3982 uint32_t TrueWeight = 0, FalseWeight = 0;
3983 SmallVector<uint64_t, 8> Weights;
3984 bool HasWeights = HasBranchWeights(SI);
3985 if (HasWeights) {
3986 GetBranchWeights(SI, Weights);
3987 if (Weights.size() == 1 + SI->getNumCases()) {
3988 TrueWeight =
3989 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
3990 FalseWeight =
3991 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
3995 // Perform the actual simplification.
3996 return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
3997 FalseWeight);
4000 // Replaces
4001 // (indirectbr (select cond, blockaddress(@fn, BlockA),
4002 // blockaddress(@fn, BlockB)))
4003 // with
4004 // (br cond, BlockA, BlockB).
4005 bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4006 SelectInst *SI) {
4007 // Check that both operands of the select are block addresses.
4008 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4009 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4010 if (!TBA || !FBA)
4011 return false;
4013 // Extract the actual blocks.
4014 BasicBlock *TrueBB = TBA->getBasicBlock();
4015 BasicBlock *FalseBB = FBA->getBasicBlock();
4017 // Perform the actual simplification.
4018 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4022 /// This is called when we find an icmp instruction
4023 /// (a seteq/setne with a constant) as the only instruction in a
4024 /// block that ends with an uncond branch. We are looking for a very specific
4025 /// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4026 /// this case, we merge the first two "or's of icmp" into a switch, but then the
4027 /// default value goes to an uncond block with a seteq in it, we get something
4028 /// like:
4030 /// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4031 /// DEFAULT:
4032 /// %tmp = icmp eq i8 %A, 92
4033 /// br label %end
4034 /// end:
4035 /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4037 /// We prefer to split the edge to 'end' so that there is a true/false entry to
4038 /// the PHI, merging the third icmp into the switch.
4039 bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4040 ICmpInst *ICI, IRBuilder<> &Builder) {
4041 BasicBlock *BB = ICI->getParent();
4043 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4044 // complex.
4045 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
4046 return false;
4048 Value *V = ICI->getOperand(0);
4049 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
4051 // The pattern we're looking for is where our only predecessor is a switch on
4052 // 'V' and this block is the default case for the switch. In this case we can
4053 // fold the compared value into the switch to simplify things.
4054 BasicBlock *Pred = BB->getSinglePredecessor();
4055 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
4056 return false;
4058 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
4059 if (SI->getCondition() != V)
4060 return false;
4062 // If BB is reachable on a non-default case, then we simply know the value of
4063 // V in this block. Substitute it and constant fold the icmp instruction
4064 // away.
4065 if (SI->getDefaultDest() != BB) {
4066 ConstantInt *VVal = SI->findCaseDest(BB);
4067 assert(VVal && "Should have a unique destination value");
4068 ICI->setOperand(0, VVal);
4070 if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
4071 ICI->replaceAllUsesWith(V);
4072 ICI->eraseFromParent();
4074 // BB is now empty, so it is likely to simplify away.
4075 return requestResimplify();
4078 // Ok, the block is reachable from the default dest. If the constant we're
4079 // comparing exists in one of the other edges, then we can constant fold ICI
4080 // and zap it.
4081 if (SI->findCaseValue(Cst) != SI->case_default()) {
4082 Value *V;
4083 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4084 V = ConstantInt::getFalse(BB->getContext());
4085 else
4086 V = ConstantInt::getTrue(BB->getContext());
4088 ICI->replaceAllUsesWith(V);
4089 ICI->eraseFromParent();
4090 // BB is now empty, so it is likely to simplify away.
4091 return requestResimplify();
4094 // The use of the icmp has to be in the 'end' block, by the only PHI node in
4095 // the block.
4096 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
4097 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
4098 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4099 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
4100 return false;
4102 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4103 // true in the PHI.
4104 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
4105 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
4107 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4108 std::swap(DefaultCst, NewCst);
4110 // Replace ICI (which is used by the PHI for the default value) with true or
4111 // false depending on if it is EQ or NE.
4112 ICI->replaceAllUsesWith(DefaultCst);
4113 ICI->eraseFromParent();
4115 SmallVector<DominatorTree::UpdateType, 2> Updates;
4117 // Okay, the switch goes to this block on a default value. Add an edge from
4118 // the switch to the merge point on the compared value.
4119 BasicBlock *NewBB =
4120 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
4122 SwitchInstProfUpdateWrapper SIW(*SI);
4123 auto W0 = SIW.getSuccessorWeight(0);
4124 SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
4125 if (W0) {
4126 NewW = ((uint64_t(*W0) + 1) >> 1);
4127 SIW.setSuccessorWeight(0, *NewW);
4129 SIW.addCase(Cst, NewBB, NewW);
4130 if (DTU)
4131 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
4134 // NewBB branches to the phi block, add the uncond branch and the phi entry.
4135 Builder.SetInsertPoint(NewBB);
4136 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
4137 Builder.CreateBr(SuccBlock);
4138 PHIUse->addIncoming(NewCst, NewBB);
4139 if (DTU) {
4140 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
4141 DTU->applyUpdates(Updates);
4143 return true;
4146 /// The specified branch is a conditional branch.
4147 /// Check to see if it is branching on an or/and chain of icmp instructions, and
4148 /// fold it into a switch instruction if so.
4149 bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
4150 IRBuilder<> &Builder,
4151 const DataLayout &DL) {
4152 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4153 if (!Cond)
4154 return false;
4156 // Change br (X == 0 | X == 1), T, F into a switch instruction.
4157 // If this is a bunch of seteq's or'd together, or if it's a bunch of
4158 // 'setne's and'ed together, collect them.
4160 // Try to gather values from a chain of and/or to be turned into a switch
4161 ConstantComparesGatherer ConstantCompare(Cond, DL);
4162 // Unpack the result
4163 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
4164 Value *CompVal = ConstantCompare.CompValue;
4165 unsigned UsedICmps = ConstantCompare.UsedICmps;
4166 Value *ExtraCase = ConstantCompare.Extra;
4168 // If we didn't have a multiply compared value, fail.
4169 if (!CompVal)
4170 return false;
4172 // Avoid turning single icmps into a switch.
4173 if (UsedICmps <= 1)
4174 return false;
4176 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
4178 // There might be duplicate constants in the list, which the switch
4179 // instruction can't handle, remove them now.
4180 array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
4181 Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
4183 // If Extra was used, we require at least two switch values to do the
4184 // transformation. A switch with one value is just a conditional branch.
4185 if (ExtraCase && Values.size() < 2)
4186 return false;
4188 // TODO: Preserve branch weight metadata, similarly to how
4189 // FoldValueComparisonIntoPredecessors preserves it.
4191 // Figure out which block is which destination.
4192 BasicBlock *DefaultBB = BI->getSuccessor(1);
4193 BasicBlock *EdgeBB = BI->getSuccessor(0);
4194 if (!TrueWhenEqual)
4195 std::swap(DefaultBB, EdgeBB);
4197 BasicBlock *BB = BI->getParent();
4199 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
4200 << " cases into SWITCH. BB is:\n"
4201 << *BB);
4203 SmallVector<DominatorTree::UpdateType, 2> Updates;
4205 // If there are any extra values that couldn't be folded into the switch
4206 // then we evaluate them with an explicit branch first. Split the block
4207 // right before the condbr to handle it.
4208 if (ExtraCase) {
4209 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
4210 /*MSSAU=*/nullptr, "switch.early.test");
4212 // Remove the uncond branch added to the old block.
4213 Instruction *OldTI = BB->getTerminator();
4214 Builder.SetInsertPoint(OldTI);
4216 // There can be an unintended UB if extra values are Poison. Before the
4217 // transformation, extra values may not be evaluated according to the
4218 // condition, and it will not raise UB. But after transformation, we are
4219 // evaluating extra values before checking the condition, and it will raise
4220 // UB. It can be solved by adding freeze instruction to extra values.
4221 AssumptionCache *AC = Options.AC;
4223 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
4224 ExtraCase = Builder.CreateFreeze(ExtraCase);
4226 if (TrueWhenEqual)
4227 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
4228 else
4229 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
4231 OldTI->eraseFromParent();
4233 if (DTU)
4234 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
4236 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
4237 // for the edge we just added.
4238 AddPredecessorToBlock(EdgeBB, BB, NewBB);
4240 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
4241 << "\nEXTRABB = " << *BB);
4242 BB = NewBB;
4245 Builder.SetInsertPoint(BI);
4246 // Convert pointer to int before we switch.
4247 if (CompVal->getType()->isPointerTy()) {
4248 CompVal = Builder.CreatePtrToInt(
4249 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
4252 // Create the new switch instruction now.
4253 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
4255 // Add all of the 'cases' to the switch instruction.
4256 for (unsigned i = 0, e = Values.size(); i != e; ++i)
4257 New->addCase(Values[i], EdgeBB);
4259 // We added edges from PI to the EdgeBB. As such, if there were any
4260 // PHI nodes in EdgeBB, they need entries to be added corresponding to
4261 // the number of edges added.
4262 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
4263 PHINode *PN = cast<PHINode>(BBI);
4264 Value *InVal = PN->getIncomingValueForBlock(BB);
4265 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
4266 PN->addIncoming(InVal, BB);
4269 // Erase the old branch instruction.
4270 EraseTerminatorAndDCECond(BI);
4271 if (DTU)
4272 DTU->applyUpdates(Updates);
4274 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
4275 return true;
4278 bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
4279 if (isa<PHINode>(RI->getValue()))
4280 return simplifyCommonResume(RI);
4281 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
4282 RI->getValue() == RI->getParent()->getFirstNonPHI())
4283 // The resume must unwind the exception that caused control to branch here.
4284 return simplifySingleResume(RI);
4286 return false;
4289 // Check if cleanup block is empty
4290 static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
4291 for (Instruction &I : R) {
4292 auto *II = dyn_cast<IntrinsicInst>(&I);
4293 if (!II)
4294 return false;
4296 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
4297 switch (IntrinsicID) {
4298 case Intrinsic::dbg_declare:
4299 case Intrinsic::dbg_value:
4300 case Intrinsic::dbg_label:
4301 case Intrinsic::lifetime_end:
4302 break;
4303 default:
4304 return false;
4307 return true;
4310 // Simplify resume that is shared by several landing pads (phi of landing pad).
4311 bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
4312 BasicBlock *BB = RI->getParent();
4314 // Check that there are no other instructions except for debug and lifetime
4315 // intrinsics between the phi's and resume instruction.
4316 if (!isCleanupBlockEmpty(
4317 make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
4318 return false;
4320 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
4321 auto *PhiLPInst = cast<PHINode>(RI->getValue());
4323 // Check incoming blocks to see if any of them are trivial.
4324 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
4325 Idx++) {
4326 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
4327 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
4329 // If the block has other successors, we can not delete it because
4330 // it has other dependents.
4331 if (IncomingBB->getUniqueSuccessor() != BB)
4332 continue;
4334 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
4335 // Not the landing pad that caused the control to branch here.
4336 if (IncomingValue != LandingPad)
4337 continue;
4339 if (isCleanupBlockEmpty(
4340 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
4341 TrivialUnwindBlocks.insert(IncomingBB);
4344 // If no trivial unwind blocks, don't do any simplifications.
4345 if (TrivialUnwindBlocks.empty())
4346 return false;
4348 // Turn all invokes that unwind here into calls.
4349 for (auto *TrivialBB : TrivialUnwindBlocks) {
4350 // Blocks that will be simplified should be removed from the phi node.
4351 // Note there could be multiple edges to the resume block, and we need
4352 // to remove them all.
4353 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
4354 BB->removePredecessor(TrivialBB, true);
4356 for (BasicBlock *Pred :
4357 llvm::make_early_inc_range(predecessors(TrivialBB))) {
4358 removeUnwindEdge(Pred, DTU);
4359 ++NumInvokes;
4362 // In each SimplifyCFG run, only the current processed block can be erased.
4363 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
4364 // of erasing TrivialBB, we only remove the branch to the common resume
4365 // block so that we can later erase the resume block since it has no
4366 // predecessors.
4367 TrivialBB->getTerminator()->eraseFromParent();
4368 new UnreachableInst(RI->getContext(), TrivialBB);
4369 if (DTU)
4370 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
4373 // Delete the resume block if all its predecessors have been removed.
4374 if (pred_empty(BB))
4375 DeleteDeadBlock(BB, DTU);
4377 return !TrivialUnwindBlocks.empty();
4380 // Simplify resume that is only used by a single (non-phi) landing pad.
4381 bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
4382 BasicBlock *BB = RI->getParent();
4383 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
4384 assert(RI->getValue() == LPInst &&
4385 "Resume must unwind the exception that caused control to here");
4387 // Check that there are no other instructions except for debug intrinsics.
4388 if (!isCleanupBlockEmpty(
4389 make_range<Instruction *>(LPInst->getNextNode(), RI)))
4390 return false;
4392 // Turn all invokes that unwind here into calls and delete the basic block.
4393 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
4394 removeUnwindEdge(Pred, DTU);
4395 ++NumInvokes;
4398 // The landingpad is now unreachable. Zap it.
4399 DeleteDeadBlock(BB, DTU);
4400 return true;
4403 static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
4404 // If this is a trivial cleanup pad that executes no instructions, it can be
4405 // eliminated. If the cleanup pad continues to the caller, any predecessor
4406 // that is an EH pad will be updated to continue to the caller and any
4407 // predecessor that terminates with an invoke instruction will have its invoke
4408 // instruction converted to a call instruction. If the cleanup pad being
4409 // simplified does not continue to the caller, each predecessor will be
4410 // updated to continue to the unwind destination of the cleanup pad being
4411 // simplified.
4412 BasicBlock *BB = RI->getParent();
4413 CleanupPadInst *CPInst = RI->getCleanupPad();
4414 if (CPInst->getParent() != BB)
4415 // This isn't an empty cleanup.
4416 return false;
4418 // We cannot kill the pad if it has multiple uses. This typically arises
4419 // from unreachable basic blocks.
4420 if (!CPInst->hasOneUse())
4421 return false;
4423 // Check that there are no other instructions except for benign intrinsics.
4424 if (!isCleanupBlockEmpty(
4425 make_range<Instruction *>(CPInst->getNextNode(), RI)))
4426 return false;
4428 // If the cleanup return we are simplifying unwinds to the caller, this will
4429 // set UnwindDest to nullptr.
4430 BasicBlock *UnwindDest = RI->getUnwindDest();
4431 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
4433 // We're about to remove BB from the control flow. Before we do, sink any
4434 // PHINodes into the unwind destination. Doing this before changing the
4435 // control flow avoids some potentially slow checks, since we can currently
4436 // be certain that UnwindDest and BB have no common predecessors (since they
4437 // are both EH pads).
4438 if (UnwindDest) {
4439 // First, go through the PHI nodes in UnwindDest and update any nodes that
4440 // reference the block we are removing
4441 for (PHINode &DestPN : UnwindDest->phis()) {
4442 int Idx = DestPN.getBasicBlockIndex(BB);
4443 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
4444 assert(Idx != -1);
4445 // This PHI node has an incoming value that corresponds to a control
4446 // path through the cleanup pad we are removing. If the incoming
4447 // value is in the cleanup pad, it must be a PHINode (because we
4448 // verified above that the block is otherwise empty). Otherwise, the
4449 // value is either a constant or a value that dominates the cleanup
4450 // pad being removed.
4452 // Because BB and UnwindDest are both EH pads, all of their
4453 // predecessors must unwind to these blocks, and since no instruction
4454 // can have multiple unwind destinations, there will be no overlap in
4455 // incoming blocks between SrcPN and DestPN.
4456 Value *SrcVal = DestPN.getIncomingValue(Idx);
4457 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
4459 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
4460 for (auto *Pred : predecessors(BB)) {
4461 Value *Incoming =
4462 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
4463 DestPN.addIncoming(Incoming, Pred);
4467 // Sink any remaining PHI nodes directly into UnwindDest.
4468 Instruction *InsertPt = DestEHPad;
4469 for (PHINode &PN : make_early_inc_range(BB->phis())) {
4470 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
4471 // If the PHI node has no uses or all of its uses are in this basic
4472 // block (meaning they are debug or lifetime intrinsics), just leave
4473 // it. It will be erased when we erase BB below.
4474 continue;
4476 // Otherwise, sink this PHI node into UnwindDest.
4477 // Any predecessors to UnwindDest which are not already represented
4478 // must be back edges which inherit the value from the path through
4479 // BB. In this case, the PHI value must reference itself.
4480 for (auto *pred : predecessors(UnwindDest))
4481 if (pred != BB)
4482 PN.addIncoming(&PN, pred);
4483 PN.moveBefore(InsertPt);
4484 // Also, add a dummy incoming value for the original BB itself,
4485 // so that the PHI is well-formed until we drop said predecessor.
4486 PN.addIncoming(UndefValue::get(PN.getType()), BB);
4490 std::vector<DominatorTree::UpdateType> Updates;
4492 // We use make_early_inc_range here because we will remove all predecessors.
4493 for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {
4494 if (UnwindDest == nullptr) {
4495 if (DTU) {
4496 DTU->applyUpdates(Updates);
4497 Updates.clear();
4499 removeUnwindEdge(PredBB, DTU);
4500 ++NumInvokes;
4501 } else {
4502 BB->removePredecessor(PredBB);
4503 Instruction *TI = PredBB->getTerminator();
4504 TI->replaceUsesOfWith(BB, UnwindDest);
4505 if (DTU) {
4506 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
4507 Updates.push_back({DominatorTree::Delete, PredBB, BB});
4512 if (DTU)
4513 DTU->applyUpdates(Updates);
4515 DeleteDeadBlock(BB, DTU);
4517 return true;
4520 // Try to merge two cleanuppads together.
4521 static bool mergeCleanupPad(CleanupReturnInst *RI) {
4522 // Skip any cleanuprets which unwind to caller, there is nothing to merge
4523 // with.
4524 BasicBlock *UnwindDest = RI->getUnwindDest();
4525 if (!UnwindDest)
4526 return false;
4528 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
4529 // be safe to merge without code duplication.
4530 if (UnwindDest->getSinglePredecessor() != RI->getParent())
4531 return false;
4533 // Verify that our cleanuppad's unwind destination is another cleanuppad.
4534 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
4535 if (!SuccessorCleanupPad)
4536 return false;
4538 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
4539 // Replace any uses of the successor cleanupad with the predecessor pad
4540 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
4541 // funclet bundle operands.
4542 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
4543 // Remove the old cleanuppad.
4544 SuccessorCleanupPad->eraseFromParent();
4545 // Now, we simply replace the cleanupret with a branch to the unwind
4546 // destination.
4547 BranchInst::Create(UnwindDest, RI->getParent());
4548 RI->eraseFromParent();
4550 return true;
4553 bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
4554 // It is possible to transiantly have an undef cleanuppad operand because we
4555 // have deleted some, but not all, dead blocks.
4556 // Eventually, this block will be deleted.
4557 if (isa<UndefValue>(RI->getOperand(0)))
4558 return false;
4560 if (mergeCleanupPad(RI))
4561 return true;
4563 if (removeEmptyCleanup(RI, DTU))
4564 return true;
4566 return false;
4569 // WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
4570 bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
4571 BasicBlock *BB = UI->getParent();
4573 bool Changed = false;
4575 // If there are any instructions immediately before the unreachable that can
4576 // be removed, do so.
4577 while (UI->getIterator() != BB->begin()) {
4578 BasicBlock::iterator BBI = UI->getIterator();
4579 --BBI;
4581 if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
4582 break; // Can not drop any more instructions. We're done here.
4583 // Otherwise, this instruction can be freely erased,
4584 // even if it is not side-effect free.
4586 // Note that deleting EH's here is in fact okay, although it involves a bit
4587 // of subtle reasoning. If this inst is an EH, all the predecessors of this
4588 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
4589 // and we can therefore guarantee this block will be erased.
4591 // Delete this instruction (any uses are guaranteed to be dead)
4592 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
4593 BBI->eraseFromParent();
4594 Changed = true;
4597 // If the unreachable instruction is the first in the block, take a gander
4598 // at all of the predecessors of this instruction, and simplify them.
4599 if (&BB->front() != UI)
4600 return Changed;
4602 std::vector<DominatorTree::UpdateType> Updates;
4604 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
4605 for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
4606 auto *Predecessor = Preds[i];
4607 Instruction *TI = Predecessor->getTerminator();
4608 IRBuilder<> Builder(TI);
4609 if (auto *BI = dyn_cast<BranchInst>(TI)) {
4610 // We could either have a proper unconditional branch,
4611 // or a degenerate conditional branch with matching destinations.
4612 if (all_of(BI->successors(),
4613 [BB](auto *Successor) { return Successor == BB; })) {
4614 new UnreachableInst(TI->getContext(), TI);
4615 TI->eraseFromParent();
4616 Changed = true;
4617 } else {
4618 assert(BI->isConditional() && "Can't get here with an uncond branch.");
4619 Value* Cond = BI->getCondition();
4620 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
4621 "The destinations are guaranteed to be different here.");
4622 if (BI->getSuccessor(0) == BB) {
4623 Builder.CreateAssumption(Builder.CreateNot(Cond));
4624 Builder.CreateBr(BI->getSuccessor(1));
4625 } else {
4626 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
4627 Builder.CreateAssumption(Cond);
4628 Builder.CreateBr(BI->getSuccessor(0));
4630 EraseTerminatorAndDCECond(BI);
4631 Changed = true;
4633 if (DTU)
4634 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4635 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
4636 SwitchInstProfUpdateWrapper SU(*SI);
4637 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
4638 if (i->getCaseSuccessor() != BB) {
4639 ++i;
4640 continue;
4642 BB->removePredecessor(SU->getParent());
4643 i = SU.removeCase(i);
4644 e = SU->case_end();
4645 Changed = true;
4647 // Note that the default destination can't be removed!
4648 if (DTU && SI->getDefaultDest() != BB)
4649 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4650 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
4651 if (II->getUnwindDest() == BB) {
4652 if (DTU) {
4653 DTU->applyUpdates(Updates);
4654 Updates.clear();
4656 removeUnwindEdge(TI->getParent(), DTU);
4657 Changed = true;
4659 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
4660 if (CSI->getUnwindDest() == BB) {
4661 if (DTU) {
4662 DTU->applyUpdates(Updates);
4663 Updates.clear();
4665 removeUnwindEdge(TI->getParent(), DTU);
4666 Changed = true;
4667 continue;
4670 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
4671 E = CSI->handler_end();
4672 I != E; ++I) {
4673 if (*I == BB) {
4674 CSI->removeHandler(I);
4675 --I;
4676 --E;
4677 Changed = true;
4680 if (DTU)
4681 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4682 if (CSI->getNumHandlers() == 0) {
4683 if (CSI->hasUnwindDest()) {
4684 // Redirect all predecessors of the block containing CatchSwitchInst
4685 // to instead branch to the CatchSwitchInst's unwind destination.
4686 if (DTU) {
4687 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
4688 Updates.push_back({DominatorTree::Insert,
4689 PredecessorOfPredecessor,
4690 CSI->getUnwindDest()});
4691 Updates.push_back({DominatorTree::Delete,
4692 PredecessorOfPredecessor, Predecessor});
4695 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
4696 } else {
4697 // Rewrite all preds to unwind to caller (or from invoke to call).
4698 if (DTU) {
4699 DTU->applyUpdates(Updates);
4700 Updates.clear();
4702 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
4703 for (BasicBlock *EHPred : EHPreds)
4704 removeUnwindEdge(EHPred, DTU);
4706 // The catchswitch is no longer reachable.
4707 new UnreachableInst(CSI->getContext(), CSI);
4708 CSI->eraseFromParent();
4709 Changed = true;
4711 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
4712 (void)CRI;
4713 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
4714 "Expected to always have an unwind to BB.");
4715 if (DTU)
4716 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4717 new UnreachableInst(TI->getContext(), TI);
4718 TI->eraseFromParent();
4719 Changed = true;
4723 if (DTU)
4724 DTU->applyUpdates(Updates);
4726 // If this block is now dead, remove it.
4727 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
4728 DeleteDeadBlock(BB, DTU);
4729 return true;
4732 return Changed;
4735 static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
4736 assert(Cases.size() >= 1);
4738 array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
4739 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
4740 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
4741 return false;
4743 return true;
4746 static void createUnreachableSwitchDefault(SwitchInst *Switch,
4747 DomTreeUpdater *DTU) {
4748 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
4749 auto *BB = Switch->getParent();
4750 auto *OrigDefaultBlock = Switch->getDefaultDest();
4751 OrigDefaultBlock->removePredecessor(BB);
4752 BasicBlock *NewDefaultBlock = BasicBlock::Create(
4753 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
4754 OrigDefaultBlock);
4755 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
4756 Switch->setDefaultDest(&*NewDefaultBlock);
4757 if (DTU) {
4758 SmallVector<DominatorTree::UpdateType, 2> Updates;
4759 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
4760 if (!is_contained(successors(BB), OrigDefaultBlock))
4761 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
4762 DTU->applyUpdates(Updates);
4766 /// Turn a switch with two reachable destinations into an integer range
4767 /// comparison and branch.
4768 bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
4769 IRBuilder<> &Builder) {
4770 assert(SI->getNumCases() > 1 && "Degenerate switch?");
4772 bool HasDefault =
4773 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4775 auto *BB = SI->getParent();
4777 // Partition the cases into two sets with different destinations.
4778 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
4779 BasicBlock *DestB = nullptr;
4780 SmallVector<ConstantInt *, 16> CasesA;
4781 SmallVector<ConstantInt *, 16> CasesB;
4783 for (auto Case : SI->cases()) {
4784 BasicBlock *Dest = Case.getCaseSuccessor();
4785 if (!DestA)
4786 DestA = Dest;
4787 if (Dest == DestA) {
4788 CasesA.push_back(Case.getCaseValue());
4789 continue;
4791 if (!DestB)
4792 DestB = Dest;
4793 if (Dest == DestB) {
4794 CasesB.push_back(Case.getCaseValue());
4795 continue;
4797 return false; // More than two destinations.
4800 assert(DestA && DestB &&
4801 "Single-destination switch should have been folded.");
4802 assert(DestA != DestB);
4803 assert(DestB != SI->getDefaultDest());
4804 assert(!CasesB.empty() && "There must be non-default cases.");
4805 assert(!CasesA.empty() || HasDefault);
4807 // Figure out if one of the sets of cases form a contiguous range.
4808 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
4809 BasicBlock *ContiguousDest = nullptr;
4810 BasicBlock *OtherDest = nullptr;
4811 if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
4812 ContiguousCases = &CasesA;
4813 ContiguousDest = DestA;
4814 OtherDest = DestB;
4815 } else if (CasesAreContiguous(CasesB)) {
4816 ContiguousCases = &CasesB;
4817 ContiguousDest = DestB;
4818 OtherDest = DestA;
4819 } else
4820 return false;
4822 // Start building the compare and branch.
4824 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
4825 Constant *NumCases =
4826 ConstantInt::get(Offset->getType(), ContiguousCases->size());
4828 Value *Sub = SI->getCondition();
4829 if (!Offset->isNullValue())
4830 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
4832 Value *Cmp;
4833 // If NumCases overflowed, then all possible values jump to the successor.
4834 if (NumCases->isNullValue() && !ContiguousCases->empty())
4835 Cmp = ConstantInt::getTrue(SI->getContext());
4836 else
4837 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
4838 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
4840 // Update weight for the newly-created conditional branch.
4841 if (HasBranchWeights(SI)) {
4842 SmallVector<uint64_t, 8> Weights;
4843 GetBranchWeights(SI, Weights);
4844 if (Weights.size() == 1 + SI->getNumCases()) {
4845 uint64_t TrueWeight = 0;
4846 uint64_t FalseWeight = 0;
4847 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
4848 if (SI->getSuccessor(I) == ContiguousDest)
4849 TrueWeight += Weights[I];
4850 else
4851 FalseWeight += Weights[I];
4853 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
4854 TrueWeight /= 2;
4855 FalseWeight /= 2;
4857 setBranchWeights(NewBI, TrueWeight, FalseWeight);
4861 // Prune obsolete incoming values off the successors' PHI nodes.
4862 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
4863 unsigned PreviousEdges = ContiguousCases->size();
4864 if (ContiguousDest == SI->getDefaultDest())
4865 ++PreviousEdges;
4866 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
4867 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4869 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
4870 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
4871 if (OtherDest == SI->getDefaultDest())
4872 ++PreviousEdges;
4873 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
4874 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4877 // Clean up the default block - it may have phis or other instructions before
4878 // the unreachable terminator.
4879 if (!HasDefault)
4880 createUnreachableSwitchDefault(SI, DTU);
4882 auto *UnreachableDefault = SI->getDefaultDest();
4884 // Drop the switch.
4885 SI->eraseFromParent();
4887 if (!HasDefault && DTU)
4888 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
4890 return true;
4893 /// Compute masked bits for the condition of a switch
4894 /// and use it to remove dead cases.
4895 static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
4896 AssumptionCache *AC,
4897 const DataLayout &DL) {
4898 Value *Cond = SI->getCondition();
4899 unsigned Bits = Cond->getType()->getIntegerBitWidth();
4900 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
4902 // We can also eliminate cases by determining that their values are outside of
4903 // the limited range of the condition based on how many significant (non-sign)
4904 // bits are in the condition value.
4905 unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
4906 unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
4908 // Gather dead cases.
4909 SmallVector<ConstantInt *, 8> DeadCases;
4910 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
4911 for (auto &Case : SI->cases()) {
4912 auto *Successor = Case.getCaseSuccessor();
4913 if (DTU)
4914 ++NumPerSuccessorCases[Successor];
4915 const APInt &CaseVal = Case.getCaseValue()->getValue();
4916 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
4917 (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
4918 DeadCases.push_back(Case.getCaseValue());
4919 if (DTU)
4920 --NumPerSuccessorCases[Successor];
4921 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
4922 << " is dead.\n");
4926 // If we can prove that the cases must cover all possible values, the
4927 // default destination becomes dead and we can remove it. If we know some
4928 // of the bits in the value, we can use that to more precisely compute the
4929 // number of possible unique case values.
4930 bool HasDefault =
4931 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4932 const unsigned NumUnknownBits =
4933 Bits - (Known.Zero | Known.One).countPopulation();
4934 assert(NumUnknownBits <= Bits);
4935 if (HasDefault && DeadCases.empty() &&
4936 NumUnknownBits < 64 /* avoid overflow */ &&
4937 SI->getNumCases() == (1ULL << NumUnknownBits)) {
4938 createUnreachableSwitchDefault(SI, DTU);
4939 return true;
4942 if (DeadCases.empty())
4943 return false;
4945 SwitchInstProfUpdateWrapper SIW(*SI);
4946 for (ConstantInt *DeadCase : DeadCases) {
4947 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
4948 assert(CaseI != SI->case_default() &&
4949 "Case was not found. Probably mistake in DeadCases forming.");
4950 // Prune unused values from PHI nodes.
4951 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
4952 SIW.removeCase(CaseI);
4955 if (DTU) {
4956 std::vector<DominatorTree::UpdateType> Updates;
4957 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
4958 if (I.second == 0)
4959 Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
4960 DTU->applyUpdates(Updates);
4963 return true;
4966 /// If BB would be eligible for simplification by
4967 /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
4968 /// by an unconditional branch), look at the phi node for BB in the successor
4969 /// block and see if the incoming value is equal to CaseValue. If so, return
4970 /// the phi node, and set PhiIndex to BB's index in the phi node.
4971 static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
4972 BasicBlock *BB, int *PhiIndex) {
4973 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
4974 return nullptr; // BB must be empty to be a candidate for simplification.
4975 if (!BB->getSinglePredecessor())
4976 return nullptr; // BB must be dominated by the switch.
4978 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
4979 if (!Branch || !Branch->isUnconditional())
4980 return nullptr; // Terminator must be unconditional branch.
4982 BasicBlock *Succ = Branch->getSuccessor(0);
4984 for (PHINode &PHI : Succ->phis()) {
4985 int Idx = PHI.getBasicBlockIndex(BB);
4986 assert(Idx >= 0 && "PHI has no entry for predecessor?");
4988 Value *InValue = PHI.getIncomingValue(Idx);
4989 if (InValue != CaseValue)
4990 continue;
4992 *PhiIndex = Idx;
4993 return &PHI;
4996 return nullptr;
4999 /// Try to forward the condition of a switch instruction to a phi node
5000 /// dominated by the switch, if that would mean that some of the destination
5001 /// blocks of the switch can be folded away. Return true if a change is made.
5002 static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
5003 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
5005 ForwardingNodesMap ForwardingNodes;
5006 BasicBlock *SwitchBlock = SI->getParent();
5007 bool Changed = false;
5008 for (auto &Case : SI->cases()) {
5009 ConstantInt *CaseValue = Case.getCaseValue();
5010 BasicBlock *CaseDest = Case.getCaseSuccessor();
5012 // Replace phi operands in successor blocks that are using the constant case
5013 // value rather than the switch condition variable:
5014 // switchbb:
5015 // switch i32 %x, label %default [
5016 // i32 17, label %succ
5017 // ...
5018 // succ:
5019 // %r = phi i32 ... [ 17, %switchbb ] ...
5020 // -->
5021 // %r = phi i32 ... [ %x, %switchbb ] ...
5023 for (PHINode &Phi : CaseDest->phis()) {
5024 // This only works if there is exactly 1 incoming edge from the switch to
5025 // a phi. If there is >1, that means multiple cases of the switch map to 1
5026 // value in the phi, and that phi value is not the switch condition. Thus,
5027 // this transform would not make sense (the phi would be invalid because
5028 // a phi can't have different incoming values from the same block).
5029 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
5030 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
5031 count(Phi.blocks(), SwitchBlock) == 1) {
5032 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
5033 Changed = true;
5037 // Collect phi nodes that are indirectly using this switch's case constants.
5038 int PhiIdx;
5039 if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
5040 ForwardingNodes[Phi].push_back(PhiIdx);
5043 for (auto &ForwardingNode : ForwardingNodes) {
5044 PHINode *Phi = ForwardingNode.first;
5045 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
5046 if (Indexes.size() < 2)
5047 continue;
5049 for (int Index : Indexes)
5050 Phi->setIncomingValue(Index, SI->getCondition());
5051 Changed = true;
5054 return Changed;
5057 /// Return true if the backend will be able to handle
5058 /// initializing an array of constants like C.
5059 static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
5060 if (C->isThreadDependent())
5061 return false;
5062 if (C->isDLLImportDependent())
5063 return false;
5065 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
5066 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
5067 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
5068 return false;
5070 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
5071 if (!CE->isGEPWithNoNotionalOverIndexing())
5072 return false;
5073 if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
5074 return false;
5077 if (!TTI.shouldBuildLookupTablesForConstant(C))
5078 return false;
5080 return true;
5083 /// If V is a Constant, return it. Otherwise, try to look up
5084 /// its constant value in ConstantPool, returning 0 if it's not there.
5085 static Constant *
5086 LookupConstant(Value *V,
5087 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
5088 if (Constant *C = dyn_cast<Constant>(V))
5089 return C;
5090 return ConstantPool.lookup(V);
5093 /// Try to fold instruction I into a constant. This works for
5094 /// simple instructions such as binary operations where both operands are
5095 /// constant or can be replaced by constants from the ConstantPool. Returns the
5096 /// resulting constant on success, 0 otherwise.
5097 static Constant *
5098 ConstantFold(Instruction *I, const DataLayout &DL,
5099 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
5100 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
5101 Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
5102 if (!A)
5103 return nullptr;
5104 if (A->isAllOnesValue())
5105 return LookupConstant(Select->getTrueValue(), ConstantPool);
5106 if (A->isNullValue())
5107 return LookupConstant(Select->getFalseValue(), ConstantPool);
5108 return nullptr;
5111 SmallVector<Constant *, 4> COps;
5112 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
5113 if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
5114 COps.push_back(A);
5115 else
5116 return nullptr;
5119 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
5120 return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
5121 COps[1], DL);
5124 return ConstantFoldInstOperands(I, COps, DL);
5127 /// Try to determine the resulting constant values in phi nodes
5128 /// at the common destination basic block, *CommonDest, for one of the case
5129 /// destionations CaseDest corresponding to value CaseVal (0 for the default
5130 /// case), of a switch instruction SI.
5131 static bool
5132 GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
5133 BasicBlock **CommonDest,
5134 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
5135 const DataLayout &DL, const TargetTransformInfo &TTI) {
5136 // The block from which we enter the common destination.
5137 BasicBlock *Pred = SI->getParent();
5139 // If CaseDest is empty except for some side-effect free instructions through
5140 // which we can constant-propagate the CaseVal, continue to its successor.
5141 SmallDenseMap<Value *, Constant *> ConstantPool;
5142 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
5143 for (Instruction &I :CaseDest->instructionsWithoutDebug()) {
5144 if (I.isTerminator()) {
5145 // If the terminator is a simple branch, continue to the next block.
5146 if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
5147 return false;
5148 Pred = CaseDest;
5149 CaseDest = I.getSuccessor(0);
5150 } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
5151 // Instruction is side-effect free and constant.
5153 // If the instruction has uses outside this block or a phi node slot for
5154 // the block, it is not safe to bypass the instruction since it would then
5155 // no longer dominate all its uses.
5156 for (auto &Use : I.uses()) {
5157 User *User = Use.getUser();
5158 if (Instruction *I = dyn_cast<Instruction>(User))
5159 if (I->getParent() == CaseDest)
5160 continue;
5161 if (PHINode *Phi = dyn_cast<PHINode>(User))
5162 if (Phi->getIncomingBlock(Use) == CaseDest)
5163 continue;
5164 return false;
5167 ConstantPool.insert(std::make_pair(&I, C));
5168 } else {
5169 break;
5173 // If we did not have a CommonDest before, use the current one.
5174 if (!*CommonDest)
5175 *CommonDest = CaseDest;
5176 // If the destination isn't the common one, abort.
5177 if (CaseDest != *CommonDest)
5178 return false;
5180 // Get the values for this case from phi nodes in the destination block.
5181 for (PHINode &PHI : (*CommonDest)->phis()) {
5182 int Idx = PHI.getBasicBlockIndex(Pred);
5183 if (Idx == -1)
5184 continue;
5186 Constant *ConstVal =
5187 LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
5188 if (!ConstVal)
5189 return false;
5191 // Be conservative about which kinds of constants we support.
5192 if (!ValidLookupTableConstant(ConstVal, TTI))
5193 return false;
5195 Res.push_back(std::make_pair(&PHI, ConstVal));
5198 return Res.size() > 0;
5201 // Helper function used to add CaseVal to the list of cases that generate
5202 // Result. Returns the updated number of cases that generate this result.
5203 static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
5204 SwitchCaseResultVectorTy &UniqueResults,
5205 Constant *Result) {
5206 for (auto &I : UniqueResults) {
5207 if (I.first == Result) {
5208 I.second.push_back(CaseVal);
5209 return I.second.size();
5212 UniqueResults.push_back(
5213 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5214 return 1;
5217 // Helper function that initializes a map containing
5218 // results for the PHI node of the common destination block for a switch
5219 // instruction. Returns false if multiple PHI nodes have been found or if
5220 // there is not a common destination block for the switch.
5221 static bool
5222 InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
5223 SwitchCaseResultVectorTy &UniqueResults,
5224 Constant *&DefaultResult, const DataLayout &DL,
5225 const TargetTransformInfo &TTI,
5226 uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
5227 for (auto &I : SI->cases()) {
5228 ConstantInt *CaseVal = I.getCaseValue();
5230 // Resulting value at phi nodes for this case value.
5231 SwitchCaseResultsTy Results;
5232 if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
5233 DL, TTI))
5234 return false;
5236 // Only one value per case is permitted.
5237 if (Results.size() > 1)
5238 return false;
5240 // Add the case->result mapping to UniqueResults.
5241 const uintptr_t NumCasesForResult =
5242 MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
5244 // Early out if there are too many cases for this result.
5245 if (NumCasesForResult > MaxCasesPerResult)
5246 return false;
5248 // Early out if there are too many unique results.
5249 if (UniqueResults.size() > MaxUniqueResults)
5250 return false;
5252 // Check the PHI consistency.
5253 if (!PHI)
5254 PHI = Results[0].first;
5255 else if (PHI != Results[0].first)
5256 return false;
5258 // Find the default result value.
5259 SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
5260 BasicBlock *DefaultDest = SI->getDefaultDest();
5261 GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
5262 DL, TTI);
5263 // If the default value is not found abort unless the default destination
5264 // is unreachable.
5265 DefaultResult =
5266 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
5267 if ((!DefaultResult &&
5268 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
5269 return false;
5271 return true;
5274 // Helper function that checks if it is possible to transform a switch with only
5275 // two cases (or two cases + default) that produces a result into a select.
5276 // Example:
5277 // switch (a) {
5278 // case 10: %0 = icmp eq i32 %a, 10
5279 // return 10; %1 = select i1 %0, i32 10, i32 4
5280 // case 20: ----> %2 = icmp eq i32 %a, 20
5281 // return 2; %3 = select i1 %2, i32 2, i32 %1
5282 // default:
5283 // return 4;
5284 // }
5285 static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
5286 Constant *DefaultResult, Value *Condition,
5287 IRBuilder<> &Builder) {
5288 // If we are selecting between only two cases transform into a simple
5289 // select or a two-way select if default is possible.
5290 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
5291 ResultVector[1].second.size() == 1) {
5292 ConstantInt *const FirstCase = ResultVector[0].second[0];
5293 ConstantInt *const SecondCase = ResultVector[1].second[0];
5295 bool DefaultCanTrigger = DefaultResult;
5296 Value *SelectValue = ResultVector[1].first;
5297 if (DefaultCanTrigger) {
5298 Value *const ValueCompare =
5299 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
5300 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
5301 DefaultResult, "switch.select");
5303 Value *const ValueCompare =
5304 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
5305 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
5306 SelectValue, "switch.select");
5309 // Handle the degenerate case where two cases have the same value.
5310 if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 &&
5311 DefaultResult) {
5312 Value *Cmp1 = Builder.CreateICmpEQ(
5313 Condition, ResultVector[0].second[0], "switch.selectcmp.case1");
5314 Value *Cmp2 = Builder.CreateICmpEQ(
5315 Condition, ResultVector[0].second[1], "switch.selectcmp.case2");
5316 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
5317 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
5320 return nullptr;
5323 // Helper function to cleanup a switch instruction that has been converted into
5324 // a select, fixing up PHI nodes and basic blocks.
5325 static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
5326 Value *SelectValue,
5327 IRBuilder<> &Builder,
5328 DomTreeUpdater *DTU) {
5329 std::vector<DominatorTree::UpdateType> Updates;
5331 BasicBlock *SelectBB = SI->getParent();
5332 BasicBlock *DestBB = PHI->getParent();
5334 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
5335 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
5336 Builder.CreateBr(DestBB);
5338 // Remove the switch.
5340 while (PHI->getBasicBlockIndex(SelectBB) >= 0)
5341 PHI->removeIncomingValue(SelectBB);
5342 PHI->addIncoming(SelectValue, SelectBB);
5344 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
5345 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
5346 BasicBlock *Succ = SI->getSuccessor(i);
5348 if (Succ == DestBB)
5349 continue;
5350 Succ->removePredecessor(SelectBB);
5351 if (DTU && RemovedSuccessors.insert(Succ).second)
5352 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
5354 SI->eraseFromParent();
5355 if (DTU)
5356 DTU->applyUpdates(Updates);
5359 /// If the switch is only used to initialize one or more
5360 /// phi nodes in a common successor block with only two different
5361 /// constant values, replace the switch with select.
5362 static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
5363 DomTreeUpdater *DTU, const DataLayout &DL,
5364 const TargetTransformInfo &TTI) {
5365 Value *const Cond = SI->getCondition();
5366 PHINode *PHI = nullptr;
5367 BasicBlock *CommonDest = nullptr;
5368 Constant *DefaultResult;
5369 SwitchCaseResultVectorTy UniqueResults;
5370 // Collect all the cases that will deliver the same value from the switch.
5371 if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
5372 DL, TTI, /*MaxUniqueResults*/2,
5373 /*MaxCasesPerResult*/2))
5374 return false;
5375 assert(PHI != nullptr && "PHI for value select not found");
5377 Builder.SetInsertPoint(SI);
5378 Value *SelectValue =
5379 ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
5380 if (SelectValue) {
5381 RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU);
5382 return true;
5384 // The switch couldn't be converted into a select.
5385 return false;
5388 namespace {
5390 /// This class represents a lookup table that can be used to replace a switch.
5391 class SwitchLookupTable {
5392 public:
5393 /// Create a lookup table to use as a switch replacement with the contents
5394 /// of Values, using DefaultValue to fill any holes in the table.
5395 SwitchLookupTable(
5396 Module &M, uint64_t TableSize, ConstantInt *Offset,
5397 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
5398 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
5400 /// Build instructions with Builder to retrieve the value at
5401 /// the position given by Index in the lookup table.
5402 Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
5404 /// Return true if a table with TableSize elements of
5405 /// type ElementType would fit in a target-legal register.
5406 static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
5407 Type *ElementType);
5409 private:
5410 // Depending on the contents of the table, it can be represented in
5411 // different ways.
5412 enum {
5413 // For tables where each element contains the same value, we just have to
5414 // store that single value and return it for each lookup.
5415 SingleValueKind,
5417 // For tables where there is a linear relationship between table index
5418 // and values. We calculate the result with a simple multiplication
5419 // and addition instead of a table lookup.
5420 LinearMapKind,
5422 // For small tables with integer elements, we can pack them into a bitmap
5423 // that fits into a target-legal register. Values are retrieved by
5424 // shift and mask operations.
5425 BitMapKind,
5427 // The table is stored as an array of values. Values are retrieved by load
5428 // instructions from the table.
5429 ArrayKind
5430 } Kind;
5432 // For SingleValueKind, this is the single value.
5433 Constant *SingleValue = nullptr;
5435 // For BitMapKind, this is the bitmap.
5436 ConstantInt *BitMap = nullptr;
5437 IntegerType *BitMapElementTy = nullptr;
5439 // For LinearMapKind, these are the constants used to derive the value.
5440 ConstantInt *LinearOffset = nullptr;
5441 ConstantInt *LinearMultiplier = nullptr;
5443 // For ArrayKind, this is the array.
5444 GlobalVariable *Array = nullptr;
5447 } // end anonymous namespace
5449 SwitchLookupTable::SwitchLookupTable(
5450 Module &M, uint64_t TableSize, ConstantInt *Offset,
5451 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
5452 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
5453 assert(Values.size() && "Can't build lookup table without values!");
5454 assert(TableSize >= Values.size() && "Can't fit values in table!");
5456 // If all values in the table are equal, this is that value.
5457 SingleValue = Values.begin()->second;
5459 Type *ValueType = Values.begin()->second->getType();
5461 // Build up the table contents.
5462 SmallVector<Constant *, 64> TableContents(TableSize);
5463 for (size_t I = 0, E = Values.size(); I != E; ++I) {
5464 ConstantInt *CaseVal = Values[I].first;
5465 Constant *CaseRes = Values[I].second;
5466 assert(CaseRes->getType() == ValueType);
5468 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
5469 TableContents[Idx] = CaseRes;
5471 if (CaseRes != SingleValue)
5472 SingleValue = nullptr;
5475 // Fill in any holes in the table with the default result.
5476 if (Values.size() < TableSize) {
5477 assert(DefaultValue &&
5478 "Need a default value to fill the lookup table holes.");
5479 assert(DefaultValue->getType() == ValueType);
5480 for (uint64_t I = 0; I < TableSize; ++I) {
5481 if (!TableContents[I])
5482 TableContents[I] = DefaultValue;
5485 if (DefaultValue != SingleValue)
5486 SingleValue = nullptr;
5489 // If each element in the table contains the same value, we only need to store
5490 // that single value.
5491 if (SingleValue) {
5492 Kind = SingleValueKind;
5493 return;
5496 // Check if we can derive the value with a linear transformation from the
5497 // table index.
5498 if (isa<IntegerType>(ValueType)) {
5499 bool LinearMappingPossible = true;
5500 APInt PrevVal;
5501 APInt DistToPrev;
5502 assert(TableSize >= 2 && "Should be a SingleValue table.");
5503 // Check if there is the same distance between two consecutive values.
5504 for (uint64_t I = 0; I < TableSize; ++I) {
5505 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
5506 if (!ConstVal) {
5507 // This is an undef. We could deal with it, but undefs in lookup tables
5508 // are very seldom. It's probably not worth the additional complexity.
5509 LinearMappingPossible = false;
5510 break;
5512 const APInt &Val = ConstVal->getValue();
5513 if (I != 0) {
5514 APInt Dist = Val - PrevVal;
5515 if (I == 1) {
5516 DistToPrev = Dist;
5517 } else if (Dist != DistToPrev) {
5518 LinearMappingPossible = false;
5519 break;
5522 PrevVal = Val;
5524 if (LinearMappingPossible) {
5525 LinearOffset = cast<ConstantInt>(TableContents[0]);
5526 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
5527 Kind = LinearMapKind;
5528 ++NumLinearMaps;
5529 return;
5533 // If the type is integer and the table fits in a register, build a bitmap.
5534 if (WouldFitInRegister(DL, TableSize, ValueType)) {
5535 IntegerType *IT = cast<IntegerType>(ValueType);
5536 APInt TableInt(TableSize * IT->getBitWidth(), 0);
5537 for (uint64_t I = TableSize; I > 0; --I) {
5538 TableInt <<= IT->getBitWidth();
5539 // Insert values into the bitmap. Undef values are set to zero.
5540 if (!isa<UndefValue>(TableContents[I - 1])) {
5541 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
5542 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
5545 BitMap = ConstantInt::get(M.getContext(), TableInt);
5546 BitMapElementTy = IT;
5547 Kind = BitMapKind;
5548 ++NumBitMaps;
5549 return;
5552 // Store the table in an array.
5553 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
5554 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
5556 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
5557 GlobalVariable::PrivateLinkage, Initializer,
5558 "switch.table." + FuncName);
5559 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
5560 // Set the alignment to that of an array items. We will be only loading one
5561 // value out of it.
5562 Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType)));
5563 Kind = ArrayKind;
5566 Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
5567 switch (Kind) {
5568 case SingleValueKind:
5569 return SingleValue;
5570 case LinearMapKind: {
5571 // Derive the result value from the input value.
5572 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
5573 false, "switch.idx.cast");
5574 if (!LinearMultiplier->isOne())
5575 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
5576 if (!LinearOffset->isZero())
5577 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
5578 return Result;
5580 case BitMapKind: {
5581 // Type of the bitmap (e.g. i59).
5582 IntegerType *MapTy = BitMap->getType();
5584 // Cast Index to the same type as the bitmap.
5585 // Note: The Index is <= the number of elements in the table, so
5586 // truncating it to the width of the bitmask is safe.
5587 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
5589 // Multiply the shift amount by the element width.
5590 ShiftAmt = Builder.CreateMul(
5591 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
5592 "switch.shiftamt");
5594 // Shift down.
5595 Value *DownShifted =
5596 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
5597 // Mask off.
5598 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
5600 case ArrayKind: {
5601 // Make sure the table index will not overflow when treated as signed.
5602 IntegerType *IT = cast<IntegerType>(Index->getType());
5603 uint64_t TableSize =
5604 Array->getInitializer()->getType()->getArrayNumElements();
5605 if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
5606 Index = Builder.CreateZExt(
5607 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
5608 "switch.tableidx.zext");
5610 Value *GEPIndices[] = {Builder.getInt32(0), Index};
5611 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
5612 GEPIndices, "switch.gep");
5613 return Builder.CreateLoad(
5614 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
5615 "switch.load");
5618 llvm_unreachable("Unknown lookup table kind!");
5621 bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
5622 uint64_t TableSize,
5623 Type *ElementType) {
5624 auto *IT = dyn_cast<IntegerType>(ElementType);
5625 if (!IT)
5626 return false;
5627 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
5628 // are <= 15, we could try to narrow the type.
5630 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
5631 if (TableSize >= UINT_MAX / IT->getBitWidth())
5632 return false;
5633 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
5636 static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
5637 const DataLayout &DL) {
5638 // Allow any legal type.
5639 if (TTI.isTypeLegal(Ty))
5640 return true;
5642 auto *IT = dyn_cast<IntegerType>(Ty);
5643 if (!IT)
5644 return false;
5646 // Also allow power of 2 integer types that have at least 8 bits and fit in
5647 // a register. These types are common in frontend languages and targets
5648 // usually support loads of these types.
5649 // TODO: We could relax this to any integer that fits in a register and rely
5650 // on ABI alignment and padding in the table to allow the load to be widened.
5651 // Or we could widen the constants and truncate the load.
5652 unsigned BitWidth = IT->getBitWidth();
5653 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
5654 DL.fitsInLegalInteger(IT->getBitWidth());
5657 /// Determine whether a lookup table should be built for this switch, based on
5658 /// the number of cases, size of the table, and the types of the results.
5659 // TODO: We could support larger than legal types by limiting based on the
5660 // number of loads required and/or table size. If the constants are small we
5661 // could use smaller table entries and extend after the load.
5662 static bool
5663 ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
5664 const TargetTransformInfo &TTI, const DataLayout &DL,
5665 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
5666 if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
5667 return false; // TableSize overflowed, or mul below might overflow.
5669 bool AllTablesFitInRegister = true;
5670 bool HasIllegalType = false;
5671 for (const auto &I : ResultTypes) {
5672 Type *Ty = I.second;
5674 // Saturate this flag to true.
5675 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
5677 // Saturate this flag to false.
5678 AllTablesFitInRegister =
5679 AllTablesFitInRegister &&
5680 SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
5682 // If both flags saturate, we're done. NOTE: This *only* works with
5683 // saturating flags, and all flags have to saturate first due to the
5684 // non-deterministic behavior of iterating over a dense map.
5685 if (HasIllegalType && !AllTablesFitInRegister)
5686 break;
5689 // If each table would fit in a register, we should build it anyway.
5690 if (AllTablesFitInRegister)
5691 return true;
5693 // Don't build a table that doesn't fit in-register if it has illegal types.
5694 if (HasIllegalType)
5695 return false;
5697 // The table density should be at least 40%. This is the same criterion as for
5698 // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
5699 // FIXME: Find the best cut-off.
5700 return SI->getNumCases() * 10 >= TableSize * 4;
5703 /// Try to reuse the switch table index compare. Following pattern:
5704 /// \code
5705 /// if (idx < tablesize)
5706 /// r = table[idx]; // table does not contain default_value
5707 /// else
5708 /// r = default_value;
5709 /// if (r != default_value)
5710 /// ...
5711 /// \endcode
5712 /// Is optimized to:
5713 /// \code
5714 /// cond = idx < tablesize;
5715 /// if (cond)
5716 /// r = table[idx];
5717 /// else
5718 /// r = default_value;
5719 /// if (cond)
5720 /// ...
5721 /// \endcode
5722 /// Jump threading will then eliminate the second if(cond).
5723 static void reuseTableCompare(
5724 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
5725 Constant *DefaultValue,
5726 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
5727 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
5728 if (!CmpInst)
5729 return;
5731 // We require that the compare is in the same block as the phi so that jump
5732 // threading can do its work afterwards.
5733 if (CmpInst->getParent() != PhiBlock)
5734 return;
5736 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
5737 if (!CmpOp1)
5738 return;
5740 Value *RangeCmp = RangeCheckBranch->getCondition();
5741 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
5742 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
5744 // Check if the compare with the default value is constant true or false.
5745 Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
5746 DefaultValue, CmpOp1, true);
5747 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
5748 return;
5750 // Check if the compare with the case values is distinct from the default
5751 // compare result.
5752 for (auto ValuePair : Values) {
5753 Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
5754 ValuePair.second, CmpOp1, true);
5755 if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst))
5756 return;
5757 assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
5758 "Expect true or false as compare result.");
5761 // Check if the branch instruction dominates the phi node. It's a simple
5762 // dominance check, but sufficient for our needs.
5763 // Although this check is invariant in the calling loops, it's better to do it
5764 // at this late stage. Practically we do it at most once for a switch.
5765 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
5766 for (BasicBlock *Pred : predecessors(PhiBlock)) {
5767 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
5768 return;
5771 if (DefaultConst == FalseConst) {
5772 // The compare yields the same result. We can replace it.
5773 CmpInst->replaceAllUsesWith(RangeCmp);
5774 ++NumTableCmpReuses;
5775 } else {
5776 // The compare yields the same result, just inverted. We can replace it.
5777 Value *InvertedTableCmp = BinaryOperator::CreateXor(
5778 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
5779 RangeCheckBranch);
5780 CmpInst->replaceAllUsesWith(InvertedTableCmp);
5781 ++NumTableCmpReuses;
5785 /// If the switch is only used to initialize one or more phi nodes in a common
5786 /// successor block with different constant values, replace the switch with
5787 /// lookup tables.
5788 static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
5789 DomTreeUpdater *DTU, const DataLayout &DL,
5790 const TargetTransformInfo &TTI) {
5791 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5793 BasicBlock *BB = SI->getParent();
5794 Function *Fn = BB->getParent();
5795 // Only build lookup table when we have a target that supports it or the
5796 // attribute is not set.
5797 if (!TTI.shouldBuildLookupTables() ||
5798 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
5799 return false;
5801 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
5802 // split off a dense part and build a lookup table for that.
5804 // FIXME: This creates arrays of GEPs to constant strings, which means each
5805 // GEP needs a runtime relocation in PIC code. We should just build one big
5806 // string and lookup indices into that.
5808 // Ignore switches with less than three cases. Lookup tables will not make
5809 // them faster, so we don't analyze them.
5810 if (SI->getNumCases() < 3)
5811 return false;
5813 // Figure out the corresponding result for each case value and phi node in the
5814 // common destination, as well as the min and max case values.
5815 assert(!SI->cases().empty());
5816 SwitchInst::CaseIt CI = SI->case_begin();
5817 ConstantInt *MinCaseVal = CI->getCaseValue();
5818 ConstantInt *MaxCaseVal = CI->getCaseValue();
5820 BasicBlock *CommonDest = nullptr;
5822 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
5823 SmallDenseMap<PHINode *, ResultListTy> ResultLists;
5825 SmallDenseMap<PHINode *, Constant *> DefaultResults;
5826 SmallDenseMap<PHINode *, Type *> ResultTypes;
5827 SmallVector<PHINode *, 4> PHIs;
5829 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
5830 ConstantInt *CaseVal = CI->getCaseValue();
5831 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
5832 MinCaseVal = CaseVal;
5833 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
5834 MaxCaseVal = CaseVal;
5836 // Resulting value at phi nodes for this case value.
5837 using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
5838 ResultsTy Results;
5839 if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
5840 Results, DL, TTI))
5841 return false;
5843 // Append the result from this case to the list for each phi.
5844 for (const auto &I : Results) {
5845 PHINode *PHI = I.first;
5846 Constant *Value = I.second;
5847 if (!ResultLists.count(PHI))
5848 PHIs.push_back(PHI);
5849 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
5853 // Keep track of the result types.
5854 for (PHINode *PHI : PHIs) {
5855 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
5858 uint64_t NumResults = ResultLists[PHIs[0]].size();
5859 APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
5860 uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
5861 bool TableHasHoles = (NumResults < TableSize);
5863 // If the table has holes, we need a constant result for the default case
5864 // or a bitmask that fits in a register.
5865 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
5866 bool HasDefaultResults =
5867 GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
5868 DefaultResultsList, DL, TTI);
5870 bool NeedMask = (TableHasHoles && !HasDefaultResults);
5871 if (NeedMask) {
5872 // As an extra penalty for the validity test we require more cases.
5873 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
5874 return false;
5875 if (!DL.fitsInLegalInteger(TableSize))
5876 return false;
5879 for (const auto &I : DefaultResultsList) {
5880 PHINode *PHI = I.first;
5881 Constant *Result = I.second;
5882 DefaultResults[PHI] = Result;
5885 if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
5886 return false;
5888 std::vector<DominatorTree::UpdateType> Updates;
5890 // Create the BB that does the lookups.
5891 Module &Mod = *CommonDest->getParent()->getParent();
5892 BasicBlock *LookupBB = BasicBlock::Create(
5893 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
5895 // Compute the table index value.
5896 Builder.SetInsertPoint(SI);
5897 Value *TableIndex;
5898 if (MinCaseVal->isNullValue())
5899 TableIndex = SI->getCondition();
5900 else
5901 TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
5902 "switch.tableidx");
5904 // Compute the maximum table size representable by the integer type we are
5905 // switching upon.
5906 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
5907 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
5908 assert(MaxTableSize >= TableSize &&
5909 "It is impossible for a switch to have more entries than the max "
5910 "representable value of its input integer type's size.");
5912 // If the default destination is unreachable, or if the lookup table covers
5913 // all values of the conditional variable, branch directly to the lookup table
5914 // BB. Otherwise, check that the condition is within the case range.
5915 const bool DefaultIsReachable =
5916 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5917 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
5918 BranchInst *RangeCheckBranch = nullptr;
5920 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
5921 Builder.CreateBr(LookupBB);
5922 if (DTU)
5923 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
5924 // Note: We call removeProdecessor later since we need to be able to get the
5925 // PHI value for the default case in case we're using a bit mask.
5926 } else {
5927 Value *Cmp = Builder.CreateICmpULT(
5928 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
5929 RangeCheckBranch =
5930 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
5931 if (DTU)
5932 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
5935 // Populate the BB that does the lookups.
5936 Builder.SetInsertPoint(LookupBB);
5938 if (NeedMask) {
5939 // Before doing the lookup, we do the hole check. The LookupBB is therefore
5940 // re-purposed to do the hole check, and we create a new LookupBB.
5941 BasicBlock *MaskBB = LookupBB;
5942 MaskBB->setName("switch.hole_check");
5943 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
5944 CommonDest->getParent(), CommonDest);
5946 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
5947 // unnecessary illegal types.
5948 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
5949 APInt MaskInt(TableSizePowOf2, 0);
5950 APInt One(TableSizePowOf2, 1);
5951 // Build bitmask; fill in a 1 bit for every case.
5952 const ResultListTy &ResultList = ResultLists[PHIs[0]];
5953 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
5954 uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
5955 .getLimitedValue();
5956 MaskInt |= One << Idx;
5958 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
5960 // Get the TableIndex'th bit of the bitmask.
5961 // If this bit is 0 (meaning hole) jump to the default destination,
5962 // else continue with table lookup.
5963 IntegerType *MapTy = TableMask->getType();
5964 Value *MaskIndex =
5965 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
5966 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
5967 Value *LoBit = Builder.CreateTrunc(
5968 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
5969 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
5970 if (DTU) {
5971 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
5972 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
5974 Builder.SetInsertPoint(LookupBB);
5975 AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
5978 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
5979 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
5980 // do not delete PHINodes here.
5981 SI->getDefaultDest()->removePredecessor(BB,
5982 /*KeepOneInputPHIs=*/true);
5983 if (DTU)
5984 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
5987 for (PHINode *PHI : PHIs) {
5988 const ResultListTy &ResultList = ResultLists[PHI];
5990 // If using a bitmask, use any value to fill the lookup table holes.
5991 Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
5992 StringRef FuncName = Fn->getName();
5993 SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
5994 FuncName);
5996 Value *Result = Table.BuildLookup(TableIndex, Builder);
5998 // Do a small peephole optimization: re-use the switch table compare if
5999 // possible.
6000 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6001 BasicBlock *PhiBlock = PHI->getParent();
6002 // Search for compare instructions which use the phi.
6003 for (auto *User : PHI->users()) {
6004 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6008 PHI->addIncoming(Result, LookupBB);
6011 Builder.CreateBr(CommonDest);
6012 if (DTU)
6013 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6015 // Remove the switch.
6016 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6017 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6018 BasicBlock *Succ = SI->getSuccessor(i);
6020 if (Succ == SI->getDefaultDest())
6021 continue;
6022 Succ->removePredecessor(BB);
6023 RemovedSuccessors.insert(Succ);
6025 SI->eraseFromParent();
6027 if (DTU) {
6028 for (BasicBlock *RemovedSuccessor : RemovedSuccessors)
6029 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
6030 DTU->applyUpdates(Updates);
6033 ++NumLookupTables;
6034 if (NeedMask)
6035 ++NumLookupTablesHoles;
6036 return true;
6039 static bool isSwitchDense(ArrayRef<int64_t> Values) {
6040 // See also SelectionDAGBuilder::isDense(), which this function was based on.
6041 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6042 uint64_t Range = Diff + 1;
6043 uint64_t NumCases = Values.size();
6044 // 40% is the default density for building a jump table in optsize/minsize mode.
6045 uint64_t MinDensity = 40;
6047 return NumCases * 100 >= Range * MinDensity;
6050 /// Try to transform a switch that has "holes" in it to a contiguous sequence
6051 /// of cases.
6053 /// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6054 /// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6056 /// This converts a sparse switch into a dense switch which allows better
6057 /// lowering and could also allow transforming into a lookup table.
6058 static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6059 const DataLayout &DL,
6060 const TargetTransformInfo &TTI) {
6061 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6062 if (CondTy->getIntegerBitWidth() > 64 ||
6063 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6064 return false;
6065 // Only bother with this optimization if there are more than 3 switch cases;
6066 // SDAG will only bother creating jump tables for 4 or more cases.
6067 if (SI->getNumCases() < 4)
6068 return false;
6070 // This transform is agnostic to the signedness of the input or case values. We
6071 // can treat the case values as signed or unsigned. We can optimize more common
6072 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6073 // as signed.
6074 SmallVector<int64_t,4> Values;
6075 for (auto &C : SI->cases())
6076 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6077 llvm::sort(Values);
6079 // If the switch is already dense, there's nothing useful to do here.
6080 if (isSwitchDense(Values))
6081 return false;
6083 // First, transform the values such that they start at zero and ascend.
6084 int64_t Base = Values[0];
6085 for (auto &V : Values)
6086 V -= (uint64_t)(Base);
6088 // Now we have signed numbers that have been shifted so that, given enough
6089 // precision, there are no negative values. Since the rest of the transform
6090 // is bitwise only, we switch now to an unsigned representation.
6092 // This transform can be done speculatively because it is so cheap - it
6093 // results in a single rotate operation being inserted.
6094 // FIXME: It's possible that optimizing a switch on powers of two might also
6095 // be beneficial - flag values are often powers of two and we could use a CLZ
6096 // as the key function.
6098 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6099 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6100 // less than 64.
6101 unsigned Shift = 64;
6102 for (auto &V : Values)
6103 Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
6104 assert(Shift < 64);
6105 if (Shift > 0)
6106 for (auto &V : Values)
6107 V = (int64_t)((uint64_t)V >> Shift);
6109 if (!isSwitchDense(Values))
6110 // Transform didn't create a dense switch.
6111 return false;
6113 // The obvious transform is to shift the switch condition right and emit a
6114 // check that the condition actually cleanly divided by GCD, i.e.
6115 // C & (1 << Shift - 1) == 0
6116 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
6118 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
6119 // shift and puts the shifted-off bits in the uppermost bits. If any of these
6120 // are nonzero then the switch condition will be very large and will hit the
6121 // default case.
6123 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6124 Builder.SetInsertPoint(SI);
6125 auto *ShiftC = ConstantInt::get(Ty, Shift);
6126 auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6127 auto *LShr = Builder.CreateLShr(Sub, ShiftC);
6128 auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
6129 auto *Rot = Builder.CreateOr(LShr, Shl);
6130 SI->replaceUsesOfWith(SI->getCondition(), Rot);
6132 for (auto Case : SI->cases()) {
6133 auto *Orig = Case.getCaseValue();
6134 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
6135 Case.setValue(
6136 cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
6138 return true;
6141 bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
6142 BasicBlock *BB = SI->getParent();
6144 if (isValueEqualityComparison(SI)) {
6145 // If we only have one predecessor, and if it is a branch on this value,
6146 // see if that predecessor totally determines the outcome of this switch.
6147 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
6148 if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
6149 return requestResimplify();
6151 Value *Cond = SI->getCondition();
6152 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
6153 if (SimplifySwitchOnSelect(SI, Select))
6154 return requestResimplify();
6156 // If the block only contains the switch, see if we can fold the block
6157 // away into any preds.
6158 if (SI == &*BB->instructionsWithoutDebug().begin())
6159 if (FoldValueComparisonIntoPredecessors(SI, Builder))
6160 return requestResimplify();
6163 // Try to transform the switch into an icmp and a branch.
6164 if (TurnSwitchRangeIntoICmp(SI, Builder))
6165 return requestResimplify();
6167 // Remove unreachable cases.
6168 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
6169 return requestResimplify();
6171 if (switchToSelect(SI, Builder, DTU, DL, TTI))
6172 return requestResimplify();
6174 if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
6175 return requestResimplify();
6177 // The conversion from switch to lookup tables results in difficult-to-analyze
6178 // code and makes pruning branches much harder. This is a problem if the
6179 // switch expression itself can still be restricted as a result of inlining or
6180 // CVP. Therefore, only apply this transformation during late stages of the
6181 // optimisation pipeline.
6182 if (Options.ConvertSwitchToLookupTable &&
6183 SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
6184 return requestResimplify();
6186 if (ReduceSwitchRange(SI, Builder, DL, TTI))
6187 return requestResimplify();
6189 return false;
6192 bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
6193 BasicBlock *BB = IBI->getParent();
6194 bool Changed = false;
6196 // Eliminate redundant destinations.
6197 SmallPtrSet<Value *, 8> Succs;
6198 SmallPtrSet<BasicBlock *, 8> RemovedSuccs;
6199 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
6200 BasicBlock *Dest = IBI->getDestination(i);
6201 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
6202 if (!Dest->hasAddressTaken())
6203 RemovedSuccs.insert(Dest);
6204 Dest->removePredecessor(BB);
6205 IBI->removeDestination(i);
6206 --i;
6207 --e;
6208 Changed = true;
6212 if (DTU) {
6213 std::vector<DominatorTree::UpdateType> Updates;
6214 Updates.reserve(RemovedSuccs.size());
6215 for (auto *RemovedSucc : RemovedSuccs)
6216 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
6217 DTU->applyUpdates(Updates);
6220 if (IBI->getNumDestinations() == 0) {
6221 // If the indirectbr has no successors, change it to unreachable.
6222 new UnreachableInst(IBI->getContext(), IBI);
6223 EraseTerminatorAndDCECond(IBI);
6224 return true;
6227 if (IBI->getNumDestinations() == 1) {
6228 // If the indirectbr has one successor, change it to a direct branch.
6229 BranchInst::Create(IBI->getDestination(0), IBI);
6230 EraseTerminatorAndDCECond(IBI);
6231 return true;
6234 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
6235 if (SimplifyIndirectBrOnSelect(IBI, SI))
6236 return requestResimplify();
6238 return Changed;
6241 /// Given an block with only a single landing pad and a unconditional branch
6242 /// try to find another basic block which this one can be merged with. This
6243 /// handles cases where we have multiple invokes with unique landing pads, but
6244 /// a shared handler.
6246 /// We specifically choose to not worry about merging non-empty blocks
6247 /// here. That is a PRE/scheduling problem and is best solved elsewhere. In
6248 /// practice, the optimizer produces empty landing pad blocks quite frequently
6249 /// when dealing with exception dense code. (see: instcombine, gvn, if-else
6250 /// sinking in this file)
6252 /// This is primarily a code size optimization. We need to avoid performing
6253 /// any transform which might inhibit optimization (such as our ability to
6254 /// specialize a particular handler via tail commoning). We do this by not
6255 /// merging any blocks which require us to introduce a phi. Since the same
6256 /// values are flowing through both blocks, we don't lose any ability to
6257 /// specialize. If anything, we make such specialization more likely.
6259 /// TODO - This transformation could remove entries from a phi in the target
6260 /// block when the inputs in the phi are the same for the two blocks being
6261 /// merged. In some cases, this could result in removal of the PHI entirely.
6262 static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
6263 BasicBlock *BB, DomTreeUpdater *DTU) {
6264 auto Succ = BB->getUniqueSuccessor();
6265 assert(Succ);
6266 // If there's a phi in the successor block, we'd likely have to introduce
6267 // a phi into the merged landing pad block.
6268 if (isa<PHINode>(*Succ->begin()))
6269 return false;
6271 for (BasicBlock *OtherPred : predecessors(Succ)) {
6272 if (BB == OtherPred)
6273 continue;
6274 BasicBlock::iterator I = OtherPred->begin();
6275 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
6276 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
6277 continue;
6278 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
6280 BranchInst *BI2 = dyn_cast<BranchInst>(I);
6281 if (!BI2 || !BI2->isIdenticalTo(BI))
6282 continue;
6284 std::vector<DominatorTree::UpdateType> Updates;
6286 // We've found an identical block. Update our predecessors to take that
6287 // path instead and make ourselves dead.
6288 SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
6289 for (BasicBlock *Pred : Preds) {
6290 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
6291 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
6292 "unexpected successor");
6293 II->setUnwindDest(OtherPred);
6294 if (DTU) {
6295 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
6296 Updates.push_back({DominatorTree::Delete, Pred, BB});
6300 // The debug info in OtherPred doesn't cover the merged control flow that
6301 // used to go through BB. We need to delete it or update it.
6302 for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
6303 Instruction &Inst = *I;
6304 I++;
6305 if (isa<DbgInfoIntrinsic>(Inst))
6306 Inst.eraseFromParent();
6309 SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB));
6310 for (BasicBlock *Succ : Succs) {
6311 Succ->removePredecessor(BB);
6312 if (DTU)
6313 Updates.push_back({DominatorTree::Delete, BB, Succ});
6316 IRBuilder<> Builder(BI);
6317 Builder.CreateUnreachable();
6318 BI->eraseFromParent();
6319 if (DTU)
6320 DTU->applyUpdates(Updates);
6321 return true;
6323 return false;
6326 bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
6327 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
6328 : simplifyCondBranch(Branch, Builder);
6331 bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
6332 IRBuilder<> &Builder) {
6333 BasicBlock *BB = BI->getParent();
6334 BasicBlock *Succ = BI->getSuccessor(0);
6336 // If the Terminator is the only non-phi instruction, simplify the block.
6337 // If LoopHeader is provided, check if the block or its successor is a loop
6338 // header. (This is for early invocations before loop simplify and
6339 // vectorization to keep canonical loop forms for nested loops. These blocks
6340 // can be eliminated when the pass is invoked later in the back-end.)
6341 // Note that if BB has only one predecessor then we do not introduce new
6342 // backedge, so we can eliminate BB.
6343 bool NeedCanonicalLoop =
6344 Options.NeedCanonicalLoop &&
6345 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
6346 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
6347 BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator();
6348 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
6349 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
6350 return true;
6352 // If the only instruction in the block is a seteq/setne comparison against a
6353 // constant, try to simplify the block.
6354 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
6355 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
6356 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
6358 if (I->isTerminator() &&
6359 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
6360 return true;
6363 // See if we can merge an empty landing pad block with another which is
6364 // equivalent.
6365 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
6366 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
6368 if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
6369 return true;
6372 // If this basic block is ONLY a compare and a branch, and if a predecessor
6373 // branches to us and our successor, fold the comparison into the
6374 // predecessor and use logical operations to update the incoming value
6375 // for PHI nodes in common successor.
6376 if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
6377 Options.BonusInstThreshold))
6378 return requestResimplify();
6379 return false;
6382 static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
6383 BasicBlock *PredPred = nullptr;
6384 for (auto *P : predecessors(BB)) {
6385 BasicBlock *PPred = P->getSinglePredecessor();
6386 if (!PPred || (PredPred && PredPred != PPred))
6387 return nullptr;
6388 PredPred = PPred;
6390 return PredPred;
6393 bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
6394 assert(
6395 !isa<ConstantInt>(BI->getCondition()) &&
6396 BI->getSuccessor(0) != BI->getSuccessor(1) &&
6397 "Tautological conditional branch should have been eliminated already.");
6399 BasicBlock *BB = BI->getParent();
6400 if (!Options.SimplifyCondBranch)
6401 return false;
6403 // Conditional branch
6404 if (isValueEqualityComparison(BI)) {
6405 // If we only have one predecessor, and if it is a branch on this value,
6406 // see if that predecessor totally determines the outcome of this
6407 // switch.
6408 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
6409 if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
6410 return requestResimplify();
6412 // This block must be empty, except for the setcond inst, if it exists.
6413 // Ignore dbg and pseudo intrinsics.
6414 auto I = BB->instructionsWithoutDebug(true).begin();
6415 if (&*I == BI) {
6416 if (FoldValueComparisonIntoPredecessors(BI, Builder))
6417 return requestResimplify();
6418 } else if (&*I == cast<Instruction>(BI->getCondition())) {
6419 ++I;
6420 if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
6421 return requestResimplify();
6425 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
6426 if (SimplifyBranchOnICmpChain(BI, Builder, DL))
6427 return true;
6429 // If this basic block has dominating predecessor blocks and the dominating
6430 // blocks' conditions imply BI's condition, we know the direction of BI.
6431 Optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
6432 if (Imp) {
6433 // Turn this into a branch on constant.
6434 auto *OldCond = BI->getCondition();
6435 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
6436 : ConstantInt::getFalse(BB->getContext());
6437 BI->setCondition(TorF);
6438 RecursivelyDeleteTriviallyDeadInstructions(OldCond);
6439 return requestResimplify();
6442 // If this basic block is ONLY a compare and a branch, and if a predecessor
6443 // branches to us and one of our successors, fold the comparison into the
6444 // predecessor and use logical operations to pick the right destination.
6445 if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
6446 Options.BonusInstThreshold))
6447 return requestResimplify();
6449 // We have a conditional branch to two blocks that are only reachable
6450 // from BI. We know that the condbr dominates the two blocks, so see if
6451 // there is any identical code in the "then" and "else" blocks. If so, we
6452 // can hoist it up to the branching block.
6453 if (BI->getSuccessor(0)->getSinglePredecessor()) {
6454 if (BI->getSuccessor(1)->getSinglePredecessor()) {
6455 if (HoistCommon &&
6456 HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts))
6457 return requestResimplify();
6458 } else {
6459 // If Successor #1 has multiple preds, we may be able to conditionally
6460 // execute Successor #0 if it branches to Successor #1.
6461 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
6462 if (Succ0TI->getNumSuccessors() == 1 &&
6463 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
6464 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
6465 return requestResimplify();
6467 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
6468 // If Successor #0 has multiple preds, we may be able to conditionally
6469 // execute Successor #1 if it branches to Successor #0.
6470 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
6471 if (Succ1TI->getNumSuccessors() == 1 &&
6472 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
6473 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
6474 return requestResimplify();
6477 // If this is a branch on a phi node in the current block, thread control
6478 // through this block if any PHI node entries are constants.
6479 if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
6480 if (PN->getParent() == BI->getParent())
6481 if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC))
6482 return requestResimplify();
6484 // Scan predecessor blocks for conditional branches.
6485 for (BasicBlock *Pred : predecessors(BB))
6486 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
6487 if (PBI != BI && PBI->isConditional())
6488 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
6489 return requestResimplify();
6491 // Look for diamond patterns.
6492 if (MergeCondStores)
6493 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
6494 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
6495 if (PBI != BI && PBI->isConditional())
6496 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
6497 return requestResimplify();
6499 return false;
6502 /// Check if passing a value to an instruction will cause undefined behavior.
6503 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
6504 Constant *C = dyn_cast<Constant>(V);
6505 if (!C)
6506 return false;
6508 if (I->use_empty())
6509 return false;
6511 if (C->isNullValue() || isa<UndefValue>(C)) {
6512 // Only look at the first use, avoid hurting compile time with long uselists
6513 User *Use = *I->user_begin();
6515 // Now make sure that there are no instructions in between that can alter
6516 // control flow (eg. calls)
6517 for (BasicBlock::iterator
6518 i = ++BasicBlock::iterator(I),
6519 UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
6520 i != UI; ++i) {
6521 if (i == I->getParent()->end())
6522 return false;
6523 if (!isGuaranteedToTransferExecutionToSuccessor(&*i))
6524 return false;
6527 // Look through GEPs. A load from a GEP derived from NULL is still undefined
6528 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
6529 if (GEP->getPointerOperand() == I) {
6530 if (!GEP->isInBounds() || !GEP->hasAllZeroIndices())
6531 PtrValueMayBeModified = true;
6532 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
6535 // Look through bitcasts.
6536 if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
6537 return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
6539 // Load from null is undefined.
6540 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
6541 if (!LI->isVolatile())
6542 return !NullPointerIsDefined(LI->getFunction(),
6543 LI->getPointerAddressSpace());
6545 // Store to null is undefined.
6546 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
6547 if (!SI->isVolatile())
6548 return (!NullPointerIsDefined(SI->getFunction(),
6549 SI->getPointerAddressSpace())) &&
6550 SI->getPointerOperand() == I;
6552 if (auto *CB = dyn_cast<CallBase>(Use)) {
6553 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
6554 return false;
6555 // A call to null is undefined.
6556 if (CB->getCalledOperand() == I)
6557 return true;
6559 if (C->isNullValue()) {
6560 for (const llvm::Use &Arg : CB->args())
6561 if (Arg == I) {
6562 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
6563 if (CB->isPassingUndefUB(ArgIdx) &&
6564 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
6565 // Passing null to a nonnnull+noundef argument is undefined.
6566 return !PtrValueMayBeModified;
6569 } else if (isa<UndefValue>(C)) {
6570 // Passing undef to a noundef argument is undefined.
6571 for (const llvm::Use &Arg : CB->args())
6572 if (Arg == I) {
6573 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
6574 if (CB->isPassingUndefUB(ArgIdx)) {
6575 // Passing undef to a noundef argument is undefined.
6576 return true;
6582 return false;
6585 /// If BB has an incoming value that will always trigger undefined behavior
6586 /// (eg. null pointer dereference), remove the branch leading here.
6587 static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
6588 DomTreeUpdater *DTU) {
6589 for (PHINode &PHI : BB->phis())
6590 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
6591 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
6592 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
6593 Instruction *T = Predecessor->getTerminator();
6594 IRBuilder<> Builder(T);
6595 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
6596 BB->removePredecessor(Predecessor);
6597 // Turn uncoditional branches into unreachables and remove the dead
6598 // destination from conditional branches.
6599 if (BI->isUnconditional())
6600 Builder.CreateUnreachable();
6601 else
6602 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
6603 : BI->getSuccessor(0));
6604 BI->eraseFromParent();
6605 if (DTU)
6606 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
6607 return true;
6609 // TODO: SwitchInst.
6612 return false;
6615 bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
6616 bool Changed = false;
6618 assert(BB && BB->getParent() && "Block not embedded in function!");
6619 assert(BB->getTerminator() && "Degenerate basic block encountered!");
6621 // Remove basic blocks that have no predecessors (except the entry block)...
6622 // or that just have themself as a predecessor. These are unreachable.
6623 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
6624 BB->getSinglePredecessor() == BB) {
6625 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
6626 DeleteDeadBlock(BB, DTU);
6627 return true;
6630 // Check to see if we can constant propagate this terminator instruction
6631 // away...
6632 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
6633 /*TLI=*/nullptr, DTU);
6635 // Check for and eliminate duplicate PHI nodes in this block.
6636 Changed |= EliminateDuplicatePHINodes(BB);
6638 // Check for and remove branches that will always cause undefined behavior.
6639 if (removeUndefIntroducingPredecessor(BB, DTU))
6640 return requestResimplify();
6642 // Merge basic blocks into their predecessor if there is only one distinct
6643 // pred, and if there is only one distinct successor of the predecessor, and
6644 // if there are no PHI nodes.
6645 if (MergeBlockIntoPredecessor(BB, DTU))
6646 return true;
6648 if (SinkCommon && Options.SinkCommonInsts)
6649 if (SinkCommonCodeFromPredecessors(BB, DTU)) {
6650 // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
6651 // so we may now how duplicate PHI's.
6652 // Let's rerun EliminateDuplicatePHINodes() first,
6653 // before FoldTwoEntryPHINode() potentially converts them into select's,
6654 // after which we'd need a whole EarlyCSE pass run to cleanup them.
6655 return true;
6658 IRBuilder<> Builder(BB);
6660 if (Options.FoldTwoEntryPHINode) {
6661 // If there is a trivial two-entry PHI node in this basic block, and we can
6662 // eliminate it, do so now.
6663 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
6664 if (PN->getNumIncomingValues() == 2)
6665 if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
6666 return true;
6669 Instruction *Terminator = BB->getTerminator();
6670 Builder.SetInsertPoint(Terminator);
6671 switch (Terminator->getOpcode()) {
6672 case Instruction::Br:
6673 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
6674 break;
6675 case Instruction::Resume:
6676 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
6677 break;
6678 case Instruction::CleanupRet:
6679 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
6680 break;
6681 case Instruction::Switch:
6682 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
6683 break;
6684 case Instruction::Unreachable:
6685 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
6686 break;
6687 case Instruction::IndirectBr:
6688 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
6689 break;
6692 return Changed;
6695 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
6696 bool Changed = simplifyOnceImpl(BB);
6698 return Changed;
6701 bool SimplifyCFGOpt::run(BasicBlock *BB) {
6702 bool Changed = false;
6704 // Repeated simplify BB as long as resimplification is requested.
6705 do {
6706 Resimplify = false;
6708 // Perform one round of simplifcation. Resimplify flag will be set if
6709 // another iteration is requested.
6710 Changed |= simplifyOnce(BB);
6711 } while (Resimplify);
6713 return Changed;
6716 bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
6717 DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
6718 ArrayRef<WeakVH> LoopHeaders) {
6719 return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
6720 Options)
6721 .run(BB);