llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
1 //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of the scalar evolution expander,
10 // which is used to generate the code corresponding to a given scalar evolution
11 // expression.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/ScopeExit.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/PatternMatch.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Utils/LoopUtils.h"
31 #ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
32 #define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33 #else
34 #define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35 #endif
37 using namespace llvm;
39 cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
40 "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
41 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
42 "controls the budget that is considered cheap (default = 4)"));
44 using namespace PatternMatch;
46 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
47 /// reusing an existing cast if a suitable one (= dominating IP) exists, or
48 /// creating a new one.
49 Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
50 Instruction::CastOps Op,
51 BasicBlock::iterator IP) {
52 // This function must be called with the builder having a valid insertion
53 // point. It doesn't need to be the actual IP where the uses of the returned
54 // cast will be added, but it must dominate such IP.
55 // We use this precondition to produce a cast that will dominate all its
56 // uses. In particular, this is crucial for the case where the builder's
57 // insertion point *is* the point where we were asked to put the cast.
58 // Since we don't know the builder's insertion point is actually
59 // where the uses will be added (only that it dominates it), we are
60 // not allowed to move it.
61 BasicBlock::iterator BIP = Builder.GetInsertPoint();
63 Value *Ret = nullptr;
65 // Check to see if there is already a cast!
66 for (User *U : V->users()) {
67 if (U->getType() != Ty)
68 continue;
69 CastInst *CI = dyn_cast<CastInst>(U);
70 if (!CI || CI->getOpcode() != Op)
71 continue;
73 // Found a suitable cast that is at IP or comes before IP. Use it. Note that
74 // the cast must also properly dominate the Builder's insertion point.
75 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
76 (&*IP == CI || CI->comesBefore(&*IP))) {
77 Ret = CI;
78 break;
82 // Create a new cast.
83 if (!Ret) {
84 SCEVInsertPointGuard Guard(Builder, this);
85 Builder.SetInsertPoint(&*IP);
86 Ret = Builder.CreateCast(Op, V, Ty, V->getName());
89 // We assert at the end of the function since IP might point to an
90 // instruction with different dominance properties than a cast
91 // (an invoke for example) and not dominate BIP (but the cast does).
92 assert(!isa<Instruction>(Ret) ||
93 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
95 return Ret;
98 BasicBlock::iterator
99 SCEVExpander::findInsertPointAfter(Instruction *I,
100 Instruction *MustDominate) const {
101 BasicBlock::iterator IP = ++I->getIterator();
102 if (auto *II = dyn_cast<InvokeInst>(I))
103 IP = II->getNormalDest()->begin();
105 while (isa<PHINode>(IP))
106 ++IP;
108 if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
109 ++IP;
110 } else if (isa<CatchSwitchInst>(IP)) {
111 IP = MustDominate->getParent()->getFirstInsertionPt();
112 } else {
113 assert(!IP->isEHPad() && "unexpected eh pad!");
116 // Adjust insert point to be after instructions inserted by the expander, so
117 // we can re-use already inserted instructions. Avoid skipping past the
118 // original \p MustDominate, in case it is an inserted instruction.
119 while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
120 ++IP;
122 return IP;
125 BasicBlock::iterator
126 SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
127 // Cast the argument at the beginning of the entry block, after
128 // any bitcasts of other arguments.
129 if (Argument *A = dyn_cast<Argument>(V)) {
130 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
131 while ((isa<BitCastInst>(IP) &&
132 isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
133 cast<BitCastInst>(IP)->getOperand(0) != A) ||
134 isa<DbgInfoIntrinsic>(IP))
135 ++IP;
136 return IP;
139 // Cast the instruction immediately after the instruction.
140 if (Instruction *I = dyn_cast<Instruction>(V))
141 return findInsertPointAfter(I, &*Builder.GetInsertPoint());
143 // Otherwise, this must be some kind of a constant,
144 // so let's plop this cast into the function's entry block.
145 assert(isa<Constant>(V) &&
146 "Expected the cast argument to be a global/constant");
147 return Builder.GetInsertBlock()
148 ->getParent()
149 ->getEntryBlock()
150 .getFirstInsertionPt();
153 /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
154 /// which must be possible with a noop cast, doing what we can to share
155 /// the casts.
156 Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
157 Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
158 assert((Op == Instruction::BitCast ||
159 Op == Instruction::PtrToInt ||
160 Op == Instruction::IntToPtr) &&
161 "InsertNoopCastOfTo cannot perform non-noop casts!");
162 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
163 "InsertNoopCastOfTo cannot change sizes!");
165 // inttoptr only works for integral pointers. For non-integral pointers, we
166 // can create a GEP on null with the integral value as index. Note that
167 // it is safe to use GEP of null instead of inttoptr here, because only
168 // expressions already based on a GEP of null should be converted to pointers
169 // during expansion.
170 if (Op == Instruction::IntToPtr) {
171 auto *PtrTy = cast<PointerType>(Ty);
172 if (DL.isNonIntegralPointerType(PtrTy))
173 return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
175 // Short-circuit unnecessary bitcasts.
176 if (Op == Instruction::BitCast) {
177 if (V->getType() == Ty)
178 return V;
179 if (CastInst *CI = dyn_cast<CastInst>(V)) {
180 if (CI->getOperand(0)->getType() == Ty)
181 return CI->getOperand(0);
184 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
185 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
186 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
187 if (CastInst *CI = dyn_cast<CastInst>(V))
188 if ((CI->getOpcode() == Instruction::PtrToInt ||
189 CI->getOpcode() == Instruction::IntToPtr) &&
190 SE.getTypeSizeInBits(CI->getType()) ==
191 SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
192 return CI->getOperand(0);
193 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
194 if ((CE->getOpcode() == Instruction::PtrToInt ||
195 CE->getOpcode() == Instruction::IntToPtr) &&
196 SE.getTypeSizeInBits(CE->getType()) ==
197 SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
198 return CE->getOperand(0);
201 // Fold a cast of a constant.
202 if (Constant *C = dyn_cast<Constant>(V))
203 return ConstantExpr::getCast(Op, C, Ty);
205 // Try to reuse existing cast, or insert one.
206 return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
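// Illustration (IR names are made up for this sketch): for a non-integral
// pointer type, a no-op int-to-pointer conversion of %v is emitted roughly as
//   %scevgep = getelementptr i8, ptr addrspace(1) null, i64 %v
// rather than an inttoptr, which only works for integral pointers.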
209 /// InsertBinop - Insert the specified binary operator, doing a small amount
210 /// of work to avoid inserting an obviously redundant operation, and hoisting
211 /// to an outer loop when the opportunity is there and it is safe.
212 Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
213 Value *LHS, Value *RHS,
214 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
215 // Fold a binop with constant operands.
216 if (Constant *CLHS = dyn_cast<Constant>(LHS))
217 if (Constant *CRHS = dyn_cast<Constant>(RHS))
218 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
219 return Res;
221 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
222 unsigned ScanLimit = 6;
223 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
224 // Scanning starts from the last instruction before the insertion point.
225 BasicBlock::iterator IP = Builder.GetInsertPoint();
226 if (IP != BlockBegin) {
227 --IP;
228 for (; ScanLimit; --IP, --ScanLimit) {
229 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
230 // generated code.
231 if (isa<DbgInfoIntrinsic>(IP))
232 ScanLimit++;
234 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
235 // Ensure that no-wrap flags match.
236 if (isa<OverflowingBinaryOperator>(I)) {
237 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
238 return true;
239 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
240 return true;
242 // Conservatively, do not use any instruction which has the exact flag
243 // set on it.
244 if (isa<PossiblyExactOperator>(I) && I->isExact())
245 return true;
246 return false;
248 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
249 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
250 return &*IP;
251 if (IP == BlockBegin) break;
255 // Save the original insertion point so we can restore it when we're done.
256 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
257 SCEVInsertPointGuard Guard(Builder, this);
259 if (IsSafeToHoist) {
260 // Move the insertion point out of as many loops as we can.
261 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
262 if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
263 BasicBlock *Preheader = L->getLoopPreheader();
264 if (!Preheader) break;
266 // Ok, move up a level.
267 Builder.SetInsertPoint(Preheader->getTerminator());
271 // If we haven't found this binop, insert it.
272 // TODO: Use the Builder, which will make CreateBinOp below fold with
273 // InstSimplifyFolder.
274 Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
275 BO->setDebugLoc(Loc);
276 if (Flags & SCEV::FlagNUW)
277 BO->setHasNoUnsignedWrap();
278 if (Flags & SCEV::FlagNSW)
279 BO->setHasNoSignedWrap();
281 return BO;
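// Illustration of the reuse scan above: if one of the six instructions just
// before the insertion point is "%t = add nuw i64 %a, %b", a request for an
// NUW add of %a and %b returns %t instead of emitting a duplicate; an add
// whose wrap flags differ from the requested ones is skipped, since reusing
// it could change poison behaviour (see canGenerateIncompatiblePoison).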
284 /// expandAddToGEP - Expand an addition expression with a pointer type into
285 /// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
286 /// BasicAliasAnalysis and other passes analyze the result. See the rules
287 /// for getelementptr vs. inttoptr in
288 /// http://llvm.org/docs/LangRef.html#pointeraliasing
289 /// for details.
291 /// Design note: The correctness of using getelementptr here depends on
292 /// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
293 /// they may introduce pointer arithmetic which may not be safely converted
294 /// into getelementptr.
296 /// Design note: It might seem desirable for this function to be more
297 /// loop-aware. If some of the indices are loop-invariant while others
298 /// aren't, it might seem desirable to emit multiple GEPs, keeping the
299 /// loop-invariant portions of the overall computation outside the loop.
300 /// However, there are a few reasons this is not done here. Hoisting simple
301 /// arithmetic is a low-level optimization that often isn't very
302 /// important until late in the optimization process. In fact, passes
303 /// like InstructionCombining will combine GEPs, even if it means
304 /// pushing loop-invariant computation down into loops, so even if the
305 /// GEPs were split here, the work would quickly be undone. The
306 /// LoopStrengthReduction pass, which is usually run quite late (and
307 /// after the last InstructionCombining pass), takes care of hoisting
308 /// loop-invariant portions of expressions, after considering what
309 /// can be folded using target addressing modes.
311 Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
312 assert(!isa<Instruction>(V) ||
313 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
315 Value *Idx = expand(Offset);
317 // Fold a GEP with constant operands.
318 if (Constant *CLHS = dyn_cast<Constant>(V))
319 if (Constant *CRHS = dyn_cast<Constant>(Idx))
320 return Builder.CreatePtrAdd(CLHS, CRHS);
322 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
323 unsigned ScanLimit = 6;
324 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
325 // Scanning starts from the last instruction before the insertion point.
326 BasicBlock::iterator IP = Builder.GetInsertPoint();
327 if (IP != BlockBegin) {
328 --IP;
329 for (; ScanLimit; --IP, --ScanLimit) {
330 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
331 // generated code.
332 if (isa<DbgInfoIntrinsic>(IP))
333 ScanLimit++;
334 if (IP->getOpcode() == Instruction::GetElementPtr &&
335 IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
336 cast<GEPOperator>(&*IP)->getSourceElementType() ==
337 Builder.getInt8Ty())
338 return &*IP;
339 if (IP == BlockBegin) break;
343 // Save the original insertion point so we can restore it when we're done.
344 SCEVInsertPointGuard Guard(Builder, this);
346 // Move the insertion point out of as many loops as we can.
347 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
348 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
349 BasicBlock *Preheader = L->getLoopPreheader();
350 if (!Preheader) break;
352 // Ok, move up a level.
353 Builder.SetInsertPoint(Preheader->getTerminator());
356 // Emit a GEP.
357 return Builder.CreatePtrAdd(V, Idx, "scevgep");
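// Illustration (value names are made up): expanding an offset Off against a
// pointer base %p produces roughly
//   %scevgep = getelementptr i8, ptr %p, i64 %off
// where %off is the expansion of Off, instead of the equivalent
// ptrtoint/add/inttoptr sequence that alias analysis cannot look through.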
360 /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
361 /// SCEV expansion. If they are nested, this is the most nested. If they are
362 /// neighboring, pick the later.
363 static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
364 DominatorTree &DT) {
365 if (!A) return B;
366 if (!B) return A;
367 if (A->contains(B)) return B;
368 if (B->contains(A)) return A;
369 if (DT.dominates(A->getHeader(), B->getHeader())) return B;
370 if (DT.dominates(B->getHeader(), A->getHeader())) return A;
371 return A; // Arbitrarily break the tie.
374 /// getRelevantLoop - Get the most relevant loop associated with the given
375 /// expression, according to PickMostRelevantLoop.
376 const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
377 // Test whether we've already computed the most relevant loop for this SCEV.
378 auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
379 if (!Pair.second)
380 return Pair.first->second;
382 switch (S->getSCEVType()) {
383 case scConstant:
384 case scVScale:
385 return nullptr; // A constant has no relevant loops.
386 case scTruncate:
387 case scZeroExtend:
388 case scSignExtend:
389 case scPtrToInt:
390 case scAddExpr:
391 case scMulExpr:
392 case scUDivExpr:
393 case scAddRecExpr:
394 case scUMaxExpr:
395 case scSMaxExpr:
396 case scUMinExpr:
397 case scSMinExpr:
398 case scSequentialUMinExpr: {
399 const Loop *L = nullptr;
400 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
401 L = AR->getLoop();
402 for (const SCEV *Op : S->operands())
403 L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
404 return RelevantLoops[S] = L;
406 case scUnknown: {
407 const SCEVUnknown *U = cast<SCEVUnknown>(S);
408 if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
409 return Pair.first->second = SE.LI.getLoopFor(I->getParent());
410 // A non-instruction has no relevant loops.
411 return nullptr;
413 case scCouldNotCompute:
414 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
416 llvm_unreachable("Unexpected SCEV type!");
419 namespace {
421 /// LoopCompare - Compare loops by PickMostRelevantLoop.
422 class LoopCompare {
423 DominatorTree &DT;
424 public:
425 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
427 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
428 std::pair<const Loop *, const SCEV *> RHS) const {
429 // Keep pointer operands sorted at the front.
430 if (LHS.second->getType()->isPointerTy() !=
431 RHS.second->getType()->isPointerTy())
432 return LHS.second->getType()->isPointerTy();
434 // Compare loops with PickMostRelevantLoop.
435 if (LHS.first != RHS.first)
436 return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
438 // If one operand is a non-constant negative and the other is not,
439 // put the non-constant negative on the right so that a sub can
440 // be used instead of a negate and add.
441 if (LHS.second->isNonConstantNegative()) {
442 if (!RHS.second->isNonConstantNegative())
443 return false;
444 } else if (RHS.second->isNonConstantNegative())
445 return true;
447 // Otherwise they are equivalent according to this comparison.
448 return false;
454 Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
455 // Collect all the add operands in a loop, along with their associated loops.
456 // Iterate in reverse so that constants are emitted last, all else equal, and
457 // so that pointer operands are inserted first, which the code below relies on
458 // to form more involved GEPs.
459 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
460 for (const SCEV *Op : reverse(S->operands()))
461 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
463 // Sort by loop. Use a stable sort so that constants follow non-constants and
464 // pointer operands precede non-pointer operands.
465 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
467 // Emit instructions to add all the operands. Hoist as much as possible
468 // out of loops, and form meaningful getelementptrs where possible.
469 Value *Sum = nullptr;
470 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
471 const Loop *CurLoop = I->first;
472 const SCEV *Op = I->second;
473 if (!Sum) {
474 // This is the first operand. Just expand it.
475 Sum = expand(Op);
476 ++I;
477 continue;
480 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
481 if (isa<PointerType>(Sum->getType())) {
482 // The running sum expression is a pointer. Try to form a getelementptr
483 // at this level with that as the base.
484 SmallVector<const SCEV *, 4> NewOps;
485 for (; I != E && I->first == CurLoop; ++I) {
486 // If the operand is SCEVUnknown and not an instruction, peek through
487 // it, to enable more of it to be folded into the GEP.
488 const SCEV *X = I->second;
489 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
490 if (!isa<Instruction>(U->getValue()))
491 X = SE.getSCEV(U->getValue());
492 NewOps.push_back(X);
494 Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
495 } else if (Op->isNonConstantNegative()) {
496 // Instead of doing a negate and add, just do a subtract.
497 Value *W = expand(SE.getNegativeSCEV(Op));
498 Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
499 /*IsSafeToHoist*/ true);
500 ++I;
501 } else {
502 // A simple add.
503 Value *W = expand(Op);
504 // Canonicalize a constant to the RHS.
505 if (isa<Constant>(Sum))
506 std::swap(Sum, W);
507 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
508 /*IsSafeToHoist*/ true);
509 ++I;
513 return Sum;
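// Rough worked example (names are made up): for the SCEV (%p + %n + 42) the
// reverse-and-stable-sort above orders the operands so the pointer %p comes
// first and the constant last; %p becomes the initial Sum, and the remaining
// integer operands are folded into a single i8 GEP offset via expandAddToGEP,
// so the result is one getelementptr off %p rather than pointer arithmetic.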
516 Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
517 Type *Ty = S->getType();
519 // Collect all the mul operands in a loop, along with their associated loops.
520 // Iterate in reverse so that constants are emitted last, all else equal.
521 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
522 for (const SCEV *Op : reverse(S->operands()))
523 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
525 // Sort by loop. Use a stable sort so that constants follow non-constants.
526 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
528 // Emit instructions to mul all the operands. Hoist as much as possible
529 // out of loops.
530 Value *Prod = nullptr;
531 auto I = OpsAndLoops.begin();
533 // Expand the calculation of X pow N in the following manner:
534 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
535 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
536 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
537 auto E = I;
538 // Calculate how many times the same operand from the same loop is included
539 // into this power.
540 uint64_t Exponent = 0;
541 const uint64_t MaxExponent = UINT64_MAX >> 1;
542 // No one sane will ever try to calculate such huge exponents, but if we
543 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
544 // below when the power of 2 exceeds our Exponent, and we want it to be
545 // 1u << 31 at most to not deal with unsigned overflow.
546 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
547 ++Exponent;
548 ++E;
550 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
552 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
553 // that are needed into the result.
554 Value *P = expand(I->second);
555 Value *Result = nullptr;
556 if (Exponent & 1)
557 Result = P;
558 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
559 P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
560 /*IsSafeToHoist*/ true);
561 if (Exponent & BinExp)
562 Result = Result ? InsertBinop(Instruction::Mul, Result, P,
563 SCEV::FlagAnyWrap,
564 /*IsSafeToHoist*/ true)
565 : P;
568 I = E;
569 assert(Result && "Nothing was expanded?");
570 return Result;
573 while (I != OpsAndLoops.end()) {
574 if (!Prod) {
575 // This is the first operand. Just expand it.
576 Prod = ExpandOpBinPowN();
577 } else if (I->second->isAllOnesValue()) {
578 // Instead of doing a multiply by negative one, just do a negate.
579 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
580 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
581 ++I;
582 } else {
583 // A simple mul.
584 Value *W = ExpandOpBinPowN();
585 // Canonicalize a constant to the RHS.
586 if (isa<Constant>(Prod)) std::swap(Prod, W);
587 const APInt *RHS;
588 if (match(W, m_Power2(RHS))) {
589 // Canonicalize Prod*(1<<C) to Prod<<C.
590 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
591 auto NWFlags = S->getNoWrapFlags();
592 // clear nsw flag if shl will produce poison value.
593 if (RHS->logBase2() == RHS->getBitWidth() - 1)
594 NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
595 Prod = InsertBinop(Instruction::Shl, Prod,
596 ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
597 /*IsSafeToHoist*/ true);
598 } else {
599 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
600 /*IsSafeToHoist*/ true);
605 return Prod;
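// Worked example for ExpandOpBinPowN: if the same operand X occurs six times,
// the exponent 6 = 2 + 4 is handled by repeated squaring,
//   X2 = X * X,  X4 = X2 * X2,  Result = X2 * X4,
// i.e. three multiplies instead of the five a naive chain would emit.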
608 Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
609 Value *LHS = expand(S->getLHS());
610 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
611 const APInt &RHS = SC->getAPInt();
612 if (RHS.isPowerOf2())
613 return InsertBinop(Instruction::LShr, LHS,
614 ConstantInt::get(SC->getType(), RHS.logBase2()),
615 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
618 Value *RHS = expand(S->getRHS());
619 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
620 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
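// Illustration: a udiv by the constant 8 is emitted as a right shift,
//   %r = lshr i64 %lhs, 3
// while a udiv whose divisor is not known to be non-zero is expanded with
// IsSafeToHoist = false, so it stays below whatever guard protects against
// division by zero.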
623 /// Determine if this is a well-behaved chain of instructions leading back to
624 /// the PHI. If so, it may be reused by expanded expressions.
625 bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
626 const Loop *L) {
627 if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
628 (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
629 return false;
630 // If any of the operands don't dominate the insert position, bail.
631 // Addrec operands are always loop-invariant, so this can only happen
632 // if there are instructions which haven't been hoisted.
633 if (L == IVIncInsertLoop) {
634 for (Use &Op : llvm::drop_begin(IncV->operands()))
635 if (Instruction *OInst = dyn_cast<Instruction>(Op))
636 if (!SE.DT.dominates(OInst, IVIncInsertPos))
637 return false;
639 // Advance to the next instruction.
640 IncV = dyn_cast<Instruction>(IncV->getOperand(0));
641 if (!IncV)
642 return false;
644 if (IncV->mayHaveSideEffects())
645 return false;
647 if (IncV == PN)
648 return true;
650 return isNormalAddRecExprPHI(PN, IncV, L);
653 /// getIVIncOperand returns an induction variable increment's induction
654 /// variable operand.
656 /// If allowScale is set, any type of GEP is allowed as long as the nonIV
657 /// operands dominate InsertPos.
659 /// If allowScale is not set, ensure that a GEP increment conforms to one of the
660 /// simple patterns generated by getAddRecExprPHILiterally and
661 /// expandAddToGEP. If the pattern isn't recognized, return NULL.
662 Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
663 Instruction *InsertPos,
664 bool allowScale) {
665 if (IncV == InsertPos)
666 return nullptr;
668 switch (IncV->getOpcode()) {
669 default:
670 return nullptr;
671 // Check for a simple Add/Sub or GEP of a loop invariant step.
672 case Instruction::Add:
673 case Instruction::Sub: {
674 Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
675 if (!OInst || SE.DT.dominates(OInst, InsertPos))
676 return dyn_cast<Instruction>(IncV->getOperand(0));
677 return nullptr;
679 case Instruction::BitCast:
680 return dyn_cast<Instruction>(IncV->getOperand(0));
681 case Instruction::GetElementPtr:
682 for (Use &U : llvm::drop_begin(IncV->operands())) {
683 if (isa<Constant>(U))
684 continue;
685 if (Instruction *OInst = dyn_cast<Instruction>(U)) {
686 if (!SE.DT.dominates(OInst, InsertPos))
687 return nullptr;
689 if (allowScale) {
690 // allow any kind of GEP as long as it can be hoisted.
691 continue;
693 // GEPs produced by SCEVExpander use i8 element type.
694 if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
695 return nullptr;
696 break;
698 return dyn_cast<Instruction>(IncV->getOperand(0));
702 /// If the insert point of the current builder or any of the builders on the
703 /// stack of saved builders has 'I' as its insert point, update it to point to
704 /// the instruction after 'I'. This is intended to be used when the instruction
705 /// 'I' is being moved. If this fixup is not done and 'I' is moved to a
706 /// different block, the inconsistent insert point (with a mismatched
707 /// Instruction and Block) can lead to an instruction being inserted in a block
708 /// other than its parent.
709 void SCEVExpander::fixupInsertPoints(Instruction *I) {
710 BasicBlock::iterator It(*I);
711 BasicBlock::iterator NewInsertPt = std::next(It);
712 if (Builder.GetInsertPoint() == It)
713 Builder.SetInsertPoint(&*NewInsertPt);
714 for (auto *InsertPtGuard : InsertPointGuards)
715 if (InsertPtGuard->GetInsertPoint() == It)
716 InsertPtGuard->SetInsertPoint(NewInsertPt);
719 /// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
720 /// it available to other uses in this loop. Recursively hoist any operands,
721 /// until we reach a value that dominates InsertPos.
722 bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
723 bool RecomputePoisonFlags) {
724 auto FixupPoisonFlags = [this](Instruction *I) {
725 // Drop flags that are potentially inferred from old context and infer flags
726 // in new context.
727 I->dropPoisonGeneratingFlags();
728 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
729 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
730 auto *BO = cast<BinaryOperator>(I);
731 BO->setHasNoUnsignedWrap(
732 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
733 BO->setHasNoSignedWrap(
734 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
738 if (SE.DT.dominates(IncV, InsertPos)) {
739 if (RecomputePoisonFlags)
740 FixupPoisonFlags(IncV);
741 return true;
744 // InsertPos must itself dominate IncV so that IncV's new position satisfies
745 // its existing users.
746 if (isa<PHINode>(InsertPos) ||
747 !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
748 return false;
750 if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
751 return false;
753 // Check that the chain of IV operands leading back to Phi can be hoisted.
754 SmallVector<Instruction*, 4> IVIncs;
755 for(;;) {
756 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
757 if (!Oper)
758 return false;
759 // IncV is safe to hoist.
760 IVIncs.push_back(IncV);
761 IncV = Oper;
762 if (SE.DT.dominates(IncV, InsertPos))
763 break;
765 for (Instruction *I : llvm::reverse(IVIncs)) {
766 fixupInsertPoints(I);
767 I->moveBefore(InsertPos);
768 if (RecomputePoisonFlags)
769 FixupPoisonFlags(I);
771 return true;
774 /// Determine if this cyclic phi is in a form that would have been generated by
775 /// LSR. We don't care if the phi was actually expanded in this pass, as long
776 /// as it is in a low-cost form, for example, no implied multiplication. This
777 /// should match any patterns generated by getAddRecExprPHILiterally and
778 /// expandAddToGEP.
779 bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
780 const Loop *L) {
781 for(Instruction *IVOper = IncV;
782 (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
783 /*allowScale=*/false));) {
784 if (IVOper == PN)
785 return true;
787 return false;
790 /// expandIVInc - Expand an IV increment at Builder's current InsertPos.
791 /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
792 /// need to materialize IV increments elsewhere to handle difficult situations.
793 Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
794 bool useSubtract) {
795 Value *IncV;
796 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
797 if (PN->getType()->isPointerTy()) {
798 IncV = expandAddToGEP(SE.getSCEV(StepV), PN);
799 } else {
800 IncV = useSubtract ?
801 Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
802 Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
804 return IncV;
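// Illustration (names are made up): for an integer PHI %x.iv with step %step
// this emits "%x.iv.next = add %x.iv, %step" (or a sub when useSubtract is
// set), while for a pointer PHI the step is folded into an i8 GEP through
// expandAddToGEP, e.g. "%scevgep = getelementptr i8, ptr %x.iv, i64 %step".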
807 /// Check whether we can cheaply express the requested SCEV in terms of
808 /// the available PHI SCEV by truncation and/or inversion of the step.
809 static bool canBeCheaplyTransformed(ScalarEvolution &SE,
810 const SCEVAddRecExpr *Phi,
811 const SCEVAddRecExpr *Requested,
812 bool &InvertStep) {
813 // We can't transform to match a pointer PHI.
814 Type *PhiTy = Phi->getType();
815 Type *RequestedTy = Requested->getType();
816 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
817 return false;
819 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
820 return false;
822 // Try truncate it if necessary.
823 Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
824 if (!Phi)
825 return false;
827 // Check whether truncation will help.
828 if (Phi == Requested) {
829 InvertStep = false;
830 return true;
833 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
834 if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
835 InvertStep = true;
836 return true;
839 return false;
842 static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
843 if (!isa<IntegerType>(AR->getType()))
844 return false;
846 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
847 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
848 const SCEV *Step = AR->getStepRecurrence(SE);
849 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
850 SE.getSignExtendExpr(AR, WideTy));
851 const SCEV *ExtendAfterOp =
852 SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
853 return ExtendAfterOp == OpAfterExtend;
856 static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
857 if (!isa<IntegerType>(AR->getType()))
858 return false;
860 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
861 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
862 const SCEV *Step = AR->getStepRecurrence(SE);
863 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
864 SE.getZeroExtendExpr(AR, WideTy));
865 const SCEV *ExtendAfterOp =
866 SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
867 return ExtendAfterOp == OpAfterExtend;
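// Illustration of the check above: IsIncrementNSW asks, in a type twice as
// wide, whether sext(AR + Step) equals sext(AR) + sext(Step). If SCEV proves
// the two widened forms equal, the narrow increment cannot wrap in the signed
// sense, so the add emitted for it may safely carry the nsw flag (and
// likewise IsIncrementNUW with zext for nuw).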
870 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
871 /// the base addrec, which is the addrec without any non-loop-dominating
872 /// values, and return the PHI.
873 PHINode *
874 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
875 const Loop *L, Type *&TruncTy,
876 bool &InvertStep) {
877 assert((!IVIncInsertLoop || IVIncInsertPos) &&
878 "Uninitialized insert position");
880 // Reuse a previously-inserted PHI, if present.
881 BasicBlock *LatchBlock = L->getLoopLatch();
882 if (LatchBlock) {
883 PHINode *AddRecPhiMatch = nullptr;
884 Instruction *IncV = nullptr;
885 TruncTy = nullptr;
886 InvertStep = false;
888 // Only try partially matching scevs that need truncation and/or
889 // step-inversion if we know this loop is outside the current loop.
890 bool TryNonMatchingSCEV =
891 IVIncInsertLoop &&
892 SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
894 for (PHINode &PN : L->getHeader()->phis()) {
895 if (!SE.isSCEVable(PN.getType()))
896 continue;
898 // We should not look for an incomplete PHI. Getting SCEV for an incomplete
899 // PHI has no meaning at all.
900 if (!PN.isComplete()) {
901 SCEV_DEBUG_WITH_TYPE(
902 DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
903 continue;
906 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
907 if (!PhiSCEV)
908 continue;
910 bool IsMatchingSCEV = PhiSCEV == Normalized;
911 // We only handle truncation and inversion of phi recurrences for the
912 // expanded expression if the expanded expression's loop dominates the
913 // loop we insert to. Check now, so we can bail out early.
914 if (!IsMatchingSCEV && !TryNonMatchingSCEV)
915 continue;
917 // TODO: this possibly can be reworked to avoid this cast at all.
918 Instruction *TempIncV =
919 dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
920 if (!TempIncV)
921 continue;
923 // Check whether we can reuse this PHI node.
924 if (LSRMode) {
925 if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
926 continue;
927 } else {
928 if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
929 continue;
932 // Stop if we have found an exact match SCEV.
933 if (IsMatchingSCEV) {
934 IncV = TempIncV;
935 TruncTy = nullptr;
936 InvertStep = false;
937 AddRecPhiMatch = &PN;
938 break;
941 // Try whether the phi can be translated into the requested form
942 // (truncated and/or offset by a constant).
943 if ((!TruncTy || InvertStep) &&
944 canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
945 // Record the phi node. But don't stop; we might find an exact match
946 // later.
947 AddRecPhiMatch = &PN;
948 IncV = TempIncV;
949 TruncTy = Normalized->getType();
953 if (AddRecPhiMatch) {
954 // Ok, the add recurrence looks usable.
955 // Remember this PHI, even in post-inc mode.
956 InsertedValues.insert(AddRecPhiMatch);
957 // Remember the increment.
958 rememberInstruction(IncV);
959 // Those values were not actually inserted but re-used.
960 ReusedValues.insert(AddRecPhiMatch);
961 ReusedValues.insert(IncV);
962 return AddRecPhiMatch;
966 // Save the original insertion point so we can restore it when we're done.
967 SCEVInsertPointGuard Guard(Builder, this);
969 // Another AddRec may need to be recursively expanded below. For example, if
970 // this AddRec is quadratic, the StepV may itself be an AddRec in this
971 // loop. Remove this loop from the PostIncLoops set before expanding such
972 // AddRecs. Otherwise, we cannot find a valid position for the step
973 // (i.e. StepV can never dominate its loop header). Ideally, we could do
974 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
975 // so it's not worth implementing SmallPtrSet::swap.
976 PostIncLoopSet SavedPostIncLoops = PostIncLoops;
977 PostIncLoops.clear();
979 // Expand code for the start value into the loop preheader.
980 assert(L->getLoopPreheader() &&
981 "Can't expand add recurrences without a loop preheader!");
982 Value *StartV =
983 expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
985 // StartV must have been inserted into L's preheader to dominate the new
986 // phi.
987 assert(!isa<Instruction>(StartV) ||
988 SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
989 L->getHeader()));
991 // Expand code for the step value. Do this before creating the PHI so that PHI
992 // reuse code doesn't see an incomplete PHI.
993 const SCEV *Step = Normalized->getStepRecurrence(SE);
994 Type *ExpandTy = Normalized->getType();
995 // If the stride is negative, insert a sub instead of an add for the increment
996 // (unless it's a constant, because subtracts of constants are canonicalized
997 // to adds).
998 bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
999 if (useSubtract)
1000 Step = SE.getNegativeSCEV(Step);
1001 // Expand the step somewhere that dominates the loop header.
1002 Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1004 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1005 // we actually do emit an addition. It does not apply if we emit a
1006 // subtraction.
1007 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
1008 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
1010 // Create the PHI.
1011 BasicBlock *Header = L->getHeader();
1012 Builder.SetInsertPoint(Header, Header->begin());
1013 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1014 PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
1015 Twine(IVName) + ".iv");
1017 // Create the step instructions and populate the PHI.
1018 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1019 BasicBlock *Pred = *HPI;
1021 // Add a start value.
1022 if (!L->contains(Pred)) {
1023 PN->addIncoming(StartV, Pred);
1024 continue;
1027 // Create a step value and add it to the PHI.
1028 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1029 // instructions at IVIncInsertPos.
1030 Instruction *InsertPos = L == IVIncInsertLoop ?
1031 IVIncInsertPos : Pred->getTerminator();
1032 Builder.SetInsertPoint(InsertPos);
1033 Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1035 if (isa<OverflowingBinaryOperator>(IncV)) {
1036 if (IncrementIsNUW)
1037 cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
1038 if (IncrementIsNSW)
1039 cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
1041 PN->addIncoming(IncV, Pred);
1044 // After expanding subexpressions, restore the PostIncLoops set so the caller
1045 // can ensure that IVIncrement dominates the current uses.
1046 PostIncLoops = SavedPostIncLoops;
1048 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1049 // effective when we are able to use an IV inserted here, so record it.
1050 InsertedValues.insert(PN);
1051 InsertedIVs.push_back(PN);
1052 return PN;
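// Illustration (names are made up): expanding {%start,+,%step}<%loop>
// literally yields a header PHI of roughly this shape:
//   %x.iv      = phi [ %start, %preheader ], [ %x.iv.next, %latch ]
//   %x.iv.next = add %x.iv, %step
// with %start expanded in the preheader, %step expanded so it dominates the
// header, and nuw/nsw attached only when IsIncrementNUW/NSW proved them.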
1055 Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1056 const Loop *L = S->getLoop();
1058 // Determine a normalized form of this expression, which is the expression
1059 // before any post-inc adjustment is made.
1060 const SCEVAddRecExpr *Normalized = S;
1061 if (PostIncLoops.count(L)) {
1062 PostIncLoopSet Loops;
1063 Loops.insert(L);
1064 Normalized = cast<SCEVAddRecExpr>(
1065 normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1068 [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1069 const SCEV *Step = Normalized->getStepRecurrence(SE);
1070 assert(SE.properlyDominates(Start, L->getHeader()) &&
1071 "Start does not properly dominate loop header");
1072 assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1074 // In some cases, we decide to reuse an existing phi node but need to truncate
1075 // it and/or invert the step.
1076 Type *TruncTy = nullptr;
1077 bool InvertStep = false;
1078 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1080 // Accommodate post-inc mode, if necessary.
1081 Value *Result;
1082 if (!PostIncLoops.count(L))
1083 Result = PN;
1084 else {
1085 // In PostInc mode, use the post-incremented value.
1086 BasicBlock *LatchBlock = L->getLoopLatch();
1087 assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1088 Result = PN->getIncomingValueForBlock(LatchBlock);
1090 // We might be introducing a new use of the post-inc IV that is not poison
1091 // safe, in which case we should drop poison generating flags. Only keep
1092 // those flags for which SCEV has proven that they always hold.
1093 if (isa<OverflowingBinaryOperator>(Result)) {
1094 auto *I = cast<Instruction>(Result);
1095 if (!S->hasNoUnsignedWrap())
1096 I->setHasNoUnsignedWrap(false);
1097 if (!S->hasNoSignedWrap())
1098 I->setHasNoSignedWrap(false);
1101 // For an expansion to use the postinc form, the client must call
1102 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1103 // or dominated by IVIncInsertPos.
1104 if (isa<Instruction>(Result) &&
1105 !SE.DT.dominates(cast<Instruction>(Result),
1106 &*Builder.GetInsertPoint())) {
1107 // The induction variable's postinc expansion does not dominate this use.
1108 // IVUsers tries to prevent this case, so it is rare. However, it can
1109 // happen when an IVUser outside the loop is not dominated by the latch
1110 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1111 // all cases. Consider a phi outside whose operand is replaced during
1112 // expansion with the value of the postinc user. Without fundamentally
1113 // changing the way postinc users are tracked, the only remedy is
1114 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1115 // but hopefully expandCodeFor handles that.
1116 bool useSubtract =
1117 !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1118 if (useSubtract)
1119 Step = SE.getNegativeSCEV(Step);
1120 Value *StepV;
1122 // Expand the step somewhere that dominates the loop header.
1123 SCEVInsertPointGuard Guard(Builder, this);
1124 StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1126 Result = expandIVInc(PN, StepV, L, useSubtract);
1130 // We have decided to reuse an induction variable of a dominating loop. Apply
1131 // truncation and/or inversion of the step.
1132 if (TruncTy) {
1133 // Truncate the result.
1134 if (TruncTy != Result->getType())
1135 Result = Builder.CreateTrunc(Result, TruncTy);
1137 // Invert the result.
1138 if (InvertStep)
1139 Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
1142 return Result;
1145 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1146 // In canonical mode we compute the addrec as an expression of a canonical IV
1147 // using evaluateAtIteration and expand the resulting SCEV expression. This
1148 // way we avoid introducing new IVs to carry on the computation of the addrec
1149 // throughout the loop.
1151 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1152 // type wider than the addrec itself. Emitting a canonical IV of the
1153 // proper type might produce non-legal types, for example expanding an i64
1154 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1155 // back to non-canonical mode for nested addrecs.
1156 if (!CanonicalMode || (S->getNumOperands() > 2))
1157 return expandAddRecExprLiterally(S);
1159 Type *Ty = SE.getEffectiveSCEVType(S->getType());
1160 const Loop *L = S->getLoop();
1162 // First check for an existing canonical IV in a suitable type.
1163 PHINode *CanonicalIV = nullptr;
1164 if (PHINode *PN = L->getCanonicalInductionVariable())
1165 if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1166 CanonicalIV = PN;
1168 // Rewrite an AddRec in terms of the canonical induction variable, if
1169 // its type is more narrow.
1170 if (CanonicalIV &&
1171 SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1172 !S->getType()->isPointerTy()) {
1173 SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1174 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1175 NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
1176 Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1177 S->getNoWrapFlags(SCEV::FlagNW)));
1178 BasicBlock::iterator NewInsertPt =
1179 findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
1180 V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
1181 return V;
1184 // {X,+,F} --> X + {0,+,F}
1185 if (!S->getStart()->isZero()) {
1186 if (isa<PointerType>(S->getType())) {
1187 Value *StartV = expand(SE.getPointerBase(S));
1188 return expandAddToGEP(SE.removePointerBase(S), StartV);
1191 SmallVector<const SCEV *, 4> NewOps(S->operands());
1192 NewOps[0] = SE.getConstant(Ty, 0);
1193 const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
1194 S->getNoWrapFlags(SCEV::FlagNW));
1196 // Just do a normal add. Pre-expand the operands to suppress folding.
1198 // The LHS and RHS values are factored out of the expand call to make the
1199 // output independent of the argument evaluation order.
1200 const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
1201 const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
1202 return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
1205 // If we don't yet have a canonical IV, create one.
1206 if (!CanonicalIV) {
1207 // Create and insert the PHI node for the induction variable in the
1208 // specified loop.
1209 BasicBlock *Header = L->getHeader();
1210 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1211 CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
1212 CanonicalIV->insertBefore(Header->begin());
1213 rememberInstruction(CanonicalIV);
1215 SmallSet<BasicBlock *, 4> PredSeen;
1216 Constant *One = ConstantInt::get(Ty, 1);
1217 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1218 BasicBlock *HP = *HPI;
1219 if (!PredSeen.insert(HP).second) {
1220 // There must be an incoming value for each predecessor, even the
1221 // duplicates!
1222 CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
1223 continue;
1226 if (L->contains(HP)) {
1227 // Insert a unit add instruction right before the terminator
1228 // corresponding to the back-edge.
1229 Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1230 "indvar.next",
1231 HP->getTerminator());
1232 Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1233 rememberInstruction(Add);
1234 CanonicalIV->addIncoming(Add, HP);
1235 } else {
1236 CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1241 // {0,+,1} --> Insert a canonical induction variable into the loop!
1242 if (S->isAffine() && S->getOperand(1)->isOne()) {
1243 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1244 "IVs with types different from the canonical IV should "
1245 "already have been handled!");
1246 return CanonicalIV;
1249 // {0,+,F} --> {0,+,1} * F
1251 // If this is a simple linear addrec, emit it now as a special case.
1252 if (S->isAffine()) // {0,+,F} --> i*F
1253 return
1254 expand(SE.getTruncateOrNoop(
1255 SE.getMulExpr(SE.getUnknown(CanonicalIV),
1256 SE.getNoopOrAnyExtend(S->getOperand(1),
1257 CanonicalIV->getType())),
1258 Ty));
1260 // If this is a chain of recurrences, turn it into a closed form, using the
1261 // folders, then expandCodeFor the closed form. This allows the folders to
1262 // simplify the expression without having to build a bunch of special code
1263 // into this folder.
1264 const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
1266 // Promote S up to the canonical IV type, if the cast is foldable.
1267 const SCEV *NewS = S;
1268 const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1269 if (isa<SCEVAddRecExpr>(Ext))
1270 NewS = Ext;
1272 const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1274 // Truncate the result down to the original type, if needed.
1275 const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1276 return expand(T);
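// Illustration of canonical mode: an affine addrec {0,+,%f}<%loop> does not
// get its own PHI; it is re-expressed as %indvar * %f in terms of the
// canonical {0,+,1} induction variable found or created above, and more
// complex addrecs go through evaluateAtIteration to obtain a closed form
// before being expanded.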
1279 Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1280 Value *V = expand(S->getOperand());
1281 return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
1282 GetOptimalInsertionPointForCastOf(V));
1285 Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1286 Value *V = expand(S->getOperand());
1287 return Builder.CreateTrunc(V, S->getType());
1290 Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1291 Value *V = expand(S->getOperand());
1292 return Builder.CreateZExt(V, S->getType(), "",
1293 SE.isKnownNonNegative(S->getOperand()));
1296 Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1297 Value *V = expand(S->getOperand());
1298 return Builder.CreateSExt(V, S->getType());
1301 Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
1302 Intrinsic::ID IntrinID, Twine Name,
1303 bool IsSequential) {
1304 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1305 Type *Ty = LHS->getType();
1306 if (IsSequential)
1307 LHS = Builder.CreateFreeze(LHS);
1308 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1309 Value *RHS = expand(S->getOperand(i));
1310 if (IsSequential && i != 0)
1311 RHS = Builder.CreateFreeze(RHS);
1312 Value *Sel;
1313 if (Ty->isIntegerTy())
1314 Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
1315 /*FMFSource=*/nullptr, Name);
1316 else {
1317 Value *ICmp =
1318 Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
1319 Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
1321 LHS = Sel;
1323 return LHS;
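// Illustration (names are made up): a three-operand umin_seq over %a, %b, %c
// is expanded right to left as
//   %c.fr = freeze i64 %c
//   %b.fr = freeze i64 %b
//   %m    = call i64 @llvm.umin.i64(i64 %c.fr, i64 %b.fr)
//   %r    = call i64 @llvm.umin.i64(i64 %m, i64 %a)
// The later operands are frozen because the sequential form would not have
// evaluated them once an earlier operand already decides the result.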
1326 Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1327 return expandMinMaxExpr(S, Intrinsic::smax, "smax");
1330 Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1331 return expandMinMaxExpr(S, Intrinsic::umax, "umax");
1334 Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1335 return expandMinMaxExpr(S, Intrinsic::smin, "smin");
1338 Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1339 return expandMinMaxExpr(S, Intrinsic::umin, "umin");
1342 Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1343 return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
1346 Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1347 return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
1350 Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1351 BasicBlock::iterator IP) {
1352 setInsertPoint(IP);
1353 Value *V = expandCodeFor(SH, Ty);
1354 return V;
1357 Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1358 // Expand the code for this SCEV.
1359 Value *V = expand(SH);
1361 if (Ty) {
1362 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1363 "non-trivial casts should be done with the SCEVs directly!");
1364 V = InsertNoopCastOfTo(V, Ty);
1366 return V;
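// Typical client usage, as a minimal sketch (variable names are made up):
//   SCEVExpander Expander(SE, DL, "expander");
//   Value *V = Expander.expandCodeFor(SE.getSCEV(SomeValue), /*Ty=*/nullptr,
//                                     IP);   // IP: a BasicBlock::iterator
// Ty may be null to expand in the SCEV's own type; when it is given, it must
// have the same bit width as the SCEV's type, since only no-op casts are
// inserted here.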
1369 Value *SCEVExpander::FindValueInExprValueMap(
1370 const SCEV *S, const Instruction *InsertPt,
1371 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1372 // If the expansion is not in CanonicalMode, and the SCEV contains any
1373 // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1374 if (!CanonicalMode && SE.containsAddRecurrence(S))
1375 return nullptr;
1377 // If S is a constant, it may be worse to reuse an existing Value.
1378 if (isa<SCEVConstant>(S))
1379 return nullptr;
1381 for (Value *V : SE.getSCEVValues(S)) {
1382 Instruction *EntInst = dyn_cast<Instruction>(V);
1383 if (!EntInst)
1384 continue;
1386 // Choose a Value from the set which dominates the InsertPt.
1387 // InsertPt should be inside the Value's parent loop so as not to break
1388 // the LCSSA form.
1389 assert(EntInst->getFunction() == InsertPt->getFunction());
1390 if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
1391 !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1392 SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1393 continue;
1395 // Make sure reusing the instruction is poison-safe.
1396 if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
1397 return V;
1398 DropPoisonGeneratingInsts.clear();
1400 return nullptr;
1403 // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1404 // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1405 // and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1406 // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1407 // the expansion will try to reuse Value from ExprValueMap, and only when it
1408 // fails, expand the SCEV literally.
1409 Value *SCEVExpander::expand(const SCEV *S) {
1410 // Compute an insertion point for this SCEV object. Hoist the instructions
1411 // as far out in the loop nest as possible.
1412 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1414 // We can only move the insertion point if there are no div or rem operations;
1415 // otherwise we risk moving it past the check for a zero denominator.
1416 auto SafeToHoist = [](const SCEV *S) {
1417 return !SCEVExprContains(S, [](const SCEV *S) {
1418 if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1419 if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1420 // Division by non-zero constants can be hoisted.
1421 return SC->getValue()->isZero();
1422 // All other divisions should not be moved as they may be
1423 // divisions by zero and should be kept within the
1424 // conditions of the surrounding loops that guard their
1425 // execution (see PR35406).
1426 return true;
1428 return false;
1431 if (SafeToHoist(S)) {
1432 for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1433 L = L->getParentLoop()) {
1434 if (SE.isLoopInvariant(S, L)) {
1435 if (!L) break;
1436 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1437 InsertPt = Preheader->getTerminator()->getIterator();
1438 } else {
1439 // LSR sets the insertion point for AddRec start/step values to the
1440 // block start to simplify value reuse, even though it's an invalid
1441 // position. SCEVExpander must correct for this in all cases.
1442 InsertPt = L->getHeader()->getFirstInsertionPt();
1444 } else {
1445 // If the SCEV is computable at this level, insert it into the header
1446 // after the PHIs (and after any other instructions that we've inserted
1447 // there) so that it is guaranteed to dominate any user inside the loop.
1448 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1449 InsertPt = L->getHeader()->getFirstInsertionPt();
1451 while (InsertPt != Builder.GetInsertPoint() &&
1452 (isInsertedInstruction(&*InsertPt) ||
1453 isa<DbgInfoIntrinsic>(&*InsertPt))) {
1454 InsertPt = std::next(InsertPt);
1456 break;
1461 // Check to see if we already expanded this here.
1462 auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
1463 if (I != InsertedExpressions.end())
1464 return I->second;
1466 SCEVInsertPointGuard Guard(Builder, this);
1467 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1469 // Expand the expression into instructions.
1470 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1471 Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
1472 if (!V) {
1473 V = visit(S);
1474 V = fixupLCSSAFormFor(V);
1475 } else {
1476 for (Instruction *I : DropPoisonGeneratingInsts) {
1477 I->dropPoisonGeneratingFlagsAndMetadata();
1478 // See if we can re-infer from first principles any of the flags we just
1479 // dropped.
1480 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
1481 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1482 auto *BO = cast<BinaryOperator>(I);
1483 BO->setHasNoUnsignedWrap(
1484 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
1485 BO->setHasNoSignedWrap(
1486 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
1488 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
1489 auto *Src = NNI->getOperand(0);
1490 if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
1491 Constant::getNullValue(Src->getType()), I,
1492 DL).value_or(false))
1493 NNI->setNonNeg(true);
1497 // Remember the expanded value for this SCEV at this location.
1499 // This is independent of PostIncLoops. The mapped value simply materializes
1500 // the expression at this insertion point. If the mapped value happened to be
1501 // a postinc expansion, it could be reused by a non-postinc user, but only if
1502 // its insertion point was already at the head of the loop.
1503 InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
1504 return V;
1507 void SCEVExpander::rememberInstruction(Value *I) {
1508 auto DoInsert = [this](Value *V) {
1509 if (!PostIncLoops.empty())
1510 InsertedPostIncValues.insert(V);
1511 else
1512 InsertedValues.insert(V);
1514 DoInsert(I);
1517 /// replaceCongruentIVs - Check for congruent phis in this loop header and
1518 /// replace them with their most canonical representative. Return the number of
1519 /// phis eliminated.
1521 /// This does not depend on any SCEVExpander state but should be used in
1522 /// the same context that SCEVExpander is used.
1523 unsigned
1524 SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1525 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1526 const TargetTransformInfo *TTI) {
1527 // Find integer phis in order of increasing width.
1528 SmallVector<PHINode*, 8> Phis;
1529 for (PHINode &PN : L->getHeader()->phis())
1530 Phis.push_back(&PN);
1532 if (TTI)
1533 // Use stable_sort to preserve order of equivalent PHIs, so the order
1534 // of the sorted Phis is the same from run to run on the same loop.
1535 llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
1536 // Put pointers at the back and make sure pointer < pointer = false.
1537 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1538 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1539 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1540 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1543 unsigned NumElim = 0;
1544 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1545 // Process phis from wide to narrow. Map wide phis to their truncation
1546 // so narrow phis can reuse them.
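// For example, given congruent i64 and i32 IV phis for the same recurrence,
// the wider i64 phi is kept as the canonical one and the i32 phi's users are
// rewritten to use a trunc of it (created further down in this function).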
1547 for (PHINode *Phi : Phis) {
1548 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1549 if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1550 return V;
1551 if (!SE.isSCEVable(PN->getType()))
1552 return nullptr;
1553 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1554 if (!Const)
1555 return nullptr;
1556 return Const->getValue();
1559 // Fold constant phis. They may be congruent to other constant phis and
1560 // would confuse the logic below that expects proper IVs.
1561 if (Value *V = SimplifyPHINode(Phi)) {
1562 if (V->getType() != Phi->getType())
1563 continue;
1564 SE.forgetValue(Phi);
1565 Phi->replaceAllUsesWith(V);
1566 DeadInsts.emplace_back(Phi);
1567 ++NumElim;
1568 SCEV_DEBUG_WITH_TYPE(DebugType,
1569 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1570 << '\n');
1571 continue;
1574 if (!SE.isSCEVable(Phi->getType()))
1575 continue;
1577 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1578 if (!OrigPhiRef) {
1579 OrigPhiRef = Phi;
1580 if (Phi->getType()->isIntegerTy() && TTI &&
1581 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1582 // Make sure we only rewrite using simple induction variables;
1583 // otherwise, we can make the trip count of a loop unanalyzable
1584 // to SCEV.
1585 const SCEV *PhiExpr = SE.getSCEV(Phi);
1586 if (isa<SCEVAddRecExpr>(PhiExpr)) {
1587 // This phi can be freely truncated to the narrowest phi type. Map the
1588 // truncated expression to it so it will be reused for narrow types.
1589 const SCEV *TruncExpr =
1590 SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
1591 ExprToIVMap[TruncExpr] = Phi;
1594 continue;
1597 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1598 // sense.
1599 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1600 continue;
1602 if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1603 Instruction *OrigInc = dyn_cast<Instruction>(
1604 OrigPhiRef->getIncomingValueForBlock(LatchBlock));
1605 Instruction *IsomorphicInc =
1606 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1608 if (OrigInc && IsomorphicInc) {
1609 // If this phi has the same width but is more canonical, replace the
1610 // original with it. As part of the "more canonical" determination,
1611 // respect a prior decision to use an IV chain.
1612 if (OrigPhiRef->getType() == Phi->getType() &&
1613 !(ChainedPhis.count(Phi) ||
1614 isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
1615 (ChainedPhis.count(Phi) ||
1616 isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1617 std::swap(OrigPhiRef, Phi);
1618 std::swap(OrigInc, IsomorphicInc);
1620 // Replacing the congruent phi is sufficient because acyclic
1621 // redundancy elimination, CSE/GVN, should handle the
1622 // rest. However, once SCEV proves that a phi is congruent,
1623 // it's often the head of an IV user cycle that is isomorphic
1624 // with the original phi. It's worth eagerly cleaning up the
1625 // common case of a single IV increment so that DeleteDeadPHIs
1626 // can remove cycles that had postinc uses.
1627 // Because we may potentially introduce a new use of OrigInc that didn't
1628 // exist before at this point, its poison flags need readjustment.
1629 const SCEV *TruncExpr =
1630 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1631 if (OrigInc != IsomorphicInc &&
1632 TruncExpr == SE.getSCEV(IsomorphicInc) &&
1633 SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
1634 hoistIVInc(OrigInc, IsomorphicInc, /*RecomputePoisonFlags*/ true)) {
1635 SCEV_DEBUG_WITH_TYPE(
1636 DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1637 << *IsomorphicInc << '\n');
1638 Value *NewInc = OrigInc;
1639 if (OrigInc->getType() != IsomorphicInc->getType()) {
1640 BasicBlock::iterator IP;
1641 if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1642 IP = PN->getParent()->getFirstInsertionPt();
1643 else
1644 IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1646 IRBuilder<> Builder(IP->getParent(), IP);
1647 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1648 NewInc = Builder.CreateTruncOrBitCast(
1649 OrigInc, IsomorphicInc->getType(), IVName);
1651 IsomorphicInc->replaceAllUsesWith(NewInc);
1652 DeadInsts.emplace_back(IsomorphicInc);
1656 SCEV_DEBUG_WITH_TYPE(DebugType,
1657 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1658 << '\n');
1659 SCEV_DEBUG_WITH_TYPE(
1660 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1661 ++NumElim;
1662 Value *NewIV = OrigPhiRef;
1663 if (OrigPhiRef->getType() != Phi->getType()) {
1664 IRBuilder<> Builder(L->getHeader(),
1665 L->getHeader()->getFirstInsertionPt());
1666 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1667 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1669 Phi->replaceAllUsesWith(NewIV);
1670 DeadInsts.emplace_back(Phi);
1672 return NumElim;
1675 bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1676 const Instruction *At,
1677 Loop *L) {
1678 using namespace llvm::PatternMatch;
1680 SmallVector<BasicBlock *, 4> ExitingBlocks;
1681 L->getExitingBlocks(ExitingBlocks);
1683 // Look for suitable value in simple conditions at the loop exits.
1684 for (BasicBlock *BB : ExitingBlocks) {
1685 ICmpInst::Predicate Pred;
1686 Instruction *LHS, *RHS;
1688 if (!match(BB->getTerminator(),
1689 m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
1690 m_BasicBlock(), m_BasicBlock())))
1691 continue;
1693 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
1694 return true;
1696 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
1697 return true;
1700 // Use the same logic that expand() uses for reusing a previous Value from
1701 // ExprValueMap. Note that we don't currently model the cost of
1702 // needing to drop poison generating flags on the instruction if we
1703 // want to reuse it. We effectively assume that has zero cost.
1704 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1705 return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
1708 template<typename T> static InstructionCost costAndCollectOperands(
1709 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1710 TargetTransformInfo::TargetCostKind CostKind,
1711 SmallVectorImpl<SCEVOperand> &Worklist) {
1713 const T *S = cast<T>(WorkItem.S);
1714 InstructionCost Cost = 0;
1715 // Object to help map SCEV operands to expanded IR instructions.
1716 struct OperationIndices {
1717 OperationIndices(unsigned Opc, size_t min, size_t max) :
1718 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1719 unsigned Opcode;
1720 size_t MinIdx;
1721 size_t MaxIdx;
1724 // Collect the operations of all the instructions that will be needed to
1725 // expand the SCEVExpr. This is so that when we come to cost the operands,
1726 // we know what the generated user(s) will be.
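// Illustrative note: a cast SCEV records {CastOpcode, 0, 0}, so its single
// operand is later costed as operand 0 of that cast; an n-ary add records
// {Instruction::Add, MinIdx, MaxIdx} and each operand's index is clamped
// into that range before being pushed onto the worklist below.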
1727 SmallVector<OperationIndices, 2> Operations;
1729 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1730 Operations.emplace_back(Opcode, 0, 0);
1731 return TTI.getCastInstrCost(Opcode, S->getType(),
1732 S->getOperand(0)->getType(),
1733 TTI::CastContextHint::None, CostKind);
1736 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1737 unsigned MinIdx = 0,
1738 unsigned MaxIdx = 1) -> InstructionCost {
1739 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1740 return NumRequired *
1741 TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
1744 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1745 unsigned MaxIdx) -> InstructionCost {
1746 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1747 Type *OpType = S->getType();
1748 return NumRequired * TTI.getCmpSelInstrCost(
1749 Opcode, OpType, CmpInst::makeCmpResultType(OpType),
1750 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1753 switch (S->getSCEVType()) {
1754 case scCouldNotCompute:
1755 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1756 case scUnknown:
1757 case scConstant:
1758 case scVScale:
1759 return 0;
1760 case scPtrToInt:
1761 Cost = CastCost(Instruction::PtrToInt);
1762 break;
1763 case scTruncate:
1764 Cost = CastCost(Instruction::Trunc);
1765 break;
1766 case scZeroExtend:
1767 Cost = CastCost(Instruction::ZExt);
1768 break;
1769 case scSignExtend:
1770 Cost = CastCost(Instruction::SExt);
1771 break;
1772 case scUDivExpr: {
1773 unsigned Opcode = Instruction::UDiv;
1774 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1775 if (SC->getAPInt().isPowerOf2())
1776 Opcode = Instruction::LShr;
1777 Cost = ArithCost(Opcode, 1);
1778 break;
1780 case scAddExpr:
1781 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1782 break;
1783 case scMulExpr:
1784 // TODO: this is a very pessimistic cost model for Mul, because of the
1785 // binary-exponentiation (bin pow) algorithm the expander actually uses;
1786 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1787 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1788 break;
1789 case scSMaxExpr:
1790 case scUMaxExpr:
1791 case scSMinExpr:
1792 case scUMinExpr:
1793 case scSequentialUMinExpr: {
1794 // FIXME: should this ask for the cost of the min/max intrinsics instead?
1795 // The reduction tree.
1796 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1797 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
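// E.g. a umax with three operands is costed here as a reduction of two
// icmp + select pairs; the scSequentialUMinExpr case below then charges the
// additional poison "safety net" on top of that.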
1798 switch (S->getSCEVType()) {
1799 case scSequentialUMinExpr: {
1800 // The safety net against poison.
1801 // FIXME: this is broken.
1802 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1803 Cost += ArithCost(Instruction::Or,
1804 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1805 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1806 break;
1808 default:
1809 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1810 "Unhandled SCEV expression type?");
1811 break;
1813 break;
1815 case scAddRecExpr: {
1816 // In this polynomial, we may have some zero operands, and we shouldn't
1817 // really charge for those. So how many non-zero coefficients are there?
1818 int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1819 return !Op->isZero();
1822 assert(NumTerms >= 1 && "Polynomial should have at least one term.");
1823 assert(!(*std::prev(S->operands().end()))->isZero() &&
1824 "Last operand should not be zero");
1826 // Ignoring the constant term (operand 0), how many of the coefficients are u> 1?
1827 int NumNonZeroDegreeNonOneTerms =
1828 llvm::count_if(S->operands(), [](const SCEV *Op) {
1829 auto *SConst = dyn_cast<SCEVConstant>(Op);
1830 return !SConst || SConst->getAPInt().ugt(1);
1833 // Much like with a normal add expr, the polynomial will require
1834 // one less addition than the number of its terms.
1835 InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1836 /*MinIdx*/ 1, /*MaxIdx*/ 1);
1837 // Here, *each* one of those will require a multiplication.
1838 InstructionCost MulCost =
1839 ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1840 Cost = AddCost + MulCost;
1842 // What is the degree of this polynomial?
1843 int PolyDegree = S->getNumOperands() - 1;
1844 assert(PolyDegree >= 1 && "Should be at least affine.");
1846 // The final term will be:
1847 // Op_{PolyDegree} * x ^ {PolyDegree}
1848 // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1849 // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1850 // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1851 // FIXME: this is conservatively correct, but might be overly pessimistic.
1852 Cost += MulCost * (PolyDegree - 1);
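// Rough worked example: for {0,+,3,+,5} (PolyDegree = 2) this charges one
// add (two non-zero terms), two muls for the coefficients u> 1, and then
// MulCost * (PolyDegree - 1) once more for forming x^2, which is the source
// of the pessimism noted in the FIXME above.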
1853 break;
1857 for (auto &CostOp : Operations) {
1858 for (auto SCEVOp : enumerate(S->operands())) {
1859 // Clamp the index to account for multiple IR operations being chained.
1860 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1861 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1862 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1865 return Cost;
1868 bool SCEVExpander::isHighCostExpansionHelper(
1869 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1870 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1871 SmallPtrSetImpl<const SCEV *> &Processed,
1872 SmallVectorImpl<SCEVOperand> &Worklist) {
1873 if (Cost > Budget)
1874 return true; // Already run out of budget, give up.
1876 const SCEV *S = WorkItem.S;
1877 // Was the cost of expansion of this expression already accounted for?
1878 if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
1879 return false; // We have already accounted for this expression.
1881 // If we can find an existing value for this scev available at the point "At"
1882 // then consider the expression cheap.
1883 if (hasRelatedExistingExpansion(S, &At, L))
1884 return false; // Consider the expression to be free.
1886 TargetTransformInfo::TargetCostKind CostKind =
1887 L->getHeader()->getParent()->hasMinSize()
1888 ? TargetTransformInfo::TCK_CodeSize
1889 : TargetTransformInfo::TCK_RecipThroughput;
1891 switch (S->getSCEVType()) {
1892 case scCouldNotCompute:
1893 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1894 case scUnknown:
1895 case scVScale:
1896 // Assumed to be zero cost.
1897 return false;
1898 case scConstant: {
1899 // Only evaluate the costs of constants when optimizing for size.
1900 if (CostKind != TargetTransformInfo::TCK_CodeSize)
1901 return false;
1902 const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
1903 Type *Ty = S->getType();
1904 Cost += TTI.getIntImmCostInst(
1905 WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
1906 return Cost > Budget;
1908 case scTruncate:
1909 case scPtrToInt:
1910 case scZeroExtend:
1911 case scSignExtend: {
1912 Cost +=
1913 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
1914 return false; // Will answer upon next entry into this function.
1916 case scUDivExpr: {
1917 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
1918 // HowManyLessThans produced to compute a precise expression, rather than a
1919 // UDiv from the user's code. If we can't find a UDiv in the code with some
1920 // simple searching, we need to account for its cost.
1922 // At the beginning of this function we already tried to find an existing
1923 // value for plain 'S'. Now try to look up 'S + 1', since it is a common
1924 // pattern involving division. This is just a simple search heuristic.
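// For instance, when S is a udiv that ScalarEvolution formed as part of a
// backedge-taken count, the corresponding trip count S + 1 is often already
// materialized in the IR, so the divide is treated as free here.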
1925 if (hasRelatedExistingExpansion(
1926 SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
1927 return false; // Consider it to be free.
1929 Cost +=
1930 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
1931 return false; // Will answer upon next entry into this function.
1933 case scAddExpr:
1934 case scMulExpr:
1935 case scUMaxExpr:
1936 case scSMaxExpr:
1937 case scUMinExpr:
1938 case scSMinExpr:
1939 case scSequentialUMinExpr: {
1940 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
1941 "Nary expr should have more than 1 operand.");
1942 // The simple nary expr will require one less op (or pair of ops)
1943 // than the number of its terms.
1944 Cost +=
1945 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
1946 return Cost > Budget;
1948 case scAddRecExpr: {
1949 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
1950 "Polynomial should be at least linear");
1951 Cost += costAndCollectOperands<SCEVAddRecExpr>(
1952 WorkItem, TTI, CostKind, Worklist);
1953 return Cost > Budget;
1956 llvm_unreachable("Unknown SCEV kind!");
1959 Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
1960 Instruction *IP) {
1961 assert(IP);
1962 switch (Pred->getKind()) {
1963 case SCEVPredicate::P_Union:
1964 return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
1965 case SCEVPredicate::P_Compare:
1966 return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
1967 case SCEVPredicate::P_Wrap: {
1968 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
1969 return expandWrapPredicate(AddRecPred, IP);
1972 llvm_unreachable("Unknown SCEV predicate type");
1975 Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
1976 Instruction *IP) {
1977 Value *Expr0 = expand(Pred->getLHS(), IP);
1978 Value *Expr1 = expand(Pred->getRHS(), IP);
1980 Builder.SetInsertPoint(IP);
1981 auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
1982 auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
1983 return I;
1986 Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
1987 Instruction *Loc, bool Signed) {
1988 assert(AR->isAffine() && "Cannot generate RT check for "
1989 "non-affine expression");
1991 // FIXME: It is highly suspicious that we're ignoring the predicates here.
1992 SmallVector<const SCEVPredicate *, 4> Pred;
1993 const SCEV *ExitCount =
1994 SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
1996 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
1998 const SCEV *Step = AR->getStepRecurrence(SE);
1999 const SCEV *Start = AR->getStart();
2001 Type *ARTy = AR->getType();
2002 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2003 unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2005 // The expression {Start,+,Step} has nusw/nssw if
2006 //   Step <  0: Start - |Step| * Backedge <= Start
2007 //   Step >= 0: Start + |Step| * Backedge >  Start
2008 // and |Step| * Backedge doesn't unsigned overflow.
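// Roughly, the check materialized below has this shape in the general case
// where the sign of Step is unknown (names are illustrative only):
//   %mul  = call { iN, i1 } @llvm.umul.with.overflow.iN(iN |Step|, iN trunc(BTC))
//   %add  = Start + %mul.result        ; candidate end for Step >= 0
//   %sub  = Start - %mul.result        ; candidate end for Step <  0
//   %wrap = select (Step < 0), (%sub > Start), (%add < Start)
//   %chk  = or %wrap, %mul.overflow
// with the comparisons being signed or unsigned depending on `Signed`.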
2010 Builder.SetInsertPoint(Loc);
2011 Value *TripCountVal = expand(ExitCount, Loc);
2013 IntegerType *Ty =
2014 IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
2016 Value *StepValue = expand(Step, Loc);
2017 Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
2018 Value *StartValue = expand(Start, Loc);
2020 ConstantInt *Zero =
2021 ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
2023 Builder.SetInsertPoint(Loc);
2024 // Compute |Step|
2025 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2026 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2028 // Compute |Step| * Backedge
2029 // Compute:
2030 // 1. Start + |Step| * Backedge < Start
2031 // 2. Start - |Step| * Backedge > Start
2033 // And select either 1. or 2. depending on whether step is positive or
2034 // negative. If Step is known to be positive or negative, only create
2035 // either 1. or 2.
2036 auto ComputeEndCheck = [&]() -> Value * {
2037 // Checking <u 0 is always false.
2038 if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2039 return ConstantInt::getFalse(Loc->getContext());
2041 // Get the backedge-taken count and truncate or extend it to the AR type.
2042 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2044 Value *MulV, *OfMul;
2045 if (Step->isOne()) {
2046 // Special-case a Step of one: the potentially costly `umul_with_overflow`
2047 // isn't needed because there is never an overflow, so to avoid artificially
2048 // inflating the cost of the check, directly emit the optimized IR.
2049 MulV = TruncTripCount;
2050 OfMul = ConstantInt::getFalse(MulV->getContext());
2051 } else {
2052 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2053 Intrinsic::umul_with_overflow, Ty);
2054 CallInst *Mul =
2055 Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2056 MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2057 OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2060 Value *Add = nullptr, *Sub = nullptr;
2061 bool NeedPosCheck = !SE.isKnownNegative(Step);
2062 bool NeedNegCheck = !SE.isKnownPositive(Step);
2064 if (isa<PointerType>(ARTy)) {
2065 Value *NegMulV = Builder.CreateNeg(MulV);
2066 if (NeedPosCheck)
2067 Add = Builder.CreatePtrAdd(StartValue, MulV);
2068 if (NeedNegCheck)
2069 Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
2070 } else {
2071 if (NeedPosCheck)
2072 Add = Builder.CreateAdd(StartValue, MulV);
2073 if (NeedNegCheck)
2074 Sub = Builder.CreateSub(StartValue, MulV);
2077 Value *EndCompareLT = nullptr;
2078 Value *EndCompareGT = nullptr;
2079 Value *EndCheck = nullptr;
2080 if (NeedPosCheck)
2081 EndCheck = EndCompareLT = Builder.CreateICmp(
2082 Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2083 if (NeedNegCheck)
2084 EndCheck = EndCompareGT = Builder.CreateICmp(
2085 Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2086 if (NeedPosCheck && NeedNegCheck) {
2087 // Select the answer based on the sign of Step.
2088 EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2090 return Builder.CreateOr(EndCheck, OfMul);
2092 Value *EndCheck = ComputeEndCheck();
2094 // If the backedge taken count type is larger than the AR type,
2095 // check that we don't drop any bits by truncating it. If we are
2096 // dropping bits, then we have overflow (unless the step is zero).
2097 if (SrcBits > DstBits) {
2098 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2099 auto *BackedgeCheck =
2100 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2101 ConstantInt::get(Loc->getContext(), MaxVal));
2102 BackedgeCheck = Builder.CreateAnd(
2103 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2105 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2108 return EndCheck;
2111 Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2112 Instruction *IP) {
2113 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2114 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2116 // Add a check for NUSW
2117 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2118 NUSWCheck = generateOverflowCheck(A, IP, false);
2120 // Add a check for NSSW
2121 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2122 NSSWCheck = generateOverflowCheck(A, IP, true);
2124 if (NUSWCheck && NSSWCheck)
2125 return Builder.CreateOr(NUSWCheck, NSSWCheck);
2127 if (NUSWCheck)
2128 return NUSWCheck;
2130 if (NSSWCheck)
2131 return NSSWCheck;
2133 return ConstantInt::getFalse(IP->getContext());
2136 Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2137 Instruction *IP) {
2138 // Loop over all checks in this set.
2139 SmallVector<Value *> Checks;
2140 for (const auto *Pred : Union->getPredicates()) {
2141 Checks.push_back(expandCodeForPredicate(Pred, IP));
2142 Builder.SetInsertPoint(IP);
2145 if (Checks.empty())
2146 return ConstantInt::getFalse(IP->getContext());
2147 return Builder.CreateOr(Checks);
2150 Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2151 auto *DefI = dyn_cast<Instruction>(V);
2152 if (!PreserveLCSSA || !DefI)
2153 return V;
2155 Instruction *InsertPt = &*Builder.GetInsertPoint();
2156 Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
2157 Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
2158 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
2159 return V;
2161 // Create a temporary instruction at the current insertion point, so we
2162 // can hand it off to the helper to create LCSSA PHIs if required for the
2163 // new use.
2164 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2165 // would accept an insertion point and return an LCSSA phi for that
2166 // insertion point, so there is no need to insert & remove the temporary
2167 // instruction.
2168 Type *ToTy;
2169 if (DefI->getType()->isIntegerTy())
2170 ToTy = PointerType::get(DefI->getContext(), 0);
2171 else
2172 ToTy = Type::getInt32Ty(DefI->getContext());
2173 Instruction *User =
2174 CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
2175 auto RemoveUserOnExit =
2176 make_scope_exit([User]() { User->eraseFromParent(); });
2178 SmallVector<Instruction *, 1> ToUpdate;
2179 ToUpdate.push_back(DefI);
2180 SmallVector<PHINode *, 16> PHIsToRemove;
2181 SmallVector<PHINode *, 16> InsertedPHIs;
2182 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
2183 &InsertedPHIs);
2184 for (PHINode *PN : InsertedPHIs)
2185 rememberInstruction(PN);
2186 for (PHINode *PN : PHIsToRemove) {
2187 if (!PN->use_empty())
2188 continue;
2189 InsertedValues.erase(PN);
2190 InsertedPostIncValues.erase(PN);
2191 PN->eraseFromParent();
2194 return User->getOperand(0);
2197 namespace {
2198 // Search for a SCEV subexpression that is not safe to expand. Any expression
2199 // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2200 // UDiv expressions. We don't know if the UDiv is derived from an IR divide
2201 // instruction, but the important thing is that we prove the denominator is
2202 // nonzero before expansion.
2204 // IVUsers already checks that IV-derived expressions are safe. So this check is
2205 // only needed when the expression includes some subexpression that is not IV
2206 // derived.
2208 // Currently, we only allow division by a value provably non-zero here.
2210 // We cannot generally expand recurrences unless the step dominates the loop
2211 // header. The expander handles the special case of affine recurrences by
2212 // scaling the recurrence outside the loop, but this technique isn't generally
2213 // applicable. Expanding a nested recurrence outside a loop requires computing
2214 // binomial coefficients. This could be done, but the recurrence has to be in a
2215 // perfectly reduced form, which can't be guaranteed.
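// Examples of expressions rejected here: a udiv whose divisor is not known to
// be non-zero, and (outside of canonical mode, or for non-affine recurrences)
// an addrec whose loop has no preheader to expand into.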
2216 struct SCEVFindUnsafe {
2217 ScalarEvolution &SE;
2218 bool CanonicalMode;
2219 bool IsUnsafe = false;
2221 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2222 : SE(SE), CanonicalMode(CanonicalMode) {}
2224 bool follow(const SCEV *S) {
2225 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2226 if (!SE.isKnownNonZero(D->getRHS())) {
2227 IsUnsafe = true;
2228 return false;
2231 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2232 // For non-affine addrecs or in non-canonical mode we need a preheader
2233 // to insert into.
2234 if (!AR->getLoop()->getLoopPreheader() &&
2235 (!CanonicalMode || !AR->isAffine())) {
2236 IsUnsafe = true;
2237 return false;
2240 return true;
2242 bool isDone() const { return IsUnsafe; }
2244 } // namespace
2246 bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2247 SCEVFindUnsafe Search(SE, CanonicalMode);
2248 visitAll(S, Search);
2249 return !Search.IsUnsafe;
2252 bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2253 const Instruction *InsertionPoint) const {
2254 if (!isSafeToExpand(S))
2255 return false;
2256 // We have to prove that the expanded site of S dominates InsertionPoint.
2257 // This is easy when not in the same block, but hard when S is an instruction
2258 // to be expanded somewhere inside the same block as our insertion point.
2259 // What we really need here is something analogous to an OrderedBasicBlock,
2260 // but for the moment, we paper over the problem by handling two common and
2261 // cheap-to-check cases.
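// Concretely, the two cases handled below are: (1) S is available in a block
// that properly dominates the insertion point's block, and (2) same-block
// availability where the insertion point is the terminator or directly uses
// the SCEVUnknown's underlying value.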
2262 if (SE.properlyDominates(S, InsertionPoint->getParent()))
2263 return true;
2264 if (SE.dominates(S, InsertionPoint->getParent())) {
2265 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2266 return true;
2267 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
2268 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
2269 return true;
2271 return false;
2274 void SCEVExpanderCleaner::cleanup() {
2275 // Result is used, nothing to remove.
2276 if (ResultUsed)
2277 return;
2279 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2280 #ifndef NDEBUG
2281 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2282 InsertedInstructions.end());
2283 (void)InsertedSet;
2284 #endif
2285 // Remove sets with value handles.
2286 Expander.clear();
2288 // Remove all inserted instructions.
2289 for (Instruction *I : reverse(InsertedInstructions)) {
2290 #ifndef NDEBUG
2291 assert(all_of(I->users(),
2292 [&InsertedSet](Value *U) {
2293 return InsertedSet.contains(cast<Instruction>(U));
2294 }) &&
2295 "removed instruction should only be used by instructions inserted "
2296 "during expansion");
2297 #endif
2298 assert(!I->getType()->isVoidTy() &&
2299 "inserted instruction should have non-void types");
2300 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
2301 I->eraseFromParent();