1 //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the implementation of the scalar evolution expander,
10 // which is used to generate the code corresponding to a given scalar evolution
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/ScopeExit.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/PatternMatch.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Utils/LoopUtils.h"
31 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
32 #define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
34 #define SCEV_DEBUG_WITH_TYPE(TYPE, X)
39 cl::opt
<unsigned> llvm::SCEVCheapExpansionBudget(
40 "scev-cheap-expansion-budget", cl::Hidden
, cl::init(4),
41 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
42 "controls the budget that is considered cheap (default = 4)"));
44 using namespace PatternMatch
;
46 PoisonFlags::PoisonFlags(const Instruction
*I
) {
53 GEPNW
= GEPNoWrapFlags::none();
54 if (auto *OBO
= dyn_cast
<OverflowingBinaryOperator
>(I
)) {
55 NUW
= OBO
->hasNoUnsignedWrap();
56 NSW
= OBO
->hasNoSignedWrap();
58 if (auto *PEO
= dyn_cast
<PossiblyExactOperator
>(I
))
59 Exact
= PEO
->isExact();
60 if (auto *PDI
= dyn_cast
<PossiblyDisjointInst
>(I
))
61 Disjoint
= PDI
->isDisjoint();
62 if (auto *PNI
= dyn_cast
<PossiblyNonNegInst
>(I
))
63 NNeg
= PNI
->hasNonNeg();
64 if (auto *TI
= dyn_cast
<TruncInst
>(I
)) {
65 NUW
= TI
->hasNoUnsignedWrap();
66 NSW
= TI
->hasNoSignedWrap();
68 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(I
))
69 GEPNW
= GEP
->getNoWrapFlags();
70 if (auto *ICmp
= dyn_cast
<ICmpInst
>(I
))
71 SameSign
= ICmp
->hasSameSign();
74 void PoisonFlags::apply(Instruction
*I
) {
75 if (isa
<OverflowingBinaryOperator
>(I
)) {
76 I
->setHasNoUnsignedWrap(NUW
);
77 I
->setHasNoSignedWrap(NSW
);
79 if (isa
<PossiblyExactOperator
>(I
))
81 if (auto *PDI
= dyn_cast
<PossiblyDisjointInst
>(I
))
82 PDI
->setIsDisjoint(Disjoint
);
83 if (auto *PNI
= dyn_cast
<PossiblyNonNegInst
>(I
))
85 if (isa
<TruncInst
>(I
)) {
86 I
->setHasNoUnsignedWrap(NUW
);
87 I
->setHasNoSignedWrap(NSW
);
89 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(I
))
90 GEP
->setNoWrapFlags(GEPNW
);
91 if (auto *ICmp
= dyn_cast
<ICmpInst
>(I
))
92 ICmp
->setSameSign(SameSign
);
95 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
96 /// reusing an existing cast if a suitable one (= dominating IP) exists, or
97 /// creating a new one.
98 Value
*SCEVExpander::ReuseOrCreateCast(Value
*V
, Type
*Ty
,
99 Instruction::CastOps Op
,
100 BasicBlock::iterator IP
) {
101 // This function must be called with the builder having a valid insertion
102 // point. It doesn't need to be the actual IP where the uses of the returned
103 // cast will be added, but it must dominate such IP.
104 // We use this precondition to produce a cast that will dominate all its
105 // uses. In particular, this is crucial for the case where the builder's
106 // insertion point *is* the point where we were asked to put the cast.
107 // Since we don't know the builder's insertion point is actually
108 // where the uses will be added (only that it dominates it), we are
109 // not allowed to move it.
110 BasicBlock::iterator BIP
= Builder
.GetInsertPoint();
112 Value
*Ret
= nullptr;
114 // Check to see if there is already a cast!
115 for (User
*U
: V
->users()) {
116 if (U
->getType() != Ty
)
118 CastInst
*CI
= dyn_cast
<CastInst
>(U
);
119 if (!CI
|| CI
->getOpcode() != Op
)
122 // Found a suitable cast that is at IP or comes before IP. Use it. Note that
123 // the cast must also properly dominate the Builder's insertion point.
124 if (IP
->getParent() == CI
->getParent() && &*BIP
!= CI
&&
125 (&*IP
== CI
|| CI
->comesBefore(&*IP
))) {
131 // Create a new cast.
133 SCEVInsertPointGuard
Guard(Builder
, this);
134 Builder
.SetInsertPoint(&*IP
);
135 Ret
= Builder
.CreateCast(Op
, V
, Ty
, V
->getName());
138 // We assert at the end of the function since IP might point to an
139 // instruction with different dominance properties than a cast
140 // (an invoke for example) and not dominate BIP (but the cast does).
141 assert(!isa
<Instruction
>(Ret
) ||
142 SE
.DT
.dominates(cast
<Instruction
>(Ret
), &*BIP
));
148 SCEVExpander::findInsertPointAfter(Instruction
*I
,
149 Instruction
*MustDominate
) const {
150 BasicBlock::iterator IP
= ++I
->getIterator();
151 if (auto *II
= dyn_cast
<InvokeInst
>(I
))
152 IP
= II
->getNormalDest()->begin();
154 while (isa
<PHINode
>(IP
))
157 if (isa
<FuncletPadInst
>(IP
) || isa
<LandingPadInst
>(IP
)) {
159 } else if (isa
<CatchSwitchInst
>(IP
)) {
160 IP
= MustDominate
->getParent()->getFirstInsertionPt();
162 assert(!IP
->isEHPad() && "unexpected eh pad!");
165 // Adjust insert point to be after instructions inserted by the expander, so
166 // we can re-use already inserted instructions. Avoid skipping past the
167 // original \p MustDominate, in case it is an inserted instruction.
168 while (isInsertedInstruction(&*IP
) && &*IP
!= MustDominate
)
175 SCEVExpander::GetOptimalInsertionPointForCastOf(Value
*V
) const {
176 // Cast the argument at the beginning of the entry block, after
177 // any bitcasts of other arguments.
178 if (Argument
*A
= dyn_cast
<Argument
>(V
)) {
179 BasicBlock::iterator IP
= A
->getParent()->getEntryBlock().begin();
180 while ((isa
<BitCastInst
>(IP
) &&
181 isa
<Argument
>(cast
<BitCastInst
>(IP
)->getOperand(0)) &&
182 cast
<BitCastInst
>(IP
)->getOperand(0) != A
) ||
183 isa
<DbgInfoIntrinsic
>(IP
))
188 // Cast the instruction immediately after the instruction.
189 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
190 return findInsertPointAfter(I
, &*Builder
.GetInsertPoint());
192 // Otherwise, this must be some kind of a constant,
193 // so let's plop this cast into the function's entry block.
194 assert(isa
<Constant
>(V
) &&
195 "Expected the cast argument to be a global/constant");
196 return Builder
.GetInsertBlock()
199 .getFirstInsertionPt();
202 /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
203 /// which must be possible with a noop cast, doing what we can to share
205 Value
*SCEVExpander::InsertNoopCastOfTo(Value
*V
, Type
*Ty
) {
206 Instruction::CastOps Op
= CastInst::getCastOpcode(V
, false, Ty
, false);
207 assert((Op
== Instruction::BitCast
||
208 Op
== Instruction::PtrToInt
||
209 Op
== Instruction::IntToPtr
) &&
210 "InsertNoopCastOfTo cannot perform non-noop casts!");
211 assert(SE
.getTypeSizeInBits(V
->getType()) == SE
.getTypeSizeInBits(Ty
) &&
212 "InsertNoopCastOfTo cannot change sizes!");
214 // inttoptr only works for integral pointers. For non-integral pointers, we
215 // can create a GEP on null with the integral value as index. Note that
216 // it is safe to use GEP of null instead of inttoptr here, because only
217 // expressions already based on a GEP of null should be converted to pointers
219 if (Op
== Instruction::IntToPtr
) {
220 auto *PtrTy
= cast
<PointerType
>(Ty
);
221 if (DL
.isNonIntegralPointerType(PtrTy
))
222 return Builder
.CreatePtrAdd(Constant::getNullValue(PtrTy
), V
, "scevgep");
224 // Short-circuit unnecessary bitcasts.
225 if (Op
== Instruction::BitCast
) {
226 if (V
->getType() == Ty
)
228 if (CastInst
*CI
= dyn_cast
<CastInst
>(V
)) {
229 if (CI
->getOperand(0)->getType() == Ty
)
230 return CI
->getOperand(0);
233 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
234 if ((Op
== Instruction::PtrToInt
|| Op
== Instruction::IntToPtr
) &&
235 SE
.getTypeSizeInBits(Ty
) == SE
.getTypeSizeInBits(V
->getType())) {
236 if (CastInst
*CI
= dyn_cast
<CastInst
>(V
))
237 if ((CI
->getOpcode() == Instruction::PtrToInt
||
238 CI
->getOpcode() == Instruction::IntToPtr
) &&
239 SE
.getTypeSizeInBits(CI
->getType()) ==
240 SE
.getTypeSizeInBits(CI
->getOperand(0)->getType()))
241 return CI
->getOperand(0);
242 if (ConstantExpr
*CE
= dyn_cast
<ConstantExpr
>(V
))
243 if ((CE
->getOpcode() == Instruction::PtrToInt
||
244 CE
->getOpcode() == Instruction::IntToPtr
) &&
245 SE
.getTypeSizeInBits(CE
->getType()) ==
246 SE
.getTypeSizeInBits(CE
->getOperand(0)->getType()))
247 return CE
->getOperand(0);
250 // Fold a cast of a constant.
251 if (Constant
*C
= dyn_cast
<Constant
>(V
))
252 return ConstantExpr::getCast(Op
, C
, Ty
);
254 // Try to reuse existing cast, or insert one.
255 return ReuseOrCreateCast(V
, Ty
, Op
, GetOptimalInsertionPointForCastOf(V
));
258 /// InsertBinop - Insert the specified binary operator, doing a small amount
259 /// of work to avoid inserting an obviously redundant operation, and hoisting
260 /// to an outer loop when the opportunity is there and it is safe.
261 Value
*SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode
,
262 Value
*LHS
, Value
*RHS
,
263 SCEV::NoWrapFlags Flags
, bool IsSafeToHoist
) {
264 // Fold a binop with constant operands.
265 if (Constant
*CLHS
= dyn_cast
<Constant
>(LHS
))
266 if (Constant
*CRHS
= dyn_cast
<Constant
>(RHS
))
267 if (Constant
*Res
= ConstantFoldBinaryOpOperands(Opcode
, CLHS
, CRHS
, DL
))
270 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
271 unsigned ScanLimit
= 6;
272 BasicBlock::iterator BlockBegin
= Builder
.GetInsertBlock()->begin();
273 // Scanning starts from the last instruction before the insertion point.
274 BasicBlock::iterator IP
= Builder
.GetInsertPoint();
275 if (IP
!= BlockBegin
) {
277 for (; ScanLimit
; --IP
, --ScanLimit
) {
278 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
280 if (isa
<DbgInfoIntrinsic
>(IP
))
283 auto canGenerateIncompatiblePoison
= [&Flags
](Instruction
*I
) {
284 // Ensure that no-wrap flags match.
285 if (isa
<OverflowingBinaryOperator
>(I
)) {
286 if (I
->hasNoSignedWrap() != (Flags
& SCEV::FlagNSW
))
288 if (I
->hasNoUnsignedWrap() != (Flags
& SCEV::FlagNUW
))
291 // Conservatively, do not use any instruction which has any of exact
293 if (isa
<PossiblyExactOperator
>(I
) && I
->isExact())
297 if (IP
->getOpcode() == (unsigned)Opcode
&& IP
->getOperand(0) == LHS
&&
298 IP
->getOperand(1) == RHS
&& !canGenerateIncompatiblePoison(&*IP
))
300 if (IP
== BlockBegin
) break;
304 // Save the original insertion point so we can restore it when we're done.
305 DebugLoc Loc
= Builder
.GetInsertPoint()->getDebugLoc();
306 SCEVInsertPointGuard
Guard(Builder
, this);
309 // Move the insertion point out of as many loops as we can.
310 while (const Loop
*L
= SE
.LI
.getLoopFor(Builder
.GetInsertBlock())) {
311 if (!L
->isLoopInvariant(LHS
) || !L
->isLoopInvariant(RHS
)) break;
312 BasicBlock
*Preheader
= L
->getLoopPreheader();
313 if (!Preheader
) break;
315 // Ok, move up a level.
316 Builder
.SetInsertPoint(Preheader
->getTerminator());
320 // If we haven't found this binop, insert it.
321 // TODO: Use the Builder, which will make CreateBinOp below fold with
322 // InstSimplifyFolder.
323 Instruction
*BO
= Builder
.Insert(BinaryOperator::Create(Opcode
, LHS
, RHS
));
324 BO
->setDebugLoc(Loc
);
325 if (Flags
& SCEV::FlagNUW
)
326 BO
->setHasNoUnsignedWrap();
327 if (Flags
& SCEV::FlagNSW
)
328 BO
->setHasNoSignedWrap();
333 /// expandAddToGEP - Expand an addition expression with a pointer type into
334 /// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
335 /// BasicAliasAnalysis and other passes analyze the result. See the rules
336 /// for getelementptr vs. inttoptr in
337 /// http://llvm.org/docs/LangRef.html#pointeraliasing
340 /// Design note: The correctness of using getelementptr here depends on
341 /// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
342 /// they may introduce pointer arithmetic which may not be safely converted
343 /// into getelementptr.
345 /// Design note: It might seem desirable for this function to be more
346 /// loop-aware. If some of the indices are loop-invariant while others
347 /// aren't, it might seem desirable to emit multiple GEPs, keeping the
348 /// loop-invariant portions of the overall computation outside the loop.
349 /// However, there are a few reasons this is not done here. Hoisting simple
350 /// arithmetic is a low-level optimization that often isn't very
351 /// important until late in the optimization process. In fact, passes
352 /// like InstructionCombining will combine GEPs, even if it means
353 /// pushing loop-invariant computation down into loops, so even if the
354 /// GEPs were split here, the work would quickly be undone. The
355 /// LoopStrengthReduction pass, which is usually run quite late (and
356 /// after the last InstructionCombining pass), takes care of hoisting
357 /// loop-invariant portions of expressions, after considering what
358 /// can be folded using target addressing modes.
360 Value
*SCEVExpander::expandAddToGEP(const SCEV
*Offset
, Value
*V
,
361 SCEV::NoWrapFlags Flags
) {
362 assert(!isa
<Instruction
>(V
) ||
363 SE
.DT
.dominates(cast
<Instruction
>(V
), &*Builder
.GetInsertPoint()));
365 Value
*Idx
= expand(Offset
);
366 GEPNoWrapFlags NW
= (Flags
& SCEV::FlagNUW
) ? GEPNoWrapFlags::noUnsignedWrap()
367 : GEPNoWrapFlags::none();
369 // Fold a GEP with constant operands.
370 if (Constant
*CLHS
= dyn_cast
<Constant
>(V
))
371 if (Constant
*CRHS
= dyn_cast
<Constant
>(Idx
))
372 return Builder
.CreatePtrAdd(CLHS
, CRHS
, "", NW
);
374 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
375 unsigned ScanLimit
= 6;
376 BasicBlock::iterator BlockBegin
= Builder
.GetInsertBlock()->begin();
377 // Scanning starts from the last instruction before the insertion point.
378 BasicBlock::iterator IP
= Builder
.GetInsertPoint();
379 if (IP
!= BlockBegin
) {
381 for (; ScanLimit
; --IP
, --ScanLimit
) {
382 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
384 if (isa
<DbgInfoIntrinsic
>(IP
))
386 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(IP
)) {
387 if (GEP
->getPointerOperand() == V
&&
388 GEP
->getSourceElementType() == Builder
.getInt8Ty() &&
389 GEP
->getOperand(1) == Idx
) {
391 GEP
->setNoWrapFlags(GEP
->getNoWrapFlags() & NW
);
395 if (IP
== BlockBegin
) break;
399 // Save the original insertion point so we can restore it when we're done.
400 SCEVInsertPointGuard
Guard(Builder
, this);
402 // Move the insertion point out of as many loops as we can.
403 while (const Loop
*L
= SE
.LI
.getLoopFor(Builder
.GetInsertBlock())) {
404 if (!L
->isLoopInvariant(V
) || !L
->isLoopInvariant(Idx
)) break;
405 BasicBlock
*Preheader
= L
->getLoopPreheader();
406 if (!Preheader
) break;
408 // Ok, move up a level.
409 Builder
.SetInsertPoint(Preheader
->getTerminator());
413 return Builder
.CreatePtrAdd(V
, Idx
, "scevgep", NW
);
416 /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
417 /// SCEV expansion. If they are nested, this is the most nested. If they are
418 /// neighboring, pick the later.
419 static const Loop
*PickMostRelevantLoop(const Loop
*A
, const Loop
*B
,
423 if (A
->contains(B
)) return B
;
424 if (B
->contains(A
)) return A
;
425 if (DT
.dominates(A
->getHeader(), B
->getHeader())) return B
;
426 if (DT
.dominates(B
->getHeader(), A
->getHeader())) return A
;
427 return A
; // Arbitrarily break the tie.
430 /// getRelevantLoop - Get the most relevant loop associated with the given
431 /// expression, according to PickMostRelevantLoop.
432 const Loop
*SCEVExpander::getRelevantLoop(const SCEV
*S
) {
433 // Test whether we've already computed the most relevant loop for this SCEV.
434 auto Pair
= RelevantLoops
.insert(std::make_pair(S
, nullptr));
436 return Pair
.first
->second
;
438 switch (S
->getSCEVType()) {
441 return nullptr; // A constant has no relevant loops.
454 case scSequentialUMinExpr
: {
455 const Loop
*L
= nullptr;
456 if (const SCEVAddRecExpr
*AR
= dyn_cast
<SCEVAddRecExpr
>(S
))
458 for (const SCEV
*Op
: S
->operands())
459 L
= PickMostRelevantLoop(L
, getRelevantLoop(Op
), SE
.DT
);
460 return RelevantLoops
[S
] = L
;
463 const SCEVUnknown
*U
= cast
<SCEVUnknown
>(S
);
464 if (const Instruction
*I
= dyn_cast
<Instruction
>(U
->getValue()))
465 return Pair
.first
->second
= SE
.LI
.getLoopFor(I
->getParent());
466 // A non-instruction has no relevant loops.
469 case scCouldNotCompute
:
470 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
472 llvm_unreachable("Unexpected SCEV type!");
477 /// LoopCompare - Compare loops by PickMostRelevantLoop.
481 explicit LoopCompare(DominatorTree
&dt
) : DT(dt
) {}
483 bool operator()(std::pair
<const Loop
*, const SCEV
*> LHS
,
484 std::pair
<const Loop
*, const SCEV
*> RHS
) const {
485 // Keep pointer operands sorted at the end.
486 if (LHS
.second
->getType()->isPointerTy() !=
487 RHS
.second
->getType()->isPointerTy())
488 return LHS
.second
->getType()->isPointerTy();
490 // Compare loops with PickMostRelevantLoop.
491 if (LHS
.first
!= RHS
.first
)
492 return PickMostRelevantLoop(LHS
.first
, RHS
.first
, DT
) != LHS
.first
;
494 // If one operand is a non-constant negative and the other is not,
495 // put the non-constant negative on the right so that a sub can
496 // be used instead of a negate and add.
497 if (LHS
.second
->isNonConstantNegative()) {
498 if (!RHS
.second
->isNonConstantNegative())
500 } else if (RHS
.second
->isNonConstantNegative())
503 // Otherwise they are equivalent according to this comparison.
510 Value
*SCEVExpander::visitAddExpr(const SCEVAddExpr
*S
) {
511 // Recognize the canonical representation of an unsimplifed urem.
512 const SCEV
*URemLHS
= nullptr;
513 const SCEV
*URemRHS
= nullptr;
514 if (SE
.matchURem(S
, URemLHS
, URemRHS
)) {
515 Value
*LHS
= expand(URemLHS
);
516 Value
*RHS
= expand(URemRHS
);
517 return InsertBinop(Instruction::URem
, LHS
, RHS
, SCEV::FlagAnyWrap
,
518 /*IsSafeToHoist*/ false);
521 // Collect all the add operands in a loop, along with their associated loops.
522 // Iterate in reverse so that constants are emitted last, all else equal, and
523 // so that pointer operands are inserted first, which the code below relies on
524 // to form more involved GEPs.
525 SmallVector
<std::pair
<const Loop
*, const SCEV
*>, 8> OpsAndLoops
;
526 for (const SCEV
*Op
: reverse(S
->operands()))
527 OpsAndLoops
.push_back(std::make_pair(getRelevantLoop(Op
), Op
));
529 // Sort by loop. Use a stable sort so that constants follow non-constants and
530 // pointer operands precede non-pointer operands.
531 llvm::stable_sort(OpsAndLoops
, LoopCompare(SE
.DT
));
533 // Emit instructions to add all the operands. Hoist as much as possible
534 // out of loops, and form meaningful getelementptrs where possible.
535 Value
*Sum
= nullptr;
536 for (auto I
= OpsAndLoops
.begin(), E
= OpsAndLoops
.end(); I
!= E
;) {
537 const Loop
*CurLoop
= I
->first
;
538 const SCEV
*Op
= I
->second
;
540 // This is the first operand. Just expand it.
546 assert(!Op
->getType()->isPointerTy() && "Only first op can be pointer");
547 if (isa
<PointerType
>(Sum
->getType())) {
548 // The running sum expression is a pointer. Try to form a getelementptr
549 // at this level with that as the base.
550 SmallVector
<const SCEV
*, 4> NewOps
;
551 for (; I
!= E
&& I
->first
== CurLoop
; ++I
) {
552 // If the operand is SCEVUnknown and not instructions, peek through
553 // it, to enable more of it to be folded into the GEP.
554 const SCEV
*X
= I
->second
;
555 if (const SCEVUnknown
*U
= dyn_cast
<SCEVUnknown
>(X
))
556 if (!isa
<Instruction
>(U
->getValue()))
557 X
= SE
.getSCEV(U
->getValue());
560 Sum
= expandAddToGEP(SE
.getAddExpr(NewOps
), Sum
, S
->getNoWrapFlags());
561 } else if (Op
->isNonConstantNegative()) {
562 // Instead of doing a negate and add, just do a subtract.
563 Value
*W
= expand(SE
.getNegativeSCEV(Op
));
564 Sum
= InsertBinop(Instruction::Sub
, Sum
, W
, SCEV::FlagAnyWrap
,
565 /*IsSafeToHoist*/ true);
569 Value
*W
= expand(Op
);
570 // Canonicalize a constant to the RHS.
571 if (isa
<Constant
>(Sum
))
573 Sum
= InsertBinop(Instruction::Add
, Sum
, W
, S
->getNoWrapFlags(),
574 /*IsSafeToHoist*/ true);
582 Value
*SCEVExpander::visitMulExpr(const SCEVMulExpr
*S
) {
583 Type
*Ty
= S
->getType();
585 // Collect all the mul operands in a loop, along with their associated loops.
586 // Iterate in reverse so that constants are emitted last, all else equal.
587 SmallVector
<std::pair
<const Loop
*, const SCEV
*>, 8> OpsAndLoops
;
588 for (const SCEV
*Op
: reverse(S
->operands()))
589 OpsAndLoops
.push_back(std::make_pair(getRelevantLoop(Op
), Op
));
591 // Sort by loop. Use a stable sort so that constants follow non-constants.
592 llvm::stable_sort(OpsAndLoops
, LoopCompare(SE
.DT
));
594 // Emit instructions to mul all the operands. Hoist as much as possible
596 Value
*Prod
= nullptr;
597 auto I
= OpsAndLoops
.begin();
599 // Expand the calculation of X pow N in the following manner:
600 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
601 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
602 const auto ExpandOpBinPowN
= [this, &I
, &OpsAndLoops
]() {
604 // Calculate how many times the same operand from the same loop is included
606 uint64_t Exponent
= 0;
607 const uint64_t MaxExponent
= UINT64_MAX
>> 1;
608 // No one sane will ever try to calculate such huge exponents, but if we
609 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
610 // below when the power of 2 exceeds our Exponent, and we want it to be
611 // 1u << 31 at most to not deal with unsigned overflow.
612 while (E
!= OpsAndLoops
.end() && *I
== *E
&& Exponent
!= MaxExponent
) {
616 assert(Exponent
> 0 && "Trying to calculate a zeroth exponent of operand?");
618 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
619 // that are needed into the result.
620 Value
*P
= expand(I
->second
);
621 Value
*Result
= nullptr;
624 for (uint64_t BinExp
= 2; BinExp
<= Exponent
; BinExp
<<= 1) {
625 P
= InsertBinop(Instruction::Mul
, P
, P
, SCEV::FlagAnyWrap
,
626 /*IsSafeToHoist*/ true);
627 if (Exponent
& BinExp
)
628 Result
= Result
? InsertBinop(Instruction::Mul
, Result
, P
,
630 /*IsSafeToHoist*/ true)
635 assert(Result
&& "Nothing was expanded?");
639 while (I
!= OpsAndLoops
.end()) {
641 // This is the first operand. Just expand it.
642 Prod
= ExpandOpBinPowN();
643 } else if (I
->second
->isAllOnesValue()) {
644 // Instead of doing a multiply by negative one, just do a negate.
645 Prod
= InsertBinop(Instruction::Sub
, Constant::getNullValue(Ty
), Prod
,
646 SCEV::FlagAnyWrap
, /*IsSafeToHoist*/ true);
650 Value
*W
= ExpandOpBinPowN();
651 // Canonicalize a constant to the RHS.
652 if (isa
<Constant
>(Prod
)) std::swap(Prod
, W
);
654 if (match(W
, m_Power2(RHS
))) {
655 // Canonicalize Prod*(1<<C) to Prod<<C.
656 assert(!Ty
->isVectorTy() && "vector types are not SCEVable");
657 auto NWFlags
= S
->getNoWrapFlags();
658 // clear nsw flag if shl will produce poison value.
659 if (RHS
->logBase2() == RHS
->getBitWidth() - 1)
660 NWFlags
= ScalarEvolution::clearFlags(NWFlags
, SCEV::FlagNSW
);
661 Prod
= InsertBinop(Instruction::Shl
, Prod
,
662 ConstantInt::get(Ty
, RHS
->logBase2()), NWFlags
,
663 /*IsSafeToHoist*/ true);
665 Prod
= InsertBinop(Instruction::Mul
, Prod
, W
, S
->getNoWrapFlags(),
666 /*IsSafeToHoist*/ true);
674 Value
*SCEVExpander::visitUDivExpr(const SCEVUDivExpr
*S
) {
675 Value
*LHS
= expand(S
->getLHS());
676 if (const SCEVConstant
*SC
= dyn_cast
<SCEVConstant
>(S
->getRHS())) {
677 const APInt
&RHS
= SC
->getAPInt();
678 if (RHS
.isPowerOf2())
679 return InsertBinop(Instruction::LShr
, LHS
,
680 ConstantInt::get(SC
->getType(), RHS
.logBase2()),
681 SCEV::FlagAnyWrap
, /*IsSafeToHoist*/ true);
684 const SCEV
*RHSExpr
= S
->getRHS();
685 Value
*RHS
= expand(RHSExpr
);
687 bool GuaranteedNotPoison
=
688 ScalarEvolution::isGuaranteedNotToBePoison(RHSExpr
);
689 if (!GuaranteedNotPoison
)
690 RHS
= Builder
.CreateFreeze(RHS
);
692 // We need an umax if either RHSExpr is not known to be zero, or if it is
693 // not guaranteed to be non-poison. In the later case, the frozen poison may
695 if (!SE
.isKnownNonZero(RHSExpr
) || !GuaranteedNotPoison
)
696 RHS
= Builder
.CreateIntrinsic(RHS
->getType(), Intrinsic::umax
,
697 {RHS
, ConstantInt::get(RHS
->getType(), 1)});
699 return InsertBinop(Instruction::UDiv
, LHS
, RHS
, SCEV::FlagAnyWrap
,
700 /*IsSafeToHoist*/ SE
.isKnownNonZero(S
->getRHS()));
703 /// Determine if this is a well-behaved chain of instructions leading back to
704 /// the PHI. If so, it may be reused by expanded expressions.
705 bool SCEVExpander::isNormalAddRecExprPHI(PHINode
*PN
, Instruction
*IncV
,
707 if (IncV
->getNumOperands() == 0 || isa
<PHINode
>(IncV
) ||
708 (isa
<CastInst
>(IncV
) && !isa
<BitCastInst
>(IncV
)))
710 // If any of the operands don't dominate the insert position, bail.
711 // Addrec operands are always loop-invariant, so this can only happen
712 // if there are instructions which haven't been hoisted.
713 if (L
== IVIncInsertLoop
) {
714 for (Use
&Op
: llvm::drop_begin(IncV
->operands()))
715 if (Instruction
*OInst
= dyn_cast
<Instruction
>(Op
))
716 if (!SE
.DT
.dominates(OInst
, IVIncInsertPos
))
719 // Advance to the next instruction.
720 IncV
= dyn_cast
<Instruction
>(IncV
->getOperand(0));
724 if (IncV
->mayHaveSideEffects())
730 return isNormalAddRecExprPHI(PN
, IncV
, L
);
733 /// getIVIncOperand returns an induction variable increment's induction
734 /// variable operand.
736 /// If allowScale is set, any type of GEP is allowed as long as the nonIV
737 /// operands dominate InsertPos.
739 /// If allowScale is not set, ensure that a GEP increment conforms to one of the
740 /// simple patterns generated by getAddRecExprPHILiterally and
741 /// expandAddtoGEP. If the pattern isn't recognized, return NULL.
742 Instruction
*SCEVExpander::getIVIncOperand(Instruction
*IncV
,
743 Instruction
*InsertPos
,
745 if (IncV
== InsertPos
)
748 switch (IncV
->getOpcode()) {
751 // Check for a simple Add/Sub or GEP of a loop invariant step.
752 case Instruction::Add
:
753 case Instruction::Sub
: {
754 Instruction
*OInst
= dyn_cast
<Instruction
>(IncV
->getOperand(1));
755 if (!OInst
|| SE
.DT
.dominates(OInst
, InsertPos
))
756 return dyn_cast
<Instruction
>(IncV
->getOperand(0));
759 case Instruction::BitCast
:
760 return dyn_cast
<Instruction
>(IncV
->getOperand(0));
761 case Instruction::GetElementPtr
:
762 for (Use
&U
: llvm::drop_begin(IncV
->operands())) {
763 if (isa
<Constant
>(U
))
765 if (Instruction
*OInst
= dyn_cast
<Instruction
>(U
)) {
766 if (!SE
.DT
.dominates(OInst
, InsertPos
))
770 // allow any kind of GEP as long as it can be hoisted.
773 // GEPs produced by SCEVExpander use i8 element type.
774 if (!cast
<GEPOperator
>(IncV
)->getSourceElementType()->isIntegerTy(8))
778 return dyn_cast
<Instruction
>(IncV
->getOperand(0));
782 /// If the insert point of the current builder or any of the builders on the
783 /// stack of saved builders has 'I' as its insert point, update it to point to
784 /// the instruction after 'I'. This is intended to be used when the instruction
785 /// 'I' is being moved. If this fixup is not done and 'I' is moved to a
786 /// different block, the inconsistent insert point (with a mismatched
787 /// Instruction and Block) can lead to an instruction being inserted in a block
788 /// other than its parent.
789 void SCEVExpander::fixupInsertPoints(Instruction
*I
) {
790 BasicBlock::iterator
It(*I
);
791 BasicBlock::iterator NewInsertPt
= std::next(It
);
792 if (Builder
.GetInsertPoint() == It
)
793 Builder
.SetInsertPoint(&*NewInsertPt
);
794 for (auto *InsertPtGuard
: InsertPointGuards
)
795 if (InsertPtGuard
->GetInsertPoint() == It
)
796 InsertPtGuard
->SetInsertPoint(NewInsertPt
);
799 /// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
800 /// it available to other uses in this loop. Recursively hoist any operands,
801 /// until we reach a value that dominates InsertPos.
802 bool SCEVExpander::hoistIVInc(Instruction
*IncV
, Instruction
*InsertPos
,
803 bool RecomputePoisonFlags
) {
804 auto FixupPoisonFlags
= [this](Instruction
*I
) {
805 // Drop flags that are potentially inferred from old context and infer flags
808 I
->dropPoisonGeneratingFlags();
809 if (auto *OBO
= dyn_cast
<OverflowingBinaryOperator
>(I
))
810 if (auto Flags
= SE
.getStrengthenedNoWrapFlagsFromBinOp(OBO
)) {
811 auto *BO
= cast
<BinaryOperator
>(I
);
812 BO
->setHasNoUnsignedWrap(
813 ScalarEvolution::maskFlags(*Flags
, SCEV::FlagNUW
) == SCEV::FlagNUW
);
814 BO
->setHasNoSignedWrap(
815 ScalarEvolution::maskFlags(*Flags
, SCEV::FlagNSW
) == SCEV::FlagNSW
);
819 if (SE
.DT
.dominates(IncV
, InsertPos
)) {
820 if (RecomputePoisonFlags
)
821 FixupPoisonFlags(IncV
);
825 // InsertPos must itself dominate IncV so that IncV's new position satisfies
826 // its existing users.
827 if (isa
<PHINode
>(InsertPos
) ||
828 !SE
.DT
.dominates(InsertPos
->getParent(), IncV
->getParent()))
831 if (!SE
.LI
.movementPreservesLCSSAForm(IncV
, InsertPos
))
834 // Check that the chain of IV operands leading back to Phi can be hoisted.
835 SmallVector
<Instruction
*, 4> IVIncs
;
837 Instruction
*Oper
= getIVIncOperand(IncV
, InsertPos
, /*allowScale*/true);
840 // IncV is safe to hoist.
841 IVIncs
.push_back(IncV
);
843 if (SE
.DT
.dominates(IncV
, InsertPos
))
846 for (Instruction
*I
: llvm::reverse(IVIncs
)) {
847 fixupInsertPoints(I
);
848 I
->moveBefore(InsertPos
);
849 if (RecomputePoisonFlags
)
855 bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode
*OrigPhi
,
857 Instruction
*OrigInc
,
858 Instruction
*WideInc
) {
859 return match(OrigInc
, m_c_BinOp(m_Specific(OrigPhi
), m_Value())) &&
860 match(WideInc
, m_c_BinOp(m_Specific(WidePhi
), m_Value())) &&
861 OrigInc
->getOpcode() == WideInc
->getOpcode();
864 /// Determine if this cyclic phi is in a form that would have been generated by
865 /// LSR. We don't care if the phi was actually expanded in this pass, as long
866 /// as it is in a low-cost form, for example, no implied multiplication. This
867 /// should match any patterns generated by getAddRecExprPHILiterally and
869 bool SCEVExpander::isExpandedAddRecExprPHI(PHINode
*PN
, Instruction
*IncV
,
871 for(Instruction
*IVOper
= IncV
;
872 (IVOper
= getIVIncOperand(IVOper
, L
->getLoopPreheader()->getTerminator(),
873 /*allowScale=*/false));) {
880 /// expandIVInc - Expand an IV increment at Builder's current InsertPos.
881 /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
882 /// need to materialize IV increments elsewhere to handle difficult situations.
883 Value
*SCEVExpander::expandIVInc(PHINode
*PN
, Value
*StepV
, const Loop
*L
,
886 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
887 if (PN
->getType()->isPointerTy()) {
888 // TODO: Change name to IVName.iv.next.
889 IncV
= Builder
.CreatePtrAdd(PN
, StepV
, "scevgep");
892 Builder
.CreateSub(PN
, StepV
, Twine(IVName
) + ".iv.next") :
893 Builder
.CreateAdd(PN
, StepV
, Twine(IVName
) + ".iv.next");
898 /// Check whether we can cheaply express the requested SCEV in terms of
899 /// the available PHI SCEV by truncation and/or inversion of the step.
900 static bool canBeCheaplyTransformed(ScalarEvolution
&SE
,
901 const SCEVAddRecExpr
*Phi
,
902 const SCEVAddRecExpr
*Requested
,
904 // We can't transform to match a pointer PHI.
905 Type
*PhiTy
= Phi
->getType();
906 Type
*RequestedTy
= Requested
->getType();
907 if (PhiTy
->isPointerTy() || RequestedTy
->isPointerTy())
910 if (RequestedTy
->getIntegerBitWidth() > PhiTy
->getIntegerBitWidth())
913 // Try truncate it if necessary.
914 Phi
= dyn_cast
<SCEVAddRecExpr
>(SE
.getTruncateOrNoop(Phi
, RequestedTy
));
918 // Check whether truncation will help.
919 if (Phi
== Requested
) {
924 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
925 if (SE
.getMinusSCEV(Requested
->getStart(), Requested
) == Phi
) {
933 static bool IsIncrementNSW(ScalarEvolution
&SE
, const SCEVAddRecExpr
*AR
) {
934 if (!isa
<IntegerType
>(AR
->getType()))
937 unsigned BitWidth
= cast
<IntegerType
>(AR
->getType())->getBitWidth();
938 Type
*WideTy
= IntegerType::get(AR
->getType()->getContext(), BitWidth
* 2);
939 const SCEV
*Step
= AR
->getStepRecurrence(SE
);
940 const SCEV
*OpAfterExtend
= SE
.getAddExpr(SE
.getSignExtendExpr(Step
, WideTy
),
941 SE
.getSignExtendExpr(AR
, WideTy
));
942 const SCEV
*ExtendAfterOp
=
943 SE
.getSignExtendExpr(SE
.getAddExpr(AR
, Step
), WideTy
);
944 return ExtendAfterOp
== OpAfterExtend
;
947 static bool IsIncrementNUW(ScalarEvolution
&SE
, const SCEVAddRecExpr
*AR
) {
948 if (!isa
<IntegerType
>(AR
->getType()))
951 unsigned BitWidth
= cast
<IntegerType
>(AR
->getType())->getBitWidth();
952 Type
*WideTy
= IntegerType::get(AR
->getType()->getContext(), BitWidth
* 2);
953 const SCEV
*Step
= AR
->getStepRecurrence(SE
);
954 const SCEV
*OpAfterExtend
= SE
.getAddExpr(SE
.getZeroExtendExpr(Step
, WideTy
),
955 SE
.getZeroExtendExpr(AR
, WideTy
));
956 const SCEV
*ExtendAfterOp
=
957 SE
.getZeroExtendExpr(SE
.getAddExpr(AR
, Step
), WideTy
);
958 return ExtendAfterOp
== OpAfterExtend
;
961 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
962 /// the base addrec, which is the addrec without any non-loop-dominating
963 /// values, and return the PHI.
965 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr
*Normalized
,
966 const Loop
*L
, Type
*&TruncTy
,
968 assert((!IVIncInsertLoop
|| IVIncInsertPos
) &&
969 "Uninitialized insert position");
971 // Reuse a previously-inserted PHI, if present.
972 BasicBlock
*LatchBlock
= L
->getLoopLatch();
974 PHINode
*AddRecPhiMatch
= nullptr;
975 Instruction
*IncV
= nullptr;
979 // Only try partially matching scevs that need truncation and/or
980 // step-inversion if we know this loop is outside the current loop.
981 bool TryNonMatchingSCEV
=
983 SE
.DT
.properlyDominates(LatchBlock
, IVIncInsertLoop
->getHeader());
985 for (PHINode
&PN
: L
->getHeader()->phis()) {
986 if (!SE
.isSCEVable(PN
.getType()))
989 // We should not look for a incomplete PHI. Getting SCEV for a incomplete
990 // PHI has no meaning at all.
991 if (!PN
.isComplete()) {
992 SCEV_DEBUG_WITH_TYPE(
993 DebugType
, dbgs() << "One incomplete PHI is found: " << PN
<< "\n");
997 const SCEVAddRecExpr
*PhiSCEV
= dyn_cast
<SCEVAddRecExpr
>(SE
.getSCEV(&PN
));
1001 bool IsMatchingSCEV
= PhiSCEV
== Normalized
;
1002 // We only handle truncation and inversion of phi recurrences for the
1003 // expanded expression if the expanded expression's loop dominates the
1004 // loop we insert to. Check now, so we can bail out early.
1005 if (!IsMatchingSCEV
&& !TryNonMatchingSCEV
)
1008 // TODO: this possibly can be reworked to avoid this cast at all.
1009 Instruction
*TempIncV
=
1010 dyn_cast
<Instruction
>(PN
.getIncomingValueForBlock(LatchBlock
));
1014 // Check whether we can reuse this PHI node.
1016 if (!isExpandedAddRecExprPHI(&PN
, TempIncV
, L
))
1019 if (!isNormalAddRecExprPHI(&PN
, TempIncV
, L
))
1023 // Stop if we have found an exact match SCEV.
1024 if (IsMatchingSCEV
) {
1028 AddRecPhiMatch
= &PN
;
1032 // Try whether the phi can be translated into the requested form
1033 // (truncated and/or offset by a constant).
1034 if ((!TruncTy
|| InvertStep
) &&
1035 canBeCheaplyTransformed(SE
, PhiSCEV
, Normalized
, InvertStep
)) {
1036 // Record the phi node. But don't stop we might find an exact match
1038 AddRecPhiMatch
= &PN
;
1040 TruncTy
= Normalized
->getType();
1044 if (AddRecPhiMatch
) {
1045 // Ok, the add recurrence looks usable.
1046 // Remember this PHI, even in post-inc mode.
1047 InsertedValues
.insert(AddRecPhiMatch
);
1048 // Remember the increment.
1049 rememberInstruction(IncV
);
1050 // Those values were not actually inserted but re-used.
1051 ReusedValues
.insert(AddRecPhiMatch
);
1052 ReusedValues
.insert(IncV
);
1053 return AddRecPhiMatch
;
1057 // Save the original insertion point so we can restore it when we're done.
1058 SCEVInsertPointGuard
Guard(Builder
, this);
1060 // Another AddRec may need to be recursively expanded below. For example, if
1061 // this AddRec is quadratic, the StepV may itself be an AddRec in this
1062 // loop. Remove this loop from the PostIncLoops set before expanding such
1063 // AddRecs. Otherwise, we cannot find a valid position for the step
1064 // (i.e. StepV can never dominate its loop header). Ideally, we could do
1065 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1066 // so it's not worth implementing SmallPtrSet::swap.
1067 PostIncLoopSet SavedPostIncLoops
= PostIncLoops
;
1068 PostIncLoops
.clear();
1070 // Expand code for the start value into the loop preheader.
1071 assert(L
->getLoopPreheader() &&
1072 "Can't expand add recurrences without a loop preheader!");
1074 expand(Normalized
->getStart(), L
->getLoopPreheader()->getTerminator());
1076 // StartV must have been be inserted into L's preheader to dominate the new
1078 assert(!isa
<Instruction
>(StartV
) ||
1079 SE
.DT
.properlyDominates(cast
<Instruction
>(StartV
)->getParent(),
1082 // Expand code for the step value. Do this before creating the PHI so that PHI
1083 // reuse code doesn't see an incomplete PHI.
1084 const SCEV
*Step
= Normalized
->getStepRecurrence(SE
);
1085 Type
*ExpandTy
= Normalized
->getType();
1086 // If the stride is negative, insert a sub instead of an add for the increment
1087 // (unless it's a constant, because subtracts of constants are canonicalized
1089 bool useSubtract
= !ExpandTy
->isPointerTy() && Step
->isNonConstantNegative();
1091 Step
= SE
.getNegativeSCEV(Step
);
1092 // Expand the step somewhere that dominates the loop header.
1093 Value
*StepV
= expand(Step
, L
->getHeader()->getFirstInsertionPt());
1095 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1096 // we actually do emit an addition. It does not apply if we emit a
1098 bool IncrementIsNUW
= !useSubtract
&& IsIncrementNUW(SE
, Normalized
);
1099 bool IncrementIsNSW
= !useSubtract
&& IsIncrementNSW(SE
, Normalized
);
1102 BasicBlock
*Header
= L
->getHeader();
1103 Builder
.SetInsertPoint(Header
, Header
->begin());
1105 Builder
.CreatePHI(ExpandTy
, pred_size(Header
), Twine(IVName
) + ".iv");
1107 // Create the step instructions and populate the PHI.
1108 for (BasicBlock
*Pred
: predecessors(Header
)) {
1109 // Add a start value.
1110 if (!L
->contains(Pred
)) {
1111 PN
->addIncoming(StartV
, Pred
);
1115 // Create a step value and add it to the PHI.
1116 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1117 // instructions at IVIncInsertPos.
1118 Instruction
*InsertPos
= L
== IVIncInsertLoop
?
1119 IVIncInsertPos
: Pred
->getTerminator();
1120 Builder
.SetInsertPoint(InsertPos
);
1121 Value
*IncV
= expandIVInc(PN
, StepV
, L
, useSubtract
);
1123 if (isa
<OverflowingBinaryOperator
>(IncV
)) {
1125 cast
<BinaryOperator
>(IncV
)->setHasNoUnsignedWrap();
1127 cast
<BinaryOperator
>(IncV
)->setHasNoSignedWrap();
1129 PN
->addIncoming(IncV
, Pred
);
1132 // After expanding subexpressions, restore the PostIncLoops set so the caller
1133 // can ensure that IVIncrement dominates the current uses.
1134 PostIncLoops
= SavedPostIncLoops
;
1136 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1137 // effective when we are able to use an IV inserted here, so record it.
1138 InsertedValues
.insert(PN
);
1139 InsertedIVs
.push_back(PN
);
1143 Value
*SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr
*S
) {
1144 const Loop
*L
= S
->getLoop();
1146 // Determine a normalized form of this expression, which is the expression
1147 // before any post-inc adjustment is made.
1148 const SCEVAddRecExpr
*Normalized
= S
;
1149 if (PostIncLoops
.count(L
)) {
1150 PostIncLoopSet Loops
;
1152 Normalized
= cast
<SCEVAddRecExpr
>(
1153 normalizeForPostIncUse(S
, Loops
, SE
, /*CheckInvertible=*/false));
1156 [[maybe_unused
]] const SCEV
*Start
= Normalized
->getStart();
1157 const SCEV
*Step
= Normalized
->getStepRecurrence(SE
);
1158 assert(SE
.properlyDominates(Start
, L
->getHeader()) &&
1159 "Start does not properly dominate loop header");
1160 assert(SE
.dominates(Step
, L
->getHeader()) && "Step not dominate loop header");
1162 // In some cases, we decide to reuse an existing phi node but need to truncate
1163 // it and/or invert the step.
1164 Type
*TruncTy
= nullptr;
1165 bool InvertStep
= false;
1166 PHINode
*PN
= getAddRecExprPHILiterally(Normalized
, L
, TruncTy
, InvertStep
);
1168 // Accommodate post-inc mode, if necessary.
1170 if (!PostIncLoops
.count(L
))
1173 // In PostInc mode, use the post-incremented value.
1174 BasicBlock
*LatchBlock
= L
->getLoopLatch();
1175 assert(LatchBlock
&& "PostInc mode requires a unique loop latch!");
1176 Result
= PN
->getIncomingValueForBlock(LatchBlock
);
1178 // We might be introducing a new use of the post-inc IV that is not poison
1179 // safe, in which case we should drop poison generating flags. Only keep
1180 // those flags for which SCEV has proven that they always hold.
1181 if (isa
<OverflowingBinaryOperator
>(Result
)) {
1182 auto *I
= cast
<Instruction
>(Result
);
1183 if (!S
->hasNoUnsignedWrap())
1184 I
->setHasNoUnsignedWrap(false);
1185 if (!S
->hasNoSignedWrap())
1186 I
->setHasNoSignedWrap(false);
1189 // For an expansion to use the postinc form, the client must call
1190 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1191 // or dominated by IVIncInsertPos.
1192 if (isa
<Instruction
>(Result
) &&
1193 !SE
.DT
.dominates(cast
<Instruction
>(Result
),
1194 &*Builder
.GetInsertPoint())) {
1195 // The induction variable's postinc expansion does not dominate this use.
1196 // IVUsers tries to prevent this case, so it is rare. However, it can
1197 // happen when an IVUser outside the loop is not dominated by the latch
1198 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1199 // all cases. Consider a phi outside whose operand is replaced during
1200 // expansion with the value of the postinc user. Without fundamentally
1201 // changing the way postinc users are tracked, the only remedy is
1202 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1203 // but hopefully expandCodeFor handles that.
1205 !S
->getType()->isPointerTy() && Step
->isNonConstantNegative();
1207 Step
= SE
.getNegativeSCEV(Step
);
1210 // Expand the step somewhere that dominates the loop header.
1211 SCEVInsertPointGuard
Guard(Builder
, this);
1212 StepV
= expand(Step
, L
->getHeader()->getFirstInsertionPt());
1214 Result
= expandIVInc(PN
, StepV
, L
, useSubtract
);
1218 // We have decided to reuse an induction variable of a dominating loop. Apply
1219 // truncation and/or inversion of the step.
1221 // Truncate the result.
1222 if (TruncTy
!= Result
->getType())
1223 Result
= Builder
.CreateTrunc(Result
, TruncTy
);
1225 // Invert the result.
1227 Result
= Builder
.CreateSub(expand(Normalized
->getStart()), Result
);
1233 Value
*SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr
*S
) {
1234 // In canonical mode we compute the addrec as an expression of a canonical IV
1235 // using evaluateAtIteration and expand the resulting SCEV expression. This
1236 // way we avoid introducing new IVs to carry on the computation of the addrec
1237 // throughout the loop.
1239 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1240 // type wider than the addrec itself. Emitting a canonical IV of the
1241 // proper type might produce non-legal types, for example expanding an i64
1242 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1243 // back to non-canonical mode for nested addrecs.
1244 if (!CanonicalMode
|| (S
->getNumOperands() > 2))
1245 return expandAddRecExprLiterally(S
);
1247 Type
*Ty
= SE
.getEffectiveSCEVType(S
->getType());
1248 const Loop
*L
= S
->getLoop();
1250 // First check for an existing canonical IV in a suitable type.
1251 PHINode
*CanonicalIV
= nullptr;
1252 if (PHINode
*PN
= L
->getCanonicalInductionVariable())
1253 if (SE
.getTypeSizeInBits(PN
->getType()) >= SE
.getTypeSizeInBits(Ty
))
1256 // Rewrite an AddRec in terms of the canonical induction variable, if
1257 // its type is more narrow.
1259 SE
.getTypeSizeInBits(CanonicalIV
->getType()) > SE
.getTypeSizeInBits(Ty
) &&
1260 !S
->getType()->isPointerTy()) {
1261 SmallVector
<const SCEV
*, 4> NewOps(S
->getNumOperands());
1262 for (unsigned i
= 0, e
= S
->getNumOperands(); i
!= e
; ++i
)
1263 NewOps
[i
] = SE
.getAnyExtendExpr(S
->getOperand(i
), CanonicalIV
->getType());
1264 Value
*V
= expand(SE
.getAddRecExpr(NewOps
, S
->getLoop(),
1265 S
->getNoWrapFlags(SCEV::FlagNW
)));
1266 BasicBlock::iterator NewInsertPt
=
1267 findInsertPointAfter(cast
<Instruction
>(V
), &*Builder
.GetInsertPoint());
1268 V
= expand(SE
.getTruncateExpr(SE
.getUnknown(V
), Ty
), NewInsertPt
);
1272 // {X,+,F} --> X + {0,+,F}
1273 if (!S
->getStart()->isZero()) {
1274 if (isa
<PointerType
>(S
->getType())) {
1275 Value
*StartV
= expand(SE
.getPointerBase(S
));
1276 return expandAddToGEP(SE
.removePointerBase(S
), StartV
,
1277 S
->getNoWrapFlags(SCEV::FlagNUW
));
1280 SmallVector
<const SCEV
*, 4> NewOps(S
->operands());
1281 NewOps
[0] = SE
.getConstant(Ty
, 0);
1282 const SCEV
*Rest
= SE
.getAddRecExpr(NewOps
, L
,
1283 S
->getNoWrapFlags(SCEV::FlagNW
));
1285 // Just do a normal add. Pre-expand the operands to suppress folding.
1287 // The LHS and RHS values are factored out of the expand call to make the
1288 // output independent of the argument evaluation order.
1289 const SCEV
*AddExprLHS
= SE
.getUnknown(expand(S
->getStart()));
1290 const SCEV
*AddExprRHS
= SE
.getUnknown(expand(Rest
));
1291 return expand(SE
.getAddExpr(AddExprLHS
, AddExprRHS
));
1294 // If we don't yet have a canonical IV, create one.
1296 // Create and insert the PHI node for the induction variable in the
1298 BasicBlock
*Header
= L
->getHeader();
1299 pred_iterator HPB
= pred_begin(Header
), HPE
= pred_end(Header
);
1300 CanonicalIV
= PHINode::Create(Ty
, std::distance(HPB
, HPE
), "indvar");
1301 CanonicalIV
->insertBefore(Header
->begin());
1302 rememberInstruction(CanonicalIV
);
1304 SmallSet
<BasicBlock
*, 4> PredSeen
;
1305 Constant
*One
= ConstantInt::get(Ty
, 1);
1306 for (pred_iterator HPI
= HPB
; HPI
!= HPE
; ++HPI
) {
1307 BasicBlock
*HP
= *HPI
;
1308 if (!PredSeen
.insert(HP
).second
) {
1309 // There must be an incoming value for each predecessor, even the
1311 CanonicalIV
->addIncoming(CanonicalIV
->getIncomingValueForBlock(HP
), HP
);
1315 if (L
->contains(HP
)) {
1316 // Insert a unit add instruction right before the terminator
1317 // corresponding to the back-edge.
1318 Instruction
*Add
= BinaryOperator::CreateAdd(CanonicalIV
, One
,
1320 HP
->getTerminator()->getIterator());
1321 Add
->setDebugLoc(HP
->getTerminator()->getDebugLoc());
1322 rememberInstruction(Add
);
1323 CanonicalIV
->addIncoming(Add
, HP
);
1325 CanonicalIV
->addIncoming(Constant::getNullValue(Ty
), HP
);
1330 // {0,+,1} --> Insert a canonical induction variable into the loop!
1331 if (S
->isAffine() && S
->getOperand(1)->isOne()) {
1332 assert(Ty
== SE
.getEffectiveSCEVType(CanonicalIV
->getType()) &&
1333 "IVs with types different from the canonical IV should "
1334 "already have been handled!");
1338 // {0,+,F} --> {0,+,1} * F
1340 // If this is a simple linear addrec, emit it now as a special case.
1341 if (S
->isAffine()) // {0,+,F} --> i*F
1343 expand(SE
.getTruncateOrNoop(
1344 SE
.getMulExpr(SE
.getUnknown(CanonicalIV
),
1345 SE
.getNoopOrAnyExtend(S
->getOperand(1),
1346 CanonicalIV
->getType())),
1349 // If this is a chain of recurrences, turn it into a closed form, using the
1350 // folders, then expandCodeFor the closed form. This allows the folders to
1351 // simplify the expression without having to build a bunch of special code
1352 // into this folder.
1353 const SCEV
*IH
= SE
.getUnknown(CanonicalIV
); // Get I as a "symbolic" SCEV.
1355 // Promote S up to the canonical IV type, if the cast is foldable.
1356 const SCEV
*NewS
= S
;
1357 const SCEV
*Ext
= SE
.getNoopOrAnyExtend(S
, CanonicalIV
->getType());
1358 if (isa
<SCEVAddRecExpr
>(Ext
))
1361 const SCEV
*V
= cast
<SCEVAddRecExpr
>(NewS
)->evaluateAtIteration(IH
, SE
);
1363 // Truncate the result down to the original type, if needed.
1364 const SCEV
*T
= SE
.getTruncateOrNoop(V
, Ty
);
1368 Value
*SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr
*S
) {
1369 Value
*V
= expand(S
->getOperand());
1370 return ReuseOrCreateCast(V
, S
->getType(), CastInst::PtrToInt
,
1371 GetOptimalInsertionPointForCastOf(V
));
1374 Value
*SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr
*S
) {
1375 Value
*V
= expand(S
->getOperand());
1376 return Builder
.CreateTrunc(V
, S
->getType());
1379 Value
*SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr
*S
) {
1380 Value
*V
= expand(S
->getOperand());
1381 return Builder
.CreateZExt(V
, S
->getType(), "",
1382 SE
.isKnownNonNegative(S
->getOperand()));
1385 Value
*SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr
*S
) {
1386 Value
*V
= expand(S
->getOperand());
1387 return Builder
.CreateSExt(V
, S
->getType());
1390 Value
*SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr
*S
,
1391 Intrinsic::ID IntrinID
, Twine Name
,
1392 bool IsSequential
) {
1393 bool PrevSafeMode
= SafeUDivMode
;
1394 SafeUDivMode
|= IsSequential
;
1395 Value
*LHS
= expand(S
->getOperand(S
->getNumOperands() - 1));
1396 Type
*Ty
= LHS
->getType();
1398 LHS
= Builder
.CreateFreeze(LHS
);
1399 for (int i
= S
->getNumOperands() - 2; i
>= 0; --i
) {
1400 SafeUDivMode
= (IsSequential
&& i
!= 0) || PrevSafeMode
;
1401 Value
*RHS
= expand(S
->getOperand(i
));
1402 if (IsSequential
&& i
!= 0)
1403 RHS
= Builder
.CreateFreeze(RHS
);
1405 if (Ty
->isIntegerTy())
1406 Sel
= Builder
.CreateIntrinsic(IntrinID
, {Ty
}, {LHS
, RHS
},
1407 /*FMFSource=*/nullptr, Name
);
1410 Builder
.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID
), LHS
, RHS
);
1411 Sel
= Builder
.CreateSelect(ICmp
, LHS
, RHS
, Name
);
1415 SafeUDivMode
= PrevSafeMode
;
1419 Value
*SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr
*S
) {
1420 return expandMinMaxExpr(S
, Intrinsic::smax
, "smax");
1423 Value
*SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr
*S
) {
1424 return expandMinMaxExpr(S
, Intrinsic::umax
, "umax");
1427 Value
*SCEVExpander::visitSMinExpr(const SCEVSMinExpr
*S
) {
1428 return expandMinMaxExpr(S
, Intrinsic::smin
, "smin");
1431 Value
*SCEVExpander::visitUMinExpr(const SCEVUMinExpr
*S
) {
1432 return expandMinMaxExpr(S
, Intrinsic::umin
, "umin");
1435 Value
*SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr
*S
) {
1436 return expandMinMaxExpr(S
, Intrinsic::umin
, "umin", /*IsSequential*/true);
1439 Value
*SCEVExpander::visitVScale(const SCEVVScale
*S
) {
1440 return Builder
.CreateVScale(ConstantInt::get(S
->getType(), 1));
1443 Value
*SCEVExpander::expandCodeFor(const SCEV
*SH
, Type
*Ty
,
1444 BasicBlock::iterator IP
) {
1446 Value
*V
= expandCodeFor(SH
, Ty
);
1450 Value
*SCEVExpander::expandCodeFor(const SCEV
*SH
, Type
*Ty
) {
1451 // Expand the code for this SCEV.
1452 Value
*V
= expand(SH
);
1455 assert(SE
.getTypeSizeInBits(Ty
) == SE
.getTypeSizeInBits(SH
->getType()) &&
1456 "non-trivial casts should be done with the SCEVs directly!");
1457 V
= InsertNoopCastOfTo(V
, Ty
);
1462 Value
*SCEVExpander::FindValueInExprValueMap(
1463 const SCEV
*S
, const Instruction
*InsertPt
,
1464 SmallVectorImpl
<Instruction
*> &DropPoisonGeneratingInsts
) {
1465 // If the expansion is not in CanonicalMode, and the SCEV contains any
1466 // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1467 if (!CanonicalMode
&& SE
.containsAddRecurrence(S
))
1470 // If S is a constant or unknown, it may be worse to reuse an existing Value.
1471 if (isa
<SCEVConstant
>(S
) || isa
<SCEVUnknown
>(S
))
1474 for (Value
*V
: SE
.getSCEVValues(S
)) {
1475 Instruction
*EntInst
= dyn_cast
<Instruction
>(V
);
1479 // Choose a Value from the set which dominates the InsertPt.
1480 // InsertPt should be inside the Value's parent loop so as not to break
1482 assert(EntInst
->getFunction() == InsertPt
->getFunction());
1483 if (S
->getType() != V
->getType() || !SE
.DT
.dominates(EntInst
, InsertPt
) ||
1484 !(SE
.LI
.getLoopFor(EntInst
->getParent()) == nullptr ||
1485 SE
.LI
.getLoopFor(EntInst
->getParent())->contains(InsertPt
)))
1488 // Make sure reusing the instruction is poison-safe.
1489 if (SE
.canReuseInstruction(S
, EntInst
, DropPoisonGeneratingInsts
))
1491 DropPoisonGeneratingInsts
.clear();
1496 // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1497 // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
// the expansion will try to reuse Value from ExprValueMap, and only when it
// fails, expand the SCEV literally.
Value *SCEVExpander::expand(const SCEV *S) {
  // Compute an insertion point for this SCEV object. Hoist the instructions
  // as far out in the loop nest as possible.
  BasicBlock::iterator InsertPt = Builder.GetInsertPoint();

  // We can move the insertion point only if there are no div or rem
  // operations; otherwise we risk moving it over the check for a zero
  // denominator.
  auto SafeToHoist = [](const SCEV *S) {
    return !SCEVExprContains(S, [](const SCEV *S) {
      if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
        if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
          // Division by non-zero constants can be hoisted.
          return SC->getValue()->isZero();
        // All other divisions should not be moved as they may be
        // divisions by zero and should be kept within the
        // conditions of the surrounding loops that guard their
        // execution (see PR35406).
        return true;
      }
      return false;
    });
  };
  if (SafeToHoist(S)) {
    for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
         L = L->getParentLoop()) {
      if (SE.isLoopInvariant(S, L)) {
        if (!L) break;
        if (BasicBlock *Preheader = L->getLoopPreheader()) {
          InsertPt = Preheader->getTerminator()->getIterator();
        } else {
          // LSR sets the insertion point for AddRec start/step values to the
          // block start to simplify value reuse, even though it's an invalid
          // position. SCEVExpander must correct for this in all cases.
          InsertPt = L->getHeader()->getFirstInsertionPt();
        }
      } else {
        // If the SCEV is computable at this level, insert it into the header
        // after the PHIs (and after any other instructions that we've inserted
        // there) so that it is guaranteed to dominate any user inside the loop.
        if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
          InsertPt = L->getHeader()->getFirstInsertionPt();

        while (InsertPt != Builder.GetInsertPoint() &&
               (isInsertedInstruction(&*InsertPt) ||
                isa<DbgInfoIntrinsic>(&*InsertPt))) {
          InsertPt = std::next(InsertPt);
        }
        break;
      }
    }
  }

  // Check to see if we already expanded this here.
  auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
  if (I != InsertedExpressions.end())
    return I->second;

  SCEVInsertPointGuard Guard(Builder, this);
  Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);

  // Expand the expression into instructions.
  SmallVector<Instruction *> DropPoisonGeneratingInsts;
  Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
  if (!V) {
    V = visit(S);
    V = fixupLCSSAFormFor(V);
  } else {
    for (Instruction *I : DropPoisonGeneratingInsts) {
      rememberFlags(I);
      I->dropPoisonGeneratingAnnotations();
      // See if we can re-infer from first principles any of the flags we just
      // dropped.
      if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
        if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
          auto *BO = cast<BinaryOperator>(I);
          BO->setHasNoUnsignedWrap(
              ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) ==
              SCEV::FlagNUW);
          BO->setHasNoSignedWrap(
              ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) ==
              SCEV::FlagNSW);
        }
      if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
        auto *Src = NNI->getOperand(0);
        if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
                                    Constant::getNullValue(Src->getType()), I,
                                    DL)
                .value_or(false))
          NNI->setNonNeg(true);
      }
    }
  }

  // Remember the expanded value for this SCEV at this location.
  //
  // This is independent of PostIncLoops. The mapped value simply materializes
  // the expression at this insertion point. If the mapped value happened to be
  // a postinc expansion, it could be reused by a non-postinc user, but only if
  // its insertion point was already at the head of the loop.
  InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
  return V;
}
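// Illustrative sketch of the hoisting above (block and value names are
// invented for exposition, not produced by this code): when the SCEV being
// expanded is invariant in the innermost loop and that loop has a preheader,
// the expanded instructions land in the preheader rather than next to the use:
//
//   ph:                                 ; loop-invariant pieces go here
//     %start = ...
//     br label %loop
//   loop:
//     %iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
//     %iv.next = add i64 %iv, 4         ; the addrec expands to a phi + add
//
// Expressions containing a udiv whose divisor is not a non-zero constant are
// deliberately not hoisted, so they stay under the guards that dominate them.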
void SCEVExpander::rememberInstruction(Value *I) {
  auto DoInsert = [this](Value *V) {
    if (!PostIncLoops.empty())
      InsertedPostIncValues.insert(V);
    else
      InsertedValues.insert(V);
  };
  DoInsert(I);
}

void SCEVExpander::rememberFlags(Instruction *I) {
  // If we already have flags for the instruction, keep the existing ones.
  OrigFlags.try_emplace(I, PoisonFlags(I));
}
void SCEVExpander::replaceCongruentIVInc(
    PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock)
    return;

  Instruction *OrigInc =
      dyn_cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
  Instruction *IsomorphicInc =
      dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
  if (!OrigInc || !IsomorphicInc)
    return;

  // If this phi has the same width but is more canonical, replace the
  // original with it. As part of the "more canonical" determination,
  // respect a prior decision to use an IV chain.
  if (OrigPhi->getType() == Phi->getType() &&
      !(ChainedPhis.count(Phi) ||
        isExpandedAddRecExprPHI(OrigPhi, OrigInc, L)) &&
      (ChainedPhis.count(Phi) ||
       isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
    std::swap(OrigPhi, Phi);
    std::swap(OrigInc, IsomorphicInc);
  }

  // Replacing the congruent phi is sufficient because acyclic
  // redundancy elimination, CSE/GVN, should handle the
  // rest. However, once SCEV proves that a phi is congruent,
  // it's often the head of an IV user cycle that is isomorphic
  // with the original phi. It's worth eagerly cleaning up the
  // common case of a single IV increment so that DeleteDeadPHIs
  // can remove cycles that had postinc uses.
  // Because we may potentially introduce a new use of OrigIV that didn't
  // exist before at this point, its poison flags need readjustment.
  const SCEV *TruncExpr =
      SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
  if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(IsomorphicInc) ||
      !SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc))
    return;

  bool BothHaveNUW = false;
  bool BothHaveNSW = false;
  auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(OrigInc);
  auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(IsomorphicInc);
  if (OBOIncV && OBOIsomorphic) {
    BothHaveNUW =
        OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
    BothHaveNSW =
        OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
  }

  if (!hoistIVInc(OrigInc, IsomorphicInc,
                  /*RecomputePoisonFlags*/ true))
    return;

  // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
  // are NUW/NSW, then we can preserve them on the wider increment; the narrower
  // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
  // make IsomorphicInc's uses more poisonous.
  assert(OrigInc->getType()->getScalarSizeInBits() >=
             IsomorphicInc->getType()->getScalarSizeInBits() &&
         "Should only replace an increment with a wider one.");
  if (BothHaveNUW || BothHaveNSW) {
    OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
    OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
  }

  SCEV_DEBUG_WITH_TYPE(DebugType,
                       dbgs() << "INDVARS: Eliminated congruent iv.inc: "
                              << *IsomorphicInc << '\n');
  Value *NewInc = OrigInc;
  if (OrigInc->getType() != IsomorphicInc->getType()) {
    BasicBlock::iterator IP;
    if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
      IP = PN->getParent()->getFirstInsertionPt();
    else
      IP = OrigInc->getNextNonDebugInstruction()->getIterator();

    IRBuilder<> Builder(IP->getParent(), IP);
    Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
    NewInc =
        Builder.CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
  }
  IsomorphicInc->replaceAllUsesWith(NewInc);
  DeadInsts.emplace_back(IsomorphicInc);
}
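// Illustrative sketch of the replacement performed above (value names are
// hypothetical): given a wide increment and a congruent narrow one,
//
//   %inc.wide = add nsw i64 %iv.wide, 1
//   %inc      = add nsw i32 %iv, 1       ; isomorphic, about to be removed
//
// the narrow increment's users are rewritten to use a truncate of the wide
// increment, roughly
//
//   %inc.trunc = trunc i64 %inc.wide to i32
//
// and %inc is queued in DeadInsts. NUW/NSW survive on the wide increment only
// when both original increments carried them (BothHaveNUW / BothHaveNSW).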
/// replaceCongruentIVs - Check for congruent phis in this loop header and
/// replace them with their most canonical representative. Return the number of
/// phis eliminated.
///
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned
SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts,
                                  const TargetTransformInfo *TTI) {
  // Find integer phis in order of increasing width.
  SmallVector<PHINode *, 8> Phis;
  for (PHINode &PN : L->getHeader()->phis())
    Phis.push_back(&PN);

  if (TTI)
    // Use stable_sort to preserve order of equivalent PHIs, so the order
    // of the sorted Phis is the same from run to run on the same loop.
    llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
      // Put pointers at the back and make sure pointer < pointer = false.
      if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
        return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
      return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
             LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
    });

  unsigned NumElim = 0;
  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
  // Process phis from wide to narrow. Map wide phis to their truncation
  // so narrow phis can reuse them.
  for (PHINode *Phi : Phis) {
    auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
      if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
        return V;
      if (!SE.isSCEVable(PN->getType()))
        return nullptr;
      auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
      if (!Const)
        return nullptr;
      return Const->getValue();
    };

    // Fold constant phis. They may be congruent to other constant phis and
    // would confuse the logic below that expects proper IVs.
    if (Value *V = SimplifyPHINode(Phi)) {
      if (V->getType() != Phi->getType())
        continue;
      SE.forgetValue(Phi);
      Phi->replaceAllUsesWith(V);
      DeadInsts.emplace_back(Phi);
      ++NumElim;
      SCEV_DEBUG_WITH_TYPE(DebugType,
                           dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
                                  << '\n');
      continue;
    }

    if (!SE.isSCEVable(Phi->getType()))
      continue;

    PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
    if (!OrigPhiRef) {
      OrigPhiRef = Phi;
      if (Phi->getType()->isIntegerTy() && TTI &&
          TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
        // Make sure we only rewrite using simple induction variables;
        // otherwise, we can make the trip count of a loop unanalyzable
        // to SCEV.
        const SCEV *PhiExpr = SE.getSCEV(Phi);
        if (isa<SCEVAddRecExpr>(PhiExpr)) {
          // This phi can be freely truncated to the narrowest phi type. Map the
          // truncated expression to it so it will be reused for narrow types.
          const SCEV *TruncExpr =
              SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
          ExprToIVMap[TruncExpr] = Phi;
        }
      }
      continue;
    }

    // Replacing a pointer phi with an integer phi or vice-versa doesn't make
    // sense.
    if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
      continue;

    replaceCongruentIVInc(Phi, OrigPhiRef, L, DT, DeadInsts);
    SCEV_DEBUG_WITH_TYPE(DebugType,
                         dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
                                << '\n');
    SCEV_DEBUG_WITH_TYPE(
        DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
    ++NumElim;
    Value *NewIV = OrigPhiRef;
    if (OrigPhiRef->getType() != Phi->getType()) {
      IRBuilder<> Builder(L->getHeader(),
                          L->getHeader()->getFirstInsertionPt());
      Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
      NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
    }
    Phi->replaceAllUsesWith(NewIV);
    DeadInsts.emplace_back(Phi);
  }

  return NumElim;
}
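// Illustrative example of what replaceCongruentIVs collapses (hypothetical
// IR): two header phis computing the same recurrence at different widths,
//
//   %i = phi i64 [ 0, %ph ], [ %i.next, %latch ]   ; {0,+,1}<%loop>
//   %j = phi i32 [ 0, %ph ], [ %j.next, %latch ]   ; {0,+,1}<%loop>, truncated
//
// map to the same (truncated) SCEV, so when TTI reports the truncate as free,
// %j's users are rewritten to 'trunc i64 %i to i32' and %j plus its increment
// are handed to DeadInsts for deletion.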
bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
                                               const Instruction *At,
                                               Loop *L) {
  using namespace llvm::PatternMatch;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Look for suitable value in simple conditions at the loop exits.
  for (BasicBlock *BB : ExitingBlocks) {
    CmpPredicate Pred;
    Instruction *LHS, *RHS;

    if (!match(BB->getTerminator(),
               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
                    m_BasicBlock(), m_BasicBlock())))
      continue;

    if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
      return true;

    if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
      return true;
  }

  // Use expand's logic which is used for reusing a previous Value in
  // ExprValueMap. Note that we don't currently model the cost of
  // needing to drop poison generating flags on the instruction if we
  // want to reuse it. We effectively assume that has zero cost.
  SmallVector<Instruction *> DropPoisonGeneratingInsts;
  return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
}
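// Illustrative shape of the exit condition matched above (operand names are
// placeholders, any integer predicate is accepted): a conditional branch on a
// compare of two instructions,
//
//   %cmp = icmp ult i64 %lhs, %rhs
//   br i1 %cmp, label %exit, label %body
//
// If either compare operand has SCEV 'S' and dominates 'At', the expansion is
// treated as already available and therefore cheap.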
template <typename T>
static InstructionCost costAndCollectOperands(
    const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
    TargetTransformInfo::TargetCostKind CostKind,
    SmallVectorImpl<SCEVOperand> &Worklist) {

  const T *S = cast<T>(WorkItem.S);
  InstructionCost Cost = 0;
  // Object to help map SCEV operands to expanded IR instructions.
  struct OperationIndices {
    OperationIndices(unsigned Opc, size_t min, size_t max)
        : Opcode(Opc), MinIdx(min), MaxIdx(max) {}
    unsigned Opcode;
    size_t MinIdx;
    size_t MaxIdx;
  };

  // Collect the operations of all the instructions that will be needed to
  // expand the SCEVExpr. This is so that when we come to cost the operands,
  // we know what the generated user(s) will be.
  SmallVector<OperationIndices, 2> Operations;

  auto CastCost = [&](unsigned Opcode) -> InstructionCost {
    Operations.emplace_back(Opcode, 0, 0);
    return TTI.getCastInstrCost(Opcode, S->getType(),
                                S->getOperand(0)->getType(),
                                TTI::CastContextHint::None, CostKind);
  };

  auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
                       unsigned MinIdx = 0,
                       unsigned MaxIdx = 1) -> InstructionCost {
    Operations.emplace_back(Opcode, MinIdx, MaxIdx);
    return NumRequired *
           TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
  };

  auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
                        unsigned MaxIdx) -> InstructionCost {
    Operations.emplace_back(Opcode, MinIdx, MaxIdx);
    Type *OpType = S->getType();
    return NumRequired * TTI.getCmpSelInstrCost(
                             Opcode, OpType, CmpInst::makeCmpResultType(OpType),
                             CmpInst::BAD_ICMP_PREDICATE, CostKind);
  };

  switch (S->getSCEVType()) {
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  case scUnknown:
  case scConstant:
  case scVScale:
    return 0;
  case scPtrToInt:
    Cost = CastCost(Instruction::PtrToInt);
    break;
  case scTruncate:
    Cost = CastCost(Instruction::Trunc);
    break;
  case scZeroExtend:
    Cost = CastCost(Instruction::ZExt);
    break;
  case scSignExtend:
    Cost = CastCost(Instruction::SExt);
    break;
  case scUDivExpr: {
    unsigned Opcode = Instruction::UDiv;
    if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
      if (SC->getAPInt().isPowerOf2())
        Opcode = Instruction::LShr;
    Cost = ArithCost(Opcode, 1);
    break;
  }
  case scAddExpr:
    Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
    break;
  case scMulExpr:
    // TODO: this is a very pessimistic cost modelling for Mul, because of the
    // binary powering (Bin Pow) algorithm actually used by the expander, see
    // SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
    Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
    break;
  case scSMaxExpr:
  case scUMaxExpr:
  case scSMinExpr:
  case scUMinExpr:
  case scSequentialUMinExpr: {
    // FIXME: should this ask for the cost of intrinsics instead?
    // The reduction tree.
    Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
    Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
    switch (S->getSCEVType()) {
    case scSequentialUMinExpr: {
      // The safety net against poison.
      // FIXME: this is broken.
      Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
      Cost += ArithCost(Instruction::Or,
                        S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
      Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
      break;
    }
    default:
      assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
             "Unhandled SCEV expression type?");
      break;
    }
    break;
  }
  case scAddRecExpr: {
    // Addrec expands to a phi and add per recurrence.
    unsigned NumRecurrences = S->getNumOperands() - 1;
    Cost += TTI.getCFInstrCost(Instruction::PHI, CostKind) * NumRecurrences;
    Cost +=
        TTI.getArithmeticInstrCost(Instruction::Add, S->getType(), CostKind) *
        NumRecurrences;
    // AR start is used in phi.
    Worklist.emplace_back(Instruction::PHI, 0, S->getOperand(0));
    // Other operands are used in add.
    for (const SCEV *Op : S->operands().drop_front())
      Worklist.emplace_back(Instruction::Add, 1, Op);
    break;
  }
  }

  for (auto &CostOp : Operations) {
    for (auto SCEVOp : enumerate(S->operands())) {
      // Clamp the index to account for multiple IR operations being chained.
      size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
      size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
      Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
    }
  }
  return Cost;
}
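// Rough worked example (the actual numbers come entirely from the target's
// TTI, so this is only a sketch): for an affine addrec {A,+,B} the model above
// charges one PHI and one Add for the recurrence itself and then queues A as a
// PHI operand and B as an Add operand. With hypothetical costs PHI = 0 and
// Add = 1, the recurrence contributes 1, plus whatever A and B cost once their
// worklist items are processed by isHighCostExpansionHelper below.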
bool SCEVExpander::isHighCostExpansionHelper(
    const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
    InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
    SmallPtrSetImpl<const SCEV *> &Processed,
    SmallVectorImpl<SCEVOperand> &Worklist) {
  if (Cost > Budget)
    return true; // Already run out of budget, give up.

  const SCEV *S = WorkItem.S;
  // Was the cost of expansion of this expression already accounted for?
  if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
    return false; // We have already accounted for this expression.

  // If we can find an existing value for this scev available at the point "At"
  // then consider the expression cheap.
  if (hasRelatedExistingExpansion(S, &At, L))
    return false; // Consider the expression to be free.

  TargetTransformInfo::TargetCostKind CostKind =
      L->getHeader()->getParent()->hasMinSize()
          ? TargetTransformInfo::TCK_CodeSize
          : TargetTransformInfo::TCK_RecipThroughput;

  switch (S->getSCEVType()) {
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  case scUnknown:
  case scVScale:
    // Assume to be zero-cost.
    return false;
  case scConstant: {
    // Only evaluate the costs of constants when optimizing for size.
    if (CostKind != TargetTransformInfo::TCK_CodeSize)
      return false;
    const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
    Type *Ty = S->getType();
    Cost += TTI.getIntImmCostInst(
        WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
    return Cost > Budget;
  }
  case scTruncate:
  case scPtrToInt:
  case scZeroExtend:
  case scSignExtend: {
    Cost += costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind,
                                                 Worklist);
    return false; // Will answer upon next entry into this function.
  }
  case scUDivExpr: {
    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
    // HowManyLessThans produced to compute a precise expression, rather than
    // a UDiv from the user's code. If we can't find a UDiv in the code with
    // some simple searching, we need to account for its cost.

    // At the beginning of this function we already tried to find existing
    // value for plain 'S'. Now try to lookup 'S + 1' since it is common
    // pattern involving division. This is just a simple search heuristic.
    if (hasRelatedExistingExpansion(
            SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
      return false; // Consider it to be free.

    Cost += costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind,
                                                 Worklist);
    return false; // Will answer upon next entry into this function.
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
           "Nary expr should have more than 1 operand.");
    // The simple nary expr will require one less op (or pair of ops)
    // than the number of its terms.
    Cost += costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind,
                                                 Worklist);
    return Cost > Budget;
  }
  case scAddRecExpr: {
    assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
           "Polynomial should be at least linear");
    Cost += costAndCollectOperands<SCEVAddRecExpr>(WorkItem, TTI, CostKind,
                                                   Worklist);
    return Cost > Budget;
  }
  }
  llvm_unreachable("Unknown SCEV kind!");
}
Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
                                            Instruction *IP) {
  assert(IP);
  switch (Pred->getKind()) {
  case SCEVPredicate::P_Union:
    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
  case SCEVPredicate::P_Compare:
    return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
  case SCEVPredicate::P_Wrap: {
    auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
    return expandWrapPredicate(AddRecPred, IP);
  }
  }
  llvm_unreachable("Unknown SCEV predicate type");
}
Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
                                            Instruction *IP) {
  Value *Expr0 = expand(Pred->getLHS(), IP);
  Value *Expr1 = expand(Pred->getRHS(), IP);

  Builder.SetInsertPoint(IP);
  auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
  auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
  return I;
}
Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
                                           Instruction *Loc, bool Signed) {
  assert(AR->isAffine() && "Cannot generate RT check for "
                           "non-affine expression");

  // FIXME: It is highly suspicious that we're ignoring the predicates here.
  SmallVector<const SCEVPredicate *, 4> Pred;
  const SCEV *ExitCount =
      SE.getPredicatedSymbolicMaxBackedgeTakenCount(AR->getLoop(), Pred);

  assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");

  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *Start = AR->getStart();

  Type *ARTy = AR->getType();
  unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
  unsigned DstBits = SE.getTypeSizeInBits(ARTy);

  // The expression {Start,+,Step} has nusw/nssw if
  //   Step < 0, Start - |Step| * Backedge <= Start
  //   Step >= 0, Start + |Step| * Backedge > Start
  // and |Step| * Backedge doesn't unsigned overflow.

  Builder.SetInsertPoint(Loc);
  Value *TripCountVal = expand(ExitCount, Loc);

  IntegerType *Ty =
      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));

  Value *StepValue = expand(Step, Loc);
  Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
  Value *StartValue = expand(Start, Loc);

  ConstantInt *Zero =
      ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));

  Builder.SetInsertPoint(Loc);
  // Compute |Step|
  Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
  Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);

  // Compute |Step| * Backedge
  // Compute:
  //   1. Start + |Step| * Backedge < Start
  //   2. Start - |Step| * Backedge > Start
  //
  // And select either 1. or 2. depending on whether step is positive or
  // negative. If Step is known to be positive or negative, only create
  // either 1. or 2.
  auto ComputeEndCheck = [&]() -> Value * {
    // Checking <u 0 is always false.
    if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
      return ConstantInt::getFalse(Loc->getContext());

    // Get the backedge taken count and truncate or extend it to the AR type.
    Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);

    Value *MulV, *OfMul;
    if (Step->isOne()) {
      // Special-case Step of one. Potentially-costly `umul_with_overflow`
      // isn't needed, there is never an overflow, so to avoid artificially
      // inflating the cost of the check, directly emit the optimized IR.
      MulV = TruncTripCount;
      OfMul = ConstantInt::getFalse(MulV->getContext());
    } else {
      CallInst *Mul = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow,
                                              Ty, {AbsStep, TruncTripCount},
                                              /*FMFSource=*/nullptr, "mul");
      MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
      OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
    }

    Value *Add = nullptr, *Sub = nullptr;
    bool NeedPosCheck = !SE.isKnownNegative(Step);
    bool NeedNegCheck = !SE.isKnownPositive(Step);

    if (isa<PointerType>(ARTy)) {
      Value *NegMulV = Builder.CreateNeg(MulV);
      if (NeedPosCheck)
        Add = Builder.CreatePtrAdd(StartValue, MulV);
      if (NeedNegCheck)
        Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
    } else {
      if (NeedPosCheck)
        Add = Builder.CreateAdd(StartValue, MulV);
      if (NeedNegCheck)
        Sub = Builder.CreateSub(StartValue, MulV);
    }

    Value *EndCompareLT = nullptr;
    Value *EndCompareGT = nullptr;
    Value *EndCheck = nullptr;
    if (NeedPosCheck)
      EndCheck = EndCompareLT = Builder.CreateICmp(
          Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
    if (NeedNegCheck)
      EndCheck = EndCompareGT = Builder.CreateICmp(
          Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
    if (NeedPosCheck && NeedNegCheck) {
      // Select the answer based on the sign of Step.
      EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
    }
    return Builder.CreateOr(EndCheck, OfMul);
  };
  Value *EndCheck = ComputeEndCheck();

  // If the backedge taken count type is larger than the AR type,
  // check that we don't drop any bits by truncating it. If we are
  // dropping bits, then we have overflow (unless the step is zero).
  if (SrcBits > DstBits) {
    auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
    auto *BackedgeCheck =
        Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
                           ConstantInt::get(Loc->getContext(), MaxVal));
    BackedgeCheck = Builder.CreateAnd(
        BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));

    EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
  }

  return EndCheck;
}
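// Illustrative shape of the emitted check for an integer AR {Start,+,Step}
// whose step sign is unknown (value names are for exposition only, and the
// signed/unsigned predicates depend on the 'Signed' argument):
//
//   %mul     = call { iN, i1 } @llvm.umul.with.overflow.iN(iN %abs.step, iN %btc)
//   %mul.res = extractvalue { iN, i1 } %mul, 0
//   %ofl     = extractvalue { iN, i1 } %mul, 1
//   %add     = add iN %start, %mul.res
//   %sub     = sub iN %start, %mul.res
//   %lt      = icmp slt iN %add, %start       ; ult in the unsigned case
//   %gt      = icmp sgt iN %sub, %start       ; ugt in the unsigned case
//   %end     = select i1 %step.neg, i1 %gt, i1 %lt
//   %check   = or i1 %end, %ofl
//
// When Step is one the multiply and its overflow bit are folded away, and when
// the backedge-taken count is wider than the AR type an extra compare guards
// against the truncation dropping bits.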
Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
                                         Instruction *IP) {
  const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
  Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;

  // Add a check for NUSW
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
    NUSWCheck = generateOverflowCheck(A, IP, false);

  // Add a check for NSSW
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
    NSSWCheck = generateOverflowCheck(A, IP, true);

  if (NUSWCheck && NSSWCheck)
    return Builder.CreateOr(NUSWCheck, NSSWCheck);

  if (NUSWCheck)
    return NUSWCheck;

  if (NSSWCheck)
    return NSSWCheck;

  return ConstantInt::getFalse(IP->getContext());
}
Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
                                          Instruction *IP) {
  // Loop over all checks in this set.
  SmallVector<Value *> Checks;
  for (const auto *Pred : Union->getPredicates()) {
    Checks.push_back(expandCodeForPredicate(Pred, IP));
    Builder.SetInsertPoint(IP);
  }

  if (Checks.empty())
    return ConstantInt::getFalse(IP->getContext());
  return Builder.CreateOr(Checks);
}
Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
  auto *DefI = dyn_cast<Instruction>(V);
  if (!PreserveLCSSA || !DefI)
    return V;

  BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
  Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
  Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
  if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
    return V;

  // Create a temporary instruction at the current insertion point, so we
  // can hand it off to the helper to create LCSSA PHIs if required for the
  // new use.
  // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
  // would accept an insertion point and return an LCSSA phi for that
  // insertion point, so there is no need to insert & remove the temporary
  // instruction.
  Type *ToTy;
  if (DefI->getType()->isIntegerTy())
    ToTy = PointerType::get(DefI->getContext(), 0);
  else
    ToTy = Type::getInt32Ty(DefI->getContext());
  Instruction *User =
      CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
  auto RemoveUserOnExit =
      make_scope_exit([User]() { User->eraseFromParent(); });

  SmallVector<Instruction *, 1> ToUpdate;
  ToUpdate.push_back(DefI);
  SmallVector<PHINode *, 16> PHIsToRemove;
  SmallVector<PHINode *, 16> InsertedPHIs;
  formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
                           &InsertedPHIs);
  for (PHINode *PN : InsertedPHIs)
    rememberInstruction(PN);
  for (PHINode *PN : PHIsToRemove) {
    if (!PN->use_empty())
      continue;
    InsertedValues.erase(PN);
    InsertedPostIncValues.erase(PN);
    PN->eraseFromParent();
  }

  return User->getOperand(0);
}
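// Illustrative sketch of the temporary-user trick above (names hypothetical):
// to get an LCSSA phi for a definition %def that is used at an insertion point
// outside its defining loop, a throw-away cast "%tmp.lcssa.user" is created at
// that insertion point, formLCSSAForInstructions then inserts the required
// exit-block phis for %def, the scope_exit erases the temporary, and the
// (possibly rewritten) operand of the temporary is what gets returned.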
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
// UDiv expressions. We don't know if the UDiv is derived from an IR divide
// instruction, but the important thing is that we prove the denominator is
// nonzero before expansion.
//
// IVUsers already checks that IV-derived expressions are safe. So this check
// is only needed when the expression includes some subexpression that is not
// IV derived.
//
// Currently, we only allow division by a value provably non-zero here.
//
// We cannot generally expand recurrences unless the step dominates the loop
// header. The expander handles the special case of affine recurrences by
// scaling the recurrence outside the loop, but this technique isn't generally
// applicable. Expanding a nested recurrence outside a loop requires computing
// binomial coefficients. This could be done, but the recurrence has to be in a
// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
  ScalarEvolution &SE;
  bool CanonicalMode;
  bool IsUnsafe = false;

  SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
      : SE(SE), CanonicalMode(CanonicalMode) {}

  bool follow(const SCEV *S) {
    if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      if (!SE.isKnownNonZero(D->getRHS())) {
        IsUnsafe = true;
        return false;
      }
    }
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
      // For non-affine addrecs or in non-canonical mode we need a preheader
      // to insert into.
      if (!AR->getLoop()->getLoopPreheader() &&
          (!CanonicalMode || !AR->isAffine())) {
        IsUnsafe = true;
        return false;
      }
    }
    return true;
  }
  bool isDone() const { return IsUnsafe; }
};
} // namespace

bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
  SCEVFindUnsafe Search(SE, CanonicalMode);
  visitAll(S, Search);
  return !Search.IsUnsafe;
}
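// Illustrative example (hypothetical values): an expression such as
//   ((%n /u %m) + 4)
// is rejected by SCEVFindUnsafe unless %m is provably non-zero, because the
// expansion would materialize a udiv that must not be speculated above
// whatever guard establishes %m != 0.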
bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
                                    const Instruction *InsertionPoint) const {
  if (!isSafeToExpand(S))
    return false;
  // We have to prove that the expanded site of S dominates InsertionPoint.
  // This is easy when not in the same block, but hard when S is an instruction
  // to be expanded somewhere inside the same block as our insertion point.
  // What we really need here is something analogous to an OrderedBasicBlock,
  // but for the moment, we paper over the problem by handling two common and
  // cheap to check cases.
  if (SE.properlyDominates(S, InsertionPoint->getParent()))
    return true;
  if (SE.dominates(S, InsertionPoint->getParent())) {
    if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
      return true;
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
      if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
        return true;
  }
  return false;
}
void SCEVExpanderCleaner::cleanup() {
  // Result is used, nothing to remove.
  if (ResultUsed)
    return;

  // Restore original poison flags.
  for (auto [I, Flags] : Expander.OrigFlags)
    Flags.apply(I);

  auto InsertedInstructions = Expander.getAllInsertedInstructions();
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
                                            InsertedInstructions.end());
  (void)InsertedSet;
#endif
  // Remove sets with value handles.
  Expander.clear();

  // Remove all inserted instructions.
  for (Instruction *I : reverse(InsertedInstructions)) {
#ifndef NDEBUG
    assert(all_of(I->users(),
                  [&InsertedSet](Value *U) {
                    return InsertedSet.contains(cast<Instruction>(U));
                  }) &&
           "removed instruction should only be used by instructions inserted "
           "during expansion");
#endif
    assert(!I->getType()->isVoidTy() &&
           "inserted instruction should have non-void types");
    I->replaceAllUsesWith(PoisonValue::get(I->getType()));
    I->eraseFromParent();
  }
}