//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
#else
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
#endif

using namespace llvm;

cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));

using namespace PatternMatch;

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Value *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users()) {
    if (U->getType() != Ty)
      continue;
    CastInst *CI = dyn_cast<CastInst>(U);
    if (!CI || CI->getOpcode() != Op)
      continue;

    // Found a suitable cast that is at IP or comes before IP. Use it. Note that
    // the cast must also properly dominate the Builder's insertion point.
    if (IP->getParent() == CI->getParent() && &*BIP != CI &&
        (&*IP == CI || CI->comesBefore(&*IP))) {
      Ret = CI;
      break;
    }
  }

  // Create a new cast.
  if (!Ret) {
    SCEVInsertPointGuard Guard(Builder, this);
    Builder.SetInsertPoint(&*IP);
    Ret = Builder.CreateCast(Op, V, Ty, V->getName());
  }

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(!isa<Instruction>(Ret) ||
         SE.DT.dominates(cast<Instruction>(Ret), &*BIP));

  return Ret;
}

BasicBlock::iterator
SCEVExpander::findInsertPointAfter(Instruction *I,
                                   Instruction *MustDominate) const {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getParent()->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  // Adjust insert point to be after instructions inserted by the expander, so
  // we can re-use already inserted instructions. Avoid skipping past the
  // original \p MustDominate, in case it is an inserted instruction.
  while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
    ++IP;

  return IP;
}

BasicBlock::iterator
SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return IP;
  }

  // Cast the instruction immediately after the instruction.
  if (Instruction *I = dyn_cast<Instruction>(V))
    return findInsertPointAfter(I, &*Builder.GetInsertPoint());

  // Otherwise, this must be some kind of a constant,
  // so let's plop this cast into the function's entry block.
  assert(isa<Constant>(V) &&
         "Expected the cast argument to be a global/constant");
  return Builder.GetInsertBlock()
      ->getParent()
      ->getEntryBlock()
      .getFirstInsertionPt();
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // inttoptr only works for integral pointers. For non-integral pointers, we
  // can create a GEP on null with the integral value as index. Note that
  // it is safe to use GEP of null instead of inttoptr here, because only
  // expressions already based on a GEP of null should be converted to pointers
  // during expansion.
  if (Op == Instruction::IntToPtr) {
    auto *PtrTy = cast<PointerType>(Ty);
    if (DL.isNonIntegralPointerType(PtrTy))
      return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
  }
  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
              SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
              SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Try to reuse existing cast, or insert one.
  return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation, and hoisting
/// to an outer loop when the opportunity is there and it is safe.
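/// For example, a request for (add %a, %b) reuses an identical add found among
/// the few instructions just before the insertion point, provided reusing it
/// cannot introduce poison the requested flags do not allow (mismatched
/// nuw/nsw flags, or an exact flag).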
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS,
                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
        return Res;

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
        // Ensure that no-wrap flags match.
        if (isa<OverflowingBinaryOperator>(I)) {
          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
            return true;
          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
            return true;
        }
        // Conservatively, do not use any instruction which has any of exact
        // flags installed.
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  if (IsSafeToHoist) {
    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }
  }

  // If we haven't found this binop, insert it.
  // TODO: Use the Builder, which will make CreateBinOp below fold with
  // InstSimplifyFolder.
  Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  if (Flags & SCEV::FlagNUW)
    BO->setHasNoUnsignedWrap();
  if (Flags & SCEV::FlagNSW)
    BO->setHasNoSignedWrap();

  return BO;
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
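///
/// For example, rather than emitting ptrtoint+add+inttoptr for "%p + %offset",
/// this produces "%scevgep = getelementptr i8, ptr %p, i64 %offset".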
Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
  assert(!isa<Instruction>(V) ||
         SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

  Value *Idx = expand(Offset);

  // Fold a GEP with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(V))
    if (Constant *CRHS = dyn_cast<Constant>(Idx))
      return Builder.CreatePtrAdd(CLHS, CRHS);

  // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;
      if (IP->getOpcode() == Instruction::GetElementPtr &&
          IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
          cast<GEPOperator>(&*IP)->getSourceElementType() ==
              Builder.getInt8Ty())
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader->getTerminator());
  }

  // Emit a GEP.
  return Builder.CreatePtrAdd(V, Idx, "scevgep");
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  switch (S->getSCEVType()) {
  case scConstant:
  case scVScale:
    return nullptr; // A constant has no relevant loops.
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
  case scPtrToInt:
  case scAddExpr:
  case scMulExpr:
  case scUDivExpr:
  case scAddRecExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : S->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[S] = L;
  }
  case scUnknown: {
    const SCEVUnknown *U = cast<SCEVUnknown>(S);
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

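/// Expand a SCEV add expression as a sequence of adds, subs and GEPs. For
/// example, a non-constant negative operand such as (-1 * %b) in
/// (%a + (-1 * %b)) is emitted as "sub %a, %b" rather than a negate and an add.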
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
      continue;
    }

    assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
    if (isa<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is SCEVUnknown and not instructions, peek through
        // it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expand(SE.getNegativeSCEV(Op));
      Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
                        /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple add.
      Value *W = expand(Op);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum))
        std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
                        /*IsSafeToHoist*/ true);
      ++I;
    }
  }

  return Sum;
}

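/// Expand a SCEV mul expression. Repeated operands are expanded by binary
/// powering, e.g. X*X*X*X*X (X pow 5) takes two squarings plus one multiply
/// instead of four multiplies, and a power-of-2 constant factor such as 8 is
/// emitted as "shl ..., 3".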
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = S->getType();

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(S->operands()))
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
    // that are needed into the result.
    Value *P = expand(I->second);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
                      /*IsSafeToHoist*/ true);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P,
                                      SCEV::FlagAnyWrap,
                                      /*IsSafeToHoist*/ true)
                        : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        auto NWFlags = S->getNoWrapFlags();
        // clear nsw flag if shl will produce poison value.
        if (RHS->logBase2() == RHS->getBitWidth() - 1)
          NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
                           /*IsSafeToHoist*/ true);
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
                           /*IsSafeToHoist*/ true);
      }
      ++I;
    }
  }

  return Prod;
}

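/// Expand a SCEV udiv expression. A power-of-2 constant divisor is emitted as
/// a logical shift right, e.g. (%a /u 16) becomes "lshr %a, 4"; other divisions
/// are only hoisted out of loops when the divisor is known to be non-zero.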
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Value *LHS = expand(S->getLHS());
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(SC->getType(), RHS.logBase2()),
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  }

  Value *RHS = expand(S->getRHS());
  return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
                     /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (Use &Op : llvm::drop_begin(IncV->operands()))
      if (Instruction *OInst = dyn_cast<Instruction>(Op))
        if (!SE.DT.dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  if (IncV == PN)
    return true;

  return isNormalAddRecExprPHI(PN, IncV, L);
}

/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of the
/// simple patterns generated by getAddRecExprPHILiterally and
/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
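///
/// For example, for the increment "%iv.next = add i64 %iv, 4" this returns the
/// instruction defining %iv, since the constant step operand trivially
/// dominates InsertPos.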
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
  case Instruction::Add:
  case Instruction::Sub: {
    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
    if (!OInst || SE.DT.dominates(OInst, InsertPos))
      return dyn_cast<Instruction>(IncV->getOperand(0));
    return nullptr;
  }
  case Instruction::BitCast:
    return dyn_cast<Instruction>(IncV->getOperand(0));
  case Instruction::GetElementPtr:
    for (Use &U : llvm::drop_begin(IncV->operands())) {
      if (isa<Constant>(U))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(U)) {
        if (!SE.DT.dominates(OInst, InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // GEPs produced by SCEVExpander use i8 element type.
      if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(IncV->getOperand(0));
  }
}

/// If the insert point of the current builder or any of the builders on the
/// stack of saved builders has 'I' as its insert point, update it to point to
/// the instruction after 'I'. This is intended to be used when the instruction
/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
/// different block, the inconsistent insert point (with a mismatched
/// Instruction and Block) can lead to an instruction being inserted in a block
/// other than its parent.
void SCEVExpander::fixupInsertPoints(Instruction *I) {
  BasicBlock::iterator It(*I);
  BasicBlock::iterator NewInsertPt = std::next(It);
  if (Builder.GetInsertPoint() == It)
    Builder.SetInsertPoint(&*NewInsertPt);
  for (auto *InsertPtGuard : InsertPointGuards)
    if (InsertPtGuard->GetInsertPoint() == It)
      InsertPtGuard->SetInsertPoint(NewInsertPt);
}

/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
                              bool RecomputePoisonFlags) {
  auto FixupPoisonFlags = [this](Instruction *I) {
    // Drop flags that are potentially inferred from old context and infer flags
    // in new context.
    I->dropPoisonGeneratingFlags();
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
      if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
        auto *BO = cast<BinaryOperator>(I);
        BO->setHasNoUnsignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
        BO->setHasNoSignedWrap(
            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
      }
  };

  if (SE.DT.dominates(IncV, InsertPos)) {
    if (RecomputePoisonFlags)
      FixupPoisonFlags(IncV);
    return true;
  }

  // InsertPos must itself dominate IncV so that IncV's new position satisfies
  // its existing users.
  if (isa<PHINode>(InsertPos) ||
      !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
    return false;

  if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
    return false;

  // Check that the chain of IV operands leading back to Phi can be hoisted.
  SmallVector<Instruction*, 4> IVIncs;
  for(;;) {
    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
    if (!Oper)
      return false;
    // IncV is safe to hoist.
    IVIncs.push_back(IncV);
    IncV = Oper;
    if (SE.DT.dominates(IncV, InsertPos))
      break;
  }
  for (Instruction *I : llvm::reverse(IVIncs)) {
    fixupInsertPoints(I);
    I->moveBefore(InsertPos);
    if (RecomputePoisonFlags)
      FixupPoisonFlags(I);
  }
  return true;
}

/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddtoGEP.
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                           const Loop *L) {
  for(Instruction *IVOper = IncV;
      (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
                                /*allowScale=*/false));) {
    if (IVOper == PN)
      return true;
  }
  return false;
}

/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
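///
/// A pointer IV is stepped with a GEP off the phi; an integer IV gets an add
/// (or a sub when useSubtract is set) named "<IVName>.iv.next".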
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                                 bool useSubtract) {
  Value *IncV;
  // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  if (PN->getType()->isPointerTy()) {
    IncV = expandAddToGEP(SE.getSCEV(StepV), PN);
  } else {
    IncV = useSubtract ?
      Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
      Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
  }
  return IncV;
}

/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
                                    const SCEVAddRecExpr *Phi,
                                    const SCEVAddRecExpr *Requested,
                                    bool &InvertStep) {
  // We can't transform to match a pointer PHI.
  Type *PhiTy = Phi->getType();
  Type *RequestedTy = Requested->getType();
  if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
    return false;

  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
    return false;

  // Try truncate it if necessary.
  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  if (!Phi)
    return false;

  // Check whether truncation will help.
  if (Phi == Requested) {
    InvertStep = false;
    return true;
  }

  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
  if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
    InvertStep = true;
    return true;
  }

  return false;
}

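/// IsIncrementNSW/IsIncrementNUW check whether the addrec's increment may carry
/// the nsw/nuw flag by redoing the add in a type twice as wide: the flag is
/// safe exactly when ext(AR + Step) equals ext(AR) + ext(Step) in that type.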
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
                                            SE.getSignExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
                                            SE.getZeroExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L, Type *&TruncTy,
                                        bool &InvertStep) {
  assert((!IVIncInsertLoop || IVIncInsertPos) &&
         "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    PHINode *AddRecPhiMatch = nullptr;
    Instruction *IncV = nullptr;
    TruncTy = nullptr;
    InvertStep = false;

    // Only try partially matching scevs that need truncation and/or
    // step-inversion if we know this loop is outside the current loop.
    bool TryNonMatchingSCEV =
        IVIncInsertLoop &&
        SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());

    for (PHINode &PN : L->getHeader()->phis()) {
      if (!SE.isSCEVable(PN.getType()))
        continue;

      // We should not look for a incomplete PHI. Getting SCEV for a incomplete
      // PHI has no meaning at all.
      if (!PN.isComplete()) {
        SCEV_DEBUG_WITH_TYPE(
            DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
        continue;
      }

      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
      if (!PhiSCEV)
        continue;

      bool IsMatchingSCEV = PhiSCEV == Normalized;
      // We only handle truncation and inversion of phi recurrences for the
      // expanded expression if the expanded expression's loop dominates the
      // loop we insert to. Check now, so we can bail out early.
      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
        continue;

      // TODO: this possibly can be reworked to avoid this cast at all.
      Instruction *TempIncV =
          dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
        // Record the phi node. But don't stop we might find an exact match
        // SCEV later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = Normalized->getType();
      }
    }

    if (AddRecPhiMatch) {
      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(IncV);
      // Those values were not actually inserted but re-used.
      ReusedValues.insert(AddRecPhiMatch);
      ReusedValues.insert(IncV);
      return AddRecPhiMatch;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV =
      expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());

  // StartV must have been be inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that PHI
  // reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  Type *ExpandTy = Normalized->getType();
  // If the stride is negative, insert a sub instead of an add for the increment
  // (unless it's a constant, because subtracts of constants are canonicalized
  // to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());

  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  // we actually do emit an addition. It does not apply if we emit a
  // subtraction.
  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
                                  Twine(IVName) + ".iv");

  // Create the step instructions and populate the PHI.
  for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
    BasicBlock *Pred = *HPI;

    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
    // instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, useSubtract);

    if (isa<OverflowingBinaryOperator>(IncV)) {
      if (IncrementIsNUW)
        cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
      if (IncrementIsNSW)
        cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
    }
    PN->addIncoming(IncV, Pred);
  }

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
  // effective when we are able to use an IV inserted here, so record it.
  InsertedValues.insert(PN);
  InsertedIVs.push_back(PN);
  return PN;
}

Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized = cast<SCEVAddRecExpr>(
        normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
  }

  [[maybe_unused]] const SCEV *Start = Normalized->getStart();
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  assert(SE.properlyDominates(Start, L->getHeader()) &&
         "Start does not properly dominate loop header");
  assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");

  // In some cases, we decide to reuse an existing phi node but need to truncate
  // it and/or invert the step.
  Type *TruncTy = nullptr;
  bool InvertStep = false;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // We might be introducing a new use of the post-inc IV that is not poison
    // safe, in which case we should drop poison generating flags. Only keep
    // those flags for which SCEV has proven that they always hold.
    if (isa<OverflowingBinaryOperator>(Result)) {
      auto *I = cast<Instruction>(Result);
      if (!S->hasNoUnsignedWrap())
        I->setHasNoUnsignedWrap(false);
      if (!S->hasNoSignedWrap())
        I->setHasNoSignedWrap(false);
    }

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Result) &&
        !SE.DT.dominates(cast<Instruction>(Result),
                         &*Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside whose operand is replaced during
      // expansion with the value of the postinc user. Without fundamentally
      // changing the way postinc users are tracked, the only remedy is
      // inserting an extra IV increment. StepV might fold into PostLoopOffset,
      // but hopefully expandCodeFor handles that.
      bool useSubtract =
          !S->getType()->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(Step);
      Value *StepV;
      {
        // Expand the step somewhere that dominates the loop header.
        SCEVInsertPointGuard Guard(Builder, this);
        StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
      }
      Result = expandIVInc(PN, StepV, L, useSubtract);
    }
  }

  // We have decided to reuse an induction variable of a dominating loop. Apply
  // truncation and/or inversion of the step.
  if (TruncTy) {
    // Truncate the result.
    if (TruncTy != Result->getType())
      Result = Builder.CreateTrunc(Result, TruncTy);

    // Invert the result.
    if (InvertStep)
      Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
  }

  return Result;
}

*SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr
*S
) {
1146 // In canonical mode we compute the addrec as an expression of a canonical IV
1147 // using evaluateAtIteration and expand the resulting SCEV expression. This
1148 // way we avoid introducing new IVs to carry on the computation of the addrec
1149 // throughout the loop.
1151 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1152 // type wider than the addrec itself. Emitting a canonical IV of the
1153 // proper type might produce non-legal types, for example expanding an i64
1154 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1155 // back to non-canonical mode for nested addrecs.
1156 if (!CanonicalMode
|| (S
->getNumOperands() > 2))
1157 return expandAddRecExprLiterally(S
);
1159 Type
*Ty
= SE
.getEffectiveSCEVType(S
->getType());
1160 const Loop
*L
= S
->getLoop();
1162 // First check for an existing canonical IV in a suitable type.
1163 PHINode
*CanonicalIV
= nullptr;
1164 if (PHINode
*PN
= L
->getCanonicalInductionVariable())
1165 if (SE
.getTypeSizeInBits(PN
->getType()) >= SE
.getTypeSizeInBits(Ty
))
1168 // Rewrite an AddRec in terms of the canonical induction variable, if
1169 // its type is more narrow.
1171 SE
.getTypeSizeInBits(CanonicalIV
->getType()) > SE
.getTypeSizeInBits(Ty
) &&
1172 !S
->getType()->isPointerTy()) {
1173 SmallVector
<const SCEV
*, 4> NewOps(S
->getNumOperands());
1174 for (unsigned i
= 0, e
= S
->getNumOperands(); i
!= e
; ++i
)
1175 NewOps
[i
] = SE
.getAnyExtendExpr(S
->getOperand(i
), CanonicalIV
->getType());
1176 Value
*V
= expand(SE
.getAddRecExpr(NewOps
, S
->getLoop(),
1177 S
->getNoWrapFlags(SCEV::FlagNW
)));
1178 BasicBlock::iterator NewInsertPt
=
1179 findInsertPointAfter(cast
<Instruction
>(V
), &*Builder
.GetInsertPoint());
1180 V
= expand(SE
.getTruncateExpr(SE
.getUnknown(V
), Ty
), NewInsertPt
);
1184 // {X,+,F} --> X + {0,+,F}
1185 if (!S
->getStart()->isZero()) {
1186 if (isa
<PointerType
>(S
->getType())) {
1187 Value
*StartV
= expand(SE
.getPointerBase(S
));
1188 return expandAddToGEP(SE
.removePointerBase(S
), StartV
);
1191 SmallVector
<const SCEV
*, 4> NewOps(S
->operands());
1192 NewOps
[0] = SE
.getConstant(Ty
, 0);
1193 const SCEV
*Rest
= SE
.getAddRecExpr(NewOps
, L
,
1194 S
->getNoWrapFlags(SCEV::FlagNW
));
1196 // Just do a normal add. Pre-expand the operands to suppress folding.
1198 // The LHS and RHS values are factored out of the expand call to make the
1199 // output independent of the argument evaluation order.
1200 const SCEV
*AddExprLHS
= SE
.getUnknown(expand(S
->getStart()));
1201 const SCEV
*AddExprRHS
= SE
.getUnknown(expand(Rest
));
1202 return expand(SE
.getAddExpr(AddExprLHS
, AddExprRHS
));
1205 // If we don't yet have a canonical IV, create one.
1207 // Create and insert the PHI node for the induction variable in the
1209 BasicBlock
*Header
= L
->getHeader();
1210 pred_iterator HPB
= pred_begin(Header
), HPE
= pred_end(Header
);
1211 CanonicalIV
= PHINode::Create(Ty
, std::distance(HPB
, HPE
), "indvar");
1212 CanonicalIV
->insertBefore(Header
->begin());
1213 rememberInstruction(CanonicalIV
);
1215 SmallSet
<BasicBlock
*, 4> PredSeen
;
1216 Constant
*One
= ConstantInt::get(Ty
, 1);
1217 for (pred_iterator HPI
= HPB
; HPI
!= HPE
; ++HPI
) {
1218 BasicBlock
*HP
= *HPI
;
1219 if (!PredSeen
.insert(HP
).second
) {
1220 // There must be an incoming value for each predecessor, even the
1222 CanonicalIV
->addIncoming(CanonicalIV
->getIncomingValueForBlock(HP
), HP
);
1226 if (L
->contains(HP
)) {
1227 // Insert a unit add instruction right before the terminator
1228 // corresponding to the back-edge.
1229 Instruction
*Add
= BinaryOperator::CreateAdd(CanonicalIV
, One
,
1231 HP
->getTerminator());
1232 Add
->setDebugLoc(HP
->getTerminator()->getDebugLoc());
1233 rememberInstruction(Add
);
1234 CanonicalIV
->addIncoming(Add
, HP
);
1236 CanonicalIV
->addIncoming(Constant::getNullValue(Ty
), HP
);
1241 // {0,+,1} --> Insert a canonical induction variable into the loop!
1242 if (S
->isAffine() && S
->getOperand(1)->isOne()) {
1243 assert(Ty
== SE
.getEffectiveSCEVType(CanonicalIV
->getType()) &&
1244 "IVs with types different from the canonical IV should "
1245 "already have been handled!");
1249 // {0,+,F} --> {0,+,1} * F
1251 // If this is a simple linear addrec, emit it now as a special case.
1252 if (S
->isAffine()) // {0,+,F} --> i*F
1254 expand(SE
.getTruncateOrNoop(
1255 SE
.getMulExpr(SE
.getUnknown(CanonicalIV
),
1256 SE
.getNoopOrAnyExtend(S
->getOperand(1),
1257 CanonicalIV
->getType())),
1260 // If this is a chain of recurrences, turn it into a closed form, using the
1261 // folders, then expandCodeFor the closed form. This allows the folders to
1262 // simplify the expression without having to build a bunch of special code
1263 // into this folder.
1264 const SCEV
*IH
= SE
.getUnknown(CanonicalIV
); // Get I as a "symbolic" SCEV.
1266 // Promote S up to the canonical IV type, if the cast is foldable.
1267 const SCEV
*NewS
= S
;
1268 const SCEV
*Ext
= SE
.getNoopOrAnyExtend(S
, CanonicalIV
->getType());
1269 if (isa
<SCEVAddRecExpr
>(Ext
))
1272 const SCEV
*V
= cast
<SCEVAddRecExpr
>(NewS
)->evaluateAtIteration(IH
, SE
);
1274 // Truncate the result down to the original type, if needed.
1275 const SCEV
*T
= SE
.getTruncateOrNoop(V
, Ty
);
Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
  Value *V = expand(S->getOperand());
  return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
                           GetOptimalInsertionPointForCastOf(V));
}

Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateTrunc(V, S->getType());
}

Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateZExt(V, S->getType(), "",
                            SE.isKnownNonNegative(S->getOperand()));
}

Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  Value *V = expand(S->getOperand());
  return Builder.CreateSExt(V, S->getType());
}

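/// Expand a min/max expression as a chain that combines operands starting from
/// the last one, using llvm.smax/umax/smin/umin intrinsics for integer types
/// and icmp+select for pointer types. For the sequential umin form, every
/// operand except the first is frozen before it is combined.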
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
                                      Intrinsic::ID IntrinID, Twine Name,
                                      bool IsSequential) {
  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
  if (IsSequential)
    LHS = Builder.CreateFreeze(LHS);
  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
    Value *RHS = expand(S->getOperand(i));
    if (IsSequential && i != 0)
      RHS = Builder.CreateFreeze(RHS);
    Value *Sel;
    if (Ty->isIntegerTy())
      Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
                                    /*FMFSource=*/nullptr, Name);
    else {
      Value *ICmp =
          Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
      Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
    }
    LHS = Sel;
  }
  return LHS;
}

Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::smax, "smax");
}

Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::umax, "umax");
}

Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::smin, "smin");
}

Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::umin, "umin");
}

Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
  return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
}

Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
  return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
                                   BasicBlock::iterator IP) {
  setInsertPoint(IP);
  Value *V = expandCodeFor(SH, Ty);
  return V;
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
  // Expand the code for this SCEV.
  Value *V = expand(SH);

  if (Ty) {
    assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
           "non-trivial casts should be done with the SCEVs directly!");
    V = InsertNoopCastOfTo(V, Ty);
  }
  return V;
}

Value *SCEVExpander::FindValueInExprValueMap(
    const SCEV *S, const Instruction *InsertPt,
    SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
  // If the expansion is not in CanonicalMode, and the SCEV contains any
  // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
  if (!CanonicalMode && SE.containsAddRecurrence(S))
    return nullptr;

  // If S is a constant, it may be worse to reuse an existing Value.
  if (isa<SCEVConstant>(S))
    return nullptr;

  for (Value *V : SE.getSCEVValues(S)) {
    Instruction *EntInst = dyn_cast<Instruction>(V);
    if (!EntInst)
      continue;

    // Choose a Value from the set which dominates the InsertPt.
    // InsertPt should be inside the Value's parent loop so as not to break
    // LCSSA form.
    assert(EntInst->getFunction() == InsertPt->getFunction());
    if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
        !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
          SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
      continue;

    // Make sure reusing the instruction is poison-safe.
    if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
      return V;
    DropPoisonGeneratingInsts.clear();
  }
  return nullptr;
}

// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
// the expansion will try to reuse Value from ExprValueMap, and only when it
// fails, expand the SCEV literally.
Value *SCEVExpander::expand(const SCEV *S) {
  // Compute an insertion point for this SCEV object. Hoist the instructions
  // as far out in the loop nest as possible.
  BasicBlock::iterator InsertPt = Builder.GetInsertPoint();

  // We can move insertion point only if there is no div or rem operations
  // otherwise we are risky to move it over the check for zero denominator.
  auto SafeToHoist = [](const SCEV *S) {
    return !SCEVExprContains(S, [](const SCEV *S) {
      if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
        if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
          // Division by non-zero constants can be hoisted.
          return SC->getValue()->isZero();
        // All other divisions should not be moved as they may be
        // divisions by zero and should be kept within the
        // conditions of the surrounding loops that guard their
        // execution (see PR35406).
        return true;
      }
      return false;
    });
  };
  if (SafeToHoist(S)) {
    for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
         L = L->getParentLoop()) {
      if (SE.isLoopInvariant(S, L)) {
        if (!L) break;
        if (BasicBlock *Preheader = L->getLoopPreheader()) {
          InsertPt = Preheader->getTerminator()->getIterator();
        } else {
          // LSR sets the insertion point for AddRec start/step values to the
          // block start to simplify value reuse, even though it's an invalid
          // position. SCEVExpander must correct for this in all cases.
          InsertPt = L->getHeader()->getFirstInsertionPt();
        }
      } else {
        // If the SCEV is computable at this level, insert it into the header
        // after the PHIs (and after any other instructions that we've inserted
        // there) so that it is guaranteed to dominate any user inside the loop.
        if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
          InsertPt = L->getHeader()->getFirstInsertionPt();

        while (InsertPt != Builder.GetInsertPoint() &&
               (isInsertedInstruction(&*InsertPt) ||
                isa<DbgInfoIntrinsic>(&*InsertPt))) {
          InsertPt = std::next(InsertPt);
        }
        break;
      }
    }
  }

  // Check to see if we already expanded this here.
  auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
  if (I != InsertedExpressions.end())
    return I->second;

  SCEVInsertPointGuard Guard(Builder, this);
  Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);

  // Expand the expression into instructions.
  SmallVector<Instruction *> DropPoisonGeneratingInsts;
  Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
  if (!V) {
    V = visit(S);
    V = fixupLCSSAFormFor(V);
  } else {
    for (Instruction *I : DropPoisonGeneratingInsts) {
      I->dropPoisonGeneratingFlagsAndMetadata();
      // See if we can re-infer from first principles any of the flags we just
      // dropped.
      if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
        if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
          auto *BO = cast<BinaryOperator>(I);
          BO->setHasNoUnsignedWrap(
              ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
          BO->setHasNoSignedWrap(
              ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
        }
      if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
        auto *Src = NNI->getOperand(0);
        if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
                                    Constant::getNullValue(Src->getType()), I,
                                    DL).value_or(false))
          NNI->setNonNeg(true);
      }
    }
  }
  // Remember the expanded value for this SCEV at this location.
  //
  // This is independent of PostIncLoops. The mapped value simply materializes
  // the expression at this insertion point. If the mapped value happened to be
  // a postinc expansion, it could be reused by a non-postinc user, but only if
  // its insertion point was already at the head of the loop.
  InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
  return V;
}

void SCEVExpander::rememberInstruction(Value *I) {
  auto DoInsert = [this](Value *V) {
    if (!PostIncLoops.empty())
      InsertedPostIncValues.insert(V);
    else
      InsertedValues.insert(V);
  };
  DoInsert(I);
}

/// replaceCongruentIVs - Check for congruent phis in this loop header and
/// replace them with their most canonical representative. Return the number of
/// phis eliminated.
///
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
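///
/// For example, if two i64 phis in the loop header compute the same
/// {0,+,1}<%loop> recurrence, the redundant one (and, where possible, its
/// increment) is replaced by the other and queued in DeadInsts.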
unsigned
SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts,
                                  const TargetTransformInfo *TTI) {
  // Find integer phis in order of increasing width.
  SmallVector<PHINode*, 8> Phis;
  for (PHINode &PN : L->getHeader()->phis())
    Phis.push_back(&PN);

  if (TTI)
    // Use stable_sort to preserve order of equivalent PHIs, so the order
    // of the sorted Phis is the same from run to run on the same loop.
    llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
      // Put pointers at the back and make sure pointer < pointer = false.
      if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
        return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
      return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
             LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
    });

  unsigned NumElim = 0;
  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
  // Process phis from wide to narrow. Map wide phis to their truncation
  // so narrow phis can reuse them.
  for (PHINode *Phi : Phis) {
    auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
      if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
        return V;
      if (!SE.isSCEVable(PN->getType()))
        return nullptr;
      auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
      if (!Const)
        return nullptr;
      return Const->getValue();
    };

    // Fold constant phis. They may be congruent to other constant phis and
    // would confuse the logic below that expects proper IVs.
    if (Value *V = SimplifyPHINode(Phi)) {
      if (V->getType() != Phi->getType())
        continue;
      SE.forgetValue(Phi);
      Phi->replaceAllUsesWith(V);
      DeadInsts.emplace_back(Phi);
      ++NumElim;
      SCEV_DEBUG_WITH_TYPE(DebugType,
                           dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
                                  << '\n');
      continue;
    }

    if (!SE.isSCEVable(Phi->getType()))
      continue;

    PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
    if (!OrigPhiRef) {
      OrigPhiRef = Phi;
      if (Phi->getType()->isIntegerTy() && TTI &&
          TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
        // Make sure we only rewrite using simple induction variables;
        // otherwise, we can make the trip count of a loop unanalyzable
        // to SCEV.
        const SCEV *PhiExpr = SE.getSCEV(Phi);
        if (isa<SCEVAddRecExpr>(PhiExpr)) {
          // This phi can be freely truncated to the narrowest phi type. Map the
          // truncated expression to it so it will be reused for narrow types.
          const SCEV *TruncExpr =
              SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
          ExprToIVMap[TruncExpr] = Phi;
        }
      }
      continue;
    }

    // Replacing a pointer phi with an integer phi or vice-versa doesn't make
    // sense.
    if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
      continue;

    if (BasicBlock *LatchBlock = L->getLoopLatch()) {
      Instruction *OrigInc = dyn_cast<Instruction>(
          OrigPhiRef->getIncomingValueForBlock(LatchBlock));
      Instruction *IsomorphicInc =
          dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));

      if (OrigInc && IsomorphicInc) {
        // If this phi has the same width but is more canonical, replace the
        // original with it. As part of the "more canonical" determination,
        // respect a prior decision to use an IV chain.
        if (OrigPhiRef->getType() == Phi->getType() &&
            !(ChainedPhis.count(Phi) ||
              isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
            (ChainedPhis.count(Phi) ||
             isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
          std::swap(OrigPhiRef, Phi);
          std::swap(OrigInc, IsomorphicInc);
        }
        // Replacing the congruent phi is sufficient because acyclic
        // redundancy elimination, CSE/GVN, should handle the
        // rest. However, once SCEV proves that a phi is congruent,
        // it's often the head of an IV user cycle that is isomorphic
        // with the original phi. It's worth eagerly cleaning up the
        // common case of a single IV increment so that DeleteDeadPHIs
        // can remove cycles that had postinc uses.
        // Because we may potentially introduce a new use of OrigIV that didn't
        // exist before at this point, its poison flags need readjustment.
        const SCEV *TruncExpr =
            SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
        if (OrigInc != IsomorphicInc &&
            TruncExpr == SE.getSCEV(IsomorphicInc) &&
            SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
            hoistIVInc(OrigInc, IsomorphicInc, /*RecomputePoisonFlags*/ true)) {
          SCEV_DEBUG_WITH_TYPE(
              DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: "
                                << *IsomorphicInc << '\n');
          Value *NewInc = OrigInc;
          if (OrigInc->getType() != IsomorphicInc->getType()) {
            BasicBlock::iterator IP;
            if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
              IP = PN->getParent()->getFirstInsertionPt();
            else
              IP = OrigInc->getNextNonDebugInstruction()->getIterator();

            IRBuilder<> Builder(IP->getParent(), IP);
            Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
            NewInc = Builder.CreateTruncOrBitCast(
                OrigInc, IsomorphicInc->getType(), IVName);
          }
          IsomorphicInc->replaceAllUsesWith(NewInc);
          DeadInsts.emplace_back(IsomorphicInc);
        }
      }
    }
    SCEV_DEBUG_WITH_TYPE(DebugType,
                         dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
                                << '\n');
    SCEV_DEBUG_WITH_TYPE(
        DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
    ++NumElim;
    Value *NewIV = OrigPhiRef;
    if (OrigPhiRef->getType() != Phi->getType()) {
      IRBuilder<> Builder(L->getHeader(),
                          L->getHeader()->getFirstInsertionPt());
      Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
      NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef
, Phi
->getType(), IVName
);
1669 Phi
->replaceAllUsesWith(NewIV
);
1670 DeadInsts
.emplace_back(Phi
);
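// Illustrative sketch (comment only): given two integer phis that compute the
// same recurrence, e.g.
//
//   loop:
//     %i = phi i64 [ 0, %preheader ], [ %i.next, %loop ]
//     %j = phi i64 [ 0, %preheader ], [ %j.next, %loop ]
//     %i.next = add nuw nsw i64 %i, 1
//     %j.next = add nuw nsw i64 %j, 1
//
// both phis map to the SCEV {0,+,1}<%loop>, so the one visited second is
// considered congruent: %j is replaced by %i, %j.next by %i.next (after
// hoisting %i.next if necessary), and the dead instructions are queued on
// DeadInsts for later removal.
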
bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
                                               const Instruction *At,
                                               Loop *L) {
  using namespace llvm::PatternMatch;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Look for suitable value in simple conditions at the loop exits.
  for (BasicBlock *BB : ExitingBlocks) {
    ICmpInst::Predicate Pred;
    Instruction *LHS, *RHS;

    if (!match(BB->getTerminator(),
               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
                    m_BasicBlock(), m_BasicBlock())))
      continue;

    if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
      return true;

    if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
      return true;
  }

  // Use expand's logic which is used for reusing a previous Value in
  // ExprValueMap. Note that we don't currently model the cost of
  // needing to drop poison generating flags on the instruction if we
  // want to reuse it. We effectively assume that has zero cost.
  SmallVector<Instruction *> DropPoisonGeneratingInsts;
  return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
}

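// Illustrative sketch (comment only): if an exiting block already compares the
// value we would otherwise expand, e.g.
//
//   %n = load i64, ptr %p
//   %i.next = add nuw i64 %i, 1
//   %cmp = icmp ult i64 %i.next, %n
//   br i1 %cmp, label %loop, label %exit
//
// then a query for the SCEV of %i.next at a point that %i.next dominates is
// answered by the exit-condition scan above, and the expression is treated as
// already available (i.e. free).
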
template <typename T>
static InstructionCost costAndCollectOperands(
    const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
    TargetTransformInfo::TargetCostKind CostKind,
    SmallVectorImpl<SCEVOperand> &Worklist) {

  const T *S = cast<T>(WorkItem.S);
  InstructionCost Cost = 0;
  // Object to help map SCEV operands to expanded IR instructions.
  struct OperationIndices {
    OperationIndices(unsigned Opc, size_t min, size_t max)
        : Opcode(Opc), MinIdx(min), MaxIdx(max) {}
    unsigned Opcode;
    size_t MinIdx;
    size_t MaxIdx;
  };

  // Collect the operations of all the instructions that will be needed to
  // expand the SCEVExpr. This is so that when we come to cost the operands,
  // we know what the generated user(s) will be.
  SmallVector<OperationIndices, 2> Operations;

  auto CastCost = [&](unsigned Opcode) -> InstructionCost {
    Operations.emplace_back(Opcode, 0, 0);
    return TTI.getCastInstrCost(Opcode, S->getType(),
                                S->getOperand(0)->getType(),
                                TTI::CastContextHint::None, CostKind);
  };

  auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
                       unsigned MinIdx = 0,
                       unsigned MaxIdx = 1) -> InstructionCost {
    Operations.emplace_back(Opcode, MinIdx, MaxIdx);
    return NumRequired *
           TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
  };

  auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
                        unsigned MaxIdx) -> InstructionCost {
    Operations.emplace_back(Opcode, MinIdx, MaxIdx);
    Type *OpType = S->getType();
    return NumRequired * TTI.getCmpSelInstrCost(
                             Opcode, OpType, CmpInst::makeCmpResultType(OpType),
                             CmpInst::BAD_ICMP_PREDICATE, CostKind);
  };

  switch (S->getSCEVType()) {
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  case scUnknown:
  case scConstant:
  case scVScale:
    return 0;
  case scPtrToInt:
    Cost = CastCost(Instruction::PtrToInt);
    break;
  case scTruncate:
    Cost = CastCost(Instruction::Trunc);
    break;
  case scZeroExtend:
    Cost = CastCost(Instruction::ZExt);
    break;
  case scSignExtend:
    Cost = CastCost(Instruction::SExt);
    break;
  case scUDivExpr: {
    unsigned Opcode = Instruction::UDiv;
    if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
      if (SC->getAPInt().isPowerOf2())
        Opcode = Instruction::LShr;
    Cost = ArithCost(Opcode, 1);
    break;
  }
  case scAddExpr:
    Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
    break;
  case scMulExpr:
    // TODO: this is a very pessimistic cost modelling for Mul,
    // because of Bin Pow algorithm actually used by the expander,
    // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
    Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
    break;
  case scSMaxExpr:
  case scUMaxExpr:
  case scSMinExpr:
  case scUMinExpr:
  case scSequentialUMinExpr: {
    // FIXME: should this ask the cost for Intrinsic's?
    // The reduction tree.
    Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
    Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
    switch (S->getSCEVType()) {
    case scSequentialUMinExpr: {
      // The safety net against poison.
      // FIXME: this is broken.
      Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
      Cost += ArithCost(Instruction::Or,
                        S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
      Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
      break;
    }
    default:
      assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
             "Unhandled SCEV expression type?");
      break;
    }
    break;
  }
  case scAddRecExpr: {
    // In this polynomial, we may have some zero operands, and we shouldn't
    // really charge for those. So how many non-zero coefficients are there?
    int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
      return !Op->isZero();
    });

    assert(NumTerms >= 1 && "Polynomial should have at least one term.");
    assert(!(*std::prev(S->operands().end()))->isZero() &&
           "Last operand should not be zero");

    // Ignoring the constant term (operand 0), how many of the coefficients are
    // u> 1?
    int NumNonZeroDegreeNonOneTerms =
        llvm::count_if(S->operands(), [](const SCEV *Op) {
          auto *SConst = dyn_cast<SCEVConstant>(Op);
          return !SConst || SConst->getAPInt().ugt(1);
        });

    // Much like with a normal add expr, the polynomial will require
    // one less addition than the number of its terms.
    InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
                                        /*MinIdx*/ 1, /*MaxIdx*/ 1);
    // Here, *each* one of those will require a multiplication.
    InstructionCost MulCost =
        ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
    Cost = AddCost + MulCost;

    // What is the degree of this polynomial?
    int PolyDegree = S->getNumOperands() - 1;
    assert(PolyDegree >= 1 && "Should be at least affine.");

    // The final term will be:
    //   Op_{PolyDegree} * x ^ {PolyDegree}
    // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
    // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1}, so charging for
    // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
    // FIXME: this is conservatively correct, but might be overly pessimistic.
    Cost += MulCost * (PolyDegree - 1);
    break;
  }
  }

  for (auto &CostOp : Operations) {
    for (auto SCEVOp : enumerate(S->operands())) {
      // Clamp the index to account for multiple IR operations being chained.
      size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
      size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
      Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
    }
  }
  return Cost;
}

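// Worked example (comment only) of the scAddRecExpr costing above for the
// affine recurrence {0,+,4}:
//   NumTerms                    = 1  (the start value 0 is a zero term)
//   NumNonZeroDegreeNonOneTerms = 1  (the step 4 is a constant u> 1)
//   AddCost = 0 additions, MulCost = 1 multiply, PolyDegree = 1
//   Cost    = AddCost + MulCost + MulCost * (PolyDegree - 1) = one multiply,
// which matches an expansion of the form "4 * <canonical IV>". All figures
// are in units of the corresponding TTI opcode cost, not instruction counts.
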
bool SCEVExpander::isHighCostExpansionHelper(
    const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
    InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
    SmallPtrSetImpl<const SCEV *> &Processed,
    SmallVectorImpl<SCEVOperand> &Worklist) {
  if (Cost > Budget)
    return true; // Already run out of budget, give up.

  const SCEV *S = WorkItem.S;
  // Was the cost of expansion of this expression already accounted for?
  if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
    return false; // We have already accounted for this expression.

  // If we can find an existing value for this scev available at the point "At"
  // then consider the expression cheap.
  if (hasRelatedExistingExpansion(S, &At, L))
    return false; // Consider the expression to be free.

  TargetTransformInfo::TargetCostKind CostKind =
      L->getHeader()->getParent()->hasMinSize()
          ? TargetTransformInfo::TCK_CodeSize
          : TargetTransformInfo::TCK_RecipThroughput;

  switch (S->getSCEVType()) {
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  case scUnknown:
  case scVScale:
    // Assume to be zero-cost.
    return false;
  case scConstant: {
    // Only evaluate the costs of constants when optimizing for size.
    if (CostKind != TargetTransformInfo::TCK_CodeSize)
      return false;
    const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
    Type *Ty = S->getType();
    Cost += TTI.getIntImmCostInst(
        WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
    return Cost > Budget;
  }
  case scTruncate:
  case scPtrToInt:
  case scZeroExtend:
  case scSignExtend: {
    Cost +=
        costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
    return false; // Will answer upon next entry into this function.
  }
  case scUDivExpr: {
    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
    // HowManyLessThans produced to compute a precise expression, rather than a
    // UDiv from the user's code. If we can't find a UDiv in the code with some
    // simple searching, we need to account for its cost.

    // At the beginning of this function we already tried to find an existing
    // value for plain 'S'. Now try to lookup 'S + 1' since it is a common
    // pattern involving division. This is just a simple search heuristic.
    if (hasRelatedExistingExpansion(
            SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
      return false; // Consider it to be free.

    Cost +=
        costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
    return false; // Will answer upon next entry into this function.
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
           "Nary expr should have more than 1 operand.");
    // The simple nary expr will require one less op (or pair of ops)
    // than the number of its terms.
    Cost +=
        costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
    return Cost > Budget;
  }
  case scAddRecExpr: {
    assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
           "Polynomial should be at least linear");
    Cost += costAndCollectOperands<SCEVAddRecExpr>(
        WorkItem, TTI, CostKind, Worklist);
    return Cost > Budget;
  }
  }
  llvm_unreachable("Unknown SCEV kind!");
}

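// Note (comment only) on how the pieces interact: the caller seeds a worklist
// with a single SCEVOperand for the root expression and keeps re-invoking this
// helper on the items that costAndCollectOperands() pushes onto Worklist. Each
// call either proves the current node free (already materialized, or one of
// the zero-cost kinds) or adds its expansion cost; the walk stops as soon as
// the accumulated Cost exceeds Budget, which is what "Will answer upon next
// entry into this function" above refers to.
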
Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
                                            Instruction *IP) {
  switch (Pred->getKind()) {
  case SCEVPredicate::P_Union:
    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
  case SCEVPredicate::P_Compare:
    return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
  case SCEVPredicate::P_Wrap: {
    auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
    return expandWrapPredicate(AddRecPred, IP);
  }
  }
  llvm_unreachable("Unknown SCEV predicate type");
}

Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
                                            Instruction *IP) {
  Value *Expr0 = expand(Pred->getLHS(), IP);
  Value *Expr1 = expand(Pred->getRHS(), IP);

  Builder.SetInsertPoint(IP);
  auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
  auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
  return I;
}

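// Illustrative sketch (comment only): for an assumed predicate "X == Y" the
// code above emits the *inverse* compare,
//
//   %ident.check = icmp ne i64 %x, %y
//
// so the returned i1 is true exactly when the assumption does not hold at
// runtime, matching the convention of the other predicate checks below.
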
Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
                                           Instruction *Loc, bool Signed) {
  assert(AR->isAffine() && "Cannot generate RT check for "
                           "non-affine expression");

  // FIXME: It is highly suspicious that we're ignoring the predicates here.
  SmallVector<const SCEVPredicate *, 4> Pred;
  const SCEV *ExitCount =
      SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);

  assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");

  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *Start = AR->getStart();

  Type *ARTy = AR->getType();
  unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
  unsigned DstBits = SE.getTypeSizeInBits(ARTy);

  // The expression {Start,+,Step} has nusw/nssw if
  //   Step < 0, Start - |Step| * Backedge <= Start
  //   Step >= 0, Start + |Step| * Backedge > Start
  // and |Step| * Backedge doesn't unsigned overflow.

  Builder.SetInsertPoint(Loc);
  Value *TripCountVal = expand(ExitCount, Loc);

  IntegerType *Ty =
      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));

  Value *StepValue = expand(Step, Loc);
  Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
  Value *StartValue = expand(Start, Loc);

  ConstantInt *Zero =
      ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));

  Builder.SetInsertPoint(Loc);
  // Compute |Step|
  Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
  Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);

  // Compute |Step| * Backedge
  // Compute:
  //   1. Start + |Step| * Backedge < Start
  //   2. Start - |Step| * Backedge > Start
  //
  // And select either 1. or 2. depending on whether step is positive or
  // negative. If Step is known to be positive or negative, only create
  // either 1. or 2.
  auto ComputeEndCheck = [&]() -> Value * {
    // Checking <u 0 is always false.
    if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
      return ConstantInt::getFalse(Loc->getContext());

    // Get the backedge taken count and truncate or extend it to the AR type.
    Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);

    Value *MulV, *OfMul;
    if (Step->isOne()) {
      // Special-case Step of one: the potentially-costly `umul_with_overflow`
      // isn't needed and there is never an overflow, so to avoid artificially
      // inflating the cost of the check, directly emit the optimized IR.
      MulV = TruncTripCount;
      OfMul = ConstantInt::getFalse(MulV->getContext());
    } else {
      auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
                                             Intrinsic::umul_with_overflow, Ty);
      CallInst *Mul =
          Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
      MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
      OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
    }

    Value *Add = nullptr, *Sub = nullptr;
    bool NeedPosCheck = !SE.isKnownNegative(Step);
    bool NeedNegCheck = !SE.isKnownPositive(Step);

    if (isa<PointerType>(ARTy)) {
      Value *NegMulV = Builder.CreateNeg(MulV);
      if (NeedPosCheck)
        Add = Builder.CreatePtrAdd(StartValue, MulV);
      if (NeedNegCheck)
        Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
    } else {
      if (NeedPosCheck)
        Add = Builder.CreateAdd(StartValue, MulV);
      if (NeedNegCheck)
        Sub = Builder.CreateSub(StartValue, MulV);
    }

    Value *EndCompareLT = nullptr;
    Value *EndCompareGT = nullptr;
    Value *EndCheck = nullptr;
    if (NeedPosCheck)
      EndCheck = EndCompareLT = Builder.CreateICmp(
          Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
    if (NeedNegCheck)
      EndCheck = EndCompareGT = Builder.CreateICmp(
          Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
    if (NeedPosCheck && NeedNegCheck) {
      // Select the answer based on the sign of Step.
      EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
    }
    return Builder.CreateOr(EndCheck, OfMul);
  };
  Value *EndCheck = ComputeEndCheck();

  // If the backedge taken count type is larger than the AR type,
  // check that we don't drop any bits by truncating it. If we are
  // dropping bits, then we have overflow (unless the step is zero).
  if (SrcBits > DstBits) {
    auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
    auto *BackedgeCheck =
        Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
                           ConstantInt::get(Loc->getContext(), MaxVal));
    BackedgeCheck = Builder.CreateAnd(
        BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));

    EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
  }

  return EndCheck;
}

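// Illustrative sketch (comment only) of the IR produced for an unsigned check
// of {%start,+,%step} when the sign of the step is unknown (value names here
// are invented for the example):
//
//   %step.neg     = icmp slt i64 %step, 0
//   %abs.step     = select i1 %step.neg, i64 %neg.step, i64 %step
//   %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %abs.step, i64 %tc)
//   %mul.result   = extractvalue { i64, i1 } %mul, 0
//   %mul.overflow = extractvalue { i64, i1 } %mul, 1
//   %add          = add i64 %start, %mul.result
//   %sub          = sub i64 %start, %mul.result
//   %lt           = icmp ult i64 %add, %start
//   %gt           = icmp ugt i64 %sub, %start
//   %end.check    = select i1 %step.neg, i1 %gt, i1 %lt
//   %check        = or i1 %end.check, %mul.overflow
//
// plus the extra trip-count truncation check when the backedge-taken count is
// wider than the AR type. %check is true when the no-wrap assumption for the
// recurrence is violated.
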
Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
                                         Instruction *IP) {
  const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
  Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;

  // Add a check for NUSW
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
    NUSWCheck = generateOverflowCheck(A, IP, false);

  // Add a check for NSSW
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
    NSSWCheck = generateOverflowCheck(A, IP, true);

  if (NUSWCheck && NSSWCheck)
    return Builder.CreateOr(NUSWCheck, NSSWCheck);

  if (NUSWCheck)
    return NUSWCheck;

  if (NSSWCheck)
    return NSSWCheck;

  return ConstantInt::getFalse(IP->getContext());
}

Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
                                          Instruction *IP) {
  // Loop over all checks in this set.
  SmallVector<Value *> Checks;
  for (const auto *Pred : Union->getPredicates()) {
    Checks.push_back(expandCodeForPredicate(Pred, IP));
    Builder.SetInsertPoint(IP);
  }

  if (Checks.empty())
    return ConstantInt::getFalse(IP->getContext());
  return Builder.CreateOr(Checks);
}

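// Note (comment only): the i1 returned here is the OR of the individual
// predicate checks, i.e. it is true at runtime when any assumption in the
// union is violated. Callers typically branch to a conservatively correct
// (e.g. scalar or unversioned) copy of the loop when the combined check fires.
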
Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
  auto *DefI = dyn_cast<Instruction>(V);
  if (!PreserveLCSSA || !DefI)
    return V;

  Instruction *InsertPt = &*Builder.GetInsertPoint();
  Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
  Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
  if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
    return V;

  // Create a temporary instruction at the current insertion point, so we
  // can hand it off to the helper to create LCSSA PHIs if required for the
  // new use.
  // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
  // would accept an insertion point and return an LCSSA phi for that
  // insertion point, so there is no need to insert & remove the temporary
  // instruction.
  Type *ToTy;
  if (DefI->getType()->isIntegerTy())
    ToTy = PointerType::get(DefI->getContext(), 0);
  else
    ToTy = Type::getInt32Ty(DefI->getContext());
  Instruction *User =
      CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
  auto RemoveUserOnExit =
      make_scope_exit([User]() { User->eraseFromParent(); });

  SmallVector<Instruction *, 1> ToUpdate;
  ToUpdate.push_back(DefI);
  SmallVector<PHINode *, 16> PHIsToRemove;
  SmallVector<PHINode *, 16> InsertedPHIs;
  formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
                           &InsertedPHIs);
  for (PHINode *PN : InsertedPHIs)
    rememberInstruction(PN);
  for (PHINode *PN : PHIsToRemove) {
    if (!PN->use_empty())
      continue;
    InsertedValues.erase(PN);
    InsertedPostIncValues.erase(PN);
    PN->eraseFromParent();
  }

  return User->getOperand(0);
}

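// Illustrative sketch (comment only): if %v is defined inside %loop and the
// expander must insert code after the loop, LCSSA requires the new use to go
// through a phi in the exit block:
//
//   exit:
//     %v.lcssa = phi i64 [ %v, %loop ]
//
// formLCSSAForInstructions() creates such phis for the temporary user built
// above, and this function then returns the (possibly rewritten) operand of
// that temporary cast rather than %v itself.
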
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
// UDiv expressions. We don't know if the UDiv is derived from an IR divide
// instruction, but the important thing is that we prove the denominator is
// nonzero before expansion.
//
// IVUsers already checks that IV-derived expressions are safe. So this check
// is only needed when the expression includes some subexpression that is not
// IV derived.
//
// Currently, we only allow division by a value provably non-zero here.
//
// We cannot generally expand recurrences unless the step dominates the loop
// header. The expander handles the special case of affine recurrences by
// scaling the recurrence outside the loop, but this technique isn't generally
// applicable. Expanding a nested recurrence outside a loop requires computing
// binomial coefficients. This could be done, but the recurrence has to be in a
// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
  ScalarEvolution &SE;
  bool CanonicalMode;
  bool IsUnsafe = false;

  SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
      : SE(SE), CanonicalMode(CanonicalMode) {}

  bool follow(const SCEV *S) {
    if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      if (!SE.isKnownNonZero(D->getRHS())) {
        IsUnsafe = true;
        return false;
      }
    }
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
      // For non-affine addrecs or in non-canonical mode we need a preheader
      // to insert into.
      if (!AR->getLoop()->getLoopPreheader() &&
          (!CanonicalMode || !AR->isAffine())) {
        IsUnsafe = true;
        return false;
      }
    }
    return true;
  }
  bool isDone() const { return IsUnsafe; }
};

bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
  SCEVFindUnsafe Search(SE, CanonicalMode);
  visitAll(S, Search);
  return !Search.IsUnsafe;
}

bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
                                    const Instruction *InsertionPoint) const {
  if (!isSafeToExpand(S))
    return false;
  // We have to prove that the expanded site of S dominates InsertionPoint.
  // This is easy when not in the same block, but hard when S is an instruction
  // to be expanded somewhere inside the same block as our insertion point.
  // What we really need here is something analogous to an OrderedBasicBlock,
  // but for the moment, we paper over the problem by handling two common and
  // cheap to check cases.
  if (SE.properlyDominates(S, InsertionPoint->getParent()))
    return true;
  if (SE.dominates(S, InsertionPoint->getParent())) {
    if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
      return true;
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
      if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
        return true;
  }
  return false;
}

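// Illustrative note (comment only): an expression such as (%a /u %b) is
// rejected by SCEVFindUnsafe unless ScalarEvolution can prove %b != 0, since
// expanding it would introduce a udiv that is not safe to speculate at an
// arbitrary insertion point. Likewise, a non-affine addrec over a loop with no
// preheader has no safe place to be expanded.
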
void SCEVExpanderCleaner::cleanup() {
  // Result is used, nothing to remove.
  if (ResultUsed)
    return;

  auto InsertedInstructions = Expander.getAllInsertedInstructions();
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
                                            InsertedInstructions.end());
  (void)InsertedSet;
#endif
  // Remove sets with value handles.
  Expander.clear();

  // Remove all inserted instructions.
  for (Instruction *I : reverse(InsertedInstructions)) {
#ifndef NDEBUG
    assert(all_of(I->users(),
                  [&InsertedSet](Value *U) {
                    return InsertedSet.contains(cast<Instruction>(U));
                  }) &&
           "removed instruction should only be used by instructions inserted "
           "during expansion");
#endif
    assert(!I->getType()->isVoidTy() &&
           "inserted instruction should have non-void types");
    I->replaceAllUsesWith(PoisonValue::get(I->getType()));
    I->eraseFromParent();
  }
}

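// Illustrative sketch (comment only) of the intended usage pattern for the
// cleaner, assuming a pass that expands speculatively; ProfitableToKeep is a
// placeholder for whatever profitability test the pass applies:
//
//   SCEVExpander Expander(SE, DL, "expander");
//   SCEVExpanderCleaner Cleaner(Expander);
//   Value *V = Expander.expandCodeFor(S, Ty, InsertPt);
//   if (ProfitableToKeep(V))
//     Cleaner.markResultUsed();
//   // otherwise the cleaner's destructor runs cleanup() and erases every
//   // instruction the expander inserted.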