1 //===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements induction variable simplification. It does
10 // not define any actual pass or policy, but provides a single function to
11 // simplify a loop's induction variables based on ScalarEvolution.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/IR/Dominators.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instructions.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/PatternMatch.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/Transforms/Utils/Local.h"
27 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
31 #define DEBUG_TYPE "indvars"
33 STATISTIC(NumElimIdentity
, "Number of IV identities eliminated");
34 STATISTIC(NumElimOperand
, "Number of IV operands folded into a use");
35 STATISTIC(NumFoldedUser
, "Number of IV users folded into a constant");
36 STATISTIC(NumElimRem
, "Number of IV remainder operations eliminated");
39 "Number of IV signed division operations converted to unsigned division");
42 "Number of IV signed remainder operations converted to unsigned remainder");
43 STATISTIC(NumElimCmp
, "Number of IV comparisons eliminated");
46 /// This is a utility for simplifying induction variables
47 /// based on ScalarEvolution. It is the primary instrument of the
48 /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
49 /// other loop passes that preserve SCEV.
50 class SimplifyIndvar
{
55 const TargetTransformInfo
*TTI
;
56 SCEVExpander
&Rewriter
;
57 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
;
62 SimplifyIndvar(Loop
*Loop
, ScalarEvolution
*SE
, DominatorTree
*DT
,
63 LoopInfo
*LI
, const TargetTransformInfo
*TTI
,
64 SCEVExpander
&Rewriter
,
65 SmallVectorImpl
<WeakTrackingVH
> &Dead
)
66 : L(Loop
), LI(LI
), SE(SE
), DT(DT
), TTI(TTI
), Rewriter(Rewriter
),
68 assert(LI
&& "IV simplification requires LoopInfo");
71 bool hasChanged() const { return Changed
; }
73 /// Iteratively perform simplification on a worklist of users of the
74 /// specified induction variable. This is the top-level driver that applies
75 /// all simplifications to users of an IV.
76 void simplifyUsers(PHINode
*CurrIV
, IVVisitor
*V
= nullptr);
78 Value
*foldIVUser(Instruction
*UseInst
, Instruction
*IVOperand
);
80 bool eliminateIdentitySCEV(Instruction
*UseInst
, Instruction
*IVOperand
);
81 bool replaceIVUserWithLoopInvariant(Instruction
*UseInst
);
82 bool replaceFloatIVWithIntegerIV(Instruction
*UseInst
);
84 bool eliminateOverflowIntrinsic(WithOverflowInst
*WO
);
85 bool eliminateSaturatingIntrinsic(SaturatingInst
*SI
);
86 bool eliminateTrunc(TruncInst
*TI
);
87 bool eliminateIVUser(Instruction
*UseInst
, Instruction
*IVOperand
);
88 bool makeIVComparisonInvariant(ICmpInst
*ICmp
, Instruction
*IVOperand
);
89 void eliminateIVComparison(ICmpInst
*ICmp
, Instruction
*IVOperand
);
90 void simplifyIVRemainder(BinaryOperator
*Rem
, Instruction
*IVOperand
,
92 void replaceRemWithNumerator(BinaryOperator
*Rem
);
93 void replaceRemWithNumeratorOrZero(BinaryOperator
*Rem
);
94 void replaceSRemWithURem(BinaryOperator
*Rem
);
95 bool eliminateSDiv(BinaryOperator
*SDiv
);
96 bool strengthenOverflowingOperation(BinaryOperator
*OBO
,
97 Instruction
*IVOperand
);
98 bool strengthenRightShift(BinaryOperator
*BO
, Instruction
*IVOperand
);
102 /// Find a point in code which dominates all given instructions. We can safely
103 /// assume that, whatever fact we can prove at the found point, this fact is
104 /// also true for each of the given instructions.
105 static Instruction
*findCommonDominator(ArrayRef
<Instruction
*> Instructions
,
107 Instruction
*CommonDom
= nullptr;
108 for (auto *Insn
: Instructions
)
109 if (!CommonDom
|| DT
.dominates(Insn
, CommonDom
))
111 else if (!DT
.dominates(CommonDom
, Insn
))
112 // If there is no dominance relation, use common dominator.
114 DT
.findNearestCommonDominator(CommonDom
->getParent(),
115 Insn
->getParent())->getTerminator();
116 assert(CommonDom
&& "Common dominator not found?");
120 /// Fold an IV operand into its use. This removes increments of an
121 /// aligned IV when used by a instruction that ignores the low bits.
123 /// IVOperand is guaranteed SCEVable, but UseInst may not be.
125 /// Return the operand of IVOperand for this induction variable if IVOperand can
126 /// be folded (in case more folding opportunities have been exposed).
127 /// Otherwise return null.
128 Value
*SimplifyIndvar::foldIVUser(Instruction
*UseInst
, Instruction
*IVOperand
) {
129 Value
*IVSrc
= nullptr;
130 const unsigned OperIdx
= 0;
131 const SCEV
*FoldedExpr
= nullptr;
132 bool MustDropExactFlag
= false;
133 switch (UseInst
->getOpcode()) {
136 case Instruction::UDiv
:
137 case Instruction::LShr
:
138 // We're only interested in the case where we know something about
139 // the numerator and have a constant denominator.
140 if (IVOperand
!= UseInst
->getOperand(OperIdx
) ||
141 !isa
<ConstantInt
>(UseInst
->getOperand(1)))
144 // Attempt to fold a binary operator with constant operand.
145 // e.g. ((I + 1) >> 2) => I >> 2
146 if (!isa
<BinaryOperator
>(IVOperand
)
147 || !isa
<ConstantInt
>(IVOperand
->getOperand(1)))
150 IVSrc
= IVOperand
->getOperand(0);
151 // IVSrc must be the (SCEVable) IV, since the other operand is const.
152 assert(SE
->isSCEVable(IVSrc
->getType()) && "Expect SCEVable IV operand");
154 ConstantInt
*D
= cast
<ConstantInt
>(UseInst
->getOperand(1));
155 if (UseInst
->getOpcode() == Instruction::LShr
) {
156 // Get a constant for the divisor. See createSCEV.
157 uint32_t BitWidth
= cast
<IntegerType
>(UseInst
->getType())->getBitWidth();
158 if (D
->getValue().uge(BitWidth
))
161 D
= ConstantInt::get(UseInst
->getContext(),
162 APInt::getOneBitSet(BitWidth
, D
->getZExtValue()));
164 const auto *LHS
= SE
->getSCEV(IVSrc
);
165 const auto *RHS
= SE
->getSCEV(D
);
166 FoldedExpr
= SE
->getUDivExpr(LHS
, RHS
);
167 // We might have 'exact' flag set at this point which will no longer be
168 // correct after we make the replacement.
169 if (UseInst
->isExact() && LHS
!= SE
->getMulExpr(FoldedExpr
, RHS
))
170 MustDropExactFlag
= true;
172 // We have something that might fold it's operand. Compare SCEVs.
173 if (!SE
->isSCEVable(UseInst
->getType()))
176 // Bypass the operand if SCEV can prove it has no effect.
177 if (SE
->getSCEV(UseInst
) != FoldedExpr
)
180 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
181 << " -> " << *UseInst
<< '\n');
183 UseInst
->setOperand(OperIdx
, IVSrc
);
184 assert(SE
->getSCEV(UseInst
) == FoldedExpr
&& "bad SCEV with folded oper");
186 if (MustDropExactFlag
)
187 UseInst
->dropPoisonGeneratingFlags();
191 if (IVOperand
->use_empty())
192 DeadInsts
.emplace_back(IVOperand
);
196 bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst
*ICmp
,
197 Instruction
*IVOperand
) {
198 unsigned IVOperIdx
= 0;
199 ICmpInst::Predicate Pred
= ICmp
->getPredicate();
200 if (IVOperand
!= ICmp
->getOperand(0)) {
202 assert(IVOperand
== ICmp
->getOperand(1) && "Can't find IVOperand");
204 Pred
= ICmpInst::getSwappedPredicate(Pred
);
207 // Get the SCEVs for the ICmp operands (in the specific context of the
209 const Loop
*ICmpLoop
= LI
->getLoopFor(ICmp
->getParent());
210 const SCEV
*S
= SE
->getSCEVAtScope(ICmp
->getOperand(IVOperIdx
), ICmpLoop
);
211 const SCEV
*X
= SE
->getSCEVAtScope(ICmp
->getOperand(1 - IVOperIdx
), ICmpLoop
);
213 auto *PN
= dyn_cast
<PHINode
>(IVOperand
);
217 auto LIP
= SE
->getLoopInvariantPredicate(Pred
, S
, X
, L
, ICmp
);
220 ICmpInst::Predicate InvariantPredicate
= LIP
->Pred
;
221 const SCEV
*InvariantLHS
= LIP
->LHS
;
222 const SCEV
*InvariantRHS
= LIP
->RHS
;
224 // Rewrite the comparison to a loop invariant comparison if it can be done
225 // cheaply, where cheaply means "we don't need to emit any new
228 SmallDenseMap
<const SCEV
*, Value
*> CheapExpansions
;
229 CheapExpansions
[S
] = ICmp
->getOperand(IVOperIdx
);
230 CheapExpansions
[X
] = ICmp
->getOperand(1 - IVOperIdx
);
232 // TODO: Support multiple entry loops? (We currently bail out of these in
233 // the IndVarSimplify pass)
234 if (auto *BB
= L
->getLoopPredecessor()) {
235 const int Idx
= PN
->getBasicBlockIndex(BB
);
237 Value
*Incoming
= PN
->getIncomingValue(Idx
);
238 const SCEV
*IncomingS
= SE
->getSCEV(Incoming
);
239 CheapExpansions
[IncomingS
] = Incoming
;
242 Value
*NewLHS
= CheapExpansions
[InvariantLHS
];
243 Value
*NewRHS
= CheapExpansions
[InvariantRHS
];
246 if (auto *ConstLHS
= dyn_cast
<SCEVConstant
>(InvariantLHS
))
247 NewLHS
= ConstLHS
->getValue();
249 if (auto *ConstRHS
= dyn_cast
<SCEVConstant
>(InvariantRHS
))
250 NewRHS
= ConstRHS
->getValue();
252 if (!NewLHS
|| !NewRHS
)
253 // We could not find an existing value to replace either LHS or RHS.
254 // Generating new instructions has subtler tradeoffs, so avoid doing that
258 LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp
<< '\n');
259 ICmp
->setPredicate(InvariantPredicate
);
260 ICmp
->setOperand(0, NewLHS
);
261 ICmp
->setOperand(1, NewRHS
);
265 /// SimplifyIVUsers helper for eliminating useless
266 /// comparisons against an induction variable.
267 void SimplifyIndvar::eliminateIVComparison(ICmpInst
*ICmp
,
268 Instruction
*IVOperand
) {
269 unsigned IVOperIdx
= 0;
270 ICmpInst::Predicate Pred
= ICmp
->getPredicate();
271 ICmpInst::Predicate OriginalPred
= Pred
;
272 if (IVOperand
!= ICmp
->getOperand(0)) {
274 assert(IVOperand
== ICmp
->getOperand(1) && "Can't find IVOperand");
276 Pred
= ICmpInst::getSwappedPredicate(Pred
);
279 // Get the SCEVs for the ICmp operands (in the specific context of the
281 const Loop
*ICmpLoop
= LI
->getLoopFor(ICmp
->getParent());
282 const SCEV
*S
= SE
->getSCEVAtScope(ICmp
->getOperand(IVOperIdx
), ICmpLoop
);
283 const SCEV
*X
= SE
->getSCEVAtScope(ICmp
->getOperand(1 - IVOperIdx
), ICmpLoop
);
285 // If the condition is always true or always false in the given context,
286 // replace it with a constant value.
287 SmallVector
<Instruction
*, 4> Users
;
288 for (auto *U
: ICmp
->users())
289 Users
.push_back(cast
<Instruction
>(U
));
290 const Instruction
*CtxI
= findCommonDominator(Users
, *DT
);
291 if (auto Ev
= SE
->evaluatePredicateAt(Pred
, S
, X
, CtxI
)) {
292 ICmp
->replaceAllUsesWith(ConstantInt::getBool(ICmp
->getContext(), *Ev
));
293 DeadInsts
.emplace_back(ICmp
);
294 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp
<< '\n');
295 } else if (makeIVComparisonInvariant(ICmp
, IVOperand
)) {
296 // fallthrough to end of function
297 } else if (ICmpInst::isSigned(OriginalPred
) &&
298 SE
->isKnownNonNegative(S
) && SE
->isKnownNonNegative(X
)) {
299 // If we were unable to make anything above, all we can is to canonicalize
300 // the comparison hoping that it will open the doors for other
301 // optimizations. If we find out that we compare two non-negative values,
302 // we turn the instruction's predicate to its unsigned version. Note that
303 // we cannot rely on Pred here unless we check if we have swapped it.
304 assert(ICmp
->getPredicate() == OriginalPred
&& "Predicate changed?");
305 LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
307 ICmp
->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred
));
315 bool SimplifyIndvar::eliminateSDiv(BinaryOperator
*SDiv
) {
316 // Get the SCEVs for the ICmp operands.
317 auto *N
= SE
->getSCEV(SDiv
->getOperand(0));
318 auto *D
= SE
->getSCEV(SDiv
->getOperand(1));
320 // Simplify unnecessary loops away.
321 const Loop
*L
= LI
->getLoopFor(SDiv
->getParent());
322 N
= SE
->getSCEVAtScope(N
, L
);
323 D
= SE
->getSCEVAtScope(D
, L
);
325 // Replace sdiv by udiv if both of the operands are non-negative
326 if (SE
->isKnownNonNegative(N
) && SE
->isKnownNonNegative(D
)) {
327 auto *UDiv
= BinaryOperator::Create(
328 BinaryOperator::UDiv
, SDiv
->getOperand(0), SDiv
->getOperand(1),
329 SDiv
->getName() + ".udiv", SDiv
);
330 UDiv
->setIsExact(SDiv
->isExact());
331 SDiv
->replaceAllUsesWith(UDiv
);
332 LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv
<< '\n');
335 DeadInsts
.push_back(SDiv
);
342 // i %s n -> i %u n if i >= 0 and n >= 0
343 void SimplifyIndvar::replaceSRemWithURem(BinaryOperator
*Rem
) {
344 auto *N
= Rem
->getOperand(0), *D
= Rem
->getOperand(1);
345 auto *URem
= BinaryOperator::Create(BinaryOperator::URem
, N
, D
,
346 Rem
->getName() + ".urem", Rem
);
347 Rem
->replaceAllUsesWith(URem
);
348 LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem
<< '\n');
351 DeadInsts
.emplace_back(Rem
);
354 // i % n --> i if i is in [0,n).
355 void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator
*Rem
) {
356 Rem
->replaceAllUsesWith(Rem
->getOperand(0));
357 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem
<< '\n');
360 DeadInsts
.emplace_back(Rem
);
363 // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
364 void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator
*Rem
) {
365 auto *T
= Rem
->getType();
366 auto *N
= Rem
->getOperand(0), *D
= Rem
->getOperand(1);
367 ICmpInst
*ICmp
= new ICmpInst(Rem
, ICmpInst::ICMP_EQ
, N
, D
);
369 SelectInst::Create(ICmp
, ConstantInt::get(T
, 0), N
, "iv.rem", Rem
);
370 Rem
->replaceAllUsesWith(Sel
);
371 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem
<< '\n');
374 DeadInsts
.emplace_back(Rem
);
377 /// SimplifyIVUsers helper for eliminating useless remainder operations
378 /// operating on an induction variable or replacing srem by urem.
379 void SimplifyIndvar::simplifyIVRemainder(BinaryOperator
*Rem
,
380 Instruction
*IVOperand
,
382 auto *NValue
= Rem
->getOperand(0);
383 auto *DValue
= Rem
->getOperand(1);
384 // We're only interested in the case where we know something about
385 // the numerator, unless it is a srem, because we want to replace srem by urem
387 bool UsedAsNumerator
= IVOperand
== NValue
;
388 if (!UsedAsNumerator
&& !IsSigned
)
391 const SCEV
*N
= SE
->getSCEV(NValue
);
393 // Simplify unnecessary loops away.
394 const Loop
*ICmpLoop
= LI
->getLoopFor(Rem
->getParent());
395 N
= SE
->getSCEVAtScope(N
, ICmpLoop
);
397 bool IsNumeratorNonNegative
= !IsSigned
|| SE
->isKnownNonNegative(N
);
399 // Do not proceed if the Numerator may be negative
400 if (!IsNumeratorNonNegative
)
403 const SCEV
*D
= SE
->getSCEV(DValue
);
404 D
= SE
->getSCEVAtScope(D
, ICmpLoop
);
406 if (UsedAsNumerator
) {
407 auto LT
= IsSigned
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
;
408 if (SE
->isKnownPredicate(LT
, N
, D
)) {
409 replaceRemWithNumerator(Rem
);
413 auto *T
= Rem
->getType();
414 const auto *NLessOne
= SE
->getMinusSCEV(N
, SE
->getOne(T
));
415 if (SE
->isKnownPredicate(LT
, NLessOne
, D
)) {
416 replaceRemWithNumeratorOrZero(Rem
);
421 // Try to replace SRem with URem, if both N and D are known non-negative.
422 // Since we had already check N, we only need to check D now
423 if (!IsSigned
|| !SE
->isKnownNonNegative(D
))
426 replaceSRemWithURem(Rem
);
429 bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst
*WO
) {
430 const SCEV
*LHS
= SE
->getSCEV(WO
->getLHS());
431 const SCEV
*RHS
= SE
->getSCEV(WO
->getRHS());
432 if (!SE
->willNotOverflow(WO
->getBinaryOp(), WO
->isSigned(), LHS
, RHS
))
435 // Proved no overflow, nuke the overflow check and, if possible, the overflow
436 // intrinsic as well.
438 BinaryOperator
*NewResult
= BinaryOperator::Create(
439 WO
->getBinaryOp(), WO
->getLHS(), WO
->getRHS(), "", WO
);
442 NewResult
->setHasNoSignedWrap(true);
444 NewResult
->setHasNoUnsignedWrap(true);
446 SmallVector
<ExtractValueInst
*, 4> ToDelete
;
448 for (auto *U
: WO
->users()) {
449 if (auto *EVI
= dyn_cast
<ExtractValueInst
>(U
)) {
450 if (EVI
->getIndices()[0] == 1)
451 EVI
->replaceAllUsesWith(ConstantInt::getFalse(WO
->getContext()));
453 assert(EVI
->getIndices()[0] == 0 && "Only two possibilities!");
454 EVI
->replaceAllUsesWith(NewResult
);
456 ToDelete
.push_back(EVI
);
460 for (auto *EVI
: ToDelete
)
461 EVI
->eraseFromParent();
464 WO
->eraseFromParent();
470 bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst
*SI
) {
471 const SCEV
*LHS
= SE
->getSCEV(SI
->getLHS());
472 const SCEV
*RHS
= SE
->getSCEV(SI
->getRHS());
473 if (!SE
->willNotOverflow(SI
->getBinaryOp(), SI
->isSigned(), LHS
, RHS
))
476 BinaryOperator
*BO
= BinaryOperator::Create(
477 SI
->getBinaryOp(), SI
->getLHS(), SI
->getRHS(), SI
->getName(), SI
);
479 BO
->setHasNoSignedWrap();
481 BO
->setHasNoUnsignedWrap();
483 SI
->replaceAllUsesWith(BO
);
484 DeadInsts
.emplace_back(SI
);
489 bool SimplifyIndvar::eliminateTrunc(TruncInst
*TI
) {
490 // It is always legal to replace
491 // icmp <pred> i32 trunc(iv), n
493 // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
495 // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
496 // Or with either of these if pred is an equality predicate.
498 // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
499 // every comparison which uses trunc, it means that we can replace each of
500 // them with comparison of iv against sext/zext(n). We no longer need trunc
503 // TODO: Should we do this if we can widen *some* comparisons, but not all
504 // of them? Sometimes it is enough to enable other optimizations, but the
505 // trunc instruction will stay in the loop.
506 Value
*IV
= TI
->getOperand(0);
507 Type
*IVTy
= IV
->getType();
508 const SCEV
*IVSCEV
= SE
->getSCEV(IV
);
509 const SCEV
*TISCEV
= SE
->getSCEV(TI
);
511 // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
513 bool DoesSExtCollapse
= false;
514 bool DoesZExtCollapse
= false;
515 if (IVSCEV
== SE
->getSignExtendExpr(TISCEV
, IVTy
))
516 DoesSExtCollapse
= true;
517 if (IVSCEV
== SE
->getZeroExtendExpr(TISCEV
, IVTy
))
518 DoesZExtCollapse
= true;
520 // If neither sext nor zext does collapse, it is not profitable to do any
522 if (!DoesSExtCollapse
&& !DoesZExtCollapse
)
525 // Collect users of the trunc that look like comparisons against invariants.
526 // Bail if we find something different.
527 SmallVector
<ICmpInst
*, 4> ICmpUsers
;
528 for (auto *U
: TI
->users()) {
529 // We don't care about users in unreachable blocks.
530 if (isa
<Instruction
>(U
) &&
531 !DT
->isReachableFromEntry(cast
<Instruction
>(U
)->getParent()))
533 ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(U
);
534 if (!ICI
) return false;
535 assert(L
->contains(ICI
->getParent()) && "LCSSA form broken?");
536 if (!(ICI
->getOperand(0) == TI
&& L
->isLoopInvariant(ICI
->getOperand(1))) &&
537 !(ICI
->getOperand(1) == TI
&& L
->isLoopInvariant(ICI
->getOperand(0))))
539 // If we cannot get rid of trunc, bail.
540 if (ICI
->isSigned() && !DoesSExtCollapse
)
542 if (ICI
->isUnsigned() && !DoesZExtCollapse
)
544 // For equality, either signed or unsigned works.
545 ICmpUsers
.push_back(ICI
);
548 auto CanUseZExt
= [&](ICmpInst
*ICI
) {
549 // Unsigned comparison can be widened as unsigned.
550 if (ICI
->isUnsigned())
552 // Is it profitable to do zext?
553 if (!DoesZExtCollapse
)
555 // For equality, we can safely zext both parts.
556 if (ICI
->isEquality())
558 // Otherwise we can only use zext when comparing two non-negative or two
559 // negative values. But in practice, we will never pass DoesZExtCollapse
560 // check for a negative value, because zext(trunc(x)) is non-negative. So
561 // it only make sense to check for non-negativity here.
562 const SCEV
*SCEVOP1
= SE
->getSCEV(ICI
->getOperand(0));
563 const SCEV
*SCEVOP2
= SE
->getSCEV(ICI
->getOperand(1));
564 return SE
->isKnownNonNegative(SCEVOP1
) && SE
->isKnownNonNegative(SCEVOP2
);
566 // Replace all comparisons against trunc with comparisons against IV.
567 for (auto *ICI
: ICmpUsers
) {
568 bool IsSwapped
= L
->isLoopInvariant(ICI
->getOperand(0));
569 auto *Op1
= IsSwapped
? ICI
->getOperand(0) : ICI
->getOperand(1);
570 Instruction
*Ext
= nullptr;
571 // For signed/unsigned predicate, replace the old comparison with comparison
572 // of immediate IV against sext/zext of the invariant argument. If we can
573 // use either sext or zext (i.e. we are dealing with equality predicate),
574 // then prefer zext as a more canonical form.
575 // TODO: If we see a signed comparison which can be turned into unsigned,
576 // we can do it here for canonicalization purposes.
577 ICmpInst::Predicate Pred
= ICI
->getPredicate();
578 if (IsSwapped
) Pred
= ICmpInst::getSwappedPredicate(Pred
);
579 if (CanUseZExt(ICI
)) {
580 assert(DoesZExtCollapse
&& "Unprofitable zext?");
581 Ext
= new ZExtInst(Op1
, IVTy
, "zext", ICI
);
582 Pred
= ICmpInst::getUnsignedPredicate(Pred
);
584 assert(DoesSExtCollapse
&& "Unprofitable sext?");
585 Ext
= new SExtInst(Op1
, IVTy
, "sext", ICI
);
586 assert(Pred
== ICmpInst::getSignedPredicate(Pred
) && "Must be signed!");
589 L
->makeLoopInvariant(Ext
, Changed
);
591 ICmpInst
*NewICI
= new ICmpInst(ICI
, Pred
, IV
, Ext
);
592 ICI
->replaceAllUsesWith(NewICI
);
593 DeadInsts
.emplace_back(ICI
);
596 // Trunc no longer needed.
597 TI
->replaceAllUsesWith(PoisonValue::get(TI
->getType()));
598 DeadInsts
.emplace_back(TI
);
602 /// Eliminate an operation that consumes a simple IV and has no observable
603 /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
604 /// but UseInst may not be.
605 bool SimplifyIndvar::eliminateIVUser(Instruction
*UseInst
,
606 Instruction
*IVOperand
) {
607 if (ICmpInst
*ICmp
= dyn_cast
<ICmpInst
>(UseInst
)) {
608 eliminateIVComparison(ICmp
, IVOperand
);
611 if (BinaryOperator
*Bin
= dyn_cast
<BinaryOperator
>(UseInst
)) {
612 bool IsSRem
= Bin
->getOpcode() == Instruction::SRem
;
613 if (IsSRem
|| Bin
->getOpcode() == Instruction::URem
) {
614 simplifyIVRemainder(Bin
, IVOperand
, IsSRem
);
618 if (Bin
->getOpcode() == Instruction::SDiv
)
619 return eliminateSDiv(Bin
);
622 if (auto *WO
= dyn_cast
<WithOverflowInst
>(UseInst
))
623 if (eliminateOverflowIntrinsic(WO
))
626 if (auto *SI
= dyn_cast
<SaturatingInst
>(UseInst
))
627 if (eliminateSaturatingIntrinsic(SI
))
630 if (auto *TI
= dyn_cast
<TruncInst
>(UseInst
))
631 if (eliminateTrunc(TI
))
634 if (eliminateIdentitySCEV(UseInst
, IVOperand
))
640 static Instruction
*GetLoopInvariantInsertPosition(Loop
*L
, Instruction
*Hint
) {
641 if (auto *BB
= L
->getLoopPreheader())
642 return BB
->getTerminator();
647 /// Replace the UseInst with a loop invariant expression if it is safe.
648 bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction
*I
) {
649 if (!SE
->isSCEVable(I
->getType()))
652 // Get the symbolic expression for this instruction.
653 const SCEV
*S
= SE
->getSCEV(I
);
655 if (!SE
->isLoopInvariant(S
, L
))
658 // Do not generate something ridiculous even if S is loop invariant.
659 if (Rewriter
.isHighCostExpansion(S
, L
, SCEVCheapExpansionBudget
, TTI
, I
))
662 auto *IP
= GetLoopInvariantInsertPosition(L
, I
);
664 if (!Rewriter
.isSafeToExpandAt(S
, IP
)) {
665 LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
666 << " with non-speculable loop invariant: " << *S
<< '\n');
670 auto *Invariant
= Rewriter
.expandCodeFor(S
, I
->getType(), IP
);
672 I
->replaceAllUsesWith(Invariant
);
673 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
674 << " with loop invariant: " << *S
<< '\n');
677 DeadInsts
.emplace_back(I
);
681 /// Eliminate redundant type cast between integer and float.
682 bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction
*UseInst
) {
683 if (UseInst
->getOpcode() != CastInst::SIToFP
&&
684 UseInst
->getOpcode() != CastInst::UIToFP
)
687 Instruction
*IVOperand
= cast
<Instruction
>(UseInst
->getOperand(0));
688 // Get the symbolic expression for this instruction.
689 const SCEV
*IV
= SE
->getSCEV(IVOperand
);
691 if (UseInst
->getOpcode() == CastInst::SIToFP
)
692 MaskBits
= SE
->getSignedRange(IV
).getMinSignedBits();
694 MaskBits
= SE
->getUnsignedRange(IV
).getActiveBits();
695 unsigned DestNumSigBits
= UseInst
->getType()->getFPMantissaWidth();
696 if (MaskBits
<= DestNumSigBits
) {
697 for (User
*U
: UseInst
->users()) {
698 // Match for fptosi/fptoui of sitofp and with same type.
699 auto *CI
= dyn_cast
<CastInst
>(U
);
703 CastInst::CastOps Opcode
= CI
->getOpcode();
704 if (Opcode
!= CastInst::FPToSI
&& Opcode
!= CastInst::FPToUI
)
707 Value
*Conv
= nullptr;
708 if (IVOperand
->getType() != CI
->getType()) {
709 IRBuilder
<> Builder(CI
);
710 StringRef Name
= IVOperand
->getName();
711 // To match InstCombine logic, we only need sext if both fptosi and
712 // sitofp are used. If one of them is unsigned, then we can use zext.
713 if (SE
->getTypeSizeInBits(IVOperand
->getType()) >
714 SE
->getTypeSizeInBits(CI
->getType())) {
715 Conv
= Builder
.CreateTrunc(IVOperand
, CI
->getType(), Name
+ ".trunc");
716 } else if (Opcode
== CastInst::FPToUI
||
717 UseInst
->getOpcode() == CastInst::UIToFP
) {
718 Conv
= Builder
.CreateZExt(IVOperand
, CI
->getType(), Name
+ ".zext");
720 Conv
= Builder
.CreateSExt(IVOperand
, CI
->getType(), Name
+ ".sext");
725 CI
->replaceAllUsesWith(Conv
);
726 DeadInsts
.push_back(CI
);
727 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
728 << " with: " << *Conv
<< '\n');
738 /// Eliminate any operation that SCEV can prove is an identity function.
739 bool SimplifyIndvar::eliminateIdentitySCEV(Instruction
*UseInst
,
740 Instruction
*IVOperand
) {
741 if (!SE
->isSCEVable(UseInst
->getType()) ||
742 (UseInst
->getType() != IVOperand
->getType()) ||
743 (SE
->getSCEV(UseInst
) != SE
->getSCEV(IVOperand
)))
746 // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
747 // dominator tree, even if X is an operand to Y. For instance, in
749 // %iv = phi i32 {0,+,1}
750 // br %cond, label %left, label %merge
753 // %X = add i32 %iv, 0
757 // %M = phi (%X, %iv)
759 // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
760 // %M.replaceAllUsesWith(%X) would be incorrect.
762 if (isa
<PHINode
>(UseInst
))
763 // If UseInst is not a PHI node then we know that IVOperand dominates
764 // UseInst directly from the legality of SSA.
765 if (!DT
|| !DT
->dominates(IVOperand
, UseInst
))
768 if (!LI
->replacementPreservesLCSSAForm(UseInst
, IVOperand
))
771 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst
<< '\n');
773 UseInst
->replaceAllUsesWith(IVOperand
);
776 DeadInsts
.emplace_back(UseInst
);
780 /// Annotate BO with nsw / nuw if it provably does not signed-overflow /
781 /// unsigned-overflow. Returns true if anything changed, false otherwise.
782 bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator
*BO
,
783 Instruction
*IVOperand
) {
784 auto Flags
= SE
->getStrengthenedNoWrapFlagsFromBinOp(
785 cast
<OverflowingBinaryOperator
>(BO
));
790 BO
->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags
, SCEV::FlagNUW
) ==
792 BO
->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags
, SCEV::FlagNSW
) ==
795 // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
796 // flags on addrecs while performing zero/sign extensions. We could call
797 // forgetValue() here to make sure those flags also propagate to any other
798 // SCEV expressions based on the addrec. However, this can have pathological
799 // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
803 /// Annotate the Shr in (X << IVOperand) >> C as exact using the
804 /// information from the IV's range. Returns true if anything changed, false
806 bool SimplifyIndvar::strengthenRightShift(BinaryOperator
*BO
,
807 Instruction
*IVOperand
) {
808 using namespace llvm::PatternMatch
;
810 if (BO
->getOpcode() == Instruction::Shl
) {
811 bool Changed
= false;
812 ConstantRange IVRange
= SE
->getUnsignedRange(SE
->getSCEV(IVOperand
));
813 for (auto *U
: BO
->users()) {
816 m_AShr(m_Shl(m_Value(), m_Specific(IVOperand
)), m_APInt(C
))) ||
818 m_LShr(m_Shl(m_Value(), m_Specific(IVOperand
)), m_APInt(C
)))) {
819 BinaryOperator
*Shr
= cast
<BinaryOperator
>(U
);
820 if (!Shr
->isExact() && IVRange
.getUnsignedMin().uge(*C
)) {
821 Shr
->setIsExact(true);
832 /// Add all uses of Def to the current IV's worklist.
833 static void pushIVUsers(
834 Instruction
*Def
, Loop
*L
,
835 SmallPtrSet
<Instruction
*,16> &Simplified
,
836 SmallVectorImpl
< std::pair
<Instruction
*,Instruction
*> > &SimpleIVUsers
) {
838 for (User
*U
: Def
->users()) {
839 Instruction
*UI
= cast
<Instruction
>(U
);
841 // Avoid infinite or exponential worklist processing.
842 // Also ensure unique worklist users.
843 // If Def is a LoopPhi, it may not be in the Simplified set, so check for
848 // Only change the current Loop, do not change the other parts (e.g. other
850 if (!L
->contains(UI
))
853 // Do not push the same instruction more than once.
854 if (!Simplified
.insert(UI
).second
)
857 SimpleIVUsers
.push_back(std::make_pair(UI
, Def
));
861 /// Return true if this instruction generates a simple SCEV
862 /// expression in terms of that IV.
864 /// This is similar to IVUsers' isInteresting() but processes each instruction
865 /// non-recursively when the operand is already known to be a simpleIVUser.
867 static bool isSimpleIVUser(Instruction
*I
, const Loop
*L
, ScalarEvolution
*SE
) {
868 if (!SE
->isSCEVable(I
->getType()))
871 // Get the symbolic expression for this instruction.
872 const SCEV
*S
= SE
->getSCEV(I
);
874 // Only consider affine recurrences.
875 const SCEVAddRecExpr
*AR
= dyn_cast
<SCEVAddRecExpr
>(S
);
876 if (AR
&& AR
->getLoop() == L
)
882 /// Iteratively perform simplification on a worklist of users
883 /// of the specified induction variable. Each successive simplification may push
884 /// more users which may themselves be candidates for simplification.
886 /// This algorithm does not require IVUsers analysis. Instead, it simplifies
887 /// instructions in-place during analysis. Rather than rewriting induction
888 /// variables bottom-up from their users, it transforms a chain of IVUsers
889 /// top-down, updating the IR only when it encounters a clear optimization
892 /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
894 void SimplifyIndvar::simplifyUsers(PHINode
*CurrIV
, IVVisitor
*V
) {
895 if (!SE
->isSCEVable(CurrIV
->getType()))
898 // Instructions processed by SimplifyIndvar for CurrIV.
899 SmallPtrSet
<Instruction
*,16> Simplified
;
901 // Use-def pairs if IV users waiting to be processed for CurrIV.
902 SmallVector
<std::pair
<Instruction
*, Instruction
*>, 8> SimpleIVUsers
;
904 // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
905 // called multiple times for the same LoopPhi. This is the proper thing to
906 // do for loop header phis that use each other.
907 pushIVUsers(CurrIV
, L
, Simplified
, SimpleIVUsers
);
909 while (!SimpleIVUsers
.empty()) {
910 std::pair
<Instruction
*, Instruction
*> UseOper
=
911 SimpleIVUsers
.pop_back_val();
912 Instruction
*UseInst
= UseOper
.first
;
914 // If a user of the IndVar is trivially dead, we prefer just to mark it dead
915 // rather than try to do some complex analysis or transformation (such as
916 // widening) basing on it.
917 // TODO: Propagate TLI and pass it here to handle more cases.
918 if (isInstructionTriviallyDead(UseInst
, /* TLI */ nullptr)) {
919 DeadInsts
.emplace_back(UseInst
);
923 // Bypass back edges to avoid extra work.
924 if (UseInst
== CurrIV
) continue;
926 // Try to replace UseInst with a loop invariant before any other
928 if (replaceIVUserWithLoopInvariant(UseInst
))
931 Instruction
*IVOperand
= UseOper
.second
;
932 for (unsigned N
= 0; IVOperand
; ++N
) {
933 assert(N
<= Simplified
.size() && "runaway iteration");
936 Value
*NewOper
= foldIVUser(UseInst
, IVOperand
);
938 break; // done folding
939 IVOperand
= dyn_cast
<Instruction
>(NewOper
);
944 if (eliminateIVUser(UseInst
, IVOperand
)) {
945 pushIVUsers(IVOperand
, L
, Simplified
, SimpleIVUsers
);
949 if (BinaryOperator
*BO
= dyn_cast
<BinaryOperator
>(UseInst
)) {
950 if ((isa
<OverflowingBinaryOperator
>(BO
) &&
951 strengthenOverflowingOperation(BO
, IVOperand
)) ||
952 (isa
<ShlOperator
>(BO
) && strengthenRightShift(BO
, IVOperand
))) {
953 // re-queue uses of the now modified binary operator and fall
954 // through to the checks that remain.
955 pushIVUsers(IVOperand
, L
, Simplified
, SimpleIVUsers
);
959 // Try to use integer induction for FPToSI of float induction directly.
960 if (replaceFloatIVWithIntegerIV(UseInst
)) {
961 // Re-queue the potentially new direct uses of IVOperand.
962 pushIVUsers(IVOperand
, L
, Simplified
, SimpleIVUsers
);
966 CastInst
*Cast
= dyn_cast
<CastInst
>(UseInst
);
971 if (isSimpleIVUser(UseInst
, L
, SE
)) {
972 pushIVUsers(UseInst
, L
, Simplified
, SimpleIVUsers
);
979 void IVVisitor::anchor() { }
981 /// Simplify instructions that use this induction variable
982 /// by using ScalarEvolution to analyze the IV's recurrence.
983 bool simplifyUsersOfIV(PHINode
*CurrIV
, ScalarEvolution
*SE
, DominatorTree
*DT
,
984 LoopInfo
*LI
, const TargetTransformInfo
*TTI
,
985 SmallVectorImpl
<WeakTrackingVH
> &Dead
,
986 SCEVExpander
&Rewriter
, IVVisitor
*V
) {
987 SimplifyIndvar
SIV(LI
->getLoopFor(CurrIV
->getParent()), SE
, DT
, LI
, TTI
,
989 SIV
.simplifyUsers(CurrIV
, V
);
990 return SIV
.hasChanged();
993 /// Simplify users of induction variables within this
994 /// loop. This does not actually change or add IVs.
995 bool simplifyLoopIVs(Loop
*L
, ScalarEvolution
*SE
, DominatorTree
*DT
,
996 LoopInfo
*LI
, const TargetTransformInfo
*TTI
,
997 SmallVectorImpl
<WeakTrackingVH
> &Dead
) {
998 SCEVExpander
Rewriter(*SE
, SE
->getDataLayout(), "indvars");
1000 Rewriter
.setDebugType(DEBUG_TYPE
);
1002 bool Changed
= false;
1003 for (BasicBlock::iterator I
= L
->getHeader()->begin(); isa
<PHINode
>(I
); ++I
) {
1005 simplifyUsersOfIV(cast
<PHINode
>(I
), SE
, DT
, LI
, TTI
, Dead
, Rewriter
);
1013 //===----------------------------------------------------------------------===//
1014 // Widen Induction Variables - Extend the width of an IV to cover its
1016 //===----------------------------------------------------------------------===//
1026 ScalarEvolution
*SE
;
1029 // Does the module have any calls to the llvm.experimental.guard intrinsic
1030 // at all? If not we can avoid scanning instructions looking for guards.
1033 bool UsePostIncrementRanges
;
1036 unsigned NumElimExt
= 0;
1037 unsigned NumWidened
= 0;
1040 PHINode
*WidePhi
= nullptr;
1041 Instruction
*WideInc
= nullptr;
1042 const SCEV
*WideIncExpr
= nullptr;
1043 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
;
1045 SmallPtrSet
<Instruction
*,16> Widened
;
1047 enum class ExtendKind
{ Zero
, Sign
, Unknown
};
1049 // A map tracking the kind of extension used to widen each narrow IV
1050 // and narrow IV user.
1051 // Key: pointer to a narrow IV or IV user.
1052 // Value: the kind of extension used to widen this Instruction.
1053 DenseMap
<AssertingVH
<Instruction
>, ExtendKind
> ExtendKindMap
;
1055 using DefUserPair
= std::pair
<AssertingVH
<Value
>, AssertingVH
<Instruction
>>;
1057 // A map with control-dependent ranges for post increment IV uses. The key is
1058 // a pair of IV def and a use of this def denoting the context. The value is
1059 // a ConstantRange representing possible values of the def at the given
1061 DenseMap
<DefUserPair
, ConstantRange
> PostIncRangeInfos
;
1063 Optional
<ConstantRange
> getPostIncRangeInfo(Value
*Def
,
1064 Instruction
*UseI
) {
1065 DefUserPair
Key(Def
, UseI
);
1066 auto It
= PostIncRangeInfos
.find(Key
);
1067 return It
== PostIncRangeInfos
.end()
1068 ? Optional
<ConstantRange
>(None
)
1069 : Optional
<ConstantRange
>(It
->second
);
1072 void calculatePostIncRanges(PHINode
*OrigPhi
);
1073 void calculatePostIncRange(Instruction
*NarrowDef
, Instruction
*NarrowUser
);
1075 void updatePostIncRangeInfo(Value
*Def
, Instruction
*UseI
, ConstantRange R
) {
1076 DefUserPair
Key(Def
, UseI
);
1077 auto It
= PostIncRangeInfos
.find(Key
);
1078 if (It
== PostIncRangeInfos
.end())
1079 PostIncRangeInfos
.insert({Key
, R
});
1081 It
->second
= R
.intersectWith(It
->second
);
1085 /// Record a link in the Narrow IV def-use chain along with the WideIV that
1086 /// computes the same value as the Narrow IV def. This avoids caching Use*
1088 struct NarrowIVDefUse
{
1089 Instruction
*NarrowDef
= nullptr;
1090 Instruction
*NarrowUse
= nullptr;
1091 Instruction
*WideDef
= nullptr;
1093 // True if the narrow def is never negative. Tracking this information lets
1094 // us use a sign extension instead of a zero extension or vice versa, when
1095 // profitable and legal.
1096 bool NeverNegative
= false;
1098 NarrowIVDefUse(Instruction
*ND
, Instruction
*NU
, Instruction
*WD
,
1100 : NarrowDef(ND
), NarrowUse(NU
), WideDef(WD
),
1101 NeverNegative(NeverNegative
) {}
1104 WidenIV(const WideIVInfo
&WI
, LoopInfo
*LInfo
, ScalarEvolution
*SEv
,
1105 DominatorTree
*DTree
, SmallVectorImpl
<WeakTrackingVH
> &DI
,
1106 bool HasGuards
, bool UsePostIncrementRanges
= true);
1108 PHINode
*createWideIV(SCEVExpander
&Rewriter
);
1110 unsigned getNumElimExt() { return NumElimExt
; };
1111 unsigned getNumWidened() { return NumWidened
; };
1114 Value
*createExtendInst(Value
*NarrowOper
, Type
*WideType
, bool IsSigned
,
1117 Instruction
*cloneIVUser(NarrowIVDefUse DU
, const SCEVAddRecExpr
*WideAR
);
1118 Instruction
*cloneArithmeticIVUser(NarrowIVDefUse DU
,
1119 const SCEVAddRecExpr
*WideAR
);
1120 Instruction
*cloneBitwiseIVUser(NarrowIVDefUse DU
);
1122 ExtendKind
getExtendKind(Instruction
*I
);
1124 using WidenedRecTy
= std::pair
<const SCEVAddRecExpr
*, ExtendKind
>;
1126 WidenedRecTy
getWideRecurrence(NarrowIVDefUse DU
);
1128 WidenedRecTy
getExtendedOperandRecurrence(NarrowIVDefUse DU
);
1130 const SCEV
*getSCEVByOpCode(const SCEV
*LHS
, const SCEV
*RHS
,
1131 unsigned OpCode
) const;
1133 Instruction
*widenIVUse(NarrowIVDefUse DU
, SCEVExpander
&Rewriter
);
1135 bool widenLoopCompare(NarrowIVDefUse DU
);
1136 bool widenWithVariantUse(NarrowIVDefUse DU
);
1138 void pushNarrowIVUsers(Instruction
*NarrowDef
, Instruction
*WideDef
);
1141 SmallVector
<NarrowIVDefUse
, 8> NarrowIVUsers
;
1145 /// Determine the insertion point for this user. By default, insert immediately
1146 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
1147 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
1148 /// common dominator for the incoming blocks. A nullptr can be returned if no
1149 /// viable location is found: it may happen if User is a PHI and Def only comes
1150 /// to this PHI from unreachable blocks.
1151 static Instruction
*getInsertPointForUses(Instruction
*User
, Value
*Def
,
1152 DominatorTree
*DT
, LoopInfo
*LI
) {
1153 PHINode
*PHI
= dyn_cast
<PHINode
>(User
);
1157 Instruction
*InsertPt
= nullptr;
1158 for (unsigned i
= 0, e
= PHI
->getNumIncomingValues(); i
!= e
; ++i
) {
1159 if (PHI
->getIncomingValue(i
) != Def
)
1162 BasicBlock
*InsertBB
= PHI
->getIncomingBlock(i
);
1164 if (!DT
->isReachableFromEntry(InsertBB
))
1168 InsertPt
= InsertBB
->getTerminator();
1171 InsertBB
= DT
->findNearestCommonDominator(InsertPt
->getParent(), InsertBB
);
1172 InsertPt
= InsertBB
->getTerminator();
1175 // If we have skipped all inputs, it means that Def only comes to Phi from
1176 // unreachable blocks.
1180 auto *DefI
= dyn_cast
<Instruction
>(Def
);
1184 assert(DT
->dominates(DefI
, InsertPt
) && "def does not dominate all uses");
1186 auto *L
= LI
->getLoopFor(DefI
->getParent());
1187 assert(!L
|| L
->contains(LI
->getLoopFor(InsertPt
->getParent())));
1189 for (auto *DTN
= (*DT
)[InsertPt
->getParent()]; DTN
; DTN
= DTN
->getIDom())
1190 if (LI
->getLoopFor(DTN
->getBlock()) == L
)
1191 return DTN
->getBlock()->getTerminator();
1193 llvm_unreachable("DefI dominates InsertPt!");
1196 WidenIV::WidenIV(const WideIVInfo
&WI
, LoopInfo
*LInfo
, ScalarEvolution
*SEv
,
1197 DominatorTree
*DTree
, SmallVectorImpl
<WeakTrackingVH
> &DI
,
1198 bool HasGuards
, bool UsePostIncrementRanges
)
1199 : OrigPhi(WI
.NarrowIV
), WideType(WI
.WidestNativeType
), LI(LInfo
),
1200 L(LI
->getLoopFor(OrigPhi
->getParent())), SE(SEv
), DT(DTree
),
1201 HasGuards(HasGuards
), UsePostIncrementRanges(UsePostIncrementRanges
),
1203 assert(L
->getHeader() == OrigPhi
->getParent() && "Phi must be an IV");
1204 ExtendKindMap
[OrigPhi
] = WI
.IsSigned
? ExtendKind::Sign
: ExtendKind::Zero
;
1207 Value
*WidenIV::createExtendInst(Value
*NarrowOper
, Type
*WideType
,
1208 bool IsSigned
, Instruction
*Use
) {
1209 // Set the debug location and conservative insertion point.
1210 IRBuilder
<> Builder(Use
);
1211 // Hoist the insertion point into loop preheaders as far as possible.
1212 for (const Loop
*L
= LI
->getLoopFor(Use
->getParent());
1213 L
&& L
->getLoopPreheader() && L
->isLoopInvariant(NarrowOper
);
1214 L
= L
->getParentLoop())
1215 Builder
.SetInsertPoint(L
->getLoopPreheader()->getTerminator());
1217 return IsSigned
? Builder
.CreateSExt(NarrowOper
, WideType
) :
1218 Builder
.CreateZExt(NarrowOper
, WideType
);
1221 /// Instantiate a wide operation to replace a narrow operation. This only needs
1222 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1223 /// 0 for any operation we decide not to clone.
1224 Instruction
*WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU
,
1225 const SCEVAddRecExpr
*WideAR
) {
1226 unsigned Opcode
= DU
.NarrowUse
->getOpcode();
1230 case Instruction::Add
:
1231 case Instruction::Mul
:
1232 case Instruction::UDiv
:
1233 case Instruction::Sub
:
1234 return cloneArithmeticIVUser(DU
, WideAR
);
1236 case Instruction::And
:
1237 case Instruction::Or
:
1238 case Instruction::Xor
:
1239 case Instruction::Shl
:
1240 case Instruction::LShr
:
1241 case Instruction::AShr
:
1242 return cloneBitwiseIVUser(DU
);
1246 Instruction
*WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU
) {
1247 Instruction
*NarrowUse
= DU
.NarrowUse
;
1248 Instruction
*NarrowDef
= DU
.NarrowDef
;
1249 Instruction
*WideDef
= DU
.WideDef
;
1251 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse
<< "\n");
1253 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1254 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1255 // invariant and will be folded or hoisted. If it actually comes from a
1256 // widened IV, it should be removed during a future call to widenIVUse.
1257 bool IsSigned
= getExtendKind(NarrowDef
) == ExtendKind::Sign
;
1258 Value
*LHS
= (NarrowUse
->getOperand(0) == NarrowDef
)
1260 : createExtendInst(NarrowUse
->getOperand(0), WideType
,
1261 IsSigned
, NarrowUse
);
1262 Value
*RHS
= (NarrowUse
->getOperand(1) == NarrowDef
)
1264 : createExtendInst(NarrowUse
->getOperand(1), WideType
,
1265 IsSigned
, NarrowUse
);
1267 auto *NarrowBO
= cast
<BinaryOperator
>(NarrowUse
);
1268 auto *WideBO
= BinaryOperator::Create(NarrowBO
->getOpcode(), LHS
, RHS
,
1269 NarrowBO
->getName());
1270 IRBuilder
<> Builder(NarrowUse
);
1271 Builder
.Insert(WideBO
);
1272 WideBO
->copyIRFlags(NarrowBO
);
1276 Instruction
*WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU
,
1277 const SCEVAddRecExpr
*WideAR
) {
1278 Instruction
*NarrowUse
= DU
.NarrowUse
;
1279 Instruction
*NarrowDef
= DU
.NarrowDef
;
1280 Instruction
*WideDef
= DU
.WideDef
;
1282 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse
<< "\n");
1284 unsigned IVOpIdx
= (NarrowUse
->getOperand(0) == NarrowDef
) ? 0 : 1;
1286 // We're trying to find X such that
1288 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1290 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1291 // and check using SCEV if any of them are correct.
1293 // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1294 // correct solution to X.
1295 auto GuessNonIVOperand
= [&](bool SignExt
) {
1296 const SCEV
*WideLHS
;
1297 const SCEV
*WideRHS
;
1299 auto GetExtend
= [this, SignExt
](const SCEV
*S
, Type
*Ty
) {
1301 return SE
->getSignExtendExpr(S
, Ty
);
1302 return SE
->getZeroExtendExpr(S
, Ty
);
1306 WideLHS
= SE
->getSCEV(WideDef
);
1307 const SCEV
*NarrowRHS
= SE
->getSCEV(NarrowUse
->getOperand(1));
1308 WideRHS
= GetExtend(NarrowRHS
, WideType
);
1310 const SCEV
*NarrowLHS
= SE
->getSCEV(NarrowUse
->getOperand(0));
1311 WideLHS
= GetExtend(NarrowLHS
, WideType
);
1312 WideRHS
= SE
->getSCEV(WideDef
);
1315 // WideUse is "WideDef `op.wide` X" as described in the comment.
1316 const SCEV
*WideUse
=
1317 getSCEVByOpCode(WideLHS
, WideRHS
, NarrowUse
->getOpcode());
1319 return WideUse
== WideAR
;
1322 bool SignExtend
= getExtendKind(NarrowDef
) == ExtendKind::Sign
;
1323 if (!GuessNonIVOperand(SignExtend
)) {
1324 SignExtend
= !SignExtend
;
1325 if (!GuessNonIVOperand(SignExtend
))
1329 Value
*LHS
= (NarrowUse
->getOperand(0) == NarrowDef
)
1331 : createExtendInst(NarrowUse
->getOperand(0), WideType
,
1332 SignExtend
, NarrowUse
);
1333 Value
*RHS
= (NarrowUse
->getOperand(1) == NarrowDef
)
1335 : createExtendInst(NarrowUse
->getOperand(1), WideType
,
1336 SignExtend
, NarrowUse
);
1338 auto *NarrowBO
= cast
<BinaryOperator
>(NarrowUse
);
1339 auto *WideBO
= BinaryOperator::Create(NarrowBO
->getOpcode(), LHS
, RHS
,
1340 NarrowBO
->getName());
1342 IRBuilder
<> Builder(NarrowUse
);
1343 Builder
.Insert(WideBO
);
1344 WideBO
->copyIRFlags(NarrowBO
);
1348 WidenIV::ExtendKind
WidenIV::getExtendKind(Instruction
*I
) {
1349 auto It
= ExtendKindMap
.find(I
);
1350 assert(It
!= ExtendKindMap
.end() && "Instruction not yet extended!");
1354 const SCEV
*WidenIV::getSCEVByOpCode(const SCEV
*LHS
, const SCEV
*RHS
,
1355 unsigned OpCode
) const {
1357 case Instruction::Add
:
1358 return SE
->getAddExpr(LHS
, RHS
);
1359 case Instruction::Sub
:
1360 return SE
->getMinusSCEV(LHS
, RHS
);
1361 case Instruction::Mul
:
1362 return SE
->getMulExpr(LHS
, RHS
);
1363 case Instruction::UDiv
:
1364 return SE
->getUDivExpr(LHS
, RHS
);
1366 llvm_unreachable("Unsupported opcode.");
1370 /// No-wrap operations can transfer sign extension of their result to their
1371 /// operands. Generate the SCEV value for the widened operation without
1372 /// actually modifying the IR yet. If the expression after extending the
1373 /// operands is an AddRec for this loop, return the AddRec and the kind of
1375 WidenIV::WidenedRecTy
1376 WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU
) {
1377 // Handle the common case of add<nsw/nuw>
1378 const unsigned OpCode
= DU
.NarrowUse
->getOpcode();
1379 // Only Add/Sub/Mul instructions supported yet.
1380 if (OpCode
!= Instruction::Add
&& OpCode
!= Instruction::Sub
&&
1381 OpCode
!= Instruction::Mul
)
1382 return {nullptr, ExtendKind::Unknown
};
1384 // One operand (NarrowDef) has already been extended to WideDef. Now determine
1385 // if extending the other will lead to a recurrence.
1386 const unsigned ExtendOperIdx
=
1387 DU
.NarrowUse
->getOperand(0) == DU
.NarrowDef
? 1 : 0;
1388 assert(DU
.NarrowUse
->getOperand(1-ExtendOperIdx
) == DU
.NarrowDef
&& "bad DU");
1390 const SCEV
*ExtendOperExpr
= nullptr;
1391 const OverflowingBinaryOperator
*OBO
=
1392 cast
<OverflowingBinaryOperator
>(DU
.NarrowUse
);
1393 ExtendKind ExtKind
= getExtendKind(DU
.NarrowDef
);
1394 if (ExtKind
== ExtendKind::Sign
&& OBO
->hasNoSignedWrap())
1395 ExtendOperExpr
= SE
->getSignExtendExpr(
1396 SE
->getSCEV(DU
.NarrowUse
->getOperand(ExtendOperIdx
)), WideType
);
1397 else if (ExtKind
== ExtendKind::Zero
&& OBO
->hasNoUnsignedWrap())
1398 ExtendOperExpr
= SE
->getZeroExtendExpr(
1399 SE
->getSCEV(DU
.NarrowUse
->getOperand(ExtendOperIdx
)), WideType
);
1401 return {nullptr, ExtendKind::Unknown
};
1403 // When creating this SCEV expr, don't apply the current operations NSW or NUW
1404 // flags. This instruction may be guarded by control flow that the no-wrap
1405 // behavior depends on. Non-control-equivalent instructions can be mapped to
1406 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1407 // semantics to those operations.
1408 const SCEV
*lhs
= SE
->getSCEV(DU
.WideDef
);
1409 const SCEV
*rhs
= ExtendOperExpr
;
1411 // Let's swap operands to the initial order for the case of non-commutative
1412 // operations, like SUB. See PR21014.
1413 if (ExtendOperIdx
== 0)
1414 std::swap(lhs
, rhs
);
1415 const SCEVAddRecExpr
*AddRec
=
1416 dyn_cast
<SCEVAddRecExpr
>(getSCEVByOpCode(lhs
, rhs
, OpCode
));
1418 if (!AddRec
|| AddRec
->getLoop() != L
)
1419 return {nullptr, ExtendKind::Unknown
};
1421 return {AddRec
, ExtKind
};
1424 /// Is this instruction potentially interesting for further simplification after
1425 /// widening it's type? In other words, can the extend be safely hoisted out of
1426 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1427 /// so, return the extended recurrence and the kind of extension used. Otherwise
1428 /// return {nullptr, ExtendKind::Unknown}.
1429 WidenIV::WidenedRecTy
WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU
) {
1430 if (!DU
.NarrowUse
->getType()->isIntegerTy())
1431 return {nullptr, ExtendKind::Unknown
};
1433 const SCEV
*NarrowExpr
= SE
->getSCEV(DU
.NarrowUse
);
1434 if (SE
->getTypeSizeInBits(NarrowExpr
->getType()) >=
1435 SE
->getTypeSizeInBits(WideType
)) {
1436 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1437 // index. So don't follow this use.
1438 return {nullptr, ExtendKind::Unknown
};
1441 const SCEV
*WideExpr
;
1443 if (DU
.NeverNegative
) {
1444 WideExpr
= SE
->getSignExtendExpr(NarrowExpr
, WideType
);
1445 if (isa
<SCEVAddRecExpr
>(WideExpr
))
1446 ExtKind
= ExtendKind::Sign
;
1448 WideExpr
= SE
->getZeroExtendExpr(NarrowExpr
, WideType
);
1449 ExtKind
= ExtendKind::Zero
;
1451 } else if (getExtendKind(DU
.NarrowDef
) == ExtendKind::Sign
) {
1452 WideExpr
= SE
->getSignExtendExpr(NarrowExpr
, WideType
);
1453 ExtKind
= ExtendKind::Sign
;
1455 WideExpr
= SE
->getZeroExtendExpr(NarrowExpr
, WideType
);
1456 ExtKind
= ExtendKind::Zero
;
1458 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(WideExpr
);
1459 if (!AddRec
|| AddRec
->getLoop() != L
)
1460 return {nullptr, ExtendKind::Unknown
};
1461 return {AddRec
, ExtKind
};
1464 /// This IV user cannot be widened. Replace this use of the original narrow IV
1465 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1466 static void truncateIVUse(WidenIV::NarrowIVDefUse DU
, DominatorTree
*DT
,
1468 auto *InsertPt
= getInsertPointForUses(DU
.NarrowUse
, DU
.NarrowDef
, DT
, LI
);
1471 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU
.WideDef
<< " for user "
1472 << *DU
.NarrowUse
<< "\n");
1473 IRBuilder
<> Builder(InsertPt
);
1474 Value
*Trunc
= Builder
.CreateTrunc(DU
.WideDef
, DU
.NarrowDef
->getType());
1475 DU
.NarrowUse
->replaceUsesOfWith(DU
.NarrowDef
, Trunc
);
1478 /// If the narrow use is a compare instruction, then widen the compare
1479 // (and possibly the other operand). The extend operation is hoisted into the
1480 // loop preheader as far as possible.
1481 bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU
) {
1482 ICmpInst
*Cmp
= dyn_cast
<ICmpInst
>(DU
.NarrowUse
);
1486 // We can legally widen the comparison in the following two cases:
1488 // - The signedness of the IV extension and comparison match
1490 // - The narrow IV is always positive (and thus its sign extension is equal
1491 // to its zero extension). For instance, let's say we're zero extending
1492 // %narrow for the following use
1494 // icmp slt i32 %narrow, %val ... (A)
1496 // and %narrow is always positive. Then
1498 // (A) == icmp slt i32 sext(%narrow), sext(%val)
1499 // == icmp slt i32 zext(%narrow), sext(%val)
1500 bool IsSigned
= getExtendKind(DU
.NarrowDef
) == ExtendKind::Sign
;
1501 if (!(DU
.NeverNegative
|| IsSigned
== Cmp
->isSigned()))
1504 Value
*Op
= Cmp
->getOperand(Cmp
->getOperand(0) == DU
.NarrowDef
? 1 : 0);
1505 unsigned CastWidth
= SE
->getTypeSizeInBits(Op
->getType());
1506 unsigned IVWidth
= SE
->getTypeSizeInBits(WideType
);
1507 assert(CastWidth
<= IVWidth
&& "Unexpected width while widening compare.");
1509 // Widen the compare instruction.
1510 auto *InsertPt
= getInsertPointForUses(DU
.NarrowUse
, DU
.NarrowDef
, DT
, LI
);
1513 IRBuilder
<> Builder(InsertPt
);
1514 DU
.NarrowUse
->replaceUsesOfWith(DU
.NarrowDef
, DU
.WideDef
);
1516 // Widen the other operand of the compare, if necessary.
1517 if (CastWidth
< IVWidth
) {
1518 Value
*ExtOp
= createExtendInst(Op
, WideType
, Cmp
->isSigned(), Cmp
);
1519 DU
.NarrowUse
->replaceUsesOfWith(Op
, ExtOp
);
1524 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1525 // will not work when:
1526 // 1) SCEV traces back to an instruction inside the loop that SCEV can not
1527 // expand, eg. add %indvar, (load %addr)
1528 // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1529 // While SCEV fails to avoid trunc, we can still try to use instruction
1530 // combining approach to prove trunc is not required. This can be further
1531 // extended with other instruction combining checks, but for now we handle the
1532 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1535 // %c = sub nsw %b, %indvar
1536 // %d = sext %c to i64
1538 // %indvar.ext1 = sext %indvar to i64
1539 // %m = sext %b to i64
1540 // %d = sub nsw i64 %m, %indvar.ext1
1541 // Therefore, as long as the result of add/sub/mul is extended to wide type, no
1542 // trunc is required regardless of how %b is generated. This pattern is common
1543 // when calculating address in 64 bit architecture
1544 bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU
) {
1545 Instruction
*NarrowUse
= DU
.NarrowUse
;
1546 Instruction
*NarrowDef
= DU
.NarrowDef
;
1547 Instruction
*WideDef
= DU
.WideDef
;
1549 // Handle the common case of add<nsw/nuw>
1550 const unsigned OpCode
= NarrowUse
->getOpcode();
1551 // Only Add/Sub/Mul instructions are supported.
1552 if (OpCode
!= Instruction::Add
&& OpCode
!= Instruction::Sub
&&
1553 OpCode
!= Instruction::Mul
)
1556 // The operand that is not defined by NarrowDef of DU. Let's call it the
1558 assert((NarrowUse
->getOperand(0) == NarrowDef
||
1559 NarrowUse
->getOperand(1) == NarrowDef
) &&
1562 const OverflowingBinaryOperator
*OBO
=
1563 cast
<OverflowingBinaryOperator
>(NarrowUse
);
1564 ExtendKind ExtKind
= getExtendKind(NarrowDef
);
1565 bool CanSignExtend
= ExtKind
== ExtendKind::Sign
&& OBO
->hasNoSignedWrap();
1566 bool CanZeroExtend
= ExtKind
== ExtendKind::Zero
&& OBO
->hasNoUnsignedWrap();
1567 auto AnotherOpExtKind
= ExtKind
;
1569 // Check that all uses are either:
1570 // - narrow def (in case of we are widening the IV increment);
1571 // - single-input LCSSA Phis;
1572 // - comparison of the chosen type;
1573 // - extend of the chosen type (raison d'etre).
1574 SmallVector
<Instruction
*, 4> ExtUsers
;
1575 SmallVector
<PHINode
*, 4> LCSSAPhiUsers
;
1576 SmallVector
<ICmpInst
*, 4> ICmpUsers
;
1577 for (Use
&U
: NarrowUse
->uses()) {
1578 Instruction
*User
= cast
<Instruction
>(U
.getUser());
1579 if (User
== NarrowDef
)
1581 if (!L
->contains(User
)) {
1582 auto *LCSSAPhi
= cast
<PHINode
>(User
);
1583 // Make sure there is only 1 input, so that we don't have to split
1585 if (LCSSAPhi
->getNumOperands() != 1)
1587 LCSSAPhiUsers
.push_back(LCSSAPhi
);
1590 if (auto *ICmp
= dyn_cast
<ICmpInst
>(User
)) {
1591 auto Pred
= ICmp
->getPredicate();
1592 // We have 3 types of predicates: signed, unsigned and equality
1593 // predicates. For equality, it's legal to widen icmp for either sign and
1594 // zero extend. For sign extend, we can also do so for signed predicates,
1595 // likeweise for zero extend we can widen icmp for unsigned predicates.
1596 if (ExtKind
== ExtendKind::Zero
&& ICmpInst::isSigned(Pred
))
1598 if (ExtKind
== ExtendKind::Sign
&& ICmpInst::isUnsigned(Pred
))
1600 ICmpUsers
.push_back(ICmp
);
1603 if (ExtKind
== ExtendKind::Sign
)
1604 User
= dyn_cast
<SExtInst
>(User
);
1606 User
= dyn_cast
<ZExtInst
>(User
);
1607 if (!User
|| User
->getType() != WideType
)
1609 ExtUsers
.push_back(User
);
1611 if (ExtUsers
.empty()) {
1612 DeadInsts
.emplace_back(NarrowUse
);
1616 // We'll prove some facts that should be true in the context of ext users. If
1617 // there is no users, we are done now. If there are some, pick their common
1618 // dominator as context.
1619 const Instruction
*CtxI
= findCommonDominator(ExtUsers
, *DT
);
1621 if (!CanSignExtend
&& !CanZeroExtend
) {
1622 // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
1623 // will most likely not see it. Let's try to prove it.
1624 if (OpCode
!= Instruction::Add
)
1626 if (ExtKind
!= ExtendKind::Zero
)
1628 const SCEV
*LHS
= SE
->getSCEV(OBO
->getOperand(0));
1629 const SCEV
*RHS
= SE
->getSCEV(OBO
->getOperand(1));
1630 // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
1631 if (NarrowUse
->getOperand(0) != NarrowDef
)
1633 if (!SE
->isKnownNegative(RHS
))
1635 bool ProvedSubNUW
= SE
->isKnownPredicateAt(ICmpInst::ICMP_UGE
, LHS
,
1636 SE
->getNegativeSCEV(RHS
), CtxI
);
1639 // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
1640 // neg(zext(neg(op))), which is basically sext(op).
1641 AnotherOpExtKind
= ExtendKind::Sign
;
1644 // Verifying that Defining operand is an AddRec
1645 const SCEV
*Op1
= SE
->getSCEV(WideDef
);
1646 const SCEVAddRecExpr
*AddRecOp1
= dyn_cast
<SCEVAddRecExpr
>(Op1
);
1647 if (!AddRecOp1
|| AddRecOp1
->getLoop() != L
)
1650 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse
<< "\n");
1652 // Generating a widening use instruction.
1654 (NarrowUse
->getOperand(0) == NarrowDef
)
1656 : createExtendInst(NarrowUse
->getOperand(0), WideType
,
1657 AnotherOpExtKind
== ExtendKind::Sign
, NarrowUse
);
1659 (NarrowUse
->getOperand(1) == NarrowDef
)
1661 : createExtendInst(NarrowUse
->getOperand(1), WideType
,
1662 AnotherOpExtKind
== ExtendKind::Sign
, NarrowUse
);
1664 auto *NarrowBO
= cast
<BinaryOperator
>(NarrowUse
);
1665 auto *WideBO
= BinaryOperator::Create(NarrowBO
->getOpcode(), LHS
, RHS
,
1666 NarrowBO
->getName());
1667 IRBuilder
<> Builder(NarrowUse
);
1668 Builder
.Insert(WideBO
);
1669 WideBO
->copyIRFlags(NarrowBO
);
1670 ExtendKindMap
[NarrowUse
] = ExtKind
;
1672 for (Instruction
*User
: ExtUsers
) {
1673 assert(User
->getType() == WideType
&& "Checked before!");
1674 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User
<< " replaced by "
1675 << *WideBO
<< "\n");
1677 User
->replaceAllUsesWith(WideBO
);
1678 DeadInsts
.emplace_back(User
);
1681 for (PHINode
*User
: LCSSAPhiUsers
) {
1682 assert(User
->getNumOperands() == 1 && "Checked before!");
1683 Builder
.SetInsertPoint(User
);
1685 Builder
.CreatePHI(WideBO
->getType(), 1, User
->getName() + ".wide");
1686 BasicBlock
*LoopExitingBlock
= User
->getParent()->getSinglePredecessor();
1687 assert(LoopExitingBlock
&& L
->contains(LoopExitingBlock
) &&
1688 "Not a LCSSA Phi?");
1689 WidePN
->addIncoming(WideBO
, LoopExitingBlock
);
1690 Builder
.SetInsertPoint(&*User
->getParent()->getFirstInsertionPt());
1691 auto *TruncPN
= Builder
.CreateTrunc(WidePN
, User
->getType());
1692 User
->replaceAllUsesWith(TruncPN
);
1693 DeadInsts
.emplace_back(User
);
1696 for (ICmpInst
*User
: ICmpUsers
) {
1697 Builder
.SetInsertPoint(User
);
1698 auto ExtendedOp
= [&](Value
* V
)->Value
* {
1701 if (ExtKind
== ExtendKind::Zero
)
1702 return Builder
.CreateZExt(V
, WideBO
->getType());
1704 return Builder
.CreateSExt(V
, WideBO
->getType());
1706 auto Pred
= User
->getPredicate();
1707 auto *LHS
= ExtendedOp(User
->getOperand(0));
1708 auto *RHS
= ExtendedOp(User
->getOperand(1));
1710 Builder
.CreateICmp(Pred
, LHS
, RHS
, User
->getName() + ".wide");
1711 User
->replaceAllUsesWith(WideCmp
);
1712 DeadInsts
.emplace_back(User
);
1718 /// Determine whether an individual user of the narrow IV can be widened. If so,
1719 /// return the wide clone of the user.
1720 Instruction
*WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU
, SCEVExpander
&Rewriter
) {
1721 assert(ExtendKindMap
.count(DU
.NarrowDef
) &&
1722 "Should already know the kind of extension used to widen NarrowDef");
1724 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1725 if (PHINode
*UsePhi
= dyn_cast
<PHINode
>(DU
.NarrowUse
)) {
1726 if (LI
->getLoopFor(UsePhi
->getParent()) != L
) {
1727 // For LCSSA phis, sink the truncate outside the loop.
1728 // After SimplifyCFG most loop exit targets have a single predecessor.
1729 // Otherwise fall back to a truncate within the loop.
1730 if (UsePhi
->getNumOperands() != 1)
1731 truncateIVUse(DU
, DT
, LI
);
1733 // Widening the PHI requires us to insert a trunc. The logical place
1734 // for this trunc is in the same BB as the PHI. This is not possible if
1735 // the BB is terminated by a catchswitch.
1736 if (isa
<CatchSwitchInst
>(UsePhi
->getParent()->getTerminator()))
1740 PHINode::Create(DU
.WideDef
->getType(), 1, UsePhi
->getName() + ".wide",
1742 WidePhi
->addIncoming(DU
.WideDef
, UsePhi
->getIncomingBlock(0));
1743 IRBuilder
<> Builder(&*WidePhi
->getParent()->getFirstInsertionPt());
1744 Value
*Trunc
= Builder
.CreateTrunc(WidePhi
, DU
.NarrowDef
->getType());
1745 UsePhi
->replaceAllUsesWith(Trunc
);
1746 DeadInsts
.emplace_back(UsePhi
);
1747 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
<< " to "
1748 << *WidePhi
<< "\n");
1754 // This narrow use can be widened by a sext if it's non-negative or its narrow
1755 // def was widended by a sext. Same for zext.
1756 auto canWidenBySExt
= [&]() {
1757 return DU
.NeverNegative
|| getExtendKind(DU
.NarrowDef
) == ExtendKind::Sign
;
1759 auto canWidenByZExt
= [&]() {
1760 return DU
.NeverNegative
|| getExtendKind(DU
.NarrowDef
) == ExtendKind::Zero
;
1763 // Our raison d'etre! Eliminate sign and zero extension.
1764 if ((isa
<SExtInst
>(DU
.NarrowUse
) && canWidenBySExt()) ||
1765 (isa
<ZExtInst
>(DU
.NarrowUse
) && canWidenByZExt())) {
1766 Value
*NewDef
= DU
.WideDef
;
1767 if (DU
.NarrowUse
->getType() != WideType
) {
1768 unsigned CastWidth
= SE
->getTypeSizeInBits(DU
.NarrowUse
->getType());
1769 unsigned IVWidth
= SE
->getTypeSizeInBits(WideType
);
1770 if (CastWidth
< IVWidth
) {
1771 // The cast isn't as wide as the IV, so insert a Trunc.
1772 IRBuilder
<> Builder(DU
.NarrowUse
);
1773 NewDef
= Builder
.CreateTrunc(DU
.WideDef
, DU
.NarrowUse
->getType());
1776 // A wider extend was hidden behind a narrower one. This may induce
1777 // another round of IV widening in which the intermediate IV becomes
1778 // dead. It should be very rare.
1779 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1780 << " not wide enough to subsume " << *DU
.NarrowUse
1782 DU
.NarrowUse
->replaceUsesOfWith(DU
.NarrowDef
, DU
.WideDef
);
1783 NewDef
= DU
.NarrowUse
;
1786 if (NewDef
!= DU
.NarrowUse
) {
1787 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU
.NarrowUse
1788 << " replaced by " << *DU
.WideDef
<< "\n");
1790 DU
.NarrowUse
->replaceAllUsesWith(NewDef
);
1791 DeadInsts
.emplace_back(DU
.NarrowUse
);
1793 // Now that the extend is gone, we want to expose it's uses for potential
1794 // further simplification. We don't need to directly inform SimplifyIVUsers
1795 // of the new users, because their parent IV will be processed later as a
1796 // new loop phi. If we preserved IVUsers analysis, we would also want to
1797 // push the uses of WideDef here.
1799 // No further widening is needed. The deceased [sz]ext had done it for us.
1803 // Does this user itself evaluate to a recurrence after widening?
1804 WidenedRecTy WideAddRec
= getExtendedOperandRecurrence(DU
);
1805 if (!WideAddRec
.first
)
1806 WideAddRec
= getWideRecurrence(DU
);
1808 assert((WideAddRec
.first
== nullptr) ==
1809 (WideAddRec
.second
== ExtendKind::Unknown
));
1810 if (!WideAddRec
.first
) {
1811 // If use is a loop condition, try to promote the condition instead of
1812 // truncating the IV first.
1813 if (widenLoopCompare(DU
))
1816 // We are here about to generate a truncate instruction that may hurt
1817 // performance because the scalar evolution expression computed earlier
1818 // in WideAddRec.first does not indicate a polynomial induction expression.
1819 // In that case, look at the operands of the use instruction to determine
1820 // if we can still widen the use instead of truncating its operand.
1821 if (widenWithVariantUse(DU
))
1824 // This user does not evaluate to a recurrence after widening, so don't
1825 // follow it. Instead insert a Trunc to kill off the original use,
1826 // eventually isolating the original narrow IV so it can be removed.
1827 truncateIVUse(DU
, DT
, LI
);
1831 // Reuse the IV increment that SCEVExpander created as long as it dominates
1833 Instruction
*WideUse
= nullptr;
1834 if (WideAddRec
.first
== WideIncExpr
&&
1835 Rewriter
.hoistIVInc(WideInc
, DU
.NarrowUse
))
1838 WideUse
= cloneIVUser(DU
, WideAddRec
.first
);
1842 // Evaluation of WideAddRec ensured that the narrow expression could be
1843 // extended outside the loop without overflow. This suggests that the wide use
1844 // evaluates to the same expression as the extended narrow use, but doesn't
1845 // absolutely guarantee it. Hence the following failsafe check. In rare cases
1846 // where it fails, we simply throw away the newly created wide use.
1847 if (WideAddRec
.first
!= SE
->getSCEV(WideUse
)) {
1848 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": "
1849 << *SE
->getSCEV(WideUse
) << " != " << *WideAddRec
.first
1851 DeadInsts
.emplace_back(WideUse
);
1855 // if we reached this point then we are going to replace
1856 // DU.NarrowUse with WideUse. Reattach DbgValue then.
1857 replaceAllDbgUsesWith(*DU
.NarrowUse
, *WideUse
, *WideUse
, *DT
);
1859 ExtendKindMap
[DU
.NarrowUse
] = WideAddRec
.second
;
1860 // Returning WideUse pushes it on the worklist.
1864 /// Add eligible users of NarrowDef to NarrowIVUsers.
1865 void WidenIV::pushNarrowIVUsers(Instruction
*NarrowDef
, Instruction
*WideDef
) {
1866 const SCEV
*NarrowSCEV
= SE
->getSCEV(NarrowDef
);
1867 bool NonNegativeDef
=
1868 SE
->isKnownPredicate(ICmpInst::ICMP_SGE
, NarrowSCEV
,
1869 SE
->getZero(NarrowSCEV
->getType()));
1870 for (User
*U
: NarrowDef
->users()) {
1871 Instruction
*NarrowUser
= cast
<Instruction
>(U
);
1873 // Handle data flow merges and bizarre phi cycles.
1874 if (!Widened
.insert(NarrowUser
).second
)
1877 bool NonNegativeUse
= false;
1878 if (!NonNegativeDef
) {
1879 // We might have a control-dependent range information for this context.
1880 if (auto RangeInfo
= getPostIncRangeInfo(NarrowDef
, NarrowUser
))
1881 NonNegativeUse
= RangeInfo
->getSignedMin().isNonNegative();
1884 NarrowIVUsers
.emplace_back(NarrowDef
, NarrowUser
, WideDef
,
1885 NonNegativeDef
|| NonNegativeUse
);
1889 /// Process a single induction variable. First use the SCEVExpander to create a
1890 /// wide induction variable that evaluates to the same recurrence as the
1891 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1892 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1893 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1895 /// It would be simpler to delete uses as they are processed, but we must avoid
1896 /// invalidating SCEV expressions.
1897 PHINode
*WidenIV::createWideIV(SCEVExpander
&Rewriter
) {
1898 // Is this phi an induction variable?
1899 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(SE
->getSCEV(OrigPhi
));
1903 // Widen the induction variable expression.
1904 const SCEV
*WideIVExpr
= getExtendKind(OrigPhi
) == ExtendKind::Sign
1905 ? SE
->getSignExtendExpr(AddRec
, WideType
)
1906 : SE
->getZeroExtendExpr(AddRec
, WideType
);
1908 assert(SE
->getEffectiveSCEVType(WideIVExpr
->getType()) == WideType
&&
1909 "Expect the new IV expression to preserve its type");
1911 // Can the IV be extended outside the loop without overflow?
1912 AddRec
= dyn_cast
<SCEVAddRecExpr
>(WideIVExpr
);
1913 if (!AddRec
|| AddRec
->getLoop() != L
)
1916 // An AddRec must have loop-invariant operands. Since this AddRec is
1917 // materialized by a loop header phi, the expression cannot have any post-loop
1918 // operands, so they must dominate the loop header.
1920 SE
->properlyDominates(AddRec
->getStart(), L
->getHeader()) &&
1921 SE
->properlyDominates(AddRec
->getStepRecurrence(*SE
), L
->getHeader()) &&
1922 "Loop header phi recurrence inputs do not dominate the loop");
1924 // Iterate over IV uses (including transitive ones) looking for IV increments
1925 // of the form 'add nsw %iv, <const>'. For each increment and each use of
1926 // the increment calculate control-dependent range information basing on
1927 // dominating conditions inside of the loop (e.g. a range check inside of the
1928 // loop). Calculated ranges are stored in PostIncRangeInfos map.
1930 // Control-dependent range information is later used to prove that a narrow
1931 // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1932 // this on demand because when pushNarrowIVUsers needs this information some
1933 // of the dominating conditions might be already widened.
1934 if (UsePostIncrementRanges
)
1935 calculatePostIncRanges(OrigPhi
);
1937 // The rewriter provides a value for the desired IV expression. This may
1938 // either find an existing phi or materialize a new one. Either way, we
1939 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1940 // of the phi-SCC dominates the loop entry.
1941 Instruction
*InsertPt
= &*L
->getHeader()->getFirstInsertionPt();
1942 Value
*ExpandInst
= Rewriter
.expandCodeFor(AddRec
, WideType
, InsertPt
);
1943 // If the wide phi is not a phi node, for example a cast node, like bitcast,
1944 // inttoptr, ptrtoint, just skip for now.
1945 if (!(WidePhi
= dyn_cast
<PHINode
>(ExpandInst
))) {
1946 // if the cast node is an inserted instruction without any user, we should
1947 // remove it to make sure the pass don't touch the function as we can not
1949 if (ExpandInst
->hasNUses(0) &&
1950 Rewriter
.isInsertedInstruction(cast
<Instruction
>(ExpandInst
)))
1951 DeadInsts
.emplace_back(ExpandInst
);
1955 // Remembering the WideIV increment generated by SCEVExpander allows
1956 // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1957 // employ a general reuse mechanism because the call above is the only call to
1958 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1959 if (BasicBlock
*LatchBlock
= L
->getLoopLatch()) {
1961 cast
<Instruction
>(WidePhi
->getIncomingValueForBlock(LatchBlock
));
1962 WideIncExpr
= SE
->getSCEV(WideInc
);
1963 // Propagate the debug location associated with the original loop increment
1964 // to the new (widened) increment.
1966 cast
<Instruction
>(OrigPhi
->getIncomingValueForBlock(LatchBlock
));
1967 WideInc
->setDebugLoc(OrigInc
->getDebugLoc());
1970 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi
<< "\n");
1973 // Traverse the def-use chain using a worklist starting at the original IV.
1974 assert(Widened
.empty() && NarrowIVUsers
.empty() && "expect initial state" );
1976 Widened
.insert(OrigPhi
);
1977 pushNarrowIVUsers(OrigPhi
, WidePhi
);
1979 while (!NarrowIVUsers
.empty()) {
1980 WidenIV::NarrowIVDefUse DU
= NarrowIVUsers
.pop_back_val();
1982 // Process a def-use edge. This may replace the use, so don't hold a
1983 // use_iterator across it.
1984 Instruction
*WideUse
= widenIVUse(DU
, Rewriter
);
1986 // Follow all def-use edges from the previous narrow use.
1988 pushNarrowIVUsers(DU
.NarrowUse
, WideUse
);
1990 // widenIVUse may have removed the def-use edge.
1991 if (DU
.NarrowDef
->use_empty())
1992 DeadInsts
.emplace_back(DU
.NarrowDef
);
1995 // Attach any debug information to the new PHI.
1996 replaceAllDbgUsesWith(*OrigPhi
, *WidePhi
, *WidePhi
, *DT
);
2001 /// Calculates control-dependent range for the given def at the given context
2002 /// by looking at dominating conditions inside of the loop
2003 void WidenIV::calculatePostIncRange(Instruction
*NarrowDef
,
2004 Instruction
*NarrowUser
) {
2005 using namespace llvm::PatternMatch
;
2007 Value
*NarrowDefLHS
;
2008 const APInt
*NarrowDefRHS
;
2009 if (!match(NarrowDef
, m_NSWAdd(m_Value(NarrowDefLHS
),
2010 m_APInt(NarrowDefRHS
))) ||
2011 !NarrowDefRHS
->isNonNegative())
2014 auto UpdateRangeFromCondition
= [&] (Value
*Condition
,
2016 CmpInst::Predicate Pred
;
2018 if (!match(Condition
, m_ICmp(Pred
, m_Specific(NarrowDefLHS
),
2022 CmpInst::Predicate P
=
2023 TrueDest
? Pred
: CmpInst::getInversePredicate(Pred
);
2025 auto CmpRHSRange
= SE
->getSignedRange(SE
->getSCEV(CmpRHS
));
2026 auto CmpConstrainedLHSRange
=
2027 ConstantRange::makeAllowedICmpRegion(P
, CmpRHSRange
);
2028 auto NarrowDefRange
= CmpConstrainedLHSRange
.addWithNoWrap(
2029 *NarrowDefRHS
, OverflowingBinaryOperator::NoSignedWrap
);
2031 updatePostIncRangeInfo(NarrowDef
, NarrowUser
, NarrowDefRange
);
2034 auto UpdateRangeFromGuards
= [&](Instruction
*Ctx
) {
2038 for (Instruction
&I
: make_range(Ctx
->getIterator().getReverse(),
2039 Ctx
->getParent()->rend())) {
2041 if (match(&I
, m_Intrinsic
<Intrinsic::experimental_guard
>(m_Value(C
))))
2042 UpdateRangeFromCondition(C
, /*TrueDest=*/true);
2046 UpdateRangeFromGuards(NarrowUser
);
2048 BasicBlock
*NarrowUserBB
= NarrowUser
->getParent();
2049 // If NarrowUserBB is statically unreachable asking dominator queries may
2050 // yield surprising results. (e.g. the block may not have a dom tree node)
2051 if (!DT
->isReachableFromEntry(NarrowUserBB
))
2054 for (auto *DTB
= (*DT
)[NarrowUserBB
]->getIDom();
2055 L
->contains(DTB
->getBlock());
2056 DTB
= DTB
->getIDom()) {
2057 auto *BB
= DTB
->getBlock();
2058 auto *TI
= BB
->getTerminator();
2059 UpdateRangeFromGuards(TI
);
2061 auto *BI
= dyn_cast
<BranchInst
>(TI
);
2062 if (!BI
|| !BI
->isConditional())
2065 auto *TrueSuccessor
= BI
->getSuccessor(0);
2066 auto *FalseSuccessor
= BI
->getSuccessor(1);
2068 auto DominatesNarrowUser
= [this, NarrowUser
] (BasicBlockEdge BBE
) {
2069 return BBE
.isSingleEdge() &&
2070 DT
->dominates(BBE
, NarrowUser
->getParent());
2073 if (DominatesNarrowUser(BasicBlockEdge(BB
, TrueSuccessor
)))
2074 UpdateRangeFromCondition(BI
->getCondition(), /*TrueDest=*/true);
2076 if (DominatesNarrowUser(BasicBlockEdge(BB
, FalseSuccessor
)))
2077 UpdateRangeFromCondition(BI
->getCondition(), /*TrueDest=*/false);
2081 /// Calculates PostIncRangeInfos map for the given IV
2082 void WidenIV::calculatePostIncRanges(PHINode
*OrigPhi
) {
2083 SmallPtrSet
<Instruction
*, 16> Visited
;
2084 SmallVector
<Instruction
*, 6> Worklist
;
2085 Worklist
.push_back(OrigPhi
);
2086 Visited
.insert(OrigPhi
);
2088 while (!Worklist
.empty()) {
2089 Instruction
*NarrowDef
= Worklist
.pop_back_val();
2091 for (Use
&U
: NarrowDef
->uses()) {
2092 auto *NarrowUser
= cast
<Instruction
>(U
.getUser());
2094 // Don't go looking outside the current loop.
2095 auto *NarrowUserLoop
= (*LI
)[NarrowUser
->getParent()];
2096 if (!NarrowUserLoop
|| !L
->contains(NarrowUserLoop
))
2099 if (!Visited
.insert(NarrowUser
).second
)
2102 Worklist
.push_back(NarrowUser
);
2104 calculatePostIncRange(NarrowDef
, NarrowUser
);
2109 PHINode
*llvm::createWideIV(const WideIVInfo
&WI
,
2110 LoopInfo
*LI
, ScalarEvolution
*SE
, SCEVExpander
&Rewriter
,
2111 DominatorTree
*DT
, SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
,
2112 unsigned &NumElimExt
, unsigned &NumWidened
,
2113 bool HasGuards
, bool UsePostIncrementRanges
) {
2114 WidenIV
Widener(WI
, LI
, SE
, DT
, DeadInsts
, HasGuards
, UsePostIncrementRanges
);
2115 PHINode
*WidePHI
= Widener
.createWideIV(Rewriter
);
2116 NumElimExt
= Widener
.getNumElimExt();
2117 NumWidened
= Widener
.getNumWidened();