//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements induction variable simplification. It does
// not define any actual pass or policy, but provides a single function to
// simplify a loop's induction variables based on ScalarEvolution.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
STATISTIC(NumElimRem, "Number of IV remainder operations eliminated");
STATISTIC(
    NumSimplifiedSDiv,
    "Number of IV signed division operations converted to unsigned division");
STATISTIC(
    NumSimplifiedSRem,
    "Number of IV signed remainder operations converted to unsigned remainder");
STATISTIC(NumElimCmp, "Number of IV comparisons eliminated");

namespace {
  /// This is a utility for simplifying induction variables
  /// based on ScalarEvolution. It is the primary instrument of the
  /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
  /// other loop passes that preserve SCEV.
  class SimplifyIndvar {
    Loop             *L;
    LoopInfo         *LI;
    ScalarEvolution  *SE;
    DominatorTree    *DT;
    const TargetTransformInfo *TTI;
    SCEVExpander     &Rewriter;
    SmallVectorImpl<WeakTrackingVH> &DeadInsts;

    bool Changed = false;

  public:
    SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
                   LoopInfo *LI, const TargetTransformInfo *TTI,
                   SCEVExpander &Rewriter,
                   SmallVectorImpl<WeakTrackingVH> &Dead)
        : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
          DeadInsts(Dead) {
      assert(LI && "IV simplification requires LoopInfo");
    }

    bool hasChanged() const { return Changed; }

    /// Iteratively perform simplification on a worklist of users of the
    /// specified induction variable. This is the top-level driver that applies
    /// all simplifications to users of an IV.
    void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);

    Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);

    bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
    bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
    bool replaceFloatIVWithIntegerIV(Instruction *UseInst);

    bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
    bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
    bool eliminateTrunc(TruncInst *TI);
    bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
    bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
    void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
    void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
                             bool IsSigned);
    void replaceRemWithNumerator(BinaryOperator *Rem);
    void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
    void replaceSRemWithURem(BinaryOperator *Rem);
    bool eliminateSDiv(BinaryOperator *SDiv);
    bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand);
    bool strengthenOverflowingOperation(BinaryOperator *OBO,
                                        Instruction *IVOperand);
    bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
  };
} // namespace

/// Find a point in code which dominates all given instructions. We can safely
/// assume that, whatever fact we can prove at the found point, this fact is
/// also true for each of the given instructions.
static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions,
                                        DominatorTree &DT) {
  Instruction *CommonDom = nullptr;
  for (auto *Insn : Instructions)
    CommonDom =
        CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn;
  assert(CommonDom && "Common dominator not found?");
  return CommonDom;
}

/// Fold an IV operand into its use. This removes increments of an
/// aligned IV when used by an instruction that ignores the low bits.
///
/// IVOperand is guaranteed SCEVable, but UseInst may not be.
///
/// Return the operand of IVOperand for this induction variable if IVOperand can
/// be folded (in case more folding opportunities have been exposed).
/// Otherwise return null.
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
  Value *IVSrc = nullptr;
  const unsigned OperIdx = 0;
  const SCEV *FoldedExpr = nullptr;
  bool MustDropExactFlag = false;
  switch (UseInst->getOpcode()) {
  default:
    return nullptr;
  case Instruction::UDiv:
  case Instruction::LShr:
    // We're only interested in the case where we know something about
    // the numerator and have a constant denominator.
    if (IVOperand != UseInst->getOperand(OperIdx) ||
        !isa<ConstantInt>(UseInst->getOperand(1)))
      return nullptr;

    // Attempt to fold a binary operator with constant operand.
    // e.g. ((I + 1) >> 2) => I >> 2
    if (!isa<BinaryOperator>(IVOperand)
        || !isa<ConstantInt>(IVOperand->getOperand(1)))
      return nullptr;

    IVSrc = IVOperand->getOperand(0);
    // IVSrc must be the (SCEVable) IV, since the other operand is const.
    assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");

    ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
    if (UseInst->getOpcode() == Instruction::LShr) {
      // Get a constant for the divisor. See createSCEV.
      uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
      if (D->getValue().uge(BitWidth))
        return nullptr;

      D = ConstantInt::get(UseInst->getContext(),
                           APInt::getOneBitSet(BitWidth, D->getZExtValue()));
    }
    const auto *LHS = SE->getSCEV(IVSrc);
    const auto *RHS = SE->getSCEV(D);
    FoldedExpr = SE->getUDivExpr(LHS, RHS);
    // We might have 'exact' flag set at this point which will no longer be
    // correct after we make the replacement.
    if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
      MustDropExactFlag = true;
  }
  // We have something that might fold its operand. Compare SCEVs.
  if (!SE->isSCEVable(UseInst->getType()))
    return nullptr;

  // Bypass the operand if SCEV can prove it has no effect.
  if (SE->getSCEV(UseInst) != FoldedExpr)
    return nullptr;

  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
                    << " -> " << *UseInst << '\n');

  UseInst->setOperand(OperIdx, IVSrc);
  assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");

  if (MustDropExactFlag)
    UseInst->dropPoisonGeneratingFlags();

  ++NumElimOperand;
  Changed = true;
  if (IVOperand->use_empty())
    DeadInsts.emplace_back(IVOperand);
  return IVSrc;
}
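
// A hypothetical illustration of the fold above (names invented): if SCEV
// knows %iv is always a multiple of 4, e.g. %iv = {0,+,4}, then in
//   %add = add i32 %iv, 3
//   %shr = lshr i32 %add, 2
// the SCEV of %shr is identical to that of (lshr i32 %iv, 2), because adding
// 3 never crosses a multiple-of-4 boundary. The lshr's operand is therefore
// rewritten to %iv, and %add becomes dead if it has no other users.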

bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
                                               Instruction *IVOperand) {
  auto *Preheader = L->getLoopPreheader();
  if (!Preheader)
    return false;
  unsigned IVOperIdx = 0;
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  if (IVOperand != ICmp->getOperand(0)) {
    // Swapped
    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
    IVOperIdx = 1;
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  // Get the SCEVs for the ICmp operands (in the specific context of the
  // current loop).
  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
  const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
  const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
  auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp);
  if (!LIP)
    return false;
  ICmpInst::Predicate InvariantPredicate = LIP->Pred;
  const SCEV *InvariantLHS = LIP->LHS;
  const SCEV *InvariantRHS = LIP->RHS;

  // Do not generate something ridiculous.
  auto *PHTerm = Preheader->getTerminator();
  if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L,
                                   2 * SCEVCheapExpansionBudget, TTI, PHTerm) ||
      !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) ||
      !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm))
    return false;
  auto *NewLHS =
      Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm);
  auto *NewRHS =
      Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm);
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
  ICmp->setPredicate(InvariantPredicate);
  ICmp->setOperand(0, NewLHS);
  ICmp->setOperand(1, NewRHS);
  return true;
}
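
// Sketch of the effect (hypothetical IR; the exact invariant predicate
// depends on what getLoopInvariantPredicate() can prove): an in-loop check
//   %cmp = icmp ult i32 %iv, %n
// whose result SCEV can show is the same on every iteration may be replaced
// by a check of two loop-invariant operands, e.g. the IV's start value
// against %n, expanded once at the preheader terminator.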

/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
                                           Instruction *IVOperand) {
  unsigned IVOperIdx = 0;
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  ICmpInst::Predicate OriginalPred = Pred;
  if (IVOperand != ICmp->getOperand(0)) {
    // Swapped
    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
    IVOperIdx = 1;
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  // Get the SCEVs for the ICmp operands (in the specific context of the
  // current loop).
  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
  const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
  const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);

  // If the condition is always true or always false in the given context,
  // replace it with a constant value.
  SmallVector<Instruction *, 4> Users;
  for (auto *U : ICmp->users())
    Users.push_back(cast<Instruction>(U));
  const Instruction *CtxI = findCommonDominator(Users, *DT);
  if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
    SE->forgetValue(ICmp);
    ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
    DeadInsts.emplace_back(ICmp);
    LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
  } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
    // fallthrough to end of function
  } else if (ICmpInst::isSigned(OriginalPred) &&
             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
    // If we were unable to make anything above, all we can do is canonicalize
    // the comparison hoping that it will open the doors for other
    // optimizations. If we find out that we compare two non-negative values,
    // we turn the instruction's predicate to its unsigned version. Note that
    // we cannot rely on Pred here unless we check if we have swapped it.
    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
    LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
                      << '\n');
    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
  } else
    return;

  ++NumElimCmp;
  Changed = true;
}
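
// Two hypothetical examples of the cases handled above: with %iv = {0,+,1}<nsw>
// (hence known non-negative), the tautological check
//   %cmp = icmp sge i32 %iv, 0
// is proven true on every iteration, so evaluatePredicateAt() folds it to the
// constant 'true'; and when both operands of a signed compare are known
// non-negative,
//   %cmp2 = icmp slt i32 %iv, %n
// is canonicalized to 'icmp ult i32 %iv, %n'.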

bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
  // Get the SCEVs for the sdiv operands.
  auto *N = SE->getSCEV(SDiv->getOperand(0));
  auto *D = SE->getSCEV(SDiv->getOperand(1));

  // Simplify unnecessary loops away.
  const Loop *L = LI->getLoopFor(SDiv->getParent());
  N = SE->getSCEVAtScope(N, L);
  D = SE->getSCEVAtScope(D, L);

  // Replace sdiv by udiv if both of the operands are non-negative.
  if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
    auto *UDiv = BinaryOperator::Create(
        BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
        SDiv->getName() + ".udiv", SDiv);
    UDiv->setIsExact(SDiv->isExact());
    SDiv->replaceAllUsesWith(UDiv);
    LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
    ++NumSimplifiedSDiv;
    Changed = true;
    DeadInsts.push_back(SDiv);
    return true;
  }

  return false;
}
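
// A hypothetical example: with %iv = {0,+,1}<nsw> and %n proven non-negative,
//   %q = sdiv i32 %iv, %n
// is rewritten to
//   %q.udiv = udiv i32 %iv, %n
// which the unsigned-only folds above and below can then reason about.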

// i %s n -> i %u n if i >= 0 and n >= 0
void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
  auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
  auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
                                      Rem->getName() + ".urem", Rem);
  Rem->replaceAllUsesWith(URem);
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
  ++NumSimplifiedSRem;
  Changed = true;
  DeadInsts.emplace_back(Rem);
}

// i % n --> i if i is in [0,n).
void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
  Rem->replaceAllUsesWith(Rem->getOperand(0));
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
  ++NumElimRem;
  Changed = true;
  DeadInsts.emplace_back(Rem);
}

// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
  auto *T = Rem->getType();
  auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
  ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D);
  SelectInst *Sel =
      SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
  Rem->replaceAllUsesWith(Sel);
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
  ++NumElimRem;
  Changed = true;
  DeadInsts.emplace_back(Rem);
}

/// SimplifyIVUsers helper for eliminating useless remainder operations
/// operating on an induction variable or replacing srem by urem.
void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
                                         Instruction *IVOperand,
                                         bool IsSigned) {
  auto *NValue = Rem->getOperand(0);
  auto *DValue = Rem->getOperand(1);
  // We're only interested in the case where we know something about
  // the numerator, unless it is a srem, because we want to replace srem by
  // urem.
  bool UsedAsNumerator = IVOperand == NValue;
  if (!UsedAsNumerator && !IsSigned)
    return;

  const SCEV *N = SE->getSCEV(NValue);

  // Simplify unnecessary loops away.
  const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
  N = SE->getSCEVAtScope(N, ICmpLoop);

  bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);

  // Do not proceed if the numerator may be negative.
  if (!IsNumeratorNonNegative)
    return;

  const SCEV *D = SE->getSCEV(DValue);
  D = SE->getSCEVAtScope(D, ICmpLoop);

  if (UsedAsNumerator) {
    auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    if (SE->isKnownPredicate(LT, N, D)) {
      replaceRemWithNumerator(Rem);
      return;
    }

    auto *T = Rem->getType();
    const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
    if (SE->isKnownPredicate(LT, NLessOne, D)) {
      replaceRemWithNumeratorOrZero(Rem);
      return;
    }
  }

  // Try to replace SRem with URem, if both N and D are known non-negative.
  // Since we have already checked N, we only need to check D now.
  if (!IsSigned || !SE->isKnownNonNegative(D))
    return;

  replaceSRemWithURem(Rem);
}
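
// Hypothetical illustrations of the three rewrites above: if SCEV proves
// 0 <= %iv < %n, then
//   %r = urem i32 %iv, %n
// folds to %iv itself; if it can only prove %iv - 1 < %n, it folds to
//   %c = icmp eq i32 %iv, %n
//   %r = select i1 %c, i32 0, i32 %iv
// and an srem whose operands are both known non-negative is simply replaced
// by the equivalent urem.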

bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
  const SCEV *LHS = SE->getSCEV(WO->getLHS());
  const SCEV *RHS = SE->getSCEV(WO->getRHS());
  if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
    return false;

  // Proved no overflow, nuke the overflow check and, if possible, the overflow
  // intrinsic as well.

  BinaryOperator *NewResult = BinaryOperator::Create(
      WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);

  if (WO->isSigned())
    NewResult->setHasNoSignedWrap(true);
  else
    NewResult->setHasNoUnsignedWrap(true);

  SmallVector<ExtractValueInst *, 4> ToDelete;

  for (auto *U : WO->users()) {
    if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
      if (EVI->getIndices()[0] == 1)
        EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
      else {
        assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
        EVI->replaceAllUsesWith(NewResult);
      }
      ToDelete.push_back(EVI);
    }
  }

  for (auto *EVI : ToDelete)
    EVI->eraseFromParent();

  if (WO->use_empty())
    WO->eraseFromParent();
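
  // A hypothetical example of the rewrite performed here: when SCEV proves
  // the increment cannot overflow,
  //   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %iv, i32 1)
  //   %v = extractvalue { i32, i1 } %s, 0
  //   %o = extractvalue { i32, i1 } %s, 1
  // becomes a plain 'add nsw i32 %iv, 1' feeding the users of %v, while %o
  // is replaced by 'false'; the now-dead intrinsic call is erased.

  Changed = true;
  return true;
}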

bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
  const SCEV *LHS = SE->getSCEV(SI->getLHS());
  const SCEV *RHS = SE->getSCEV(SI->getRHS());
  if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
    return false;

  BinaryOperator *BO = BinaryOperator::Create(
      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
  if (SI->isSigned())
    BO->setHasNoSignedWrap();
  else
    BO->setHasNoUnsignedWrap();

  SI->replaceAllUsesWith(BO);
  DeadInsts.emplace_back(SI);
  Changed = true;
  return true;
}

bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
  // It is always legal to replace
  //   icmp <pred> i32 trunc(iv), n
  // with
  //   icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is a signed predicate.
  // Or with
  //   icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is an unsigned predicate.
  // Or with either of these if pred is an equality predicate.
  //
  // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
  // every comparison which uses trunc, it means that we can replace each of
  // them with comparison of iv against sext/zext(n). We no longer need trunc
  // after that.
  //
  // TODO: Should we do this if we can widen *some* comparisons, but not all
  // of them? Sometimes it is enough to enable other optimizations, but the
  // trunc instruction will stay in the loop.
  Value *IV = TI->getOperand(0);
  Type *IVTy = IV->getType();
  const SCEV *IVSCEV = SE->getSCEV(IV);
  const SCEV *TISCEV = SE->getSCEV(TI);

  // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
  // get rid of the trunc.
  bool DoesSExtCollapse = false;
  bool DoesZExtCollapse = false;
  if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
    DoesSExtCollapse = true;
  if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
    DoesZExtCollapse = true;

  // If neither sext nor zext collapses, it is not profitable to do any
  // transform. Bail.
  if (!DoesSExtCollapse && !DoesZExtCollapse)
    return false;

  // Collect users of the trunc that look like comparisons against invariants.
  // Bail if we find something different.
  SmallVector<ICmpInst *, 4> ICmpUsers;
  for (auto *U : TI->users()) {
    // We don't care about users in unreachable blocks.
    if (isa<Instruction>(U) &&
        !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
      continue;
    ICmpInst *ICI = dyn_cast<ICmpInst>(U);
    if (!ICI) return false;
    assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
    if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
        !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
      return false;
    // If we cannot get rid of trunc, bail.
    if (ICI->isSigned() && !DoesSExtCollapse)
      return false;
    if (ICI->isUnsigned() && !DoesZExtCollapse)
      return false;
    // For equality, either signed or unsigned works.
    ICmpUsers.push_back(ICI);
  }

  auto CanUseZExt = [&](ICmpInst *ICI) {
    // Unsigned comparison can be widened as unsigned.
    if (ICI->isUnsigned())
      return true;
    // Is it profitable to do zext?
    if (!DoesZExtCollapse)
      return false;
    // For equality, we can safely zext both parts.
    if (ICI->isEquality())
      return true;
    // Otherwise we can only use zext when comparing two non-negative or two
    // negative values. But in practice, we will never pass DoesZExtCollapse
    // check for a negative value, because zext(trunc(x)) is non-negative. So
    // it only makes sense to check for non-negativity here.
    const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
    const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
    return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
  };

  // Replace all comparisons against trunc with comparisons against IV.
  for (auto *ICI : ICmpUsers) {
    bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
    auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
    IRBuilder<> Builder(ICI);
    Value *Ext = nullptr;
    // For signed/unsigned predicate, replace the old comparison with comparison
    // of immediate IV against sext/zext of the invariant argument. If we can
    // use either sext or zext (i.e. we are dealing with an equality predicate),
    // then prefer zext as a more canonical form.
    // TODO: If we see a signed comparison which can be turned into unsigned,
    // we can do it here for canonicalization purposes.
    ICmpInst::Predicate Pred = ICI->getPredicate();
    if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
    if (CanUseZExt(ICI)) {
      assert(DoesZExtCollapse && "Unprofitable zext?");
      Ext = Builder.CreateZExt(Op1, IVTy, "zext");
      Pred = ICmpInst::getUnsignedPredicate(Pred);
    } else {
      assert(DoesSExtCollapse && "Unprofitable sext?");
      Ext = Builder.CreateSExt(Op1, IVTy, "sext");
      assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
    }
    bool Changed;
    L->makeLoopInvariant(Ext, Changed);
    (void)Changed;
    auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
    ICI->replaceAllUsesWith(NewCmp);
    DeadInsts.emplace_back(ICI);
  }

  // Trunc no longer needed.
  TI->replaceAllUsesWith(PoisonValue::get(TI->getType()));
  DeadInsts.emplace_back(TI);
  return true;
}
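
// A hypothetical example: for a 64-bit IV where sext(trunc(%iv)) == %iv,
//   %t = trunc i64 %iv to i32
//   %cmp = icmp slt i32 %t, %n
// is rewritten to
//   %sext = sext i32 %n to i64
//   %cmp = icmp slt i64 %iv, %sext
// and once every comparison user has been rewritten this way, the trunc
// itself is dropped.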

/// Eliminate an operation that consumes a simple IV and has no observable
/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
/// but UseInst may not be.
bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
                                     Instruction *IVOperand) {
  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
    eliminateIVComparison(ICmp, IVOperand);
    return true;
  }
  if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
    bool IsSRem = Bin->getOpcode() == Instruction::SRem;
    if (IsSRem || Bin->getOpcode() == Instruction::URem) {
      simplifyIVRemainder(Bin, IVOperand, IsSRem);
      return true;
    }

    if (Bin->getOpcode() == Instruction::SDiv)
      return eliminateSDiv(Bin);
  }

  if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
    if (eliminateOverflowIntrinsic(WO))
      return true;

  if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
    if (eliminateSaturatingIntrinsic(SI))
      return true;

  if (auto *TI = dyn_cast<TruncInst>(UseInst))
    if (eliminateTrunc(TI))
      return true;

  if (eliminateIdentitySCEV(UseInst, IVOperand))
    return true;

  return false;
}

static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) {
  if (auto *BB = L->getLoopPreheader())
    return BB->getTerminator();

  return Hint;
}

/// Replace the UseInst with a loop invariant expression if it is safe.
bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
  if (!SE->isSCEVable(I->getType()))
    return false;

  // Get the symbolic expression for this instruction.
  const SCEV *S = SE->getSCEV(I);

  if (!SE->isLoopInvariant(S, L))
    return false;

  // Do not generate something ridiculous even if S is loop invariant.
  if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I))
    return false;

  auto *IP = GetLoopInvariantInsertPosition(L, I);

  if (!Rewriter.isSafeToExpandAt(S, IP)) {
    LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
                      << " with non-speculable loop invariant: " << *S << '\n');
    return false;
  }

  auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);

  I->replaceAllUsesWith(Invariant);
  LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
                    << " with loop invariant: " << *S << '\n');

  ++NumFoldedUser;
  Changed = true;
  DeadInsts.emplace_back(I);
  return true;
}
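
// A hypothetical example: with the two counters %i = {0,+,1} and
// %j = {%n,+,-1}, the in-loop value
//   %x = add i32 %i, %j
// has the loop-invariant SCEV %n, so %n is expanded once in the preheader
// and every use of %x is redirected to it.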

/// Eliminate redundant type cast between integer and float.
bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
  if (UseInst->getOpcode() != CastInst::SIToFP &&
      UseInst->getOpcode() != CastInst::UIToFP)
    return false;

  Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
  // Get the symbolic expression for this instruction.
  const SCEV *IV = SE->getSCEV(IVOperand);
  int MaskBits;
  if (UseInst->getOpcode() == CastInst::SIToFP)
    MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
  else
    MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
  int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
  if (MaskBits <= DestNumSigBits) {
    for (User *U : UseInst->users()) {
      // Match fptosi/fptoui users of the sitofp/uitofp with the same type.
      auto *CI = dyn_cast<CastInst>(U);
      if (!CI)
        continue;

      CastInst::CastOps Opcode = CI->getOpcode();
      if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
        continue;

      Value *Conv = nullptr;
      if (IVOperand->getType() != CI->getType()) {
        IRBuilder<> Builder(CI);
        StringRef Name = IVOperand->getName();
        // To match InstCombine logic, we only need sext if both fptosi and
        // sitofp are used. If one of them is unsigned, then we can use zext.
        if (SE->getTypeSizeInBits(IVOperand->getType()) >
            SE->getTypeSizeInBits(CI->getType())) {
          Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
        } else if (Opcode == CastInst::FPToUI ||
                   UseInst->getOpcode() == CastInst::UIToFP) {
          Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
        } else {
          Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
        }
      } else
        Conv = IVOperand;

      CI->replaceAllUsesWith(Conv);
      DeadInsts.push_back(CI);
      LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
                        << " with: " << *Conv << '\n');

      ++NumFoldedUser;
      Changed = true;
    }
  }

  return Changed;
}
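
// A hypothetical example: if the signed range of %iv fits in float's 24-bit
// significand, the lossless round trip
//   %f = sitofp i32 %iv to float
//   %i = fptosi float %f to i32
// lets %i be replaced directly with %iv, leaving both casts dead.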

/// Eliminate any operation that SCEV can prove is an identity function.
bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
                                           Instruction *IVOperand) {
  if (!SE->isSCEVable(UseInst->getType()) ||
      UseInst->getType() != IVOperand->getType())
    return false;

  const SCEV *UseSCEV = SE->getSCEV(UseInst);
  if (UseSCEV != SE->getSCEV(IVOperand))
    return false;

  // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
  // dominator tree, even if X is an operand to Y. For instance, in
  //
  //     %iv = phi i32 {0,+,1}
  //     br %cond, label %left, label %merge
  //
  //   left:
  //     %X = add i32 %iv, 0
  //     br label %merge
  //
  //   merge:
  //     %M = phi (%X, %iv)
  //
  // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
  // %M.replaceAllUsesWith(%X) would be incorrect.

  if (isa<PHINode>(UseInst))
    // If UseInst is not a PHI node then we know that IVOperand dominates
    // UseInst directly from the legality of SSA.
    if (!DT || !DT->dominates(IVOperand, UseInst))
      return false;

  if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
    return false;

  // Make sure the operand is not more poisonous than the instruction.
  if (!impliesPoison(IVOperand, UseInst)) {
    SmallVector<Instruction *> DropPoisonGeneratingInsts;
    if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts))
      return false;

    for (Instruction *I : DropPoisonGeneratingInsts)
      I->dropPoisonGeneratingFlagsAndMetadata();
  }

  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');

  SE->forgetValue(UseInst);
  UseInst->replaceAllUsesWith(IVOperand);
  ++NumElimIdentity;
  Changed = true;
  DeadInsts.emplace_back(UseInst);
  return true;
}
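
// A hypothetical example: SCEV gives
//   %x = add i32 %iv, 0
// the same expression as %iv itself, so, subject to the dominance, LCSSA and
// poison checks above, all uses of %x are redirected to %iv.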

bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
                                        Instruction *IVOperand) {
  return (isa<OverflowingBinaryOperator>(BO) &&
          strengthenOverflowingOperation(BO, IVOperand)) ||
         (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
}

/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
/// unsigned-overflow. Returns true if anything changed, false otherwise.
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
                                                    Instruction *IVOperand) {
  auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
      cast<OverflowingBinaryOperator>(BO));
  if (!Flags)
    return false;

  BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) ==
                           SCEV::FlagNUW);
  BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) ==
                         SCEV::FlagNSW);

  // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
  // flags on addrecs while performing zero/sign extensions. We could call
  // forgetValue() here to make sure those flags also propagate to any other
  // SCEV expressions based on the addrec. However, this can have pathological
  // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
  return true;
}

/// Annotate the Shr in (X << IVOperand) >> C as exact using the
/// information from the IV's range. Returns true if anything changed, false
/// otherwise.
bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
                                          Instruction *IVOperand) {
  if (BO->getOpcode() == Instruction::Shl) {
    bool Changed = false;
    ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
    for (auto *U : BO->users()) {
      const APInt *C;
      if (match(U,
                m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
          match(U,
                m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
        BinaryOperator *Shr = cast<BinaryOperator>(U);
        if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
          Shr->setIsExact(true);
          Changed = true;
        }
      }
    }
    return Changed;
  }

  return false;
}
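
// A hypothetical example: if the IV's unsigned range proves %iv >= 3, then in
//   %a = shl i32 %x, %iv
//   %b = lshr i32 %a, 3
// the low 3 bits of %a are known to be zero, so the lshr is marked 'exact'.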

/// Add all uses of Def to the current IV's worklist.
static void pushIVUsers(
    Instruction *Def, Loop *L,
    SmallPtrSet<Instruction *, 16> &Simplified,
    SmallVectorImpl<std::pair<Instruction *, Instruction *>> &SimpleIVUsers) {
  for (User *U : Def->users()) {
    Instruction *UI = cast<Instruction>(U);

    // Avoid infinite or exponential worklist processing.
    // Also ensure unique worklist users.
    // If Def is a LoopPhi, it may not be in the Simplified set, so check for
    // self edges first.
    if (UI == Def)
      continue;

    // Only change the current Loop, do not change the other parts (e.g. other
    // Loops).
    if (!L->contains(UI))
      continue;

    // Do not push the same instruction more than once.
    if (!Simplified.insert(UI).second)
      continue;

    SimpleIVUsers.push_back(std::make_pair(UI, Def));
  }
}

/// Return true if this instruction generates a simple SCEV
/// expression in terms of that IV.
///
/// This is similar to IVUsers' isInteresting() but processes each instruction
/// non-recursively when the operand is already known to be a simpleIVUser.
static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
  if (!SE->isSCEVable(I->getType()))
    return false;

  // Get the symbolic expression for this instruction.
  const SCEV *S = SE->getSCEV(I);

  // Only consider affine recurrences.
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (AR && AR->getLoop() == L)
    return true;

  return false;
}

/// Iteratively perform simplification on a worklist of users
/// of the specified induction variable. Each successive simplification may push
/// more users which may themselves be candidates for simplification.
///
/// This algorithm does not require IVUsers analysis. Instead, it simplifies
/// instructions in-place during analysis. Rather than rewriting induction
/// variables bottom-up from their users, it transforms a chain of IVUsers
/// top-down, updating the IR only when it encounters a clear optimization
/// opportunity.
///
/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
  if (!SE->isSCEVable(CurrIV->getType()))
    return;

  // Instructions processed by SimplifyIndvar for CurrIV.
  SmallPtrSet<Instruction *, 16> Simplified;

  // Use-def pairs of IV users waiting to be processed for CurrIV.
  SmallVector<std::pair<Instruction *, Instruction *>, 8> SimpleIVUsers;

  // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
  // called multiple times for the same LoopPhi. This is the proper thing to
  // do for loop header phis that use each other.
  pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);

  while (!SimpleIVUsers.empty()) {
    std::pair<Instruction *, Instruction *> UseOper =
        SimpleIVUsers.pop_back_val();
    Instruction *UseInst = UseOper.first;

    // If a user of the IndVar is trivially dead, we prefer just to mark it dead
    // rather than try to do some complex analysis or transformation (such as
    // widening) based on it.
    // TODO: Propagate TLI and pass it here to handle more cases.
    if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
      DeadInsts.emplace_back(UseInst);
      continue;
    }

    // Bypass back edges to avoid extra work.
    if (UseInst == CurrIV) continue;

    // Try to replace UseInst with a loop invariant before any other
    // simplifications.
    if (replaceIVUserWithLoopInvariant(UseInst))
      continue;

    // Go further for the bitcast 'ptrtoint ptr to i64' or if the cast is done
    // by truncation.
    if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst)))
      for (Use &U : UseInst->uses()) {
        Instruction *User = cast<Instruction>(U.getUser());
        if (replaceIVUserWithLoopInvariant(User))
          break; // done replacing
      }

    Instruction *IVOperand = UseOper.second;
    for (unsigned N = 0; IVOperand; ++N) {
      assert(N <= Simplified.size() && "runaway iteration");
      (void) N;

      Value *NewOper = foldIVUser(UseInst, IVOperand);
      if (!NewOper)
        break; // done folding
      IVOperand = dyn_cast<Instruction>(NewOper);
    }
    if (!IVOperand)
      continue;

    if (eliminateIVUser(UseInst, IVOperand)) {
      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
      continue;
    }

    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
      if (strengthenBinaryOp(BO, IVOperand)) {
        // re-queue uses of the now modified binary operator and fall
        // through to the checks that remain.
        pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
      }
    }

    // Try to use integer induction for FPToSI of float induction directly.
    if (replaceFloatIVWithIntegerIV(UseInst)) {
      // Re-queue the potentially new direct uses of IVOperand.
      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
      continue;
    }

    CastInst *Cast = dyn_cast<CastInst>(UseInst);
    if (V && Cast) {
      V->visitCast(Cast);
      continue;
    }
    if (isSimpleIVUser(UseInst, L, SE)) {
      pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
    }
  }
}

namespace llvm {

void IVVisitor::anchor() { }

/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
                       LoopInfo *LI, const TargetTransformInfo *TTI,
                       SmallVectorImpl<WeakTrackingVH> &Dead,
                       SCEVExpander &Rewriter, IVVisitor *V) {
  SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
                     Rewriter, Dead);
  SIV.simplifyUsers(CurrIV, V);
  return SIV.hasChanged();
}

/// Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
                     LoopInfo *LI, const TargetTransformInfo *TTI,
                     SmallVectorImpl<WeakTrackingVH> &Dead) {
  SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  bool Changed = false;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    Changed |=
        simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
  }
  return Changed;
}

} // namespace llvm
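
// A minimal usage sketch (hypothetical caller code, not part of this file):
// a loop pass holding SE/DT/LI/TTI could drive the entry point above roughly
// like this, deleting the queued dead instructions itself afterwards:
//
//   SmallVector<WeakTrackingVH, 16> DeadInsts;
//   if (simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts))
//     for (WeakTrackingVH &V : DeadInsts)
//       if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
//         RecursivelyDeleteTriviallyDeadInstructions(Inst);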

//===----------------------------------------------------------------------===//
// Widen Induction Variables - Extend the width of an IV to cover its
// widest uses.
//===----------------------------------------------------------------------===//

namespace {

class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  bool UsePostIncrementRanges;

  // Statistics
  unsigned NumElimExt = 0;
  unsigned NumWidened = 0;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;

  enum class ExtendKind { Zero, Sign, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  std::optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                                   Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? std::optional<ConstantRange>(std::nullopt)
               : std::optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  /// Record a link in the Narrow IV def-use chain along with the WideIV that
  /// computes the same value as the Narrow IV def. This avoids caching Use*
  /// pointers.
  struct NarrowIVDefUse {
    Instruction *NarrowDef = nullptr;
    Instruction *NarrowUse = nullptr;
    Instruction *WideDef = nullptr;

    // True if the narrow def is never negative. Tracking this information lets
    // us use a sign extension instead of a zero extension or vice versa, when
    // profitable and legal.
    bool NeverNegative = false;

    NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                   bool NeverNegative)
        : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
          NeverNegative(NeverNegative) {}
  };

  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards, bool UsePostIncrementRanges = true);

  PHINode *createWideIV(SCEVExpander &Rewriter);

  unsigned getNumElimExt() { return NumElimExt; };
  unsigned getNumWidened() { return NumWidened; };

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantUse(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);

private:
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
};

} // namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
                 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
                 bool HasGuards, bool UsePostIncrementRanges)
    : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
      L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
      HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
      DeadInsts(DI) {
  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
  ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
}

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse =
        getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  switch (OpCode) {
  case Instruction::Add:
    return SE->getAddExpr(LHS, RHS);
  case Instruction::Sub:
    return SE->getMinusSCEV(LHS, RHS);
  case Instruction::Mul:
    return SE->getMulExpr(LHS, RHS);
  case Instruction::UDiv:
    return SE->getUDivExpr(LHS, RHS);
  default:
    llvm_unreachable("Unsupported opcode.");
  };
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy
WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, ExtendKind::Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) &&
      !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) {
    ExtKind = ExtendKind::Unknown;

    // For a non-negative NarrowDef, we can choose either type of
    // extension. We want to use the current extend kind if legal
    // (see above), and we only hit this code if we need to check
    // the opposite case.
    if (DU.NeverNegative) {
      if (OBO->hasNoSignedWrap()) {
        ExtKind = ExtendKind::Sign;
      } else if (OBO->hasNoUnsignedWrap()) {
        ExtKind = ExtendKind::Zero;
      }
    }
  }

  const SCEV *ExtendOperExpr =
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx));
  if (ExtKind == ExtendKind::Sign)
    ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
  else if (ExtKind == ExtendKind::Zero)
    ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
  else
    return {nullptr, ExtendKind::Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, ExtendKind::Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, ExtendKind::Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
  if (!DU.NarrowUse->getType()->isIntegerTy())
    return {nullptr, ExtendKind::Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, ExtendKind::Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = ExtendKind::Sign;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ExtendKind::Zero;
    }
  } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = ExtendKind::Sign;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ExtendKind::Zero;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, ExtendKind::Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
                          LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
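
// A hypothetical example: while sign-extending %iv to i64, the matching
// signed compare
//   %cmp = icmp slt i32 %iv, %n
// is widened in place to
//   %wide.n = sext i32 %n to i64
//   %cmp = icmp slt i64 %iv.wide, %wide.n
// so this narrow use disappears without inserting a trunc.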
1537 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1538 // will not work when:
1539 // 1) SCEV traces back to an instruction inside the loop that SCEV can not
1540 // expand, eg. add %indvar, (load %addr)
1541 // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1542 // While SCEV fails to avoid trunc, we can still try to use instruction
1543 // combining approach to prove trunc is not required. This can be further
1544 // extended with other instruction combining checks, but for now we handle the
1545 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1548 // %c = sub nsw %b, %indvar
1549 // %d = sext %c to i64
1551 // %indvar.ext1 = sext %indvar to i64
1552 // %m = sext %b to i64
1553 // %d = sub nsw i64 %m, %indvar.ext1
1554 // Therefore, as long as the result of add/sub/mul is extended to wide type, no
1555 // trunc is required regardless of how %b is generated. This pattern is common
1556 // when calculating address in 64 bit architecture
1557 bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU
) {
1558 Instruction
*NarrowUse
= DU
.NarrowUse
;
1559 Instruction
*NarrowDef
= DU
.NarrowDef
;
1560 Instruction
*WideDef
= DU
.WideDef
;
1562 // Handle the common case of add<nsw/nuw>
1563 const unsigned OpCode
= NarrowUse
->getOpcode();
1564 // Only Add/Sub/Mul instructions are supported.
1565 if (OpCode
!= Instruction::Add
&& OpCode
!= Instruction::Sub
&&
1566 OpCode
!= Instruction::Mul
)
1569 // The operand that is not defined by NarrowDef of DU. Let's call it the
1571 assert((NarrowUse
->getOperand(0) == NarrowDef
||
1572 NarrowUse
->getOperand(1) == NarrowDef
) &&
1575 const OverflowingBinaryOperator
*OBO
=
1576 cast
<OverflowingBinaryOperator
>(NarrowUse
);
1577 ExtendKind ExtKind
= getExtendKind(NarrowDef
);
1578 bool CanSignExtend
= ExtKind
== ExtendKind::Sign
&& OBO
->hasNoSignedWrap();
1579 bool CanZeroExtend
= ExtKind
== ExtendKind::Zero
&& OBO
->hasNoUnsignedWrap();
1580 auto AnotherOpExtKind
= ExtKind
;
  // Check that all uses are either:
  // - narrow def (in case we are widening the IV increment);
  // - single-input LCSSA Phis;
  // - comparison of the chosen type;
  // - extend of the chosen type (raison d'etre).
  SmallVector<Instruction *, 4> ExtUsers;
  SmallVector<PHINode *, 4> LCSSAPhiUsers;
  SmallVector<ICmpInst *, 4> ICmpUsers;
  for (Use &U : NarrowUse->uses()) {
    Instruction *User = cast<Instruction>(U.getUser());
    if (User == NarrowDef)
      continue;
    if (!L->contains(User)) {
      auto *LCSSAPhi = cast<PHINode>(User);
      // Make sure there is only 1 input, so that we don't have to split
      // critical edges.
      if (LCSSAPhi->getNumOperands() != 1)
        return false;
      LCSSAPhiUsers.push_back(LCSSAPhi);
      continue;
    }
    if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
      auto Pred = ICmp->getPredicate();
      // We have 3 types of predicates: signed, unsigned and equality
      // predicates. For equality, it's legal to widen icmp for either sign or
      // zero extend. For sign extend, we can also do so for signed predicates,
      // likewise for zero extend we can widen icmp for unsigned predicates.
      if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
        return false;
      if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
        return false;
      ICmpUsers.push_back(ICmp);
      continue;
    }
    if (ExtKind == ExtendKind::Sign)
      User = dyn_cast<SExtInst>(User);
    else
      User = dyn_cast<ZExtInst>(User);
    if (!User || User->getType() != WideType)
      return false;
    ExtUsers.push_back(User);
  }
  if (ExtUsers.empty()) {
    DeadInsts.emplace_back(NarrowUse);
    return true;
  }
  // We'll prove some facts that should be true in the context of ext users. If
  // there are no users, we are done now. If there are some, pick their common
  // dominator as context.
  const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);
  if (!CanSignExtend && !CanZeroExtend) {
    // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
    // will most likely not see it. Let's try to prove it.
    if (OpCode != Instruction::Add)
      return false;
    if (ExtKind != ExtendKind::Zero)
      return false;
    const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
    const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
    // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
    if (NarrowUse->getOperand(0) != NarrowDef)
      return false;
    if (!SE->isKnownNegative(RHS))
      return false;
    bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
                                               SE->getNegativeSCEV(RHS), CtxI);
    if (!ProvedSubNUW)
      return false;
    // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
    // neg(zext(neg(op))), which is basically sext(op).
    AnotherOpExtKind = ExtendKind::Sign;
  }
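
  // A small worked example of the neg(zext(neg(op))) == sext(op) identity used
  // above (hypothetical values): for the known-negative i8 operand op = -5
  // (0xFB), neg(op) = 5, zext to i16 gives 0x0005, and negating again yields
  // 0xFFFB, i.e. -5 as an i16 -- exactly sext(op). Hence sign-extending the
  // second operand is safe in this case.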
  // Verify that the defining operand is an AddRec for this loop.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS =
      (NarrowUse->getOperand(0) == NarrowDef)
          ? WideDef
          : createExtendInst(NarrowUse->getOperand(0), WideType,
                             AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
  Value *RHS =
      (NarrowUse->getOperand(1) == NarrowDef)
          ? WideDef
          : createExtendInst(NarrowUse->getOperand(1), WideType,
                             AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  ExtendKindMap[NarrowUse] = ExtKind;
  for (Instruction *User : ExtUsers) {
    assert(User->getType() == WideType && "Checked before!");
    LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                      << *WideBO << "\n");
    ++NumElimExt;
    User->replaceAllUsesWith(WideBO);
    DeadInsts.emplace_back(User);
  }
  for (PHINode *User : LCSSAPhiUsers) {
    assert(User->getNumOperands() == 1 && "Checked before!");
    Builder.SetInsertPoint(User);
    auto *WidePN =
        Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
    BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
    assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
           "Not a LCSSA Phi?");
    WidePN->addIncoming(WideBO, LoopExitingBlock);
    Builder.SetInsertPoint(User->getParent(),
                           User->getParent()->getFirstInsertionPt());
    auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
    User->replaceAllUsesWith(TruncPN);
    DeadInsts.emplace_back(User);
  }
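
  // For illustration (hypothetical IR), the loop above turns an LCSSA phi of
  // the narrow value
  //   exit:
  //     %lcssa = phi i32 [ %c, %loop ]
  // into a phi of the wide value truncated back outside the loop:
  //   exit:
  //     %lcssa.wide = phi i64 [ %c.wide, %loop ]
  //     %lcssa.trunc = trunc i64 %lcssa.wide to i32
  // so the narrow %c inside the loop can go dead.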
  for (ICmpInst *User : ICmpUsers) {
    Builder.SetInsertPoint(User);
    auto ExtendedOp = [&](Value *V) -> Value * {
      if (V == NarrowUse)
        return WideBO;
      if (ExtKind == ExtendKind::Zero)
        return Builder.CreateZExt(V, WideBO->getType());
      return Builder.CreateSExt(V, WideBO->getType());
    };
    auto Pred = User->getPredicate();
    auto *LHS = ExtendedOp(User->getOperand(0));
    auto *RHS = ExtendedOp(User->getOperand(1));
    auto *WideCmp =
        Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
    User->replaceAllUsesWith(WideCmp);
    DeadInsts.emplace_back(User);
  }

  return true;
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
                                 SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");
  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1,
                            UsePhi->getName() + ".wide", UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        BasicBlock *WidePhiBB = WidePhi->getParent();
        IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
    }
    return nullptr;
  }
  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
  };
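
  // E.g. a known non-negative narrow value such as i8 100 extends to the same
  // wide bit pattern (0x0064) under both sext and zext, which is why
  // NeverNegative lets either kind of extension subsume the use.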
  // Our raison d'etre! Eliminate sign and zero extension.
  if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      } else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }

    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }
  auto tryAddRecExpansion = [&]() -> Instruction * {
    // Does this user itself evaluate to a recurrence after widening?
    WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
    if (!WideAddRec.first)
      WideAddRec = getWideRecurrence(DU);
    assert((WideAddRec.first == nullptr) ==
           (WideAddRec.second == ExtendKind::Unknown));
    if (!WideAddRec.first)
      return nullptr;

    // Reuse the IV increment that SCEVExpander created as long as it dominates
    // NarrowUse.
    Instruction *WideUse = nullptr;
    if (WideAddRec.first == WideIncExpr &&
        Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
      WideUse = WideInc;
    else {
      WideUse = cloneIVUser(DU, WideAddRec.first);
      if (!WideUse)
        return nullptr;
    }
    // Evaluation of WideAddRec ensured that the narrow expression could be
    // extended outside the loop without overflow. This suggests that the wide
    // use evaluates to the same expression as the extended narrow use, but
    // doesn't absolutely guarantee it. Hence the following failsafe check. In
    // rare cases where it fails, we simply throw away the newly created wide
    // use.
    if (WideAddRec.first != SE->getSCEV(WideUse)) {
      LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                        << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                        << "\n");
      DeadInsts.emplace_back(WideUse);
      return nullptr;
    }

    // If we reached this point, we are going to replace DU.NarrowUse with
    // WideUse. Reattach DbgValue then.
    replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

    ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
    // Returning WideUse pushes it on the worklist.
    return WideUse;
  };
  if (auto *I = tryAddRecExpansion())
    return I;

  // If use is a loop condition, try to promote the condition instead of
  // truncating the IV first.
  if (widenLoopCompare(DU))
    return nullptr;

  // We are here about to generate a truncate instruction that may hurt
  // performance because the scalar evolution expression computed earlier
  // in WideAddRec.first does not indicate a polynomial induction expression.
  // In that case, look at the operands of the use instruction to determine
  // if we can still widen the use instead of truncating its operand.
  if (widenWithVariantUse(DU))
    return nullptr;

  // This user does not evaluate to a recurrence after widening, so don't
  // follow it. Instead insert a Trunc to kill off the original use,
  // eventually isolating the original narrow IV so it can be removed.
  truncateIVUse(DU, DT, LI);
  return nullptr;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getZero(NarrowSCEV->getType()));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
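///
/// For example (hypothetical IR), a narrow IV
///   %iv = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
///   %iv.next = add nsw i32 %iv, 1
/// gets a wide counterpart
///   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
///   %iv.wide.next = add nsw i64 %iv.wide, 1
/// after which sign extensions of %iv can be rewritten in terms of %iv.wide.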
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;
  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");
  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");
  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information based on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might already be widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);
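
  // For instance (hypothetical IR), given
  //   %inc = add nsw i32 %iv, 1
  //   %guard = icmp sge i32 %iv, 0
  //   br i1 %guard, label %then, label %else
  // any use of %inc inside %then is dominated by the condition, so there %iv
  // is in [0, INT_MAX] and %inc in [1, INT_MAX], i.e. provably non-negative
  // even if the definition itself is not.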
  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
  Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
  // If the wide phi is not a phi node, for example a cast node, like bitcast,
  // inttoptr, ptrtoint, just skip for now.
  if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
    // If the cast node is an inserted instruction without any user, we should
    // remove it to make sure the pass doesn't touch the function, as we cannot
    // widen the phi node.
    if (ExpandInst->hasNUses(0) &&
        Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
      DeadInsts.emplace_back(ExpandInst);
    return nullptr;
  }
  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
        dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    if (WideInc) {
      WideIncExpr = SE->getSCEV(WideInc);
      // Propagate the debug location associated with the original loop
      // increment to the new (widened) increment.
      auto *OrigInc =
          cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
      WideInc->setDebugLoc(OrigInc->getDebugLoc());
    }
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;
  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI.
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);

  return WidePhi;
}

/// Calculates the control-dependent range for the given def at the given
/// context by looking at dominating conditions inside of the loop.
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
                                    Instruction *NarrowUser) {
  Value *NarrowDefLHS;
  const APInt *NarrowDefRHS;
  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
                                 m_APInt(NarrowDefRHS))) ||
      !NarrowDefRHS->isNonNegative())
    return;

  auto UpdateRangeFromCondition = [&](Value *Condition, bool TrueDest) {
    CmpInst::Predicate Pred;
    Value *CmpRHS;
    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
                                 m_Value(CmpRHS))))
      return;

    CmpInst::Predicate P =
        TrueDest ? Pred : CmpInst::getInversePredicate(Pred);

    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
    auto CmpConstrainedLHSRange =
        ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
    auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
        *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);

    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
  };
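
  // A worked instance of the lambda above (hypothetical numbers): with
  // NarrowDef = add nsw i32 %x, 3 and a dominating true edge on
  // icmp slt i32 %x, 10, makeAllowedICmpRegion(slt, [10,10]) constrains %x to
  // [INT_MIN, 10), and addWithNoWrap(3, nsw) shifts that to [INT_MIN+3, 13)
  // for NarrowDef; a false edge would first invert slt to sge.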
  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
    if (!HasGuards)
      return;

    for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
                                     Ctx->getParent()->rend())) {
      Value *C = nullptr;
      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
        UpdateRangeFromCondition(C, /*TrueDest=*/true);
    }
  };

  UpdateRangeFromGuards(NarrowUser);
  BasicBlock *NarrowUserBB = NarrowUser->getParent();
  // If NarrowUserBB is statically unreachable, asking dominator queries may
  // yield surprising results (e.g. the block may not have a dom tree node).
  if (!DT->isReachableFromEntry(NarrowUserBB))
    return;

  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
       L->contains(DTB->getBlock());
       DTB = DTB->getIDom()) {
    auto *BB = DTB->getBlock();
    auto *TI = BB->getTerminator();
    UpdateRangeFromGuards(TI);

    auto *BI = dyn_cast<BranchInst>(TI);
    if (!BI || !BI->isConditional())
      continue;

    auto *TrueSuccessor = BI->getSuccessor(0);
    auto *FalseSuccessor = BI->getSuccessor(1);

    auto DominatesNarrowUser = [this, NarrowUser](BasicBlockEdge BBE) {
      return BBE.isSingleEdge() &&
             DT->dominates(BBE, NarrowUser->getParent());
    };

    if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);

    if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
  }
}

/// Calculates the PostIncRangeInfos map for the given IV.
void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
  SmallPtrSet<Instruction *, 16> Visited;
  SmallVector<Instruction *, 6> Worklist;
  Worklist.push_back(OrigPhi);
  Visited.insert(OrigPhi);

  while (!Worklist.empty()) {
    Instruction *NarrowDef = Worklist.pop_back_val();

    for (Use &U : NarrowDef->uses()) {
      auto *NarrowUser = cast<Instruction>(U.getUser());

      // Don't go looking outside the current loop.
      auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
      if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
        continue;

      if (!Visited.insert(NarrowUser).second)
        continue;

      Worklist.push_back(NarrowUser);

      calculatePostIncRange(NarrowDef, NarrowUser);
    }
  }
}

PHINode *llvm::createWideIV(const WideIVInfo &WI, LoopInfo *LI,
                            ScalarEvolution *SE, SCEVExpander &Rewriter,
                            DominatorTree *DT,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts,
                            unsigned &NumElimExt, unsigned &NumWidened,
                            bool HasGuards, bool UsePostIncrementRanges) {
  WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
  PHINode *WidePHI = Widener.createWideIV(Rewriter);
  NumElimExt = Widener.getNumElimExt();
  NumWidened = Widener.getNumWidened();
  return WidePHI;
}
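
// A minimal caller sketch (hypothetical, not from this file): a loop pass that
// has already picked a wide type WidePHIType for OrigPhi might invoke the
// helper roughly as follows, assuming the WideIVInfo fields shown here:
//
//   WideIVInfo WI;
//   WI.NarrowIV = OrigPhi;
//   WI.WidestNativeType = WidePHIType;
//   WI.IsSigned = true;
//   unsigned NumElimExt = 0, NumWidened = 0;
//   SmallVector<WeakTrackingVH, 16> DeadInsts;
//   PHINode *WidePhi = createWideIV(WI, LI, SE, Rewriter, DT, DeadInsts,
//                                   NumElimExt, NumWidened,
//                                   /*HasGuards=*/false,
//                                   /*UsePostIncrementRanges=*/true);
//
// and then clean up the instructions collected in DeadInsts.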