1 //===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements induction variable simplification. It does
10 // not define any actual pass or policy, but provides a single function to
11 // simplify a loop's induction variables based on ScalarEvolution.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/IR/DataLayout.h"
21 #include "llvm/IR/Dominators.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Transforms/Utils/Local.h"
29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
33 #define DEBUG_TYPE "indvars"
35 STATISTIC(NumElimIdentity
, "Number of IV identities eliminated");
36 STATISTIC(NumElimOperand
, "Number of IV operands folded into a use");
37 STATISTIC(NumFoldedUser
, "Number of IV users folded into a constant");
38 STATISTIC(NumElimRem
, "Number of IV remainder operations eliminated");
41 "Number of IV signed division operations converted to unsigned division");
44 "Number of IV signed remainder operations converted to unsigned remainder");
45 STATISTIC(NumElimCmp
, "Number of IV comparisons eliminated");
48 /// This is a utility for simplifying induction variables
49 /// based on ScalarEvolution. It is the primary instrument of the
50 /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
51 /// other loop passes that preserve SCEV.
52 class SimplifyIndvar
{
57 const TargetTransformInfo
*TTI
;
58 SCEVExpander
&Rewriter
;
59 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
;
64 SimplifyIndvar(Loop
*Loop
, ScalarEvolution
*SE
, DominatorTree
*DT
,
65 LoopInfo
*LI
, const TargetTransformInfo
*TTI
,
66 SCEVExpander
&Rewriter
,
67 SmallVectorImpl
<WeakTrackingVH
> &Dead
)
68 : L(Loop
), LI(LI
), SE(SE
), DT(DT
), TTI(TTI
), Rewriter(Rewriter
),
69 DeadInsts(Dead
), Changed(false) {
70 assert(LI
&& "IV simplification requires LoopInfo");
73 bool hasChanged() const { return Changed
; }
75 /// Iteratively perform simplification on a worklist of users of the
76 /// specified induction variable. This is the top-level driver that applies
77 /// all simplifications to users of an IV.
78 void simplifyUsers(PHINode
*CurrIV
, IVVisitor
*V
= nullptr);
80 Value
*foldIVUser(Instruction
*UseInst
, Instruction
*IVOperand
);
82 bool eliminateIdentitySCEV(Instruction
*UseInst
, Instruction
*IVOperand
);
83 bool replaceIVUserWithLoopInvariant(Instruction
*UseInst
);
85 bool eliminateOverflowIntrinsic(WithOverflowInst
*WO
);
86 bool eliminateSaturatingIntrinsic(SaturatingInst
*SI
);
87 bool eliminateTrunc(TruncInst
*TI
);
88 bool eliminateIVUser(Instruction
*UseInst
, Instruction
*IVOperand
);
89 bool makeIVComparisonInvariant(ICmpInst
*ICmp
, Value
*IVOperand
);
90 void eliminateIVComparison(ICmpInst
*ICmp
, Value
*IVOperand
);
91 void simplifyIVRemainder(BinaryOperator
*Rem
, Value
*IVOperand
,
93 void replaceRemWithNumerator(BinaryOperator
*Rem
);
94 void replaceRemWithNumeratorOrZero(BinaryOperator
*Rem
);
95 void replaceSRemWithURem(BinaryOperator
*Rem
);
96 bool eliminateSDiv(BinaryOperator
*SDiv
);
97 bool strengthenOverflowingOperation(BinaryOperator
*OBO
, Value
*IVOperand
);
98 bool strengthenRightShift(BinaryOperator
*BO
, Value
*IVOperand
);
102 /// Find a point in code which dominates all given instructions. We can safely
103 /// assume that, whatever fact we can prove at the found point, this fact is
104 /// also true for each of the given instructions.
105 static Instruction
*findCommonDominator(ArrayRef
<Instruction
*> Instructions
,
107 Instruction
*CommonDom
= nullptr;
108 for (auto *Insn
: Instructions
)
109 if (!CommonDom
|| DT
.dominates(Insn
, CommonDom
))
111 else if (!DT
.dominates(CommonDom
, Insn
))
112 // If there is no dominance relation, use common dominator.
114 DT
.findNearestCommonDominator(CommonDom
->getParent(),
115 Insn
->getParent())->getTerminator();
116 assert(CommonDom
&& "Common dominator not found?");
120 /// Fold an IV operand into its use. This removes increments of an
121 /// aligned IV when used by a instruction that ignores the low bits.
123 /// IVOperand is guaranteed SCEVable, but UseInst may not be.
125 /// Return the operand of IVOperand for this induction variable if IVOperand can
126 /// be folded (in case more folding opportunities have been exposed).
127 /// Otherwise return null.
128 Value
*SimplifyIndvar::foldIVUser(Instruction
*UseInst
, Instruction
*IVOperand
) {
129 Value
*IVSrc
= nullptr;
130 const unsigned OperIdx
= 0;
131 const SCEV
*FoldedExpr
= nullptr;
132 bool MustDropExactFlag
= false;
133 switch (UseInst
->getOpcode()) {
136 case Instruction::UDiv
:
137 case Instruction::LShr
:
138 // We're only interested in the case where we know something about
139 // the numerator and have a constant denominator.
140 if (IVOperand
!= UseInst
->getOperand(OperIdx
) ||
141 !isa
<ConstantInt
>(UseInst
->getOperand(1)))
144 // Attempt to fold a binary operator with constant operand.
145 // e.g. ((I + 1) >> 2) => I >> 2
146 if (!isa
<BinaryOperator
>(IVOperand
)
147 || !isa
<ConstantInt
>(IVOperand
->getOperand(1)))
150 IVSrc
= IVOperand
->getOperand(0);
151 // IVSrc must be the (SCEVable) IV, since the other operand is const.
152 assert(SE
->isSCEVable(IVSrc
->getType()) && "Expect SCEVable IV operand");
154 ConstantInt
*D
= cast
<ConstantInt
>(UseInst
->getOperand(1));
155 if (UseInst
->getOpcode() == Instruction::LShr
) {
156 // Get a constant for the divisor. See createSCEV.
157 uint32_t BitWidth
= cast
<IntegerType
>(UseInst
->getType())->getBitWidth();
158 if (D
->getValue().uge(BitWidth
))
161 D
= ConstantInt::get(UseInst
->getContext(),
162 APInt::getOneBitSet(BitWidth
, D
->getZExtValue()));
164 FoldedExpr
= SE
->getUDivExpr(SE
->getSCEV(IVSrc
), SE
->getSCEV(D
));
165 // We might have 'exact' flag set at this point which will no longer be
166 // correct after we make the replacement.
167 if (UseInst
->isExact() &&
168 SE
->getSCEV(IVSrc
) != SE
->getMulExpr(FoldedExpr
, SE
->getSCEV(D
)))
169 MustDropExactFlag
= true;
171 // We have something that might fold it's operand. Compare SCEVs.
172 if (!SE
->isSCEVable(UseInst
->getType()))
175 // Bypass the operand if SCEV can prove it has no effect.
176 if (SE
->getSCEV(UseInst
) != FoldedExpr
)
179 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
180 << " -> " << *UseInst
<< '\n');
182 UseInst
->setOperand(OperIdx
, IVSrc
);
183 assert(SE
->getSCEV(UseInst
) == FoldedExpr
&& "bad SCEV with folded oper");
185 if (MustDropExactFlag
)
186 UseInst
->dropPoisonGeneratingFlags();
190 if (IVOperand
->use_empty())
191 DeadInsts
.emplace_back(IVOperand
);
195 bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst
*ICmp
,
197 unsigned IVOperIdx
= 0;
198 ICmpInst::Predicate Pred
= ICmp
->getPredicate();
199 if (IVOperand
!= ICmp
->getOperand(0)) {
201 assert(IVOperand
== ICmp
->getOperand(1) && "Can't find IVOperand");
203 Pred
= ICmpInst::getSwappedPredicate(Pred
);
206 // Get the SCEVs for the ICmp operands (in the specific context of the
208 const Loop
*ICmpLoop
= LI
->getLoopFor(ICmp
->getParent());
209 const SCEV
*S
= SE
->getSCEVAtScope(ICmp
->getOperand(IVOperIdx
), ICmpLoop
);
210 const SCEV
*X
= SE
->getSCEVAtScope(ICmp
->getOperand(1 - IVOperIdx
), ICmpLoop
);
212 auto *PN
= dyn_cast
<PHINode
>(IVOperand
);
215 auto LIP
= SE
->getLoopInvariantPredicate(Pred
, S
, X
, L
);
218 ICmpInst::Predicate InvariantPredicate
= LIP
->Pred
;
219 const SCEV
*InvariantLHS
= LIP
->LHS
;
220 const SCEV
*InvariantRHS
= LIP
->RHS
;
222 // Rewrite the comparison to a loop invariant comparison if it can be done
223 // cheaply, where cheaply means "we don't need to emit any new
226 SmallDenseMap
<const SCEV
*, Value
*> CheapExpansions
;
227 CheapExpansions
[S
] = ICmp
->getOperand(IVOperIdx
);
228 CheapExpansions
[X
] = ICmp
->getOperand(1 - IVOperIdx
);
230 // TODO: Support multiple entry loops? (We currently bail out of these in
231 // the IndVarSimplify pass)
232 if (auto *BB
= L
->getLoopPredecessor()) {
233 const int Idx
= PN
->getBasicBlockIndex(BB
);
235 Value
*Incoming
= PN
->getIncomingValue(Idx
);
236 const SCEV
*IncomingS
= SE
->getSCEV(Incoming
);
237 CheapExpansions
[IncomingS
] = Incoming
;
240 Value
*NewLHS
= CheapExpansions
[InvariantLHS
];
241 Value
*NewRHS
= CheapExpansions
[InvariantRHS
];
244 if (auto *ConstLHS
= dyn_cast
<SCEVConstant
>(InvariantLHS
))
245 NewLHS
= ConstLHS
->getValue();
247 if (auto *ConstRHS
= dyn_cast
<SCEVConstant
>(InvariantRHS
))
248 NewRHS
= ConstRHS
->getValue();
250 if (!NewLHS
|| !NewRHS
)
251 // We could not find an existing value to replace either LHS or RHS.
252 // Generating new instructions has subtler tradeoffs, so avoid doing that
256 LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp
<< '\n');
257 ICmp
->setPredicate(InvariantPredicate
);
258 ICmp
->setOperand(0, NewLHS
);
259 ICmp
->setOperand(1, NewRHS
);
263 /// SimplifyIVUsers helper for eliminating useless
264 /// comparisons against an induction variable.
265 void SimplifyIndvar::eliminateIVComparison(ICmpInst
*ICmp
, Value
*IVOperand
) {
266 unsigned IVOperIdx
= 0;
267 ICmpInst::Predicate Pred
= ICmp
->getPredicate();
268 ICmpInst::Predicate OriginalPred
= Pred
;
269 if (IVOperand
!= ICmp
->getOperand(0)) {
271 assert(IVOperand
== ICmp
->getOperand(1) && "Can't find IVOperand");
273 Pred
= ICmpInst::getSwappedPredicate(Pred
);
276 // Get the SCEVs for the ICmp operands (in the specific context of the
278 const Loop
*ICmpLoop
= LI
->getLoopFor(ICmp
->getParent());
279 const SCEV
*S
= SE
->getSCEVAtScope(ICmp
->getOperand(IVOperIdx
), ICmpLoop
);
280 const SCEV
*X
= SE
->getSCEVAtScope(ICmp
->getOperand(1 - IVOperIdx
), ICmpLoop
);
282 // If the condition is always true or always false in the given context,
283 // replace it with a constant value.
284 SmallVector
<Instruction
*, 4> Users
;
285 for (auto *U
: ICmp
->users())
286 Users
.push_back(cast
<Instruction
>(U
));
287 const Instruction
*CtxI
= findCommonDominator(Users
, *DT
);
288 if (auto Ev
= SE
->evaluatePredicateAt(Pred
, S
, X
, CtxI
)) {
289 ICmp
->replaceAllUsesWith(ConstantInt::getBool(ICmp
->getContext(), *Ev
));
290 DeadInsts
.emplace_back(ICmp
);
291 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp
<< '\n');
292 } else if (makeIVComparisonInvariant(ICmp
, IVOperand
)) {
293 // fallthrough to end of function
294 } else if (ICmpInst::isSigned(OriginalPred
) &&
295 SE
->isKnownNonNegative(S
) && SE
->isKnownNonNegative(X
)) {
296 // If we were unable to make anything above, all we can is to canonicalize
297 // the comparison hoping that it will open the doors for other
298 // optimizations. If we find out that we compare two non-negative values,
299 // we turn the instruction's predicate to its unsigned version. Note that
300 // we cannot rely on Pred here unless we check if we have swapped it.
301 assert(ICmp
->getPredicate() == OriginalPred
&& "Predicate changed?");
302 LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
304 ICmp
->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred
));
312 bool SimplifyIndvar::eliminateSDiv(BinaryOperator
*SDiv
) {
313 // Get the SCEVs for the ICmp operands.
314 auto *N
= SE
->getSCEV(SDiv
->getOperand(0));
315 auto *D
= SE
->getSCEV(SDiv
->getOperand(1));
317 // Simplify unnecessary loops away.
318 const Loop
*L
= LI
->getLoopFor(SDiv
->getParent());
319 N
= SE
->getSCEVAtScope(N
, L
);
320 D
= SE
->getSCEVAtScope(D
, L
);
322 // Replace sdiv by udiv if both of the operands are non-negative
323 if (SE
->isKnownNonNegative(N
) && SE
->isKnownNonNegative(D
)) {
324 auto *UDiv
= BinaryOperator::Create(
325 BinaryOperator::UDiv
, SDiv
->getOperand(0), SDiv
->getOperand(1),
326 SDiv
->getName() + ".udiv", SDiv
);
327 UDiv
->setIsExact(SDiv
->isExact());
328 SDiv
->replaceAllUsesWith(UDiv
);
329 LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv
<< '\n');
332 DeadInsts
.push_back(SDiv
);
339 // i %s n -> i %u n if i >= 0 and n >= 0
340 void SimplifyIndvar::replaceSRemWithURem(BinaryOperator
*Rem
) {
341 auto *N
= Rem
->getOperand(0), *D
= Rem
->getOperand(1);
342 auto *URem
= BinaryOperator::Create(BinaryOperator::URem
, N
, D
,
343 Rem
->getName() + ".urem", Rem
);
344 Rem
->replaceAllUsesWith(URem
);
345 LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem
<< '\n');
348 DeadInsts
.emplace_back(Rem
);
351 // i % n --> i if i is in [0,n).
352 void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator
*Rem
) {
353 Rem
->replaceAllUsesWith(Rem
->getOperand(0));
354 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem
<< '\n');
357 DeadInsts
.emplace_back(Rem
);
360 // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
361 void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator
*Rem
) {
362 auto *T
= Rem
->getType();
363 auto *N
= Rem
->getOperand(0), *D
= Rem
->getOperand(1);
364 ICmpInst
*ICmp
= new ICmpInst(Rem
, ICmpInst::ICMP_EQ
, N
, D
);
366 SelectInst::Create(ICmp
, ConstantInt::get(T
, 0), N
, "iv.rem", Rem
);
367 Rem
->replaceAllUsesWith(Sel
);
368 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem
<< '\n');
371 DeadInsts
.emplace_back(Rem
);
374 /// SimplifyIVUsers helper for eliminating useless remainder operations
375 /// operating on an induction variable or replacing srem by urem.
376 void SimplifyIndvar::simplifyIVRemainder(BinaryOperator
*Rem
, Value
*IVOperand
,
378 auto *NValue
= Rem
->getOperand(0);
379 auto *DValue
= Rem
->getOperand(1);
380 // We're only interested in the case where we know something about
381 // the numerator, unless it is a srem, because we want to replace srem by urem
383 bool UsedAsNumerator
= IVOperand
== NValue
;
384 if (!UsedAsNumerator
&& !IsSigned
)
387 const SCEV
*N
= SE
->getSCEV(NValue
);
389 // Simplify unnecessary loops away.
390 const Loop
*ICmpLoop
= LI
->getLoopFor(Rem
->getParent());
391 N
= SE
->getSCEVAtScope(N
, ICmpLoop
);
393 bool IsNumeratorNonNegative
= !IsSigned
|| SE
->isKnownNonNegative(N
);
395 // Do not proceed if the Numerator may be negative
396 if (!IsNumeratorNonNegative
)
399 const SCEV
*D
= SE
->getSCEV(DValue
);
400 D
= SE
->getSCEVAtScope(D
, ICmpLoop
);
402 if (UsedAsNumerator
) {
403 auto LT
= IsSigned
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
;
404 if (SE
->isKnownPredicate(LT
, N
, D
)) {
405 replaceRemWithNumerator(Rem
);
409 auto *T
= Rem
->getType();
410 const auto *NLessOne
= SE
->getMinusSCEV(N
, SE
->getOne(T
));
411 if (SE
->isKnownPredicate(LT
, NLessOne
, D
)) {
412 replaceRemWithNumeratorOrZero(Rem
);
417 // Try to replace SRem with URem, if both N and D are known non-negative.
418 // Since we had already check N, we only need to check D now
419 if (!IsSigned
|| !SE
->isKnownNonNegative(D
))
422 replaceSRemWithURem(Rem
);
425 bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst
*WO
) {
426 const SCEV
*LHS
= SE
->getSCEV(WO
->getLHS());
427 const SCEV
*RHS
= SE
->getSCEV(WO
->getRHS());
428 if (!SE
->willNotOverflow(WO
->getBinaryOp(), WO
->isSigned(), LHS
, RHS
))
431 // Proved no overflow, nuke the overflow check and, if possible, the overflow
432 // intrinsic as well.
434 BinaryOperator
*NewResult
= BinaryOperator::Create(
435 WO
->getBinaryOp(), WO
->getLHS(), WO
->getRHS(), "", WO
);
438 NewResult
->setHasNoSignedWrap(true);
440 NewResult
->setHasNoUnsignedWrap(true);
442 SmallVector
<ExtractValueInst
*, 4> ToDelete
;
444 for (auto *U
: WO
->users()) {
445 if (auto *EVI
= dyn_cast
<ExtractValueInst
>(U
)) {
446 if (EVI
->getIndices()[0] == 1)
447 EVI
->replaceAllUsesWith(ConstantInt::getFalse(WO
->getContext()));
449 assert(EVI
->getIndices()[0] == 0 && "Only two possibilities!");
450 EVI
->replaceAllUsesWith(NewResult
);
452 ToDelete
.push_back(EVI
);
456 for (auto *EVI
: ToDelete
)
457 EVI
->eraseFromParent();
460 WO
->eraseFromParent();
466 bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst
*SI
) {
467 const SCEV
*LHS
= SE
->getSCEV(SI
->getLHS());
468 const SCEV
*RHS
= SE
->getSCEV(SI
->getRHS());
469 if (!SE
->willNotOverflow(SI
->getBinaryOp(), SI
->isSigned(), LHS
, RHS
))
472 BinaryOperator
*BO
= BinaryOperator::Create(
473 SI
->getBinaryOp(), SI
->getLHS(), SI
->getRHS(), SI
->getName(), SI
);
475 BO
->setHasNoSignedWrap();
477 BO
->setHasNoUnsignedWrap();
479 SI
->replaceAllUsesWith(BO
);
480 DeadInsts
.emplace_back(SI
);
485 bool SimplifyIndvar::eliminateTrunc(TruncInst
*TI
) {
486 // It is always legal to replace
487 // icmp <pred> i32 trunc(iv), n
489 // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
491 // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
492 // Or with either of these if pred is an equality predicate.
494 // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
495 // every comparison which uses trunc, it means that we can replace each of
496 // them with comparison of iv against sext/zext(n). We no longer need trunc
499 // TODO: Should we do this if we can widen *some* comparisons, but not all
500 // of them? Sometimes it is enough to enable other optimizations, but the
501 // trunc instruction will stay in the loop.
502 Value
*IV
= TI
->getOperand(0);
503 Type
*IVTy
= IV
->getType();
504 const SCEV
*IVSCEV
= SE
->getSCEV(IV
);
505 const SCEV
*TISCEV
= SE
->getSCEV(TI
);
507 // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
509 bool DoesSExtCollapse
= false;
510 bool DoesZExtCollapse
= false;
511 if (IVSCEV
== SE
->getSignExtendExpr(TISCEV
, IVTy
))
512 DoesSExtCollapse
= true;
513 if (IVSCEV
== SE
->getZeroExtendExpr(TISCEV
, IVTy
))
514 DoesZExtCollapse
= true;
516 // If neither sext nor zext does collapse, it is not profitable to do any
518 if (!DoesSExtCollapse
&& !DoesZExtCollapse
)
521 // Collect users of the trunc that look like comparisons against invariants.
522 // Bail if we find something different.
523 SmallVector
<ICmpInst
*, 4> ICmpUsers
;
524 for (auto *U
: TI
->users()) {
525 // We don't care about users in unreachable blocks.
526 if (isa
<Instruction
>(U
) &&
527 !DT
->isReachableFromEntry(cast
<Instruction
>(U
)->getParent()))
529 ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(U
);
530 if (!ICI
) return false;
531 assert(L
->contains(ICI
->getParent()) && "LCSSA form broken?");
532 if (!(ICI
->getOperand(0) == TI
&& L
->isLoopInvariant(ICI
->getOperand(1))) &&
533 !(ICI
->getOperand(1) == TI
&& L
->isLoopInvariant(ICI
->getOperand(0))))
535 // If we cannot get rid of trunc, bail.
536 if (ICI
->isSigned() && !DoesSExtCollapse
)
538 if (ICI
->isUnsigned() && !DoesZExtCollapse
)
540 // For equality, either signed or unsigned works.
541 ICmpUsers
.push_back(ICI
);
544 auto CanUseZExt
= [&](ICmpInst
*ICI
) {
545 // Unsigned comparison can be widened as unsigned.
546 if (ICI
->isUnsigned())
548 // Is it profitable to do zext?
549 if (!DoesZExtCollapse
)
551 // For equality, we can safely zext both parts.
552 if (ICI
->isEquality())
554 // Otherwise we can only use zext when comparing two non-negative or two
555 // negative values. But in practice, we will never pass DoesZExtCollapse
556 // check for a negative value, because zext(trunc(x)) is non-negative. So
557 // it only make sense to check for non-negativity here.
558 const SCEV
*SCEVOP1
= SE
->getSCEV(ICI
->getOperand(0));
559 const SCEV
*SCEVOP2
= SE
->getSCEV(ICI
->getOperand(1));
560 return SE
->isKnownNonNegative(SCEVOP1
) && SE
->isKnownNonNegative(SCEVOP2
);
562 // Replace all comparisons against trunc with comparisons against IV.
563 for (auto *ICI
: ICmpUsers
) {
564 bool IsSwapped
= L
->isLoopInvariant(ICI
->getOperand(0));
565 auto *Op1
= IsSwapped
? ICI
->getOperand(0) : ICI
->getOperand(1);
566 Instruction
*Ext
= nullptr;
567 // For signed/unsigned predicate, replace the old comparison with comparison
568 // of immediate IV against sext/zext of the invariant argument. If we can
569 // use either sext or zext (i.e. we are dealing with equality predicate),
570 // then prefer zext as a more canonical form.
571 // TODO: If we see a signed comparison which can be turned into unsigned,
572 // we can do it here for canonicalization purposes.
573 ICmpInst::Predicate Pred
= ICI
->getPredicate();
574 if (IsSwapped
) Pred
= ICmpInst::getSwappedPredicate(Pred
);
575 if (CanUseZExt(ICI
)) {
576 assert(DoesZExtCollapse
&& "Unprofitable zext?");
577 Ext
= new ZExtInst(Op1
, IVTy
, "zext", ICI
);
578 Pred
= ICmpInst::getUnsignedPredicate(Pred
);
580 assert(DoesSExtCollapse
&& "Unprofitable sext?");
581 Ext
= new SExtInst(Op1
, IVTy
, "sext", ICI
);
582 assert(Pred
== ICmpInst::getSignedPredicate(Pred
) && "Must be signed!");
585 L
->makeLoopInvariant(Ext
, Changed
);
587 ICmpInst
*NewICI
= new ICmpInst(ICI
, Pred
, IV
, Ext
);
588 ICI
->replaceAllUsesWith(NewICI
);
589 DeadInsts
.emplace_back(ICI
);
592 // Trunc no longer needed.
593 TI
->replaceAllUsesWith(UndefValue::get(TI
->getType()));
594 DeadInsts
.emplace_back(TI
);
598 /// Eliminate an operation that consumes a simple IV and has no observable
599 /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
600 /// but UseInst may not be.
601 bool SimplifyIndvar::eliminateIVUser(Instruction
*UseInst
,
602 Instruction
*IVOperand
) {
603 if (ICmpInst
*ICmp
= dyn_cast
<ICmpInst
>(UseInst
)) {
604 eliminateIVComparison(ICmp
, IVOperand
);
607 if (BinaryOperator
*Bin
= dyn_cast
<BinaryOperator
>(UseInst
)) {
608 bool IsSRem
= Bin
->getOpcode() == Instruction::SRem
;
609 if (IsSRem
|| Bin
->getOpcode() == Instruction::URem
) {
610 simplifyIVRemainder(Bin
, IVOperand
, IsSRem
);
614 if (Bin
->getOpcode() == Instruction::SDiv
)
615 return eliminateSDiv(Bin
);
618 if (auto *WO
= dyn_cast
<WithOverflowInst
>(UseInst
))
619 if (eliminateOverflowIntrinsic(WO
))
622 if (auto *SI
= dyn_cast
<SaturatingInst
>(UseInst
))
623 if (eliminateSaturatingIntrinsic(SI
))
626 if (auto *TI
= dyn_cast
<TruncInst
>(UseInst
))
627 if (eliminateTrunc(TI
))
630 if (eliminateIdentitySCEV(UseInst
, IVOperand
))
636 static Instruction
*GetLoopInvariantInsertPosition(Loop
*L
, Instruction
*Hint
) {
637 if (auto *BB
= L
->getLoopPreheader())
638 return BB
->getTerminator();
643 /// Replace the UseInst with a loop invariant expression if it is safe.
644 bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction
*I
) {
645 if (!SE
->isSCEVable(I
->getType()))
648 // Get the symbolic expression for this instruction.
649 const SCEV
*S
= SE
->getSCEV(I
);
651 if (!SE
->isLoopInvariant(S
, L
))
654 // Do not generate something ridiculous even if S is loop invariant.
655 if (Rewriter
.isHighCostExpansion(S
, L
, SCEVCheapExpansionBudget
, TTI
, I
))
658 auto *IP
= GetLoopInvariantInsertPosition(L
, I
);
660 if (!isSafeToExpandAt(S
, IP
, *SE
)) {
661 LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
662 << " with non-speculable loop invariant: " << *S
<< '\n');
666 auto *Invariant
= Rewriter
.expandCodeFor(S
, I
->getType(), IP
);
668 I
->replaceAllUsesWith(Invariant
);
669 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
670 << " with loop invariant: " << *S
<< '\n');
673 DeadInsts
.emplace_back(I
);
677 /// Eliminate any operation that SCEV can prove is an identity function.
678 bool SimplifyIndvar::eliminateIdentitySCEV(Instruction
*UseInst
,
679 Instruction
*IVOperand
) {
680 if (!SE
->isSCEVable(UseInst
->getType()) ||
681 (UseInst
->getType() != IVOperand
->getType()) ||
682 (SE
->getSCEV(UseInst
) != SE
->getSCEV(IVOperand
)))
685 // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
686 // dominator tree, even if X is an operand to Y. For instance, in
688 // %iv = phi i32 {0,+,1}
689 // br %cond, label %left, label %merge
692 // %X = add i32 %iv, 0
696 // %M = phi (%X, %iv)
698 // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
699 // %M.replaceAllUsesWith(%X) would be incorrect.
701 if (isa
<PHINode
>(UseInst
))
702 // If UseInst is not a PHI node then we know that IVOperand dominates
703 // UseInst directly from the legality of SSA.
704 if (!DT
|| !DT
->dominates(IVOperand
, UseInst
))
707 if (!LI
->replacementPreservesLCSSAForm(UseInst
, IVOperand
))
710 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst
<< '\n');
712 UseInst
->replaceAllUsesWith(IVOperand
);
715 DeadInsts
.emplace_back(UseInst
);
719 /// Annotate BO with nsw / nuw if it provably does not signed-overflow /
720 /// unsigned-overflow. Returns true if anything changed, false otherwise.
721 bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator
*BO
,
723 SCEV::NoWrapFlags Flags
;
725 std::tie(Flags
, Deduced
) = SE
->getStrengthenedNoWrapFlagsFromBinOp(
726 cast
<OverflowingBinaryOperator
>(BO
));
731 BO
->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(Flags
, SCEV::FlagNUW
) ==
733 BO
->setHasNoSignedWrap(ScalarEvolution::maskFlags(Flags
, SCEV::FlagNSW
) ==
736 // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
737 // flags on addrecs while performing zero/sign extensions. We could call
738 // forgetValue() here to make sure those flags also propagate to any other
739 // SCEV expressions based on the addrec. However, this can have pathological
740 // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
744 /// Annotate the Shr in (X << IVOperand) >> C as exact using the
745 /// information from the IV's range. Returns true if anything changed, false
747 bool SimplifyIndvar::strengthenRightShift(BinaryOperator
*BO
,
749 using namespace llvm::PatternMatch
;
751 if (BO
->getOpcode() == Instruction::Shl
) {
752 bool Changed
= false;
753 ConstantRange IVRange
= SE
->getUnsignedRange(SE
->getSCEV(IVOperand
));
754 for (auto *U
: BO
->users()) {
757 m_AShr(m_Shl(m_Value(), m_Specific(IVOperand
)), m_APInt(C
))) ||
759 m_LShr(m_Shl(m_Value(), m_Specific(IVOperand
)), m_APInt(C
)))) {
760 BinaryOperator
*Shr
= cast
<BinaryOperator
>(U
);
761 if (!Shr
->isExact() && IVRange
.getUnsignedMin().uge(*C
)) {
762 Shr
->setIsExact(true);
773 /// Add all uses of Def to the current IV's worklist.
774 static void pushIVUsers(
775 Instruction
*Def
, Loop
*L
,
776 SmallPtrSet
<Instruction
*,16> &Simplified
,
777 SmallVectorImpl
< std::pair
<Instruction
*,Instruction
*> > &SimpleIVUsers
) {
779 for (User
*U
: Def
->users()) {
780 Instruction
*UI
= cast
<Instruction
>(U
);
782 // Avoid infinite or exponential worklist processing.
783 // Also ensure unique worklist users.
784 // If Def is a LoopPhi, it may not be in the Simplified set, so check for
789 // Only change the current Loop, do not change the other parts (e.g. other
791 if (!L
->contains(UI
))
794 // Do not push the same instruction more than once.
795 if (!Simplified
.insert(UI
).second
)
798 SimpleIVUsers
.push_back(std::make_pair(UI
, Def
));
802 /// Return true if this instruction generates a simple SCEV
803 /// expression in terms of that IV.
805 /// This is similar to IVUsers' isInteresting() but processes each instruction
806 /// non-recursively when the operand is already known to be a simpleIVUser.
808 static bool isSimpleIVUser(Instruction
*I
, const Loop
*L
, ScalarEvolution
*SE
) {
809 if (!SE
->isSCEVable(I
->getType()))
812 // Get the symbolic expression for this instruction.
813 const SCEV
*S
= SE
->getSCEV(I
);
815 // Only consider affine recurrences.
816 const SCEVAddRecExpr
*AR
= dyn_cast
<SCEVAddRecExpr
>(S
);
817 if (AR
&& AR
->getLoop() == L
)
823 /// Iteratively perform simplification on a worklist of users
824 /// of the specified induction variable. Each successive simplification may push
825 /// more users which may themselves be candidates for simplification.
827 /// This algorithm does not require IVUsers analysis. Instead, it simplifies
828 /// instructions in-place during analysis. Rather than rewriting induction
829 /// variables bottom-up from their users, it transforms a chain of IVUsers
830 /// top-down, updating the IR only when it encounters a clear optimization
833 /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
835 void SimplifyIndvar::simplifyUsers(PHINode
*CurrIV
, IVVisitor
*V
) {
836 if (!SE
->isSCEVable(CurrIV
->getType()))
839 // Instructions processed by SimplifyIndvar for CurrIV.
840 SmallPtrSet
<Instruction
*,16> Simplified
;
842 // Use-def pairs if IV users waiting to be processed for CurrIV.
843 SmallVector
<std::pair
<Instruction
*, Instruction
*>, 8> SimpleIVUsers
;
845 // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
846 // called multiple times for the same LoopPhi. This is the proper thing to
847 // do for loop header phis that use each other.
848 pushIVUsers(CurrIV
, L
, Simplified
, SimpleIVUsers
);
850 while (!SimpleIVUsers
.empty()) {
851 std::pair
<Instruction
*, Instruction
*> UseOper
=
852 SimpleIVUsers
.pop_back_val();
853 Instruction
*UseInst
= UseOper
.first
;
855 // If a user of the IndVar is trivially dead, we prefer just to mark it dead
856 // rather than try to do some complex analysis or transformation (such as
857 // widening) basing on it.
858 // TODO: Propagate TLI and pass it here to handle more cases.
859 if (isInstructionTriviallyDead(UseInst
, /* TLI */ nullptr)) {
860 DeadInsts
.emplace_back(UseInst
);
864 // Bypass back edges to avoid extra work.
865 if (UseInst
== CurrIV
) continue;
867 // Try to replace UseInst with a loop invariant before any other
869 if (replaceIVUserWithLoopInvariant(UseInst
))
872 Instruction
*IVOperand
= UseOper
.second
;
873 for (unsigned N
= 0; IVOperand
; ++N
) {
874 assert(N
<= Simplified
.size() && "runaway iteration");
876 Value
*NewOper
= foldIVUser(UseInst
, IVOperand
);
878 break; // done folding
879 IVOperand
= dyn_cast
<Instruction
>(NewOper
);
884 if (eliminateIVUser(UseInst
, IVOperand
)) {
885 pushIVUsers(IVOperand
, L
, Simplified
, SimpleIVUsers
);
889 if (BinaryOperator
*BO
= dyn_cast
<BinaryOperator
>(UseInst
)) {
890 if ((isa
<OverflowingBinaryOperator
>(BO
) &&
891 strengthenOverflowingOperation(BO
, IVOperand
)) ||
892 (isa
<ShlOperator
>(BO
) && strengthenRightShift(BO
, IVOperand
))) {
893 // re-queue uses of the now modified binary operator and fall
894 // through to the checks that remain.
895 pushIVUsers(IVOperand
, L
, Simplified
, SimpleIVUsers
);
899 CastInst
*Cast
= dyn_cast
<CastInst
>(UseInst
);
904 if (isSimpleIVUser(UseInst
, L
, SE
)) {
905 pushIVUsers(UseInst
, L
, Simplified
, SimpleIVUsers
);
912 void IVVisitor::anchor() { }
914 /// Simplify instructions that use this induction variable
915 /// by using ScalarEvolution to analyze the IV's recurrence.
916 bool simplifyUsersOfIV(PHINode
*CurrIV
, ScalarEvolution
*SE
, DominatorTree
*DT
,
917 LoopInfo
*LI
, const TargetTransformInfo
*TTI
,
918 SmallVectorImpl
<WeakTrackingVH
> &Dead
,
919 SCEVExpander
&Rewriter
, IVVisitor
*V
) {
920 SimplifyIndvar
SIV(LI
->getLoopFor(CurrIV
->getParent()), SE
, DT
, LI
, TTI
,
922 SIV
.simplifyUsers(CurrIV
, V
);
923 return SIV
.hasChanged();
926 /// Simplify users of induction variables within this
927 /// loop. This does not actually change or add IVs.
928 bool simplifyLoopIVs(Loop
*L
, ScalarEvolution
*SE
, DominatorTree
*DT
,
929 LoopInfo
*LI
, const TargetTransformInfo
*TTI
,
930 SmallVectorImpl
<WeakTrackingVH
> &Dead
) {
931 SCEVExpander
Rewriter(*SE
, SE
->getDataLayout(), "indvars");
933 Rewriter
.setDebugType(DEBUG_TYPE
);
935 bool Changed
= false;
936 for (BasicBlock::iterator I
= L
->getHeader()->begin(); isa
<PHINode
>(I
); ++I
) {
938 simplifyUsersOfIV(cast
<PHINode
>(I
), SE
, DT
, LI
, TTI
, Dead
, Rewriter
);
945 //===----------------------------------------------------------------------===//
946 // Widen Induction Variables - Extend the width of an IV to cover its
948 //===----------------------------------------------------------------------===//
961 // Does the module have any calls to the llvm.experimental.guard intrinsic
962 // at all? If not we can avoid scanning instructions looking for guards.
965 bool UsePostIncrementRanges
;
968 unsigned NumElimExt
= 0;
969 unsigned NumWidened
= 0;
972 PHINode
*WidePhi
= nullptr;
973 Instruction
*WideInc
= nullptr;
974 const SCEV
*WideIncExpr
= nullptr;
975 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
;
977 SmallPtrSet
<Instruction
*,16> Widened
;
979 enum ExtendKind
{ ZeroExtended
, SignExtended
, Unknown
};
981 // A map tracking the kind of extension used to widen each narrow IV
982 // and narrow IV user.
983 // Key: pointer to a narrow IV or IV user.
984 // Value: the kind of extension used to widen this Instruction.
985 DenseMap
<AssertingVH
<Instruction
>, ExtendKind
> ExtendKindMap
;
987 using DefUserPair
= std::pair
<AssertingVH
<Value
>, AssertingVH
<Instruction
>>;
989 // A map with control-dependent ranges for post increment IV uses. The key is
990 // a pair of IV def and a use of this def denoting the context. The value is
991 // a ConstantRange representing possible values of the def at the given
993 DenseMap
<DefUserPair
, ConstantRange
> PostIncRangeInfos
;
995 Optional
<ConstantRange
> getPostIncRangeInfo(Value
*Def
,
997 DefUserPair
Key(Def
, UseI
);
998 auto It
= PostIncRangeInfos
.find(Key
);
999 return It
== PostIncRangeInfos
.end()
1000 ? Optional
<ConstantRange
>(None
)
1001 : Optional
<ConstantRange
>(It
->second
);
1004 void calculatePostIncRanges(PHINode
*OrigPhi
);
1005 void calculatePostIncRange(Instruction
*NarrowDef
, Instruction
*NarrowUser
);
1007 void updatePostIncRangeInfo(Value
*Def
, Instruction
*UseI
, ConstantRange R
) {
1008 DefUserPair
Key(Def
, UseI
);
1009 auto It
= PostIncRangeInfos
.find(Key
);
1010 if (It
== PostIncRangeInfos
.end())
1011 PostIncRangeInfos
.insert({Key
, R
});
1013 It
->second
= R
.intersectWith(It
->second
);
1017 /// Record a link in the Narrow IV def-use chain along with the WideIV that
1018 /// computes the same value as the Narrow IV def. This avoids caching Use*
1020 struct NarrowIVDefUse
{
1021 Instruction
*NarrowDef
= nullptr;
1022 Instruction
*NarrowUse
= nullptr;
1023 Instruction
*WideDef
= nullptr;
1025 // True if the narrow def is never negative. Tracking this information lets
1026 // us use a sign extension instead of a zero extension or vice versa, when
1027 // profitable and legal.
1028 bool NeverNegative
= false;
1030 NarrowIVDefUse(Instruction
*ND
, Instruction
*NU
, Instruction
*WD
,
1032 : NarrowDef(ND
), NarrowUse(NU
), WideDef(WD
),
1033 NeverNegative(NeverNegative
) {}
1036 WidenIV(const WideIVInfo
&WI
, LoopInfo
*LInfo
, ScalarEvolution
*SEv
,
1037 DominatorTree
*DTree
, SmallVectorImpl
<WeakTrackingVH
> &DI
,
1038 bool HasGuards
, bool UsePostIncrementRanges
= true);
1040 PHINode
*createWideIV(SCEVExpander
&Rewriter
);
1042 unsigned getNumElimExt() { return NumElimExt
; };
1043 unsigned getNumWidened() { return NumWidened
; };
1046 Value
*createExtendInst(Value
*NarrowOper
, Type
*WideType
, bool IsSigned
,
1049 Instruction
*cloneIVUser(NarrowIVDefUse DU
, const SCEVAddRecExpr
*WideAR
);
1050 Instruction
*cloneArithmeticIVUser(NarrowIVDefUse DU
,
1051 const SCEVAddRecExpr
*WideAR
);
1052 Instruction
*cloneBitwiseIVUser(NarrowIVDefUse DU
);
1054 ExtendKind
getExtendKind(Instruction
*I
);
1056 using WidenedRecTy
= std::pair
<const SCEVAddRecExpr
*, ExtendKind
>;
1058 WidenedRecTy
getWideRecurrence(NarrowIVDefUse DU
);
1060 WidenedRecTy
getExtendedOperandRecurrence(NarrowIVDefUse DU
);
1062 const SCEV
*getSCEVByOpCode(const SCEV
*LHS
, const SCEV
*RHS
,
1063 unsigned OpCode
) const;
1065 Instruction
*widenIVUse(NarrowIVDefUse DU
, SCEVExpander
&Rewriter
);
1067 bool widenLoopCompare(NarrowIVDefUse DU
);
1068 bool widenWithVariantUse(NarrowIVDefUse DU
);
1070 void pushNarrowIVUsers(Instruction
*NarrowDef
, Instruction
*WideDef
);
1073 SmallVector
<NarrowIVDefUse
, 8> NarrowIVUsers
;
1077 /// Determine the insertion point for this user. By default, insert immediately
1078 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
1079 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
1080 /// common dominator for the incoming blocks. A nullptr can be returned if no
1081 /// viable location is found: it may happen if User is a PHI and Def only comes
1082 /// to this PHI from unreachable blocks.
1083 static Instruction
*getInsertPointForUses(Instruction
*User
, Value
*Def
,
1084 DominatorTree
*DT
, LoopInfo
*LI
) {
1085 PHINode
*PHI
= dyn_cast
<PHINode
>(User
);
1089 Instruction
*InsertPt
= nullptr;
1090 for (unsigned i
= 0, e
= PHI
->getNumIncomingValues(); i
!= e
; ++i
) {
1091 if (PHI
->getIncomingValue(i
) != Def
)
1094 BasicBlock
*InsertBB
= PHI
->getIncomingBlock(i
);
1096 if (!DT
->isReachableFromEntry(InsertBB
))
1100 InsertPt
= InsertBB
->getTerminator();
1103 InsertBB
= DT
->findNearestCommonDominator(InsertPt
->getParent(), InsertBB
);
1104 InsertPt
= InsertBB
->getTerminator();
1107 // If we have skipped all inputs, it means that Def only comes to Phi from
1108 // unreachable blocks.
1112 auto *DefI
= dyn_cast
<Instruction
>(Def
);
1116 assert(DT
->dominates(DefI
, InsertPt
) && "def does not dominate all uses");
1118 auto *L
= LI
->getLoopFor(DefI
->getParent());
1119 assert(!L
|| L
->contains(LI
->getLoopFor(InsertPt
->getParent())));
1121 for (auto *DTN
= (*DT
)[InsertPt
->getParent()]; DTN
; DTN
= DTN
->getIDom())
1122 if (LI
->getLoopFor(DTN
->getBlock()) == L
)
1123 return DTN
->getBlock()->getTerminator();
1125 llvm_unreachable("DefI dominates InsertPt!");
1128 WidenIV::WidenIV(const WideIVInfo
&WI
, LoopInfo
*LInfo
, ScalarEvolution
*SEv
,
1129 DominatorTree
*DTree
, SmallVectorImpl
<WeakTrackingVH
> &DI
,
1130 bool HasGuards
, bool UsePostIncrementRanges
)
1131 : OrigPhi(WI
.NarrowIV
), WideType(WI
.WidestNativeType
), LI(LInfo
),
1132 L(LI
->getLoopFor(OrigPhi
->getParent())), SE(SEv
), DT(DTree
),
1133 HasGuards(HasGuards
), UsePostIncrementRanges(UsePostIncrementRanges
),
1135 assert(L
->getHeader() == OrigPhi
->getParent() && "Phi must be an IV");
1136 ExtendKindMap
[OrigPhi
] = WI
.IsSigned
? SignExtended
: ZeroExtended
;
1139 Value
*WidenIV::createExtendInst(Value
*NarrowOper
, Type
*WideType
,
1140 bool IsSigned
, Instruction
*Use
) {
1141 // Set the debug location and conservative insertion point.
1142 IRBuilder
<> Builder(Use
);
1143 // Hoist the insertion point into loop preheaders as far as possible.
1144 for (const Loop
*L
= LI
->getLoopFor(Use
->getParent());
1145 L
&& L
->getLoopPreheader() && L
->isLoopInvariant(NarrowOper
);
1146 L
= L
->getParentLoop())
1147 Builder
.SetInsertPoint(L
->getLoopPreheader()->getTerminator());
1149 return IsSigned
? Builder
.CreateSExt(NarrowOper
, WideType
) :
1150 Builder
.CreateZExt(NarrowOper
, WideType
);
1153 /// Instantiate a wide operation to replace a narrow operation. This only needs
1154 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1155 /// 0 for any operation we decide not to clone.
1156 Instruction
*WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU
,
1157 const SCEVAddRecExpr
*WideAR
) {
1158 unsigned Opcode
= DU
.NarrowUse
->getOpcode();
1162 case Instruction::Add
:
1163 case Instruction::Mul
:
1164 case Instruction::UDiv
:
1165 case Instruction::Sub
:
1166 return cloneArithmeticIVUser(DU
, WideAR
);
1168 case Instruction::And
:
1169 case Instruction::Or
:
1170 case Instruction::Xor
:
1171 case Instruction::Shl
:
1172 case Instruction::LShr
:
1173 case Instruction::AShr
:
1174 return cloneBitwiseIVUser(DU
);
1178 Instruction
*WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU
) {
1179 Instruction
*NarrowUse
= DU
.NarrowUse
;
1180 Instruction
*NarrowDef
= DU
.NarrowDef
;
1181 Instruction
*WideDef
= DU
.WideDef
;
1183 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse
<< "\n");
1185 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1186 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1187 // invariant and will be folded or hoisted. If it actually comes from a
1188 // widened IV, it should be removed during a future call to widenIVUse.
1189 bool IsSigned
= getExtendKind(NarrowDef
) == SignExtended
;
1190 Value
*LHS
= (NarrowUse
->getOperand(0) == NarrowDef
)
1192 : createExtendInst(NarrowUse
->getOperand(0), WideType
,
1193 IsSigned
, NarrowUse
);
1194 Value
*RHS
= (NarrowUse
->getOperand(1) == NarrowDef
)
1196 : createExtendInst(NarrowUse
->getOperand(1), WideType
,
1197 IsSigned
, NarrowUse
);
1199 auto *NarrowBO
= cast
<BinaryOperator
>(NarrowUse
);
1200 auto *WideBO
= BinaryOperator::Create(NarrowBO
->getOpcode(), LHS
, RHS
,
1201 NarrowBO
->getName());
1202 IRBuilder
<> Builder(NarrowUse
);
1203 Builder
.Insert(WideBO
);
1204 WideBO
->copyIRFlags(NarrowBO
);
1208 Instruction
*WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU
,
1209 const SCEVAddRecExpr
*WideAR
) {
1210 Instruction
*NarrowUse
= DU
.NarrowUse
;
1211 Instruction
*NarrowDef
= DU
.NarrowDef
;
1212 Instruction
*WideDef
= DU
.WideDef
;
1214 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse
<< "\n");
1216 unsigned IVOpIdx
= (NarrowUse
->getOperand(0) == NarrowDef
) ? 0 : 1;
1218 // We're trying to find X such that
1220 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1222 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1223 // and check using SCEV if any of them are correct.
1225 // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1226 // correct solution to X.
1227 auto GuessNonIVOperand
= [&](bool SignExt
) {
1228 const SCEV
*WideLHS
;
1229 const SCEV
*WideRHS
;
1231 auto GetExtend
= [this, SignExt
](const SCEV
*S
, Type
*Ty
) {
1233 return SE
->getSignExtendExpr(S
, Ty
);
1234 return SE
->getZeroExtendExpr(S
, Ty
);
1238 WideLHS
= SE
->getSCEV(WideDef
);
1239 const SCEV
*NarrowRHS
= SE
->getSCEV(NarrowUse
->getOperand(1));
1240 WideRHS
= GetExtend(NarrowRHS
, WideType
);
1242 const SCEV
*NarrowLHS
= SE
->getSCEV(NarrowUse
->getOperand(0));
1243 WideLHS
= GetExtend(NarrowLHS
, WideType
);
1244 WideRHS
= SE
->getSCEV(WideDef
);
1247 // WideUse is "WideDef `op.wide` X" as described in the comment.
1248 const SCEV
*WideUse
=
1249 getSCEVByOpCode(WideLHS
, WideRHS
, NarrowUse
->getOpcode());
1251 return WideUse
== WideAR
;
1254 bool SignExtend
= getExtendKind(NarrowDef
) == SignExtended
;
1255 if (!GuessNonIVOperand(SignExtend
)) {
1256 SignExtend
= !SignExtend
;
1257 if (!GuessNonIVOperand(SignExtend
))
1261 Value
*LHS
= (NarrowUse
->getOperand(0) == NarrowDef
)
1263 : createExtendInst(NarrowUse
->getOperand(0), WideType
,
1264 SignExtend
, NarrowUse
);
1265 Value
*RHS
= (NarrowUse
->getOperand(1) == NarrowDef
)
1267 : createExtendInst(NarrowUse
->getOperand(1), WideType
,
1268 SignExtend
, NarrowUse
);
1270 auto *NarrowBO
= cast
<BinaryOperator
>(NarrowUse
);
1271 auto *WideBO
= BinaryOperator::Create(NarrowBO
->getOpcode(), LHS
, RHS
,
1272 NarrowBO
->getName());
1274 IRBuilder
<> Builder(NarrowUse
);
1275 Builder
.Insert(WideBO
);
1276 WideBO
->copyIRFlags(NarrowBO
);
1280 WidenIV::ExtendKind
WidenIV::getExtendKind(Instruction
*I
) {
1281 auto It
= ExtendKindMap
.find(I
);
1282 assert(It
!= ExtendKindMap
.end() && "Instruction not yet extended!");
1286 const SCEV
*WidenIV::getSCEVByOpCode(const SCEV
*LHS
, const SCEV
*RHS
,
1287 unsigned OpCode
) const {
1289 case Instruction::Add
:
1290 return SE
->getAddExpr(LHS
, RHS
);
1291 case Instruction::Sub
:
1292 return SE
->getMinusSCEV(LHS
, RHS
);
1293 case Instruction::Mul
:
1294 return SE
->getMulExpr(LHS
, RHS
);
1295 case Instruction::UDiv
:
1296 return SE
->getUDivExpr(LHS
, RHS
);
1298 llvm_unreachable("Unsupported opcode.");
1302 /// No-wrap operations can transfer sign extension of their result to their
1303 /// operands. Generate the SCEV value for the widened operation without
1304 /// actually modifying the IR yet. If the expression after extending the
1305 /// operands is an AddRec for this loop, return the AddRec and the kind of
1307 WidenIV::WidenedRecTy
1308 WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU
) {
1309 // Handle the common case of add<nsw/nuw>
1310 const unsigned OpCode
= DU
.NarrowUse
->getOpcode();
1311 // Only Add/Sub/Mul instructions supported yet.
1312 if (OpCode
!= Instruction::Add
&& OpCode
!= Instruction::Sub
&&
1313 OpCode
!= Instruction::Mul
)
1314 return {nullptr, Unknown
};
1316 // One operand (NarrowDef) has already been extended to WideDef. Now determine
1317 // if extending the other will lead to a recurrence.
1318 const unsigned ExtendOperIdx
=
1319 DU
.NarrowUse
->getOperand(0) == DU
.NarrowDef
? 1 : 0;
1320 assert(DU
.NarrowUse
->getOperand(1-ExtendOperIdx
) == DU
.NarrowDef
&& "bad DU");
1322 const SCEV
*ExtendOperExpr
= nullptr;
1323 const OverflowingBinaryOperator
*OBO
=
1324 cast
<OverflowingBinaryOperator
>(DU
.NarrowUse
);
1325 ExtendKind ExtKind
= getExtendKind(DU
.NarrowDef
);
1326 if (ExtKind
== SignExtended
&& OBO
->hasNoSignedWrap())
1327 ExtendOperExpr
= SE
->getSignExtendExpr(
1328 SE
->getSCEV(DU
.NarrowUse
->getOperand(ExtendOperIdx
)), WideType
);
1329 else if(ExtKind
== ZeroExtended
&& OBO
->hasNoUnsignedWrap())
1330 ExtendOperExpr
= SE
->getZeroExtendExpr(
1331 SE
->getSCEV(DU
.NarrowUse
->getOperand(ExtendOperIdx
)), WideType
);
1333 return {nullptr, Unknown
};
1335 // When creating this SCEV expr, don't apply the current operations NSW or NUW
1336 // flags. This instruction may be guarded by control flow that the no-wrap
1337 // behavior depends on. Non-control-equivalent instructions can be mapped to
1338 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1339 // semantics to those operations.
1340 const SCEV
*lhs
= SE
->getSCEV(DU
.WideDef
);
1341 const SCEV
*rhs
= ExtendOperExpr
;
1343 // Let's swap operands to the initial order for the case of non-commutative
1344 // operations, like SUB. See PR21014.
1345 if (ExtendOperIdx
== 0)
1346 std::swap(lhs
, rhs
);
1347 const SCEVAddRecExpr
*AddRec
=
1348 dyn_cast
<SCEVAddRecExpr
>(getSCEVByOpCode(lhs
, rhs
, OpCode
));
1350 if (!AddRec
|| AddRec
->getLoop() != L
)
1351 return {nullptr, Unknown
};
1353 return {AddRec
, ExtKind
};
1356 /// Is this instruction potentially interesting for further simplification after
1357 /// widening it's type? In other words, can the extend be safely hoisted out of
1358 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1359 /// so, return the extended recurrence and the kind of extension used. Otherwise
1360 /// return {nullptr, Unknown}.
1361 WidenIV::WidenedRecTy
WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU
) {
1362 if (!DU
.NarrowUse
->getType()->isIntegerTy())
1363 return {nullptr, Unknown
};
1365 const SCEV
*NarrowExpr
= SE
->getSCEV(DU
.NarrowUse
);
1366 if (SE
->getTypeSizeInBits(NarrowExpr
->getType()) >=
1367 SE
->getTypeSizeInBits(WideType
)) {
1368 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1369 // index. So don't follow this use.
1370 return {nullptr, Unknown
};
1373 const SCEV
*WideExpr
;
1375 if (DU
.NeverNegative
) {
1376 WideExpr
= SE
->getSignExtendExpr(NarrowExpr
, WideType
);
1377 if (isa
<SCEVAddRecExpr
>(WideExpr
))
1378 ExtKind
= SignExtended
;
1380 WideExpr
= SE
->getZeroExtendExpr(NarrowExpr
, WideType
);
1381 ExtKind
= ZeroExtended
;
1383 } else if (getExtendKind(DU
.NarrowDef
) == SignExtended
) {
1384 WideExpr
= SE
->getSignExtendExpr(NarrowExpr
, WideType
);
1385 ExtKind
= SignExtended
;
1387 WideExpr
= SE
->getZeroExtendExpr(NarrowExpr
, WideType
);
1388 ExtKind
= ZeroExtended
;
1390 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(WideExpr
);
1391 if (!AddRec
|| AddRec
->getLoop() != L
)
1392 return {nullptr, Unknown
};
1393 return {AddRec
, ExtKind
};
1396 /// This IV user cannot be widened. Replace this use of the original narrow IV
1397 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1398 static void truncateIVUse(WidenIV::NarrowIVDefUse DU
, DominatorTree
*DT
,
1400 auto *InsertPt
= getInsertPointForUses(DU
.NarrowUse
, DU
.NarrowDef
, DT
, LI
);
1403 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU
.WideDef
<< " for user "
1404 << *DU
.NarrowUse
<< "\n");
1405 IRBuilder
<> Builder(InsertPt
);
1406 Value
*Trunc
= Builder
.CreateTrunc(DU
.WideDef
, DU
.NarrowDef
->getType());
1407 DU
.NarrowUse
->replaceUsesOfWith(DU
.NarrowDef
, Trunc
);
1410 /// If the narrow use is a compare instruction, then widen the compare
1411 // (and possibly the other operand). The extend operation is hoisted into the
1412 // loop preheader as far as possible.
1413 bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU
) {
1414 ICmpInst
*Cmp
= dyn_cast
<ICmpInst
>(DU
.NarrowUse
);
1418 // We can legally widen the comparison in the following two cases:
1420 // - The signedness of the IV extension and comparison match
1422 // - The narrow IV is always positive (and thus its sign extension is equal
1423 // to its zero extension). For instance, let's say we're zero extending
1424 // %narrow for the following use
1426 // icmp slt i32 %narrow, %val ... (A)
1428 // and %narrow is always positive. Then
1430 // (A) == icmp slt i32 sext(%narrow), sext(%val)
1431 // == icmp slt i32 zext(%narrow), sext(%val)
1432 bool IsSigned
= getExtendKind(DU
.NarrowDef
) == SignExtended
;
1433 if (!(DU
.NeverNegative
|| IsSigned
== Cmp
->isSigned()))
1436 Value
*Op
= Cmp
->getOperand(Cmp
->getOperand(0) == DU
.NarrowDef
? 1 : 0);
1437 unsigned CastWidth
= SE
->getTypeSizeInBits(Op
->getType());
1438 unsigned IVWidth
= SE
->getTypeSizeInBits(WideType
);
1439 assert(CastWidth
<= IVWidth
&& "Unexpected width while widening compare.");
1441 // Widen the compare instruction.
1442 auto *InsertPt
= getInsertPointForUses(DU
.NarrowUse
, DU
.NarrowDef
, DT
, LI
);
1445 IRBuilder
<> Builder(InsertPt
);
1446 DU
.NarrowUse
->replaceUsesOfWith(DU
.NarrowDef
, DU
.WideDef
);
1448 // Widen the other operand of the compare, if necessary.
1449 if (CastWidth
< IVWidth
) {
1450 Value
*ExtOp
= createExtendInst(Op
, WideType
, Cmp
->isSigned(), Cmp
);
1451 DU
.NarrowUse
->replaceUsesOfWith(Op
, ExtOp
);
1456 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1457 // will not work when:
1458 // 1) SCEV traces back to an instruction inside the loop that SCEV can not
1459 // expand, eg. add %indvar, (load %addr)
1460 // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1461 // While SCEV fails to avoid trunc, we can still try to use instruction
1462 // combining approach to prove trunc is not required. This can be further
1463 // extended with other instruction combining checks, but for now we handle the
1464 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1467 // %c = sub nsw %b, %indvar
1468 // %d = sext %c to i64
1470 // %indvar.ext1 = sext %indvar to i64
1471 // %m = sext %b to i64
1472 // %d = sub nsw i64 %m, %indvar.ext1
1473 // Therefore, as long as the result of add/sub/mul is extended to wide type, no
1474 // trunc is required regardless of how %b is generated. This pattern is common
1475 // when calculating address in 64 bit architecture
1476 bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU
) {
1477 Instruction
*NarrowUse
= DU
.NarrowUse
;
1478 Instruction
*NarrowDef
= DU
.NarrowDef
;
1479 Instruction
*WideDef
= DU
.WideDef
;
1481 // Handle the common case of add<nsw/nuw>
1482 const unsigned OpCode
= NarrowUse
->getOpcode();
1483 // Only Add/Sub/Mul instructions are supported.
1484 if (OpCode
!= Instruction::Add
&& OpCode
!= Instruction::Sub
&&
1485 OpCode
!= Instruction::Mul
)
1488 // The operand that is not defined by NarrowDef of DU. Let's call it the
1490 assert((NarrowUse
->getOperand(0) == NarrowDef
||
1491 NarrowUse
->getOperand(1) == NarrowDef
) &&
1494 const OverflowingBinaryOperator
*OBO
=
1495 cast
<OverflowingBinaryOperator
>(NarrowUse
);
1496 ExtendKind ExtKind
= getExtendKind(NarrowDef
);
1497 bool CanSignExtend
= ExtKind
== SignExtended
&& OBO
->hasNoSignedWrap();
1498 bool CanZeroExtend
= ExtKind
== ZeroExtended
&& OBO
->hasNoUnsignedWrap();
1499 auto AnotherOpExtKind
= ExtKind
;
1501 // Check that all uses are either:
1502 // - narrow def (in case of we are widening the IV increment);
1503 // - single-input LCSSA Phis;
1504 // - comparison of the chosen type;
1505 // - extend of the chosen type (raison d'etre).
1506 SmallVector
<Instruction
*, 4> ExtUsers
;
1507 SmallVector
<PHINode
*, 4> LCSSAPhiUsers
;
1508 SmallVector
<ICmpInst
*, 4> ICmpUsers
;
1509 for (Use
&U
: NarrowUse
->uses()) {
1510 Instruction
*User
= cast
<Instruction
>(U
.getUser());
1511 if (User
== NarrowDef
)
1513 if (!L
->contains(User
)) {
1514 auto *LCSSAPhi
= cast
<PHINode
>(User
);
1515 // Make sure there is only 1 input, so that we don't have to split
1517 if (LCSSAPhi
->getNumOperands() != 1)
1519 LCSSAPhiUsers
.push_back(LCSSAPhi
);
1522 if (auto *ICmp
= dyn_cast
<ICmpInst
>(User
)) {
1523 auto Pred
= ICmp
->getPredicate();
1524 // We have 3 types of predicates: signed, unsigned and equality
1525 // predicates. For equality, it's legal to widen icmp for either sign and
1526 // zero extend. For sign extend, we can also do so for signed predicates,
1527 // likeweise for zero extend we can widen icmp for unsigned predicates.
1528 if (ExtKind
== ZeroExtended
&& ICmpInst::isSigned(Pred
))
1530 if (ExtKind
== SignExtended
&& ICmpInst::isUnsigned(Pred
))
1532 ICmpUsers
.push_back(ICmp
);
1535 if (ExtKind
== SignExtended
)
1536 User
= dyn_cast
<SExtInst
>(User
);
1538 User
= dyn_cast
<ZExtInst
>(User
);
1539 if (!User
|| User
->getType() != WideType
)
1541 ExtUsers
.push_back(User
);
1543 if (ExtUsers
.empty()) {
1544 DeadInsts
.emplace_back(NarrowUse
);
1548 // We'll prove some facts that should be true in the context of ext users. If
1549 // there is no users, we are done now. If there are some, pick their common
1550 // dominator as context.
1551 const Instruction
*CtxI
= findCommonDominator(ExtUsers
, *DT
);
1553 if (!CanSignExtend
&& !CanZeroExtend
) {
1554 // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
1555 // will most likely not see it. Let's try to prove it.
1556 if (OpCode
!= Instruction::Add
)
1558 if (ExtKind
!= ZeroExtended
)
1560 const SCEV
*LHS
= SE
->getSCEV(OBO
->getOperand(0));
1561 const SCEV
*RHS
= SE
->getSCEV(OBO
->getOperand(1));
1562 // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
1563 if (NarrowUse
->getOperand(0) != NarrowDef
)
1565 if (!SE
->isKnownNegative(RHS
))
1567 bool ProvedSubNUW
= SE
->isKnownPredicateAt(ICmpInst::ICMP_UGE
, LHS
,
1568 SE
->getNegativeSCEV(RHS
), CtxI
);
1571 // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
1572 // neg(zext(neg(op))), which is basically sext(op).
1573 AnotherOpExtKind
= SignExtended
;
1576 // Verifying that Defining operand is an AddRec
1577 const SCEV
*Op1
= SE
->getSCEV(WideDef
);
1578 const SCEVAddRecExpr
*AddRecOp1
= dyn_cast
<SCEVAddRecExpr
>(Op1
);
1579 if (!AddRecOp1
|| AddRecOp1
->getLoop() != L
)
1582 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse
<< "\n");
1584 // Generating a widening use instruction.
1585 Value
*LHS
= (NarrowUse
->getOperand(0) == NarrowDef
)
1587 : createExtendInst(NarrowUse
->getOperand(0), WideType
,
1588 AnotherOpExtKind
, NarrowUse
);
1589 Value
*RHS
= (NarrowUse
->getOperand(1) == NarrowDef
)
1591 : createExtendInst(NarrowUse
->getOperand(1), WideType
,
1592 AnotherOpExtKind
, NarrowUse
);
1594 auto *NarrowBO
= cast
<BinaryOperator
>(NarrowUse
);
1595 auto *WideBO
= BinaryOperator::Create(NarrowBO
->getOpcode(), LHS
, RHS
,
1596 NarrowBO
->getName());
1597 IRBuilder
<> Builder(NarrowUse
);
1598 Builder
.Insert(WideBO
);
1599 WideBO
->copyIRFlags(NarrowBO
);
1600 ExtendKindMap
[NarrowUse
] = ExtKind
;
1602 for (Instruction
*User
: ExtUsers
) {
1603 assert(User
->getType() == WideType
&& "Checked before!");
1604 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User
<< " replaced by "
1605 << *WideBO
<< "\n");
1607 User
->replaceAllUsesWith(WideBO
);
1608 DeadInsts
.emplace_back(User
);
1611 for (PHINode
*User
: LCSSAPhiUsers
) {
1612 assert(User
->getNumOperands() == 1 && "Checked before!");
1613 Builder
.SetInsertPoint(User
);
1615 Builder
.CreatePHI(WideBO
->getType(), 1, User
->getName() + ".wide");
1616 BasicBlock
*LoopExitingBlock
= User
->getParent()->getSinglePredecessor();
1617 assert(LoopExitingBlock
&& L
->contains(LoopExitingBlock
) &&
1618 "Not a LCSSA Phi?");
1619 WidePN
->addIncoming(WideBO
, LoopExitingBlock
);
1620 Builder
.SetInsertPoint(&*User
->getParent()->getFirstInsertionPt());
1621 auto *TruncPN
= Builder
.CreateTrunc(WidePN
, User
->getType());
1622 User
->replaceAllUsesWith(TruncPN
);
1623 DeadInsts
.emplace_back(User
);
1626 for (ICmpInst
*User
: ICmpUsers
) {
1627 Builder
.SetInsertPoint(User
);
1628 auto ExtendedOp
= [&](Value
* V
)->Value
* {
1631 if (ExtKind
== ZeroExtended
)
1632 return Builder
.CreateZExt(V
, WideBO
->getType());
1634 return Builder
.CreateSExt(V
, WideBO
->getType());
1636 auto Pred
= User
->getPredicate();
1637 auto *LHS
= ExtendedOp(User
->getOperand(0));
1638 auto *RHS
= ExtendedOp(User
->getOperand(1));
1640 Builder
.CreateICmp(Pred
, LHS
, RHS
, User
->getName() + ".wide");
1641 User
->replaceAllUsesWith(WideCmp
);
1642 DeadInsts
.emplace_back(User
);
1648 /// Determine whether an individual user of the narrow IV can be widened. If so,
1649 /// return the wide clone of the user.
1650 Instruction
*WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU
, SCEVExpander
&Rewriter
) {
1651 assert(ExtendKindMap
.count(DU
.NarrowDef
) &&
1652 "Should already know the kind of extension used to widen NarrowDef");
1654 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1655 if (PHINode
*UsePhi
= dyn_cast
<PHINode
>(DU
.NarrowUse
)) {
1656 if (LI
->getLoopFor(UsePhi
->getParent()) != L
) {
1657 // For LCSSA phis, sink the truncate outside the loop.
1658 // After SimplifyCFG most loop exit targets have a single predecessor.
1659 // Otherwise fall back to a truncate within the loop.
1660 if (UsePhi
->getNumOperands() != 1)
1661 truncateIVUse(DU
, DT
, LI
);
1663 // Widening the PHI requires us to insert a trunc. The logical place
1664 // for this trunc is in the same BB as the PHI. This is not possible if
1665 // the BB is terminated by a catchswitch.
1666 if (isa
<CatchSwitchInst
>(UsePhi
->getParent()->getTerminator()))
1670 PHINode::Create(DU
.WideDef
->getType(), 1, UsePhi
->getName() + ".wide",
1672 WidePhi
->addIncoming(DU
.WideDef
, UsePhi
->getIncomingBlock(0));
1673 IRBuilder
<> Builder(&*WidePhi
->getParent()->getFirstInsertionPt());
1674 Value
*Trunc
= Builder
.CreateTrunc(WidePhi
, DU
.NarrowDef
->getType());
1675 UsePhi
->replaceAllUsesWith(Trunc
);
1676 DeadInsts
.emplace_back(UsePhi
);
1677 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
<< " to "
1678 << *WidePhi
<< "\n");
1684 // This narrow use can be widened by a sext if it's non-negative or its narrow
1685 // def was widended by a sext. Same for zext.
1686 auto canWidenBySExt
= [&]() {
1687 return DU
.NeverNegative
|| getExtendKind(DU
.NarrowDef
) == SignExtended
;
1689 auto canWidenByZExt
= [&]() {
1690 return DU
.NeverNegative
|| getExtendKind(DU
.NarrowDef
) == ZeroExtended
;
1693 // Our raison d'etre! Eliminate sign and zero extension.
1694 if ((isa
<SExtInst
>(DU
.NarrowUse
) && canWidenBySExt()) ||
1695 (isa
<ZExtInst
>(DU
.NarrowUse
) && canWidenByZExt())) {
1696 Value
*NewDef
= DU
.WideDef
;
1697 if (DU
.NarrowUse
->getType() != WideType
) {
1698 unsigned CastWidth
= SE
->getTypeSizeInBits(DU
.NarrowUse
->getType());
1699 unsigned IVWidth
= SE
->getTypeSizeInBits(WideType
);
1700 if (CastWidth
< IVWidth
) {
1701 // The cast isn't as wide as the IV, so insert a Trunc.
1702 IRBuilder
<> Builder(DU
.NarrowUse
);
1703 NewDef
= Builder
.CreateTrunc(DU
.WideDef
, DU
.NarrowUse
->getType());
1706 // A wider extend was hidden behind a narrower one. This may induce
1707 // another round of IV widening in which the intermediate IV becomes
1708 // dead. It should be very rare.
1709 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1710 << " not wide enough to subsume " << *DU
.NarrowUse
1712 DU
.NarrowUse
->replaceUsesOfWith(DU
.NarrowDef
, DU
.WideDef
);
1713 NewDef
= DU
.NarrowUse
;
1716 if (NewDef
!= DU
.NarrowUse
) {
1717 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU
.NarrowUse
1718 << " replaced by " << *DU
.WideDef
<< "\n");
1720 DU
.NarrowUse
->replaceAllUsesWith(NewDef
);
1721 DeadInsts
.emplace_back(DU
.NarrowUse
);
1723 // Now that the extend is gone, we want to expose it's uses for potential
1724 // further simplification. We don't need to directly inform SimplifyIVUsers
1725 // of the new users, because their parent IV will be processed later as a
1726 // new loop phi. If we preserved IVUsers analysis, we would also want to
1727 // push the uses of WideDef here.
1729 // No further widening is needed. The deceased [sz]ext had done it for us.
1733 // Does this user itself evaluate to a recurrence after widening?
1734 WidenedRecTy WideAddRec
= getExtendedOperandRecurrence(DU
);
1735 if (!WideAddRec
.first
)
1736 WideAddRec
= getWideRecurrence(DU
);
1738 assert((WideAddRec
.first
== nullptr) == (WideAddRec
.second
== Unknown
));
1739 if (!WideAddRec
.first
) {
1740 // If use is a loop condition, try to promote the condition instead of
1741 // truncating the IV first.
1742 if (widenLoopCompare(DU
))
1745 // We are here about to generate a truncate instruction that may hurt
1746 // performance because the scalar evolution expression computed earlier
1747 // in WideAddRec.first does not indicate a polynomial induction expression.
1748 // In that case, look at the operands of the use instruction to determine
1749 // if we can still widen the use instead of truncating its operand.
1750 if (widenWithVariantUse(DU
))
1753 // This user does not evaluate to a recurrence after widening, so don't
1754 // follow it. Instead insert a Trunc to kill off the original use,
1755 // eventually isolating the original narrow IV so it can be removed.
1756 truncateIVUse(DU
, DT
, LI
);
1759 // Assume block terminators cannot evaluate to a recurrence. We can't to
1760 // insert a Trunc after a terminator if there happens to be a critical edge.
1761 assert(DU
.NarrowUse
!= DU
.NarrowUse
->getParent()->getTerminator() &&
1762 "SCEV is not expected to evaluate a block terminator");
1764 // Reuse the IV increment that SCEVExpander created as long as it dominates
1766 Instruction
*WideUse
= nullptr;
1767 if (WideAddRec
.first
== WideIncExpr
&&
1768 Rewriter
.hoistIVInc(WideInc
, DU
.NarrowUse
))
1771 WideUse
= cloneIVUser(DU
, WideAddRec
.first
);
1775 // Evaluation of WideAddRec ensured that the narrow expression could be
1776 // extended outside the loop without overflow. This suggests that the wide use
1777 // evaluates to the same expression as the extended narrow use, but doesn't
1778 // absolutely guarantee it. Hence the following failsafe check. In rare cases
1779 // where it fails, we simply throw away the newly created wide use.
1780 if (WideAddRec
.first
!= SE
->getSCEV(WideUse
)) {
1781 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": "
1782 << *SE
->getSCEV(WideUse
) << " != " << *WideAddRec
.first
1784 DeadInsts
.emplace_back(WideUse
);
1788 // if we reached this point then we are going to replace
1789 // DU.NarrowUse with WideUse. Reattach DbgValue then.
1790 replaceAllDbgUsesWith(*DU
.NarrowUse
, *WideUse
, *WideUse
, *DT
);
1792 ExtendKindMap
[DU
.NarrowUse
] = WideAddRec
.second
;
1793 // Returning WideUse pushes it on the worklist.
1797 /// Add eligible users of NarrowDef to NarrowIVUsers.
1798 void WidenIV::pushNarrowIVUsers(Instruction
*NarrowDef
, Instruction
*WideDef
) {
1799 const SCEV
*NarrowSCEV
= SE
->getSCEV(NarrowDef
);
1800 bool NonNegativeDef
=
1801 SE
->isKnownPredicate(ICmpInst::ICMP_SGE
, NarrowSCEV
,
1802 SE
->getZero(NarrowSCEV
->getType()));
1803 for (User
*U
: NarrowDef
->users()) {
1804 Instruction
*NarrowUser
= cast
<Instruction
>(U
);
1806 // Handle data flow merges and bizarre phi cycles.
1807 if (!Widened
.insert(NarrowUser
).second
)
1810 bool NonNegativeUse
= false;
1811 if (!NonNegativeDef
) {
1812 // We might have a control-dependent range information for this context.
1813 if (auto RangeInfo
= getPostIncRangeInfo(NarrowDef
, NarrowUser
))
1814 NonNegativeUse
= RangeInfo
->getSignedMin().isNonNegative();
1817 NarrowIVUsers
.emplace_back(NarrowDef
, NarrowUser
, WideDef
,
1818 NonNegativeDef
|| NonNegativeUse
);
1822 /// Process a single induction variable. First use the SCEVExpander to create a
1823 /// wide induction variable that evaluates to the same recurrence as the
1824 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1825 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1826 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1828 /// It would be simpler to delete uses as they are processed, but we must avoid
1829 /// invalidating SCEV expressions.
1830 PHINode
*WidenIV::createWideIV(SCEVExpander
&Rewriter
) {
1831 // Is this phi an induction variable?
1832 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(SE
->getSCEV(OrigPhi
));
1836 // Widen the induction variable expression.
1837 const SCEV
*WideIVExpr
= getExtendKind(OrigPhi
) == SignExtended
1838 ? SE
->getSignExtendExpr(AddRec
, WideType
)
1839 : SE
->getZeroExtendExpr(AddRec
, WideType
);
1841 assert(SE
->getEffectiveSCEVType(WideIVExpr
->getType()) == WideType
&&
1842 "Expect the new IV expression to preserve its type");
1844 // Can the IV be extended outside the loop without overflow?
1845 AddRec
= dyn_cast
<SCEVAddRecExpr
>(WideIVExpr
);
1846 if (!AddRec
|| AddRec
->getLoop() != L
)
1849 // An AddRec must have loop-invariant operands. Since this AddRec is
1850 // materialized by a loop header phi, the expression cannot have any post-loop
1851 // operands, so they must dominate the loop header.
1853 SE
->properlyDominates(AddRec
->getStart(), L
->getHeader()) &&
1854 SE
->properlyDominates(AddRec
->getStepRecurrence(*SE
), L
->getHeader()) &&
1855 "Loop header phi recurrence inputs do not dominate the loop");
1857 // Iterate over IV uses (including transitive ones) looking for IV increments
1858 // of the form 'add nsw %iv, <const>'. For each increment and each use of
1859 // the increment calculate control-dependent range information basing on
1860 // dominating conditions inside of the loop (e.g. a range check inside of the
1861 // loop). Calculated ranges are stored in PostIncRangeInfos map.
1863 // Control-dependent range information is later used to prove that a narrow
1864 // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1865 // this on demand because when pushNarrowIVUsers needs this information some
1866 // of the dominating conditions might be already widened.
1867 if (UsePostIncrementRanges
)
1868 calculatePostIncRanges(OrigPhi
);
1870 // The rewriter provides a value for the desired IV expression. This may
1871 // either find an existing phi or materialize a new one. Either way, we
1872 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1873 // of the phi-SCC dominates the loop entry.
1874 Instruction
*InsertPt
= &*L
->getHeader()->getFirstInsertionPt();
1875 Value
*ExpandInst
= Rewriter
.expandCodeFor(AddRec
, WideType
, InsertPt
);
1876 // If the wide phi is not a phi node, for example a cast node, like bitcast,
1877 // inttoptr, ptrtoint, just skip for now.
1878 if (!(WidePhi
= dyn_cast
<PHINode
>(ExpandInst
))) {
1879 // if the cast node is an inserted instruction without any user, we should
1880 // remove it to make sure the pass don't touch the function as we can not
1882 if (ExpandInst
->hasNUses(0) &&
1883 Rewriter
.isInsertedInstruction(cast
<Instruction
>(ExpandInst
)))
1884 DeadInsts
.emplace_back(ExpandInst
);
1888 // Remembering the WideIV increment generated by SCEVExpander allows
1889 // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1890 // employ a general reuse mechanism because the call above is the only call to
1891 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1892 if (BasicBlock
*LatchBlock
= L
->getLoopLatch()) {
1894 cast
<Instruction
>(WidePhi
->getIncomingValueForBlock(LatchBlock
));
1895 WideIncExpr
= SE
->getSCEV(WideInc
);
1896 // Propagate the debug location associated with the original loop increment
1897 // to the new (widened) increment.
1899 cast
<Instruction
>(OrigPhi
->getIncomingValueForBlock(LatchBlock
));
1900 WideInc
->setDebugLoc(OrigInc
->getDebugLoc());
1903 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi
<< "\n");
1906 // Traverse the def-use chain using a worklist starting at the original IV.
1907 assert(Widened
.empty() && NarrowIVUsers
.empty() && "expect initial state" );
1909 Widened
.insert(OrigPhi
);
1910 pushNarrowIVUsers(OrigPhi
, WidePhi
);
1912 while (!NarrowIVUsers
.empty()) {
1913 WidenIV::NarrowIVDefUse DU
= NarrowIVUsers
.pop_back_val();
1915 // Process a def-use edge. This may replace the use, so don't hold a
1916 // use_iterator across it.
1917 Instruction
*WideUse
= widenIVUse(DU
, Rewriter
);
1919 // Follow all def-use edges from the previous narrow use.
1921 pushNarrowIVUsers(DU
.NarrowUse
, WideUse
);
1923 // widenIVUse may have removed the def-use edge.
1924 if (DU
.NarrowDef
->use_empty())
1925 DeadInsts
.emplace_back(DU
.NarrowDef
);
1928 // Attach any debug information to the new PHI.
1929 replaceAllDbgUsesWith(*OrigPhi
, *WidePhi
, *WidePhi
, *DT
);
1934 /// Calculates control-dependent range for the given def at the given context
1935 /// by looking at dominating conditions inside of the loop
1936 void WidenIV::calculatePostIncRange(Instruction
*NarrowDef
,
1937 Instruction
*NarrowUser
) {
1938 using namespace llvm::PatternMatch
;
1940 Value
*NarrowDefLHS
;
1941 const APInt
*NarrowDefRHS
;
1942 if (!match(NarrowDef
, m_NSWAdd(m_Value(NarrowDefLHS
),
1943 m_APInt(NarrowDefRHS
))) ||
1944 !NarrowDefRHS
->isNonNegative())
1947 auto UpdateRangeFromCondition
= [&] (Value
*Condition
,
1949 CmpInst::Predicate Pred
;
1951 if (!match(Condition
, m_ICmp(Pred
, m_Specific(NarrowDefLHS
),
1955 CmpInst::Predicate P
=
1956 TrueDest
? Pred
: CmpInst::getInversePredicate(Pred
);
1958 auto CmpRHSRange
= SE
->getSignedRange(SE
->getSCEV(CmpRHS
));
1959 auto CmpConstrainedLHSRange
=
1960 ConstantRange::makeAllowedICmpRegion(P
, CmpRHSRange
);
1961 auto NarrowDefRange
= CmpConstrainedLHSRange
.addWithNoWrap(
1962 *NarrowDefRHS
, OverflowingBinaryOperator::NoSignedWrap
);
1964 updatePostIncRangeInfo(NarrowDef
, NarrowUser
, NarrowDefRange
);
1967 auto UpdateRangeFromGuards
= [&](Instruction
*Ctx
) {
1971 for (Instruction
&I
: make_range(Ctx
->getIterator().getReverse(),
1972 Ctx
->getParent()->rend())) {
1974 if (match(&I
, m_Intrinsic
<Intrinsic::experimental_guard
>(m_Value(C
))))
1975 UpdateRangeFromCondition(C
, /*TrueDest=*/true);
1979 UpdateRangeFromGuards(NarrowUser
);
1981 BasicBlock
*NarrowUserBB
= NarrowUser
->getParent();
1982 // If NarrowUserBB is statically unreachable asking dominator queries may
1983 // yield surprising results. (e.g. the block may not have a dom tree node)
1984 if (!DT
->isReachableFromEntry(NarrowUserBB
))
1987 for (auto *DTB
= (*DT
)[NarrowUserBB
]->getIDom();
1988 L
->contains(DTB
->getBlock());
1989 DTB
= DTB
->getIDom()) {
1990 auto *BB
= DTB
->getBlock();
1991 auto *TI
= BB
->getTerminator();
1992 UpdateRangeFromGuards(TI
);
1994 auto *BI
= dyn_cast
<BranchInst
>(TI
);
1995 if (!BI
|| !BI
->isConditional())
1998 auto *TrueSuccessor
= BI
->getSuccessor(0);
1999 auto *FalseSuccessor
= BI
->getSuccessor(1);
2001 auto DominatesNarrowUser
= [this, NarrowUser
] (BasicBlockEdge BBE
) {
2002 return BBE
.isSingleEdge() &&
2003 DT
->dominates(BBE
, NarrowUser
->getParent());
2006 if (DominatesNarrowUser(BasicBlockEdge(BB
, TrueSuccessor
)))
2007 UpdateRangeFromCondition(BI
->getCondition(), /*TrueDest=*/true);
2009 if (DominatesNarrowUser(BasicBlockEdge(BB
, FalseSuccessor
)))
2010 UpdateRangeFromCondition(BI
->getCondition(), /*TrueDest=*/false);
2014 /// Calculates PostIncRangeInfos map for the given IV
2015 void WidenIV::calculatePostIncRanges(PHINode
*OrigPhi
) {
2016 SmallPtrSet
<Instruction
*, 16> Visited
;
2017 SmallVector
<Instruction
*, 6> Worklist
;
2018 Worklist
.push_back(OrigPhi
);
2019 Visited
.insert(OrigPhi
);
2021 while (!Worklist
.empty()) {
2022 Instruction
*NarrowDef
= Worklist
.pop_back_val();
2024 for (Use
&U
: NarrowDef
->uses()) {
2025 auto *NarrowUser
= cast
<Instruction
>(U
.getUser());
2027 // Don't go looking outside the current loop.
2028 auto *NarrowUserLoop
= (*LI
)[NarrowUser
->getParent()];
2029 if (!NarrowUserLoop
|| !L
->contains(NarrowUserLoop
))
2032 if (!Visited
.insert(NarrowUser
).second
)
2035 Worklist
.push_back(NarrowUser
);
2037 calculatePostIncRange(NarrowDef
, NarrowUser
);
2042 PHINode
*llvm::createWideIV(const WideIVInfo
&WI
,
2043 LoopInfo
*LI
, ScalarEvolution
*SE
, SCEVExpander
&Rewriter
,
2044 DominatorTree
*DT
, SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
,
2045 unsigned &NumElimExt
, unsigned &NumWidened
,
2046 bool HasGuards
, bool UsePostIncrementRanges
) {
2047 WidenIV
Widener(WI
, LI
, SE
, DT
, DeadInsts
, HasGuards
, UsePostIncrementRanges
);
2048 PHINode
*WidePHI
= Widener
.createWideIV(Rewriter
);
2049 NumElimExt
= Widener
.getNumElimExt();
2050 NumWidened
= Widener
.getNumWidened();