1 //===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the Correlated Value Propagation pass.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
14 #include "llvm/ADT/DepthFirstIterator.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/DomTreeUpdater.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/LazyValueInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Attributes.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/ConstantRange.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Operator.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/IR/PatternMatch.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Transforms/Utils/Local.h"
48 #define DEBUG_TYPE "correlated-value-propagation"
50 STATISTIC(NumPhis
, "Number of phis propagated");
51 STATISTIC(NumPhiCommon
, "Number of phis deleted via common incoming value");
52 STATISTIC(NumSelects
, "Number of selects propagated");
53 STATISTIC(NumCmps
, "Number of comparisons propagated");
54 STATISTIC(NumReturns
, "Number of return values propagated");
55 STATISTIC(NumDeadCases
, "Number of switch cases removed");
56 STATISTIC(NumSDivSRemsNarrowed
,
57 "Number of sdivs/srems whose width was decreased");
58 STATISTIC(NumSDivs
, "Number of sdiv converted to udiv");
59 STATISTIC(NumUDivURemsNarrowed
,
60 "Number of udivs/urems whose width was decreased");
61 STATISTIC(NumAShrsConverted
, "Number of ashr converted to lshr");
62 STATISTIC(NumAShrsRemoved
, "Number of ashr removed");
63 STATISTIC(NumSRems
, "Number of srem converted to urem");
64 STATISTIC(NumSExt
, "Number of sext converted to zext");
65 STATISTIC(NumSIToFP
, "Number of sitofp converted to uitofp");
66 STATISTIC(NumSICmps
, "Number of signed icmp preds simplified to unsigned");
67 STATISTIC(NumAnd
, "Number of ands removed");
68 STATISTIC(NumNW
, "Number of no-wrap deductions");
69 STATISTIC(NumNSW
, "Number of no-signed-wrap deductions");
70 STATISTIC(NumNUW
, "Number of no-unsigned-wrap deductions");
71 STATISTIC(NumAddNW
, "Number of no-wrap deductions for add");
72 STATISTIC(NumAddNSW
, "Number of no-signed-wrap deductions for add");
73 STATISTIC(NumAddNUW
, "Number of no-unsigned-wrap deductions for add");
74 STATISTIC(NumSubNW
, "Number of no-wrap deductions for sub");
75 STATISTIC(NumSubNSW
, "Number of no-signed-wrap deductions for sub");
76 STATISTIC(NumSubNUW
, "Number of no-unsigned-wrap deductions for sub");
77 STATISTIC(NumMulNW
, "Number of no-wrap deductions for mul");
78 STATISTIC(NumMulNSW
, "Number of no-signed-wrap deductions for mul");
79 STATISTIC(NumMulNUW
, "Number of no-unsigned-wrap deductions for mul");
80 STATISTIC(NumShlNW
, "Number of no-wrap deductions for shl");
81 STATISTIC(NumShlNSW
, "Number of no-signed-wrap deductions for shl");
82 STATISTIC(NumShlNUW
, "Number of no-unsigned-wrap deductions for shl");
83 STATISTIC(NumAbs
, "Number of llvm.abs intrinsics removed");
84 STATISTIC(NumOverflows
, "Number of overflow checks removed");
85 STATISTIC(NumSaturating
,
86 "Number of saturating arithmetics converted to normal arithmetics");
87 STATISTIC(NumNonNull
, "Number of function pointer arguments marked non-null");
88 STATISTIC(NumCmpIntr
, "Number of llvm.[us]cmp intrinsics removed");
89 STATISTIC(NumMinMax
, "Number of llvm.[us]{min,max} intrinsics removed");
91 "Number of llvm.s{min,max} intrinsics simplified to unsigned");
92 STATISTIC(NumUDivURemsNarrowedExpanded
,
93 "Number of bound udiv's/urem's expanded");
94 STATISTIC(NumNNeg
, "Number of zext/uitofp non-negative deductions");
96 static Constant
*getConstantAt(Value
*V
, Instruction
*At
, LazyValueInfo
*LVI
) {
97 if (Constant
*C
= LVI
->getConstant(V
, At
))
100 // TODO: The following really should be sunk inside LVI's core algorithm, or
101 // at least the outer shims around such.
102 auto *C
= dyn_cast
<CmpInst
>(V
);
106 Value
*Op0
= C
->getOperand(0);
107 Constant
*Op1
= dyn_cast
<Constant
>(C
->getOperand(1));
111 return LVI
->getPredicateAt(C
->getPredicate(), Op0
, Op1
, At
,
112 /*UseBlockValue=*/false);
115 static bool processSelect(SelectInst
*S
, LazyValueInfo
*LVI
) {
116 if (S
->getType()->isVectorTy() || isa
<Constant
>(S
->getCondition()))
119 bool Changed
= false;
120 for (Use
&U
: make_early_inc_range(S
->uses())) {
121 auto *I
= cast
<Instruction
>(U
.getUser());
123 if (auto *PN
= dyn_cast
<PHINode
>(I
))
124 C
= LVI
->getConstantOnEdge(S
->getCondition(), PN
->getIncomingBlock(U
),
127 C
= getConstantAt(S
->getCondition(), I
, LVI
);
129 auto *CI
= dyn_cast_or_null
<ConstantInt
>(C
);
133 U
.set(CI
->isOne() ? S
->getTrueValue() : S
->getFalseValue());
138 if (Changed
&& S
->use_empty())
139 S
->eraseFromParent();
144 /// Try to simplify a phi with constant incoming values that match the edge
145 /// values of a non-constant value on all other edges:
147 /// %isnull = icmp eq i8* %x, null
148 /// br i1 %isnull, label %bb2, label %bb1
152 /// %r = phi i8* [ %x, %bb1 ], [ null, %bb0 ]
155 static bool simplifyCommonValuePhi(PHINode
*P
, LazyValueInfo
*LVI
,
157 // Collect incoming constants and initialize possible common value.
158 SmallVector
<std::pair
<Constant
*, unsigned>, 4> IncomingConstants
;
159 Value
*CommonValue
= nullptr;
160 for (unsigned i
= 0, e
= P
->getNumIncomingValues(); i
!= e
; ++i
) {
161 Value
*Incoming
= P
->getIncomingValue(i
);
162 if (auto *IncomingConstant
= dyn_cast
<Constant
>(Incoming
)) {
163 IncomingConstants
.push_back(std::make_pair(IncomingConstant
, i
));
164 } else if (!CommonValue
) {
165 // The potential common value is initialized to the first non-constant.
166 CommonValue
= Incoming
;
167 } else if (Incoming
!= CommonValue
) {
168 // There can be only one non-constant common value.
173 if (!CommonValue
|| IncomingConstants
.empty())
176 // The common value must be valid in all incoming blocks.
177 BasicBlock
*ToBB
= P
->getParent();
178 if (auto *CommonInst
= dyn_cast
<Instruction
>(CommonValue
))
179 if (!DT
->dominates(CommonInst
, ToBB
))
182 // We have a phi with exactly 1 variable incoming value and 1 or more constant
183 // incoming values. See if all constant incoming values can be mapped back to
184 // the same incoming variable value.
185 for (auto &IncomingConstant
: IncomingConstants
) {
186 Constant
*C
= IncomingConstant
.first
;
187 BasicBlock
*IncomingBB
= P
->getIncomingBlock(IncomingConstant
.second
);
188 if (C
!= LVI
->getConstantOnEdge(CommonValue
, IncomingBB
, ToBB
, P
))
192 // LVI only guarantees that the value matches a certain constant if the value
193 // is not poison. Make sure we don't replace a well-defined value with poison.
194 // This is usually satisfied due to a prior branch on the value.
195 if (!isGuaranteedNotToBePoison(CommonValue
, nullptr, P
, DT
))
198 // All constant incoming values map to the same variable along the incoming
199 // edges of the phi. The phi is unnecessary.
200 P
->replaceAllUsesWith(CommonValue
);
201 P
->eraseFromParent();
206 static Value
*getValueOnEdge(LazyValueInfo
*LVI
, Value
*Incoming
,
207 BasicBlock
*From
, BasicBlock
*To
,
209 if (Constant
*C
= LVI
->getConstantOnEdge(Incoming
, From
, To
, CxtI
))
212 // Look if the incoming value is a select with a scalar condition for which
213 // LVI can tells us the value. In that case replace the incoming value with
214 // the appropriate value of the select. This often allows us to remove the
216 auto *SI
= dyn_cast
<SelectInst
>(Incoming
);
220 // Once LVI learns to handle vector types, we could also add support
221 // for vector type constants that are not all zeroes or all ones.
222 Value
*Condition
= SI
->getCondition();
223 if (!Condition
->getType()->isVectorTy()) {
224 if (Constant
*C
= LVI
->getConstantOnEdge(Condition
, From
, To
, CxtI
)) {
226 return SI
->getTrueValue();
227 if (C
->isZeroValue())
228 return SI
->getFalseValue();
232 // Look if the select has a constant but LVI tells us that the incoming
233 // value can never be that constant. In that case replace the incoming
234 // value with the other value of the select. This often allows us to
235 // remove the select later.
238 if (auto *C
= dyn_cast
<Constant
>(SI
->getFalseValue()))
239 if (auto *Res
= dyn_cast_or_null
<ConstantInt
>(
240 LVI
->getPredicateOnEdge(ICmpInst::ICMP_EQ
, SI
, C
, From
, To
, CxtI
));
241 Res
&& Res
->isZero())
242 return SI
->getTrueValue();
245 // similar to the select "false" case, but try the select "true" value
246 if (auto *C
= dyn_cast
<Constant
>(SI
->getTrueValue()))
247 if (auto *Res
= dyn_cast_or_null
<ConstantInt
>(
248 LVI
->getPredicateOnEdge(ICmpInst::ICMP_EQ
, SI
, C
, From
, To
, CxtI
));
249 Res
&& Res
->isZero())
250 return SI
->getFalseValue();
255 static bool processPHI(PHINode
*P
, LazyValueInfo
*LVI
, DominatorTree
*DT
,
256 const SimplifyQuery
&SQ
) {
257 bool Changed
= false;
259 BasicBlock
*BB
= P
->getParent();
260 for (unsigned i
= 0, e
= P
->getNumIncomingValues(); i
< e
; ++i
) {
261 Value
*Incoming
= P
->getIncomingValue(i
);
262 if (isa
<Constant
>(Incoming
)) continue;
264 Value
*V
= getValueOnEdge(LVI
, Incoming
, P
->getIncomingBlock(i
), BB
, P
);
266 P
->setIncomingValue(i
, V
);
271 if (Value
*V
= simplifyInstruction(P
, SQ
)) {
272 P
->replaceAllUsesWith(V
);
273 P
->eraseFromParent();
278 Changed
= simplifyCommonValuePhi(P
, LVI
, DT
);
286 static bool processICmp(ICmpInst
*Cmp
, LazyValueInfo
*LVI
) {
287 // Only for signed relational comparisons of integers.
288 if (!Cmp
->getOperand(0)->getType()->isIntOrIntVectorTy())
291 if (!Cmp
->isSigned() && (!Cmp
->isUnsigned() || Cmp
->hasSameSign()))
294 bool Changed
= false;
296 ConstantRange CR1
= LVI
->getConstantRangeAtUse(Cmp
->getOperandUse(0),
297 /*UndefAllowed=*/false),
298 CR2
= LVI
->getConstantRangeAtUse(Cmp
->getOperandUse(1),
299 /*UndefAllowed=*/false);
301 if (Cmp
->isSigned()) {
302 ICmpInst::Predicate UnsignedPred
=
303 ConstantRange::getEquivalentPredWithFlippedSignedness(
304 Cmp
->getPredicate(), CR1
, CR2
);
306 if (UnsignedPred
== ICmpInst::Predicate::BAD_ICMP_PREDICATE
)
310 Cmp
->setPredicate(UnsignedPred
);
314 if (ConstantRange::areInsensitiveToSignednessOfICmpPredicate(CR1
, CR2
)) {
322 /// See if LazyValueInfo's ability to exploit edge conditions or range
323 /// information is sufficient to prove this comparison. Even for local
324 /// conditions, this can sometimes prove conditions instcombine can't by
325 /// exploiting range information.
326 static bool constantFoldCmp(CmpInst
*Cmp
, LazyValueInfo
*LVI
) {
327 Value
*Op0
= Cmp
->getOperand(0);
328 Value
*Op1
= Cmp
->getOperand(1);
329 Constant
*Res
= LVI
->getPredicateAt(Cmp
->getPredicate(), Op0
, Op1
, Cmp
,
330 /*UseBlockValue=*/true);
335 Cmp
->replaceAllUsesWith(Res
);
336 Cmp
->eraseFromParent();
340 static bool processCmp(CmpInst
*Cmp
, LazyValueInfo
*LVI
) {
341 if (constantFoldCmp(Cmp
, LVI
))
344 if (auto *ICmp
= dyn_cast
<ICmpInst
>(Cmp
))
345 if (processICmp(ICmp
, LVI
))
351 /// Simplify a switch instruction by removing cases which can never fire. If the
352 /// uselessness of a case could be determined locally then constant propagation
353 /// would already have figured it out. Instead, walk the predecessors and
354 /// statically evaluate cases based on information available on that edge. Cases
355 /// that cannot fire no matter what the incoming edge can safely be removed. If
356 /// a case fires on every incoming edge then the entire switch can be removed
357 /// and replaced with a branch to the case destination.
358 static bool processSwitch(SwitchInst
*I
, LazyValueInfo
*LVI
,
360 DomTreeUpdater
DTU(*DT
, DomTreeUpdater::UpdateStrategy::Lazy
);
361 Value
*Cond
= I
->getCondition();
362 BasicBlock
*BB
= I
->getParent();
364 // Analyse each switch case in turn.
365 bool Changed
= false;
366 DenseMap
<BasicBlock
*, int> SuccessorsCount
;
367 for (auto *Succ
: successors(BB
))
368 SuccessorsCount
[Succ
]++;
370 { // Scope for SwitchInstProfUpdateWrapper. It must not live during
371 // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
372 SwitchInstProfUpdateWrapper
SI(*I
);
373 unsigned ReachableCaseCount
= 0;
375 for (auto CI
= SI
->case_begin(), CE
= SI
->case_end(); CI
!= CE
;) {
376 ConstantInt
*Case
= CI
->getCaseValue();
377 auto *Res
= dyn_cast_or_null
<ConstantInt
>(
378 LVI
->getPredicateAt(CmpInst::ICMP_EQ
, Cond
, Case
, I
,
379 /* UseBlockValue */ true));
381 if (Res
&& Res
->isZero()) {
382 // This case never fires - remove it.
383 BasicBlock
*Succ
= CI
->getCaseSuccessor();
384 Succ
->removePredecessor(BB
);
385 CI
= SI
.removeCase(CI
);
388 // The condition can be modified by removePredecessor's PHI simplification
390 Cond
= SI
->getCondition();
394 if (--SuccessorsCount
[Succ
] == 0)
395 DTU
.applyUpdatesPermissive({{DominatorTree::Delete
, BB
, Succ
}});
398 if (Res
&& Res
->isOne()) {
399 // This case always fires. Arrange for the switch to be turned into an
400 // unconditional branch by replacing the switch condition with the case
402 SI
->setCondition(Case
);
403 NumDeadCases
+= SI
->getNumCases();
408 // Increment the case iterator since we didn't delete it.
410 ++ReachableCaseCount
;
413 BasicBlock
*DefaultDest
= SI
->getDefaultDest();
414 if (ReachableCaseCount
> 1 &&
415 !isa
<UnreachableInst
>(DefaultDest
->getFirstNonPHIOrDbg())) {
416 ConstantRange CR
= LVI
->getConstantRangeAtUse(I
->getOperandUse(0),
417 /*UndefAllowed*/ false);
418 // The default dest is unreachable if all cases are covered.
419 if (!CR
.isSizeLargerThan(ReachableCaseCount
)) {
420 BasicBlock
*NewUnreachableBB
=
421 BasicBlock::Create(BB
->getContext(), "default.unreachable",
422 BB
->getParent(), DefaultDest
);
423 new UnreachableInst(BB
->getContext(), NewUnreachableBB
);
425 DefaultDest
->removePredecessor(BB
);
426 SI
->setDefaultDest(NewUnreachableBB
);
428 if (SuccessorsCount
[DefaultDest
] == 1)
429 DTU
.applyUpdates({{DominatorTree::Delete
, BB
, DefaultDest
}});
430 DTU
.applyUpdates({{DominatorTree::Insert
, BB
, NewUnreachableBB
}});
439 // If the switch has been simplified to the point where it can be replaced
440 // by a branch then do so now.
441 ConstantFoldTerminator(BB
, /*DeleteDeadConditions = */ false,
442 /*TLI = */ nullptr, &DTU
);
446 // See if we can prove that the given binary op intrinsic will not overflow.
447 static bool willNotOverflow(BinaryOpIntrinsic
*BO
, LazyValueInfo
*LVI
) {
448 ConstantRange LRange
=
449 LVI
->getConstantRangeAtUse(BO
->getOperandUse(0), /*UndefAllowed*/ false);
450 ConstantRange RRange
=
451 LVI
->getConstantRangeAtUse(BO
->getOperandUse(1), /*UndefAllowed*/ false);
452 ConstantRange NWRegion
= ConstantRange::makeGuaranteedNoWrapRegion(
453 BO
->getBinaryOp(), RRange
, BO
->getNoWrapKind());
454 return NWRegion
.contains(LRange
);
457 static void setDeducedOverflowingFlags(Value
*V
, Instruction::BinaryOps Opcode
,
458 bool NewNSW
, bool NewNUW
) {
459 Statistic
*OpcNW
, *OpcNSW
, *OpcNUW
;
461 case Instruction::Add
:
466 case Instruction::Sub
:
471 case Instruction::Mul
:
476 case Instruction::Shl
:
482 llvm_unreachable("Will not be called with other binops");
485 auto *Inst
= dyn_cast
<Instruction
>(V
);
492 Inst
->setHasNoSignedWrap();
500 Inst
->setHasNoUnsignedWrap();
504 static bool processBinOp(BinaryOperator
*BinOp
, LazyValueInfo
*LVI
);
506 // See if @llvm.abs argument is alays positive/negative, and simplify.
507 // Notably, INT_MIN can belong to either range, regardless of the NSW,
508 // because it is negation-invariant.
509 static bool processAbsIntrinsic(IntrinsicInst
*II
, LazyValueInfo
*LVI
) {
510 Value
*X
= II
->getArgOperand(0);
511 bool IsIntMinPoison
= cast
<ConstantInt
>(II
->getArgOperand(1))->isOne();
512 APInt IntMin
= APInt::getSignedMinValue(X
->getType()->getScalarSizeInBits());
513 ConstantRange Range
= LVI
->getConstantRangeAtUse(
514 II
->getOperandUse(0), /*UndefAllowed*/ IsIntMinPoison
);
516 // Is X in [0, IntMin]? NOTE: INT_MIN is fine!
517 if (Range
.icmp(CmpInst::ICMP_ULE
, IntMin
)) {
519 II
->replaceAllUsesWith(X
);
520 II
->eraseFromParent();
524 // Is X in [IntMin, 0]? NOTE: INT_MIN is fine!
525 if (Range
.getSignedMax().isNonPositive()) {
527 Value
*NegX
= B
.CreateNeg(X
, II
->getName(),
528 /*HasNSW=*/IsIntMinPoison
);
530 II
->replaceAllUsesWith(NegX
);
531 II
->eraseFromParent();
533 // See if we can infer some no-wrap flags.
534 if (auto *BO
= dyn_cast
<BinaryOperator
>(NegX
))
535 processBinOp(BO
, LVI
);
540 // Argument's range crosses zero.
541 // Can we at least tell that the argument is never INT_MIN?
542 if (!IsIntMinPoison
&& !Range
.contains(IntMin
)) {
545 II
->setArgOperand(1, ConstantInt::getTrue(II
->getContext()));
551 static bool processCmpIntrinsic(CmpIntrinsic
*CI
, LazyValueInfo
*LVI
) {
552 ConstantRange LHS_CR
=
553 LVI
->getConstantRangeAtUse(CI
->getOperandUse(0), /*UndefAllowed*/ false);
554 ConstantRange RHS_CR
=
555 LVI
->getConstantRangeAtUse(CI
->getOperandUse(1), /*UndefAllowed*/ false);
557 if (LHS_CR
.icmp(CI
->getGTPredicate(), RHS_CR
)) {
559 CI
->replaceAllUsesWith(ConstantInt::get(CI
->getType(), 1));
560 CI
->eraseFromParent();
563 if (LHS_CR
.icmp(CI
->getLTPredicate(), RHS_CR
)) {
565 CI
->replaceAllUsesWith(ConstantInt::getSigned(CI
->getType(), -1));
566 CI
->eraseFromParent();
569 if (LHS_CR
.icmp(ICmpInst::ICMP_EQ
, RHS_CR
)) {
571 CI
->replaceAllUsesWith(ConstantInt::get(CI
->getType(), 0));
572 CI
->eraseFromParent();
579 // See if this min/max intrinsic always picks it's one specific operand.
580 // If not, check whether we can canonicalize signed minmax into unsigned version
581 static bool processMinMaxIntrinsic(MinMaxIntrinsic
*MM
, LazyValueInfo
*LVI
) {
582 CmpInst::Predicate Pred
= CmpInst::getNonStrictPredicate(MM
->getPredicate());
583 ConstantRange LHS_CR
= LVI
->getConstantRangeAtUse(MM
->getOperandUse(0),
584 /*UndefAllowed*/ false);
585 ConstantRange RHS_CR
= LVI
->getConstantRangeAtUse(MM
->getOperandUse(1),
586 /*UndefAllowed*/ false);
587 if (LHS_CR
.icmp(Pred
, RHS_CR
)) {
589 MM
->replaceAllUsesWith(MM
->getLHS());
590 MM
->eraseFromParent();
593 if (RHS_CR
.icmp(Pred
, LHS_CR
)) {
595 MM
->replaceAllUsesWith(MM
->getRHS());
596 MM
->eraseFromParent();
600 if (MM
->isSigned() &&
601 ConstantRange::areInsensitiveToSignednessOfICmpPredicate(LHS_CR
,
605 MM
->replaceAllUsesWith(B
.CreateBinaryIntrinsic(
606 MM
->getIntrinsicID() == Intrinsic::smin
? Intrinsic::umin
608 MM
->getLHS(), MM
->getRHS()));
609 MM
->eraseFromParent();
616 // Rewrite this with.overflow intrinsic as non-overflowing.
617 static bool processOverflowIntrinsic(WithOverflowInst
*WO
, LazyValueInfo
*LVI
) {
619 Instruction::BinaryOps Opcode
= WO
->getBinaryOp();
620 bool NSW
= WO
->isSigned();
621 bool NUW
= !WO
->isSigned();
624 B
.CreateBinOp(Opcode
, WO
->getLHS(), WO
->getRHS(), WO
->getName());
625 setDeducedOverflowingFlags(NewOp
, Opcode
, NSW
, NUW
);
627 StructType
*ST
= cast
<StructType
>(WO
->getType());
628 Constant
*Struct
= ConstantStruct::get(ST
,
629 { PoisonValue::get(ST
->getElementType(0)),
630 ConstantInt::getFalse(ST
->getElementType(1)) });
631 Value
*NewI
= B
.CreateInsertValue(Struct
, NewOp
, 0);
632 WO
->replaceAllUsesWith(NewI
);
633 WO
->eraseFromParent();
636 // See if we can infer the other no-wrap too.
637 if (auto *BO
= dyn_cast
<BinaryOperator
>(NewOp
))
638 processBinOp(BO
, LVI
);
643 static bool processSaturatingInst(SaturatingInst
*SI
, LazyValueInfo
*LVI
) {
644 Instruction::BinaryOps Opcode
= SI
->getBinaryOp();
645 bool NSW
= SI
->isSigned();
646 bool NUW
= !SI
->isSigned();
647 BinaryOperator
*BinOp
= BinaryOperator::Create(
648 Opcode
, SI
->getLHS(), SI
->getRHS(), SI
->getName(), SI
->getIterator());
649 BinOp
->setDebugLoc(SI
->getDebugLoc());
650 setDeducedOverflowingFlags(BinOp
, Opcode
, NSW
, NUW
);
652 SI
->replaceAllUsesWith(BinOp
);
653 SI
->eraseFromParent();
656 // See if we can infer the other no-wrap too.
657 if (auto *BO
= dyn_cast
<BinaryOperator
>(BinOp
))
658 processBinOp(BO
, LVI
);
663 /// Infer nonnull attributes for the arguments at the specified callsite.
664 static bool processCallSite(CallBase
&CB
, LazyValueInfo
*LVI
) {
666 if (CB
.getIntrinsicID() == Intrinsic::abs
) {
667 return processAbsIntrinsic(&cast
<IntrinsicInst
>(CB
), LVI
);
670 if (auto *CI
= dyn_cast
<CmpIntrinsic
>(&CB
)) {
671 return processCmpIntrinsic(CI
, LVI
);
674 if (auto *MM
= dyn_cast
<MinMaxIntrinsic
>(&CB
)) {
675 return processMinMaxIntrinsic(MM
, LVI
);
678 if (auto *WO
= dyn_cast
<WithOverflowInst
>(&CB
)) {
679 if (willNotOverflow(WO
, LVI
))
680 return processOverflowIntrinsic(WO
, LVI
);
683 if (auto *SI
= dyn_cast
<SaturatingInst
>(&CB
)) {
684 if (willNotOverflow(SI
, LVI
))
685 return processSaturatingInst(SI
, LVI
);
688 bool Changed
= false;
690 // Deopt bundle operands are intended to capture state with minimal
691 // perturbance of the code otherwise. If we can find a constant value for
692 // any such operand and remove a use of the original value, that's
693 // desireable since it may allow further optimization of that value (e.g. via
694 // single use rules in instcombine). Since deopt uses tend to,
695 // idiomatically, appear along rare conditional paths, it's reasonable likely
696 // we may have a conditional fact with which LVI can fold.
697 if (auto DeoptBundle
= CB
.getOperandBundle(LLVMContext::OB_deopt
)) {
698 for (const Use
&ConstU
: DeoptBundle
->Inputs
) {
699 Use
&U
= const_cast<Use
&>(ConstU
);
701 if (V
->getType()->isVectorTy()) continue;
702 if (isa
<Constant
>(V
)) continue;
704 Constant
*C
= LVI
->getConstant(V
, &CB
);
711 SmallVector
<unsigned, 4> ArgNos
;
714 for (Value
*V
: CB
.args()) {
715 PointerType
*Type
= dyn_cast
<PointerType
>(V
->getType());
716 // Try to mark pointer typed parameters as non-null. We skip the
717 // relatively expensive analysis for constants which are obviously either
718 // null or non-null to start with.
719 if (Type
&& !CB
.paramHasAttr(ArgNo
, Attribute::NonNull
) &&
721 if (auto *Res
= dyn_cast_or_null
<ConstantInt
>(LVI
->getPredicateAt(
722 ICmpInst::ICMP_EQ
, V
, ConstantPointerNull::get(Type
), &CB
,
723 /*UseBlockValue=*/false));
724 Res
&& Res
->isZero())
725 ArgNos
.push_back(ArgNo
);
729 assert(ArgNo
== CB
.arg_size() && "Call arguments not processed correctly.");
734 NumNonNull
+= ArgNos
.size();
735 AttributeList AS
= CB
.getAttributes();
736 LLVMContext
&Ctx
= CB
.getContext();
737 AS
= AS
.addParamAttribute(Ctx
, ArgNos
,
738 Attribute::get(Ctx
, Attribute::NonNull
));
739 CB
.setAttributes(AS
);
744 enum class Domain
{ NonNegative
, NonPositive
, Unknown
};
746 static Domain
getDomain(const ConstantRange
&CR
) {
747 if (CR
.isAllNonNegative())
748 return Domain::NonNegative
;
749 if (CR
.icmp(ICmpInst::ICMP_SLE
, APInt::getZero(CR
.getBitWidth())))
750 return Domain::NonPositive
;
751 return Domain::Unknown
;
754 /// Try to shrink a sdiv/srem's width down to the smallest power of two that's
755 /// sufficient to contain its operands.
756 static bool narrowSDivOrSRem(BinaryOperator
*Instr
, const ConstantRange
&LCR
,
757 const ConstantRange
&RCR
) {
758 assert(Instr
->getOpcode() == Instruction::SDiv
||
759 Instr
->getOpcode() == Instruction::SRem
);
761 // Find the smallest power of two bitwidth that's sufficient to hold Instr's
763 unsigned OrigWidth
= Instr
->getType()->getScalarSizeInBits();
765 // What is the smallest bit width that can accommodate the entire value ranges
766 // of both of the operands?
767 unsigned MinSignedBits
=
768 std::max(LCR
.getMinSignedBits(), RCR
.getMinSignedBits());
770 // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
771 // prove that such a combination is impossible, we need to bump the bitwidth.
772 if (RCR
.contains(APInt::getAllOnes(OrigWidth
)) &&
773 LCR
.contains(APInt::getSignedMinValue(MinSignedBits
).sext(OrigWidth
)))
776 // Don't shrink below 8 bits wide.
777 unsigned NewWidth
= std::max
<unsigned>(PowerOf2Ceil(MinSignedBits
), 8);
779 // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
781 if (NewWidth
>= OrigWidth
)
784 ++NumSDivSRemsNarrowed
;
785 IRBuilder
<> B
{Instr
};
786 auto *TruncTy
= Instr
->getType()->getWithNewBitWidth(NewWidth
);
787 auto *LHS
= B
.CreateTruncOrBitCast(Instr
->getOperand(0), TruncTy
,
788 Instr
->getName() + ".lhs.trunc");
789 auto *RHS
= B
.CreateTruncOrBitCast(Instr
->getOperand(1), TruncTy
,
790 Instr
->getName() + ".rhs.trunc");
791 auto *BO
= B
.CreateBinOp(Instr
->getOpcode(), LHS
, RHS
, Instr
->getName());
792 auto *Sext
= B
.CreateSExt(BO
, Instr
->getType(), Instr
->getName() + ".sext");
793 if (auto *BinOp
= dyn_cast
<BinaryOperator
>(BO
))
794 if (BinOp
->getOpcode() == Instruction::SDiv
)
795 BinOp
->setIsExact(Instr
->isExact());
797 Instr
->replaceAllUsesWith(Sext
);
798 Instr
->eraseFromParent();
802 static bool expandUDivOrURem(BinaryOperator
*Instr
, const ConstantRange
&XCR
,
803 const ConstantRange
&YCR
) {
804 Type
*Ty
= Instr
->getType();
805 assert(Instr
->getOpcode() == Instruction::UDiv
||
806 Instr
->getOpcode() == Instruction::URem
);
807 bool IsRem
= Instr
->getOpcode() == Instruction::URem
;
809 Value
*X
= Instr
->getOperand(0);
810 Value
*Y
= Instr
->getOperand(1);
812 // X u/ Y -> 0 iff X u< Y
813 // X u% Y -> X iff X u< Y
814 if (XCR
.icmp(ICmpInst::ICMP_ULT
, YCR
)) {
815 Instr
->replaceAllUsesWith(IsRem
? X
: Constant::getNullValue(Ty
));
816 Instr
->eraseFromParent();
817 ++NumUDivURemsNarrowedExpanded
;
823 // We can represent the modulo operation as a loop/self-recursion:
829 // ret urem_rec(Z, Y)
830 // which isn't better, but if we only need a single iteration
831 // to compute the answer, this becomes quite good:
832 // R = X < Y ? X : X - Y iff X u< 2*Y (w/ unsigned saturation)
833 // Now, we do not care about all full multiples of Y in X, they do not change
834 // the answer, thus we could rewrite the expression as:
835 // X* = X - (Y * |_ X / Y _|)
837 // so we don't need the *first* iteration to return, we just need to
838 // know *which* iteration will always return, so we could also rewrite it as:
839 // X* = X - (Y * |_ X / Y _|)
840 // R = X* % Y iff X* u< 2*Y (w/ unsigned saturation)
841 // but that does not seem profitable here.
843 // Even if we don't know X's range, the divisor may be so large, X can't ever
844 // be 2x larger than that. I.e. if divisor is always negative.
845 if (!XCR
.icmp(ICmpInst::ICMP_ULT
, YCR
.uadd_sat(YCR
)) && !YCR
.isAllNegative())
848 IRBuilder
<> B(Instr
);
850 if (XCR
.icmp(ICmpInst::ICMP_UGE
, YCR
)) {
851 // If X is between Y and 2*Y the result is known.
853 ExpandedOp
= B
.CreateNUWSub(X
, Y
);
855 ExpandedOp
= ConstantInt::get(Instr
->getType(), 1);
857 // NOTE: this transformation introduces two uses of X,
858 // but it may be undef so we must freeze it first.
860 if (!isGuaranteedNotToBeUndef(X
))
861 FrozenX
= B
.CreateFreeze(X
, X
->getName() + ".frozen");
863 if (!isGuaranteedNotToBeUndef(Y
))
864 FrozenY
= B
.CreateFreeze(Y
, Y
->getName() + ".frozen");
865 auto *AdjX
= B
.CreateNUWSub(FrozenX
, FrozenY
, Instr
->getName() + ".urem");
866 auto *Cmp
= B
.CreateICmp(ICmpInst::ICMP_ULT
, FrozenX
, FrozenY
,
867 Instr
->getName() + ".cmp");
868 ExpandedOp
= B
.CreateSelect(Cmp
, FrozenX
, AdjX
);
871 B
.CreateICmp(ICmpInst::ICMP_UGE
, X
, Y
, Instr
->getName() + ".cmp");
872 ExpandedOp
= B
.CreateZExt(Cmp
, Ty
, Instr
->getName() + ".udiv");
874 ExpandedOp
->takeName(Instr
);
875 Instr
->replaceAllUsesWith(ExpandedOp
);
876 Instr
->eraseFromParent();
877 ++NumUDivURemsNarrowedExpanded
;
881 /// Try to shrink a udiv/urem's width down to the smallest power of two that's
882 /// sufficient to contain its operands.
883 static bool narrowUDivOrURem(BinaryOperator
*Instr
, const ConstantRange
&XCR
,
884 const ConstantRange
&YCR
) {
885 assert(Instr
->getOpcode() == Instruction::UDiv
||
886 Instr
->getOpcode() == Instruction::URem
);
888 // Find the smallest power of two bitwidth that's sufficient to hold Instr's
891 // What is the smallest bit width that can accommodate the entire value ranges
892 // of both of the operands?
893 unsigned MaxActiveBits
= std::max(XCR
.getActiveBits(), YCR
.getActiveBits());
894 // Don't shrink below 8 bits wide.
895 unsigned NewWidth
= std::max
<unsigned>(PowerOf2Ceil(MaxActiveBits
), 8);
897 // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
899 if (NewWidth
>= Instr
->getType()->getScalarSizeInBits())
902 ++NumUDivURemsNarrowed
;
903 IRBuilder
<> B
{Instr
};
904 auto *TruncTy
= Instr
->getType()->getWithNewBitWidth(NewWidth
);
905 auto *LHS
= B
.CreateTruncOrBitCast(Instr
->getOperand(0), TruncTy
,
906 Instr
->getName() + ".lhs.trunc");
907 auto *RHS
= B
.CreateTruncOrBitCast(Instr
->getOperand(1), TruncTy
,
908 Instr
->getName() + ".rhs.trunc");
909 auto *BO
= B
.CreateBinOp(Instr
->getOpcode(), LHS
, RHS
, Instr
->getName());
910 auto *Zext
= B
.CreateZExt(BO
, Instr
->getType(), Instr
->getName() + ".zext");
911 if (auto *BinOp
= dyn_cast
<BinaryOperator
>(BO
))
912 if (BinOp
->getOpcode() == Instruction::UDiv
)
913 BinOp
->setIsExact(Instr
->isExact());
915 Instr
->replaceAllUsesWith(Zext
);
916 Instr
->eraseFromParent();
920 static bool processUDivOrURem(BinaryOperator
*Instr
, LazyValueInfo
*LVI
) {
921 assert(Instr
->getOpcode() == Instruction::UDiv
||
922 Instr
->getOpcode() == Instruction::URem
);
923 ConstantRange XCR
= LVI
->getConstantRangeAtUse(Instr
->getOperandUse(0),
924 /*UndefAllowed*/ false);
925 // Allow undef for RHS, as we can assume it is division by zero UB.
926 ConstantRange YCR
= LVI
->getConstantRangeAtUse(Instr
->getOperandUse(1),
927 /*UndefAllowed*/ true);
928 if (expandUDivOrURem(Instr
, XCR
, YCR
))
931 return narrowUDivOrURem(Instr
, XCR
, YCR
);
934 static bool processSRem(BinaryOperator
*SDI
, const ConstantRange
&LCR
,
935 const ConstantRange
&RCR
, LazyValueInfo
*LVI
) {
936 assert(SDI
->getOpcode() == Instruction::SRem
);
938 if (LCR
.abs().icmp(CmpInst::ICMP_ULT
, RCR
.abs())) {
939 SDI
->replaceAllUsesWith(SDI
->getOperand(0));
940 SDI
->eraseFromParent();
948 std::array
<Operand
, 2> Ops
= {{{SDI
->getOperand(0), getDomain(LCR
)},
949 {SDI
->getOperand(1), getDomain(RCR
)}}};
950 if (Ops
[0].D
== Domain::Unknown
|| Ops
[1].D
== Domain::Unknown
)
953 // We know domains of both of the operands!
956 // We need operands to be non-negative, so negate each one that isn't.
957 for (Operand
&Op
: Ops
) {
958 if (Op
.D
== Domain::NonNegative
)
960 auto *BO
= BinaryOperator::CreateNeg(Op
.V
, Op
.V
->getName() + ".nonneg",
962 BO
->setDebugLoc(SDI
->getDebugLoc());
966 auto *URem
= BinaryOperator::CreateURem(Ops
[0].V
, Ops
[1].V
, SDI
->getName(),
968 URem
->setDebugLoc(SDI
->getDebugLoc());
972 // If the divident was non-positive, we need to negate the result.
973 if (Ops
[0].D
== Domain::NonPositive
) {
974 Res
= BinaryOperator::CreateNeg(Res
, Res
->getName() + ".neg",
976 Res
->setDebugLoc(SDI
->getDebugLoc());
979 SDI
->replaceAllUsesWith(Res
);
980 SDI
->eraseFromParent();
982 // Try to simplify our new urem.
983 processUDivOrURem(URem
, LVI
);
988 /// See if LazyValueInfo's ability to exploit edge conditions or range
989 /// information is sufficient to prove the signs of both operands of this SDiv.
990 /// If this is the case, replace the SDiv with a UDiv. Even for local
991 /// conditions, this can sometimes prove conditions instcombine can't by
992 /// exploiting range information.
993 static bool processSDiv(BinaryOperator
*SDI
, const ConstantRange
&LCR
,
994 const ConstantRange
&RCR
, LazyValueInfo
*LVI
) {
995 assert(SDI
->getOpcode() == Instruction::SDiv
);
997 // Check whether the division folds to a constant.
998 ConstantRange DivCR
= LCR
.sdiv(RCR
);
999 if (const APInt
*Elem
= DivCR
.getSingleElement()) {
1000 SDI
->replaceAllUsesWith(ConstantInt::get(SDI
->getType(), *Elem
));
1001 SDI
->eraseFromParent();
1009 std::array
<Operand
, 2> Ops
= {{{SDI
->getOperand(0), getDomain(LCR
)},
1010 {SDI
->getOperand(1), getDomain(RCR
)}}};
1011 if (Ops
[0].D
== Domain::Unknown
|| Ops
[1].D
== Domain::Unknown
)
1014 // We know domains of both of the operands!
1017 // We need operands to be non-negative, so negate each one that isn't.
1018 for (Operand
&Op
: Ops
) {
1019 if (Op
.D
== Domain::NonNegative
)
1021 auto *BO
= BinaryOperator::CreateNeg(Op
.V
, Op
.V
->getName() + ".nonneg",
1022 SDI
->getIterator());
1023 BO
->setDebugLoc(SDI
->getDebugLoc());
1027 auto *UDiv
= BinaryOperator::CreateUDiv(Ops
[0].V
, Ops
[1].V
, SDI
->getName(),
1028 SDI
->getIterator());
1029 UDiv
->setDebugLoc(SDI
->getDebugLoc());
1030 UDiv
->setIsExact(SDI
->isExact());
1034 // If the operands had two different domains, we need to negate the result.
1035 if (Ops
[0].D
!= Ops
[1].D
) {
1036 Res
= BinaryOperator::CreateNeg(Res
, Res
->getName() + ".neg",
1037 SDI
->getIterator());
1038 Res
->setDebugLoc(SDI
->getDebugLoc());
1041 SDI
->replaceAllUsesWith(Res
);
1042 SDI
->eraseFromParent();
1044 // Try to simplify our new udiv.
1045 processUDivOrURem(UDiv
, LVI
);
1050 static bool processSDivOrSRem(BinaryOperator
*Instr
, LazyValueInfo
*LVI
) {
1051 assert(Instr
->getOpcode() == Instruction::SDiv
||
1052 Instr
->getOpcode() == Instruction::SRem
);
1054 LVI
->getConstantRangeAtUse(Instr
->getOperandUse(0), /*AllowUndef*/ false);
1055 // Allow undef for RHS, as we can assume it is division by zero UB.
1057 LVI
->getConstantRangeAtUse(Instr
->getOperandUse(1), /*AlloweUndef*/ true);
1058 if (Instr
->getOpcode() == Instruction::SDiv
)
1059 if (processSDiv(Instr
, LCR
, RCR
, LVI
))
1062 if (Instr
->getOpcode() == Instruction::SRem
) {
1063 if (processSRem(Instr
, LCR
, RCR
, LVI
))
1067 return narrowSDivOrSRem(Instr
, LCR
, RCR
);
1070 static bool processAShr(BinaryOperator
*SDI
, LazyValueInfo
*LVI
) {
1071 ConstantRange LRange
=
1072 LVI
->getConstantRangeAtUse(SDI
->getOperandUse(0), /*UndefAllowed*/ false);
1073 unsigned OrigWidth
= SDI
->getType()->getScalarSizeInBits();
1074 ConstantRange NegOneOrZero
=
1075 ConstantRange(APInt(OrigWidth
, (uint64_t)-1, true), APInt(OrigWidth
, 1));
1076 if (NegOneOrZero
.contains(LRange
)) {
1077 // ashr of -1 or 0 never changes the value, so drop the whole instruction
1079 SDI
->replaceAllUsesWith(SDI
->getOperand(0));
1080 SDI
->eraseFromParent();
1084 if (!LRange
.isAllNonNegative())
1087 ++NumAShrsConverted
;
1088 auto *BO
= BinaryOperator::CreateLShr(SDI
->getOperand(0), SDI
->getOperand(1),
1089 "", SDI
->getIterator());
1091 BO
->setDebugLoc(SDI
->getDebugLoc());
1092 BO
->setIsExact(SDI
->isExact());
1093 SDI
->replaceAllUsesWith(BO
);
1094 SDI
->eraseFromParent();
1099 static bool processSExt(SExtInst
*SDI
, LazyValueInfo
*LVI
) {
1100 const Use
&Base
= SDI
->getOperandUse(0);
1101 if (!LVI
->getConstantRangeAtUse(Base
, /*UndefAllowed*/ false)
1102 .isAllNonNegative())
1106 auto *ZExt
= CastInst::CreateZExtOrBitCast(Base
, SDI
->getType(), "",
1107 SDI
->getIterator());
1108 ZExt
->takeName(SDI
);
1109 ZExt
->setDebugLoc(SDI
->getDebugLoc());
1111 SDI
->replaceAllUsesWith(ZExt
);
1112 SDI
->eraseFromParent();
1117 static bool processPossibleNonNeg(PossiblyNonNegInst
*I
, LazyValueInfo
*LVI
) {
1121 const Use
&Base
= I
->getOperandUse(0);
1122 if (!LVI
->getConstantRangeAtUse(Base
, /*UndefAllowed*/ false)
1123 .isAllNonNegative())
1132 static bool processZExt(ZExtInst
*ZExt
, LazyValueInfo
*LVI
) {
1133 return processPossibleNonNeg(cast
<PossiblyNonNegInst
>(ZExt
), LVI
);
1136 static bool processUIToFP(UIToFPInst
*UIToFP
, LazyValueInfo
*LVI
) {
1137 return processPossibleNonNeg(cast
<PossiblyNonNegInst
>(UIToFP
), LVI
);
1140 static bool processSIToFP(SIToFPInst
*SIToFP
, LazyValueInfo
*LVI
) {
1141 const Use
&Base
= SIToFP
->getOperandUse(0);
1142 if (!LVI
->getConstantRangeAtUse(Base
, /*UndefAllowed*/ false)
1143 .isAllNonNegative())
1147 auto *UIToFP
= CastInst::Create(Instruction::UIToFP
, Base
, SIToFP
->getType(),
1148 "", SIToFP
->getIterator());
1149 UIToFP
->takeName(SIToFP
);
1150 UIToFP
->setDebugLoc(SIToFP
->getDebugLoc());
1151 UIToFP
->setNonNeg();
1152 SIToFP
->replaceAllUsesWith(UIToFP
);
1153 SIToFP
->eraseFromParent();
1158 static bool processBinOp(BinaryOperator
*BinOp
, LazyValueInfo
*LVI
) {
1159 using OBO
= OverflowingBinaryOperator
;
1161 bool NSW
= BinOp
->hasNoSignedWrap();
1162 bool NUW
= BinOp
->hasNoUnsignedWrap();
1166 Instruction::BinaryOps Opcode
= BinOp
->getOpcode();
1167 ConstantRange LRange
= LVI
->getConstantRangeAtUse(BinOp
->getOperandUse(0),
1168 /*UndefAllowed=*/false);
1169 ConstantRange RRange
= LVI
->getConstantRangeAtUse(BinOp
->getOperandUse(1),
1170 /*UndefAllowed=*/false);
1172 bool Changed
= false;
1173 bool NewNUW
= false, NewNSW
= false;
1175 ConstantRange NUWRange
= ConstantRange::makeGuaranteedNoWrapRegion(
1176 Opcode
, RRange
, OBO::NoUnsignedWrap
);
1177 NewNUW
= NUWRange
.contains(LRange
);
1181 ConstantRange NSWRange
= ConstantRange::makeGuaranteedNoWrapRegion(
1182 Opcode
, RRange
, OBO::NoSignedWrap
);
1183 NewNSW
= NSWRange
.contains(LRange
);
1187 setDeducedOverflowingFlags(BinOp
, Opcode
, NewNSW
, NewNUW
);
1192 static bool processAnd(BinaryOperator
*BinOp
, LazyValueInfo
*LVI
) {
1193 using namespace llvm::PatternMatch
;
1195 // Pattern match (and lhs, C) where C includes a superset of bits which might
1196 // be set in lhs. This is a common truncation idiom created by instcombine.
1197 const Use
&LHS
= BinOp
->getOperandUse(0);
1199 if (!match(BinOp
->getOperand(1), m_LowBitMask(RHS
)))
1202 // We can only replace the AND with LHS based on range info if the range does
1203 // not include undef.
1204 ConstantRange LRange
=
1205 LVI
->getConstantRangeAtUse(LHS
, /*UndefAllowed=*/false);
1206 if (!LRange
.getUnsignedMax().ule(*RHS
))
1209 BinOp
->replaceAllUsesWith(LHS
);
1210 BinOp
->eraseFromParent();
1215 static bool runImpl(Function
&F
, LazyValueInfo
*LVI
, DominatorTree
*DT
,
1216 const SimplifyQuery
&SQ
) {
1217 bool FnChanged
= false;
1218 std::optional
<ConstantRange
> RetRange
;
1219 if (F
.hasExactDefinition() && F
.getReturnType()->isIntOrIntVectorTy())
1221 ConstantRange::getEmpty(F
.getReturnType()->getScalarSizeInBits());
1223 // Visiting in a pre-order depth-first traversal causes us to simplify early
1224 // blocks before querying later blocks (which require us to analyze early
1225 // blocks). Eagerly simplifying shallow blocks means there is strictly less
1226 // work to do for deep blocks. This also means we don't visit unreachable
1228 for (BasicBlock
*BB
: depth_first(&F
.getEntryBlock())) {
1229 bool BBChanged
= false;
1230 for (Instruction
&II
: llvm::make_early_inc_range(*BB
)) {
1231 switch (II
.getOpcode()) {
1232 case Instruction::Select
:
1233 BBChanged
|= processSelect(cast
<SelectInst
>(&II
), LVI
);
1235 case Instruction::PHI
:
1236 BBChanged
|= processPHI(cast
<PHINode
>(&II
), LVI
, DT
, SQ
);
1238 case Instruction::ICmp
:
1239 case Instruction::FCmp
:
1240 BBChanged
|= processCmp(cast
<CmpInst
>(&II
), LVI
);
1242 case Instruction::Call
:
1243 case Instruction::Invoke
:
1244 BBChanged
|= processCallSite(cast
<CallBase
>(II
), LVI
);
1246 case Instruction::SRem
:
1247 case Instruction::SDiv
:
1248 BBChanged
|= processSDivOrSRem(cast
<BinaryOperator
>(&II
), LVI
);
1250 case Instruction::UDiv
:
1251 case Instruction::URem
:
1252 BBChanged
|= processUDivOrURem(cast
<BinaryOperator
>(&II
), LVI
);
1254 case Instruction::AShr
:
1255 BBChanged
|= processAShr(cast
<BinaryOperator
>(&II
), LVI
);
1257 case Instruction::SExt
:
1258 BBChanged
|= processSExt(cast
<SExtInst
>(&II
), LVI
);
1260 case Instruction::ZExt
:
1261 BBChanged
|= processZExt(cast
<ZExtInst
>(&II
), LVI
);
1263 case Instruction::UIToFP
:
1264 BBChanged
|= processUIToFP(cast
<UIToFPInst
>(&II
), LVI
);
1266 case Instruction::SIToFP
:
1267 BBChanged
|= processSIToFP(cast
<SIToFPInst
>(&II
), LVI
);
1269 case Instruction::Add
:
1270 case Instruction::Sub
:
1271 case Instruction::Mul
:
1272 case Instruction::Shl
:
1273 BBChanged
|= processBinOp(cast
<BinaryOperator
>(&II
), LVI
);
1275 case Instruction::And
:
1276 BBChanged
|= processAnd(cast
<BinaryOperator
>(&II
), LVI
);
1281 Instruction
*Term
= BB
->getTerminator();
1282 switch (Term
->getOpcode()) {
1283 case Instruction::Switch
:
1284 BBChanged
|= processSwitch(cast
<SwitchInst
>(Term
), LVI
, DT
);
1286 case Instruction::Ret
: {
1287 auto *RI
= cast
<ReturnInst
>(Term
);
1288 // Try to determine the return value if we can. This is mainly here to
1289 // simplify the writing of unit tests, but also helps to enable IPO by
1290 // constant folding the return values of callees.
1291 auto *RetVal
= RI
->getReturnValue();
1292 if (!RetVal
) break; // handle "ret void"
1293 if (RetRange
&& !RetRange
->isFullSet())
1295 RetRange
->unionWith(LVI
->getConstantRange(RetVal
, RI
,
1296 /*UndefAllowed=*/false));
1298 if (isa
<Constant
>(RetVal
)) break; // nothing to do
1299 if (auto *C
= getConstantAt(RetVal
, RI
, LVI
)) {
1301 RI
->replaceUsesOfWith(RetVal
, C
);
1307 FnChanged
|= BBChanged
;
1310 // Infer range attribute on return value.
1311 if (RetRange
&& !RetRange
->isFullSet()) {
1312 Attribute RangeAttr
= F
.getRetAttribute(Attribute::Range
);
1313 if (RangeAttr
.isValid())
1314 RetRange
= RetRange
->intersectWith(RangeAttr
.getRange());
1315 // Don't add attribute for constant integer returns to reduce noise. These
1316 // are propagated across functions by IPSCCP.
1317 if (!RetRange
->isEmptySet() && !RetRange
->isSingleElement()) {
1318 F
.addRangeRetAttr(*RetRange
);
1326 CorrelatedValuePropagationPass::run(Function
&F
, FunctionAnalysisManager
&AM
) {
1327 LazyValueInfo
*LVI
= &AM
.getResult
<LazyValueAnalysis
>(F
);
1328 DominatorTree
*DT
= &AM
.getResult
<DominatorTreeAnalysis
>(F
);
1330 bool Changed
= runImpl(F
, LVI
, DT
, getBestSimplifyQuery(AM
, F
));
1332 PreservedAnalyses PA
;
1334 PA
= PreservedAnalyses::all();
1336 #if defined(EXPENSIVE_CHECKS)
1337 assert(DT
->verify(DominatorTree::VerificationLevel::Full
));
1339 assert(DT
->verify(DominatorTree::VerificationLevel::Fast
));
1340 #endif // EXPENSIVE_CHECKS
1342 PA
.preserve
<DominatorTreeAnalysis
>();
1343 PA
.preserve
<LazyValueAnalysis
>();
1346 // Keeping LVI alive is expensive, both because it uses a lot of memory, and
1347 // because invalidating values in LVI is expensive. While CVP does preserve
1348 // LVI, we know that passes after JumpThreading+CVP will not need the result
1349 // of this analysis, so we forcefully discard it early.
1350 PA
.abandon
<LazyValueAnalysis
>();