//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm::PatternMatch;

const unsigned MaxDepth = 6;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getIndexTypeSizeInBits(Ty);
}
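
// Illustrative note (added, not in the original source): for i32 or
// <4 x i32> this returns 32; for a pointer type, where getScalarSizeInBits()
// is zero, it falls back to the DataLayout's index width for that type,
// e.g. 64 on a typical 64-bit target.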
namespace {

// Simplifying using an assume can only be done in a particular control-flow
// context (the context instruction provides that context). If an assume and
// the context instruction are not in the same block then the DT helps in
// figuring out if we can use it.
struct Query {
  const DataLayout &DL;
  AssumptionCache *AC;
  const Instruction *CxtI;
  const DominatorTree *DT;

  // Unlike the other analyses, this may be a nullptr because not all clients
  // provide it currently.
  OptimizationRemarkEmitter *ORE;

  /// Set of assumptions that should be excluded from further queries.
  /// This is because of the potential for mutual recursion to cause
  /// computeKnownBits to repeatedly visit the same assume intrinsic. The
  /// classic case of this is assume(x = y), which will attempt to determine
  /// bits in x from bits in y, which will attempt to determine bits in y from
  /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
  /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
  /// (all of which can call computeKnownBits), and so on.
  std::array<const Value *, MaxDepth> Excluded;

  /// If true, it is safe to use metadata during simplification.
  InstrInfoQuery IIQ;

  unsigned NumExcluded = 0;

  Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
        const DominatorTree *DT, bool UseInstrInfo,
        OptimizationRemarkEmitter *ORE = nullptr)
      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}

  Query(const Query &Q, const Value *NewExcl)
      : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
        NumExcluded(Q.NumExcluded) {
    Excluded = Q.Excluded;
    Excluded[NumExcluded++] = NewExcl;
    assert(NumExcluded <= Excluded.size());
  }

  bool isExcluded(const Value *Value) const {
    if (NumExcluded == 0)
      return false;

    auto End = Excluded.begin() + NumExcluded;
    return std::find(Excluded.begin(), End, Value) != End;
  }
};

} // end anonymous namespace
// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static void computeKnownBits(const Value *V, KnownBits &Known,
                             unsigned Depth, const Query &Q);

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT,
                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
  ::computeKnownBits(V, Known, Depth,
                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}

static KnownBits computeKnownBits(const Value *V, unsigned Depth,
                                  const Query &Q);

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT,
                                 OptimizationRemarkEmitter *ORE,
                                 bool UseInstrInfo) {
  return ::computeKnownBits(
      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}

bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
                               const DataLayout &DL, AssumptionCache *AC,
                               const Instruction *CxtI, const DominatorTree *DT,
                               bool UseInstrInfo) {
  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");
  // Look for an inverted mask: (X & ~M) op (Y & M).
  Value *M;
  if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
      match(RHS, m_c_And(m_Specific(M), m_Value())))
    return true;
  if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
      match(LHS, m_c_And(m_Specific(M), m_Value())))
    return true;

  IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
  KnownBits LHSKnown(IT->getBitWidth());
  KnownBits RHSKnown(IT->getBitWidth());
  computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
  computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
  return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
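
// Illustrative example (added comment, not from the original source): for
//   %notm = xor i32 %m, -1
//   %lhs  = and i32 %x, %notm
//   %rhs  = and i32 %y, %m
// the two values occupy disjoint bit positions, so no bit can be set in both.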
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
  for (const User *U : CxtI->users()) {
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
      if (IC->isEquality())
        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
          if (C->isNullValue())
            continue;
    return false;
  }
  return true;
}

static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const Query &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);

bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
                          AssumptionCache *AC, const Instruction *CxtI,
                          const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownNonZero(V, Depth,
                          Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
                              unsigned Depth, AssumptionCache *AC,
                              const Instruction *CxtI, const DominatorTree *DT,
                              bool UseInstrInfo) {
  KnownBits Known =
      computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
  return Known.isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
                           AssumptionCache *AC, const Instruction *CxtI,
                           const DominatorTree *DT, bool UseInstrInfo) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // TODO: We're doing two recursive queries here. We should factor this such
  // that only a single query is needed.
  return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
         isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
}
bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
                           AssumptionCache *AC, const Instruction *CxtI,
                           const DominatorTree *DT, bool UseInstrInfo) {
  KnownBits Known =
      computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
  return Known.isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  return ::isKnownNonEqual(V1, V2,
                           Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT,
                                 UseInstrInfo, /*ORE=*/nullptr));
}

static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
                              const Query &Q);

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const DataLayout &DL, unsigned Depth,
                             AssumptionCache *AC, const Instruction *CxtI,
                             const DominatorTree *DT, bool UseInstrInfo) {
  return ::MaskedValueIsZero(
      V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const Query &Q);

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
                                   bool NSW,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const Query &Q) {
  unsigned BitWidth = KnownOut.getBitWidth();

  // If an initial sequence of bits in the result is not needed, the
  // corresponding bits in the operands are not needed.
  KnownBits LHSKnown(BitWidth);
  computeKnownBits(Op0, LHSKnown, Depth + 1, Q);
  computeKnownBits(Op1, Known2, Depth + 1, Q);

  KnownOut = KnownBits::computeForAddSub(Add, NSW, LHSKnown, Known2);
}

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                KnownBits &Known, KnownBits &Known2,
                                unsigned Depth, const Query &Q) {
  unsigned BitWidth = Known.getBitWidth();
  computeKnownBits(Op1, Known, Depth + 1, Q);
  computeKnownBits(Op0, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
                           isKnownNonZero(Op0, Depth, Q)) ||
                          (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
                           isKnownNonZero(Op1, Depth, Q));
    }
  }

  assert(!Known.hasConflict() && !Known2.hasConflict());
  // Compute a conservative estimate for high known-0 bits.
  unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
                            Known2.countMinLeadingZeros(),
                            BitWidth) - BitWidth;
  LeadZ = std::min(LeadZ, BitWidth);

  // The result of the bottom bits of an integer multiply can be
  // inferred by looking at the bottom bits of both operands and
  // multiplying them together.
  // We can infer at least the minimum number of known trailing bits
  // of both operands. Depending on number of trailing zeros, we can
  // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming
  // a and b are divisible by m and n respectively.
  // We then calculate how many of those bits are inferrable and set
  // the output. For example, the i8 mul:
  // We know the bottom 3 bits are zero since the first can be divided by
  // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4).
  // Applying the multiplication to the trimmed arguments gets:
  // Which allows us to infer the 2 LSBs. Since we're multiplying the result
  // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits.
  // The proof for this can be described as:
  // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) &&
  //      (C7 == (1 << (umin(countTrailingZeros(C1), C5) +
  //                    umin(countTrailingZeros(C2), C6) +
  //                    umin(C5 - umin(countTrailingZeros(C1), C5),
  //                         C6 - umin(countTrailingZeros(C2), C6)))) - 1)
  // %aa = shl i8 %a, C5
  // %bb = shl i8 %b, C6
  // %aaa = or i8 %aa, C1
  // %bbb = or i8 %bb, C2
  // %mul = mul i8 %aaa, %bbb
  // %mask = and i8 %mul, C7
  //   =>
  // %mask = i8 ((C1*C2)&C7)
  // Where C5, C6 describe the known bits of %a, %b
  // C1, C2 describe the known bottom bits of %a, %b.
  // C7 describes the mask of the known bits of the result.
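  // (Added illustrative note, not from the original comment: in the i8
  // example above, the operands are multiples of 4 and 2 respectively, so
  // the product is a multiple of 8 and its low 3 bits are known zero; the
  // low bits of the product of the trimmed operands supply the remaining
  // known bits, for 5 known low bits in total.)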
  APInt Bottom0 = Known.One;
  APInt Bottom1 = Known2.One;

  // How many times we'd be able to divide each argument by 2 (shr by 1).
  // This gives us the number of trailing zeros on the multiplication result.
  unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes();
  unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes();
  unsigned TrailZero0 = Known.countMinTrailingZeros();
  unsigned TrailZero1 = Known2.countMinTrailingZeros();
  unsigned TrailZ = TrailZero0 + TrailZero1;

  // Figure out the fewest known-bits operand.
  unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0,
                                      TrailBitsKnown1 - TrailZero1);
  unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth);

  APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) *
                      Bottom1.getLoBits(TrailBitsKnown1);

  Known.resetAll();
  Known.Zero.setHighBits(LeadZ);
  Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
  Known.One |= BottomKnown.getLoBits(ResultBitsKnown);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();

    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    Known.One &= Range.getUnsignedMax() & Mask;
    Known.Zero &= ~Range.getUnsignedMax() & Mask;
  }
}
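
// Illustrative example (added, not in the original source): for an i32 load
// annotated with !range !{i32 0, i32 256}, umax ^ umin has 24 leading zeros,
// so the top 24 bits of the loaded value are known to be zero.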
static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
                       return EphValues.count(U);
                     })) {
      if (V == E)
        return true;

      if (V == I || isSafeToSpeculativelyExecute(V)) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
               J != JE; ++J)
            WorkSet.push_back(*J);
      }
    }
  }

  return false;
}

// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (Function *F = CI->getCalledFunction())
      switch (F->getIntrinsicID()) {
      default: break;
      // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
      case Intrinsic::assume:
      case Intrinsic::sideeffect:
      case Intrinsic::dbg_declare:
      case Intrinsic::dbg_value:
      case Intrinsic::dbg_label:
      case Intrinsic::invariant_start:
      case Intrinsic::invariant_end:
      case Intrinsic::lifetime_start:
      case Intrinsic::lifetime_end:
      case Intrinsic::objectsize:
      case Intrinsic::ptr_annotation:
      case Intrinsic::var_annotation:
        return true;
      }

  return false;
}
bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  // With or without a DT, the only remaining case we will check is if the
  // instructions are in the same BB. Give up if that is not the case.
  if (Inv->getParent() != CxtI->getParent())
    return false;

  // If we have a dom tree, then we now know that the assume doesn't dominate
  // the other instruction. If we don't have a dom tree then we can check if
  // the assume is first in the BB.
  if (!DT) {
    // Search forward from the assume until we reach the context (or the end
    // of the block); the common case is that the assume will come first.
    for (auto I = std::next(BasicBlock::const_iterator(Inv)),
              IE = Inv->getParent()->end(); I != IE; ++I)
      if (&*I == CxtI)
        return true;
  }

  // The context comes first, but they're both in the same block. Make sure
  // there is nothing in between that might interrupt the control flow.
  for (BasicBlock::const_iterator I =
           std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
       I != IE; ++I)
    if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
      return false;

  return !isEphemeralValueOf(Inv, CxtI);
}
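
// Illustrative example (added, not in the original source):
//   %c = icmp eq i32 %x, 42
//   call void @llvm.assume(i1 %c)
//   %u = add i32 %x, 1
// Here the assume comes before %u in the same block (or dominates it), so it
// is a valid context for computing known bits of %x at %u; a non-speculatable
// instruction between the context and the assume would make it invalid.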
static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
                                       unsigned Depth, const Query &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.

  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
    if (!AssumeVH)
      continue;
    CallInst *I = cast<CallInst>(AssumeVH);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");
    if (Q.isExcluded(I))
      continue;

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
           "must be an assume intrinsic");

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxDepth)
      continue;

    Value *A, *B;
    auto m_V = m_CombineOr(m_Specific(V),
                           m_CombineOr(m_PtrToInt(m_Specific(V)),
                                       m_BitCast(m_Specific(V))));

    CmpInst::Predicate Pred;
    uint64_t C;
    // assume(v = a)
    if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
        Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      Known.Zero |= RHSKnown.Zero;
      Known.One |= RHSKnown.One;
    // assume(v & b = a)
    } else if (match(Arg,
                     m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      KnownBits MaskKnown(BitWidth);
      computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));

      // For those bits in the mask that are known to be one, we can propagate
      // known bits from the RHS to V.
      Known.Zero |= RHSKnown.Zero & MaskKnown.One;
      Known.One |= RHSKnown.One & MaskKnown.One;
    // assume(~(v & b) = a)
    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
                                   m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      KnownBits MaskKnown(BitWidth);
      computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));

      // For those bits in the mask that are known to be one, we can propagate
      // inverted known bits from the RHS to V.
      Known.Zero |= RHSKnown.One & MaskKnown.One;
      Known.One |= RHSKnown.Zero & MaskKnown.One;
    // assume(v | b = a)
    } else if (match(Arg,
                     m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      KnownBits BKnown(BitWidth);
      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

      // For those bits in B that are known to be zero, we can propagate known
      // bits from the RHS to V.
      Known.Zero |= RHSKnown.Zero & BKnown.Zero;
      Known.One |= RHSKnown.One & BKnown.Zero;
    // assume(~(v | b) = a)
    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
                                   m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      KnownBits BKnown(BitWidth);
      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

      // For those bits in B that are known to be zero, we can propagate
      // inverted known bits from the RHS to V.
      Known.Zero |= RHSKnown.One & BKnown.Zero;
      Known.One |= RHSKnown.Zero & BKnown.Zero;
    // assume(v ^ b = a)
    } else if (match(Arg,
                     m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      KnownBits BKnown(BitWidth);
      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

      // For those bits in B that are known to be zero, we can propagate known
      // bits from the RHS to V. For those bits in B that are known to be one,
      // we can propagate inverted known bits from the RHS to V.
      Known.Zero |= RHSKnown.Zero & BKnown.Zero;
      Known.One |= RHSKnown.One & BKnown.Zero;
      Known.Zero |= RHSKnown.One & BKnown.One;
      Known.One |= RHSKnown.Zero & BKnown.One;
    // assume(~(v ^ b) = a)
    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
                                   m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      KnownBits BKnown(BitWidth);
      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));

      // For those bits in B that are known to be zero, we can propagate
      // inverted known bits from the RHS to V. For those bits in B that are
      // known to be one, we can propagate known bits from the RHS to V.
      Known.Zero |= RHSKnown.One & BKnown.Zero;
      Known.One |= RHSKnown.Zero & BKnown.Zero;
      Known.Zero |= RHSKnown.Zero & BKnown.One;
      Known.One |= RHSKnown.One & BKnown.One;
    // assume(v << c = a)
    } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
                                   m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
               C < BitWidth) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the right by C.
      RHSKnown.Zero.lshrInPlace(C);
      Known.Zero |= RHSKnown.Zero;
      RHSKnown.One.lshrInPlace(C);
      Known.One |= RHSKnown.One;
    // assume(~(v << c) = a)
    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
                                   m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
               C < BitWidth) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      // For those bits in RHS that are known, we can propagate them inverted
      // to known bits in V shifted to the right by C.
      RHSKnown.One.lshrInPlace(C);
      Known.Zero |= RHSKnown.One;
      RHSKnown.Zero.lshrInPlace(C);
      Known.One |= RHSKnown.Zero;
    // assume(v >> c = a)
    } else if (match(Arg,
                     m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
                              m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
               C < BitWidth) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the right by C.
      Known.Zero |= RHSKnown.Zero << C;
      Known.One |= RHSKnown.One << C;
    // assume(~(v >> c) = a)
    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
                                   m_Value(A))) &&
               Pred == ICmpInst::ICMP_EQ &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
               C < BitWidth) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      // For those bits in RHS that are known, we can propagate them inverted
      // to known bits in V shifted to the right by C.
      Known.Zero |= RHSKnown.One << C;
      Known.One |= RHSKnown.Zero << C;
    // assume(v >=_s c) where c is non-negative
    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
               Pred == ICmpInst::ICMP_SGE &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

      if (RHSKnown.isNonNegative()) {
        // We know that the sign bit is zero.
        Known.makeNonNegative();
      }
    // assume(v >_s c) where c is at least -1.
    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
               Pred == ICmpInst::ICMP_SGT &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

      if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
        // We know that the sign bit is zero.
        Known.makeNonNegative();
      }
    // assume(v <=_s c) where c is negative
    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
               Pred == ICmpInst::ICMP_SLE &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

      if (RHSKnown.isNegative()) {
        // We know that the sign bit is one.
        Known.makeNegative();
      }
    // assume(v <_s c) where c is non-positive
    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
               Pred == ICmpInst::ICMP_SLT &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

      if (RHSKnown.isZero() || RHSKnown.isNegative()) {
        // We know that the sign bit is one.
        Known.makeNegative();
      }
    // assume(v <=_u c)
    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
               Pred == ICmpInst::ICMP_ULE &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

      // Whatever high bits in c are zero are known to be zero.
      Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
    // assume(v <_u c)
    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
               Pred == ICmpInst::ICMP_ULT &&
               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      KnownBits RHSKnown(BitWidth);
      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));

      // If the RHS is known zero, then this assumption must be wrong (nothing
      // is unsigned less than zero). Signal a conflict and get out of here.
      if (RHSKnown.isZero()) {
        Known.Zero.setAllBits();
        Known.One.setAllBits();
        return;
      }

      // Whatever high bits in c are zero are known to be zero (if c is a power
      // of 2, then one more).
      if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
        Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
      else
        Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
    }
  }

  // If assumptions conflict with each other or previous known bits, then we
  // have a logical fallacy. It's possible that the assumption is not reachable,
  // so this isn't a real bug. On the other hand, the program may have undefined
  // behavior, or we might have a bug in the compiler. We can't assert/crash, so
  // clear out the known bits, try to warn the user, and hope for the best.
  if (Known.Zero.intersects(Known.One)) {
    auto *CxtI = const_cast<Instruction *>(Q.CxtI);
    return OptimizationRemarkAnalysis("value-tracking", "BadAssumption",
           << "Detected conflicting code assumptions. Program may "
              "have undefined behavior, or compiler may have "
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known. KZF and KOF are
/// operator-specific functions that, given the known-zero or known-one bits
/// respectively, and a shift amount, compute the implied known-zero or
/// known-one bits of the shift operator's result respectively for that shift
/// amount. The results from calling KZF and KOF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, KnownBits &Known, KnownBits &Known2,
    unsigned Depth, const Query &Q,
    function_ref<APInt(const APInt &, unsigned)> KZF,
    function_ref<APInt(const APInt &, unsigned)> KOF) {
  unsigned BitWidth = Known.getBitWidth();

  if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ShiftAmt = SA->getLimitedValue(BitWidth - 1);

    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    Known.Zero = KZF(Known.Zero, ShiftAmt);
    Known.One = KOF(Known.One, ShiftAmt);
    // If the known bits conflict, this must be an overflowing left shift, so
    // the shift result is poison. We can return anything we want. Choose 0 for
    // the best folding opportunity.
    if (Known.hasConflict())
      Known.setAllZero();
    return;
  }

  computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);

  // If the shift amount could be greater than or equal to the bit-width of the
  // LHS, the value could be poison, but bail out because the check below is
  // expensive. TODO: Should we just carry on?
  if ((~Known.Zero).uge(BitWidth)) {
    Known.resetAll();
    return;
  }

  // Note: We cannot use Known.Zero.getLimitedValue() here, because if
  // BitWidth > 64 and any upper bits are known, we'll end up returning the
  // limit value (which implies all bits are known).
  uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
  uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();

  // It would be more-clearly correct to use the two temporaries for this
  // calculation. Reusing the APInts here to prevent unnecessary allocations.
  Known.resetAll();

  // If we know the shifter operand is nonzero, we can sometimes infer more
  // known bits. However this is expensive to compute, so be lazy about it and
  // only compute it when absolutely necessary.
  Optional<bool> ShifterOperandIsNonZero;

  // Early exit if we can't constrain any well-defined shift amount.
  if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
      !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
    ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q);
    if (!*ShifterOperandIsNonZero)
      return;
  }

  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

  Known.Zero.setAllBits();
  Known.One.setAllBits();
  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
    // Combine the shifted known input bits only for those shift amounts
    // compatible with its known constraints.
    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
      continue;
    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
      continue;
    // If we know the shifter is nonzero, we may be able to infer more known
    // bits. This check is sunk down as far as possible to avoid the expensive
    // call to isKnownNonZero if the cheaper checks above fail.
    if (ShiftAmt == 0) {
      if (!ShifterOperandIsNonZero.hasValue())
        ShifterOperandIsNonZero =
            isKnownNonZero(I->getOperand(1), Depth + 1, Q);
      if (*ShifterOperandIsNonZero)
        continue;
    }

    Known.Zero &= KZF(Known2.Zero, ShiftAmt);
    Known.One &= KOF(Known2.One, ShiftAmt);
  }

  // If the known bits conflict, the result is poison. Return a 0 and hope the
  // caller can further optimize that.
  if (Known.hasConflict())
    Known.setAllZero();
}
static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
                                         unsigned Depth, const Query &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(Known);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

    // Output known-1 bits are only known if set in both the LHS & RHS.
    Known.One &= Known2.One;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    Known.Zero |= Known2.Zero;

    // and(x, add (x, -1)) is a common idiom that always clears the low bit;
    // here we handle the more general case of adding any odd number by
    // matching the form add(x, add(x, y)) where y is odd.
    // TODO: This could be generalized to clearing any bit set in y where the
    // following bit is known to be unset in y.
    Value *X = nullptr, *Y = nullptr;
    if (!Known.Zero[0] && !Known.One[0] &&
        match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) {
      Known2.resetAll();
      computeKnownBits(Y, Known2, Depth + 1, Q);
      if (Known2.countMinTrailingOnes() > 0)
        Known.Zero.setBit(0);
    }
    break;
  }
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    Known.Zero &= Known2.Zero;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    Known.One |= Known2.One;
    break;
  case Instruction::Xor: {
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
    Known.Zero = std::move(KnownZeroOut);
    break;
  }
  case Instruction::Mul: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
                        Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    // For the purposes of computing leading zeros we can conservatively
    // treat a udiv as a logical right shift by the power of 2 known to
    // be less than the denominator.
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
    unsigned LeadZ = Known2.countMinLeadingZeros();

    Known2.resetAll();
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
    if (RHSMaxLeadingZeros != BitWidth)
      LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);

    Known.Zero.setHighBits(LeadZ);
    break;
  }
  case Instruction::Select: {
    const Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    if (SelectPatternResult::isMinOrMax(SPF)) {
      computeKnownBits(RHS, Known, Depth + 1, Q);
      computeKnownBits(LHS, Known2, Depth + 1, Q);
    } else {
      computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
      computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    }

    unsigned MaxHighOnes = 0;
    unsigned MaxHighZeros = 0;
    if (SPF == SPF_SMAX) {
      // If both sides are negative, the result is negative.
      if (Known.isNegative() && Known2.isNegative())
        // We can derive a lower bound on the result by taking the max of the
        // leading one bits.
        MaxHighOnes =
            std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
      // If either side is non-negative, the result is non-negative.
      else if (Known.isNonNegative() || Known2.isNonNegative())
        MaxHighZeros = 1;
    } else if (SPF == SPF_SMIN) {
      // If both sides are non-negative, the result is non-negative.
      if (Known.isNonNegative() && Known2.isNonNegative())
        // We can derive an upper bound on the result by taking the max of the
        // leading zero bits.
        MaxHighZeros = std::max(Known.countMinLeadingZeros(),
                                Known2.countMinLeadingZeros());
      // If either side is negative, the result is negative.
      else if (Known.isNegative() || Known2.isNegative())
        MaxHighOnes = 1;
    } else if (SPF == SPF_UMAX) {
      // We can derive a lower bound on the result by taking the max of the
      // leading one bits.
      MaxHighOnes =
          std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
    } else if (SPF == SPF_UMIN) {
      // We can derive an upper bound on the result by taking the max of the
      // leading zero bits.
      MaxHighZeros =
          std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
    } else if (SPF == SPF_ABS) {
      // RHS from matchSelectPattern returns the negation part of abs pattern.
      // If the negate has an NSW flag we can assume the sign bit of the result
      // will be 0 because that makes abs(INT_MIN) undefined.
      if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
        MaxHighZeros = 1;
    }

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    if (MaxHighOnes > 0)
      Known.One.setHighBits(MaxHighOnes);
    if (MaxHighZeros > 0)
      Known.Zero.setHighBits(MaxHighZeros);
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    LLVM_FALLTHROUGH;
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy() ?
      Q.DL.getIndexTypeSizeInBits(ScalarTy) :
      Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    Known = Known.zextOrTrunc(BitWidth);
    // Any top bits are known to be zero.
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }
    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
      APInt KZResult = KnownZero << ShiftAmt;
      KZResult.setLowBits(ShiftAmt); // Low bits known 0.
      // If this shift has "nsw" keyword, then the result is either a poison
      // value or has the same sign bit as the first operand.
      if (NSW && KnownZero.isSignBitSet())
        KZResult.setSignBit();
      return KZResult;
    };

    auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
      APInt KOResult = KnownOne << ShiftAmt;
      if (NSW && KnownOne.isSignBitSet())
        KOResult.setSignBit();
      return KOResult;
    };

    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
    break;
  }
  case Instruction::LShr: {
    // (lshr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
      APInt KZResult = KnownZero.lshr(ShiftAmt);
      // High bits known zero.
      KZResult.setHighBits(ShiftAmt);
      return KZResult;
    };

    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
      return KnownOne.lshr(ShiftAmt);
    };

    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
    break;
  }
  case Instruction::AShr: {
    // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
      return KnownZero.ashr(ShiftAmt);
    };

    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
      return KnownOne.ashr(ShiftAmt);
    };

    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                           Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                           Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        APInt LowBits = RA - 1;
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

        // The low bits of the first operand are unchanged by the srem.
        Known.Zero = Known2.Zero & LowBits;
        Known.One = Known2.One & LowBits;

        // If the first operand is non-negative or has all low bits zero, then
        // the upper bits are all zero.
        if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
          Known.Zero |= ~LowBits;

        // If the first operand is negative and not all low bits are zero, then
        // the upper bits are all one.
        if (Known2.isNegative() && LowBits.intersects(Known2.One))
          Known.One |= ~LowBits;

        assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
        break;
      }
    }

    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
    // If it's known zero, our sign bit is also zero.
    if (Known2.isNonNegative())
      Known.makeNonNegative();

    break;
  case Instruction::URem: {
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      const APInt &RA = Rem->getValue();
      if (RA.isPowerOf2()) {
        APInt LowBits = (RA - 1);
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
        Known.Zero |= ~LowBits;
        Known.One &= LowBits;
        break;
      }
    }

    // Since the result is less than or equal to either operand, any leading
    // zero bits in either operand must also exist in the result.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);

    unsigned Leaders =
        std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
    Known.resetAll();
    Known.Zero.setHighBits(Leaders);
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(I);
    unsigned Align = AI->getAlignment();
    if (Align == 0)
      Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());

    if (Align > 0)
      Known.Zero.setLowBits(countTrailingZeros(Align));
    break;
  }
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    KnownBits LocalKnown(BitWidth);
    computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
    unsigned TrailZ = LocalKnown.countMinTrailingZeros();

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      Value *Index = I->getOperand(i);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.

        // Handle case when index is vector zeroinitializer
        Constant *CIndex = cast<Constant>(Index);
        if (CIndex->isZeroValue())
          continue;

        if (CIndex->getType()->isVectorTy())
          Index = CIndex->getSplatValue();

        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
        const StructLayout *SL = Q.DL.getStructLayout(STy);
        uint64_t Offset = SL->getElementOffset(Idx);
        TrailZ = std::min<unsigned>(TrailZ,
                                    countTrailingZeros(Offset));
      } else {
        // Handle array index arithmetic.
        Type *IndexedTy = GTI.getIndexedType();
        if (!IndexedTy->isSized()) {
          TrailZ = 0;
          break;
        }
        unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
        uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy);
        LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
        computeKnownBits(Index, LocalKnown, Depth + 1, Q);
        TrailZ = std::min(TrailZ,
                          unsigned(countTrailingZeros(TypeSize) +
                                   LocalKnown.countMinTrailingZeros()));
      }
    }

    Known.Zero.setLowBits(TrailZ);
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(I);
    // Handle the case of a simple two-predecessor recurrence PHI.
    // There's a lot more that could theoretically be done here, but
    // this is sufficient to catch some interesting cases.
    if (P->getNumIncomingValues() == 2) {
      for (unsigned i = 0; i != 2; ++i) {
        Value *L = P->getIncomingValue(i);
        Value *R = P->getIncomingValue(!i);
        Operator *LU = dyn_cast<Operator>(L);
        if (!LU)
          continue;
        unsigned Opcode = LU->getOpcode();
        // Check for operations that have the property that if
        // both their operands have low zero bits, the result
        // will have low zero bits.
        if (Opcode == Instruction::Add ||
            Opcode == Instruction::Sub ||
            Opcode == Instruction::And ||
            Opcode == Instruction::Or ||
            Opcode == Instruction::Mul) {
          Value *LL = LU->getOperand(0);
          Value *LR = LU->getOperand(1);
          // Find a recurrence.

          // Ok, we have a PHI of the form L op= R. Check for low
          // zero bits.
          computeKnownBits(R, Known2, Depth + 1, Q);

          // We need to take the minimum number of known bits
          KnownBits Known3(Known);
          computeKnownBits(L, Known3, Depth + 1, Q);

          Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                         Known3.countMinTrailingZeros()));

          auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
          if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
            // If initial value of recurrence is nonnegative, and we are adding
            // a nonnegative number with nsw, the result can only be nonnegative
            // or poison value regardless of the number of times we execute the
            // add in phi recurrence. If initial value is negative and we are
            // adding a negative number with nsw, the result can only be
            // negative or poison value. Similar arguments apply to sub and mul.
            //
            // (add non-negative, non-negative) --> non-negative
            // (add negative, negative) --> negative
            if (Opcode == Instruction::Add) {
              if (Known2.isNonNegative() && Known3.isNonNegative())
                Known.makeNonNegative();
              else if (Known2.isNegative() && Known3.isNegative())
                Known.makeNegative();
            }

            // (sub nsw non-negative, negative) --> non-negative
            // (sub nsw negative, non-negative) --> negative
            else if (Opcode == Instruction::Sub && LL == I) {
              if (Known2.isNonNegative() && Known3.isNegative())
                Known.makeNonNegative();
              else if (Known2.isNegative() && Known3.isNonNegative())
                Known.makeNegative();
            }

            // (mul nsw non-negative, non-negative) --> non-negative
            else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
                     Known3.isNonNegative())
              Known.makeNonNegative();
          }

          break;
        }
      }
    }

    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
      // Skip if every incoming value references to ourself.
      if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
        break;

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (Value *IncValue : P->incoming_values()) {
        // Skip direct self references.
        if (IncValue == P) continue;

        Known2 = KnownBits(BitWidth);
        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
        Known.Zero &= Known2.Zero;
        Known.One &= Known2.One;
        // If all bits have been ruled out, there's no need to check
        // more operands.
        if (!Known.Zero && !Known.One)
          break;
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke:
    // If range metadata is attached to this call, set known bits from that,
    // and then intersect with known bits based on other properties of the
    // function.
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
      computeKnownBits(RV, Known2, Depth + 1, Q);
      Known.Zero |= Known2.Zero;
      Known.One |= Known2.One;
    }
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::bitreverse:
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.reverseBits();
        Known.One |= Known2.One.reverseBits();
        break;
      case Intrinsic::bswap:
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.byteSwap();
        Known.One |= Known2.One.byteSwap();
        break;
      case Intrinsic::ctlz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleLZ = Known2.One.countLeadingZeros();
        // If this call is undefined for 0, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
        unsigned LowBits = Log2_32(PossibleLZ)+1;
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
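      // Illustrative note (added, not in the original source): e.g. for an
      // i32 operand whose bit 31 is known to be one, ctlz is 0; in general
      // the result can never exceed the number of leading zeros above the
      // highest known-one bit, so only its low bits can be set.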
      case Intrinsic::cttz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleTZ = Known2.One.countTrailingZeros();
        // If this call is undefined for 0, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
        unsigned LowBits = Log2_32(PossibleTZ)+1;
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::ctpop: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // We can bound the space the count needs. Also, bits known to be zero
        // can't contribute to the population.
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
        unsigned LowBits = Log2_32(BitsPossiblySet)+1;
        Known.Zero.setBitsFrom(LowBits);
        // TODO: we could bound KnownOne using the lower bound on the number
        // of bits which might be set provided by popcnt KnownOne2.
        break;
      }
      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left.
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        KnownBits Known3(Known);
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);

        Known.Zero =
            Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
        Known.One =
            Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
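        // Illustrative note (added, not in the original source): e.g.
        // fshr(i8 %a, i8 %b, 3) is treated as fshl with amount 8 - 3 = 5, so
        // the high 3 bits of the result come from the low bits of %a and the
        // low 5 bits come from the high bits of %b.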
1529 case Intrinsic::x86_sse42_crc32_64_64
:
1530 Known
.Zero
.setBitsFrom(32);
1535 case Instruction::ExtractElement
:
1536 // Look through extract element. At the moment we keep this simple and skip
1537 // tracking the specific element. But at least we might find information
1538 // valid for all elements of the vector (for example if vector is sign
1539 // extended, shifted, etc).
1540 computeKnownBits(I
->getOperand(0), Known
, Depth
+ 1, Q
);
1542 case Instruction::ExtractValue
:
1543 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
->getOperand(0))) {
1544 const ExtractValueInst
*EVI
= cast
<ExtractValueInst
>(I
);
1545 if (EVI
->getNumIndices() != 1) break;
1546 if (EVI
->getIndices()[0] == 0) {
1547 switch (II
->getIntrinsicID()) {
1549 case Intrinsic::uadd_with_overflow
:
1550 case Intrinsic::sadd_with_overflow
:
1551 computeKnownBitsAddSub(true, II
->getArgOperand(0),
1552 II
->getArgOperand(1), false, Known
, Known2
,
1555 case Intrinsic::usub_with_overflow
:
1556 case Intrinsic::ssub_with_overflow
:
1557 computeKnownBitsAddSub(false, II
->getArgOperand(0),
1558 II
->getArgOperand(1), false, Known
, Known2
,
1561 case Intrinsic::umul_with_overflow
:
1562 case Intrinsic::smul_with_overflow
:
1563 computeKnownBitsMul(II
->getArgOperand(0), II
->getArgOperand(1), false,
1564 Known
, Known2
, Depth
, Q
);
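
// Worked example for the Intrinsic::ctlz case above (illustrative): if the
// i32 argument is known to have bit 27 set, Known2.One.countLeadingZeros()
// is 4, so the ctlz result is at most 4.  Log2_32(4) + 1 == 3, so
// setBitsFrom(3) records that only the low 3 bits of the result can be set.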

/// Determine which bits of V are known to be either zero or one and return
/// them.
KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
  computeKnownBits(V, Known, Depth, Q);
  return Known;
}
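
// Usage sketch for the wrapper above (hypothetical caller, for illustration):
//   KnownBits Known = computeKnownBits(V, /*Depth=*/0, Q);
//   if (Known.countMinTrailingZeros() >= 2)
//     ; // V is provably a multiple of 4.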

/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero.  If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers.  In the case where V is a vector, the known
/// zero and known one values are the same width as the vector element, and the
/// bit is set only if it is true for all of the elements in the vector.
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                      const Query &Q) {
  assert(V && "No Value?");
  assert(Depth <= MaxDepth && "Limit Search Depth");
  unsigned BitWidth = Known.getBitWidth();

  assert((V->getType()->isIntOrIntVectorTy(BitWidth) ||
          V->getType()->isPtrOrPtrVectorTy()) &&
         "Not integer or pointer type!");

  Type *ScalarTy = V->getType()->getScalarType();
  unsigned ExpectedWidth = ScalarTy->isPointerTy() ?
    Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy);
  assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth");
  (void)BitWidth;
  (void)ExpectedWidth;

  const APInt *C;
  if (match(V, m_APInt(C))) {
    // We know all of the bits for a scalar constant or a splat vector constant!
    Known.One = *C;
    Known.Zero = ~Known.One;
    return;
  }
  // Null and aggregate-zero are all-zeros.
  if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
    Known.setAllZero();
    return;
  }
  // Handle a constant vector by taking the intersection of the known bits of
  // each element.
  if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
    // We know that CDS must be a vector of integers. Take the intersection of
    // each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
      APInt Elt = CDS->getElementAsAPInt(i);
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    return;
  }

  if (const auto *CV = dyn_cast<ConstantVector>(V)) {
    // We know that CV must be a vector of integers. Take the intersection of
    // each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      Constant *Element = CV->getAggregateElement(i);
      auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
      if (!ElementCI) {
        Known.resetAll();
        return;
      }
      const APInt &Elt = ElementCI->getValue();
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    return;
  }

  // Start out not knowing anything.
  Known.resetAll();

  // We can't imply anything about undefs.
  if (isa<UndefValue>(V))
    return;

  // There's no point in looking through other users of ConstantData for
  // assumptions.  Confirm that we've handled them all.
  assert(!isa<ConstantData>(V) && "Unhandled constant data!");

  // Limit search depth.
  // All recursive calls that increase depth must come after this.
  if (Depth == MaxDepth)
    return;

  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
  // the bits of its aliasee.
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
    if (!GA->isInterposable())
      computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
    return;
  }

  if (const Operator *I = dyn_cast<Operator>(V))
    computeKnownBitsFromOperator(I, Known, Depth, Q);

  // Aligned pointers have trailing zeros - refine Known.Zero set
  if (V->getType()->isPointerTy()) {
    unsigned Align = V->getPointerAlignment(Q.DL);
    if (Align)
      Known.Zero.setLowBits(countTrailingZeros(Align));
  }

  // computeKnownBitsFromAssume strictly refines Known.
  // Therefore, we run them after computeKnownBitsFromOperator.

  // Check whether a nearby assume intrinsic can determine some known bits.
  computeKnownBitsFromAssume(V, Known, Depth, Q);

  assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
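
// Example of the pointer-alignment refinement above: for a pointer with
// "align 16", getPointerAlignment returns 16 and countTrailingZeros(16) == 4,
// so the low 4 bits of Known.Zero get set.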

/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                            const Query &Q) {
  assert(Depth <= MaxDepth && "Limit Search Depth");

  // Attempt to match against constants.
  if (OrZero && match(V, m_Power2OrZero()))
    return true;
  if (match(V, m_Power2()))
    return true;

  // 1 << X is clearly a power of two if the one is not shifted off the end.  If
  // it is shifted off the end then the result is undefined.
  if (match(V, m_Shl(m_One(), m_Value())))
    return true;

  // (signmask) >>l X is clearly a power of two if the one is not shifted off
  // the bottom.  If it is shifted off the bottom then the result is undefined.
  if (match(V, m_LShr(m_SignMask(), m_Value())))
    return true;

  // The remaining tests are all recursive, so bail out if we hit the limit.
  if (Depth++ == MaxDepth)
    return false;

  Value *X = nullptr, *Y = nullptr;
  // A shift left or a logical shift right of a power of two is a power of two
  // or zero.
  if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
                 match(V, m_LShr(m_Value(X), m_Value()))))
    return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);

  if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
    return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);

  if (const SelectInst *SI = dyn_cast<SelectInst>(V))
    return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
           isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);

  if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
    // A power of two and'd with anything is a power of two or zero.
    if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) ||
        isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q))
      return true;
    // X & (-X) is always a power of two or zero.
    if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
      return true;
    return false;
  }

  // Adding a power-of-two or zero to the same power-of-two or zero yields
  // either the original power-of-two, a larger power-of-two or zero.
  if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
    const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
        Q.IIQ.hasNoSignedWrap(VOBO)) {
      if (match(X, m_And(m_Specific(Y), m_Value())) ||
          match(X, m_And(m_Value(), m_Specific(Y))))
        if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
          return true;
      if (match(Y, m_And(m_Specific(X), m_Value())) ||
          match(Y, m_And(m_Value(), m_Specific(X))))
        if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
          return true;

      unsigned BitWidth = V->getType()->getScalarSizeInBits();
      KnownBits LHSBits(BitWidth);
      computeKnownBits(X, LHSBits, Depth, Q);

      KnownBits RHSBits(BitWidth);
      computeKnownBits(Y, RHSBits, Depth, Q);
      // If i8 V is a power of two or zero:
      //  ZeroBits: 1 1 1 0 1 1 1 1
      // ~ZeroBits: 0 0 0 1 0 0 0 0
      if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
        // If OrZero isn't set, we cannot give back a zero result.
        // Make sure either the LHS or RHS has a bit set.
        if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
          return true;
    }
  }

  // An exact divide or right shift can only shift off zero bits, so the result
  // is a power of two only if the first operand is a power of two and not
  // copying a sign bit (sdiv int_min, 2).
  if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
      match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
    return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
                                  Depth, Q);
  }

  return false;
}
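
// Example of the X & (-X) rule above (i8, illustrative): for X = 0b01100100,
// -X = 0b10011100, so X & -X = 0b00000100, the lowest set bit of X, which is
// a power of two, or zero when X == 0.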

/// Test whether a GEP's result is known to be non-null.
///
/// Uses properties inherent in a GEP to try to determine whether it is known
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
                              const Query &Q) {
  const Function *F = nullptr;
  if (const Instruction *I = dyn_cast<Instruction>(GEP))
    F = I->getFunction();

  if (!GEP->isInBounds() ||
      NullPointerIsDefined(F, GEP->getPointerAddressSpace()))
    return false;

  // FIXME: Support vector-GEPs.
  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");

  // If the base pointer is non-null, we cannot walk to a null address with an
  // inbounds GEP in address space zero.
  if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
    return true;

  // Walk the GEP operands and see if any operand introduces a non-zero offset.
  // If so, then the GEP cannot produce a null pointer, as doing so would
  // inherently violate the inbounds contract within address space zero.
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    // Struct types are easy -- they must always be indexed by a constant.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = Q.DL.getStructLayout(STy);
      uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
      if (ElementOffset > 0)
        return true;
      continue;
    }

    // If we have a zero-sized type, the index doesn't matter. Keep looping.
    if (Q.DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
      continue;

    // Fast path the constant operand case both for efficiency and so we don't
    // increment Depth when just zipping down an all-constant GEP.
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
      if (!OpC->isZero())
        return true;
      continue;
    }

    // We post-increment Depth here because while isKnownNonZero increments it
    // as well, when we pop back up that increment won't persist. We don't want
    // to recurse 10k times just because we have 10k GEP operands. We don't
    // bail completely out because we want to handle constant GEPs regardless
    // of depth.
    if (Depth++ >= MaxDepth)
      continue;

    if (isKnownNonZero(GTI.getOperand(), Depth, Q))
      return true;
  }

  return false;
}
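
// Example (illustrative IR): for
//   %p = getelementptr inbounds { i32, i32 }, { i32, i32 }* %base, i64 0, i32 1
// the struct index 1 contributes a non-zero offset (4 bytes with the usual
// layout), so the inbounds GEP cannot yield null in address space zero even
// if %base itself is not known to be non-null.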

static bool isKnownNonNullFromDominatingCondition(const Value *V,
                                                  const Instruction *CtxI,
                                                  const DominatorTree *DT) {
  assert(V->getType()->isPointerTy() && "V must be pointer type");
  assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");

  if (!CtxI || !DT)
    return false;

  unsigned NumUsesExplored = 0;
  for (auto *U : V->users()) {
    // Avoid massive lists
    if (NumUsesExplored >= DomConditionsMaxUses)
      break;
    NumUsesExplored++;

    // If the value is used as an argument to a call or invoke, then argument
    // attributes may provide an answer about null-ness.
    if (auto CS = ImmutableCallSite(U))
      if (auto *CalledFunc = CS.getCalledFunction())
        for (const Argument &Arg : CalledFunc->args())
          if (CS.getArgOperand(Arg.getArgNo()) == V &&
              Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
            return true;

    // Consider only compare instructions uniquely controlling a branch
    CmpInst::Predicate Pred;
    if (!match(const_cast<User *>(U),
               m_c_ICmp(Pred, m_Specific(V), m_Zero())) ||
        (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
      continue;

    SmallVector<const User *, 4> WorkList;
    SmallPtrSet<const User *, 4> Visited;
    for (auto *CmpU : U->users()) {
      assert(WorkList.empty() && "Should be!");
      if (Visited.insert(CmpU).second)
        WorkList.push_back(CmpU);

      while (!WorkList.empty()) {
        auto *Curr = WorkList.pop_back_val();

        // If a user is an AND, add all its users to the work list. We only
        // propagate "pred != null" condition through AND because it is only
        // correct to assume that all conditions of AND are met in true branch.
        // TODO: Support similar logic of OR and EQ predicate?
        if (Pred == ICmpInst::ICMP_NE)
          if (auto *BO = dyn_cast<BinaryOperator>(Curr))
            if (BO->getOpcode() == Instruction::And) {
              for (auto *BOU : BO->users())
                if (Visited.insert(BOU).second)
                  WorkList.push_back(BOU);
              continue;
            }

        if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
          assert(BI->isConditional() && "uses a comparison!");

          BasicBlock *NonNullSuccessor =
              BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
          BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
          if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
            return true;
        } else if (Pred == ICmpInst::ICMP_NE && isGuard(Curr) &&
                   DT->dominates(cast<Instruction>(Curr), CtxI)) {
          return true;
        }
      }
    }
  }

  return false;
}
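
// Example (illustrative IR): given
//   %c = icmp ne i32* %p, null
//   br i1 %c, label %taken, label %other
// any context instruction dominated by the edge into %taken lets this routine
// conclude that %p is non-null there.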

/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never equal to Value?
/// 'RangeType' is the type of the value described by the range.
static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
  const unsigned NumRanges = Ranges->getNumOperands() / 2;
  assert(NumRanges >= 1);
  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());
    if (Range.contains(Value))
      return false;
  }
  return true;
}
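
// Example: !range metadata describing the single half-open range [1, 256)
// excludes 0, so this returns true when Value is zero; a range such as
// [0, 10) contains 0 and makes it return false.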

/// Return true if the given value is known to be non-zero when defined. For
/// vectors, return true if every element is known to be non-zero when
/// defined. For pointers, if the context instruction and dominator tree are
/// specified, perform context-sensitive analysis and return true if the
/// pointer couldn't possibly be null at the specified instruction.
/// Supports values with integer or pointer type and vectors of integers.
bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
  if (auto *C = dyn_cast<Constant>(V)) {
    if (C->isNullValue())
      return false;
    if (isa<ConstantInt>(C))
      // Must be non-zero due to null test above.
      return true;

    // For constant vectors, check that all elements are undefined or known
    // non-zero to determine that the whole vector is known non-zero.
    if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
      for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
        Constant *Elt = C->getAggregateElement(i);
        if (!Elt || Elt->isNullValue())
          return false;
        if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
          return false;
      }
      return true;
    }

    // A global variable in address space 0 is non null unless extern weak
    // or an absolute symbol reference. Other address spaces may have null as a
    // valid address for a global, so we can't assume anything.
    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
      if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
          GV->getType()->getAddressSpace() == 0)
        return true;
    } else
      return false;
  }

  if (auto *I = dyn_cast<Instruction>(V)) {
    if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
      // If the possible ranges don't contain zero, then the value is
      // definitely non-zero.
      if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
        const APInt ZeroValue(Ty->getBitWidth(), 0);
        if (rangeMetadataExcludesValue(Ranges, ZeroValue))
          return true;
      }
    }
  }

  // Some of the tests below are recursive, so bail out if we hit the limit.
  if (Depth++ >= MaxDepth)
    return false;

  // Check for pointer simplifications.
  if (V->getType()->isPointerTy()) {
    // Alloca never returns null, malloc might.
    if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0)
      return true;

    // A byval, inalloca, or nonnull argument is never null.
    if (const Argument *A = dyn_cast<Argument>(V))
      if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr())
        return true;

    // A Load tagged with nonnull metadata is never null.
    if (const LoadInst *LI = dyn_cast<LoadInst>(V))
      if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
        return true;

    if (const auto *Call = dyn_cast<CallBase>(V)) {
      if (Call->isReturnNonNull())
        return true;
      if (const auto *RP = getArgumentAliasingToReturnedPointer(Call))
        return isKnownNonZero(RP, Depth, Q);
    }
  }

  // Check for recursive pointer simplifications.
  if (V->getType()->isPointerTy()) {
    if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
      return true;

    // Look through bitcast operations, GEPs, and int2ptr instructions as they
    // do not alter the value, or at least not the nullness property of the
    // value, e.g., int2ptr is allowed to zero/sign extend the value.
    //
    // Note that we have to take special care to avoid looking through
    // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
    // as casts that can alter the value, e.g., AddrSpaceCasts.
    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
      if (isGEPKnownNonNull(GEP, Depth, Q))
        return true;

    if (auto *BCO = dyn_cast<BitCastOperator>(V))
      return isKnownNonZero(BCO->getOperand(0), Depth, Q);

    if (auto *I2P = dyn_cast<IntToPtrInst>(V))
      if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <=
          Q.DL.getTypeSizeInBits(I2P->getDestTy()))
        return isKnownNonZero(I2P->getOperand(0), Depth, Q);
  }

  // Similar to int2ptr above, we can look through ptr2int here if the cast
  // is a no-op or an extend and not a truncate.
  if (auto *P2I = dyn_cast<PtrToIntInst>(V))
    if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <=
        Q.DL.getTypeSizeInBits(P2I->getDestTy()))
      return isKnownNonZero(P2I->getOperand(0), Depth, Q);

  unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);

  // X | Y != 0 if X != 0 or Y != 0.
  Value *X = nullptr, *Y = nullptr;
  if (match(V, m_Or(m_Value(X), m_Value(Y))))
    return isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q);

  // ext X != 0 if X != 0.
  if (isa<SExtInst>(V) || isa<ZExtInst>(V))
    return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q);

  // shl X, Y != 0 if X is odd.  Note that the value of the shift is undefined
  // if the lowest bit is shifted off the end.
  if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
    // shl nuw can't remove any non-zero bits.
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
    if (Q.IIQ.hasNoUnsignedWrap(BO))
      return isKnownNonZero(X, Depth, Q);

    KnownBits Known(BitWidth);
    computeKnownBits(X, Known, Depth, Q);
    if (Known.One[0])
      return true;
  }
  // shr X, Y != 0 if X is negative.  Note that the value of the shift is not
  // defined if the sign bit is shifted off the end.
  else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
    // shr exact can only shift out zero bits.
    const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
    if (BO->isExact())
      return isKnownNonZero(X, Depth, Q);

    KnownBits Known = computeKnownBits(X, Depth, Q);
    if (Known.isNegative())
      return true;

    // If the shifter operand is a constant, and all of the bits shifted
    // out are known to be zero, and X is known non-zero then at least one
    // non-zero bit must remain.
    if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
      auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
      // Is there a known one in the portion not shifted out?
      if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
        return true;
      // Are all the bits to be shifted out known zero?
      if (Known.countMinTrailingZeros() >= ShiftVal)
        return isKnownNonZero(X, Depth, Q);
    }
  }
  // div exact can only produce a zero if the dividend is zero.
  else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
    return isKnownNonZero(X, Depth, Q);
  }
  // X + Y.
  else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
    KnownBits XKnown = computeKnownBits(X, Depth, Q);
    KnownBits YKnown = computeKnownBits(Y, Depth, Q);

    // If X and Y are both non-negative (as signed values) then their sum is not
    // zero unless both X and Y are zero.
    if (XKnown.isNonNegative() && YKnown.isNonNegative())
      if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q))
        return true;

    // If X and Y are both negative (as signed values) then their sum is not
    // zero unless both X and Y equal INT_MIN.
    if (XKnown.isNegative() && YKnown.isNegative()) {
      APInt Mask = APInt::getSignedMaxValue(BitWidth);
      // The sign bit of X is set.  If some other bit is set then X is not equal
      // to INT_MIN.
      if (XKnown.One.intersects(Mask))
        return true;
      // The sign bit of Y is set.  If some other bit is set then Y is not equal
      // to INT_MIN.
      if (YKnown.One.intersects(Mask))
        return true;
    }

    // The sum of a non-negative number and a power of two is not zero.
    if (XKnown.isNonNegative() &&
        isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
      return true;
    if (YKnown.isNonNegative() &&
        isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
      return true;
  }
  // X * Y.
  else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
    // If X and Y are non-zero then so is X * Y as long as the multiplication
    // does not overflow.
    if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
        isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q))
      return true;
  }
  // (C ? X : Y) != 0 if X != 0 and Y != 0.
  else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
    if (isKnownNonZero(SI->getTrueValue(), Depth, Q) &&
        isKnownNonZero(SI->getFalseValue(), Depth, Q))
      return true;
  }
  // PHI
  else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
    // Try and detect a recurrence that monotonically increases from a
    // starting value, as these are common as induction variables.
    if (PN->getNumIncomingValues() == 2) {
      Value *Start = PN->getIncomingValue(0);
      Value *Induction = PN->getIncomingValue(1);
      if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
        std::swap(Start, Induction);
      if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
        if (!C->isZero() && !C->isNegative()) {
          ConstantInt *X;
          if (Q.IIQ.UseInstrInfo &&
              (match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
               match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
              !X->isNegative())
            return true;
        }
      }
    }
    // Check if all incoming values are non-zero constant.
    bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) {
      return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
    });
    if (AllNonZeroConstants)
      return true;
  }

  KnownBits Known(BitWidth);
  computeKnownBits(V, Known, Depth, Q);
  return Known.One != 0;
}
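
// Examples for the cases above (illustrative IR): "or i32 %x, 1" is non-zero
// because one operand is a non-zero constant, and "shl nuw i32 %x, %s" is
// non-zero whenever %x is, since nuw guarantees no set bit is shifted out.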

/// Return true if V2 == V1 + X, where X is known non-zero.
static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
  const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
  if (!BO || BO->getOpcode() != Instruction::Add)
    return false;
  Value *Op = nullptr;
  if (V2 == BO->getOperand(0))
    Op = BO->getOperand(1);
  else if (V2 == BO->getOperand(1))
    Op = BO->getOperand(0);
  else
    return false;

  return isKnownNonZero(Op, 0, Q);
}

/// Return true if it is known that V1 != V2.
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
  if (V1 == V2)
    return false;
  if (V1->getType() != V2->getType())
    // We can't look through casts yet.
    return false;
  if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q))
    return true;

  if (V1->getType()->isIntOrIntVectorTy()) {
    // Are any known bits in V1 contradictory to known bits in V2? If V1
    // has a known zero where V2 has a known one, they must not be equal.
    KnownBits Known1 = computeKnownBits(V1, 0, Q);
    KnownBits Known2 = computeKnownBits(V2, 0, Q);

    if (Known1.Zero.intersects(Known2.One) ||
        Known2.Zero.intersects(Known1.One))
      return true;
  }
  return false;
}
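
// Example of the known-bits check above (illustrative IR): V1 = "or i32 %a, 4"
// has bit 2 known one, while V2 = "and i32 %b, -5" has bit 2 known zero, so
// the two values cannot be equal.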

/// Return true if 'V & Mask' is known to be zero.  We use this predicate to
/// simplify operations downstream. Mask is known to be zero for bits that V
/// cannot have.
///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers.  In the case where V is a vector, the mask,
/// known zero, and known one values are the same width as the vector element,
/// and the bit is set only if it is true for all of the elements in the vector.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
                       const Query &Q) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, Q);
  return Mask.isSubsetOf(Known.Zero);
}

// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
// Returns the input and lower/upper bounds.
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
                                const APInt *&CLow, const APInt *&CHigh) {
  assert(isa<Operator>(Select) &&
         cast<Operator>(Select)->getOpcode() == Instruction::Select &&
         "Input should be a Select!");

  const Value *LHS, *RHS, *LHS2, *RHS2;
  SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
  if (SPF != SPF_SMAX && SPF != SPF_SMIN)
    return false;

  if (!match(RHS, m_APInt(CLow)))
    return false;

  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
  if (getInverseMinMaxFlavor(SPF) != SPF2)
    return false;

  if (!match(RHS2, m_APInt(CHigh)))
    return false;

  if (SPF == SPF_SMIN)
    std::swap(CLow, CHigh);

  In = LHS2;
  return CLow->sle(*CHigh);
}
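
// Example (illustrative IR): smax(smin(%x, 127), -128) matches with In = %x,
// CLow = -128 and CHigh = 127; ComputeNumSignBits uses this below to conclude
// that an i32 clamped to [-128, 127] has at least 25 sign bits.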

/// For vector constants, loop over the elements and find the constant with the
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
/// element with the minimum number of sign bits.
static unsigned computeNumSignBitsVectorConstant(const Value *V,
                                                 unsigned TyBits) {
  const auto *CV = dyn_cast<Constant>(V);
  if (!CV || !CV->getType()->isVectorTy())
    return 0;

  unsigned MinSignBits = TyBits;
  unsigned NumElts = CV->getType()->getVectorNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    // If we find a non-ConstantInt, bail out.
    auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
    if (!Elt)
      return 0;

    MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
  }

  return MinSignBits;
}

static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
                                       const Query &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const Query &Q) {
  unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q);
  assert(Result > 0 && "At least one sign bit needs to be present!");
  return Result;
}

/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign bit
/// (itself), but other cases can give us information. For example, immediately
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits.
static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
                                       const Query &Q) {
  assert(Depth <= MaxDepth && "Limit Search Depth");

  // We return the minimum number of sign bits that are guaranteed to be present
  // in V, so for undef we have to conservatively return 1.  We don't have the
  // same behavior for poison though -- that's a FIXME today.

  Type *ScalarTy = V->getType()->getScalarType();
  unsigned TyBits = ScalarTy->isPointerTy() ?
    Q.DL.getIndexTypeSizeInBits(ScalarTy) :
    Q.DL.getTypeSizeInBits(ScalarTy);

  unsigned Tmp, Tmp2;
  unsigned FirstAnswer = 1;

  // Note that ConstantInt is handled by the general computeKnownBits case
  // below.

  if (Depth == MaxDepth)
    return 1;  // Limit search depth.

  const Operator *U = dyn_cast<Operator>(V);
  switch (Operator::getOpcode(V)) {
  default: break;
  case Instruction::SExt:
    Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
    return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;

  case Instruction::SDiv: {
    const APInt *Denominator;
    // sdiv X, C -> adds log(C) sign bits.
    if (match(U->getOperand(1), m_APInt(Denominator))) {

      // Ignore non-positive denominator.
      if (!Denominator->isStrictlyPositive())
        break;

      // Calculate the incoming numerator bits.
      unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);

      // Add floor(log(C)) bits to the numerator bits.
      return std::min(TyBits, NumBits + Denominator->logBase2());
    }
    break;
  }

  case Instruction::SRem: {
    const APInt *Denominator;
    // srem X, C -> we know that the result is within [-C+1,C) when C is a
    // positive constant.  This let us put a lower bound on the number of sign
    // bits.
    if (match(U->getOperand(1), m_APInt(Denominator))) {

      // Ignore non-positive denominator.
      if (!Denominator->isStrictlyPositive())
        break;

      // Calculate the incoming numerator bits. SRem by a positive constant
      // can't lower the number of sign bits.
      unsigned NumrBits =
          ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);

      // Calculate the leading sign bit constraints by examining the
      // denominator.  Given that the denominator is positive, there are two
      // cases:
      //
      //  1. the numerator is positive. The result range is [0,C) and [0,C) u<
      //     (1 << ceilLogBase2(C)).
      //
      //  2. the numerator is negative. Then the result range is (-C,0] and
      //     integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
      //
      // Thus a lower bound on the number of sign bits is `TyBits -
      // ceilLogBase2(C)`.

      unsigned ResBits = TyBits - Denominator->ceilLogBase2();
      return std::max(NumrBits, ResBits);
    }
    break;
  }

  case Instruction::AShr: {
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
    // ashr X, C   -> adds C sign bits.  Vectors too.
    const APInt *ShAmt;
    if (match(U->getOperand(1), m_APInt(ShAmt))) {
      if (ShAmt->uge(TyBits))
        break;  // Bad shift.
      unsigned ShAmtLimited = ShAmt->getZExtValue();
      Tmp += ShAmtLimited;
      if (Tmp > TyBits) Tmp = TyBits;
    }
    return Tmp;
  }
  case Instruction::Shl: {
    const APInt *ShAmt;
    if (match(U->getOperand(1), m_APInt(ShAmt))) {
      // shl destroys sign bits.
      Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
      if (ShAmt->uge(TyBits) ||   // Bad shift.
          ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
      Tmp2 = ShAmt->getZExtValue();
      return Tmp - Tmp2;
    }
    break;
  }
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:    // NOT is handled here.
    // Logical binary ops preserve the number of sign bits at the worst.
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
    if (Tmp != 1) {
      Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
      FirstAnswer = std::min(Tmp, Tmp2);
      // We computed what we know about the sign bits as our first
      // answer. Now proceed to the generic code that uses
      // computeKnownBits, and pick whichever answer is better.
    }
    break;

  case Instruction::Select: {
    // If we have a clamp pattern, we know that the number of sign bits will be
    // the minimum of the clamp min/max range.
    const Value *X;
    const APInt *CLow, *CHigh;
    if (isSignedMinMaxClamp(U, X, CLow, CHigh))
      return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());

    Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
    if (Tmp == 1) break;
    Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
    return std::min(Tmp, Tmp2);
  }

  case Instruction::Add:
    // Add can have at most one carry bit.  Thus we know that the output
    // is, at worst, one more bit than the inputs.
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
    if (Tmp == 1) break;

    // Special case decrementing a value (ADD X, -1):
    if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
      if (CRHS->isAllOnesValue()) {
        KnownBits Known(TyBits);
        computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);

        // If the input is known to be 0 or 1, the output is 0/-1, which is all
        // sign bits set.
        if ((Known.Zero | 1).isAllOnesValue())
          return TyBits;

        // If we are subtracting one from a positive number, there is no carry
        // out of the result.
        if (Known.isNonNegative())
          return Tmp;
      }

    Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
    if (Tmp2 == 1) break;
    return std::min(Tmp, Tmp2)-1;

  case Instruction::Sub:
    Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
    if (Tmp2 == 1) break;

    // Handle NEG.
    if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
      if (CLHS->isNullValue()) {
        KnownBits Known(TyBits);
        computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
        // If the input is known to be 0 or 1, the output is 0/-1, which is all
        // sign bits set.
        if ((Known.Zero | 1).isAllOnesValue())
          return TyBits;

        // If the input is known to be positive (the sign bit is known clear),
        // the output of the NEG has the same number of sign bits as the input.
        if (Known.isNonNegative())
          return Tmp2;

        // Otherwise, we treat this like a SUB.
      }

    // Sub can have at most one carry bit.  Thus we know that the output
    // is, at worst, one more bit than the inputs.
    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
    if (Tmp == 1) break;
    return std::min(Tmp, Tmp2)-1;

  case Instruction::Mul: {
    // The output of the Mul can be at most twice the valid bits in the inputs.
    unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
    if (SignBitsOp0 == 1) break;
    unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
    if (SignBitsOp1 == 1) break;
    unsigned OutValidBits =
        (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
    return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
  }

  case Instruction::PHI: {
    const PHINode *PN = cast<PHINode>(U);
    unsigned NumIncomingValues = PN->getNumIncomingValues();
    // Don't analyze large in-degree PHIs.
    if (NumIncomingValues > 4) break;
    // Unreachable blocks may have zero-operand PHI nodes.
    if (NumIncomingValues == 0) break;

    // Take the minimum of all incoming values.  This can't infinitely loop
    // because of our depth threshold.
    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
    for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
      if (Tmp == 1) return Tmp;
      Tmp = std::min(
          Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
    }
    return Tmp;
  }

  case Instruction::Trunc:
    // FIXME: it's tricky to do anything useful for this, but it is an important
    // case for targets like X86.
    break;

  case Instruction::ExtractElement:
    // Look through extract element. At the moment we keep this simple and skip
    // tracking the specific element. But at least we might find information
    // valid for all elements of the vector (for example if vector is sign
    // extended, shifted, etc).
    return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);

  case Instruction::ShuffleVector: {
    // TODO: This is copied almost directly from the SelectionDAG version of
    //       ComputeNumSignBits. It would be better if we could share common
    //       code. If not, make sure that changes are translated to the DAG.

    // Collect the minimum number of sign bits that are shared by every vector
    // element referenced by the shuffle.
    auto *Shuf = cast<ShuffleVectorInst>(U);
    int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
    int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
    APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
    for (int i = 0; i != NumMaskElts; ++i) {
      int M = Shuf->getMaskValue(i);
      assert(M < NumElts * 2 && "Invalid shuffle mask constant");
      // For undef elements, we don't know anything about the common state of
      // the shuffle result.
      if (M == -1)
        return 1;
      if (M < NumElts)
        DemandedLHS.setBit(M % NumElts);
      else
        DemandedRHS.setBit(M % NumElts);
    }
    Tmp = std::numeric_limits<unsigned>::max();
    if (!!DemandedLHS)
      Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
    if (!!DemandedRHS) {
      Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
      Tmp = std::min(Tmp, Tmp2);
    }
    // If we don't know anything, early out and try computeKnownBits fall-back.
    if (Tmp == 1)
      break;
    assert(Tmp <= V->getType()->getScalarSizeInBits() &&
           "Failed to determine minimum sign bits");
    return Tmp;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.

  // If we can examine all elements of a vector constant successfully, we're
  // done (we can't do any better than that). If not, keep trying.
  if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits))
    return VecSignBits;

  KnownBits Known(TyBits);
  computeKnownBits(V, Known, Depth, Q);

  // If we know that the sign bit is either zero or one, determine the number of
  // identical bits in the top of the input value.
  return std::max(FirstAnswer, Known.countMinSignBits());
}
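
// Example for the AShr case above: "ashr i32 %x, 24" copies the sign bit into
// the top 24 positions, so at least 1 + 24 = 25 sign bits are reported
// (capped at 32).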

/// This function computes the integer multiple of Base that equals V.
/// If successful, it returns true and returns the multiple in Multiple;
/// otherwise it returns false. It looks through SExt instructions only if
/// LookThroughSExt is true.
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
                           bool LookThroughSExt, unsigned Depth) {
  const unsigned MaxDepth = 6;

  assert(V && "No Value?");
  assert(Depth <= MaxDepth && "Limit Search Depth");
  assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");

  Type *T = V->getType();

  ConstantInt *CI = dyn_cast<ConstantInt>(V);

  if (Base == 0)
    return false;

  if (Base == 1) {
    Multiple = V;
    return true;
  }

  ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
  Constant *BaseVal = ConstantInt::get(T, Base);
  if (CO && CO == BaseVal) {
    // Multiple is 1.
    Multiple = ConstantInt::get(T, 1);
    return true;
  }

  if (CI && CI->getZExtValue() % Base == 0) {
    Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
    return true;
  }

  if (Depth == MaxDepth) return false;  // Limit search depth.

  Operator *I = dyn_cast<Operator>(V);
  if (!I) return false;

  switch (I->getOpcode()) {
  default: break;
  case Instruction::SExt:
    if (!LookThroughSExt) return false;
    // otherwise fall through to ZExt
    LLVM_FALLTHROUGH;
  case Instruction::ZExt:
    return ComputeMultiple(I->getOperand(0), Base, Multiple,
                           LookThroughSExt, Depth+1);
  case Instruction::Shl:
  case Instruction::Mul: {
    Value *Op0 = I->getOperand(0);
    Value *Op1 = I->getOperand(1);

    if (I->getOpcode() == Instruction::Shl) {
      ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
      if (!Op1CI) return false;
      // Turn Op0 << Op1 into Op0 * 2^Op1
      APInt Op1Int = Op1CI->getValue();
      uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
      APInt API(Op1Int.getBitWidth(), 0);
      API.setBit(BitToSet);
      Op1 = ConstantInt::get(V->getContext(), API);
    }

    Value *Mul0 = nullptr;
    if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
      if (Constant *Op1C = dyn_cast<Constant>(Op1))
        if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
          if (Op1C->getType()->getPrimitiveSizeInBits() <
              MulC->getType()->getPrimitiveSizeInBits())
            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
          if (Op1C->getType()->getPrimitiveSizeInBits() >
              MulC->getType()->getPrimitiveSizeInBits())
            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());

          // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
          Multiple = ConstantExpr::getMul(MulC, Op1C);
          return true;
        }

      if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
        if (Mul0CI->getValue() == 1) {
          // V == Base * Op1, so return Op1
          Multiple = Op1;
          return true;
        }
    }

    Value *Mul1 = nullptr;
    if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
      if (Constant *Op0C = dyn_cast<Constant>(Op0))
        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
          if (Op0C->getType()->getPrimitiveSizeInBits() <
              MulC->getType()->getPrimitiveSizeInBits())
            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
          if (Op0C->getType()->getPrimitiveSizeInBits() >
              MulC->getType()->getPrimitiveSizeInBits())
            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());

          // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
          Multiple = ConstantExpr::getMul(MulC, Op0C);
          return true;
        }

      if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
        if (Mul1CI->getValue() == 1) {
          // V == Base * Op0, so return Op0
          Multiple = Op0;
          return true;
        }
    }
  }
  }

  // We could not determine if V is a multiple of Base.
  return false;
}

Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
                                            const TargetLibraryInfo *TLI) {
  const Function *F = ICS.getCalledFunction();
  if (!F)
    return Intrinsic::not_intrinsic;

  if (F->isIntrinsic())
    return F->getIntrinsicID();

  if (!TLI)
    return Intrinsic::not_intrinsic;

  LibFunc Func;
  // We're going to make assumptions on the semantics of the functions, check
  // that the target knows that it's available in this environment and it does
  // not have local linkage.
  if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func))
    return Intrinsic::not_intrinsic;

  if (!ICS.onlyReadsMemory())
    return Intrinsic::not_intrinsic;

  // Otherwise check if we have a call to a function that can be turned into a
  // vector intrinsic.
  switch (Func) {
  default:
    break;
  case LibFunc_sin:
  case LibFunc_sinf:
  case LibFunc_sinl:
    return Intrinsic::sin;
  case LibFunc_cos:
  case LibFunc_cosf:
  case LibFunc_cosl:
    return Intrinsic::cos;
  case LibFunc_exp:
  case LibFunc_expf:
  case LibFunc_expl:
    return Intrinsic::exp;
  case LibFunc_exp2:
  case LibFunc_exp2f:
  case LibFunc_exp2l:
    return Intrinsic::exp2;
  case LibFunc_log:
  case LibFunc_logf:
  case LibFunc_logl:
    return Intrinsic::log;
  case LibFunc_log10:
  case LibFunc_log10f:
  case LibFunc_log10l:
    return Intrinsic::log10;
  case LibFunc_log2:
  case LibFunc_log2f:
  case LibFunc_log2l:
    return Intrinsic::log2;
  case LibFunc_fabs:
  case LibFunc_fabsf:
  case LibFunc_fabsl:
    return Intrinsic::fabs;
  case LibFunc_fmin:
  case LibFunc_fminf:
  case LibFunc_fminl:
    return Intrinsic::minnum;
  case LibFunc_fmax:
  case LibFunc_fmaxf:
  case LibFunc_fmaxl:
    return Intrinsic::maxnum;
  case LibFunc_copysign:
  case LibFunc_copysignf:
  case LibFunc_copysignl:
    return Intrinsic::copysign;
  case LibFunc_floor:
  case LibFunc_floorf:
  case LibFunc_floorl:
    return Intrinsic::floor;
  case LibFunc_ceil:
  case LibFunc_ceilf:
  case LibFunc_ceill:
    return Intrinsic::ceil;
  case LibFunc_trunc:
  case LibFunc_truncf:
  case LibFunc_truncl:
    return Intrinsic::trunc;
  case LibFunc_rint:
  case LibFunc_rintf:
  case LibFunc_rintl:
    return Intrinsic::rint;
  case LibFunc_nearbyint:
  case LibFunc_nearbyintf:
  case LibFunc_nearbyintl:
    return Intrinsic::nearbyint;
  case LibFunc_round:
  case LibFunc_roundf:
  case LibFunc_roundl:
    return Intrinsic::round;
  case LibFunc_pow:
  case LibFunc_powf:
  case LibFunc_powl:
    return Intrinsic::pow;
  case LibFunc_sqrt:
  case LibFunc_sqrtf:
  case LibFunc_sqrtl:
    return Intrinsic::sqrt;
  }

  return Intrinsic::not_intrinsic;
}

/// Return true if we can prove that the specified FP value is never equal to
/// -0.0.
///
/// NOTE: this function will need to be revisited when we support non-default
/// rounding modes!
bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
                                unsigned Depth) {
  if (auto *CFP = dyn_cast<ConstantFP>(V))
    return !CFP->getValueAPF().isNegZero();

  // Limit search depth.
  if (Depth == MaxDepth)
    return false;

  auto *Op = dyn_cast<Operator>(V);
  if (!Op)
    return false;

  // Check if the nsz fast-math flag is set.
  if (auto *FPO = dyn_cast<FPMathOperator>(Op))
    if (FPO->hasNoSignedZeros())
      return true;

  // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
  if (match(Op, m_FAdd(m_Value(), m_PosZeroFP())))
    return true;

  // sitofp and uitofp turn into +0.0 for zero.
  if (isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op))
    return true;

  if (auto *Call = dyn_cast<CallInst>(Op)) {
    Intrinsic::ID IID = getIntrinsicForCallSite(Call, TLI);
    switch (IID) {
    default:
      break;
    // sqrt(-0.0) = -0.0, no other negative results are possible.
    case Intrinsic::sqrt:
    case Intrinsic::canonicalize:
      return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
    // fabs(x) != -0.0
    case Intrinsic::fabs:
      return true;
    }
  }

  return false;
}

/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
/// bit despite comparing equal.
static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
                                            const TargetLibraryInfo *TLI,
                                            bool SignBitOnly,
                                            unsigned Depth) {
  // TODO: This function does not do the right thing when SignBitOnly is true
  // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
  // which flips the sign bits of NaNs.  See
  // https://llvm.org/bugs/show_bug.cgi?id=31702.

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
    return !CFP->getValueAPF().isNegative() ||
           (!SignBitOnly && CFP->getValueAPF().isZero());
  }

  // Handle vector of constants.
  if (auto *CV = dyn_cast<Constant>(V)) {
    if (CV->getType()->isVectorTy()) {
      unsigned NumElts = CV->getType()->getVectorNumElements();
      for (unsigned i = 0; i != NumElts; ++i) {
        auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
        if (!CFP)
          return false;
        if (CFP->getValueAPF().isNegative() &&
            (SignBitOnly || !CFP->getValueAPF().isZero()))
          return false;
      }

      // All non-negative ConstantFPs.
      return true;
    }
  }

  if (Depth == MaxDepth)
    return false; // Limit search depth.

  const Operator *I = dyn_cast<Operator>(V);
  if (!I)
    return false;

  switch (I->getOpcode()) {
  default:
    break;
  // Unsigned integers are always nonnegative.
  case Instruction::UIToFP:
    return true;
  case Instruction::FMul:
    // x*x is always non-negative or a NaN.
    if (I->getOperand(0) == I->getOperand(1) &&
        (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
      return true;

    LLVM_FALLTHROUGH;
  case Instruction::FAdd:
  case Instruction::FDiv:
  case Instruction::FRem:
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                           Depth + 1) &&
           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
                                           Depth + 1);
  case Instruction::Select:
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
                                           Depth + 1) &&
           cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
                                           Depth + 1);
  case Instruction::FPExt:
  case Instruction::FPTrunc:
    // Widening/narrowing never change sign.
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                           Depth + 1);
  case Instruction::ExtractElement:
    // Look through extract element. At the moment we keep this simple and skip
    // tracking the specific element. But at least we might find information
    // valid for all elements of the vector.
    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                           Depth + 1);
  case Instruction::Call:
    const auto *CI = cast<CallInst>(I);
    Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
    switch (IID) {
    default:
      break;
    case Intrinsic::maxnum:
      return (isKnownNeverNaN(I->getOperand(0), TLI) &&
              cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
                                              SignBitOnly, Depth + 1)) ||
             (isKnownNeverNaN(I->getOperand(1), TLI) &&
              cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
                                              SignBitOnly, Depth + 1));

    case Intrinsic::maximum:
      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                             Depth + 1) ||
             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
                                             Depth + 1);
    case Intrinsic::minnum:
    case Intrinsic::minimum:
      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                             Depth + 1) &&
             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
                                             Depth + 1);
    case Intrinsic::exp:
    case Intrinsic::exp2:
    case Intrinsic::fabs:
      return true;

    case Intrinsic::sqrt:
      // sqrt(x) is always >= -0 or NaN.  Moreover, sqrt(x) == -0 iff x == -0.
      if (!SignBitOnly)
        return true;
      return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
                                 CannotBeNegativeZero(CI->getOperand(0), TLI));

    case Intrinsic::powi:
      if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
        // powi(x,n) is non-negative if n is even.
        if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
          return true;
      }
      // TODO: This is not correct.  Given that exp is an integer, here are the
      // ways that pow can return a negative value:
      //
      //   pow(x, exp)    --> negative if exp is odd and x is negative.
      //   pow(-0, exp)   --> -inf if exp is negative odd.
      //   pow(-0, exp)   --> -0 if exp is positive odd.
      //   pow(-inf, exp) --> -0 if exp is negative odd.
      //   pow(-inf, exp) --> -inf if exp is positive odd.
      //
      // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
      // but we must return false if x == -0.  Unfortunately we do not currently
      // have a way of expressing this constraint.  See details in
      // https://llvm.org/bugs/show_bug.cgi?id=31702.
      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                             Depth + 1);

    case Intrinsic::fma:
    case Intrinsic::fmuladd:
      // x*x+y is non-negative if y is non-negative.
      return I->getOperand(0) == I->getOperand(1) &&
             (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
             cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
                                             Depth + 1);
    }
    break;
  }
  return false;
}
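
// Example for the Intrinsic::powi case above: powi(%x, 4) has an even
// exponent, so the routine reports it as never ordered-less-than-zero
// without looking at %x.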

bool llvm::CannotBeOrderedLessThanZero(const Value *V,
                                       const TargetLibraryInfo *TLI) {
  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
}

bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
}

bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
                           unsigned Depth) {
  assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");

  // If we're told that NaNs won't happen, assume they won't.
  if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
    if (FPMathOp->hasNoNaNs())
      return true;

  // Handle scalar constants.
  if (auto *CFP = dyn_cast<ConstantFP>(V))
    return !CFP->isNaN();

  if (Depth == MaxDepth)
    return false;

  if (auto *Inst = dyn_cast<Instruction>(V)) {
    switch (Inst->getOpcode()) {
    case Instruction::FAdd:
    case Instruction::FMul:
    case Instruction::FSub:
    case Instruction::FDiv:
    case Instruction::FRem: {
      // TODO: Need isKnownNeverInfinity
      return false;
    }
    case Instruction::Select: {
      return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
             isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
    }
    case Instruction::SIToFP:
    case Instruction::UIToFP:
      return true;
    case Instruction::FPTrunc:
    case Instruction::FPExt:
      return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
    default:
      break;
    }
  }

  if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::canonicalize:
    case Intrinsic::fabs:
    case Intrinsic::copysign:
    case Intrinsic::exp:
    case Intrinsic::exp2:
    case Intrinsic::floor:
    case Intrinsic::ceil:
    case Intrinsic::trunc:
    case Intrinsic::rint:
    case Intrinsic::nearbyint:
    case Intrinsic::round:
      return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
    case Intrinsic::sqrt:
      return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
             CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
    default:
      return false;
    }
  }

  // Bail out for constant expressions, but try to handle vector constants.
  if (!V->getType()->isVectorTy() || !isa<Constant>(V))
    return false;

  // For vectors, verify that each element is not NaN.
  unsigned NumElts = V->getType()->getVectorNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
    if (!Elt)
      return false;
    if (isa<UndefValue>(Elt))
      continue;
    auto *CElt = dyn_cast<ConstantFP>(Elt);
    if (!CElt || CElt->isNaN())
      return false;
  }
  // All elements were confirmed not-NaN or undefined.
  return true;
}
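
// Example of how the cases above compose: llvm.sqrt(llvm.fabs(%x)) is known
// never NaN whenever %x is, because the fabs case forwards the query to %x
// and CannotBeOrderedLessThanZero(fabs(%x)) is always true.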

Value *llvm::isBytewiseValue(Value *V) {

  // All byte-wide stores are splatable, even of arbitrary variables.
  if (V->getType()->isIntegerTy(8))
    return V;

  LLVMContext &Ctx = V->getContext();

  // Undefs don't care.
  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
  if (isa<UndefValue>(V))
    return UndefInt8;

  Constant *C = dyn_cast<Constant>(V);
  if (!C) {
    // Conceptually, we could handle things like:
    //   %a = zext i8 %X to i16
    //   %b = shl i16 %a, 8
    //   %c = or i16 %a, %b
    // but until there is an example that actually needs this, it doesn't seem
    // worth worrying about.
    return nullptr;
  }

  // Handle 'null' ConstantArrayZero etc.
  if (C->isNullValue())
    return Constant::getNullValue(Type::getInt8Ty(Ctx));

  // Constant floating-point values can be handled as integer values if the
  // corresponding integer value is "byteable".  An important case is 0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
    Type *Ty = nullptr;
    if (CFP->getType()->isHalfTy())
      Ty = Type::getInt16Ty(Ctx);
    else if (CFP->getType()->isFloatTy())
      Ty = Type::getInt32Ty(Ctx);
    else if (CFP->getType()->isDoubleTy())
      Ty = Type::getInt64Ty(Ctx);
    // Don't handle long double formats, which have strange constraints.
    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
  }

  // We can handle constant integers that are multiple of 8 bits.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
    if (CI->getBitWidth() % 8 == 0) {
      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
      if (!CI->getValue().isSplat(8))
        return nullptr;
      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
    }
  }

  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
    if (LHS == RHS)
      return LHS;
    if (!LHS || !RHS)
      return nullptr;
    if (LHS == UndefInt8)
      return RHS;
    if (RHS == UndefInt8)
      return LHS;
    return nullptr;
  };

  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
    Value *Val = UndefInt8;
    for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
        return nullptr;
    return Val;
  }

  if (isa<ConstantVector>(C)) {
    Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
    return Splat ? isBytewiseValue(Splat) : nullptr;
  }

  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
    Value *Val = UndefInt8;
    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
        return nullptr;
    return Val;
  }

  // Don't try to handle the handful of other constants.
  return nullptr;
}
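
// Examples: isBytewiseValue(i32 0xAAAAAAAA) yields i8 0xAA because the value
// is a splat of one byte, and isBytewiseValue(double 0.0) yields i8 0 via the
// null-value fast path.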

// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
// indices from Idxs that should be left out when inserting into the resulting
// struct. To is the result struct built so far, new insertvalue instructions
// build on that.
static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
                                SmallVectorImpl<unsigned> &Idxs,
                                unsigned IdxSkip,
                                Instruction *InsertBefore) {
  StructType *STy = dyn_cast<StructType>(IndexedType);
  if (STy) {
    // Save the original To argument so we can modify it
    Value *OrigTo = To;
    // General case, the type indexed by Idxs is a struct
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      // Process each struct element recursively
      Idxs.push_back(i);
      Value *PrevTo = To;
      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
                             InsertBefore);
      Idxs.pop_back();
      if (!To) {
        // Couldn't find any inserted value for this index? Cleanup
        while (PrevTo != OrigTo) {
          InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
          PrevTo = Del->getAggregateOperand();
          Del->eraseFromParent();
        }
        // Stop processing elements
        break;
      }
    }
    // If we successfully found a value for each of our subaggregates
    if (To)
      return To;
  }
  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
  // the struct's elements had a value that was inserted directly. In the latter
  // case, perhaps we can't determine each of the subelements individually, but
  // we might be able to find the complete struct somewhere.

  // Find the value that is at that particular spot
  Value *V = FindInsertedValue(From, Idxs);

  if (!V)
    return nullptr;

  // Insert the value in the new (sub) aggregate
  return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
                                 "tmp", InsertBefore);
}

// This helper takes a nested struct and extracts a part of it (which is again a
// struct) into a new value. For example, given the struct:
// { a, { b, { c, d }, e } }
// and the indices "1, 1" this returns
// { c, d }.
//
// It does this by inserting an insertvalue for each element in the resulting
// struct, as opposed to just inserting a single struct. This will only work if
// each of the elements of the substruct are known (ie, inserted into From by an
// insertvalue instruction somewhere).
//
// All inserted insertvalue instructions are inserted before InsertBefore
static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
                                Instruction *InsertBefore) {
  assert(InsertBefore && "Must have someplace to insert!");
  Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
                                                       idx_range);
  Value *To = UndefValue::get(IndexedType);
  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
  unsigned IdxSkip = Idxs.size();

  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
}
/// Given an aggregate and a sequence of indices, see if the scalar value
/// indexed is already around as a register, for example if it was inserted
/// directly into the aggregate.
///
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
                               Instruction *InsertBefore) {
  // Nothing to index? Just return V then (this is useful at the end of our
  // recursion).
  if (idx_range.empty())
    return V;
  // We have indices, so V should have an indexable type.
  assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
         "Not looking at a struct or array?");
  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
         "Invalid indices for type?");

  if (Constant *C = dyn_cast<Constant>(V)) {
    C = C->getAggregateElement(idx_range[0]);
    if (!C) return nullptr;
    return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
  }

  if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
    // Loop the indices for the insertvalue instruction in parallel with the
    // requested indices
    const unsigned *req_idx = idx_range.begin();
    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
         i != e; ++i, ++req_idx) {
      if (req_idx == idx_range.end()) {
        // We can't handle this without inserting insertvalues
        if (!InsertBefore)
          return nullptr;

        // The requested index identifies a part of a nested aggregate. Handle
        // this specially. For example,
        // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
        // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
        // %C = extractvalue {i32, { i32, i32 } } %B, 1
        // This can be changed into
        // %A = insertvalue {i32, i32 } undef, i32 10, 0
        // %C = insertvalue {i32, i32 } %A, i32 11, 1
        // which allows the unused 0,0 element from the nested struct to be
        // removed.
        return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
                                 InsertBefore);
      }

      // This insert value inserts something else than what we are looking for.
      // See if the (aggregate) value inserted into has the value we are
      // looking for, then.
      if (*req_idx != *i)
        return FindInsertedValue(I->getAggregateOperand(), idx_range,
                                 InsertBefore);
    }
    // If we end up here, the indices of the insertvalue match with those
    // requested (though possibly only partially). Now we recursively look at
    // the inserted value, passing any remaining indices.
    return FindInsertedValue(I->getInsertedValueOperand(),
                             makeArrayRef(req_idx, idx_range.end()),
                             InsertBefore);
  }

  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
    // If we're extracting a value from an aggregate that was extracted from
    // something else, we can extract from that something else directly instead.
    // However, we will need to chain I's indices with the requested indices.

    // Calculate the number of indices required
    unsigned size = I->getNumIndices() + idx_range.size();
    // Allocate some space to put the new indices in
    SmallVector<unsigned, 5> Idxs;
    Idxs.reserve(size);
    // Add indices from the extract value instruction
    Idxs.append(I->idx_begin(), I->idx_end());

    // Add requested indices
    Idxs.append(idx_range.begin(), idx_range.end());

    assert(Idxs.size() == size
           && "Number of indices added not correct?");

    return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
  }
  // Otherwise, we don't know (such as, extracting from a function return value
  // or load instruction)
  return nullptr;
}

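// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): forwarding an already-available scalar to an extractvalue.
#if 0
static bool simplifyExtractValue(ExtractValueInst *EV) {
  // If the indexed scalar already exists as a register, reuse it directly.
  if (Value *V = FindInsertedValue(EV->getAggregateOperand(),
                                   EV->getIndices())) {
    EV->replaceAllUsesWith(V);
    return true;
  }
  return false;
}
#endif
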
/// Analyze the specified pointer to see if it can be expressed as a base
/// pointer plus a constant offset. Return the base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
                                              const DataLayout &DL) {
  unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
  APInt ByteOffset(BitWidth, 0);

  // We walk up the defs but use a visited set to handle unreachable code. In
  // that case, we stop after accumulating the cycle once (not that it
  // matters).
  SmallPtrSet<Value *, 16> Visited;
  while (Visited.insert(Ptr).second) {
    if (Ptr->getType()->isVectorTy())
      break;

    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
      // If one of the values we have visited is an addrspacecast, then
      // the pointer type of this GEP may be different from the type
      // of the Ptr parameter which was passed to this function. This
      // means when we construct GEPOffset, we need to use the size
      // of GEP's pointer type rather than the size of the original
      // pointer type.
      APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      if (!GEP->accumulateConstantOffset(DL, GEPOffset))
        break;

      APInt OrigByteOffset(ByteOffset);
      ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
      if (ByteOffset.getMinSignedBits() > 64) {
        // Stop traversal if the pointer offset wouldn't fit into int64_t
        // (this should be removed if Offset is updated to an APInt)
        ByteOffset = OrigByteOffset;
        break;
      }

      Ptr = GEP->getPointerOperand();
    } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
               Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
      Ptr = cast<Operator>(Ptr)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
      if (GA->isInterposable())
        break;
      Ptr = GA->getAliasee();
    } else {
      break;
    }
  }
  Offset = ByteOffset.getSExtValue();
  return Ptr;
}

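// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): proving two pointers are a known constant distance apart.
#if 0
static bool haveSameBase(Value *A, Value *B, const DataLayout &DL,
                         int64_t &Distance) {
  int64_t OffA = 0, OffB = 0;
  Value *BaseA = GetPointerBaseWithConstantOffset(A, OffA, DL);
  Value *BaseB = GetPointerBaseWithConstantOffset(B, OffB, DL);
  if (BaseA != BaseB)
    return false;
  Distance = OffB - OffA;
  return true;
}
#endif
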
bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
                                       unsigned CharSize) {
  // Make sure the GEP has exactly three arguments.
  if (GEP->getNumOperands() != 3)
    return false;

  // Make sure the index-ee is a pointer to array of \p CharSize integers.
  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
  if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
    return false;

  // Check to make sure that the first operand of the GEP is an integer and
  // has value 0 so that we are sure we're indexing into the initializer.
  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
  if (!FirstIdx || !FirstIdx->isZero())
    return false;

  return true;
}

bool llvm::getConstantDataArrayInfo(const Value *V,
                                    ConstantDataArraySlice &Slice,
                                    unsigned ElementSize, uint64_t Offset) {
  assert(V);

  // Look through bitcast instructions and geps.
  V = V->stripPointerCasts();

  // If the value is a GEP instruction or constant expression, treat it as an
  // offset.
  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
    // The GEP operator should be based on a pointer to string constant, and is
    // indexing into the string constant.
    if (!isGEPBasedOnPointerToString(GEP, ElementSize))
      return false;

    // If the second index isn't a ConstantInt, then this is a variable index
    // into the array. If this occurs, we can't say anything meaningful about
    // the string.
    uint64_t StartIdx = 0;
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
      StartIdx = CI->getZExtValue();
    else
      return false;
    return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
                                    StartIdx + Offset);
  }

  // The GEP instruction, constant or instruction, must reference a global
  // variable that is a constant and is initialized. The referenced constant
  // initializer is the array that we'll use for optimization.
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return false;

  const ConstantDataArray *Array;
  ArrayType *ArrayTy;
  if (GV->getInitializer()->isNullValue()) {
    Type *GVTy = GV->getValueType();
    if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
      // A zeroinitializer for the array; there is no ConstantDataArray.
      Array = nullptr;
    } else {
      const DataLayout &DL = GV->getParent()->getDataLayout();
      uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy);
      uint64_t Length = SizeInBytes / (ElementSize / 8);
      if (Length <= Offset)
        return false;

      Slice.Array = nullptr;
      Slice.Offset = 0;
      Slice.Length = Length - Offset;
      return true;
    }
  } else {
    // This must be a ConstantDataArray.
    Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
    if (!Array)
      return false;
    ArrayTy = Array->getType();
  }
  if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
    return false;

  uint64_t NumElts = ArrayTy->getArrayNumElements();
  if (Offset > NumElts)
    return false;

  Slice.Array = Array;
  Slice.Offset = Offset;
  Slice.Length = NumElts - Offset;
  return true;
}

/// This function computes the length of a null-terminated C string pointed to
/// by V. If successful, it returns true and returns the string in Str.
/// If unsuccessful, it returns false.
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
                                 uint64_t Offset, bool TrimAtNul) {
  ConstantDataArraySlice Slice;
  if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
    return false;

  if (Slice.Array == nullptr) {
    if (TrimAtNul) {
      Str = StringRef();
      return true;
    }
    if (Slice.Length == 1) {
      Str = StringRef("", 1);
      return true;
    }
    // We cannot instantiate a StringRef as we do not have an appropriate string
    // of 0s at hand.
    return false;
  }

  // Start out with the entire array in the StringRef.
  Str = Slice.Array->getAsString();
  // Skip over 'offset' bytes.
  Str = Str.substr(Slice.Offset);

  if (TrimAtNul) {
    // Trim off the \0 and anything after it. If the array is not nul
    // terminated, we just return the whole end of string. The client may know
    // some other way that the string is length-bound.
    Str = Str.substr(0, Str.find('\0'));
  }
  return true;
}

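// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): a libcall simplifier inspecting a constant format string.
#if 0
static bool formatStringIsJustPercentS(const Value *Fmt) {
  StringRef S;
  // With TrimAtNul set, S holds the C string up to the first '\0'.
  return getConstantStringInfo(Fmt, S, /*Offset=*/0, /*TrimAtNul=*/true) &&
         S == "%s";
}
#endif
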
// These next two are very similar to the above, but also look through PHI
// nodes.
// TODO: See if we can integrate these two together.

/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'.  If we can't, return 0.
static uint64_t GetStringLengthH(const Value *V,
                                 SmallPtrSetImpl<const PHINode*> &PHIs,
                                 unsigned CharSize) {
  // Look through noop bitcast instructions.
  V = V->stripPointerCasts();

  // If this is a PHI node, there are two cases: either we have already seen it
  // or we haven't.
  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
    if (!PHIs.insert(PN).second)
      return ~0ULL;  // already in the set.

    // If it was new, see if all the input strings are the same length.
    uint64_t LenSoFar = ~0ULL;
    for (Value *IncValue : PN->incoming_values()) {
      uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
      if (Len == 0) return 0; // Unknown length -> unknown.

      if (Len == ~0ULL) continue;

      if (Len != LenSoFar && LenSoFar != ~0ULL)
        return 0;    // Disagree -> unknown.
      LenSoFar = Len;
    }

    // Success, all agree.
    return LenSoFar;
  }

  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
  if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
    if (Len1 == 0) return 0;
    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
    if (Len2 == 0) return 0;
    if (Len1 == ~0ULL) return Len2;
    if (Len2 == ~0ULL) return Len1;
    if (Len1 != Len2) return 0;
    return Len1;
  }

  // Otherwise, see if we can read the string.
  ConstantDataArraySlice Slice;
  if (!getConstantDataArrayInfo(V, Slice, CharSize))
    return 0;

  if (Slice.Array == nullptr)
    return 1;

  // Search for nul characters
  unsigned NullIndex = 0;
  for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
    if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
      break;
  }

  return NullIndex + 1;
}

/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'.  If we can't, return 0.
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
  if (!V->getType()->isPointerTy())
    return 0;

  SmallPtrSet<const PHINode*, 32> PHIs;
  uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
  // an empty string as a length.
  return Len == ~0ULL ? 1 : Len;
}

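// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical). Note the convention: GetStringLength returns strlen()+1 on
// success and 0 when the length is unknown.
#if 0
static bool isKnownNonEmptyCString(const Value *Ptr) {
  uint64_t LenPlusOne = GetStringLength(Ptr, /*CharSize=*/8);
  return LenPlusOne > 1; // 0 means unknown, 1 means the empty string.
}
#endif
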
const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
  assert(Call &&
         "getArgumentAliasingToReturnedPointer only works on nonnull calls");
  if (const Value *RV = Call->getReturnedArgOperand())
    return RV;
  // This can be used only as a aliasing property.
  if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call))
    return Call->getArgOperand(0);
  return nullptr;
}

bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
    const CallBase *Call) {
  return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
         Call->getIntrinsicID() == Intrinsic::strip_invariant_group;
}

/// \p PN defines a loop-variant pointer to an object.  Check if the
/// previous iteration of the loop was referring to the same object as \p PN.
static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
                                         const LoopInfo *LI) {
  // Find the loop-defined value.
  Loop *L = LI->getLoopFor(PN->getParent());
  if (PN->getNumIncomingValues() != 2)
    return true;

  // Find the value from previous iteration.
  auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
    PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
    return true;

  // If a new pointer is loaded in the loop, the pointer references a different
  // object in every iteration.  E.g.:
  //    for (i)
  //       int *p = a[i];
  //       Use(p);
  if (auto *Load = dyn_cast<LoadInst>(PrevValue))
    if (!L->isLoopInvariant(Load->getPointerOperand()))
      return false;
  return true;
}

Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
                                 unsigned MaxLookup) {
  if (!V->getType()->isPointerTy())
    return V;
  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        return V;
      V = GA->getAliasee();
    } else if (isa<AllocaInst>(V)) {
      // An alloca can't be further simplified.
      return V;
    } else {
      if (auto *Call = dyn_cast<CallBase>(V)) {
        // CaptureTracking can know about special capturing properties of some
        // intrinsics like launder.invariant.group, that can't be expressed with
        // the attributes, but have properties like returning aliasing pointer.
        // Because some analysis may assume that nocaptured pointer is not
        // returned from some special intrinsic (because function would have to
        // be marked with returns attribute), it is crucial to use this function
        // because it should be in sync with CaptureTracking. Not using it may
        // cause weird miscompilations where 2 aliasing pointers are assumed to
        // noalias.
        if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
          V = RP;
          continue;
        }
      }

      // See if InstructionSimplify knows any relevant tricks.
      if (Instruction *I = dyn_cast<Instruction>(V))
        // TODO: Acquire a DominatorTree and AssumptionCache and use them.
        if (Value *Simplified = SimplifyInstruction(I, {DL, I})) {
          V = Simplified;
          continue;
        }

      return V;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  }
  return V;
}

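// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): using GetUnderlyingObject to show a pointer is local state.
#if 0
static bool pointsIntoLocalAlloca(Value *Ptr, const DataLayout &DL) {
  // Walk through GEPs, casts and aliases back to the allocation site, if any.
  return isa<AllocaInst>(GetUnderlyingObject(Ptr, DL));
}
#endif
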
void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
                                const DataLayout &DL, LoopInfo *LI,
                                unsigned MaxLookup) {
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist;
  Worklist.push_back(V);
  do {
    Value *P = Worklist.pop_back_val();
    P = GetUnderlyingObject(P, DL, MaxLookup);

    if (!Visited.insert(P).second)
      continue;

    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(P)) {
      // If this PHI changes the underlying object in every iteration of the
      // loop, don't look through it.  Consider:
      //   int **A;
      //   for (i) {
      //     Prev = Curr;     // Prev = PHI (Prev_0, Curr)
      //     Curr = A[i];
      //     *Prev, *Curr;
      //   }
      //
      // Prev is tracking Curr one iteration behind so they refer to different
      // underlying objects.
      if (!LI || !LI->isLoopHeader(PN->getParent()) ||
          isSameUnderlyingObjectInLoop(PN, LI))
        for (Value *IncValue : PN->incoming_values())
          Worklist.push_back(IncValue);
      continue;
    }

    Objects.push_back(P);
  } while (!Worklist.empty());
}

/// This is the function that does the work of looking through basic
/// ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant, a multiplied value, or a phi, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
           !isa<PHINode>(U->getOperand(1))))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (true);
}

/// This is a wrapper around GetUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
/// It returns false if unidentified object is found in GetUnderlyingObjects.
bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
                                          SmallVectorImpl<Value *> &Objects,
                                          const DataLayout &DL) {
  SmallPtrSet<const Value *, 16> Visited;
  SmallVector<const Value *, 4> Working(1, V);
  do {
    V = Working.pop_back_val();

    SmallVector<Value *, 4> Objs;
    GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);

    for (Value *V : Objs) {
      if (!Visited.insert(V).second)
        continue;
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
        const Value *O =
          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
        if (O->getType()->isPointerTy()) {
          Working.push_back(O);
          continue;
        }
      }
      // If GetUnderlyingObjects fails to find an identifiable object,
      // getUnderlyingObjectsForCodeGen also fails for safety.
      if (!isIdentifiedObject(V)) {
        Objects.clear();
        return false;
      }
      Objects.push_back(const_cast<Value *>(V));
    }
  } while (!Working.empty());
  return true;
}

/// Return true if the only users of this pointer are lifetime markers.
bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
  for (const User *U : V->users()) {
    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II) return false;

    if (!II->isLifetimeStartOrEnd())
      return false;
  }
  return true;
}

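// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): a cleanup pass can treat an alloca whose only users are
// lifetime markers as removable.
#if 0
static bool allocaOnlyHasLifetimeUses(const AllocaInst *AI) {
  return onlyUsedByLifetimeMarkers(AI);
}
#endif
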
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
                                        const Instruction *CtxI,
                                        const DominatorTree *DT) {
  const Operator *Inst = dyn_cast<Operator>(V);
  if (!Inst)
    return false;

  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
    if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
      if (C->canTrap())
        return false;

  switch (Inst->getOpcode()) {
  default:
    return true;
  case Instruction::UDiv:
  case Instruction::URem: {
    // x / y is undefined if y == 0.
    const APInt *V;
    if (match(Inst->getOperand(1), m_APInt(V)))
      return *V != 0;
    return false;
  }
  case Instruction::SDiv:
  case Instruction::SRem: {
    // x / y is undefined if y == 0 or x == INT_MIN and y == -1
    const APInt *Numerator, *Denominator;
    if (!match(Inst->getOperand(1), m_APInt(Denominator)))
      return false;
    // We cannot hoist this division if the denominator is 0.
    if (*Denominator == 0)
      return false;
    // It's safe to hoist if the denominator is not 0 or -1.
    if (*Denominator != -1)
      return true;
    // At this point we know that the denominator is -1.  It is safe to hoist as
    // long we know that the numerator is not INT_MIN.
    if (match(Inst->getOperand(0), m_APInt(Numerator)))
      return !Numerator->isMinSignedValue();
    // The numerator *might* be MinSignedValue.
    return false;
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(Inst);
    if (!LI->isUnordered() ||
        // Speculative load may create a race that did not exist in the source.
        LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
        // Speculative load may load data from dirty regions.
        LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
        LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
      return false;
    const DataLayout &DL = LI->getModule()->getDataLayout();
    return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
                                              LI->getAlignment(), DL, CtxI, DT);
  }
  case Instruction::Call: {
    auto *CI = cast<const CallInst>(Inst);
    const Function *Callee = CI->getCalledFunction();

    // The called function could have undefined behavior or side-effects, even
    // if marked readnone nounwind.
    return Callee && Callee->isSpeculatable();
  }
  case Instruction::VAArg:
  case Instruction::Alloca:
  case Instruction::Invoke:
  case Instruction::CallBr:
  case Instruction::PHI:
  case Instruction::Store:
  case Instruction::Ret:
  case Instruction::Br:
  case Instruction::IndirectBr:
  case Instruction::Switch:
  case Instruction::Unreachable:
  case Instruction::Fence:
  case Instruction::AtomicRMW:
  case Instruction::AtomicCmpXchg:
  case Instruction::LandingPad:
  case Instruction::Resume:
  case Instruction::CatchSwitch:
  case Instruction::CatchPad:
  case Instruction::CatchRet:
  case Instruction::CleanupPad:
  case Instruction::CleanupRet:
    return false; // Misc instructions which have effects
  }
}

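// Illustrative sketch (not part of the original file; the helper below is
// hypothetical): a hoisting transform's early-out before moving an
// instruction to an earlier insertion point.
#if 0
static bool canSpeculateTo(const Instruction *I, const Instruction *InsertPt,
                           const DominatorTree *DT) {
  // The instruction must be free of UB and side effects at the new location;
  // the context instruction lets loads prove dereferenceability there.
  return isSafeToSpeculativelyExecute(I, InsertPt, DT);
}
#endif
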
bool llvm::mayBeMemoryDependent(const Instruction &I) {
  return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
}

OverflowResult llvm::computeOverflowForUnsignedMul(
    const Value *LHS, const Value *RHS, const DataLayout &DL,
    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
    bool UseInstrInfo) {
  // Multiplying n * m significant bits yields a result of n + m significant
  // bits. If the total number of significant bits does not exceed the
  // result bit width (minus 1), there is no overflow.
  // This means if we have enough leading zero bits in the operands
  // we can guarantee that the result does not overflow.
  // Ref: "Hacker's Delight" by Henry Warren
  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);
  computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
                   UseInstrInfo);
  computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
                   UseInstrInfo);
  // Note that underestimating the number of zero bits gives a more
  // conservative answer.
  unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
                      RHSKnown.countMinLeadingZeros();
  // First handle the easy case: if we have enough zero bits there's
  // definitely no overflow.
  if (ZeroBits >= BitWidth)
    return OverflowResult::NeverOverflows;

  // Get the largest possible values for each operand.
  APInt LHSMax = ~LHSKnown.Zero;
  APInt RHSMax = ~RHSKnown.Zero;

  // We know the multiply operation doesn't overflow if the maximum values for
  // each operand will not overflow after we multiply them together.
  bool MaxOverflow;
  (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
  if (!MaxOverflow)
    return OverflowResult::NeverOverflows;

  // We know it always overflows if multiplying the smallest possible values
  // for the operands also results in overflow.
  bool MinOverflow;
  (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
  if (MinOverflow)
    return OverflowResult::AlwaysOverflows;

  return OverflowResult::MayOverflow;
}

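// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): using the overflow result to add the nuw flag to a multiply.
#if 0
static void tryAddNUW(BinaryOperator *Mul, const DataLayout &DL,
                      AssumptionCache *AC, DominatorTree *DT) {
  if (computeOverflowForUnsignedMul(Mul->getOperand(0), Mul->getOperand(1), DL,
                                    AC, Mul, DT) ==
      OverflowResult::NeverOverflows)
    Mul->setHasNoUnsignedWrap(true);
}
#endif
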
OverflowResult
llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
                                  const DataLayout &DL, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  // Multiplying n * m significant bits yields a result of n + m significant
  // bits. If the total number of significant bits does not exceed the
  // result bit width (minus 1), there is no overflow.
  // This means if we have enough leading sign bits in the operands
  // we can guarantee that the result does not overflow.
  // Ref: "Hacker's Delight" by Henry Warren
  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();

  // Note that underestimating the number of sign bits gives a more
  // conservative answer.
  unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) +
                      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT);

  // First handle the easy case: if we have enough sign bits there's
  // definitely no overflow.
  if (SignBits > BitWidth + 1)
    return OverflowResult::NeverOverflows;

  // There are two ambiguous cases where there can be no overflow:
  //   SignBits == BitWidth + 1    and
  //   SignBits == BitWidth
  // The second case is difficult to check, therefore we only handle the
  // first case.
  if (SignBits == BitWidth + 1) {
    // It overflows only when both arguments are negative and the true
    // product is exactly the minimum negative number.
    // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
    // For simplicity we just check if at least one side is not negative.
    KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
                                          nullptr, UseInstrInfo);
    KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
                                          nullptr, UseInstrInfo);
    if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
      return OverflowResult::NeverOverflows;
  }
  return OverflowResult::MayOverflow;
}

OverflowResult llvm::computeOverflowForUnsignedAdd(
    const Value *LHS, const Value *RHS, const DataLayout &DL,
    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
    bool UseInstrInfo) {
  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
                                        nullptr, UseInstrInfo);
  if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
    KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
                                          nullptr, UseInstrInfo);

    if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
      // The sign bit is set in both cases: this MUST overflow.
      return OverflowResult::AlwaysOverflows;
    }

    if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
      // The sign bit is clear in both cases: this CANNOT overflow.
      return OverflowResult::NeverOverflows;
    }
  }

  return OverflowResult::MayOverflow;
}

/// Return true if we can prove that adding the two values of the
/// knownbits will not overflow.
/// Otherwise return false.
static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
                                    const KnownBits &RHSKnown) {
  // Addition of two 2's complement numbers having opposite signs will never
  // overflow.
  if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
      (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
    return true;

  // If either of the values is known to be non-negative, adding them can only
  // overflow if the second is also non-negative, so we can assume that.
  // Two non-negative numbers will only overflow if there is a carry to the
  // sign bit, so we can check if even when the values are as big as possible
  // there is no overflow to the sign bit.
  if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
    APInt MaxLHS = ~LHSKnown.Zero;
    MaxLHS.clearSignBit();
    APInt MaxRHS = ~RHSKnown.Zero;
    MaxRHS.clearSignBit();
    APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
    return Result.isSignBitClear();
  }

  // If either of the values is known to be negative, adding them can only
  // overflow if the second is also negative, so we can assume that.
  // Two negative number will only overflow if there is no carry to the sign
  // bit, so we can check if even when the values are as small as possible
  // there is overflow to the sign bit.
  if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
    APInt MinLHS = LHSKnown.One;
    MinLHS.clearSignBit();
    APInt MinRHS = RHSKnown.One;
    MinRHS.clearSignBit();
    APInt Result = std::move(MinLHS) + std::move(MinRHS);
    return Result.isSignBitSet();
  }

  // If we reached here it means that we know nothing about the sign bits.
  // In this case we can't know if there will be an overflow, since by
  // changing the sign bits any two values can be made to overflow.
  return false;
}

static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
                                                  const Value *RHS,
                                                  const AddOperator *Add,
                                                  const DataLayout &DL,
                                                  AssumptionCache *AC,
                                                  const Instruction *CxtI,
                                                  const DominatorTree *DT) {
  if (Add && Add->hasNoSignedWrap()) {
    return OverflowResult::NeverOverflows;
  }

  // If LHS and RHS each have at least two sign bits, the addition will look
  // like
  //
  //   XX..... +
  //   YY.....
  //
  // If the carry into the most significant position is 0, X and Y can't both
  // be 1 and therefore the carry out of the addition is also 0.
  //
  // If the carry into the most significant position is 1, X and Y can't both
  // be 0 and therefore the carry out of the addition is also 1.
  //
  // Since the carry into the most significant position is always equal to
  // the carry out of the addition, there is no signed overflow.
  if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
    return OverflowResult::NeverOverflows;

  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);

  if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
    return OverflowResult::NeverOverflows;

  // The remaining code needs Add to be available. Early returns if not so.
  if (!Add)
    return OverflowResult::MayOverflow;

  // If the sign of Add is the same as at least one of the operands, this add
  // CANNOT overflow. This is particularly useful when the sum is
  // @llvm.assume'ed non-negative rather than proved so from analyzing its
  // operands.
  bool LHSOrRHSKnownNonNegative =
      (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
  bool LHSOrRHSKnownNegative =
      (LHSKnown.isNegative() || RHSKnown.isNegative());
  if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
    KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
    if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
        (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
      return OverflowResult::NeverOverflows;
    }
  }

  return OverflowResult::MayOverflow;
}

OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
                                                   const Value *RHS,
                                                   const DataLayout &DL,
                                                   AssumptionCache *AC,
                                                   const Instruction *CxtI,
                                                   const DominatorTree *DT) {
  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
  if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
    KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);

    // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
    if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
      return OverflowResult::NeverOverflows;

    // If the LHS is non-negative and the RHS negative, we always wrap.
    if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
      return OverflowResult::AlwaysOverflows;
  }

  return OverflowResult::MayOverflow;
}

OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
                                                 const Value *RHS,
                                                 const DataLayout &DL,
                                                 AssumptionCache *AC,
                                                 const Instruction *CxtI,
                                                 const DominatorTree *DT) {
  // If LHS and RHS each have at least two sign bits, the subtraction
  // cannot overflow.
  if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
      ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
    return OverflowResult::NeverOverflows;

  KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT);

  KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT);

  // Subtraction of two 2's complement numbers having identical signs will
  // never overflow.
  if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
      (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
    return OverflowResult::NeverOverflows;

  // TODO: implement logic similar to checkRippleForAdd
  return OverflowResult::MayOverflow;
}

bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
                                     const DominatorTree &DT) {
#ifndef NDEBUG
  auto IID = II->getIntrinsicID();
  assert((IID == Intrinsic::sadd_with_overflow ||
          IID == Intrinsic::uadd_with_overflow ||
          IID == Intrinsic::ssub_with_overflow ||
          IID == Intrinsic::usub_with_overflow ||
          IID == Intrinsic::smul_with_overflow ||
          IID == Intrinsic::umul_with_overflow) &&
         "Not an overflow intrinsic!");
#endif

  SmallVector<const BranchInst *, 2> GuardingBranches;
  SmallVector<const ExtractValueInst *, 2> Results;

  for (const User *U : II->users()) {
    if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
      assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");

      if (EVI->getIndices()[0] == 0)
        Results.push_back(EVI);
      else {
        assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");

        for (const auto *U : EVI->users())
          if (const auto *B = dyn_cast<BranchInst>(U)) {
            assert(B->isConditional() && "How else is it using an i1?");
            GuardingBranches.push_back(B);
          }
      }
    } else {
      // We are using the aggregate directly in a way we don't want to analyze
      // here (storing it to a global, say).
      return false;
    }
  }

  auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
    BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
    if (!NoWrapEdge.isSingleEdge())
      return false;

    // Check if all users of the add are provably no-wrap.
    for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, the we don't
      // need to check each use separately, since domination is transitive.
      if (DT.dominates(NoWrapEdge, Result->getParent()))
        continue;

      for (auto &RU : Result->uses())
        if (!DT.dominates(NoWrapEdge, RU))
          return false;
    }

    return true;
  };

  return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}

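// Illustrative IR example (not from the original file) of the guarded shape
// this function looks for:
//   %s   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov  = extractvalue { i32, i1 } %s, 1
//   br i1 %ov, label %trap, label %cont      ; no-wrap edge is the %cont edge
// cont:
//   %sum = extractvalue { i32, i1 } %s, 0    ; only reached when no overflow
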
OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
                                                 const DataLayout &DL,
                                                 AssumptionCache *AC,
                                                 const Instruction *CxtI,
                                                 const DominatorTree *DT) {
  return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
                                       Add, DL, AC, CxtI, DT);
}

OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
                                                 const Value *RHS,
                                                 const DataLayout &DL,
                                                 AssumptionCache *AC,
                                                 const Instruction *CxtI,
                                                 const DominatorTree *DT) {
  return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
}

bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
  // A memory operation returns normally if it isn't volatile. A volatile
  // operation is allowed to trap.
  //
  // An atomic operation isn't guaranteed to return in a reasonable amount of
  // time because it's possible for another thread to interfere with it for an
  // arbitrary length of time, but programs aren't allowed to rely on that.
  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
    return !LI->isVolatile();
  if (const StoreInst *SI = dyn_cast<StoreInst>(I))
    return !SI->isVolatile();
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
    return !CXI->isVolatile();
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
    return !RMWI->isVolatile();
  if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
    return !MII->isVolatile();

  // If there is no successor, then execution can't transfer to it.
  if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
    return !CRI->unwindsToCaller();
  if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
    return !CatchSwitch->unwindsToCaller();
  if (isa<ResumeInst>(I))
    return false;
  if (isa<ReturnInst>(I))
    return false;
  if (isa<UnreachableInst>(I))
    return false;

  // Calls can throw, or contain an infinite loop, or kill the process.
  if (auto CS = ImmutableCallSite(I)) {
    // Call sites that throw have implicit non-local control flow.
    if (!CS.doesNotThrow())
      return false;

    // Non-throwing call sites can loop infinitely, call exit/pthread_exit
    // etc. and thus not return.  However, LLVM already assumes that
    //
    //  - Thread exiting actions are modeled as writes to memory invisible to
    //    the program.
    //
    //  - Loops that don't have side effects (side effects are volatile/atomic
    //    stores and IO) always terminate (see http://llvm.org/PR965).
    //    Furthermore IO itself is also modeled as writes to memory invisible
    //    to the program.
    //
    // We rely on those assumptions here, and use the memory effects of the
    // call target as a proxy for checking that it always returns.

    // FIXME: This isn't aggressive enough; a call which only writes to a
    // global is guaranteed to return.
    return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
           match(I, m_Intrinsic<Intrinsic::assume>()) ||
           match(I, m_Intrinsic<Intrinsic::sideeffect>()) ||
           match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
  }

  // Other instructions return normally.
  return true;
}

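// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): proving that a later instruction in the same block must run
// whenever an earlier one does.
#if 0
static bool executesBefore(const Instruction *Early, const Instruction *Late) {
  // Assumes Early and Late are in the same basic block, Early first. Every
  // instruction in [Early, Late) must transfer control to its successor.
  for (BasicBlock::const_iterator It = Early->getIterator(); &*It != Late; ++It)
    if (!isGuaranteedToTransferExecutionToSuccessor(&*It))
      return false;
  return true;
}
#endif
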
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
  // TODO: This is slightly conservative for invoke instruction since exiting
  // via an exception *is* normal control for them.
  for (auto I = BB->begin(), E = BB->end(); I != E; ++I)
    if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
      return false;
  return true;
}

bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
                                                  const Loop *L) {
  // The loop header is guaranteed to be executed for every iteration.
  //
  // FIXME: Relax this constraint to cover all basic blocks that are
  // guaranteed to be executed at every iteration.
  if (I->getParent() != L->getHeader()) return false;

  for (const Instruction &LI : *L->getHeader()) {
    if (&LI == I) return true;
    if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
  }
  llvm_unreachable("Instruction not contained in its own parent basic block.");
}

bool llvm::propagatesFullPoison(const Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Xor:
  case Instruction::Trunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::GetElementPtr:
    // These operations all propagate poison unconditionally. Note that poison
    // is not any particular value, so xor or subtraction of poison with
    // itself still yields poison, not zero.
    return true;

  case Instruction::AShr:
  case Instruction::SExt:
    // For these operations, one bit of the input is replicated across
    // multiple output bits. A replicated poison bit is still poison.
    return true;

  case Instruction::ICmp:
    // Comparing poison with any value yields poison.  This is why, for
    // instance, x s< (x +nsw 1) can be folded to true.
    return true;

  default:
    return false;
  }
}

const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Store:
      return cast<StoreInst>(I)->getPointerOperand();

    case Instruction::Load:
      return cast<LoadInst>(I)->getPointerOperand();

    case Instruction::AtomicCmpXchg:
      return cast<AtomicCmpXchgInst>(I)->getPointerOperand();

    case Instruction::AtomicRMW:
      return cast<AtomicRMWInst>(I)->getPointerOperand();

    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      return I->getOperand(1);

    default:
      return nullptr;
  }
}

bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
  // We currently only look for uses of poison values within the same basic
  // block, as that makes it easier to guarantee that the uses will be
  // executed given that PoisonI is executed.
  //
  // FIXME: Expand this to consider uses beyond the same basic block. To do
  // this, look out for the distinction between post-dominance and strong
  // post-dominance.
  const BasicBlock *BB = PoisonI->getParent();

  // Set of instructions that we have proved will yield poison if PoisonI
  // does.
  SmallSet<const Value *, 16> YieldsPoison;
  SmallSet<const BasicBlock *, 4> Visited;
  YieldsPoison.insert(PoisonI);
  Visited.insert(PoisonI->getParent());

  BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end();

  unsigned Iter = 0;
  while (Iter++ < MaxDepth) {
    for (auto &I : make_range(Begin, End)) {
      if (&I != PoisonI) {
        const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I);
        if (NotPoison != nullptr && YieldsPoison.count(NotPoison))
          return true;
        if (!isGuaranteedToTransferExecutionToSuccessor(&I))
          return false;
      }

      // Mark poison that propagates from I through uses of I.
      if (YieldsPoison.count(&I)) {
        for (const User *User : I.users()) {
          const Instruction *UserI = cast<Instruction>(User);
          if (propagatesFullPoison(UserI))
            YieldsPoison.insert(User);
        }
      }
    }

    if (auto *NextBB = BB->getSingleSuccessor()) {
      if (Visited.insert(NextBB).second) {
        BB = NextBB;
        Begin = BB->getFirstNonPHI()->getIterator();
        End = BB->end();
        continue;
      }
    }

    break;
  }
  return false;
}

static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
  if (FMF.noNaNs())
    return true;

  if (auto *C = dyn_cast<ConstantFP>(V))
    return !C->isNaN();

  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
    if (!C->getElementType()->isFloatingPointTy())
      return false;
    for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
      if (C->getElementAsAPFloat(I).isNaN())
        return false;
    }
    return true;
  }

  return false;
}

static bool isKnownNonZero(const Value *V) {
  if (auto *C = dyn_cast<ConstantFP>(V))
    return !C->isZero();

  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
    if (!C->getElementType()->isFloatingPointTy())
      return false;
    for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
      if (C->getElementAsAPFloat(I).isZero())
        return false;
    }
    return true;
  }

  return false;
}

/// Match clamp pattern for float types without care about NaNs or signed zeros.
/// Given non-min/max outer cmp/select from the clamp pattern this
/// function recognizes if it can be substitued by a "canonical" min/max
/// pattern.
static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TrueVal, Value *FalseVal,
                                               Value *&LHS, Value *&RHS) {
  // Try to match
  //   X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
  //   X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
  // and return description of the outer Max/Min.

  // First, check if select has inverse order:
  if (CmpRHS == FalseVal) {
    std::swap(TrueVal, FalseVal);
    Pred = CmpInst::getInversePredicate(Pred);
  }

  // Assume success now. If there's no match, callers should not use these anyway.
  LHS = TrueVal;
  RHS = FalseVal;

  const APFloat *FC1;
  if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
    return {SPF_UNKNOWN, SPNB_NA, false};

  const APFloat *FC2;
  switch (Pred) {
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (match(FalseVal,
              m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)),
                          m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
        FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan)
      return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
    break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_UGE:
    if (match(FalseVal,
              m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)),
                          m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
        FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan)
      return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
    break;
  default:
    break;
  }

  return {SPF_UNKNOWN, SPNB_NA, false};
}

/// Recognize variations of:
///   CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
                                      Value *CmpLHS, Value *CmpRHS,
                                      Value *TrueVal, Value *FalseVal) {
  // Swap the select operands and predicate to match the patterns below.
  if (CmpRHS != TrueVal) {
    Pred = ICmpInst::getSwappedPredicate(Pred);
    std::swap(TrueVal, FalseVal);
  }
  const APInt *C1;
  if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
    const APInt *C2;

    // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
    if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
      return {SPF_SMAX, SPNB_NA, false};

    // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
    if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
      return {SPF_SMIN, SPNB_NA, false};

    // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
    if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
      return {SPF_UMAX, SPNB_NA, false};

    // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
    if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
      return {SPF_UMIN, SPNB_NA, false};
  }
  return {SPF_UNKNOWN, SPNB_NA, false};
}

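// Illustrative IR example (not from the original file) of a clamp this matcher
// recognizes as SMAX(SMIN(%x, 255), 0):
//   %lt255 = icmp slt i32 %x, 255
//   %min   = select i1 %lt255, i32 %x, i32 255   ; SMIN(%x, 255)
//   %cmp   = icmp slt i32 %x, 0
//   %r     = select i1 %cmp, i32 0, i32 %min     ; --> SMAX(SMIN(%x, 255), 0)
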
/// Recognize variations of:
///   a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TVal, Value *FVal,
                                               unsigned Depth) {
  // TODO: Allow FP min/max with nnan/nsz.
  assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");

  Value *A, *B;
  SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
  if (!SelectPatternResult::isMinOrMax(L.Flavor))
    return {SPF_UNKNOWN, SPNB_NA, false};

  Value *C, *D;
  SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
  if (L.Flavor != R.Flavor)
    return {SPF_UNKNOWN, SPNB_NA, false};

  // We have something like: x Pred y ? min(a, b) : min(c, d).
  // Try to match the compare to the min/max operations of the select operands.
  // First, make sure we have the right compare predicate.
  switch (L.Flavor) {
  case SPF_SMIN:
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_SMAX:
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMIN:
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMAX:
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  default:
    return {SPF_UNKNOWN, SPNB_NA, false};
  }

  // If there is a common operand in the already matched min/max and the other
  // min/max operands match the compare operands (either directly or inverted),
  // then this is min/max of the same flavor.

  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  if (D == B) {
    if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                         match(A, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }
  // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  if (C == B) {
    if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                         match(A, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }
  // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  if (D == A) {
    if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                         match(B, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }
  // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  if (C == A) {
    if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                         match(B, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }

  return {SPF_UNKNOWN, SPNB_NA, false};
}

/// Match non-obvious integer minimum and maximum sequences.
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
                                       Value *CmpLHS, Value *CmpRHS,
                                       Value *TrueVal, Value *FalseVal,
                                       Value *&LHS, Value *&RHS,
                                       unsigned Depth) {
  // Assume success. If there's no match, callers should not use these anyway.
  LHS = TrueVal;
  RHS = FalseVal;

  SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
    return SPR;

  SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
    return SPR;

  if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
    return {SPF_UNKNOWN, SPNB_NA, false};

  // Z = X -nsw Y
  // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
  // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
  if (match(TrueVal, m_Zero()) &&
      match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};

  // Z = X -nsw Y
  // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
  // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
  if (match(FalseVal, m_Zero()) &&
      match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};

  const APInt *C1;
  if (!match(CmpRHS, m_APInt(C1)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  // An unsigned min/max can be written with a signed compare.
  const APInt *C2;
  if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
      (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
    // Is the sign bit set?
    // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
    // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
    if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() &&
        C2->isMaxSignedValue())
      return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};

    // Is the sign bit clear?
    // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
    // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
    if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
        C2->isMinSignedValue())
      return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
  }

  // Look through 'not' ops to find disguised signed min/max.
  // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
  // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
  if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
      match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2)
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};

  // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
  // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
  if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
      match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2)
    return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};

  return {SPF_UNKNOWN, SPNB_NA, false};
}

bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) {
  assert(X && Y && "Invalid operand");

  // X = sub (0, Y) || X = sub nsw (0, Y)
  if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) ||
      (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y)))))
    return true;

  // Y = sub (0, X) || Y = sub nsw (0, X)
  if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) ||
      (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X)))))
    return true;

  // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
  Value *A, *B;
  return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
                       match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
         (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
                      match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
}

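// Illustrative sketch (not part of the original file; the helper name below is
// hypothetical): recognizing an abs-like select by checking that its two arms
// negate each other.
#if 0
static bool armsAreNegations(SelectInst *Sel) {
  // For abs(x) written as "select (x >s -1), x, (sub 0, x)" the true and
  // false arms are known negations of each other.
  return isKnownNegation(Sel->getTrueValue(), Sel->getFalseValue());
}
#endif
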
4812 static SelectPatternResult
matchSelectPattern(CmpInst::Predicate Pred
,
4814 Value
*CmpLHS
, Value
*CmpRHS
,
4815 Value
*TrueVal
, Value
*FalseVal
,
4816 Value
*&LHS
, Value
*&RHS
,
4818 if (CmpInst::isFPPredicate(Pred
)) {
4819 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
4820 // 0.0 operand, set the compare's 0.0 operands to that same value for the
4821 // purpose of identifying min/max. Disregard vector constants with undefined
4822 // elements because those can not be back-propagated for analysis.
4823 Value
*OutputZeroVal
= nullptr;
4824 if (match(TrueVal
, m_AnyZeroFP()) && !match(FalseVal
, m_AnyZeroFP()) &&
4825 !cast
<Constant
>(TrueVal
)->containsUndefElement())
4826 OutputZeroVal
= TrueVal
;
4827 else if (match(FalseVal
, m_AnyZeroFP()) && !match(TrueVal
, m_AnyZeroFP()) &&
4828 !cast
<Constant
>(FalseVal
)->containsUndefElement())
4829 OutputZeroVal
= FalseVal
;
4831 if (OutputZeroVal
) {
4832 if (match(CmpLHS
, m_AnyZeroFP()))
4833 CmpLHS
= OutputZeroVal
;
4834 if (match(CmpRHS
, m_AnyZeroFP()))
4835 CmpRHS
= OutputZeroVal
;
4842 // Signed zero may return inconsistent results between implementations.
4843 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
4844 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
4845 // Therefore, we behave conservatively and only proceed if at least one of the
4846 // operands is known to not be zero or if we don't care about signed zero.
4849 // FIXME: Include OGT/OLT/UGT/ULT.
4850 case CmpInst::FCMP_OGE
: case CmpInst::FCMP_OLE
:
4851 case CmpInst::FCMP_UGE
: case CmpInst::FCMP_ULE
:
4852 if (!FMF
.noSignedZeros() && !isKnownNonZero(CmpLHS
) &&
4853 !isKnownNonZero(CmpRHS
))
4854 return {SPF_UNKNOWN
, SPNB_NA
, false};
4857 SelectPatternNaNBehavior NaNBehavior
= SPNB_NA
;
4858 bool Ordered
= false;
4860 // When given one NaN and one non-NaN input:
4861 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
4862 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
4863 // ordered comparison fails), which could be NaN or non-NaN.
4864 // so here we discover exactly what NaN behavior is required/accepted.
  if (CmpInst::isFPPredicate(Pred)) {
    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);

    if (LHSSafe && RHSSafe) {
      // Both operands are known non-NaN.
      NaNBehavior = SPNB_RETURNS_ANY;
    } else if (CmpInst::isOrdered(Pred)) {
      // An ordered comparison will return false when given a NaN, so it
      // returns the RHS.
      Ordered = true;
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
        NaNBehavior = SPNB_RETURNS_NAN;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_OTHER;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    } else {
      Ordered = false;
      // An unordered comparison will return true when given a NaN, so it
      // returns the LHS.
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
        NaNBehavior = SPNB_RETURNS_OTHER;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_NAN;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    }
  }

  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    if (NaNBehavior == SPNB_RETURNS_NAN)
      NaNBehavior = SPNB_RETURNS_OTHER;
    else if (NaNBehavior == SPNB_RETURNS_OTHER)
      NaNBehavior = SPNB_RETURNS_NAN;
    Ordered = !Ordered;
  }

  // ([if]cmp X, Y) ? X : Y
  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
    switch (Pred) {
    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
    case ICmpInst::ICMP_UGT:
    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
    case ICmpInst::ICMP_ULT:
    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
    case ICmpInst::ICMP_SLT:
    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
    case FCmpInst::FCMP_UGT:
    case FCmpInst::FCMP_UGE:
    case FCmpInst::FCMP_OGT:
    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
    case FCmpInst::FCMP_ULT:
    case FCmpInst::FCMP_ULE:
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
    }
  }

  if (isKnownNegation(TrueVal, FalseVal)) {
    // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
    // match against either LHS or sext(LHS).
    auto MaybeSExtCmpLHS =
        m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
    auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
    auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
    if (match(TrueVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = TrueVal;
      RHS = FalseVal;
      if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
      // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_ABS, SPNB_NA, false};

      // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
      // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_NABS, SPNB_NA, false};
    }
    else if (match(FalseVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = FalseVal;
      RHS = TrueVal;
      if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
      // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_NABS, SPNB_NA, false};

      // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
      // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};
    }
  }

  if (CmpInst::isIntPredicate(Pred))
    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS,
                       Depth);

  // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
  // may return either -0.0 or 0.0, so fcmp/select pair has stricter
  // semantics than minNum. Be conservative in such case.
  if (NaNBehavior != SPNB_RETURNS_ANY ||
      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
       !isKnownNonZero(CmpRHS)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
/// Helps to match a select pattern in case of a type mismatch.
///
/// The function processes the case when type of true and false values of a
/// select instruction differs from type of the cmp instruction operands because
/// of a cast instruction. The function checks if it is legal to move the cast
/// operation after "select". If yes, it returns the new second value of
/// "select" (with the assumption that cast is moved):
/// 1. As operand of cast instruction when both values of "select" are same cast
/// instructions.
/// 2. As restored constant (by applying reverse cast operation) when the first
/// value of the "select" is a cast operation and the second value is a
/// constant.
/// NOTE: We return only the new second value because the first value could be
/// accessed as operand of cast instruction.
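///
/// Illustrative example (value names are placeholders): for
///   %cmp = icmp slt i32 %x, %y
///   %a   = sext i32 %x to i64
///   %b   = sext i32 %y to i64
///   %sel = select i1 %cmp, i64 %a, i64 %b
/// both select values are the same cast from the same source type, so the
/// helper returns %y (the operand of the second cast), letting the caller
/// match the min/max pattern on the narrower i32 operands.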
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
                              Instruction::CastOps *CastOp) {
  auto *Cast1 = dyn_cast<CastInst>(V1);
  if (!Cast1)
    return nullptr;

  *CastOp = Cast1->getOpcode();
  Type *SrcTy = Cast1->getSrcTy();
  if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
    // If V1 and V2 are both the same cast from the same type, look through V1.
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    return nullptr;
  }

  auto *C = dyn_cast<Constant>(V2);
  if (!C)
    return nullptr;

  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %t, iK C
      //
      // We can always move trunc after select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C could be extended in any way because we don't care about
      // upper bits after truncation. It can't be abs pattern, because it would
      // look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only min/max pattern could be matched. Such match requires widened C
      // == CmpConst. That is why set widened C = CmpConst, condition trunc
      // CmpConst == C is checked below.
      CastedTo = CmpConst;
    } else {
      CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
  Constant *CastedBack =
      ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true);
  if (CastedBack != C)
    return nullptr;

  return CastedTo;
}
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
                                             Instruction::CastOps *CastOp,
                                             unsigned Depth) {
  if (Depth >= MaxDepth)
    return {SPF_UNKNOWN, SPNB_NA, false};

  SelectInst *SI = dyn_cast<SelectInst>(V);
  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};

  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};

  CmpInst::Predicate Pred = CmpI->getPredicate();
  Value *CmpLHS = CmpI->getOperand(0);
  Value *CmpRHS = CmpI->getOperand(1);
  Value *TrueVal = SI->getTrueValue();
  Value *FalseVal = SI->getFalseValue();
  FastMathFlags FMF;
  if (isa<FPMathOperator>(CmpI))
    FMF = CmpI->getFastMathFlags();

  // Bail out early.
  if (CmpI->isEquality())
    return {SPF_UNKNOWN, SPNB_NA, false};

  // Deal with type mismatches.
  if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  cast<CastInst>(TrueVal)->getOperand(0), C,
                                  LHS, RHS, Depth);
    }
    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  C, cast<CastInst>(FalseVal)->getOperand(0),
                                  LHS, RHS, Depth);
    }
  }
  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                              LHS, RHS, Depth);
}
CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
  if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
  if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
  if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
  if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
  if (SPF == SPF_FMINNUM)
    return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
  if (SPF == SPF_FMAXNUM)
    return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
  llvm_unreachable("unhandled!");
}

SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
  if (SPF == SPF_SMIN) return SPF_SMAX;
  if (SPF == SPF_UMIN) return SPF_UMAX;
  if (SPF == SPF_SMAX) return SPF_SMIN;
  if (SPF == SPF_UMAX) return SPF_UMIN;
  llvm_unreachable("unhandled!");
}

CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
  return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}
/// Return true if "icmp Pred LHS RHS" is always true.
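///
/// Illustrative example: "icmp ule %x, (add nuw %x, 42)" is always true,
/// because a no-unsigned-wrap add can only increase %x (the value name and the
/// constant 42 are arbitrary placeholders).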
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
                            const Value *RHS, const DataLayout &DL,
                            unsigned Depth) {
  assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
    return true;

  switch (Pred) {
  default:
    return false;

  case CmpInst::ICMP_SLE: {
    const APInt *C;

    // LHS s<= LHS +_{nsw} C   if C >= 0
    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
      return !C->isNegative();
    return false;
  }

  case CmpInst::ICMP_ULE: {
    const APInt *C;

    // LHS u<= LHS +_{nuw} C   for any C
    if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
      return true;

    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
    auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
                                       const Value *&X,
                                       const APInt *&CA, const APInt *&CB) {
      if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
          match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
        return true;

      // If X & C == 0 then (X | C) == X +_{nuw} C
      if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
          match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
        KnownBits Known(CA->getBitWidth());
        computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr,
                         /*CxtI*/ nullptr, /*DT*/ nullptr);
        if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero))
          return true;
      }

      return false;
    };

    const Value *X;
    const APInt *CLHS, *CRHS;
    if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
      return CLHS->ule(*CRHS);

    return false;
  }
  }
}
/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true.  Otherwise, return None.
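///
/// Illustrative example: with Pred = ICMP_SLT, knowing "(add nsw %x, 1) s< %y"
/// implies "%x s< %y", because %x s<= %x + 1 (nsw add of a non-negative
/// constant) and %y s<= %y. The value names and the constant 1 are
/// placeholders.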
static Optional<bool>
isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
                      const Value *ARHS, const Value *BLHS, const Value *BRHS,
                      const DataLayout &DL, unsigned Depth) {
  switch (Pred) {
  default:
    return None;

  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SLE:
    if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth))
      return true;
    return None;

  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth))
      return true;
    return None;
  }
}
/// Return true if the operands of the two compares match.  IsSwappedOps is true
/// when the operands match, but are swapped.
static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
                          const Value *BLHS, const Value *BRHS,
                          bool &IsSwappedOps) {
  bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS);
  IsSwappedOps = (ALHS == BRHS && ARHS == BLHS);
  return IsMatchingOps || IsSwappedOps;
}
/// Return true if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is true.
/// Return false if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is false.
/// Otherwise, return None if we can't infer anything.
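///
/// Illustrative example: "icmp slt %x, %y" being true implies "icmp sle %x, %y"
/// is true and implies "icmp sgt %x, %y" is false (the value names are
/// placeholders).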
static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
                                                    CmpInst::Predicate BPred,
                                                    bool AreSwappedOps) {
  // Canonicalize the predicate as if the operands were not commuted.
  if (AreSwappedOps)
    BPred = ICmpInst::getSwappedPredicate(BPred);

  if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred))
    return true;
  if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred))
    return false;

  return None;
}
/// Return true if "icmp APred X, C1" implies "icmp BPred X, C2" is true.
/// Return false if "icmp APred X, C1" implies "icmp BPred X, C2" is false.
/// Otherwise, return None if we can't infer anything.
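///
/// Illustrative example: "icmp sgt %x, 10" implies "icmp sgt %x, 5" is true,
/// because every value in the exact range of the first compare (x > 10) also
/// lies in the allowed range of the second (x > 5), so the difference of the
/// two ranges below is empty. The constants 10 and 5 are placeholders.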
static Optional<bool>
isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
                                 const ConstantInt *C1,
                                 CmpInst::Predicate BPred,
                                 const ConstantInt *C2) {
  ConstantRange DomCR =
      ConstantRange::makeExactICmpRegion(APred, C1->getValue());
  ConstantRange CR =
      ConstantRange::makeAllowedICmpRegion(BPred, C2->getValue());
  ConstantRange Intersection = DomCR.intersectWith(CR);
  ConstantRange Difference = DomCR.difference(CR);
  if (Intersection.isEmptySet())
    return false;
  if (Difference.isEmptySet())
    return true;

  return None;
}
/// Return true if LHS implies RHS is true.  Return false if LHS implies RHS is
/// false.  Otherwise, return None if we can't infer anything.
static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
                                         const ICmpInst *RHS,
                                         const DataLayout &DL, bool LHSIsTrue,
                                         unsigned Depth) {
  Value *ALHS = LHS->getOperand(0);
  Value *ARHS = LHS->getOperand(1);
  // The rest of the logic assumes the LHS condition is true.  If that's not the
  // case, invert the predicate to make it so.
  ICmpInst::Predicate APred =
      LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();

  Value *BLHS = RHS->getOperand(0);
  Value *BRHS = RHS->getOperand(1);
  ICmpInst::Predicate BPred = RHS->getPredicate();

  // Can we infer anything when the two compares have matching operands?
  bool AreSwappedOps;
  if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, AreSwappedOps)) {
    if (Optional<bool> Implication = isImpliedCondMatchingOperands(
            APred, BPred, AreSwappedOps))
      return Implication;
    // No amount of additional analysis will infer the second condition, so
    // early exit.
    return None;
  }

  // Can we infer anything when the LHS operands match and the RHS operands are
  // constants (not necessarily matching)?
  if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
    if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
            APred, cast<ConstantInt>(ARHS), BPred, cast<ConstantInt>(BRHS)))
      return Implication;
    // No amount of additional analysis will infer the second condition, so
    // early exit.
    return None;
  }

  if (APred == BPred)
    return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth);

  return None;
}
/// Return true if LHS implies RHS is true.  Return false if LHS implies RHS is
/// false.  Otherwise, return None if we can't infer anything.  We expect the
/// RHS to be an icmp and the LHS to be an 'and' or an 'or' instruction.
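///
/// Illustrative example: if "%c = and i1 %a, %b" is known true, then %a and %b
/// are each known true, so any icmp implied by either leg is implied by %c as
/// well (the value names are placeholders).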
static Optional<bool> isImpliedCondAndOr(const BinaryOperator *LHS,
                                         const ICmpInst *RHS,
                                         const DataLayout &DL, bool LHSIsTrue,
                                         unsigned Depth) {
  // The LHS must be an 'or' or an 'and' instruction.
  assert((LHS->getOpcode() == Instruction::And ||
          LHS->getOpcode() == Instruction::Or) &&
         "Expected LHS to be 'and' or 'or'.");

  assert(Depth <= MaxDepth && "Hit recursion limit");

  // If the result of an 'or' is false, then we know both legs of the 'or' are
  // false.  Similarly, if the result of an 'and' is true, then we know both
  // legs of the 'and' are true.
  const Value *ALHS, *ARHS;
  if ((!LHSIsTrue && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) ||
      (LHSIsTrue && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) {
    // FIXME: Make this non-recursion.
    if (Optional<bool> Implication =
            isImpliedCondition(ALHS, RHS, DL, LHSIsTrue, Depth + 1))
      return Implication;
    if (Optional<bool> Implication =
            isImpliedCondition(ARHS, RHS, DL, LHSIsTrue, Depth + 1))
      return Implication;
    return None;
  }
  return None;
}
Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
                                        const DataLayout &DL, bool LHSIsTrue,
                                        unsigned Depth) {
  // Bail out when we hit the limit.
  if (Depth == MaxDepth)
    return None;

  // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
  // example.
  if (LHS->getType() != RHS->getType())
    return None;

  Type *OpTy = LHS->getType();
  assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!");

  // LHS ==> RHS by definition
  if (LHS == RHS)
    return LHSIsTrue;

  // FIXME: Extending the code below to handle vectors.
  if (OpTy->isVectorTy())
    return None;

  assert(OpTy->isIntegerTy(1) && "implied by above");

  // Both LHS and RHS are icmps.
  const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
  const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS);
  if (LHSCmp && RHSCmp)
    return isImpliedCondICmps(LHSCmp, RHSCmp, DL, LHSIsTrue, Depth);

  // The LHS should be an 'or' or an 'and' instruction.  We expect the RHS to be
  // an icmp. FIXME: Add support for and/or on the RHS.
  const BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHS);
  if (LHSBO && RHSCmp) {
    if ((LHSBO->getOpcode() == Instruction::And ||
         LHSBO->getOpcode() == Instruction::Or))
      return isImpliedCondAndOr(LHSBO, RHSCmp, DL, LHSIsTrue, Depth);
  }
  return None;
}
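
// Illustrative example (block and value names are placeholders): if the sole
// predecessor of ContextI's block ends in
//   br i1 %cond, label %then, label %else
// and ContextI lives in %then, then %cond is known true at ContextI, so any
// condition implied by %cond can be answered here.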
Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
                                             const Instruction *ContextI,
                                             const DataLayout &DL) {
  assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
  if (!ContextI || !ContextI->getParent())
    return None;

  // TODO: This is a poor/cheap way to determine dominance. Should we use a
  // dominator tree (eg, from a SimplifyQuery) instead?
  const BasicBlock *ContextBB = ContextI->getParent();
  const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
  if (!PredBB)
    return None;

  // We need a conditional branch in the predecessor.
  Value *PredCond;
  BasicBlock *TrueBB, *FalseBB;
  if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
    return None;

  // The branch should get simplified. Don't bother simplifying this condition.
  if (TrueBB == FalseBB)
    return None;

  assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
         "Predecessor block does not point to successor?");

  // Is this condition implied by the predecessor condition?
  bool CondIsTrue = TrueBB == ContextBB;
  return isImpliedCondition(PredCond, Cond, DL, CondIsTrue);