1 //===- InstCombineAndOrXor.cpp --------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the visitAnd, visitOr, and visitXor functions.
11 //===----------------------------------------------------------------------===//
13 #include "InstCombineInternal.h"
14 #include "llvm/Analysis/CmpInstAnalysis.h"
15 #include "llvm/Analysis/InstructionSimplify.h"
16 #include "llvm/IR/ConstantRange.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/PatternMatch.h"
19 #include "llvm/Transforms/InstCombine/InstCombiner.h"
20 #include "llvm/Transforms/Utils/Local.h"
23 using namespace PatternMatch
;
25 #define DEBUG_TYPE "instcombine"
27 /// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
29 static unsigned getFCmpCode(FCmpInst::Predicate CC
) {
30 assert(FCmpInst::FCMP_FALSE
<= CC
&& CC
<= FCmpInst::FCMP_TRUE
&&
31 "Unexpected FCmp predicate!");
32 // Take advantage of the bit pattern of FCmpInst::Predicate here.
34 static_assert(FCmpInst::FCMP_FALSE
== 0, ""); // 0 0 0 0
35 static_assert(FCmpInst::FCMP_OEQ
== 1, ""); // 0 0 0 1
36 static_assert(FCmpInst::FCMP_OGT
== 2, ""); // 0 0 1 0
37 static_assert(FCmpInst::FCMP_OGE
== 3, ""); // 0 0 1 1
38 static_assert(FCmpInst::FCMP_OLT
== 4, ""); // 0 1 0 0
39 static_assert(FCmpInst::FCMP_OLE
== 5, ""); // 0 1 0 1
40 static_assert(FCmpInst::FCMP_ONE
== 6, ""); // 0 1 1 0
41 static_assert(FCmpInst::FCMP_ORD
== 7, ""); // 0 1 1 1
42 static_assert(FCmpInst::FCMP_UNO
== 8, ""); // 1 0 0 0
43 static_assert(FCmpInst::FCMP_UEQ
== 9, ""); // 1 0 0 1
44 static_assert(FCmpInst::FCMP_UGT
== 10, ""); // 1 0 1 0
45 static_assert(FCmpInst::FCMP_UGE
== 11, ""); // 1 0 1 1
46 static_assert(FCmpInst::FCMP_ULT
== 12, ""); // 1 1 0 0
47 static_assert(FCmpInst::FCMP_ULE
== 13, ""); // 1 1 0 1
48 static_assert(FCmpInst::FCMP_UNE
== 14, ""); // 1 1 1 0
49 static_assert(FCmpInst::FCMP_TRUE
== 15, ""); // 1 1 1 1
53 /// This is the complement of getICmpCode, which turns an opcode and two
54 /// operands into either a constant true or false, or a brand new ICmp
55 /// instruction. The sign is passed in to determine which kind of predicate to
56 /// use in the new icmp instruction.
57 static Value
*getNewICmpValue(unsigned Code
, bool Sign
, Value
*LHS
, Value
*RHS
,
58 InstCombiner::BuilderTy
&Builder
) {
59 ICmpInst::Predicate NewPred
;
60 if (Constant
*TorF
= getPredForICmpCode(Code
, Sign
, LHS
->getType(), NewPred
))
62 return Builder
.CreateICmp(NewPred
, LHS
, RHS
);
65 /// This is the complement of getFCmpCode, which turns an opcode and two
66 /// operands into either a FCmp instruction, or a true/false constant.
67 static Value
*getFCmpValue(unsigned Code
, Value
*LHS
, Value
*RHS
,
68 InstCombiner::BuilderTy
&Builder
) {
69 const auto Pred
= static_cast<FCmpInst::Predicate
>(Code
);
70 assert(FCmpInst::FCMP_FALSE
<= Pred
&& Pred
<= FCmpInst::FCMP_TRUE
&&
71 "Unexpected FCmp predicate!");
72 if (Pred
== FCmpInst::FCMP_FALSE
)
73 return ConstantInt::get(CmpInst::makeCmpResultType(LHS
->getType()), 0);
74 if (Pred
== FCmpInst::FCMP_TRUE
)
75 return ConstantInt::get(CmpInst::makeCmpResultType(LHS
->getType()), 1);
76 return Builder
.CreateFCmp(Pred
, LHS
, RHS
);
79 /// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
80 /// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
81 /// \param I Binary operator to transform.
82 /// \return Pointer to node that must replace the original binary operator, or
83 /// null pointer if no transformation was made.
84 static Value
*SimplifyBSwap(BinaryOperator
&I
,
85 InstCombiner::BuilderTy
&Builder
) {
86 assert(I
.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying");
88 Value
*OldLHS
= I
.getOperand(0);
89 Value
*OldRHS
= I
.getOperand(1);
92 if (!match(OldLHS
, m_BSwap(m_Value(NewLHS
))))
98 if (match(OldRHS
, m_BSwap(m_Value(NewRHS
)))) {
99 // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
100 if (!OldLHS
->hasOneUse() && !OldRHS
->hasOneUse())
102 // NewRHS initialized by the matcher.
103 } else if (match(OldRHS
, m_APInt(C
))) {
104 // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
105 if (!OldLHS
->hasOneUse())
107 NewRHS
= ConstantInt::get(I
.getType(), C
->byteSwap());
111 Value
*BinOp
= Builder
.CreateBinOp(I
.getOpcode(), NewLHS
, NewRHS
);
112 Function
*F
= Intrinsic::getDeclaration(I
.getModule(), Intrinsic::bswap
,
114 return Builder
.CreateCall(F
, BinOp
);
117 /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
118 /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
119 /// whether to treat V, Lo, and Hi as signed or not.
120 Value
*InstCombinerImpl::insertRangeTest(Value
*V
, const APInt
&Lo
,
121 const APInt
&Hi
, bool isSigned
,
123 assert((isSigned
? Lo
.slt(Hi
) : Lo
.ult(Hi
)) &&
124 "Lo is not < Hi in range emission code!");
126 Type
*Ty
= V
->getType();
128 // V >= Min && V < Hi --> V < Hi
129 // V < Min || V >= Hi --> V >= Hi
130 ICmpInst::Predicate Pred
= Inside
? ICmpInst::ICMP_ULT
: ICmpInst::ICMP_UGE
;
131 if (isSigned
? Lo
.isMinSignedValue() : Lo
.isMinValue()) {
132 Pred
= isSigned
? ICmpInst::getSignedPredicate(Pred
) : Pred
;
133 return Builder
.CreateICmp(Pred
, V
, ConstantInt::get(Ty
, Hi
));
136 // V >= Lo && V < Hi --> V - Lo u< Hi - Lo
137 // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo
139 Builder
.CreateSub(V
, ConstantInt::get(Ty
, Lo
), V
->getName() + ".off");
140 Constant
*HiMinusLo
= ConstantInt::get(Ty
, Hi
- Lo
);
141 return Builder
.CreateICmp(Pred
, VMinusLo
, HiMinusLo
);
144 /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
145 /// that can be simplified.
146 /// One of A and B is considered the mask. The other is the value. This is
147 /// described as the "AMask" or "BMask" part of the enum. If the enum contains
148 /// only "Mask", then both A and B can be considered masks. If A is the mask,
149 /// then it was proven that (A & C) == C. This is trivial if C == A or C == 0.
150 /// If both A and C are constants, this proof is also easy.
151 /// For the following explanations, we assume that A is the mask.
153 /// "AllOnes" declares that the comparison is true only if (A & B) == A or all
154 /// bits of A are set in B.
155 /// Example: (icmp eq (A & 3), 3) -> AMask_AllOnes
157 /// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all
158 /// bits of A are cleared in B.
159 /// Example: (icmp eq (A & 3), 0) -> Mask_AllZeroes
161 /// "Mixed" declares that (A & B) == C and C might or might not contain any
162 /// number of one bits and zero bits.
163 /// Example: (icmp eq (A & 3), 1) -> AMask_Mixed
165 /// "Not" means that in above descriptions "==" should be replaced by "!=".
166 /// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
168 /// If the mask A contains a single bit, then the following is equivalent:
169 /// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
170 /// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
171 enum MaskedICmpType
{
173 AMask_NotAllOnes
= 2,
175 BMask_NotAllOnes
= 8,
177 Mask_NotAllZeros
= 32,
179 AMask_NotMixed
= 128,
184 /// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
186 static unsigned getMaskedICmpType(Value
*A
, Value
*B
, Value
*C
,
187 ICmpInst::Predicate Pred
) {
188 ConstantInt
*ACst
= dyn_cast
<ConstantInt
>(A
);
189 ConstantInt
*BCst
= dyn_cast
<ConstantInt
>(B
);
190 ConstantInt
*CCst
= dyn_cast
<ConstantInt
>(C
);
191 bool IsEq
= (Pred
== ICmpInst::ICMP_EQ
);
192 bool IsAPow2
= (ACst
&& !ACst
->isZero() && ACst
->getValue().isPowerOf2());
193 bool IsBPow2
= (BCst
&& !BCst
->isZero() && BCst
->getValue().isPowerOf2());
194 unsigned MaskVal
= 0;
195 if (CCst
&& CCst
->isZero()) {
196 // if C is zero, then both A and B qualify as mask
197 MaskVal
|= (IsEq
? (Mask_AllZeros
| AMask_Mixed
| BMask_Mixed
)
198 : (Mask_NotAllZeros
| AMask_NotMixed
| BMask_NotMixed
));
200 MaskVal
|= (IsEq
? (AMask_NotAllOnes
| AMask_NotMixed
)
201 : (AMask_AllOnes
| AMask_Mixed
));
203 MaskVal
|= (IsEq
? (BMask_NotAllOnes
| BMask_NotMixed
)
204 : (BMask_AllOnes
| BMask_Mixed
));
209 MaskVal
|= (IsEq
? (AMask_AllOnes
| AMask_Mixed
)
210 : (AMask_NotAllOnes
| AMask_NotMixed
));
212 MaskVal
|= (IsEq
? (Mask_NotAllZeros
| AMask_NotMixed
)
213 : (Mask_AllZeros
| AMask_Mixed
));
214 } else if (ACst
&& CCst
&& ConstantExpr::getAnd(ACst
, CCst
) == CCst
) {
215 MaskVal
|= (IsEq
? AMask_Mixed
: AMask_NotMixed
);
219 MaskVal
|= (IsEq
? (BMask_AllOnes
| BMask_Mixed
)
220 : (BMask_NotAllOnes
| BMask_NotMixed
));
222 MaskVal
|= (IsEq
? (Mask_NotAllZeros
| BMask_NotMixed
)
223 : (Mask_AllZeros
| BMask_Mixed
));
224 } else if (BCst
&& CCst
&& ConstantExpr::getAnd(BCst
, CCst
) == CCst
) {
225 MaskVal
|= (IsEq
? BMask_Mixed
: BMask_NotMixed
);
231 /// Convert an analysis of a masked ICmp into its equivalent if all boolean
232 /// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
233 /// is adjacent to the corresponding normal flag (recording ==), this just
234 /// involves swapping those bits over.
235 static unsigned conjugateICmpMask(unsigned Mask
) {
237 NewMask
= (Mask
& (AMask_AllOnes
| BMask_AllOnes
| Mask_AllZeros
|
238 AMask_Mixed
| BMask_Mixed
))
241 NewMask
|= (Mask
& (AMask_NotAllOnes
| BMask_NotAllOnes
| Mask_NotAllZeros
|
242 AMask_NotMixed
| BMask_NotMixed
))
248 // Adapts the external decomposeBitTestICmp for local use.
249 static bool decomposeBitTestICmp(Value
*LHS
, Value
*RHS
, CmpInst::Predicate
&Pred
,
250 Value
*&X
, Value
*&Y
, Value
*&Z
) {
252 if (!llvm::decomposeBitTestICmp(LHS
, RHS
, Pred
, X
, Mask
))
255 Y
= ConstantInt::get(X
->getType(), Mask
);
256 Z
= ConstantInt::get(X
->getType(), 0);
260 /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
261 /// Return the pattern classes (from MaskedICmpType) for the left hand side and
262 /// the right hand side as a pair.
263 /// LHS and RHS are the left hand side and the right hand side ICmps and PredL
264 /// and PredR are their predicates, respectively.
266 Optional
<std::pair
<unsigned, unsigned>>
267 getMaskedTypeForICmpPair(Value
*&A
, Value
*&B
, Value
*&C
,
268 Value
*&D
, Value
*&E
, ICmpInst
*LHS
,
270 ICmpInst::Predicate
&PredL
,
271 ICmpInst::Predicate
&PredR
) {
272 // vectors are not (yet?) supported. Don't support pointers either.
273 if (!LHS
->getOperand(0)->getType()->isIntegerTy() ||
274 !RHS
->getOperand(0)->getType()->isIntegerTy())
277 // Here comes the tricky part:
278 // LHS might be of the form L11 & L12 == X, X == L21 & L22,
279 // and L11 & L12 == L21 & L22. The same goes for RHS.
280 // Now we must find those components L** and R**, that are equal, so
281 // that we can extract the parameters A, B, C, D, and E for the canonical
283 Value
*L1
= LHS
->getOperand(0);
284 Value
*L2
= LHS
->getOperand(1);
285 Value
*L11
, *L12
, *L21
, *L22
;
286 // Check whether the icmp can be decomposed into a bit test.
287 if (decomposeBitTestICmp(L1
, L2
, PredL
, L11
, L12
, L2
)) {
288 L21
= L22
= L1
= nullptr;
290 // Look for ANDs in the LHS icmp.
291 if (!match(L1
, m_And(m_Value(L11
), m_Value(L12
)))) {
292 // Any icmp can be viewed as being trivially masked; if it allows us to
293 // remove one, it's worth it.
295 L12
= Constant::getAllOnesValue(L1
->getType());
298 if (!match(L2
, m_And(m_Value(L21
), m_Value(L22
)))) {
300 L22
= Constant::getAllOnesValue(L2
->getType());
304 // Bail if LHS was a icmp that can't be decomposed into an equality.
305 if (!ICmpInst::isEquality(PredL
))
308 Value
*R1
= RHS
->getOperand(0);
309 Value
*R2
= RHS
->getOperand(1);
312 if (decomposeBitTestICmp(R1
, R2
, PredR
, R11
, R12
, R2
)) {
313 if (R11
== L11
|| R11
== L12
|| R11
== L21
|| R11
== L22
) {
316 } else if (R12
== L11
|| R12
== L12
|| R12
== L21
|| R12
== L22
) {
326 if (!match(R1
, m_And(m_Value(R11
), m_Value(R12
)))) {
327 // As before, model no mask as a trivial mask if it'll let us do an
330 R12
= Constant::getAllOnesValue(R1
->getType());
333 if (R11
== L11
|| R11
== L12
|| R11
== L21
|| R11
== L22
) {
338 } else if (R12
== L11
|| R12
== L12
|| R12
== L21
|| R12
== L22
) {
346 // Bail if RHS was a icmp that can't be decomposed into an equality.
347 if (!ICmpInst::isEquality(PredR
))
350 // Look for ANDs on the right side of the RHS icmp.
352 if (!match(R2
, m_And(m_Value(R11
), m_Value(R12
)))) {
354 R12
= Constant::getAllOnesValue(R2
->getType());
357 if (R11
== L11
|| R11
== L12
|| R11
== L21
|| R11
== L22
) {
362 } else if (R12
== L11
|| R12
== L12
|| R12
== L21
|| R12
== L22
) {
371 assert(Ok
&& "Failed to find AND on the right side of the RHS icmp.");
377 } else if (L12
== A
) {
380 } else if (L21
== A
) {
383 } else if (L22
== A
) {
388 unsigned LeftType
= getMaskedICmpType(A
, B
, C
, PredL
);
389 unsigned RightType
= getMaskedICmpType(A
, D
, E
, PredR
);
390 return Optional
<std::pair
<unsigned, unsigned>>(std::make_pair(LeftType
, RightType
));
393 /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
394 /// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
395 /// and the right hand side is of type BMask_Mixed. For example,
396 /// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
397 static Value
*foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
398 ICmpInst
*LHS
, ICmpInst
*RHS
, bool IsAnd
, Value
*A
, Value
*B
, Value
*C
,
399 Value
*D
, Value
*E
, ICmpInst::Predicate PredL
, ICmpInst::Predicate PredR
,
400 InstCombiner::BuilderTy
&Builder
) {
401 // We are given the canonical form:
402 // (icmp ne (A & B), 0) & (icmp eq (A & D), E).
405 // If IsAnd is false, we get it in negated form:
406 // (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
407 // !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
409 // We currently handle the case of B, C, D, E are constant.
411 ConstantInt
*BCst
, *CCst
, *DCst
, *ECst
;
412 if (!match(B
, m_ConstantInt(BCst
)) || !match(C
, m_ConstantInt(CCst
)) ||
413 !match(D
, m_ConstantInt(DCst
)) || !match(E
, m_ConstantInt(ECst
)))
416 ICmpInst::Predicate NewCC
= IsAnd
? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_NE
;
418 // Update E to the canonical form when D is a power of two and RHS is
420 // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
421 // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
423 ECst
= cast
<ConstantInt
>(ConstantExpr::getXor(DCst
, ECst
));
425 // If B or D is zero, skip because if LHS or RHS can be trivially folded by
426 // other folding rules and this pattern won't apply any more.
427 if (BCst
->getValue() == 0 || DCst
->getValue() == 0)
430 // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't
431 // deduce anything from it.
433 // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
434 if ((BCst
->getValue() & DCst
->getValue()) == 0)
437 // If the following two conditions are met:
439 // 1. mask B covers only a single bit that's not covered by mask D, that is,
440 // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of
441 // B and D has only one bit set) and,
443 // 2. RHS (and E) indicates that the rest of B's bits are zero (in other
444 // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0
446 // then that single bit in B must be one and thus the whole expression can be
448 // (A & (B | D)) == (B & (B ^ D)) | E.
451 // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
452 // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
453 if ((((BCst
->getValue() & DCst
->getValue()) & ECst
->getValue()) == 0) &&
454 (BCst
->getValue() & (BCst
->getValue() ^ DCst
->getValue())).isPowerOf2()) {
455 APInt BorD
= BCst
->getValue() | DCst
->getValue();
456 APInt BandBxorDorE
= (BCst
->getValue() & (BCst
->getValue() ^ DCst
->getValue())) |
458 Value
*NewMask
= ConstantInt::get(BCst
->getType(), BorD
);
459 Value
*NewMaskedValue
= ConstantInt::get(BCst
->getType(), BandBxorDorE
);
460 Value
*NewAnd
= Builder
.CreateAnd(A
, NewMask
);
461 return Builder
.CreateICmp(NewCC
, NewAnd
, NewMaskedValue
);
464 auto IsSubSetOrEqual
= [](ConstantInt
*C1
, ConstantInt
*C2
) {
465 return (C1
->getValue() & C2
->getValue()) == C1
->getValue();
467 auto IsSuperSetOrEqual
= [](ConstantInt
*C1
, ConstantInt
*C2
) {
468 return (C1
->getValue() & C2
->getValue()) == C2
->getValue();
471 // In the following, we consider only the cases where B is a superset of D, B
472 // is a subset of D, or B == D because otherwise there's at least one bit
473 // covered by B but not D, in which case we can't deduce much from it, so
474 // no folding (aside from the single must-be-one bit case right above.)
476 // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding.
477 if (!IsSubSetOrEqual(BCst
, DCst
) && !IsSuperSetOrEqual(BCst
, DCst
))
480 // At this point, either B is a superset of D, B is a subset of D or B == D.
482 // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict
483 // and the whole expression becomes false (or true if negated), otherwise, no
486 // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
487 // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
488 if (ECst
->isZero()) {
489 if (IsSubSetOrEqual(BCst
, DCst
))
490 return ConstantInt::get(LHS
->getType(), !IsAnd
);
494 // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B ==
495 // D. If B is a superset of (or equal to) D, since E is not zero, LHS is
496 // subsumed by RHS (RHS implies LHS.) So the whole expression becomes
498 // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
499 // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
500 if (IsSuperSetOrEqual(BCst
, DCst
))
502 // Otherwise, B is a subset of D. If B and E have a common bit set,
503 // ie. (B & E) != 0, then LHS is subsumed by RHS. For example.
504 // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
505 assert(IsSubSetOrEqual(BCst
, DCst
) && "Precondition due to above code");
506 if ((BCst
->getValue() & ECst
->getValue()) != 0)
508 // Otherwise, LHS and RHS contradict and the whole expression becomes false
509 // (or true if negated.) For example,
510 // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false.
511 // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false.
512 return ConstantInt::get(LHS
->getType(), !IsAnd
);
515 /// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
516 /// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
517 /// aren't of the common mask pattern type.
518 static Value
*foldLogOpOfMaskedICmpsAsymmetric(
519 ICmpInst
*LHS
, ICmpInst
*RHS
, bool IsAnd
, Value
*A
, Value
*B
, Value
*C
,
520 Value
*D
, Value
*E
, ICmpInst::Predicate PredL
, ICmpInst::Predicate PredR
,
521 unsigned LHSMask
, unsigned RHSMask
, InstCombiner::BuilderTy
&Builder
) {
522 assert(ICmpInst::isEquality(PredL
) && ICmpInst::isEquality(PredR
) &&
523 "Expected equality predicates for masked type of icmps.");
524 // Handle Mask_NotAllZeros-BMask_Mixed cases.
525 // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or
526 // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E)
527 // which gets swapped to
528 // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C).
530 LHSMask
= conjugateICmpMask(LHSMask
);
531 RHSMask
= conjugateICmpMask(RHSMask
);
533 if ((LHSMask
& Mask_NotAllZeros
) && (RHSMask
& BMask_Mixed
)) {
534 if (Value
*V
= foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
535 LHS
, RHS
, IsAnd
, A
, B
, C
, D
, E
,
536 PredL
, PredR
, Builder
)) {
539 } else if ((LHSMask
& BMask_Mixed
) && (RHSMask
& Mask_NotAllZeros
)) {
540 if (Value
*V
= foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
541 RHS
, LHS
, IsAnd
, A
, D
, E
, B
, C
,
542 PredR
, PredL
, Builder
)) {
549 /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
550 /// into a single (icmp(A & X) ==/!= Y).
551 static Value
*foldLogOpOfMaskedICmps(ICmpInst
*LHS
, ICmpInst
*RHS
, bool IsAnd
,
552 InstCombiner::BuilderTy
&Builder
) {
553 Value
*A
= nullptr, *B
= nullptr, *C
= nullptr, *D
= nullptr, *E
= nullptr;
554 ICmpInst::Predicate PredL
= LHS
->getPredicate(), PredR
= RHS
->getPredicate();
555 Optional
<std::pair
<unsigned, unsigned>> MaskPair
=
556 getMaskedTypeForICmpPair(A
, B
, C
, D
, E
, LHS
, RHS
, PredL
, PredR
);
559 assert(ICmpInst::isEquality(PredL
) && ICmpInst::isEquality(PredR
) &&
560 "Expected equality predicates for masked type of icmps.");
561 unsigned LHSMask
= MaskPair
->first
;
562 unsigned RHSMask
= MaskPair
->second
;
563 unsigned Mask
= LHSMask
& RHSMask
;
565 // Even if the two sides don't share a common pattern, check if folding can
567 if (Value
*V
= foldLogOpOfMaskedICmpsAsymmetric(
568 LHS
, RHS
, IsAnd
, A
, B
, C
, D
, E
, PredL
, PredR
, LHSMask
, RHSMask
,
574 // In full generality:
575 // (icmp (A & B) Op C) | (icmp (A & D) Op E)
576 // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
578 // If the latter can be converted into (icmp (A & X) Op Y) then the former is
579 // equivalent to (icmp (A & X) !Op Y).
581 // Therefore, we can pretend for the rest of this function that we're dealing
582 // with the conjunction, provided we flip the sense of any comparisons (both
583 // input and output).
585 // In most cases we're going to produce an EQ for the "&&" case.
586 ICmpInst::Predicate NewCC
= IsAnd
? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_NE
;
588 // Convert the masking analysis into its equivalent with negated
590 Mask
= conjugateICmpMask(Mask
);
593 if (Mask
& Mask_AllZeros
) {
594 // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
595 // -> (icmp eq (A & (B|D)), 0)
596 Value
*NewOr
= Builder
.CreateOr(B
, D
);
597 Value
*NewAnd
= Builder
.CreateAnd(A
, NewOr
);
598 // We can't use C as zero because we might actually handle
599 // (icmp ne (A & B), B) & (icmp ne (A & D), D)
600 // with B and D, having a single bit set.
601 Value
*Zero
= Constant::getNullValue(A
->getType());
602 return Builder
.CreateICmp(NewCC
, NewAnd
, Zero
);
604 if (Mask
& BMask_AllOnes
) {
605 // (icmp eq (A & B), B) & (icmp eq (A & D), D)
606 // -> (icmp eq (A & (B|D)), (B|D))
607 Value
*NewOr
= Builder
.CreateOr(B
, D
);
608 Value
*NewAnd
= Builder
.CreateAnd(A
, NewOr
);
609 return Builder
.CreateICmp(NewCC
, NewAnd
, NewOr
);
611 if (Mask
& AMask_AllOnes
) {
612 // (icmp eq (A & B), A) & (icmp eq (A & D), A)
613 // -> (icmp eq (A & (B&D)), A)
614 Value
*NewAnd1
= Builder
.CreateAnd(B
, D
);
615 Value
*NewAnd2
= Builder
.CreateAnd(A
, NewAnd1
);
616 return Builder
.CreateICmp(NewCC
, NewAnd2
, A
);
619 // Remaining cases assume at least that B and D are constant, and depend on
620 // their actual values. This isn't strictly necessary, just a "handle the
621 // easy cases for now" decision.
622 ConstantInt
*BCst
, *DCst
;
623 if (!match(B
, m_ConstantInt(BCst
)) || !match(D
, m_ConstantInt(DCst
)))
626 if (Mask
& (Mask_NotAllZeros
| BMask_NotAllOnes
)) {
627 // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
628 // (icmp ne (A & B), B) & (icmp ne (A & D), D)
629 // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
630 // Only valid if one of the masks is a superset of the other (check "B&D" is
631 // the same as either B or D).
632 APInt NewMask
= BCst
->getValue() & DCst
->getValue();
634 if (NewMask
== BCst
->getValue())
636 else if (NewMask
== DCst
->getValue())
640 if (Mask
& AMask_NotAllOnes
) {
641 // (icmp ne (A & B), B) & (icmp ne (A & D), D)
642 // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
643 // Only valid if one of the masks is a superset of the other (check "B|D" is
644 // the same as either B or D).
645 APInt NewMask
= BCst
->getValue() | DCst
->getValue();
647 if (NewMask
== BCst
->getValue())
649 else if (NewMask
== DCst
->getValue())
653 if (Mask
& BMask_Mixed
) {
654 // (icmp eq (A & B), C) & (icmp eq (A & D), E)
655 // We already know that B & C == C && D & E == E.
656 // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
657 // C and E, which are shared by both the mask B and the mask D, don't
658 // contradict, then we can transform to
659 // -> (icmp eq (A & (B|D)), (C|E))
660 // Currently, we only handle the case of B, C, D, and E being constant.
661 // We can't simply use C and E because we might actually handle
662 // (icmp ne (A & B), B) & (icmp eq (A & D), D)
663 // with B and D, having a single bit set.
664 ConstantInt
*CCst
, *ECst
;
665 if (!match(C
, m_ConstantInt(CCst
)) || !match(E
, m_ConstantInt(ECst
)))
668 CCst
= cast
<ConstantInt
>(ConstantExpr::getXor(BCst
, CCst
));
670 ECst
= cast
<ConstantInt
>(ConstantExpr::getXor(DCst
, ECst
));
672 // If there is a conflict, we should actually return a false for the
674 if (((BCst
->getValue() & DCst
->getValue()) &
675 (CCst
->getValue() ^ ECst
->getValue())).getBoolValue())
676 return ConstantInt::get(LHS
->getType(), !IsAnd
);
678 Value
*NewOr1
= Builder
.CreateOr(B
, D
);
679 Value
*NewOr2
= ConstantExpr::getOr(CCst
, ECst
);
680 Value
*NewAnd
= Builder
.CreateAnd(A
, NewOr1
);
681 return Builder
.CreateICmp(NewCC
, NewAnd
, NewOr2
);
687 /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp.
688 /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
689 /// If \p Inverted is true then the check is for the inverted range, e.g.
690 /// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
691 Value
*InstCombinerImpl::simplifyRangeCheck(ICmpInst
*Cmp0
, ICmpInst
*Cmp1
,
693 // Check the lower range comparison, e.g. x >= 0
694 // InstCombine already ensured that if there is a constant it's on the RHS.
695 ConstantInt
*RangeStart
= dyn_cast
<ConstantInt
>(Cmp0
->getOperand(1));
699 ICmpInst::Predicate Pred0
= (Inverted
? Cmp0
->getInversePredicate() :
700 Cmp0
->getPredicate());
702 // Accept x > -1 or x >= 0 (after potentially inverting the predicate).
703 if (!((Pred0
== ICmpInst::ICMP_SGT
&& RangeStart
->isMinusOne()) ||
704 (Pred0
== ICmpInst::ICMP_SGE
&& RangeStart
->isZero())))
707 ICmpInst::Predicate Pred1
= (Inverted
? Cmp1
->getInversePredicate() :
708 Cmp1
->getPredicate());
710 Value
*Input
= Cmp0
->getOperand(0);
712 if (Cmp1
->getOperand(0) == Input
) {
713 // For the upper range compare we have: icmp x, n
714 RangeEnd
= Cmp1
->getOperand(1);
715 } else if (Cmp1
->getOperand(1) == Input
) {
716 // For the upper range compare we have: icmp n, x
717 RangeEnd
= Cmp1
->getOperand(0);
718 Pred1
= ICmpInst::getSwappedPredicate(Pred1
);
723 // Check the upper range comparison, e.g. x < n
724 ICmpInst::Predicate NewPred
;
726 case ICmpInst::ICMP_SLT
: NewPred
= ICmpInst::ICMP_ULT
; break;
727 case ICmpInst::ICMP_SLE
: NewPred
= ICmpInst::ICMP_ULE
; break;
728 default: return nullptr;
731 // This simplification is only valid if the upper range is not negative.
732 KnownBits Known
= computeKnownBits(RangeEnd
, /*Depth=*/0, Cmp1
);
733 if (!Known
.isNonNegative())
737 NewPred
= ICmpInst::getInversePredicate(NewPred
);
739 return Builder
.CreateICmp(NewPred
, Input
, RangeEnd
);
743 foldAndOrOfEqualityCmpsWithConstants(ICmpInst
*LHS
, ICmpInst
*RHS
,
745 InstCombiner::BuilderTy
&Builder
) {
746 Value
*X
= LHS
->getOperand(0);
747 if (X
!= RHS
->getOperand(0))
750 const APInt
*C1
, *C2
;
751 if (!match(LHS
->getOperand(1), m_APInt(C1
)) ||
752 !match(RHS
->getOperand(1), m_APInt(C2
)))
755 // We only handle (X != C1 && X != C2) and (X == C1 || X == C2).
756 ICmpInst::Predicate Pred
= LHS
->getPredicate();
757 if (Pred
!= RHS
->getPredicate())
759 if (JoinedByAnd
&& Pred
!= ICmpInst::ICMP_NE
)
761 if (!JoinedByAnd
&& Pred
!= ICmpInst::ICMP_EQ
)
764 // The larger unsigned constant goes on the right.
768 APInt Xor
= *C1
^ *C2
;
769 if (Xor
.isPowerOf2()) {
770 // If LHSC and RHSC differ by only one bit, then set that bit in X and
771 // compare against the larger constant:
772 // (X == C1 || X == C2) --> (X | (C1 ^ C2)) == C2
773 // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2
774 // We choose an 'or' with a Pow2 constant rather than the inverse mask with
775 // 'and' because that may lead to smaller codegen from a smaller constant.
776 Value
*Or
= Builder
.CreateOr(X
, ConstantInt::get(X
->getType(), Xor
));
777 return Builder
.CreateICmp(Pred
, Or
, ConstantInt::get(X
->getType(), *C2
));
780 // Special case: get the ordering right when the values wrap around zero.
781 // Ie, we assumed the constants were unsigned when swapping earlier.
782 if (C1
->isNullValue() && C2
->isAllOnesValue())
785 if (*C1
== *C2
- 1) {
786 // (X == 13 || X == 14) --> X - 13 <=u 1
787 // (X != 13 && X != 14) --> X - 13 >u 1
788 // An 'add' is the canonical IR form, so favor that over a 'sub'.
789 Value
*Add
= Builder
.CreateAdd(X
, ConstantInt::get(X
->getType(), -(*C1
)));
790 auto NewPred
= JoinedByAnd
? ICmpInst::ICMP_UGT
: ICmpInst::ICMP_ULE
;
791 return Builder
.CreateICmp(NewPred
, Add
, ConstantInt::get(X
->getType(), 1));
797 // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
798 // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
799 Value
*InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst
*LHS
,
804 CmpInst::Predicate Pred
= IsAnd
? CmpInst::ICMP_NE
: CmpInst::ICMP_EQ
;
805 if (LHS
->getPredicate() != Pred
|| RHS
->getPredicate() != Pred
)
808 if (!match(LHS
->getOperand(1), m_Zero()) ||
809 !match(RHS
->getOperand(1), m_Zero()))
812 Value
*L1
, *L2
, *R1
, *R2
;
813 if (match(LHS
->getOperand(0), m_And(m_Value(L1
), m_Value(L2
))) &&
814 match(RHS
->getOperand(0), m_And(m_Value(R1
), m_Value(R2
)))) {
815 if (L1
== R2
|| L2
== R2
)
821 isKnownToBeAPowerOfTwo(L2
, false, 0, CxtI
) &&
822 isKnownToBeAPowerOfTwo(R2
, false, 0, CxtI
)) {
823 // If this is a logical and/or, then we must prevent propagation of a
824 // poison value from the RHS by inserting freeze.
826 R2
= Builder
.CreateFreeze(R2
);
827 Value
*Mask
= Builder
.CreateOr(L2
, R2
);
828 Value
*Masked
= Builder
.CreateAnd(L1
, Mask
);
829 auto NewPred
= IsAnd
? CmpInst::ICMP_EQ
: CmpInst::ICMP_NE
;
830 return Builder
.CreateICmp(NewPred
, Masked
, Mask
);
840 /// Where Y is checking that all the high bits (covered by a mask 4294967168)
841 /// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
842 /// Pattern can be one of:
843 /// %t = add i32 %arg, 128
844 /// %r = icmp ult i32 %t, 256
846 /// %t0 = shl i32 %arg, 24
847 /// %t1 = ashr i32 %t0, 24
848 /// %r = icmp eq i32 %t1, %arg
850 /// %t0 = trunc i32 %arg to i8
851 /// %t1 = sext i8 %t0 to i32
852 /// %r = icmp eq i32 %t1, %arg
853 /// This pattern is a signed truncation check.
855 /// And X is checking that some bit in that same mask is zero.
856 /// I.e. can be one of:
857 /// %r = icmp sgt i32 %arg, -1
859 /// %t = and i32 %arg, 2147483648
860 /// %r = icmp eq i32 %t, 0
862 /// Since we are checking that all the bits in that mask are the same,
863 /// and a particular bit is zero, what we are really checking is that all the
864 /// masked bits are zero.
865 /// So this should be transformed to:
866 /// %r = icmp ult i32 %arg, 128
867 static Value
*foldSignedTruncationCheck(ICmpInst
*ICmp0
, ICmpInst
*ICmp1
,
869 InstCombiner::BuilderTy
&Builder
) {
870 assert(CxtI
.getOpcode() == Instruction::And
);
872 // Match icmp ult (add %arg, C01), C1 (C1 == C01 << 1; powers of two)
873 auto tryToMatchSignedTruncationCheck
= [](ICmpInst
*ICmp
, Value
*&X
,
874 APInt
&SignBitMask
) -> bool {
875 CmpInst::Predicate Pred
;
876 const APInt
*I01
, *I1
; // powers of two; I1 == I01 << 1
878 m_ICmp(Pred
, m_Add(m_Value(X
), m_Power2(I01
)), m_Power2(I1
))) &&
879 Pred
== ICmpInst::ICMP_ULT
&& I1
->ugt(*I01
) && I01
->shl(1) == *I1
))
881 // Which bit is the new sign bit as per the 'signed truncation' pattern?
886 // One icmp needs to be 'signed truncation check'.
887 // We need to match this first, else we will mismatch commutative cases.
891 if (tryToMatchSignedTruncationCheck(ICmp1
, X1
, HighestBit
))
893 else if (tryToMatchSignedTruncationCheck(ICmp0
, X1
, HighestBit
))
898 assert(HighestBit
.isPowerOf2() && "expected to be power of two (non-zero)");
900 // Try to match/decompose into: icmp eq (X & Mask), 0
901 auto tryToDecompose
= [](ICmpInst
*ICmp
, Value
*&X
,
902 APInt
&UnsetBitsMask
) -> bool {
903 CmpInst::Predicate Pred
= ICmp
->getPredicate();
904 // Can it be decomposed into icmp eq (X & Mask), 0 ?
905 if (llvm::decomposeBitTestICmp(ICmp
->getOperand(0), ICmp
->getOperand(1),
906 Pred
, X
, UnsetBitsMask
,
907 /*LookThroughTrunc=*/false) &&
908 Pred
== ICmpInst::ICMP_EQ
)
910 // Is it icmp eq (X & Mask), 0 already?
912 if (match(ICmp
, m_ICmp(Pred
, m_And(m_Value(X
), m_APInt(Mask
)), m_Zero())) &&
913 Pred
== ICmpInst::ICMP_EQ
) {
914 UnsetBitsMask
= *Mask
;
920 // And the other icmp needs to be decomposable into a bit test.
923 if (!tryToDecompose(OtherICmp
, X0
, UnsetBitsMask
))
926 assert(!UnsetBitsMask
.isNullValue() && "empty mask makes no sense.");
928 // Are they working on the same value?
933 } else if (match(X0
, m_Trunc(m_Specific(X1
)))) {
934 UnsetBitsMask
= UnsetBitsMask
.zext(X1
->getType()->getScalarSizeInBits());
939 // So which bits should be uniform as per the 'signed truncation check'?
940 // (all the bits starting with (i.e. including) HighestBit)
941 APInt SignBitsMask
= ~(HighestBit
- 1U);
943 // UnsetBitsMask must have some common bits with SignBitsMask,
944 if (!UnsetBitsMask
.intersects(SignBitsMask
))
947 // Does UnsetBitsMask contain any bits outside of SignBitsMask?
948 if (!UnsetBitsMask
.isSubsetOf(SignBitsMask
)) {
949 APInt OtherHighestBit
= (~UnsetBitsMask
) + 1U;
950 if (!OtherHighestBit
.isPowerOf2())
952 HighestBit
= APIntOps::umin(HighestBit
, OtherHighestBit
);
954 // Else, if it does not, then all is ok as-is.
956 // %r = icmp ult %X, SignBit
957 return Builder
.CreateICmpULT(X
, ConstantInt::get(X
->getType(), HighestBit
),
958 CxtI
.getName() + ".simplified");
961 /// Reduce a pair of compares that check if a value has exactly 1 bit set.
962 static Value
*foldIsPowerOf2(ICmpInst
*Cmp0
, ICmpInst
*Cmp1
, bool JoinedByAnd
,
963 InstCombiner::BuilderTy
&Builder
) {
964 // Handle 'and' / 'or' commutation: make the equality check the first operand.
965 if (JoinedByAnd
&& Cmp1
->getPredicate() == ICmpInst::ICMP_NE
)
966 std::swap(Cmp0
, Cmp1
);
967 else if (!JoinedByAnd
&& Cmp1
->getPredicate() == ICmpInst::ICMP_EQ
)
968 std::swap(Cmp0
, Cmp1
);
970 // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1
971 CmpInst::Predicate Pred0
, Pred1
;
973 if (JoinedByAnd
&& match(Cmp0
, m_ICmp(Pred0
, m_Value(X
), m_ZeroInt())) &&
974 match(Cmp1
, m_ICmp(Pred1
, m_Intrinsic
<Intrinsic::ctpop
>(m_Specific(X
)),
975 m_SpecificInt(2))) &&
976 Pred0
== ICmpInst::ICMP_NE
&& Pred1
== ICmpInst::ICMP_ULT
) {
977 Value
*CtPop
= Cmp1
->getOperand(0);
978 return Builder
.CreateICmpEQ(CtPop
, ConstantInt::get(CtPop
->getType(), 1));
980 // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
981 if (!JoinedByAnd
&& match(Cmp0
, m_ICmp(Pred0
, m_Value(X
), m_ZeroInt())) &&
982 match(Cmp1
, m_ICmp(Pred1
, m_Intrinsic
<Intrinsic::ctpop
>(m_Specific(X
)),
983 m_SpecificInt(1))) &&
984 Pred0
== ICmpInst::ICMP_EQ
&& Pred1
== ICmpInst::ICMP_UGT
) {
985 Value
*CtPop
= Cmp1
->getOperand(0);
986 return Builder
.CreateICmpNE(CtPop
, ConstantInt::get(CtPop
->getType(), 1));
991 /// Commuted variants are assumed to be handled by calling this function again
992 /// with the parameters swapped.
993 static Value
*foldUnsignedUnderflowCheck(ICmpInst
*ZeroICmp
,
994 ICmpInst
*UnsignedICmp
, bool IsAnd
,
995 const SimplifyQuery
&Q
,
996 InstCombiner::BuilderTy
&Builder
) {
998 ICmpInst::Predicate EqPred
;
999 if (!match(ZeroICmp
, m_ICmp(EqPred
, m_Value(ZeroCmpOp
), m_Zero())) ||
1000 !ICmpInst::isEquality(EqPred
))
1003 auto IsKnownNonZero
= [&](Value
*V
) {
1004 return isKnownNonZero(V
, Q
.DL
, /*Depth=*/0, Q
.AC
, Q
.CxtI
, Q
.DT
);
1007 ICmpInst::Predicate UnsignedPred
;
1010 if (match(UnsignedICmp
,
1011 m_c_ICmp(UnsignedPred
, m_Specific(ZeroCmpOp
), m_Value(A
))) &&
1012 match(ZeroCmpOp
, m_c_Add(m_Specific(A
), m_Value(B
))) &&
1013 (ZeroICmp
->hasOneUse() || UnsignedICmp
->hasOneUse())) {
1014 auto GetKnownNonZeroAndOther
= [&](Value
*&NonZero
, Value
*&Other
) {
1015 if (!IsKnownNonZero(NonZero
))
1016 std::swap(NonZero
, Other
);
1017 return IsKnownNonZero(NonZero
);
1020 // Given ZeroCmpOp = (A + B)
1021 // ZeroCmpOp <= A && ZeroCmpOp != 0 --> (0-B) < A
1022 // ZeroCmpOp > A || ZeroCmpOp == 0 --> (0-B) >= A
1024 // ZeroCmpOp < A && ZeroCmpOp != 0 --> (0-X) < Y iff
1025 // ZeroCmpOp >= A || ZeroCmpOp == 0 --> (0-X) >= Y iff
1026 // with X being the value (A/B) that is known to be non-zero,
1027 // and Y being remaining value.
1028 if (UnsignedPred
== ICmpInst::ICMP_ULE
&& EqPred
== ICmpInst::ICMP_NE
&&
1030 return Builder
.CreateICmpULT(Builder
.CreateNeg(B
), A
);
1031 if (UnsignedPred
== ICmpInst::ICMP_ULT
&& EqPred
== ICmpInst::ICMP_NE
&&
1032 IsAnd
&& GetKnownNonZeroAndOther(B
, A
))
1033 return Builder
.CreateICmpULT(Builder
.CreateNeg(B
), A
);
1034 if (UnsignedPred
== ICmpInst::ICMP_UGT
&& EqPred
== ICmpInst::ICMP_EQ
&&
1036 return Builder
.CreateICmpUGE(Builder
.CreateNeg(B
), A
);
1037 if (UnsignedPred
== ICmpInst::ICMP_UGE
&& EqPred
== ICmpInst::ICMP_EQ
&&
1038 !IsAnd
&& GetKnownNonZeroAndOther(B
, A
))
1039 return Builder
.CreateICmpUGE(Builder
.CreateNeg(B
), A
);
1042 Value
*Base
, *Offset
;
1043 if (!match(ZeroCmpOp
, m_Sub(m_Value(Base
), m_Value(Offset
))))
1046 if (!match(UnsignedICmp
,
1047 m_c_ICmp(UnsignedPred
, m_Specific(Base
), m_Specific(Offset
))) ||
1048 !ICmpInst::isUnsigned(UnsignedPred
))
1051 // Base >=/> Offset && (Base - Offset) != 0 <--> Base > Offset
1052 // (no overflow and not null)
1053 if ((UnsignedPred
== ICmpInst::ICMP_UGE
||
1054 UnsignedPred
== ICmpInst::ICMP_UGT
) &&
1055 EqPred
== ICmpInst::ICMP_NE
&& IsAnd
)
1056 return Builder
.CreateICmpUGT(Base
, Offset
);
1058 // Base <=/< Offset || (Base - Offset) == 0 <--> Base <= Offset
1059 // (overflow or null)
1060 if ((UnsignedPred
== ICmpInst::ICMP_ULE
||
1061 UnsignedPred
== ICmpInst::ICMP_ULT
) &&
1062 EqPred
== ICmpInst::ICMP_EQ
&& !IsAnd
)
1063 return Builder
.CreateICmpULE(Base
, Offset
);
1065 // Base <= Offset && (Base - Offset) != 0 --> Base < Offset
1066 if (UnsignedPred
== ICmpInst::ICMP_ULE
&& EqPred
== ICmpInst::ICMP_NE
&&
1068 return Builder
.CreateICmpULT(Base
, Offset
);
1070 // Base > Offset || (Base - Offset) == 0 --> Base >= Offset
1071 if (UnsignedPred
== ICmpInst::ICMP_UGT
&& EqPred
== ICmpInst::ICMP_EQ
&&
1073 return Builder
.CreateICmpUGE(Base
, Offset
);
1084 /// Match an extraction of bits from an integer.
1085 static Optional
<IntPart
> matchIntPart(Value
*V
) {
1087 if (!match(V
, m_OneUse(m_Trunc(m_Value(X
)))))
1090 unsigned NumOriginalBits
= X
->getType()->getScalarSizeInBits();
1091 unsigned NumExtractedBits
= V
->getType()->getScalarSizeInBits();
1094 // For a trunc(lshr Y, Shift) pattern, make sure we're only extracting bits
1095 // from Y, not any shifted-in zeroes.
1096 if (match(X
, m_OneUse(m_LShr(m_Value(Y
), m_APInt(Shift
)))) &&
1097 Shift
->ule(NumOriginalBits
- NumExtractedBits
))
1098 return {{Y
, (unsigned)Shift
->getZExtValue(), NumExtractedBits
}};
1099 return {{X
, 0, NumExtractedBits
}};
1102 /// Materialize an extraction of bits from an integer in IR.
1103 static Value
*extractIntPart(const IntPart
&P
, IRBuilderBase
&Builder
) {
1106 V
= Builder
.CreateLShr(V
, P
.StartBit
);
1107 Type
*TruncTy
= V
->getType()->getWithNewBitWidth(P
.NumBits
);
1108 if (TruncTy
!= V
->getType())
1109 V
= Builder
.CreateTrunc(V
, TruncTy
);
1113 /// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
1114 /// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
1115 /// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
1116 static Value
*foldEqOfParts(ICmpInst
*Cmp0
, ICmpInst
*Cmp1
, bool IsAnd
,
1117 InstCombiner::BuilderTy
&Builder
) {
1118 if (!Cmp0
->hasOneUse() || !Cmp1
->hasOneUse())
1121 CmpInst::Predicate Pred
= IsAnd
? CmpInst::ICMP_EQ
: CmpInst::ICMP_NE
;
1122 if (Cmp0
->getPredicate() != Pred
|| Cmp1
->getPredicate() != Pred
)
1125 Optional
<IntPart
> L0
= matchIntPart(Cmp0
->getOperand(0));
1126 Optional
<IntPart
> R0
= matchIntPart(Cmp0
->getOperand(1));
1127 Optional
<IntPart
> L1
= matchIntPart(Cmp1
->getOperand(0));
1128 Optional
<IntPart
> R1
= matchIntPart(Cmp1
->getOperand(1));
1129 if (!L0
|| !R0
|| !L1
|| !R1
)
1132 // Make sure the LHS/RHS compare a part of the same value, possibly after
1134 if (L0
->From
!= L1
->From
|| R0
->From
!= R1
->From
) {
1135 if (L0
->From
!= R1
->From
|| R0
->From
!= L1
->From
)
1140 // Make sure the extracted parts are adjacent, canonicalizing to L0/R0 being
1141 // the low part and L1/R1 being the high part.
1142 if (L0
->StartBit
+ L0
->NumBits
!= L1
->StartBit
||
1143 R0
->StartBit
+ R0
->NumBits
!= R1
->StartBit
) {
1144 if (L1
->StartBit
+ L1
->NumBits
!= L0
->StartBit
||
1145 R1
->StartBit
+ R1
->NumBits
!= R0
->StartBit
)
1151 // We can simplify to a comparison of these larger parts of the integers.
1152 IntPart L
= {L0
->From
, L0
->StartBit
, L0
->NumBits
+ L1
->NumBits
};
1153 IntPart R
= {R0
->From
, R0
->StartBit
, R0
->NumBits
+ R1
->NumBits
};
1154 Value
*LValue
= extractIntPart(L
, Builder
);
1155 Value
*RValue
= extractIntPart(R
, Builder
);
1156 return Builder
.CreateICmp(Pred
, LValue
, RValue
);
1159 /// Reduce logic-of-compares with equality to a constant by substituting a
1160 /// common operand with the constant. Callers are expected to call this with
1161 /// Cmp0/Cmp1 switched to handle logic op commutativity.
1162 static Value
*foldAndOrOfICmpsWithConstEq(ICmpInst
*Cmp0
, ICmpInst
*Cmp1
,
1163 BinaryOperator
&Logic
,
1164 InstCombiner::BuilderTy
&Builder
,
1165 const SimplifyQuery
&Q
) {
1166 bool IsAnd
= Logic
.getOpcode() == Instruction::And
;
1167 assert((IsAnd
|| Logic
.getOpcode() == Instruction::Or
) && "Wrong logic op");
1169 // Match an equality compare with a non-poison constant as Cmp0.
1170 // Also, give up if the compare can be constant-folded to avoid looping.
1171 ICmpInst::Predicate Pred0
;
1174 if (!match(Cmp0
, m_ICmp(Pred0
, m_Value(X
), m_Constant(C
))) ||
1175 !isGuaranteedNotToBeUndefOrPoison(C
) || isa
<Constant
>(X
))
1177 if ((IsAnd
&& Pred0
!= ICmpInst::ICMP_EQ
) ||
1178 (!IsAnd
&& Pred0
!= ICmpInst::ICMP_NE
))
1181 // The other compare must include a common operand (X). Canonicalize the
1182 // common operand as operand 1 (Pred1 is swapped if the common operand was
1185 ICmpInst::Predicate Pred1
;
1186 if (!match(Cmp1
, m_c_ICmp(Pred1
, m_Value(Y
), m_Deferred(X
))))
1189 // Replace variable with constant value equivalence to remove a variable use:
1190 // (X == C) && (Y Pred1 X) --> (X == C) && (Y Pred1 C)
1191 // (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C)
1192 // Can think of the 'or' substitution with the 'and' bool equivalent:
1193 // A || B --> A || (!A && B)
1194 Value
*SubstituteCmp
= SimplifyICmpInst(Pred1
, Y
, C
, Q
);
1195 if (!SubstituteCmp
) {
1196 // If we need to create a new instruction, require that the old compare can
1198 if (!Cmp1
->hasOneUse())
1200 SubstituteCmp
= Builder
.CreateICmp(Pred1
, Y
, C
);
1202 return Builder
.CreateBinOp(Logic
.getOpcode(), Cmp0
, SubstituteCmp
);
1205 /// Fold (icmp)&(icmp) if possible.
1206 Value
*InstCombinerImpl::foldAndOfICmps(ICmpInst
*LHS
, ICmpInst
*RHS
,
1207 BinaryOperator
&And
) {
1208 const SimplifyQuery Q
= SQ
.getWithInstruction(&And
);
1210 // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
1211 // if K1 and K2 are a one-bit mask.
1212 if (Value
*V
= foldAndOrOfICmpsOfAndWithPow2(LHS
, RHS
, &And
,
1216 ICmpInst::Predicate PredL
= LHS
->getPredicate(), PredR
= RHS
->getPredicate();
1218 // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
1219 if (predicatesFoldable(PredL
, PredR
)) {
1220 if (LHS
->getOperand(0) == RHS
->getOperand(1) &&
1221 LHS
->getOperand(1) == RHS
->getOperand(0))
1222 LHS
->swapOperands();
1223 if (LHS
->getOperand(0) == RHS
->getOperand(0) &&
1224 LHS
->getOperand(1) == RHS
->getOperand(1)) {
1225 Value
*Op0
= LHS
->getOperand(0), *Op1
= LHS
->getOperand(1);
1226 unsigned Code
= getICmpCode(LHS
) & getICmpCode(RHS
);
1227 bool IsSigned
= LHS
->isSigned() || RHS
->isSigned();
1228 return getNewICmpValue(Code
, IsSigned
, Op0
, Op1
, Builder
);
1232 // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
1233 if (Value
*V
= foldLogOpOfMaskedICmps(LHS
, RHS
, true, Builder
))
1236 if (Value
*V
= foldAndOrOfICmpsWithConstEq(LHS
, RHS
, And
, Builder
, Q
))
1238 if (Value
*V
= foldAndOrOfICmpsWithConstEq(RHS
, LHS
, And
, Builder
, Q
))
1241 // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
1242 if (Value
*V
= simplifyRangeCheck(LHS
, RHS
, /*Inverted=*/false))
1245 // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n
1246 if (Value
*V
= simplifyRangeCheck(RHS
, LHS
, /*Inverted=*/false))
1249 if (Value
*V
= foldAndOrOfEqualityCmpsWithConstants(LHS
, RHS
, true, Builder
))
1252 if (Value
*V
= foldSignedTruncationCheck(LHS
, RHS
, And
, Builder
))
1255 if (Value
*V
= foldIsPowerOf2(LHS
, RHS
, true /* JoinedByAnd */, Builder
))
1259 foldUnsignedUnderflowCheck(LHS
, RHS
, /*IsAnd=*/true, Q
, Builder
))
1262 foldUnsignedUnderflowCheck(RHS
, LHS
, /*IsAnd=*/true, Q
, Builder
))
1265 if (Value
*X
= foldEqOfParts(LHS
, RHS
, /*IsAnd=*/true, Builder
))
1268 // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
1269 Value
*LHS0
= LHS
->getOperand(0), *RHS0
= RHS
->getOperand(0);
1271 ConstantInt
*LHSC
, *RHSC
;
1272 if (!match(LHS
->getOperand(1), m_ConstantInt(LHSC
)) ||
1273 !match(RHS
->getOperand(1), m_ConstantInt(RHSC
)))
1276 if (LHSC
== RHSC
&& PredL
== PredR
) {
1277 // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
1278 // where C is a power of 2 or
1279 // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
1280 if ((PredL
== ICmpInst::ICMP_ULT
&& LHSC
->getValue().isPowerOf2()) ||
1281 (PredL
== ICmpInst::ICMP_EQ
&& LHSC
->isZero())) {
1282 Value
*NewOr
= Builder
.CreateOr(LHS0
, RHS0
);
1283 return Builder
.CreateICmp(PredL
, NewOr
, LHSC
);
1287 // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
1288 // where CMAX is the all ones value for the truncated type,
1289 // iff the lower bits of C2 and CA are zero.
1290 if (PredL
== ICmpInst::ICMP_EQ
&& PredL
== PredR
&& LHS
->hasOneUse() &&
1293 ConstantInt
*AndC
, *SmallC
= nullptr, *BigC
= nullptr;
1295 // (trunc x) == C1 & (and x, CA) == C2
1296 // (and x, CA) == C2 & (trunc x) == C1
1297 if (match(RHS0
, m_Trunc(m_Value(V
))) &&
1298 match(LHS0
, m_And(m_Specific(V
), m_ConstantInt(AndC
)))) {
1301 } else if (match(LHS0
, m_Trunc(m_Value(V
))) &&
1302 match(RHS0
, m_And(m_Specific(V
), m_ConstantInt(AndC
)))) {
1307 if (SmallC
&& BigC
) {
1308 unsigned BigBitSize
= BigC
->getType()->getBitWidth();
1309 unsigned SmallBitSize
= SmallC
->getType()->getBitWidth();
1311 // Check that the low bits are zero.
1312 APInt Low
= APInt::getLowBitsSet(BigBitSize
, SmallBitSize
);
1313 if ((Low
& AndC
->getValue()).isNullValue() &&
1314 (Low
& BigC
->getValue()).isNullValue()) {
1315 Value
*NewAnd
= Builder
.CreateAnd(V
, Low
| AndC
->getValue());
1316 APInt N
= SmallC
->getValue().zext(BigBitSize
) | BigC
->getValue();
1317 Value
*NewVal
= ConstantInt::get(AndC
->getType()->getContext(), N
);
1318 return Builder
.CreateICmp(PredL
, NewAnd
, NewVal
);
1323 // From here on, we only handle:
1324 // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
1328 // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
1329 if (PredL
== ICmpInst::ICMP_UGE
|| PredL
== ICmpInst::ICMP_ULE
||
1330 PredR
== ICmpInst::ICMP_UGE
|| PredR
== ICmpInst::ICMP_ULE
||
1331 PredL
== ICmpInst::ICMP_SGE
|| PredL
== ICmpInst::ICMP_SLE
||
1332 PredR
== ICmpInst::ICMP_SGE
|| PredR
== ICmpInst::ICMP_SLE
)
1335 // We can't fold (ugt x, C) & (sgt x, C2).
1336 if (!predicatesFoldable(PredL
, PredR
))
1339 // Ensure that the larger constant is on the RHS.
1341 if (CmpInst::isSigned(PredL
) ||
1342 (ICmpInst::isEquality(PredL
) && CmpInst::isSigned(PredR
)))
1343 ShouldSwap
= LHSC
->getValue().sgt(RHSC
->getValue());
1345 ShouldSwap
= LHSC
->getValue().ugt(RHSC
->getValue());
1348 std::swap(LHS
, RHS
);
1349 std::swap(LHSC
, RHSC
);
1350 std::swap(PredL
, PredR
);
1353 // At this point, we know we have two icmp instructions
1354 // comparing a value against two constants and and'ing the result
1355 // together. Because of the above check, we know that we only have
1356 // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
1357 // (from the icmp folding check above), that the two constants
1358 // are not equal and that the larger constant is on the RHS
1359 assert(LHSC
!= RHSC
&& "Compares not folded above?");
1363 llvm_unreachable("Unknown integer condition code!");
1364 case ICmpInst::ICMP_NE
:
1367 llvm_unreachable("Unknown integer condition code!");
1368 case ICmpInst::ICMP_ULT
:
1369 // (X != 13 & X u< 14) -> X < 13
1370 if (LHSC
->getValue() == (RHSC
->getValue() - 1))
1371 return Builder
.CreateICmpULT(LHS0
, LHSC
);
1372 if (LHSC
->isZero()) // (X != 0 & X u< C) -> X-1 u< C-1
1373 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue(),
1375 break; // (X != 13 & X u< 15) -> no change
1376 case ICmpInst::ICMP_SLT
:
1377 // (X != 13 & X s< 14) -> X < 13
1378 if (LHSC
->getValue() == (RHSC
->getValue() - 1))
1379 return Builder
.CreateICmpSLT(LHS0
, LHSC
);
1380 // (X != INT_MIN & X s< C) -> X-(INT_MIN+1) u< (C-(INT_MIN+1))
1381 if (LHSC
->isMinValue(true))
1382 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue(),
1384 break; // (X != 13 & X s< 15) -> no change
1385 case ICmpInst::ICMP_NE
:
1386 // Potential folds for this case should already be handled.
1390 case ICmpInst::ICMP_UGT
:
1393 llvm_unreachable("Unknown integer condition code!");
1394 case ICmpInst::ICMP_NE
:
1395 // (X u> 13 & X != 14) -> X u> 14
1396 if (RHSC
->getValue() == (LHSC
->getValue() + 1))
1397 return Builder
.CreateICmp(PredL
, LHS0
, RHSC
);
1398 // X u> C & X != UINT_MAX -> (X-(C+1)) u< UINT_MAX-(C+1)
1399 if (RHSC
->isMaxValue(false))
1400 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue(),
1402 break; // (X u> 13 & X != 15) -> no change
1403 case ICmpInst::ICMP_ULT
: // (X u> 13 & X u< 15) -> (X-14) u< 1
1404 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue(),
1408 case ICmpInst::ICMP_SGT
:
1411 llvm_unreachable("Unknown integer condition code!");
1412 case ICmpInst::ICMP_NE
:
1413 // (X s> 13 & X != 14) -> X s> 14
1414 if (RHSC
->getValue() == (LHSC
->getValue() + 1))
1415 return Builder
.CreateICmp(PredL
, LHS0
, RHSC
);
1416 // X s> C & X != INT_MAX -> (X-(C+1)) u< INT_MAX-(C+1)
1417 if (RHSC
->isMaxValue(true))
1418 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue(),
1420 break; // (X s> 13 & X != 15) -> no change
1421 case ICmpInst::ICMP_SLT
: // (X s> 13 & X s< 15) -> (X-14) u< 1
1422 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue(), true,
1431 Value
*InstCombinerImpl::foldLogicOfFCmps(FCmpInst
*LHS
, FCmpInst
*RHS
,
1433 Value
*LHS0
= LHS
->getOperand(0), *LHS1
= LHS
->getOperand(1);
1434 Value
*RHS0
= RHS
->getOperand(0), *RHS1
= RHS
->getOperand(1);
1435 FCmpInst::Predicate PredL
= LHS
->getPredicate(), PredR
= RHS
->getPredicate();
1437 if (LHS0
== RHS1
&& RHS0
== LHS1
) {
1438 // Swap RHS operands to match LHS.
1439 PredR
= FCmpInst::getSwappedPredicate(PredR
);
1440 std::swap(RHS0
, RHS1
);
1443 // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
1444 // Suppose the relation between x and y is R, where R is one of
1445 // U(1000), L(0100), G(0010) or E(0001), and CC0 and CC1 are the bitmasks for
1446 // testing the desired relations.
1448 // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
1449 // bool(R & CC0) && bool(R & CC1)
1450 // = bool((R & CC0) & (R & CC1))
1451 // = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency
1453 // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
1454 // bool(R & CC0) || bool(R & CC1)
1455 // = bool((R & CC0) | (R & CC1))
1456 // = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;)
1457 if (LHS0
== RHS0
&& LHS1
== RHS1
) {
1458 unsigned FCmpCodeL
= getFCmpCode(PredL
);
1459 unsigned FCmpCodeR
= getFCmpCode(PredR
);
1460 unsigned NewPred
= IsAnd
? FCmpCodeL
& FCmpCodeR
: FCmpCodeL
| FCmpCodeR
;
1461 return getFCmpValue(NewPred
, LHS0
, LHS1
, Builder
);
1464 if ((PredL
== FCmpInst::FCMP_ORD
&& PredR
== FCmpInst::FCMP_ORD
&& IsAnd
) ||
1465 (PredL
== FCmpInst::FCMP_UNO
&& PredR
== FCmpInst::FCMP_UNO
&& !IsAnd
)) {
1466 if (LHS0
->getType() != RHS0
->getType())
1469 // FCmp canonicalization ensures that (fcmp ord/uno X, X) and
1470 // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0).
1471 if (match(LHS1
, m_PosZeroFP()) && match(RHS1
, m_PosZeroFP()))
1472 // Ignore the constants because they are obviously not NANs:
1473 // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y)
1474 // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y)
1475 return Builder
.CreateFCmp(PredL
, LHS0
, RHS0
);
1481 /// This a limited reassociation for a special case (see above) where we are
1482 /// checking if two values are either both NAN (unordered) or not-NAN (ordered).
1483 /// This could be handled more generally in '-reassociation', but it seems like
1484 /// an unlikely pattern for a large number of logic ops and fcmps.
1485 static Instruction
*reassociateFCmps(BinaryOperator
&BO
,
1486 InstCombiner::BuilderTy
&Builder
) {
1487 Instruction::BinaryOps Opcode
= BO
.getOpcode();
1488 assert((Opcode
== Instruction::And
|| Opcode
== Instruction::Or
) &&
1489 "Expecting and/or op for fcmp transform");
1491 // There are 4 commuted variants of the pattern. Canonicalize operands of this
1492 // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
1493 Value
*Op0
= BO
.getOperand(0), *Op1
= BO
.getOperand(1), *X
;
1494 FCmpInst::Predicate Pred
;
1495 if (match(Op1
, m_FCmp(Pred
, m_Value(), m_AnyZeroFP())))
1496 std::swap(Op0
, Op1
);
1498 // Match inner binop and the predicate for combining 2 NAN checks into 1.
1499 BinaryOperator
*BO1
;
1500 FCmpInst::Predicate NanPred
= Opcode
== Instruction::And
? FCmpInst::FCMP_ORD
1501 : FCmpInst::FCMP_UNO
;
1502 if (!match(Op0
, m_FCmp(Pred
, m_Value(X
), m_AnyZeroFP())) || Pred
!= NanPred
||
1503 !match(Op1
, m_BinOp(BO1
)) || BO1
->getOpcode() != Opcode
)
1506 // The inner logic op must have a matching fcmp operand.
1507 Value
*BO10
= BO1
->getOperand(0), *BO11
= BO1
->getOperand(1), *Y
;
1508 if (!match(BO10
, m_FCmp(Pred
, m_Value(Y
), m_AnyZeroFP())) ||
1509 Pred
!= NanPred
|| X
->getType() != Y
->getType())
1510 std::swap(BO10
, BO11
);
1512 if (!match(BO10
, m_FCmp(Pred
, m_Value(Y
), m_AnyZeroFP())) ||
1513 Pred
!= NanPred
|| X
->getType() != Y
->getType())
1516 // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
1517 // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
1518 Value
*NewFCmp
= Builder
.CreateFCmp(Pred
, X
, Y
);
1519 if (auto *NewFCmpInst
= dyn_cast
<FCmpInst
>(NewFCmp
)) {
1520 // Intersect FMF from the 2 source fcmps.
1521 NewFCmpInst
->copyIRFlags(Op0
);
1522 NewFCmpInst
->andIRFlags(BO10
);
1524 return BinaryOperator::Create(Opcode
, NewFCmp
, BO11
);
1527 /// Match De Morgan's Laws:
1528 /// (~A & ~B) == (~(A | B))
1529 /// (~A | ~B) == (~(A & B))
1530 static Instruction
*matchDeMorgansLaws(BinaryOperator
&I
,
1531 InstCombiner::BuilderTy
&Builder
) {
1532 auto Opcode
= I
.getOpcode();
1533 assert((Opcode
== Instruction::And
|| Opcode
== Instruction::Or
) &&
1534 "Trying to match De Morgan's Laws with something other than and/or");
1536 // Flip the logic operation.
1537 Opcode
= (Opcode
== Instruction::And
) ? Instruction::Or
: Instruction::And
;
1540 if (match(I
.getOperand(0), m_OneUse(m_Not(m_Value(A
)))) &&
1541 match(I
.getOperand(1), m_OneUse(m_Not(m_Value(B
)))) &&
1542 !InstCombiner::isFreeToInvert(A
, A
->hasOneUse()) &&
1543 !InstCombiner::isFreeToInvert(B
, B
->hasOneUse())) {
1544 Value
*AndOr
= Builder
.CreateBinOp(Opcode
, A
, B
, I
.getName() + ".demorgan");
1545 return BinaryOperator::CreateNot(AndOr
);
1551 bool InstCombinerImpl::shouldOptimizeCast(CastInst
*CI
) {
1552 Value
*CastSrc
= CI
->getOperand(0);
1554 // Noop casts and casts of constants should be eliminated trivially.
1555 if (CI
->getSrcTy() == CI
->getDestTy() || isa
<Constant
>(CastSrc
))
1558 // If this cast is paired with another cast that can be eliminated, we prefer
1559 // to have it eliminated.
1560 if (const auto *PrecedingCI
= dyn_cast
<CastInst
>(CastSrc
))
1561 if (isEliminableCastPair(PrecedingCI
, CI
))
1567 /// Fold {and,or,xor} (cast X), C.
1568 static Instruction
*foldLogicCastConstant(BinaryOperator
&Logic
, CastInst
*Cast
,
1569 InstCombiner::BuilderTy
&Builder
) {
1570 Constant
*C
= dyn_cast
<Constant
>(Logic
.getOperand(1));
1574 auto LogicOpc
= Logic
.getOpcode();
1575 Type
*DestTy
= Logic
.getType();
1576 Type
*SrcTy
= Cast
->getSrcTy();
1578 // Move the logic operation ahead of a zext or sext if the constant is
1579 // unchanged in the smaller source type. Performing the logic in a smaller
1580 // type may provide more information to later folds, and the smaller logic
1581 // instruction may be cheaper (particularly in the case of vectors).
1583 if (match(Cast
, m_OneUse(m_ZExt(m_Value(X
))))) {
1584 Constant
*TruncC
= ConstantExpr::getTrunc(C
, SrcTy
);
1585 Constant
*ZextTruncC
= ConstantExpr::getZExt(TruncC
, DestTy
);
1586 if (ZextTruncC
== C
) {
1587 // LogicOpc (zext X), C --> zext (LogicOpc X, C)
1588 Value
*NewOp
= Builder
.CreateBinOp(LogicOpc
, X
, TruncC
);
1589 return new ZExtInst(NewOp
, DestTy
);
1593 if (match(Cast
, m_OneUse(m_SExt(m_Value(X
))))) {
1594 Constant
*TruncC
= ConstantExpr::getTrunc(C
, SrcTy
);
1595 Constant
*SextTruncC
= ConstantExpr::getSExt(TruncC
, DestTy
);
1596 if (SextTruncC
== C
) {
1597 // LogicOpc (sext X), C --> sext (LogicOpc X, C)
1598 Value
*NewOp
= Builder
.CreateBinOp(LogicOpc
, X
, TruncC
);
1599 return new SExtInst(NewOp
, DestTy
);
1606 /// Fold {and,or,xor} (cast X), Y.
1607 Instruction
*InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator
&I
) {
1608 auto LogicOpc
= I
.getOpcode();
1609 assert(I
.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");
1611 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
1612 CastInst
*Cast0
= dyn_cast
<CastInst
>(Op0
);
1616 // This must be a cast from an integer or integer vector source type to allow
1617 // transformation of the logic operation to the source type.
1618 Type
*DestTy
= I
.getType();
1619 Type
*SrcTy
= Cast0
->getSrcTy();
1620 if (!SrcTy
->isIntOrIntVectorTy())
1623 if (Instruction
*Ret
= foldLogicCastConstant(I
, Cast0
, Builder
))
1626 CastInst
*Cast1
= dyn_cast
<CastInst
>(Op1
);
1630 // Both operands of the logic operation are casts. The casts must be of the
1631 // same type for reduction.
1632 auto CastOpcode
= Cast0
->getOpcode();
1633 if (CastOpcode
!= Cast1
->getOpcode() || SrcTy
!= Cast1
->getSrcTy())
1636 Value
*Cast0Src
= Cast0
->getOperand(0);
1637 Value
*Cast1Src
= Cast1
->getOperand(0);
1639 // fold logic(cast(A), cast(B)) -> cast(logic(A, B))
1640 if (shouldOptimizeCast(Cast0
) && shouldOptimizeCast(Cast1
)) {
1641 Value
*NewOp
= Builder
.CreateBinOp(LogicOpc
, Cast0Src
, Cast1Src
,
1643 return CastInst::Create(CastOpcode
, NewOp
, DestTy
);
1646 // For now, only 'and'/'or' have optimizations after this.
1647 if (LogicOpc
== Instruction::Xor
)
1650 // If this is logic(cast(icmp), cast(icmp)), try to fold this even if the
1651 // cast is otherwise not optimizable. This happens for vector sexts.
1652 ICmpInst
*ICmp0
= dyn_cast
<ICmpInst
>(Cast0Src
);
1653 ICmpInst
*ICmp1
= dyn_cast
<ICmpInst
>(Cast1Src
);
1654 if (ICmp0
&& ICmp1
) {
1655 Value
*Res
= LogicOpc
== Instruction::And
? foldAndOfICmps(ICmp0
, ICmp1
, I
)
1656 : foldOrOfICmps(ICmp0
, ICmp1
, I
);
1658 return CastInst::Create(CastOpcode
, Res
, DestTy
);
1662 // If this is logic(cast(fcmp), cast(fcmp)), try to fold this even if the
1663 // cast is otherwise not optimizable. This happens for vector sexts.
1664 FCmpInst
*FCmp0
= dyn_cast
<FCmpInst
>(Cast0Src
);
1665 FCmpInst
*FCmp1
= dyn_cast
<FCmpInst
>(Cast1Src
);
1667 if (Value
*R
= foldLogicOfFCmps(FCmp0
, FCmp1
, LogicOpc
== Instruction::And
))
1668 return CastInst::Create(CastOpcode
, R
, DestTy
);
1673 static Instruction
*foldAndToXor(BinaryOperator
&I
,
1674 InstCombiner::BuilderTy
&Builder
) {
1675 assert(I
.getOpcode() == Instruction::And
);
1676 Value
*Op0
= I
.getOperand(0);
1677 Value
*Op1
= I
.getOperand(1);
1680 // Operand complexity canonicalization guarantees that the 'or' is Op0.
1681 // (A | B) & ~(A & B) --> A ^ B
1682 // (A | B) & ~(B & A) --> A ^ B
1683 if (match(&I
, m_BinOp(m_Or(m_Value(A
), m_Value(B
)),
1684 m_Not(m_c_And(m_Deferred(A
), m_Deferred(B
))))))
1685 return BinaryOperator::CreateXor(A
, B
);
1687 // (A | ~B) & (~A | B) --> ~(A ^ B)
1688 // (A | ~B) & (B | ~A) --> ~(A ^ B)
1689 // (~B | A) & (~A | B) --> ~(A ^ B)
1690 // (~B | A) & (B | ~A) --> ~(A ^ B)
1691 if (Op0
->hasOneUse() || Op1
->hasOneUse())
1692 if (match(&I
, m_BinOp(m_c_Or(m_Value(A
), m_Not(m_Value(B
))),
1693 m_c_Or(m_Not(m_Deferred(A
)), m_Deferred(B
)))))
1694 return BinaryOperator::CreateNot(Builder
.CreateXor(A
, B
));
1699 static Instruction
*foldOrToXor(BinaryOperator
&I
,
1700 InstCombiner::BuilderTy
&Builder
) {
1701 assert(I
.getOpcode() == Instruction::Or
);
1702 Value
*Op0
= I
.getOperand(0);
1703 Value
*Op1
= I
.getOperand(1);
1706 // Operand complexity canonicalization guarantees that the 'and' is Op0.
1707 // (A & B) | ~(A | B) --> ~(A ^ B)
1708 // (A & B) | ~(B | A) --> ~(A ^ B)
1709 if (Op0
->hasOneUse() || Op1
->hasOneUse())
1710 if (match(Op0
, m_And(m_Value(A
), m_Value(B
))) &&
1711 match(Op1
, m_Not(m_c_Or(m_Specific(A
), m_Specific(B
)))))
1712 return BinaryOperator::CreateNot(Builder
.CreateXor(A
, B
));
1714 // Operand complexity canonicalization guarantees that the 'xor' is Op0.
1715 // (A ^ B) | ~(A | B) --> ~(A & B)
1716 // (A ^ B) | ~(B | A) --> ~(A & B)
1717 if (Op0
->hasOneUse() || Op1
->hasOneUse())
1718 if (match(Op0
, m_Xor(m_Value(A
), m_Value(B
))) &&
1719 match(Op1
, m_Not(m_c_Or(m_Specific(A
), m_Specific(B
)))))
1720 return BinaryOperator::CreateNot(Builder
.CreateAnd(A
, B
));
1722 // (A & ~B) | (~A & B) --> A ^ B
1723 // (A & ~B) | (B & ~A) --> A ^ B
1724 // (~B & A) | (~A & B) --> A ^ B
1725 // (~B & A) | (B & ~A) --> A ^ B
1726 if (match(Op0
, m_c_And(m_Value(A
), m_Not(m_Value(B
)))) &&
1727 match(Op1
, m_c_And(m_Not(m_Specific(A
)), m_Specific(B
))))
1728 return BinaryOperator::CreateXor(A
, B
);
1733 /// Return true if a constant shift amount is always less than the specified
1734 /// bit-width. If not, the shift could create poison in the narrower type.
1735 static bool canNarrowShiftAmt(Constant
*C
, unsigned BitWidth
) {
1736 APInt
Threshold(C
->getType()->getScalarSizeInBits(), BitWidth
);
1737 return match(C
, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT
, Threshold
));
1740 /// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
1741 /// a common zext operand: and (binop (zext X), C), (zext X).
1742 Instruction
*InstCombinerImpl::narrowMaskedBinOp(BinaryOperator
&And
) {
1743 // This transform could also apply to {or, and, xor}, but there are better
1744 // folds for those cases, so we don't expect those patterns here. AShr is not
1745 // handled because it should always be transformed to LShr in this sequence.
1746 // The subtract transform is different because it has a constant on the left.
1747 // Add/mul commute the constant to RHS; sub with constant RHS becomes add.
1748 Value
*Op0
= And
.getOperand(0), *Op1
= And
.getOperand(1);
1750 if (!match(Op0
, m_OneUse(m_Add(m_Specific(Op1
), m_Constant(C
)))) &&
1751 !match(Op0
, m_OneUse(m_Mul(m_Specific(Op1
), m_Constant(C
)))) &&
1752 !match(Op0
, m_OneUse(m_LShr(m_Specific(Op1
), m_Constant(C
)))) &&
1753 !match(Op0
, m_OneUse(m_Shl(m_Specific(Op1
), m_Constant(C
)))) &&
1754 !match(Op0
, m_OneUse(m_Sub(m_Constant(C
), m_Specific(Op1
)))))
1758 if (!match(Op1
, m_ZExt(m_Value(X
))) || Op1
->hasNUsesOrMore(3))
1761 Type
*Ty
= And
.getType();
1762 if (!isa
<VectorType
>(Ty
) && !shouldChangeType(Ty
, X
->getType()))
1765 // If we're narrowing a shift, the shift amount must be safe (less than the
1766 // width) in the narrower type. If the shift amount is greater, instsimplify
1767 // usually handles that case, but we can't guarantee/assert it.
1768 Instruction::BinaryOps Opc
= cast
<BinaryOperator
>(Op0
)->getOpcode();
1769 if (Opc
== Instruction::LShr
|| Opc
== Instruction::Shl
)
1770 if (!canNarrowShiftAmt(C
, X
->getType()->getScalarSizeInBits()))
1773 // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X)
1774 // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X)
1775 Value
*NewC
= ConstantExpr::getTrunc(C
, X
->getType());
1776 Value
*NewBO
= Opc
== Instruction::Sub
? Builder
.CreateBinOp(Opc
, NewC
, X
)
1777 : Builder
.CreateBinOp(Opc
, X
, NewC
);
1778 return new ZExtInst(Builder
.CreateAnd(NewBO
, X
), Ty
);
1781 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
1782 // here. We should standardize that construct where it is needed or choose some
1783 // other way to ensure that commutated variants of patterns are not missed.
1784 Instruction
*InstCombinerImpl::visitAnd(BinaryOperator
&I
) {
1785 Type
*Ty
= I
.getType();
1787 if (Value
*V
= SimplifyAndInst(I
.getOperand(0), I
.getOperand(1),
1788 SQ
.getWithInstruction(&I
)))
1789 return replaceInstUsesWith(I
, V
);
1791 if (SimplifyAssociativeOrCommutative(I
))
1794 if (Instruction
*X
= foldVectorBinop(I
))
1797 // See if we can simplify any instructions used by the instruction whose sole
1798 // purpose is to compute bits we don't care about.
1799 if (SimplifyDemandedInstructionBits(I
))
1802 // Do this before using distributive laws to catch simple and/or/not patterns.
1803 if (Instruction
*Xor
= foldAndToXor(I
, Builder
))
1806 // (A|B)&(A|C) -> A|(B&C) etc
1807 if (Value
*V
= SimplifyUsingDistributiveLaws(I
))
1808 return replaceInstUsesWith(I
, V
);
1810 if (Value
*V
= SimplifyBSwap(I
, Builder
))
1811 return replaceInstUsesWith(I
, V
);
1813 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
1816 if (match(Op0
, m_OneUse(m_LogicalShift(m_One(), m_Value(X
)))) &&
1817 match(Op1
, m_One())) {
1818 // (1 << X) & 1 --> zext(X == 0)
1819 // (1 >> X) & 1 --> zext(X == 0)
1820 Value
*IsZero
= Builder
.CreateICmpEQ(X
, ConstantInt::get(Ty
, 0));
1821 return new ZExtInst(IsZero
, Ty
);
1825 if (match(Op1
, m_APInt(C
))) {
1827 if (match(Op0
, m_OneUse(m_Xor(m_Value(X
), m_APInt(XorC
))))) {
1828 // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
1829 Constant
*NewC
= ConstantInt::get(Ty
, *C
& *XorC
);
1830 Value
*And
= Builder
.CreateAnd(X
, Op1
);
1832 return BinaryOperator::CreateXor(And
, NewC
);
1836 if (match(Op0
, m_OneUse(m_Or(m_Value(X
), m_APInt(OrC
))))) {
1837 // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2)
1838 // NOTE: This reduces the number of bits set in the & mask, which
1839 // can expose opportunities for store narrowing for scalars.
1840 // NOTE: SimplifyDemandedBits should have already removed bits from C1
1841 // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in
1842 // above, but this feels safer.
1843 APInt Together
= *C
& *OrC
;
1844 Value
*And
= Builder
.CreateAnd(X
, ConstantInt::get(Ty
, Together
^ *C
));
1846 return BinaryOperator::CreateOr(And
, ConstantInt::get(Ty
, Together
));
1849 // If the mask is only needed on one incoming arm, push the 'and' op up.
1850 if (match(Op0
, m_OneUse(m_Xor(m_Value(X
), m_Value(Y
)))) ||
1851 match(Op0
, m_OneUse(m_Or(m_Value(X
), m_Value(Y
))))) {
1852 APInt
NotAndMask(~(*C
));
1853 BinaryOperator::BinaryOps BinOp
= cast
<BinaryOperator
>(Op0
)->getOpcode();
1854 if (MaskedValueIsZero(X
, NotAndMask
, 0, &I
)) {
1855 // Not masking anything out for the LHS, move mask to RHS.
1856 // and ({x}or X, Y), C --> {x}or X, (and Y, C)
1857 Value
*NewRHS
= Builder
.CreateAnd(Y
, Op1
, Y
->getName() + ".masked");
1858 return BinaryOperator::Create(BinOp
, X
, NewRHS
);
1860 if (!isa
<Constant
>(Y
) && MaskedValueIsZero(Y
, NotAndMask
, 0, &I
)) {
1861 // Not masking anything out for the RHS, move mask to LHS.
1862 // and ({x}or X, Y), C --> {x}or (and X, C), Y
1863 Value
*NewLHS
= Builder
.CreateAnd(X
, Op1
, X
->getName() + ".masked");
1864 return BinaryOperator::Create(BinOp
, NewLHS
, Y
);
1868 unsigned Width
= Ty
->getScalarSizeInBits();
1869 const APInt
*ShiftC
;
1870 if (match(Op0
, m_OneUse(m_SExt(m_AShr(m_Value(X
), m_APInt(ShiftC
)))))) {
1871 if (*C
== APInt::getLowBitsSet(Width
, Width
- ShiftC
->getZExtValue())) {
1872 // We are clearing high bits that were potentially set by sext+ashr:
1873 // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC
1874 Value
*Sext
= Builder
.CreateSExt(X
, Ty
);
1875 Constant
*ShAmtC
= ConstantInt::get(Ty
, ShiftC
->zext(Width
));
1876 return BinaryOperator::CreateLShr(Sext
, ShAmtC
);
1881 if (match(Op0
, m_Add(m_Value(X
), m_APInt(AddC
)))) {
1882 // If we add zeros to every bit below a mask, the add has no effect:
1883 // (X + AddC) & LowMaskC --> X & LowMaskC
1884 unsigned Ctlz
= C
->countLeadingZeros();
1885 APInt
LowMask(APInt::getLowBitsSet(Width
, Width
- Ctlz
));
1886 if ((*AddC
& LowMask
).isNullValue())
1887 return BinaryOperator::CreateAnd(X
, Op1
);
1889 // If we are masking the result of the add down to exactly one bit and
1890 // the constant we are adding has no bits set below that bit, then the
1891 // add is flipping a single bit. Example:
1892 // (X + 4) & 4 --> (X & 4) ^ 4
1893 if (Op0
->hasOneUse() && C
->isPowerOf2() && (*AddC
& (*C
- 1)) == 0) {
1894 assert((*C
& *AddC
) != 0 && "Expected common bit");
1895 Value
*NewAnd
= Builder
.CreateAnd(X
, Op1
);
1896 return BinaryOperator::CreateXor(NewAnd
, Op1
);
1901 ConstantInt
*AndRHS
;
1902 if (match(Op1
, m_ConstantInt(AndRHS
))) {
1903 const APInt
&AndRHSMask
= AndRHS
->getValue();
1905 // Optimize a variety of ((val OP C1) & C2) combinations...
1906 if (BinaryOperator
*Op0I
= dyn_cast
<BinaryOperator
>(Op0
)) {
1907 // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
1908 // of X and OP behaves well when given trunc(C1) and X.
1909 // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt.
1910 switch (Op0I
->getOpcode()) {
1913 case Instruction::Xor
:
1914 case Instruction::Or
:
1915 case Instruction::Mul
:
1916 case Instruction::Add
:
1917 case Instruction::Sub
:
1920 // TODO: The one use restrictions could be relaxed a little if the AND
1921 // is going to be removed.
1922 if (match(Op0I
, m_OneUse(m_c_BinOp(m_OneUse(m_ZExt(m_Value(X
))),
1923 m_ConstantInt(C1
))))) {
1924 if (AndRHSMask
.isIntN(X
->getType()->getScalarSizeInBits())) {
1925 auto *TruncC1
= ConstantExpr::getTrunc(C1
, X
->getType());
1927 Value
*Op0LHS
= Op0I
->getOperand(0);
1928 if (isa
<ZExtInst
>(Op0LHS
))
1929 BinOp
= Builder
.CreateBinOp(Op0I
->getOpcode(), X
, TruncC1
);
1931 BinOp
= Builder
.CreateBinOp(Op0I
->getOpcode(), TruncC1
, X
);
1932 auto *TruncC2
= ConstantExpr::getTrunc(AndRHS
, X
->getType());
1933 auto *And
= Builder
.CreateAnd(BinOp
, TruncC2
);
1934 return new ZExtInst(And
, Ty
);
1941 if (match(&I
, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X
)), m_Value(Y
))),
1943 match(Y
, m_SpecificInt_ICMP(
1944 ICmpInst::Predicate::ICMP_EQ
,
1945 APInt(Ty
->getScalarSizeInBits(),
1946 Ty
->getScalarSizeInBits() -
1947 X
->getType()->getScalarSizeInBits())))) {
1948 auto *SExt
= Builder
.CreateSExt(X
, Ty
, X
->getName() + ".signext");
1949 auto *SanitizedSignMask
= cast
<Constant
>(Op1
);
1950 // We must be careful with the undef elements of the sign bit mask, however:
1951 // the mask elt can be undef iff the shift amount for that lane was undef,
1952 // otherwise we need to sanitize undef masks to zero.
1953 SanitizedSignMask
= Constant::replaceUndefsWith(
1954 SanitizedSignMask
, ConstantInt::getNullValue(Ty
->getScalarType()));
1956 Constant::mergeUndefsWith(SanitizedSignMask
, cast
<Constant
>(Y
));
1957 return BinaryOperator::CreateAnd(SExt
, SanitizedSignMask
);
1960 if (Instruction
*Z
= narrowMaskedBinOp(I
))
1963 if (I
.getType()->isIntOrIntVectorTy(1)) {
1964 if (auto *SI0
= dyn_cast
<SelectInst
>(Op0
)) {
1966 foldAndOrOfSelectUsingImpliedCond(Op1
, *SI0
, /* IsAnd */ true))
1969 if (auto *SI1
= dyn_cast
<SelectInst
>(Op1
)) {
1971 foldAndOrOfSelectUsingImpliedCond(Op0
, *SI1
, /* IsAnd */ true))
1976 if (Instruction
*FoldedLogic
= foldBinOpIntoSelectOrPhi(I
))
1979 if (Instruction
*DeMorgan
= matchDeMorgansLaws(I
, Builder
))
1984 // A & (A ^ B) --> A & ~B
1985 if (match(Op1
, m_OneUse(m_c_Xor(m_Specific(Op0
), m_Value(B
)))))
1986 return BinaryOperator::CreateAnd(Op0
, Builder
.CreateNot(B
));
1987 // (A ^ B) & A --> A & ~B
1988 if (match(Op0
, m_OneUse(m_c_Xor(m_Specific(Op1
), m_Value(B
)))))
1989 return BinaryOperator::CreateAnd(Op1
, Builder
.CreateNot(B
));
1991 // A & ~(A ^ B) --> A & B
1992 if (match(Op1
, m_Not(m_c_Xor(m_Specific(Op0
), m_Value(B
)))))
1993 return BinaryOperator::CreateAnd(Op0
, B
);
1994 // ~(A ^ B) & A --> A & B
1995 if (match(Op0
, m_Not(m_c_Xor(m_Specific(Op1
), m_Value(B
)))))
1996 return BinaryOperator::CreateAnd(Op1
, B
);
1998 // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
1999 if (match(Op0
, m_Xor(m_Value(A
), m_Value(B
))))
2000 if (match(Op1
, m_Xor(m_Xor(m_Specific(B
), m_Value(C
)), m_Specific(A
))))
2001 if (Op1
->hasOneUse() || isFreeToInvert(C
, C
->hasOneUse()))
2002 return BinaryOperator::CreateAnd(Op0
, Builder
.CreateNot(C
));
2004 // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
2005 if (match(Op0
, m_Xor(m_Xor(m_Value(A
), m_Value(C
)), m_Value(B
))))
2006 if (match(Op1
, m_Xor(m_Specific(B
), m_Specific(A
))))
2007 if (Op0
->hasOneUse() || isFreeToInvert(C
, C
->hasOneUse()))
2008 return BinaryOperator::CreateAnd(Op1
, Builder
.CreateNot(C
));
2010 // (A | B) & ((~A) ^ B) -> (A & B)
2011 // (A | B) & (B ^ (~A)) -> (A & B)
2012 // (B | A) & ((~A) ^ B) -> (A & B)
2013 // (B | A) & (B ^ (~A)) -> (A & B)
2014 if (match(Op1
, m_c_Xor(m_Not(m_Value(A
)), m_Value(B
))) &&
2015 match(Op0
, m_c_Or(m_Specific(A
), m_Specific(B
))))
2016 return BinaryOperator::CreateAnd(A
, B
);
2018 // ((~A) ^ B) & (A | B) -> (A & B)
2019 // ((~A) ^ B) & (B | A) -> (A & B)
2020 // (B ^ (~A)) & (A | B) -> (A & B)
2021 // (B ^ (~A)) & (B | A) -> (A & B)
2022 if (match(Op0
, m_c_Xor(m_Not(m_Value(A
)), m_Value(B
))) &&
2023 match(Op1
, m_c_Or(m_Specific(A
), m_Specific(B
))))
2024 return BinaryOperator::CreateAnd(A
, B
);
2028 ICmpInst
*LHS
= dyn_cast
<ICmpInst
>(Op0
);
2029 ICmpInst
*RHS
= dyn_cast
<ICmpInst
>(Op1
);
2031 if (Value
*Res
= foldAndOfICmps(LHS
, RHS
, I
))
2032 return replaceInstUsesWith(I
, Res
);
2034 // TODO: Make this recursive; it's a little tricky because an arbitrary
2035 // number of 'and' instructions might have to be created.
2036 if (LHS
&& match(Op1
, m_OneUse(m_And(m_Value(X
), m_Value(Y
))))) {
2037 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
2038 if (Value
*Res
= foldAndOfICmps(LHS
, Cmp
, I
))
2039 return replaceInstUsesWith(I
, Builder
.CreateAnd(Res
, Y
));
2040 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
2041 if (Value
*Res
= foldAndOfICmps(LHS
, Cmp
, I
))
2042 return replaceInstUsesWith(I
, Builder
.CreateAnd(Res
, X
));
2044 if (RHS
&& match(Op0
, m_OneUse(m_And(m_Value(X
), m_Value(Y
))))) {
2045 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
2046 if (Value
*Res
= foldAndOfICmps(Cmp
, RHS
, I
))
2047 return replaceInstUsesWith(I
, Builder
.CreateAnd(Res
, Y
));
2048 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
2049 if (Value
*Res
= foldAndOfICmps(Cmp
, RHS
, I
))
2050 return replaceInstUsesWith(I
, Builder
.CreateAnd(Res
, X
));
2054 if (FCmpInst
*LHS
= dyn_cast
<FCmpInst
>(I
.getOperand(0)))
2055 if (FCmpInst
*RHS
= dyn_cast
<FCmpInst
>(I
.getOperand(1)))
2056 if (Value
*Res
= foldLogicOfFCmps(LHS
, RHS
, true))
2057 return replaceInstUsesWith(I
, Res
);
2059 if (Instruction
*FoldedFCmps
= reassociateFCmps(I
, Builder
))
2062 if (Instruction
*CastedAnd
= foldCastedBitwiseLogic(I
))
2065 // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
2067 if (match(Op0
, m_OneUse(m_SExt(m_Value(A
)))) &&
2068 A
->getType()->isIntOrIntVectorTy(1))
2069 return SelectInst::Create(A
, Op1
, Constant::getNullValue(Ty
));
2070 if (match(Op1
, m_OneUse(m_SExt(m_Value(A
)))) &&
2071 A
->getType()->isIntOrIntVectorTy(1))
2072 return SelectInst::Create(A
, Op0
, Constant::getNullValue(Ty
));
2074 // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
2075 if (match(&I
, m_c_And(m_OneUse(m_AShr(
2076 m_NSWSub(m_Value(Y
), m_Value(X
)),
2077 m_SpecificInt(Ty
->getScalarSizeInBits() - 1))),
2079 Value
*NewICmpInst
= Builder
.CreateICmpSGT(X
, Y
);
2080 return SelectInst::Create(NewICmpInst
, X
, ConstantInt::getNullValue(Ty
));
2083 // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
2084 if (sinkNotIntoOtherHandOfAndOrOr(I
))
2087 // An and recurrence w/loop invariant step is equivelent to (and start, step)
2088 PHINode
*PN
= nullptr;
2089 Value
*Start
= nullptr, *Step
= nullptr;
2090 if (matchSimpleRecurrence(&I
, PN
, Start
, Step
) && DT
.dominates(Step
, PN
))
2091 return replaceInstUsesWith(I
, Builder
.CreateAnd(Start
, Step
));
2096 Instruction
*InstCombinerImpl::matchBSwapOrBitReverse(Instruction
&I
,
2098 bool MatchBitReversals
) {
2099 SmallVector
<Instruction
*, 4> Insts
;
2100 if (!recognizeBSwapOrBitReverseIdiom(&I
, MatchBSwaps
, MatchBitReversals
,
2103 Instruction
*LastInst
= Insts
.pop_back_val();
2104 LastInst
->removeFromParent();
2106 for (auto *Inst
: Insts
)
2107 Worklist
.push(Inst
);
2111 /// Match UB-safe variants of the funnel shift intrinsic.
2112 static Instruction
*matchFunnelShift(Instruction
&Or
, InstCombinerImpl
&IC
) {
2113 // TODO: Can we reduce the code duplication between this and the related
2114 // rotate matching code under visitSelect and visitTrunc?
2115 unsigned Width
= Or
.getType()->getScalarSizeInBits();
2117 // First, find an or'd pair of opposite shifts:
2118 // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
2119 BinaryOperator
*Or0
, *Or1
;
2120 if (!match(Or
.getOperand(0), m_BinOp(Or0
)) ||
2121 !match(Or
.getOperand(1), m_BinOp(Or1
)))
2124 Value
*ShVal0
, *ShVal1
, *ShAmt0
, *ShAmt1
;
2125 if (!match(Or0
, m_OneUse(m_LogicalShift(m_Value(ShVal0
), m_Value(ShAmt0
)))) ||
2126 !match(Or1
, m_OneUse(m_LogicalShift(m_Value(ShVal1
), m_Value(ShAmt1
)))) ||
2127 Or0
->getOpcode() == Or1
->getOpcode())
2130 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
2131 if (Or0
->getOpcode() == BinaryOperator::LShr
) {
2132 std::swap(Or0
, Or1
);
2133 std::swap(ShVal0
, ShVal1
);
2134 std::swap(ShAmt0
, ShAmt1
);
2136 assert(Or0
->getOpcode() == BinaryOperator::Shl
&&
2137 Or1
->getOpcode() == BinaryOperator::LShr
&&
2138 "Illegal or(shift,shift) pair");
2140 // Match the shift amount operands for a funnel shift pattern. This always
2141 // matches a subtraction on the R operand.
2142 auto matchShiftAmount
= [&](Value
*L
, Value
*R
, unsigned Width
) -> Value
* {
2143 // Check for constant shift amounts that sum to the bitwidth.
2144 const APInt
*LI
, *RI
;
2145 if (match(L
, m_APIntAllowUndef(LI
)) && match(R
, m_APIntAllowUndef(RI
)))
2146 if (LI
->ult(Width
) && RI
->ult(Width
) && (*LI
+ *RI
) == Width
)
2147 return ConstantInt::get(L
->getType(), *LI
);
2150 if (match(L
, m_Constant(LC
)) && match(R
, m_Constant(RC
)) &&
2151 match(L
, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT
, APInt(Width
, Width
))) &&
2152 match(R
, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT
, APInt(Width
, Width
))) &&
2153 match(ConstantExpr::getAdd(LC
, RC
), m_SpecificIntAllowUndef(Width
)))
2154 return ConstantExpr::mergeUndefsWith(LC
, RC
);
2156 // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
2157 // We limit this to X < Width in case the backend re-expands the intrinsic,
2158 // and has to reintroduce a shift modulo operation (InstCombine might remove
2159 // it after this fold). This still doesn't guarantee that the final codegen
2160 // will match this original pattern.
2161 if (match(R
, m_OneUse(m_Sub(m_SpecificInt(Width
), m_Specific(L
))))) {
2162 KnownBits KnownL
= IC
.computeKnownBits(L
, /*Depth*/ 0, &Or
);
2163 return KnownL
.getMaxValue().ult(Width
) ? L
: nullptr;
2166 // For non-constant cases, the following patterns currently only work for
2167 // rotation patterns.
2168 // TODO: Add general funnel-shift compatible patterns.
2169 if (ShVal0
!= ShVal1
)
2172 // For non-constant cases we don't support non-pow2 shift masks.
2173 // TODO: Is it worth matching urem as well?
2174 if (!isPowerOf2_32(Width
))
2177 // The shift amount may be masked with negation:
2178 // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2180 unsigned Mask
= Width
- 1;
2181 if (match(L
, m_And(m_Value(X
), m_SpecificInt(Mask
))) &&
2182 match(R
, m_And(m_Neg(m_Specific(X
)), m_SpecificInt(Mask
))))
2185 // Similar to above, but the shift amount may be extended after masking,
2186 // so return the extended value as the parameter for the intrinsic.
2187 if (match(L
, m_ZExt(m_And(m_Value(X
), m_SpecificInt(Mask
)))) &&
2188 match(R
, m_And(m_Neg(m_ZExt(m_And(m_Specific(X
), m_SpecificInt(Mask
)))),
2189 m_SpecificInt(Mask
))))
2192 if (match(L
, m_ZExt(m_And(m_Value(X
), m_SpecificInt(Mask
)))) &&
2193 match(R
, m_ZExt(m_And(m_Neg(m_Specific(X
)), m_SpecificInt(Mask
)))))
2199 Value
*ShAmt
= matchShiftAmount(ShAmt0
, ShAmt1
, Width
);
2200 bool IsFshl
= true; // Sub on LSHR.
2202 ShAmt
= matchShiftAmount(ShAmt1
, ShAmt0
, Width
);
2203 IsFshl
= false; // Sub on SHL.
2208 Intrinsic::ID IID
= IsFshl
? Intrinsic::fshl
: Intrinsic::fshr
;
2209 Function
*F
= Intrinsic::getDeclaration(Or
.getModule(), IID
, Or
.getType());
2210 return CallInst::Create(F
, {ShVal0
, ShVal1
, ShAmt
});
2213 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
2214 static Instruction
*matchOrConcat(Instruction
&Or
,
2215 InstCombiner::BuilderTy
&Builder
) {
2216 assert(Or
.getOpcode() == Instruction::Or
&& "bswap requires an 'or'");
2217 Value
*Op0
= Or
.getOperand(0), *Op1
= Or
.getOperand(1);
2218 Type
*Ty
= Or
.getType();
2220 unsigned Width
= Ty
->getScalarSizeInBits();
2221 if ((Width
& 1) != 0)
2223 unsigned HalfWidth
= Width
/ 2;
2225 // Canonicalize zext (lower half) to LHS.
2226 if (!isa
<ZExtInst
>(Op0
))
2227 std::swap(Op0
, Op1
);
2229 // Find lower/upper half.
2230 Value
*LowerSrc
, *ShlVal
, *UpperSrc
;
2232 if (!match(Op0
, m_OneUse(m_ZExt(m_Value(LowerSrc
)))) ||
2233 !match(Op1
, m_OneUse(m_Shl(m_Value(ShlVal
), m_APInt(C
)))) ||
2234 !match(ShlVal
, m_OneUse(m_ZExt(m_Value(UpperSrc
)))))
2236 if (*C
!= HalfWidth
|| LowerSrc
->getType() != UpperSrc
->getType() ||
2237 LowerSrc
->getType()->getScalarSizeInBits() != HalfWidth
)
2240 auto ConcatIntrinsicCalls
= [&](Intrinsic::ID id
, Value
*Lo
, Value
*Hi
) {
2241 Value
*NewLower
= Builder
.CreateZExt(Lo
, Ty
);
2242 Value
*NewUpper
= Builder
.CreateZExt(Hi
, Ty
);
2243 NewUpper
= Builder
.CreateShl(NewUpper
, HalfWidth
);
2244 Value
*BinOp
= Builder
.CreateOr(NewLower
, NewUpper
);
2245 Function
*F
= Intrinsic::getDeclaration(Or
.getModule(), id
, Ty
);
2246 return Builder
.CreateCall(F
, BinOp
);
2249 // BSWAP: Push the concat down, swapping the lower/upper sources.
2250 // concat(bswap(x),bswap(y)) -> bswap(concat(x,y))
2251 Value
*LowerBSwap
, *UpperBSwap
;
2252 if (match(LowerSrc
, m_BSwap(m_Value(LowerBSwap
))) &&
2253 match(UpperSrc
, m_BSwap(m_Value(UpperBSwap
))))
2254 return ConcatIntrinsicCalls(Intrinsic::bswap
, UpperBSwap
, LowerBSwap
);
2256 // BITREVERSE: Push the concat down, swapping the lower/upper sources.
2257 // concat(bitreverse(x),bitreverse(y)) -> bitreverse(concat(x,y))
2258 Value
*LowerBRev
, *UpperBRev
;
2259 if (match(LowerSrc
, m_BitReverse(m_Value(LowerBRev
))) &&
2260 match(UpperSrc
, m_BitReverse(m_Value(UpperBRev
))))
2261 return ConcatIntrinsicCalls(Intrinsic::bitreverse
, UpperBRev
, LowerBRev
);
2266 /// If all elements of two constant vectors are 0/-1 and inverses, return true.
2267 static bool areInverseVectorBitmasks(Constant
*C1
, Constant
*C2
) {
2268 unsigned NumElts
= cast
<FixedVectorType
>(C1
->getType())->getNumElements();
2269 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
2270 Constant
*EltC1
= C1
->getAggregateElement(i
);
2271 Constant
*EltC2
= C2
->getAggregateElement(i
);
2272 if (!EltC1
|| !EltC2
)
2275 // One element must be all ones, and the other must be all zeros.
2276 if (!((match(EltC1
, m_Zero()) && match(EltC2
, m_AllOnes())) ||
2277 (match(EltC2
, m_Zero()) && match(EltC1
, m_AllOnes()))))
2283 /// We have an expression of the form (A & C) | (B & D). If A is a scalar or
2284 /// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
2285 /// B, it can be used as the condition operand of a select instruction.
2286 Value
*InstCombinerImpl::getSelectCondition(Value
*A
, Value
*B
) {
2287 // Step 1: We may have peeked through bitcasts in the caller.
2288 // Exit immediately if we don't have (vector) integer types.
2289 Type
*Ty
= A
->getType();
2290 if (!Ty
->isIntOrIntVectorTy() || !B
->getType()->isIntOrIntVectorTy())
2293 // Step 2: We need 0 or all-1's bitmasks.
2294 if (ComputeNumSignBits(A
) != Ty
->getScalarSizeInBits())
2297 // Step 3: If B is the 'not' value of A, we have our answer.
2298 if (match(A
, m_Not(m_Specific(B
)))) {
2299 // If these are scalars or vectors of i1, A can be used directly.
2300 if (Ty
->isIntOrIntVectorTy(1))
2302 return Builder
.CreateTrunc(A
, CmpInst::makeCmpResultType(Ty
));
2305 // If both operands are constants, see if the constants are inverse bitmasks.
2306 Constant
*AConst
, *BConst
;
2307 if (match(A
, m_Constant(AConst
)) && match(B
, m_Constant(BConst
)))
2308 if (AConst
== ConstantExpr::getNot(BConst
))
2309 return Builder
.CreateZExtOrTrunc(A
, CmpInst::makeCmpResultType(Ty
));
2311 // Look for more complex patterns. The 'not' op may be hidden behind various
2312 // casts. Look through sexts and bitcasts to find the booleans.
2315 if (match(A
, m_SExt(m_Value(Cond
))) &&
2316 Cond
->getType()->isIntOrIntVectorTy(1) &&
2317 match(B
, m_OneUse(m_Not(m_Value(NotB
))))) {
2318 NotB
= peekThroughBitcast(NotB
, true);
2319 if (match(NotB
, m_SExt(m_Specific(Cond
))))
2323 // All scalar (and most vector) possibilities should be handled now.
2324 // Try more matches that only apply to non-splat constant vectors.
2325 if (!Ty
->isVectorTy())
2328 // If both operands are xor'd with constants using the same sexted boolean
2329 // operand, see if the constants are inverse bitmasks.
2330 // TODO: Use ConstantExpr::getNot()?
2331 if (match(A
, (m_Xor(m_SExt(m_Value(Cond
)), m_Constant(AConst
)))) &&
2332 match(B
, (m_Xor(m_SExt(m_Specific(Cond
)), m_Constant(BConst
)))) &&
2333 Cond
->getType()->isIntOrIntVectorTy(1) &&
2334 areInverseVectorBitmasks(AConst
, BConst
)) {
2335 AConst
= ConstantExpr::getTrunc(AConst
, CmpInst::makeCmpResultType(Ty
));
2336 return Builder
.CreateXor(Cond
, AConst
);
2341 /// We have an expression of the form (A & C) | (B & D). Try to simplify this
2342 /// to "A' ? C : D", where A' is a boolean or vector of booleans.
2343 Value
*InstCombinerImpl::matchSelectFromAndOr(Value
*A
, Value
*C
, Value
*B
,
2345 // The potential condition of the select may be bitcasted. In that case, look
2346 // through its bitcast and the corresponding bitcast of the 'not' condition.
2347 Type
*OrigType
= A
->getType();
2348 A
= peekThroughBitcast(A
, true);
2349 B
= peekThroughBitcast(B
, true);
2350 if (Value
*Cond
= getSelectCondition(A
, B
)) {
2351 // ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
2352 // The bitcasts will either all exist or all not exist. The builder will
2353 // not create unnecessary casts if the types already match.
2354 Value
*BitcastC
= Builder
.CreateBitCast(C
, A
->getType());
2355 Value
*BitcastD
= Builder
.CreateBitCast(D
, A
->getType());
2356 Value
*Select
= Builder
.CreateSelect(Cond
, BitcastC
, BitcastD
);
2357 return Builder
.CreateBitCast(Select
, OrigType
);
2363 /// Fold (icmp)|(icmp) if possible.
2364 Value
*InstCombinerImpl::foldOrOfICmps(ICmpInst
*LHS
, ICmpInst
*RHS
,
2365 BinaryOperator
&Or
) {
2366 const SimplifyQuery Q
= SQ
.getWithInstruction(&Or
);
2368 // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
2369 // if K1 and K2 are a one-bit mask.
2370 if (Value
*V
= foldAndOrOfICmpsOfAndWithPow2(LHS
, RHS
, &Or
,
2374 ICmpInst::Predicate PredL
= LHS
->getPredicate(), PredR
= RHS
->getPredicate();
2375 Value
*LHS0
= LHS
->getOperand(0), *RHS0
= RHS
->getOperand(0);
2376 Value
*LHS1
= LHS
->getOperand(1), *RHS1
= RHS
->getOperand(1);
2377 auto *LHSC
= dyn_cast
<ConstantInt
>(LHS1
);
2378 auto *RHSC
= dyn_cast
<ConstantInt
>(RHS1
);
2380 // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
2381 // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
2382 // The original condition actually refers to the following two ranges:
2383 // [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3]
2384 // We can fold these two ranges if:
2385 // 1) C1 and C2 is unsigned greater than C3.
2386 // 2) The two ranges are separated.
2387 // 3) C1 ^ C2 is one-bit mask.
2388 // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask.
2389 // This implies all values in the two ranges differ by exactly one bit.
2390 if ((PredL
== ICmpInst::ICMP_ULT
|| PredL
== ICmpInst::ICMP_ULE
) &&
2391 PredL
== PredR
&& LHSC
&& RHSC
&& LHS
->hasOneUse() && RHS
->hasOneUse() &&
2392 LHSC
->getType() == RHSC
->getType() &&
2393 LHSC
->getValue() == (RHSC
->getValue())) {
2396 ConstantInt
*LAddC
, *RAddC
;
2397 if (match(LHS0
, m_Add(m_Value(AddOpnd
), m_ConstantInt(LAddC
))) &&
2398 match(RHS0
, m_Add(m_Specific(AddOpnd
), m_ConstantInt(RAddC
))) &&
2399 LAddC
->getValue().ugt(LHSC
->getValue()) &&
2400 RAddC
->getValue().ugt(LHSC
->getValue())) {
2402 APInt DiffC
= LAddC
->getValue() ^ RAddC
->getValue();
2403 if (DiffC
.isPowerOf2()) {
2404 ConstantInt
*MaxAddC
= nullptr;
2405 if (LAddC
->getValue().ult(RAddC
->getValue()))
2410 APInt RRangeLow
= -RAddC
->getValue();
2411 APInt RRangeHigh
= RRangeLow
+ LHSC
->getValue();
2412 APInt LRangeLow
= -LAddC
->getValue();
2413 APInt LRangeHigh
= LRangeLow
+ LHSC
->getValue();
2414 APInt LowRangeDiff
= RRangeLow
^ LRangeLow
;
2415 APInt HighRangeDiff
= RRangeHigh
^ LRangeHigh
;
2416 APInt RangeDiff
= LRangeLow
.sgt(RRangeLow
) ? LRangeLow
- RRangeLow
2417 : RRangeLow
- LRangeLow
;
2419 if (LowRangeDiff
.isPowerOf2() && LowRangeDiff
== HighRangeDiff
&&
2420 RangeDiff
.ugt(LHSC
->getValue())) {
2421 Value
*MaskC
= ConstantInt::get(LAddC
->getType(), ~DiffC
);
2423 Value
*NewAnd
= Builder
.CreateAnd(AddOpnd
, MaskC
);
2424 Value
*NewAdd
= Builder
.CreateAdd(NewAnd
, MaxAddC
);
2425 return Builder
.CreateICmp(LHS
->getPredicate(), NewAdd
, LHSC
);
2431 // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
2432 if (predicatesFoldable(PredL
, PredR
)) {
2433 if (LHS0
== RHS1
&& LHS1
== RHS0
)
2434 LHS
->swapOperands();
2435 if (LHS0
== RHS0
&& LHS1
== RHS1
) {
2436 unsigned Code
= getICmpCode(LHS
) | getICmpCode(RHS
);
2437 bool IsSigned
= LHS
->isSigned() || RHS
->isSigned();
2438 return getNewICmpValue(Code
, IsSigned
, LHS0
, LHS1
, Builder
);
2442 // handle (roughly):
2443 // (icmp ne (A & B), C) | (icmp ne (A & D), E)
2444 if (Value
*V
= foldLogOpOfMaskedICmps(LHS
, RHS
, false, Builder
))
2447 if (LHS
->hasOneUse() || RHS
->hasOneUse()) {
2448 // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
2449 // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
2450 Value
*A
= nullptr, *B
= nullptr;
2451 if (PredL
== ICmpInst::ICMP_EQ
&& match(LHS1
, m_Zero())) {
2453 if (PredR
== ICmpInst::ICMP_ULT
&& LHS0
== RHS1
)
2455 else if (PredR
== ICmpInst::ICMP_UGT
&& LHS0
== RHS0
)
2458 // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1)
2459 // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1)
2460 else if (PredR
== ICmpInst::ICMP_EQ
&& match(RHS1
, m_Zero())) {
2462 if (PredL
== ICmpInst::ICMP_ULT
&& RHS0
== LHS1
)
2464 else if (PredL
== ICmpInst::ICMP_UGT
&& RHS0
== LHS0
)
2467 if (A
&& B
&& B
->getType()->isIntOrIntVectorTy())
2468 return Builder
.CreateICmp(
2470 Builder
.CreateAdd(B
, Constant::getAllOnesValue(B
->getType())), A
);
2473 if (Value
*V
= foldAndOrOfICmpsWithConstEq(LHS
, RHS
, Or
, Builder
, Q
))
2475 if (Value
*V
= foldAndOrOfICmpsWithConstEq(RHS
, LHS
, Or
, Builder
, Q
))
2478 // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
2479 if (Value
*V
= simplifyRangeCheck(LHS
, RHS
, /*Inverted=*/true))
2482 // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
2483 if (Value
*V
= simplifyRangeCheck(RHS
, LHS
, /*Inverted=*/true))
2486 if (Value
*V
= foldAndOrOfEqualityCmpsWithConstants(LHS
, RHS
, false, Builder
))
2489 if (Value
*V
= foldIsPowerOf2(LHS
, RHS
, false /* JoinedByAnd */, Builder
))
2493 foldUnsignedUnderflowCheck(LHS
, RHS
, /*IsAnd=*/false, Q
, Builder
))
2496 foldUnsignedUnderflowCheck(RHS
, LHS
, /*IsAnd=*/false, Q
, Builder
))
2499 if (Value
*X
= foldEqOfParts(LHS
, RHS
, /*IsAnd=*/false, Builder
))
2502 // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
2503 // TODO: Remove this when foldLogOpOfMaskedICmps can handle vectors.
2504 if (PredL
== ICmpInst::ICMP_NE
&& match(LHS1
, m_Zero()) &&
2505 PredR
== ICmpInst::ICMP_NE
&& match(RHS1
, m_Zero()) &&
2506 LHS0
->getType()->isIntOrIntVectorTy() &&
2507 LHS0
->getType() == RHS0
->getType()) {
2508 Value
*NewOr
= Builder
.CreateOr(LHS0
, RHS0
);
2509 return Builder
.CreateICmp(PredL
, NewOr
,
2510 Constant::getNullValue(NewOr
->getType()));
2513 // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
2517 // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
2518 // iff C2 + CA == C1.
2519 if (PredL
== ICmpInst::ICMP_ULT
&& PredR
== ICmpInst::ICMP_EQ
) {
2521 if (match(LHS0
, m_Add(m_Specific(RHS0
), m_ConstantInt(AddC
))))
2522 if (RHSC
->getValue() + AddC
->getValue() == LHSC
->getValue())
2523 return Builder
.CreateICmpULE(LHS0
, LHSC
);
2526 // From here on, we only handle:
2527 // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
2531 // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
2532 if (PredL
== ICmpInst::ICMP_UGE
|| PredL
== ICmpInst::ICMP_ULE
||
2533 PredR
== ICmpInst::ICMP_UGE
|| PredR
== ICmpInst::ICMP_ULE
||
2534 PredL
== ICmpInst::ICMP_SGE
|| PredL
== ICmpInst::ICMP_SLE
||
2535 PredR
== ICmpInst::ICMP_SGE
|| PredR
== ICmpInst::ICMP_SLE
)
2538 // We can't fold (ugt x, C) | (sgt x, C2).
2539 if (!predicatesFoldable(PredL
, PredR
))
2542 // Ensure that the larger constant is on the RHS.
2544 if (CmpInst::isSigned(PredL
) ||
2545 (ICmpInst::isEquality(PredL
) && CmpInst::isSigned(PredR
)))
2546 ShouldSwap
= LHSC
->getValue().sgt(RHSC
->getValue());
2548 ShouldSwap
= LHSC
->getValue().ugt(RHSC
->getValue());
2551 std::swap(LHS
, RHS
);
2552 std::swap(LHSC
, RHSC
);
2553 std::swap(PredL
, PredR
);
2556 // At this point, we know we have two icmp instructions
2557 // comparing a value against two constants and or'ing the result
2558 // together. Because of the above check, we know that we only have
2559 // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
2560 // icmp folding check above), that the two constants are not
2562 assert(LHSC
!= RHSC
&& "Compares not folded above?");
2566 llvm_unreachable("Unknown integer condition code!");
2567 case ICmpInst::ICMP_EQ
:
2570 llvm_unreachable("Unknown integer condition code!");
2571 case ICmpInst::ICMP_EQ
:
2572 // Potential folds for this case should already be handled.
2574 case ICmpInst::ICMP_UGT
:
2575 // (X == 0 || X u> C) -> (X-1) u>= C
2576 if (LHSC
->isMinValue(false))
2577 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue() + 1,
2579 // (X == 13 | X u> 14) -> no change
2581 case ICmpInst::ICMP_SGT
:
2582 // (X == INT_MIN || X s> C) -> (X-(INT_MIN+1)) u>= C-INT_MIN
2583 if (LHSC
->isMinValue(true))
2584 return insertRangeTest(LHS0
, LHSC
->getValue() + 1, RHSC
->getValue() + 1,
2586 // (X == 13 | X s> 14) -> no change
2590 case ICmpInst::ICMP_ULT
:
2593 llvm_unreachable("Unknown integer condition code!");
2594 case ICmpInst::ICMP_EQ
: // (X u< 13 | X == 14) -> no change
2595 // (X u< C || X == UINT_MAX) => (X-C) u>= UINT_MAX-C
2596 if (RHSC
->isMaxValue(false))
2597 return insertRangeTest(LHS0
, LHSC
->getValue(), RHSC
->getValue(),
2600 case ICmpInst::ICMP_UGT
: // (X u< 13 | X u> 15) -> (X-13) u> 2
2601 assert(!RHSC
->isMaxValue(false) && "Missed icmp simplification");
2602 return insertRangeTest(LHS0
, LHSC
->getValue(), RHSC
->getValue() + 1,
2606 case ICmpInst::ICMP_SLT
:
2609 llvm_unreachable("Unknown integer condition code!");
2610 case ICmpInst::ICMP_EQ
:
2611 // (X s< C || X == INT_MAX) => (X-C) u>= INT_MAX-C
2612 if (RHSC
->isMaxValue(true))
2613 return insertRangeTest(LHS0
, LHSC
->getValue(), RHSC
->getValue(),
2615 // (X s< 13 | X == 14) -> no change
2617 case ICmpInst::ICMP_SGT
: // (X s< 13 | X s> 15) -> (X-13) u> 2
2618 assert(!RHSC
->isMaxValue(true) && "Missed icmp simplification");
2619 return insertRangeTest(LHS0
, LHSC
->getValue(), RHSC
->getValue() + 1, true,
2627 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
2628 // here. We should standardize that construct where it is needed or choose some
2629 // other way to ensure that commutated variants of patterns are not missed.
2630 Instruction
*InstCombinerImpl::visitOr(BinaryOperator
&I
) {
2631 if (Value
*V
= SimplifyOrInst(I
.getOperand(0), I
.getOperand(1),
2632 SQ
.getWithInstruction(&I
)))
2633 return replaceInstUsesWith(I
, V
);
2635 if (SimplifyAssociativeOrCommutative(I
))
2638 if (Instruction
*X
= foldVectorBinop(I
))
2641 // See if we can simplify any instructions used by the instruction whose sole
2642 // purpose is to compute bits we don't care about.
2643 if (SimplifyDemandedInstructionBits(I
))
2646 // Do this before using distributive laws to catch simple and/or/not patterns.
2647 if (Instruction
*Xor
= foldOrToXor(I
, Builder
))
2650 // (A&B)|(A&C) -> A&(B|C) etc
2651 if (Value
*V
= SimplifyUsingDistributiveLaws(I
))
2652 return replaceInstUsesWith(I
, V
);
2654 if (Value
*V
= SimplifyBSwap(I
, Builder
))
2655 return replaceInstUsesWith(I
, V
);
2657 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
2658 if (I
.getType()->isIntOrIntVectorTy(1)) {
2659 if (auto *SI0
= dyn_cast
<SelectInst
>(Op0
)) {
2661 foldAndOrOfSelectUsingImpliedCond(Op1
, *SI0
, /* IsAnd */ false))
2664 if (auto *SI1
= dyn_cast
<SelectInst
>(Op1
)) {
2666 foldAndOrOfSelectUsingImpliedCond(Op0
, *SI1
, /* IsAnd */ false))
2671 if (Instruction
*FoldedLogic
= foldBinOpIntoSelectOrPhi(I
))
2674 if (Instruction
*BitOp
= matchBSwapOrBitReverse(I
, /*MatchBSwaps*/ true,
2675 /*MatchBitReversals*/ true))
2678 if (Instruction
*Funnel
= matchFunnelShift(I
, *this))
2681 if (Instruction
*Concat
= matchOrConcat(I
, Builder
))
2682 return replaceInstUsesWith(I
, Concat
);
2686 if (match(&I
, m_c_Or(m_OneUse(m_Xor(m_Value(X
), m_APInt(CV
))), m_Value(Y
))) &&
2687 !CV
->isAllOnesValue() && MaskedValueIsZero(Y
, *CV
, 0, &I
)) {
2688 // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
2689 // The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
2690 Value
*Or
= Builder
.CreateOr(X
, Y
);
2691 return BinaryOperator::CreateXor(Or
, ConstantInt::get(I
.getType(), *CV
));
2695 Value
*A
, *B
, *C
, *D
;
2696 if (match(Op0
, m_And(m_Value(A
), m_Value(C
))) &&
2697 match(Op1
, m_And(m_Value(B
), m_Value(D
)))) {
2698 // (A & C1)|(B & C2)
2699 ConstantInt
*C1
, *C2
;
2700 if (match(C
, m_ConstantInt(C1
)) && match(D
, m_ConstantInt(C2
))) {
2701 Value
*V1
= nullptr, *V2
= nullptr;
2702 if ((C1
->getValue() & C2
->getValue()).isNullValue()) {
2703 // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
2704 // iff (C1&C2) == 0 and (N&~C1) == 0
2705 if (match(A
, m_Or(m_Value(V1
), m_Value(V2
))) &&
2707 MaskedValueIsZero(V2
, ~C1
->getValue(), 0, &I
)) || // (V|N)
2709 MaskedValueIsZero(V1
, ~C1
->getValue(), 0, &I
)))) // (N|V)
2710 return BinaryOperator::CreateAnd(A
,
2711 Builder
.getInt(C1
->getValue()|C2
->getValue()));
2712 // Or commutes, try both ways.
2713 if (match(B
, m_Or(m_Value(V1
), m_Value(V2
))) &&
2715 MaskedValueIsZero(V2
, ~C2
->getValue(), 0, &I
)) || // (V|N)
2717 MaskedValueIsZero(V1
, ~C2
->getValue(), 0, &I
)))) // (N|V)
2718 return BinaryOperator::CreateAnd(B
,
2719 Builder
.getInt(C1
->getValue()|C2
->getValue()));
2721 // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
2722 // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
2723 ConstantInt
*C3
= nullptr, *C4
= nullptr;
2724 if (match(A
, m_Or(m_Value(V1
), m_ConstantInt(C3
))) &&
2725 (C3
->getValue() & ~C1
->getValue()).isNullValue() &&
2726 match(B
, m_Or(m_Specific(V1
), m_ConstantInt(C4
))) &&
2727 (C4
->getValue() & ~C2
->getValue()).isNullValue()) {
2728 V2
= Builder
.CreateOr(V1
, ConstantExpr::getOr(C3
, C4
), "bitfield");
2729 return BinaryOperator::CreateAnd(V2
,
2730 Builder
.getInt(C1
->getValue()|C2
->getValue()));
2734 if (C1
->getValue() == ~C2
->getValue()) {
2737 // ((X|B)&C1)|(B&C2) -> (X&C1) | B iff C1 == ~C2
2738 if (match(A
, m_c_Or(m_Value(X
), m_Specific(B
))))
2739 return BinaryOperator::CreateOr(Builder
.CreateAnd(X
, C1
), B
);
2740 // (A&C2)|((X|A)&C1) -> (X&C2) | A iff C1 == ~C2
2741 if (match(B
, m_c_Or(m_Specific(A
), m_Value(X
))))
2742 return BinaryOperator::CreateOr(Builder
.CreateAnd(X
, C2
), A
);
2744 // ((X^B)&C1)|(B&C2) -> (X&C1) ^ B iff C1 == ~C2
2745 if (match(A
, m_c_Xor(m_Value(X
), m_Specific(B
))))
2746 return BinaryOperator::CreateXor(Builder
.CreateAnd(X
, C1
), B
);
2747 // (A&C2)|((X^A)&C1) -> (X&C2) ^ A iff C1 == ~C2
2748 if (match(B
, m_c_Xor(m_Specific(A
), m_Value(X
))))
2749 return BinaryOperator::CreateXor(Builder
.CreateAnd(X
, C2
), A
);
2753 // Don't try to form a select if it's unlikely that we'll get rid of at
2754 // least one of the operands. A select is generally more expensive than the
2755 // 'or' that it is replacing.
2756 if (Op0
->hasOneUse() || Op1
->hasOneUse()) {
2757 // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
2758 if (Value
*V
= matchSelectFromAndOr(A
, C
, B
, D
))
2759 return replaceInstUsesWith(I
, V
);
2760 if (Value
*V
= matchSelectFromAndOr(A
, C
, D
, B
))
2761 return replaceInstUsesWith(I
, V
);
2762 if (Value
*V
= matchSelectFromAndOr(C
, A
, B
, D
))
2763 return replaceInstUsesWith(I
, V
);
2764 if (Value
*V
= matchSelectFromAndOr(C
, A
, D
, B
))
2765 return replaceInstUsesWith(I
, V
);
2766 if (Value
*V
= matchSelectFromAndOr(B
, D
, A
, C
))
2767 return replaceInstUsesWith(I
, V
);
2768 if (Value
*V
= matchSelectFromAndOr(B
, D
, C
, A
))
2769 return replaceInstUsesWith(I
, V
);
2770 if (Value
*V
= matchSelectFromAndOr(D
, B
, A
, C
))
2771 return replaceInstUsesWith(I
, V
);
2772 if (Value
*V
= matchSelectFromAndOr(D
, B
, C
, A
))
2773 return replaceInstUsesWith(I
, V
);
2777 // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
2778 if (match(Op0
, m_Xor(m_Value(A
), m_Value(B
))))
2779 if (match(Op1
, m_Xor(m_Xor(m_Specific(B
), m_Value(C
)), m_Specific(A
))))
2780 return BinaryOperator::CreateOr(Op0
, C
);
2782 // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
2783 if (match(Op0
, m_Xor(m_Xor(m_Value(A
), m_Value(C
)), m_Value(B
))))
2784 if (match(Op1
, m_Xor(m_Specific(B
), m_Specific(A
))))
2785 return BinaryOperator::CreateOr(Op1
, C
);
2787 // ((B | C) & A) | B -> B | (A & C)
2788 if (match(Op0
, m_And(m_Or(m_Specific(Op1
), m_Value(C
)), m_Value(A
))))
2789 return BinaryOperator::CreateOr(Op1
, Builder
.CreateAnd(A
, C
));
2791 if (Instruction
*DeMorgan
= matchDeMorgansLaws(I
, Builder
))
2794 // Canonicalize xor to the RHS.
2795 bool SwappedForXor
= false;
2796 if (match(Op0
, m_Xor(m_Value(), m_Value()))) {
2797 std::swap(Op0
, Op1
);
2798 SwappedForXor
= true;
2801 // A | ( A ^ B) -> A | B
2802 // A | (~A ^ B) -> A | ~B
2803 // (A & B) | (A ^ B)
2804 if (match(Op1
, m_Xor(m_Value(A
), m_Value(B
)))) {
2805 if (Op0
== A
|| Op0
== B
)
2806 return BinaryOperator::CreateOr(A
, B
);
2808 if (match(Op0
, m_And(m_Specific(A
), m_Specific(B
))) ||
2809 match(Op0
, m_And(m_Specific(B
), m_Specific(A
))))
2810 return BinaryOperator::CreateOr(A
, B
);
2812 if (Op1
->hasOneUse() && match(A
, m_Not(m_Specific(Op0
)))) {
2813 Value
*Not
= Builder
.CreateNot(B
, B
->getName() + ".not");
2814 return BinaryOperator::CreateOr(Not
, Op0
);
2816 if (Op1
->hasOneUse() && match(B
, m_Not(m_Specific(Op0
)))) {
2817 Value
*Not
= Builder
.CreateNot(A
, A
->getName() + ".not");
2818 return BinaryOperator::CreateOr(Not
, Op0
);
2822 // A | ~(A | B) -> A | ~B
2823 // A | ~(A ^ B) -> A | ~B
2824 if (match(Op1
, m_Not(m_Value(A
))))
2825 if (BinaryOperator
*B
= dyn_cast
<BinaryOperator
>(A
))
2826 if ((Op0
== B
->getOperand(0) || Op0
== B
->getOperand(1)) &&
2827 Op1
->hasOneUse() && (B
->getOpcode() == Instruction::Or
||
2828 B
->getOpcode() == Instruction::Xor
)) {
2829 Value
*NotOp
= Op0
== B
->getOperand(0) ? B
->getOperand(1) :
2831 Value
*Not
= Builder
.CreateNot(NotOp
, NotOp
->getName() + ".not");
2832 return BinaryOperator::CreateOr(Not
, Op0
);
2836 std::swap(Op0
, Op1
);
2839 ICmpInst
*LHS
= dyn_cast
<ICmpInst
>(Op0
);
2840 ICmpInst
*RHS
= dyn_cast
<ICmpInst
>(Op1
);
2842 if (Value
*Res
= foldOrOfICmps(LHS
, RHS
, I
))
2843 return replaceInstUsesWith(I
, Res
);
2845 // TODO: Make this recursive; it's a little tricky because an arbitrary
2846 // number of 'or' instructions might have to be created.
2848 if (LHS
&& match(Op1
, m_OneUse(m_Or(m_Value(X
), m_Value(Y
))))) {
2849 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
2850 if (Value
*Res
= foldOrOfICmps(LHS
, Cmp
, I
))
2851 return replaceInstUsesWith(I
, Builder
.CreateOr(Res
, Y
));
2852 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
2853 if (Value
*Res
= foldOrOfICmps(LHS
, Cmp
, I
))
2854 return replaceInstUsesWith(I
, Builder
.CreateOr(Res
, X
));
2856 if (RHS
&& match(Op0
, m_OneUse(m_Or(m_Value(X
), m_Value(Y
))))) {
2857 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
2858 if (Value
*Res
= foldOrOfICmps(Cmp
, RHS
, I
))
2859 return replaceInstUsesWith(I
, Builder
.CreateOr(Res
, Y
));
2860 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
2861 if (Value
*Res
= foldOrOfICmps(Cmp
, RHS
, I
))
2862 return replaceInstUsesWith(I
, Builder
.CreateOr(Res
, X
));
2866 if (FCmpInst
*LHS
= dyn_cast
<FCmpInst
>(I
.getOperand(0)))
2867 if (FCmpInst
*RHS
= dyn_cast
<FCmpInst
>(I
.getOperand(1)))
2868 if (Value
*Res
= foldLogicOfFCmps(LHS
, RHS
, false))
2869 return replaceInstUsesWith(I
, Res
);
2871 if (Instruction
*FoldedFCmps
= reassociateFCmps(I
, Builder
))
2874 if (Instruction
*CastedOr
= foldCastedBitwiseLogic(I
))
2877 // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
2878 if (match(Op0
, m_OneUse(m_SExt(m_Value(A
)))) &&
2879 A
->getType()->isIntOrIntVectorTy(1))
2880 return SelectInst::Create(A
, ConstantInt::getSigned(I
.getType(), -1), Op1
);
2881 if (match(Op1
, m_OneUse(m_SExt(m_Value(A
)))) &&
2882 A
->getType()->isIntOrIntVectorTy(1))
2883 return SelectInst::Create(A
, ConstantInt::getSigned(I
.getType(), -1), Op0
);
2885 // Note: If we've gotten to the point of visiting the outer OR, then the
2886 // inner one couldn't be simplified. If it was a constant, then it won't
2887 // be simplified by a later pass either, so we try swapping the inner/outer
2888 // ORs in the hopes that we'll be able to simplify it this way.
2889 // (X|C) | V --> (X|V) | C
2891 if (Op0
->hasOneUse() && !match(Op1
, m_ConstantInt()) &&
2892 match(Op0
, m_Or(m_Value(A
), m_ConstantInt(CI
)))) {
2893 Value
*Inner
= Builder
.CreateOr(A
, Op1
);
2894 Inner
->takeName(Op0
);
2895 return BinaryOperator::CreateOr(Inner
, CI
);
2898 // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
2899 // Since this OR statement hasn't been optimized further yet, we hope
2900 // that this transformation will allow the new ORs to be optimized.
2902 Value
*X
= nullptr, *Y
= nullptr;
2903 if (Op0
->hasOneUse() && Op1
->hasOneUse() &&
2904 match(Op0
, m_Select(m_Value(X
), m_Value(A
), m_Value(B
))) &&
2905 match(Op1
, m_Select(m_Value(Y
), m_Value(C
), m_Value(D
))) && X
== Y
) {
2906 Value
*orTrue
= Builder
.CreateOr(A
, C
);
2907 Value
*orFalse
= Builder
.CreateOr(B
, D
);
2908 return SelectInst::Create(X
, orTrue
, orFalse
);
2912 // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X.
2915 Type
*Ty
= I
.getType();
2916 if (match(&I
, m_c_Or(m_OneUse(m_AShr(
2917 m_NSWSub(m_Value(Y
), m_Value(X
)),
2918 m_SpecificInt(Ty
->getScalarSizeInBits() - 1))),
2920 Value
*NewICmpInst
= Builder
.CreateICmpSGT(X
, Y
);
2921 Value
*AllOnes
= ConstantInt::getAllOnesValue(Ty
);
2922 return SelectInst::Create(NewICmpInst
, AllOnes
, X
);
2926 if (Instruction
*V
=
2927 canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I
))
2930 CmpInst::Predicate Pred
;
2931 Value
*Mul
, *Ov
, *MulIsNotZero
, *UMulWithOv
;
2932 // Check if the OR weakens the overflow condition for umul.with.overflow by
2933 // treating any non-zero result as overflow. In that case, we overflow if both
2934 // umul.with.overflow operands are != 0, as in that case the result can only
2935 // be 0, iff the multiplication overflows.
2937 m_c_Or(m_CombineAnd(m_ExtractValue
<1>(m_Value(UMulWithOv
)),
2939 m_CombineAnd(m_ICmp(Pred
,
2940 m_CombineAnd(m_ExtractValue
<0>(
2941 m_Deferred(UMulWithOv
)),
2944 m_Value(MulIsNotZero
)))) &&
2945 (Ov
->hasOneUse() || (MulIsNotZero
->hasOneUse() && Mul
->hasOneUse())) &&
2946 Pred
== CmpInst::ICMP_NE
) {
2948 if (match(UMulWithOv
, m_Intrinsic
<Intrinsic::umul_with_overflow
>(
2949 m_Value(A
), m_Value(B
)))) {
2950 Value
*NotNullA
= Builder
.CreateIsNotNull(A
);
2951 Value
*NotNullB
= Builder
.CreateIsNotNull(B
);
2952 return BinaryOperator::CreateAnd(NotNullA
, NotNullB
);
2956 // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
2957 if (sinkNotIntoOtherHandOfAndOrOr(I
))
2960 // Improve "get low bit mask up to and including bit X" pattern:
2961 // (1 << X) | ((1 << X) + -1) --> -1 l>> (bitwidth(x) - 1 - X)
2962 if (match(&I
, m_c_Or(m_Add(m_Shl(m_One(), m_Value(X
)), m_AllOnes()),
2963 m_Shl(m_One(), m_Deferred(X
)))) &&
2964 match(&I
, m_c_Or(m_OneUse(m_Value()), m_Value()))) {
2965 Type
*Ty
= X
->getType();
2966 Value
*Sub
= Builder
.CreateSub(
2967 ConstantInt::get(Ty
, Ty
->getScalarSizeInBits() - 1), X
);
2968 return BinaryOperator::CreateLShr(Constant::getAllOnesValue(Ty
), Sub
);
2971 // An or recurrence w/loop invariant step is equivelent to (or start, step)
2972 PHINode
*PN
= nullptr;
2973 Value
*Start
= nullptr, *Step
= nullptr;
2974 if (matchSimpleRecurrence(&I
, PN
, Start
, Step
) && DT
.dominates(Step
, PN
))
2975 return replaceInstUsesWith(I
, Builder
.CreateOr(Start
, Step
));
2980 /// A ^ B can be specified using other logic ops in a variety of patterns. We
2981 /// can fold these early and efficiently by morphing an existing instruction.
2982 static Instruction
*foldXorToXor(BinaryOperator
&I
,
2983 InstCombiner::BuilderTy
&Builder
) {
2984 assert(I
.getOpcode() == Instruction::Xor
);
2985 Value
*Op0
= I
.getOperand(0);
2986 Value
*Op1
= I
.getOperand(1);
2989 // There are 4 commuted variants for each of the basic patterns.
2991 // (A & B) ^ (A | B) -> A ^ B
2992 // (A & B) ^ (B | A) -> A ^ B
2993 // (A | B) ^ (A & B) -> A ^ B
2994 // (A | B) ^ (B & A) -> A ^ B
2995 if (match(&I
, m_c_Xor(m_And(m_Value(A
), m_Value(B
)),
2996 m_c_Or(m_Deferred(A
), m_Deferred(B
)))))
2997 return BinaryOperator::CreateXor(A
, B
);
2999 // (A | ~B) ^ (~A | B) -> A ^ B
3000 // (~B | A) ^ (~A | B) -> A ^ B
3001 // (~A | B) ^ (A | ~B) -> A ^ B
3002 // (B | ~A) ^ (A | ~B) -> A ^ B
3003 if (match(&I
, m_Xor(m_c_Or(m_Value(A
), m_Not(m_Value(B
))),
3004 m_c_Or(m_Not(m_Deferred(A
)), m_Deferred(B
)))))
3005 return BinaryOperator::CreateXor(A
, B
);
3007 // (A & ~B) ^ (~A & B) -> A ^ B
3008 // (~B & A) ^ (~A & B) -> A ^ B
3009 // (~A & B) ^ (A & ~B) -> A ^ B
3010 // (B & ~A) ^ (A & ~B) -> A ^ B
3011 if (match(&I
, m_Xor(m_c_And(m_Value(A
), m_Not(m_Value(B
))),
3012 m_c_And(m_Not(m_Deferred(A
)), m_Deferred(B
)))))
3013 return BinaryOperator::CreateXor(A
, B
);
3015 // For the remaining cases we need to get rid of one of the operands.
3016 if (!Op0
->hasOneUse() && !Op1
->hasOneUse())
3019 // (A | B) ^ ~(A & B) -> ~(A ^ B)
3020 // (A | B) ^ ~(B & A) -> ~(A ^ B)
3021 // (A & B) ^ ~(A | B) -> ~(A ^ B)
3022 // (A & B) ^ ~(B | A) -> ~(A ^ B)
3023 // Complexity sorting ensures the not will be on the right side.
3024 if ((match(Op0
, m_Or(m_Value(A
), m_Value(B
))) &&
3025 match(Op1
, m_Not(m_c_And(m_Specific(A
), m_Specific(B
))))) ||
3026 (match(Op0
, m_And(m_Value(A
), m_Value(B
))) &&
3027 match(Op1
, m_Not(m_c_Or(m_Specific(A
), m_Specific(B
))))))
3028 return BinaryOperator::CreateNot(Builder
.CreateXor(A
, B
));
3033 Value
*InstCombinerImpl::foldXorOfICmps(ICmpInst
*LHS
, ICmpInst
*RHS
,
3034 BinaryOperator
&I
) {
3035 assert(I
.getOpcode() == Instruction::Xor
&& I
.getOperand(0) == LHS
&&
3036 I
.getOperand(1) == RHS
&& "Should be 'xor' with these operands");

  if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
    if (LHS->getOperand(0) == RHS->getOperand(1) &&
        LHS->getOperand(1) == RHS->getOperand(0))
      LHS->swapOperands();
    if (LHS->getOperand(0) == RHS->getOperand(0) &&
        LHS->getOperand(1) == RHS->getOperand(1)) {
      // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
      Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
      unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
      bool IsSigned = LHS->isSigned() || RHS->isSigned();
      return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
    }
  }

  // TODO: This can be generalized to compares of non-signbits using
  // decomposeBitTestICmp(). It could be enhanced more by using (something like)
  // foldLogOpOfMaskedICmps().
  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
  if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
      LHS0->getType() == RHS0->getType() &&
      LHS0->getType()->isIntOrIntVectorTy()) {
    // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
    // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
    if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) ||
        (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) {
      Value *Zero = ConstantInt::getNullValue(LHS0->getType());
      return Builder.CreateICmpSLT(Builder.CreateXor(LHS0, RHS0), Zero);
    }
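    // Rationale for the fold above (illustrative): each compare tests the
    // sign bit of its operand, and the xor of the two results is true exactly
    // when the sign bits differ, i.e. when the sign bit of X ^ Y is set.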

    // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
    // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
    if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
         PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) ||
        (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
         PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) {
      Value *MinusOne = ConstantInt::getAllOnesValue(LHS0->getType());
      return Builder.CreateICmpSGT(Builder.CreateXor(LHS0, RHS0), MinusOne);
    }
  }

  // Instead of trying to imitate the folds for and/or, decompose this 'xor'
  // into those logic ops. That is, try to turn this into an and-of-icmps
  // because we have many folds for that pattern.
  //
  // This is based on a truth table definition of xor:
  // X ^ Y --> (X | Y) & !(X & Y)
  if (Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
    // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
    // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
    if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
      // TODO: Independently handle cases where the 'and' side is a constant.
      ICmpInst *X = nullptr, *Y = nullptr;
      if (OrICmp == LHS && AndICmp == RHS) {
        // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y
        X = LHS;
        Y = RHS;
      }
      if (OrICmp == RHS && AndICmp == LHS) {
        // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X
        X = RHS;
        Y = LHS;
      }
      if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(Y, &I))) {
        // Invert the predicate of 'Y', thus inverting its output.
        Y->setPredicate(Y->getInversePredicate());
        // So, are there other uses of Y?
        if (!Y->hasOneUse()) {
          // We need to adapt other uses of Y though. Get a value that matches
          // the original value of Y before inversion. While this increases
          // immediate instruction count, we have just ensured that all the
          // users are freely-invertible, so that 'not' *will* get folded away.
          BuilderTy::InsertPointGuard Guard(Builder);
          // Set insertion point to right after the Y.
          Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator()));
          Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
          // Replace all uses of Y (excluding the one in NotY!) with NotY.
          Worklist.pushUsersToWorkList(*Y);
          Y->replaceUsesWithIf(NotY,
                               [NotY](Use &U) { return U.getUser() != NotY; });
        }
        return Builder.CreateAnd(LHS, RHS);
      }
    }
  }

  return nullptr;
}

/// If we have a masked merge, in the canonical form of:
/// (assuming that A, the 'and', only has one use and D names the inner x ^ y)
///   ((x ^ y) & M) ^ y
/// * If M is inverted:
///   ((x ^ y) & ~M) ^ y
///   We can canonicalize by swapping the final xor operand
///   to eliminate the 'not' of the mask:
///   ((x ^ y) & M) ^ x
/// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops
///   because that shortens the dependency chain and improves analysis:
///   (x & M) | (y & ~M)
static Instruction *visitMaskedMerge(BinaryOperator &I,
                                     InstCombiner::BuilderTy &Builder) {
  Value *B, *X, *D, *M;
  if (!match(&I, m_c_Xor(m_Value(B),
                         m_OneUse(m_c_And(
                             m_CombineAnd(m_c_Xor(m_Deferred(B), m_Value(X)),
                                          m_Value(D)),
                             m_Value(M))))))
    return nullptr;

  Value *NotM;
  if (match(M, m_Not(m_Value(NotM)))) {
    // De-invert the mask and swap the value in B part.
    Value *NewA = Builder.CreateAnd(D, NotM);
    return BinaryOperator::CreateXor(NewA, X);
  }

  Constant *C;
  if (D->hasOneUse() && match(M, m_Constant(C))) {
    // Propagating undef is unsafe. Clamp undef elements to -1.
    Type *EltTy = C->getType()->getScalarType();
    C = Constant::replaceUndefsWith(C, ConstantInt::getAllOnesValue(EltTy));

    Value *LHS = Builder.CreateAnd(X, C);
    Value *NotC = Builder.CreateNot(C);
    Value *RHS = Builder.CreateAnd(B, NotC);
    return BinaryOperator::CreateOr(LHS, RHS);
  }
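  // Worked example for the constant-mask unfold above (illustrative): with
  // x = 0b1010, y = 0b0110, M = 0b1100, ((x ^ y) & M) ^ y = 0b1100 ^ 0b0110
  // = 0b1010, and (x & M) | (y & ~M) = 0b1000 | 0b0010 = 0b1010 as well.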

  return nullptr;
}

// Transform ~(x ^ y) into (~x) ^ y or x ^ (~y), if one side is free to invert.
static Instruction *sinkNotIntoXor(BinaryOperator &I,
                                   InstCombiner::BuilderTy &Builder) {
  Value *X, *Y;
  // FIXME: one-use check is not needed in general, but currently we are unable
  // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182)
  if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
    return nullptr;

  // We only want to do the transform if it is free to do.
  if (InstCombiner::isFreeToInvert(X, X->hasOneUse())) {
    // Sink the 'not' into X below.
  } else if (InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) {
    std::swap(X, Y);
  } else
    return nullptr;

  Value *NotX = Builder.CreateNot(X, X->getName() + ".not");
  return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
}

/// Canonicalize a shifty way to code absolute value to the more common pattern
/// that uses negation and select.
static Instruction *canonicalizeAbs(BinaryOperator &Xor,
                                    InstCombiner::BuilderTy &Builder) {
  assert(Xor.getOpcode() == Instruction::Xor && "Expected an xor instruction.");

  // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
  // We're relying on the fact that we only do this transform when the shift has
  // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
  // instruction count unnecessarily).
  Value *Op0 = Xor.getOperand(0), *Op1 = Xor.getOperand(1);
  if (Op0->hasNUses(2))
    std::swap(Op0, Op1);

  Type *Ty = Xor.getType();
  Value *A;
  const APInt *ShAmt;
  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
      Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
      match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
    // Op1 = ashr i32 A, 31   ; smear the sign bit
    // xor (add A, Op1), Op1  ; add -1 and flip bits if negative
    // --> (A < 0) ? -A : A
    Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
    // Copy the nuw/nsw flags from the add to the negate.
    auto *Add = cast<BinaryOperator>(Op0);
    Value *Neg = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
                                   Add->hasNoSignedWrap());
    return SelectInst::Create(Cmp, Neg, A);
  }
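  // Worked example for the transform above (illustrative): for i32 A = -5,
  // Op1 = ashr -5, 31 = -1, add A, Op1 = -6, and xor -6, -1 = 5, which is
  // exactly (A < 0) ? -A : A.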

  return nullptr;
}

// Transform
//   z = (~x) &/| y
// into:
//   z = ~(x |/& (~y))
// iff y is free to invert and all uses of z can be freely updated.
bool InstCombinerImpl::sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I) {
  Instruction::BinaryOps NewOpc;
  switch (I.getOpcode()) {
  case Instruction::And:
    NewOpc = Instruction::Or;
    break;
  case Instruction::Or:
    NewOpc = Instruction::And;
    break;
  default:
    return false;
  }

  Value *X, *Y;
  if (!match(&I, m_c_BinOp(m_Not(m_Value(X)), m_Value(Y))))
    return false;

  // Will we be able to fold the `not` into Y eventually?
  if (!InstCombiner::isFreeToInvert(Y, Y->hasOneUse()))
    return false;

  // And can our users be adapted?
  if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
    return false;

  Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
  Value *NewBinOp =
      BinaryOperator::Create(NewOpc, X, NotY, I.getName() + ".not");
  Builder.Insert(NewBinOp);
  replaceInstUsesWith(I, NewBinOp);
  // We can not just create an outer `not`, it will most likely be immediately
  // folded back, reconstructing our initial pattern, and causing an
  // infinite combine loop, so immediately manually fold it away.
  freelyInvertAllUsersOf(NewBinOp);
  return true;
}

// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
  if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *NewXor = foldXorToXor(I, Builder))
    return NewXor;

  // (A&B)^(A&C) -> A&(B^C) etc
  if (Value *V = SimplifyUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  if (Value *V = SimplifyBSwap(I, Builder))
    return replaceInstUsesWith(I, V);

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();

  // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
  // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
  // calls in there are unnecessary as SimplifyDemandedInstructionBits should
  // have already taken care of those cases.
  Value *M;
  if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
                        m_c_And(m_Deferred(M), m_Value()))))
    return BinaryOperator::CreateOr(Op0, Op1);
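  // Rationale for the fold above (illustrative): the two 'and' results have
  // no set bits in common (one is masked by M, the other by ~M), and for
  // operands with disjoint bits xor and or compute the same value.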

  // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
  Value *X, *Y;

  // We must eliminate the and/or (one-use) for these transforms to not increase
  // the instruction count.
  //
  // ~(~X & Y) --> (X | ~Y)
  // ~(Y & ~X) --> (X | ~Y)
  if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) {
    Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
    return BinaryOperator::CreateOr(X, NotY);
  }

  // ~(~X | Y) --> (X & ~Y)
  // ~(Y | ~X) --> (X & ~Y)
  if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) {
    Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
    return BinaryOperator::CreateAnd(X, NotY);
  }

  if (Instruction *Xor = visitMaskedMerge(I, Builder))
    return Xor;

  // Is this a 'not' (~) fed by a binary operator?
  BinaryOperator *NotVal;
  if (match(&I, m_Not(m_BinOp(NotVal)))) {
    if (NotVal->getOpcode() == Instruction::And ||
        NotVal->getOpcode() == Instruction::Or) {
      // Apply DeMorgan's Law when inverts are free:
      // ~(X & Y) --> (~X | ~Y)
      // ~(X | Y) --> (~X & ~Y)
      if (isFreeToInvert(NotVal->getOperand(0),
                         NotVal->getOperand(0)->hasOneUse()) &&
          isFreeToInvert(NotVal->getOperand(1),
                         NotVal->getOperand(1)->hasOneUse())) {
        Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs");
        Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs");
        if (NotVal->getOpcode() == Instruction::And)
          return BinaryOperator::CreateOr(NotX, NotY);
        return BinaryOperator::CreateAnd(NotX, NotY);
      }
    }

    // ~((-X) | Y) --> (X - 1) & (~Y)
    if (match(NotVal,
              m_OneUse(m_c_Or(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))) {
      Value *DecX = Builder.CreateAdd(X, ConstantInt::getAllOnesValue(Ty));
      Value *NotY = Builder.CreateNot(Y);
      return BinaryOperator::CreateAnd(DecX, NotY);
    }

    // ~(~X >>s Y) --> (X >>s Y)
    if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
      return BinaryOperator::CreateAShr(X, Y);

    // If we are inverting a right-shifted constant, we may be able to eliminate
    // the 'not' by inverting the constant and using the opposite shift type.
    // Canonicalization rules ensure that only a negative constant uses 'ashr',
    // but we must check that in case that transform has not fired yet.

    // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
    Constant *C;
    if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&
        match(C, m_Negative())) {
      // We matched a negative constant, so propagating undef is unsafe.
      // Clamp undef elements to -1.
      Type *EltTy = Ty->getScalarType();
      C = Constant::replaceUndefsWith(C, ConstantInt::getAllOnesValue(EltTy));
      return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y);
    }

    // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
    if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&
        match(C, m_NonNegative())) {
      // We matched a non-negative constant, so propagating undef is unsafe.
      // Clamp undef elements to 0.
      Type *EltTy = Ty->getScalarType();
      C = Constant::replaceUndefsWith(C, ConstantInt::getNullValue(EltTy));
      return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
    }

    // ~(X + C) --> ~C - X
    if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C))))
      return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X);
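    // Derivation for the fold above (illustrative): using ~V == -V - 1,
    // ~(X + C) == -(X + C) - 1 == (-C - 1) - X == ~C - X.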

    // ~(X - Y) --> ~X + Y
    // FIXME: is it really beneficial to sink the `not` here?
    if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
      if (isa<Constant>(X) || NotVal->hasOneUse())
        return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);

    // ~(~X + Y) --> X - Y
    if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y))))
      return BinaryOperator::CreateWithCopiedFlags(Instruction::Sub, X, Y,
                                                   NotVal);
  }

  // Use DeMorgan and reassociation to eliminate a 'not' op.
  Constant *C1;
  if (match(Op1, m_Constant(C1))) {
    Constant *C2;
    if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
      // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
      Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
      return BinaryOperator::CreateXor(And, ConstantExpr::getNot(C1));
    }
    if (match(Op0, m_OneUse(m_And(m_Not(m_Value(X)), m_Constant(C2))))) {
      // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
      Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
      return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
    }
  }

  // not (cmp A, B) = !cmp A, B
  CmpInst::Predicate Pred;
  if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) {
    cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
    return replaceInstUsesWith(I, Op0);
  }

  {
    const APInt *RHSC, *C;
    if (match(Op1, m_APInt(RHSC))) {
      // (C - X) ^ signmaskC --> (C + signmaskC) - X
      if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X))))
        return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C + *RHSC), X);

      // (X + C) ^ signmaskC --> X + (C + signmaskC)
      if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C))))
        return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C + *RHSC));
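      // Rationale for the two signmask folds above (illustrative): xor'ing
      // with the sign mask only flips the top bit, which is the same as
      // adding the sign mask (any carry out of the top bit is discarded), so
      // the xor can be folded into the constant of the add/sub.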

      // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0
      if (match(Op0, m_Or(m_Value(X), m_APInt(C))) &&
          MaskedValueIsZero(X, *C, 0, &I))
        return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC));

      // If RHSC is inverting the remaining bits of shifted X,
      // canonicalize to a 'not' before the shift to help SCEV and codegen:
      // (X << C) ^ RHSC --> ~X << C
      if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) &&
          *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).shl(*C)) {
        Value *NotX = Builder.CreateNot(X);
        return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C));
      }
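      // Rationale for the fold above (illustrative): RHSC flips exactly the
      // bits that survive the shift, so flipping X before shifting ('~X << C')
      // produces the same result with the 'not' hoisted ahead of the shift.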

      // (X >>u C) ^ RHSC --> ~X >>u C
      if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) &&
          *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).lshr(*C)) {
        Value *NotX = Builder.CreateNot(X);
        return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C));
      }

      // TODO: We could handle 'ashr' here as well. That would be matching
      //       a 'not' op and moving it before the shift. Doing that requires
      //       preventing the inverse fold in canShiftBinOpWithConstantRHS().
    }
  }

  // FIXME: This should not be limited to scalar (pull into APInt match above).
  {
    ConstantInt *C1, *C2, *C3;
    // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
    if (match(Op1, m_ConstantInt(C3)) &&
        match(Op0, m_LShr(m_Xor(m_Value(X), m_ConstantInt(C1)),
                          m_ConstantInt(C2))) &&
        Op0->hasOneUse()) {
      // fold (C1 >> C2) ^ C3
      APInt FoldConst = C1->getValue().lshr(C2->getValue());
      FoldConst ^= C3->getValue();
      // Prepare the two operands.
      auto *Opnd0 = cast<Instruction>(Builder.CreateLShr(X, C2));
      Opnd0->takeName(cast<Instruction>(Op0));
      Opnd0->setDebugLoc(I.getDebugLoc());
      return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst));
    }
  }

  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
    return FoldedLogic;

  // Y ^ (X | Y) --> X & ~Y
  // Y ^ (Y | X) --> X & ~Y
  if (match(Op1, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op0)))))
    return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op0));
  // (X | Y) ^ Y --> X & ~Y
  // (Y | X) ^ Y --> X & ~Y
  if (match(Op0, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op1)))))
    return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op1));

  // Y ^ (X & Y) --> ~X & Y
  // Y ^ (Y & X) --> ~X & Y
  if (match(Op1, m_OneUse(m_c_And(m_Value(X), m_Specific(Op0)))))
    return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(X));
  // (X & Y) ^ Y --> ~X & Y
  // (Y & X) ^ Y --> ~X & Y
  // Canonical form is (X & C) ^ C; don't touch that.
  // TODO: A 'not' op is better for analysis and codegen, but demanded bits must
  //       be fixed to prefer that (otherwise we get infinite looping).
  if (!match(Op1, m_Constant()) &&
      match(Op0, m_OneUse(m_c_And(m_Value(X), m_Specific(Op1)))))
    return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(X));

  Value *A, *B, *C;
  // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
                        m_OneUse(m_c_Or(m_Deferred(A), m_Value(C))))))
    return BinaryOperator::CreateXor(
        Builder.CreateAnd(Builder.CreateNot(A), C), B);

  // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
                        m_OneUse(m_c_Or(m_Deferred(B), m_Value(C))))))
    return BinaryOperator::CreateXor(
        Builder.CreateAnd(Builder.CreateNot(B), C), A);
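  // Derivation for the two folds above (illustrative): A ^ (A | C) == ~A & C,
  // since a set bit of A yields 0 and a clear bit of A yields the bit of C;
  // xor'ing the remaining B back in gives (~A & C) ^ B.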

  // (A & B) ^ (A ^ B) -> (A | B)
  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
      match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
    return BinaryOperator::CreateOr(A, B);
  // (A ^ B) ^ (A & B) -> (A | B)
  if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
      match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
    return BinaryOperator::CreateOr(A, B);

  // (A & ~B) ^ ~A -> ~(A & B)
  // (~B & A) ^ ~A -> ~(A & B)
  if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
      match(Op1, m_Not(m_Specific(A))))
    return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));

  // (~A & B) ^ A --> A | B -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A))))
    return BinaryOperator::CreateOr(A, B);

  // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants.
  // TODO: Loosen one-use restriction if common operand is a constant.
  Value *D;
  if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) &&
      match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) {
    // Move the common operand into A (and D), the distinct ones into B and C.
    if (B == C || B == D)
      std::swap(A, B);
    if (A == C)
      std::swap(C, D);
    if (A == D) {
      Value *NotA = Builder.CreateNot(A);
      return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA);
    }
  }

  if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
    if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
      if (Value *V = foldXorOfICmps(LHS, RHS, I))
        return replaceInstUsesWith(I, V);

  if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
    return CastedXor;

  // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
  //   ~min(~X, ~Y) --> max(X, Y)
  //   ~max(~X, Y) --> min(X, ~Y)
  auto *II = dyn_cast<IntrinsicInst>(Op0);
  if (II && II->hasOneUse() && match(Op1, m_AllOnes())) {
    if (match(Op0, m_MaxOrMin(m_Value(X), m_Value(Y))) &&
        isFreeToInvert(X, X->hasOneUse()) &&
        isFreeToInvert(Y, Y->hasOneUse())) {
      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
      Value *NotX = Builder.CreateNot(X);
      Value *NotY = Builder.CreateNot(Y);
      Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, NotX, NotY);
      return replaceInstUsesWith(I, InvMaxMin);
    }
    if (match(Op0, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
      Value *NotY = Builder.CreateNot(Y);
      Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotY);
      return replaceInstUsesWith(I, InvMaxMin);
    }
  }

  // TODO: Remove folds if we canonicalize to intrinsics (see above).
  // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
  //
  //   %notx = xor i32 %x, -1
  //   %cmp1 = icmp sgt i32 %notx, %y
  //   %smax = select i1 %cmp1, i32 %notx, i32 %y
  //   %res = xor i32 %smax, -1
  // =>
  //   %noty = xor i32 %y, -1
  //   %cmp2 = icmp slt i32 %x, %noty
  //   %res = select i1 %cmp2, i32 %x, i32 %noty
  //
  // Same is applicable for smin/umax/umin.
  if (match(Op1, m_AllOnes()) && Op0->hasOneUse()) {
    Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
    if (SelectPatternResult::isMinOrMax(SPF)) {
      // It's possible we get here before the not has been simplified, so make
      // sure the input to the not isn't freely invertible.
      if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) {
        Value *NotY = Builder.CreateNot(RHS);
        return SelectInst::Create(
            Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
      }

      // It's possible we get here before the not has been simplified, so make
      // sure the input to the not isn't freely invertible.
      if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) {
        Value *NotX = Builder.CreateNot(LHS);
        return SelectInst::Create(
            Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
      }

      // If both sides are freely invertible, then we can get rid of the xor
      // completely.
      if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
          isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
        Value *NotLHS = Builder.CreateNot(LHS);
        Value *NotRHS = Builder.CreateNot(RHS);
        return SelectInst::Create(
            Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
            NotLHS, NotRHS);
      }
    }

    // Pull 'not' into operands of select if both operands are one-use compares
    // or one is one-use compare and the other one is a constant.
    // Inverting the predicates eliminates the 'not' operation.
    // Example:
    //   not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) -->
    //     select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
    //   not (select ?, (cmp TPred, ?, ?), true -->
    //     select ?, (cmp InvTPred, ?, ?), false
    if (auto *Sel = dyn_cast<SelectInst>(Op0)) {
      Value *TV = Sel->getTrueValue();
      Value *FV = Sel->getFalseValue();
      auto *CmpT = dyn_cast<CmpInst>(TV);
      auto *CmpF = dyn_cast<CmpInst>(FV);
      bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
      bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
      if (InvertibleT && InvertibleF) {
        if (CmpT)
          CmpT->setPredicate(CmpT->getInversePredicate());
        else
          Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
        if (CmpF)
          CmpF->setPredicate(CmpF->getInversePredicate());
        else
          Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
        return replaceInstUsesWith(I, Sel);
      }
    }
  }

  if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
    return NewXor;

  if (Instruction *Abs = canonicalizeAbs(I, Builder))
    return Abs;

  // Otherwise, if all else failed, try to hoist the xor-by-constant:
  //   (X ^ C) ^ Y --> (X ^ Y) ^ C
  // Just like we do in other places, we completely avoid the fold
  // for constantexprs, at least to avoid endless combine loop.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X),
                                                    m_Unless(m_ConstantExpr())),
                                       m_ImmConstant(C1))),
                        m_Value(Y))))
    return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1);

  return nullptr;
}