//===- InstCombineAndOrXor.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitAnd, visitOr, and visitXor functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"
/// This is the complement of getICmpCode, which turns an opcode and two
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
/// use in the new icmp instruction.
static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS,
                              InstCombiner::BuilderTy &Builder) {
  ICmpInst::Predicate NewPred;
  if (Constant *TorF = getPredForICmpCode(Code, Sign, LHS->getType(), NewPred))
    return TorF;
  return Builder.CreateICmp(NewPred, LHS, RHS);
}
/// This is the complement of getFCmpCode, which turns an opcode and two
/// operands into either an FCmp instruction, or a true/false constant.
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
                           InstCombiner::BuilderTy &Builder) {
  FCmpInst::Predicate NewPred;
  if (Constant *TorF = getPredForFCmpCode(Code, LHS->getType(), NewPred))
    return TorF;
  return Builder.CreateFCmp(NewPred, LHS, RHS);
}
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
/// whether to treat V, Lo, and Hi as signed or not.
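/// For example, with Lo = 5, Hi = 10 and Inside = true, this emits
/// ((V - 5) u< 5), which holds exactly when 5 <= V && V < 10.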
Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo,
                                         const APInt &Hi, bool isSigned,
                                         bool Inside) {
  assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) &&
         "Lo is not < Hi in range emission code!");

  Type *Ty = V->getType();

  // V >= Min && V <  Hi --> V <  Hi
  // V <  Min || V >= Hi --> V >= Hi
  ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
  if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) {
    Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred;
    return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
  }

  // V >= Lo && V <  Hi --> V - Lo u<  Hi - Lo
  // V <  Lo || V >= Hi --> V - Lo u>= Hi - Lo
  Value *VMinusLo =
      Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
  Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo);
  return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo);
}
/// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
/// that can be simplified.
/// One of A and B is considered the mask. The other is the value. This is
/// described as the "AMask" or "BMask" part of the enum. If the enum contains
/// only "Mask", then both A and B can be considered masks. If A is the mask,
/// then it was proven that (A & C) == C. This is trivial if C == A or C == 0.
/// If both A and C are constants, this proof is also easy.
/// For the following explanations, we assume that A is the mask.
///
/// "AllOnes" declares that the comparison is true only if (A & B) == A or all
/// bits of A are set in B.
///   Example: (icmp eq (A & 3), 3) -> AMask_AllOnes
///
/// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all
/// bits of A are cleared in B.
///   Example: (icmp eq (A & 3), 0) -> Mask_AllZeros
///
/// "Mixed" declares that (A & B) == C and C might or might not contain any
/// number of one bits and zero bits.
///   Example: (icmp eq (A & 3), 1) -> AMask_Mixed
///
/// "Not" means that in above descriptions "==" should be replaced by "!=".
///   Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
///
/// If the mask A contains a single bit, then the following is equivalent:
///    (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
///    (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
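/// For example, with the single-bit mask A = 8, (icmp eq (X & 8), 8) and
/// (icmp ne (X & 8), 0) test exactly the same condition.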
enum MaskedICmpType {
  AMask_AllOnes           =     1,
  AMask_NotAllOnes        =     2,
  BMask_AllOnes           =     4,
  BMask_NotAllOnes        =     8,
  Mask_AllZeros           =    16,
  Mask_NotAllZeros        =    32,
  AMask_Mixed             =    64,
  AMask_NotMixed          =   128,
  BMask_Mixed             =   256,
  BMask_NotMixed          =   512
};
/// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
/// satisfies.
static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
                                  ICmpInst::Predicate Pred) {
  const APInt *ConstA = nullptr, *ConstB = nullptr, *ConstC = nullptr;
  match(A, m_APInt(ConstA));
  match(B, m_APInt(ConstB));
  match(C, m_APInt(ConstC));
  bool IsEq = (Pred == ICmpInst::ICMP_EQ);
  bool IsAPow2 = ConstA && ConstA->isPowerOf2();
  bool IsBPow2 = ConstB && ConstB->isPowerOf2();
  unsigned MaskVal = 0;
  if (ConstC && ConstC->isZero()) {
    // if C is zero, then both A and B qualify as mask
    MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed)
                     : (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed));
    if (IsAPow2)
      MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed)
                       : (AMask_AllOnes | AMask_Mixed));
    if (IsBPow2)
      MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed)
                       : (BMask_AllOnes | BMask_Mixed));
    return MaskVal;
  }

  if (A == C) {
    MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed)
                     : (AMask_NotAllOnes | AMask_NotMixed));
    if (IsAPow2)
      MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed)
                       : (Mask_AllZeros | AMask_Mixed));
  } else if (ConstA && ConstC && ConstC->isSubsetOf(*ConstA)) {
    MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed);
  }

  if (B == C) {
    MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed)
                     : (BMask_NotAllOnes | BMask_NotMixed));
    if (IsBPow2)
      MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed)
                       : (Mask_AllZeros | BMask_Mixed));
  } else if (ConstB && ConstC && ConstC->isSubsetOf(*ConstB)) {
    MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
  }

  return MaskVal;
}
/// Convert an analysis of a masked ICmp into its equivalent if all boolean
/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
/// is adjacent to the corresponding normal flag (recording ==), this just
/// involves swapping those bits over.
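/// For example, (AMask_AllOnes | BMask_NotMixed) becomes
/// (AMask_NotAllOnes | BMask_Mixed).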
static unsigned conjugateICmpMask(unsigned Mask) {
  unsigned NewMask;
  NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros |
                     AMask_Mixed | BMask_Mixed))
            << 1;

  NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros |
                      AMask_NotMixed | BMask_NotMixed))
             >> 1;

  return NewMask;
}
// Adapts the external decomposeBitTestICmp for local use.
static bool decomposeBitTestICmp(Value *LHS, Value *RHS,
                                 CmpInst::Predicate &Pred,
                                 Value *&X, Value *&Y, Value *&Z) {
  APInt Mask;
  if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask))
    return false;

  Y = ConstantInt::get(X->getType(), Mask);
  Z = ConstantInt::get(X->getType(), 0);
  return true;
}
/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
/// Return the pattern classes (from MaskedICmpType) for the left hand side and
/// the right hand side as a pair.
/// LHS and RHS are the left hand side and the right hand side ICmps and PredL
/// and PredR are their predicates, respectively.
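/// For example, for (icmp eq (X & 3), 0) & (icmp eq (X & 12), 8), the
/// canonical parameters are A = X, B = 3, C = 0, D = 12, E = 8.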
static std::optional<std::pair<unsigned, unsigned>> getMaskedTypeForICmpPair(
    Value *&A, Value *&B, Value *&C, Value *&D, Value *&E, ICmpInst *LHS,
    ICmpInst *RHS, ICmpInst::Predicate &PredL, ICmpInst::Predicate &PredR) {
  // Don't allow pointers. Splat vectors are fine.
  if (!LHS->getOperand(0)->getType()->isIntOrIntVectorTy() ||
      !RHS->getOperand(0)->getType()->isIntOrIntVectorTy())
    return std::nullopt;

  // Here comes the tricky part:
  //  LHS might be of the form L11 & L12 == X, X == L21 & L22,
  //  and L11 & L12 == L21 & L22. The same goes for RHS.
  // Now we must find those components L** and R**, that are equal, so
  // that we can extract the parameters A, B, C, D, and E for the canonical
  // pattern.
  Value *L1 = LHS->getOperand(0);
  Value *L2 = LHS->getOperand(1);
  Value *L11, *L12, *L21, *L22;
  // Check whether the icmp can be decomposed into a bit test.
  if (decomposeBitTestICmp(L1, L2, PredL, L11, L12, L2)) {
    L21 = L22 = L1 = nullptr;
  } else {
    // Look for ANDs in the LHS icmp.
    if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
      // Any icmp can be viewed as being trivially masked; if it allows us to
      // remove one, it's worth it.
      L11 = L1;
      L12 = Constant::getAllOnesValue(L1->getType());
    }

    if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
      L21 = L2;
      L22 = Constant::getAllOnesValue(L2->getType());
    }
  }

  // Bail if LHS was a icmp that can't be decomposed into an equality.
  if (!ICmpInst::isEquality(PredL))
    return std::nullopt;

  Value *R1 = RHS->getOperand(0);
  Value *R2 = RHS->getOperand(1);
  Value *R11, *R12;
  bool Ok = false;
  if (decomposeBitTestICmp(R1, R2, PredR, R11, R12, R2)) {
    if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
      A = R11;
      D = R12;
    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
      A = R12;
      D = R11;
    } else {
      return std::nullopt;
    }
    E = R2;
    R1 = nullptr;
    Ok = true;
  } else {
    if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
      // As before, model no mask as a trivial mask if it'll let us do an
      // optimization.
      R11 = R1;
      R12 = Constant::getAllOnesValue(R1->getType());
    }

    if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
      A = R11;
      D = R12;
      E = R2;
      Ok = true;
    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
      A = R12;
      D = R11;
      E = R2;
      Ok = true;
    }
  }

  // Bail if RHS was a icmp that can't be decomposed into an equality.
  if (!ICmpInst::isEquality(PredR))
    return std::nullopt;

  // Look for ANDs on the right side of the RHS icmp.
  if (!Ok) {
    if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
      R11 = R2;
      R12 = Constant::getAllOnesValue(R2->getType());
    }

    if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
      A = R11;
      D = R12;
      E = R1;
      Ok = true;
    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
      A = R12;
      D = R11;
      E = R1;
      Ok = true;
    } else {
      return std::nullopt;
    }

    assert(Ok && "Failed to find AND on the right side of the RHS icmp.");
  }

  if (L11 == A) {
    B = L12;
    C = L2;
  } else if (L12 == A) {
    B = L11;
    C = L2;
  } else if (L21 == A) {
    B = L22;
    C = L1;
  } else if (L22 == A) {
    B = L21;
    C = L1;
  }

  unsigned LeftType = getMaskedICmpType(A, B, C, PredL);
  unsigned RightType = getMaskedICmpType(A, D, E, PredR);
  return std::optional<std::pair<unsigned, unsigned>>(
      std::make_pair(LeftType, RightType));
}
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
/// and the right hand side is of type BMask_Mixed. For example,
/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
/// Also used for logical and/or, must be poison safe.
static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
    Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
    InstCombiner::BuilderTy &Builder) {
  // We are given the canonical form:
  //   (icmp ne (A & B), 0) & (icmp eq (A & D), E).
  //
  // If IsAnd is false, we get it in negated form:
  //   (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
  //      !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
  //
  // We currently handle the case of B, C, D, E are constant.
  const APInt *BCst, *CCst, *DCst, *OrigECst;
  if (!match(B, m_APInt(BCst)) || !match(C, m_APInt(CCst)) ||
      !match(D, m_APInt(DCst)) || !match(E, m_APInt(OrigECst)))
    return nullptr;

  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;

  // Update E to the canonical form when D is a power of two and RHS is
  // canonicalized as,
  // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
  // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
  APInt ECst = *OrigECst;
  if (PredR != NewCC)
    ECst ^= *DCst;

  // If B or D is zero, skip because if LHS or RHS can be trivially folded by
  // other folding rules and this pattern won't apply any more.
  if (*BCst == 0 || *DCst == 0)
    return nullptr;

  // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't
  // deduce anything from it.
  // For example,
  // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
  if ((*BCst & *DCst) == 0)
    return nullptr;

  // If the following two conditions are met:
  //
  // 1. mask B covers only a single bit that's not covered by mask D, that is,
  // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of
  // B and D has only one bit set) and,
  //
  // 2. RHS (and E) indicates that the rest of B's bits are zero (in other
  // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0
  //
  // then that single bit in B must be one and thus the whole expression can be
  // folded to
  //   (A & (B | D)) == (B & (B ^ D)) | E.
  //
  // For example,
  // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
  // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
  if ((((*BCst & *DCst) & ECst) == 0) &&
      (*BCst & (*BCst ^ *DCst)).isPowerOf2()) {
    APInt BorD = *BCst | *DCst;
    APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst;
    Value *NewMask = ConstantInt::get(A->getType(), BorD);
    Value *NewMaskedValue = ConstantInt::get(A->getType(), BandBxorDorE);
    Value *NewAnd = Builder.CreateAnd(A, NewMask);
    return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue);
  }

  auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) {
    return (*C1 & *C2) == *C1;
  };
  auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) {
    return (*C1 & *C2) == *C2;
  };

  // In the following, we consider only the cases where B is a superset of D, B
  // is a subset of D, or B == D because otherwise there's at least one bit
  // covered by B but not D, in which case we can't deduce much from it, so
  // no folding (aside from the single must-be-one bit case right above.)
  // For example,
  // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding.
  if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst))
    return nullptr;

  // At this point, either B is a superset of D, B is a subset of D or B == D.

  // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict
  // and the whole expression becomes false (or true if negated), otherwise, no
  // folding.
  // For example,
  // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
  // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
  if (ECst.isZero()) {
    if (IsSubSetOrEqual(BCst, DCst))
      return ConstantInt::get(LHS->getType(), !IsAnd);
    return nullptr;
  }

  // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B ==
  // D. If B is a superset of (or equal to) D, since E is not zero, LHS is
  // subsumed by RHS (RHS implies LHS.) So the whole expression becomes
  // RHS. For example,
  // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
  // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
  if (IsSuperSetOrEqual(BCst, DCst))
    return RHS;

  // Otherwise, B is a subset of D. If B and E have a common bit set,
  // ie. (B & E) != 0, then LHS is subsumed by RHS. For example.
  // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
  assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code");
  if ((*BCst & ECst) != 0)
    return RHS;

  // Otherwise, LHS and RHS contradict and the whole expression becomes false
  // (or true if negated.) For example,
  // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false.
  // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false.
  return ConstantInt::get(LHS->getType(), !IsAnd);
}
/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
/// aren't of the common mask pattern type.
/// Also used for logical and/or, must be poison safe.
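/// For example, (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) folds to
/// (icmp eq (A & 15), 8) via the Mask_NotAllZeros / BMask_Mixed handling
/// below.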
static Value *foldLogOpOfMaskedICmpsAsymmetric(
    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
    Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
    unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) {
  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
         "Expected equality predicates for masked type of icmps.");
  // Handle Mask_NotAllZeros-BMask_Mixed cases.
  // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or
  // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E)
  //    which gets swapped to
  //    (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C).
  if (!IsAnd) {
    LHSMask = conjugateICmpMask(LHSMask);
    RHSMask = conjugateICmpMask(RHSMask);
  }
  if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) {
    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
            LHS, RHS, IsAnd, A, B, C, D, E,
            PredL, PredR, Builder)) {
      return V;
    }
  } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) {
    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
            RHS, LHS, IsAnd, A, D, E, B, C,
            PredR, PredL, Builder)) {
      return V;
    }
  }
  return nullptr;
}
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y).
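/// For example, (icmp eq (A & 12), 0) & (icmp eq (A & 3), 0) becomes
/// (icmp eq (A & 15), 0).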
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                                     bool IsLogical,
                                     InstCombiner::BuilderTy &Builder) {
  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  std::optional<std::pair<unsigned, unsigned>> MaskPair =
      getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR);
  if (!MaskPair)
    return nullptr;
  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
         "Expected equality predicates for masked type of icmps.");
  unsigned LHSMask = MaskPair->first;
  unsigned RHSMask = MaskPair->second;
  unsigned Mask = LHSMask & RHSMask;
  if (Mask == 0) {
    // Even if the two sides don't share a common pattern, check if folding can
    // still happen.
    if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
            LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
            Builder))
      return V;
    return nullptr;
  }

  // In full generality:
  //     (icmp (A & B) Op C) | (icmp (A & D) Op E)
  // ==  ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
  //
  // If the latter can be converted into (icmp (A & X) Op Y) then the former is
  // equivalent to (icmp (A & X) !Op Y).
  //
  // Therefore, we can pretend for the rest of this function that we're dealing
  // with the conjunction, provided we flip the sense of any comparisons (both
  // input and output).

  // In most cases we're going to produce an EQ for the "&&" case.
  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
  if (!IsAnd) {
    // Convert the masking analysis into its equivalent with negated
    // comparisons.
    Mask = conjugateICmpMask(Mask);
  }

  if (Mask & Mask_AllZeros) {
    // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
    // -> (icmp eq (A & (B|D)), 0)
    if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
      return nullptr; // TODO: Use freeze?
    Value *NewOr = Builder.CreateOr(B, D);
    Value *NewAnd = Builder.CreateAnd(A, NewOr);
    // We can't use C as zero because we might actually handle
    //   (icmp ne (A & B), B) & (icmp ne (A & D), D)
    // with B and D, having a single bit set.
    Value *Zero = Constant::getNullValue(A->getType());
    return Builder.CreateICmp(NewCC, NewAnd, Zero);
  }
  if (Mask & BMask_AllOnes) {
    // (icmp eq (A & B), B) & (icmp eq (A & D), D)
    // -> (icmp eq (A & (B|D)), (B|D))
    if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
      return nullptr; // TODO: Use freeze?
    Value *NewOr = Builder.CreateOr(B, D);
    Value *NewAnd = Builder.CreateAnd(A, NewOr);
    return Builder.CreateICmp(NewCC, NewAnd, NewOr);
  }
  if (Mask & AMask_AllOnes) {
    // (icmp eq (A & B), A) & (icmp eq (A & D), A)
    // -> (icmp eq (A & (B&D)), A)
    if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
      return nullptr; // TODO: Use freeze?
    Value *NewAnd1 = Builder.CreateAnd(B, D);
    Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1);
    return Builder.CreateICmp(NewCC, NewAnd2, A);
  }

  // Remaining cases assume at least that B and D are constant, and depend on
  // their actual values. This isn't strictly necessary, just a "handle the
  // easy cases for now" decision.
  const APInt *ConstB, *ConstD;
  if (!match(B, m_APInt(ConstB)) || !match(D, m_APInt(ConstD)))
    return nullptr;

  if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
    // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
    // (icmp ne (A & B), B) & (icmp ne (A & D), D)
    //     -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
    // Only valid if one of the masks is a superset of the other (check "B&D" is
    // the same as either B or D).
    APInt NewMask = *ConstB & *ConstD;
    if (NewMask == *ConstB)
      return LHS;
    else if (NewMask == *ConstD)
      return RHS;
  }

  if (Mask & AMask_NotAllOnes) {
    // (icmp ne (A & B), B) & (icmp ne (A & D), D)
    //     -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
    // Only valid if one of the masks is a superset of the other (check "B|D" is
    // the same as either B or D).
    APInt NewMask = *ConstB | *ConstD;
    if (NewMask == *ConstB)
      return LHS;
    else if (NewMask == *ConstD)
      return RHS;
  }

  if (Mask & (BMask_Mixed | BMask_NotMixed)) {
    // (icmp eq (A & B), C) & (icmp eq (A & D), E)
    // We already know that B & C == C && D & E == E.
    // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
    // C and E, which are shared by both the mask B and the mask D, don't
    // contradict, then we can transform to
    // -> (icmp eq (A & (B|D)), (C|E))
    // Currently, we only handle the case of B, C, D, and E being constant.
    // We can't simply use C and E because we might actually handle
    //   (icmp ne (A & B), B) & (icmp eq (A & D), D)
    // with B and D, having a single bit set.
    //
    // (icmp ne (A & B), C) & (icmp ne (A & D), E)
    // -> (icmp ne (A & (B & D)), (C & E))
    // Check the intersection (B & D) for inequality.
    // Assume that (B & D) == B || (B & D) == D, i.e B/D is a subset of D/B
    // and (B & D) & (C ^ E) == 0, bits of C and E, which are shared by both the
    // B and the D, don't contradict.
    // Note that we can assume (~B & C) == 0 && (~D & E) == 0, previous
    // operation should delete these icmps if it hadn't been met.

    const APInt *OldConstC, *OldConstE;
    if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE)))
      return nullptr;

    auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * {
      CC = IsNot ? CmpInst::getInversePredicate(CC) : CC;
      const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC;
      const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE;

      if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
        return IsNot ? nullptr : ConstantInt::get(LHS->getType(), !IsAnd);

      if (IsNot && !ConstB->isSubsetOf(*ConstD) &&
          !ConstD->isSubsetOf(*ConstB))
        return nullptr;

      APInt BD, CE;
      if (IsNot) {
        BD = *ConstB & *ConstD;
        CE = ConstC & ConstE;
      } else {
        BD = *ConstB | *ConstD;
        CE = ConstC | ConstE;
      }
      Value *NewAnd = Builder.CreateAnd(A, BD);
      Value *CEVal = ConstantInt::get(A->getType(), CE);
      return Builder.CreateICmp(CC, CEVal, NewAnd);
    };

    if (Mask & BMask_Mixed)
      return FoldBMixed(NewCC, false);
    if (Mask & BMask_NotMixed) // can be else also
      return FoldBMixed(NewCC, true);
  }
  return nullptr;
}
/// Try to fold a signed range check with lower bound 0 to an unsigned icmp.
/// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
/// If \p Inverted is true then the check is for the inverted range, e.g.
/// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                            bool Inverted) {
  // Check the lower range comparison, e.g. x >= 0
  // InstCombine already ensured that if there is a constant it's on the RHS.
  ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1));
  if (!RangeStart)
    return nullptr;

  ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() :
                               Cmp0->getPredicate());

  // Accept x > -1 or x >= 0 (after potentially inverting the predicate).
  if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) ||
        (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero())))
    return nullptr;

  ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() :
                               Cmp1->getPredicate());

  Value *Input = Cmp0->getOperand(0);
  Value *RangeEnd;
  if (Cmp1->getOperand(0) == Input) {
    // For the upper range compare we have: icmp x, n
    RangeEnd = Cmp1->getOperand(1);
  } else if (Cmp1->getOperand(1) == Input) {
    // For the upper range compare we have: icmp n, x
    RangeEnd = Cmp1->getOperand(0);
    Pred1 = ICmpInst::getSwappedPredicate(Pred1);
  } else {
    return nullptr;
  }

  // Check the upper range comparison, e.g. x < n
  ICmpInst::Predicate NewPred;
  switch (Pred1) {
    case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break;
    case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break;
    default: return nullptr;
  }

  // This simplification is only valid if the upper range is not negative.
  KnownBits Known = computeKnownBits(RangeEnd, /*Depth=*/0, Cmp1);
  if (!Known.isNonNegative())
    return nullptr;

  if (Inverted)
    NewPred = ICmpInst::getInversePredicate(NewPred);

  return Builder.CreateICmp(NewPred, Input, RangeEnd);
}
// Fold (iszero(A & K1) | iszero(A & K2)) ->  (A & (K1 | K2)) != (K1 | K2)
// Fold (!iszero(A & K1) & !iszero(A & K2)) ->  (A & (K1 | K2)) == (K1 | K2)
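// For example, with power-of-two masks K1 = 1 and K2 = 4:
//   (icmp eq (A & 1), 0) | (icmp eq (A & 4), 0)  ->  (icmp ne (A & 5), 5)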
Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
                                                       ICmpInst *RHS,
                                                       Instruction *CxtI,
                                                       bool IsAnd,
                                                       bool IsLogical) {
  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
    return nullptr;

  if (!match(LHS->getOperand(1), m_Zero()) ||
      !match(RHS->getOperand(1), m_Zero()))
    return nullptr;

  Value *L1, *L2, *R1, *R2;
  if (match(LHS->getOperand(0), m_And(m_Value(L1), m_Value(L2))) &&
      match(RHS->getOperand(0), m_And(m_Value(R1), m_Value(R2)))) {
    if (L1 == R2 || L2 == R2)
      std::swap(R1, R2);
    if (L2 == R1)
      std::swap(L1, L2);

    if (L1 == R1 &&
        isKnownToBeAPowerOfTwo(L2, false, 0, CxtI) &&
        isKnownToBeAPowerOfTwo(R2, false, 0, CxtI)) {
      // If this is a logical and/or, then we must prevent propagation of a
      // poison value from the RHS by inserting freeze.
      if (IsLogical)
        R2 = Builder.CreateFreeze(R2);
      Value *Mask = Builder.CreateOr(L2, R2);
      Value *Masked = Builder.CreateAnd(L1, Mask);
      auto NewPred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
      return Builder.CreateICmp(NewPred, Masked, Mask);
    }
  }

  return nullptr;
}
/// General pattern:
///   X & Y
///
/// Where Y is checking that all the high bits (covered by a mask 4294967168)
/// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
/// Pattern can be one of:
///   %t = add        i32 %arg,    128
///   %r = icmp   ult i32 %t,      256
/// Or
///   %t0 = shl       i32 %arg,    24
///   %t1 = ashr      i32 %t0,     24
///   %r  = icmp  eq  i32 %t1,     %arg
/// Or
///   %t0 = trunc     i32 %arg  to i8
///   %t1 = sext      i8  %t0   to i32
///   %r  = icmp  eq  i32 %t1,     %arg
/// This pattern is a signed truncation check.
///
/// And X is checking that some bit in that same mask is zero.
/// I.e. can be one of:
///   %r = icmp sgt i32   %arg,    -1
/// Or
///   %t = and      i32   %arg,    2147483648
///   %r = icmp eq  i32   %t,      0
///
/// Since we are checking that all the bits in that mask are the same,
/// and a particular bit is zero, what we are really checking is that all the
/// masked bits are zero.
/// So this should be transformed to:
///   %r = icmp ult i32 %arg, 128
static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
                                        Instruction &CxtI,
                                        InstCombiner::BuilderTy &Builder) {
  assert(CxtI.getOpcode() == Instruction::And);

  // Match  icmp ult (add %arg, C01), C1   (C1 == C01 << 1; powers of two)
  auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X,
                                            APInt &SignBitMask) -> bool {
    CmpInst::Predicate Pred;
    const APInt *I01, *I1; // powers of two; I1 == I01 << 1
    if (!(match(ICmp,
                m_ICmp(Pred, m_Add(m_Value(X), m_Power2(I01)), m_Power2(I1))) &&
          Pred == ICmpInst::ICMP_ULT && I1->ugt(*I01) && I01->shl(1) == *I1))
      return false;
    // Which bit is the new sign bit as per the 'signed truncation' pattern?
    SignBitMask = *I01;
    return true;
  };

  // One icmp needs to be 'signed truncation check'.
  // We need to match this first, else we will mismatch commutative cases.
  Value *X1;
  APInt HighestBit;
  ICmpInst *OtherICmp;
  if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit))
    OtherICmp = ICmp0;
  else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit))
    OtherICmp = ICmp1;
  else
    return nullptr;

  assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)");

  // Try to match/decompose into:  icmp eq (X & Mask), 0
  auto tryToDecompose = [](ICmpInst *ICmp, Value *&X,
                           APInt &UnsetBitsMask) -> bool {
    CmpInst::Predicate Pred = ICmp->getPredicate();
    // Can it be decomposed into  icmp eq (X & Mask), 0  ?
    if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
                                   Pred, X, UnsetBitsMask,
                                   /*LookThroughTrunc=*/false) &&
        Pred == ICmpInst::ICMP_EQ)
      return true;
    // Is it  icmp eq (X & Mask), 0  already?
    const APInt *Mask;
    if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) &&
        Pred == ICmpInst::ICMP_EQ) {
      UnsetBitsMask = *Mask;
      return true;
    }
    return false;
  };

  // And the other icmp needs to be decomposable into a bit test.
  Value *X0;
  APInt UnsetBitsMask;
  if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
    return nullptr;

  assert(!UnsetBitsMask.isZero() && "empty mask makes no sense.");

  // Are they working on the same value?
  Value *X;
  if (X1 == X0) {
    // Ok as is.
    X = X1;
  } else if (match(X0, m_Trunc(m_Specific(X1)))) {
    UnsetBitsMask = UnsetBitsMask.zext(X1->getType()->getScalarSizeInBits());
    X = X1;
  } else {
    return nullptr;
  }

  // So which bits should be uniform as per the 'signed truncation check'?
  // (all the bits starting with (i.e. including) HighestBit)
  APInt SignBitsMask = ~(HighestBit - 1U);

  // UnsetBitsMask must have some common bits with SignBitsMask,
  // else we end up checking on entirely different bits.
  if (!UnsetBitsMask.intersects(SignBitsMask))
    return nullptr;

  // Does UnsetBitsMask contain any bits outside of SignBitsMask?
  if (!UnsetBitsMask.isSubsetOf(SignBitsMask)) {
    APInt OtherHighestBit = (~UnsetBitsMask) + 1U;
    if (!OtherHighestBit.isPowerOf2())
      return nullptr;
    HighestBit = APIntOps::umin(HighestBit, OtherHighestBit);
  }
  // Else, if it does not, then all is ok as-is.

  // %r = icmp ult %X, SignBit
  return Builder.CreateICmpULT(X, ConstantInt::get(X->getType(), HighestBit),
                               CxtI.getName() + ".simplified");
}
/// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and
/// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1).
/// Also used for logical and/or, must be poison safe.
static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
                                   InstCombiner::BuilderTy &Builder) {
  CmpInst::Predicate Pred0, Pred1;
  Value *X;
  if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
                          m_SpecificInt(1))) ||
      !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt())))
    return nullptr;

  Value *CtPop = Cmp0->getOperand(0);
  if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_NE)
    return Builder.CreateICmpUGT(CtPop, ConstantInt::get(CtPop->getType(), 1));
  if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ)
    return Builder.CreateICmpULT(CtPop, ConstantInt::get(CtPop->getType(), 2));

  return nullptr;
}
/// Reduce a pair of compares that check if a value has exactly 1 bit set.
/// Also used for logical and/or, must be poison safe.
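/// For example, (icmp ne X, 0) & (icmp ult ctpop(X), 2) --> icmp eq ctpop(X), 1.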
static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
                             InstCombiner::BuilderTy &Builder) {
  // Handle 'and' / 'or' commutation: make the equality check the first operand.
  if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(Cmp0, Cmp1);
  else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(Cmp0, Cmp1);

  // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1
  CmpInst::Predicate Pred0, Pred1;
  Value *X;
  if (JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                         m_SpecificInt(2))) &&
      Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) {
    Value *CtPop = Cmp1->getOperand(0);
    return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1));
  }
  // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
  if (!JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                         m_SpecificInt(1))) &&
      Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) {
    Value *CtPop = Cmp1->getOperand(0);
    return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1));
  }
  return nullptr;
}
/// Try to fold (icmp(A & B) == 0) & (icmp(A & D) != E) into (icmp A u< D) iff
/// B is a contiguous set of ones starting from the most significant bit
/// (negative power of 2), D and E are equal, and D is a contiguous set of ones
/// starting at the most significant zero bit in B. Parameter B supports masking
/// using undef/poison in either scalar or vector values.
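/// For example, with B = 0xF0 and D = E = 0x0C for an i8 value A:
///   (icmp eq (A & 0xF0), 0) & (icmp ne (A & 0x0C), 0x0C) --> icmp ult A, 0x0C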
static Value *foldNegativePower2AndShiftedMask(
    Value *A, Value *B, Value *D, Value *E, ICmpInst::Predicate PredL,
    ICmpInst::Predicate PredR, InstCombiner::BuilderTy &Builder) {
  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
         "Expected equality predicates for masked type of icmps.");
  if (PredL != ICmpInst::ICMP_EQ || PredR != ICmpInst::ICMP_NE)
    return nullptr;

  if (!match(B, m_NegatedPower2()) || !match(D, m_ShiftedMask()) ||
      !match(E, m_ShiftedMask()))
    return nullptr;

  // Test scalar arguments for conversion. B has been validated earlier to be a
  // negative power of two and thus is guaranteed to have one or more contiguous
  // ones starting from the MSB followed by zero or more contiguous zeros. D has
  // been validated earlier to be a shifted set of one or more contiguous ones.
  // In order to match, B leading ones and D leading zeros should be equal. The
  // predicate that B be a negative power of 2 prevents the condition of there
  // ever being zero leading ones. Thus 0 == 0 cannot occur. The predicate that
  // D always be a shifted mask prevents the condition of D equaling 0. This
  // prevents matching the condition where B contains the maximum number of
  // leading one bits (-1) and D contains the maximum number of leading zero
  // bits (0).
  auto isReducible = [](const Value *B, const Value *D, const Value *E) {
    const APInt *BCst, *DCst, *ECst;
    return match(B, m_APIntAllowUndef(BCst)) && match(D, m_APInt(DCst)) &&
           match(E, m_APInt(ECst)) && *DCst == *ECst &&
           (isa<UndefValue>(B) ||
            (BCst->countLeadingOnes() == DCst->countLeadingZeros()));
  };

  // Test vector type arguments for conversion.
  if (const auto *BVTy = dyn_cast<VectorType>(B->getType())) {
    const auto *BFVTy = dyn_cast<FixedVectorType>(BVTy);
    const auto *BConst = dyn_cast<Constant>(B);
    const auto *DConst = dyn_cast<Constant>(D);
    const auto *EConst = dyn_cast<Constant>(E);

    if (!BFVTy || !BConst || !DConst || !EConst)
      return nullptr;

    for (unsigned I = 0; I != BFVTy->getNumElements(); ++I) {
      const auto *BElt = BConst->getAggregateElement(I);
      const auto *DElt = DConst->getAggregateElement(I);
      const auto *EElt = EConst->getAggregateElement(I);

      if (!BElt || !DElt || !EElt)
        return nullptr;
      if (!isReducible(BElt, DElt, EElt))
        return nullptr;
    }
  } else {
    // Test scalar type arguments for conversion.
    if (!isReducible(B, D, E))
      return nullptr;
  }

  return Builder.CreateICmp(ICmpInst::ICMP_ULT, A, D);
}
/// Try to fold ((icmp X u< P) & (icmp(X & M) != M)) or ((icmp X s> -1) &
/// (icmp(X & M) != M)) into (icmp X u< M). Where P is a power of 2, M < P, and
/// M is a contiguous shifted mask starting at the right most significant zero
/// bit in P. SGT is supported as when P is the largest representable power of
/// 2, an earlier optimization converts the expression into (icmp X s> -1).
/// Parameter P supports masking using undef/poison in either scalar or vector
/// values.
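/// For example, (icmp X u< 16) & (icmp (X & 12) != 12) --> icmp X u< 12.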
static Value *foldPowerOf2AndShiftedMask(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                         bool JoinedByAnd,
                                         InstCombiner::BuilderTy &Builder) {
  if (!JoinedByAnd)
    return nullptr;
  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
  ICmpInst::Predicate CmpPred0 = Cmp0->getPredicate(),
                      CmpPred1 = Cmp1->getPredicate();
  // Assuming P is a 2^n, getMaskedTypeForICmpPair will normalize (icmp X u<
  // 2^n) into (icmp (X & ~(2^n-1)) == 0) and (icmp X s> -1) into (icmp (X &
  // 2^(n-1)) == 0).
  std::optional<std::pair<unsigned, unsigned>> MaskPair =
      getMaskedTypeForICmpPair(A, B, C, D, E, Cmp0, Cmp1, CmpPred0, CmpPred1);
  if (!MaskPair)
    return nullptr;

  const auto compareBMask = BMask_NotMixed | BMask_NotAllOnes;
  unsigned CmpMask0 = MaskPair->first;
  unsigned CmpMask1 = MaskPair->second;
  if ((CmpMask0 & Mask_AllZeros) && (CmpMask1 == compareBMask)) {
    if (Value *V = foldNegativePower2AndShiftedMask(A, B, D, E, CmpPred0,
                                                    CmpPred1, Builder))
      return V;
  } else if ((CmpMask0 == compareBMask) && (CmpMask1 & Mask_AllZeros)) {
    if (Value *V = foldNegativePower2AndShiftedMask(A, D, B, C, CmpPred1,
                                                    CmpPred0, Builder))
      return V;
  }
  return nullptr;
}
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
                                         ICmpInst *UnsignedICmp, bool IsAnd,
                                         const SimplifyQuery &Q,
                                         InstCombiner::BuilderTy &Builder) {
  Value *ZeroCmpOp;
  ICmpInst::Predicate EqPred;
  if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(ZeroCmpOp), m_Zero())) ||
      !ICmpInst::isEquality(EqPred))
    return nullptr;

  auto IsKnownNonZero = [&](Value *V) {
    return isKnownNonZero(V, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
  };

  ICmpInst::Predicate UnsignedPred;

  Value *A, *B;
  if (match(UnsignedICmp,
            m_c_ICmp(UnsignedPred, m_Specific(ZeroCmpOp), m_Value(A))) &&
      match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) &&
      (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) {
    auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) {
      if (!IsKnownNonZero(NonZero))
        std::swap(NonZero, Other);
      return IsKnownNonZero(NonZero);
    };

    // Given  ZeroCmpOp = (A + B)
    //   ZeroCmpOp <  A && ZeroCmpOp != 0  -->  (0-X) <  Y  iff
    //   ZeroCmpOp >= A || ZeroCmpOp == 0  -->  (0-X) >= Y  iff
    // with X being the value (A/B) that is known to be non-zero,
    // and Y being remaining value.
    if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE &&
        IsAnd && GetKnownNonZeroAndOther(B, A))
      return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
    if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ &&
        !IsAnd && GetKnownNonZeroAndOther(B, A))
      return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
  }

  return nullptr;
}

struct IntPart {
  Value *From;
  unsigned StartBit;
  unsigned NumBits;
};
/// Match an extraction of bits from an integer.
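/// For example, (trunc (lshr i32 %x, 8) to i8) extracts bits [8, 16) of %x,
/// i.e. StartBit = 8 and NumBits = 8.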
static std::optional<IntPart> matchIntPart(Value *V) {
  Value *X;
  if (!match(V, m_OneUse(m_Trunc(m_Value(X)))))
    return std::nullopt;

  unsigned NumOriginalBits = X->getType()->getScalarSizeInBits();
  unsigned NumExtractedBits = V->getType()->getScalarSizeInBits();
  Value *Y;
  const APInt *Shift;
  // For a trunc(lshr Y, Shift) pattern, make sure we're only extracting bits
  // from Y, not any shifted-in zeroes.
  if (match(X, m_OneUse(m_LShr(m_Value(Y), m_APInt(Shift)))) &&
      Shift->ule(NumOriginalBits - NumExtractedBits))
    return {{Y, (unsigned)Shift->getZExtValue(), NumExtractedBits}};
  return {{X, 0, NumExtractedBits}};
}
/// Materialize an extraction of bits from an integer in IR.
static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
  Value *V = P.From;
  if (P.StartBit)
    V = Builder.CreateLShr(V, P.StartBit);
  Type *TruncTy = V->getType()->getWithNewBitWidth(P.NumBits);
  if (TruncTy != V->getType())
    V = Builder.CreateTrunc(V, TruncTy);
  return V;
}
/// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
/// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
/// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
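/// For example, comparing the low byte and the next byte of two i32 values
/// separately can be merged into a single compare of their low 16 bits.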
Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                       bool IsAnd) {
  if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
    return nullptr;

  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
  auto GetMatchPart = [&](ICmpInst *Cmp,
                          unsigned OpNo) -> std::optional<IntPart> {
    if (Pred == Cmp->getPredicate())
      return matchIntPart(Cmp->getOperand(OpNo));

    const APInt *C;
    // (icmp eq (lshr x, C), (lshr y, C)) gets optimized to:
    // (icmp ult (xor x, y), 1 << C) so also look for that.
    if (Pred == CmpInst::ICMP_EQ && Cmp->getPredicate() == CmpInst::ICMP_ULT) {
      if (!match(Cmp->getOperand(1), m_Power2(C)) ||
          !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
        return std::nullopt;
    }
    // (icmp ne (lshr x, C), (lshr y, C)) gets optimized to:
    // (icmp ugt (xor x, y), (1 << C) - 1) so also look for that.
    else if (Pred == CmpInst::ICMP_NE &&
             Cmp->getPredicate() == CmpInst::ICMP_UGT) {
      if (!match(Cmp->getOperand(1), m_LowBitMask(C)) ||
          !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
        return std::nullopt;
    } else {
      return std::nullopt;
    }

    unsigned From = Pred == CmpInst::ICMP_NE ? C->popcount() : C->countr_zero();
    Instruction *I = cast<Instruction>(Cmp->getOperand(0));
    return {{I->getOperand(OpNo), From, C->getBitWidth() - From}};
  };

  std::optional<IntPart> L0 = GetMatchPart(Cmp0, 0);
  std::optional<IntPart> R0 = GetMatchPart(Cmp0, 1);
  std::optional<IntPart> L1 = GetMatchPart(Cmp1, 0);
  std::optional<IntPart> R1 = GetMatchPart(Cmp1, 1);
  if (!L0 || !R0 || !L1 || !R1)
    return nullptr;

  // Make sure the LHS/RHS compare a part of the same value, possibly after
  // an operand swap.
  if (L0->From != L1->From || R0->From != R1->From) {
    if (L0->From != R1->From || R0->From != L1->From)
      return nullptr;
    std::swap(L1, R1);
  }

  // Make sure the extracted parts are adjacent, canonicalizing to L0/R0 being
  // the low part and L1/R1 being the high part.
  if (L0->StartBit + L0->NumBits != L1->StartBit ||
      R0->StartBit + R0->NumBits != R1->StartBit) {
    if (L1->StartBit + L1->NumBits != L0->StartBit ||
        R1->StartBit + R1->NumBits != R0->StartBit)
      return nullptr;
    std::swap(L0, L1);
    std::swap(R0, R1);
  }

  // We can simplify to a comparison of these larger parts of the integers.
  IntPart L = {L0->From, L0->StartBit, L0->NumBits + L1->NumBits};
  IntPart R = {R0->From, R0->StartBit, R0->NumBits + R1->NumBits};
  Value *LValue = extractIntPart(L, Builder);
  Value *RValue = extractIntPart(R, Builder);
  return Builder.CreateICmp(Pred, LValue, RValue);
}
/// Reduce logic-of-compares with equality to a constant by substituting a
/// common operand with the constant. Callers are expected to call this with
/// Cmp0/Cmp1 switched to handle logic op commutativity.
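/// For example, (X == 42) && (Y < X) --> (X == 42) && (Y < 42).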
static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                          bool IsAnd, bool IsLogical,
                                          InstCombiner::BuilderTy &Builder,
                                          const SimplifyQuery &Q) {
  // Match an equality compare with a non-poison constant as Cmp0.
  // Also, give up if the compare can be constant-folded to avoid looping.
  ICmpInst::Predicate Pred0;
  Value *X;
  Constant *C;
  if (!match(Cmp0, m_ICmp(Pred0, m_Value(X), m_Constant(C))) ||
      !isGuaranteedNotToBeUndefOrPoison(C) || isa<Constant>(X))
    return nullptr;
  if ((IsAnd && Pred0 != ICmpInst::ICMP_EQ) ||
      (!IsAnd && Pred0 != ICmpInst::ICMP_NE))
    return nullptr;

  // The other compare must include a common operand (X). Canonicalize the
  // common operand as operand 1 (Pred1 is swapped if the common operand was
  // operand 0).
  Value *Y;
  ICmpInst::Predicate Pred1;
  if (!match(Cmp1, m_c_ICmp(Pred1, m_Value(Y), m_Deferred(X))))
    return nullptr;

  // Replace variable with constant value equivalence to remove a variable use:
  // (X == C) && (Y Pred1 X) --> (X == C) && (Y Pred1 C)
  // (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C)
  // Can think of the 'or' substitution with the 'and' bool equivalent:
  // A || B --> A || (!A && B)
  Value *SubstituteCmp = simplifyICmpInst(Pred1, Y, C, Q);
  if (!SubstituteCmp) {
    // If we need to create a new instruction, require that the old compare can
    // be removed.
    if (!Cmp1->hasOneUse())
      return nullptr;
    SubstituteCmp = Builder.CreateICmp(Pred1, Y, C);
  }
  if (IsLogical)
    return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp)
                 : Builder.CreateLogicalOr(Cmp0, SubstituteCmp);
  return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0,
                             SubstituteCmp);
}
/// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2)
/// or   (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2)
/// into a single comparison using range-based reasoning.
/// NOTE: This is also used for logical and/or, must be poison-safe!
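/// For example, (icmp ugt X, 5) & (icmp ult X, 10) checks X against the range
/// [6, 10), which can be emitted as (icmp ult (add X, -6), 4).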
Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
                                                     ICmpInst *ICmp2,
                                                     bool IsAnd) {
  ICmpInst::Predicate Pred1, Pred2;
  Value *V1, *V2;
  const APInt *C1, *C2;
  if (!match(ICmp1, m_ICmp(Pred1, m_Value(V1), m_APInt(C1))) ||
      !match(ICmp2, m_ICmp(Pred2, m_Value(V2), m_APInt(C2))))
    return nullptr;

  // Look through add of a constant offset on V1, V2, or both operands. This
  // allows us to interpret the V + C' < C'' range idiom into a proper range.
  const APInt *Offset1 = nullptr, *Offset2 = nullptr;
  if (V1 != V2) {
    Value *X;
    if (match(V1, m_Add(m_Value(X), m_APInt(Offset1))))
      V1 = X;
    if (match(V2, m_Add(m_Value(X), m_APInt(Offset2))))
      V2 = X;
  }

  if (V1 != V2)
    return nullptr;

  ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
      IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, *C1);
  if (Offset1)
    CR1 = CR1.subtract(*Offset1);

  ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
      IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, *C2);
  if (Offset2)
    CR2 = CR2.subtract(*Offset2);

  Type *Ty = V1->getType();
  Value *NewV = V1;
  std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
  if (!CR) {
    if (!(ICmp1->hasOneUse() && ICmp2->hasOneUse()) || CR1.isWrappedSet() ||
        CR2.isWrappedSet())
      return nullptr;

    // Check whether we have equal-size ranges that only differ by one bit.
    // In that case we can apply a mask to map one range onto the other.
    APInt LowerDiff = CR1.getLower() ^ CR2.getLower();
    APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
    APInt CR1Size = CR1.getUpper() - CR1.getLower();
    if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
        CR1Size != CR2.getUpper() - CR2.getLower())
      return nullptr;

    CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
    NewV = Builder.CreateAnd(NewV, ConstantInt::get(Ty, ~LowerDiff));
  }

  if (IsAnd)
    CR = CR->inverse();

  CmpInst::Predicate NewPred;
  APInt NewC, Offset;
  CR->getEquivalentICmp(NewPred, NewC, Offset);

  if (Offset != 0)
    NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
  return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
}
/// Ignore all operations which only change the sign of a value, returning the
/// underlying magnitude value.
static Value *stripSignOnlyFPOps(Value *Val) {
  match(Val, m_FNeg(m_Value(Val)));
  match(Val, m_FAbs(m_Value(Val)));
  match(Val, m_CopySign(m_Value(Val), m_Value()));
  return Val;
}
/// Matches canonical form of isnan, fcmp ord x, 0
static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) {
  return P == FCmpInst::FCMP_ORD && match(RHS, m_AnyZeroFP());
}
/// Matches fcmp u__ x, +/-inf
static bool matchUnorderedInfCompare(FCmpInst::Predicate P, Value *LHS,
                                     Value *RHS) {
  return FCmpInst::isUnordered(P) && match(RHS, m_Inf());
}
/// and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf
///
/// Clang emits this pattern for doing an isfinite check in __builtin_isnormal.
static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS,
                                FCmpInst *RHS) {
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
  FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();

  if (!matchIsNotNaN(PredL, LHS0, LHS1) ||
      !matchUnorderedInfCompare(PredR, RHS0, RHS1))
    return nullptr;

  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF = LHS->getFastMathFlags();
  FMF &= RHS->getFastMathFlags();
  Builder.setFastMathFlags(FMF);

  return Builder.CreateFCmp(FCmpInst::getOrderedPredicate(PredR), RHS0, RHS1);
}
Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
                                          bool IsAnd, bool IsLogicalSelect) {
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
  FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();

  if (LHS0 == RHS1 && RHS0 == LHS1) {
    // Swap RHS operands to match LHS.
    PredR = FCmpInst::getSwappedPredicate(PredR);
    std::swap(RHS0, RHS1);
  }

  // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
  // Suppose the relation between x and y is R, where R is one of
  // U(1000), L(0100), G(0010) or E(0001), and CC0 and CC1 are the bitmasks for
  // testing the desired relations.
  //
  // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
  //    bool(R & CC0) && bool(R & CC1)
  //  = bool((R & CC0) & (R & CC1))
  //  = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency
  //
  // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
  //    bool(R & CC0) || bool(R & CC1)
  //  = bool((R & CC0) | (R & CC1))
  //  = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;)
  if (LHS0 == RHS0 && LHS1 == RHS1) {
    unsigned FCmpCodeL = getFCmpCode(PredL);
    unsigned FCmpCodeR = getFCmpCode(PredR);
    unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR;

    // Intersect the fast math flags.
    // TODO: We can union the fast math flags unless this is a logical select.
    IRBuilder<>::FastMathFlagGuard FMFG(Builder);
    FastMathFlags FMF = LHS->getFastMathFlags();
    FMF &= RHS->getFastMathFlags();
    Builder.setFastMathFlags(FMF);

    return getFCmpValue(NewPred, LHS0, LHS1, Builder);
  }

  // This transform is not valid for a logical select.
  if (!IsLogicalSelect &&
      ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
       (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO &&
        !IsAnd))) {
    if (LHS0->getType() != RHS0->getType())
      return nullptr;

    // FCmp canonicalization ensures that (fcmp ord/uno X, X) and
    // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0).
    if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP()))
      // Ignore the constants because they are obviously not NANs:
      // (fcmp ord x, 0.0) & (fcmp ord y, 0.0)  -> (fcmp ord x, y)
      // (fcmp uno x, 0.0) | (fcmp uno y, 0.0)  -> (fcmp uno x, y)
      return Builder.CreateFCmp(PredL, LHS0, RHS0);
  }

  if (IsAnd && stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) {
    // and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf
    // and (fcmp ord x, 0), (fcmp u* fabs(x), inf) -> fcmp o* x, inf
    if (Value *Left = matchIsFiniteTest(Builder, LHS, RHS))
      return Left;
    if (Value *Right = matchIsFiniteTest(Builder, RHS, LHS))
      return Right;
  }

  // Turn at least two fcmps with constants into llvm.is.fpclass.
  //
  // If we can represent a combined value test with one class call, we can
  // potentially eliminate 4-6 instructions. If we can represent a test with a
  // single fcmp with fneg and fabs, that's likely a better canonical form.
  if (LHS->hasOneUse() && RHS->hasOneUse()) {
    auto [ClassValRHS, ClassMaskRHS] =
        fcmpToClassTest(PredR, *RHS->getFunction(), RHS0, RHS1);
    if (ClassValRHS) {
      auto [ClassValLHS, ClassMaskLHS] =
          fcmpToClassTest(PredL, *LHS->getFunction(), LHS0, LHS1);
      if (ClassValLHS == ClassValRHS) {
        unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS)
                                      : (ClassMaskLHS | ClassMaskRHS);
        return Builder.CreateIntrinsic(
            Intrinsic::is_fpclass, {ClassValLHS->getType()},
            {ClassValLHS, Builder.getInt32(CombinedMask)});
      }
    }
  }

  return nullptr;
}
/// Match an fcmp against a special value that performs a test possible by
/// llvm.is.fpclass.
static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal,
                                   uint64_t &ClassMask) {
  auto *FCmp = dyn_cast<FCmpInst>(Op);
  if (!FCmp || !FCmp->hasOneUse())
    return false;

  std::tie(ClassVal, ClassMask) =
      fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
                      FCmp->getOperand(0), FCmp->getOperand(1));
  return ClassVal != nullptr;
}
/// or (is_fpclass x, mask0), (is_fpclass x, mask1)
///     -> is_fpclass x, (mask0 | mask1)
/// and (is_fpclass x, mask0), (is_fpclass x, mask1)
///     -> is_fpclass x, (mask0 & mask1)
/// xor (is_fpclass x, mask0), (is_fpclass x, mask1)
///     -> is_fpclass x, (mask0 ^ mask1)
Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,
                                                    Value *Op0, Value *Op1) {
  Value *ClassVal0 = nullptr;
  Value *ClassVal1 = nullptr;
  uint64_t ClassMask0, ClassMask1;

  // Restrict to folding one fcmp into one is.fpclass for now, don't introduce a
  // new class.
  //
  // TODO: Support forming is.fpclass out of 2 separate fcmps when codegen is
  // better.

  bool IsLHSClass =
      match(Op0, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
                     m_Value(ClassVal0), m_ConstantInt(ClassMask0))));
  bool IsRHSClass =
      match(Op1, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
                     m_Value(ClassVal1), m_ConstantInt(ClassMask1))));
  if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&
        (IsRHSClass || matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&
      ClassVal0 == ClassVal1) {
    unsigned NewClassMask;
    switch (BO.getOpcode()) {
    case Instruction::And:
      NewClassMask = ClassMask0 & ClassMask1;
      break;
    case Instruction::Or:
      NewClassMask = ClassMask0 | ClassMask1;
      break;
    case Instruction::Xor:
      NewClassMask = ClassMask0 ^ ClassMask1;
      break;
    default:
      llvm_unreachable("not a binary logic operator");
    }

    if (IsLHSClass) {
      auto *II = cast<IntrinsicInst>(Op0);
      II->setArgOperand(
          1, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));
      return replaceInstUsesWith(BO, II);
    }

    if (IsRHSClass) {
      auto *II = cast<IntrinsicInst>(Op1);
      II->setArgOperand(
          1, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));
      return replaceInstUsesWith(BO, II);
    }

    CallInst *NewClass =
        Builder.CreateIntrinsic(Intrinsic::is_fpclass, {ClassVal0->getType()},
                                {ClassVal0, Builder.getInt32(NewClassMask)});
    return replaceInstUsesWith(BO, NewClass);
  }

  return nullptr;
}
/// Look for the pattern that conditionally negates a value via math operations:
///   cond.splat = sext i1 cond
///   sub = add cond.splat, x
///   xor = xor sub, cond.splat
/// and rewrite it to do the same, but via logical operations:
///   value.neg = sub 0, value
///   cond = select i1 neg, value.neg, value
Instruction *InstCombinerImpl::canonicalizeConditionalNegationViaMathToSelect(
    BinaryOperator &I) {
  assert(I.getOpcode() == BinaryOperator::Xor && "Only for xor!");
  Value *Cond, *X;
  // As per complexity ordering, `xor` is not commutative here.
  if (!match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value())) ||
      !match(I.getOperand(1), m_SExt(m_Value(Cond))) ||
      !Cond->getType()->isIntOrIntVectorTy(1) ||
      !match(I.getOperand(0), m_c_Add(m_SExt(m_Deferred(Cond)), m_Value(X))))
    return nullptr;
  return SelectInst::Create(Cond, Builder.CreateNeg(X, X->getName() + ".neg"),
                            X);
}
/// This is a limited reassociation for a special case (see above) where we are
/// checking if two values are either both NAN (unordered) or not-NAN (ordered).
/// This could be handled more generally in '-reassociation', but it seems like
/// an unlikely pattern for a large number of logic ops and fcmps.
1529 static Instruction
*reassociateFCmps(BinaryOperator
&BO
,
1530 InstCombiner::BuilderTy
&Builder
) {
1531 Instruction::BinaryOps Opcode
= BO
.getOpcode();
1532 assert((Opcode
== Instruction::And
|| Opcode
== Instruction::Or
) &&
1533 "Expecting and/or op for fcmp transform");
1535 // There are 4 commuted variants of the pattern. Canonicalize operands of this
1536 // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
1537 Value
*Op0
= BO
.getOperand(0), *Op1
= BO
.getOperand(1), *X
;
1538 FCmpInst::Predicate Pred
;
1539 if (match(Op1
, m_FCmp(Pred
, m_Value(), m_AnyZeroFP())))
1540 std::swap(Op0
, Op1
);
1542 // Match inner binop and the predicate for combining 2 NAN checks into 1.
1544 FCmpInst::Predicate NanPred
= Opcode
== Instruction::And
? FCmpInst::FCMP_ORD
1545 : FCmpInst::FCMP_UNO
;
1546 if (!match(Op0
, m_FCmp(Pred
, m_Value(X
), m_AnyZeroFP())) || Pred
!= NanPred
||
1547 !match(Op1
, m_BinOp(Opcode
, m_Value(BO10
), m_Value(BO11
))))
1550 // The inner logic op must have a matching fcmp operand.
1552 if (!match(BO10
, m_FCmp(Pred
, m_Value(Y
), m_AnyZeroFP())) ||
1553 Pred
!= NanPred
|| X
->getType() != Y
->getType())
1554 std::swap(BO10
, BO11
);
1556 if (!match(BO10
, m_FCmp(Pred
, m_Value(Y
), m_AnyZeroFP())) ||
1557 Pred
!= NanPred
|| X
->getType() != Y
->getType())
1560 // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
1561 // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
1562 Value
*NewFCmp
= Builder
.CreateFCmp(Pred
, X
, Y
);
1563 if (auto *NewFCmpInst
= dyn_cast
<FCmpInst
>(NewFCmp
)) {
1564 // Intersect FMF from the 2 source fcmps.
1565 NewFCmpInst
->copyIRFlags(Op0
);
1566 NewFCmpInst
->andIRFlags(BO10
);
1568 return BinaryOperator::Create(Opcode
, NewFCmp
, BO11
);
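// Example of the reassociation above (illustrative; assumes float operands and
// an i1 %z produced by some other ordered check):
//   %ordx  = fcmp ord float %x, 0.0
//   %ordy  = fcmp ord float %y, 0.0
//   %inner = and i1 %ordy, %z
//   %r     = and i1 %ordx, %inner
// -->
//   %ordxy = fcmp ord float %x, %y
//   %r     = and i1 %ordxy, %z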
/// Match variations of De Morgan's Laws:
///   (~A & ~B) == (~(A | B))
///   (~A | ~B) == (~(A & B))
static Instruction *matchDeMorgansLaws(BinaryOperator &I,
                                       InstCombinerImpl &IC) {
  const Instruction::BinaryOps Opcode = I.getOpcode();
  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
         "Trying to match De Morgan's Laws with something other than and/or");

  // Flip the logic operation.
  const Instruction::BinaryOps FlippedOpcode =
      (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Value *A, *B;
  if (match(Op0, m_OneUse(m_Not(m_Value(A)))) &&
      match(Op1, m_OneUse(m_Not(m_Value(B)))) &&
      !IC.isFreeToInvert(A, A->hasOneUse()) &&
      !IC.isFreeToInvert(B, B->hasOneUse())) {
    Value *AndOr =
        IC.Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
    return BinaryOperator::CreateNot(AndOr);
  }

  // The 'not' ops may require reassociation.
  // (A & ~B) & ~C --> A & ~(B | C)
  // (~B & A) & ~C --> A & ~(B | C)
  // (A | ~B) | ~C --> A | ~(B & C)
  // (~B | A) | ~C --> A | ~(B & C)
  Value *C;
  if (match(Op0, m_OneUse(m_c_BinOp(Opcode, m_Value(A), m_Not(m_Value(B))))) &&
      match(Op1, m_Not(m_Value(C)))) {
    Value *FlippedBO = IC.Builder.CreateBinOp(FlippedOpcode, B, C);
    return BinaryOperator::Create(Opcode, A, IC.Builder.CreateNot(FlippedBO));
  }

  return nullptr;
}
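// Example of the first De Morgan fold above (illustrative; i8 chosen
// arbitrarily):
//   %na = xor i8 %a, -1
//   %nb = xor i8 %b, -1
//   %r  = and i8 %na, %nb
// -->
//   %or = or i8 %a, %b
//   %r  = xor i8 %or, -1        ; ~(a | b)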
bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) {
  Value *CastSrc = CI->getOperand(0);

  // Noop casts and casts of constants should be eliminated trivially.
  if (CI->getSrcTy() == CI->getDestTy() || isa<Constant>(CastSrc))
    return false;

  // If this cast is paired with another cast that can be eliminated, we prefer
  // to have it eliminated.
  if (const auto *PrecedingCI = dyn_cast<CastInst>(CastSrc))
    if (isEliminableCastPair(PrecedingCI, CI))
      return false;

  return true;
}
/// Fold {and,or,xor} (cast X), C.
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
                                          InstCombinerImpl &IC) {
  Constant *C = dyn_cast<Constant>(Logic.getOperand(1));
  if (!C)
    return nullptr;

  auto LogicOpc = Logic.getOpcode();
  Type *DestTy = Logic.getType();
  Type *SrcTy = Cast->getSrcTy();

  // Move the logic operation ahead of a zext or sext if the constant is
  // unchanged in the smaller source type. Performing the logic in a smaller
  // type may provide more information to later folds, and the smaller logic
  // instruction may be cheaper (particularly in the case of vectors).
  Value *X;
  if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
    if (Constant *TruncC = IC.getLosslessUnsignedTrunc(C, SrcTy)) {
      // LogicOpc (zext X), C --> zext (LogicOpc X, C)
      Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
      return new ZExtInst(NewOp, DestTy);
    }
  }

  if (match(Cast, m_OneUse(m_SExt(m_Value(X))))) {
    if (Constant *TruncC = IC.getLosslessSignedTrunc(C, SrcTy)) {
      // LogicOpc (sext X), C --> sext (LogicOpc X, C)
      Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
      return new SExtInst(NewOp, DestTy);
    }
  }

  return nullptr;
}
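// Example of sinking the logic op below a zext when the constant survives a
// lossless truncation to the source type (illustrative):
//   %z = zext i8 %x to i32
//   %r = and i32 %z, 15
// -->
//   %n = and i8 %x, 15
//   %r = zext i8 %n to i32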
/// Fold {and,or,xor} (cast X), Y.
Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
  auto LogicOpc = I.getOpcode();
  assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // fold bitwise(A >> BW - 1, zext(icmp))     (BW is the scalar bits of the
  // type of A)
  //   -> bitwise(zext(A < 0), zext(icmp))
  //   -> zext(bitwise(A < 0, icmp))
  auto FoldBitwiseICmpZeroWithICmp = [&](Value *Op0,
                                         Value *Op1) -> Instruction * {
    ICmpInst::Predicate Pred;
    Value *A;
    bool IsMatched =
        match(Op0,
              m_OneUse(m_LShr(
                  m_Value(A),
                  m_SpecificInt(Op0->getType()->getScalarSizeInBits() - 1)))) &&
        match(Op1, m_OneUse(m_ZExt(m_ICmp(Pred, m_Value(), m_Value()))));

    if (!IsMatched)
      return nullptr;

    auto *ICmpL =
        Builder.CreateICmpSLT(A, Constant::getNullValue(A->getType()));
    auto *ICmpR = cast<ZExtInst>(Op1)->getOperand(0);
    auto *BitwiseOp = Builder.CreateBinOp(LogicOpc, ICmpL, ICmpR);

    return new ZExtInst(BitwiseOp, Op0->getType());
  };

  if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op0, Op1))
    return Ret;

  if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op1, Op0))
    return Ret;

  CastInst *Cast0 = dyn_cast<CastInst>(Op0);
  if (!Cast0)
    return nullptr;

  // This must be a cast from an integer or integer vector source type to allow
  // transformation of the logic operation to the source type.
  Type *DestTy = I.getType();
  Type *SrcTy = Cast0->getSrcTy();
  if (!SrcTy->isIntOrIntVectorTy())
    return nullptr;

  if (Instruction *Ret = foldLogicCastConstant(I, Cast0, *this))
    return Ret;

  CastInst *Cast1 = dyn_cast<CastInst>(Op1);
  if (!Cast1)
    return nullptr;

  // Both operands of the logic operation are casts. The casts must be the
  // same kind for reduction.
  Instruction::CastOps CastOpcode = Cast0->getOpcode();
  if (CastOpcode != Cast1->getOpcode())
    return nullptr;

  // If the source types do not match, but the casts are matching extends, we
  // can still narrow the logic op.
  if (SrcTy != Cast1->getSrcTy()) {
    Value *X, *Y;
    if (match(Cast0, m_OneUse(m_ZExtOrSExt(m_Value(X)))) &&
        match(Cast1, m_OneUse(m_ZExtOrSExt(m_Value(Y))))) {
      // Cast the narrower source to the wider source type.
      unsigned XNumBits = X->getType()->getScalarSizeInBits();
      unsigned YNumBits = Y->getType()->getScalarSizeInBits();
      if (XNumBits < YNumBits)
        X = Builder.CreateCast(CastOpcode, X, Y->getType());
      else
        Y = Builder.CreateCast(CastOpcode, Y, X->getType());
      // Do the logic op in the intermediate width, then widen more.
      Value *NarrowLogic = Builder.CreateBinOp(LogicOpc, X, Y);
      return CastInst::Create(CastOpcode, NarrowLogic, DestTy);
    }

    // Give up for other cast opcodes.
    return nullptr;
  }

  Value *Cast0Src = Cast0->getOperand(0);
  Value *Cast1Src = Cast1->getOperand(0);

  // fold logic(cast(A), cast(B)) -> cast(logic(A, B))
  if ((Cast0->hasOneUse() || Cast1->hasOneUse()) &&
      shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
    Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
                                       I.getName());
    return CastInst::Create(CastOpcode, NewOp, DestTy);
  }

  return nullptr;
}
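// Example of the final logic(cast(A), cast(B)) -> cast(logic(A, B)) fold above
// (illustrative):
//   %za = zext i8 %a to i32
//   %zb = zext i8 %b to i32
//   %r  = or i32 %za, %zb
// -->
//   %ab = or i8 %a, %b
//   %r  = zext i8 %ab to i32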
static Instruction *foldAndToXor(BinaryOperator &I,
                                 InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::And);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *A, *B;

  // Operand complexity canonicalization guarantees that the 'or' is Op0.
  // (A | B) & ~(A & B) --> A ^ B
  // (A | B) & ~(B & A) --> A ^ B
  if (match(&I, m_BinOp(m_Or(m_Value(A), m_Value(B)),
                        m_Not(m_c_And(m_Deferred(A), m_Deferred(B))))))
    return BinaryOperator::CreateXor(A, B);

  // (A | ~B) & (~A | B) --> ~(A ^ B)
  // (A | ~B) & (B | ~A) --> ~(A ^ B)
  // (~B | A) & (~A | B) --> ~(A ^ B)
  // (~B | A) & (B | ~A) --> ~(A ^ B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(&I, m_BinOp(m_c_Or(m_Value(A), m_Not(m_Value(B))),
                          m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B)))))
      return BinaryOperator::CreateNot(Builder.CreateXor(A, B));

  return nullptr;
}
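// Example of (A | B) & ~(A & B) --> A ^ B (illustrative):
//   %or  = or i32 %a, %b
//   %and = and i32 %a, %b
//   %not = xor i32 %and, -1
//   %r   = and i32 %or, %not
// -->
//   %r = xor i32 %a, %b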
static Instruction *foldOrToXor(BinaryOperator &I,
                                InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::Or);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *A, *B;

  // Operand complexity canonicalization guarantees that the 'and' is Op0.
  // (A & B) | ~(A | B) --> ~(A ^ B)
  // (A & B) | ~(B | A) --> ~(A ^ B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
        match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
      return BinaryOperator::CreateNot(Builder.CreateXor(A, B));

  // Operand complexity canonicalization guarantees that the 'xor' is Op0.
  // (A ^ B) | ~(A | B) --> ~(A & B)
  // (A ^ B) | ~(B | A) --> ~(A & B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
        match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
      return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));

  // (A & ~B) | (~A & B) --> A ^ B
  // (A & ~B) | (B & ~A) --> A ^ B
  // (~B & A) | (~A & B) --> A ^ B
  // (~B & A) | (B & ~A) --> A ^ B
  if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
      match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))
    return BinaryOperator::CreateXor(A, B);

  return nullptr;
}
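// Example of (A & ~B) | (~A & B) --> A ^ B (illustrative):
//   %nb = xor i32 %b, -1
//   %na = xor i32 %a, -1
//   %l  = and i32 %a, %nb
//   %m  = and i32 %na, %b
//   %r  = or i32 %l, %m
// -->
//   %r = xor i32 %a, %b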
/// Return true if a constant shift amount is always less than the specified
/// bit-width. If not, the shift could create poison in the narrower type.
static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) {
  APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth);
  return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
}
/// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
/// a common zext operand: and (binop (zext X), C), (zext X).
Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) {
  // This transform could also apply to {or, and, xor}, but there are better
  // folds for those cases, so we don't expect those patterns here. AShr is not
  // handled because it should always be transformed to LShr in this sequence.
  // The subtract transform is different because it has a constant on the left.
  // Add/mul commute the constant to RHS; sub with constant RHS becomes add.
  Value *Op0 = And.getOperand(0), *Op1 = And.getOperand(1);
  Constant *C;
  if (!match(Op0, m_OneUse(m_Add(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_Mul(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_LShr(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_Shl(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_Sub(m_Constant(C), m_Specific(Op1)))))
    return nullptr;

  Value *X;
  if (!match(Op1, m_ZExt(m_Value(X))) || Op1->hasNUsesOrMore(3))
    return nullptr;

  Type *Ty = And.getType();
  if (!isa<VectorType>(Ty) && !shouldChangeType(Ty, X->getType()))
    return nullptr;

  // If we're narrowing a shift, the shift amount must be safe (less than the
  // width) in the narrower type. If the shift amount is greater, instsimplify
  // usually handles that case, but we can't guarantee/assert it.
  Instruction::BinaryOps Opc = cast<BinaryOperator>(Op0)->getOpcode();
  if (Opc == Instruction::LShr || Opc == Instruction::Shl)
    if (!canNarrowShiftAmt(C, X->getType()->getScalarSizeInBits()))
      return nullptr;

  // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X)
  // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X)
  Value *NewC = ConstantExpr::getTrunc(C, X->getType());
  Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, NewC, X)
                                         : Builder.CreateBinOp(Opc, X, NewC);
  return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty);
}
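// Example of the narrowing above for an add (illustrative; i8 source and the
// constant 44 are chosen arbitrarily):
//   %z = zext i8 %x to i32
//   %a = add i32 %z, 44
//   %r = and i32 %a, %z
// -->
//   %a8 = add i8 %x, 44
//   %n  = and i8 %a8, %x
//   %r  = zext i8 %n to i32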
1868 /// Try folding relatively complex patterns for both And and Or operations
1869 /// with all And and Or swapped.
1870 static Instruction
*foldComplexAndOrPatterns(BinaryOperator
&I
,
1871 InstCombiner::BuilderTy
&Builder
) {
1872 const Instruction::BinaryOps Opcode
= I
.getOpcode();
1873 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1875 // Flip the logic operation.
1876 const Instruction::BinaryOps FlippedOpcode
=
1877 (Opcode
== Instruction::And
) ? Instruction::Or
: Instruction::And
;
1879 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
1880 Value
*A
, *B
, *C
, *X
, *Y
, *Dummy
;
1882 // Match following expressions:
1885 // Captures X = ~(A | B) or ~(A & B)
1886 const auto matchNotOrAnd
=
1887 [Opcode
, FlippedOpcode
](Value
*Op
, auto m_A
, auto m_B
, auto m_C
,
1888 Value
*&X
, bool CountUses
= false) -> bool {
1889 if (CountUses
&& !Op
->hasOneUse())
1892 if (match(Op
, m_c_BinOp(FlippedOpcode
,
1893 m_CombineAnd(m_Value(X
),
1894 m_Not(m_c_BinOp(Opcode
, m_A
, m_B
))),
1896 return !CountUses
|| X
->hasOneUse();
1901 // (~(A | B) & C) | ... --> ...
1902 // (~(A & B) | C) & ... --> ...
1903 // TODO: One use checks are conservative. We just need to check that a total
1904 // number of multiple used values does not exceed reduction
1906 if (matchNotOrAnd(Op0
, m_Value(A
), m_Value(B
), m_Value(C
), X
)) {
1907 // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A
1908 // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A)
1909 if (matchNotOrAnd(Op1
, m_Specific(A
), m_Specific(C
), m_Specific(B
), Dummy
,
1911 Value
*Xor
= Builder
.CreateXor(B
, C
);
1912 return (Opcode
== Instruction::Or
)
1913 ? BinaryOperator::CreateAnd(Xor
, Builder
.CreateNot(A
))
1914 : BinaryOperator::CreateNot(Builder
.CreateAnd(Xor
, A
));
1917 // (~(A | B) & C) | (~(B | C) & A) --> (A ^ C) & ~B
1918 // (~(A & B) | C) & (~(B & C) | A) --> ~((A ^ C) & B)
1919 if (matchNotOrAnd(Op1
, m_Specific(B
), m_Specific(C
), m_Specific(A
), Dummy
,
1921 Value
*Xor
= Builder
.CreateXor(A
, C
);
1922 return (Opcode
== Instruction::Or
)
1923 ? BinaryOperator::CreateAnd(Xor
, Builder
.CreateNot(B
))
1924 : BinaryOperator::CreateNot(Builder
.CreateAnd(Xor
, B
));
1927 // (~(A | B) & C) | ~(A | C) --> ~((B & C) | A)
1928 // (~(A & B) | C) & ~(A & C) --> ~((B | C) & A)
1929 if (match(Op1
, m_OneUse(m_Not(m_OneUse(
1930 m_c_BinOp(Opcode
, m_Specific(A
), m_Specific(C
)))))))
1931 return BinaryOperator::CreateNot(Builder
.CreateBinOp(
1932 Opcode
, Builder
.CreateBinOp(FlippedOpcode
, B
, C
), A
));
1934 // (~(A | B) & C) | ~(B | C) --> ~((A & C) | B)
1935 // (~(A & B) | C) & ~(B & C) --> ~((A | C) & B)
1936 if (match(Op1
, m_OneUse(m_Not(m_OneUse(
1937 m_c_BinOp(Opcode
, m_Specific(B
), m_Specific(C
)))))))
1938 return BinaryOperator::CreateNot(Builder
.CreateBinOp(
1939 Opcode
, Builder
.CreateBinOp(FlippedOpcode
, A
, C
), B
));
1941 // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B)))
1942 // Note, the pattern with swapped and/or is not handled because the
1943 // result is more undefined than a source:
1944 // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid.
1945 if (Opcode
== Instruction::Or
&& Op0
->hasOneUse() &&
1946 match(Op1
, m_OneUse(m_Not(m_CombineAnd(
1948 m_c_BinOp(Opcode
, m_Specific(C
),
1949 m_c_Xor(m_Specific(A
), m_Specific(B
)))))))) {
1952 Value
*Or
= cast
<BinaryOperator
>(X
)->getOperand(0);
1953 return BinaryOperator::CreateNot(Builder
.CreateAnd(Or
, Y
));
1957 // (~A & B & C) | ... --> ...
1958 // (~A | B | C) | ... --> ...
1959 // TODO: One use checks are conservative. We just need to check that a total
1960 // number of multiple used values does not exceed reduction
1963 m_OneUse(m_c_BinOp(FlippedOpcode
,
1964 m_BinOp(FlippedOpcode
, m_Value(B
), m_Value(C
)),
1965 m_CombineAnd(m_Value(X
), m_Not(m_Value(A
)))))) ||
1966 match(Op0
, m_OneUse(m_c_BinOp(
1968 m_c_BinOp(FlippedOpcode
, m_Value(C
),
1969 m_CombineAnd(m_Value(X
), m_Not(m_Value(A
)))),
1972 // (~A & B & C) | ~(A | B | C) --> ~(A | (B ^ C))
1973 // (~A | B | C) & ~(A & B & C) --> (~A | (B ^ C))
1974 if (match(Op1
, m_OneUse(m_Not(m_c_BinOp(
1975 Opcode
, m_c_BinOp(Opcode
, m_Specific(A
), m_Specific(B
)),
1976 m_Specific(C
))))) ||
1977 match(Op1
, m_OneUse(m_Not(m_c_BinOp(
1978 Opcode
, m_c_BinOp(Opcode
, m_Specific(B
), m_Specific(C
)),
1979 m_Specific(A
))))) ||
1980 match(Op1
, m_OneUse(m_Not(m_c_BinOp(
1981 Opcode
, m_c_BinOp(Opcode
, m_Specific(A
), m_Specific(C
)),
1982 m_Specific(B
)))))) {
1983 Value
*Xor
= Builder
.CreateXor(B
, C
);
1984 return (Opcode
== Instruction::Or
)
1985 ? BinaryOperator::CreateNot(Builder
.CreateOr(Xor
, A
))
1986 : BinaryOperator::CreateOr(Xor
, X
);
1989 // (~A & B & C) | ~(A | B) --> (C | ~B) & ~A
1990 // (~A | B | C) & ~(A & B) --> (C & ~B) | ~A
1991 if (match(Op1
, m_OneUse(m_Not(m_OneUse(
1992 m_c_BinOp(Opcode
, m_Specific(A
), m_Specific(B
)))))))
1993 return BinaryOperator::Create(
1994 FlippedOpcode
, Builder
.CreateBinOp(Opcode
, C
, Builder
.CreateNot(B
)),
1997 // (~A & B & C) | ~(A | C) --> (B | ~C) & ~A
1998 // (~A | B | C) & ~(A & C) --> (B & ~C) | ~A
1999 if (match(Op1
, m_OneUse(m_Not(m_OneUse(
2000 m_c_BinOp(Opcode
, m_Specific(A
), m_Specific(C
)))))))
2001 return BinaryOperator::Create(
2002 FlippedOpcode
, Builder
.CreateBinOp(Opcode
, B
, Builder
.CreateNot(C
)),
/// Try to reassociate a pair of binops so that values with one use only are
/// part of the same instruction. This may enable folds that are limited with
/// multi-use restrictions and makes it more likely to match other patterns that
/// are looking for a common operand.
static Instruction *reassociateForUses(BinaryOperator &BO,
                                       InstCombinerImpl::BuilderTy &Builder) {
  Instruction::BinaryOps Opcode = BO.getOpcode();
  Value *X, *Y, *Z;
  if (match(&BO,
            m_c_BinOp(Opcode, m_OneUse(m_BinOp(Opcode, m_Value(X), m_Value(Y))),
                      m_OneUse(m_Value(Z))))) {
    if (!isa<Constant>(X) && !isa<Constant>(Y) && !isa<Constant>(Z)) {
      // (X op Y) op Z --> (Y op Z) op X
      if (!X->hasOneUse()) {
        Value *YZ = Builder.CreateBinOp(Opcode, Y, Z);
        return BinaryOperator::Create(Opcode, YZ, X);
      }
      // (X op Y) op Z --> (X op Z) op Y
      if (!Y->hasOneUse()) {
        Value *XZ = Builder.CreateBinOp(Opcode, X, Z);
        return BinaryOperator::Create(Opcode, XZ, Y);
      }
    }
  }

  return nullptr;
}
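// Example of the reassociation above when X has extra uses (illustrative):
//   %xy = and i32 %x, %y        ; %x is also used elsewhere
//   %r  = and i32 %xy, %z
// -->
//   %yz = and i32 %y, %z
//   %r  = and i32 %yz, %x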
// Match
// (X + C2) | C
// (X + C2) ^ C
// (X + C2) & C
// and convert to do the bitwise logic first:
// (X | C) + C2
// (X ^ C) + C2
// (X & C) + C2
// iff bits affected by logic op are lower than last bit affected by math op
static Instruction *canonicalizeLogicFirst(BinaryOperator &I,
                                           InstCombiner::BuilderTy &Builder) {
  Type *Ty = I.getType();
  Instruction::BinaryOps OpC = I.getOpcode();
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *X;
  const APInt *C, *C2;

  if (!(match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C2)))) &&
        match(Op1, m_APInt(C))))
    return nullptr;

  unsigned Width = Ty->getScalarSizeInBits();
  unsigned LastOneMath = Width - C2->countr_zero();

  switch (OpC) {
  case Instruction::And:
    if (C->countl_one() < LastOneMath)
      return nullptr;
    break;
  case Instruction::Xor:
  case Instruction::Or:
    if (C->countl_zero() < LastOneMath)
      return nullptr;
    break;
  default:
    llvm_unreachable("Unexpected BinaryOp!");
  }

  Value *NewBinOp = Builder.CreateBinOp(OpC, X, ConstantInt::get(Ty, *C));
  return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, NewBinOp,
                                               ConstantInt::get(Ty, *C2), Op0);
}
// binop(shift(ShiftedC1, ShAmt), shift(ShiftedC2, add(ShAmt, AddC))) ->
// shift(binop(ShiftedC1, shift(ShiftedC2, AddC)), ShAmt)
// where both shifts are the same and AddC is a valid shift amount.
Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
  assert((I.isBitwiseLogicOp() || I.getOpcode() == Instruction::Add) &&
         "Unexpected opcode");

  Value *ShAmt;
  Constant *ShiftedC1, *ShiftedC2, *AddC;
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (!match(&I, m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)),
                           m_Shift(m_ImmConstant(ShiftedC2),
                                   m_AddLike(m_Deferred(ShAmt),
                                             m_ImmConstant(AddC))))))
    return nullptr;

  // Make sure the add constant is a valid shift amount.
  if (!match(AddC,
             m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(BitWidth, BitWidth))))
    return nullptr;

  // Avoid constant expressions.
  auto *Op0Inst = dyn_cast<Instruction>(I.getOperand(0));
  auto *Op1Inst = dyn_cast<Instruction>(I.getOperand(1));
  if (!Op0Inst || !Op1Inst)
    return nullptr;

  // Both shifts must be the same.
  Instruction::BinaryOps ShiftOp =
      static_cast<Instruction::BinaryOps>(Op0Inst->getOpcode());
  if (ShiftOp != Op1Inst->getOpcode())
    return nullptr;

  // For adds, only left shifts are supported.
  if (I.getOpcode() == Instruction::Add && ShiftOp != Instruction::Shl)
    return nullptr;

  Value *NewC = Builder.CreateBinOp(
      I.getOpcode(), ShiftedC1, Builder.CreateBinOp(ShiftOp, ShiftedC2, AddC));
  return BinaryOperator::Create(ShiftOp, NewC, ShAmt);
}
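// Example of the displaced-shift fold above (illustrative; constants made up):
//   %s1 = shl i32 1, %n
//   %n3 = add i32 %n, 3
//   %s2 = shl i32 2, %n3
//   %r  = or i32 %s1, %s2
// -->
//   %r = shl i32 17, %n         ; 1 | (2 << 3) == 17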
// Fold and/or/xor with two equal intrinsic IDs:
// bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt))
// -> fshl(bitwise(A, C), bitwise(B, D), ShAmt)
// bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt))
// -> fshr(bitwise(A, C), bitwise(B, D), ShAmt)
// bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B))
// bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C)))
// bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B))
// bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C)))
static Instruction *
foldBitwiseLogicWithIntrinsics(BinaryOperator &I,
                               InstCombiner::BuilderTy &Builder) {
  assert(I.isBitwiseLogicOp() && "Should and/or/xor");
  if (!I.getOperand(0)->hasOneUse())
    return nullptr;
  IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0));
  if (!X)
    return nullptr;

  IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1));
  if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID()))
    return nullptr;

  Intrinsic::ID IID = X->getIntrinsicID();
  const APInt *RHSC;
  // Try to match constant RHS.
  if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) ||
             !match(I.getOperand(1), m_APInt(RHSC))))
    return nullptr;

  switch (IID) {
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    if (X->getOperand(2) != Y->getOperand(2))
      return nullptr;
    Value *NewOp0 =
        Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0));
    Value *NewOp1 =
        Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1));
    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
    return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)});
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    Value *NewOp0 = Builder.CreateBinOp(
        I.getOpcode(), X->getOperand(0),
        Y ? Y->getOperand(0)
          : ConstantInt::get(I.getType(), IID == Intrinsic::bswap
                                              ? RHSC->byteSwap()
                                              : RHSC->reverseBits()));
    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
    return CallInst::Create(F, {NewOp0});
  }
  default:
    return nullptr;
  }
}
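// Example of pushing the logic below a pair of bswaps (illustrative):
//   %ba = call i16 @llvm.bswap.i16(i16 %a)
//   %bb = call i16 @llvm.bswap.i16(i16 %b)
//   %r  = xor i16 %ba, %bb
// -->
//   %ab = xor i16 %a, %b
//   %r  = call i16 @llvm.bswap.i16(i16 %ab)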
2182 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
2183 // here. We should standardize that construct where it is needed or choose some
2184 // other way to ensure that commutated variants of patterns are not missed.
2185 Instruction
*InstCombinerImpl::visitAnd(BinaryOperator
&I
) {
2186 Type
*Ty
= I
.getType();
2188 if (Value
*V
= simplifyAndInst(I
.getOperand(0), I
.getOperand(1),
2189 SQ
.getWithInstruction(&I
)))
2190 return replaceInstUsesWith(I
, V
);
2192 if (SimplifyAssociativeOrCommutative(I
))
2195 if (Instruction
*X
= foldVectorBinop(I
))
2198 if (Instruction
*Phi
= foldBinopWithPhiOperands(I
))
2201 // See if we can simplify any instructions used by the instruction whose sole
2202 // purpose is to compute bits we don't care about.
2203 if (SimplifyDemandedInstructionBits(I
))
2206 // Do this before using distributive laws to catch simple and/or/not patterns.
2207 if (Instruction
*Xor
= foldAndToXor(I
, Builder
))
2210 if (Instruction
*X
= foldComplexAndOrPatterns(I
, Builder
))
2213 // (A|B)&(A|C) -> A|(B&C) etc
2214 if (Value
*V
= foldUsingDistributiveLaws(I
))
2215 return replaceInstUsesWith(I
, V
);
2217 if (Instruction
*R
= foldBinOpShiftWithShift(I
))
2220 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
2223 if (match(Op0
, m_OneUse(m_LogicalShift(m_One(), m_Value(X
)))) &&
2224 match(Op1
, m_One())) {
2225 // (1 << X) & 1 --> zext(X == 0)
2226 // (1 >> X) & 1 --> zext(X == 0)
2227 Value
*IsZero
= Builder
.CreateICmpEQ(X
, ConstantInt::get(Ty
, 0));
2228 return new ZExtInst(IsZero
, Ty
);
2231 // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y
2234 m_c_And(m_CombineAnd(m_Value(Neg
),
2235 m_OneUse(m_Neg(m_And(m_Value(), m_One())))),
2237 Value
*Cmp
= Builder
.CreateIsNull(Neg
);
2238 return SelectInst::Create(Cmp
, ConstantInt::getNullValue(Ty
), Y
);
2242 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
2243 if (match(&I
, m_c_And(m_Value(Y
), m_OneUse(m_CombineOr(
2244 m_c_Add(m_Value(X
), m_Deferred(Y
)),
2245 m_Sub(m_Value(X
), m_Deferred(Y
)))))) &&
2246 isKnownToBeAPowerOfTwo(Y
, /*OrZero*/ true, /*Depth*/ 0, &I
))
2247 return BinaryOperator::CreateAnd(Builder
.CreateNot(X
), Y
);
2250 if (match(Op1
, m_APInt(C
))) {
2252 if (match(Op0
, m_OneUse(m_Xor(m_Value(X
), m_APInt(XorC
))))) {
2253 // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
2254 Constant
*NewC
= ConstantInt::get(Ty
, *C
& *XorC
);
2255 Value
*And
= Builder
.CreateAnd(X
, Op1
);
2257 return BinaryOperator::CreateXor(And
, NewC
);
2261 if (match(Op0
, m_OneUse(m_Or(m_Value(X
), m_APInt(OrC
))))) {
2262 // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2)
2263 // NOTE: This reduces the number of bits set in the & mask, which
2264 // can expose opportunities for store narrowing for scalars.
2265 // NOTE: SimplifyDemandedBits should have already removed bits from C1
2266 // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in
2267 // above, but this feels safer.
2268 APInt Together
= *C
& *OrC
;
2269 Value
*And
= Builder
.CreateAnd(X
, ConstantInt::get(Ty
, Together
^ *C
));
2271 return BinaryOperator::CreateOr(And
, ConstantInt::get(Ty
, Together
));
2274 unsigned Width
= Ty
->getScalarSizeInBits();
2275 const APInt
*ShiftC
;
2276 if (match(Op0
, m_OneUse(m_SExt(m_AShr(m_Value(X
), m_APInt(ShiftC
))))) &&
2277 ShiftC
->ult(Width
)) {
2278 if (*C
== APInt::getLowBitsSet(Width
, Width
- ShiftC
->getZExtValue())) {
2279 // We are clearing high bits that were potentially set by sext+ashr:
2280 // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC
2281 Value
*Sext
= Builder
.CreateSExt(X
, Ty
);
2282 Constant
*ShAmtC
= ConstantInt::get(Ty
, ShiftC
->zext(Width
));
2283 return BinaryOperator::CreateLShr(Sext
, ShAmtC
);
2287 // If this 'and' clears the sign-bits added by ashr, replace with lshr:
2288 // and (ashr X, ShiftC), C --> lshr X, ShiftC
2289 if (match(Op0
, m_AShr(m_Value(X
), m_APInt(ShiftC
))) && ShiftC
->ult(Width
) &&
2290 C
->isMask(Width
- ShiftC
->getZExtValue()))
2291 return BinaryOperator::CreateLShr(X
, ConstantInt::get(Ty
, *ShiftC
));
2294 if (match(Op0
, m_Add(m_Value(X
), m_APInt(AddC
)))) {
2295 // If we are masking the result of the add down to exactly one bit and
2296 // the constant we are adding has no bits set below that bit, then the
2297 // add is flipping a single bit. Example:
2298 // (X + 4) & 4 --> (X & 4) ^ 4
2299 if (Op0
->hasOneUse() && C
->isPowerOf2() && (*AddC
& (*C
- 1)) == 0) {
2300 assert((*C
& *AddC
) != 0 && "Expected common bit");
2301 Value
*NewAnd
= Builder
.CreateAnd(X
, Op1
);
2302 return BinaryOperator::CreateXor(NewAnd
, Op1
);
2306 // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the
2307 // bitwidth of X and OP behaves well when given trunc(C1) and X.
2308 auto isNarrowableBinOpcode
= [](BinaryOperator
*B
) {
2309 switch (B
->getOpcode()) {
2310 case Instruction::Xor
:
2311 case Instruction::Or
:
2312 case Instruction::Mul
:
2313 case Instruction::Add
:
2314 case Instruction::Sub
:
2321 if (match(Op0
, m_OneUse(m_BinOp(BO
))) && isNarrowableBinOpcode(BO
)) {
2322 Instruction::BinaryOps BOpcode
= BO
->getOpcode();
2325 // TODO: The one-use restrictions could be relaxed a little if the AND
2326 // is going to be removed.
2327 // Try to narrow the 'and' and a binop with constant operand:
2328 // and (bo (zext X), C1), C --> zext (and (bo X, TruncC1), TruncC)
2329 if (match(BO
, m_c_BinOp(m_OneUse(m_ZExt(m_Value(X
))), m_APInt(C1
))) &&
2330 C
->isIntN(X
->getType()->getScalarSizeInBits())) {
2331 unsigned XWidth
= X
->getType()->getScalarSizeInBits();
2332 Constant
*TruncC1
= ConstantInt::get(X
->getType(), C1
->trunc(XWidth
));
2333 Value
*BinOp
= isa
<ZExtInst
>(BO
->getOperand(0))
2334 ? Builder
.CreateBinOp(BOpcode
, X
, TruncC1
)
2335 : Builder
.CreateBinOp(BOpcode
, TruncC1
, X
);
2336 Constant
*TruncC
= ConstantInt::get(X
->getType(), C
->trunc(XWidth
));
2337 Value
*And
= Builder
.CreateAnd(BinOp
, TruncC
);
2338 return new ZExtInst(And
, Ty
);
2341 // Similar to above: if the mask matches the zext input width, then the
2342 // 'and' can be eliminated, so we can truncate the other variable op:
2343 // and (bo (zext X), Y), C --> zext (bo X, (trunc Y))
2344 if (isa
<Instruction
>(BO
->getOperand(0)) &&
2345 match(BO
->getOperand(0), m_OneUse(m_ZExt(m_Value(X
)))) &&
2346 C
->isMask(X
->getType()->getScalarSizeInBits())) {
2347 Y
= BO
->getOperand(1);
2348 Value
*TrY
= Builder
.CreateTrunc(Y
, X
->getType(), Y
->getName() + ".tr");
2350 Builder
.CreateBinOp(BOpcode
, X
, TrY
, BO
->getName() + ".narrow");
2351 return new ZExtInst(NewBO
, Ty
);
2353 // and (bo Y, (zext X)), C --> zext (bo (trunc Y), X)
2354 if (isa
<Instruction
>(BO
->getOperand(1)) &&
2355 match(BO
->getOperand(1), m_OneUse(m_ZExt(m_Value(X
)))) &&
2356 C
->isMask(X
->getType()->getScalarSizeInBits())) {
2357 Y
= BO
->getOperand(0);
2358 Value
*TrY
= Builder
.CreateTrunc(Y
, X
->getType(), Y
->getName() + ".tr");
2360 Builder
.CreateBinOp(BOpcode
, TrY
, X
, BO
->getName() + ".narrow");
2361 return new ZExtInst(NewBO
, Ty
);
2365 // This is intentionally placed after the narrowing transforms for
2366 // efficiency (transform directly to the narrow logic op if possible).
2367 // If the mask is only needed on one incoming arm, push the 'and' op up.
2368 if (match(Op0
, m_OneUse(m_Xor(m_Value(X
), m_Value(Y
)))) ||
2369 match(Op0
, m_OneUse(m_Or(m_Value(X
), m_Value(Y
))))) {
2370 APInt
NotAndMask(~(*C
));
2371 BinaryOperator::BinaryOps BinOp
= cast
<BinaryOperator
>(Op0
)->getOpcode();
2372 if (MaskedValueIsZero(X
, NotAndMask
, 0, &I
)) {
2373 // Not masking anything out for the LHS, move mask to RHS.
2374 // and ({x}or X, Y), C --> {x}or X, (and Y, C)
2375 Value
*NewRHS
= Builder
.CreateAnd(Y
, Op1
, Y
->getName() + ".masked");
2376 return BinaryOperator::Create(BinOp
, X
, NewRHS
);
2378 if (!isa
<Constant
>(Y
) && MaskedValueIsZero(Y
, NotAndMask
, 0, &I
)) {
2379 // Not masking anything out for the RHS, move mask to LHS.
2380 // and ({x}or X, Y), C --> {x}or (and X, C), Y
2381 Value
*NewLHS
= Builder
.CreateAnd(X
, Op1
, X
->getName() + ".masked");
2382 return BinaryOperator::Create(BinOp
, NewLHS
, Y
);
2386 // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2
2387 // constant, test if the shift amount equals the offset bit index:
2388 // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0
2389 // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0
2390 if (C
->isPowerOf2() &&
2391 match(Op0
, m_OneUse(m_LogicalShift(m_Power2(ShiftC
), m_Value(X
))))) {
2392 int Log2ShiftC
= ShiftC
->exactLogBase2();
2393 int Log2C
= C
->exactLogBase2();
2395 cast
<BinaryOperator
>(Op0
)->getOpcode() == Instruction::Shl
;
2396 int BitNum
= IsShiftLeft
? Log2C
- Log2ShiftC
: Log2ShiftC
- Log2C
;
2397 assert(BitNum
>= 0 && "Expected demanded bits to handle impossible mask");
2398 Value
*Cmp
= Builder
.CreateICmpEQ(X
, ConstantInt::get(Ty
, BitNum
));
2399 return SelectInst::Create(Cmp
, ConstantInt::get(Ty
, *C
),
2400 ConstantInt::getNullValue(Ty
));
2404 const APInt
*C3
= C
;
2406 if (C3
->isPowerOf2()) {
2407 Constant
*Log2C3
= ConstantInt::get(Ty
, C3
->countr_zero());
2408 if (match(Op0
, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1
), m_Value(X
)),
2409 m_ImmConstant(C2
)))) &&
2410 match(C1
, m_Power2())) {
2411 Constant
*Log2C1
= ConstantExpr::getExactLogBase2(C1
);
2412 Constant
*LshrC
= ConstantExpr::getAdd(C2
, Log2C3
);
2413 KnownBits KnownLShrc
= computeKnownBits(LshrC
, 0, nullptr);
2414 if (KnownLShrc
.getMaxValue().ult(Width
)) {
2415 // iff C1,C3 is pow2 and C2 + cttz(C3) < BitWidth:
2416 // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0
2417 Constant
*CmpC
= ConstantExpr::getSub(LshrC
, Log2C1
);
2418 Value
*Cmp
= Builder
.CreateICmpEQ(X
, CmpC
);
2419 return SelectInst::Create(Cmp
, ConstantInt::get(Ty
, *C3
),
2420 ConstantInt::getNullValue(Ty
));
2424 if (match(Op0
, m_OneUse(m_Shl(m_LShr(m_ImmConstant(C1
), m_Value(X
)),
2425 m_ImmConstant(C2
)))) &&
2426 match(C1
, m_Power2())) {
2427 Constant
*Log2C1
= ConstantExpr::getExactLogBase2(C1
);
2429 ConstantExpr::getCompare(ICmpInst::ICMP_ULT
, Log2C3
, C2
);
2430 if (Cmp
->isZeroValue()) {
2431 // iff C1,C3 is pow2 and Log2(C3) >= C2:
2432 // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0
2433 Constant
*ShlC
= ConstantExpr::getAdd(C2
, Log2C1
);
2434 Constant
*CmpC
= ConstantExpr::getSub(ShlC
, Log2C3
);
2435 Value
*Cmp
= Builder
.CreateICmpEQ(X
, CmpC
);
2436 return SelectInst::Create(Cmp
, ConstantInt::get(Ty
, *C3
),
2437 ConstantInt::getNullValue(Ty
));
2443 // If we are clearing the sign bit of a floating-point value, convert this to
2444 // fabs, then cast back to integer.
2446 // This is a generous interpretation for noimplicitfloat, this is not a true
2447 // floating-point operation.
2449 // Assumes any IEEE-represented type has the sign bit in the high bit.
2450 // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt
2452 if (match(Op0
, m_BitCast(m_Value(CastOp
))) &&
2453 match(Op1
, m_MaxSignedValue()) &&
2454 !Builder
.GetInsertBlock()->getParent()->hasFnAttribute(
2455 Attribute::NoImplicitFloat
)) {
2456 Type
*EltTy
= CastOp
->getType()->getScalarType();
2457 if (EltTy
->isFloatingPointTy() && EltTy
->isIEEE() &&
2458 EltTy
->getPrimitiveSizeInBits() ==
2459 I
.getType()->getScalarType()->getPrimitiveSizeInBits()) {
2460 Value
*FAbs
= Builder
.CreateUnaryIntrinsic(Intrinsic::fabs
, CastOp
);
2461 return new BitCastInst(FAbs
, I
.getType());
2465 if (match(&I
, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X
)), m_Value(Y
))),
2467 match(Y
, m_SpecificInt_ICMP(
2468 ICmpInst::Predicate::ICMP_EQ
,
2469 APInt(Ty
->getScalarSizeInBits(),
2470 Ty
->getScalarSizeInBits() -
2471 X
->getType()->getScalarSizeInBits())))) {
2472 auto *SExt
= Builder
.CreateSExt(X
, Ty
, X
->getName() + ".signext");
2473 auto *SanitizedSignMask
= cast
<Constant
>(Op1
);
2474 // We must be careful with the undef elements of the sign bit mask, however:
2475 // the mask elt can be undef iff the shift amount for that lane was undef,
2476 // otherwise we need to sanitize undef masks to zero.
2477 SanitizedSignMask
= Constant::replaceUndefsWith(
2478 SanitizedSignMask
, ConstantInt::getNullValue(Ty
->getScalarType()));
2480 Constant::mergeUndefsWith(SanitizedSignMask
, cast
<Constant
>(Y
));
2481 return BinaryOperator::CreateAnd(SExt
, SanitizedSignMask
);
2484 if (Instruction
*Z
= narrowMaskedBinOp(I
))
2487 if (I
.getType()->isIntOrIntVectorTy(1)) {
2488 if (auto *SI0
= dyn_cast
<SelectInst
>(Op0
)) {
2490 foldAndOrOfSelectUsingImpliedCond(Op1
, *SI0
, /* IsAnd */ true))
2493 if (auto *SI1
= dyn_cast
<SelectInst
>(Op1
)) {
2495 foldAndOrOfSelectUsingImpliedCond(Op0
, *SI1
, /* IsAnd */ true))
2500 if (Instruction
*FoldedLogic
= foldBinOpIntoSelectOrPhi(I
))
2503 if (Instruction
*DeMorgan
= matchDeMorgansLaws(I
, *this))
2508 // A & (A ^ B) --> A & ~B
2509 if (match(Op1
, m_OneUse(m_c_Xor(m_Specific(Op0
), m_Value(B
)))))
2510 return BinaryOperator::CreateAnd(Op0
, Builder
.CreateNot(B
));
2511 // (A ^ B) & A --> A & ~B
2512 if (match(Op0
, m_OneUse(m_c_Xor(m_Specific(Op1
), m_Value(B
)))))
2513 return BinaryOperator::CreateAnd(Op1
, Builder
.CreateNot(B
));
2515 // A & ~(A ^ B) --> A & B
2516 if (match(Op1
, m_Not(m_c_Xor(m_Specific(Op0
), m_Value(B
)))))
2517 return BinaryOperator::CreateAnd(Op0
, B
);
2518 // ~(A ^ B) & A --> A & B
2519 if (match(Op0
, m_Not(m_c_Xor(m_Specific(Op1
), m_Value(B
)))))
2520 return BinaryOperator::CreateAnd(Op1
, B
);
2522 // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
2523 if (match(Op0
, m_Xor(m_Value(A
), m_Value(B
))) &&
2524 match(Op1
, m_Xor(m_Xor(m_Specific(B
), m_Value(C
)), m_Specific(A
)))) {
2525 Value
*NotC
= Op1
->hasOneUse()
2526 ? Builder
.CreateNot(C
)
2527 : getFreelyInverted(C
, C
->hasOneUse(), &Builder
);
2528 if (NotC
!= nullptr)
2529 return BinaryOperator::CreateAnd(Op0
, NotC
);
2532 // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
2533 if (match(Op0
, m_Xor(m_Xor(m_Value(A
), m_Value(C
)), m_Value(B
))) &&
2534 match(Op1
, m_Xor(m_Specific(B
), m_Specific(A
)))) {
2535 Value
*NotC
= Op0
->hasOneUse()
2536 ? Builder
.CreateNot(C
)
2537 : getFreelyInverted(C
, C
->hasOneUse(), &Builder
);
2538 if (NotC
!= nullptr)
2539 return BinaryOperator::CreateAnd(Op1
, Builder
.CreateNot(C
));
2542 // (A | B) & (~A ^ B) -> A & B
2543 // (A | B) & (B ^ ~A) -> A & B
2544 // (B | A) & (~A ^ B) -> A & B
2545 // (B | A) & (B ^ ~A) -> A & B
2546 if (match(Op1
, m_c_Xor(m_Not(m_Value(A
)), m_Value(B
))) &&
2547 match(Op0
, m_c_Or(m_Specific(A
), m_Specific(B
))))
2548 return BinaryOperator::CreateAnd(A
, B
);
2550 // (~A ^ B) & (A | B) -> A & B
2551 // (~A ^ B) & (B | A) -> A & B
2552 // (B ^ ~A) & (A | B) -> A & B
2553 // (B ^ ~A) & (B | A) -> A & B
2554 if (match(Op0
, m_c_Xor(m_Not(m_Value(A
)), m_Value(B
))) &&
2555 match(Op1
, m_c_Or(m_Specific(A
), m_Specific(B
))))
2556 return BinaryOperator::CreateAnd(A
, B
);
2558 // (~A | B) & (A ^ B) -> ~A & B
2559 // (~A | B) & (B ^ A) -> ~A & B
2560 // (B | ~A) & (A ^ B) -> ~A & B
2561 // (B | ~A) & (B ^ A) -> ~A & B
2562 if (match(Op0
, m_c_Or(m_Not(m_Value(A
)), m_Value(B
))) &&
2563 match(Op1
, m_c_Xor(m_Specific(A
), m_Specific(B
))))
2564 return BinaryOperator::CreateAnd(Builder
.CreateNot(A
), B
);
2566 // (A ^ B) & (~A | B) -> ~A & B
2567 // (B ^ A) & (~A | B) -> ~A & B
2568 // (A ^ B) & (B | ~A) -> ~A & B
2569 // (B ^ A) & (B | ~A) -> ~A & B
2570 if (match(Op1
, m_c_Or(m_Not(m_Value(A
)), m_Value(B
))) &&
2571 match(Op0
, m_c_Xor(m_Specific(A
), m_Specific(B
))))
2572 return BinaryOperator::CreateAnd(Builder
.CreateNot(A
), B
);
2576 ICmpInst
*LHS
= dyn_cast
<ICmpInst
>(Op0
);
2577 ICmpInst
*RHS
= dyn_cast
<ICmpInst
>(Op1
);
2579 if (Value
*Res
= foldAndOrOfICmps(LHS
, RHS
, I
, /* IsAnd */ true))
2580 return replaceInstUsesWith(I
, Res
);
2582 // TODO: Make this recursive; it's a little tricky because an arbitrary
2583 // number of 'and' instructions might have to be created.
2584 if (LHS
&& match(Op1
, m_OneUse(m_LogicalAnd(m_Value(X
), m_Value(Y
))))) {
2585 bool IsLogical
= isa
<SelectInst
>(Op1
);
2586 // LHS & (X && Y) --> (LHS && X) && Y
2587 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
2589 foldAndOrOfICmps(LHS
, Cmp
, I
, /* IsAnd */ true, IsLogical
))
2590 return replaceInstUsesWith(I
, IsLogical
2591 ? Builder
.CreateLogicalAnd(Res
, Y
)
2592 : Builder
.CreateAnd(Res
, Y
));
2593 // LHS & (X && Y) --> X && (LHS & Y)
2594 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
2595 if (Value
*Res
= foldAndOrOfICmps(LHS
, Cmp
, I
, /* IsAnd */ true,
2596 /* IsLogical */ false))
2597 return replaceInstUsesWith(I
, IsLogical
2598 ? Builder
.CreateLogicalAnd(X
, Res
)
2599 : Builder
.CreateAnd(X
, Res
));
2601 if (RHS
&& match(Op0
, m_OneUse(m_LogicalAnd(m_Value(X
), m_Value(Y
))))) {
2602 bool IsLogical
= isa
<SelectInst
>(Op0
);
2603 // (X && Y) & RHS --> (X && RHS) && Y
2604 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
2606 foldAndOrOfICmps(Cmp
, RHS
, I
, /* IsAnd */ true, IsLogical
))
2607 return replaceInstUsesWith(I
, IsLogical
2608 ? Builder
.CreateLogicalAnd(Res
, Y
)
2609 : Builder
.CreateAnd(Res
, Y
));
2610 // (X && Y) & RHS --> X && (Y & RHS)
2611 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
2612 if (Value
*Res
= foldAndOrOfICmps(Cmp
, RHS
, I
, /* IsAnd */ true,
2613 /* IsLogical */ false))
2614 return replaceInstUsesWith(I
, IsLogical
2615 ? Builder
.CreateLogicalAnd(X
, Res
)
2616 : Builder
.CreateAnd(X
, Res
));
2620 if (FCmpInst
*LHS
= dyn_cast
<FCmpInst
>(I
.getOperand(0)))
2621 if (FCmpInst
*RHS
= dyn_cast
<FCmpInst
>(I
.getOperand(1)))
2622 if (Value
*Res
= foldLogicOfFCmps(LHS
, RHS
, /*IsAnd*/ true))
2623 return replaceInstUsesWith(I
, Res
);
2625 if (Instruction
*FoldedFCmps
= reassociateFCmps(I
, Builder
))
2628 if (Instruction
*CastedAnd
= foldCastedBitwiseLogic(I
))
2631 if (Instruction
*Sel
= foldBinopOfSextBoolToSelect(I
))
2634 // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
2635 // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
2636 // with binop identity constant. But creating a select with non-constant
2637 // arm may not be reversible due to poison semantics. Is that a good
2638 // canonicalization?
2640 if (match(&I
, m_c_And(m_OneUse(m_SExt(m_Value(A
))), m_Value(B
))) &&
2641 A
->getType()->isIntOrIntVectorTy(1))
2642 return SelectInst::Create(A
, B
, Constant::getNullValue(Ty
));
2644 // Similarly, a 'not' of the bool translates to a swap of the select arms:
2645 // ~sext(A) & B / B & ~sext(A) --> A ? 0 : B
2646 if (match(&I
, m_c_And(m_Not(m_SExt(m_Value(A
))), m_Value(B
))) &&
2647 A
->getType()->isIntOrIntVectorTy(1))
2648 return SelectInst::Create(A
, Constant::getNullValue(Ty
), B
);
2650 // and(zext(A), B) -> A ? (B & 1) : 0
2651 if (match(&I
, m_c_And(m_OneUse(m_ZExt(m_Value(A
))), m_Value(B
))) &&
2652 A
->getType()->isIntOrIntVectorTy(1))
2653 return SelectInst::Create(A
, Builder
.CreateAnd(B
, ConstantInt::get(Ty
, 1)),
2654 Constant::getNullValue(Ty
));
2656 // (-1 + A) & B --> A ? 0 : B where A is 0/1.
2657 if (match(&I
, m_c_And(m_OneUse(m_Add(m_ZExtOrSelf(m_Value(A
)), m_AllOnes())),
2659 if (A
->getType()->isIntOrIntVectorTy(1))
2660 return SelectInst::Create(A
, Constant::getNullValue(Ty
), B
);
2661 if (computeKnownBits(A
, /* Depth */ 0, &I
).countMaxActiveBits() <= 1) {
2662 return SelectInst::Create(
2663 Builder
.CreateICmpEQ(A
, Constant::getNullValue(A
->getType())), B
,
2664 Constant::getNullValue(Ty
));
2668 // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0 -- with optional sext
2669 if (match(&I
, m_c_And(m_OneUse(m_SExtOrSelf(
2670 m_AShr(m_Value(X
), m_APIntAllowUndef(C
)))),
2672 *C
== X
->getType()->getScalarSizeInBits() - 1) {
2673 Value
*IsNeg
= Builder
.CreateIsNeg(X
, "isneg");
2674 return SelectInst::Create(IsNeg
, Y
, ConstantInt::getNullValue(Ty
));
2676 // If there's a 'not' of the shifted value, swap the select operands:
2677 // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y -- with optional sext
2678 if (match(&I
, m_c_And(m_OneUse(m_SExtOrSelf(
2679 m_Not(m_AShr(m_Value(X
), m_APIntAllowUndef(C
))))),
2681 *C
== X
->getType()->getScalarSizeInBits() - 1) {
2682 Value
*IsNeg
= Builder
.CreateIsNeg(X
, "isneg");
2683 return SelectInst::Create(IsNeg
, ConstantInt::getNullValue(Ty
), Y
);
2686 // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
2687 if (sinkNotIntoOtherHandOfLogicalOp(I
))
2690 // An and recurrence w/loop invariant step is equivelent to (and start, step)
2691 PHINode
*PN
= nullptr;
2692 Value
*Start
= nullptr, *Step
= nullptr;
2693 if (matchSimpleRecurrence(&I
, PN
, Start
, Step
) && DT
.dominates(Step
, PN
))
2694 return replaceInstUsesWith(I
, Builder
.CreateAnd(Start
, Step
));
2696 if (Instruction
*R
= reassociateForUses(I
, Builder
))
2699 if (Instruction
*Canonicalized
= canonicalizeLogicFirst(I
, Builder
))
2700 return Canonicalized
;
2702 if (Instruction
*Folded
= foldLogicOfIsFPClass(I
, Op0
, Op1
))
2705 if (Instruction
*Res
= foldBinOpOfDisplacedShifts(I
))
2708 if (Instruction
*Res
= foldBitwiseLogicWithIntrinsics(I
, Builder
))
2714 Instruction
*InstCombinerImpl::matchBSwapOrBitReverse(Instruction
&I
,
2716 bool MatchBitReversals
) {
2717 SmallVector
<Instruction
*, 4> Insts
;
2718 if (!recognizeBSwapOrBitReverseIdiom(&I
, MatchBSwaps
, MatchBitReversals
,
2721 Instruction
*LastInst
= Insts
.pop_back_val();
2722 LastInst
->removeFromParent();
2724 for (auto *Inst
: Insts
)
2725 Worklist
.push(Inst
);
2729 /// Match UB-safe variants of the funnel shift intrinsic.
2730 static Instruction
*matchFunnelShift(Instruction
&Or
, InstCombinerImpl
&IC
,
2731 const DominatorTree
&DT
) {
2732 // TODO: Can we reduce the code duplication between this and the related
2733 // rotate matching code under visitSelect and visitTrunc?
2734 unsigned Width
= Or
.getType()->getScalarSizeInBits();
2736 Instruction
*Or0
, *Or1
;
2737 if (!match(Or
.getOperand(0), m_Instruction(Or0
)) ||
2738 !match(Or
.getOperand(1), m_Instruction(Or1
)))
2741 bool IsFshl
= true; // Sub on LSHR.
2742 SmallVector
<Value
*, 3> FShiftArgs
;
2744 // First, find an or'd pair of opposite shifts:
2745 // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
2746 if (isa
<BinaryOperator
>(Or0
) && isa
<BinaryOperator
>(Or1
)) {
2747 Value
*ShVal0
, *ShVal1
, *ShAmt0
, *ShAmt1
;
2749 m_OneUse(m_LogicalShift(m_Value(ShVal0
), m_Value(ShAmt0
)))) ||
2751 m_OneUse(m_LogicalShift(m_Value(ShVal1
), m_Value(ShAmt1
)))) ||
2752 Or0
->getOpcode() == Or1
->getOpcode())
2755 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
2756 if (Or0
->getOpcode() == BinaryOperator::LShr
) {
2757 std::swap(Or0
, Or1
);
2758 std::swap(ShVal0
, ShVal1
);
2759 std::swap(ShAmt0
, ShAmt1
);
2761 assert(Or0
->getOpcode() == BinaryOperator::Shl
&&
2762 Or1
->getOpcode() == BinaryOperator::LShr
&&
2763 "Illegal or(shift,shift) pair");
2765 // Match the shift amount operands for a funnel shift pattern. This always
2766 // matches a subtraction on the R operand.
2767 auto matchShiftAmount
= [&](Value
*L
, Value
*R
, unsigned Width
) -> Value
* {
2768 // Check for constant shift amounts that sum to the bitwidth.
2769 const APInt
*LI
, *RI
;
2770 if (match(L
, m_APIntAllowUndef(LI
)) && match(R
, m_APIntAllowUndef(RI
)))
2771 if (LI
->ult(Width
) && RI
->ult(Width
) && (*LI
+ *RI
) == Width
)
2772 return ConstantInt::get(L
->getType(), *LI
);
2775 if (match(L
, m_Constant(LC
)) && match(R
, m_Constant(RC
)) &&
2777 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT
, APInt(Width
, Width
))) &&
2779 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT
, APInt(Width
, Width
))) &&
2780 match(ConstantExpr::getAdd(LC
, RC
), m_SpecificIntAllowUndef(Width
)))
2781 return ConstantExpr::mergeUndefsWith(LC
, RC
);
2783 // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
2784 // We limit this to X < Width in case the backend re-expands the
2785 // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
2786 // might remove it after this fold). This still doesn't guarantee that the
2787 // final codegen will match this original pattern.
2788 if (match(R
, m_OneUse(m_Sub(m_SpecificInt(Width
), m_Specific(L
))))) {
2789 KnownBits KnownL
= IC
.computeKnownBits(L
, /*Depth*/ 0, &Or
);
2790 return KnownL
.getMaxValue().ult(Width
) ? L
: nullptr;
2793 // For non-constant cases, the following patterns currently only work for
2794 // rotation patterns.
2795 // TODO: Add general funnel-shift compatible patterns.
2796 if (ShVal0
!= ShVal1
)
2799 // For non-constant cases we don't support non-pow2 shift masks.
2800 // TODO: Is it worth matching urem as well?
2801 if (!isPowerOf2_32(Width
))
2804 // The shift amount may be masked with negation:
2805 // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2807 unsigned Mask
= Width
- 1;
2808 if (match(L
, m_And(m_Value(X
), m_SpecificInt(Mask
))) &&
2809 match(R
, m_And(m_Neg(m_Specific(X
)), m_SpecificInt(Mask
))))
2812 // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
2813 if (match(R
, m_And(m_Neg(m_Specific(L
)), m_SpecificInt(Mask
))))
2816 // Similar to above, but the shift amount may be extended after masking,
2817 // so return the extended value as the parameter for the intrinsic.
2818 if (match(L
, m_ZExt(m_And(m_Value(X
), m_SpecificInt(Mask
)))) &&
2820 m_And(m_Neg(m_ZExt(m_And(m_Specific(X
), m_SpecificInt(Mask
)))),
2821 m_SpecificInt(Mask
))))
2824 if (match(L
, m_ZExt(m_And(m_Value(X
), m_SpecificInt(Mask
)))) &&
2825 match(R
, m_ZExt(m_And(m_Neg(m_Specific(X
)), m_SpecificInt(Mask
)))))
2831 Value
*ShAmt
= matchShiftAmount(ShAmt0
, ShAmt1
, Width
);
2833 ShAmt
= matchShiftAmount(ShAmt1
, ShAmt0
, Width
);
2834 IsFshl
= false; // Sub on SHL.
2839 FShiftArgs
= {ShVal0
, ShVal1
, ShAmt
};
2840 } else if (isa
<ZExtInst
>(Or0
) || isa
<ZExtInst
>(Or1
)) {
2841 // If there are two 'or' instructions concat variables in opposite order:
2843 // Slot1 and Slot2 are all zero bits.
2844 // | Slot1 | Low | Slot2 | High |
2845 // LowHigh = or (shl (zext Low), ZextLowShlAmt), (zext High)
2846 // | Slot2 | High | Slot1 | Low |
2847 // HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
2849 // the latter 'or' can be safely convert to
2850 // -> HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
2851 // if ZextLowShlAmt + ZextHighShlAmt == Width.
2852 if (!isa
<ZExtInst
>(Or1
))
2853 std::swap(Or0
, Or1
);
2855 Value
*High
, *ZextHigh
, *Low
;
2856 const APInt
*ZextHighShlAmt
;
2858 m_OneUse(m_Shl(m_Value(ZextHigh
), m_APInt(ZextHighShlAmt
)))))
2861 if (!match(Or1
, m_ZExt(m_Value(Low
))) ||
2862 !match(ZextHigh
, m_ZExt(m_Value(High
))))
2865 unsigned HighSize
= High
->getType()->getScalarSizeInBits();
2866 unsigned LowSize
= Low
->getType()->getScalarSizeInBits();
2867 // Make sure High does not overlap with Low and most significant bits of
2868 // High aren't shifted out.
2869 if (ZextHighShlAmt
->ult(LowSize
) || ZextHighShlAmt
->ugt(Width
- HighSize
))
2872 for (User
*U
: ZextHigh
->users()) {
2874 if (!match(U
, m_Or(m_Value(X
), m_Value(Y
))))
2877 if (!isa
<ZExtInst
>(Y
))
2880 const APInt
*ZextLowShlAmt
;
2881 if (!match(X
, m_Shl(m_Specific(Or1
), m_APInt(ZextLowShlAmt
))) ||
2882 !match(Y
, m_Specific(ZextHigh
)) || !DT
.dominates(U
, &Or
))
2885 // HighLow is good concat. If sum of two shifts amount equals to Width,
2886 // LowHigh must also be a good concat.
2887 if (*ZextLowShlAmt
+ *ZextHighShlAmt
!= Width
)
2890 // Low must not overlap with High and most significant bits of Low must
2891 // not be shifted out.
2892 assert(ZextLowShlAmt
->uge(HighSize
) &&
2893 ZextLowShlAmt
->ule(Width
- LowSize
) && "Invalid concat");
2895 FShiftArgs
= {U
, U
, ConstantInt::get(Or0
->getType(), *ZextHighShlAmt
)};
2900 if (FShiftArgs
.empty())
2903 Intrinsic::ID IID
= IsFshl
? Intrinsic::fshl
: Intrinsic::fshr
;
2904 Function
*F
= Intrinsic::getDeclaration(Or
.getModule(), IID
, Or
.getType());
2905 return CallInst::Create(F
, FShiftArgs
);
2908 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
2909 static Instruction
*matchOrConcat(Instruction
&Or
,
2910 InstCombiner::BuilderTy
&Builder
) {
2911 assert(Or
.getOpcode() == Instruction::Or
&& "bswap requires an 'or'");
2912 Value
*Op0
= Or
.getOperand(0), *Op1
= Or
.getOperand(1);
2913 Type
*Ty
= Or
.getType();
2915 unsigned Width
= Ty
->getScalarSizeInBits();
2916 if ((Width
& 1) != 0)
2918 unsigned HalfWidth
= Width
/ 2;
2920 // Canonicalize zext (lower half) to LHS.
2921 if (!isa
<ZExtInst
>(Op0
))
2922 std::swap(Op0
, Op1
);
2924 // Find lower/upper half.
2925 Value
*LowerSrc
, *ShlVal
, *UpperSrc
;
2927 if (!match(Op0
, m_OneUse(m_ZExt(m_Value(LowerSrc
)))) ||
2928 !match(Op1
, m_OneUse(m_Shl(m_Value(ShlVal
), m_APInt(C
)))) ||
2929 !match(ShlVal
, m_OneUse(m_ZExt(m_Value(UpperSrc
)))))
2931 if (*C
!= HalfWidth
|| LowerSrc
->getType() != UpperSrc
->getType() ||
2932 LowerSrc
->getType()->getScalarSizeInBits() != HalfWidth
)
2935 auto ConcatIntrinsicCalls
= [&](Intrinsic::ID id
, Value
*Lo
, Value
*Hi
) {
2936 Value
*NewLower
= Builder
.CreateZExt(Lo
, Ty
);
2937 Value
*NewUpper
= Builder
.CreateZExt(Hi
, Ty
);
2938 NewUpper
= Builder
.CreateShl(NewUpper
, HalfWidth
);
2939 Value
*BinOp
= Builder
.CreateOr(NewLower
, NewUpper
);
2940 Function
*F
= Intrinsic::getDeclaration(Or
.getModule(), id
, Ty
);
2941 return Builder
.CreateCall(F
, BinOp
);
2944 // BSWAP: Push the concat down, swapping the lower/upper sources.
2945 // concat(bswap(x),bswap(y)) -> bswap(concat(x,y))
2946 Value
*LowerBSwap
, *UpperBSwap
;
2947 if (match(LowerSrc
, m_BSwap(m_Value(LowerBSwap
))) &&
2948 match(UpperSrc
, m_BSwap(m_Value(UpperBSwap
))))
2949 return ConcatIntrinsicCalls(Intrinsic::bswap
, UpperBSwap
, LowerBSwap
);
2951 // BITREVERSE: Push the concat down, swapping the lower/upper sources.
2952 // concat(bitreverse(x),bitreverse(y)) -> bitreverse(concat(x,y))
2953 Value
*LowerBRev
, *UpperBRev
;
2954 if (match(LowerSrc
, m_BitReverse(m_Value(LowerBRev
))) &&
2955 match(UpperSrc
, m_BitReverse(m_Value(UpperBRev
))))
2956 return ConcatIntrinsicCalls(Intrinsic::bitreverse
, UpperBRev
, LowerBRev
);
/// If all elements of two constant vectors are 0/-1 and inverses, return true.
static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
  unsigned NumElts = cast<FixedVectorType>(C1->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    Constant *EltC1 = C1->getAggregateElement(i);
    Constant *EltC2 = C2->getAggregateElement(i);
    if (!EltC1 || !EltC2)
      return false;

    // One element must be all ones, and the other must be all zeros.
    if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) ||
          (match(EltC2, m_Zero()) && match(EltC1, m_AllOnes()))))
      return false;
  }
  return true;
}
2978 /// We have an expression of the form (A & C) | (B & D). If A is a scalar or
2979 /// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
2980 /// B, it can be used as the condition operand of a select instruction.
2981 /// We will detect (A & C) | ~(B | D) when the flag ABIsTheSame enabled.
2982 Value
*InstCombinerImpl::getSelectCondition(Value
*A
, Value
*B
,
2984 // We may have peeked through bitcasts in the caller.
2985 // Exit immediately if we don't have (vector) integer types.
2986 Type
*Ty
= A
->getType();
2987 if (!Ty
->isIntOrIntVectorTy() || !B
->getType()->isIntOrIntVectorTy())
2990 // If A is the 'not' operand of B and has enough signbits, we have our answer.
2991 if (ABIsTheSame
? (A
== B
) : match(B
, m_Not(m_Specific(A
)))) {
2992 // If these are scalars or vectors of i1, A can be used directly.
2993 if (Ty
->isIntOrIntVectorTy(1))
2996 // If we look through a vector bitcast, the caller will bitcast the operands
2997 // to match the condition's number of bits (N x i1).
2998 // To make this poison-safe, disallow bitcast from wide element to narrow
2999 // element. That could allow poison in lanes where it was not present in the
3001 A
= peekThroughBitcast(A
);
3002 if (A
->getType()->isIntOrIntVectorTy()) {
3003 unsigned NumSignBits
= ComputeNumSignBits(A
);
3004 if (NumSignBits
== A
->getType()->getScalarSizeInBits() &&
3005 NumSignBits
<= Ty
->getScalarSizeInBits())
3006 return Builder
.CreateTrunc(A
, CmpInst::makeCmpResultType(A
->getType()));
3011 // TODO: add support for sext and constant case
3015 // If both operands are constants, see if the constants are inverse bitmasks.
3016 Constant
*AConst
, *BConst
;
3017 if (match(A
, m_Constant(AConst
)) && match(B
, m_Constant(BConst
)))
3018 if (AConst
== ConstantExpr::getNot(BConst
) &&
3019 ComputeNumSignBits(A
) == Ty
->getScalarSizeInBits())
3020 return Builder
.CreateZExtOrTrunc(A
, CmpInst::makeCmpResultType(Ty
));
3022 // Look for more complex patterns. The 'not' op may be hidden behind various
3023 // casts. Look through sexts and bitcasts to find the booleans.
3026 if (match(A
, m_SExt(m_Value(Cond
))) &&
3027 Cond
->getType()->isIntOrIntVectorTy(1)) {
3028 // A = sext i1 Cond; B = sext (not (i1 Cond))
3029 if (match(B
, m_SExt(m_Not(m_Specific(Cond
)))))
3032 // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond)))
3033 // TODO: The one-use checks are unnecessary or misplaced. If the caller
3034 // checked for uses on logic ops/casts, that should be enough to
3035 // make this transform worthwhile.
3036 if (match(B
, m_OneUse(m_Not(m_Value(NotB
))))) {
3037 NotB
= peekThroughBitcast(NotB
, true);
3038 if (match(NotB
, m_SExt(m_Specific(Cond
))))
3043 // All scalar (and most vector) possibilities should be handled now.
3044 // Try more matches that only apply to non-splat constant vectors.
3045 if (!Ty
->isVectorTy())
3048 // If both operands are xor'd with constants using the same sexted boolean
3049 // operand, see if the constants are inverse bitmasks.
3050 // TODO: Use ConstantExpr::getNot()?
3051 if (match(A
, (m_Xor(m_SExt(m_Value(Cond
)), m_Constant(AConst
)))) &&
3052 match(B
, (m_Xor(m_SExt(m_Specific(Cond
)), m_Constant(BConst
)))) &&
3053 Cond
->getType()->isIntOrIntVectorTy(1) &&
3054 areInverseVectorBitmasks(AConst
, BConst
)) {
3055 AConst
= ConstantExpr::getTrunc(AConst
, CmpInst::makeCmpResultType(Ty
));
3056 return Builder
.CreateXor(Cond
, AConst
);
/// We have an expression of the form (A & C) | (B & D). Try to simplify this
/// to "A' ? C : D", where A' is a boolean or vector of booleans.
/// When InvertFalseVal is set to true, we try to match the pattern
/// where we have peeked through a 'not' op and A and B are the same:
/// (A & C) | ~(A | D) --> (A & C) | (~A & ~D) --> A' ? C : ~D
Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
                                              Value *D, bool InvertFalseVal) {
  // The potential condition of the select may be bitcasted. In that case, look
  // through its bitcast and the corresponding bitcast of the 'not' condition.
  Type *OrigType = A->getType();
  A = peekThroughBitcast(A, true);
  B = peekThroughBitcast(B, true);
  if (Value *Cond = getSelectCondition(A, B, InvertFalseVal)) {
    // ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
    // If this is a vector, we may need to cast to match the condition's length.
    // The bitcasts will either all exist or all not exist. The builder will
    // not create unnecessary casts if the types already match.
    Type *SelTy = A->getType();
    if (auto *VecTy = dyn_cast<VectorType>(Cond->getType())) {
      // For a fixed or scalable vector get N from <{vscale x} N x iM>
      unsigned Elts = VecTy->getElementCount().getKnownMinValue();
      // For a fixed or scalable vector, get the size in bits of N x iM; for a
      // scalar this is just M.
      unsigned SelEltSize = SelTy->getPrimitiveSizeInBits().getKnownMinValue();
      Type *EltTy = Builder.getIntNTy(SelEltSize / Elts);
      SelTy = VectorType::get(EltTy, VecTy->getElementCount());
    }
    Value *BitcastC = Builder.CreateBitCast(C, SelTy);
    if (InvertFalseVal)
      D = Builder.CreateNot(D);
    Value *BitcastD = Builder.CreateBitCast(D, SelTy);
    Value *Select = Builder.CreateSelect(Cond, BitcastC, BitcastD);
    return Builder.CreateBitCast(Select, OrigType);
  }

  return nullptr;
}
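
// Illustrative example (hypothetical IR, added for clarity):
//   %a  = sext i1 %cond to i32
//   %na = xor i32 %a, -1
//   %or = or i32 (and i32 %a, %c), (and i32 %na, %d)
// matchSelectFromAndOr(%a, %c, %na, %d) builds a select on the recovered
// condition, which later simplifies to "select i1 %cond, i32 %c, i32 %d".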
// (icmp eq X, C) | (icmp ult Other, (X - C)) -> (icmp ule Other, (X - (C + 1)))
// (icmp ne X, C) & (icmp uge Other, (X - C)) -> (icmp ugt Other, (X - (C + 1)))
static Value *foldAndOrOfICmpEqConstantAndICmp(ICmpInst *LHS, ICmpInst *RHS,
                                               bool IsAnd, bool IsLogical,
                                               IRBuilderBase &Builder) {
  Value *LHS0 = LHS->getOperand(0);
  Value *RHS0 = RHS->getOperand(0);
  Value *RHS1 = RHS->getOperand(1);

  ICmpInst::Predicate LPred =
      IsAnd ? LHS->getInversePredicate() : LHS->getPredicate();
  ICmpInst::Predicate RPred =
      IsAnd ? RHS->getInversePredicate() : RHS->getPredicate();

  const APInt *CInt;
  if (LPred != ICmpInst::ICMP_EQ ||
      !match(LHS->getOperand(1), m_APIntAllowUndef(CInt)) ||
      !LHS0->getType()->isIntOrIntVectorTy() ||
      !(LHS->hasOneUse() || RHS->hasOneUse()))
    return nullptr;

  auto MatchRHSOp = [LHS0, CInt](const Value *RHSOp) {
    return match(RHSOp,
                 m_Add(m_Specific(LHS0), m_SpecificIntAllowUndef(-*CInt))) ||
           (CInt->isZero() && RHSOp == LHS0);
  };

  Value *Other;
  if (RPred == ICmpInst::ICMP_ULT && MatchRHSOp(RHS1))
    Other = RHS0;
  else if (RPred == ICmpInst::ICMP_UGT && MatchRHSOp(RHS0))
    Other = RHS1;
  else
    return nullptr;

  if (IsLogical)
    Other = Builder.CreateFreeze(Other);

  return Builder.CreateICmp(
      IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE,
      Builder.CreateSub(LHS0, ConstantInt::get(LHS0->getType(), *CInt + 1)),
      Other);
}
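
// Worked example for the 'or' form above (hypothetical values, C = 5):
//   (icmp eq i32 %x, 5) | (icmp ult i32 %other, (sub i32 %x, 5))
//     --> icmp uge (sub i32 %x, 6), %other        ; i.e. %other u<= %x - 6
// When %x == 5, %x - 6 wraps to UINT_MAX, so the new compare is always true,
// which matches the original 'eq' arm.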
/// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible.
/// If IsLogical is true, then the and/or is in select form and the transform
/// must be poison-safe.
Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                          Instruction &I, bool IsAnd,
                                          bool IsLogical) {
  const SimplifyQuery Q = SQ.getWithInstruction(&I);

  // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
  // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
  // if K1 and K2 are a one-bit mask.
  if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &I, IsAnd, IsLogical))
    return V;

  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
  Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
  const APInt *LHSC = nullptr, *RHSC = nullptr;
  match(LHS1, m_APInt(LHSC));
  match(RHS1, m_APInt(RHSC));

  // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
  // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
  if (predicatesFoldable(PredL, PredR)) {
    if (LHS0 == RHS1 && LHS1 == RHS0) {
      PredL = ICmpInst::getSwappedPredicate(PredL);
      std::swap(LHS0, LHS1);
    }
    if (LHS0 == RHS0 && LHS1 == RHS1) {
      unsigned Code = IsAnd ? getICmpCode(PredL) & getICmpCode(PredR)
                            : getICmpCode(PredL) | getICmpCode(PredR);
      bool IsSigned = LHS->isSigned() || RHS->isSigned();
      return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder);
    }
  }

  // handle (roughly):
  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
  // (icmp eq (A & B), C) & (icmp eq (A & D), E)
  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder))
    return V;

  if (Value *V =
          foldAndOrOfICmpEqConstantAndICmp(LHS, RHS, IsAnd, IsLogical, Builder))
    return V;
  // We can treat logical like bitwise here, because both operands are used on
  // the LHS, and as such poison from both will propagate.
  if (Value *V = foldAndOrOfICmpEqConstantAndICmp(RHS, LHS, IsAnd,
                                                  /*IsLogical*/ false, Builder))
    return V;

  if (Value *V =
          foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q))
    return V;
  // We can convert this case to bitwise and, because both operands are used
  // on the LHS, and as such poison from both will propagate.
  if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd,
                                             /*IsLogical*/ false, Builder, Q))
    return V;

  if (Value *V = foldIsPowerOf2OrZero(LHS, RHS, IsAnd, Builder))
    return V;
  if (Value *V = foldIsPowerOf2OrZero(RHS, LHS, IsAnd, Builder))
    return V;

  // TODO: One of these directions is fine with logical and/or, the other could
  // be supported by inserting freeze.
  if (!IsLogical) {
    // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
    // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
    if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/!IsAnd))
      return V;

    // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
    // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n
    if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/!IsAnd))
      return V;
  }

  // TODO: Add conjugated or fold, check whether it is safe for logical and/or.
  if (IsAnd && !IsLogical)
    if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder))
      return V;

  if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder))
    return V;

  if (Value *V = foldPowerOf2AndShiftedMask(LHS, RHS, IsAnd, Builder))
    return V;

  // TODO: Verify whether this is safe for logical and/or.
  if (!IsLogical) {
    if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder))
      return X;
    if (Value *X = foldUnsignedUnderflowCheck(RHS, LHS, IsAnd, Q, Builder))
      return X;
  }

  if (Value *X = foldEqOfParts(LHS, RHS, IsAnd))
    return X;

  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
  // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
  // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs.
  if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
      PredL == PredR && match(LHS1, m_ZeroInt()) && match(RHS1, m_ZeroInt()) &&
      LHS0->getType() == RHS0->getType()) {
    Value *NewOr = Builder.CreateOr(LHS0, RHS0);
    return Builder.CreateICmp(PredL, NewOr,
                              Constant::getNullValue(NewOr->getType()));
  }

  // (icmp ne A, -1) | (icmp ne B, -1) --> (icmp ne (A&B), -1)
  // (icmp eq A, -1) & (icmp eq B, -1) --> (icmp eq (A&B), -1)
  if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
      PredL == PredR && match(LHS1, m_AllOnes()) && match(RHS1, m_AllOnes()) &&
      LHS0->getType() == RHS0->getType()) {
    Value *NewAnd = Builder.CreateAnd(LHS0, RHS0);
    return Builder.CreateICmp(PredL, NewAnd,
                              Constant::getAllOnesValue(LHS0->getType()));
  }
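
  // Worked example for the all-ones fold above (hypothetical IR):
  //   (icmp eq i8 %a, -1) & (icmp eq i8 %b, -1)
  //     --> icmp eq (and i8 %a, %b), -1
  // because %a & %b is all-ones iff both %a and %b are all-ones; the zero fold
  // right before it is the same idea with 'or' and 0.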
  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
  if (!LHSC || !RHSC)
    return nullptr;

  // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
  // (trunc x) != C1 | (and x, CA) != C2 -> (and x, CA|CMAX) != C1|C2
  // where CMAX is the all ones value for the truncated type,
  // iff the lower bits of C2 and CA are zero.
  if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
      PredL == PredR && LHS->hasOneUse() && RHS->hasOneUse()) {
    Value *V;
    const APInt *AndC, *SmallC = nullptr, *BigC = nullptr;

    // (trunc x) == C1 & (and x, CA) == C2
    // (and x, CA) == C2 & (trunc x) == C1
    if (match(RHS0, m_Trunc(m_Value(V))) &&
        match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
      SmallC = RHSC;
      BigC = LHSC;
    } else if (match(LHS0, m_Trunc(m_Value(V))) &&
               match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
      SmallC = LHSC;
      BigC = RHSC;
    }

    if (SmallC && BigC) {
      unsigned BigBitSize = BigC->getBitWidth();
      unsigned SmallBitSize = SmallC->getBitWidth();

      // Check that the low bits are zero.
      APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
      if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) {
        Value *NewAnd = Builder.CreateAnd(V, Low | *AndC);
        APInt N = SmallC->zext(BigBitSize) | *BigC;
        Value *NewVal = ConstantInt::get(NewAnd->getType(), N);
        return Builder.CreateICmp(PredL, NewAnd, NewVal);
      }
    }
  }

  // Match naive pattern (and its inverted form) for checking if two values
  // share same sign. An example of the pattern:
  // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1) -> (icmp sgt (X ^ Y), -1)
  // Inverted form (example):
  // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0)
  bool TrueIfSignedL, TrueIfSignedR;
  if (isSignBitCheck(PredL, *LHSC, TrueIfSignedL) &&
      isSignBitCheck(PredR, *RHSC, TrueIfSignedR) &&
      (RHS->hasOneUse() || LHS->hasOneUse())) {
    Value *X, *Y;
    if (IsAnd) {
      if ((TrueIfSignedL && !TrueIfSignedR &&
           match(LHS0, m_Or(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_And(m_Specific(X), m_Specific(Y)))) ||
          (!TrueIfSignedL && TrueIfSignedR &&
           match(LHS0, m_And(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y))))) {
        Value *NewXor = Builder.CreateXor(X, Y);
        return Builder.CreateIsNeg(NewXor);
      }
    } else {
      if ((TrueIfSignedL && !TrueIfSignedR &&
           match(LHS0, m_And(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y)))) ||
          (!TrueIfSignedL && TrueIfSignedR &&
           match(LHS0, m_Or(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_And(m_Specific(X), m_Specific(Y))))) {
        Value *NewXor = Builder.CreateXor(X, Y);
        return Builder.CreateIsNotNeg(NewXor);
      }
    }
  }

  return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
}
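
// Worked example for the shared-sign fold above (hypothetical i8 values
// X = -2 (0xFE), Y = 3 (0x03)):
//   (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1)
//     = (2 s< 0) | (-1 s> -1) = false
//   (icmp sgt (X ^ Y), -1) = (-3 s> -1) = false    ; same result: signs differ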
3341 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
3342 // here. We should standardize that construct where it is needed or choose some
3343 // other way to ensure that commutated variants of patterns are not missed.
3344 Instruction
*InstCombinerImpl::visitOr(BinaryOperator
&I
) {
3345 if (Value
*V
= simplifyOrInst(I
.getOperand(0), I
.getOperand(1),
3346 SQ
.getWithInstruction(&I
)))
3347 return replaceInstUsesWith(I
, V
);
3349 if (SimplifyAssociativeOrCommutative(I
))
3352 if (Instruction
*X
= foldVectorBinop(I
))
3355 if (Instruction
*Phi
= foldBinopWithPhiOperands(I
))
3358 // See if we can simplify any instructions used by the instruction whose sole
3359 // purpose is to compute bits we don't care about.
3360 if (SimplifyDemandedInstructionBits(I
))
3363 // Do this before using distributive laws to catch simple and/or/not patterns.
3364 if (Instruction
*Xor
= foldOrToXor(I
, Builder
))
3367 if (Instruction
*X
= foldComplexAndOrPatterns(I
, Builder
))
3370 // (A&B)|(A&C) -> A&(B|C) etc
3371 if (Value
*V
= foldUsingDistributiveLaws(I
))
3372 return replaceInstUsesWith(I
, V
);
3374 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
3375 Type
*Ty
= I
.getType();
3376 if (Ty
->isIntOrIntVectorTy(1)) {
3377 if (auto *SI0
= dyn_cast
<SelectInst
>(Op0
)) {
3379 foldAndOrOfSelectUsingImpliedCond(Op1
, *SI0
, /* IsAnd */ false))
3382 if (auto *SI1
= dyn_cast
<SelectInst
>(Op1
)) {
3384 foldAndOrOfSelectUsingImpliedCond(Op0
, *SI1
, /* IsAnd */ false))
3389 if (Instruction
*FoldedLogic
= foldBinOpIntoSelectOrPhi(I
))
3392 if (Instruction
*BitOp
= matchBSwapOrBitReverse(I
, /*MatchBSwaps*/ true,
3393 /*MatchBitReversals*/ true))
3396 if (Instruction
*Funnel
= matchFunnelShift(I
, *this, DT
))
3399 if (Instruction
*Concat
= matchOrConcat(I
, Builder
))
3400 return replaceInstUsesWith(I
, Concat
);
3402 if (Instruction
*R
= foldBinOpShiftWithShift(I
))
3405 if (Instruction
*R
= tryFoldInstWithCtpopWithNot(&I
))
3410 if (match(&I
, m_c_Or(m_OneUse(m_Xor(m_Value(X
), m_APInt(CV
))), m_Value(Y
))) &&
3411 !CV
->isAllOnes() && MaskedValueIsZero(Y
, *CV
, 0, &I
)) {
3412 // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
3413 // The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
3414 Value
*Or
= Builder
.CreateOr(X
, Y
);
3415 return BinaryOperator::CreateXor(Or
, ConstantInt::get(Ty
, *CV
));
3418 // If the operands have no common bits set:
3419 // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1)
3420 if (match(&I
, m_c_DisjointOr(m_OneUse(m_Mul(m_Value(X
), m_Value(Y
))),
3422 Value
*IncrementY
= Builder
.CreateAdd(Y
, ConstantInt::get(Ty
, 1));
3423 return BinaryOperator::CreateMul(X
, IncrementY
);
3426 // X | (X ^ Y) --> X | Y (4 commuted patterns)
3427 if (match(&I
, m_c_Or(m_Value(X
), m_c_Xor(m_Deferred(X
), m_Value(Y
)))))
3428 return BinaryOperator::CreateOr(X
, Y
);
3430 // (A & C) | (B & D)
3431 Value
*A
, *B
, *C
, *D
;
3432 if (match(Op0
, m_And(m_Value(A
), m_Value(C
))) &&
3433 match(Op1
, m_And(m_Value(B
), m_Value(D
)))) {
3435 // (A & C0) | (B & C1)
3436 const APInt
*C0
, *C1
;
3437 if (match(C
, m_APInt(C0
)) && match(D
, m_APInt(C1
))) {
3440 // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B
3441 if (match(A
, m_c_Or(m_Value(X
), m_Specific(B
))))
3442 return BinaryOperator::CreateOr(Builder
.CreateAnd(X
, *C0
), B
);
3443 // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A
3444 if (match(B
, m_c_Or(m_Specific(A
), m_Value(X
))))
3445 return BinaryOperator::CreateOr(Builder
.CreateAnd(X
, *C1
), A
);
3447 // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B
3448 if (match(A
, m_c_Xor(m_Value(X
), m_Specific(B
))))
3449 return BinaryOperator::CreateXor(Builder
.CreateAnd(X
, *C0
), B
);
3450 // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A
3451 if (match(B
, m_c_Xor(m_Specific(A
), m_Value(X
))))
3452 return BinaryOperator::CreateXor(Builder
.CreateAnd(X
, *C1
), A
);
3455 if ((*C0
& *C1
).isZero()) {
3456 // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1)
3457 // iff (C0 & C1) == 0 and (X & ~C0) == 0
3458 if (match(A
, m_c_Or(m_Value(X
), m_Specific(B
))) &&
3459 MaskedValueIsZero(X
, ~*C0
, 0, &I
)) {
3460 Constant
*C01
= ConstantInt::get(Ty
, *C0
| *C1
);
3461 return BinaryOperator::CreateAnd(A
, C01
);
3463 // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1)
3464 // iff (C0 & C1) == 0 and (X & ~C1) == 0
3465 if (match(B
, m_c_Or(m_Value(X
), m_Specific(A
))) &&
3466 MaskedValueIsZero(X
, ~*C1
, 0, &I
)) {
3467 Constant
*C01
= ConstantInt::get(Ty
, *C0
| *C1
);
3468 return BinaryOperator::CreateAnd(B
, C01
);
3470 // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1)
3471 // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0.
3472 const APInt
*C2
, *C3
;
3473 if (match(A
, m_Or(m_Value(X
), m_APInt(C2
))) &&
3474 match(B
, m_Or(m_Specific(X
), m_APInt(C3
))) &&
3475 (*C2
& ~*C0
).isZero() && (*C3
& ~*C1
).isZero()) {
3476 Value
*Or
= Builder
.CreateOr(X
, *C2
| *C3
, "bitfield");
3477 Constant
*C01
= ConstantInt::get(Ty
, *C0
| *C1
);
3478 return BinaryOperator::CreateAnd(Or
, C01
);
3483 // Don't try to form a select if it's unlikely that we'll get rid of at
3484 // least one of the operands. A select is generally more expensive than the
3485 // 'or' that it is replacing.
3486 if (Op0
->hasOneUse() || Op1
->hasOneUse()) {
3487 // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
3488 if (Value
*V
= matchSelectFromAndOr(A
, C
, B
, D
))
3489 return replaceInstUsesWith(I
, V
);
3490 if (Value
*V
= matchSelectFromAndOr(A
, C
, D
, B
))
3491 return replaceInstUsesWith(I
, V
);
3492 if (Value
*V
= matchSelectFromAndOr(C
, A
, B
, D
))
3493 return replaceInstUsesWith(I
, V
);
3494 if (Value
*V
= matchSelectFromAndOr(C
, A
, D
, B
))
3495 return replaceInstUsesWith(I
, V
);
3496 if (Value
*V
= matchSelectFromAndOr(B
, D
, A
, C
))
3497 return replaceInstUsesWith(I
, V
);
3498 if (Value
*V
= matchSelectFromAndOr(B
, D
, C
, A
))
3499 return replaceInstUsesWith(I
, V
);
3500 if (Value
*V
= matchSelectFromAndOr(D
, B
, A
, C
))
3501 return replaceInstUsesWith(I
, V
);
3502 if (Value
*V
= matchSelectFromAndOr(D
, B
, C
, A
))
3503 return replaceInstUsesWith(I
, V
);
3507 if (match(Op0
, m_And(m_Value(A
), m_Value(C
))) &&
3508 match(Op1
, m_Not(m_Or(m_Value(B
), m_Value(D
)))) &&
3509 (Op0
->hasOneUse() || Op1
->hasOneUse())) {
3510 // (Cond & C) | ~(Cond | D) -> Cond ? C : ~D
3511 if (Value
*V
= matchSelectFromAndOr(A
, C
, B
, D
, true))
3512 return replaceInstUsesWith(I
, V
);
3513 if (Value
*V
= matchSelectFromAndOr(A
, C
, D
, B
, true))
3514 return replaceInstUsesWith(I
, V
);
3515 if (Value
*V
= matchSelectFromAndOr(C
, A
, B
, D
, true))
3516 return replaceInstUsesWith(I
, V
);
3517 if (Value
*V
= matchSelectFromAndOr(C
, A
, D
, B
, true))
3518 return replaceInstUsesWith(I
, V
);
3521 // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
3522 if (match(Op0
, m_Xor(m_Value(A
), m_Value(B
))))
3523 if (match(Op1
, m_Xor(m_Xor(m_Specific(B
), m_Value(C
)), m_Specific(A
))))
3524 return BinaryOperator::CreateOr(Op0
, C
);
3526 // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
3527 if (match(Op0
, m_Xor(m_Xor(m_Value(A
), m_Value(C
)), m_Value(B
))))
3528 if (match(Op1
, m_Xor(m_Specific(B
), m_Specific(A
))))
3529 return BinaryOperator::CreateOr(Op1
, C
);
3531 // ((A & B) ^ C) | B -> C | B
3532 if (match(Op0
, m_c_Xor(m_c_And(m_Value(A
), m_Specific(Op1
)), m_Value(C
))))
3533 return BinaryOperator::CreateOr(C
, Op1
);
3535 // B | ((A & B) ^ C) -> B | C
3536 if (match(Op1
, m_c_Xor(m_c_And(m_Value(A
), m_Specific(Op0
)), m_Value(C
))))
3537 return BinaryOperator::CreateOr(Op0
, C
);
3539 // ((B | C) & A) | B -> B | (A & C)
3540 if (match(Op0
, m_c_And(m_c_Or(m_Specific(Op1
), m_Value(C
)), m_Value(A
))))
3541 return BinaryOperator::CreateOr(Op1
, Builder
.CreateAnd(A
, C
));
3543 // B | ((B | C) & A) -> B | (A & C)
3544 if (match(Op1
, m_c_And(m_c_Or(m_Specific(Op0
), m_Value(C
)), m_Value(A
))))
3545 return BinaryOperator::CreateOr(Op0
, Builder
.CreateAnd(A
, C
));
3547 if (Instruction
*DeMorgan
= matchDeMorgansLaws(I
, *this))
3550 // Canonicalize xor to the RHS.
3551 bool SwappedForXor
= false;
3552 if (match(Op0
, m_Xor(m_Value(), m_Value()))) {
3553 std::swap(Op0
, Op1
);
3554 SwappedForXor
= true;
3557 if (match(Op1
, m_Xor(m_Value(A
), m_Value(B
)))) {
3558 // (A | ?) | (A ^ B) --> (A | ?) | B
3559 // (B | ?) | (A ^ B) --> (B | ?) | A
3560 if (match(Op0
, m_c_Or(m_Specific(A
), m_Value())))
3561 return BinaryOperator::CreateOr(Op0
, B
);
3562 if (match(Op0
, m_c_Or(m_Specific(B
), m_Value())))
3563 return BinaryOperator::CreateOr(Op0
, A
);
3565 // (A & B) | (A ^ B) --> A | B
3566 // (B & A) | (A ^ B) --> A | B
3567 if (match(Op0
, m_And(m_Specific(A
), m_Specific(B
))) ||
3568 match(Op0
, m_And(m_Specific(B
), m_Specific(A
))))
3569 return BinaryOperator::CreateOr(A
, B
);
3571 // ~A | (A ^ B) --> ~(A & B)
3572 // ~B | (A ^ B) --> ~(A & B)
3573 // The swap above should always make Op0 the 'not'.
3574 if ((Op0
->hasOneUse() || Op1
->hasOneUse()) &&
3575 (match(Op0
, m_Not(m_Specific(A
))) || match(Op0
, m_Not(m_Specific(B
)))))
3576 return BinaryOperator::CreateNot(Builder
.CreateAnd(A
, B
));
3578 // Same as above, but peek through an 'and' to the common operand:
3579 // ~(A & ?) | (A ^ B) --> ~((A & ?) & B)
3580 // ~(B & ?) | (A ^ B) --> ~((B & ?) & A)
3582 if ((Op0
->hasOneUse() || Op1
->hasOneUse()) &&
3583 match(Op0
, m_Not(m_CombineAnd(m_Instruction(And
),
3584 m_c_And(m_Specific(A
), m_Value())))))
3585 return BinaryOperator::CreateNot(Builder
.CreateAnd(And
, B
));
3586 if ((Op0
->hasOneUse() || Op1
->hasOneUse()) &&
3587 match(Op0
, m_Not(m_CombineAnd(m_Instruction(And
),
3588 m_c_And(m_Specific(B
), m_Value())))))
3589 return BinaryOperator::CreateNot(Builder
.CreateAnd(And
, A
));
3591 // (~A | C) | (A ^ B) --> ~(A & B) | C
3592 // (~B | C) | (A ^ B) --> ~(A & B) | C
3593 if (Op0
->hasOneUse() && Op1
->hasOneUse() &&
3594 (match(Op0
, m_c_Or(m_Not(m_Specific(A
)), m_Value(C
))) ||
3595 match(Op0
, m_c_Or(m_Not(m_Specific(B
)), m_Value(C
))))) {
3596 Value
*Nand
= Builder
.CreateNot(Builder
.CreateAnd(A
, B
), "nand");
3597 return BinaryOperator::CreateOr(Nand
, C
);
3600 // A | (~A ^ B) --> ~B | A
3601 // B | (A ^ ~B) --> ~A | B
3602 if (Op1
->hasOneUse() && match(A
, m_Not(m_Specific(Op0
)))) {
3603 Value
*NotB
= Builder
.CreateNot(B
, B
->getName() + ".not");
3604 return BinaryOperator::CreateOr(NotB
, Op0
);
3606 if (Op1
->hasOneUse() && match(B
, m_Not(m_Specific(Op0
)))) {
3607 Value
*NotA
= Builder
.CreateNot(A
, A
->getName() + ".not");
3608 return BinaryOperator::CreateOr(NotA
, Op0
);
3612 // A | ~(A | B) -> A | ~B
3613 // A | ~(A ^ B) -> A | ~B
3614 if (match(Op1
, m_Not(m_Value(A
))))
3615 if (BinaryOperator
*B
= dyn_cast
<BinaryOperator
>(A
))
3616 if ((Op0
== B
->getOperand(0) || Op0
== B
->getOperand(1)) &&
3617 Op1
->hasOneUse() && (B
->getOpcode() == Instruction::Or
||
3618 B
->getOpcode() == Instruction::Xor
)) {
3619 Value
*NotOp
= Op0
== B
->getOperand(0) ? B
->getOperand(1) :
3621 Value
*Not
= Builder
.CreateNot(NotOp
, NotOp
->getName() + ".not");
3622 return BinaryOperator::CreateOr(Not
, Op0
);
3626 std::swap(Op0
, Op1
);
3629 ICmpInst
*LHS
= dyn_cast
<ICmpInst
>(Op0
);
3630 ICmpInst
*RHS
= dyn_cast
<ICmpInst
>(Op1
);
3632 if (Value
*Res
= foldAndOrOfICmps(LHS
, RHS
, I
, /* IsAnd */ false))
3633 return replaceInstUsesWith(I
, Res
);
3635 // TODO: Make this recursive; it's a little tricky because an arbitrary
3636 // number of 'or' instructions might have to be created.
3638 if (LHS
&& match(Op1
, m_OneUse(m_LogicalOr(m_Value(X
), m_Value(Y
))))) {
3639 bool IsLogical
= isa
<SelectInst
>(Op1
);
3640 // LHS | (X || Y) --> (LHS || X) || Y
3641 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
3643 foldAndOrOfICmps(LHS
, Cmp
, I
, /* IsAnd */ false, IsLogical
))
3644 return replaceInstUsesWith(I
, IsLogical
3645 ? Builder
.CreateLogicalOr(Res
, Y
)
3646 : Builder
.CreateOr(Res
, Y
));
3647 // LHS | (X || Y) --> X || (LHS | Y)
3648 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
3649 if (Value
*Res
= foldAndOrOfICmps(LHS
, Cmp
, I
, /* IsAnd */ false,
3650 /* IsLogical */ false))
3651 return replaceInstUsesWith(I
, IsLogical
3652 ? Builder
.CreateLogicalOr(X
, Res
)
3653 : Builder
.CreateOr(X
, Res
));
3655 if (RHS
&& match(Op0
, m_OneUse(m_LogicalOr(m_Value(X
), m_Value(Y
))))) {
3656 bool IsLogical
= isa
<SelectInst
>(Op0
);
3657 // (X || Y) | RHS --> (X || RHS) || Y
3658 if (auto *Cmp
= dyn_cast
<ICmpInst
>(X
))
3660 foldAndOrOfICmps(Cmp
, RHS
, I
, /* IsAnd */ false, IsLogical
))
3661 return replaceInstUsesWith(I
, IsLogical
3662 ? Builder
.CreateLogicalOr(Res
, Y
)
3663 : Builder
.CreateOr(Res
, Y
));
3664 // (X || Y) | RHS --> X || (Y | RHS)
3665 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Y
))
3666 if (Value
*Res
= foldAndOrOfICmps(Cmp
, RHS
, I
, /* IsAnd */ false,
3667 /* IsLogical */ false))
3668 return replaceInstUsesWith(I
, IsLogical
3669 ? Builder
.CreateLogicalOr(X
, Res
)
3670 : Builder
.CreateOr(X
, Res
));
3674 if (FCmpInst
*LHS
= dyn_cast
<FCmpInst
>(I
.getOperand(0)))
3675 if (FCmpInst
*RHS
= dyn_cast
<FCmpInst
>(I
.getOperand(1)))
3676 if (Value
*Res
= foldLogicOfFCmps(LHS
, RHS
, /*IsAnd*/ false))
3677 return replaceInstUsesWith(I
, Res
);
3679 if (Instruction
*FoldedFCmps
= reassociateFCmps(I
, Builder
))
3682 if (Instruction
*CastedOr
= foldCastedBitwiseLogic(I
))
3685 if (Instruction
*Sel
= foldBinopOfSextBoolToSelect(I
))
3688 // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
3689 // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
3690 // with binop identity constant. But creating a select with non-constant
3691 // arm may not be reversible due to poison semantics. Is that a good
3692 // canonicalization?
3693 if (match(&I
, m_c_Or(m_OneUse(m_SExt(m_Value(A
))), m_Value(B
))) &&
3694 A
->getType()->isIntOrIntVectorTy(1))
3695 return SelectInst::Create(A
, ConstantInt::getAllOnesValue(Ty
), B
);
3697 // Note: If we've gotten to the point of visiting the outer OR, then the
3698 // inner one couldn't be simplified. If it was a constant, then it won't
3699 // be simplified by a later pass either, so we try swapping the inner/outer
3700 // ORs in the hopes that we'll be able to simplify it this way.
3701 // (X|C) | V --> (X|V) | C
3703 if (Op0
->hasOneUse() && !match(Op1
, m_ConstantInt()) &&
3704 match(Op0
, m_Or(m_Value(A
), m_ConstantInt(CI
)))) {
3705 Value
*Inner
= Builder
.CreateOr(A
, Op1
);
3706 Inner
->takeName(Op0
);
3707 return BinaryOperator::CreateOr(Inner
, CI
);
3710 // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
3711 // Since this OR statement hasn't been optimized further yet, we hope
3712 // that this transformation will allow the new ORs to be optimized.
3714 Value
*X
= nullptr, *Y
= nullptr;
3715 if (Op0
->hasOneUse() && Op1
->hasOneUse() &&
3716 match(Op0
, m_Select(m_Value(X
), m_Value(A
), m_Value(B
))) &&
3717 match(Op1
, m_Select(m_Value(Y
), m_Value(C
), m_Value(D
))) && X
== Y
) {
3718 Value
*orTrue
= Builder
.CreateOr(A
, C
);
3719 Value
*orFalse
= Builder
.CreateOr(B
, D
);
3720 return SelectInst::Create(X
, orTrue
, orFalse
);
3724 // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X.
3727 if (match(&I
, m_c_Or(m_OneUse(m_AShr(
3728 m_NSWSub(m_Value(Y
), m_Value(X
)),
3729 m_SpecificInt(Ty
->getScalarSizeInBits() - 1))),
3731 Value
*NewICmpInst
= Builder
.CreateICmpSGT(X
, Y
);
3732 Value
*AllOnes
= ConstantInt::getAllOnesValue(Ty
);
3733 return SelectInst::Create(NewICmpInst
, AllOnes
, X
);
3738 // ((A & B) ^ A) | ((A & B) ^ B) -> A ^ B
3739 // (A ^ (A & B)) | (B ^ (A & B)) -> A ^ B
3740 // ((A & B) ^ B) | ((A & B) ^ A) -> A ^ B
3741 // (B ^ (A & B)) | (A ^ (A & B)) -> A ^ B
3742 const auto TryXorOpt
= [&](Value
*Lhs
, Value
*Rhs
) -> Instruction
* {
3743 if (match(Lhs
, m_c_Xor(m_And(m_Value(A
), m_Value(B
)), m_Deferred(A
))) &&
3745 m_c_Xor(m_And(m_Specific(A
), m_Specific(B
)), m_Deferred(B
)))) {
3746 return BinaryOperator::CreateXor(A
, B
);
3751 if (Instruction
*Result
= TryXorOpt(Op0
, Op1
))
3753 if (Instruction
*Result
= TryXorOpt(Op1
, Op0
))
3757 if (Instruction
*V
=
3758 canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I
))
3761 CmpInst::Predicate Pred
;
3762 Value
*Mul
, *Ov
, *MulIsNotZero
, *UMulWithOv
;
3763 // Check if the OR weakens the overflow condition for umul.with.overflow by
3764 // treating any non-zero result as overflow. In that case, we overflow if both
3765 // umul.with.overflow operands are != 0, as in that case the result can only
3766 // be 0, iff the multiplication overflows.
3768 m_c_Or(m_CombineAnd(m_ExtractValue
<1>(m_Value(UMulWithOv
)),
3770 m_CombineAnd(m_ICmp(Pred
,
3771 m_CombineAnd(m_ExtractValue
<0>(
3772 m_Deferred(UMulWithOv
)),
3775 m_Value(MulIsNotZero
)))) &&
3776 (Ov
->hasOneUse() || (MulIsNotZero
->hasOneUse() && Mul
->hasOneUse())) &&
3777 Pred
== CmpInst::ICMP_NE
) {
3779 if (match(UMulWithOv
, m_Intrinsic
<Intrinsic::umul_with_overflow
>(
3780 m_Value(A
), m_Value(B
)))) {
3781 Value
*NotNullA
= Builder
.CreateIsNotNull(A
);
3782 Value
*NotNullB
= Builder
.CreateIsNotNull(B
);
3783 return BinaryOperator::CreateAnd(NotNullA
, NotNullB
);
3787 /// Res, Overflow = xxx_with_overflow X, C1
3788 /// Try to canonicalize the pattern "Overflow | icmp pred Res, C2" into
3789 /// "Overflow | icmp pred X, C2 +/- C1".
3790 const WithOverflowInst
*WO
;
3792 const APInt
*C1
, *C2
;
3793 if (match(&I
, m_c_Or(m_CombineAnd(m_ExtractValue
<1>(m_CombineAnd(
3794 m_WithOverflowInst(WO
), m_Value(WOV
))),
3796 m_OneUse(m_ICmp(Pred
, m_ExtractValue
<0>(m_Deferred(WOV
)),
3798 (WO
->getBinaryOp() == Instruction::Add
||
3799 WO
->getBinaryOp() == Instruction::Sub
) &&
3800 (ICmpInst::isEquality(Pred
) ||
3801 WO
->isSigned() == ICmpInst::isSigned(Pred
)) &&
3802 match(WO
->getRHS(), m_APInt(C1
))) {
3804 APInt NewC
= WO
->getBinaryOp() == Instruction::Add
3805 ? (ICmpInst::isSigned(Pred
) ? C2
->ssub_ov(*C1
, Overflow
)
3806 : C2
->usub_ov(*C1
, Overflow
))
3807 : (ICmpInst::isSigned(Pred
) ? C2
->sadd_ov(*C1
, Overflow
)
3808 : C2
->uadd_ov(*C1
, Overflow
));
3809 if (!Overflow
|| ICmpInst::isEquality(Pred
)) {
3810 Value
*NewCmp
= Builder
.CreateICmp(
3811 Pred
, WO
->getLHS(), ConstantInt::get(WO
->getLHS()->getType(), NewC
));
3812 return BinaryOperator::CreateOr(Ov
, NewCmp
);
  // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
  if (sinkNotIntoOtherHandOfLogicalOp(I))
    return &I;

  // Improve "get low bit mask up to and including bit X" pattern:
  // (1 << X) | ((1 << X) + -1) --> -1 l>> (bitwidth(x) - 1 - X)
  if (match(&I, m_c_Or(m_Add(m_Shl(m_One(), m_Value(X)), m_AllOnes()),
                       m_Shl(m_One(), m_Deferred(X)))) &&
      match(&I, m_c_Or(m_OneUse(m_Value()), m_Value()))) {
    Value *Sub = Builder.CreateSub(
        ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1), X);
    return BinaryOperator::CreateLShr(Constant::getAllOnesValue(Ty), Sub);
  }
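
  // Worked example for the mask fold above (hypothetical i32, X = 3):
  //   (1 << 3) | ((1 << 3) + -1) = 8 | 7 = 0b1111
  //   -1 l>> (32 - 1 - 3) = 0xFFFFFFFF >> 28 = 0b1111   ; same low-bit mask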
  // An or recurrence w/loop invariant step is equivalent to (or start, step)
  PHINode *PN = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
    return replaceInstUsesWith(I, Builder.CreateOr(Start, Step));
3836 // (A & B) | (C | D) or (C | D) | (A & B)
3837 // Can be combined if C or D is of type (A/B & X)
3838 if (match(&I
, m_c_Or(m_OneUse(m_And(m_Value(A
), m_Value(B
))),
3839 m_OneUse(m_Or(m_Value(C
), m_Value(D
)))))) {
3840 // (A & B) | (C | ?) -> C | (? | (A & B))
3841 // (A & B) | (C | ?) -> C | (? | (A & B))
3842 // (A & B) | (C | ?) -> C | (? | (A & B))
3843 // (A & B) | (C | ?) -> C | (? | (A & B))
3844 // (C | ?) | (A & B) -> C | (? | (A & B))
3845 // (C | ?) | (A & B) -> C | (? | (A & B))
3846 // (C | ?) | (A & B) -> C | (? | (A & B))
3847 // (C | ?) | (A & B) -> C | (? | (A & B))
3848 if (match(D
, m_OneUse(m_c_And(m_Specific(A
), m_Value()))) ||
3849 match(D
, m_OneUse(m_c_And(m_Specific(B
), m_Value()))))
3850 return BinaryOperator::CreateOr(
3851 C
, Builder
.CreateOr(D
, Builder
.CreateAnd(A
, B
)));
3852 // (A & B) | (? | D) -> (? | (A & B)) | D
3853 // (A & B) | (? | D) -> (? | (A & B)) | D
3854 // (A & B) | (? | D) -> (? | (A & B)) | D
3855 // (A & B) | (? | D) -> (? | (A & B)) | D
3856 // (? | D) | (A & B) -> (? | (A & B)) | D
3857 // (? | D) | (A & B) -> (? | (A & B)) | D
3858 // (? | D) | (A & B) -> (? | (A & B)) | D
3859 // (? | D) | (A & B) -> (? | (A & B)) | D
3860 if (match(C
, m_OneUse(m_c_And(m_Specific(A
), m_Value()))) ||
3861 match(C
, m_OneUse(m_c_And(m_Specific(B
), m_Value()))))
3862 return BinaryOperator::CreateOr(
3863 Builder
.CreateOr(C
, Builder
.CreateAnd(A
, B
)), D
);
3866 if (Instruction
*R
= reassociateForUses(I
, Builder
))
3869 if (Instruction
*Canonicalized
= canonicalizeLogicFirst(I
, Builder
))
3870 return Canonicalized
;
3872 if (Instruction
*Folded
= foldLogicOfIsFPClass(I
, Op0
, Op1
))
3875 if (Instruction
*Res
= foldBinOpOfDisplacedShifts(I
))
  // If we are setting the sign bit of a floating-point value, convert
  // this to fneg(fabs), then cast back to integer.
  //
  // If the result isn't immediately cast back to a float, this will increase
  // the number of instructions. This is still probably a better canonical form
  // as it enables FP value tracking.
  //
  // Assumes any IEEE-represented type has the sign bit in the high bit.
  //
  // This is generous interpretation of noimplicitfloat, this is not a true
  // floating-point operation.
  Value *CastOp;
  if (match(Op0, m_BitCast(m_Value(CastOp))) && match(Op1, m_SignMask()) &&
      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
          Attribute::NoImplicitFloat)) {
    Type *EltTy = CastOp->getType()->getScalarType();
    if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
        EltTy->getPrimitiveSizeInBits() ==
            I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
      Value *FNegFAbs = Builder.CreateFNeg(FAbs);
      return new BitCastInst(FNegFAbs, I.getType());
    }
  }
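
  // Illustrative example (hypothetical IR, added for clarity):
  //   %i = bitcast float %f to i32
  //   %r = or i32 %i, -2147483648          ; 0x80000000, the sign-bit mask
  // becomes
  //   %a = call float @llvm.fabs.f32(float %f)
  //   %n = fneg float %a
  //   %r = bitcast float %n to i32         ; same bits, now visible to FP folds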
  // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2
  if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) &&
      match(Op1, m_APInt(C2))) {
    KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I);
    if ((KnownX.One & *C2) == *C2)
      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
  }

  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
    return Res;

  return nullptr;
}
/// A ^ B can be specified using other logic ops in a variety of patterns. We
/// can fold these early and efficiently by morphing an existing instruction.
static Instruction *foldXorToXor(BinaryOperator &I,
                                 InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::Xor);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *A, *B;

  // There are 4 commuted variants for each of the basic patterns.

  // (A & B) ^ (A | B) -> A ^ B
  // (A & B) ^ (B | A) -> A ^ B
  // (A | B) ^ (A & B) -> A ^ B
  // (A | B) ^ (B & A) -> A ^ B
  if (match(&I, m_c_Xor(m_And(m_Value(A), m_Value(B)),
                        m_c_Or(m_Deferred(A), m_Deferred(B)))))
    return BinaryOperator::CreateXor(A, B);

  // (A | ~B) ^ (~A | B) -> A ^ B
  // (~B | A) ^ (~A | B) -> A ^ B
  // (~A | B) ^ (A | ~B) -> A ^ B
  // (B | ~A) ^ (A | ~B) -> A ^ B
  if (match(&I, m_Xor(m_c_Or(m_Value(A), m_Not(m_Value(B))),
                      m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B)))))
    return BinaryOperator::CreateXor(A, B);

  // (A & ~B) ^ (~A & B) -> A ^ B
  // (~B & A) ^ (~A & B) -> A ^ B
  // (~A & B) ^ (A & ~B) -> A ^ B
  // (B & ~A) ^ (A & ~B) -> A ^ B
  if (match(&I, m_Xor(m_c_And(m_Value(A), m_Not(m_Value(B))),
                      m_c_And(m_Not(m_Deferred(A)), m_Deferred(B)))))
    return BinaryOperator::CreateXor(A, B);

  // For the remaining cases we need to get rid of one of the operands.
  if (!Op0->hasOneUse() && !Op1->hasOneUse())
    return nullptr;

  // (A | B) ^ ~(A & B) -> ~(A ^ B)
  // (A | B) ^ ~(B & A) -> ~(A ^ B)
  // (A & B) ^ ~(A | B) -> ~(A ^ B)
  // (A & B) ^ ~(B | A) -> ~(A ^ B)
  // Complexity sorting ensures the not will be on the right side.
  if ((match(Op0, m_Or(m_Value(A), m_Value(B))) &&
       match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) ||
      (match(Op0, m_And(m_Value(A), m_Value(B))) &&
       match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))))
    return BinaryOperator::CreateNot(Builder.CreateXor(A, B));

  return nullptr;
}
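
// Worked example for the first pattern above (hypothetical values A = 6, B = 3):
//   (A & B) ^ (A | B) = 0b010 ^ 0b111 = 0b101 = A ^ B
// so the whole expression can be morphed into a single 'xor A, B'.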
3970 Value
*InstCombinerImpl::foldXorOfICmps(ICmpInst
*LHS
, ICmpInst
*RHS
,
3971 BinaryOperator
&I
) {
3972 assert(I
.getOpcode() == Instruction::Xor
&& I
.getOperand(0) == LHS
&&
3973 I
.getOperand(1) == RHS
&& "Should be 'xor' with these operands");
3975 ICmpInst::Predicate PredL
= LHS
->getPredicate(), PredR
= RHS
->getPredicate();
3976 Value
*LHS0
= LHS
->getOperand(0), *LHS1
= LHS
->getOperand(1);
3977 Value
*RHS0
= RHS
->getOperand(0), *RHS1
= RHS
->getOperand(1);
3979 if (predicatesFoldable(PredL
, PredR
)) {
3980 if (LHS0
== RHS1
&& LHS1
== RHS0
) {
3981 std::swap(LHS0
, LHS1
);
3982 PredL
= ICmpInst::getSwappedPredicate(PredL
);
3984 if (LHS0
== RHS0
&& LHS1
== RHS1
) {
3985 // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
3986 unsigned Code
= getICmpCode(PredL
) ^ getICmpCode(PredR
);
3987 bool IsSigned
= LHS
->isSigned() || RHS
->isSigned();
3988 return getNewICmpValue(Code
, IsSigned
, LHS0
, LHS1
, Builder
);
3992 // TODO: This can be generalized to compares of non-signbits using
3993 // decomposeBitTestICmp(). It could be enhanced more by using (something like)
3994 // foldLogOpOfMaskedICmps().
3995 const APInt
*LC
, *RC
;
3996 if (match(LHS1
, m_APInt(LC
)) && match(RHS1
, m_APInt(RC
)) &&
3997 LHS0
->getType() == RHS0
->getType() &&
3998 LHS0
->getType()->isIntOrIntVectorTy()) {
3999 // Convert xor of signbit tests to signbit test of xor'd values:
4000 // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
4001 // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
4002 // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
4003 // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
4004 bool TrueIfSignedL
, TrueIfSignedR
;
4005 if ((LHS
->hasOneUse() || RHS
->hasOneUse()) &&
4006 isSignBitCheck(PredL
, *LC
, TrueIfSignedL
) &&
4007 isSignBitCheck(PredR
, *RC
, TrueIfSignedR
)) {
4008 Value
*XorLR
= Builder
.CreateXor(LHS0
, RHS0
);
4009 return TrueIfSignedL
== TrueIfSignedR
? Builder
.CreateIsNeg(XorLR
) :
4010 Builder
.CreateIsNotNeg(XorLR
);
4013 // Fold (icmp pred1 X, C1) ^ (icmp pred2 X, C2)
4014 // into a single comparison using range-based reasoning.
4016 ConstantRange CR1
= ConstantRange::makeExactICmpRegion(PredL
, *LC
);
4017 ConstantRange CR2
= ConstantRange::makeExactICmpRegion(PredR
, *RC
);
4018 auto CRUnion
= CR1
.exactUnionWith(CR2
);
4019 auto CRIntersect
= CR1
.exactIntersectWith(CR2
);
4020 if (CRUnion
&& CRIntersect
)
4021 if (auto CR
= CRUnion
->exactIntersectWith(CRIntersect
->inverse())) {
4022 if (CR
->isFullSet())
4023 return ConstantInt::getTrue(I
.getType());
4024 if (CR
->isEmptySet())
4025 return ConstantInt::getFalse(I
.getType());
4027 CmpInst::Predicate NewPred
;
4029 CR
->getEquivalentICmp(NewPred
, NewC
, Offset
);
4031 if ((Offset
.isZero() && (LHS
->hasOneUse() || RHS
->hasOneUse())) ||
4032 (LHS
->hasOneUse() && RHS
->hasOneUse())) {
4034 Type
*Ty
= LHS0
->getType();
4035 if (!Offset
.isZero())
4036 NewV
= Builder
.CreateAdd(NewV
, ConstantInt::get(Ty
, Offset
));
4037 return Builder
.CreateICmp(NewPred
, NewV
,
4038 ConstantInt::get(Ty
, NewC
));
4044 // Instead of trying to imitate the folds for and/or, decompose this 'xor'
4045 // into those logic ops. That is, try to turn this into an and-of-icmps
4046 // because we have many folds for that pattern.
4048 // This is based on a truth table definition of xor:
4049 // X ^ Y --> (X | Y) & !(X & Y)
4050 if (Value
*OrICmp
= simplifyBinOp(Instruction::Or
, LHS
, RHS
, SQ
)) {
4051 // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
4052 // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
4053 if (Value
*AndICmp
= simplifyBinOp(Instruction::And
, LHS
, RHS
, SQ
)) {
4054 // TODO: Independently handle cases where the 'and' side is a constant.
4055 ICmpInst
*X
= nullptr, *Y
= nullptr;
4056 if (OrICmp
== LHS
&& AndICmp
== RHS
) {
4057 // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y
4061 if (OrICmp
== RHS
&& AndICmp
== LHS
) {
4062 // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X
4066 if (X
&& Y
&& (Y
->hasOneUse() || canFreelyInvertAllUsersOf(Y
, &I
))) {
4067 // Invert the predicate of 'Y', thus inverting its output.
4068 Y
->setPredicate(Y
->getInversePredicate());
4069 // So, are there other uses of Y?
4070 if (!Y
->hasOneUse()) {
4071 // We need to adapt other uses of Y though. Get a value that matches
4072 // the original value of Y before inversion. While this increases
4073 // immediate instruction count, we have just ensured that all the
4074 // users are freely-invertible, so that 'not' *will* get folded away.
4075 BuilderTy::InsertPointGuard
Guard(Builder
);
4076 // Set insertion point to right after the Y.
4077 Builder
.SetInsertPoint(Y
->getParent(), ++(Y
->getIterator()));
4078 Value
*NotY
= Builder
.CreateNot(Y
, Y
->getName() + ".not");
4079 // Replace all uses of Y (excluding the one in NotY!) with NotY.
4080 Worklist
.pushUsersToWorkList(*Y
);
4081 Y
->replaceUsesWithIf(NotY
,
4082 [NotY
](Use
&U
) { return U
.getUser() != NotY
; });
4085 return Builder
.CreateAnd(LHS
, RHS
);
/// If we have a masked merge, in the canonical form of:
/// (assuming that A only has one use.)
/// ((x ^ y) & M) ^ y
/// * If M is inverted:
/// ((x ^ y) & ~M) ^ y
/// We can canonicalize by swapping the final xor operand
/// to eliminate the 'not' of the mask.
/// ((x ^ y) & M) ^ x
/// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops
/// because that shortens the dependency chain and improves analysis:
/// (x & M) | (y & ~M)
static Instruction *visitMaskedMerge(BinaryOperator &I,
                                     InstCombiner::BuilderTy &Builder) {
  Value *B, *X, *D;
  Value *M;
  if (!match(&I, m_c_Xor(m_Value(B),
                         m_OneUse(m_c_And(
                             m_CombineAnd(m_c_Xor(m_Deferred(B), m_Value(X)),
                                          m_Value(D)),
                             m_Value(M))))))
    return nullptr;

  Value *NotM;
  if (match(M, m_Not(m_Value(NotM)))) {
    // De-invert the mask and swap the value in B part.
    Value *NewA = Builder.CreateAnd(D, NotM);
    return BinaryOperator::CreateXor(NewA, X);
  }

  Constant *C;
  if (D->hasOneUse() && match(M, m_Constant(C))) {
    // Propagating undef is unsafe. Clamp undef elements to -1.
    Type *EltTy = C->getType()->getScalarType();
    C = Constant::replaceUndefsWith(C, ConstantInt::getAllOnesValue(EltTy));
    // Unfold.
    Value *LHS = Builder.CreateAnd(X, C);
    Value *NotC = Builder.CreateNot(C);
    Value *RHS = Builder.CreateAnd(B, NotC);
    return BinaryOperator::CreateOr(LHS, RHS);
  }

  return nullptr;
}
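
// Worked example for the constant-mask case above (hypothetical i4 values
// X = 0b1010, B = 0b0101, M = 0b1100):
//   ((X ^ B) & M) ^ B = (0b1111 & 0b1100) ^ 0b0101 = 0b1001
//   (X & M) | (B & ~M) = 0b1000 | 0b0001           = 0b1001   ; same result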
static Instruction *foldNotXor(BinaryOperator &I,
                               InstCombiner::BuilderTy &Builder) {
  Value *X, *Y;
  // FIXME: one-use check is not needed in general, but currently we are unable
  // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182)
  if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
    return nullptr;

  auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) {
    return A == C || A == D || B == C || B == D;
  };

  Value *A, *B, *C, *D;
  // Canonicalize ~((A & B) ^ (A | ?)) -> (A & B) | ~(A | ?)
  // 4 commuted variants
  if (match(X, m_And(m_Value(A), m_Value(B))) &&
      match(Y, m_Or(m_Value(C), m_Value(D))) && hasCommonOperand(A, B, C, D)) {
    Value *NotY = Builder.CreateNot(Y);
    return BinaryOperator::CreateOr(X, NotY);
  }

  // Canonicalize ~((A | ?) ^ (A & B)) -> (A & B) | ~(A | ?)
  // 4 commuted variants
  if (match(Y, m_And(m_Value(A), m_Value(B))) &&
      match(X, m_Or(m_Value(C), m_Value(D))) && hasCommonOperand(A, B, C, D)) {
    Value *NotX = Builder.CreateNot(X);
    return BinaryOperator::CreateOr(Y, NotX);
  }

  return nullptr;
}
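
// Worked example for the first canonicalization above (hypothetical i2 values
// A = 0b10, B = 0b11, C = 0b01, common operand A):
//   ~((A & B) ^ (A | C)) = ~(0b10 ^ 0b11) = 0b10
//   (A & B) | ~(A | C)   =  0b10 | 0b00   = 0b10   ; same result
// The identity holds because (A & B) is always a bitwise subset of (A | ?).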
/// Canonicalize a shifty way to code absolute value to the more common pattern
/// that uses negation and select.
static Instruction *canonicalizeAbs(BinaryOperator &Xor,
                                    InstCombiner::BuilderTy &Builder) {
  assert(Xor.getOpcode() == Instruction::Xor && "Expected an xor instruction.");

  // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
  // We're relying on the fact that we only do this transform when the shift has
  // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
  // instructions).
  Value *Op0 = Xor.getOperand(0), *Op1 = Xor.getOperand(1);
  if (Op0->hasNUses(2))
    std::swap(Op0, Op1);

  Type *Ty = Xor.getType();
  Value *A;
  const APInt *ShAmt;
  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
      Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
      match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
    // Op1 = ashr i32 A, 31   ; smear the sign bit
    // xor (add A, Op1), Op1  ; add -1 and flip bits if negative
    // --> (A < 0) ? -A : A
    Value *IsNeg = Builder.CreateIsNeg(A);
    // Copy the nuw/nsw flags from the add to the negate.
    auto *Add = cast<BinaryOperator>(Op0);
    Value *NegA = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
                                    Add->hasNoSignedWrap());
    return SelectInst::Create(IsNeg, NegA, A);
  }
  return nullptr;
}
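
// Worked example (hypothetical i32 value A = -5):
//   %sh  = ashr i32 -5, 31     ; -1, the smeared sign bit
//   %add = add  i32 -5, %sh    ; -6
//   %xor = xor  i32 %add, %sh  ;  5
// canonicalizes to "select (icmp slt i32 A, 0), (sub 0, A), A", i.e. abs(A).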
static bool canFreelyInvert(InstCombiner &IC, Value *Op,
                            Instruction *IgnoredUser) {
  auto *I = dyn_cast<Instruction>(Op);
  return I && IC.isFreeToInvert(I, /*WillInvertAllUses=*/true) &&
         IC.canFreelyInvertAllUsersOf(I, IgnoredUser);
}

static Value *freelyInvert(InstCombinerImpl &IC, Value *Op,
                           Instruction *IgnoredUser) {
  auto *I = cast<Instruction>(Op);
  IC.Builder.SetInsertPoint(*I->getInsertionPointAfterDef());
  Value *NotOp = IC.Builder.CreateNot(Op, Op->getName() + ".not");
  Op->replaceUsesWithIf(NotOp,
                        [NotOp](Use &U) { return U.getUser() != NotOp; });
  IC.freelyInvertAllUsersOf(NotOp, IgnoredUser);
  return NotOp;
}
// Transform
//   z = ~(x &/| y)
// into:
//   z = ((~x) |/& (~y))
// iff both x and y are free to invert and all uses of z can be freely updated.
bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) {
  Value *Op0, *Op1;
  if (!match(&I, m_LogicalOp(m_Value(Op0), m_Value(Op1))))
    return false;

  // If this logic op has not been simplified yet, just bail out and let that
  // happen first. Otherwise, the code below may wrongly invert.
  if (Op0 == Op1)
    return false;

  Instruction::BinaryOps NewOpc =
      match(&I, m_LogicalAnd()) ? Instruction::Or : Instruction::And;
  bool IsBinaryOp = isa<BinaryOperator>(I);

  // Can our users be adapted?
  if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
    return false;

  // And can the operands be adapted?
  if (!canFreelyInvert(*this, Op0, &I) || !canFreelyInvert(*this, Op1, &I))
    return false;

  Op0 = freelyInvert(*this, Op0, &I);
  Op1 = freelyInvert(*this, Op1, &I);

  Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
  Value *NewLogicOp;
  if (IsBinaryOp)
    NewLogicOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
  else
    NewLogicOp =
        Builder.CreateLogicalOp(NewOpc, Op0, Op1, I.getName() + ".not");

  replaceInstUsesWith(I, NewLogicOp);
  // We can not just create an outer `not`, it will most likely be immediately
  // folded back, reconstructing our initial pattern, and causing an
  // infinite combine loop, so immediately manually fold it away.
  freelyInvertAllUsersOf(NewLogicOp);
  return true;
}
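
// Illustrative example (hypothetical IR; names are illustrative): if %x and %y
// are one-use compares and %z only feeds a branch condition,
//   %a = and i1 %x, %y
//   %z = xor i1 %a, true
// can become "%z.not = or i1 %x.inv, %y.inv" with the compares' predicates
// inverted, so the explicit 'not' disappears entirely.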
// Transform
//   z = (~x) &/| y
// into:
//   z = ~(x |/& (~y))
// iff y is free to invert and all uses of z can be freely updated.
bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) {
  Value *Op0, *Op1;
  if (!match(&I, m_LogicalOp(m_Value(Op0), m_Value(Op1))))
    return false;
  Instruction::BinaryOps NewOpc =
      match(&I, m_LogicalAnd()) ? Instruction::Or : Instruction::And;
  bool IsBinaryOp = isa<BinaryOperator>(I);

  Value *NotOp0 = nullptr;
  Value *NotOp1 = nullptr;
  Value **OpToInvert = nullptr;
  if (match(Op0, m_Not(m_Value(NotOp0))) && canFreelyInvert(*this, Op1, &I)) {
    Op0 = NotOp0;
    OpToInvert = &Op1;
  } else if (match(Op1, m_Not(m_Value(NotOp1))) &&
             canFreelyInvert(*this, Op0, &I)) {
    Op1 = NotOp1;
    OpToInvert = &Op0;
  } else
    return false;

  // And can our users be adapted?
  if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
    return false;

  *OpToInvert = freelyInvert(*this, *OpToInvert, &I);

  Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
  Value *NewBinOp;
  if (IsBinaryOp)
    NewBinOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
  else
    NewBinOp = Builder.CreateLogicalOp(NewOpc, Op0, Op1, I.getName() + ".not");
  replaceInstUsesWith(I, NewBinOp);
  // We can not just create an outer `not`, it will most likely be immediately
  // folded back, reconstructing our initial pattern, and causing an
  // infinite combine loop, so immediately manually fold it away.
  freelyInvertAllUsersOf(NewBinOp);
  return true;
}
4315 Instruction
*InstCombinerImpl::foldNot(BinaryOperator
&I
) {
4317 if (!match(&I
, m_Not(m_Value(NotOp
))))
4320 // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
4321 // We must eliminate the and/or (one-use) for these transforms to not increase
4322 // the instruction count.
4324 // ~(~X & Y) --> (X | ~Y)
4325 // ~(Y & ~X) --> (X | ~Y)
4327 // Note: The logical matches do not check for the commuted patterns because
4328 // those are handled via SimplifySelectsFeedingBinaryOp().
4329 Type
*Ty
= I
.getType();
4331 if (match(NotOp
, m_OneUse(m_c_And(m_Not(m_Value(X
)), m_Value(Y
))))) {
4332 Value
*NotY
= Builder
.CreateNot(Y
, Y
->getName() + ".not");
4333 return BinaryOperator::CreateOr(X
, NotY
);
4335 if (match(NotOp
, m_OneUse(m_LogicalAnd(m_Not(m_Value(X
)), m_Value(Y
))))) {
4336 Value
*NotY
= Builder
.CreateNot(Y
, Y
->getName() + ".not");
4337 return SelectInst::Create(X
, ConstantInt::getTrue(Ty
), NotY
);
4340 // ~(~X | Y) --> (X & ~Y)
4341 // ~(Y | ~X) --> (X & ~Y)
4342 if (match(NotOp
, m_OneUse(m_c_Or(m_Not(m_Value(X
)), m_Value(Y
))))) {
4343 Value
*NotY
= Builder
.CreateNot(Y
, Y
->getName() + ".not");
4344 return BinaryOperator::CreateAnd(X
, NotY
);
4346 if (match(NotOp
, m_OneUse(m_LogicalOr(m_Not(m_Value(X
)), m_Value(Y
))))) {
4347 Value
*NotY
= Builder
.CreateNot(Y
, Y
->getName() + ".not");
4348 return SelectInst::Create(X
, NotY
, ConstantInt::getFalse(Ty
));
4351 // Is this a 'not' (~) fed by a binary operator?
4352 BinaryOperator
*NotVal
;
4353 if (match(NotOp
, m_BinOp(NotVal
))) {
4354 // ~((-X) | Y) --> (X - 1) & (~Y)
4356 m_OneUse(m_c_Or(m_OneUse(m_Neg(m_Value(X
))), m_Value(Y
))))) {
4357 Value
*DecX
= Builder
.CreateAdd(X
, ConstantInt::getAllOnesValue(Ty
));
4358 Value
*NotY
= Builder
.CreateNot(Y
);
4359 return BinaryOperator::CreateAnd(DecX
, NotY
);
4362 // ~(~X >>s Y) --> (X >>s Y)
4363 if (match(NotVal
, m_AShr(m_Not(m_Value(X
)), m_Value(Y
))))
4364 return BinaryOperator::CreateAShr(X
, Y
);
4366 // Treat lshr with non-negative operand as ashr.
4367 // ~(~X >>u Y) --> (X >>s Y) iff X is known negative
4368 if (match(NotVal
, m_LShr(m_Not(m_Value(X
)), m_Value(Y
))) &&
4369 isKnownNegative(X
, SQ
.getWithInstruction(NotVal
)))
4370 return BinaryOperator::CreateAShr(X
, Y
);
4372 // Bit-hack form of a signbit test for iN type:
4373 // ~(X >>s (N - 1)) --> sext i1 (X > -1) to iN
4374 unsigned FullShift
= Ty
->getScalarSizeInBits() - 1;
4375 if (match(NotVal
, m_OneUse(m_AShr(m_Value(X
), m_SpecificInt(FullShift
))))) {
4376 Value
*IsNotNeg
= Builder
.CreateIsNotNeg(X
, "isnotneg");
4377 return new SExtInst(IsNotNeg
, Ty
);
4380 // If we are inverting a right-shifted constant, we may be able to eliminate
4381 // the 'not' by inverting the constant and using the opposite shift type.
4382 // Canonicalization rules ensure that only a negative constant uses 'ashr',
4383 // but we must check that in case that transform has not fired yet.
4385 // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
4387 if (match(NotVal
, m_AShr(m_Constant(C
), m_Value(Y
))) &&
4388 match(C
, m_Negative())) {
4389 // We matched a negative constant, so propagating undef is unsafe.
4390 // Clamp undef elements to -1.
4391 Type
*EltTy
= Ty
->getScalarType();
4392 C
= Constant::replaceUndefsWith(C
, ConstantInt::getAllOnesValue(EltTy
));
4393 return BinaryOperator::CreateLShr(ConstantExpr::getNot(C
), Y
);
4396 // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
4397 if (match(NotVal
, m_LShr(m_Constant(C
), m_Value(Y
))) &&
4398 match(C
, m_NonNegative())) {
4399 // We matched a non-negative constant, so propagating undef is unsafe.
4400 // Clamp undef elements to 0.
4401 Type
*EltTy
= Ty
->getScalarType();
4402 C
= Constant::replaceUndefsWith(C
, ConstantInt::getNullValue(EltTy
));
4403 return BinaryOperator::CreateAShr(ConstantExpr::getNot(C
), Y
);
4406 // ~(X + C) --> ~C - X
4407 if (match(NotVal
, m_c_Add(m_Value(X
), m_ImmConstant(C
))))
4408 return BinaryOperator::CreateSub(ConstantExpr::getNot(C
), X
);
4410 // ~(X - Y) --> ~X + Y
4411 // FIXME: is it really beneficial to sink the `not` here?
4412 if (match(NotVal
, m_Sub(m_Value(X
), m_Value(Y
))))
4413 if (isa
<Constant
>(X
) || NotVal
->hasOneUse())
4414 return BinaryOperator::CreateAdd(Builder
.CreateNot(X
), Y
);
4416 // ~(~X + Y) --> X - Y
4417 if (match(NotVal
, m_c_Add(m_Not(m_Value(X
)), m_Value(Y
))))
4418 return BinaryOperator::CreateWithCopiedFlags(Instruction::Sub
, X
, Y
,
4422 // not (cmp A, B) = !cmp A, B
4423 CmpInst::Predicate Pred
;
4424 if (match(NotOp
, m_Cmp(Pred
, m_Value(), m_Value())) &&
4425 (NotOp
->hasOneUse() ||
4426 InstCombiner::canFreelyInvertAllUsersOf(cast
<Instruction
>(NotOp
),
4427 /*IgnoredUser=*/nullptr))) {
4428 cast
<CmpInst
>(NotOp
)->setPredicate(CmpInst::getInversePredicate(Pred
));
4429 freelyInvertAllUsersOf(NotOp
);
4433 // Move a 'not' ahead of casts of a bool to enable logic reduction:
4434 // not (bitcast (sext i1 X)) --> bitcast (sext (not i1 X))
4435 if (match(NotOp
, m_OneUse(m_BitCast(m_OneUse(m_SExt(m_Value(X
)))))) && X
->getType()->isIntOrIntVectorTy(1)) {
4436 Type
*SextTy
= cast
<BitCastOperator
>(NotOp
)->getSrcTy();
4437 Value
*NotX
= Builder
.CreateNot(X
);
4438 Value
*Sext
= Builder
.CreateSExt(NotX
, SextTy
);
4439 return CastInst::CreateBitOrPointerCast(Sext
, Ty
);
4442 if (auto *NotOpI
= dyn_cast
<Instruction
>(NotOp
))
4443 if (sinkNotIntoLogicalOp(*NotOpI
))
4446 // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
4447 // ~min(~X, ~Y) --> max(X, Y)
4448 // ~max(~X, Y) --> min(X, ~Y)
4449 auto *II
= dyn_cast
<IntrinsicInst
>(NotOp
);
4450 if (II
&& II
->hasOneUse()) {
4451 if (match(NotOp
, m_c_MaxOrMin(m_Not(m_Value(X
)), m_Value(Y
)))) {
4452 Intrinsic::ID InvID
= getInverseMinMaxIntrinsic(II
->getIntrinsicID());
4453 Value
*NotY
= Builder
.CreateNot(Y
);
4454 Value
*InvMaxMin
= Builder
.CreateBinaryIntrinsic(InvID
, X
, NotY
);
4455 return replaceInstUsesWith(I
, InvMaxMin
);
4458 if (II
->getIntrinsicID() == Intrinsic::is_fpclass
) {
4459 ConstantInt
*ClassMask
= cast
<ConstantInt
>(II
->getArgOperand(1));
4461 1, ConstantInt::get(ClassMask
->getType(),
4462 ~ClassMask
->getZExtValue() & fcAllFlags
));
4463 return replaceInstUsesWith(I
, II
);
4467 if (NotOp
->hasOneUse()) {
4468 // Pull 'not' into operands of select if both operands are one-use compares
4469 // or one is one-use compare and the other one is a constant.
4470 // Inverting the predicates eliminates the 'not' operation.
4472 // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) -->
4473 // select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
4474 // not (select ?, (cmp TPred, ?, ?), true -->
4475 // select ?, (cmp InvTPred, ?, ?), false
4476 if (auto *Sel
= dyn_cast
<SelectInst
>(NotOp
)) {
4477 Value
*TV
= Sel
->getTrueValue();
4478 Value
*FV
= Sel
->getFalseValue();
4479 auto *CmpT
= dyn_cast
<CmpInst
>(TV
);
4480 auto *CmpF
= dyn_cast
<CmpInst
>(FV
);
4481 bool InvertibleT
= (CmpT
&& CmpT
->hasOneUse()) || isa
<Constant
>(TV
);
4482 bool InvertibleF
= (CmpF
&& CmpF
->hasOneUse()) || isa
<Constant
>(FV
);
4483 if (InvertibleT
&& InvertibleF
) {
4485 CmpT
->setPredicate(CmpT
->getInversePredicate());
4487 Sel
->setTrueValue(ConstantExpr::getNot(cast
<Constant
>(TV
)));
4489 CmpF
->setPredicate(CmpF
->getInversePredicate());
4491 Sel
->setFalseValue(ConstantExpr::getNot(cast
<Constant
>(FV
)));
4492 return replaceInstUsesWith(I
, Sel
);
4497 if (Instruction
*NewXor
= foldNotXor(I
, Builder
))
4500 // TODO: Could handle multi-use better by checking if all uses of NotOp (other
4501 // than I) can be inverted.
4502 if (Value
*R
= getFreelyInverted(NotOp
, NotOp
->hasOneUse(), &Builder
))
4503 return replaceInstUsesWith(I
, R
);
4508 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
4509 // here. We should standardize that construct where it is needed or choose some
4510 // other way to ensure that commutated variants of patterns are not missed.
4511 Instruction
*InstCombinerImpl::visitXor(BinaryOperator
&I
) {
4512 if (Value
*V
= simplifyXorInst(I
.getOperand(0), I
.getOperand(1),
4513 SQ
.getWithInstruction(&I
)))
4514 return replaceInstUsesWith(I
, V
);
4516 if (SimplifyAssociativeOrCommutative(I
))
4519 if (Instruction
*X
= foldVectorBinop(I
))
4522 if (Instruction
*Phi
= foldBinopWithPhiOperands(I
))
4525 if (Instruction
*NewXor
= foldXorToXor(I
, Builder
))
4528 // (A&B)^(A&C) -> A&(B^C) etc
4529 if (Value
*V
= foldUsingDistributiveLaws(I
))
4530 return replaceInstUsesWith(I
, V
);
4532 // See if we can simplify any instructions used by the instruction whose sole
4533 // purpose is to compute bits we don't care about.
4534 if (SimplifyDemandedInstructionBits(I
))
4537 if (Instruction
*R
= foldNot(I
))
4540 if (Instruction
*R
= foldBinOpShiftWithShift(I
))
4543 // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
4544 // This it a special case in haveNoCommonBitsSet, but the computeKnownBits
4545 // calls in there are unnecessary as SimplifyDemandedInstructionBits should
4546 // have already taken care of those cases.
4547 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
4549 if (match(&I
, m_c_Xor(m_c_And(m_Not(m_Value(M
)), m_Value()),
4550 m_c_And(m_Deferred(M
), m_Value()))))
4551 return BinaryOperator::CreateDisjointOr(Op0
, Op1
);
4553 if (Instruction
*Xor
= visitMaskedMerge(I
, Builder
))

  Value *X, *Y;
  Constant *C1;
  if (match(Op1, m_Constant(C1))) {
    Constant *C2;

    if (match(Op0, m_OneUse(m_Or(m_Value(X), m_ImmConstant(C2)))) &&
        match(C1, m_ImmConstant())) {
      // (X | C2) ^ C1 --> (X & ~C2) ^ (C1^C2)
      C2 = Constant::replaceUndefsWith(
          C2, Constant::getAllOnesValue(C2->getType()->getScalarType()));
      Value *And = Builder.CreateAnd(
          X, Constant::mergeUndefsWith(ConstantExpr::getNot(C2), C1));
      return BinaryOperator::CreateXor(
          And, Constant::mergeUndefsWith(ConstantExpr::getXor(C1, C2), C1));
    }
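    // For example, with C2 = 0x0F and C1 = 0xFF: (X | 0x0F) ^ 0xFF becomes
    // (X & 0xF0) ^ 0xF0, replacing the 'or' with an 'and' of the inverted
    // mask and folding the two constants together.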
    // Use DeMorgan and reassociation to eliminate a 'not' op.
    if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
      // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
      Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
      return BinaryOperator::CreateXor(And, ConstantExpr::getNot(C1));
    }
    if (match(Op0, m_OneUse(m_And(m_Not(m_Value(X)), m_Constant(C2))))) {
      // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
      Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
      return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
    }
    // Convert xor ([trunc] (ashr X, BW-1)), C =>
    //   select(X >s -1, C, ~C)
    // The ashr creates "AllZeroOrAllOnes", which then optionally inverts the
    // constant depending on whether this input is less than 0.
    const APInt *CA;
    if (match(Op0, m_OneUse(m_TruncOrSelf(
                       m_AShr(m_Value(X), m_APIntAllowUndef(CA))))) &&
        *CA == X->getType()->getScalarSizeInBits() - 1 &&
        !match(C1, m_AllOnes())) {
      assert(!C1->isZeroValue() && "Unexpected xor with 0");
      Value *IsNotNeg = Builder.CreateIsNotNeg(X);
      return SelectInst::Create(IsNotNeg, Op1, Builder.CreateNot(Op1));
    }
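    // For example, with i8 X: (ashr X, 7) is 0 when X is non-negative and -1
    // when X is negative, so xor'ing it with C yields C or ~C; the select on
    // (X >s -1) expresses exactly that.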
  }

  Type *Ty = I.getType();

  const APInt *RHSC;
  if (match(Op1, m_APInt(RHSC))) {
    const APInt *C;

    // (C - X) ^ signmaskC --> (C + signmaskC) - X
    if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X))))
      return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C + *RHSC), X);

    // (X + C) ^ signmaskC --> X + (C + signmaskC)
    if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C))))
      return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C + *RHSC));
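    // Adding and xor'ing the sign-mask constant both flip only the sign bit
    // (the carry out of the top bit is discarded), so the xor folds into the
    // neighboring add/sub constant.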
    // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0
    if (match(Op0, m_Or(m_Value(X), m_APInt(C))) &&
        MaskedValueIsZero(X, *C, 0, &I))
      return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC));
    // When X is a power-of-two or zero and zero input is poison:
    // ctlz(i32 X) ^ 31 --> cttz(X)
    // cttz(i32 X) ^ 31 --> ctlz(X)
    auto *II = dyn_cast<IntrinsicInst>(Op0);
    if (II && II->hasOneUse() && *RHSC == Ty->getScalarSizeInBits() - 1) {
      Intrinsic::ID IID = II->getIntrinsicID();
      if ((IID == Intrinsic::ctlz || IID == Intrinsic::cttz) &&
          match(II->getArgOperand(1), m_One()) &&
          isKnownToBeAPowerOfTwo(II->getArgOperand(0), /*OrZero*/ true)) {
        IID = (IID == Intrinsic::ctlz) ? Intrinsic::cttz : Intrinsic::ctlz;
        Function *F = Intrinsic::getDeclaration(II->getModule(), IID, Ty);
        return CallInst::Create(F, {II->getArgOperand(0), Builder.getTrue()});
      }
    }
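    // For example, if X == 1 << K in i32, then ctlz(X) == 31 - K and
    // cttz(X) == K; since 31 - K == 31 ^ K for 0 <= K <= 31, xor'ing with 31
    // converts one count into the other.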
    // If RHSC is inverting the remaining bits of shifted X,
    // canonicalize to a 'not' before the shift to help SCEV and codegen:
    // (X << C) ^ RHSC --> ~X << C
    if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) &&
        *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).shl(*C)) {
      Value *NotX = Builder.CreateNot(X);
      return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C));
    }

    // (X >>u C) ^ RHSC --> ~X >>u C
    if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) &&
        *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).lshr(*C)) {
      Value *NotX = Builder.CreateNot(X);
      return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C));
    }
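    // For example, in i8 with C == 4: RHSC must be 0xF0, and (X << 4) ^ 0xF0
    // flips exactly the bits that came from X, which is the same as
    // (~X) << 4.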
    // TODO: We could handle 'ashr' here as well. That would be matching
    //       a 'not' op and moving it before the shift. Doing that requires
    //       preventing the inverse fold in canShiftBinOpWithConstantRHS().
  }

  // If we are XORing the sign bit of a floating-point value, convert
  // this to fneg, then cast back to integer.
  //
  // This is a generous interpretation of noimplicitfloat; this is not a true
  // floating-point operation.
  //
  // Assumes any IEEE-represented type has the sign bit in the high bit.
  // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt.
  Value *CastOp;
  if (match(Op0, m_BitCast(m_Value(CastOp))) && match(Op1, m_SignMask()) &&
      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
          Attribute::NoImplicitFloat)) {
    Type *EltTy = CastOp->getType()->getScalarType();
    if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
        EltTy->getPrimitiveSizeInBits() ==
            I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
      Value *FNeg = Builder.CreateFNeg(CastOp);
      return new BitCastInst(FNeg, I.getType());
    }
  }
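  // For example, (bitcast float %f to i32) ^ 0x80000000 flips only the IEEE
  // sign bit, so it becomes bitcast (fneg float %f) to i32.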

  // FIXME: This should not be limited to scalar (pull into APInt match above).
  {
    ConstantInt *C1, *C2, *C3;
    // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
    if (match(Op1, m_ConstantInt(C3)) &&
        match(Op0, m_LShr(m_Xor(m_Value(X), m_ConstantInt(C1)),
                          m_ConstantInt(C2))) &&
        Op0->hasOneUse()) {
      // fold (C1 >> C2) ^ C3
      APInt FoldConst = C1->getValue().lshr(C2->getValue());
      FoldConst ^= C3->getValue();
      // Prepare the two operands.
      auto *Opnd0 = Builder.CreateLShr(X, C2);
      Opnd0->takeName(Op0);
      return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst));
    }
  }
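  // For example, with i16 X, C1 == 0xFF00, C2 == 8, C3 == 0x0F: lshr
  // distributes over xor, so ((X ^ 0xFF00) >> 8) ^ 0x0F becomes
  // (X >> 8) ^ (0xFF ^ 0x0F) == (X >> 8) ^ 0xF0.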
  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
    return FoldedLogic;
  // Y ^ (X | Y) --> X & ~Y
  // Y ^ (Y | X) --> X & ~Y
  if (match(Op1, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op0)))))
    return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op0));
  // (X | Y) ^ Y --> X & ~Y
  // (Y | X) ^ Y --> X & ~Y
  if (match(Op0, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op1)))))
    return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op1));
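  // Bitwise justification: where Y is 1, (X | Y) is 1 and the xor clears the
  // bit; where Y is 0, the xor leaves X. That is exactly X & ~Y.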
  // Y ^ (X & Y) --> ~X & Y
  // Y ^ (Y & X) --> ~X & Y
  if (match(Op1, m_OneUse(m_c_And(m_Value(X), m_Specific(Op0)))))
    return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(X));
  // (X & Y) ^ Y --> ~X & Y
  // (Y & X) ^ Y --> ~X & Y
  // Canonical form is (X & C) ^ C; don't touch that.
  // TODO: A 'not' op is better for analysis and codegen, but demanded bits must
  //       be fixed to prefer that (otherwise we get infinite looping).
  if (!match(Op1, m_Constant()) &&
      match(Op0, m_OneUse(m_c_And(m_Value(X), m_Specific(Op1)))))
    return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(X));

  Value *A, *B, *C;
  // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
                        m_OneUse(m_c_Or(m_Deferred(A), m_Value(C))))))
    return BinaryOperator::CreateXor(
        Builder.CreateAnd(Builder.CreateNot(A), C), B);
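  // The rewrite works because A ^ (A | C) == ~A & C: where A is 1 both sides
  // of that xor are 1, and where A is 0 it reduces to C.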
  // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
                        m_OneUse(m_c_Or(m_Deferred(B), m_Value(C))))))
    return BinaryOperator::CreateXor(
        Builder.CreateAnd(Builder.CreateNot(B), C), A);
  // (A & B) ^ (A ^ B) -> (A | B)
  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
      match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
    return BinaryOperator::CreateOr(A, B);
  // (A ^ B) ^ (A & B) -> (A | B)
  if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
      match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
    return BinaryOperator::CreateOr(A, B);
  // (A & ~B) ^ ~A -> ~(A & B)
  // (~B & A) ^ ~A -> ~(A & B)
  if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
      match(Op1, m_Not(m_Specific(A))))
    return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
  // (~A & B) ^ A --> A | B -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A))))
    return BinaryOperator::CreateOr(A, B);
  // (~A | B) ^ A --> ~(A & B)
  if (match(Op0, m_OneUse(m_c_Or(m_Not(m_Specific(Op1)), m_Value(B)))))
    return BinaryOperator::CreateNot(Builder.CreateAnd(Op1, B));

  // A ^ (~A | B) --> ~(A & B)
  if (match(Op1, m_OneUse(m_c_Or(m_Not(m_Specific(Op0)), m_Value(B)))))
    return BinaryOperator::CreateNot(Builder.CreateAnd(Op0, B));
  // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants.
  // TODO: Loosen one-use restriction if common operand is a constant.
  Value *D;
  if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) &&
      match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) {
    // Move the common operand into A.
    if (B == C || B == D)
      std::swap(A, B);
    if (A == C)
      std::swap(C, D);
    if (A == D) {
      Value *NotA = Builder.CreateNot(A);
      return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA);
    }
  }
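  // Bitwise justification: where A is 1 both ors are 1 and the xor is 0;
  // where A is 0 the xor reduces to B ^ C. Hence (B ^ C) & ~A.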
  // (A & B) ^ (A | C) --> A ? ~B : C -- There are 4 commuted variants.
  if (I.getType()->isIntOrIntVectorTy(1) &&
      match(Op0, m_OneUse(m_LogicalAnd(m_Value(A), m_Value(B)))) &&
      match(Op1, m_OneUse(m_LogicalOr(m_Value(C), m_Value(D))))) {
    bool NeedFreeze = isa<SelectInst>(Op0) && isa<SelectInst>(Op1) && B == D;
    // Move the common operand into A.
    if (B == C || B == D)
      std::swap(A, B);
    if (A == C)
      std::swap(C, D);
    if (A == D) {
      if (NeedFreeze)
        A = Builder.CreateFreeze(A);
      Value *NotB = Builder.CreateNot(B);
      return SelectInst::Create(A, NotB, C);
    }
  }
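  // For i1 values this is just the truth table: when A is true the result is
  // B ^ 1 == ~B, and when A is false it is 0 ^ C == C.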
  if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
    if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
      if (Value *V = foldXorOfICmps(LHS, RHS, I))
        return replaceInstUsesWith(I, V);
  if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
    return CastedXor;

  if (Instruction *Abs = canonicalizeAbs(I, Builder))
    return Abs;
  // Otherwise, if all else failed, try to hoist the xor-by-constant:
  //   (X ^ C) ^ Y --> (X ^ Y) ^ C
  // Just like we do in other places, we completely avoid the fold
  // for constantexprs, at least to avoid an endless combine loop.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X),
                                                    m_Unless(m_ConstantExpr())),
                                       m_ImmConstant(C1))),
                        m_Value(Y))))
    return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1);
  if (Instruction *R = reassociateForUses(I, Builder))
    return R;

  if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
    return Canonicalized;

  if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
    return Folded;

  if (Instruction *Folded = canonicalizeConditionalNegationViaMathToSelect(I))
    return Folded;

  if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
    return Res;

  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))