//===- InstCombineShifts.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitShl, visitLShr, and visitAShr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"
bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
                                        Value *ShAmt1) {
  // We have two shift amounts from two different shifts. The types of those
  // shift amounts may not match. If that's the case, let's bail out now.
  if (ShAmt0->getType() != ShAmt1->getType())
    return false;

  // As input, we have the following pattern:
  //   Sh0 (Sh1 X, Q), K
  // We want to rewrite that as:
  //   Sh x, (Q+K)  iff (Q+K) u< bitwidth(x)
  // While we know that originally (Q+K) would not overflow
  // (because 2 * (N-1) u<= iN -1), we may have looked past extensions of
  // shift amounts, so it may now overflow in a smaller bitwidth.
  // To ensure that does not happen, we need to ensure that the total maximal
  // shift amount is still representable in that smaller bit width.
  unsigned MaximalPossibleTotalShiftAmount =
      (Sh0->getType()->getScalarSizeInBits() - 1) +
      (Sh1->getType()->getScalarSizeInBits() - 1);
  APInt MaximalRepresentableShiftAmount =
      APInt::getAllOnes(ShAmt0->getType()->getScalarSizeInBits());
  return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
}
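// Illustrative numbers for the check above (not from the original comments):
// for two i64 shifts, the maximal possible total shift amount is
// 63 + 63 = 126. Shift amounts zext'd from i8 can represent up to 255 u>= 126,
// so the fold may proceed; amounts zext'd from i6 (maximum 63) could not
// represent 126, and we would have to bail out.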
// Given pattern:
//   (x shiftopcode Q) shiftopcode K
// we should rewrite it as
//   x shiftopcode (Q+K)  iff (Q+K) u< bitwidth(x)
//
// This is valid for any shift, but they must be identical, and we must be
// careful in case we have (zext(Q)+zext(K)) and look past extensions,
// (Q+K) must not overflow or else (Q+K) u< bitwidth(x) is bogus.
//
// AnalyzeForSignBitExtraction indicates that we will only analyze whether this
// pattern has any 2 right-shifts that sum to 1 less than original bit width.
Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
    BinaryOperator *Sh0, const SimplifyQuery &SQ,
    bool AnalyzeForSignBitExtraction) {
  // Look for a shift of some instruction, ignore zext of shift amount if any.
  Instruction *Sh0Op0;
  Value *ShAmt0;
  if (!match(Sh0,
             m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0)))))
    return nullptr;

  // If there is a truncation between the two shifts, we must make note of it
  // and look through it. The truncation imposes additional constraints on the
  // transform.
  Instruction *Sh1;
  Value *Trunc = nullptr;
  match(Sh0Op0,
        m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)),
                    m_Instruction(Sh1)));

  // Inner shift: (x shiftopcode ShAmt1)
  // Like with other shift, ignore zext of shift amount if any.
  Value *X, *ShAmt1;
  if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
    return nullptr;

  // Verify that it would be safe to try to add those two shift amounts.
  if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1))
    return nullptr;

  // We are only looking for signbit extraction if we have two right shifts.
  bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) &&
                           match(Sh1, m_Shr(m_Value(), m_Value()));
  // ... and if it's not two right-shifts, we know the answer already.
  if (AnalyzeForSignBitExtraction && !HadTwoRightShifts)
    return nullptr;

  // The shift opcodes must be identical, unless we are just checking whether
  // this pattern can be interpreted as a sign-bit-extraction.
  Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
  bool IdenticalShOpcodes = Sh0->getOpcode() == Sh1->getOpcode();
  if (!IdenticalShOpcodes && !AnalyzeForSignBitExtraction)
    return nullptr;

  // If we saw truncation, we'll need to produce an extra instruction,
  // and for that one of the operands of the shift must be one-use,
  // unless of course we don't actually plan to produce any instructions here.
  if (Trunc && !AnalyzeForSignBitExtraction &&
      !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
    return nullptr;

  // Can we fold (ShAmt0+ShAmt1) ?
  auto *NewShAmt = dyn_cast_or_null<Constant>(
      simplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
                      SQ.getWithInstruction(Sh0)));
  if (!NewShAmt)
    return nullptr; // Did not simplify.
  unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits();
  unsigned XBitWidth = X->getType()->getScalarSizeInBits();
  // Is the new shift amount smaller than the bit width of inner/new shift?
  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
                                          APInt(NewShAmtBitWidth, XBitWidth))))
    return nullptr; // FIXME: could perform constant-folding.

  // If there was a truncation, and we have a right-shift, we can only fold if
  // we are left with the original sign bit. Likewise, if we were just checking
  // that this is a signbit extraction, this is the place to check it.
  // FIXME: zero shift amount is also legal here, but we can't *easily* check
  // more than one predicate so it's not really worth it.
  if (HadTwoRightShifts && (Trunc || AnalyzeForSignBitExtraction)) {
    // If it's not a sign bit extraction, then we're done.
    if (!match(NewShAmt,
               m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
                                  APInt(NewShAmtBitWidth, XBitWidth - 1))))
      return nullptr;
    // If it is, and that was the question, return the base value.
    if (AnalyzeForSignBitExtraction)
      return X;
  }

  assert(IdenticalShOpcodes && "Should not get here with different shifts.");

  if (NewShAmt->getType() != X->getType()) {
    NewShAmt = ConstantFoldCastOperand(Instruction::ZExt, NewShAmt,
                                       X->getType(), SQ.DL);
    if (!NewShAmt)
      return nullptr;
  }

  // All good, we can do this fold.
  BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);

  // The flags can only be propagated if there wasn't a trunc.
  if (!Trunc) {
    // If the pattern did not involve trunc, and both of the original shifts
    // had the same flag set, preserve the flag.
    if (ShiftOpcode == Instruction::BinaryOps::Shl) {
      NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
                                     Sh1->hasNoUnsignedWrap());
      NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
                                   Sh1->hasNoSignedWrap());
    } else {
      NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
    }
  }

  Instruction *Ret = NewShift;
  if (Trunc) {
    Builder.Insert(NewShift);
    Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType());
  }

  return Ret;
}
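// Illustrative instance of the fold above (assuming i32 operands):
//   (lshr (lshr %x, 5), 3)  -->  lshr %x, 8      ; 5 + 3 = 8 u< 32
// With AnalyzeForSignBitExtraction, two right-shifts whose amounts sum to 31
// would instead be recognized as extracting the sign bit of %x.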
// If we have some pattern that leaves only some low bits set, and then performs
// left-shift of those bits, if none of the bits that are left after the final
// shift are modified by the mask, we can omit the mask.
//
// There are many variants to this pattern:
//   a)  (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
//   b)  (x & (~(-1 << MaskShAmt))) << ShiftShAmt
//   c)  (x & (-1 l>> MaskShAmt)) << ShiftShAmt
//   d)  (x & ((-1 << MaskShAmt) l>> MaskShAmt)) << ShiftShAmt
//   e)  ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt
//   f)  ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt
// All these patterns can be simplified to just:
//   x << ShiftShAmt
// iff:
//   a,b)     (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
//   c,d,e,f) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
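// Illustrative instance of variant a) (assuming i8): with MaskShAmt = 4 and
// ShiftShAmt = 4, (x & 15) << 4 keeps only bits 4..7 of the result, all of
// which come from the low 4 bits of x, and 4 + 4 u>= 8, so the mask drops:
//   (x & ((1 << 4) - 1)) << 4  -->  x << 4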
static Instruction *
dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
                                     const SimplifyQuery &Q,
                                     InstCombiner::BuilderTy &Builder) {
  assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl &&
         "The input must be 'shl'!");

  Value *Masked, *ShiftShAmt;
  match(OuterShift,
        m_Shift(m_Value(Masked), m_ZExtOrSelf(m_Value(ShiftShAmt))));

  // *If* there is a truncation between an outer shift and a possibly-mask,
  // then said truncation *must* be one-use, else we can't perform the fold.
  Value *Trunc;
  if (match(Masked, m_CombineAnd(m_Trunc(m_Value(Masked)), m_Value(Trunc))) &&
      !Trunc->hasOneUse())
    return nullptr;

  Type *NarrowestTy = OuterShift->getType();
  Type *WidestTy = Masked->getType();
  bool HadTrunc = WidestTy != NarrowestTy;

  // The mask must be computed in a type twice as wide to ensure
  // that no bits are lost if the sum-of-shifts is wider than the base type.
  Type *ExtendedTy = WidestTy->getExtendedType();

  Value *MaskShAmt;

  // ((1 << MaskShAmt) - 1)
  auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
  // (~(-1 << maskNbits))
  auto MaskB = m_Not(m_Shl(m_AllOnes(), m_Value(MaskShAmt)));
  // (-1 l>> MaskShAmt)
  auto MaskC = m_LShr(m_AllOnes(), m_Value(MaskShAmt));
  // ((-1 << MaskShAmt) l>> MaskShAmt)
  auto MaskD =
      m_LShr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));

  Value *X;
  Constant *NewMask;
  if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
    // Peek through an optional zext of the shift amount.
    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

    // Verify that it would be safe to try to add those two shift amounts.
    if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
                                            MaskShAmt))
      return nullptr;

    // Can we simplify (MaskShAmt+ShiftShAmt) ?
    auto *SumOfShAmts = dyn_cast_or_null<Constant>(simplifyAddInst(
        MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
    if (!SumOfShAmts)
      return nullptr; // Did not simplify.
    // In this pattern SumOfShAmts correlates with the number of low bits
    // that shall remain in the root value (OuterShift).

    // An extend of an undef value becomes zero because the high bits are never
    // completely unknown. Replace the `undef` shift amounts with final
    // shift bitwidth to ensure that the value remains undef when creating the
    // subsequent shift op.
    SumOfShAmts = Constant::replaceUndefsWith(
        SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(),
                                      ExtendedTy->getScalarSizeInBits()));
    auto *ExtendedSumOfShAmts = ConstantFoldCastOperand(
        Instruction::ZExt, SumOfShAmts, ExtendedTy, Q.DL);
    if (!ExtendedSumOfShAmts)
      return nullptr;

    // And compute the mask as usual: ~(-1 << (SumOfShAmts))
    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
    Constant *ExtendedInvertedMask = ConstantFoldBinaryOpOperands(
        Instruction::Shl, ExtendedAllOnes, ExtendedSumOfShAmts, Q.DL);
    if (!ExtendedInvertedMask)
      return nullptr;

    NewMask = ConstantExpr::getNot(ExtendedInvertedMask);
  } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
             match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
                                 m_Deferred(MaskShAmt)))) {
    // Peek through an optional zext of the shift amount.
    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

    // Verify that it would be safe to try to add those two shift amounts.
    if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
                                            MaskShAmt))
      return nullptr;

    // Can we simplify (ShiftShAmt-MaskShAmt) ?
    auto *ShAmtsDiff = dyn_cast_or_null<Constant>(simplifySubInst(
        ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
    if (!ShAmtsDiff)
      return nullptr; // Did not simplify.
    // In this pattern ShAmtsDiff correlates with the number of high bits that
    // shall be unset in the root value (OuterShift).

    // An extend of an undef value becomes zero because the high bits are never
    // completely unknown. Replace the `undef` shift amounts with negated
    // bitwidth of innermost shift to ensure that the value remains undef when
    // creating the subsequent shift op.
    unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits();
    ShAmtsDiff = Constant::replaceUndefsWith(
        ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(),
                                     -WidestTyBitWidth));
    auto *ExtendedNumHighBitsToClear = ConstantFoldCastOperand(
        Instruction::ZExt,
        ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(),
                                              WidestTyBitWidth),
                             ShAmtsDiff),
        ExtendedTy, Q.DL);
    if (!ExtendedNumHighBitsToClear)
      return nullptr;

    // And compute the mask as usual: (-1 l>> (NumHighBitsToClear))
    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
    NewMask = ConstantFoldBinaryOpOperands(Instruction::LShr, ExtendedAllOnes,
                                           ExtendedNumHighBitsToClear, Q.DL);
    if (!NewMask)
      return nullptr;
  } else
    return nullptr; // Don't know anything about this pattern.

  NewMask = ConstantExpr::getTrunc(NewMask, NarrowestTy);

  // Does this mask have any unset bits? If not then we can just not apply it.
  bool NeedMask = !match(NewMask, m_AllOnes());

  // If we need to apply a mask, there are several more restrictions we have.
  if (NeedMask) {
    // The old masking instruction must go away.
    if (!Masked->hasOneUse())
      return nullptr;
    // The original "masking" instruction must not have been `ashr`.
    if (match(Masked, m_AShr(m_Value(), m_Value())))
      return nullptr;
  }

  // If we need to apply truncation, let's do it first, since we can.
  // We have already ensured that the old truncation will go away.
  if (HadTrunc)
    X = Builder.CreateTrunc(X, NarrowestTy);

  // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
  // We didn't change the Type of this outermost shift, so we can just do it.
  auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
                                          OuterShift->getOperand(1));
  if (!NeedMask)
    return NewShift;

  Builder.Insert(NewShift);
  return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
}
/// If we have a shift-by-constant of a bin op (bitwise logic op or add/sub w/
/// shl) that itself has a shift-by-constant operand with identical opcode, we
/// may be able to convert that into 2 independent shifts followed by the logic
/// op. This eliminates a use of an intermediate value (reduces dependency
/// chain).
static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
                                            InstCombiner::BuilderTy &Builder) {
  assert(I.isShift() && "Expected a shift as input");
  auto *BinInst = dyn_cast<BinaryOperator>(I.getOperand(0));
  if (!BinInst ||
      (!BinInst->isBitwiseLogicOp() &&
       BinInst->getOpcode() != Instruction::Add &&
       BinInst->getOpcode() != Instruction::Sub) ||
      !BinInst->hasOneUse())
    return nullptr;

  Constant *C0, *C1;
  if (!match(I.getOperand(1), m_Constant(C1)))
    return nullptr;

  Instruction::BinaryOps ShiftOpcode = I.getOpcode();
  // Transform for add/sub only works with shl.
  if ((BinInst->getOpcode() == Instruction::Add ||
       BinInst->getOpcode() == Instruction::Sub) &&
      ShiftOpcode != Instruction::Shl)
    return nullptr;

  Type *Ty = I.getType();

  // Find a matching shift by constant. The fold is not valid if the sum
  // of the shift values equals or exceeds bitwidth.
  Value *X, *Y;
  auto matchFirstShift = [&](Value *V, Value *W) {
    unsigned Size = Ty->getScalarSizeInBits();
    APInt Threshold(Size, Size);
    return match(V, m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0))) &&
           (V->hasOneUse() || match(W, m_ImmConstant())) &&
           match(ConstantExpr::getAdd(C0, C1),
                 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
  };

  // Logic ops and Add are commutative, so check each operand for a match. Sub
  // is not, so we cannot reorder if we match operand(1); we need to keep the
  // operands in their original positions.
  bool FirstShiftIsOp1 = false;
  if (matchFirstShift(BinInst->getOperand(0), BinInst->getOperand(1)))
    Y = BinInst->getOperand(1);
  else if (matchFirstShift(BinInst->getOperand(1), BinInst->getOperand(0))) {
    Y = BinInst->getOperand(0);
    FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
  } else
    return nullptr;

  // shift (binop (shift X, C0), Y), C1 -> binop (shift X, C0+C1), (shift Y, C1)
  Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1);
  Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
  Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, C1);
  Value *Op1 = FirstShiftIsOp1 ? NewShift2 : NewShift1;
  Value *Op2 = FirstShiftIsOp1 ? NewShift1 : NewShift2;
  return BinaryOperator::Create(BinInst->getOpcode(), Op1, Op2);
}
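// Illustrative instance of the fold above (assuming i32 and one-use values):
//   shl (add (shl %x, 2), %y), 3  -->  add (shl %x, 5), (shl %y, 3)
// since shl distributes over add modulo 2^N and 2 + 3 = 5 u< 32.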
Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  assert(Op0->getType() == Op1->getType());
  Type *Ty = I.getType();

  // If the shift amount is a one-use `sext`, we can demote it to `zext`.
  Value *Y;
  if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) {
    Value *NewExt = Builder.CreateZExt(Y, Ty, Op1->getName());
    return BinaryOperator::Create(I.getOpcode(), Op0, NewExt);
  }

  // See if we can fold away this shift.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // Try to fold constant and into select arguments.
  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

  Constant *CUI;
  if (match(Op1, m_ImmConstant(CUI)))
    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
      return Res;

  if (auto *NewShift = cast_or_null<Instruction>(
          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)))
    return NewShift;

  // Pre-shift a constant shifted by a variable amount with constant offset:
  // C shift (A add nuw C1) --> (C shift C1) shift A
  Value *A;
  Constant *C, *C1;
  if (match(Op0, m_Constant(C)) &&
      match(Op1, m_NUWAddLike(m_Value(A), m_Constant(C1)))) {
    Value *NewC = Builder.CreateBinOp(I.getOpcode(), C, C1);
    BinaryOperator *NewShiftOp = BinaryOperator::Create(I.getOpcode(), NewC, A);
    if (I.getOpcode() == Instruction::Shl) {
      NewShiftOp->setHasNoSignedWrap(I.hasNoSignedWrap());
      NewShiftOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    } else {
      NewShiftOp->setIsExact(I.isExact());
    }
    return NewShiftOp;
  }
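  // Illustrative instance (assuming i8): shl i8 4, (add nuw %a, 2) becomes
  // shl i8 16, %a, because 4 << (%a + 2) == (4 << 2) << %a whenever the total
  // shift amount stays in bounds.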
  unsigned BitWidth = Ty->getScalarSizeInBits();

  const APInt *AC, *AddC;
  // Try to pre-shift a constant shifted by a variable amount added with a
  // negative number:
  // C << (X - AddC) --> (C >> AddC) << X
  // and
  // C >> (X - AddC) --> (C << AddC) >> X
  if (match(Op0, m_APInt(AC)) && match(Op1, m_Add(m_Value(A), m_APInt(AddC))) &&
      AddC->isNegative() && (-*AddC).ult(BitWidth)) {
    assert(!AC->isZero() && "Expected simplify of shifted zero");
    unsigned PosOffset = (-*AddC).getZExtValue();

    auto isSuitableForPreShift = [PosOffset, &I, AC]() {
      switch (I.getOpcode()) {
      default:
        return false;
      case Instruction::Shl:
        return (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) &&
               AC->eq(AC->lshr(PosOffset).shl(PosOffset));
      case Instruction::LShr:
        return I.isExact() && AC->eq(AC->shl(PosOffset).lshr(PosOffset));
      case Instruction::AShr:
        return I.isExact() && AC->eq(AC->shl(PosOffset).ashr(PosOffset));
      }
    };
    if (isSuitableForPreShift()) {
      Constant *NewC = ConstantInt::get(Ty, I.getOpcode() == Instruction::Shl
                                                ? AC->lshr(PosOffset)
                                                : AC->shl(PosOffset));
      BinaryOperator *NewShiftOp =
          BinaryOperator::Create(I.getOpcode(), NewC, A);
      if (I.getOpcode() == Instruction::Shl) {
        NewShiftOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
      } else {
        NewShiftOp->setIsExact();
      }
      return NewShiftOp;
    }
  }
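  // Illustrative instance (assuming i8): lshr exact i8 48, (add %a, -2)
  // becomes lshr exact i8 192, %a, since 48 == (48 << 2) >> 2, so the constant
  // can absorb the negative offset of the shift amount.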
  // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2.
  // Because shifts by negative values (which could occur if A were negative)
  // are undefined.
  if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Constant(C))) &&
      match(C, m_Power2())) {
    // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
    // demand the sign bit (and many others) here??
    Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(Ty, 1));
    Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName());
    return replaceOperand(I, 1, Rem);
  }
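  // Illustrative instance: for C == 8 the mask is 7, so "shl %x, (srem %a, 8)"
  // becomes "shl %x, (and %a, 7)". For non-negative %a both amounts agree; for
  // negative %a the original amount was negative (poison), so the masked
  // amount is a sound replacement.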
  if (Instruction *Logic = foldShiftOfShiftedBinOp(I, Builder))
    return Logic;

  // A shift amount of (Y | (BitWidth - 1)) has all low bits set, so it is
  // either exactly BitWidth - 1 or out of range (poison); canonicalize it.
  if (match(Op1, m_Or(m_Value(), m_SpecificInt(BitWidth - 1))))
    return replaceOperand(I, 1, ConstantInt::get(Ty, BitWidth - 1));

  // A cmp intrinsic shifted down by BitWidth - 1 extracts the sign of the
  // comparison result; rewrite it as a direct "less than" comparison.
  Instruction *CmpIntr;
  if ((I.getOpcode() == Instruction::LShr ||
       I.getOpcode() == Instruction::AShr) &&
      match(Op0, m_OneUse(m_Instruction(CmpIntr))) &&
      isa<CmpIntrinsic>(CmpIntr) &&
      match(Op1, m_SpecificInt(Ty->getScalarSizeInBits() - 1))) {
    Value *Cmp =
        Builder.CreateICmp(cast<CmpIntrinsic>(CmpIntr)->getLTPredicate(),
                           CmpIntr->getOperand(0), CmpIntr->getOperand(1));
    return CastInst::Create(I.getOpcode() == Instruction::LShr
                                ? Instruction::ZExt
                                : Instruction::SExt,
                            Cmp, Ty);
  }

  return nullptr;
}
/// Return true if we can simplify two logical (either left or right) shifts
/// that have constant shift amounts: OuterShift (InnerShift X, C1), C2.
static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl,
                                    Instruction *InnerShift,
                                    InstCombinerImpl &IC, Instruction *CxtI) {
  assert(InnerShift->isLogicalShift() && "Unexpected instruction type");

  // We need constant scalar or constant splat shifts.
  const APInt *InnerShiftConst;
  if (!match(InnerShift->getOperand(1), m_APInt(InnerShiftConst)))
    return false;

  // Two logical shifts in the same direction:
  // shl (shl X, C1), C2 -->  shl X, C1 + C2
  // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
  bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
  if (IsInnerShl == IsOuterShl)
    return true;

  // Equal shift amounts in opposite directions become bitwise 'and':
  // lshr (shl X, C), C --> and X, C'
  // shl (lshr X, C), C --> and X, C'
  if (*InnerShiftConst == OuterShAmt)
    return true;

  // If the 1st shift is bigger than the 2nd, we can fold:
  // lshr (shl X, C1), C2 -->  and (shl X, C1 - C2), C3
  // shl (lshr X, C1), C2 --> and (lshr X, C1 - C2), C3
  // but it isn't profitable unless we know the and'd out bits are already zero.
  // Also, check that the inner shift is valid (less than the type width) or
  // we'll crash trying to produce the bit mask for the 'and'.
  unsigned TypeWidth = InnerShift->getType()->getScalarSizeInBits();
  if (InnerShiftConst->ugt(OuterShAmt) && InnerShiftConst->ult(TypeWidth)) {
    unsigned InnerShAmt = InnerShiftConst->getZExtValue();
    unsigned MaskShift =
        IsInnerShl ? TypeWidth - InnerShAmt : InnerShAmt - OuterShAmt;
    APInt Mask = APInt::getLowBitsSet(TypeWidth, OuterShAmt) << MaskShift;
    if (IC.MaskedValueIsZero(InnerShift->getOperand(0), Mask, 0, CxtI))
      return true;
  }

  return false;
}
/// See if we can compute the specified value, but shifted logically to the left
/// or right by some number of bits. This should return true if the expression
/// can be computed for the same cost as the current expression tree. This is
/// used to eliminate extraneous shifting from things like:
///      %C = shl i128 %A, 64
///      %D = shl i128 %B, 96
///      %E = or i128 %C, %D
///      %F = lshr i128 %E, 64
/// where the client will ask if E can be computed shifted right by 64-bits. If
/// this succeeds, getShiftedValue() will be called to produce the value.
static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
                               InstCombinerImpl &IC, Instruction *CxtI) {
  // We can always evaluate immediate constants.
  if (match(V, m_ImmConstant()))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // We can't mutate something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  switch (I->getOpcode()) {
  default: return false;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
    return canEvaluateShifted(I->getOperand(0), NumBits, IsLeftShift, IC, I) &&
           canEvaluateShifted(I->getOperand(1), NumBits, IsLeftShift, IC, I);

  case Instruction::Shl:
  case Instruction::LShr:
    return canEvaluateShiftedShift(NumBits, IsLeftShift, I, IC, CxtI);

  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    Value *TrueVal = SI->getTrueValue();
    Value *FalseVal = SI->getFalseValue();
    return canEvaluateShifted(TrueVal, NumBits, IsLeftShift, IC, SI) &&
           canEvaluateShifted(FalseVal, NumBits, IsLeftShift, IC, SI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateShifted(IncValue, NumBits, IsLeftShift, IC, PN))
        return false;
    return true;
  }
  case Instruction::Mul: {
    const APInt *MulConst;
    // We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C')
    return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) &&
           MulConst->isNegatedPowerOf2() && MulConst->countr_zero() == NumBits;
  }
  }
}
/// Fold OuterShift (InnerShift X, C1), C2.
/// See canEvaluateShiftedShift() for the constraints on these instructions.
static Value *foldShiftedShift(BinaryOperator *InnerShift, unsigned OuterShAmt,
                               bool IsOuterShl,
                               InstCombiner::BuilderTy &Builder) {
  bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
  Type *ShType = InnerShift->getType();
  unsigned TypeWidth = ShType->getScalarSizeInBits();

  // We only accept shifts-by-a-constant in canEvaluateShifted().
  const APInt *C1;
  match(InnerShift->getOperand(1), m_APInt(C1));
  unsigned InnerShAmt = C1->getZExtValue();

  // Change the shift amount and clear the appropriate IR flags.
  auto NewInnerShift = [&](unsigned ShAmt) {
    InnerShift->setOperand(1, ConstantInt::get(ShType, ShAmt));
    if (IsInnerShl) {
      InnerShift->setHasNoUnsignedWrap(false);
      InnerShift->setHasNoSignedWrap(false);
    } else {
      InnerShift->setIsExact(false);
    }
    return InnerShift;
  };

  // Two logical shifts in the same direction:
  // shl (shl X, C1), C2 -->  shl X, C1 + C2
  // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
  if (IsInnerShl == IsOuterShl) {
    // If this is an oversized composite shift, then unsigned shifts get 0.
    if (InnerShAmt + OuterShAmt >= TypeWidth)
      return Constant::getNullValue(ShType);

    return NewInnerShift(InnerShAmt + OuterShAmt);
  }

  // Equal shift amounts in opposite directions become bitwise 'and':
  // lshr (shl X, C), C --> and X, C'
  // shl (lshr X, C), C --> and X, C'
  if (InnerShAmt == OuterShAmt) {
    APInt Mask = IsInnerShl
                     ? APInt::getLowBitsSet(TypeWidth, TypeWidth - OuterShAmt)
                     : APInt::getHighBitsSet(TypeWidth, TypeWidth - OuterShAmt);
    Value *And = Builder.CreateAnd(InnerShift->getOperand(0),
                                   ConstantInt::get(ShType, Mask));
    if (auto *AndI = dyn_cast<Instruction>(And)) {
      AndI->moveBefore(InnerShift);
      AndI->takeName(InnerShift);
    }
    return And;
  }

  assert(InnerShAmt > OuterShAmt &&
         "Unexpected opposite direction logical shift pair");

  // In general, we would need an 'and' for this transform, but
  // canEvaluateShiftedShift() guarantees that the masked-off bits are not used.
  // lshr (shl X, C1), C2 --> shl X, C1 - C2
  // shl (lshr X, C1), C2 --> lshr X, C1 - C2
  return NewInnerShift(InnerShAmt - OuterShAmt);
}
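// Illustrative instance (assuming i8): lshr (shl X, 5), 3 becomes shl X, 2
// here; this is only reached after canEvaluateShiftedShift() proved that the
// bits of X the shift pair would have masked off (bits 3..5) are zero.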
/// When canEvaluateShifted() returns true for an expression, this function
/// inserts the new computation that produces the shifted value.
static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombinerImpl &IC, const DataLayout &DL) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      return IC.Builder.CreateShl(C, NumBits);
    else
      return IC.Builder.CreateLShr(C, NumBits);
  }

  Instruction *I = cast<Instruction>(V);
  IC.addToWorklist(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
    I->setOperand(
        0, getShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
    I->setOperand(
        1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
    return I;

  case Instruction::Shl:
  case Instruction::LShr:
    return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift,
                            IC.Builder);

  case Instruction::Select:
    I->setOperand(
        1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
    I->setOperand(
        2, getShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, getShiftedValue(PN->getIncomingValue(i), NumBits,
                                              isLeftShift, IC, DL));
    return PN;
  }
  case Instruction::Mul: {
    assert(!isLeftShift && "Unexpected shift direction!");
    auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0));
    IC.InsertNewInstWith(Neg, I->getIterator());
    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
    APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits);
    auto *And = BinaryOperator::CreateAnd(Neg,
                                          ConstantInt::get(I->getType(), Mask));
    And->takeName(I);
    return IC.InsertNewInstWith(And, I->getIterator());
  }
  }
}
// If this is a bitwise operator or add with a constant RHS we might be able
// to pull it through a shift.
static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
                                         BinaryOperator *BO) {
  switch (BO->getOpcode()) {
  default:
    return false; // Do not perform transform!
  case Instruction::Add:
    return Shift.getOpcode() == Instruction::Shl;
  case Instruction::Or:
  case Instruction::And:
    return true;
  case Instruction::Xor:
    // Do not change a 'not' of logical shift because that would create a
    // normal 'xor'. The 'not' is likely better for analysis, SCEV, and codegen.
    return !(Shift.isLogicalShift() && match(BO, m_Not(m_Value())));
  }
}
Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1,
                                                   BinaryOperator &I) {
  // (C2 << X) << C1 --> (C2 << C1) << X
  // (C2 >> X) >> C1 --> (C2 >> C1) >> X
  Constant *C2;
  Value *X;
  bool IsLeftShift = I.getOpcode() == Instruction::Shl;
  if (match(Op0, m_BinOp(I.getOpcode(), m_ImmConstant(C2), m_Value(X)))) {
    Instruction *R = BinaryOperator::Create(
        I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X);
    BinaryOperator *BO0 = cast<BinaryOperator>(Op0);
    if (IsLeftShift) {
      R->setHasNoUnsignedWrap(I.hasNoUnsignedWrap() &&
                              BO0->hasNoUnsignedWrap());
      R->setHasNoSignedWrap(I.hasNoSignedWrap() && BO0->hasNoSignedWrap());
    } else {
      R->setIsExact(I.isExact() && BO0->isExact());
    }
    return R;
  }

  Type *Ty = I.getType();
  unsigned TypeBits = Ty->getScalarSizeInBits();

  // (X / +DivC) >> (Width - 1) --> ext (X <= -DivC)
  // (X / -DivC) >> (Width - 1) --> ext (X >= +DivC)
  const APInt *DivC;
  if (!IsLeftShift && match(C1, m_SpecificIntAllowPoison(TypeBits - 1)) &&
      match(Op0, m_SDiv(m_Value(X), m_APInt(DivC))) && !DivC->isZero() &&
      !DivC->isMinSignedValue()) {
    Constant *NegDivC = ConstantInt::get(Ty, -(*DivC));
    ICmpInst::Predicate Pred =
        DivC->isNegative() ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_SLE;
    Value *Cmp = Builder.CreateICmp(Pred, X, NegDivC);
    auto ExtOpcode = (I.getOpcode() == Instruction::AShr) ? Instruction::SExt
                                                          : Instruction::ZExt;
    return CastInst::Create(ExtOpcode, Cmp, Ty);
  }

  const APInt *Op1C;
  if (!match(C1, m_APInt(Op1C)))
    return nullptr;

  assert(!Op1C->uge(TypeBits) &&
         "Shift over the type width should have been removed already");

  // See if we can propagate this shift into the input, this covers the trivial
  // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
  if (I.getOpcode() != Instruction::AShr &&
      canEvaluateShifted(Op0, Op1C->getZExtValue(), IsLeftShift, *this, &I)) {
    LLVM_DEBUG(
        dbgs() << "ICE: GetShiftedValue propagating shift through expression"
                  " to eliminate shift:\n  IN: "
               << *Op0 << "\n  SH: " << I << "\n");

    return replaceInstUsesWith(
        I, getShiftedValue(Op0, Op1C->getZExtValue(), IsLeftShift, *this, DL));
  }

  if (Instruction *FoldedShift = foldBinOpIntoSelectOrPhi(I))
    return FoldedShift;

  if (!Op0->hasOneUse())
    return nullptr;

  if (auto *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
    // If the operand is a bitwise operator with a constant RHS, and the
    // shift is the only use, we can pull it out of the shift.
    const APInt *Op0C;
    if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
      if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
        Value *NewRHS =
            Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(1), C1);

        Value *NewShift =
            Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), C1);
        NewShift->takeName(Op0BO);

        return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS);
      }
    }
  }

  // If we have a select that conditionally executes some binary operator,
  // see if we can pull the select and operator through the shift.
  //
  // For example, turning:
  //   shl (select C, (add X, C1), X), C2
  // Into:
  //   Y = shl X, C2
  //   select C, (add Y, C1 << C2), Y
  Value *Cond;
  BinaryOperator *TBO;
  Value *FalseVal;
  if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)),
                          m_Value(FalseVal)))) {
    const APInt *C;
    if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
        match(TBO->getOperand(1), m_APInt(C)) &&
        canShiftBinOpWithConstantRHS(I, TBO)) {
      Value *NewRHS =
          Builder.CreateBinOp(I.getOpcode(), TBO->getOperand(1), C1);

      Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, C1);
      Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS);
      return SelectInst::Create(Cond, NewOp, NewShift);
    }
  }

  BinaryOperator *FBO;
  Value *TrueVal;
  if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal),
                          m_OneUse(m_BinOp(FBO))))) {
    const APInt *C;
    if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
        match(FBO->getOperand(1), m_APInt(C)) &&
        canShiftBinOpWithConstantRHS(I, FBO)) {
      Value *NewRHS =
          Builder.CreateBinOp(I.getOpcode(), FBO->getOperand(1), C1);

      Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, C1);
      Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS);
      return SelectInst::Create(Cond, NewShift, NewOp);
    }
  }

  return nullptr;
}
// Tries to perform
//   (lshr (add (zext X), (zext Y)), K)
//   -> (icmp ult (add X, Y), X)
// where
//   - The add's operands are zexts from a K-bits integer to a bigger type.
//   - The add is only used by the shr, or by iK (or narrower) truncates.
//   - The lshr type has more than 2 bits (other types are boolean math).
//   - K > 1
// note that
//   - The resulting add cannot have nuw/nsw, else on overflow we get a
//     poison value and the transform isn't legal anymore.
Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) {
  assert(I.getOpcode() == Instruction::LShr);

  Value *Add = I.getOperand(0);
  Value *ShiftAmt = I.getOperand(1);
  Type *Ty = I.getType();

  if (Ty->getScalarSizeInBits() < 3)
    return nullptr;

  const APInt *ShAmtAPInt = nullptr;
  Value *X = nullptr, *Y = nullptr;
  if (!match(ShiftAmt, m_APInt(ShAmtAPInt)) ||
      !match(Add,
             m_Add(m_OneUse(m_ZExt(m_Value(X))), m_OneUse(m_ZExt(m_Value(Y))))))
    return nullptr;

  const unsigned ShAmt = ShAmtAPInt->getZExtValue();
  // K > 1 (a shift amount of 1 would make this boolean math).
  if (ShAmt == 1)
    return nullptr;

  // X/Y are zexts from `ShAmt`-sized ints.
  if (X->getType()->getScalarSizeInBits() != ShAmt ||
      Y->getType()->getScalarSizeInBits() != ShAmt)
    return nullptr;

  // Make sure that `Add` is only used by `I` and `ShAmt`-truncates.
  if (!Add->hasOneUse()) {
    for (User *U : Add->users()) {
      if (U == &I)
        continue;

      TruncInst *Trunc = dyn_cast<TruncInst>(U);
      if (!Trunc || Trunc->getType()->getScalarSizeInBits() > ShAmt)
        return nullptr;
    }
  }

  // Insert at Add so that the newly created `NarrowAdd` will dominate its
  // users (i.e. `Add`'s users).
  Instruction *AddInst = cast<Instruction>(Add);
  Builder.SetInsertPoint(AddInst);

  Value *NarrowAdd = Builder.CreateAdd(X, Y, "add.narrowed");
  Value *Overflow =
      Builder.CreateICmpULT(NarrowAdd, X, "add.narrowed.overflow");

  // Replace the uses of the original add with a zext of the
  // NarrowAdd's result. Note that all users at this stage are known to
  // be ShAmt-sized truncs, or the lshr itself.
  if (!Add->hasOneUse()) {
    replaceInstUsesWith(*AddInst, Builder.CreateZExt(NarrowAdd, Ty));
    eraseInstFromFunction(*AddInst);
  }

  // Replace the LShr with a zext of the overflow check.
  return new ZExtInst(Overflow, Ty);
}
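// Illustrative instance (assuming i8 X/Y zext'd to i32 and K == 8):
// ((zext X) + (zext Y)) u>> 8 is exactly the carry out of the 8-bit add,
// which is 1 iff X + Y wraps, i.e. iff (add i8 X, Y) u< X.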
// Try to set nuw/nsw flags on shl or exact flag on lshr/ashr using knownbits.
static bool setShiftFlags(BinaryOperator &I, const SimplifyQuery &Q) {
  assert(I.isShift() && "Expected a shift as input");
  // We already have all the flags.
  if (I.getOpcode() == Instruction::Shl) {
    if (I.hasNoUnsignedWrap() && I.hasNoSignedWrap())
      return false;
  } else {
    if (I.isExact())
      return false;

    // shr (shl X, Y), Y is always exact: the low Y bits of (X << Y) are zero,
    // so the right-shift discards only zeros.
    if (match(I.getOperand(0), m_Shl(m_Value(), m_Specific(I.getOperand(1))))) {
      I.setIsExact();
      return true;
    }
  }

  // Compute what we know about shift count.
  KnownBits KnownCnt = computeKnownBits(I.getOperand(1), /* Depth */ 0, Q);
  unsigned BitWidth = KnownCnt.getBitWidth();
  // Since shift produces a poison value if RHS is equal to or larger than the
  // bit width, we can safely assume that RHS is less than the bit width.
  uint64_t MaxCnt = KnownCnt.getMaxValue().getLimitedValue(BitWidth - 1);

  KnownBits KnownAmt = computeKnownBits(I.getOperand(0), /* Depth */ 0, Q);
  bool Changed = false;

  if (I.getOpcode() == Instruction::Shl) {
    // If we have at least as many leading zeros as the maximum shift cnt, we
    // have nuw.
    if (!I.hasNoUnsignedWrap() && MaxCnt <= KnownAmt.countMinLeadingZeros()) {
      I.setHasNoUnsignedWrap();
      Changed = true;
    }
    // If we have more sign bits than the maximum shift cnt, we have nsw.
    if (!I.hasNoSignedWrap()) {
      if (MaxCnt < KnownAmt.countMinSignBits() ||
          MaxCnt < ComputeNumSignBits(I.getOperand(0), Q.DL, /*Depth*/ 0, Q.AC,
                                      Q.CxtI, Q.DT)) {
        I.setHasNoSignedWrap();
        Changed = true;
      }
    }
    return Changed;
  }

  // If we have at least as many trailing zeros as the maximum count, then we
  // have exact.
  Changed = MaxCnt <= KnownAmt.countMinTrailingZeros();
  I.setIsExact(Changed);

  return Changed;
}
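// Illustrative instance: if the first operand is known to have at least 4
// trailing zero bits and the shift count is known u<= 4, an lshr/ashr only
// discards zero bits, so it can be marked exact.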
Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
  const SimplifyQuery Q = SQ.getWithInstruction(&I);

  if (Value *V = simplifyShlInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *V = commonShiftTransforms(I))
    return V;

  if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder))
    return V;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();

  const APInt *C;
  if (match(Op1, m_APInt(C))) {
    unsigned ShAmtC = C->getZExtValue();

    // shl (zext X), C --> zext (shl X, C)
    // This is only valid if X would have zeros shifted out.
    Value *X;
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      if (ShAmtC < SrcWidth &&
          MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmtC), 0, &I))
        return new ZExtInst(Builder.CreateShl(X, ShAmtC), Ty);
    }

    // (X >> C) << C --> X & (-1 << C)
    if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1)))) {
      APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
    }

    const APInt *C1;
    if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(C1)))) &&
        C1->ult(BitWidth)) {
      unsigned ShrAmt = C1->getZExtValue();
      if (ShrAmt < ShAmtC) {
        // If C1 < C: (X >>?,exact C1) << C --> X << (C - C1)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
        auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
        NewShl->setHasNoUnsignedWrap(
            I.hasNoUnsignedWrap() ||
            (ShrAmt != 0 &&
             cast<Instruction>(Op0)->getOpcode() == Instruction::LShr &&
             I.hasNoSignedWrap()));
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        return NewShl;
      }
      if (ShrAmt > ShAmtC) {
        // If C1 > C: (X >>?exact C1) << C --> X >>?exact (C1 - C)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
        auto *NewShr = BinaryOperator::Create(
            cast<BinaryOperator>(Op0)->getOpcode(), X, ShiftDiff);
        NewShr->setIsExact(true);
        return NewShr;
      }
    }

    if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(C1)))) &&
        C1->ult(BitWidth)) {
      unsigned ShrAmt = C1->getZExtValue();
      if (ShrAmt < ShAmtC) {
        // If C1 < C: (X >>? C1) << C --> (X << (C - C1)) & (-1 << C)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
        auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
        NewShl->setHasNoUnsignedWrap(
            I.hasNoUnsignedWrap() ||
            (ShrAmt != 0 &&
             cast<Instruction>(Op0)->getOpcode() == Instruction::LShr &&
             I.hasNoSignedWrap()));
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        Builder.Insert(NewShl);
        APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
        return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
      }
      if (ShrAmt > ShAmtC) {
        // If C1 > C: (X >>? C1) << C --> (X >>? (C1 - C)) & (-1 << C)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
        auto *OldShr = cast<BinaryOperator>(Op0);
        auto *NewShr =
            BinaryOperator::Create(OldShr->getOpcode(), X, ShiftDiff);
        NewShr->setIsExact(OldShr->isExact());
        Builder.Insert(NewShr);
        APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
        return BinaryOperator::CreateAnd(NewShr, ConstantInt::get(Ty, Mask));
      }
    }

    // Similar to above, but look through an intermediate trunc instruction.
    BinaryOperator *Shr;
    if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) &&
        match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) {
      // The larger shift direction survives through the transform.
      unsigned ShrAmtC = C1->getZExtValue();
      unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC;
      Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff);
      auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl;

      // If C1 > C:
      // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) & (-1 << C)
      // If C > C1:
      // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) & (-1 << C)
      Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff");
      Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff");
      APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
      return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask));
    }

    // If we have an opposite shift by the same amount, we may be able to
    // reorder binops and shifts to eliminate math/logic.
    auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
      switch (BinOpcode) {
      default:
        return false;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
      case Instruction::Sub:
        // NOTE: Sub is not commutable and the transforms below may not be
        // valid when the shift-right is operand 1 (RHS) of the sub.
        return true;
      }
    };
    BinaryOperator *Op0BO;
    if (match(Op0, m_OneUse(m_BinOp(Op0BO))) &&
        isSuitableBinOpcode(Op0BO->getOpcode())) {
      // Commute so shift-right is on LHS of the binop.
      // (Y bop (X >> C)) << C         ->  ((X >> C) bop Y) << C
      // (Y bop ((X >> C) & CC)) << C  ->  (((X >> C) & CC) bop Y) << C
      Value *Shr = Op0BO->getOperand(0);
      Value *Y = Op0BO->getOperand(1);
      const APInt *CC;
      if (Op0BO->isCommutative() && Y->hasOneUse() &&
          (match(Y, m_Shr(m_Value(), m_Specific(Op1))) ||
           match(Y, m_And(m_OneUse(m_Shr(m_Value(), m_Specific(Op1))),
                          m_APInt(CC)))))
        std::swap(Shr, Y);

      // ((X >> C) bop Y) << C  ->  (X bop (Y << C)) & (~0 << C)
      if (match(Shr, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) {
        // (Y << C)
        Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
        // (X bop (Y << C))
        Value *B =
            Builder.CreateBinOp(Op0BO->getOpcode(), X, YS, Shr->getName());
        unsigned Op1Val = C->getLimitedValue(BitWidth);
        APInt Bits = APInt::getHighBitsSet(BitWidth, BitWidth - Op1Val);
        Constant *Mask = ConstantInt::get(Ty, Bits);
        return BinaryOperator::CreateAnd(B, Mask);
      }

      // (((X >> C) & CC) bop Y) << C  ->  (X & (CC << C)) bop (Y << C)
      if (match(Shr,
                m_OneUse(m_And(m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))),
                               m_APInt(CC))))) {
        // (Y << C)
        Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
        // X & (CC << C)
        Value *M = Builder.CreateAnd(X, ConstantInt::get(Ty, CC->shl(*C)),
                                     X->getName() + ".mask");
        auto *NewOp = BinaryOperator::Create(Op0BO->getOpcode(), M, YS);
        if (auto *Disjoint = dyn_cast<PossiblyDisjointInst>(Op0BO);
            Disjoint && Disjoint->isDisjoint())
          cast<PossiblyDisjointInst>(NewOp)->setIsDisjoint(true);
        return NewOp;
      }
    }

    // (C1 - X) << C --> (C1 << C) - (X << C)
    if (match(Op0, m_OneUse(m_Sub(m_APInt(C1), m_Value(X))))) {
      Constant *NewLHS = ConstantInt::get(Ty, C1->shl(*C));
      Value *NewShift = Builder.CreateShl(X, Op1);
      return BinaryOperator::CreateSub(NewLHS, NewShift);
    }
  }

  if (setShiftFlags(I, Q))
    return &I;

  // Transform  (x >> y) << y  to  x & (-1 << y)
  // Valid for any type of right-shift.
  Value *X;
  if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    Value *Mask = Builder.CreateShl(AllOnes, Op1);
    return BinaryOperator::CreateAnd(Mask, X);
  }

  // Transform  (-1 >> y) << y  to  -1 << y
  if (match(Op0, m_LShr(m_AllOnes(), m_Specific(Op1)))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    return BinaryOperator::CreateShl(AllOnes, Op1);
  }

  Constant *C1;
  if (match(Op1, m_ImmConstant(C1))) {
    Constant *C2;
    // (X * C2) << C1 --> X * (C2 << C1)
    if (match(Op0, m_Mul(m_Value(X), m_ImmConstant(C2))))
      return BinaryOperator::CreateMul(X, Builder.CreateShl(C2, C1));

    // shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
    if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
      auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
      return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
    }
  }

  if (match(Op0, m_One())) {
    // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1
    if (match(Op1, m_Sub(m_SpecificInt(BitWidth - 1), m_Value(X))))
      return BinaryOperator::CreateLShr(
          ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);
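    // Illustrative instance (assuming i8): 1 << (7 - %x) == 128 >> %x, since
    // both equal 2^(7 - %x) for %x in [0, 7].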
    // Canonicalize "extract lowest set bit" using cttz to and-with-negate:
    // 1 << (cttz X) --> -X & X
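    // Illustrative instance: for X == 12 (0b1100), cttz(X) == 2 and
    // 1 << 2 == 4, which matches -12 & 12 == 4.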
    if (match(Op1,
              m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(X), m_Value())))) {
      Value *NegX = Builder.CreateNeg(X, "neg");
      return BinaryOperator::CreateAnd(NegX, X);
    }
  }

  return nullptr;
}
Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
  if (Value *V = simplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  Value *X, *Y;
  unsigned BitWidth = Ty->getScalarSizeInBits();

  // (iN (~X) u>> (N - 1)) --> zext (X > -1)
  if (match(Op0, m_OneUse(m_Not(m_Value(X)))) &&
      match(Op1, m_SpecificIntAllowPoison(BitWidth - 1)))
    return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty);

  // ((X << nuw Z) sub nuw Y) >>u exact Z --> X sub nuw (Y >>u exact Z)
  if (I.isExact() &&
      match(Op0, m_OneUse(m_NUWSub(m_NUWShl(m_Value(X), m_Specific(Op1)),
                                   m_Value(Y))))) {
    Value *NewLshr = Builder.CreateLShr(Y, Op1, "", /*isExact=*/true);
    auto *NewSub = BinaryOperator::CreateNUWSub(X, NewLshr);
    NewSub->setHasNoSignedWrap(
        cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
    return NewSub;
  }

  // Fold (X + Y) / 2 --> (X & Y) iff (X u<= 1) && (Y u<= 1)
  if (match(Op0, m_Add(m_Value(X), m_Value(Y))) && match(Op1, m_One()) &&
      computeKnownBits(X, /*Depth=*/0, &I).countMaxActiveBits() <= 1 &&
      computeKnownBits(Y, /*Depth=*/0, &I).countMaxActiveBits() <= 1)
    return BinaryOperator::CreateAnd(X, Y);
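  // Illustrative check of the bool-average fold above: with X and Y both in
  // {0, 1}, (1 + 1) u>> 1 == 1 == (1 & 1), and every other combination gives
  // 0 on both sides.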
  // (sub nuw X, (Y << nuw Z)) >>u exact Z --> (X >>u exact Z) sub nuw Y
  if (I.isExact() &&
      match(Op0, m_OneUse(m_NUWSub(m_Value(X),
                                   m_NUWShl(m_Value(Y), m_Specific(Op1)))))) {
    Value *NewLshr = Builder.CreateLShr(X, Op1, "", /*isExact=*/true);
    auto *NewSub = BinaryOperator::CreateNUWSub(NewLshr, Y);
    NewSub->setHasNoSignedWrap(
        cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
    return NewSub;
  }

  auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
    switch (BinOpcode) {
    default:
      return false;
    case Instruction::Add:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
      // Sub is handled separately.
      return true;
    }
  };

  // If both the binop and the shift are nuw, then:
  // ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z)
  if (match(Op0, m_OneUse(m_c_BinOp(m_NUWShl(m_Value(X), m_Specific(Op1)),
                                    m_Value(Y))))) {
    BinaryOperator *Op0OB = cast<BinaryOperator>(Op0);
    if (isSuitableBinOpcode(Op0OB->getOpcode())) {
      if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op0);
          !OBO || OBO->hasNoUnsignedWrap()) {
        Value *NewLshr = Builder.CreateLShr(
            Y, Op1, "", I.isExact() && Op0OB->getOpcode() != Instruction::And);
        auto *NewBinOp = BinaryOperator::Create(Op0OB->getOpcode(), NewLshr, X);
        if (OBO) {
          NewBinOp->setHasNoUnsignedWrap(true);
          NewBinOp->setHasNoSignedWrap(OBO->hasNoSignedWrap());
        } else if (auto *Disjoint = dyn_cast<PossiblyDisjointInst>(Op0)) {
          cast<PossiblyDisjointInst>(NewBinOp)->setIsDisjoint(
              Disjoint->isDisjoint());
        }
        return NewBinOp;
      }
    }
  }

  const APInt *C;
  if (match(Op1, m_APInt(C))) {
    unsigned ShAmtC = C->getZExtValue();
    auto *II = dyn_cast<IntrinsicInst>(Op0);
    if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmtC &&
        (II->getIntrinsicID() == Intrinsic::ctlz ||
         II->getIntrinsicID() == Intrinsic::cttz ||
         II->getIntrinsicID() == Intrinsic::ctpop)) {
      // ctlz.i32(x)>>5  --> zext(x == 0)
      // cttz.i32(x)>>5  --> zext(x == 0)
      // ctpop.i32(x)>>5 --> zext(x == -1)
      bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop;
      Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0);
      Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS);
      return new ZExtInst(Cmp, Ty);
    }

    const APInt *C1;
    if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
      if (C1->ult(ShAmtC)) {
        unsigned ShlAmtC = C1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShlAmtC);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C --> X >>u (C - C1)
          auto *NewLShr = BinaryOperator::CreateLShr(X, ShiftDiff);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        if (Op0->hasOneUse()) {
          // (X << C1) >>u C  --> (X >>u (C - C1)) & (-1 >> C)
          Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
          return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
        }
      } else if (C1->ugt(ShAmtC)) {
        unsigned ShlAmtC = C1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C --> X <<nuw/nsw (C1 - C)
          auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
          NewShl->setHasNoUnsignedWrap(true);
          NewShl->setHasNoSignedWrap(ShAmtC > 0);
          return NewShl;
        }
        if (Op0->hasOneUse()) {
          // (X << C1) >>u C  --> X << (C1 - C) & (-1 >> C)
          Value *NewShl = Builder.CreateShl(X, ShiftDiff);
          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
          return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
        }
      } else {
        assert(*C1 == ShAmtC);
        // (X << C) >>u C --> X & (-1 >>u C)
        APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
        return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
      }
    }

    // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
    // TODO: Consolidate with the more general transform that starts from shl
    //       (the shifts are in the opposite order).
    if (match(Op0,
              m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
                               m_Value(Y))))) {
      Value *NewLshr = Builder.CreateLShr(Y, Op1);
      Value *NewAdd = Builder.CreateAdd(NewLshr, X);
      unsigned Op1Val = C->getLimitedValue(BitWidth);
      APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val);
      Constant *Mask = ConstantInt::get(Ty, Bits);
      return BinaryOperator::CreateAnd(NewAdd, Mask);
    }

    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
        (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
      assert(ShAmtC < X->getType()->getScalarSizeInBits() &&
             "Big shift not simplified to zero?");
      // lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
      Value *NewLShr = Builder.CreateLShr(X, ShAmtC);
      return new ZExtInst(NewLShr, Ty);
    }

    if (match(Op0, m_SExt(m_Value(X)))) {
      unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
      // lshr (sext i1 X to iN), C --> select (X, -1 >> C, 0)
      if (SrcTyBitWidth == 1) {
        auto *NewC = ConstantInt::get(
            Ty, APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
        return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
      }

      if ((!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType())) &&
          Op0->hasOneUse()) {
        // Are we moving the sign bit to the low bit and widening with high
        // zeros? lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
        if (ShAmtC == BitWidth - 1) {
          Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
          return new ZExtInst(NewLShr, Ty);
        }

        // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
        if (ShAmtC == BitWidth - SrcTyBitWidth) {
          // The new shift amount can't be more than the narrow source type.
          unsigned NewShAmt = std::min(ShAmtC, SrcTyBitWidth - 1);
          Value *AShr = Builder.CreateAShr(X, NewShAmt);
          return new ZExtInst(AShr, Ty);
        }
      }
    }

    if (ShAmtC == BitWidth - 1) {
      // lshr i32 or(X,-X), 31 --> zext (X != 0)
      if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X)))))
        return new ZExtInst(Builder.CreateIsNotNull(X), Ty);

      // lshr i32 (X -nsw Y), 31 --> zext (X < Y)
      if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
        return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty);

      // Check if a number is negative and odd:
      // lshr i32 (srem X, 2), 31 --> and (X >> 31), X
      if (match(Op0, m_OneUse(m_SRem(m_Value(X), m_SpecificInt(2))))) {
        Value *Signbit = Builder.CreateLShr(X, ShAmtC);
        return BinaryOperator::CreateAnd(Signbit, X);
      }

      // lshr iN (X - 1) & ~X, N-1 --> zext (X == 0)
      if (match(Op0, m_OneUse(m_c_And(m_Add(m_Value(X), m_AllOnes()),
                                      m_Not(m_Deferred(X))))))
        return new ZExtInst(Builder.CreateIsNull(X), Ty);
    }

    Instruction *TruncSrc;
    if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TruncSrc)))) &&
        match(TruncSrc, m_LShr(m_Value(X), m_APInt(C1)))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      unsigned AmtSum = ShAmtC + C1->getZExtValue();

      // If the combined shift fits in the source width:
      // (trunc (X >>u C1)) >>u C --> and (trunc (X >>u (C1 + C)), MaskC
      //
      // If the first shift covers the number of bits truncated, then the
      // mask instruction is eliminated (and so the use check is relaxed).
      if (AmtSum < SrcWidth &&
          (TruncSrc->hasOneUse() || C1->uge(SrcWidth - BitWidth))) {
        Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
        Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName());

        // If the first shift does not cover the number of bits truncated, then
        // we require a mask to get rid of high bits in the result.
        APInt MaskC = APInt::getAllOnes(BitWidth).lshr(ShAmtC);
        return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, MaskC));
      }
    }

    const APInt *MulC;
    if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
      if (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
          MulC->logBase2() == ShAmtC) {
        // Look for a "splat" mul pattern - it replicates bits across each half
        // of a value, so a right shift simplifies back to just X:
        // lshr i[2N] (mul nuw X, (2^N)+1), N --> X
        if (ShAmtC * 2 == BitWidth)
          return replaceInstUsesWith(I, X);
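        // Illustrative instance (assuming i8): lshr (mul nuw %x, 17), 4 is
        // %x, because %x * 17 == (%x << 4) | %x, with nuw implying %x u< 16.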
1522 // lshr (mul nuw (X, 2^N + 1)), N -> add nuw (X, lshr(X, N))
1523 if (Op0
->hasOneUse()) {
1524 auto *NewAdd
= BinaryOperator::CreateNUWAdd(
1525 X
, Builder
.CreateLShr(X
, ConstantInt::get(Ty
, ShAmtC
), "",
1527 NewAdd
->setHasNoSignedWrap(
1528 cast
<OverflowingBinaryOperator
>(Op0
)->hasNoSignedWrap());
1533 // The one-use check is not strictly necessary, but codegen may not be
1534 // able to invert the transform and perf may suffer with an extra mul
1536 if (Op0
->hasOneUse()) {
1537 APInt NewMulC
= MulC
->lshr(ShAmtC
);
1538 // if c is divisible by (1 << ShAmtC):
1539 // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC)
1540 if (MulC
->eq(NewMulC
.shl(ShAmtC
))) {
1542 BinaryOperator::CreateNUWMul(X
, ConstantInt::get(Ty
, NewMulC
));
1543 assert(ShAmtC
!= 0 &&
1544 "lshr X, 0 should be handled by simplifyLShrInst.");
1545 NewMul
->setHasNoSignedWrap(true);

    // lshr (mul nsw (X, 2^N + 1)), N -> add nsw (X, lshr(X, N))
    if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC))))) {
      if (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
          MulC->logBase2() == ShAmtC) {
        return BinaryOperator::CreateNSWAdd(
            X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
                                  I.isExact()));
      }
    }

    // Try to narrow bswap.
    // In the case where the shift amount equals the bitwidth difference, the
    // shift is eliminated.
    if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::bswap>(
                       m_OneUse(m_ZExt(m_Value(X))))))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      unsigned WidthDiff = BitWidth - SrcWidth;
      if (SrcWidth % 16 == 0) {
        Value *NarrowSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
        if (ShAmtC >= WidthDiff) {
          // (bswap (zext X)) >> C --> zext (bswap X >> C')
          Value *NewShift =
              Builder.CreateLShr(NarrowSwap, ShAmtC - WidthDiff);
          return new ZExtInst(NewShift, Ty);
        } else {
          // (bswap (zext X)) >> C --> (zext (bswap X)) << C'
          Value *NewZExt = Builder.CreateZExt(NarrowSwap, Ty);
          Constant *ShiftDiff = ConstantInt::get(Ty, WidthDiff - ShAmtC);
          return BinaryOperator::CreateShl(NewZExt, ShiftDiff);
        }
      }
    }
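
    // Illustrative i16-in-i32 instantiation where the narrow shift is elided:
    //   %z = zext i16 %x to i32
    //   %b = call i32 @llvm.bswap.i32(i32 %z)
    //   %r = lshr i32 %b, 16             ; ShAmtC == WidthDiff
    // -->
    //   %nb = call i16 @llvm.bswap.i16(i16 %x)
    //   %r  = zext i16 %nb to i32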

    // Reduce add-carry of bools to logic:
    // ((zext BoolX) + (zext BoolY)) >> 1 --> zext (BoolX && BoolY)
    Value *BoolX, *BoolY;
    if (ShAmtC == 1 && match(Op0, m_Add(m_Value(X), m_Value(Y))) &&
        match(X, m_ZExt(m_Value(BoolX))) && match(Y, m_ZExt(m_Value(BoolY))) &&
        BoolX->getType()->isIntOrIntVectorTy(1) &&
        BoolY->getType()->isIntOrIntVectorTy(1) &&
        (X->hasOneUse() || Y->hasOneUse() || Op0->hasOneUse())) {
      Value *And = Builder.CreateAnd(BoolX, BoolY);
      return new ZExtInst(And, Ty);
    }
  }
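
  // For the preceding fold, note (illustrative): the sum of two zext'ed i1
  // values is at most 2, so bit 1 is set exactly when both bools are true.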

  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  if (setShiftFlags(I, Q))
    return &I;

  // Transform (x << y) >> y to x & (-1 >> y)
  if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    Value *Mask = Builder.CreateLShr(AllOnes, Op1);
    return BinaryOperator::CreateAnd(Mask, X);
  }
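
  // e.g. for i8 with %y == 3 (illustrative):
  //   ((%x << 3) u>> 3) == %x & (0xFF u>> 3) == %x & 0x1F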

  // Transform (-1 << y) >> y to -1 >> y
  if (match(Op0, m_Shl(m_AllOnes(), m_Specific(Op1)))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    return BinaryOperator::CreateLShr(AllOnes, Op1);
  }
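
  // e.g. for i8 with %y == 3 (illustrative):
  //   (0xFF << 3) u>> 3 == 0xF8 u>> 3 == 0x1F == 0xFF u>> 3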

  if (Instruction *Overflow = foldLShrOverflowBit(I))
    return Overflow;

  return nullptr;
}

Instruction *
InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract(
    BinaryOperator &OldAShr) {
  assert(OldAShr.getOpcode() == Instruction::AShr &&
         "Must be called with arithmetic right-shift instruction only.");

  // Check that constant C is a splat of the element-wise bitwidth of V.
  auto BitWidthSplat = [](Constant *C, Value *V) {
    return match(
        C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
                              APInt(C->getType()->getScalarSizeInBits(),
                                    V->getType()->getScalarSizeInBits())));
  };

  // It should look like variable-length sign-extension on the outside:
  // (Val << (bitwidth(Val)-Nbits)) a>> (bitwidth(Val)-Nbits)
  Value *NBits;
  Instruction *MaybeTrunc;
  Constant *C1, *C2;
  if (!match(&OldAShr,
             m_AShr(m_Shl(m_Instruction(MaybeTrunc),
                          m_ZExtOrSelf(m_Sub(m_Constant(C1),
                                             m_ZExtOrSelf(m_Value(NBits))))),
                    m_ZExtOrSelf(m_Sub(m_Constant(C2),
                                       m_ZExtOrSelf(m_Deferred(NBits)))))) ||
      !BitWidthSplat(C1, &OldAShr) || !BitWidthSplat(C2, &OldAShr))
    return nullptr;

  // There may or may not be a truncation after the outer two shifts.
  Instruction *HighBitExtract;
  match(MaybeTrunc, m_TruncOrSelf(m_Instruction(HighBitExtract)));
  bool HadTrunc = MaybeTrunc != HighBitExtract;

  // And finally, the innermost part of the pattern must be a right-shift.
  Value *X, *NumLowBitsToSkip;
  if (!match(HighBitExtract, m_Shr(m_Value(X), m_Value(NumLowBitsToSkip))))
    return nullptr;

  // Said right-shift must extract high NBits bits - C0 must be its bitwidth.
  Constant *C0;
  if (!match(NumLowBitsToSkip,
             m_ZExtOrSelf(
                 m_Sub(m_Constant(C0), m_ZExtOrSelf(m_Specific(NBits))))) ||
      !BitWidthSplat(C0, HighBitExtract))
    return nullptr;

  // Since NBits is identical for all shifts, if the outermost and innermost
  // shifts are identical, then the outermost shifts are redundant.
  // If we had a truncation, do keep it though.
  if (HighBitExtract->getOpcode() == OldAShr.getOpcode())
    return replaceInstUsesWith(OldAShr, MaybeTrunc);

  // Else, if there was a truncation, then we need to ensure that one
  // instruction will go away.
  if (HadTrunc && !match(&OldAShr, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
    return nullptr;

  // Finally, bypass the two innermost shifts, and perform the outermost shift
  // on the operands of the innermost shift.
  Instruction *NewAShr =
      BinaryOperator::Create(OldAShr.getOpcode(), X, NumLowBitsToSkip);
  NewAShr->copyIRFlags(HighBitExtract); // We can preserve 'exact'-ness.
  if (!HadTrunc)
    return NewAShr;

  Builder.Insert(NewAShr);
  return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType());
}

Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
  if (Value *V = simplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  const APInt *ShAmtAPInt;
  if (match(Op1, m_APInt(ShAmtAPInt)) && ShAmtAPInt->ult(BitWidth)) {
    unsigned ShAmt = ShAmtAPInt->getZExtValue();

    // If the shift amount equals the difference in width of the destination
    // and source scalar types:
    // ashr (shl (zext X), C), C --> sext X
    Value *X;
    if (match(Op0, m_Shl(m_ZExt(m_Value(X)), m_Specific(Op1))) &&
        ShAmt == BitWidth - X->getType()->getScalarSizeInBits())
      return new SExtInst(X, Ty);
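
    // e.g. (illustrative): ashr (shl (zext i8 %x to i32), 24), 24
    //        --> sext i8 %x to i32, since the shl places the narrow value's
    //        sign bit into the sign bit of the wide value.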

    // We can't handle (X << C1) >>s C2. It shifts arbitrary bits in. However,
    // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
    const APInt *ShOp1;
    if (match(Op0, m_NSWShl(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      unsigned ShlAmt = ShOp1->getZExtValue();
      if (ShlAmt < ShAmt) {
        // (X <<nsw C1) >>s C2 --> X >>s (C2 - C1)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
        auto *NewAShr = BinaryOperator::CreateAShr(X, ShiftDiff);
        NewAShr->setIsExact(I.isExact());
        return NewAShr;
      }
      if (ShlAmt > ShAmt) {
        // (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
        auto *NewShl = BinaryOperator::Create(Instruction::Shl, X, ShiftDiff);
        NewShl->setHasNoSignedWrap(true);
        return NewShl;
      }
    }
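
    // Illustrative instantiations of the two cases above:
    //   (%x <<nsw 3) >>s 5 --> %x >>s 2
    //   (%x <<nsw 5) >>s 3 --> %x <<nsw 2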

    if (match(Op0, m_AShr(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
      // Oversized arithmetic shifts replicate the sign bit.
      AmtSum = std::min(AmtSum, BitWidth - 1);
      // (X >>s C1) >>s C2 --> X >>s (C1 + C2)
      return BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
    }
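
    // e.g. for i32 (illustrative): (%x >>s 20) >>s 20 --> %x >>s 31; the sum
    // 40 is clamped to BitWidth - 1 because an oversized ashr would just keep
    // replicating the sign bit.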

    if (match(Op0, m_OneUse(m_SExt(m_Value(X)))) &&
        (Ty->isVectorTy() || shouldChangeType(Ty, X->getType()))) {
      // ashr (sext X), C --> sext (ashr X, C')
      Type *SrcTy = X->getType();
      ShAmt = std::min(ShAmt, SrcTy->getScalarSizeInBits() - 1);
      Value *NewSh = Builder.CreateAShr(X, ConstantInt::get(SrcTy, ShAmt));
      return new SExtInst(NewSh, Ty);
    }
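
    // e.g. (illustrative): ashr (sext i8 %x to i32), 24
    //        --> sext (ashr i8 %x, 7) to i32   ; 24 is clamped to 8 - 1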

    if (ShAmt == BitWidth - 1) {
      // ashr i32 or(X,-X), 31 --> sext (X != 0)
      if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X)))))
        return new SExtInst(Builder.CreateIsNotNull(X), Ty);

      // ashr i32 (X -nsw Y), 31 --> sext (X < Y)
      Value *Y;
      if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
        return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);

      // ashr iN (X - 1) & ~X, N-1 --> sext (X == 0)
      if (match(Op0, m_OneUse(m_c_And(m_Add(m_Value(X), m_AllOnes()),
                                      m_Not(m_Deferred(X))))))
        return new SExtInst(Builder.CreateIsNull(X), Ty);
    }
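
    // These three folds mirror the BitWidth - 1 lshr folds above, producing
    // an all-ones/all-zeros mask (sext) instead of a 0/1 value (zext).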

    const APInt *MulC;
    if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC)))) &&
        (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
         MulC->logBase2() == ShAmt &&
         (ShAmt < BitWidth - 1))) /* Minus 1 for the sign bit */ {
      // ashr (mul nsw (X, 2^N + 1)), N -> add nsw (X, ashr(X, N))
      auto *NewAdd = BinaryOperator::CreateNSWAdd(
          X,
          Builder.CreateAShr(X, ConstantInt::get(Ty, ShAmt), "", I.isExact()));
      NewAdd->setHasNoUnsignedWrap(
          cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap());
      return NewAdd;
    }
  }
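
  // e.g. (illustrative): ashr (mul nsw i8 %x, 17), 4
  //        --> add nsw i8 %x, (ashr i8 %x, 4)   ; 17 == 2^4 + 1, 4 < 7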

  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  if (setShiftFlags(I, Q))
    return &I;

  // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
  // as the pattern to splat the lowest bit.
  // FIXME: iff X is already masked, we don't need the one-use check.
  Value *X;
  if (match(Op1, m_SpecificIntAllowPoison(BitWidth - 1)) &&
      match(Op0, m_OneUse(m_Shl(m_Value(X),
                                m_SpecificIntAllowPoison(BitWidth - 1))))) {
    Constant *Mask = ConstantInt::get(Ty, 1);
    // Retain the knowledge about the ignored lanes.
    Mask = Constant::mergeUndefsWith(
        Constant::mergeUndefsWith(Mask, cast<Constant>(Op1)),
        cast<Constant>(cast<Instruction>(Op0)->getOperand(1)));
    X = Builder.CreateAnd(X, Mask);
    return BinaryOperator::CreateNeg(X);
  }
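
  // e.g. for i32 (illustrative): (%x << 31) a>> 31 splats bit 0 across the
  // whole word, which is exactly -(%x & 1):
  //   %and = and i32 %x, 1
  //   %r   = sub i32 0, %and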

  if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
    return R;

  // See if we can turn a signed shr into an unsigned shr.
  if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) {
    Instruction *Lshr = BinaryOperator::CreateLShr(Op0, Op1);
    Lshr->setIsExact(I.isExact());
    return Lshr;
  }
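
  // (Illustrative: when the sign bit of %x is known zero, ashr i32 %x, %y and
  // lshr i32 %x, %y compute the same value, and the unsigned form is the
  // preferred canonical one.)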

  // ashr (xor %x, -1), %y --> xor (ashr %x, %y), -1
  if (match(Op0, m_OneUse(m_Not(m_Value(X))))) {
    // Note that we must drop 'exact'-ness of the shift!
    // Note that we can't keep undefs in the -1 vector constant!
    auto *NewAShr = Builder.CreateAShr(X, Op1, Op0->getName() + ".not");
    return BinaryOperator::CreateNot(NewAShr);
  }
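
  // e.g. for i8 with %x == 0x40, %y == 3 (illustrative):
  //   ashr (xor i8 0x40, -1), 3 == ashr i8 0xBF, 3 == 0xF7
  //   xor (ashr i8 0x40, 3), -1 == xor i8 0x08, -1 == 0xF7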