//===- InstCombineShifts.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitShl, visitLShr, and visitAShr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"
bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
                                        Value *ShAmt1) {
  // We have two shift amounts from two different shifts. The types of those
  // shift amounts may not match. If that's the case, let's bail out now.
  if (ShAmt0->getType() != ShAmt1->getType())
    return false;

  // As input, we have the following pattern:
  //   Sh0 (Sh1 X, Q), K
  // We want to rewrite that as:
  //   Sh x, (Q+K)  iff (Q+K) u< bitwidth(x)
  // While we know that originally (Q+K) would not overflow
  // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
  // shift amounts, so it may now overflow in the smaller bitwidth.
  // To ensure that does not happen, we need to ensure that the total maximal
  // shift amount is still representable in that smaller bit width.
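  // For instance, with i64 shifts whose amounts were zext'ed from i8, the
  // maximal possible total is (64-1) + (64-1) = 126, which an i8 amount can
  // still represent (126 u<= 255); had the amounts been zext'ed from i6
  // (maximum 63), we would have to bail out.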
  unsigned MaximalPossibleTotalShiftAmount =
      (Sh0->getType()->getScalarSizeInBits() - 1) +
      (Sh1->getType()->getScalarSizeInBits() - 1);
  APInt MaximalRepresentableShiftAmount =
      APInt::getAllOnes(ShAmt0->getType()->getScalarSizeInBits());
  return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
}

// Given pattern:
//   (x shiftopcode Q) shiftopcode K
// we should rewrite it as
//   x shiftopcode (Q+K)  iff (Q+K) u< bitwidth(x)
//
// This is valid for any shift, but they must be identical, and we must be
// careful in case we have (zext(Q)+zext(K)) and look past extensions,
// (Q+K) must not overflow or else (Q+K) u< bitwidth(x) is bogus.
//
// AnalyzeForSignBitExtraction indicates that we will only analyze whether this
// pattern has any 2 right-shifts that sum to 1 less than original bit width.
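//
// For instance (i8):
//   (x l>> 3) l>> 2  -->  x l>> 5   (since 3+2 = 5 u< 8)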
Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
    BinaryOperator *Sh0, const SimplifyQuery &SQ,
    bool AnalyzeForSignBitExtraction) {
  // Look for a shift of some instruction, ignore zext of shift amount if any.
  Instruction *Sh0Op0;
  Value *ShAmt0;
  if (!match(Sh0,
             m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0)))))
    return nullptr;

  // If there is a truncation between the two shifts, we must make note of it
  // and look through it. The truncation imposes additional constraints on the
  // transform.
  Instruction *Sh1;
  Value *Trunc = nullptr;
  if (!match(Sh0Op0,
             m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)),
                                      m_Value(Trunc)),
                         m_Instruction(Sh1))))
    return nullptr;

  // Inner shift: (x shiftopcode ShAmt1)
  // Like with other shift, ignore zext of shift amount if any.
  Value *X, *ShAmt1;
  if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
    return nullptr;

  // Verify that it would be safe to try to add those two shift amounts.
  if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1))
    return nullptr;

  // We are only looking for signbit extraction if we have two right shifts.
  bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) &&
                           match(Sh1, m_Shr(m_Value(), m_Value()));
  // ... and if it's not two right-shifts, we know the answer already.
  if (AnalyzeForSignBitExtraction && !HadTwoRightShifts)
    return nullptr;

  // The shift opcodes must be identical, unless we are just checking whether
  // this pattern can be interpreted as a sign-bit-extraction.
  Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
  bool IdenticalShOpcodes = Sh0->getOpcode() == Sh1->getOpcode();
  if (!IdenticalShOpcodes && !AnalyzeForSignBitExtraction)
    return nullptr;

  // If we saw truncation, we'll need to produce an extra instruction,
  // and for that one of the operands of the shift must be one-use,
  // unless of course we don't actually plan to produce any instructions here.
  if (Trunc && !AnalyzeForSignBitExtraction &&
      !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
    return nullptr;

  // Can we fold (ShAmt0+ShAmt1) ?
  auto *NewShAmt = dyn_cast_or_null<Constant>(
      simplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
                      SQ.getWithInstruction(Sh0)));
  if (!NewShAmt)
    return nullptr; // Did not simplify.
  unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits();
  unsigned XBitWidth = X->getType()->getScalarSizeInBits();
  // Is the new shift amount smaller than the bit width of inner/new shift?
  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
                                          APInt(NewShAmtBitWidth, XBitWidth))))
    return nullptr; // FIXME: could perform constant-folding.

  // If there was a truncation, and we have a right-shift, we can only fold if
  // we are left with the original sign bit. Likewise, if we were just checking
  // that this is a sign-bit extraction, this is the place to check it.
  // FIXME: zero shift amount is also legal here, but we can't *easily* check
  // more than one predicate so it's not really worth it.
  if (HadTwoRightShifts && (Trunc || AnalyzeForSignBitExtraction)) {
    // If it's not a sign bit extraction, then we're done.
    if (!match(NewShAmt,
               m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
                                  APInt(NewShAmtBitWidth, XBitWidth - 1))))
      return nullptr;
    // If it is, and that was the question, return the base value.
    if (AnalyzeForSignBitExtraction)
      return X;
  }

  assert(IdenticalShOpcodes && "Should not get here with different shifts.");

  if (NewShAmt->getType() != X->getType()) {
    NewShAmt = ConstantFoldCastOperand(Instruction::ZExt, NewShAmt,
                                       X->getType(), SQ.DL);
    if (!NewShAmt)
      return nullptr;
  }

  // All good, we can do this fold.
  BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);

  // The flags can only be propagated if there wasn't a trunc.
  if (!Trunc) {
    // If the pattern did not involve trunc, and both of the original shifts
    // had the same flag set, preserve the flag.
    if (ShiftOpcode == Instruction::BinaryOps::Shl) {
      NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
                                     Sh1->hasNoUnsignedWrap());
      NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
                                   Sh1->hasNoSignedWrap());
    } else {
      NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
    }
  }

  Instruction *Ret = NewShift;
  if (Trunc) {
    Builder.Insert(NewShift);
    Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType());
  }

  return Ret;
}

// If we have some pattern that leaves only some low bits set, and then performs
// left-shift of those bits, if none of the bits that are left after the final
// shift are modified by the mask, we can omit the mask.
//
// There are many variants to this pattern:
//   a)  (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
//   b)  (x & (~(-1 << MaskShAmt))) << ShiftShAmt
//   c)  (x & (-1 l>> MaskShAmt)) << ShiftShAmt
//   d)  (x & ((-1 << MaskShAmt) l>> MaskShAmt)) << ShiftShAmt
//   e)  ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt
//   f)  ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt
// All these patterns can be simplified to just:
//   x << ShiftShAmt
// iff:
//   a,b)     (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
//   c,d,e,f) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
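//
// For instance, variant a) with i8, MaskShAmt = 5, ShiftShAmt = 3:
//   (x & 31) << 3
// the mask keeps bits 0..4, and the shift then discards bits 5..7 anyway
// (5+3 u>= 8), so this is just:
//   x << 3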
static Instruction *
dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
                                     const SimplifyQuery &Q,
                                     InstCombiner::BuilderTy &Builder) {
  assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl &&
         "The input must be 'shl'!");

  Value *Masked, *ShiftShAmt;
  match(OuterShift,
        m_Shift(m_Value(Masked), m_ZExtOrSelf(m_Value(ShiftShAmt))));

  // *If* there is a truncation between an outer shift and a possibly-mask,
  // then said truncation *must* be one-use, else we can't perform the fold.
  Value *Trunc;
  if (match(Masked, m_CombineAnd(m_Trunc(m_Value(Masked)), m_Value(Trunc))) &&
      !Trunc->hasOneUse())
    return nullptr;

  Type *NarrowestTy = OuterShift->getType();
  Type *WidestTy = Masked->getType();
  bool HadTrunc = WidestTy != NarrowestTy;

  // The mask must be computed in a type twice as wide to ensure
  // that no bits are lost if the sum-of-shifts is wider than the base type.
  Type *ExtendedTy = WidestTy->getExtendedType();

  Value *MaskShAmt;

  // ((1 << MaskShAmt) - 1)
  auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
  // (~(-1 << maskNbits))
  auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
  // (-1 l>> MaskShAmt)
  auto MaskC = m_LShr(m_AllOnes(), m_Value(MaskShAmt));
  // ((-1 << MaskShAmt) l>> MaskShAmt)
  auto MaskD =
      m_LShr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));

  Value *X;
  Constant *NewMask;

  if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
    // Peek through an optional zext of the shift amount.
    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

    // Verify that it would be safe to try to add those two shift amounts.
    if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
                                            MaskShAmt))
      return nullptr;

    // Can we simplify (MaskShAmt+ShiftShAmt) ?
    auto *SumOfShAmts = dyn_cast_or_null<Constant>(simplifyAddInst(
        MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
    if (!SumOfShAmts)
      return nullptr; // Did not simplify.
    // In this pattern SumOfShAmts correlates with the number of low bits
    // that shall remain in the root value (OuterShift).

    // An extend of an undef value becomes zero because the high bits are never
    // completely unknown. Replace the `undef` shift amounts with final
    // shift bitwidth to ensure that the value remains undef when creating the
    // subsequent shift op.
    SumOfShAmts = Constant::replaceUndefsWith(
        SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(),
                                      ExtendedTy->getScalarSizeInBits()));
    auto *ExtendedSumOfShAmts = ConstantFoldCastOperand(
        Instruction::ZExt, SumOfShAmts, ExtendedTy, Q.DL);
    if (!ExtendedSumOfShAmts)
      return nullptr;

    // And compute the mask as usual: ~(-1 << (SumOfShAmts))
    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
    auto *ExtendedInvertedMask =
        ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts);
    NewMask = ConstantExpr::getNot(ExtendedInvertedMask);
  } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
             match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
                                 m_Deferred(MaskShAmt)))) {
    // Peek through an optional zext of the shift amount.
    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

    // Verify that it would be safe to try to add those two shift amounts.
    if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
                                            MaskShAmt))
      return nullptr;

    // Can we simplify (ShiftShAmt-MaskShAmt) ?
    auto *ShAmtsDiff = dyn_cast_or_null<Constant>(simplifySubInst(
        ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
    if (!ShAmtsDiff)
      return nullptr; // Did not simplify.
    // In this pattern ShAmtsDiff correlates with the number of high bits that
    // shall be unset in the root value (OuterShift).

    // An extend of an undef value becomes zero because the high bits are never
    // completely unknown. Replace the `undef` shift amounts with negated
    // bitwidth of innermost shift to ensure that the value remains undef when
    // creating the subsequent shift op.
    unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits();
    ShAmtsDiff = Constant::replaceUndefsWith(
        ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(),
                                     -WidestTyBitWidth));
    auto *ExtendedNumHighBitsToClear = ConstantFoldCastOperand(
        Instruction::ZExt,
        ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(),
                                              WidestTyBitWidth),
                             ShAmtsDiff),
        ExtendedTy, Q.DL);
    if (!ExtendedNumHighBitsToClear)
      return nullptr;

    // And compute the mask as usual: (-1 l>> (NumHighBitsToClear))
    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
    NewMask =
        ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear);
  } else
    return nullptr; // Don't know anything about this pattern.

  NewMask = ConstantExpr::getTrunc(NewMask, NarrowestTy);

  // Does this mask have any unset bits? If not then we can just not apply it.
  bool NeedMask = !match(NewMask, m_AllOnes());

  // If we need to apply a mask, there are several more restrictions we have.
  if (NeedMask) {
    // The old masking instruction must go away.
    if (!Masked->hasOneUse())
      return nullptr;
    // The original "masking" instruction must not have been `ashr`.
    if (match(Masked, m_AShr(m_Value(), m_Value())))
      return nullptr;
  }

  // If we need to apply truncation, let's do it first, since we can.
  // We have already ensured that the old truncation will go away.
  if (HadTrunc)
    X = Builder.CreateTrunc(X, NarrowestTy);

  // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
  // We didn't change the Type of this outermost shift, so we can just do it.
  auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
                                          OuterShift->getOperand(1));
  if (!NeedMask)
    return NewShift;

  Builder.Insert(NewShift);
  return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
}

/// If we have a shift-by-constant of a bin op (bitwise logic op or add/sub w/
/// shl) that itself has a shift-by-constant operand with identical opcode, we
/// may be able to convert that into 2 independent shifts followed by the logic
/// op. This eliminates a use of an intermediate value (reduces dependency
/// chain).
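/// For instance (i32):
///   shl (or (shl X, 2), Y), 3  -->  or (shl X, 5), (shl Y, 3)
/// (valid because 2+3 u< 32).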
static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
                                            InstCombiner::BuilderTy &Builder) {
  assert(I.isShift() && "Expected a shift as input");
  auto *BinInst = dyn_cast<BinaryOperator>(I.getOperand(0));
  if (!BinInst ||
      (!BinInst->isBitwiseLogicOp() &&
       BinInst->getOpcode() != Instruction::Add &&
       BinInst->getOpcode() != Instruction::Sub) ||
      !BinInst->hasOneUse())
    return nullptr;

  Constant *C0, *C1;
  if (!match(I.getOperand(1), m_Constant(C1)))
    return nullptr;

  Instruction::BinaryOps ShiftOpcode = I.getOpcode();
  // Transform for add/sub only works with shl.
  if ((BinInst->getOpcode() == Instruction::Add ||
       BinInst->getOpcode() == Instruction::Sub) &&
      ShiftOpcode != Instruction::Shl)
    return nullptr;

  Type *Ty = I.getType();

  // Find a matching one-use shift by constant. The fold is not valid if the sum
  // of the shift values equals or exceeds bitwidth.
  // TODO: Remove the one-use check if the other logic operand (Y) is constant.
  Value *X, *Y;
  auto matchFirstShift = [&](Value *V) {
    APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
    return match(V,
                 m_OneUse(m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0)))) &&
           match(ConstantExpr::getAdd(C0, C1),
                 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
  };

  // Logic ops and Add are commutative, so check each operand for a match. Sub
  // is not, so we cannot reorder if we match operand(1); we need to keep the
  // operands in their original positions.
  bool FirstShiftIsOp1 = false;
  if (matchFirstShift(BinInst->getOperand(0)))
    Y = BinInst->getOperand(1);
  else if (matchFirstShift(BinInst->getOperand(1))) {
    Y = BinInst->getOperand(0);
    FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
  } else
    return nullptr;

  // shift (binop (shift X, C0), Y), C1 -> binop (shift X, C0+C1), (shift Y, C1)
  Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1);
  Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
  Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, C1);
  Value *Op1 = FirstShiftIsOp1 ? NewShift2 : NewShift1;
  Value *Op2 = FirstShiftIsOp1 ? NewShift1 : NewShift2;
  return BinaryOperator::Create(BinInst->getOpcode(), Op1, Op2);
}

Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  assert(Op0->getType() == Op1->getType());
  Type *Ty = I.getType();

  // If the shift amount is a one-use `sext`, we can demote it to `zext`.
  Value *Y;
  if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) {
    Value *NewExt = Builder.CreateZExt(Y, Ty, Op1->getName());
    return BinaryOperator::Create(I.getOpcode(), Op0, NewExt);
  }

  // See if we can fold away this shift.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // Try to fold a constant shifted value into select arguments.
  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

  if (Constant *CUI = dyn_cast<Constant>(Op1))
    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
      return Res;

  if (auto *NewShift = cast_or_null<Instruction>(
          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)))
    return NewShift;

  // Pre-shift a constant shifted by a variable amount with constant offset:
  // C shift (A add nuw C1) --> (C shift C1) shift A
  Value *A;
  Constant *C, *C1;
  if (match(Op0, m_Constant(C)) &&
      match(Op1, m_NUWAdd(m_Value(A), m_Constant(C1)))) {
    Value *NewC = Builder.CreateBinOp(I.getOpcode(), C, C1);
    return BinaryOperator::Create(I.getOpcode(), NewC, A);
  }

  unsigned BitWidth = Ty->getScalarSizeInBits();

  const APInt *AC, *AddC;
  // Try to pre-shift a constant shifted by a variable amount added with a
  // negative number:
  // C << (X - AddC) --> (C >> AddC) << X
  // and
  // C >> (X - AddC) --> (C << AddC) >> X
  if (match(Op0, m_APInt(AC)) && match(Op1, m_Add(m_Value(A), m_APInt(AddC))) &&
      AddC->isNegative() && (-*AddC).ult(BitWidth)) {
    assert(!AC->isZero() && "Expected simplify of shifted zero");
    unsigned PosOffset = (-*AddC).getZExtValue();

    auto isSuitableForPreShift = [PosOffset, &I, AC]() {
      switch (I.getOpcode()) {
      default:
        return false;
      case Instruction::Shl:
        return (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) &&
               AC->eq(AC->lshr(PosOffset).shl(PosOffset));
      case Instruction::LShr:
        return I.isExact() && AC->eq(AC->shl(PosOffset).lshr(PosOffset));
      case Instruction::AShr:
        return I.isExact() && AC->eq(AC->shl(PosOffset).ashr(PosOffset));
      }
    };
    if (isSuitableForPreShift()) {
      Constant *NewC = ConstantInt::get(Ty, I.getOpcode() == Instruction::Shl
                                                ? AC->lshr(PosOffset)
                                                : AC->shl(PosOffset));
      BinaryOperator *NewShiftOp =
          BinaryOperator::Create(I.getOpcode(), NewC, A);
      if (I.getOpcode() == Instruction::Shl) {
        NewShiftOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
      } else {
        NewShiftOp->setIsExact();
      }
      return NewShiftOp;
    }
  }

  // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2.
  // Because shifts by negative values (which could occur if A were negative)
  // are undefined.
  if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Constant(C))) &&
      match(C, m_Power2())) {
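    // For instance, `X shift (A srem 8)` becomes `X shift (A and 7)`: for
    // non-negative A the two amounts agree, and a negative A would have
    // produced a negative (poison-producing) shift amount anyway.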
    // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
    // demand the sign bit (and many others) here??
    Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(Ty, 1));
    Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName());
    return replaceOperand(I, 1, Rem);
  }

  if (Instruction *Logic = foldShiftOfShiftedBinOp(I, Builder))
    return Logic;

  if (match(Op1, m_Or(m_Value(), m_SpecificInt(BitWidth - 1))))
    return replaceOperand(I, 1, ConstantInt::get(Ty, BitWidth - 1));

  return nullptr;
}

/// Return true if we can simplify two logical (either left or right) shifts
/// that have constant shift amounts: OuterShift (InnerShift X, C1), C2.
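/// For instance (i8), equal amounts in opposite directions:
///   lshr (shl X, 3), 3  -->  and X, 0x1F
/// and for a larger inner shift:
///   lshr (shl X, 5), 2  -->  shl X, 3
/// which is only valid when bits 3..4 of X are known zero.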
static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl,
                                    Instruction *InnerShift,
                                    InstCombinerImpl &IC, Instruction *CxtI) {
  assert(InnerShift->isLogicalShift() && "Unexpected instruction type");

  // We need constant scalar or constant splat shifts.
  const APInt *InnerShiftConst;
  if (!match(InnerShift->getOperand(1), m_APInt(InnerShiftConst)))
    return false;

  // Two logical shifts in the same direction:
  // shl (shl X, C1), C2 --> shl X, C1 + C2
  // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
  bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
  if (IsInnerShl == IsOuterShl)
    return true;

  // Equal shift amounts in opposite directions become bitwise 'and':
  // lshr (shl X, C), C --> and X, C'
  // shl (lshr X, C), C --> and X, C'
  if (*InnerShiftConst == OuterShAmt)
    return true;

  // If the 2nd shift is bigger than the 1st, we can fold:
  // lshr (shl X, C1), C2 --> and (shl X, C1 - C2), C3
  // shl (lshr X, C1), C2 --> and (lshr X, C1 - C2), C3
  // but it isn't profitable unless we know the and'd out bits are already zero.
  // Also, check that the inner shift is valid (less than the type width) or
  // we'll crash trying to produce the bit mask for the 'and'.
  unsigned TypeWidth = InnerShift->getType()->getScalarSizeInBits();
  if (InnerShiftConst->ugt(OuterShAmt) && InnerShiftConst->ult(TypeWidth)) {
    unsigned InnerShAmt = InnerShiftConst->getZExtValue();
    unsigned MaskShift =
        IsInnerShl ? TypeWidth - InnerShAmt : InnerShAmt - OuterShAmt;
    APInt Mask = APInt::getLowBitsSet(TypeWidth, OuterShAmt) << MaskShift;
    if (IC.MaskedValueIsZero(InnerShift->getOperand(0), Mask, 0, CxtI))
      return true;
  }

  return false;
}

/// See if we can compute the specified value, but shifted logically to the left
/// or right by some number of bits. This should return true if the expression
/// can be computed for the same cost as the current expression tree. This is
/// used to eliminate extraneous shifting from things like:
///      %C = shl i128 %A, 64
///      %D = shl i128 %B, 96
///      %E = or i128 %C, %D
///      %F = lshr i128 %E, 64
/// where the client will ask if E can be computed shifted right by 64-bits. If
/// this succeeds, getShiftedValue() will be called to produce the value.
static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
                               InstCombinerImpl &IC, Instruction *CxtI) {
  // We can always evaluate constants shifted.
  if (isa<Constant>(V))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // We can't mutate something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  switch (I->getOpcode()) {
  default: return false;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    return canEvaluateShifted(I->getOperand(0), NumBits, IsLeftShift, IC, I) &&
           canEvaluateShifted(I->getOperand(1), NumBits, IsLeftShift, IC, I);

  case Instruction::Shl:
  case Instruction::LShr:
    return canEvaluateShiftedShift(NumBits, IsLeftShift, I, IC, CxtI);

  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    Value *TrueVal = SI->getTrueValue();
    Value *FalseVal = SI->getFalseValue();
    return canEvaluateShifted(TrueVal, NumBits, IsLeftShift, IC, SI) &&
           canEvaluateShifted(FalseVal, NumBits, IsLeftShift, IC, SI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateShifted(IncValue, NumBits, IsLeftShift, IC, PN))
        return false;
    return true;
  }
  case Instruction::Mul: {
    const APInt *MulConst;
    // We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C')
    return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) &&
           MulConst->isNegatedPowerOf2() && MulConst->countr_zero() == NumBits;
  }
  }
}

/// Fold OuterShift (InnerShift X, C1), C2.
/// See canEvaluateShiftedShift() for the constraints on these instructions.
static Value *foldShiftedShift(BinaryOperator *InnerShift, unsigned OuterShAmt,
                               bool IsOuterShl,
                               InstCombiner::BuilderTy &Builder) {
  bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
  Type *ShType = InnerShift->getType();
  unsigned TypeWidth = ShType->getScalarSizeInBits();

  // We only accept shifts-by-a-constant in canEvaluateShifted().
  const APInt *C1;
  match(InnerShift->getOperand(1), m_APInt(C1));
  unsigned InnerShAmt = C1->getZExtValue();

  // Change the shift amount and clear the appropriate IR flags.
  auto NewInnerShift = [&](unsigned ShAmt) {
    InnerShift->setOperand(1, ConstantInt::get(ShType, ShAmt));
    if (IsInnerShl) {
      InnerShift->setHasNoUnsignedWrap(false);
      InnerShift->setHasNoSignedWrap(false);
    } else {
      InnerShift->setIsExact(false);
    }
    return InnerShift;
  };

  // Two logical shifts in the same direction:
  // shl (shl X, C1), C2 --> shl X, C1 + C2
  // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
  if (IsInnerShl == IsOuterShl) {
    // If this is an oversized composite shift, then unsigned shifts get 0.
    if (InnerShAmt + OuterShAmt >= TypeWidth)
      return Constant::getNullValue(ShType);

    return NewInnerShift(InnerShAmt + OuterShAmt);
  }

  // Equal shift amounts in opposite directions become bitwise 'and':
  // lshr (shl X, C), C --> and X, C'
  // shl (lshr X, C), C --> and X, C'
  if (InnerShAmt == OuterShAmt) {
    APInt Mask = IsInnerShl
                     ? APInt::getLowBitsSet(TypeWidth, TypeWidth - OuterShAmt)
                     : APInt::getHighBitsSet(TypeWidth, TypeWidth - OuterShAmt);
    Value *And = Builder.CreateAnd(InnerShift->getOperand(0),
                                   ConstantInt::get(ShType, Mask));
    if (auto *AndI = dyn_cast<Instruction>(And)) {
      AndI->moveBefore(InnerShift);
      AndI->takeName(InnerShift);
    }
    return And;
  }

  assert(InnerShAmt > OuterShAmt &&
         "Unexpected opposite direction logical shift pair");

  // In general, we would need an 'and' for this transform, but
  // canEvaluateShiftedShift() guarantees that the masked-off bits are not used.
  // lshr (shl X, C1), C2 --> shl X, C1 - C2
  // shl (lshr X, C1), C2 --> lshr X, C1 - C2
  return NewInnerShift(InnerShAmt - OuterShAmt);
}

/// When canEvaluateShifted() returns true for an expression, this function
/// inserts the new computation that produces the shifted value.
static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombinerImpl &IC, const DataLayout &DL) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      return IC.Builder.CreateShl(C, NumBits);
    else
      return IC.Builder.CreateLShr(C, NumBits);
  }

  Instruction *I = cast<Instruction>(V);
  IC.addToWorklist(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    I->setOperand(
        0, getShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
    I->setOperand(
        1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
    return I;

  case Instruction::Shl:
  case Instruction::LShr:
    return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift,
                            IC.Builder);

  case Instruction::Select:
    I->setOperand(
        1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
    I->setOperand(
        2, getShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, getShiftedValue(PN->getIncomingValue(i), NumBits,
                                              isLeftShift, IC, DL));
    return PN;
  }
  case Instruction::Mul: {
    assert(!isLeftShift && "Unexpected shift direction!");
    auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0));
    IC.InsertNewInstWith(Neg, I->getIterator());
    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
    APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits);
    auto *And = BinaryOperator::CreateAnd(Neg,
                                          ConstantInt::get(I->getType(), Mask));
    And->takeName(I);
    return IC.InsertNewInstWith(And, I->getIterator());
  }
  }
}

// If this is a bitwise operator or add with a constant RHS we might be able
// to pull it through a shift.
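// For instance, we decline a 'not': pulling it through `lshr (xor X, -1), 5`
// would yield `xor (lshr X, 5), 0x07FFFFFF` (for i32), a plain 'xor' that is
// usually worse for analysis than the original 'not'.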
static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
                                         BinaryOperator *BO) {
  switch (BO->getOpcode()) {
  default:
    return false; // Do not perform transform!
  case Instruction::Add:
    return Shift.getOpcode() == Instruction::Shl;
  case Instruction::Or:
  case Instruction::And:
    return true;
  case Instruction::Xor:
    // Do not change a 'not' of logical shift because that would create a normal
    // 'xor'. The 'not' is likely better for analysis, SCEV, and codegen.
    return !(Shift.isLogicalShift() && match(BO, m_Not(m_Value())));
  }
}

Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1,
                                                   BinaryOperator &I) {
  // (C2 << X) << C1 --> (C2 << C1) << X
  // (C2 >> X) >> C1 --> (C2 >> C1) >> X
  Constant *C2;
  Value *X;
  if (match(Op0, m_BinOp(I.getOpcode(), m_Constant(C2), m_Value(X))))
    return BinaryOperator::Create(
        I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X);

  bool IsLeftShift = I.getOpcode() == Instruction::Shl;
  Type *Ty = I.getType();
  unsigned TypeBits = Ty->getScalarSizeInBits();

  // (X / +DivC) >> (Width - 1) --> ext (X <= -DivC)
  // (X / -DivC) >> (Width - 1) --> ext (X >= +DivC)
  const APInt *DivC;
  if (!IsLeftShift && match(C1, m_SpecificIntAllowUndef(TypeBits - 1)) &&
      match(Op0, m_SDiv(m_Value(X), m_APInt(DivC))) && !DivC->isZero() &&
      !DivC->isMinSignedValue()) {
    Constant *NegDivC = ConstantInt::get(Ty, -(*DivC));
    ICmpInst::Predicate Pred =
        DivC->isNegative() ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_SLE;
    Value *Cmp = Builder.CreateICmp(Pred, X, NegDivC);
    auto ExtOpcode = (I.getOpcode() == Instruction::AShr) ? Instruction::SExt
                                                          : Instruction::ZExt;
    return CastInst::Create(ExtOpcode, Cmp, Ty);
  }

  const APInt *Op1C;
  if (!match(C1, m_APInt(Op1C)))
    return nullptr;

  assert(!Op1C->uge(TypeBits) &&
         "Shift over the type width should have been removed already");

  // See if we can propagate this shift into the input, this covers the trivial
  // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
  if (I.getOpcode() != Instruction::AShr &&
      canEvaluateShifted(Op0, Op1C->getZExtValue(), IsLeftShift, *this, &I)) {
    LLVM_DEBUG(
        dbgs() << "ICE: GetShiftedValue propagating shift through expression"
                  " to eliminate shift:\n  IN: "
               << *Op0 << "\n  SH: " << I << "\n");

    return replaceInstUsesWith(
        I, getShiftedValue(Op0, Op1C->getZExtValue(), IsLeftShift, *this, DL));
  }

  if (Instruction *FoldedShift = foldBinOpIntoSelectOrPhi(I))
    return FoldedShift;

  if (!Op0->hasOneUse())
    return nullptr;

  if (auto *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
    // If the operand is a bitwise operator with a constant RHS, and the
    // shift is the only use, we can pull it out of the shift.
    const APInt *Op0C;
    if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
      if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
        Value *NewRHS =
            Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(1), C1);

        Value *NewShift =
            Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), C1);
        NewShift->takeName(Op0BO);

        return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS);
      }
    }
  }

  // If we have a select that conditionally executes some binary operator,
  // see if we can pull the select and operator through the shift.
  //
  // For example, turning:
  //   shl (select C, (add X, C1), X), C2
  // Into:
  //   Y = shl X, C2
  //   select C, (add Y, C1 << C2), Y
  Value *Cond, *TrueVal, *FalseVal;
  BinaryOperator *TBO, *FBO;
  if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)),
                          m_Value(FalseVal)))) {
    const APInt *C;
    if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
        match(TBO->getOperand(1), m_APInt(C)) &&
        canShiftBinOpWithConstantRHS(I, TBO)) {
      Value *NewRHS =
          Builder.CreateBinOp(I.getOpcode(), TBO->getOperand(1), C1);

      Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, C1);
      Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS);
      return SelectInst::Create(Cond, NewOp, NewShift);
    }
  }

  if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal),
                          m_OneUse(m_BinOp(FBO))))) {
    const APInt *C;
    if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
        match(FBO->getOperand(1), m_APInt(C)) &&
        canShiftBinOpWithConstantRHS(I, FBO)) {
      Value *NewRHS =
          Builder.CreateBinOp(I.getOpcode(), FBO->getOperand(1), C1);

      Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, C1);
      Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS);
      return SelectInst::Create(Cond, NewShift, NewOp);
    }
  }

  return nullptr;
}

// Tries to perform
//   (lshr (add (zext X), (zext Y)), K)
//   ->  (icmp ult (add X, Y), X)
// where
//   - The add's operands are zexts from a K-bits integer to a bigger type.
//   - The add is only used by the shr, or by iK (or narrower) truncates.
//   - The lshr type has more than 2 bits (other types are boolean math).
//   - K > 1
// note that
//   - The resulting add cannot have nuw/nsw, else on overflow we get a
//     poison value and the transform isn't legal anymore.
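//
// For instance (K = 32):
//   (lshr (add (zext i32 %x to i64), (zext i32 %y to i64)), 32)
// becomes
//   (zext (icmp ult (add i32 %x, %y), %x) to i64)
// i.e. the high word of the widened sum is exactly the carry-out bit.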
Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) {
  assert(I.getOpcode() == Instruction::LShr);

  Value *Add = I.getOperand(0);
  Value *ShiftAmt = I.getOperand(1);
  Type *Ty = I.getType();

  if (Ty->getScalarSizeInBits() < 3)
    return nullptr;

  const APInt *ShAmtAPInt = nullptr;
  Value *X = nullptr, *Y = nullptr;
  if (!match(ShiftAmt, m_APInt(ShAmtAPInt)) ||
      !match(Add,
             m_Add(m_OneUse(m_ZExt(m_Value(X))), m_OneUse(m_ZExt(m_Value(Y))))))
    return nullptr;

  const unsigned ShAmt = ShAmtAPInt->getZExtValue();
  if (ShAmt == 1)
    return nullptr;

  // X/Y are zexts from `ShAmt`-sized ints.
  if (X->getType()->getScalarSizeInBits() != ShAmt ||
      Y->getType()->getScalarSizeInBits() != ShAmt)
    return nullptr;

  // Make sure that `Add` is only used by `I` and `ShAmt`-truncates.
  if (!Add->hasOneUse()) {
    for (User *U : Add->users()) {
      if (U == &I)
        continue;

      TruncInst *Trunc = dyn_cast<TruncInst>(U);
      if (!Trunc || Trunc->getType()->getScalarSizeInBits() > ShAmt)
        return nullptr;
    }
  }

  // Insert at Add so that the newly created `NarrowAdd` will dominate its
  // users (i.e. `Add`'s users).
  Instruction *AddInst = cast<Instruction>(Add);
  Builder.SetInsertPoint(AddInst);

  Value *NarrowAdd = Builder.CreateAdd(X, Y, "add.narrowed");
  Value *Overflow =
      Builder.CreateICmpULT(NarrowAdd, X, "add.narrowed.overflow");

  // Replace the uses of the original add with a zext of the
  // NarrowAdd's result. Note that all users at this stage are known to
  // be ShAmt-sized truncs, or the lshr itself.
  if (!Add->hasOneUse()) {
    replaceInstUsesWith(*AddInst, Builder.CreateZExt(NarrowAdd, Ty));
    eraseInstFromFunction(*AddInst);
  }

  // Replace the LShr with a zext of the overflow check.
  return new ZExtInst(Overflow, Ty);
}

// Try to set nuw/nsw flags on shl or exact flag on lshr/ashr using knownbits.
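// For instance, in `shl i32 %x, %c` where %x has at least 24 known leading
// zeros and %c is known u< 8, no set bit can be shifted out, so nuw holds;
// the nsw and exact deductions follow the same known-bits reasoning.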
static bool setShiftFlags(BinaryOperator &I, const SimplifyQuery &Q) {
  assert(I.isShift() && "Expected a shift as input");
  // We already have all the flags.
  if (I.getOpcode() == Instruction::Shl) {
    if (I.hasNoUnsignedWrap() && I.hasNoSignedWrap())
      return false;
  } else {
    if (I.isExact())
      return false;
  }

  // Compute what we know about shift count.
  KnownBits KnownCnt =
      computeKnownBits(I.getOperand(1), Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
  // If we know nothing about shift count or it's a poison shift, we won't be
  // able to prove anything so return before computing shift amount.
  if (KnownCnt.isUnknown())
    return false;
  unsigned BitWidth = KnownCnt.getBitWidth();
  APInt MaxCnt = KnownCnt.getMaxValue();
  if (MaxCnt.uge(BitWidth))
    return false;

  KnownBits KnownAmt =
      computeKnownBits(I.getOperand(0), Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
  bool Changed = false;

  if (I.getOpcode() == Instruction::Shl) {
    // If we have as many leading zeros as the maximum shift cnt, we have nuw.
    if (!I.hasNoUnsignedWrap() && MaxCnt.ule(KnownAmt.countMinLeadingZeros())) {
      I.setHasNoUnsignedWrap();
      Changed = true;
    }

    // If we have more sign bits than maximum shift cnt we have nsw.
    if (!I.hasNoSignedWrap()) {
      if (MaxCnt.ult(KnownAmt.countMinSignBits()) ||
          MaxCnt.ult(ComputeNumSignBits(I.getOperand(0), Q.DL, /*Depth*/ 0,
                                        Q.AC, Q.CxtI, Q.DT))) {
        I.setHasNoSignedWrap();
        Changed = true;
      }
    }
    return Changed;
  }

  // If we have at least as many trailing zeros as maximum count then we have
  // exact.
  Changed = MaxCnt.ule(KnownAmt.countMinTrailingZeros());
  I.setIsExact(Changed);

  return Changed;
}

Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
  const SimplifyQuery Q = SQ.getWithInstruction(&I);

  if (Value *V = simplifyShlInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *V = commonShiftTransforms(I))
    return V;

  if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder))
    return V;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();

  const APInt *C;
  if (match(Op1, m_APInt(C))) {
    unsigned ShAmtC = C->getZExtValue();

    // shl (zext X), C --> zext (shl X, C)
    // This is only valid if X would have zeros shifted out.
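    // For instance: shl (zext i8 %x to i32), 2 --> zext (shl i8 %x, 2) to i32
    // iff the top 2 bits of %x are known zero.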
    Value *X;
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      if (ShAmtC < SrcWidth &&
          MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmtC), 0, &I))
        return new ZExtInst(Builder.CreateShl(X, ShAmtC), Ty);
    }

    // (X >> C) << C --> X & (-1 << C)
    if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1)))) {
      APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
    }

    const APInt *C1;
    if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(C1)))) &&
        C1->ult(BitWidth)) {
      unsigned ShrAmt = C1->getZExtValue();
      if (ShrAmt < ShAmtC) {
        // If C1 < C: (X >>?,exact C1) << C --> X << (C - C1)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
        auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        return NewShl;
      }
      if (ShrAmt > ShAmtC) {
        // If C1 > C: (X >>?,exact C1) << C --> X >>?,exact (C1 - C)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
        auto *NewShr = BinaryOperator::Create(
            cast<BinaryOperator>(Op0)->getOpcode(), X, ShiftDiff);
        NewShr->setIsExact(true);
        return NewShr;
      }
    }

    if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(C1)))) &&
        C1->ult(BitWidth)) {
      unsigned ShrAmt = C1->getZExtValue();
      if (ShrAmt < ShAmtC) {
        // If C1 < C: (X >>? C1) << C --> (X << (C - C1)) & (-1 << C)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
        auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        Builder.Insert(NewShl);
        APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
        return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
      }
      if (ShrAmt > ShAmtC) {
        // If C1 > C: (X >>? C1) << C --> (X >>? (C1 - C)) & (-1 << C)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
        auto *OldShr = cast<BinaryOperator>(Op0);
        auto *NewShr =
            BinaryOperator::Create(OldShr->getOpcode(), X, ShiftDiff);
        NewShr->setIsExact(OldShr->isExact());
        Builder.Insert(NewShr);
        APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
        return BinaryOperator::CreateAnd(NewShr, ConstantInt::get(Ty, Mask));
      }
    }

    // Similar to above, but look through an intermediate trunc instruction.
    BinaryOperator *Shr;
    if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) &&
        match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) {
      // The larger shift direction survives through the transform.
      unsigned ShrAmtC = C1->getZExtValue();
      unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC;
      Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff);
      auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl;

      // If C1 > C:
      // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) & (-1 << C)
      // If C > C1:
      // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) & (-1 << C)
      Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff");
      Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff");
      APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
      return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask));
    }

    if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
      unsigned AmtSum = ShAmtC + C1->getZExtValue();
      // Oversized shifts are simplified to zero in InstSimplify.
      if (AmtSum < BitWidth)
        // (X << C1) << C2 --> X << (C1 + C2)
        return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum));
    }

    // If we have an opposite shift by the same amount, we may be able to
    // reorder binops and shifts to eliminate math/logic.
    auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
      switch (BinOpcode) {
      default:
        return false;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
      case Instruction::Sub:
        // NOTE: Sub is not commutable and the transforms below may not be
        // valid when the shift-right is operand 1 (RHS) of the sub.
        return true;
      }
    };
    BinaryOperator *Op0BO;
    if (match(Op0, m_OneUse(m_BinOp(Op0BO))) &&
        isSuitableBinOpcode(Op0BO->getOpcode())) {
      // Commute so shift-right is on LHS of the binop.
      // (Y bop (X >> C)) << C         ->  ((X >> C) bop Y) << C
      // (Y bop ((X >> C) & CC)) << C  ->  (((X >> C) & CC) bop Y) << C
      Value *Shr = Op0BO->getOperand(0);
      Value *Y = Op0BO->getOperand(1);
      Value *X;
      const APInt *CC;
      if (Op0BO->isCommutative() && Y->hasOneUse() &&
          (match(Y, m_Shr(m_Value(), m_Specific(Op1))) ||
           match(Y, m_And(m_OneUse(m_Shr(m_Value(), m_Specific(Op1))),
                          m_APInt(CC)))))
        std::swap(Shr, Y);

      // ((X >> C) bop Y) << C  ->  (X bop (Y << C)) & (~0 << C)
      if (match(Shr, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) {
        // (Y << C)
        Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
        // (X bop (Y << C))
        Value *B =
            Builder.CreateBinOp(Op0BO->getOpcode(), X, YS, Shr->getName());
        unsigned Op1Val = C->getLimitedValue(BitWidth);
        APInt Bits = APInt::getHighBitsSet(BitWidth, BitWidth - Op1Val);
        Constant *Mask = ConstantInt::get(Ty, Bits);
        return BinaryOperator::CreateAnd(B, Mask);
      }

      // (((X >> C) & CC) bop Y) << C  ->  (X & (CC << C)) bop (Y << C)
      if (match(Shr,
                m_OneUse(m_And(m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))),
                               m_APInt(CC))))) {
        // (Y << C)
        Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
        // X & (CC << C)
        Value *M = Builder.CreateAnd(X, ConstantInt::get(Ty, CC->shl(*C)),
                                     X->getName() + ".mask");
        return BinaryOperator::Create(Op0BO->getOpcode(), M, YS);
      }
    }

    // (C1 - X) << C --> (C1 << C) - (X << C)
    if (match(Op0, m_OneUse(m_Sub(m_APInt(C1), m_Value(X))))) {
      Constant *NewLHS = ConstantInt::get(Ty, C1->shl(*C));
      Value *NewShift = Builder.CreateShl(X, Op1);
      return BinaryOperator::CreateSub(NewLHS, NewShift);
    }
  }

  if (setShiftFlags(I, Q))
    return &I;

  // Transform (x >> y) << y to x & (-1 << y)
  // Valid for any type of right-shift.
  Value *X;
  if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    Value *Mask = Builder.CreateShl(AllOnes, Op1);
    return BinaryOperator::CreateAnd(Mask, X);
  }

  Constant *C1;
  if (match(Op1, m_Constant(C1))) {
    Constant *C2;
    // (X * C2) << C1 --> X * (C2 << C1)
    if (match(Op0, m_Mul(m_Value(X), m_Constant(C2))))
      return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));

    // shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
    if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
      auto *NewC = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C1);
      return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
    }
  }

  if (match(Op0, m_One())) {
    // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1
    if (match(Op1, m_Sub(m_SpecificInt(BitWidth - 1), m_Value(X))))
      return BinaryOperator::CreateLShr(
          ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);

    // Canonicalize "extract lowest set bit" using cttz to and-with-negate:
    // 1 << (cttz X) --> -X & X
    if (match(Op1,
              m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(X), m_Value())))) {
      Value *NegX = Builder.CreateNeg(X, "neg");
      return BinaryOperator::CreateAnd(NegX, X);
    }

    // The only way to shift out the 1 is with an over-shift, so that would
    // be poison with or without "nuw". Undef is excluded because (undef << X)
    // is not undef (it is zero).
    Constant *ConstantOne = cast<Constant>(Op0);
    if (!I.hasNoUnsignedWrap() && !ConstantOne->containsUndefElement()) {
      I.setHasNoUnsignedWrap();
      return &I;
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
  if (Value *V = simplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  Value *X;
  const APInt *C;
  unsigned BitWidth = Ty->getScalarSizeInBits();

  // (iN (~X) u>> (N - 1)) --> zext (X > -1)
  if (match(Op0, m_OneUse(m_Not(m_Value(X)))) &&
      match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)))
    return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty);

  if (match(Op1, m_APInt(C))) {
    unsigned ShAmtC = C->getZExtValue();
    auto *II = dyn_cast<IntrinsicInst>(Op0);
    if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmtC &&
        (II->getIntrinsicID() == Intrinsic::ctlz ||
         II->getIntrinsicID() == Intrinsic::cttz ||
         II->getIntrinsicID() == Intrinsic::ctpop)) {
      // ctlz.i32(x)>>5  --> zext(x == 0)
      // cttz.i32(x)>>5  --> zext(x == 0)
      // ctpop.i32(x)>>5 --> zext(x == -1)
      bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop;
      Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0);
      Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS);
      return new ZExtInst(Cmp, Ty);
    }

    const APInt *C1;
    if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
      if (C1->ult(ShAmtC)) {
        unsigned ShlAmtC = C1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShlAmtC);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C --> X >>u (C - C1)
          auto *NewLShr = BinaryOperator::CreateLShr(X, ShiftDiff);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        if (Op0->hasOneUse()) {
          // (X << C1) >>u C --> (X >>u (C - C1)) & (-1 >> C)
          Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
          return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
        }
      } else if (C1->ugt(ShAmtC)) {
        unsigned ShlAmtC = C1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C --> X <<nuw (C1 - C)
          auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
          NewShl->setHasNoUnsignedWrap(true);
          return NewShl;
        }
        if (Op0->hasOneUse()) {
          // (X << C1) >>u C --> (X << (C1 - C)) & (-1 >> C)
          Value *NewShl = Builder.CreateShl(X, ShiftDiff);
          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
          return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
        }
      } else {
        assert(*C1 == ShAmtC);
        // (X << C) >>u C --> X & (-1 >>u C)
        APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
        return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
      }
    }

    // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
    // TODO: Consolidate with the more general transform that starts from shl
    // (the shifts are in the opposite order).
    Value *Y;
    if (match(Op0,
              m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
                               m_Value(Y))))) {
      Value *NewLshr = Builder.CreateLShr(Y, Op1);
      Value *NewAdd = Builder.CreateAdd(NewLshr, X);
      unsigned Op1Val = C->getLimitedValue(BitWidth);
      APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val);
      Constant *Mask = ConstantInt::get(Ty, Bits);
      return BinaryOperator::CreateAnd(NewAdd, Mask);
    }

    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
        (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
      assert(ShAmtC < X->getType()->getScalarSizeInBits() &&
             "Big shift not simplified to zero?");
      // lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
      Value *NewLShr = Builder.CreateLShr(X, ShAmtC);
      return new ZExtInst(NewLShr, Ty);
    }

    if (match(Op0, m_SExt(m_Value(X)))) {
      unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
      // lshr (sext i1 X to iN), C --> select (X, -1 >> C, 0)
      if (SrcTyBitWidth == 1) {
        auto *NewC = ConstantInt::get(
            Ty, APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
        return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
      }

      if ((!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType())) &&
          Op0->hasOneUse()) {
        // Are we moving the sign bit to the low bit and widening with high
        // zeros? lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
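        // For instance: lshr (sext i8 %x to i32), 31 --> zext (lshr i8 %x, 7)
        // to i32; both sides extract just the sign bit of %x.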
        if (ShAmtC == BitWidth - 1) {
          Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
          return new ZExtInst(NewLShr, Ty);
        }

        // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
        if (ShAmtC == BitWidth - SrcTyBitWidth) {
          // The new shift amount can't be more than the narrow source type.
          unsigned NewShAmt = std::min(ShAmtC, SrcTyBitWidth - 1);
          Value *AShr = Builder.CreateAShr(X, NewShAmt);
          return new ZExtInst(AShr, Ty);
        }
      }
    }

    if (ShAmtC == BitWidth - 1) {
      // lshr i32 or(X,-X), 31 --> zext (X != 0)
      if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X)))))
        return new ZExtInst(Builder.CreateIsNotNull(X), Ty);

      // lshr i32 (X -nsw Y), 31 --> zext (X < Y)
      if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
        return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty);

      // Check if a number is negative and odd:
      // lshr i32 (srem X, 2), 31 --> and (X >> 31), X
      if (match(Op0, m_OneUse(m_SRem(m_Value(X), m_SpecificInt(2))))) {
        Value *Signbit = Builder.CreateLShr(X, ShAmtC);
        return BinaryOperator::CreateAnd(Signbit, X);
      }
    }

    // (X >>u C1) >>u C --> X >>u (C1 + C)
    if (match(Op0, m_LShr(m_Value(X), m_APInt(C1)))) {
      // Oversized shifts are simplified to zero in InstSimplify.
      unsigned AmtSum = ShAmtC + C1->getZExtValue();
      if (AmtSum < BitWidth)
        return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
    }

    Instruction *TruncSrc;
    if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TruncSrc)))) &&
        match(TruncSrc, m_LShr(m_Value(X), m_APInt(C1)))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      unsigned AmtSum = ShAmtC + C1->getZExtValue();

      // If the combined shift fits in the source width:
      // (trunc (X >>u C1)) >>u C --> and (trunc (X >>u (C1 + C)), MaskC
      //
      // If the first shift covers the number of bits truncated, then the
      // mask instruction is eliminated (and so the use check is relaxed).
      if (AmtSum < SrcWidth &&
          (TruncSrc->hasOneUse() || C1->uge(SrcWidth - BitWidth))) {
        Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
        Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName());

        // If the first shift does not cover the number of bits truncated, then
        // we require a mask to get rid of high bits in the result.
        APInt MaskC = APInt::getAllOnes(BitWidth).lshr(ShAmtC);
        return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, MaskC));
      }
    }

    const APInt *MulC;
    if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
      // Look for a "splat" mul pattern - it replicates bits across each half of
      // a value, so a right shift is just a mask of the low bits:
      // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
      // TODO: Generalize to allow more than just half-width shifts?
      if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
          MulC->logBase2() == ShAmtC)
        return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));

      // The one-use check is not strictly necessary, but codegen may not be
      // able to invert the transform and perf may suffer with an extra mul
      // instruction.
      if (Op0->hasOneUse()) {
        APInt NewMulC = MulC->lshr(ShAmtC);
        // If MulC is divisible by (1 << ShAmtC):
        // lshr (mul nuw x, MulC), ShAmtC -> mul nuw x, (MulC >> ShAmtC)
        if (MulC->eq(NewMulC.shl(ShAmtC))) {
          auto *NewMul =
              BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
          BinaryOperator *OrigMul = cast<BinaryOperator>(Op0);
          NewMul->setHasNoSignedWrap(OrigMul->hasNoSignedWrap());
          return NewMul;
        }
      }
    }

    // Try to narrow bswap.
    // In the case where the shift amount equals the bitwidth difference, the
    // shift is eliminated.
    if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::bswap>(
                       m_OneUse(m_ZExt(m_Value(X))))))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      unsigned WidthDiff = BitWidth - SrcWidth;
      if (SrcWidth % 16 == 0) {
        Value *NarrowSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
        if (ShAmtC >= WidthDiff) {
          // (bswap (zext X)) >> C --> zext (bswap X >> C')
          Value *NewShift = Builder.CreateLShr(NarrowSwap, ShAmtC - WidthDiff);
          return new ZExtInst(NewShift, Ty);
        }
        // (bswap (zext X)) >> C --> (zext (bswap X)) << C'
        Value *NewZExt = Builder.CreateZExt(NarrowSwap, Ty);
        Constant *ShiftDiff = ConstantInt::get(Ty, WidthDiff - ShAmtC);
        return BinaryOperator::CreateShl(NewZExt, ShiftDiff);
      }
    }

    // Reduce add-carry of bools to logic:
    // ((zext BoolX) + (zext BoolY)) >> 1 --> zext (BoolX && BoolY)
    Value *BoolX, *BoolY;
    if (ShAmtC == 1 && match(Op0, m_Add(m_Value(X), m_Value(Y))) &&
        match(X, m_ZExt(m_Value(BoolX))) && match(Y, m_ZExt(m_Value(BoolY))) &&
        BoolX->getType()->isIntOrIntVectorTy(1) &&
        BoolY->getType()->isIntOrIntVectorTy(1) &&
        (X->hasOneUse() || Y->hasOneUse() || Op0->hasOneUse())) {
      Value *And = Builder.CreateAnd(BoolX, BoolY);
      return new ZExtInst(And, Ty);
    }
  }

  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  if (setShiftFlags(I, Q))
    return &I;

  // Transform (x << y) >> y to x & (-1 >> y)
  if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    Value *Mask = Builder.CreateLShr(AllOnes, Op1);
    return BinaryOperator::CreateAnd(Mask, X);
  }

  if (Instruction *Overflow = foldLShrOverflowBit(I))
    return Overflow;

  return nullptr;
}

Instruction *
InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract(
    BinaryOperator &OldAShr) {
  assert(OldAShr.getOpcode() == Instruction::AShr &&
         "Must be called with arithmetic right-shift instruction only.");

  // Check that constant C is a splat of the element-wise bitwidth of V.
  auto BitWidthSplat = [](Constant *C, Value *V) {
    return match(
        C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
                              APInt(C->getType()->getScalarSizeInBits(),
                                    V->getType()->getScalarSizeInBits())));
  };

  // It should look like variable-length sign-extension on the outside:
  //   (Val << (bitwidth(Val)-Nbits)) a>> (bitwidth(Val)-Nbits)
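  // For instance (i32, NBits = %n, %s = 32 - %n):
  //   ashr (shl (lshr %x, %s), %s), %s
  // sign-extends the high %n bits of %x in place, and is rewritten below to
  //   ashr %x, %s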
  Value *NBits;
  Instruction *MaybeTrunc;
  Constant *C1, *C2;
  if (!match(&OldAShr,
             m_AShr(m_Shl(m_Instruction(MaybeTrunc),
                          m_ZExtOrSelf(m_Sub(m_Constant(C1),
                                             m_ZExtOrSelf(m_Value(NBits))))),
                    m_ZExtOrSelf(m_Sub(m_Constant(C2),
                                       m_ZExtOrSelf(m_Deferred(NBits)))))) ||
      !BitWidthSplat(C1, &OldAShr) || !BitWidthSplat(C2, &OldAShr))
    return nullptr;

  // There may or may not be a truncation after outer two shifts.
  Instruction *HighBitExtract;
  match(MaybeTrunc, m_TruncOrSelf(m_Instruction(HighBitExtract)));
  bool HadTrunc = MaybeTrunc != HighBitExtract;

  // And finally, the innermost part of the pattern must be a right-shift.
  Value *X, *NumLowBitsToSkip;
  if (!match(HighBitExtract, m_Shr(m_Value(X), m_Value(NumLowBitsToSkip))))
    return nullptr;

  // Said right-shift must extract high NBits bits - C0 must be its bitwidth.
  Constant *C0;
  if (!match(NumLowBitsToSkip,
             m_ZExtOrSelf(
                 m_Sub(m_Constant(C0), m_ZExtOrSelf(m_Specific(NBits))))) ||
      !BitWidthSplat(C0, HighBitExtract))
    return nullptr;

  // Since the NBits is identical for all shifts, if the outermost and
  // innermost shifts are identical, then outermost shifts are redundant.
  // If we had truncation, do keep it though.
  if (HighBitExtract->getOpcode() == OldAShr.getOpcode())
    return replaceInstUsesWith(OldAShr, MaybeTrunc);

  // Else, if there was a truncation, then we need to ensure that one
  // instruction will go away.
  if (HadTrunc && !match(&OldAShr, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
    return nullptr;

  // Finally, bypass two innermost shifts, and perform the outermost shift on
  // the operands of the innermost shift.
  Instruction *NewAShr =
      BinaryOperator::Create(OldAShr.getOpcode(), X, NumLowBitsToSkip);
  NewAShr->copyIRFlags(HighBitExtract); // We can preserve 'exact'-ness.
  if (!HadTrunc)
    return NewAShr;

  Builder.Insert(NewAShr);
  return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType());
}

Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
  if (Value *V = simplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  const APInt *ShAmtAPInt;
  if (match(Op1, m_APInt(ShAmtAPInt)) && ShAmtAPInt->ult(BitWidth)) {
    unsigned ShAmt = ShAmtAPInt->getZExtValue();

    // If the shift amount equals the difference in width of the destination
    // and source scalar types:
    // ashr (shl (zext X), C), C --> sext X
    Value *X;
    if (match(Op0, m_Shl(m_ZExt(m_Value(X)), m_Specific(Op1))) &&
        ShAmt == BitWidth - X->getType()->getScalarSizeInBits())
      return new SExtInst(X, Ty);

    // We can't handle (X << C1) >>s C2. It shifts arbitrary bits in. However,
    // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
    const APInt *ShOp1;
    if (match(Op0, m_NSWShl(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      unsigned ShlAmt = ShOp1->getZExtValue();
      if (ShlAmt < ShAmt) {
        // (X <<nsw C1) >>s C2 --> X >>s (C2 - C1)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
        auto *NewAShr = BinaryOperator::CreateAShr(X, ShiftDiff);
        NewAShr->setIsExact(I.isExact());
        return NewAShr;
      }
      if (ShlAmt > ShAmt) {
        // (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
        auto *NewShl = BinaryOperator::Create(Instruction::Shl, X, ShiftDiff);
        NewShl->setHasNoSignedWrap(true);
        return NewShl;
      }
    }

    if (match(Op0, m_AShr(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
      // Oversized arithmetic shifts replicate the sign bit.
      AmtSum = std::min(AmtSum, BitWidth - 1);
      // (X >>s C1) >>s C2 --> X >>s (C1 + C2)
      return BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
    }

    if (match(Op0, m_OneUse(m_SExt(m_Value(X)))) &&
        (Ty->isVectorTy() || shouldChangeType(Ty, X->getType()))) {
      // ashr (sext X), C --> sext (ashr X, C')
      Type *SrcTy = X->getType();
      ShAmt = std::min(ShAmt, SrcTy->getScalarSizeInBits() - 1);
      Value *NewSh = Builder.CreateAShr(X, ConstantInt::get(SrcTy, ShAmt));
      return new SExtInst(NewSh, Ty);
    }

    if (ShAmt == BitWidth - 1) {
      // ashr i32 or(X,-X), 31 --> sext (X != 0)
      if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X)))))
        return new SExtInst(Builder.CreateIsNotNull(X), Ty);

      // ashr i32 (X -nsw Y), 31 --> sext (X < Y)
      Value *Y;
      if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
        return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
    }
  }

  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  if (setShiftFlags(I, Q))
    return &I;

  // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
  // as the pattern to splat the lowest bit.
  // FIXME: iff X is already masked, we don't need the one-use check.
  Value *X;
  if (match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)) &&
      match(Op0, m_OneUse(m_Shl(m_Value(X),
                                m_SpecificIntAllowUndef(BitWidth - 1))))) {
    Constant *Mask = ConstantInt::get(Ty, 1);
    // Retain the knowledge about the ignored lanes.
    Mask = Constant::mergeUndefsWith(
        Constant::mergeUndefsWith(Mask, cast<Constant>(Op1)),
        cast<Constant>(cast<Instruction>(Op0)->getOperand(1)));
    X = Builder.CreateAnd(X, Mask);
    return BinaryOperator::CreateNeg(X);
  }

  if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
    return R;

  // See if we can turn a signed shr into an unsigned shr.
  if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) {
    Instruction *Lshr = BinaryOperator::CreateLShr(Op0, Op1);
    Lshr->setIsExact(I.isExact());
    return Lshr;
  }

  // ashr (xor %x, -1), %y --> xor (ashr %x, %y), -1
  if (match(Op0, m_OneUse(m_Not(m_Value(X))))) {
    // Note that we must drop 'exact'-ness of the shift!
    // Note that we can't keep undef's in -1 vector constant!
    auto *NewAShr = Builder.CreateAShr(X, Op1, Op0->getName() + ".not");
    return BinaryOperator::CreateNot(NewAShr);
  }

  return nullptr;
}