//===- InstCombineShifts.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitShl, visitLShr, and visitAShr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
                                        Value *ShAmt1) {
  // We have two shift amounts from two different shifts. The types of those
  // shift amounts may not match. If that's the case let's bail out now.
  if (ShAmt0->getType() != ShAmt1->getType())
    return false;

  // As input, we have the following pattern:
  //   Sh0 (Sh1 X, Q), K
  // We want to rewrite that as:
  //   Sh x, (Q+K)  iff (Q+K) u< bitwidth(x)
  // While we know that originally (Q+K) would not overflow
  // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
  // shift amounts, so it may now overflow in smaller bitwidth.
  // To ensure that does not happen, we need to ensure that the total maximal
  // shift amount is still representable in that smaller bit width.
  unsigned MaximalPossibleTotalShiftAmount =
      (Sh0->getType()->getScalarSizeInBits() - 1) +
      (Sh1->getType()->getScalarSizeInBits() - 1);
  APInt MaximalRepresentableShiftAmount =
      APInt::getAllOnes(ShAmt0->getType()->getScalarSizeInBits());
  return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
}
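
// For instance (an illustrative worked example, not from the original file):
// if two i32 shifts have amounts that were zext'd from i5, the worst-case
// total is 31 + 31 = 62, which is not representable in the i5 shift-amount
// type (max 31), so the helper above would reject the reassociation.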

// Given pattern:
//   (x shiftopcode Q) shiftopcode K
// we should rewrite it as
//   x shiftopcode (Q+K)  iff (Q+K) u< bitwidth(x)
//
// This is valid for any shift, but they must be identical, and we must be
// careful in case we have (zext(Q)+zext(K)) and look past extensions,
// (Q+K) must not overflow or else (Q+K) u< bitwidth(x) is bogus.
//
// AnalyzeForSignBitExtraction indicates that we will only analyze whether this
// pattern has any 2 right-shifts that sum to 1 less than original bit width.
Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
    BinaryOperator *Sh0, const SimplifyQuery &SQ,
    bool AnalyzeForSignBitExtraction) {
  // Look for a shift of some instruction, ignore zext of shift amount if any.
  Instruction *Sh0Op0;
  Value *ShAmt0;
  if (!match(Sh0,
             m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0)))))
    return nullptr;

  // If there is a truncation between the two shifts, we must make note of it
  // and look through it. The truncation imposes additional constraints on the
  // transform.
  Instruction *Sh1;
  Value *Trunc = nullptr;
  if (!match(Sh0Op0,
             m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)),
                         m_Instruction(Sh1))))
    return nullptr;

  // Inner shift: (x shiftopcode ShAmt1)
  // Like with other shift, ignore zext of shift amount if any.
  Value *X, *ShAmt1;
  if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
    return nullptr;

  // Verify that it would be safe to try to add those two shift amounts.
  if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1))
    return nullptr;

  // We are only looking for signbit extraction if we have two right shifts.
  bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) &&
                           match(Sh1, m_Shr(m_Value(), m_Value()));
  // ... and if it's not two right-shifts, we know the answer already.
  if (AnalyzeForSignBitExtraction && !HadTwoRightShifts)
    return nullptr;

  // The shift opcodes must be identical, unless we are just checking whether
  // this pattern can be interpreted as a sign-bit-extraction.
  Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
  bool IdenticalShOpcodes = Sh0->getOpcode() == Sh1->getOpcode();
  if (!IdenticalShOpcodes && !AnalyzeForSignBitExtraction)
    return nullptr;

  // If we saw truncation, we'll need to produce an extra instruction,
  // and for that one of the operands of the shift must be one-use,
  // unless of course we don't actually plan to produce any instructions here.
  if (Trunc && !AnalyzeForSignBitExtraction &&
      !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
    return nullptr;

  // Can we fold (ShAmt0+ShAmt1)?
  auto *NewShAmt = dyn_cast_or_null<Constant>(
      simplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
                      SQ.getWithInstruction(Sh0)));
  if (!NewShAmt)
    return nullptr; // Did not simplify.
  unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits();
  unsigned XBitWidth = X->getType()->getScalarSizeInBits();
  // Is the new shift amount smaller than the bit width of inner/new shift?
  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
                                          APInt(NewShAmtBitWidth, XBitWidth))))
    return nullptr; // FIXME: could perform constant-folding.

  // If there was a truncation, and we have a right-shift, we can only fold if
  // we are left with the original sign bit. Likewise, if we were just checking
  // that this is a signbit extraction, this is the place to check it.
  // FIXME: zero shift amount is also legal here, but we can't *easily* check
  // more than one predicate so it's not really worth it.
  if (HadTwoRightShifts && (Trunc || AnalyzeForSignBitExtraction)) {
    // If it's not a sign bit extraction, then we're done.
    if (!match(NewShAmt,
               m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
                                  APInt(NewShAmtBitWidth, XBitWidth - 1))))
      return nullptr;
    // If it is, and that was the question, return the base value.
    if (AnalyzeForSignBitExtraction)
      return X;
  }

  assert(IdenticalShOpcodes && "Should not get here with different shifts.");

  if (NewShAmt->getType() != X->getType()) {
    NewShAmt = ConstantFoldCastOperand(Instruction::ZExt, NewShAmt,
                                       X->getType(), SQ.DL);
    if (!NewShAmt)
      return nullptr;
  }

  // All good, we can do this fold.
  BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);

  // The flags can only be propagated if there wasn't a trunc.
  if (!Trunc) {
    // If the pattern did not involve trunc, and both of the original shifts
    // had the same flag set, preserve the flag.
    if (ShiftOpcode == Instruction::BinaryOps::Shl) {
      NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
                                     Sh1->hasNoUnsignedWrap());
      NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
                                   Sh1->hasNoSignedWrap());
    } else {
      NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
    }
  }

  Instruction *Ret = NewShift;
  if (Trunc) {
    Builder.Insert(NewShift);
    Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType());
  }

  return Ret;
}
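
// For instance (illustrative, not from the original file), two same-direction
// shifts by constants get merged:
//   %t = lshr i32 %x, 3
//   %r = lshr i32 %t, 5
// -->
//   %r = lshr i32 %x, 8
// because 3 + 5 u< 32.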

// If we have some pattern that leaves only some low bits set, and then performs
// left-shift of those bits, if none of the bits that are left after the final
// shift are modified by the mask, we can omit the mask.
//
// There are many variants to this pattern:
//   a)  (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
//   b)  (x & (~(-1 << MaskShAmt))) << ShiftShAmt
//   c)  (x & (-1 l>> MaskShAmt)) << ShiftShAmt
//   d)  (x & ((-1 << MaskShAmt) l>> MaskShAmt)) << ShiftShAmt
//   e)  ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt
//   f)  ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt
// All these patterns can be simplified to just:
//   x << ShiftShAmt
// iff:
//   a,b)     (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
//   c,d,e,f) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
static Instruction *
dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
                                     const SimplifyQuery &Q,
                                     InstCombiner::BuilderTy &Builder) {
  assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl &&
         "The input must be 'shl'!");

  Value *Masked, *ShiftShAmt;
  match(OuterShift,
        m_Shift(m_Value(Masked), m_ZExtOrSelf(m_Value(ShiftShAmt))));

  // *If* there is a truncation between an outer shift and a possibly-mask,
  // then said truncation *must* be one-use, else we can't perform the fold.
  Value *Trunc;
  if (match(Masked, m_CombineAnd(m_Trunc(m_Value(Masked)), m_Value(Trunc))) &&
      !Trunc->hasOneUse())
    return nullptr;

  Type *NarrowestTy = OuterShift->getType();
  Type *WidestTy = Masked->getType();
  bool HadTrunc = WidestTy != NarrowestTy;

  // The mask must be computed in a type twice as wide to ensure
  // that no bits are lost if the sum-of-shifts is wider than the base type.
  Type *ExtendedTy = WidestTy->getExtendedType();

  Value *MaskShAmt;

  // ((1 << MaskShAmt) - 1)
  auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
  // (~(-1 << maskNbits))
  auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
  // (-1 l>> MaskShAmt)
  auto MaskC = m_LShr(m_AllOnes(), m_Value(MaskShAmt));
  // ((-1 << MaskShAmt) l>> MaskShAmt)
  auto MaskD =
      m_LShr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));

  Value *X;
  Constant *NewMask;
  if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
    // Peek through an optional zext of the shift amount.
    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

    // Verify that it would be safe to try to add those two shift amounts.
    if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
                                            MaskShAmt))
      return nullptr;

    // Can we simplify (MaskShAmt+ShiftShAmt)?
    auto *SumOfShAmts = dyn_cast_or_null<Constant>(simplifyAddInst(
        MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
    if (!SumOfShAmts)
      return nullptr; // Did not simplify.
    // In this pattern SumOfShAmts correlates with the number of low bits
    // that shall remain in the root value (OuterShift).

    // An extend of an undef value becomes zero because the high bits are never
    // completely unknown. Replace the `undef` shift amounts with final
    // shift bitwidth to ensure that the value remains undef when creating the
    // subsequent shift op.
    SumOfShAmts = Constant::replaceUndefsWith(
        SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(),
                                      ExtendedTy->getScalarSizeInBits()));
    auto *ExtendedSumOfShAmts = ConstantFoldCastOperand(
        Instruction::ZExt, SumOfShAmts, ExtendedTy, Q.DL);
    if (!ExtendedSumOfShAmts)
      return nullptr;

    // And compute the mask as usual: ~(-1 << (SumOfShAmts))
    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
    auto *ExtendedInvertedMask =
        ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts);
    NewMask = ConstantExpr::getNot(ExtendedInvertedMask);
  } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
             match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
                                 m_Deferred(MaskShAmt)))) {
    // Peek through an optional zext of the shift amount.
    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

    // Verify that it would be safe to try to add those two shift amounts.
    if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
                                            MaskShAmt))
      return nullptr;

    // Can we simplify (ShiftShAmt-MaskShAmt)?
    auto *ShAmtsDiff = dyn_cast_or_null<Constant>(simplifySubInst(
        ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
    if (!ShAmtsDiff)
      return nullptr; // Did not simplify.
    // In this pattern ShAmtsDiff correlates with the number of high bits that
    // shall be unset in the root value (OuterShift).

    // An extend of an undef value becomes zero because the high bits are never
    // completely unknown. Replace the `undef` shift amounts with negated
    // bitwidth of innermost shift to ensure that the value remains undef when
    // creating the subsequent shift op.
    unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits();
    ShAmtsDiff = Constant::replaceUndefsWith(
        ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(),
                                     -WidestTyBitWidth));
    auto *ExtendedNumHighBitsToClear = ConstantFoldCastOperand(
        Instruction::ZExt,
        ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(),
                                              WidestTyBitWidth),
                             ShAmtsDiff),
        ExtendedTy, Q.DL);
    if (!ExtendedNumHighBitsToClear)
      return nullptr;

    // And compute the mask as usual: (-1 l>> (NumHighBitsToClear))
    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
    NewMask = ConstantFoldBinaryOpOperands(Instruction::LShr, ExtendedAllOnes,
                                           ExtendedNumHighBitsToClear, Q.DL);
    if (!NewMask)
      return nullptr;
  } else
    return nullptr; // Don't know anything about this pattern.

  NewMask = ConstantExpr::getTrunc(NewMask, NarrowestTy);

  // Does this mask have any unset bits? If not then we can just not apply it.
  bool NeedMask = !match(NewMask, m_AllOnes());

  // If we need to apply a mask, there are several more restrictions we have.
  if (NeedMask) {
    // The old masking instruction must go away.
    if (!Masked->hasOneUse())
      return nullptr;
    // The original "masking" instruction must not have been `ashr`.
    if (match(Masked, m_AShr(m_Value(), m_Value())))
      return nullptr;
  }

  // If we need to apply truncation, let's do it first, since we can.
  // We have already ensured that the old truncation will go away.
  if (HadTrunc)
    X = Builder.CreateTrunc(X, NarrowestTy);

  // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
  // We didn't change the Type of this outermost shift, so we can just do it.
  auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
                                          OuterShift->getOperand(1));
  if (!NeedMask)
    return NewShift;

  Builder.Insert(NewShift);
  return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
}
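
// For instance (illustrative, not from the original file), variant (a) on i8:
//   %m = and i8 %x, 15        ; (1 << 4) - 1
//   %r = shl i8 %m, 4
// -->
//   %r = shl i8 %x, 4
// because 4 + 4 u>= 8, so every bit cleared by the mask is shifted out anyway.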

/// If we have a shift-by-constant of a bin op (bitwise logic op or add/sub w/
/// shl) that itself has a shift-by-constant operand with identical opcode, we
/// may be able to convert that into 2 independent shifts followed by the logic
/// op. This eliminates a use of an intermediate value (reduces dependency
/// chain).
static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
                                            InstCombiner::BuilderTy &Builder) {
  assert(I.isShift() && "Expected a shift as input");
  auto *BinInst = dyn_cast<BinaryOperator>(I.getOperand(0));
  if (!BinInst ||
      (!BinInst->isBitwiseLogicOp() &&
       BinInst->getOpcode() != Instruction::Add &&
       BinInst->getOpcode() != Instruction::Sub) ||
      !BinInst->hasOneUse())
    return nullptr;

  Constant *C0, *C1;
  if (!match(I.getOperand(1), m_Constant(C1)))
    return nullptr;

  Instruction::BinaryOps ShiftOpcode = I.getOpcode();
  // Transform for add/sub only works with shl.
  if ((BinInst->getOpcode() == Instruction::Add ||
       BinInst->getOpcode() == Instruction::Sub) &&
      ShiftOpcode != Instruction::Shl)
    return nullptr;

  Type *Ty = I.getType();

  // Find a matching shift by constant. The fold is not valid if the sum
  // of the shift values equals or exceeds bitwidth.
  Value *X, *Y;
  auto matchFirstShift = [&](Value *V, Value *W) {
    unsigned Size = Ty->getScalarSizeInBits();
    APInt Threshold(Size, Size);
    return match(V, m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0))) &&
           (V->hasOneUse() || match(W, m_ImmConstant())) &&
           match(ConstantExpr::getAdd(C0, C1),
                 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
  };

  // Logic ops and Add are commutative, so check each operand for a match. Sub
  // is not, so we cannot reorder if we match operand(1); we need to keep the
  // operands in their original positions.
  bool FirstShiftIsOp1 = false;
  if (matchFirstShift(BinInst->getOperand(0), BinInst->getOperand(1)))
    Y = BinInst->getOperand(1);
  else if (matchFirstShift(BinInst->getOperand(1), BinInst->getOperand(0))) {
    Y = BinInst->getOperand(0);
    FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
  } else
    return nullptr;

  // shift (binop (shift X, C0), Y), C1 -> binop (shift X, C0+C1), (shift Y, C1)
  Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1);
  Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
  Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, C1);
  Value *Op1 = FirstShiftIsOp1 ? NewShift2 : NewShift1;
  Value *Op2 = FirstShiftIsOp1 ? NewShift1 : NewShift2;
  return BinaryOperator::Create(BinInst->getOpcode(), Op1, Op2);
}
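
// For instance (illustrative, not from the original file):
//   %t = shl i32 %x, 2
//   %o = or i32 %t, %y
//   %r = shl i32 %o, 3
// -->
//   %s1 = shl i32 %x, 5
//   %s2 = shl i32 %y, 3
//   %r  = or i32 %s1, %s2
// valid because 2 + 3 u< 32.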

Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  assert(Op0->getType() == Op1->getType());
  Type *Ty = I.getType();

  // If the shift amount is a one-use `sext`, we can demote it to `zext`.
  Value *Y;
  if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) {
    Value *NewExt = Builder.CreateZExt(Y, Ty, Op1->getName());
    return BinaryOperator::Create(I.getOpcode(), Op0, NewExt);
  }

  // See if we can fold away this shift.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // Try to fold constant and into select arguments.
  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

  if (Constant *CUI = dyn_cast<Constant>(Op1))
    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
      return Res;

  if (auto *NewShift = cast_or_null<Instruction>(
          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)))
    return NewShift;

  // Pre-shift a constant shifted by a variable amount with constant offset:
  // C shift (A add nuw C1) --> (C shift C1) shift A
  Value *A;
  Constant *C, *C1;
  if (match(Op0, m_Constant(C)) &&
      match(Op1, m_NUWAdd(m_Value(A), m_Constant(C1)))) {
    Value *NewC = Builder.CreateBinOp(I.getOpcode(), C, C1);
    return BinaryOperator::Create(I.getOpcode(), NewC, A);
  }

  unsigned BitWidth = Ty->getScalarSizeInBits();

  const APInt *AC, *AddC;
  // Try to pre-shift a constant shifted by a variable amount added with a
  // negative number:
  // C << (X - AddC) --> (C >> AddC) << X
  // and
  // C >> (X - AddC) --> (C << AddC) >> X
  if (match(Op0, m_APInt(AC)) && match(Op1, m_Add(m_Value(A), m_APInt(AddC))) &&
      AddC->isNegative() && (-*AddC).ult(BitWidth)) {
    assert(!AC->isZero() && "Expected simplify of shifted zero");
    unsigned PosOffset = (-*AddC).getZExtValue();

    auto isSuitableForPreShift = [PosOffset, &I, AC]() {
      switch (I.getOpcode()) {
      default:
        return false;
      case Instruction::Shl:
        return (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) &&
               AC->eq(AC->lshr(PosOffset).shl(PosOffset));
      case Instruction::LShr:
        return I.isExact() && AC->eq(AC->shl(PosOffset).lshr(PosOffset));
      case Instruction::AShr:
        return I.isExact() && AC->eq(AC->shl(PosOffset).ashr(PosOffset));
      }
    };

    if (isSuitableForPreShift()) {
      Constant *NewC = ConstantInt::get(Ty, I.getOpcode() == Instruction::Shl
                                                ? AC->lshr(PosOffset)
                                                : AC->shl(PosOffset));
      BinaryOperator *NewShiftOp =
          BinaryOperator::Create(I.getOpcode(), NewC, A);
      if (I.getOpcode() == Instruction::Shl) {
        NewShiftOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
      } else {
        NewShiftOp->setIsExact();
      }
      return NewShiftOp;
    }
  }

  // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2.
  // Because shifts by negative values (which could occur if A were negative)
  // are undefined.
  if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Constant(C))) &&
      match(C, m_Power2())) {
    // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
    // demand the sign bit (and many others) here??
    Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(Ty, 1));
    Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName());
    return replaceOperand(I, 1, Rem);
  }
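
  // For instance (illustrative, not from the original file):
  //   %m = srem i32 %a, 8
  //   %r = shl i32 %x, %m
  // -->
  //   %m = and i32 %a, 7
  //   %r = shl i32 %x, %m
  // This avoids a possibly-negative shift amount when %a is negative.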

  if (Instruction *Logic = foldShiftOfShiftedBinOp(I, Builder))
    return Logic;

  if (match(Op1, m_Or(m_Value(), m_SpecificInt(BitWidth - 1))))
    return replaceOperand(I, 1, ConstantInt::get(Ty, BitWidth - 1));

  return nullptr;
}

/// Return true if we can simplify two logical (either left or right) shifts
/// that have constant shift amounts: OuterShift (InnerShift X, C1), C2.
static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl,
                                    Instruction *InnerShift,
                                    InstCombinerImpl &IC, Instruction *CxtI) {
  assert(InnerShift->isLogicalShift() && "Unexpected instruction type");

  // We need constant scalar or constant splat shifts.
  const APInt *InnerShiftConst;
  if (!match(InnerShift->getOperand(1), m_APInt(InnerShiftConst)))
    return false;

  // Two logical shifts in the same direction:
  // shl (shl X, C1), C2 --> shl X, C1 + C2
  // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
  bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
  if (IsInnerShl == IsOuterShl)
    return true;

  // Equal shift amounts in opposite directions become bitwise 'and':
  // lshr (shl X, C), C --> and X, C'
  // shl (lshr X, C), C --> and X, C'
  if (*InnerShiftConst == OuterShAmt)
    return true;

  // If the 2nd shift is bigger than the 1st, we can fold:
  // lshr (shl X, C1), C2 --> and (shl X, C1 - C2), C3
  // shl (lshr X, C1), C2 --> and (lshr X, C1 - C2), C3
  // but it isn't profitable unless we know the and'd out bits are already zero.
  // Also, check that the inner shift is valid (less than the type width) or
  // we'll crash trying to produce the bit mask for the 'and'.
  unsigned TypeWidth = InnerShift->getType()->getScalarSizeInBits();
  if (InnerShiftConst->ugt(OuterShAmt) && InnerShiftConst->ult(TypeWidth)) {
    unsigned InnerShAmt = InnerShiftConst->getZExtValue();
    unsigned MaskShift =
        IsInnerShl ? TypeWidth - InnerShAmt : InnerShAmt - OuterShAmt;
    APInt Mask = APInt::getLowBitsSet(TypeWidth, OuterShAmt) << MaskShift;
    if (IC.MaskedValueIsZero(InnerShift->getOperand(0), Mask, 0, CxtI))
      return true;
  }

  return false;
}
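
// For instance (illustrative, not from the original file), the equal-amount
// opposite-direction case on i32:
//   %t = shl i32 %x, 5
//   %r = lshr i32 %t, 5
// -->
//   %r = and i32 %x, 134217727   ; (1 << 27) - 1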

/// See if we can compute the specified value, but shifted logically to the left
/// or right by some number of bits. This should return true if the expression
/// can be computed for the same cost as the current expression tree. This is
/// used to eliminate extraneous shifting from things like:
///      %C = shl i128 %A, 64
///      %D = shl i128 %B, 96
///      %E = or i128 %C, %D
///      %F = lshr i128 %E, 64
/// where the client will ask if E can be computed shifted right by 64-bits. If
/// this succeeds, getShiftedValue() will be called to produce the value.
static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
                               InstCombinerImpl &IC, Instruction *CxtI) {
  // We can always evaluate immediate constants.
  if (match(V, m_ImmConstant()))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // We can't mutate something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  switch (I->getOpcode()) {
  default: return false;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    return canEvaluateShifted(I->getOperand(0), NumBits, IsLeftShift, IC, I) &&
           canEvaluateShifted(I->getOperand(1), NumBits, IsLeftShift, IC, I);

  case Instruction::Shl:
  case Instruction::LShr:
    return canEvaluateShiftedShift(NumBits, IsLeftShift, I, IC, CxtI);

  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    Value *TrueVal = SI->getTrueValue();
    Value *FalseVal = SI->getFalseValue();
    return canEvaluateShifted(TrueVal, NumBits, IsLeftShift, IC, SI) &&
           canEvaluateShifted(FalseVal, NumBits, IsLeftShift, IC, SI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateShifted(IncValue, NumBits, IsLeftShift, IC, PN))
        return false;
    return true;
  }
  case Instruction::Mul: {
    const APInt *MulConst;
    // We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C`)
    return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) &&
           MulConst->isNegatedPowerOf2() && MulConst->countr_zero() == NumBits;
  }
  }
}
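
// For instance (illustrative, not from the original file), the Mul case on i8:
//   %m = mul i8 %x, -8        ; -(1 << 3)
//   %r = lshr i8 %m, 3
// -->
//   %n = sub i8 0, %x
//   %r = and i8 %n, 31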

/// Fold OuterShift (InnerShift X, C1), C2.
/// See canEvaluateShiftedShift() for the constraints on these instructions.
static Value *foldShiftedShift(BinaryOperator *InnerShift, unsigned OuterShAmt,
                               bool IsOuterShl,
                               InstCombiner::BuilderTy &Builder) {
  bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
  Type *ShType = InnerShift->getType();
  unsigned TypeWidth = ShType->getScalarSizeInBits();

  // We only accept shifts-by-a-constant in canEvaluateShifted().
  const APInt *C1;
  match(InnerShift->getOperand(1), m_APInt(C1));
  unsigned InnerShAmt = C1->getZExtValue();

  // Change the shift amount and clear the appropriate IR flags.
  auto NewInnerShift = [&](unsigned ShAmt) {
    InnerShift->setOperand(1, ConstantInt::get(ShType, ShAmt));
    if (IsInnerShl) {
      InnerShift->setHasNoUnsignedWrap(false);
      InnerShift->setHasNoSignedWrap(false);
    } else {
      InnerShift->setIsExact(false);
    }
    return InnerShift;
  };

  // Two logical shifts in the same direction:
  // shl (shl X, C1), C2 --> shl X, C1 + C2
  // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
  if (IsInnerShl == IsOuterShl) {
    // If this is an oversized composite shift, then unsigned shifts get 0.
    if (InnerShAmt + OuterShAmt >= TypeWidth)
      return Constant::getNullValue(ShType);

    return NewInnerShift(InnerShAmt + OuterShAmt);
  }

  // Equal shift amounts in opposite directions become bitwise 'and':
  // lshr (shl X, C), C --> and X, C'
  // shl (lshr X, C), C --> and X, C'
  if (InnerShAmt == OuterShAmt) {
    APInt Mask = IsInnerShl
                     ? APInt::getLowBitsSet(TypeWidth, TypeWidth - OuterShAmt)
                     : APInt::getHighBitsSet(TypeWidth, TypeWidth - OuterShAmt);
    Value *And = Builder.CreateAnd(InnerShift->getOperand(0),
                                   ConstantInt::get(ShType, Mask));
    if (auto *AndI = dyn_cast<Instruction>(And)) {
      AndI->moveBefore(InnerShift);
      AndI->takeName(InnerShift);
    }
    return And;
  }

  assert(InnerShAmt > OuterShAmt &&
         "Unexpected opposite direction logical shift pair");

  // In general, we would need an 'and' for this transform, but
  // canEvaluateShiftedShift() guarantees that the masked-off bits are not used.
  // lshr (shl X, C1), C2 --> shl X, C1 - C2
  // shl (lshr X, C1), C2 --> lshr X, C1 - C2
  return NewInnerShift(InnerShAmt - OuterShAmt);
}

/// When canEvaluateShifted() returns true for an expression, this function
/// inserts the new computation that produces the shifted value.
static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombinerImpl &IC, const DataLayout &DL) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      return IC.Builder.CreateShl(C, NumBits);
    else
      return IC.Builder.CreateLShr(C, NumBits);
  }

  Instruction *I = cast<Instruction>(V);
  IC.addToWorklist(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    I->setOperand(
        0, getShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
    I->setOperand(
        1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
    return I;

  case Instruction::Shl:
  case Instruction::LShr:
    return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift,
                            IC.Builder);

  case Instruction::Select:
    I->setOperand(
        1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
    I->setOperand(
        2, getShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, getShiftedValue(PN->getIncomingValue(i), NumBits,
                                              isLeftShift, IC, DL));
    return PN;
  }
  case Instruction::Mul: {
    assert(!isLeftShift && "Unexpected shift direction!");
    auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0));
    IC.InsertNewInstWith(Neg, I->getIterator());
    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
    APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits);
    auto *And = BinaryOperator::CreateAnd(Neg,
                                          ConstantInt::get(I->getType(), Mask));
    And->takeName(I);
    return IC.InsertNewInstWith(And, I->getIterator());
  }
  }
}

// If this is a bitwise operator or add with a constant RHS we might be able
// to pull it through a shift.
static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
                                         BinaryOperator *BO) {
  switch (BO->getOpcode()) {
  default:
    return false; // Do not perform transform!
  case Instruction::Add:
    return Shift.getOpcode() == Instruction::Shl;
  case Instruction::Or:
  case Instruction::And:
    return true;
  case Instruction::Xor:
    // Do not change a 'not' of logical shift because that would create a normal
    // 'xor'. The 'not' is likely better for analysis, SCEV, and codegen.
    return !(Shift.isLogicalShift() && match(BO, m_Not(m_Value())));
  }
}

Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1,
                                                   BinaryOperator &I) {
  // (C2 << X) << C1 --> (C2 << C1) << X
  // (C2 >> X) >> C1 --> (C2 >> C1) >> X
  Constant *C2;
  Value *X;
  if (match(Op0, m_BinOp(I.getOpcode(), m_ImmConstant(C2), m_Value(X))))
    return BinaryOperator::Create(
        I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X);

  bool IsLeftShift = I.getOpcode() == Instruction::Shl;
  Type *Ty = I.getType();
  unsigned TypeBits = Ty->getScalarSizeInBits();

  // (X / +DivC) >> (Width - 1) --> ext (X <= -DivC)
  // (X / -DivC) >> (Width - 1) --> ext (X >= +DivC)
  const APInt *DivC;
  if (!IsLeftShift && match(C1, m_SpecificIntAllowUndef(TypeBits - 1)) &&
      match(Op0, m_SDiv(m_Value(X), m_APInt(DivC))) && !DivC->isZero() &&
      !DivC->isMinSignedValue()) {
    Constant *NegDivC = ConstantInt::get(Ty, -(*DivC));
    ICmpInst::Predicate Pred =
        DivC->isNegative() ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_SLE;
    Value *Cmp = Builder.CreateICmp(Pred, X, NegDivC);
    auto ExtOpcode = (I.getOpcode() == Instruction::AShr) ? Instruction::SExt
                                                          : Instruction::ZExt;
    return CastInst::Create(ExtOpcode, Cmp, Ty);
  }
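
  // For instance (illustrative, not from the original file), with +DivC on
  // i32:
  //   %d = sdiv i32 %x, 4
  //   %r = lshr i32 %d, 31
  // -->
  //   %c = icmp sle i32 %x, -4
  //   %r = zext i1 %c to i32
  // since the quotient is negative exactly when %x <= -4.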

  const APInt *Op1C;
  if (!match(C1, m_APInt(Op1C)))
    return nullptr;

  assert(!Op1C->uge(TypeBits) &&
         "Shift over the type width should have been removed already");

  // See if we can propagate this shift into the input, this covers the trivial
  // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
  if (I.getOpcode() != Instruction::AShr &&
      canEvaluateShifted(Op0, Op1C->getZExtValue(), IsLeftShift, *this, &I)) {
    LLVM_DEBUG(
        dbgs() << "ICE: GetShiftedValue propagating shift through expression"
                  " to eliminate shift:\n IN: "
               << *Op0 << "\n SH: " << I << "\n");

    return replaceInstUsesWith(
        I, getShiftedValue(Op0, Op1C->getZExtValue(), IsLeftShift, *this, DL));
  }

  if (Instruction *FoldedShift = foldBinOpIntoSelectOrPhi(I))
    return FoldedShift;

  if (!Op0->hasOneUse())
    return nullptr;
= dyn_cast
<BinaryOperator
>(Op0
)) {
813 // If the operand is a bitwise operator with a constant RHS, and the
814 // shift is the only use, we can pull it out of the shift.
816 if (match(Op0BO
->getOperand(1), m_APInt(Op0C
))) {
817 if (canShiftBinOpWithConstantRHS(I
, Op0BO
)) {
819 Builder
.CreateBinOp(I
.getOpcode(), Op0BO
->getOperand(1), C1
);
822 Builder
.CreateBinOp(I
.getOpcode(), Op0BO
->getOperand(0), C1
);
823 NewShift
->takeName(Op0BO
);
825 return BinaryOperator::Create(Op0BO
->getOpcode(), NewShift
, NewRHS
);

  // If we have a select that conditionally executes some binary operator,
  // see if we can pull the select and operator through the shift.
  //
  // For example, turning:
  //   shl (select C, (add X, C1), X), C2
  // Into:
  //   Y = shl X, C2
  //   select C, (add Y, C1 << C2), Y
  Value *Cond;
  BinaryOperator *TBO;
  Value *FalseVal;
  if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)),
                          m_Value(FalseVal)))) {
    const APInt *C;
    if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
        match(TBO->getOperand(1), m_APInt(C)) &&
        canShiftBinOpWithConstantRHS(I, TBO)) {
      Value *NewRHS =
          Builder.CreateBinOp(I.getOpcode(), TBO->getOperand(1), C1);

      Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, C1);
      Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS);
      return SelectInst::Create(Cond, NewOp, NewShift);
    }
  }
, m_Select(m_Value(Cond
), m_Value(TrueVal
),
859 m_OneUse(m_BinOp(FBO
))))) {
861 if (!isa
<Constant
>(TrueVal
) && FBO
->getOperand(0) == TrueVal
&&
862 match(FBO
->getOperand(1), m_APInt(C
)) &&
863 canShiftBinOpWithConstantRHS(I
, FBO
)) {
865 Builder
.CreateBinOp(I
.getOpcode(), FBO
->getOperand(1), C1
);
867 Value
*NewShift
= Builder
.CreateBinOp(I
.getOpcode(), TrueVal
, C1
);
868 Value
*NewOp
= Builder
.CreateBinOp(FBO
->getOpcode(), NewShift
, NewRHS
);
869 return SelectInst::Create(Cond
, NewShift
, NewOp
);

// Tries to perform
//   (lshr (add (zext X), (zext Y)), K)
//   -> (icmp ult (add X, Y), X)
// where
//   - The add's operands are zexts from a K-bits integer to a bigger type.
//   - The add is only used by the shr, or by iK (or narrower) truncates.
//   - The lshr type has more than 2 bits (other types are boolean math).
//   - K > 1
// note that
//   - The resulting add cannot have nuw/nsw, else on overflow we get a
//     poison value and the transform isn't legal anymore.
Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) {
  assert(I.getOpcode() == Instruction::LShr);

  Value *Add = I.getOperand(0);
  Value *ShiftAmt = I.getOperand(1);
  Type *Ty = I.getType();

  if (Ty->getScalarSizeInBits() < 3)
    return nullptr;

  const APInt *ShAmtAPInt = nullptr;
  Value *X = nullptr, *Y = nullptr;
  if (!match(ShiftAmt, m_APInt(ShAmtAPInt)) ||
      !match(Add,
             m_Add(m_OneUse(m_ZExt(m_Value(X))), m_OneUse(m_ZExt(m_Value(Y))))))
    return nullptr;

  const unsigned ShAmt = ShAmtAPInt->getZExtValue();
  if (ShAmt == 1)
    return nullptr;

  // X/Y are zexts from `ShAmt`-sized ints.
  if (X->getType()->getScalarSizeInBits() != ShAmt ||
      Y->getType()->getScalarSizeInBits() != ShAmt)
    return nullptr;

  // Make sure that `Add` is only used by `I` and `ShAmt`-truncates.
  if (!Add->hasOneUse()) {
    for (User *U : Add->users()) {
      if (U == &I)
        continue;

      TruncInst *Trunc = dyn_cast<TruncInst>(U);
      if (!Trunc || Trunc->getType()->getScalarSizeInBits() > ShAmt)
        return nullptr;
    }
  }

  // Insert at Add so that the newly created `NarrowAdd` will dominate its
  // users (i.e. `Add`'s users).
  Instruction *AddInst = cast<Instruction>(Add);
  Builder.SetInsertPoint(AddInst);

  Value *NarrowAdd = Builder.CreateAdd(X, Y, "add.narrowed");
  Value *Overflow =
      Builder.CreateICmpULT(NarrowAdd, X, "add.narrowed.overflow");

  // Replace the uses of the original add with a zext of the
  // NarrowAdd's result. Note that all users at this stage are known to
  // be ShAmt-sized truncs, or the lshr itself.
  if (!Add->hasOneUse()) {
    replaceInstUsesWith(*AddInst, Builder.CreateZExt(NarrowAdd, Ty));
    eraseInstFromFunction(*AddInst);
  }

  // Replace the LShr with a zext of the overflow check.
  return new ZExtInst(Overflow, Ty);
}

// Try to set nuw/nsw flags on shl or exact flag on lshr/ashr using knownbits.
static bool setShiftFlags(BinaryOperator &I, const SimplifyQuery &Q) {
  assert(I.isShift() && "Expected a shift as input");
  // We already have all the flags.
  if (I.getOpcode() == Instruction::Shl) {
    if (I.hasNoUnsignedWrap() && I.hasNoSignedWrap())
      return false;
  } else {
    if (I.isExact())
      return false;

    // shr (shl X, Y), Y
    if (match(I.getOperand(0), m_Shl(m_Value(), m_Specific(I.getOperand(1))))) {
      I.setIsExact();
      return true;
    }
  }

  // Compute what we know about shift count.
  KnownBits KnownCnt = computeKnownBits(I.getOperand(1), /* Depth */ 0, Q);
  unsigned BitWidth = KnownCnt.getBitWidth();
  // Since shift produces a poison value if RHS is equal to or larger than the
  // bit width, we can safely assume that RHS is less than the bit width.
  uint64_t MaxCnt = KnownCnt.getMaxValue().getLimitedValue(BitWidth - 1);

  KnownBits KnownAmt = computeKnownBits(I.getOperand(0), /* Depth */ 0, Q);
  bool Changed = false;

  if (I.getOpcode() == Instruction::Shl) {
    // If we have at least as many leading zeros as the maximum shift count,
    // we have nuw.
    if (!I.hasNoUnsignedWrap() && MaxCnt <= KnownAmt.countMinLeadingZeros()) {
      I.setHasNoUnsignedWrap();
      Changed = true;
    }
    // If we have more sign bits than the maximum shift count, we have nsw.
    if (!I.hasNoSignedWrap()) {
      if (MaxCnt < KnownAmt.countMinSignBits() ||
          MaxCnt < ComputeNumSignBits(I.getOperand(0), Q.DL, /*Depth*/ 0, Q.AC,
                                      Q.CxtI, Q.DT)) {
        I.setHasNoSignedWrap();
        Changed = true;
      }
    }
    return Changed;
  }

  // If we have at least as many trailing zeros as the maximum count, then we
  // have exact.
  Changed = MaxCnt <= KnownAmt.countMinTrailingZeros();
  I.setIsExact(Changed);

  return Changed;
}

Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
  const SimplifyQuery Q = SQ.getWithInstruction(&I);

  if (Value *V = simplifyShlInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *V = commonShiftTransforms(I))
    return V;

  if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder))
    return V;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();

  const APInt *C;
  if (match(Op1, m_APInt(C))) {
    unsigned ShAmtC = C->getZExtValue();

    // shl (zext X), C --> zext (shl X, C)
    // This is only valid if X would have zeros shifted out.
    Value *X;
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      if (ShAmtC < SrcWidth &&
          MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmtC), 0, &I))
        return new ZExtInst(Builder.CreateShl(X, ShAmtC), Ty);
    }

    // (X >> C) << C --> X & (-1 << C)
    if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1)))) {
      APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
    }

    const APInt *C1;
    if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(C1)))) &&
        C1->ult(BitWidth)) {
      unsigned ShrAmt = C1->getZExtValue();
      if (ShrAmt < ShAmtC) {
        // If C1 < C: (X >>?,exact C1) << C --> X << (C - C1)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
        auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
        NewShl->setHasNoUnsignedWrap(
            I.hasNoUnsignedWrap() ||
            (ShrAmt &&
             cast<Instruction>(Op0)->getOpcode() == Instruction::LShr &&
             I.hasNoSignedWrap()));
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        return NewShl;
      }
      if (ShrAmt > ShAmtC) {
        // If C1 > C: (X >>?,exact C1) << C --> X >>?,exact (C1 - C)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
        auto *NewShr = BinaryOperator::Create(
            cast<BinaryOperator>(Op0)->getOpcode(), X, ShiftDiff);
        NewShr->setIsExact(true);
        return NewShr;
      }
    }
, m_OneUse(m_Shr(m_Value(X
), m_APInt(C1
)))) &&
1067 C1
->ult(BitWidth
)) {
1068 unsigned ShrAmt
= C1
->getZExtValue();
1069 if (ShrAmt
< ShAmtC
) {
1070 // If C1 < C: (X >>? C1) << C --> (X << (C - C1)) & (-1 << C)
1071 Constant
*ShiftDiff
= ConstantInt::get(Ty
, ShAmtC
- ShrAmt
);
1072 auto *NewShl
= BinaryOperator::CreateShl(X
, ShiftDiff
);
1073 NewShl
->setHasNoUnsignedWrap(
1074 I
.hasNoUnsignedWrap() ||
1076 cast
<Instruction
>(Op0
)->getOpcode() == Instruction::LShr
&&
1077 I
.hasNoSignedWrap()));
1078 NewShl
->setHasNoSignedWrap(I
.hasNoSignedWrap());
1079 Builder
.Insert(NewShl
);
1080 APInt
Mask(APInt::getHighBitsSet(BitWidth
, BitWidth
- ShAmtC
));
1081 return BinaryOperator::CreateAnd(NewShl
, ConstantInt::get(Ty
, Mask
));
1083 if (ShrAmt
> ShAmtC
) {
1084 // If C1 > C: (X >>? C1) << C --> (X >>? (C1 - C)) & (-1 << C)
1085 Constant
*ShiftDiff
= ConstantInt::get(Ty
, ShrAmt
- ShAmtC
);
1086 auto *OldShr
= cast
<BinaryOperator
>(Op0
);
1088 BinaryOperator::Create(OldShr
->getOpcode(), X
, ShiftDiff
);
1089 NewShr
->setIsExact(OldShr
->isExact());
1090 Builder
.Insert(NewShr
);
1091 APInt
Mask(APInt::getHighBitsSet(BitWidth
, BitWidth
- ShAmtC
));
1092 return BinaryOperator::CreateAnd(NewShr
, ConstantInt::get(Ty
, Mask
));

    // Similar to above, but look through an intermediate trunc instruction.
    BinaryOperator *Shr;
    if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) &&
        match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) {
      // The larger shift direction survives through the transform.
      unsigned ShrAmtC = C1->getZExtValue();
      unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC;
      Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff);
      auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl;

      // If C1 > C:
      // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) && (-1 << C)
      // If C > C1:
      // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) && (-1 << C)
      Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff");
      Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff");
      APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
      return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask));
    }

    if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
      unsigned AmtSum = ShAmtC + C1->getZExtValue();
      // Oversized shifts are simplified to zero in InstSimplify.
      if (AmtSum < BitWidth)
        // (X << C1) << C2 --> X << (C1 + C2)
        return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum));
    }

    // If we have an opposite shift by the same amount, we may be able to
    // reorder binops and shifts to eliminate math/logic.
    auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
      switch (BinOpcode) {
      default:
        return false;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
      case Instruction::Sub:
        // NOTE: Sub is not commutable and the transforms below may not be
        // valid when the shift-right is operand 1 (RHS) of the sub.
        return true;
      }
    };
    BinaryOperator *Op0BO;
    if (match(Op0, m_OneUse(m_BinOp(Op0BO))) &&
        isSuitableBinOpcode(Op0BO->getOpcode())) {
      // Commute so shift-right is on LHS of the binop.
      // (Y bop (X >> C)) << C         ->  ((X >> C) bop Y) << C
      // (Y bop ((X >> C) & CC)) << C  ->  (((X >> C) & CC) bop Y) << C
      Value *Shr = Op0BO->getOperand(0);
      Value *Y = Op0BO->getOperand(1);
      const APInt *CC;
      if (Op0BO->isCommutative() && Y->hasOneUse() &&
          (match(Y, m_Shr(m_Value(), m_Specific(Op1))) ||
           match(Y, m_And(m_OneUse(m_Shr(m_Value(), m_Specific(Op1))),
                          m_APInt(CC)))))
        std::swap(Shr, Y);

      // ((X >> C) bop Y) << C  ->  (X bop (Y << C)) & (~0 << C)
      if (match(Shr, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) {
        Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName()); // (Y << C)
        Value *B =
            Builder.CreateBinOp(Op0BO->getOpcode(), X, YS, Shr->getName());
        unsigned Op1Val = C->getLimitedValue(BitWidth);
        APInt Bits = APInt::getHighBitsSet(BitWidth, BitWidth - Op1Val);
        Constant *Mask = ConstantInt::get(Ty, Bits);
        return BinaryOperator::CreateAnd(B, Mask);
      }

      // (((X >> C) & CC) bop Y) << C  ->  (X & (CC << C)) bop (Y << C)
      if (match(Shr,
                m_OneUse(m_And(m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))),
                               m_APInt(CC))))) {
        Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName()); // (Y << C)
        Value *M = Builder.CreateAnd(X, ConstantInt::get(Ty, CC->shl(*C)),
                                     X->getName() + ".mask"); // X & (CC << C)
        return BinaryOperator::Create(Op0BO->getOpcode(), M, YS);
      }
    }

    // (C1 - X) << C --> (C1 << C) - (X << C)
    if (match(Op0, m_OneUse(m_Sub(m_APInt(C1), m_Value(X))))) {
      Constant *NewLHS = ConstantInt::get(Ty, C1->shl(*C));
      Value *NewShift = Builder.CreateShl(X, Op1);
      return BinaryOperator::CreateSub(NewLHS, NewShift);
    }
  }

  if (setShiftFlags(I, Q))
    return &I;
1194 // Valid for any type of right-shift.
1196 if (match(Op0
, m_OneUse(m_Shr(m_Value(X
), m_Specific(Op1
))))) {
1197 Constant
*AllOnes
= ConstantInt::getAllOnesValue(Ty
);
1198 Value
*Mask
= Builder
.CreateShl(AllOnes
, Op1
);
1199 return BinaryOperator::CreateAnd(Mask
, X
);
1203 if (match(Op1
, m_Constant(C1
))) {
1206 // (X * C2) << C1 --> X * (C2 << C1)
1207 if (match(Op0
, m_Mul(m_Value(X
), m_Constant(C2
))))
1208 return BinaryOperator::CreateMul(X
, ConstantExpr::getShl(C2
, C1
));
1210 // shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
1211 if (match(Op0
, m_ZExt(m_Value(X
))) && X
->getType()->isIntOrIntVectorTy(1)) {
1212 auto *NewC
= ConstantExpr::getShl(ConstantInt::get(Ty
, 1), C1
);
1213 return SelectInst::Create(X
, NewC
, ConstantInt::getNullValue(Ty
));
1217 if (match(Op0
, m_One())) {
1218 // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1
1219 if (match(Op1
, m_Sub(m_SpecificInt(BitWidth
- 1), m_Value(X
))))
1220 return BinaryOperator::CreateLShr(
1221 ConstantInt::get(Ty
, APInt::getSignMask(BitWidth
)), X
);
1223 // Canonicalize "extract lowest set bit" using cttz to and-with-negate:
1224 // 1 << (cttz X) --> -X & X
1226 m_OneUse(m_Intrinsic
<Intrinsic::cttz
>(m_Value(X
), m_Value())))) {
1227 Value
*NegX
= Builder
.CreateNeg(X
, "neg");
1228 return BinaryOperator::CreateAnd(NegX
, X
);
1235 Instruction
*InstCombinerImpl::visitLShr(BinaryOperator
&I
) {
1236 if (Value
*V
= simplifyLShrInst(I
.getOperand(0), I
.getOperand(1), I
.isExact(),
1237 SQ
.getWithInstruction(&I
)))
1238 return replaceInstUsesWith(I
, V
);
1240 if (Instruction
*X
= foldVectorBinop(I
))
1243 if (Instruction
*R
= commonShiftTransforms(I
))
1246 Value
*Op0
= I
.getOperand(0), *Op1
= I
.getOperand(1);
1247 Type
*Ty
= I
.getType();
1250 unsigned BitWidth
= Ty
->getScalarSizeInBits();
1252 // (iN (~X) u>> (N - 1)) --> zext (X > -1)
1253 if (match(Op0
, m_OneUse(m_Not(m_Value(X
)))) &&
1254 match(Op1
, m_SpecificIntAllowUndef(BitWidth
- 1)))
1255 return new ZExtInst(Builder
.CreateIsNotNeg(X
, "isnotneg"), Ty
);

  const APInt *C;
  if (match(Op1, m_APInt(C))) {
    unsigned ShAmtC = C->getZExtValue();
    auto *II = dyn_cast<IntrinsicInst>(Op0);
    if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmtC &&
        (II->getIntrinsicID() == Intrinsic::ctlz ||
         II->getIntrinsicID() == Intrinsic::cttz ||
         II->getIntrinsicID() == Intrinsic::ctpop)) {
      // ctlz.i32(x)>>5  --> zext(x == 0)
      // cttz.i32(x)>>5  --> zext(x == 0)
      // ctpop.i32(x)>>5 --> zext(x == -1)
      bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop;
      Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0);
      Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS);
      return new ZExtInst(Cmp, Ty);
    }

    const APInt *C1;
    if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
      if (C1->ult(ShAmtC)) {
        unsigned ShlAmtC = C1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShlAmtC);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C --> X >>u (C - C1)
          auto *NewLShr = BinaryOperator::CreateLShr(X, ShiftDiff);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        if (Op0->hasOneUse()) {
          // (X << C1) >>u C --> (X >>u (C - C1)) & (-1 >> C)
          Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
          return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
        }
      } else if (C1->ugt(ShAmtC)) {
        unsigned ShlAmtC = C1->getZExtValue();
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
        if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C --> X <<nuw/nsw (C1 - C)
          auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
          NewShl->setHasNoUnsignedWrap(true);
          NewShl->setHasNoSignedWrap(ShAmtC > 0);
          return NewShl;
        }
        if (Op0->hasOneUse()) {
          // (X << C1) >>u C --> (X << (C1 - C)) & (-1 >> C)
          Value *NewShl = Builder.CreateShl(X, ShiftDiff);
          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
          return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
        }
      } else {
        assert(*C1 == ShAmtC);
        // (X << C) >>u C --> X & (-1 >>u C)
        APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
        return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
      }
    }
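
    // For instance (illustrative, not from the original file), the nuw case:
    //   %t = shl nuw i32 %x, 3
    //   %r = lshr i32 %t, 5
    // -->
    //   %r = lshr i32 %x, 2
    // since no bits were lost by the nuw left-shift.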

    // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
    // TODO: Consolidate with the more general transform that starts from shl
    // (the shifts are in the opposite order).
    Value *Y;
    if (match(Op0,
              m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
                               m_Value(Y))))) {
      Value *NewLshr = Builder.CreateLShr(Y, Op1);
      Value *NewAdd = Builder.CreateAdd(NewLshr, X);
      unsigned Op1Val = C->getLimitedValue(BitWidth);
      APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val);
      Constant *Mask = ConstantInt::get(Ty, Bits);
      return BinaryOperator::CreateAnd(NewAdd, Mask);
    }

    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
        (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
      assert(ShAmtC < X->getType()->getScalarSizeInBits() &&
             "Big shift not simplified to zero?");
      // lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
      Value *NewLShr = Builder.CreateLShr(X, ShAmtC);
      return new ZExtInst(NewLShr, Ty);
    }

    if (match(Op0, m_SExt(m_Value(X)))) {
      unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
      // lshr (sext i1 X to iN), C --> select (X, -1 >> C, 0)
      if (SrcTyBitWidth == 1) {
        auto *NewC = ConstantInt::get(
            Ty, APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
        return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
      }

      if ((!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType())) &&
          Op0->hasOneUse()) {
        // Are we moving the sign bit to the low bit and widening with high
        // zeros? lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
        if (ShAmtC == BitWidth - 1) {
          Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
          return new ZExtInst(NewLShr, Ty);
        }

        // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
        if (ShAmtC == BitWidth - SrcTyBitWidth) {
          // The new shift amount can't be more than the narrow source type.
          unsigned NewShAmt = std::min(ShAmtC, SrcTyBitWidth - 1);
          Value *AShr = Builder.CreateAShr(X, NewShAmt);
          return new ZExtInst(AShr, Ty);
        }
      }
    }
== BitWidth
- 1) {
1368 // lshr i32 or(X,-X), 31 --> zext (X != 0)
1369 if (match(Op0
, m_OneUse(m_c_Or(m_Neg(m_Value(X
)), m_Deferred(X
)))))
1370 return new ZExtInst(Builder
.CreateIsNotNull(X
), Ty
);
1372 // lshr i32 (X -nsw Y), 31 --> zext (X < Y)
1373 if (match(Op0
, m_OneUse(m_NSWSub(m_Value(X
), m_Value(Y
)))))
1374 return new ZExtInst(Builder
.CreateICmpSLT(X
, Y
), Ty
);
1376 // Check if a number is negative and odd:
1377 // lshr i32 (srem X, 2), 31 --> and (X >> 31), X
1378 if (match(Op0
, m_OneUse(m_SRem(m_Value(X
), m_SpecificInt(2))))) {
1379 Value
*Signbit
= Builder
.CreateLShr(X
, ShAmtC
);
1380 return BinaryOperator::CreateAnd(Signbit
, X
);

    // (X >>u C1) >>u C --> X >>u (C1 + C)
    if (match(Op0, m_LShr(m_Value(X), m_APInt(C1)))) {
      // Oversized shifts are simplified to zero in InstSimplify.
      unsigned AmtSum = ShAmtC + C1->getZExtValue();
      if (AmtSum < BitWidth)
        return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
    }

    Instruction *TruncSrc;
    if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TruncSrc)))) &&
        match(TruncSrc, m_LShr(m_Value(X), m_APInt(C1)))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      unsigned AmtSum = ShAmtC + C1->getZExtValue();

      // If the combined shift fits in the source width:
      // (trunc (X >>u C1)) >>u C --> and (trunc (X >>u (C1 + C)), MaskC
      //
      // If the first shift covers the number of bits truncated, then the
      // mask instruction is eliminated (and so the use check is relaxed).
      if (AmtSum < SrcWidth &&
          (TruncSrc->hasOneUse() || C1->uge(SrcWidth - BitWidth))) {
        Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
        Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName());

        // If the first shift does not cover the number of bits truncated, then
        // we require a mask to get rid of high bits in the result.
        APInt MaskC = APInt::getAllOnes(BitWidth).lshr(ShAmtC);
        return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, MaskC));
      }
    }

    const APInt *MulC;
    if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
      // Look for a "splat" mul pattern - it replicates bits across each half of
      // a value, so a right shift is just a mask of the low bits:
      // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
      // TODO: Generalize to allow more than just half-width shifts?
      if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
          MulC->logBase2() == ShAmtC)
        return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));

      // The one-use check is not strictly necessary, but codegen may not be
      // able to invert the transform and perf may suffer with an extra mul
      // instruction.
      if (Op0->hasOneUse()) {
        APInt NewMulC = MulC->lshr(ShAmtC);
        // if c is divisible by (1 << ShAmtC):
        // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC)
        if (MulC->eq(NewMulC.shl(ShAmtC))) {
          auto *NewMul =
              BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
          assert(ShAmtC != 0 &&
                 "lshr X, 0 should be handled by simplifyLShrInst.");
          NewMul->setHasNoSignedWrap(true);
          return NewMul;
        }
      }
    }
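
    // For instance (illustrative, not from the original file), the splat-mul:
    //   %m = mul nuw i32 %x, 65537    ; (1 << 16) + 1
    //   %r = lshr i32 %m, 16
    // -->
    //   %r = and i32 %x, 65535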

    // Try to narrow bswap.
    // In the case where the shift amount equals the bitwidth difference, the
    // shift is eliminated.
    if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::bswap>(
                       m_OneUse(m_ZExt(m_Value(X))))))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      unsigned WidthDiff = BitWidth - SrcWidth;
      if (SrcWidth % 16 == 0) {
        Value *NarrowSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
        if (ShAmtC >= WidthDiff) {
          // (bswap (zext X)) >> C --> zext (bswap X >> C')
          Value *NewShift = Builder.CreateLShr(NarrowSwap, ShAmtC - WidthDiff);
          return new ZExtInst(NewShift, Ty);
        } else {
          // (bswap (zext X)) >> C --> (zext (bswap X)) << C'
          Value *NewZExt = Builder.CreateZExt(NarrowSwap, Ty);
          Constant *ShiftDiff = ConstantInt::get(Ty, WidthDiff - ShAmtC);
          return BinaryOperator::CreateShl(NewZExt, ShiftDiff);
        }
      }
    }

    // Reduce add-carry of bools to logic:
    // ((zext BoolX) + (zext BoolY)) >> 1 --> zext (BoolX && BoolY)
    Value *BoolX, *BoolY;
    if (ShAmtC == 1 && match(Op0, m_Add(m_Value(X), m_Value(Y))) &&
        match(X, m_ZExt(m_Value(BoolX))) && match(Y, m_ZExt(m_Value(BoolY))) &&
        BoolX->getType()->isIntOrIntVectorTy(1) &&
        BoolY->getType()->isIntOrIntVectorTy(1) &&
        (X->hasOneUse() || Y->hasOneUse() || Op0->hasOneUse())) {
      Value *And = Builder.CreateAnd(BoolX, BoolY);
      return new ZExtInst(And, Ty);
    }
  }

  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  if (setShiftFlags(I, Q))
    return &I;

  // Transform (x << y) >> y to x & (-1 >> y)
  if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
    Value *Mask = Builder.CreateLShr(AllOnes, Op1);
    return BinaryOperator::CreateAnd(Mask, X);
  }

  if (Instruction *Overflow = foldLShrOverflowBit(I))
    return Overflow;

  return nullptr;
}

Instruction *
InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract(
    BinaryOperator &OldAShr) {
  assert(OldAShr.getOpcode() == Instruction::AShr &&
         "Must be called with arithmetic right-shift instruction only.");

  // Check that constant C is a splat of the element-wise bitwidth of V.
  auto BitWidthSplat = [](Constant *C, Value *V) {
    return match(
        C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
                              APInt(C->getType()->getScalarSizeInBits(),
                                    V->getType()->getScalarSizeInBits())));
  };

  // It should look like variable-length sign-extension on the outside:
  //   (Val << (bitwidth(Val)-Nbits)) a>> (bitwidth(Val)-Nbits)
  Value *NBits;
  Instruction *MaybeTrunc;
  Constant *C1, *C2;
  if (!match(&OldAShr,
             m_AShr(m_Shl(m_Instruction(MaybeTrunc),
                          m_ZExtOrSelf(m_Sub(m_Constant(C1),
                                             m_ZExtOrSelf(m_Value(NBits))))),
                    m_ZExtOrSelf(m_Sub(m_Constant(C2),
                                       m_ZExtOrSelf(m_Deferred(NBits)))))) ||
      !BitWidthSplat(C1, &OldAShr) || !BitWidthSplat(C2, &OldAShr))
    return nullptr;

  // There may or may not be a truncation after outer two shifts.
  Instruction *HighBitExtract;
  match(MaybeTrunc, m_TruncOrSelf(m_Instruction(HighBitExtract)));
  bool HadTrunc = MaybeTrunc != HighBitExtract;

  // And finally, the innermost part of the pattern must be a right-shift.
  Value *X, *NumLowBitsToSkip;
  if (!match(HighBitExtract, m_Shr(m_Value(X), m_Value(NumLowBitsToSkip))))
    return nullptr;

  // Said right-shift must extract high NBits bits - C0 must be its bitwidth.
  Constant *C0;
  if (!match(NumLowBitsToSkip,
             m_ZExtOrSelf(
                 m_Sub(m_Constant(C0), m_ZExtOrSelf(m_Specific(NBits))))) ||
      !BitWidthSplat(C0, HighBitExtract))
    return nullptr;

  // Since the NBits is identical for all shifts, if the outermost and
  // innermost shifts are identical, then outermost shifts are redundant.
  // If we had truncation, do keep it though.
  if (HighBitExtract->getOpcode() == OldAShr.getOpcode())
    return replaceInstUsesWith(OldAShr, MaybeTrunc);

  // Else, if there was a truncation, then we need to ensure that one
  // instruction will go away.
  if (HadTrunc && !match(&OldAShr, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
    return nullptr;

  // Finally, bypass two innermost shifts, and perform the outermost shift on
  // the operands of the innermost shift.
  Instruction *NewAShr =
      BinaryOperator::Create(OldAShr.getOpcode(), X, NumLowBitsToSkip);
  NewAShr->copyIRFlags(HighBitExtract); // We can preserve 'exact'-ness.
  if (!HadTrunc)
    return NewAShr;

  Builder.Insert(NewAShr);
  return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType());
}

Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
  if (Value *V = simplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  const APInt *ShAmtAPInt;
  if (match(Op1, m_APInt(ShAmtAPInt)) && ShAmtAPInt->ult(BitWidth)) {
    unsigned ShAmt = ShAmtAPInt->getZExtValue();

    // If the shift amount equals the difference in width of the destination
    // and source scalar types:
    // ashr (shl (zext X), C), C --> sext X
    Value *X;
    if (match(Op0, m_Shl(m_ZExt(m_Value(X)), m_Specific(Op1))) &&
        ShAmt == BitWidth - X->getType()->getScalarSizeInBits())
      return new SExtInst(X, Ty);

    // We can't handle (X << C1) >>s C2. It shifts arbitrary bits in. However,
    // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
    const APInt *ShOp1;
    if (match(Op0, m_NSWShl(m_Value(X), m_APInt(ShOp1))) &&
        ShOp1->ult(BitWidth)) {
      unsigned ShlAmt = ShOp1->getZExtValue();
      if (ShlAmt < ShAmt) {
        // (X <<nsw C1) >>s C2 --> X >>s (C2 - C1)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
        auto *NewAShr = BinaryOperator::CreateAShr(X, ShiftDiff);
        NewAShr->setIsExact(I.isExact());
        return NewAShr;
      }
      if (ShlAmt > ShAmt) {
        // (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
        Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
        auto *NewShl = BinaryOperator::Create(Instruction::Shl, X, ShiftDiff);
        NewShl->setHasNoSignedWrap(true);
        return NewShl;
      }
    }
, m_AShr(m_Value(X
), m_APInt(ShOp1
))) &&
1613 ShOp1
->ult(BitWidth
)) {
1614 unsigned AmtSum
= ShAmt
+ ShOp1
->getZExtValue();
1615 // Oversized arithmetic shifts replicate the sign bit.
1616 AmtSum
= std::min(AmtSum
, BitWidth
- 1);
1617 // (X >>s C1) >>s C2 --> X >>s (C1 + C2)
1618 return BinaryOperator::CreateAShr(X
, ConstantInt::get(Ty
, AmtSum
));
1621 if (match(Op0
, m_OneUse(m_SExt(m_Value(X
)))) &&
1622 (Ty
->isVectorTy() || shouldChangeType(Ty
, X
->getType()))) {
1623 // ashr (sext X), C --> sext (ashr X, C')
1624 Type
*SrcTy
= X
->getType();
1625 ShAmt
= std::min(ShAmt
, SrcTy
->getScalarSizeInBits() - 1);
1626 Value
*NewSh
= Builder
.CreateAShr(X
, ConstantInt::get(SrcTy
, ShAmt
));
1627 return new SExtInst(NewSh
, Ty
);

    if (ShAmt == BitWidth - 1) {
      // ashr i32 or(X,-X), 31 --> sext (X != 0)
      if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X)))))
        return new SExtInst(Builder.CreateIsNotNull(X), Ty);

      // ashr i32 (X -nsw Y), 31 --> sext (X < Y)
      Value *Y;
      if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
        return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
    }
  }
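
  // For instance (illustrative, not from the original file):
  //   %d = sub nsw i32 %x, %y
  //   %r = ashr i32 %d, 31
  // -->
  //   %c = icmp slt i32 %x, %y
  //   %r = sext i1 %c to i32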

  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  if (setShiftFlags(I, Q))
    return &I;

  // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
  // as the pattern to splat the lowest bit.
  // FIXME: iff X is already masked, we don't need the one-use check.
  Value *X;
  if (match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)) &&
      match(Op0, m_OneUse(m_Shl(m_Value(X),
                                m_SpecificIntAllowUndef(BitWidth - 1))))) {
    Constant *Mask = ConstantInt::get(Ty, 1);
    // Retain the knowledge about the ignored lanes.
    Mask = Constant::mergeUndefsWith(
        Constant::mergeUndefsWith(Mask, cast<Constant>(Op1)),
        cast<Constant>(cast<Instruction>(Op0)->getOperand(1)));
    X = Builder.CreateAnd(X, Mask);
    return BinaryOperator::CreateNeg(X);
  }
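
  // For instance (illustrative, not from the original file), on i32:
  //   %t = shl i32 %x, 31
  //   %r = ashr i32 %t, 31
  // -->
  //   %a = and i32 %x, 1
  //   %r = sub i32 0, %a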

  if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
    return R;

  // See if we can turn a signed shr into an unsigned shr.
  if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) {
    Instruction *Lshr = BinaryOperator::CreateLShr(Op0, Op1);
    Lshr->setIsExact(I.isExact());
    return Lshr;
  }

  // ashr (xor %x, -1), %y --> xor (ashr %x, %y), -1
  if (match(Op0, m_OneUse(m_Not(m_Value(X))))) {
    // Note that we must drop 'exact'-ness of the shift!
    // Note that we can't keep undef's in -1 vector constant!
    auto *NewAShr = Builder.CreateAShr(X, Op1, Op0->getName() + ".not");
    return BinaryOperator::CreateNot(NewAShr);
  }