//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"
/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
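/// For example (illustrative): evaluating (add i32 (zext i8 %a to i32), 7) in
/// type i8 rebuilds the expression as (add i8 %a, 7), which lets a surrounding
/// trunc of the wide add be removed entirely.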
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantFoldIntegerCast(C, Ty, isSigned, DL);

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source.  There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn =
            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, I->getIterator());
}
Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}
/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}
/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
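/// For example (illustrative): an immediate constant, or (zext i8 %x to i32)
/// when Ty is i8, can be re-evaluated in the destination type at no cost.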
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return match(V, m_ImmConstant());

  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}
/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}
/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V.  We should return true if trunc(V)
/// can be computed by computing V in the smaller type.  If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
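/// For example (illustrative): trunc (add i32 %x, %y) to i16 can be computed
/// as add i16 (trunc %x), (trunc %y), so the add qualifies whenever both of
/// its operands can themselves be truncated for free.
///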
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // inrange amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    //       zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    //       similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth =
        APFloatBase::semanticsIntSizeInBits(Semantics,
            I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}
/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}
/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
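/// For example (illustrative, an i8 rotate left promoted to i32):
///   trunc (or (shl (zext i8 %x to i32), %amt),
///             (lshr (zext i8 %x to i32), (sub 8, %amt))) to i8
///   --> call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 (trunc %amt))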
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}
/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
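/// For example (illustrative):
///   trunc (add i32 %x, 42) to i8            --> add i8 (trunc %x), 42
///   trunc (mul i32 (zext i8 %a), %y) to i8  --> mul i8 %a, (trunc %y)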
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
                                                      /*IsSigned*/ true, DL)) {
          ShAmt = Constant::mergeUndefsWith(ShAmt, C);
          Value *Shift =
              OldShift->getOpcode() == Instruction::AShr
                  ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                  : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
          return CastInst::CreateTruncOrBitCast(Shift, DestTy);
        }
      }
    }
    break;
  }
  default:
    break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}
/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.   Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluting in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  return nullptr;
}
*InstCombinerImpl::transformZExtICmp(ICmpInst
*Cmp
,
877 // If we are just checking for a icmp eq of a single bit and zext'ing it
878 // to an integer, then shift the bit to the appropriate place and then
879 // cast to integer to avoid the comparison.
881 // FIXME: This set of transforms does not check for extra uses and/or creates
882 // an extra instruction (an optional final cast is not included
883 // in the transform comments). We may also want to favor icmp over
884 // shifts in cases of equal instructions because icmp has better
885 // analysis in general (invert the transform).
888 if (match(Cmp
->getOperand(1), m_APInt(Op1CV
))) {
890 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
891 if (Cmp
->getPredicate() == ICmpInst::ICMP_SLT
&& Op1CV
->isZero()) {
892 Value
*In
= Cmp
->getOperand(0);
893 Value
*Sh
= ConstantInt::get(In
->getType(),
894 In
->getType()->getScalarSizeInBits() - 1);
895 In
= Builder
.CreateLShr(In
, Sh
, In
->getName() + ".lobit");
896 if (In
->getType() != Zext
.getType())
897 In
= Builder
.CreateIntCast(In
, Zext
.getType(), false /*ZExt*/);
899 return replaceInstUsesWith(Zext
, In
);
902 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
903 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
904 // zext (X != 0) to i32 --> X iff X has only the low bit set.
905 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
907 if (Op1CV
->isZero() && Cmp
->isEquality()) {
908 // Exactly 1 possible 1? But not the high-bit because that is
909 // canonicalized to this form.
910 KnownBits Known
= computeKnownBits(Cmp
->getOperand(0), 0, &Zext
);
911 APInt
KnownZeroMask(~Known
.Zero
);
912 uint32_t ShAmt
= KnownZeroMask
.logBase2();
913 bool IsExpectShAmt
= KnownZeroMask
.isPowerOf2() &&
914 (Zext
.getType()->getScalarSizeInBits() != ShAmt
+ 1);
916 (Cmp
->getOperand(0)->getType() == Zext
.getType() ||
917 Cmp
->getPredicate() == ICmpInst::ICMP_NE
|| ShAmt
== 0)) {
918 Value
*In
= Cmp
->getOperand(0);
920 // Perform a logical shr by shiftamt.
921 // Insert the shift to put the result in the low bit.
922 In
= Builder
.CreateLShr(In
, ConstantInt::get(In
->getType(), ShAmt
),
923 In
->getName() + ".lobit");
926 // Toggle the low bit for "X == 0".
927 if (Cmp
->getPredicate() == ICmpInst::ICMP_EQ
)
928 In
= Builder
.CreateXor(In
, ConstantInt::get(In
->getType(), 1));
930 if (Zext
.getType() == In
->getType())
931 return replaceInstUsesWith(Zext
, In
);
933 Value
*IntCast
= Builder
.CreateIntCast(In
, Zext
.getType(), false);
934 return replaceInstUsesWith(Zext
, IntCast
);
939 if (Cmp
->isEquality() && Zext
.getType() == Cmp
->getOperand(0)->getType()) {
940 // Test if a bit is clear/set using a shifted-one mask:
941 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
942 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
944 if (Cmp
->hasOneUse() && match(Cmp
->getOperand(1), m_ZeroInt()) &&
945 match(Cmp
->getOperand(0),
946 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt
)), m_Value(X
))))) {
947 if (Cmp
->getPredicate() == ICmpInst::ICMP_EQ
)
948 X
= Builder
.CreateNot(X
);
949 Value
*Lshr
= Builder
.CreateLShr(X
, ShAmt
);
950 Value
*And1
= Builder
.CreateAnd(Lshr
, ConstantInt::get(X
->getType(), 1));
951 return replaceInstUsesWith(Zext
, And1
);
958 /// Determine if the specified value can be computed in the specified wider type
959 /// and produce the same low bits. If not, return false.
961 /// If this function returns true, it can also return a non-zero number of bits
962 /// (in BitsToClear) which indicates that the value it computes is correct for
963 /// the zero extend, but that the additional BitsToClear bits need to be zero'd
964 /// out. For example, to promote something like:
966 /// %B = trunc i64 %A to i32
967 /// %C = lshr i32 %B, 8
968 /// %E = zext i32 %C to i64
970 /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
971 /// set to 8 to indicate that the promoted value needs to have bits 24-31
972 /// cleared in addition to bits 32-63. Since an 'and' will be generated to
973 /// clear the top bits anyway, doing this has no extra cost.
975 /// This function works on both vectors and scalars.
976 static bool canEvaluateZExtd(Value
*V
, Type
*Ty
, unsigned &BitsToClear
,
977 InstCombinerImpl
&IC
, Instruction
*CxtI
) {
979 if (canAlwaysEvaluateInType(V
, Ty
))
981 if (canNotEvaluateInType(V
, Ty
))
984 auto *I
= cast
<Instruction
>(V
);
986 switch (I
->getOpcode()) {
987 case Instruction::ZExt
: // zext(zext(x)) -> zext(x).
988 case Instruction::SExt
: // zext(sext(x)) -> sext(x).
989 case Instruction::Trunc
: // zext(trunc(x)) -> trunc(x) or zext(x)
991 case Instruction::And
:
992 case Instruction::Or
:
993 case Instruction::Xor
:
994 case Instruction::Add
:
995 case Instruction::Sub
:
996 case Instruction::Mul
:
997 if (!canEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
, IC
, CxtI
) ||
998 !canEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
, IC
, CxtI
))
1000 // These can all be promoted if neither operand has 'bits to clear'.
1001 if (BitsToClear
== 0 && Tmp
== 0)
1004 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1005 // other side, BitsToClear is ok.
1006 if (Tmp
== 0 && I
->isBitwiseLogicOp()) {
1007 // We use MaskedValueIsZero here for generality, but the case we care
1008 // about the most is constant RHS.
1009 unsigned VSize
= V
->getType()->getScalarSizeInBits();
1010 if (IC
.MaskedValueIsZero(I
->getOperand(1),
1011 APInt::getHighBitsSet(VSize
, BitsToClear
),
1013 // If this is an And instruction and all of the BitsToClear are
1014 // known to be zero we can reset BitsToClear.
1015 if (I
->getOpcode() == Instruction::And
)
1021 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1024 case Instruction::Shl
: {
1025 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1026 // upper bits we can reduce BitsToClear by the shift amount.
1028 if (match(I
->getOperand(1), m_APInt(Amt
))) {
1029 if (!canEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
, IC
, CxtI
))
1031 uint64_t ShiftAmt
= Amt
->getZExtValue();
1032 BitsToClear
= ShiftAmt
< BitsToClear
? BitsToClear
- ShiftAmt
: 0;
1037 case Instruction::LShr
: {
1038 // We can promote lshr(x, cst) if we can promote x. This requires the
1039 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1041 if (match(I
->getOperand(1), m_APInt(Amt
))) {
1042 if (!canEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
, IC
, CxtI
))
1044 BitsToClear
+= Amt
->getZExtValue();
1045 if (BitsToClear
> V
->getType()->getScalarSizeInBits())
1046 BitsToClear
= V
->getType()->getScalarSizeInBits();
1049 // Cannot promote variable LSHR.
1052 case Instruction::Select
:
1053 if (!canEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
, IC
, CxtI
) ||
1054 !canEvaluateZExtd(I
->getOperand(2), Ty
, BitsToClear
, IC
, CxtI
) ||
1055 // TODO: If important, we could handle the case when the BitsToClear are
1056 // known zero in the disagreeing side.
1061 case Instruction::PHI
: {
1062 // We can change a phi if we can change all operands. Note that we never
1063 // get into trouble with cyclic PHIs here because we only consider
1064 // instructions with a single use.
1065 PHINode
*PN
= cast
<PHINode
>(I
);
1066 if (!canEvaluateZExtd(PN
->getIncomingValue(0), Ty
, BitsToClear
, IC
, CxtI
))
1068 for (unsigned i
= 1, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
1069 if (!canEvaluateZExtd(PN
->getIncomingValue(i
), Ty
, Tmp
, IC
, CxtI
) ||
1070 // TODO: If important, we could handle the case when the BitsToClear
1071 // are known zero in the disagreeing input.
1076 case Instruction::Call
:
1077 // llvm.vscale() can always be executed in larger type, because the
1078 // value is automatically zero-extended.
1079 if (const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
))
1080 if (II
->getIntrinsicID() == Intrinsic::vscale
)
1084 // TODO: Can handle more cases here.
1089 Instruction
*InstCombinerImpl::visitZExt(ZExtInst
&Zext
) {
1090 // If this zero extend is only used by a truncate, let the truncate be
1091 // eliminated before we try to optimize this zext.
1092 if (Zext
.hasOneUse() && isa
<TruncInst
>(Zext
.user_back()) &&
1093 !isa
<Constant
>(Zext
.getOperand(0)))
1096 // If one of the common conversion will work, do it.
1097 if (Instruction
*Result
= commonCastTransforms(Zext
))
1100 Value
*Src
= Zext
.getOperand(0);
1101 Type
*SrcTy
= Src
->getType(), *DestTy
= Zext
.getType();
1103 // Try to extend the entire expression tree to the wide destination type.
1104 unsigned BitsToClear
;
1105 if (shouldChangeType(SrcTy
, DestTy
) &&
1106 canEvaluateZExtd(Src
, DestTy
, BitsToClear
, *this, &Zext
)) {
1107 assert(BitsToClear
<= SrcTy
->getScalarSizeInBits() &&
1108 "Can't clear more bits than in SrcTy");
1110 // Okay, we can transform this! Insert the new expression now.
1112 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1113 " to avoid zero extend: "
1115 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, false);
1116 assert(Res
->getType() == DestTy
);
1118 // Preserve debug values referring to Src if the zext is its last use.
1119 if (auto *SrcOp
= dyn_cast
<Instruction
>(Src
))
1120 if (SrcOp
->hasOneUse())
1121 replaceAllDbgUsesWith(*SrcOp
, *Res
, Zext
, DT
);
1123 uint32_t SrcBitsKept
= SrcTy
->getScalarSizeInBits() - BitsToClear
;
1124 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
1126 // If the high bits are already filled with zeros, just replace this
1127 // cast with the result.
1128 if (MaskedValueIsZero(Res
,
1129 APInt::getHighBitsSet(DestBitSize
,
1130 DestBitSize
- SrcBitsKept
),
1132 return replaceInstUsesWith(Zext
, Res
);
1134 // We need to emit an AND to clear the high bits.
1135 Constant
*C
= ConstantInt::get(Res
->getType(),
1136 APInt::getLowBitsSet(DestBitSize
, SrcBitsKept
));
1137 return BinaryOperator::CreateAnd(Res
, C
);
1140 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1141 // types and if the sizes are just right we can convert this into a logical
1142 // 'and' which will be much cheaper than the pair of casts.
1143 if (auto *CSrc
= dyn_cast
<TruncInst
>(Src
)) { // A->B->C cast
1144 // TODO: Subsume this into EvaluateInDifferentType.
1146 // Get the sizes of the types involved. We know that the intermediate type
1147 // will be smaller than A or C, but don't know the relation between A and C.
1148 Value
*A
= CSrc
->getOperand(0);
1149 unsigned SrcSize
= A
->getType()->getScalarSizeInBits();
1150 unsigned MidSize
= CSrc
->getType()->getScalarSizeInBits();
1151 unsigned DstSize
= DestTy
->getScalarSizeInBits();
1152 // If we're actually extending zero bits, then if
1153 // SrcSize < DstSize: zext(a & mask)
1154 // SrcSize == DstSize: a & mask
1155 // SrcSize > DstSize: trunc(a) & mask
1156 if (SrcSize
< DstSize
) {
1157 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
1158 Constant
*AndConst
= ConstantInt::get(A
->getType(), AndValue
);
1159 Value
*And
= Builder
.CreateAnd(A
, AndConst
, CSrc
->getName() + ".mask");
1160 return new ZExtInst(And
, DestTy
);
1163 if (SrcSize
== DstSize
) {
1164 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
1165 return BinaryOperator::CreateAnd(A
, ConstantInt::get(A
->getType(),
1168 if (SrcSize
> DstSize
) {
1169 Value
*Trunc
= Builder
.CreateTrunc(A
, DestTy
);
1170 APInt
AndValue(APInt::getLowBitsSet(DstSize
, MidSize
));
1171 return BinaryOperator::CreateAnd(Trunc
,
1172 ConstantInt::get(Trunc
->getType(),
1177 if (auto *Cmp
= dyn_cast
<ICmpInst
>(Src
))
1178 return transformZExtICmp(Cmp
, Zext
);
1180 // zext(trunc(X) & C) -> (X & zext(C)).
1183 if (match(Src
, m_OneUse(m_And(m_Trunc(m_Value(X
)), m_Constant(C
)))) &&
1184 X
->getType() == DestTy
)
1185 return BinaryOperator::CreateAnd(X
, Builder
.CreateZExt(C
, DestTy
));
1187 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1189 if (match(Src
, m_OneUse(m_Xor(m_Value(And
), m_Constant(C
)))) &&
1190 match(And
, m_OneUse(m_And(m_Trunc(m_Value(X
)), m_Specific(C
)))) &&
1191 X
->getType() == DestTy
) {
1192 Value
*ZC
= Builder
.CreateZExt(C
, DestTy
);
1193 return BinaryOperator::CreateXor(Builder
.CreateAnd(X
, ZC
), ZC
);
1196 // If we are truncating, masking, and then zexting back to the original type,
1197 // that's just a mask. This is not handled by canEvaluateZextd if the
1198 // intermediate values have extra uses. This could be generalized further for
1199 // a non-constant mask operand.
1200 // zext (and (trunc X), C) --> and X, (zext C)
1201 if (match(Src
, m_And(m_Trunc(m_Value(X
)), m_Constant(C
))) &&
1202 X
->getType() == DestTy
) {
1203 Value
*ZextC
= Builder
.CreateZExt(C
, DestTy
);
1204 return BinaryOperator::CreateAnd(X
, ZextC
);
1207 if (match(Src
, m_VScale())) {
1208 if (Zext
.getFunction() &&
1209 Zext
.getFunction()->hasFnAttribute(Attribute::VScaleRange
)) {
1211 Zext
.getFunction()->getFnAttribute(Attribute::VScaleRange
);
1212 if (std::optional
<unsigned> MaxVScale
= Attr
.getVScaleRangeMax()) {
1213 unsigned TypeWidth
= Src
->getType()->getScalarSizeInBits();
1214 if (Log2_32(*MaxVScale
) < TypeWidth
) {
1215 Value
*VScale
= Builder
.CreateVScale(ConstantInt::get(DestTy
, 1));
1216 return replaceInstUsesWith(Zext
, VScale
);
/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
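/// For example (illustrative):
///   sext (icmp slt i32 %x, 0) to i32  -->  ashr i32 %x, 31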
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  ICmpInst::Predicate Pred = Cmp->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != Sext.getType())
      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);

    return replaceInstUsesWith(Sext, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (Cmp->hasOneUse() &&
        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, 0, &Sext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = Cmp->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(Sext.getType()) :
                       ConstantInt::getNullValue(Sext.getType());
          return replaceInstUsesWith(Sext, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countr_zero();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countl_zero();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                  KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (Sext.getType() == In->getType())
          return replaceInstUsesWith(Sext, In);
        return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
      }
    }
  }

  return nullptr;
}
/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the common
/// low bits.  This is used by code that tries to promote integer operations to
/// a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
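/// For example (illustrative): with %t = sext i8 %a to i16, the expression
/// (add i16 %t, 7) can be re-evaluated in i32 as (add i32 (sext i8 %a), 7)
/// without changing the low 16 bits.
///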
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  //case Instruction::Shl:   TODO
  //case Instruction::LShr:  TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
  // If this sign extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this sext.
  if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
    return nullptr;

  if (Instruction *I = commonCastTransforms(Sext))
    return I;

  Value *Src = Sext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
  unsigned DestBitSize = DestTy->getScalarSizeInBits();

  // If the value being extended is zero or positive, use a zext instead.
  if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT)) {
    auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
    CI->setNonNeg(true);
    return CI;
  }

  // Try to extend the entire expression tree to the wide destination type.
  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
    // Okay, we can transform this!  Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid sign extend: "
               << Sext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, true);
    assert(Res->getType() == DestTy);

    // If the high bits are already filled with sign bit, just replace this
    // cast with the result.
    if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
      return replaceInstUsesWith(Sext, Res);

    // We need to emit a shl + ashr to do the sign extend.
    Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
    return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
                                      ShAmt);
  }

  Value *X;
  if (match(Src, m_Trunc(m_Value(X)))) {
    // If the input has more sign bits than bits truncated, then convert
    // directly to final type.
    unsigned XBitSize = X->getType()->getScalarSizeInBits();
    if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
      return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);

    // If input is a trunc from the destination type, then convert into shifts.
    if (Src->hasOneUse() && X->getType() == DestTy) {
      // sext (trunc X) --> ashr (shl X, C), C
      Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
      return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
    }

    // If we are replacing shifted-in high zero bits with sign bits, convert
    // the logic shift to arithmetic shift and eliminate the cast to
    // intermediate type:
    // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
    Value *Y;
    if (Src->hasOneUse() &&
        match(X, m_LShr(m_Value(Y),
                        m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
      Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformSExtICmp(Cmp, Sext);

  // If the input is a shl/ashr pair of a same constant, then this is a sign
  // extension from a smaller value.  If we could trust arbitrary bitwidth
  // integers, we could turn this into a truncate to the smaller bit and then
  // use a sext for the whole extension.  Since we don't, look deeper and check
  // for a truncate.  If the source and dest are the same type, eliminate the
  // trunc and extend and just do shifts.  For example, turn:
  //   %a = trunc i32 %i to i8
  //   %b = shl i8 %a, C
  //   %c = ashr i8 %b, C
  //   %d = sext i8 %c to i32
  // into:
  //   %a = shl i32 %i, 32-(8-C)
  //   %d = ashr i32 %a, 32-(8-C)
  Value *A = nullptr;
  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
  Constant *BA = nullptr, *CA = nullptr;
  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
                        m_ImmConstant(CA))) &&
      BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
    Constant *WideCurrShAmt =
        ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
    assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
    Constant *NumLowbitsLeft = ConstantExpr::getSub(
        ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
    Constant *NewShAmt = ConstantExpr::getSub(
        ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
        NumLowbitsLeft);
    NewShAmt =
        Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
    A = Builder.CreateShl(A, NewShAmt, Sext.getName());
    return BinaryOperator::CreateAShr(A, NewShAmt);
  }

  // Splatting a bit of constant-index across a value:
  // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
  // If the dest type is different, use a cast (adjust use check).
  if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
                                 m_SpecificInt(SrcBitSize - 1))))) {
    Type *XTy = X->getType();
    unsigned XBitSize = XTy->getScalarSizeInBits();
    Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
    Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
    if (XTy == DestTy)
      return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
                                        AshrAmtC);
    if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
      Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
    }
  }

  if (match(Src, m_VScale())) {
    if (Sext.getFunction() &&
        Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Sext, VScale);
        }
      }
    }
  }

  return nullptr;
}
/// Return true if the specified floating-point constant fits in the specified
/// FP type without changing its value.
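/// For example (illustrative values): 0.5 fits in IEEE half without loss,
/// while 1.0e10 does not (it would round to infinity), so the latter reports
/// false here.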
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
  bool losesInfo;
  APFloat F = CFP->getValueAPF();
  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
  return !losesInfo;
}
static Type *shrinkFPConstant(ConstantFP *CFP) {
  if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
    return nullptr; // No constant folding of this.
  // See if the value can be truncated to half and then reextended.
  if (fitsInFPType(CFP, APFloat::IEEEhalf()))
    return Type::getHalfTy(CFP->getContext());
  // See if the value can be truncated to float and then reextended.
  if (fitsInFPType(CFP, APFloat::IEEEsingle()))
    return Type::getFloatTy(CFP->getContext());
  if (CFP->getType()->isDoubleTy())
    return nullptr; // Won't shrink.
  if (fitsInFPType(CFP, APFloat::IEEEdouble()))
    return Type::getDoubleTy(CFP->getContext());
  // Don't try to shrink to various long double types.
  return nullptr;
}
// Determine if this is a vector of ConstantFPs and if so, return the minimal
// type we can safely truncate all elements to.
static Type *shrinkFPConstantVector(Value *V) {
  auto *CV = dyn_cast<Constant>(V);
  auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
  if (!CV || !CVVTy)
    return nullptr;

  Type *MinType = nullptr;

  unsigned NumElts = CVVTy->getNumElements();

  // For fixed-width vectors we find the minimal type by looking
  // through the constant values of the vector.
  for (unsigned i = 0; i != NumElts; ++i) {
    if (isa<UndefValue>(CV->getAggregateElement(i)))
      continue;

    auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
    if (!CFP)
      return nullptr;

    Type *T = shrinkFPConstant(CFP);
    if (!T)
      return nullptr;

    // If we haven't found a type yet or this type has a larger mantissa than
    // our previous type, this is our new minimal type.
    if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
      MinType = T;
  }

  // Make a vector type from the minimal type.
  return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
}
/// Find the minimum FP type we can safely truncate to.
static Type *getMinimumFPType(Value *V) {
  if (auto *FPExt = dyn_cast<FPExtInst>(V))
    return FPExt->getOperand(0)->getType();

  // If this value is a constant, return the constant in the smallest FP type
  // that can accurately represent it. This allows us to turn
  // (float)((double)X+2.0) into x+2.0f.
  if (auto *CFP = dyn_cast<ConstantFP>(V))
    if (Type *T = shrinkFPConstant(CFP))
      return T;

  // We can only correctly find a minimum type for a scalable vector when it is
  // a splat. For splats of constant values the fpext is wrapped up as a
  // ConstantExpr.
  if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
    if (FPCExt->getOpcode() == Instruction::FPExt)
      return FPCExt->getOperand(0)->getType();

  // Try to shrink a vector of FP constants. This returns nullptr on scalable
  // vectors.
  if (Type *T = shrinkFPConstantVector(V))
    return T;

  return V->getType();
}
/// Return true if the cast from integer to FP can be proven to be exact for all
/// possible inputs (the conversion does not lose any precision).
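/// For example (illustrative, not exhaustive): sitofp i32 -> double is always
/// exact (at most 31 value bits fit in a 53-bit significand), while
/// uitofp i64 -> float cannot be proven exact from the types alone
/// (64 bits do not fit in a 24-bit significand).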
static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
  CastInst::CastOps Opcode = I.getOpcode();
  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
         "Unexpected cast");
  Value *Src = I.getOperand(0);
  Type *SrcTy = Src->getType();
  Type *FPTy = I.getType();
  bool IsSigned = Opcode == Instruction::SIToFP;
  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;

  // Easy case - if the source integer type has less bits than the FP mantissa,
  // then the cast must be exact.
  int DestNumSigBits = FPTy->getFPMantissaWidth();
  if (SrcSize <= DestNumSigBits)
    return true;

  // Cast from FP to integer and back to FP is independent of the intermediate
  // integer width because of poison on overflow.
  Value *F;
  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
    // If this is uitofp (fptosi F), the source needs an extra bit to avoid
    // potential rounding of negative FP input values.
    int SrcNumSigBits = F->getType()->getFPMantissaWidth();
    if (!IsSigned && match(Src, m_FPToSI(m_Value())))
      SrcNumSigBits++;

    // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
    // significant bits than the destination (and make sure neither type is
    // weird -- ppc_fp128).
    if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
        SrcNumSigBits <= DestNumSigBits)
      return true;
  }

  // Try harder to find if the source integer type has less significant bits.
  // For example, compute number of sign bits.
  KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
  int SigBits = (int)SrcTy->getScalarSizeInBits() -
                SrcKnown.countMinLeadingZeros() -
                SrcKnown.countMinTrailingZeros();
  if (SigBits <= DestNumSigBits)
    return true;

  return false;
}
Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
  if (Instruction *I = commonCastTransforms(FPT))
    return I;

  // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
  // simplify this expression to avoid one or more of the trunc/extend
  // operations if we can do so without changing the numerical results.
  //
  // The exact manner in which the widths of the operands interact to limit
  // what we can and cannot do safely varies from operation to operation, and
  // is explained below in the various case statements.
  Type *Ty = FPT.getType();
  auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
  if (BO && BO->hasOneUse()) {
    Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
    Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
    unsigned OpWidth = BO->getType()->getFPMantissaWidth();
    unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
    unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
    unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
    unsigned DstWidth = Ty->getFPMantissaWidth();
    switch (BO->getOpcode()) {
    default: break;
    case Instruction::FAdd:
    case Instruction::FSub:
      // For addition and subtraction, the infinitely precise result can
      // essentially be arbitrarily wide; proving that double rounding
      // will not occur because the result of OpI is exact (as we will for
      // FMul, for example) is hopeless. However, we *can* nonetheless
      // frequently know that double rounding cannot occur (or that it is
      // innocuous) by taking advantage of the specific structure of
      // infinitely-precise results that admit double rounding.
      //
      // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
      // to represent both sources, we can guarantee that the double
      // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
      // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
      // for proof of this fact).
      //
      // Note: Figueroa does not consider the case where DstFormat !=
      // SrcFormat. It's possible (likely even!) that this analysis
      // could be tightened for those cases, but they are rare (the main
      // case of interest here is (float)((double)float + float)).
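      // A minimal illustrative instance of the guarded case (hypothetical IR;
      // OpWidth = 53 and DstWidth = SrcWidth = 24 satisfy the check below):
      //   %ax = fpext float %a to double
      //   %bx = fpext float %b to double
      //   %s  = fadd double %ax, %bx
      //   %r  = fptrunc double %s to float
      // becomes:
      //   %r  = fadd float %a, %b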
      if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
        Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
        Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
        Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
        RI->copyFastMathFlags(BO);
        return RI;
      }
      break;
    case Instruction::FMul:
      // For multiplication, the infinitely precise result has at most
      // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
      // that such a value can be exactly represented, then no double
      // rounding can possibly occur; we can safely perform the operation
      // in the destination format if it can represent both sources.
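      // A minimal illustrative instance (hypothetical IR; OpWidth = 24 >=
      // 11 + 11 and DstWidth = SrcWidth = 11 satisfy the check below):
      //   %ax = fpext half %a to float
      //   %bx = fpext half %b to float
      //   %m  = fmul float %ax, %bx
      //   %r  = fptrunc float %m to half
      // becomes:
      //   %r  = fmul half %a, %b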
      if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
        Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
        Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
        return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
      }
      break;
    case Instruction::FDiv:
      // For division, we again use the bound from Figueroa's
      // dissertation. I am entirely certain that this bound can be
      // tightened in the unbalanced operand case by an analysis based on
      // the diophantine rational approximation bound, but the well-known
      // condition used here is a good conservative first pass.
      // TODO: Tighten bound via rigorous analysis of the unbalanced case.
      if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
        Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
        Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
        return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
      }
      break;
    case Instruction::FRem: {
      // Remainder is straightforward. Remainder is always exact, so the
      // type of OpI doesn't enter into things at all. We simply evaluate
      // in whichever source type is larger, then convert to the
      // destination type.
      if (SrcWidth == OpWidth)
        break;
      Value *LHS, *RHS;
      if (LHSWidth == SrcWidth) {
        LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
        RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
      } else {
        LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
        RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
      }

      Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
      return CastInst::CreateFPCast(ExactResult, Ty);
    }
    }
  }
  // (fptrunc (fneg x)) -> (fneg (fptrunc x))
  Value *X;
  Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
  if (Op && Op->hasOneUse()) {
    // FIXME: The FMF should propagate from the fptrunc, not the source op.
    IRBuilder<>::FastMathFlagGuard FMFG(Builder);
    if (isa<FPMathOperator>(Op))
      Builder.setFastMathFlags(Op->getFastMathFlags());

    if (match(Op, m_FNeg(m_Value(X)))) {
      Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
      return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
    }

    // If we are truncating a select that has an extended operand, we can
    // narrow the other operand and do the select as a narrow op.
    Value *Cond, *X, *Y;
    if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
        X->getType() == Ty) {
      // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
      Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
      Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
      return replaceInstUsesWith(FPT, Sel);
    }
    if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
        X->getType() == Ty) {
      // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
      Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
      Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
      return replaceInstUsesWith(FPT, Sel);
    }
  }
= dyn_cast
<IntrinsicInst
>(FPT
.getOperand(0))) {
1771 switch (II
->getIntrinsicID()) {
1773 case Intrinsic::ceil
:
1774 case Intrinsic::fabs
:
1775 case Intrinsic::floor
:
1776 case Intrinsic::nearbyint
:
1777 case Intrinsic::rint
:
1778 case Intrinsic::round
:
1779 case Intrinsic::roundeven
:
1780 case Intrinsic::trunc
: {
1781 Value
*Src
= II
->getArgOperand(0);
1782 if (!Src
->hasOneUse())
1785 // Except for fabs, this transformation requires the input of the unary FP
1786 // operation to be itself an fpext from the type to which we're
1788 if (II
->getIntrinsicID() != Intrinsic::fabs
) {
1789 FPExtInst
*FPExtSrc
= dyn_cast
<FPExtInst
>(Src
);
1790 if (!FPExtSrc
|| FPExtSrc
->getSrcTy() != Ty
)
1794 // Do unary FP operation on smaller type.
1795 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1796 Value
*InnerTrunc
= Builder
.CreateFPTrunc(Src
, Ty
);
1797 Function
*Overload
= Intrinsic::getDeclaration(FPT
.getModule(),
1798 II
->getIntrinsicID(), Ty
);
1799 SmallVector
<OperandBundleDef
, 1> OpBundles
;
1800 II
->getOperandBundlesAsDefs(OpBundles
);
1802 CallInst::Create(Overload
, {InnerTrunc
}, OpBundles
, II
->getName());
1803 NewCI
->copyFastMathFlags(II
);
  if (Instruction *I = shrinkInsertElt(FPT, Builder))
    return I;

  Value *Src = FPT.getOperand(0);
  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
    auto *FPCast = cast<CastInst>(Src);
    if (isKnownExactCastIntToFP(*FPCast, *this))
      return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  }

  return nullptr;
}
Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
  // If the source operand is a cast from integer to FP and known exact, then
  // cast the integer operand directly to the destination type.
  Type *Ty = FPExt.getType();
  Value *Src = FPExt.getOperand(0);
  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
    auto *FPCast = cast<CastInst>(Src);
    if (isKnownExactCastIntToFP(*FPCast, *this))
      return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  }

  return commonCastTransforms(FPExt);
}
/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
/// This is safe if the intermediate type has enough bits in its mantissa to
/// accurately represent all values of X. For example, this won't work with
/// i64 -> float -> i64.
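/// A minimal illustrative case that does fold (hypothetical IR): every i8
/// value is exactly representable in float, so
///   %f = sitofp i8 %x to float
///   %r = fptosi float %f to i32
/// becomes:
///   %r = sext i8 %x to i32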
Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
    return nullptr;

  auto *OpI = cast<CastInst>(FI.getOperand(0));
  Value *X = OpI->getOperand(0);
  Type *XType = X->getType();
  Type *DestType = FI.getType();
  bool IsOutputSigned = isa<FPToSIInst>(FI);

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.
  //
  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  if (!isKnownExactCastIntToFP(*OpI, *this)) {
    // The first cast may not round exactly based on the source integer width
    // and FP width, but the overflow UB rules can still allow this to fold.
    // If the destination type is narrow, that means the intermediate FP value
    // must be large enough to hold the source value exactly.
    // For example, (uint8_t)((float)(uint32_t)16777217) is undefined behavior.
    int OutputSize = (int)DestType->getScalarSizeInBits();
    if (OutputSize > OpI->getType()->getFPMantissaWidth())
      return nullptr;
  }

  if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
    bool IsInputSigned = isa<SIToFPInst>(OpI);
    if (IsInputSigned && IsOutputSigned)
      return new SExtInst(X, DestType);
    return new ZExtInst(X, DestType);
  }
  if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
    return new TruncInst(X, DestType);

  assert(XType == DestType && "Unexpected types for int to FP to int casts");
  return replaceInstUsesWith(FI, X);
}
Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
  if (Instruction *I = foldItoFPtoI(FI))
    return I;

  return commonCastTransforms(FI);
}

Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
  if (Instruction *I = foldItoFPtoI(FI))
    return I;

  return commonCastTransforms(FI);
}
Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
  return commonCastTransforms(CI);
}

Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
  return commonCastTransforms(CI);
}
Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
  // If the source integer type is not the intptr_t type for this target, do a
  // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
  // cast to be exposed to other transforms.
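  // For example (illustrative, assuming 64-bit pointers in this address
  // space):
  //   %p = inttoptr i32 %x to ptr
  // becomes:
  //   %w = zext i32 %x to i64
  //   %p = inttoptr i64 %w to ptr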
  unsigned AS = CI.getAddressSpace();
  if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
      DL.getPointerSizeInBits(AS)) {
    Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
        DL.getIntPtrType(CI.getContext(), AS));
    Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
    return new IntToPtrInst(P, CI.getType());
  }

  if (Instruction *I = commonCastTransforms(CI))
    return I;

  return nullptr;
}
Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
  // If the destination integer type is not the intptr_t type for this target,
  // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
  // to be exposed to other transforms.
  Value *SrcOp = CI.getPointerOperand();
  Type *SrcTy = SrcOp->getType();
  Type *Ty = CI.getType();
  unsigned AS = CI.getPointerAddressSpace();
  unsigned TySize = Ty->getScalarSizeInBits();
  unsigned PtrSize = DL.getPointerSizeInBits(AS);
  if (TySize != PtrSize) {
    Type *IntPtrTy =
        SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
    Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
    return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
  }

  if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
    // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
    // While this can increase the number of instructions it doesn't actually
    // increase the overall complexity since the arithmetic is just part of
    // the GEP otherwise.
    if (GEP->hasOneUse() &&
        isa<ConstantPointerNull>(GEP->getPointerOperand())) {
      return replaceInstUsesWith(CI,
                                 Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
                                                       /*isSigned=*/false));
    }
  }

  Value *Vec, *Scalar, *Index;
  if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
                                        m_Value(Scalar), m_Value(Index)))) &&
      Vec->getType() == Ty) {
    assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
    // Convert the scalar to int followed by insert to eliminate one cast:
    // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
    Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
    return InsertElementInst::Create(Vec, NewCast, Index);
  }

  return commonCastTransforms(CI);
}
/// This input value (which is known to have vector type) is being zero
/// extended or truncated to the specified vector type. Since the zext/trunc
/// is done using an integer type, we have a (bitcast(cast(bitcast))) pattern,
/// and endianness will impact which end of the vector is extended or
/// truncated.
///
/// A vector is always stored with index 0 at the lowest address, which
/// corresponds to the most significant bits for a big endian stored integer
/// and the least significant bits for little endian. A trunc/zext of an
/// integer impacts the big end of the integer. Thus, we need to add/remove
/// elements at the front of the vector for big endian targets, and the back
/// of the vector for little endian targets.
///
/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
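/// A minimal illustrative instance (hypothetical IR, little-endian target,
/// the truncating direction):
///   %i = bitcast <4 x i32> %v to i128
///   %t = trunc i128 %i to i64
///   %r = bitcast i64 %t to <2 x i32>
/// becomes:
///   %r = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 0, i32 1>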
static Instruction *
optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
                                        InstCombinerImpl &IC) {
  // We can only do this optimization if the output is a multiple of the input
  // element size, or the input is a multiple of the output element size.
  // Convert the input type to have the same element type as the output.
  VectorType *SrcTy = cast<VectorType>(InVal->getType());

  if (SrcTy->getElementType() != DestTy->getElementType()) {
    // The input types don't need to be identical, but for now they must be the
    // same size. There is no specific reason we couldn't handle things like
    // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
    // there yet.
    if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
        DestTy->getElementType()->getPrimitiveSizeInBits())
      return nullptr;

    SrcTy =
        FixedVectorType::get(DestTy->getElementType(),
                             cast<FixedVectorType>(SrcTy)->getNumElements());
    InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
  }

  bool IsBigEndian = IC.getDataLayout().isBigEndian();
  unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
  unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();

  assert(SrcElts != DestElts && "Element counts should be different.");

  // Now that the element types match, get the shuffle mask and RHS of the
  // shuffle to use, which depends on whether we're increasing or decreasing the
  // size of the input.
  auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
  ArrayRef<int> ShuffleMask;
  Value *V2;

  if (SrcElts > DestElts) {
    // If we're shrinking the number of elements (rewriting an integer
    // truncate), just shuffle in the elements corresponding to the least
    // significant bits from the input and use poison as the second shuffle
    // input.
    V2 = PoisonValue::get(SrcTy);
    // Make sure the shuffle mask selects the "least significant bits" by
    // keeping elements from back of the src vector for big endian, and from the
    // front for little endian.
    ShuffleMask = ShuffleMaskStorage;
    if (IsBigEndian)
      ShuffleMask = ShuffleMask.take_back(DestElts);
    else
      ShuffleMask = ShuffleMask.take_front(DestElts);
  } else {
    // If we're increasing the number of elements (rewriting an integer zext),
    // shuffle in all of the elements from InVal. Fill the rest of the result
    // elements with zeros from a constant zero.
    V2 = Constant::getNullValue(SrcTy);
    // Use first elt from V2 when indicating zero in the shuffle mask.
    uint32_t NullElt = SrcElts;
    // Extend with null values in the "most significant bits" by adding elements
    // in front of the src vector for big endian, and at the back for little
    // endian.
    unsigned DeltaElts = DestElts - SrcElts;
    if (IsBigEndian)
      ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
    else
      ShuffleMaskStorage.append(DeltaElts, NullElt);
    ShuffleMask = ShuffleMaskStorage;
  }

  return new ShuffleVectorInst(InVal, V2, ShuffleMask);
}
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
  return Value % Ty->getPrimitiveSizeInBits() == 0;
}

static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
  return Value / Ty->getPrimitiveSizeInBits();
}
/// V is a value which is inserted into a vector of VecEltTy.
/// Look through the value to see if we can decompose it into
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
/// Shift is the number of bits between the lsb of V and the lsb of
/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
static bool collectInsertionElements(Value *V, unsigned Shift,
                                     SmallVectorImpl<Value *> &Elements,
                                     Type *VecEltTy, bool isBigEndian) {
  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
         "Shift should be a multiple of the element type size");

  // Undef values never contribute useful bits to the result.
  if (isa<UndefValue>(V)) return true;

  // If we got down to a value of the right type, we win, try inserting into the
  // vector.
  if (V->getType() == VecEltTy) {
    // Inserting null doesn't actually insert any elements.
    if (Constant *C = dyn_cast<Constant>(V))
      if (C->isNullValue())
        return true;

    unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
    if (isBigEndian)
      ElementIndex = Elements.size() - ElementIndex - 1;

    // Fail if multiple elements are inserted into this slot.
    if (Elements[ElementIndex])
      return false;

    Elements[ElementIndex] = V;
    return true;
  }

  if (Constant *C = dyn_cast<Constant>(V)) {
    // Figure out the # elements this provides, and bitcast it or slice it up
    // as required.
    unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
                                        VecEltTy);
    // If the constant is the size of a vector element, we just need to bitcast
    // it to the right type so it gets properly inserted.
    if (NumElts == 1)
      return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
                                      Shift, Elements, VecEltTy, isBigEndian);

    // Okay, this is a constant that covers multiple elements. Slice it up into
    // pieces and insert each element-sized piece into the vector.
    if (!isa<IntegerType>(C->getType()))
      C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
                                       C->getType()->getPrimitiveSizeInBits()));
    unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
    Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);

    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned ShiftI = Shift + i * ElementSize;
      Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
                                                                  ShiftI));
      Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
      if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
                                    isBigEndian))
        return false;
    }
    return true;
  }

  if (!V->hasOneUse()) return false;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;
  switch (I->getOpcode()) {
  default: return false; // Unhandled case.
  case Instruction::BitCast:
    if (I->getOperand(0)->getType()->isVectorTy())
      return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::ZExt:
    if (!isMultipleOfTypeSize(
            I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
            VecEltTy))
      return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::Or:
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian) &&
           collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::Shl: {
    // Must be shifting by a constant that is a multiple of the element size.
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (!CI) return false;
    Shift += CI->getZExtValue();
    if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  }
  }
}
/// If the input is an 'or' instruction, we may be doing shifts and ors to
/// assemble the elements of the vector manually.
/// Try to rip the code out and replace it with insertelements. This is to
/// optimize code like this:
///
///    %tmp37 = bitcast float %inc to i32
///    %tmp38 = zext i32 %tmp37 to i64
///    %tmp31 = bitcast float %inc5 to i32
///    %tmp32 = zext i32 %tmp31 to i64
///    %tmp33 = shl i64 %tmp32, 32
///    %ins35 = or i64 %tmp33, %tmp38
///    %tmp43 = bitcast i64 %ins35 to <2 x float>
///
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
                                                InstCombinerImpl &IC) {
  auto *DestVecTy = cast<FixedVectorType>(CI.getType());
  Value *IntInput = CI.getOperand(0);

  SmallVector<Value *, 8> Elements(DestVecTy->getNumElements());
  if (!collectInsertionElements(IntInput, 0, Elements,
                                DestVecTy->getElementType(),
                                IC.getDataLayout().isBigEndian()))
    return nullptr;

  // If we succeeded, we know that all of the elements are specified by Elements
  // or are zero if Elements has a null entry. Recast this as a set of
  // insertions.
  Value *Result = Constant::getNullValue(CI.getType());
  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
    if (!Elements[i]) continue; // Unset element.

    Result = IC.Builder.CreateInsertElement(Result, Elements[i],
                                            IC.Builder.getInt32(i));
  }

  return Result;
}
/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
/// vector followed by extract element. The backend tends to handle bitcasts of
/// vectors better than bitcasts of scalars because vector registers are
/// usually not type-specific like scalar integer or scalar floating-point.
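/// A minimal illustrative instance (hypothetical IR):
///   %e = extractelement <4 x i32> %v, i32 %i
///   %f = bitcast i32 %e to float
/// becomes:
///   %b = bitcast <4 x i32> %v to <4 x float>
///   %f = extractelement <4 x float> %b, i32 %i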
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
                                              InstCombinerImpl &IC) {
  Value *VecOp, *Index;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
    return nullptr;

  // The bitcast must be to a vectorizable type, otherwise we can't make a new
  // type to extract from.
  Type *DestType = BitCast.getType();
  VectorType *VecType = cast<VectorType>(VecOp->getType());
  if (VectorType::isValidElementType(DestType)) {
    auto *NewVecType = VectorType::get(DestType, VecType);
    auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
    return ExtractElementInst::Create(NewBC, Index);
  }

  // Only solve DestType is vector to avoid inverse transform in visitBitCast.
  // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
  auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
  if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
    return CastInst::Create(Instruction::BitCast, VecOp, DestType);

  return nullptr;
}
/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
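/// A minimal illustrative instance (hypothetical IR, the integer-destination
/// case handled below):
///   %bx = bitcast <4 x i32> %X to <2 x i64>
///   %a  = xor <2 x i64> %bx, %Y
///   %r  = bitcast <2 x i64> %a to <4 x i32>
/// becomes:
///   %by = bitcast <2 x i64> %Y to <4 x i32>
///   %r  = xor <4 x i32> %X, %by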
static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
                                            InstCombiner::BuilderTy &Builder) {
  Type *DestTy = BitCast.getType();
  BinaryOperator *BO;

  if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
      !BO->isBitwiseLogicOp())
    return nullptr;

  // FIXME: This transform is restricted to vector types to avoid backend
  // problems caused by creating potentially illegal operations. If a fix-up is
  // added to handle that situation, we can remove this check.
  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
    return nullptr;

  if (DestTy->isFPOrFPVectorTy()) {
    Value *X, *Y;
    // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
    if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
        match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
      if (X->getType()->isFPOrFPVectorTy() &&
          Y->getType()->isIntOrIntVectorTy()) {
        Value *CastedOp =
            Builder.CreateBitCast(BO->getOperand(0), Y->getType());
        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
      }
      if (X->getType()->isIntOrIntVectorTy() &&
          Y->getType()->isFPOrFPVectorTy()) {
        Value *CastedOp =
            Builder.CreateBitCast(BO->getOperand(1), X->getType());
        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
      }
    }
    return nullptr;
  }

  if (!DestTy->isIntOrIntVectorTy())
    return nullptr;

  Value *X;
  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
    Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
  }

  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
  }

  // Canonicalize vector bitcasts to come before vector bitwise logic with a
  // constant. This eases recognition of special constants for later ops.
  // Example:
  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
  Constant *C;
  if (match(BO->getOperand(1), m_Constant(C))) {
    // bitcast (logic X, C) --> logic (bitcast X, C')
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    Value *CastedC = Builder.CreateBitCast(C, DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
  }

  return nullptr;
}
/// Change the type of a select if we can eliminate a bitcast.
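/// A minimal illustrative instance (hypothetical IR):
///   %bx  = bitcast <4 x i32> %X to <2 x i64>
///   %sel = select i1 %c, <2 x i64> %bx, <2 x i64> %Y
///   %r   = bitcast <2 x i64> %sel to <4 x i32>
/// becomes:
///   %by = bitcast <2 x i64> %Y to <4 x i32>
///   %r  = select i1 %c, <4 x i32> %X, <4 x i32> %by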
static Instruction *foldBitCastSelect(BitCastInst &BitCast,
                                      InstCombiner::BuilderTy &Builder) {
  Value *Cond, *TVal, *FVal;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
    return nullptr;

  // A vector select must maintain the same number of elements in its operands.
  Type *CondTy = Cond->getType();
  Type *DestTy = BitCast.getType();
  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
    if (!DestTy->isVectorTy() ||
        CondVTy->getElementCount() !=
            cast<VectorType>(DestTy)->getElementCount())
      return nullptr;

  // FIXME: This transform is restricted from changing the select between
  // scalars and vectors to avoid backend problems caused by creating
  // potentially illegal operations. If a fix-up is added to handle that
  // situation, we can remove this check.
  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
    return nullptr;

  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
  Value *X;
  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
    Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
    return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
  }

  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
    Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
    return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
  }

  return nullptr;
}
/// Check if all users of CI are StoreInsts.
static bool hasStoreUsersOnly(CastInst &CI) {
  for (User *U : CI.users()) {
    if (!isa<StoreInst>(U))
      return false;
  }
  return true;
}
/// This function handles the following case:
///
///     A  ->  B    cast
///     PHI
///     B  ->  A    cast
///
/// All the related PHI nodes can be replaced by new PHI nodes with type A.
/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
                                                      PHINode *PN) {
  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
  if (hasStoreUsersOnly(CI))
    return nullptr;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();  // Type B
  Type *DestTy = CI.getType();   // Type A

  SmallVector<PHINode *, 4> PhiWorklist;
  SmallSetVector<PHINode *, 4> OldPhiNodes;

  // Find all of the A->B casts and PHI nodes.
  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
  // OldPhiNodes is used to track all known PHI nodes, before adding a new
  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
  PhiWorklist.push_back(PN);
  OldPhiNodes.insert(PN);
  while (!PhiWorklist.empty()) {
    auto *OldPN = PhiWorklist.pop_back_val();
    for (Value *IncValue : OldPN->incoming_values()) {
      if (isa<Constant>(IncValue))
        continue;

      if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
        // If there is a sequence of one or more load instructions, each loaded
        // value is used as address of later load instruction, bitcast is
        // necessary to change the value type, don't optimize it. For
        // simplicity we give up if the load address comes from another load.
        Value *Addr = LI->getOperand(0);
        if (Addr == &CI || isa<LoadInst>(Addr))
          return nullptr;
        // Don't transform "load <256 x i32>, <256 x i32>*" to
        // "load x86_amx, x86_amx*", because x86_amx* is invalid.
        // TODO: Remove this check when bitcast between vector and x86_amx
        // is replaced with a specific intrinsic.
        if (DestTy->isX86_AMXTy())
          return nullptr;
        if (LI->hasOneUse() && LI->isSimple())
          continue;
        // If a LoadInst has more than one use, changing the type of loaded
        // value may create another bitcast.
        return nullptr;
      }

      if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
        if (OldPhiNodes.insert(PNode))
          PhiWorklist.push_back(PNode);
        continue;
      }

      auto *BCI = dyn_cast<BitCastInst>(IncValue);
      // We can't handle other instructions.
      if (!BCI)
        return nullptr;

      // Verify it's a A->B cast.
      Type *TyA = BCI->getOperand(0)->getType();
      Type *TyB = BCI->getType();
      if (TyA != DestTy || TyB != SrcTy)
        return nullptr;
    }
  }

  // Check that each user of each old PHI node is something that we can
  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
  for (auto *OldPN : OldPhiNodes) {
    for (User *V : OldPN->users()) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        if (!SI->isSimple() || SI->getOperand(0) != OldPN)
          return nullptr;
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        // Verify it's a B->A cast.
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        if (TyA != DestTy || TyB != SrcTy)
          return nullptr;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        // As long as the user is another old PHI node, then even if we don't
        // rewrite it, the PHI web we're considering won't have any users
        // outside itself, so it'll be dead.
        if (!OldPhiNodes.contains(PHI))
          return nullptr;
      } else {
        return nullptr;
      }
    }
  }

  // For each old PHI node, create a corresponding new PHI node with a type A.
  SmallDenseMap<PHINode *, PHINode *> NewPNodes;
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the operands of new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      if (auto *C = dyn_cast<Constant>(V)) {
        NewV = ConstantExpr::getBitCast(C, DestTy);
      } else if (auto *LI = dyn_cast<LoadInst>(V)) {
        // Explicitly perform load combine to make sure no opposing transform
        // can remove the bitcast in the meantime and trigger an infinite loop.
        Builder.SetInsertPoint(LI);
        NewV = combineLoadToNewType(*LI, DestTy);
        // Remove the old load and its use in the old phi, which itself becomes
        // dead once the whole transform finishes.
        replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
        eraseInstFromFunction(*LI);
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        NewV = BCI->getOperand(0);
      } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
        NewV = NewPNodes[PrevPN];
      }
      assert(NewV);
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // Traverse all accumulated PHI nodes and process its users,
  // which are Stores and BitCasts. Without this processing
  // NewPHI nodes could be replicated and could lead to extra
  // moves generated after DeSSA.
  // If there is a store with type B, change it to type A.

  // Replace users of BitCast B->A with NewPHI. These will help
  // later to get rid of a closure formed by OldPHI nodes.
  Instruction *RetVal = nullptr;
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (User *V : make_early_inc_range(OldPN->users())) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(SI->isSimple() && SI->getOperand(0) == OldPN);
        Builder.SetInsertPoint(SI);
        auto *NewBC =
            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
        SI->setOperand(0, NewBC);
        Worklist.push(SI);
        assert(hasStoreUsersOnly(*NewBC));
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        assert(TyA == DestTy && TyB == SrcTy);
        (void) TyA;
        (void) TyB;
        Instruction *I = replaceInstUsesWith(*BCI, NewPN);
        if (BCI == &CI)
          RetVal = I;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        assert(OldPhiNodes.contains(PHI));
        (void) PHI;
      } else {
        llvm_unreachable("all uses should be handled");
      }
    }
  }

  return RetVal;
}
Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
  // If the operands are integer typed then apply the integer transforms,
  // otherwise just apply the common ones.
  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();
  Type *DestTy = CI.getType();

  // Get rid of casts from one type to the same type. These are useless and can
  // be replaced by the operand.
  if (DestTy == Src->getType())
    return replaceInstUsesWith(CI, Src);

  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
    // Beware: messing with this target-specific oddity may cause trouble.
    if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
      Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
      return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
    }

    if (isa<IntegerType>(SrcTy)) {
      // If this is a cast from an integer to vector, check to see if the input
      // is a trunc or zext of a bitcast from vector. If so, we can replace all
      // the casts with a shuffle and (potentially) a bitcast.
      if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
        CastInst *SrcCast = cast<CastInst>(Src);
        if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
          if (isa<VectorType>(BCIn->getOperand(0)->getType()))
            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
              return I;
      }

      // If the input is an 'or' instruction, we may be doing shifts and ors to
      // assemble the elements of the vector manually. Try to rip the code out
      // and replace it with insertelements.
      if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
        return replaceInstUsesWith(CI, V);
    }
  }

  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
    if (SrcVTy->getNumElements() == 1) {
      // If our destination is not a vector, then make this a straight
      // scalar-scalar cast.
      if (!DestTy->isVectorTy()) {
        Value *Elem =
          Builder.CreateExtractElement(Src,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
      }

      // Otherwise, see if our source is an insert. If so, then use the scalar
      // component directly:
      // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
      if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
        return new BitCastInst(InsElt->getOperand(1), DestTy);
    }

    // Convert an artificial vector insert into more analyzable bitwise logic.
    unsigned BitWidth = DestTy->getScalarSizeInBits();
    Value *X, *Y;
    uint64_t IndexC;
    if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
                                        m_Value(Y), m_ConstantInt(IndexC)))) &&
        DestTy->isIntegerTy() && X->getType() == DestTy &&
        Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
      // Adjust for big endian - the LSBs are at the high index.
      if (DL.isBigEndian())
        IndexC = SrcVTy->getNumElements() - 1 - IndexC;

      // We only handle (endian-normalized) insert to index 0. Any other insert
      // would require a left-shift, so that is an extra instruction.
      if (IndexC == 0) {
        // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
        unsigned EltWidth = Y->getType()->getScalarSizeInBits();
        APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
        Value *AndX = Builder.CreateAnd(X, MaskC);
        Value *ZextY = Builder.CreateZExt(Y, DestTy);
        return BinaryOperator::CreateOr(AndX, ZextY);
      }
    }
  }
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
    // Okay, we have (bitcast (shuffle ..)). Check to see if this is
    // a bitcast to a vector with the same # elts.
    Value *ShufOp0 = Shuf->getOperand(0);
    Value *ShufOp1 = Shuf->getOperand(1);
    auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
    auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
    if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
        cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
        ShufElts == SrcVecElts) {
      BitCastInst *Tmp;
      // If either of the operands is a cast from CI.getType(), then
      // evaluating the shuffle in the casted destination's type will allow
      // us to eliminate at least one cast.
      if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
           Tmp->getOperand(0)->getType() == DestTy) ||
          ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
           Tmp->getOperand(0)->getType() == DestTy)) {
        Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
        Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
        // Return a new shuffle vector. Use the same element ID's, as we
        // know the vector types match #elts.
        return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
      }
    }

    // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
    // as a byte/bit swap:
    // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
    // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
    if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
        Shuf->hasOneUse() && Shuf->isReverse()) {
      unsigned IntrinsicNum = 0;
      if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
          SrcTy->getScalarSizeInBits() == 8) {
        IntrinsicNum = Intrinsic::bswap;
      } else if (SrcTy->getScalarSizeInBits() == 1) {
        IntrinsicNum = Intrinsic::bitreverse;
      }
      if (IntrinsicNum != 0) {
        assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
        assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
        Function *BswapOrBitreverse =
            Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
        Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
        return CallInst::Create(BswapOrBitreverse, {ScalarX});
      }
    }
  }

  // Handle the A->B->A cast, and there is an intervening PHI node.
  if (PHINode *PN = dyn_cast<PHINode>(Src))
    if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
      return I;

  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
    return I;

  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
    return I;

  if (Instruction *I = foldBitCastSelect(CI, Builder))
    return I;

  return commonCastTransforms(CI);
}
Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
  return commonCastTransforms(CI);
}