1 //===- InstCombineCasts.cpp -----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the visit functions for cast operations.
11 //===----------------------------------------------------------------------===//
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/Analysis/ConstantFolding.h"
16 #include "llvm/Analysis/TargetLibraryInfo.h"
17 #include "llvm/IR/DataLayout.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/PatternMatch.h"
20 #include "llvm/Support/KnownBits.h"
22 using namespace PatternMatch
;
24 #define DEBUG_TYPE "instcombine"
26 /// Analyze 'Val', seeing if it is a simple linear expression.
27 /// If so, decompose it, returning some value X, such that Val is
30 static Value
*decomposeSimpleLinearExpr(Value
*Val
, unsigned &Scale
,
32 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Val
)) {
33 Offset
= CI
->getZExtValue();
35 return ConstantInt::get(Val
->getType(), 0);
38 if (BinaryOperator
*I
= dyn_cast
<BinaryOperator
>(Val
)) {
39 // Cannot look past anything that might overflow.
40 OverflowingBinaryOperator
*OBI
= dyn_cast
<OverflowingBinaryOperator
>(Val
);
41 if (OBI
&& !OBI
->hasNoUnsignedWrap() && !OBI
->hasNoSignedWrap()) {
47 if (ConstantInt
*RHS
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
48 if (I
->getOpcode() == Instruction::Shl
) {
49 // This is a value scaled by '1 << the shift amt'.
50 Scale
= UINT64_C(1) << RHS
->getZExtValue();
52 return I
->getOperand(0);
55 if (I
->getOpcode() == Instruction::Mul
) {
56 // This value is scaled by 'RHS'.
57 Scale
= RHS
->getZExtValue();
59 return I
->getOperand(0);
62 if (I
->getOpcode() == Instruction::Add
) {
63 // We have X+C. Check to see if we really have (X*C2)+C1,
64 // where C1 is divisible by C2.
67 decomposeSimpleLinearExpr(I
->getOperand(0), SubScale
, Offset
);
68 Offset
+= RHS
->getZExtValue();
75 // Otherwise, we can't look past this.
81 /// If we find a cast of an allocation instruction, try to eliminate the cast by
82 /// moving the type information into the alloc.
83 Instruction
*InstCombiner::PromoteCastOfAllocation(BitCastInst
&CI
,
85 PointerType
*PTy
= cast
<PointerType
>(CI
.getType());
87 BuilderTy
AllocaBuilder(Builder
);
88 AllocaBuilder
.SetInsertPoint(&AI
);
90 // Get the type really allocated and the type casted to.
91 Type
*AllocElTy
= AI
.getAllocatedType();
92 Type
*CastElTy
= PTy
->getElementType();
93 if (!AllocElTy
->isSized() || !CastElTy
->isSized()) return nullptr;
95 unsigned AllocElTyAlign
= DL
.getABITypeAlignment(AllocElTy
);
96 unsigned CastElTyAlign
= DL
.getABITypeAlignment(CastElTy
);
97 if (CastElTyAlign
< AllocElTyAlign
) return nullptr;
99 // If the allocation has multiple uses, only promote it if we are strictly
100 // increasing the alignment of the resultant allocation. If we keep it the
101 // same, we open the door to infinite loops of various kinds.
102 if (!AI
.hasOneUse() && CastElTyAlign
== AllocElTyAlign
) return nullptr;
104 uint64_t AllocElTySize
= DL
.getTypeAllocSize(AllocElTy
);
105 uint64_t CastElTySize
= DL
.getTypeAllocSize(CastElTy
);
106 if (CastElTySize
== 0 || AllocElTySize
== 0) return nullptr;
108 // If the allocation has multiple uses, only promote it if we're not
109 // shrinking the amount of memory being allocated.
110 uint64_t AllocElTyStoreSize
= DL
.getTypeStoreSize(AllocElTy
);
111 uint64_t CastElTyStoreSize
= DL
.getTypeStoreSize(CastElTy
);
112 if (!AI
.hasOneUse() && CastElTyStoreSize
< AllocElTyStoreSize
) return nullptr;
114 // See if we can satisfy the modulus by pulling a scale out of the array
116 unsigned ArraySizeScale
;
117 uint64_t ArrayOffset
;
118 Value
*NumElements
= // See if the array size is a decomposable linear expr.
119 decomposeSimpleLinearExpr(AI
.getOperand(0), ArraySizeScale
, ArrayOffset
);
121 // If we can now satisfy the modulus, by using a non-1 scale, we really can
123 if ((AllocElTySize
*ArraySizeScale
) % CastElTySize
!= 0 ||
124 (AllocElTySize
*ArrayOffset
) % CastElTySize
!= 0) return nullptr;
126 unsigned Scale
= (AllocElTySize
*ArraySizeScale
)/CastElTySize
;
127 Value
*Amt
= nullptr;
131 Amt
= ConstantInt::get(AI
.getArraySize()->getType(), Scale
);
132 // Insert before the alloca, not before the cast.
133 Amt
= AllocaBuilder
.CreateMul(Amt
, NumElements
);
136 if (uint64_t Offset
= (AllocElTySize
*ArrayOffset
)/CastElTySize
) {
137 Value
*Off
= ConstantInt::get(AI
.getArraySize()->getType(),
139 Amt
= AllocaBuilder
.CreateAdd(Amt
, Off
);
142 AllocaInst
*New
= AllocaBuilder
.CreateAlloca(CastElTy
, Amt
);
143 New
->setAlignment(AI
.getAlignment());
145 New
->setUsedWithInAlloca(AI
.isUsedWithInAlloca());
147 // If the allocation has multiple real uses, insert a cast and change all
148 // things that used it to use the new cast. This will also hack on CI, but it
150 if (!AI
.hasOneUse()) {
151 // New is the allocation instruction, pointer typed. AI is the original
152 // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
153 Value
*NewCast
= AllocaBuilder
.CreateBitCast(New
, AI
.getType(), "tmpcast");
154 replaceInstUsesWith(AI
, NewCast
);
156 return replaceInstUsesWith(CI
, New
);
159 /// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
160 /// true for, actually insert the code to evaluate the expression.
161 Value
*InstCombiner::EvaluateInDifferentType(Value
*V
, Type
*Ty
,
163 if (Constant
*C
= dyn_cast
<Constant
>(V
)) {
164 C
= ConstantExpr::getIntegerCast(C
, Ty
, isSigned
/*Sext or ZExt*/);
165 // If we got a constantexpr back, try to simplify it with DL info.
166 if (Constant
*FoldedC
= ConstantFoldConstant(C
, DL
, &TLI
))
171 // Otherwise, it must be an instruction.
172 Instruction
*I
= cast
<Instruction
>(V
);
173 Instruction
*Res
= nullptr;
174 unsigned Opc
= I
->getOpcode();
176 case Instruction::Add
:
177 case Instruction::Sub
:
178 case Instruction::Mul
:
179 case Instruction::And
:
180 case Instruction::Or
:
181 case Instruction::Xor
:
182 case Instruction::AShr
:
183 case Instruction::LShr
:
184 case Instruction::Shl
:
185 case Instruction::UDiv
:
186 case Instruction::URem
: {
187 Value
*LHS
= EvaluateInDifferentType(I
->getOperand(0), Ty
, isSigned
);
188 Value
*RHS
= EvaluateInDifferentType(I
->getOperand(1), Ty
, isSigned
);
189 Res
= BinaryOperator::Create((Instruction::BinaryOps
)Opc
, LHS
, RHS
);
192 case Instruction::Trunc
:
193 case Instruction::ZExt
:
194 case Instruction::SExt
:
195 // If the source type of the cast is the type we're trying for then we can
196 // just return the source. There's no need to insert it because it is not
198 if (I
->getOperand(0)->getType() == Ty
)
199 return I
->getOperand(0);
201 // Otherwise, must be the same type of cast, so just reinsert a new one.
202 // This also handles the case of zext(trunc(x)) -> zext(x).
203 Res
= CastInst::CreateIntegerCast(I
->getOperand(0), Ty
,
204 Opc
== Instruction::SExt
);
206 case Instruction::Select
: {
207 Value
*True
= EvaluateInDifferentType(I
->getOperand(1), Ty
, isSigned
);
208 Value
*False
= EvaluateInDifferentType(I
->getOperand(2), Ty
, isSigned
);
209 Res
= SelectInst::Create(I
->getOperand(0), True
, False
);
212 case Instruction::PHI
: {
213 PHINode
*OPN
= cast
<PHINode
>(I
);
214 PHINode
*NPN
= PHINode::Create(Ty
, OPN
->getNumIncomingValues());
215 for (unsigned i
= 0, e
= OPN
->getNumIncomingValues(); i
!= e
; ++i
) {
217 EvaluateInDifferentType(OPN
->getIncomingValue(i
), Ty
, isSigned
);
218 NPN
->addIncoming(V
, OPN
->getIncomingBlock(i
));
224 // TODO: Can handle more cases here.
225 llvm_unreachable("Unreachable!");
229 return InsertNewInstWith(Res
, *I
);
232 Instruction::CastOps
InstCombiner::isEliminableCastPair(const CastInst
*CI1
,
233 const CastInst
*CI2
) {
234 Type
*SrcTy
= CI1
->getSrcTy();
235 Type
*MidTy
= CI1
->getDestTy();
236 Type
*DstTy
= CI2
->getDestTy();
238 Instruction::CastOps firstOp
= CI1
->getOpcode();
239 Instruction::CastOps secondOp
= CI2
->getOpcode();
241 SrcTy
->isPtrOrPtrVectorTy() ? DL
.getIntPtrType(SrcTy
) : nullptr;
243 MidTy
->isPtrOrPtrVectorTy() ? DL
.getIntPtrType(MidTy
) : nullptr;
245 DstTy
->isPtrOrPtrVectorTy() ? DL
.getIntPtrType(DstTy
) : nullptr;
246 unsigned Res
= CastInst::isEliminableCastPair(firstOp
, secondOp
, SrcTy
, MidTy
,
247 DstTy
, SrcIntPtrTy
, MidIntPtrTy
,
250 // We don't want to form an inttoptr or ptrtoint that converts to an integer
251 // type that differs from the pointer size.
252 if ((Res
== Instruction::IntToPtr
&& SrcTy
!= DstIntPtrTy
) ||
253 (Res
== Instruction::PtrToInt
&& DstTy
!= SrcIntPtrTy
))
256 return Instruction::CastOps(Res
);
259 /// Implement the transforms common to all CastInst visitors.
260 Instruction
*InstCombiner::commonCastTransforms(CastInst
&CI
) {
261 Value
*Src
= CI
.getOperand(0);
263 // Try to eliminate a cast of a cast.
264 if (auto *CSrc
= dyn_cast
<CastInst
>(Src
)) { // A->B->C cast
265 if (Instruction::CastOps NewOpc
= isEliminableCastPair(CSrc
, &CI
)) {
266 // The first cast (CSrc) is eliminable so we need to fix up or replace
267 // the second cast (CI). CSrc will then have a good chance of being dead.
268 auto *Ty
= CI
.getType();
269 auto *Res
= CastInst::Create(NewOpc
, CSrc
->getOperand(0), Ty
);
270 // Point debug users of the dying cast to the new one.
271 if (CSrc
->hasOneUse())
272 replaceAllDbgUsesWith(*CSrc
, *Res
, CI
, DT
);
277 if (auto *Sel
= dyn_cast
<SelectInst
>(Src
)) {
278 // We are casting a select. Try to fold the cast into the select, but only
279 // if the select does not have a compare instruction with matching operand
280 // types. Creating a select with operands that are different sizes than its
281 // condition may inhibit other folds and lead to worse codegen.
282 auto *Cmp
= dyn_cast
<CmpInst
>(Sel
->getCondition());
283 if (!Cmp
|| Cmp
->getOperand(0)->getType() != Sel
->getType())
284 if (Instruction
*NV
= FoldOpIntoSelect(CI
, Sel
)) {
285 replaceAllDbgUsesWith(*Sel
, *NV
, CI
, DT
);
290 // If we are casting a PHI, then fold the cast into the PHI.
291 if (auto *PN
= dyn_cast
<PHINode
>(Src
)) {
292 // Don't do this if it would create a PHI node with an illegal type from a
294 if (!Src
->getType()->isIntegerTy() || !CI
.getType()->isIntegerTy() ||
295 shouldChangeType(CI
.getType(), Src
->getType()))
296 if (Instruction
*NV
= foldOpIntoPhi(CI
, PN
))
303 /// Constants and extensions/truncates from the destination type are always
304 /// free to be evaluated in that type. This is a helper for canEvaluate*.
305 static bool canAlwaysEvaluateInType(Value
*V
, Type
*Ty
) {
306 if (isa
<Constant
>(V
))
309 if ((match(V
, m_ZExtOrSExt(m_Value(X
))) || match(V
, m_Trunc(m_Value(X
)))) &&
316 /// Filter out values that we can not evaluate in the destination type for free.
317 /// This is a helper for canEvaluate*.
318 static bool canNotEvaluateInType(Value
*V
, Type
*Ty
) {
319 assert(!isa
<Constant
>(V
) && "Constant should already be handled.");
320 if (!isa
<Instruction
>(V
))
322 // We don't extend or shrink something that has multiple uses -- doing so
323 // would require duplicating the instruction which isn't profitable.
330 /// Return true if we can evaluate the specified expression tree as type Ty
331 /// instead of its larger type, and arrive with the same value.
332 /// This is used by code that tries to eliminate truncates.
334 /// Ty will always be a type smaller than V. We should return true if trunc(V)
335 /// can be computed by computing V in the smaller type. If V is an instruction,
336 /// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
337 /// makes sense if x and y can be efficiently truncated.
339 /// This function works on both vectors and scalars.
341 static bool canEvaluateTruncated(Value
*V
, Type
*Ty
, InstCombiner
&IC
,
343 if (canAlwaysEvaluateInType(V
, Ty
))
345 if (canNotEvaluateInType(V
, Ty
))
348 auto *I
= cast
<Instruction
>(V
);
349 Type
*OrigTy
= V
->getType();
350 switch (I
->getOpcode()) {
351 case Instruction::Add
:
352 case Instruction::Sub
:
353 case Instruction::Mul
:
354 case Instruction::And
:
355 case Instruction::Or
:
356 case Instruction::Xor
:
357 // These operators can all arbitrarily be extended or truncated.
358 return canEvaluateTruncated(I
->getOperand(0), Ty
, IC
, CxtI
) &&
359 canEvaluateTruncated(I
->getOperand(1), Ty
, IC
, CxtI
);
361 case Instruction::UDiv
:
362 case Instruction::URem
: {
363 // UDiv and URem can be truncated if all the truncated bits are zero.
364 uint32_t OrigBitWidth
= OrigTy
->getScalarSizeInBits();
365 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
366 assert(BitWidth
< OrigBitWidth
&& "Unexpected bitwidths!");
367 APInt Mask
= APInt::getBitsSetFrom(OrigBitWidth
, BitWidth
);
368 if (IC
.MaskedValueIsZero(I
->getOperand(0), Mask
, 0, CxtI
) &&
369 IC
.MaskedValueIsZero(I
->getOperand(1), Mask
, 0, CxtI
)) {
370 return canEvaluateTruncated(I
->getOperand(0), Ty
, IC
, CxtI
) &&
371 canEvaluateTruncated(I
->getOperand(1), Ty
, IC
, CxtI
);
375 case Instruction::Shl
: {
376 // If we are truncating the result of this SHL, and if it's a shift of a
377 // constant amount, we can always perform a SHL in a smaller type.
379 if (match(I
->getOperand(1), m_APInt(Amt
))) {
380 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
381 if (Amt
->getLimitedValue(BitWidth
) < BitWidth
)
382 return canEvaluateTruncated(I
->getOperand(0), Ty
, IC
, CxtI
);
386 case Instruction::LShr
: {
387 // If this is a truncate of a logical shr, we can truncate it to a smaller
388 // lshr iff we know that the bits we would otherwise be shifting in are
391 if (match(I
->getOperand(1), m_APInt(Amt
))) {
392 uint32_t OrigBitWidth
= OrigTy
->getScalarSizeInBits();
393 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
394 if (Amt
->getLimitedValue(BitWidth
) < BitWidth
&&
395 IC
.MaskedValueIsZero(I
->getOperand(0),
396 APInt::getBitsSetFrom(OrigBitWidth
, BitWidth
), 0, CxtI
)) {
397 return canEvaluateTruncated(I
->getOperand(0), Ty
, IC
, CxtI
);
402 case Instruction::AShr
: {
403 // If this is a truncate of an arithmetic shr, we can truncate it to a
404 // smaller ashr iff we know that all the bits from the sign bit of the
405 // original type and the sign bit of the truncate type are similar.
406 // TODO: It is enough to check that the bits we would be shifting in are
407 // similar to sign bit of the truncate type.
409 if (match(I
->getOperand(1), m_APInt(Amt
))) {
410 uint32_t OrigBitWidth
= OrigTy
->getScalarSizeInBits();
411 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
412 if (Amt
->getLimitedValue(BitWidth
) < BitWidth
&&
413 OrigBitWidth
- BitWidth
<
414 IC
.ComputeNumSignBits(I
->getOperand(0), 0, CxtI
))
415 return canEvaluateTruncated(I
->getOperand(0), Ty
, IC
, CxtI
);
419 case Instruction::Trunc
:
420 // trunc(trunc(x)) -> trunc(x)
422 case Instruction::ZExt
:
423 case Instruction::SExt
:
424 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
425 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
427 case Instruction::Select
: {
428 SelectInst
*SI
= cast
<SelectInst
>(I
);
429 return canEvaluateTruncated(SI
->getTrueValue(), Ty
, IC
, CxtI
) &&
430 canEvaluateTruncated(SI
->getFalseValue(), Ty
, IC
, CxtI
);
432 case Instruction::PHI
: {
433 // We can change a phi if we can change all operands. Note that we never
434 // get into trouble with cyclic PHIs here because we only consider
435 // instructions with a single use.
436 PHINode
*PN
= cast
<PHINode
>(I
);
437 for (Value
*IncValue
: PN
->incoming_values())
438 if (!canEvaluateTruncated(IncValue
, Ty
, IC
, CxtI
))
443 // TODO: Can handle more cases here.
450 /// Given a vector that is bitcast to an integer, optionally logically
451 /// right-shifted, and truncated, convert it to an extractelement.
452 /// Example (big endian):
453 /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
455 /// extractelement <4 x i32> %X, 1
456 static Instruction
*foldVecTruncToExtElt(TruncInst
&Trunc
, InstCombiner
&IC
) {
457 Value
*TruncOp
= Trunc
.getOperand(0);
458 Type
*DestType
= Trunc
.getType();
459 if (!TruncOp
->hasOneUse() || !isa
<IntegerType
>(DestType
))
462 Value
*VecInput
= nullptr;
463 ConstantInt
*ShiftVal
= nullptr;
464 if (!match(TruncOp
, m_CombineOr(m_BitCast(m_Value(VecInput
)),
465 m_LShr(m_BitCast(m_Value(VecInput
)),
466 m_ConstantInt(ShiftVal
)))) ||
467 !isa
<VectorType
>(VecInput
->getType()))
470 VectorType
*VecType
= cast
<VectorType
>(VecInput
->getType());
471 unsigned VecWidth
= VecType
->getPrimitiveSizeInBits();
472 unsigned DestWidth
= DestType
->getPrimitiveSizeInBits();
473 unsigned ShiftAmount
= ShiftVal
? ShiftVal
->getZExtValue() : 0;
475 if ((VecWidth
% DestWidth
!= 0) || (ShiftAmount
% DestWidth
!= 0))
478 // If the element type of the vector doesn't match the result type,
479 // bitcast it to a vector type that we can extract from.
480 unsigned NumVecElts
= VecWidth
/ DestWidth
;
481 if (VecType
->getElementType() != DestType
) {
482 VecType
= VectorType::get(DestType
, NumVecElts
);
483 VecInput
= IC
.Builder
.CreateBitCast(VecInput
, VecType
, "bc");
486 unsigned Elt
= ShiftAmount
/ DestWidth
;
487 if (IC
.getDataLayout().isBigEndian())
488 Elt
= NumVecElts
- 1 - Elt
;
490 return ExtractElementInst::Create(VecInput
, IC
.Builder
.getInt32(Elt
));
493 /// Rotate left/right may occur in a wider type than necessary because of type
494 /// promotion rules. Try to narrow the inputs and convert to funnel shift.
495 Instruction
*InstCombiner::narrowRotate(TruncInst
&Trunc
) {
496 assert((isa
<VectorType
>(Trunc
.getSrcTy()) ||
497 shouldChangeType(Trunc
.getSrcTy(), Trunc
.getType())) &&
498 "Don't narrow to an illegal scalar type");
500 // Bail out on strange types. It is possible to handle some of these patterns
501 // even with non-power-of-2 sizes, but it is not a likely scenario.
502 Type
*DestTy
= Trunc
.getType();
503 unsigned NarrowWidth
= DestTy
->getScalarSizeInBits();
504 if (!isPowerOf2_32(NarrowWidth
))
507 // First, find an or'd pair of opposite shifts with the same shifted operand:
508 // trunc (or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1))
510 if (!match(Trunc
.getOperand(0), m_OneUse(m_Or(m_Value(Or0
), m_Value(Or1
)))))
513 Value
*ShVal
, *ShAmt0
, *ShAmt1
;
514 if (!match(Or0
, m_OneUse(m_LogicalShift(m_Value(ShVal
), m_Value(ShAmt0
)))) ||
515 !match(Or1
, m_OneUse(m_LogicalShift(m_Specific(ShVal
), m_Value(ShAmt1
)))))
518 auto ShiftOpcode0
= cast
<BinaryOperator
>(Or0
)->getOpcode();
519 auto ShiftOpcode1
= cast
<BinaryOperator
>(Or1
)->getOpcode();
520 if (ShiftOpcode0
== ShiftOpcode1
)
523 // Match the shift amount operands for a rotate pattern. This always matches
524 // a subtraction on the R operand.
525 auto matchShiftAmount
= [](Value
*L
, Value
*R
, unsigned Width
) -> Value
* {
526 // The shift amounts may add up to the narrow bit width:
527 // (shl ShVal, L) | (lshr ShVal, Width - L)
528 if (match(R
, m_OneUse(m_Sub(m_SpecificInt(Width
), m_Specific(L
)))))
531 // The shift amount may be masked with negation:
532 // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
534 unsigned Mask
= Width
- 1;
535 if (match(L
, m_And(m_Value(X
), m_SpecificInt(Mask
))) &&
536 match(R
, m_And(m_Neg(m_Specific(X
)), m_SpecificInt(Mask
))))
539 // Same as above, but the shift amount may be extended after masking:
540 if (match(L
, m_ZExt(m_And(m_Value(X
), m_SpecificInt(Mask
)))) &&
541 match(R
, m_ZExt(m_And(m_Neg(m_Specific(X
)), m_SpecificInt(Mask
)))))
547 Value
*ShAmt
= matchShiftAmount(ShAmt0
, ShAmt1
, NarrowWidth
);
548 bool SubIsOnLHS
= false;
550 ShAmt
= matchShiftAmount(ShAmt1
, ShAmt0
, NarrowWidth
);
556 // The shifted value must have high zeros in the wide type. Typically, this
557 // will be a zext, but it could also be the result of an 'and' or 'shift'.
558 unsigned WideWidth
= Trunc
.getSrcTy()->getScalarSizeInBits();
559 APInt HiBitMask
= APInt::getHighBitsSet(WideWidth
, WideWidth
- NarrowWidth
);
560 if (!MaskedValueIsZero(ShVal
, HiBitMask
, 0, &Trunc
))
563 // We have an unnecessarily wide rotate!
564 // trunc (or (lshr ShVal, ShAmt), (shl ShVal, BitWidth - ShAmt))
565 // Narrow the inputs and convert to funnel shift intrinsic:
566 // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
567 Value
*NarrowShAmt
= Builder
.CreateTrunc(ShAmt
, DestTy
);
568 Value
*X
= Builder
.CreateTrunc(ShVal
, DestTy
);
569 bool IsFshl
= (!SubIsOnLHS
&& ShiftOpcode0
== BinaryOperator::Shl
) ||
570 (SubIsOnLHS
&& ShiftOpcode1
== BinaryOperator::Shl
);
571 Intrinsic::ID IID
= IsFshl
? Intrinsic::fshl
: Intrinsic::fshr
;
572 Function
*F
= Intrinsic::getDeclaration(Trunc
.getModule(), IID
, DestTy
);
573 return IntrinsicInst::Create(F
, { X
, X
, NarrowShAmt
});
576 /// Try to narrow the width of math or bitwise logic instructions by pulling a
577 /// truncate ahead of binary operators.
578 /// TODO: Transforms for truncated shifts should be moved into here.
579 Instruction
*InstCombiner::narrowBinOp(TruncInst
&Trunc
) {
580 Type
*SrcTy
= Trunc
.getSrcTy();
581 Type
*DestTy
= Trunc
.getType();
582 if (!isa
<VectorType
>(SrcTy
) && !shouldChangeType(SrcTy
, DestTy
))
585 BinaryOperator
*BinOp
;
586 if (!match(Trunc
.getOperand(0), m_OneUse(m_BinOp(BinOp
))))
589 Value
*BinOp0
= BinOp
->getOperand(0);
590 Value
*BinOp1
= BinOp
->getOperand(1);
591 switch (BinOp
->getOpcode()) {
592 case Instruction::And
:
593 case Instruction::Or
:
594 case Instruction::Xor
:
595 case Instruction::Add
:
596 case Instruction::Sub
:
597 case Instruction::Mul
: {
599 if (match(BinOp0
, m_Constant(C
))) {
600 // trunc (binop C, X) --> binop (trunc C', X)
601 Constant
*NarrowC
= ConstantExpr::getTrunc(C
, DestTy
);
602 Value
*TruncX
= Builder
.CreateTrunc(BinOp1
, DestTy
);
603 return BinaryOperator::Create(BinOp
->getOpcode(), NarrowC
, TruncX
);
605 if (match(BinOp1
, m_Constant(C
))) {
606 // trunc (binop X, C) --> binop (trunc X, C')
607 Constant
*NarrowC
= ConstantExpr::getTrunc(C
, DestTy
);
608 Value
*TruncX
= Builder
.CreateTrunc(BinOp0
, DestTy
);
609 return BinaryOperator::Create(BinOp
->getOpcode(), TruncX
, NarrowC
);
612 if (match(BinOp0
, m_ZExtOrSExt(m_Value(X
))) && X
->getType() == DestTy
) {
613 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
614 Value
*NarrowOp1
= Builder
.CreateTrunc(BinOp1
, DestTy
);
615 return BinaryOperator::Create(BinOp
->getOpcode(), X
, NarrowOp1
);
617 if (match(BinOp1
, m_ZExtOrSExt(m_Value(X
))) && X
->getType() == DestTy
) {
618 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
619 Value
*NarrowOp0
= Builder
.CreateTrunc(BinOp0
, DestTy
);
620 return BinaryOperator::Create(BinOp
->getOpcode(), NarrowOp0
, X
);
628 if (Instruction
*NarrowOr
= narrowRotate(Trunc
))
634 /// Try to narrow the width of a splat shuffle. This could be generalized to any
635 /// shuffle with a constant operand, but we limit the transform to avoid
636 /// creating a shuffle type that targets may not be able to lower effectively.
637 static Instruction
*shrinkSplatShuffle(TruncInst
&Trunc
,
638 InstCombiner::BuilderTy
&Builder
) {
639 auto *Shuf
= dyn_cast
<ShuffleVectorInst
>(Trunc
.getOperand(0));
640 if (Shuf
&& Shuf
->hasOneUse() && isa
<UndefValue
>(Shuf
->getOperand(1)) &&
641 Shuf
->getMask()->getSplatValue() &&
642 Shuf
->getType() == Shuf
->getOperand(0)->getType()) {
643 // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Undef, SplatMask
644 Constant
*NarrowUndef
= UndefValue::get(Trunc
.getType());
645 Value
*NarrowOp
= Builder
.CreateTrunc(Shuf
->getOperand(0), Trunc
.getType());
646 return new ShuffleVectorInst(NarrowOp
, NarrowUndef
, Shuf
->getMask());
652 /// Try to narrow the width of an insert element. This could be generalized for
653 /// any vector constant, but we limit the transform to insertion into undef to
654 /// avoid potential backend problems from unsupported insertion widths. This
655 /// could also be extended to handle the case of inserting a scalar constant
656 /// into a vector variable.
657 static Instruction
*shrinkInsertElt(CastInst
&Trunc
,
658 InstCombiner::BuilderTy
&Builder
) {
659 Instruction::CastOps Opcode
= Trunc
.getOpcode();
660 assert((Opcode
== Instruction::Trunc
|| Opcode
== Instruction::FPTrunc
) &&
661 "Unexpected instruction for shrinking");
663 auto *InsElt
= dyn_cast
<InsertElementInst
>(Trunc
.getOperand(0));
664 if (!InsElt
|| !InsElt
->hasOneUse())
667 Type
*DestTy
= Trunc
.getType();
668 Type
*DestScalarTy
= DestTy
->getScalarType();
669 Value
*VecOp
= InsElt
->getOperand(0);
670 Value
*ScalarOp
= InsElt
->getOperand(1);
671 Value
*Index
= InsElt
->getOperand(2);
673 if (isa
<UndefValue
>(VecOp
)) {
674 // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
675 // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
676 UndefValue
*NarrowUndef
= UndefValue::get(DestTy
);
677 Value
*NarrowOp
= Builder
.CreateCast(Opcode
, ScalarOp
, DestScalarTy
);
678 return InsertElementInst::Create(NarrowUndef
, NarrowOp
, Index
);
684 Instruction
*InstCombiner::visitTrunc(TruncInst
&CI
) {
685 if (Instruction
*Result
= commonCastTransforms(CI
))
688 Value
*Src
= CI
.getOperand(0);
689 Type
*DestTy
= CI
.getType(), *SrcTy
= Src
->getType();
691 // Attempt to truncate the entire input expression tree to the destination
692 // type. Only do this if the dest type is a simple type, don't convert the
693 // expression tree to something weird like i93 unless the source is also
695 if ((DestTy
->isVectorTy() || shouldChangeType(SrcTy
, DestTy
)) &&
696 canEvaluateTruncated(Src
, DestTy
, *this, &CI
)) {
698 // If this cast is a truncate, evaluting in a different type always
699 // eliminates the cast, so it is always a win.
701 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
704 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, false);
705 assert(Res
->getType() == DestTy
);
706 return replaceInstUsesWith(CI
, Res
);
709 // Test if the trunc is the user of a select which is part of a
710 // minimum or maximum operation. If so, don't do any more simplification.
711 // Even simplifying demanded bits can break the canonical form of a
714 if (SelectInst
*SI
= dyn_cast
<SelectInst
>(CI
.getOperand(0)))
715 if (matchSelectPattern(SI
, LHS
, RHS
).Flavor
!= SPF_UNKNOWN
)
718 // See if we can simplify any instructions used by the input whose sole
719 // purpose is to compute bits we don't care about.
720 if (SimplifyDemandedInstructionBits(CI
))
723 if (DestTy
->getScalarSizeInBits() == 1) {
724 Value
*Zero
= Constant::getNullValue(Src
->getType());
725 if (DestTy
->isIntegerTy()) {
726 // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
727 // TODO: We canonicalize to more instructions here because we are probably
728 // lacking equivalent analysis for trunc relative to icmp. There may also
729 // be codegen concerns. If those trunc limitations were removed, we could
730 // remove this transform.
731 Value
*And
= Builder
.CreateAnd(Src
, ConstantInt::get(SrcTy
, 1));
732 return new ICmpInst(ICmpInst::ICMP_NE
, And
, Zero
);
735 // For vectors, we do not canonicalize all truncs to icmp, so optimize
736 // patterns that would be covered within visitICmpInst.
739 if (match(Src
, m_OneUse(m_LShr(m_Value(X
), m_APInt(C
))))) {
740 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
741 APInt MaskC
= APInt(SrcTy
->getScalarSizeInBits(), 1).shl(*C
);
742 Value
*And
= Builder
.CreateAnd(X
, ConstantInt::get(SrcTy
, MaskC
));
743 return new ICmpInst(ICmpInst::ICMP_NE
, And
, Zero
);
745 if (match(Src
, m_OneUse(m_c_Or(m_LShr(m_Value(X
), m_APInt(C
)),
747 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
748 APInt MaskC
= APInt(SrcTy
->getScalarSizeInBits(), 1).shl(*C
) | 1;
749 Value
*And
= Builder
.CreateAnd(X
, ConstantInt::get(SrcTy
, MaskC
));
750 return new ICmpInst(ICmpInst::ICMP_NE
, And
, Zero
);
754 // FIXME: Maybe combine the next two transforms to handle the no cast case
755 // more efficiently. Support vector types. Cleanup code by using m_OneUse.
757 // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
758 Value
*A
= nullptr; ConstantInt
*Cst
= nullptr;
759 if (Src
->hasOneUse() &&
760 match(Src
, m_LShr(m_ZExt(m_Value(A
)), m_ConstantInt(Cst
)))) {
761 // We have three types to worry about here, the type of A, the source of
762 // the truncate (MidSize), and the destination of the truncate. We know that
763 // ASize < MidSize and MidSize > ResultSize, but don't know the relation
764 // between ASize and ResultSize.
765 unsigned ASize
= A
->getType()->getPrimitiveSizeInBits();
767 // If the shift amount is larger than the size of A, then the result is
768 // known to be zero because all the input bits got shifted out.
769 if (Cst
->getZExtValue() >= ASize
)
770 return replaceInstUsesWith(CI
, Constant::getNullValue(DestTy
));
772 // Since we're doing an lshr and a zero extend, and know that the shift
773 // amount is smaller than ASize, it is always safe to do the shift in A's
774 // type, then zero extend or truncate to the result.
775 Value
*Shift
= Builder
.CreateLShr(A
, Cst
->getZExtValue());
776 Shift
->takeName(Src
);
777 return CastInst::CreateIntegerCast(Shift
, DestTy
, false);
780 // FIXME: We should canonicalize to zext/trunc and remove this transform.
781 // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
783 // It works because bits coming from sign extension have the same value as
784 // the sign bit of the original value; performing ashr instead of lshr
785 // generates bits of the same value as the sign bit.
786 if (Src
->hasOneUse() &&
787 match(Src
, m_LShr(m_SExt(m_Value(A
)), m_ConstantInt(Cst
)))) {
788 Value
*SExt
= cast
<Instruction
>(Src
)->getOperand(0);
789 const unsigned SExtSize
= SExt
->getType()->getPrimitiveSizeInBits();
790 const unsigned ASize
= A
->getType()->getPrimitiveSizeInBits();
791 const unsigned CISize
= CI
.getType()->getPrimitiveSizeInBits();
792 const unsigned MaxAmt
= SExtSize
- std::max(CISize
, ASize
);
793 unsigned ShiftAmt
= Cst
->getZExtValue();
795 // This optimization can be only performed when zero bits generated by
796 // the original lshr aren't pulled into the value after truncation, so we
797 // can only shift by values no larger than the number of extension bits.
798 // FIXME: Instead of bailing when the shift is too large, use and to clear
800 if (ShiftAmt
<= MaxAmt
) {
802 return BinaryOperator::CreateAShr(A
, ConstantInt::get(CI
.getType(),
803 std::min(ShiftAmt
, ASize
- 1)));
804 if (SExt
->hasOneUse()) {
805 Value
*Shift
= Builder
.CreateAShr(A
, std::min(ShiftAmt
, ASize
- 1));
806 Shift
->takeName(Src
);
807 return CastInst::CreateIntegerCast(Shift
, CI
.getType(), true);
812 if (Instruction
*I
= narrowBinOp(CI
))
815 if (Instruction
*I
= shrinkSplatShuffle(CI
, Builder
))
818 if (Instruction
*I
= shrinkInsertElt(CI
, Builder
))
821 if (Src
->hasOneUse() && isa
<IntegerType
>(SrcTy
) &&
822 shouldChangeType(SrcTy
, DestTy
)) {
823 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
824 // dest type is native and cst < dest size.
825 if (match(Src
, m_Shl(m_Value(A
), m_ConstantInt(Cst
))) &&
826 !match(A
, m_Shr(m_Value(), m_Constant()))) {
827 // Skip shifts of shift by constants. It undoes a combine in
828 // FoldShiftByConstant and is the extend in reg pattern.
829 const unsigned DestSize
= DestTy
->getScalarSizeInBits();
830 if (Cst
->getValue().ult(DestSize
)) {
831 Value
*NewTrunc
= Builder
.CreateTrunc(A
, DestTy
, A
->getName() + ".tr");
833 return BinaryOperator::Create(
834 Instruction::Shl
, NewTrunc
,
835 ConstantInt::get(DestTy
, Cst
->getValue().trunc(DestSize
)));
840 if (Instruction
*I
= foldVecTruncToExtElt(CI
, *this))
846 Instruction
*InstCombiner::transformZExtICmp(ICmpInst
*ICI
, ZExtInst
&CI
,
848 // If we are just checking for a icmp eq of a single bit and zext'ing it
849 // to an integer, then shift the bit to the appropriate place and then
850 // cast to integer to avoid the comparison.
852 if (match(ICI
->getOperand(1), m_APInt(Op1CV
))) {
854 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
855 // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
856 if ((ICI
->getPredicate() == ICmpInst::ICMP_SLT
&& Op1CV
->isNullValue()) ||
857 (ICI
->getPredicate() == ICmpInst::ICMP_SGT
&& Op1CV
->isAllOnesValue())) {
858 if (!DoTransform
) return ICI
;
860 Value
*In
= ICI
->getOperand(0);
861 Value
*Sh
= ConstantInt::get(In
->getType(),
862 In
->getType()->getScalarSizeInBits() - 1);
863 In
= Builder
.CreateLShr(In
, Sh
, In
->getName() + ".lobit");
864 if (In
->getType() != CI
.getType())
865 In
= Builder
.CreateIntCast(In
, CI
.getType(), false /*ZExt*/);
867 if (ICI
->getPredicate() == ICmpInst::ICMP_SGT
) {
868 Constant
*One
= ConstantInt::get(In
->getType(), 1);
869 In
= Builder
.CreateXor(In
, One
, In
->getName() + ".not");
872 return replaceInstUsesWith(CI
, In
);
875 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
876 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
877 // zext (X == 1) to i32 --> X iff X has only the low bit set.
878 // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
879 // zext (X != 0) to i32 --> X iff X has only the low bit set.
880 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
881 // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
882 // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
883 if ((Op1CV
->isNullValue() || Op1CV
->isPowerOf2()) &&
884 // This only works for EQ and NE
886 // If Op1C some other power of two, convert:
887 KnownBits Known
= computeKnownBits(ICI
->getOperand(0), 0, &CI
);
889 APInt
KnownZeroMask(~Known
.Zero
);
890 if (KnownZeroMask
.isPowerOf2()) { // Exactly 1 possible 1?
891 if (!DoTransform
) return ICI
;
893 bool isNE
= ICI
->getPredicate() == ICmpInst::ICMP_NE
;
894 if (!Op1CV
->isNullValue() && (*Op1CV
!= KnownZeroMask
)) {
895 // (X&4) == 2 --> false
896 // (X&4) != 2 --> true
897 Constant
*Res
= ConstantInt::get(CI
.getType(), isNE
);
898 return replaceInstUsesWith(CI
, Res
);
901 uint32_t ShAmt
= KnownZeroMask
.logBase2();
902 Value
*In
= ICI
->getOperand(0);
904 // Perform a logical shr by shiftamt.
905 // Insert the shift to put the result in the low bit.
906 In
= Builder
.CreateLShr(In
, ConstantInt::get(In
->getType(), ShAmt
),
907 In
->getName() + ".lobit");
910 if (!Op1CV
->isNullValue() == isNE
) { // Toggle the low bit.
911 Constant
*One
= ConstantInt::get(In
->getType(), 1);
912 In
= Builder
.CreateXor(In
, One
);
915 if (CI
.getType() == In
->getType())
916 return replaceInstUsesWith(CI
, In
);
918 Value
*IntCast
= Builder
.CreateIntCast(In
, CI
.getType(), false);
919 return replaceInstUsesWith(CI
, IntCast
);
924 // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
925 // It is also profitable to transform icmp eq into not(xor(A, B)) because that
926 // may lead to additional simplifications.
927 if (ICI
->isEquality() && CI
.getType() == ICI
->getOperand(0)->getType()) {
928 if (IntegerType
*ITy
= dyn_cast
<IntegerType
>(CI
.getType())) {
929 Value
*LHS
= ICI
->getOperand(0);
930 Value
*RHS
= ICI
->getOperand(1);
932 KnownBits KnownLHS
= computeKnownBits(LHS
, 0, &CI
);
933 KnownBits KnownRHS
= computeKnownBits(RHS
, 0, &CI
);
935 if (KnownLHS
.Zero
== KnownRHS
.Zero
&& KnownLHS
.One
== KnownRHS
.One
) {
936 APInt KnownBits
= KnownLHS
.Zero
| KnownLHS
.One
;
937 APInt UnknownBit
= ~KnownBits
;
938 if (UnknownBit
.countPopulation() == 1) {
939 if (!DoTransform
) return ICI
;
941 Value
*Result
= Builder
.CreateXor(LHS
, RHS
);
943 // Mask off any bits that are set and won't be shifted away.
944 if (KnownLHS
.One
.uge(UnknownBit
))
945 Result
= Builder
.CreateAnd(Result
,
946 ConstantInt::get(ITy
, UnknownBit
));
948 // Shift the bit we're testing down to the lsb.
949 Result
= Builder
.CreateLShr(
950 Result
, ConstantInt::get(ITy
, UnknownBit
.countTrailingZeros()));
952 if (ICI
->getPredicate() == ICmpInst::ICMP_EQ
)
953 Result
= Builder
.CreateXor(Result
, ConstantInt::get(ITy
, 1));
954 Result
->takeName(ICI
);
955 return replaceInstUsesWith(CI
, Result
);
964 /// Determine if the specified value can be computed in the specified wider type
965 /// and produce the same low bits. If not, return false.
967 /// If this function returns true, it can also return a non-zero number of bits
968 /// (in BitsToClear) which indicates that the value it computes is correct for
969 /// the zero extend, but that the additional BitsToClear bits need to be zero'd
970 /// out. For example, to promote something like:
972 /// %B = trunc i64 %A to i32
973 /// %C = lshr i32 %B, 8
974 /// %E = zext i32 %C to i64
976 /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
977 /// set to 8 to indicate that the promoted value needs to have bits 24-31
978 /// cleared in addition to bits 32-63. Since an 'and' will be generated to
979 /// clear the top bits anyway, doing this has no extra cost.
981 /// This function works on both vectors and scalars.
982 static bool canEvaluateZExtd(Value
*V
, Type
*Ty
, unsigned &BitsToClear
,
983 InstCombiner
&IC
, Instruction
*CxtI
) {
985 if (canAlwaysEvaluateInType(V
, Ty
))
987 if (canNotEvaluateInType(V
, Ty
))
990 auto *I
= cast
<Instruction
>(V
);
992 switch (I
->getOpcode()) {
993 case Instruction::ZExt
: // zext(zext(x)) -> zext(x).
994 case Instruction::SExt
: // zext(sext(x)) -> sext(x).
995 case Instruction::Trunc
: // zext(trunc(x)) -> trunc(x) or zext(x)
997 case Instruction::And
:
998 case Instruction::Or
:
999 case Instruction::Xor
:
1000 case Instruction::Add
:
1001 case Instruction::Sub
:
1002 case Instruction::Mul
:
1003 if (!canEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
, IC
, CxtI
) ||
1004 !canEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
, IC
, CxtI
))
1006 // These can all be promoted if neither operand has 'bits to clear'.
1007 if (BitsToClear
== 0 && Tmp
== 0)
1010 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1011 // other side, BitsToClear is ok.
1012 if (Tmp
== 0 && I
->isBitwiseLogicOp()) {
1013 // We use MaskedValueIsZero here for generality, but the case we care
1014 // about the most is constant RHS.
1015 unsigned VSize
= V
->getType()->getScalarSizeInBits();
1016 if (IC
.MaskedValueIsZero(I
->getOperand(1),
1017 APInt::getHighBitsSet(VSize
, BitsToClear
),
1019 // If this is an And instruction and all of the BitsToClear are
1020 // known to be zero we can reset BitsToClear.
1021 if (I
->getOpcode() == Instruction::And
)
1027 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1030 case Instruction::Shl
: {
1031 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1032 // upper bits we can reduce BitsToClear by the shift amount.
1034 if (match(I
->getOperand(1), m_APInt(Amt
))) {
1035 if (!canEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
, IC
, CxtI
))
1037 uint64_t ShiftAmt
= Amt
->getZExtValue();
1038 BitsToClear
= ShiftAmt
< BitsToClear
? BitsToClear
- ShiftAmt
: 0;
1043 case Instruction::LShr
: {
1044 // We can promote lshr(x, cst) if we can promote x. This requires the
1045 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1047 if (match(I
->getOperand(1), m_APInt(Amt
))) {
1048 if (!canEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
, IC
, CxtI
))
1050 BitsToClear
+= Amt
->getZExtValue();
1051 if (BitsToClear
> V
->getType()->getScalarSizeInBits())
1052 BitsToClear
= V
->getType()->getScalarSizeInBits();
1055 // Cannot promote variable LSHR.
1058 case Instruction::Select
:
1059 if (!canEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
, IC
, CxtI
) ||
1060 !canEvaluateZExtd(I
->getOperand(2), Ty
, BitsToClear
, IC
, CxtI
) ||
1061 // TODO: If important, we could handle the case when the BitsToClear are
1062 // known zero in the disagreeing side.
1067 case Instruction::PHI
: {
1068 // We can change a phi if we can change all operands. Note that we never
1069 // get into trouble with cyclic PHIs here because we only consider
1070 // instructions with a single use.
1071 PHINode
*PN
= cast
<PHINode
>(I
);
1072 if (!canEvaluateZExtd(PN
->getIncomingValue(0), Ty
, BitsToClear
, IC
, CxtI
))
1074 for (unsigned i
= 1, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
1075 if (!canEvaluateZExtd(PN
->getIncomingValue(i
), Ty
, Tmp
, IC
, CxtI
) ||
1076 // TODO: If important, we could handle the case when the BitsToClear
1077 // are known zero in the disagreeing input.
1083 // TODO: Can handle more cases here.
1088 Instruction
*InstCombiner::visitZExt(ZExtInst
&CI
) {
1089 // If this zero extend is only used by a truncate, let the truncate be
1090 // eliminated before we try to optimize this zext.
1091 if (CI
.hasOneUse() && isa
<TruncInst
>(CI
.user_back()))
1094 // If one of the common conversion will work, do it.
1095 if (Instruction
*Result
= commonCastTransforms(CI
))
1098 Value
*Src
= CI
.getOperand(0);
1099 Type
*SrcTy
= Src
->getType(), *DestTy
= CI
.getType();
1101 // Try to extend the entire expression tree to the wide destination type.
1102 unsigned BitsToClear
;
1103 if (shouldChangeType(SrcTy
, DestTy
) &&
1104 canEvaluateZExtd(Src
, DestTy
, BitsToClear
, *this, &CI
)) {
1105 assert(BitsToClear
<= SrcTy
->getScalarSizeInBits() &&
1106 "Can't clear more bits than in SrcTy");
1108 // Okay, we can transform this! Insert the new expression now.
1110 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1111 " to avoid zero extend: "
1113 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, false);
1114 assert(Res
->getType() == DestTy
);
1116 // Preserve debug values referring to Src if the zext is its last use.
1117 if (auto *SrcOp
= dyn_cast
<Instruction
>(Src
))
1118 if (SrcOp
->hasOneUse())
1119 replaceAllDbgUsesWith(*SrcOp
, *Res
, CI
, DT
);
1121 uint32_t SrcBitsKept
= SrcTy
->getScalarSizeInBits()-BitsToClear
;
1122 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
1124 // If the high bits are already filled with zeros, just replace this
1125 // cast with the result.
1126 if (MaskedValueIsZero(Res
,
1127 APInt::getHighBitsSet(DestBitSize
,
1128 DestBitSize
-SrcBitsKept
),
1130 return replaceInstUsesWith(CI
, Res
);
1132 // We need to emit an AND to clear the high bits.
1133 Constant
*C
= ConstantInt::get(Res
->getType(),
1134 APInt::getLowBitsSet(DestBitSize
, SrcBitsKept
));
1135 return BinaryOperator::CreateAnd(Res
, C
);
1138 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1139 // types and if the sizes are just right we can convert this into a logical
1140 // 'and' which will be much cheaper than the pair of casts.
1141 if (TruncInst
*CSrc
= dyn_cast
<TruncInst
>(Src
)) { // A->B->C cast
1142 // TODO: Subsume this into EvaluateInDifferentType.
1144 // Get the sizes of the types involved. We know that the intermediate type
1145 // will be smaller than A or C, but don't know the relation between A and C.
1146 Value
*A
= CSrc
->getOperand(0);
1147 unsigned SrcSize
= A
->getType()->getScalarSizeInBits();
1148 unsigned MidSize
= CSrc
->getType()->getScalarSizeInBits();
1149 unsigned DstSize
= CI
.getType()->getScalarSizeInBits();
1150 // If we're actually extending zero bits, then if
1151 // SrcSize < DstSize: zext(a & mask)
1152 // SrcSize == DstSize: a & mask
1153 // SrcSize > DstSize: trunc(a) & mask
1154 if (SrcSize
< DstSize
) {
1155 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
1156 Constant
*AndConst
= ConstantInt::get(A
->getType(), AndValue
);
1157 Value
*And
= Builder
.CreateAnd(A
, AndConst
, CSrc
->getName() + ".mask");
1158 return new ZExtInst(And
, CI
.getType());
1161 if (SrcSize
== DstSize
) {
1162 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
1163 return BinaryOperator::CreateAnd(A
, ConstantInt::get(A
->getType(),
1166 if (SrcSize
> DstSize
) {
1167 Value
*Trunc
= Builder
.CreateTrunc(A
, CI
.getType());
1168 APInt
AndValue(APInt::getLowBitsSet(DstSize
, MidSize
));
1169 return BinaryOperator::CreateAnd(Trunc
,
1170 ConstantInt::get(Trunc
->getType(),
1175 if (ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(Src
))
1176 return transformZExtICmp(ICI
, CI
);
1178 BinaryOperator
*SrcI
= dyn_cast
<BinaryOperator
>(Src
);
1179 if (SrcI
&& SrcI
->getOpcode() == Instruction::Or
) {
1180 // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
1181 // of the (zext icmp) can be eliminated. If so, immediately perform the
1182 // according elimination.
1183 ICmpInst
*LHS
= dyn_cast
<ICmpInst
>(SrcI
->getOperand(0));
1184 ICmpInst
*RHS
= dyn_cast
<ICmpInst
>(SrcI
->getOperand(1));
1185 if (LHS
&& RHS
&& LHS
->hasOneUse() && RHS
->hasOneUse() &&
1186 (transformZExtICmp(LHS
, CI
, false) ||
1187 transformZExtICmp(RHS
, CI
, false))) {
1188 // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
1189 Value
*LCast
= Builder
.CreateZExt(LHS
, CI
.getType(), LHS
->getName());
1190 Value
*RCast
= Builder
.CreateZExt(RHS
, CI
.getType(), RHS
->getName());
1191 BinaryOperator
*Or
= BinaryOperator::Create(Instruction::Or
, LCast
, RCast
);
1193 // Perform the elimination.
1194 if (auto *LZExt
= dyn_cast
<ZExtInst
>(LCast
))
1195 transformZExtICmp(LHS
, *LZExt
);
1196 if (auto *RZExt
= dyn_cast
<ZExtInst
>(RCast
))
1197 transformZExtICmp(RHS
, *RZExt
);
1203 // zext(trunc(X) & C) -> (X & zext(C)).
1207 match(SrcI
, m_OneUse(m_And(m_Trunc(m_Value(X
)), m_Constant(C
)))) &&
1208 X
->getType() == CI
.getType())
1209 return BinaryOperator::CreateAnd(X
, ConstantExpr::getZExt(C
, CI
.getType()));
1211 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1213 if (SrcI
&& match(SrcI
, m_OneUse(m_Xor(m_Value(And
), m_Constant(C
)))) &&
1214 match(And
, m_OneUse(m_And(m_Trunc(m_Value(X
)), m_Specific(C
)))) &&
1215 X
->getType() == CI
.getType()) {
1216 Constant
*ZC
= ConstantExpr::getZExt(C
, CI
.getType());
1217 return BinaryOperator::CreateXor(Builder
.CreateAnd(X
, ZC
), ZC
);
1223 /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1224 Instruction
*InstCombiner::transformSExtICmp(ICmpInst
*ICI
, Instruction
&CI
) {
1225 Value
*Op0
= ICI
->getOperand(0), *Op1
= ICI
->getOperand(1);
1226 ICmpInst::Predicate Pred
= ICI
->getPredicate();
1228 // Don't bother if Op1 isn't of vector or integer type.
1229 if (!Op1
->getType()->isIntOrIntVectorTy())
1232 if ((Pred
== ICmpInst::ICMP_SLT
&& match(Op1
, m_ZeroInt())) ||
1233 (Pred
== ICmpInst::ICMP_SGT
&& match(Op1
, m_AllOnes()))) {
1234 // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
1235 // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
1236 Value
*Sh
= ConstantInt::get(Op0
->getType(),
1237 Op0
->getType()->getScalarSizeInBits() - 1);
1238 Value
*In
= Builder
.CreateAShr(Op0
, Sh
, Op0
->getName() + ".lobit");
1239 if (In
->getType() != CI
.getType())
1240 In
= Builder
.CreateIntCast(In
, CI
.getType(), true /*SExt*/);
1242 if (Pred
== ICmpInst::ICMP_SGT
)
1243 In
= Builder
.CreateNot(In
, In
->getName() + ".not");
1244 return replaceInstUsesWith(CI
, In
);
1247 if (ConstantInt
*Op1C
= dyn_cast
<ConstantInt
>(Op1
)) {
1248 // If we know that only one bit of the LHS of the icmp can be set and we
1249 // have an equality comparison with zero or a power of 2, we can transform
1250 // the icmp and sext into bitwise/integer operations.
1251 if (ICI
->hasOneUse() &&
1252 ICI
->isEquality() && (Op1C
->isZero() || Op1C
->getValue().isPowerOf2())){
1253 KnownBits Known
= computeKnownBits(Op0
, 0, &CI
);
1255 APInt
KnownZeroMask(~Known
.Zero
);
1256 if (KnownZeroMask
.isPowerOf2()) {
1257 Value
*In
= ICI
->getOperand(0);
1259 // If the icmp tests for a known zero bit we can constant fold it.
1260 if (!Op1C
->isZero() && Op1C
->getValue() != KnownZeroMask
) {
1261 Value
*V
= Pred
== ICmpInst::ICMP_NE
?
1262 ConstantInt::getAllOnesValue(CI
.getType()) :
1263 ConstantInt::getNullValue(CI
.getType());
1264 return replaceInstUsesWith(CI
, V
);
1267 if (!Op1C
->isZero() == (Pred
== ICmpInst::ICMP_NE
)) {
1268 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1269 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1270 unsigned ShiftAmt
= KnownZeroMask
.countTrailingZeros();
1271 // Perform a right shift to place the desired bit in the LSB.
1273 In
= Builder
.CreateLShr(In
,
1274 ConstantInt::get(In
->getType(), ShiftAmt
));
1276 // At this point "In" is either 1 or 0. Subtract 1 to turn
1277 // {1, 0} -> {0, -1}.
1278 In
= Builder
.CreateAdd(In
,
1279 ConstantInt::getAllOnesValue(In
->getType()),
1282 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1283 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1284 unsigned ShiftAmt
= KnownZeroMask
.countLeadingZeros();
1285 // Perform a left shift to place the desired bit in the MSB.
1287 In
= Builder
.CreateShl(In
,
1288 ConstantInt::get(In
->getType(), ShiftAmt
));
1290 // Distribute the bit over the whole bit width.
1291 In
= Builder
.CreateAShr(In
, ConstantInt::get(In
->getType(),
1292 KnownZeroMask
.getBitWidth() - 1), "sext");
1295 if (CI
.getType() == In
->getType())
1296 return replaceInstUsesWith(CI
, In
);
1297 return CastInst::CreateIntegerCast(In
, CI
.getType(), true/*SExt*/);
1305 /// Return true if we can take the specified value and return it as type Ty
1306 /// without inserting any new casts and without changing the value of the common
1307 /// low bits. This is used by code that tries to promote integer operations to
1308 /// a wider types will allow us to eliminate the extension.
1310 /// This function works on both vectors and scalars.
1312 static bool canEvaluateSExtd(Value
*V
, Type
*Ty
) {
1313 assert(V
->getType()->getScalarSizeInBits() < Ty
->getScalarSizeInBits() &&
1314 "Can't sign extend type to a smaller type");
1315 if (canAlwaysEvaluateInType(V
, Ty
))
1317 if (canNotEvaluateInType(V
, Ty
))
1320 auto *I
= cast
<Instruction
>(V
);
1321 switch (I
->getOpcode()) {
1322 case Instruction::SExt
: // sext(sext(x)) -> sext(x)
1323 case Instruction::ZExt
: // sext(zext(x)) -> zext(x)
1324 case Instruction::Trunc
: // sext(trunc(x)) -> trunc(x) or sext(x)
1326 case Instruction::And
:
1327 case Instruction::Or
:
1328 case Instruction::Xor
:
1329 case Instruction::Add
:
1330 case Instruction::Sub
:
1331 case Instruction::Mul
:
1332 // These operators can all arbitrarily be extended if their inputs can.
1333 return canEvaluateSExtd(I
->getOperand(0), Ty
) &&
1334 canEvaluateSExtd(I
->getOperand(1), Ty
);
1336 //case Instruction::Shl: TODO
1337 //case Instruction::LShr: TODO
1339 case Instruction::Select
:
1340 return canEvaluateSExtd(I
->getOperand(1), Ty
) &&
1341 canEvaluateSExtd(I
->getOperand(2), Ty
);
1343 case Instruction::PHI
: {
1344 // We can change a phi if we can change all operands. Note that we never
1345 // get into trouble with cyclic PHIs here because we only consider
1346 // instructions with a single use.
1347 PHINode
*PN
= cast
<PHINode
>(I
);
1348 for (Value
*IncValue
: PN
->incoming_values())
1349 if (!canEvaluateSExtd(IncValue
, Ty
)) return false;
1353 // TODO: Can handle more cases here.
1360 Instruction
*InstCombiner::visitSExt(SExtInst
&CI
) {
1361 // If this sign extend is only used by a truncate, let the truncate be
1362 // eliminated before we try to optimize this sext.
1363 if (CI
.hasOneUse() && isa
<TruncInst
>(CI
.user_back()))
1366 if (Instruction
*I
= commonCastTransforms(CI
))
1369 Value
*Src
= CI
.getOperand(0);
1370 Type
*SrcTy
= Src
->getType(), *DestTy
= CI
.getType();
1372 // If we know that the value being extended is positive, we can use a zext
1374 KnownBits Known
= computeKnownBits(Src
, 0, &CI
);
1375 if (Known
.isNonNegative())
1376 return CastInst::Create(Instruction::ZExt
, Src
, DestTy
);
1378 // Try to extend the entire expression tree to the wide destination type.
1379 if (shouldChangeType(SrcTy
, DestTy
) && canEvaluateSExtd(Src
, DestTy
)) {
1380 // Okay, we can transform this! Insert the new expression now.
1382 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1383 " to avoid sign extend: "
1385 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, true);
1386 assert(Res
->getType() == DestTy
);
1388 uint32_t SrcBitSize
= SrcTy
->getScalarSizeInBits();
1389 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
1391 // If the high bits are already filled with sign bit, just replace this
1392 // cast with the result.
1393 if (ComputeNumSignBits(Res
, 0, &CI
) > DestBitSize
- SrcBitSize
)
1394 return replaceInstUsesWith(CI
, Res
);
1396 // We need to emit a shl + ashr to do the sign extend.
1397 Value
*ShAmt
= ConstantInt::get(DestTy
, DestBitSize
-SrcBitSize
);
1398 return BinaryOperator::CreateAShr(Builder
.CreateShl(Res
, ShAmt
, "sext"),
1402 // If the input is a trunc from the destination type, then turn sext(trunc(x))
1405 if (match(Src
, m_OneUse(m_Trunc(m_Value(X
)))) && X
->getType() == DestTy
) {
1406 // sext(trunc(X)) --> ashr(shl(X, C), C)
1407 unsigned SrcBitSize
= SrcTy
->getScalarSizeInBits();
1408 unsigned DestBitSize
= DestTy
->getScalarSizeInBits();
1409 Constant
*ShAmt
= ConstantInt::get(DestTy
, DestBitSize
- SrcBitSize
);
1410 return BinaryOperator::CreateAShr(Builder
.CreateShl(X
, ShAmt
), ShAmt
);
1413 if (ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(Src
))
1414 return transformSExtICmp(ICI
, CI
);
1416 // If the input is a shl/ashr pair of a same constant, then this is a sign
1417 // extension from a smaller value. If we could trust arbitrary bitwidth
1418 // integers, we could turn this into a truncate to the smaller bit and then
1419 // use a sext for the whole extension. Since we don't, look deeper and check
1420 // for a truncate. If the source and dest are the same type, eliminate the
1421 // trunc and extend and just do shifts. For example, turn:
1422 // %a = trunc i32 %i to i8
1423 // %b = shl i8 %a, 6
1424 // %c = ashr i8 %b, 6
1425 // %d = sext i8 %c to i32
1427 // %a = shl i32 %i, 30
1428 // %d = ashr i32 %a, 30
1430 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1431 ConstantInt
*BA
= nullptr, *CA
= nullptr;
1432 if (match(Src
, m_AShr(m_Shl(m_Trunc(m_Value(A
)), m_ConstantInt(BA
)),
1433 m_ConstantInt(CA
))) &&
1434 BA
== CA
&& A
->getType() == CI
.getType()) {
1435 unsigned MidSize
= Src
->getType()->getScalarSizeInBits();
1436 unsigned SrcDstSize
= CI
.getType()->getScalarSizeInBits();
1437 unsigned ShAmt
= CA
->getZExtValue()+SrcDstSize
-MidSize
;
1438 Constant
*ShAmtV
= ConstantInt::get(CI
.getType(), ShAmt
);
1439 A
= Builder
.CreateShl(A
, ShAmtV
, CI
.getName());
1440 return BinaryOperator::CreateAShr(A
, ShAmtV
);
1447 /// Return a Constant* for the specified floating-point constant if it fits
1448 /// in the specified FP type without changing its value.
1449 static bool fitsInFPType(ConstantFP
*CFP
, const fltSemantics
&Sem
) {
1451 APFloat F
= CFP
->getValueAPF();
1452 (void)F
.convert(Sem
, APFloat::rmNearestTiesToEven
, &losesInfo
);
1456 static Type
*shrinkFPConstant(ConstantFP
*CFP
) {
1457 if (CFP
->getType() == Type::getPPC_FP128Ty(CFP
->getContext()))
1458 return nullptr; // No constant folding of this.
1459 // See if the value can be truncated to half and then reextended.
1460 if (fitsInFPType(CFP
, APFloat::IEEEhalf()))
1461 return Type::getHalfTy(CFP
->getContext());
1462 // See if the value can be truncated to float and then reextended.
1463 if (fitsInFPType(CFP
, APFloat::IEEEsingle()))
1464 return Type::getFloatTy(CFP
->getContext());
1465 if (CFP
->getType()->isDoubleTy())
1466 return nullptr; // Won't shrink.
1467 if (fitsInFPType(CFP
, APFloat::IEEEdouble()))
1468 return Type::getDoubleTy(CFP
->getContext());
1469 // Don't try to shrink to various long double types.
1473 // Determine if this is a vector of ConstantFPs and if so, return the minimal
1474 // type we can safely truncate all elements to.
1475 // TODO: Make these support undef elements.
1476 static Type
*shrinkFPConstantVector(Value
*V
) {
1477 auto *CV
= dyn_cast
<Constant
>(V
);
1478 if (!CV
|| !CV
->getType()->isVectorTy())
1481 Type
*MinType
= nullptr;
1483 unsigned NumElts
= CV
->getType()->getVectorNumElements();
1484 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1485 auto *CFP
= dyn_cast_or_null
<ConstantFP
>(CV
->getAggregateElement(i
));
1489 Type
*T
= shrinkFPConstant(CFP
);
1493 // If we haven't found a type yet or this type has a larger mantissa than
1494 // our previous type, this is our new minimal type.
1495 if (!MinType
|| T
->getFPMantissaWidth() > MinType
->getFPMantissaWidth())
1499 // Make a vector type from the minimal type.
1500 return VectorType::get(MinType
, NumElts
);
1503 /// Find the minimum FP type we can safely truncate to.
1504 static Type
*getMinimumFPType(Value
*V
) {
1505 if (auto *FPExt
= dyn_cast
<FPExtInst
>(V
))
1506 return FPExt
->getOperand(0)->getType();
1508 // If this value is a constant, return the constant in the smallest FP type
1509 // that can accurately represent it. This allows us to turn
1510 // (float)((double)X+2.0) into x+2.0f.
1511 if (auto *CFP
= dyn_cast
<ConstantFP
>(V
))
1512 if (Type
*T
= shrinkFPConstant(CFP
))
1515 // Try to shrink a vector of FP constants.
1516 if (Type
*T
= shrinkFPConstantVector(V
))
1519 return V
->getType();
1522 Instruction
*InstCombiner::visitFPTrunc(FPTruncInst
&FPT
) {
1523 if (Instruction
*I
= commonCastTransforms(FPT
))
1526 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1527 // simplify this expression to avoid one or more of the trunc/extend
1528 // operations if we can do so without changing the numerical results.
1530 // The exact manner in which the widths of the operands interact to limit
1531 // what we can and cannot do safely varies from operation to operation, and
1532 // is explained below in the various case statements.
1533 Type
*Ty
= FPT
.getType();
1534 BinaryOperator
*OpI
= dyn_cast
<BinaryOperator
>(FPT
.getOperand(0));
1535 if (OpI
&& OpI
->hasOneUse()) {
1536 Type
*LHSMinType
= getMinimumFPType(OpI
->getOperand(0));
1537 Type
*RHSMinType
= getMinimumFPType(OpI
->getOperand(1));
1538 unsigned OpWidth
= OpI
->getType()->getFPMantissaWidth();
1539 unsigned LHSWidth
= LHSMinType
->getFPMantissaWidth();
1540 unsigned RHSWidth
= RHSMinType
->getFPMantissaWidth();
1541 unsigned SrcWidth
= std::max(LHSWidth
, RHSWidth
);
1542 unsigned DstWidth
= Ty
->getFPMantissaWidth();
1543 switch (OpI
->getOpcode()) {
1545 case Instruction::FAdd
:
1546 case Instruction::FSub
:
1547 // For addition and subtraction, the infinitely precise result can
1548 // essentially be arbitrarily wide; proving that double rounding
1549 // will not occur because the result of OpI is exact (as we will for
1550 // FMul, for example) is hopeless. However, we *can* nonetheless
1551 // frequently know that double rounding cannot occur (or that it is
1552 // innocuous) by taking advantage of the specific structure of
1553 // infinitely-precise results that admit double rounding.
1555 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
1556 // to represent both sources, we can guarantee that the double
1557 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1558 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1559 // for proof of this fact).
1561 // Note: Figueroa does not consider the case where DstFormat !=
1562 // SrcFormat. It's possible (likely even!) that this analysis
1563 // could be tightened for those cases, but they are rare (the main
1564 // case of interest here is (float)((double)float + float)).
1565 if (OpWidth
>= 2*DstWidth
+1 && DstWidth
>= SrcWidth
) {
1566 Value
*LHS
= Builder
.CreateFPTrunc(OpI
->getOperand(0), Ty
);
1567 Value
*RHS
= Builder
.CreateFPTrunc(OpI
->getOperand(1), Ty
);
1568 Instruction
*RI
= BinaryOperator::Create(OpI
->getOpcode(), LHS
, RHS
);
1569 RI
->copyFastMathFlags(OpI
);
1573 case Instruction::FMul
:
1574 // For multiplication, the infinitely precise result has at most
1575 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1576 // that such a value can be exactly represented, then no double
1577 // rounding can possibly occur; we can safely perform the operation
1578 // in the destination format if it can represent both sources.
1579 if (OpWidth
>= LHSWidth
+ RHSWidth
&& DstWidth
>= SrcWidth
) {
1580 Value
*LHS
= Builder
.CreateFPTrunc(OpI
->getOperand(0), Ty
);
1581 Value
*RHS
= Builder
.CreateFPTrunc(OpI
->getOperand(1), Ty
);
1582 return BinaryOperator::CreateFMulFMF(LHS
, RHS
, OpI
);
1585 case Instruction::FDiv
:
1586 // For division, we use again use the bound from Figueroa's
1587 // dissertation. I am entirely certain that this bound can be
1588 // tightened in the unbalanced operand case by an analysis based on
1589 // the diophantine rational approximation bound, but the well-known
1590 // condition used here is a good conservative first pass.
1591 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1592 if (OpWidth
>= 2*DstWidth
&& DstWidth
>= SrcWidth
) {
1593 Value
*LHS
= Builder
.CreateFPTrunc(OpI
->getOperand(0), Ty
);
1594 Value
*RHS
= Builder
.CreateFPTrunc(OpI
->getOperand(1), Ty
);
1595 return BinaryOperator::CreateFDivFMF(LHS
, RHS
, OpI
);
1598 case Instruction::FRem
: {
1599 // Remainder is straightforward. Remainder is always exact, so the
1600 // type of OpI doesn't enter into things at all. We simply evaluate
1601 // in whichever source type is larger, then convert to the
1602 // destination type.
1603 if (SrcWidth
== OpWidth
)
1606 if (LHSWidth
== SrcWidth
) {
1607 LHS
= Builder
.CreateFPTrunc(OpI
->getOperand(0), LHSMinType
);
1608 RHS
= Builder
.CreateFPTrunc(OpI
->getOperand(1), LHSMinType
);
1610 LHS
= Builder
.CreateFPTrunc(OpI
->getOperand(0), RHSMinType
);
1611 RHS
= Builder
.CreateFPTrunc(OpI
->getOperand(1), RHSMinType
);
1614 Value
*ExactResult
= Builder
.CreateFRemFMF(LHS
, RHS
, OpI
);
1615 return CastInst::CreateFPCast(ExactResult
, Ty
);
1620 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1622 Instruction
*Op
= dyn_cast
<Instruction
>(FPT
.getOperand(0));
1623 if (Op
&& Op
->hasOneUse()) {
1624 if (match(Op
, m_FNeg(m_Value(X
)))) {
1625 Value
*InnerTrunc
= Builder
.CreateFPTrunc(X
, Ty
);
1627 // FIXME: Once we're sure that unary FNeg optimizations are on par with
1628 // binary FNeg, this should always return a unary operator.
1629 if (isa
<BinaryOperator
>(Op
))
1630 return BinaryOperator::CreateFNegFMF(InnerTrunc
, Op
);
1631 return UnaryOperator::CreateFNegFMF(InnerTrunc
, Op
);
1635 if (auto *II
= dyn_cast
<IntrinsicInst
>(FPT
.getOperand(0))) {
1636 switch (II
->getIntrinsicID()) {
1638 case Intrinsic::ceil
:
1639 case Intrinsic::fabs
:
1640 case Intrinsic::floor
:
1641 case Intrinsic::nearbyint
:
1642 case Intrinsic::rint
:
1643 case Intrinsic::round
:
1644 case Intrinsic::trunc
: {
1645 Value
*Src
= II
->getArgOperand(0);
1646 if (!Src
->hasOneUse())
1649 // Except for fabs, this transformation requires the input of the unary FP
1650 // operation to be itself an fpext from the type to which we're
1652 if (II
->getIntrinsicID() != Intrinsic::fabs
) {
1653 FPExtInst
*FPExtSrc
= dyn_cast
<FPExtInst
>(Src
);
1654 if (!FPExtSrc
|| FPExtSrc
->getSrcTy() != Ty
)
1658 // Do unary FP operation on smaller type.
1659 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1660 Value
*InnerTrunc
= Builder
.CreateFPTrunc(Src
, Ty
);
1661 Function
*Overload
= Intrinsic::getDeclaration(FPT
.getModule(),
1662 II
->getIntrinsicID(), Ty
);
1663 SmallVector
<OperandBundleDef
, 1> OpBundles
;
1664 II
->getOperandBundlesAsDefs(OpBundles
);
1666 CallInst::Create(Overload
, {InnerTrunc
}, OpBundles
, II
->getName());
1667 NewCI
->copyFastMathFlags(II
);
1673 if (Instruction
*I
= shrinkInsertElt(FPT
, Builder
))
1679 Instruction
*InstCombiner::visitFPExt(CastInst
&CI
) {
1680 return commonCastTransforms(CI
);
1683 // fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1684 // This is safe if the intermediate type has enough bits in its mantissa to
1685 // accurately represent all values of X. For example, this won't work with
1686 // i64 -> float -> i64.
1687 Instruction
*InstCombiner::FoldItoFPtoI(Instruction
&FI
) {
1688 if (!isa
<UIToFPInst
>(FI
.getOperand(0)) && !isa
<SIToFPInst
>(FI
.getOperand(0)))
1690 Instruction
*OpI
= cast
<Instruction
>(FI
.getOperand(0));
1692 Value
*SrcI
= OpI
->getOperand(0);
1693 Type
*FITy
= FI
.getType();
1694 Type
*OpITy
= OpI
->getType();
1695 Type
*SrcTy
= SrcI
->getType();
1696 bool IsInputSigned
= isa
<SIToFPInst
>(OpI
);
1697 bool IsOutputSigned
= isa
<FPToSIInst
>(FI
);
1699 // We can safely assume the conversion won't overflow the output range,
1700 // because (for example) (uint8_t)18293.f is undefined behavior.
1702 // Since we can assume the conversion won't overflow, our decision as to
1703 // whether the input will fit in the float should depend on the minimum
1704 // of the input range and output range.
1706 // This means this is also safe for a signed input and unsigned output, since
1707 // a negative input would lead to undefined behavior.
1708 int InputSize
= (int)SrcTy
->getScalarSizeInBits() - IsInputSigned
;
1709 int OutputSize
= (int)FITy
->getScalarSizeInBits() - IsOutputSigned
;
1710 int ActualSize
= std::min(InputSize
, OutputSize
);
1712 if (ActualSize
<= OpITy
->getFPMantissaWidth()) {
1713 if (FITy
->getScalarSizeInBits() > SrcTy
->getScalarSizeInBits()) {
1714 if (IsInputSigned
&& IsOutputSigned
)
1715 return new SExtInst(SrcI
, FITy
);
1716 return new ZExtInst(SrcI
, FITy
);
1718 if (FITy
->getScalarSizeInBits() < SrcTy
->getScalarSizeInBits())
1719 return new TruncInst(SrcI
, FITy
);
1721 return replaceInstUsesWith(FI
, SrcI
);
1722 return new BitCastInst(SrcI
, FITy
);
1727 Instruction
*InstCombiner::visitFPToUI(FPToUIInst
&FI
) {
1728 Instruction
*OpI
= dyn_cast
<Instruction
>(FI
.getOperand(0));
1730 return commonCastTransforms(FI
);
1732 if (Instruction
*I
= FoldItoFPtoI(FI
))
1735 return commonCastTransforms(FI
);
1738 Instruction
*InstCombiner::visitFPToSI(FPToSIInst
&FI
) {
1739 Instruction
*OpI
= dyn_cast
<Instruction
>(FI
.getOperand(0));
1741 return commonCastTransforms(FI
);
1743 if (Instruction
*I
= FoldItoFPtoI(FI
))
1746 return commonCastTransforms(FI
);
1749 Instruction
*InstCombiner::visitUIToFP(CastInst
&CI
) {
1750 return commonCastTransforms(CI
);
1753 Instruction
*InstCombiner::visitSIToFP(CastInst
&CI
) {
1754 return commonCastTransforms(CI
);
1757 Instruction
*InstCombiner::visitIntToPtr(IntToPtrInst
&CI
) {
1758 // If the source integer type is not the intptr_t type for this target, do a
1759 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
1760 // cast to be exposed to other transforms.
1761 unsigned AS
= CI
.getAddressSpace();
1762 if (CI
.getOperand(0)->getType()->getScalarSizeInBits() !=
1763 DL
.getPointerSizeInBits(AS
)) {
1764 Type
*Ty
= DL
.getIntPtrType(CI
.getContext(), AS
);
1765 if (CI
.getType()->isVectorTy()) // Handle vectors of pointers.
1766 Ty
= VectorType::get(Ty
, CI
.getType()->getVectorNumElements());
1768 Value
*P
= Builder
.CreateZExtOrTrunc(CI
.getOperand(0), Ty
);
1769 return new IntToPtrInst(P
, CI
.getType());
1772 if (Instruction
*I
= commonCastTransforms(CI
))
1778 /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
1779 Instruction
*InstCombiner::commonPointerCastTransforms(CastInst
&CI
) {
1780 Value
*Src
= CI
.getOperand(0);
1782 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(Src
)) {
1783 // If casting the result of a getelementptr instruction with no offset, turn
1784 // this into a cast of the original pointer!
1785 if (GEP
->hasAllZeroIndices() &&
1786 // If CI is an addrspacecast and GEP changes the poiner type, merging
1787 // GEP into CI would undo canonicalizing addrspacecast with different
1788 // pointer types, causing infinite loops.
1789 (!isa
<AddrSpaceCastInst
>(CI
) ||
1790 GEP
->getType() == GEP
->getPointerOperandType())) {
1791 // Changing the cast operand is usually not a good idea but it is safe
1792 // here because the pointer operand is being replaced with another
1793 // pointer operand so the opcode doesn't need to change.
1795 CI
.setOperand(0, GEP
->getOperand(0));
1800 return commonCastTransforms(CI
);
1803 Instruction
*InstCombiner::visitPtrToInt(PtrToIntInst
&CI
) {
1804 // If the destination integer type is not the intptr_t type for this target,
1805 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
1806 // to be exposed to other transforms.
1808 Type
*Ty
= CI
.getType();
1809 unsigned AS
= CI
.getPointerAddressSpace();
1811 if (Ty
->getScalarSizeInBits() == DL
.getIndexSizeInBits(AS
))
1812 return commonPointerCastTransforms(CI
);
1814 Type
*PtrTy
= DL
.getIntPtrType(CI
.getContext(), AS
);
1815 if (Ty
->isVectorTy()) // Handle vectors of pointers.
1816 PtrTy
= VectorType::get(PtrTy
, Ty
->getVectorNumElements());
1818 Value
*P
= Builder
.CreatePtrToInt(CI
.getOperand(0), PtrTy
);
1819 return CastInst::CreateIntegerCast(P
, Ty
, /*isSigned=*/false);
1822 /// This input value (which is known to have vector type) is being zero extended
1823 /// or truncated to the specified vector type.
1824 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
1826 /// The source and destination vector types may have different element types.
1827 static Instruction
*optimizeVectorResize(Value
*InVal
, VectorType
*DestTy
,
1829 // We can only do this optimization if the output is a multiple of the input
1830 // element size, or the input is a multiple of the output element size.
1831 // Convert the input type to have the same element type as the output.
1832 VectorType
*SrcTy
= cast
<VectorType
>(InVal
->getType());
1834 if (SrcTy
->getElementType() != DestTy
->getElementType()) {
1835 // The input types don't need to be identical, but for now they must be the
1836 // same size. There is no specific reason we couldn't handle things like
1837 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
1839 if (SrcTy
->getElementType()->getPrimitiveSizeInBits() !=
1840 DestTy
->getElementType()->getPrimitiveSizeInBits())
1843 SrcTy
= VectorType::get(DestTy
->getElementType(), SrcTy
->getNumElements());
1844 InVal
= IC
.Builder
.CreateBitCast(InVal
, SrcTy
);
1847 // Now that the element types match, get the shuffle mask and RHS of the
1848 // shuffle to use, which depends on whether we're increasing or decreasing the
1849 // size of the input.
1850 SmallVector
<uint32_t, 16> ShuffleMask
;
1853 if (SrcTy
->getNumElements() > DestTy
->getNumElements()) {
1854 // If we're shrinking the number of elements, just shuffle in the low
1855 // elements from the input and use undef as the second shuffle input.
1856 V2
= UndefValue::get(SrcTy
);
1857 for (unsigned i
= 0, e
= DestTy
->getNumElements(); i
!= e
; ++i
)
1858 ShuffleMask
.push_back(i
);
1861 // If we're increasing the number of elements, shuffle in all of the
1862 // elements from InVal and fill the rest of the result elements with zeros
1863 // from a constant zero.
1864 V2
= Constant::getNullValue(SrcTy
);
1865 unsigned SrcElts
= SrcTy
->getNumElements();
1866 for (unsigned i
= 0, e
= SrcElts
; i
!= e
; ++i
)
1867 ShuffleMask
.push_back(i
);
1869 // The excess elements reference the first element of the zero input.
1870 for (unsigned i
= 0, e
= DestTy
->getNumElements()-SrcElts
; i
!= e
; ++i
)
1871 ShuffleMask
.push_back(SrcElts
);
1874 return new ShuffleVectorInst(InVal
, V2
,
1875 ConstantDataVector::get(V2
->getContext(),
1879 static bool isMultipleOfTypeSize(unsigned Value
, Type
*Ty
) {
1880 return Value
% Ty
->getPrimitiveSizeInBits() == 0;
1883 static unsigned getTypeSizeIndex(unsigned Value
, Type
*Ty
) {
1884 return Value
/ Ty
->getPrimitiveSizeInBits();
1887 /// V is a value which is inserted into a vector of VecEltTy.
1888 /// Look through the value to see if we can decompose it into
1889 /// insertions into the vector. See the example in the comment for
1890 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
1891 /// The type of V is always a non-zero multiple of VecEltTy's size.
1892 /// Shift is the number of bits between the lsb of V and the lsb of
1895 /// This returns false if the pattern can't be matched or true if it can,
1896 /// filling in Elements with the elements found here.
1897 static bool collectInsertionElements(Value
*V
, unsigned Shift
,
1898 SmallVectorImpl
<Value
*> &Elements
,
1899 Type
*VecEltTy
, bool isBigEndian
) {
1900 assert(isMultipleOfTypeSize(Shift
, VecEltTy
) &&
1901 "Shift should be a multiple of the element type size");
1903 // Undef values never contribute useful bits to the result.
1904 if (isa
<UndefValue
>(V
)) return true;
1906 // If we got down to a value of the right type, we win, try inserting into the
1908 if (V
->getType() == VecEltTy
) {
1909 // Inserting null doesn't actually insert any elements.
1910 if (Constant
*C
= dyn_cast
<Constant
>(V
))
1911 if (C
->isNullValue())
1914 unsigned ElementIndex
= getTypeSizeIndex(Shift
, VecEltTy
);
1916 ElementIndex
= Elements
.size() - ElementIndex
- 1;
1918 // Fail if multiple elements are inserted into this slot.
1919 if (Elements
[ElementIndex
])
1922 Elements
[ElementIndex
] = V
;
1926 if (Constant
*C
= dyn_cast
<Constant
>(V
)) {
1927 // Figure out the # elements this provides, and bitcast it or slice it up
1929 unsigned NumElts
= getTypeSizeIndex(C
->getType()->getPrimitiveSizeInBits(),
1931 // If the constant is the size of a vector element, we just need to bitcast
1932 // it to the right type so it gets properly inserted.
1934 return collectInsertionElements(ConstantExpr::getBitCast(C
, VecEltTy
),
1935 Shift
, Elements
, VecEltTy
, isBigEndian
);
1937 // Okay, this is a constant that covers multiple elements. Slice it up into
1938 // pieces and insert each element-sized piece into the vector.
1939 if (!isa
<IntegerType
>(C
->getType()))
1940 C
= ConstantExpr::getBitCast(C
, IntegerType::get(V
->getContext(),
1941 C
->getType()->getPrimitiveSizeInBits()));
1942 unsigned ElementSize
= VecEltTy
->getPrimitiveSizeInBits();
1943 Type
*ElementIntTy
= IntegerType::get(C
->getContext(), ElementSize
);
1945 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1946 unsigned ShiftI
= Shift
+i
*ElementSize
;
1947 Constant
*Piece
= ConstantExpr::getLShr(C
, ConstantInt::get(C
->getType(),
1949 Piece
= ConstantExpr::getTrunc(Piece
, ElementIntTy
);
1950 if (!collectInsertionElements(Piece
, ShiftI
, Elements
, VecEltTy
,
1957 if (!V
->hasOneUse()) return false;
1959 Instruction
*I
= dyn_cast
<Instruction
>(V
);
1960 if (!I
) return false;
1961 switch (I
->getOpcode()) {
1962 default: return false; // Unhandled case.
1963 case Instruction::BitCast
:
1964 return collectInsertionElements(I
->getOperand(0), Shift
, Elements
, VecEltTy
,
1966 case Instruction::ZExt
:
1967 if (!isMultipleOfTypeSize(
1968 I
->getOperand(0)->getType()->getPrimitiveSizeInBits(),
1971 return collectInsertionElements(I
->getOperand(0), Shift
, Elements
, VecEltTy
,
1973 case Instruction::Or
:
1974 return collectInsertionElements(I
->getOperand(0), Shift
, Elements
, VecEltTy
,
1976 collectInsertionElements(I
->getOperand(1), Shift
, Elements
, VecEltTy
,
1978 case Instruction::Shl
: {
1979 // Must be shifting by a constant that is a multiple of the element size.
1980 ConstantInt
*CI
= dyn_cast
<ConstantInt
>(I
->getOperand(1));
1981 if (!CI
) return false;
1982 Shift
+= CI
->getZExtValue();
1983 if (!isMultipleOfTypeSize(Shift
, VecEltTy
)) return false;
1984 return collectInsertionElements(I
->getOperand(0), Shift
, Elements
, VecEltTy
,
1992 /// If the input is an 'or' instruction, we may be doing shifts and ors to
1993 /// assemble the elements of the vector manually.
1994 /// Try to rip the code out and replace it with insertelements. This is to
1995 /// optimize code like this:
1997 /// %tmp37 = bitcast float %inc to i32
1998 /// %tmp38 = zext i32 %tmp37 to i64
1999 /// %tmp31 = bitcast float %inc5 to i32
2000 /// %tmp32 = zext i32 %tmp31 to i64
2001 /// %tmp33 = shl i64 %tmp32, 32
2002 /// %ins35 = or i64 %tmp33, %tmp38
2003 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
2005 /// Into two insertelements that do "buildvector{%inc, %inc5}".
2006 static Value
*optimizeIntegerToVectorInsertions(BitCastInst
&CI
,
2008 VectorType
*DestVecTy
= cast
<VectorType
>(CI
.getType());
2009 Value
*IntInput
= CI
.getOperand(0);
2011 SmallVector
<Value
*, 8> Elements(DestVecTy
->getNumElements());
2012 if (!collectInsertionElements(IntInput
, 0, Elements
,
2013 DestVecTy
->getElementType(),
2014 IC
.getDataLayout().isBigEndian()))
2017 // If we succeeded, we know that all of the element are specified by Elements
2018 // or are zero if Elements has a null entry. Recast this as a set of
2020 Value
*Result
= Constant::getNullValue(CI
.getType());
2021 for (unsigned i
= 0, e
= Elements
.size(); i
!= e
; ++i
) {
2022 if (!Elements
[i
]) continue; // Unset element.
2024 Result
= IC
.Builder
.CreateInsertElement(Result
, Elements
[i
],
2025 IC
.Builder
.getInt32(i
));
2031 /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2032 /// vector followed by extract element. The backend tends to handle bitcasts of
2033 /// vectors better than bitcasts of scalars because vector registers are
2034 /// usually not type-specific like scalar integer or scalar floating-point.
2035 static Instruction
*canonicalizeBitCastExtElt(BitCastInst
&BitCast
,
2037 // TODO: Create and use a pattern matcher for ExtractElementInst.
2038 auto *ExtElt
= dyn_cast
<ExtractElementInst
>(BitCast
.getOperand(0));
2039 if (!ExtElt
|| !ExtElt
->hasOneUse())
2042 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2043 // type to extract from.
2044 Type
*DestType
= BitCast
.getType();
2045 if (!VectorType::isValidElementType(DestType
))
2048 unsigned NumElts
= ExtElt
->getVectorOperandType()->getNumElements();
2049 auto *NewVecType
= VectorType::get(DestType
, NumElts
);
2050 auto *NewBC
= IC
.Builder
.CreateBitCast(ExtElt
->getVectorOperand(),
2052 return ExtractElementInst::Create(NewBC
, ExtElt
->getIndexOperand());
2055 /// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2056 static Instruction
*foldBitCastBitwiseLogic(BitCastInst
&BitCast
,
2057 InstCombiner::BuilderTy
&Builder
) {
2058 Type
*DestTy
= BitCast
.getType();
2060 if (!DestTy
->isIntOrIntVectorTy() ||
2061 !match(BitCast
.getOperand(0), m_OneUse(m_BinOp(BO
))) ||
2062 !BO
->isBitwiseLogicOp())
2065 // FIXME: This transform is restricted to vector types to avoid backend
2066 // problems caused by creating potentially illegal operations. If a fix-up is
2067 // added to handle that situation, we can remove this check.
2068 if (!DestTy
->isVectorTy() || !BO
->getType()->isVectorTy())
2072 if (match(BO
->getOperand(0), m_OneUse(m_BitCast(m_Value(X
)))) &&
2073 X
->getType() == DestTy
&& !isa
<Constant
>(X
)) {
2074 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2075 Value
*CastedOp1
= Builder
.CreateBitCast(BO
->getOperand(1), DestTy
);
2076 return BinaryOperator::Create(BO
->getOpcode(), X
, CastedOp1
);
2079 if (match(BO
->getOperand(1), m_OneUse(m_BitCast(m_Value(X
)))) &&
2080 X
->getType() == DestTy
&& !isa
<Constant
>(X
)) {
2081 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2082 Value
*CastedOp0
= Builder
.CreateBitCast(BO
->getOperand(0), DestTy
);
2083 return BinaryOperator::Create(BO
->getOpcode(), CastedOp0
, X
);
2086 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2087 // constant. This eases recognition of special constants for later ops.
2089 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2091 if (match(BO
->getOperand(1), m_Constant(C
))) {
2092 // bitcast (logic X, C) --> logic (bitcast X, C')
2093 Value
*CastedOp0
= Builder
.CreateBitCast(BO
->getOperand(0), DestTy
);
2094 Value
*CastedC
= ConstantExpr::getBitCast(C
, DestTy
);
2095 return BinaryOperator::Create(BO
->getOpcode(), CastedOp0
, CastedC
);
2101 /// Change the type of a select if we can eliminate a bitcast.
2102 static Instruction
*foldBitCastSelect(BitCastInst
&BitCast
,
2103 InstCombiner::BuilderTy
&Builder
) {
2104 Value
*Cond
, *TVal
, *FVal
;
2105 if (!match(BitCast
.getOperand(0),
2106 m_OneUse(m_Select(m_Value(Cond
), m_Value(TVal
), m_Value(FVal
)))))
2109 // A vector select must maintain the same number of elements in its operands.
2110 Type
*CondTy
= Cond
->getType();
2111 Type
*DestTy
= BitCast
.getType();
2112 if (CondTy
->isVectorTy()) {
2113 if (!DestTy
->isVectorTy())
2115 if (DestTy
->getVectorNumElements() != CondTy
->getVectorNumElements())
2119 // FIXME: This transform is restricted from changing the select between
2120 // scalars and vectors to avoid backend problems caused by creating
2121 // potentially illegal operations. If a fix-up is added to handle that
2122 // situation, we can remove this check.
2123 if (DestTy
->isVectorTy() != TVal
->getType()->isVectorTy())
2126 auto *Sel
= cast
<Instruction
>(BitCast
.getOperand(0));
2128 if (match(TVal
, m_OneUse(m_BitCast(m_Value(X
)))) && X
->getType() == DestTy
&&
2129 !isa
<Constant
>(X
)) {
2130 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2131 Value
*CastedVal
= Builder
.CreateBitCast(FVal
, DestTy
);
2132 return SelectInst::Create(Cond
, X
, CastedVal
, "", nullptr, Sel
);
2135 if (match(FVal
, m_OneUse(m_BitCast(m_Value(X
)))) && X
->getType() == DestTy
&&
2136 !isa
<Constant
>(X
)) {
2137 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2138 Value
*CastedVal
= Builder
.CreateBitCast(TVal
, DestTy
);
2139 return SelectInst::Create(Cond
, CastedVal
, X
, "", nullptr, Sel
);
2145 /// Check if all users of CI are StoreInsts.
2146 static bool hasStoreUsersOnly(CastInst
&CI
) {
2147 for (User
*U
: CI
.users()) {
2148 if (!isa
<StoreInst
>(U
))
2154 /// This function handles following case
2160 /// All the related PHI nodes can be replaced by new PHI nodes with type A.
2161 /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2162 Instruction
*InstCombiner::optimizeBitCastFromPhi(CastInst
&CI
, PHINode
*PN
) {
2163 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2164 if (hasStoreUsersOnly(CI
))
2167 Value
*Src
= CI
.getOperand(0);
2168 Type
*SrcTy
= Src
->getType(); // Type B
2169 Type
*DestTy
= CI
.getType(); // Type A
2171 SmallVector
<PHINode
*, 4> PhiWorklist
;
2172 SmallSetVector
<PHINode
*, 4> OldPhiNodes
;
2174 // Find all of the A->B casts and PHI nodes.
2175 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2176 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2177 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2178 PhiWorklist
.push_back(PN
);
2179 OldPhiNodes
.insert(PN
);
2180 while (!PhiWorklist
.empty()) {
2181 auto *OldPN
= PhiWorklist
.pop_back_val();
2182 for (Value
*IncValue
: OldPN
->incoming_values()) {
2183 if (isa
<Constant
>(IncValue
))
2186 if (auto *LI
= dyn_cast
<LoadInst
>(IncValue
)) {
2187 // If there is a sequence of one or more load instructions, each loaded
2188 // value is used as address of later load instruction, bitcast is
2189 // necessary to change the value type, don't optimize it. For
2190 // simplicity we give up if the load address comes from another load.
2191 Value
*Addr
= LI
->getOperand(0);
2192 if (Addr
== &CI
|| isa
<LoadInst
>(Addr
))
2194 if (LI
->hasOneUse() && LI
->isSimple())
2196 // If a LoadInst has more than one use, changing the type of loaded
2197 // value may create another bitcast.
2201 if (auto *PNode
= dyn_cast
<PHINode
>(IncValue
)) {
2202 if (OldPhiNodes
.insert(PNode
))
2203 PhiWorklist
.push_back(PNode
);
2207 auto *BCI
= dyn_cast
<BitCastInst
>(IncValue
);
2208 // We can't handle other instructions.
2212 // Verify it's a A->B cast.
2213 Type
*TyA
= BCI
->getOperand(0)->getType();
2214 Type
*TyB
= BCI
->getType();
2215 if (TyA
!= DestTy
|| TyB
!= SrcTy
)
2220 // For each old PHI node, create a corresponding new PHI node with a type A.
2221 SmallDenseMap
<PHINode
*, PHINode
*> NewPNodes
;
2222 for (auto *OldPN
: OldPhiNodes
) {
2223 Builder
.SetInsertPoint(OldPN
);
2224 PHINode
*NewPN
= Builder
.CreatePHI(DestTy
, OldPN
->getNumOperands());
2225 NewPNodes
[OldPN
] = NewPN
;
2228 // Fill in the operands of new PHI nodes.
2229 for (auto *OldPN
: OldPhiNodes
) {
2230 PHINode
*NewPN
= NewPNodes
[OldPN
];
2231 for (unsigned j
= 0, e
= OldPN
->getNumOperands(); j
!= e
; ++j
) {
2232 Value
*V
= OldPN
->getOperand(j
);
2233 Value
*NewV
= nullptr;
2234 if (auto *C
= dyn_cast
<Constant
>(V
)) {
2235 NewV
= ConstantExpr::getBitCast(C
, DestTy
);
2236 } else if (auto *LI
= dyn_cast
<LoadInst
>(V
)) {
2237 Builder
.SetInsertPoint(LI
->getNextNode());
2238 NewV
= Builder
.CreateBitCast(LI
, DestTy
);
2240 } else if (auto *BCI
= dyn_cast
<BitCastInst
>(V
)) {
2241 NewV
= BCI
->getOperand(0);
2242 } else if (auto *PrevPN
= dyn_cast
<PHINode
>(V
)) {
2243 NewV
= NewPNodes
[PrevPN
];
2246 NewPN
->addIncoming(NewV
, OldPN
->getIncomingBlock(j
));
2250 // Traverse all accumulated PHI nodes and process its users,
2251 // which are Stores and BitcCasts. Without this processing
2252 // NewPHI nodes could be replicated and could lead to extra
2253 // moves generated after DeSSA.
2254 // If there is a store with type B, change it to type A.
2257 // Replace users of BitCast B->A with NewPHI. These will help
2258 // later to get rid off a closure formed by OldPHI nodes.
2259 Instruction
*RetVal
= nullptr;
2260 for (auto *OldPN
: OldPhiNodes
) {
2261 PHINode
*NewPN
= NewPNodes
[OldPN
];
2262 for (User
*V
: OldPN
->users()) {
2263 if (auto *SI
= dyn_cast
<StoreInst
>(V
)) {
2264 if (SI
->isSimple() && SI
->getOperand(0) == OldPN
) {
2265 Builder
.SetInsertPoint(SI
);
2267 cast
<BitCastInst
>(Builder
.CreateBitCast(NewPN
, SrcTy
));
2268 SI
->setOperand(0, NewBC
);
2270 assert(hasStoreUsersOnly(*NewBC
));
2273 else if (auto *BCI
= dyn_cast
<BitCastInst
>(V
)) {
2274 // Verify it's a B->A cast.
2275 Type
*TyB
= BCI
->getOperand(0)->getType();
2276 Type
*TyA
= BCI
->getType();
2277 if (TyA
== DestTy
&& TyB
== SrcTy
) {
2278 Instruction
*I
= replaceInstUsesWith(*BCI
, NewPN
);
2289 Instruction
*InstCombiner::visitBitCast(BitCastInst
&CI
) {
2290 // If the operands are integer typed then apply the integer transforms,
2291 // otherwise just apply the common ones.
2292 Value
*Src
= CI
.getOperand(0);
2293 Type
*SrcTy
= Src
->getType();
2294 Type
*DestTy
= CI
.getType();
2296 // Get rid of casts from one type to the same type. These are useless and can
2297 // be replaced by the operand.
2298 if (DestTy
== Src
->getType())
2299 return replaceInstUsesWith(CI
, Src
);
2301 if (PointerType
*DstPTy
= dyn_cast
<PointerType
>(DestTy
)) {
2302 PointerType
*SrcPTy
= cast
<PointerType
>(SrcTy
);
2303 Type
*DstElTy
= DstPTy
->getElementType();
2304 Type
*SrcElTy
= SrcPTy
->getElementType();
2306 // Casting pointers between the same type, but with different address spaces
2307 // is an addrspace cast rather than a bitcast.
2308 if ((DstElTy
== SrcElTy
) &&
2309 (DstPTy
->getAddressSpace() != SrcPTy
->getAddressSpace()))
2310 return new AddrSpaceCastInst(Src
, DestTy
);
2312 // If we are casting a alloca to a pointer to a type of the same
2313 // size, rewrite the allocation instruction to allocate the "right" type.
2314 // There is no need to modify malloc calls because it is their bitcast that
2315 // needs to be cleaned up.
2316 if (AllocaInst
*AI
= dyn_cast
<AllocaInst
>(Src
))
2317 if (Instruction
*V
= PromoteCastOfAllocation(CI
, *AI
))
2320 // When the type pointed to is not sized the cast cannot be
2321 // turned into a gep.
2323 cast
<PointerType
>(Src
->getType()->getScalarType())->getElementType();
2324 if (!PointeeType
->isSized())
2327 // If the source and destination are pointers, and this cast is equivalent
2328 // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
2329 // This can enhance SROA and other transforms that want type-safe pointers.
2330 unsigned NumZeros
= 0;
2331 while (SrcElTy
!= DstElTy
&&
2332 isa
<CompositeType
>(SrcElTy
) && !SrcElTy
->isPointerTy() &&
2333 SrcElTy
->getNumContainedTypes() /* not "{}" */) {
2334 SrcElTy
= cast
<CompositeType
>(SrcElTy
)->getTypeAtIndex(0U);
2338 // If we found a path from the src to dest, create the getelementptr now.
2339 if (SrcElTy
== DstElTy
) {
2340 SmallVector
<Value
*, 8> Idxs(NumZeros
+ 1, Builder
.getInt32(0));
2341 return GetElementPtrInst::CreateInBounds(SrcPTy
->getElementType(), Src
,
2346 if (VectorType
*DestVTy
= dyn_cast
<VectorType
>(DestTy
)) {
2347 if (DestVTy
->getNumElements() == 1 && !SrcTy
->isVectorTy()) {
2348 Value
*Elem
= Builder
.CreateBitCast(Src
, DestVTy
->getElementType());
2349 return InsertElementInst::Create(UndefValue::get(DestTy
), Elem
,
2350 Constant::getNullValue(Type::getInt32Ty(CI
.getContext())));
2351 // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
2354 if (isa
<IntegerType
>(SrcTy
)) {
2355 // If this is a cast from an integer to vector, check to see if the input
2356 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2357 // the casts with a shuffle and (potentially) a bitcast.
2358 if (isa
<TruncInst
>(Src
) || isa
<ZExtInst
>(Src
)) {
2359 CastInst
*SrcCast
= cast
<CastInst
>(Src
);
2360 if (BitCastInst
*BCIn
= dyn_cast
<BitCastInst
>(SrcCast
->getOperand(0)))
2361 if (isa
<VectorType
>(BCIn
->getOperand(0)->getType()))
2362 if (Instruction
*I
= optimizeVectorResize(BCIn
->getOperand(0),
2363 cast
<VectorType
>(DestTy
), *this))
2367 // If the input is an 'or' instruction, we may be doing shifts and ors to
2368 // assemble the elements of the vector manually. Try to rip the code out
2369 // and replace it with insertelements.
2370 if (Value
*V
= optimizeIntegerToVectorInsertions(CI
, *this))
2371 return replaceInstUsesWith(CI
, V
);
2375 if (VectorType
*SrcVTy
= dyn_cast
<VectorType
>(SrcTy
)) {
2376 if (SrcVTy
->getNumElements() == 1) {
2377 // If our destination is not a vector, then make this a straight
2378 // scalar-scalar cast.
2379 if (!DestTy
->isVectorTy()) {
2381 Builder
.CreateExtractElement(Src
,
2382 Constant::getNullValue(Type::getInt32Ty(CI
.getContext())));
2383 return CastInst::Create(Instruction::BitCast
, Elem
, DestTy
);
2386 // Otherwise, see if our source is an insert. If so, then use the scalar
2387 // component directly:
2388 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2389 if (auto *InsElt
= dyn_cast
<InsertElementInst
>(Src
))
2390 return new BitCastInst(InsElt
->getOperand(1), DestTy
);
2394 if (auto *Shuf
= dyn_cast
<ShuffleVectorInst
>(Src
)) {
2395 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2396 // a bitcast to a vector with the same # elts.
2397 Value
*ShufOp0
= Shuf
->getOperand(0);
2398 Value
*ShufOp1
= Shuf
->getOperand(1);
2399 unsigned NumShufElts
= Shuf
->getType()->getVectorNumElements();
2400 unsigned NumSrcVecElts
= ShufOp0
->getType()->getVectorNumElements();
2401 if (Shuf
->hasOneUse() && DestTy
->isVectorTy() &&
2402 DestTy
->getVectorNumElements() == NumShufElts
&&
2403 NumShufElts
== NumSrcVecElts
) {
2405 // If either of the operands is a cast from CI.getType(), then
2406 // evaluating the shuffle in the casted destination's type will allow
2407 // us to eliminate at least one cast.
2408 if (((Tmp
= dyn_cast
<BitCastInst
>(ShufOp0
)) &&
2409 Tmp
->getOperand(0)->getType() == DestTy
) ||
2410 ((Tmp
= dyn_cast
<BitCastInst
>(ShufOp1
)) &&
2411 Tmp
->getOperand(0)->getType() == DestTy
)) {
2412 Value
*LHS
= Builder
.CreateBitCast(ShufOp0
, DestTy
);
2413 Value
*RHS
= Builder
.CreateBitCast(ShufOp1
, DestTy
);
2414 // Return a new shuffle vector. Use the same element ID's, as we
2415 // know the vector types match #elts.
2416 return new ShuffleVectorInst(LHS
, RHS
, Shuf
->getOperand(2));
2420 // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
2422 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
2423 // TODO: We should match the related pattern for bitreverse.
2424 if (DestTy
->isIntegerTy() &&
2425 DL
.isLegalInteger(DestTy
->getScalarSizeInBits()) &&
2426 SrcTy
->getScalarSizeInBits() == 8 && NumShufElts
% 2 == 0 &&
2427 Shuf
->hasOneUse() && Shuf
->isReverse()) {
2428 assert(ShufOp0
->getType() == SrcTy
&& "Unexpected shuffle mask");
2429 assert(isa
<UndefValue
>(ShufOp1
) && "Unexpected shuffle op");
2431 Intrinsic::getDeclaration(CI
.getModule(), Intrinsic::bswap
, DestTy
);
2432 Value
*ScalarX
= Builder
.CreateBitCast(ShufOp0
, DestTy
);
2433 return IntrinsicInst::Create(Bswap
, { ScalarX
});
2437 // Handle the A->B->A cast, and there is an intervening PHI node.
2438 if (PHINode
*PN
= dyn_cast
<PHINode
>(Src
))
2439 if (Instruction
*I
= optimizeBitCastFromPhi(CI
, PN
))
2442 if (Instruction
*I
= canonicalizeBitCastExtElt(CI
, *this))
2445 if (Instruction
*I
= foldBitCastBitwiseLogic(CI
, Builder
))
2448 if (Instruction
*I
= foldBitCastSelect(CI
, Builder
))
2451 if (SrcTy
->isPointerTy())
2452 return commonPointerCastTransforms(CI
);
2453 return commonCastTransforms(CI
);
2456 Instruction
*InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst
&CI
) {
2457 // If the destination pointer element type is not the same as the source's
2458 // first do a bitcast to the destination type, and then the addrspacecast.
2459 // This allows the cast to be exposed to other transforms.
2460 Value
*Src
= CI
.getOperand(0);
2461 PointerType
*SrcTy
= cast
<PointerType
>(Src
->getType()->getScalarType());
2462 PointerType
*DestTy
= cast
<PointerType
>(CI
.getType()->getScalarType());
2464 Type
*DestElemTy
= DestTy
->getElementType();
2465 if (SrcTy
->getElementType() != DestElemTy
) {
2466 Type
*MidTy
= PointerType::get(DestElemTy
, SrcTy
->getAddressSpace());
2467 if (VectorType
*VT
= dyn_cast
<VectorType
>(CI
.getType())) {
2468 // Handle vectors of pointers.
2469 MidTy
= VectorType::get(MidTy
, VT
->getNumElements());
2472 Value
*NewBitCast
= Builder
.CreateBitCast(Src
, MidTy
);
2473 return new AddrSpaceCastInst(NewBitCast
, CI
.getType());
2476 return commonPointerCastTransforms(CI
);