1 //===- InstCombineCasts.cpp -----------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the visit functions for cast operations.
12 //===----------------------------------------------------------------------===//
14 #include "InstCombine.h"
15 #include "llvm/Target/TargetData.h"
16 #include "llvm/Support/PatternMatch.h"
18 using namespace PatternMatch
;
20 /// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
21 /// expression. If so, decompose it, returning some value X, such that Val is
24 static Value
*DecomposeSimpleLinearExpr(Value
*Val
, unsigned &Scale
,
26 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Val
)) {
27 Offset
= CI
->getZExtValue();
29 return ConstantInt::get(Val
->getType(), 0);
32 if (BinaryOperator
*I
= dyn_cast
<BinaryOperator
>(Val
)) {
33 // Cannot look past anything that might overflow.
34 OverflowingBinaryOperator
*OBI
= dyn_cast
<OverflowingBinaryOperator
>(Val
);
35 if (OBI
&& !OBI
->hasNoUnsignedWrap()) {
41 if (ConstantInt
*RHS
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
42 if (I
->getOpcode() == Instruction::Shl
) {
43 // This is a value scaled by '1 << the shift amt'.
44 Scale
= UINT64_C(1) << RHS
->getZExtValue();
46 return I
->getOperand(0);
49 if (I
->getOpcode() == Instruction::Mul
) {
50 // This value is scaled by 'RHS'.
51 Scale
= RHS
->getZExtValue();
53 return I
->getOperand(0);
56 if (I
->getOpcode() == Instruction::Add
) {
57 // We have X+C. Check to see if we really have (X*C2)+C1,
58 // where C1 is divisible by C2.
61 DecomposeSimpleLinearExpr(I
->getOperand(0), SubScale
, Offset
);
62 Offset
+= RHS
->getZExtValue();
69 // Otherwise, we can't look past this.
75 /// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
76 /// try to eliminate the cast by moving the type information into the alloc.
77 Instruction
*InstCombiner::PromoteCastOfAllocation(BitCastInst
&CI
,
79 // This requires TargetData to get the alloca alignment and size information.
82 const PointerType
*PTy
= cast
<PointerType
>(CI
.getType());
84 BuilderTy
AllocaBuilder(*Builder
);
85 AllocaBuilder
.SetInsertPoint(AI
.getParent(), &AI
);
87 // Get the type really allocated and the type casted to.
88 const Type
*AllocElTy
= AI
.getAllocatedType();
89 const Type
*CastElTy
= PTy
->getElementType();
90 if (!AllocElTy
->isSized() || !CastElTy
->isSized()) return 0;
92 unsigned AllocElTyAlign
= TD
->getABITypeAlignment(AllocElTy
);
93 unsigned CastElTyAlign
= TD
->getABITypeAlignment(CastElTy
);
94 if (CastElTyAlign
< AllocElTyAlign
) return 0;
96 // If the allocation has multiple uses, only promote it if we are strictly
97 // increasing the alignment of the resultant allocation. If we keep it the
98 // same, we open the door to infinite loops of various kinds.
99 if (!AI
.hasOneUse() && CastElTyAlign
== AllocElTyAlign
) return 0;
101 uint64_t AllocElTySize
= TD
->getTypeAllocSize(AllocElTy
);
102 uint64_t CastElTySize
= TD
->getTypeAllocSize(CastElTy
);
103 if (CastElTySize
== 0 || AllocElTySize
== 0) return 0;
105 // See if we can satisfy the modulus by pulling a scale out of the array
107 unsigned ArraySizeScale
;
108 uint64_t ArrayOffset
;
109 Value
*NumElements
= // See if the array size is a decomposable linear expr.
110 DecomposeSimpleLinearExpr(AI
.getOperand(0), ArraySizeScale
, ArrayOffset
);
112 // If we can now satisfy the modulus, by using a non-1 scale, we really can
114 if ((AllocElTySize
*ArraySizeScale
) % CastElTySize
!= 0 ||
115 (AllocElTySize
*ArrayOffset
) % CastElTySize
!= 0) return 0;
117 unsigned Scale
= (AllocElTySize
*ArraySizeScale
)/CastElTySize
;
122 Amt
= ConstantInt::get(AI
.getArraySize()->getType(), Scale
);
123 // Insert before the alloca, not before the cast.
124 Amt
= AllocaBuilder
.CreateMul(Amt
, NumElements
, "tmp");
127 if (uint64_t Offset
= (AllocElTySize
*ArrayOffset
)/CastElTySize
) {
128 Value
*Off
= ConstantInt::get(AI
.getArraySize()->getType(),
130 Amt
= AllocaBuilder
.CreateAdd(Amt
, Off
, "tmp");
133 AllocaInst
*New
= AllocaBuilder
.CreateAlloca(CastElTy
, Amt
);
134 New
->setAlignment(AI
.getAlignment());
137 // If the allocation has multiple real uses, insert a cast and change all
138 // things that used it to use the new cast. This will also hack on CI, but it
140 if (!AI
.hasOneUse()) {
141 // New is the allocation instruction, pointer typed. AI is the original
142 // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
143 Value
*NewCast
= AllocaBuilder
.CreateBitCast(New
, AI
.getType(), "tmpcast");
144 ReplaceInstUsesWith(AI
, NewCast
);
146 return ReplaceInstUsesWith(CI
, New
);
151 /// EvaluateInDifferentType - Given an expression that
152 /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
153 /// insert the code to evaluate the expression.
154 Value
*InstCombiner::EvaluateInDifferentType(Value
*V
, const Type
*Ty
,
156 if (Constant
*C
= dyn_cast
<Constant
>(V
)) {
157 C
= ConstantExpr::getIntegerCast(C
, Ty
, isSigned
/*Sext or ZExt*/);
158 // If we got a constantexpr back, try to simplify it with TD info.
159 if (ConstantExpr
*CE
= dyn_cast
<ConstantExpr
>(C
))
160 C
= ConstantFoldConstantExpression(CE
, TD
);
164 // Otherwise, it must be an instruction.
165 Instruction
*I
= cast
<Instruction
>(V
);
166 Instruction
*Res
= 0;
167 unsigned Opc
= I
->getOpcode();
169 case Instruction::Add
:
170 case Instruction::Sub
:
171 case Instruction::Mul
:
172 case Instruction::And
:
173 case Instruction::Or
:
174 case Instruction::Xor
:
175 case Instruction::AShr
:
176 case Instruction::LShr
:
177 case Instruction::Shl
:
178 case Instruction::UDiv
:
179 case Instruction::URem
: {
180 Value
*LHS
= EvaluateInDifferentType(I
->getOperand(0), Ty
, isSigned
);
181 Value
*RHS
= EvaluateInDifferentType(I
->getOperand(1), Ty
, isSigned
);
182 Res
= BinaryOperator::Create((Instruction::BinaryOps
)Opc
, LHS
, RHS
);
185 case Instruction::Trunc
:
186 case Instruction::ZExt
:
187 case Instruction::SExt
:
188 // If the source type of the cast is the type we're trying for then we can
189 // just return the source. There's no need to insert it because it is not
191 if (I
->getOperand(0)->getType() == Ty
)
192 return I
->getOperand(0);
194 // Otherwise, must be the same type of cast, so just reinsert a new one.
195 // This also handles the case of zext(trunc(x)) -> zext(x).
196 Res
= CastInst::CreateIntegerCast(I
->getOperand(0), Ty
,
197 Opc
== Instruction::SExt
);
199 case Instruction::Select
: {
200 Value
*True
= EvaluateInDifferentType(I
->getOperand(1), Ty
, isSigned
);
201 Value
*False
= EvaluateInDifferentType(I
->getOperand(2), Ty
, isSigned
);
202 Res
= SelectInst::Create(I
->getOperand(0), True
, False
);
205 case Instruction::PHI
: {
206 PHINode
*OPN
= cast
<PHINode
>(I
);
207 PHINode
*NPN
= PHINode::Create(Ty
, OPN
->getNumIncomingValues());
208 for (unsigned i
= 0, e
= OPN
->getNumIncomingValues(); i
!= e
; ++i
) {
209 Value
*V
=EvaluateInDifferentType(OPN
->getIncomingValue(i
), Ty
, isSigned
);
210 NPN
->addIncoming(V
, OPN
->getIncomingBlock(i
));
216 // TODO: Can handle more cases here.
217 llvm_unreachable("Unreachable!");
222 return InsertNewInstWith(Res
, *I
);
226 /// This function is a wrapper around CastInst::isEliminableCastPair. It
227 /// simply extracts arguments and returns what that function returns.
228 static Instruction::CastOps
229 isEliminableCastPair(
230 const CastInst
*CI
, ///< The first cast instruction
231 unsigned opcode
, ///< The opcode of the second cast instruction
232 const Type
*DstTy
, ///< The target type for the second cast instruction
233 TargetData
*TD
///< The target data for pointer size
236 const Type
*SrcTy
= CI
->getOperand(0)->getType(); // A from above
237 const Type
*MidTy
= CI
->getType(); // B from above
239 // Get the opcodes of the two Cast instructions
240 Instruction::CastOps firstOp
= Instruction::CastOps(CI
->getOpcode());
241 Instruction::CastOps secondOp
= Instruction::CastOps(opcode
);
243 unsigned Res
= CastInst::isEliminableCastPair(firstOp
, secondOp
, SrcTy
, MidTy
,
245 TD
? TD
->getIntPtrType(CI
->getContext()) : 0);
247 // We don't want to form an inttoptr or ptrtoint that converts to an integer
248 // type that differs from the pointer size.
249 if ((Res
== Instruction::IntToPtr
&&
250 (!TD
|| SrcTy
!= TD
->getIntPtrType(CI
->getContext()))) ||
251 (Res
== Instruction::PtrToInt
&&
252 (!TD
|| DstTy
!= TD
->getIntPtrType(CI
->getContext()))))
255 return Instruction::CastOps(Res
);
258 /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
259 /// results in any code being generated and is interesting to optimize out. If
260 /// the cast can be eliminated by some other simple transformation, we prefer
261 /// to do the simplification first.
262 bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc
, const Value
*V
,
264 // Noop casts and casts of constants should be eliminated trivially.
265 if (V
->getType() == Ty
|| isa
<Constant
>(V
)) return false;
267 // If this is another cast that can be eliminated, we prefer to have it
269 if (const CastInst
*CI
= dyn_cast
<CastInst
>(V
))
270 if (isEliminableCastPair(CI
, opc
, Ty
, TD
))
273 // If this is a vector sext from a compare, then we don't want to break the
274 // idiom where each element of the extended vector is either zero or all ones.
275 if (opc
== Instruction::SExt
&& isa
<CmpInst
>(V
) && Ty
->isVectorTy())
282 /// @brief Implement the transforms common to all CastInst visitors.
283 Instruction
*InstCombiner::commonCastTransforms(CastInst
&CI
) {
284 Value
*Src
= CI
.getOperand(0);
286 // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
288 if (CastInst
*CSrc
= dyn_cast
<CastInst
>(Src
)) { // A->B->C cast
289 if (Instruction::CastOps opc
=
290 isEliminableCastPair(CSrc
, CI
.getOpcode(), CI
.getType(), TD
)) {
291 // The first cast (CSrc) is eliminable so we need to fix up or replace
292 // the second cast (CI). CSrc will then have a good chance of being dead.
293 return CastInst::Create(opc
, CSrc
->getOperand(0), CI
.getType());
297 // If we are casting a select then fold the cast into the select
298 if (SelectInst
*SI
= dyn_cast
<SelectInst
>(Src
))
299 if (Instruction
*NV
= FoldOpIntoSelect(CI
, SI
))
302 // If we are casting a PHI then fold the cast into the PHI
303 if (isa
<PHINode
>(Src
)) {
304 // We don't do this if this would create a PHI node with an illegal type if
305 // it is currently legal.
306 if (!Src
->getType()->isIntegerTy() ||
307 !CI
.getType()->isIntegerTy() ||
308 ShouldChangeType(CI
.getType(), Src
->getType()))
309 if (Instruction
*NV
= FoldOpIntoPhi(CI
))
316 /// CanEvaluateTruncated - Return true if we can evaluate the specified
317 /// expression tree as type Ty instead of its larger type, and arrive with the
318 /// same value. This is used by code that tries to eliminate truncates.
320 /// Ty will always be a type smaller than V. We should return true if trunc(V)
321 /// can be computed by computing V in the smaller type. If V is an instruction,
322 /// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
323 /// makes sense if x and y can be efficiently truncated.
325 /// This function works on both vectors and scalars.
327 static bool CanEvaluateTruncated(Value
*V
, const Type
*Ty
) {
328 // We can always evaluate constants in another type.
329 if (isa
<Constant
>(V
))
332 Instruction
*I
= dyn_cast
<Instruction
>(V
);
333 if (!I
) return false;
335 const Type
*OrigTy
= V
->getType();
337 // If this is an extension from the dest type, we can eliminate it, even if it
338 // has multiple uses.
339 if ((isa
<ZExtInst
>(I
) || isa
<SExtInst
>(I
)) &&
340 I
->getOperand(0)->getType() == Ty
)
343 // We can't extend or shrink something that has multiple uses: doing so would
344 // require duplicating the instruction in general, which isn't profitable.
345 if (!I
->hasOneUse()) return false;
347 unsigned Opc
= I
->getOpcode();
349 case Instruction::Add
:
350 case Instruction::Sub
:
351 case Instruction::Mul
:
352 case Instruction::And
:
353 case Instruction::Or
:
354 case Instruction::Xor
:
355 // These operators can all arbitrarily be extended or truncated.
356 return CanEvaluateTruncated(I
->getOperand(0), Ty
) &&
357 CanEvaluateTruncated(I
->getOperand(1), Ty
);
359 case Instruction::UDiv
:
360 case Instruction::URem
: {
361 // UDiv and URem can be truncated if all the truncated bits are zero.
362 uint32_t OrigBitWidth
= OrigTy
->getScalarSizeInBits();
363 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
364 if (BitWidth
< OrigBitWidth
) {
365 APInt Mask
= APInt::getHighBitsSet(OrigBitWidth
, OrigBitWidth
-BitWidth
);
366 if (MaskedValueIsZero(I
->getOperand(0), Mask
) &&
367 MaskedValueIsZero(I
->getOperand(1), Mask
)) {
368 return CanEvaluateTruncated(I
->getOperand(0), Ty
) &&
369 CanEvaluateTruncated(I
->getOperand(1), Ty
);
374 case Instruction::Shl
:
375 // If we are truncating the result of this SHL, and if it's a shift of a
376 // constant amount, we can always perform a SHL in a smaller type.
377 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
378 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
379 if (CI
->getLimitedValue(BitWidth
) < BitWidth
)
380 return CanEvaluateTruncated(I
->getOperand(0), Ty
);
383 case Instruction::LShr
:
384 // If this is a truncate of a logical shr, we can truncate it to a smaller
385 // lshr iff we know that the bits we would otherwise be shifting in are
387 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
388 uint32_t OrigBitWidth
= OrigTy
->getScalarSizeInBits();
389 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
390 if (MaskedValueIsZero(I
->getOperand(0),
391 APInt::getHighBitsSet(OrigBitWidth
, OrigBitWidth
-BitWidth
)) &&
392 CI
->getLimitedValue(BitWidth
) < BitWidth
) {
393 return CanEvaluateTruncated(I
->getOperand(0), Ty
);
397 case Instruction::Trunc
:
398 // trunc(trunc(x)) -> trunc(x)
400 case Instruction::ZExt
:
401 case Instruction::SExt
:
402 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
403 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
405 case Instruction::Select
: {
406 SelectInst
*SI
= cast
<SelectInst
>(I
);
407 return CanEvaluateTruncated(SI
->getTrueValue(), Ty
) &&
408 CanEvaluateTruncated(SI
->getFalseValue(), Ty
);
410 case Instruction::PHI
: {
411 // We can change a phi if we can change all operands. Note that we never
412 // get into trouble with cyclic PHIs here because we only consider
413 // instructions with a single use.
414 PHINode
*PN
= cast
<PHINode
>(I
);
415 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
416 if (!CanEvaluateTruncated(PN
->getIncomingValue(i
), Ty
))
421 // TODO: Can handle more cases here.
428 Instruction
*InstCombiner::visitTrunc(TruncInst
&CI
) {
429 if (Instruction
*Result
= commonCastTransforms(CI
))
432 // See if we can simplify any instructions used by the input whose sole
433 // purpose is to compute bits we don't care about.
434 if (SimplifyDemandedInstructionBits(CI
))
437 Value
*Src
= CI
.getOperand(0);
438 const Type
*DestTy
= CI
.getType(), *SrcTy
= Src
->getType();
440 // Attempt to truncate the entire input expression tree to the destination
441 // type. Only do this if the dest type is a simple type, don't convert the
442 // expression tree to something weird like i93 unless the source is also
444 if ((DestTy
->isVectorTy() || ShouldChangeType(SrcTy
, DestTy
)) &&
445 CanEvaluateTruncated(Src
, DestTy
)) {
447 // If this cast is a truncate, evaluting in a different type always
448 // eliminates the cast, so it is always a win.
449 DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
450 " to avoid cast: " << CI
<< '\n');
451 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, false);
452 assert(Res
->getType() == DestTy
);
453 return ReplaceInstUsesWith(CI
, Res
);
456 // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
457 if (DestTy
->getScalarSizeInBits() == 1) {
458 Constant
*One
= ConstantInt::get(Src
->getType(), 1);
459 Src
= Builder
->CreateAnd(Src
, One
, "tmp");
460 Value
*Zero
= Constant::getNullValue(Src
->getType());
461 return new ICmpInst(ICmpInst::ICMP_NE
, Src
, Zero
);
464 // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
465 Value
*A
= 0; ConstantInt
*Cst
= 0;
466 if (Src
->hasOneUse() &&
467 match(Src
, m_LShr(m_ZExt(m_Value(A
)), m_ConstantInt(Cst
)))) {
468 // We have three types to worry about here, the type of A, the source of
469 // the truncate (MidSize), and the destination of the truncate. We know that
470 // ASize < MidSize and MidSize > ResultSize, but don't know the relation
471 // between ASize and ResultSize.
472 unsigned ASize
= A
->getType()->getPrimitiveSizeInBits();
474 // If the shift amount is larger than the size of A, then the result is
475 // known to be zero because all the input bits got shifted out.
476 if (Cst
->getZExtValue() >= ASize
)
477 return ReplaceInstUsesWith(CI
, Constant::getNullValue(CI
.getType()));
479 // Since we're doing an lshr and a zero extend, and know that the shift
480 // amount is smaller than ASize, it is always safe to do the shift in A's
481 // type, then zero extend or truncate to the result.
482 Value
*Shift
= Builder
->CreateLShr(A
, Cst
->getZExtValue());
483 Shift
->takeName(Src
);
484 return CastInst::CreateIntegerCast(Shift
, CI
.getType(), false);
487 // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
488 // type isn't non-native.
489 if (Src
->hasOneUse() && isa
<IntegerType
>(Src
->getType()) &&
490 ShouldChangeType(Src
->getType(), CI
.getType()) &&
491 match(Src
, m_And(m_Value(A
), m_ConstantInt(Cst
)))) {
492 Value
*NewTrunc
= Builder
->CreateTrunc(A
, CI
.getType(), A
->getName()+".tr");
493 return BinaryOperator::CreateAnd(NewTrunc
,
494 ConstantExpr::getTrunc(Cst
, CI
.getType()));
500 /// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
501 /// in order to eliminate the icmp.
502 Instruction
*InstCombiner::transformZExtICmp(ICmpInst
*ICI
, Instruction
&CI
,
504 // If we are just checking for a icmp eq of a single bit and zext'ing it
505 // to an integer, then shift the bit to the appropriate place and then
506 // cast to integer to avoid the comparison.
507 if (ConstantInt
*Op1C
= dyn_cast
<ConstantInt
>(ICI
->getOperand(1))) {
508 const APInt
&Op1CV
= Op1C
->getValue();
510 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
511 // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
512 if ((ICI
->getPredicate() == ICmpInst::ICMP_SLT
&& Op1CV
== 0) ||
513 (ICI
->getPredicate() == ICmpInst::ICMP_SGT
&&Op1CV
.isAllOnesValue())) {
514 if (!DoXform
) return ICI
;
516 Value
*In
= ICI
->getOperand(0);
517 Value
*Sh
= ConstantInt::get(In
->getType(),
518 In
->getType()->getScalarSizeInBits()-1);
519 In
= Builder
->CreateLShr(In
, Sh
, In
->getName()+".lobit");
520 if (In
->getType() != CI
.getType())
521 In
= Builder
->CreateIntCast(In
, CI
.getType(), false/*ZExt*/, "tmp");
523 if (ICI
->getPredicate() == ICmpInst::ICMP_SGT
) {
524 Constant
*One
= ConstantInt::get(In
->getType(), 1);
525 In
= Builder
->CreateXor(In
, One
, In
->getName()+".not");
528 return ReplaceInstUsesWith(CI
, In
);
533 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
534 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
535 // zext (X == 1) to i32 --> X iff X has only the low bit set.
536 // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
537 // zext (X != 0) to i32 --> X iff X has only the low bit set.
538 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
539 // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
540 // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
541 if ((Op1CV
== 0 || Op1CV
.isPowerOf2()) &&
542 // This only works for EQ and NE
544 // If Op1C some other power of two, convert:
545 uint32_t BitWidth
= Op1C
->getType()->getBitWidth();
546 APInt
KnownZero(BitWidth
, 0), KnownOne(BitWidth
, 0);
547 APInt
TypeMask(APInt::getAllOnesValue(BitWidth
));
548 ComputeMaskedBits(ICI
->getOperand(0), TypeMask
, KnownZero
, KnownOne
);
550 APInt
KnownZeroMask(~KnownZero
);
551 if (KnownZeroMask
.isPowerOf2()) { // Exactly 1 possible 1?
552 if (!DoXform
) return ICI
;
554 bool isNE
= ICI
->getPredicate() == ICmpInst::ICMP_NE
;
555 if (Op1CV
!= 0 && (Op1CV
!= KnownZeroMask
)) {
556 // (X&4) == 2 --> false
557 // (X&4) != 2 --> true
558 Constant
*Res
= ConstantInt::get(Type::getInt1Ty(CI
.getContext()),
560 Res
= ConstantExpr::getZExt(Res
, CI
.getType());
561 return ReplaceInstUsesWith(CI
, Res
);
564 uint32_t ShiftAmt
= KnownZeroMask
.logBase2();
565 Value
*In
= ICI
->getOperand(0);
567 // Perform a logical shr by shiftamt.
568 // Insert the shift to put the result in the low bit.
569 In
= Builder
->CreateLShr(In
, ConstantInt::get(In
->getType(),ShiftAmt
),
570 In
->getName()+".lobit");
573 if ((Op1CV
!= 0) == isNE
) { // Toggle the low bit.
574 Constant
*One
= ConstantInt::get(In
->getType(), 1);
575 In
= Builder
->CreateXor(In
, One
, "tmp");
578 if (CI
.getType() == In
->getType())
579 return ReplaceInstUsesWith(CI
, In
);
580 return CastInst::CreateIntegerCast(In
, CI
.getType(), false/*ZExt*/);
585 // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
586 // It is also profitable to transform icmp eq into not(xor(A, B)) because that
587 // may lead to additional simplifications.
588 if (ICI
->isEquality() && CI
.getType() == ICI
->getOperand(0)->getType()) {
589 if (const IntegerType
*ITy
= dyn_cast
<IntegerType
>(CI
.getType())) {
590 uint32_t BitWidth
= ITy
->getBitWidth();
591 Value
*LHS
= ICI
->getOperand(0);
592 Value
*RHS
= ICI
->getOperand(1);
594 APInt
KnownZeroLHS(BitWidth
, 0), KnownOneLHS(BitWidth
, 0);
595 APInt
KnownZeroRHS(BitWidth
, 0), KnownOneRHS(BitWidth
, 0);
596 APInt
TypeMask(APInt::getAllOnesValue(BitWidth
));
597 ComputeMaskedBits(LHS
, TypeMask
, KnownZeroLHS
, KnownOneLHS
);
598 ComputeMaskedBits(RHS
, TypeMask
, KnownZeroRHS
, KnownOneRHS
);
600 if (KnownZeroLHS
== KnownZeroRHS
&& KnownOneLHS
== KnownOneRHS
) {
601 APInt KnownBits
= KnownZeroLHS
| KnownOneLHS
;
602 APInt UnknownBit
= ~KnownBits
;
603 if (UnknownBit
.countPopulation() == 1) {
604 if (!DoXform
) return ICI
;
606 Value
*Result
= Builder
->CreateXor(LHS
, RHS
);
608 // Mask off any bits that are set and won't be shifted away.
609 if (KnownOneLHS
.uge(UnknownBit
))
610 Result
= Builder
->CreateAnd(Result
,
611 ConstantInt::get(ITy
, UnknownBit
));
613 // Shift the bit we're testing down to the lsb.
614 Result
= Builder
->CreateLShr(
615 Result
, ConstantInt::get(ITy
, UnknownBit
.countTrailingZeros()));
617 if (ICI
->getPredicate() == ICmpInst::ICMP_EQ
)
618 Result
= Builder
->CreateXor(Result
, ConstantInt::get(ITy
, 1));
619 Result
->takeName(ICI
);
620 return ReplaceInstUsesWith(CI
, Result
);
629 /// CanEvaluateZExtd - Determine if the specified value can be computed in the
630 /// specified wider type and produce the same low bits. If not, return false.
632 /// If this function returns true, it can also return a non-zero number of bits
633 /// (in BitsToClear) which indicates that the value it computes is correct for
634 /// the zero extend, but that the additional BitsToClear bits need to be zero'd
635 /// out. For example, to promote something like:
637 /// %B = trunc i64 %A to i32
638 /// %C = lshr i32 %B, 8
639 /// %E = zext i32 %C to i64
641 /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
642 /// set to 8 to indicate that the promoted value needs to have bits 24-31
643 /// cleared in addition to bits 32-63. Since an 'and' will be generated to
644 /// clear the top bits anyway, doing this has no extra cost.
646 /// This function works on both vectors and scalars.
647 static bool CanEvaluateZExtd(Value
*V
, const Type
*Ty
, unsigned &BitsToClear
) {
649 if (isa
<Constant
>(V
))
652 Instruction
*I
= dyn_cast
<Instruction
>(V
);
653 if (!I
) return false;
655 // If the input is a truncate from the destination type, we can trivially
656 // eliminate it, even if it has multiple uses.
657 // FIXME: This is currently disabled until codegen can handle this without
658 // pessimizing code, PR5997.
659 if (0 && isa
<TruncInst
>(I
) && I
->getOperand(0)->getType() == Ty
)
662 // We can't extend or shrink something that has multiple uses: doing so would
663 // require duplicating the instruction in general, which isn't profitable.
664 if (!I
->hasOneUse()) return false;
666 unsigned Opc
= I
->getOpcode(), Tmp
;
668 case Instruction::ZExt
: // zext(zext(x)) -> zext(x).
669 case Instruction::SExt
: // zext(sext(x)) -> sext(x).
670 case Instruction::Trunc
: // zext(trunc(x)) -> trunc(x) or zext(x)
672 case Instruction::And
:
673 case Instruction::Or
:
674 case Instruction::Xor
:
675 case Instruction::Add
:
676 case Instruction::Sub
:
677 case Instruction::Mul
:
678 case Instruction::Shl
:
679 if (!CanEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
) ||
680 !CanEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
))
682 // These can all be promoted if neither operand has 'bits to clear'.
683 if (BitsToClear
== 0 && Tmp
== 0)
686 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
687 // other side, BitsToClear is ok.
689 (Opc
== Instruction::And
|| Opc
== Instruction::Or
||
690 Opc
== Instruction::Xor
)) {
691 // We use MaskedValueIsZero here for generality, but the case we care
692 // about the most is constant RHS.
693 unsigned VSize
= V
->getType()->getScalarSizeInBits();
694 if (MaskedValueIsZero(I
->getOperand(1),
695 APInt::getHighBitsSet(VSize
, BitsToClear
)))
699 // Otherwise, we don't know how to analyze this BitsToClear case yet.
702 case Instruction::LShr
:
703 // We can promote lshr(x, cst) if we can promote x. This requires the
704 // ultimate 'and' to clear out the high zero bits we're clearing out though.
705 if (ConstantInt
*Amt
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
706 if (!CanEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
))
708 BitsToClear
+= Amt
->getZExtValue();
709 if (BitsToClear
> V
->getType()->getScalarSizeInBits())
710 BitsToClear
= V
->getType()->getScalarSizeInBits();
713 // Cannot promote variable LSHR.
715 case Instruction::Select
:
716 if (!CanEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
) ||
717 !CanEvaluateZExtd(I
->getOperand(2), Ty
, BitsToClear
) ||
718 // TODO: If important, we could handle the case when the BitsToClear are
719 // known zero in the disagreeing side.
724 case Instruction::PHI
: {
725 // We can change a phi if we can change all operands. Note that we never
726 // get into trouble with cyclic PHIs here because we only consider
727 // instructions with a single use.
728 PHINode
*PN
= cast
<PHINode
>(I
);
729 if (!CanEvaluateZExtd(PN
->getIncomingValue(0), Ty
, BitsToClear
))
731 for (unsigned i
= 1, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
732 if (!CanEvaluateZExtd(PN
->getIncomingValue(i
), Ty
, Tmp
) ||
733 // TODO: If important, we could handle the case when the BitsToClear
734 // are known zero in the disagreeing input.
740 // TODO: Can handle more cases here.
745 Instruction
*InstCombiner::visitZExt(ZExtInst
&CI
) {
746 // If this zero extend is only used by a truncate, let the truncate by
747 // eliminated before we try to optimize this zext.
748 if (CI
.hasOneUse() && isa
<TruncInst
>(CI
.use_back()))
751 // If one of the common conversion will work, do it.
752 if (Instruction
*Result
= commonCastTransforms(CI
))
755 // See if we can simplify any instructions used by the input whose sole
756 // purpose is to compute bits we don't care about.
757 if (SimplifyDemandedInstructionBits(CI
))
760 Value
*Src
= CI
.getOperand(0);
761 const Type
*SrcTy
= Src
->getType(), *DestTy
= CI
.getType();
763 // Attempt to extend the entire input expression tree to the destination
764 // type. Only do this if the dest type is a simple type, don't convert the
765 // expression tree to something weird like i93 unless the source is also
767 unsigned BitsToClear
;
768 if ((DestTy
->isVectorTy() || ShouldChangeType(SrcTy
, DestTy
)) &&
769 CanEvaluateZExtd(Src
, DestTy
, BitsToClear
)) {
770 assert(BitsToClear
< SrcTy
->getScalarSizeInBits() &&
771 "Unreasonable BitsToClear");
773 // Okay, we can transform this! Insert the new expression now.
774 DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
775 " to avoid zero extend: " << CI
);
776 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, false);
777 assert(Res
->getType() == DestTy
);
779 uint32_t SrcBitsKept
= SrcTy
->getScalarSizeInBits()-BitsToClear
;
780 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
782 // If the high bits are already filled with zeros, just replace this
783 // cast with the result.
784 if (MaskedValueIsZero(Res
, APInt::getHighBitsSet(DestBitSize
,
785 DestBitSize
-SrcBitsKept
)))
786 return ReplaceInstUsesWith(CI
, Res
);
788 // We need to emit an AND to clear the high bits.
789 Constant
*C
= ConstantInt::get(Res
->getType(),
790 APInt::getLowBitsSet(DestBitSize
, SrcBitsKept
));
791 return BinaryOperator::CreateAnd(Res
, C
);
794 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
795 // types and if the sizes are just right we can convert this into a logical
796 // 'and' which will be much cheaper than the pair of casts.
797 if (TruncInst
*CSrc
= dyn_cast
<TruncInst
>(Src
)) { // A->B->C cast
798 // TODO: Subsume this into EvaluateInDifferentType.
800 // Get the sizes of the types involved. We know that the intermediate type
801 // will be smaller than A or C, but don't know the relation between A and C.
802 Value
*A
= CSrc
->getOperand(0);
803 unsigned SrcSize
= A
->getType()->getScalarSizeInBits();
804 unsigned MidSize
= CSrc
->getType()->getScalarSizeInBits();
805 unsigned DstSize
= CI
.getType()->getScalarSizeInBits();
806 // If we're actually extending zero bits, then if
807 // SrcSize < DstSize: zext(a & mask)
808 // SrcSize == DstSize: a & mask
809 // SrcSize > DstSize: trunc(a) & mask
810 if (SrcSize
< DstSize
) {
811 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
812 Constant
*AndConst
= ConstantInt::get(A
->getType(), AndValue
);
813 Value
*And
= Builder
->CreateAnd(A
, AndConst
, CSrc
->getName()+".mask");
814 return new ZExtInst(And
, CI
.getType());
817 if (SrcSize
== DstSize
) {
818 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
819 return BinaryOperator::CreateAnd(A
, ConstantInt::get(A
->getType(),
822 if (SrcSize
> DstSize
) {
823 Value
*Trunc
= Builder
->CreateTrunc(A
, CI
.getType(), "tmp");
824 APInt
AndValue(APInt::getLowBitsSet(DstSize
, MidSize
));
825 return BinaryOperator::CreateAnd(Trunc
,
826 ConstantInt::get(Trunc
->getType(),
831 if (ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(Src
))
832 return transformZExtICmp(ICI
, CI
);
834 BinaryOperator
*SrcI
= dyn_cast
<BinaryOperator
>(Src
);
835 if (SrcI
&& SrcI
->getOpcode() == Instruction::Or
) {
836 // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
837 // of the (zext icmp) will be transformed.
838 ICmpInst
*LHS
= dyn_cast
<ICmpInst
>(SrcI
->getOperand(0));
839 ICmpInst
*RHS
= dyn_cast
<ICmpInst
>(SrcI
->getOperand(1));
840 if (LHS
&& RHS
&& LHS
->hasOneUse() && RHS
->hasOneUse() &&
841 (transformZExtICmp(LHS
, CI
, false) ||
842 transformZExtICmp(RHS
, CI
, false))) {
843 Value
*LCast
= Builder
->CreateZExt(LHS
, CI
.getType(), LHS
->getName());
844 Value
*RCast
= Builder
->CreateZExt(RHS
, CI
.getType(), RHS
->getName());
845 return BinaryOperator::Create(Instruction::Or
, LCast
, RCast
);
849 // zext(trunc(t) & C) -> (t & zext(C)).
850 if (SrcI
&& SrcI
->getOpcode() == Instruction::And
&& SrcI
->hasOneUse())
851 if (ConstantInt
*C
= dyn_cast
<ConstantInt
>(SrcI
->getOperand(1)))
852 if (TruncInst
*TI
= dyn_cast
<TruncInst
>(SrcI
->getOperand(0))) {
853 Value
*TI0
= TI
->getOperand(0);
854 if (TI0
->getType() == CI
.getType())
856 BinaryOperator::CreateAnd(TI0
,
857 ConstantExpr::getZExt(C
, CI
.getType()));
860 // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
861 if (SrcI
&& SrcI
->getOpcode() == Instruction::Xor
&& SrcI
->hasOneUse())
862 if (ConstantInt
*C
= dyn_cast
<ConstantInt
>(SrcI
->getOperand(1)))
863 if (BinaryOperator
*And
= dyn_cast
<BinaryOperator
>(SrcI
->getOperand(0)))
864 if (And
->getOpcode() == Instruction::And
&& And
->hasOneUse() &&
865 And
->getOperand(1) == C
)
866 if (TruncInst
*TI
= dyn_cast
<TruncInst
>(And
->getOperand(0))) {
867 Value
*TI0
= TI
->getOperand(0);
868 if (TI0
->getType() == CI
.getType()) {
869 Constant
*ZC
= ConstantExpr::getZExt(C
, CI
.getType());
870 Value
*NewAnd
= Builder
->CreateAnd(TI0
, ZC
, "tmp");
871 return BinaryOperator::CreateXor(NewAnd
, ZC
);
875 // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
877 if (SrcI
&& SrcI
->hasOneUse() && SrcI
->getType()->isIntegerTy(1) &&
878 match(SrcI
, m_Not(m_Value(X
))) &&
879 (!X
->hasOneUse() || !isa
<CmpInst
>(X
))) {
880 Value
*New
= Builder
->CreateZExt(X
, CI
.getType());
881 return BinaryOperator::CreateXor(New
, ConstantInt::get(CI
.getType(), 1));
887 /// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
888 /// in order to eliminate the icmp.
889 Instruction
*InstCombiner::transformSExtICmp(ICmpInst
*ICI
, Instruction
&CI
) {
890 Value
*Op0
= ICI
->getOperand(0), *Op1
= ICI
->getOperand(1);
891 ICmpInst::Predicate Pred
= ICI
->getPredicate();
893 if (ConstantInt
*Op1C
= dyn_cast
<ConstantInt
>(Op1
)) {
894 // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
895 // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
896 if ((Pred
== ICmpInst::ICMP_SLT
&& Op1C
->isZero()) ||
897 (Pred
== ICmpInst::ICMP_SGT
&& Op1C
->isAllOnesValue())) {
899 Value
*Sh
= ConstantInt::get(Op0
->getType(),
900 Op0
->getType()->getScalarSizeInBits()-1);
901 Value
*In
= Builder
->CreateAShr(Op0
, Sh
, Op0
->getName()+".lobit");
902 if (In
->getType() != CI
.getType())
903 In
= Builder
->CreateIntCast(In
, CI
.getType(), true/*SExt*/, "tmp");
905 if (Pred
== ICmpInst::ICMP_SGT
)
906 In
= Builder
->CreateNot(In
, In
->getName()+".not");
907 return ReplaceInstUsesWith(CI
, In
);
910 // If we know that only one bit of the LHS of the icmp can be set and we
911 // have an equality comparison with zero or a power of 2, we can transform
912 // the icmp and sext into bitwise/integer operations.
913 if (ICI
->hasOneUse() &&
914 ICI
->isEquality() && (Op1C
->isZero() || Op1C
->getValue().isPowerOf2())){
915 unsigned BitWidth
= Op1C
->getType()->getBitWidth();
916 APInt
KnownZero(BitWidth
, 0), KnownOne(BitWidth
, 0);
917 APInt
TypeMask(APInt::getAllOnesValue(BitWidth
));
918 ComputeMaskedBits(Op0
, TypeMask
, KnownZero
, KnownOne
);
920 APInt
KnownZeroMask(~KnownZero
);
921 if (KnownZeroMask
.isPowerOf2()) {
922 Value
*In
= ICI
->getOperand(0);
924 // If the icmp tests for a known zero bit we can constant fold it.
925 if (!Op1C
->isZero() && Op1C
->getValue() != KnownZeroMask
) {
926 Value
*V
= Pred
== ICmpInst::ICMP_NE
?
927 ConstantInt::getAllOnesValue(CI
.getType()) :
928 ConstantInt::getNullValue(CI
.getType());
929 return ReplaceInstUsesWith(CI
, V
);
932 if (!Op1C
->isZero() == (Pred
== ICmpInst::ICMP_NE
)) {
933 // sext ((x & 2^n) == 0) -> (x >> n) - 1
934 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
935 unsigned ShiftAmt
= KnownZeroMask
.countTrailingZeros();
936 // Perform a right shift to place the desired bit in the LSB.
938 In
= Builder
->CreateLShr(In
,
939 ConstantInt::get(In
->getType(), ShiftAmt
));
941 // At this point "In" is either 1 or 0. Subtract 1 to turn
942 // {1, 0} -> {0, -1}.
943 In
= Builder
->CreateAdd(In
,
944 ConstantInt::getAllOnesValue(In
->getType()),
947 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
948 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
949 unsigned ShiftAmt
= KnownZeroMask
.countLeadingZeros();
950 // Perform a left shift to place the desired bit in the MSB.
952 In
= Builder
->CreateShl(In
,
953 ConstantInt::get(In
->getType(), ShiftAmt
));
955 // Distribute the bit over the whole bit width.
956 In
= Builder
->CreateAShr(In
, ConstantInt::get(In
->getType(),
957 BitWidth
- 1), "sext");
960 if (CI
.getType() == In
->getType())
961 return ReplaceInstUsesWith(CI
, In
);
962 return CastInst::CreateIntegerCast(In
, CI
.getType(), true/*SExt*/);
967 // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
968 if (const VectorType
*VTy
= dyn_cast
<VectorType
>(CI
.getType())) {
969 if (Pred
== ICmpInst::ICMP_SLT
&& match(Op1
, m_Zero()) &&
970 Op0
->getType() == CI
.getType()) {
971 const Type
*EltTy
= VTy
->getElementType();
973 // splat the shift constant to a constant vector.
974 Constant
*VSh
= ConstantInt::get(VTy
, EltTy
->getScalarSizeInBits()-1);
975 Value
*In
= Builder
->CreateAShr(Op0
, VSh
, Op0
->getName()+".lobit");
976 return ReplaceInstUsesWith(CI
, In
);
983 /// CanEvaluateSExtd - Return true if we can take the specified value
984 /// and return it as type Ty without inserting any new casts and without
985 /// changing the value of the common low bits. This is used by code that tries
986 /// to promote integer operations to a wider types will allow us to eliminate
989 /// This function works on both vectors and scalars.
991 static bool CanEvaluateSExtd(Value
*V
, const Type
*Ty
) {
992 assert(V
->getType()->getScalarSizeInBits() < Ty
->getScalarSizeInBits() &&
993 "Can't sign extend type to a smaller type");
994 // If this is a constant, it can be trivially promoted.
995 if (isa
<Constant
>(V
))
998 Instruction
*I
= dyn_cast
<Instruction
>(V
);
999 if (!I
) return false;
1001 // If this is a truncate from the dest type, we can trivially eliminate it,
1002 // even if it has multiple uses.
1003 // FIXME: This is currently disabled until codegen can handle this without
1004 // pessimizing code, PR5997.
1005 if (0 && isa
<TruncInst
>(I
) && I
->getOperand(0)->getType() == Ty
)
1008 // We can't extend or shrink something that has multiple uses: doing so would
1009 // require duplicating the instruction in general, which isn't profitable.
1010 if (!I
->hasOneUse()) return false;
1012 switch (I
->getOpcode()) {
1013 case Instruction::SExt
: // sext(sext(x)) -> sext(x)
1014 case Instruction::ZExt
: // sext(zext(x)) -> zext(x)
1015 case Instruction::Trunc
: // sext(trunc(x)) -> trunc(x) or sext(x)
1017 case Instruction::And
:
1018 case Instruction::Or
:
1019 case Instruction::Xor
:
1020 case Instruction::Add
:
1021 case Instruction::Sub
:
1022 case Instruction::Mul
:
1023 // These operators can all arbitrarily be extended if their inputs can.
1024 return CanEvaluateSExtd(I
->getOperand(0), Ty
) &&
1025 CanEvaluateSExtd(I
->getOperand(1), Ty
);
1027 //case Instruction::Shl: TODO
1028 //case Instruction::LShr: TODO
1030 case Instruction::Select
:
1031 return CanEvaluateSExtd(I
->getOperand(1), Ty
) &&
1032 CanEvaluateSExtd(I
->getOperand(2), Ty
);
1034 case Instruction::PHI
: {
1035 // We can change a phi if we can change all operands. Note that we never
1036 // get into trouble with cyclic PHIs here because we only consider
1037 // instructions with a single use.
1038 PHINode
*PN
= cast
<PHINode
>(I
);
1039 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
1040 if (!CanEvaluateSExtd(PN
->getIncomingValue(i
), Ty
)) return false;
1044 // TODO: Can handle more cases here.
1051 Instruction
*InstCombiner::visitSExt(SExtInst
&CI
) {
1052 // If this sign extend is only used by a truncate, let the truncate by
1053 // eliminated before we try to optimize this zext.
1054 if (CI
.hasOneUse() && isa
<TruncInst
>(CI
.use_back()))
1057 if (Instruction
*I
= commonCastTransforms(CI
))
1060 // See if we can simplify any instructions used by the input whose sole
1061 // purpose is to compute bits we don't care about.
1062 if (SimplifyDemandedInstructionBits(CI
))
1065 Value
*Src
= CI
.getOperand(0);
1066 const Type
*SrcTy
= Src
->getType(), *DestTy
= CI
.getType();
1068 // Attempt to extend the entire input expression tree to the destination
1069 // type. Only do this if the dest type is a simple type, don't convert the
1070 // expression tree to something weird like i93 unless the source is also
1072 if ((DestTy
->isVectorTy() || ShouldChangeType(SrcTy
, DestTy
)) &&
1073 CanEvaluateSExtd(Src
, DestTy
)) {
1074 // Okay, we can transform this! Insert the new expression now.
1075 DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1076 " to avoid sign extend: " << CI
);
1077 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, true);
1078 assert(Res
->getType() == DestTy
);
1080 uint32_t SrcBitSize
= SrcTy
->getScalarSizeInBits();
1081 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
1083 // If the high bits are already filled with sign bit, just replace this
1084 // cast with the result.
1085 if (ComputeNumSignBits(Res
) > DestBitSize
- SrcBitSize
)
1086 return ReplaceInstUsesWith(CI
, Res
);
1088 // We need to emit a shl + ashr to do the sign extend.
1089 Value
*ShAmt
= ConstantInt::get(DestTy
, DestBitSize
-SrcBitSize
);
1090 return BinaryOperator::CreateAShr(Builder
->CreateShl(Res
, ShAmt
, "sext"),
1094 // If this input is a trunc from our destination, then turn sext(trunc(x))
1096 if (TruncInst
*TI
= dyn_cast
<TruncInst
>(Src
))
1097 if (TI
->hasOneUse() && TI
->getOperand(0)->getType() == DestTy
) {
1098 uint32_t SrcBitSize
= SrcTy
->getScalarSizeInBits();
1099 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
1101 // We need to emit a shl + ashr to do the sign extend.
1102 Value
*ShAmt
= ConstantInt::get(DestTy
, DestBitSize
-SrcBitSize
);
1103 Value
*Res
= Builder
->CreateShl(TI
->getOperand(0), ShAmt
, "sext");
1104 return BinaryOperator::CreateAShr(Res
, ShAmt
);
1107 if (ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(Src
))
1108 return transformSExtICmp(ICI
, CI
);
1110 // If the input is a shl/ashr pair of a same constant, then this is a sign
1111 // extension from a smaller value. If we could trust arbitrary bitwidth
1112 // integers, we could turn this into a truncate to the smaller bit and then
1113 // use a sext for the whole extension. Since we don't, look deeper and check
1114 // for a truncate. If the source and dest are the same type, eliminate the
1115 // trunc and extend and just do shifts. For example, turn:
1116 // %a = trunc i32 %i to i8
1117 // %b = shl i8 %a, 6
1118 // %c = ashr i8 %b, 6
1119 // %d = sext i8 %c to i32
1121 // %a = shl i32 %i, 30
1122 // %d = ashr i32 %a, 30
1124 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1125 ConstantInt
*BA
= 0, *CA
= 0;
1126 if (match(Src
, m_AShr(m_Shl(m_Trunc(m_Value(A
)), m_ConstantInt(BA
)),
1127 m_ConstantInt(CA
))) &&
1128 BA
== CA
&& A
->getType() == CI
.getType()) {
1129 unsigned MidSize
= Src
->getType()->getScalarSizeInBits();
1130 unsigned SrcDstSize
= CI
.getType()->getScalarSizeInBits();
1131 unsigned ShAmt
= CA
->getZExtValue()+SrcDstSize
-MidSize
;
1132 Constant
*ShAmtV
= ConstantInt::get(CI
.getType(), ShAmt
);
1133 A
= Builder
->CreateShl(A
, ShAmtV
, CI
.getName());
1134 return BinaryOperator::CreateAShr(A
, ShAmtV
);
1141 /// FitsInFPType - Return a Constant* for the specified FP constant if it fits
1142 /// in the specified FP type without changing its value.
1143 static Constant
*FitsInFPType(ConstantFP
*CFP
, const fltSemantics
&Sem
) {
1145 APFloat F
= CFP
->getValueAPF();
1146 (void)F
.convert(Sem
, APFloat::rmNearestTiesToEven
, &losesInfo
);
1148 return ConstantFP::get(CFP
->getContext(), F
);
1152 /// LookThroughFPExtensions - If this is an fp extension instruction, look
1153 /// through it until we get the source value.
1154 static Value
*LookThroughFPExtensions(Value
*V
) {
1155 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
1156 if (I
->getOpcode() == Instruction::FPExt
)
1157 return LookThroughFPExtensions(I
->getOperand(0));
1159 // If this value is a constant, return the constant in the smallest FP type
1160 // that can accurately represent it. This allows us to turn
1161 // (float)((double)X+2.0) into x+2.0f.
1162 if (ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(V
)) {
1163 if (CFP
->getType() == Type::getPPC_FP128Ty(V
->getContext()))
1164 return V
; // No constant folding of this.
1165 // See if the value can be truncated to float and then reextended.
1166 if (Value
*V
= FitsInFPType(CFP
, APFloat::IEEEsingle
))
1168 if (CFP
->getType()->isDoubleTy())
1169 return V
; // Won't shrink.
1170 if (Value
*V
= FitsInFPType(CFP
, APFloat::IEEEdouble
))
1172 // Don't try to shrink to various long double types.
1178 Instruction
*InstCombiner::visitFPTrunc(FPTruncInst
&CI
) {
1179 if (Instruction
*I
= commonCastTransforms(CI
))
1182 // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
1183 // smaller than the destination type, we can eliminate the truncate by doing
1184 // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
1185 // as many builtins (sqrt, etc).
1186 BinaryOperator
*OpI
= dyn_cast
<BinaryOperator
>(CI
.getOperand(0));
1187 if (OpI
&& OpI
->hasOneUse()) {
1188 switch (OpI
->getOpcode()) {
1190 case Instruction::FAdd
:
1191 case Instruction::FSub
:
1192 case Instruction::FMul
:
1193 case Instruction::FDiv
:
1194 case Instruction::FRem
:
1195 const Type
*SrcTy
= OpI
->getType();
1196 Value
*LHSTrunc
= LookThroughFPExtensions(OpI
->getOperand(0));
1197 Value
*RHSTrunc
= LookThroughFPExtensions(OpI
->getOperand(1));
1198 if (LHSTrunc
->getType() != SrcTy
&&
1199 RHSTrunc
->getType() != SrcTy
) {
1200 unsigned DstSize
= CI
.getType()->getScalarSizeInBits();
1201 // If the source types were both smaller than the destination type of
1202 // the cast, do this xform.
1203 if (LHSTrunc
->getType()->getScalarSizeInBits() <= DstSize
&&
1204 RHSTrunc
->getType()->getScalarSizeInBits() <= DstSize
) {
1205 LHSTrunc
= Builder
->CreateFPExt(LHSTrunc
, CI
.getType());
1206 RHSTrunc
= Builder
->CreateFPExt(RHSTrunc
, CI
.getType());
1207 return BinaryOperator::Create(OpI
->getOpcode(), LHSTrunc
, RHSTrunc
);
1214 // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
1215 // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
1216 CallInst
*Call
= dyn_cast
<CallInst
>(CI
.getOperand(0));
1217 if (Call
&& Call
->getCalledFunction() &&
1218 Call
->getCalledFunction()->getName() == "sqrt" &&
1219 Call
->getNumArgOperands() == 1) {
1220 CastInst
*Arg
= dyn_cast
<CastInst
>(Call
->getArgOperand(0));
1221 if (Arg
&& Arg
->getOpcode() == Instruction::FPExt
&&
1222 CI
.getType()->isFloatTy() &&
1223 Call
->getType()->isDoubleTy() &&
1224 Arg
->getType()->isDoubleTy() &&
1225 Arg
->getOperand(0)->getType()->isFloatTy()) {
1226 Function
*Callee
= Call
->getCalledFunction();
1227 Module
*M
= CI
.getParent()->getParent()->getParent();
1228 Constant
*SqrtfFunc
= M
->getOrInsertFunction("sqrtf",
1229 Callee
->getAttributes(),
1230 Builder
->getFloatTy(),
1231 Builder
->getFloatTy(),
1233 CallInst
*ret
= CallInst::Create(SqrtfFunc
, Arg
->getOperand(0),
1235 ret
->setAttributes(Callee
->getAttributes());
1238 // Remove the old Call. With -fmath-errno, it won't get marked readnone.
1239 ReplaceInstUsesWith(*Call
, UndefValue::get(Call
->getType()));
1240 EraseInstFromFunction(*Call
);
1248 Instruction
*InstCombiner::visitFPExt(CastInst
&CI
) {
1249 return commonCastTransforms(CI
);
1252 Instruction
*InstCombiner::visitFPToUI(FPToUIInst
&FI
) {
1253 Instruction
*OpI
= dyn_cast
<Instruction
>(FI
.getOperand(0));
1255 return commonCastTransforms(FI
);
1257 // fptoui(uitofp(X)) --> X
1258 // fptoui(sitofp(X)) --> X
1259 // This is safe if the intermediate type has enough bits in its mantissa to
1260 // accurately represent all values of X. For example, do not do this with
1261 // i64->float->i64. This is also safe for sitofp case, because any negative
1262 // 'X' value would cause an undefined result for the fptoui.
1263 if ((isa
<UIToFPInst
>(OpI
) || isa
<SIToFPInst
>(OpI
)) &&
1264 OpI
->getOperand(0)->getType() == FI
.getType() &&
1265 (int)FI
.getType()->getScalarSizeInBits() < /*extra bit for sign */
1266 OpI
->getType()->getFPMantissaWidth())
1267 return ReplaceInstUsesWith(FI
, OpI
->getOperand(0));
1269 return commonCastTransforms(FI
);
1272 Instruction
*InstCombiner::visitFPToSI(FPToSIInst
&FI
) {
1273 Instruction
*OpI
= dyn_cast
<Instruction
>(FI
.getOperand(0));
1275 return commonCastTransforms(FI
);
1277 // fptosi(sitofp(X)) --> X
1278 // fptosi(uitofp(X)) --> X
1279 // This is safe if the intermediate type has enough bits in its mantissa to
1280 // accurately represent all values of X. For example, do not do this with
1281 // i64->float->i64. This is also safe for sitofp case, because any negative
1282 // 'X' value would cause an undefined result for the fptoui.
1283 if ((isa
<UIToFPInst
>(OpI
) || isa
<SIToFPInst
>(OpI
)) &&
1284 OpI
->getOperand(0)->getType() == FI
.getType() &&
1285 (int)FI
.getType()->getScalarSizeInBits() <=
1286 OpI
->getType()->getFPMantissaWidth())
1287 return ReplaceInstUsesWith(FI
, OpI
->getOperand(0));
1289 return commonCastTransforms(FI
);
1292 Instruction
*InstCombiner::visitUIToFP(CastInst
&CI
) {
1293 return commonCastTransforms(CI
);
1296 Instruction
*InstCombiner::visitSIToFP(CastInst
&CI
) {
1297 return commonCastTransforms(CI
);
1300 Instruction
*InstCombiner::visitIntToPtr(IntToPtrInst
&CI
) {
1301 // If the source integer type is not the intptr_t type for this target, do a
1302 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
1303 // cast to be exposed to other transforms.
1305 if (CI
.getOperand(0)->getType()->getScalarSizeInBits() >
1306 TD
->getPointerSizeInBits()) {
1307 Value
*P
= Builder
->CreateTrunc(CI
.getOperand(0),
1308 TD
->getIntPtrType(CI
.getContext()), "tmp");
1309 return new IntToPtrInst(P
, CI
.getType());
1311 if (CI
.getOperand(0)->getType()->getScalarSizeInBits() <
1312 TD
->getPointerSizeInBits()) {
1313 Value
*P
= Builder
->CreateZExt(CI
.getOperand(0),
1314 TD
->getIntPtrType(CI
.getContext()), "tmp");
1315 return new IntToPtrInst(P
, CI
.getType());
1319 if (Instruction
*I
= commonCastTransforms(CI
))
1325 /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
1326 Instruction
*InstCombiner::commonPointerCastTransforms(CastInst
&CI
) {
1327 Value
*Src
= CI
.getOperand(0);
1329 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(Src
)) {
1330 // If casting the result of a getelementptr instruction with no offset, turn
1331 // this into a cast of the original pointer!
1332 if (GEP
->hasAllZeroIndices()) {
1333 // Changing the cast operand is usually not a good idea but it is safe
1334 // here because the pointer operand is being replaced with another
1335 // pointer operand so the opcode doesn't need to change.
1337 CI
.setOperand(0, GEP
->getOperand(0));
1341 // If the GEP has a single use, and the base pointer is a bitcast, and the
1342 // GEP computes a constant offset, see if we can convert these three
1343 // instructions into fewer. This typically happens with unions and other
1344 // non-type-safe code.
1345 if (TD
&& GEP
->hasOneUse() && isa
<BitCastInst
>(GEP
->getOperand(0)) &&
1346 GEP
->hasAllConstantIndices()) {
1347 // We are guaranteed to get a constant from EmitGEPOffset.
1348 ConstantInt
*OffsetV
= cast
<ConstantInt
>(EmitGEPOffset(GEP
));
1349 int64_t Offset
= OffsetV
->getSExtValue();
1351 // Get the base pointer input of the bitcast, and the type it points to.
1352 Value
*OrigBase
= cast
<BitCastInst
>(GEP
->getOperand(0))->getOperand(0);
1353 const Type
*GEPIdxTy
=
1354 cast
<PointerType
>(OrigBase
->getType())->getElementType();
1355 SmallVector
<Value
*, 8> NewIndices
;
1356 if (FindElementAtOffset(GEPIdxTy
, Offset
, NewIndices
)) {
1357 // If we were able to index down into an element, create the GEP
1358 // and bitcast the result. This eliminates one bitcast, potentially
1360 Value
*NGEP
= cast
<GEPOperator
>(GEP
)->isInBounds() ?
1361 Builder
->CreateInBoundsGEP(OrigBase
,
1362 NewIndices
.begin(), NewIndices
.end()) :
1363 Builder
->CreateGEP(OrigBase
, NewIndices
.begin(), NewIndices
.end());
1364 NGEP
->takeName(GEP
);
1366 if (isa
<BitCastInst
>(CI
))
1367 return new BitCastInst(NGEP
, CI
.getType());
1368 assert(isa
<PtrToIntInst
>(CI
));
1369 return new PtrToIntInst(NGEP
, CI
.getType());
1374 return commonCastTransforms(CI
);
1377 Instruction
*InstCombiner::visitPtrToInt(PtrToIntInst
&CI
) {
1378 // If the destination integer type is not the intptr_t type for this target,
1379 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
1380 // to be exposed to other transforms.
1382 if (CI
.getType()->getScalarSizeInBits() < TD
->getPointerSizeInBits()) {
1383 Value
*P
= Builder
->CreatePtrToInt(CI
.getOperand(0),
1384 TD
->getIntPtrType(CI
.getContext()),
1386 return new TruncInst(P
, CI
.getType());
1388 if (CI
.getType()->getScalarSizeInBits() > TD
->getPointerSizeInBits()) {
1389 Value
*P
= Builder
->CreatePtrToInt(CI
.getOperand(0),
1390 TD
->getIntPtrType(CI
.getContext()),
1392 return new ZExtInst(P
, CI
.getType());
1396 return commonPointerCastTransforms(CI
);
1399 /// OptimizeVectorResize - This input value (which is known to have vector type)
1400 /// is being zero extended or truncated to the specified vector type. Try to
1401 /// replace it with a shuffle (and vector/vector bitcast) if possible.
1403 /// The source and destination vector types may have different element types.
1404 static Instruction
*OptimizeVectorResize(Value
*InVal
, const VectorType
*DestTy
,
1406 // We can only do this optimization if the output is a multiple of the input
1407 // element size, or the input is a multiple of the output element size.
1408 // Convert the input type to have the same element type as the output.
1409 const VectorType
*SrcTy
= cast
<VectorType
>(InVal
->getType());
1411 if (SrcTy
->getElementType() != DestTy
->getElementType()) {
1412 // The input types don't need to be identical, but for now they must be the
1413 // same size. There is no specific reason we couldn't handle things like
1414 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
1416 if (SrcTy
->getElementType()->getPrimitiveSizeInBits() !=
1417 DestTy
->getElementType()->getPrimitiveSizeInBits())
1420 SrcTy
= VectorType::get(DestTy
->getElementType(), SrcTy
->getNumElements());
1421 InVal
= IC
.Builder
->CreateBitCast(InVal
, SrcTy
);
1424 // Now that the element types match, get the shuffle mask and RHS of the
1425 // shuffle to use, which depends on whether we're increasing or decreasing the
1426 // size of the input.
1427 SmallVector
<Constant
*, 16> ShuffleMask
;
1429 const IntegerType
*Int32Ty
= Type::getInt32Ty(SrcTy
->getContext());
1431 if (SrcTy
->getNumElements() > DestTy
->getNumElements()) {
1432 // If we're shrinking the number of elements, just shuffle in the low
1433 // elements from the input and use undef as the second shuffle input.
1434 V2
= UndefValue::get(SrcTy
);
1435 for (unsigned i
= 0, e
= DestTy
->getNumElements(); i
!= e
; ++i
)
1436 ShuffleMask
.push_back(ConstantInt::get(Int32Ty
, i
));
1439 // If we're increasing the number of elements, shuffle in all of the
1440 // elements from InVal and fill the rest of the result elements with zeros
1441 // from a constant zero.
1442 V2
= Constant::getNullValue(SrcTy
);
1443 unsigned SrcElts
= SrcTy
->getNumElements();
1444 for (unsigned i
= 0, e
= SrcElts
; i
!= e
; ++i
)
1445 ShuffleMask
.push_back(ConstantInt::get(Int32Ty
, i
));
1447 // The excess elements reference the first element of the zero input.
1448 ShuffleMask
.append(DestTy
->getNumElements()-SrcElts
,
1449 ConstantInt::get(Int32Ty
, SrcElts
));
1452 return new ShuffleVectorInst(InVal
, V2
, ConstantVector::get(ShuffleMask
));
1455 static bool isMultipleOfTypeSize(unsigned Value
, const Type
*Ty
) {
1456 return Value
% Ty
->getPrimitiveSizeInBits() == 0;
1459 static unsigned getTypeSizeIndex(unsigned Value
, const Type
*Ty
) {
1460 return Value
/ Ty
->getPrimitiveSizeInBits();
1463 /// CollectInsertionElements - V is a value which is inserted into a vector of
1464 /// VecEltTy. Look through the value to see if we can decompose it into
1465 /// insertions into the vector. See the example in the comment for
1466 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
1467 /// The type of V is always a non-zero multiple of VecEltTy's size.
1469 /// This returns false if the pattern can't be matched or true if it can,
1470 /// filling in Elements with the elements found here.
1471 static bool CollectInsertionElements(Value
*V
, unsigned ElementIndex
,
1472 SmallVectorImpl
<Value
*> &Elements
,
1473 const Type
*VecEltTy
) {
1474 // Undef values never contribute useful bits to the result.
1475 if (isa
<UndefValue
>(V
)) return true;
1477 // If we got down to a value of the right type, we win, try inserting into the
1479 if (V
->getType() == VecEltTy
) {
1480 // Inserting null doesn't actually insert any elements.
1481 if (Constant
*C
= dyn_cast
<Constant
>(V
))
1482 if (C
->isNullValue())
1485 // Fail if multiple elements are inserted into this slot.
1486 if (ElementIndex
>= Elements
.size() || Elements
[ElementIndex
] != 0)
1489 Elements
[ElementIndex
] = V
;
1493 if (Constant
*C
= dyn_cast
<Constant
>(V
)) {
1494 // Figure out the # elements this provides, and bitcast it or slice it up
1496 unsigned NumElts
= getTypeSizeIndex(C
->getType()->getPrimitiveSizeInBits(),
1498 // If the constant is the size of a vector element, we just need to bitcast
1499 // it to the right type so it gets properly inserted.
1501 return CollectInsertionElements(ConstantExpr::getBitCast(C
, VecEltTy
),
1502 ElementIndex
, Elements
, VecEltTy
);
1504 // Okay, this is a constant that covers multiple elements. Slice it up into
1505 // pieces and insert each element-sized piece into the vector.
1506 if (!isa
<IntegerType
>(C
->getType()))
1507 C
= ConstantExpr::getBitCast(C
, IntegerType::get(V
->getContext(),
1508 C
->getType()->getPrimitiveSizeInBits()));
1509 unsigned ElementSize
= VecEltTy
->getPrimitiveSizeInBits();
1510 const Type
*ElementIntTy
= IntegerType::get(C
->getContext(), ElementSize
);
1512 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1513 Constant
*Piece
= ConstantExpr::getLShr(C
, ConstantInt::get(C
->getType(),
1515 Piece
= ConstantExpr::getTrunc(Piece
, ElementIntTy
);
1516 if (!CollectInsertionElements(Piece
, ElementIndex
+i
, Elements
, VecEltTy
))
1522 if (!V
->hasOneUse()) return false;
1524 Instruction
*I
= dyn_cast
<Instruction
>(V
);
1525 if (I
== 0) return false;
1526 switch (I
->getOpcode()) {
1527 default: return false; // Unhandled case.
1528 case Instruction::BitCast
:
1529 return CollectInsertionElements(I
->getOperand(0), ElementIndex
,
1530 Elements
, VecEltTy
);
1531 case Instruction::ZExt
:
1532 if (!isMultipleOfTypeSize(
1533 I
->getOperand(0)->getType()->getPrimitiveSizeInBits(),
1536 return CollectInsertionElements(I
->getOperand(0), ElementIndex
,
1537 Elements
, VecEltTy
);
1538 case Instruction::Or
:
1539 return CollectInsertionElements(I
->getOperand(0), ElementIndex
,
1540 Elements
, VecEltTy
) &&
1541 CollectInsertionElements(I
->getOperand(1), ElementIndex
,
1542 Elements
, VecEltTy
);
1543 case Instruction::Shl
: {
1544 // Must be shifting by a constant that is a multiple of the element size.
1545 ConstantInt
*CI
= dyn_cast
<ConstantInt
>(I
->getOperand(1));
1546 if (CI
== 0) return false;
1547 if (!isMultipleOfTypeSize(CI
->getZExtValue(), VecEltTy
)) return false;
1548 unsigned IndexShift
= getTypeSizeIndex(CI
->getZExtValue(), VecEltTy
);
1550 return CollectInsertionElements(I
->getOperand(0), ElementIndex
+IndexShift
,
1551 Elements
, VecEltTy
);
1558 /// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
1559 /// may be doing shifts and ors to assemble the elements of the vector manually.
1560 /// Try to rip the code out and replace it with insertelements. This is to
1561 /// optimize code like this:
1563 /// %tmp37 = bitcast float %inc to i32
1564 /// %tmp38 = zext i32 %tmp37 to i64
1565 /// %tmp31 = bitcast float %inc5 to i32
1566 /// %tmp32 = zext i32 %tmp31 to i64
1567 /// %tmp33 = shl i64 %tmp32, 32
1568 /// %ins35 = or i64 %tmp33, %tmp38
1569 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
1571 /// Into two insertelements that do "buildvector{%inc, %inc5}".
1572 static Value
*OptimizeIntegerToVectorInsertions(BitCastInst
&CI
,
1574 const VectorType
*DestVecTy
= cast
<VectorType
>(CI
.getType());
1575 Value
*IntInput
= CI
.getOperand(0);
1577 SmallVector
<Value
*, 8> Elements(DestVecTy
->getNumElements());
1578 if (!CollectInsertionElements(IntInput
, 0, Elements
,
1579 DestVecTy
->getElementType()))
1582 // If we succeeded, we know that all of the element are specified by Elements
1583 // or are zero if Elements has a null entry. Recast this as a set of
1585 Value
*Result
= Constant::getNullValue(CI
.getType());
1586 for (unsigned i
= 0, e
= Elements
.size(); i
!= e
; ++i
) {
1587 if (Elements
[i
] == 0) continue; // Unset element.
1589 Result
= IC
.Builder
->CreateInsertElement(Result
, Elements
[i
],
1590 IC
.Builder
->getInt32(i
));
1597 /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
1598 /// bitcast. The various long double bitcasts can't get in here.
1599 static Instruction
*OptimizeIntToFloatBitCast(BitCastInst
&CI
,InstCombiner
&IC
){
1600 Value
*Src
= CI
.getOperand(0);
1601 const Type
*DestTy
= CI
.getType();
1603 // If this is a bitcast from int to float, check to see if the int is an
1604 // extraction from a vector.
1605 Value
*VecInput
= 0;
1606 // bitcast(trunc(bitcast(somevector)))
1607 if (match(Src
, m_Trunc(m_BitCast(m_Value(VecInput
)))) &&
1608 isa
<VectorType
>(VecInput
->getType())) {
1609 const VectorType
*VecTy
= cast
<VectorType
>(VecInput
->getType());
1610 unsigned DestWidth
= DestTy
->getPrimitiveSizeInBits();
1612 if (VecTy
->getPrimitiveSizeInBits() % DestWidth
== 0) {
1613 // If the element type of the vector doesn't match the result type,
1614 // bitcast it to be a vector type we can extract from.
1615 if (VecTy
->getElementType() != DestTy
) {
1616 VecTy
= VectorType::get(DestTy
,
1617 VecTy
->getPrimitiveSizeInBits() / DestWidth
);
1618 VecInput
= IC
.Builder
->CreateBitCast(VecInput
, VecTy
);
1621 return ExtractElementInst::Create(VecInput
, IC
.Builder
->getInt32(0));
1625 // bitcast(trunc(lshr(bitcast(somevector), cst))
1626 ConstantInt
*ShAmt
= 0;
1627 if (match(Src
, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput
)),
1628 m_ConstantInt(ShAmt
)))) &&
1629 isa
<VectorType
>(VecInput
->getType())) {
1630 const VectorType
*VecTy
= cast
<VectorType
>(VecInput
->getType());
1631 unsigned DestWidth
= DestTy
->getPrimitiveSizeInBits();
1632 if (VecTy
->getPrimitiveSizeInBits() % DestWidth
== 0 &&
1633 ShAmt
->getZExtValue() % DestWidth
== 0) {
1634 // If the element type of the vector doesn't match the result type,
1635 // bitcast it to be a vector type we can extract from.
1636 if (VecTy
->getElementType() != DestTy
) {
1637 VecTy
= VectorType::get(DestTy
,
1638 VecTy
->getPrimitiveSizeInBits() / DestWidth
);
1639 VecInput
= IC
.Builder
->CreateBitCast(VecInput
, VecTy
);
1642 unsigned Elt
= ShAmt
->getZExtValue() / DestWidth
;
1643 return ExtractElementInst::Create(VecInput
, IC
.Builder
->getInt32(Elt
));
1649 Instruction
*InstCombiner::visitBitCast(BitCastInst
&CI
) {
1650 // If the operands are integer typed then apply the integer transforms,
1651 // otherwise just apply the common ones.
1652 Value
*Src
= CI
.getOperand(0);
1653 const Type
*SrcTy
= Src
->getType();
1654 const Type
*DestTy
= CI
.getType();
1656 // Get rid of casts from one type to the same type. These are useless and can
1657 // be replaced by the operand.
1658 if (DestTy
== Src
->getType())
1659 return ReplaceInstUsesWith(CI
, Src
);
1661 if (const PointerType
*DstPTy
= dyn_cast
<PointerType
>(DestTy
)) {
1662 const PointerType
*SrcPTy
= cast
<PointerType
>(SrcTy
);
1663 const Type
*DstElTy
= DstPTy
->getElementType();
1664 const Type
*SrcElTy
= SrcPTy
->getElementType();
1666 // If the address spaces don't match, don't eliminate the bitcast, which is
1667 // required for changing types.
1668 if (SrcPTy
->getAddressSpace() != DstPTy
->getAddressSpace())
1671 // If we are casting a alloca to a pointer to a type of the same
1672 // size, rewrite the allocation instruction to allocate the "right" type.
1673 // There is no need to modify malloc calls because it is their bitcast that
1674 // needs to be cleaned up.
1675 if (AllocaInst
*AI
= dyn_cast
<AllocaInst
>(Src
))
1676 if (Instruction
*V
= PromoteCastOfAllocation(CI
, *AI
))
1679 // If the source and destination are pointers, and this cast is equivalent
1680 // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
1681 // This can enhance SROA and other transforms that want type-safe pointers.
1682 Constant
*ZeroUInt
=
1683 Constant::getNullValue(Type::getInt32Ty(CI
.getContext()));
1684 unsigned NumZeros
= 0;
1685 while (SrcElTy
!= DstElTy
&&
1686 isa
<CompositeType
>(SrcElTy
) && !SrcElTy
->isPointerTy() &&
1687 SrcElTy
->getNumContainedTypes() /* not "{}" */) {
1688 SrcElTy
= cast
<CompositeType
>(SrcElTy
)->getTypeAtIndex(ZeroUInt
);
1692 // If we found a path from the src to dest, create the getelementptr now.
1693 if (SrcElTy
== DstElTy
) {
1694 SmallVector
<Value
*, 8> Idxs(NumZeros
+1, ZeroUInt
);
1695 return GetElementPtrInst::CreateInBounds(Src
, Idxs
.begin(), Idxs
.end());
1699 // Try to optimize int -> float bitcasts.
1700 if ((DestTy
->isFloatTy() || DestTy
->isDoubleTy()) && isa
<IntegerType
>(SrcTy
))
1701 if (Instruction
*I
= OptimizeIntToFloatBitCast(CI
, *this))
1704 if (const VectorType
*DestVTy
= dyn_cast
<VectorType
>(DestTy
)) {
1705 if (DestVTy
->getNumElements() == 1 && !SrcTy
->isVectorTy()) {
1706 Value
*Elem
= Builder
->CreateBitCast(Src
, DestVTy
->getElementType());
1707 return InsertElementInst::Create(UndefValue::get(DestTy
), Elem
,
1708 Constant::getNullValue(Type::getInt32Ty(CI
.getContext())));
1709 // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
1712 if (isa
<IntegerType
>(SrcTy
)) {
1713 // If this is a cast from an integer to vector, check to see if the input
1714 // is a trunc or zext of a bitcast from vector. If so, we can replace all
1715 // the casts with a shuffle and (potentially) a bitcast.
1716 if (isa
<TruncInst
>(Src
) || isa
<ZExtInst
>(Src
)) {
1717 CastInst
*SrcCast
= cast
<CastInst
>(Src
);
1718 if (BitCastInst
*BCIn
= dyn_cast
<BitCastInst
>(SrcCast
->getOperand(0)))
1719 if (isa
<VectorType
>(BCIn
->getOperand(0)->getType()))
1720 if (Instruction
*I
= OptimizeVectorResize(BCIn
->getOperand(0),
1721 cast
<VectorType
>(DestTy
), *this))
1725 // If the input is an 'or' instruction, we may be doing shifts and ors to
1726 // assemble the elements of the vector manually. Try to rip the code out
1727 // and replace it with insertelements.
1728 if (Value
*V
= OptimizeIntegerToVectorInsertions(CI
, *this))
1729 return ReplaceInstUsesWith(CI
, V
);
1733 if (const VectorType
*SrcVTy
= dyn_cast
<VectorType
>(SrcTy
)) {
1734 if (SrcVTy
->getNumElements() == 1 && !DestTy
->isVectorTy()) {
1736 Builder
->CreateExtractElement(Src
,
1737 Constant::getNullValue(Type::getInt32Ty(CI
.getContext())));
1738 return CastInst::Create(Instruction::BitCast
, Elem
, DestTy
);
1742 if (ShuffleVectorInst
*SVI
= dyn_cast
<ShuffleVectorInst
>(Src
)) {
1743 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
1744 // a bitcast to a vector with the same # elts.
1745 if (SVI
->hasOneUse() && DestTy
->isVectorTy() &&
1746 cast
<VectorType
>(DestTy
)->getNumElements() ==
1747 SVI
->getType()->getNumElements() &&
1748 SVI
->getType()->getNumElements() ==
1749 cast
<VectorType
>(SVI
->getOperand(0)->getType())->getNumElements()) {
1751 // If either of the operands is a cast from CI.getType(), then
1752 // evaluating the shuffle in the casted destination's type will allow
1753 // us to eliminate at least one cast.
1754 if (((Tmp
= dyn_cast
<BitCastInst
>(SVI
->getOperand(0))) &&
1755 Tmp
->getOperand(0)->getType() == DestTy
) ||
1756 ((Tmp
= dyn_cast
<BitCastInst
>(SVI
->getOperand(1))) &&
1757 Tmp
->getOperand(0)->getType() == DestTy
)) {
1758 Value
*LHS
= Builder
->CreateBitCast(SVI
->getOperand(0), DestTy
);
1759 Value
*RHS
= Builder
->CreateBitCast(SVI
->getOperand(1), DestTy
);
1760 // Return a new shuffle vector. Use the same element ID's, as we
1761 // know the vector types match #elts.
1762 return new ShuffleVectorInst(LHS
, RHS
, SVI
->getOperand(2));
1767 if (SrcTy
->isPointerTy())
1768 return commonPointerCastTransforms(CI
);
1769 return commonCastTransforms(CI
);