1 //===- InstCombineCasts.cpp -----------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the visit functions for cast operations.
12 //===----------------------------------------------------------------------===//
14 #include "InstCombine.h"
15 #include "llvm/Target/TargetData.h"
16 #include "llvm/Support/PatternMatch.h"
18 using namespace PatternMatch
;
20 /// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
21 /// expression. If so, decompose it, returning some value X, such that Val is
24 static Value
*DecomposeSimpleLinearExpr(Value
*Val
, unsigned &Scale
,
26 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Val
)) {
27 Offset
= CI
->getZExtValue();
29 return ConstantInt::get(Val
->getType(), 0);
32 if (BinaryOperator
*I
= dyn_cast
<BinaryOperator
>(Val
)) {
33 if (ConstantInt
*RHS
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
34 if (I
->getOpcode() == Instruction::Shl
) {
35 // This is a value scaled by '1 << the shift amt'.
36 Scale
= UINT64_C(1) << RHS
->getZExtValue();
38 return I
->getOperand(0);
41 if (I
->getOpcode() == Instruction::Mul
) {
42 // This value is scaled by 'RHS'.
43 Scale
= RHS
->getZExtValue();
45 return I
->getOperand(0);
48 if (I
->getOpcode() == Instruction::Add
) {
49 // We have X+C. Check to see if we really have (X*C2)+C1,
50 // where C1 is divisible by C2.
53 DecomposeSimpleLinearExpr(I
->getOperand(0), SubScale
, Offset
);
54 Offset
+= RHS
->getZExtValue();
61 // Otherwise, we can't look past this.
67 /// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
68 /// try to eliminate the cast by moving the type information into the alloc.
69 Instruction
*InstCombiner::PromoteCastOfAllocation(BitCastInst
&CI
,
71 // This requires TargetData to get the alloca alignment and size information.
74 const PointerType
*PTy
= cast
<PointerType
>(CI
.getType());
76 BuilderTy
AllocaBuilder(*Builder
);
77 AllocaBuilder
.SetInsertPoint(AI
.getParent(), &AI
);
79 // Get the type really allocated and the type casted to.
80 const Type
*AllocElTy
= AI
.getAllocatedType();
81 const Type
*CastElTy
= PTy
->getElementType();
82 if (!AllocElTy
->isSized() || !CastElTy
->isSized()) return 0;
84 unsigned AllocElTyAlign
= TD
->getABITypeAlignment(AllocElTy
);
85 unsigned CastElTyAlign
= TD
->getABITypeAlignment(CastElTy
);
86 if (CastElTyAlign
< AllocElTyAlign
) return 0;
88 // If the allocation has multiple uses, only promote it if we are strictly
89 // increasing the alignment of the resultant allocation. If we keep it the
90 // same, we open the door to infinite loops of various kinds.
91 if (!AI
.hasOneUse() && CastElTyAlign
== AllocElTyAlign
) return 0;
93 uint64_t AllocElTySize
= TD
->getTypeAllocSize(AllocElTy
);
94 uint64_t CastElTySize
= TD
->getTypeAllocSize(CastElTy
);
95 if (CastElTySize
== 0 || AllocElTySize
== 0) return 0;
97 // See if we can satisfy the modulus by pulling a scale out of the array
99 unsigned ArraySizeScale
;
100 uint64_t ArrayOffset
;
101 Value
*NumElements
= // See if the array size is a decomposable linear expr.
102 DecomposeSimpleLinearExpr(AI
.getOperand(0), ArraySizeScale
, ArrayOffset
);
104 // If we can now satisfy the modulus, by using a non-1 scale, we really can
106 if ((AllocElTySize
*ArraySizeScale
) % CastElTySize
!= 0 ||
107 (AllocElTySize
*ArrayOffset
) % CastElTySize
!= 0) return 0;
109 unsigned Scale
= (AllocElTySize
*ArraySizeScale
)/CastElTySize
;
114 Amt
= ConstantInt::get(AI
.getArraySize()->getType(), Scale
);
115 // Insert before the alloca, not before the cast.
116 Amt
= AllocaBuilder
.CreateMul(Amt
, NumElements
, "tmp");
119 if (uint64_t Offset
= (AllocElTySize
*ArrayOffset
)/CastElTySize
) {
120 Value
*Off
= ConstantInt::get(AI
.getArraySize()->getType(),
122 Amt
= AllocaBuilder
.CreateAdd(Amt
, Off
, "tmp");
125 AllocaInst
*New
= AllocaBuilder
.CreateAlloca(CastElTy
, Amt
);
126 New
->setAlignment(AI
.getAlignment());
129 // If the allocation has multiple real uses, insert a cast and change all
130 // things that used it to use the new cast. This will also hack on CI, but it
132 if (!AI
.hasOneUse()) {
133 // New is the allocation instruction, pointer typed. AI is the original
134 // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
135 Value
*NewCast
= AllocaBuilder
.CreateBitCast(New
, AI
.getType(), "tmpcast");
136 AI
.replaceAllUsesWith(NewCast
);
138 return ReplaceInstUsesWith(CI
, New
);
143 /// EvaluateInDifferentType - Given an expression that
144 /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
145 /// insert the code to evaluate the expression.
146 Value
*InstCombiner::EvaluateInDifferentType(Value
*V
, const Type
*Ty
,
148 if (Constant
*C
= dyn_cast
<Constant
>(V
)) {
149 C
= ConstantExpr::getIntegerCast(C
, Ty
, isSigned
/*Sext or ZExt*/);
150 // If we got a constantexpr back, try to simplify it with TD info.
151 if (ConstantExpr
*CE
= dyn_cast
<ConstantExpr
>(C
))
152 C
= ConstantFoldConstantExpression(CE
, TD
);
156 // Otherwise, it must be an instruction.
157 Instruction
*I
= cast
<Instruction
>(V
);
158 Instruction
*Res
= 0;
159 unsigned Opc
= I
->getOpcode();
161 case Instruction::Add
:
162 case Instruction::Sub
:
163 case Instruction::Mul
:
164 case Instruction::And
:
165 case Instruction::Or
:
166 case Instruction::Xor
:
167 case Instruction::AShr
:
168 case Instruction::LShr
:
169 case Instruction::Shl
:
170 case Instruction::UDiv
:
171 case Instruction::URem
: {
172 Value
*LHS
= EvaluateInDifferentType(I
->getOperand(0), Ty
, isSigned
);
173 Value
*RHS
= EvaluateInDifferentType(I
->getOperand(1), Ty
, isSigned
);
174 Res
= BinaryOperator::Create((Instruction::BinaryOps
)Opc
, LHS
, RHS
);
177 case Instruction::Trunc
:
178 case Instruction::ZExt
:
179 case Instruction::SExt
:
180 // If the source type of the cast is the type we're trying for then we can
181 // just return the source. There's no need to insert it because it is not
183 if (I
->getOperand(0)->getType() == Ty
)
184 return I
->getOperand(0);
186 // Otherwise, must be the same type of cast, so just reinsert a new one.
187 // This also handles the case of zext(trunc(x)) -> zext(x).
188 Res
= CastInst::CreateIntegerCast(I
->getOperand(0), Ty
,
189 Opc
== Instruction::SExt
);
191 case Instruction::Select
: {
192 Value
*True
= EvaluateInDifferentType(I
->getOperand(1), Ty
, isSigned
);
193 Value
*False
= EvaluateInDifferentType(I
->getOperand(2), Ty
, isSigned
);
194 Res
= SelectInst::Create(I
->getOperand(0), True
, False
);
197 case Instruction::PHI
: {
198 PHINode
*OPN
= cast
<PHINode
>(I
);
199 PHINode
*NPN
= PHINode::Create(Ty
, OPN
->getNumIncomingValues());
200 for (unsigned i
= 0, e
= OPN
->getNumIncomingValues(); i
!= e
; ++i
) {
201 Value
*V
=EvaluateInDifferentType(OPN
->getIncomingValue(i
), Ty
, isSigned
);
202 NPN
->addIncoming(V
, OPN
->getIncomingBlock(i
));
208 // TODO: Can handle more cases here.
209 llvm_unreachable("Unreachable!");
214 return InsertNewInstBefore(Res
, *I
);
218 /// This function is a wrapper around CastInst::isEliminableCastPair. It
219 /// simply extracts arguments and returns what that function returns.
220 static Instruction::CastOps
221 isEliminableCastPair(
222 const CastInst
*CI
, ///< The first cast instruction
223 unsigned opcode
, ///< The opcode of the second cast instruction
224 const Type
*DstTy
, ///< The target type for the second cast instruction
225 TargetData
*TD
///< The target data for pointer size
228 const Type
*SrcTy
= CI
->getOperand(0)->getType(); // A from above
229 const Type
*MidTy
= CI
->getType(); // B from above
231 // Get the opcodes of the two Cast instructions
232 Instruction::CastOps firstOp
= Instruction::CastOps(CI
->getOpcode());
233 Instruction::CastOps secondOp
= Instruction::CastOps(opcode
);
235 unsigned Res
= CastInst::isEliminableCastPair(firstOp
, secondOp
, SrcTy
, MidTy
,
237 TD
? TD
->getIntPtrType(CI
->getContext()) : 0);
239 // We don't want to form an inttoptr or ptrtoint that converts to an integer
240 // type that differs from the pointer size.
241 if ((Res
== Instruction::IntToPtr
&&
242 (!TD
|| SrcTy
!= TD
->getIntPtrType(CI
->getContext()))) ||
243 (Res
== Instruction::PtrToInt
&&
244 (!TD
|| DstTy
!= TD
->getIntPtrType(CI
->getContext()))))
247 return Instruction::CastOps(Res
);
250 /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
251 /// results in any code being generated and is interesting to optimize out. If
252 /// the cast can be eliminated by some other simple transformation, we prefer
253 /// to do the simplification first.
254 bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc
, const Value
*V
,
256 // Noop casts and casts of constants should be eliminated trivially.
257 if (V
->getType() == Ty
|| isa
<Constant
>(V
)) return false;
259 // If this is another cast that can be eliminated, we prefer to have it
261 if (const CastInst
*CI
= dyn_cast
<CastInst
>(V
))
262 if (isEliminableCastPair(CI
, opc
, Ty
, TD
))
265 // If this is a vector sext from a compare, then we don't want to break the
266 // idiom where each element of the extended vector is either zero or all ones.
267 if (opc
== Instruction::SExt
&& isa
<CmpInst
>(V
) && Ty
->isVectorTy())
274 /// @brief Implement the transforms common to all CastInst visitors.
275 Instruction
*InstCombiner::commonCastTransforms(CastInst
&CI
) {
276 Value
*Src
= CI
.getOperand(0);
278 // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
280 if (CastInst
*CSrc
= dyn_cast
<CastInst
>(Src
)) { // A->B->C cast
281 if (Instruction::CastOps opc
=
282 isEliminableCastPair(CSrc
, CI
.getOpcode(), CI
.getType(), TD
)) {
283 // The first cast (CSrc) is eliminable so we need to fix up or replace
284 // the second cast (CI). CSrc will then have a good chance of being dead.
285 return CastInst::Create(opc
, CSrc
->getOperand(0), CI
.getType());
289 // If we are casting a select then fold the cast into the select
290 if (SelectInst
*SI
= dyn_cast
<SelectInst
>(Src
))
291 if (Instruction
*NV
= FoldOpIntoSelect(CI
, SI
))
294 // If we are casting a PHI then fold the cast into the PHI
295 if (isa
<PHINode
>(Src
)) {
296 // We don't do this if this would create a PHI node with an illegal type if
297 // it is currently legal.
298 if (!Src
->getType()->isIntegerTy() ||
299 !CI
.getType()->isIntegerTy() ||
300 ShouldChangeType(CI
.getType(), Src
->getType()))
301 if (Instruction
*NV
= FoldOpIntoPhi(CI
))
308 /// CanEvaluateTruncated - Return true if we can evaluate the specified
309 /// expression tree as type Ty instead of its larger type, and arrive with the
310 /// same value. This is used by code that tries to eliminate truncates.
312 /// Ty will always be a type smaller than V. We should return true if trunc(V)
313 /// can be computed by computing V in the smaller type. If V is an instruction,
314 /// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
315 /// makes sense if x and y can be efficiently truncated.
317 /// This function works on both vectors and scalars.
319 static bool CanEvaluateTruncated(Value
*V
, const Type
*Ty
) {
320 // We can always evaluate constants in another type.
321 if (isa
<Constant
>(V
))
324 Instruction
*I
= dyn_cast
<Instruction
>(V
);
325 if (!I
) return false;
327 const Type
*OrigTy
= V
->getType();
329 // If this is an extension from the dest type, we can eliminate it, even if it
330 // has multiple uses.
331 if ((isa
<ZExtInst
>(I
) || isa
<SExtInst
>(I
)) &&
332 I
->getOperand(0)->getType() == Ty
)
335 // We can't extend or shrink something that has multiple uses: doing so would
336 // require duplicating the instruction in general, which isn't profitable.
337 if (!I
->hasOneUse()) return false;
339 unsigned Opc
= I
->getOpcode();
341 case Instruction::Add
:
342 case Instruction::Sub
:
343 case Instruction::Mul
:
344 case Instruction::And
:
345 case Instruction::Or
:
346 case Instruction::Xor
:
347 // These operators can all arbitrarily be extended or truncated.
348 return CanEvaluateTruncated(I
->getOperand(0), Ty
) &&
349 CanEvaluateTruncated(I
->getOperand(1), Ty
);
351 case Instruction::UDiv
:
352 case Instruction::URem
: {
353 // UDiv and URem can be truncated if all the truncated bits are zero.
354 uint32_t OrigBitWidth
= OrigTy
->getScalarSizeInBits();
355 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
356 if (BitWidth
< OrigBitWidth
) {
357 APInt Mask
= APInt::getHighBitsSet(OrigBitWidth
, OrigBitWidth
-BitWidth
);
358 if (MaskedValueIsZero(I
->getOperand(0), Mask
) &&
359 MaskedValueIsZero(I
->getOperand(1), Mask
)) {
360 return CanEvaluateTruncated(I
->getOperand(0), Ty
) &&
361 CanEvaluateTruncated(I
->getOperand(1), Ty
);
366 case Instruction::Shl
:
367 // If we are truncating the result of this SHL, and if it's a shift of a
368 // constant amount, we can always perform a SHL in a smaller type.
369 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
370 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
371 if (CI
->getLimitedValue(BitWidth
) < BitWidth
)
372 return CanEvaluateTruncated(I
->getOperand(0), Ty
);
375 case Instruction::LShr
:
376 // If this is a truncate of a logical shr, we can truncate it to a smaller
377 // lshr iff we know that the bits we would otherwise be shifting in are
379 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
380 uint32_t OrigBitWidth
= OrigTy
->getScalarSizeInBits();
381 uint32_t BitWidth
= Ty
->getScalarSizeInBits();
382 if (MaskedValueIsZero(I
->getOperand(0),
383 APInt::getHighBitsSet(OrigBitWidth
, OrigBitWidth
-BitWidth
)) &&
384 CI
->getLimitedValue(BitWidth
) < BitWidth
) {
385 return CanEvaluateTruncated(I
->getOperand(0), Ty
);
389 case Instruction::Trunc
:
390 // trunc(trunc(x)) -> trunc(x)
392 case Instruction::ZExt
:
393 case Instruction::SExt
:
394 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
395 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
397 case Instruction::Select
: {
398 SelectInst
*SI
= cast
<SelectInst
>(I
);
399 return CanEvaluateTruncated(SI
->getTrueValue(), Ty
) &&
400 CanEvaluateTruncated(SI
->getFalseValue(), Ty
);
402 case Instruction::PHI
: {
403 // We can change a phi if we can change all operands. Note that we never
404 // get into trouble with cyclic PHIs here because we only consider
405 // instructions with a single use.
406 PHINode
*PN
= cast
<PHINode
>(I
);
407 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
408 if (!CanEvaluateTruncated(PN
->getIncomingValue(i
), Ty
))
413 // TODO: Can handle more cases here.
420 Instruction
*InstCombiner::visitTrunc(TruncInst
&CI
) {
421 if (Instruction
*Result
= commonCastTransforms(CI
))
424 // See if we can simplify any instructions used by the input whose sole
425 // purpose is to compute bits we don't care about.
426 if (SimplifyDemandedInstructionBits(CI
))
429 Value
*Src
= CI
.getOperand(0);
430 const Type
*DestTy
= CI
.getType(), *SrcTy
= Src
->getType();
432 // Attempt to truncate the entire input expression tree to the destination
433 // type. Only do this if the dest type is a simple type, don't convert the
434 // expression tree to something weird like i93 unless the source is also
436 if ((DestTy
->isVectorTy() || ShouldChangeType(SrcTy
, DestTy
)) &&
437 CanEvaluateTruncated(Src
, DestTy
)) {
439 // If this cast is a truncate, evaluting in a different type always
440 // eliminates the cast, so it is always a win.
441 DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
442 " to avoid cast: " << CI
<< '\n');
443 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, false);
444 assert(Res
->getType() == DestTy
);
445 return ReplaceInstUsesWith(CI
, Res
);
448 // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
449 if (DestTy
->getScalarSizeInBits() == 1) {
450 Constant
*One
= ConstantInt::get(Src
->getType(), 1);
451 Src
= Builder
->CreateAnd(Src
, One
, "tmp");
452 Value
*Zero
= Constant::getNullValue(Src
->getType());
453 return new ICmpInst(ICmpInst::ICMP_NE
, Src
, Zero
);
456 // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
457 Value
*A
= 0; ConstantInt
*Cst
= 0;
458 if (Src
->hasOneUse() &&
459 match(Src
, m_LShr(m_ZExt(m_Value(A
)), m_ConstantInt(Cst
)))) {
460 // We have three types to worry about here, the type of A, the source of
461 // the truncate (MidSize), and the destination of the truncate. We know that
462 // ASize < MidSize and MidSize > ResultSize, but don't know the relation
463 // between ASize and ResultSize.
464 unsigned ASize
= A
->getType()->getPrimitiveSizeInBits();
466 // If the shift amount is larger than the size of A, then the result is
467 // known to be zero because all the input bits got shifted out.
468 if (Cst
->getZExtValue() >= ASize
)
469 return ReplaceInstUsesWith(CI
, Constant::getNullValue(CI
.getType()));
471 // Since we're doing an lshr and a zero extend, and know that the shift
472 // amount is smaller than ASize, it is always safe to do the shift in A's
473 // type, then zero extend or truncate to the result.
474 Value
*Shift
= Builder
->CreateLShr(A
, Cst
->getZExtValue());
475 Shift
->takeName(Src
);
476 return CastInst::CreateIntegerCast(Shift
, CI
.getType(), false);
479 // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
480 // type isn't non-native.
481 if (Src
->hasOneUse() && isa
<IntegerType
>(Src
->getType()) &&
482 ShouldChangeType(Src
->getType(), CI
.getType()) &&
483 match(Src
, m_And(m_Value(A
), m_ConstantInt(Cst
)))) {
484 Value
*NewTrunc
= Builder
->CreateTrunc(A
, CI
.getType(), A
->getName()+".tr");
485 return BinaryOperator::CreateAnd(NewTrunc
,
486 ConstantExpr::getTrunc(Cst
, CI
.getType()));
492 /// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
493 /// in order to eliminate the icmp.
494 Instruction
*InstCombiner::transformZExtICmp(ICmpInst
*ICI
, Instruction
&CI
,
496 // If we are just checking for a icmp eq of a single bit and zext'ing it
497 // to an integer, then shift the bit to the appropriate place and then
498 // cast to integer to avoid the comparison.
499 if (ConstantInt
*Op1C
= dyn_cast
<ConstantInt
>(ICI
->getOperand(1))) {
500 const APInt
&Op1CV
= Op1C
->getValue();
502 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
503 // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
504 if ((ICI
->getPredicate() == ICmpInst::ICMP_SLT
&& Op1CV
== 0) ||
505 (ICI
->getPredicate() == ICmpInst::ICMP_SGT
&&Op1CV
.isAllOnesValue())) {
506 if (!DoXform
) return ICI
;
508 Value
*In
= ICI
->getOperand(0);
509 Value
*Sh
= ConstantInt::get(In
->getType(),
510 In
->getType()->getScalarSizeInBits()-1);
511 In
= Builder
->CreateLShr(In
, Sh
, In
->getName()+".lobit");
512 if (In
->getType() != CI
.getType())
513 In
= Builder
->CreateIntCast(In
, CI
.getType(), false/*ZExt*/, "tmp");
515 if (ICI
->getPredicate() == ICmpInst::ICMP_SGT
) {
516 Constant
*One
= ConstantInt::get(In
->getType(), 1);
517 In
= Builder
->CreateXor(In
, One
, In
->getName()+".not");
520 return ReplaceInstUsesWith(CI
, In
);
525 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
526 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
527 // zext (X == 1) to i32 --> X iff X has only the low bit set.
528 // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
529 // zext (X != 0) to i32 --> X iff X has only the low bit set.
530 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
531 // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
532 // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
533 if ((Op1CV
== 0 || Op1CV
.isPowerOf2()) &&
534 // This only works for EQ and NE
536 // If Op1C some other power of two, convert:
537 uint32_t BitWidth
= Op1C
->getType()->getBitWidth();
538 APInt
KnownZero(BitWidth
, 0), KnownOne(BitWidth
, 0);
539 APInt
TypeMask(APInt::getAllOnesValue(BitWidth
));
540 ComputeMaskedBits(ICI
->getOperand(0), TypeMask
, KnownZero
, KnownOne
);
542 APInt
KnownZeroMask(~KnownZero
);
543 if (KnownZeroMask
.isPowerOf2()) { // Exactly 1 possible 1?
544 if (!DoXform
) return ICI
;
546 bool isNE
= ICI
->getPredicate() == ICmpInst::ICMP_NE
;
547 if (Op1CV
!= 0 && (Op1CV
!= KnownZeroMask
)) {
548 // (X&4) == 2 --> false
549 // (X&4) != 2 --> true
550 Constant
*Res
= ConstantInt::get(Type::getInt1Ty(CI
.getContext()),
552 Res
= ConstantExpr::getZExt(Res
, CI
.getType());
553 return ReplaceInstUsesWith(CI
, Res
);
556 uint32_t ShiftAmt
= KnownZeroMask
.logBase2();
557 Value
*In
= ICI
->getOperand(0);
559 // Perform a logical shr by shiftamt.
560 // Insert the shift to put the result in the low bit.
561 In
= Builder
->CreateLShr(In
, ConstantInt::get(In
->getType(),ShiftAmt
),
562 In
->getName()+".lobit");
565 if ((Op1CV
!= 0) == isNE
) { // Toggle the low bit.
566 Constant
*One
= ConstantInt::get(In
->getType(), 1);
567 In
= Builder
->CreateXor(In
, One
, "tmp");
570 if (CI
.getType() == In
->getType())
571 return ReplaceInstUsesWith(CI
, In
);
572 return CastInst::CreateIntegerCast(In
, CI
.getType(), false/*ZExt*/);
577 // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
578 // It is also profitable to transform icmp eq into not(xor(A, B)) because that
579 // may lead to additional simplifications.
580 if (ICI
->isEquality() && CI
.getType() == ICI
->getOperand(0)->getType()) {
581 if (const IntegerType
*ITy
= dyn_cast
<IntegerType
>(CI
.getType())) {
582 uint32_t BitWidth
= ITy
->getBitWidth();
583 Value
*LHS
= ICI
->getOperand(0);
584 Value
*RHS
= ICI
->getOperand(1);
586 APInt
KnownZeroLHS(BitWidth
, 0), KnownOneLHS(BitWidth
, 0);
587 APInt
KnownZeroRHS(BitWidth
, 0), KnownOneRHS(BitWidth
, 0);
588 APInt
TypeMask(APInt::getAllOnesValue(BitWidth
));
589 ComputeMaskedBits(LHS
, TypeMask
, KnownZeroLHS
, KnownOneLHS
);
590 ComputeMaskedBits(RHS
, TypeMask
, KnownZeroRHS
, KnownOneRHS
);
592 if (KnownZeroLHS
== KnownZeroRHS
&& KnownOneLHS
== KnownOneRHS
) {
593 APInt KnownBits
= KnownZeroLHS
| KnownOneLHS
;
594 APInt UnknownBit
= ~KnownBits
;
595 if (UnknownBit
.countPopulation() == 1) {
596 if (!DoXform
) return ICI
;
598 Value
*Result
= Builder
->CreateXor(LHS
, RHS
);
600 // Mask off any bits that are set and won't be shifted away.
601 if (KnownOneLHS
.uge(UnknownBit
))
602 Result
= Builder
->CreateAnd(Result
,
603 ConstantInt::get(ITy
, UnknownBit
));
605 // Shift the bit we're testing down to the lsb.
606 Result
= Builder
->CreateLShr(
607 Result
, ConstantInt::get(ITy
, UnknownBit
.countTrailingZeros()));
609 if (ICI
->getPredicate() == ICmpInst::ICMP_EQ
)
610 Result
= Builder
->CreateXor(Result
, ConstantInt::get(ITy
, 1));
611 Result
->takeName(ICI
);
612 return ReplaceInstUsesWith(CI
, Result
);
621 /// CanEvaluateZExtd - Determine if the specified value can be computed in the
622 /// specified wider type and produce the same low bits. If not, return false.
624 /// If this function returns true, it can also return a non-zero number of bits
625 /// (in BitsToClear) which indicates that the value it computes is correct for
626 /// the zero extend, but that the additional BitsToClear bits need to be zero'd
627 /// out. For example, to promote something like:
629 /// %B = trunc i64 %A to i32
630 /// %C = lshr i32 %B, 8
631 /// %E = zext i32 %C to i64
633 /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
634 /// set to 8 to indicate that the promoted value needs to have bits 24-31
635 /// cleared in addition to bits 32-63. Since an 'and' will be generated to
636 /// clear the top bits anyway, doing this has no extra cost.
638 /// This function works on both vectors and scalars.
639 static bool CanEvaluateZExtd(Value
*V
, const Type
*Ty
, unsigned &BitsToClear
) {
641 if (isa
<Constant
>(V
))
644 Instruction
*I
= dyn_cast
<Instruction
>(V
);
645 if (!I
) return false;
647 // If the input is a truncate from the destination type, we can trivially
648 // eliminate it, even if it has multiple uses.
649 // FIXME: This is currently disabled until codegen can handle this without
650 // pessimizing code, PR5997.
651 if (0 && isa
<TruncInst
>(I
) && I
->getOperand(0)->getType() == Ty
)
654 // We can't extend or shrink something that has multiple uses: doing so would
655 // require duplicating the instruction in general, which isn't profitable.
656 if (!I
->hasOneUse()) return false;
658 unsigned Opc
= I
->getOpcode(), Tmp
;
660 case Instruction::ZExt
: // zext(zext(x)) -> zext(x).
661 case Instruction::SExt
: // zext(sext(x)) -> sext(x).
662 case Instruction::Trunc
: // zext(trunc(x)) -> trunc(x) or zext(x)
664 case Instruction::And
:
665 case Instruction::Or
:
666 case Instruction::Xor
:
667 case Instruction::Add
:
668 case Instruction::Sub
:
669 case Instruction::Mul
:
670 case Instruction::Shl
:
671 if (!CanEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
) ||
672 !CanEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
))
674 // These can all be promoted if neither operand has 'bits to clear'.
675 if (BitsToClear
== 0 && Tmp
== 0)
678 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
679 // other side, BitsToClear is ok.
681 (Opc
== Instruction::And
|| Opc
== Instruction::Or
||
682 Opc
== Instruction::Xor
)) {
683 // We use MaskedValueIsZero here for generality, but the case we care
684 // about the most is constant RHS.
685 unsigned VSize
= V
->getType()->getScalarSizeInBits();
686 if (MaskedValueIsZero(I
->getOperand(1),
687 APInt::getHighBitsSet(VSize
, BitsToClear
)))
691 // Otherwise, we don't know how to analyze this BitsToClear case yet.
694 case Instruction::LShr
:
695 // We can promote lshr(x, cst) if we can promote x. This requires the
696 // ultimate 'and' to clear out the high zero bits we're clearing out though.
697 if (ConstantInt
*Amt
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
698 if (!CanEvaluateZExtd(I
->getOperand(0), Ty
, BitsToClear
))
700 BitsToClear
+= Amt
->getZExtValue();
701 if (BitsToClear
> V
->getType()->getScalarSizeInBits())
702 BitsToClear
= V
->getType()->getScalarSizeInBits();
705 // Cannot promote variable LSHR.
707 case Instruction::Select
:
708 if (!CanEvaluateZExtd(I
->getOperand(1), Ty
, Tmp
) ||
709 !CanEvaluateZExtd(I
->getOperand(2), Ty
, BitsToClear
) ||
710 // TODO: If important, we could handle the case when the BitsToClear are
711 // known zero in the disagreeing side.
716 case Instruction::PHI
: {
717 // We can change a phi if we can change all operands. Note that we never
718 // get into trouble with cyclic PHIs here because we only consider
719 // instructions with a single use.
720 PHINode
*PN
= cast
<PHINode
>(I
);
721 if (!CanEvaluateZExtd(PN
->getIncomingValue(0), Ty
, BitsToClear
))
723 for (unsigned i
= 1, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
724 if (!CanEvaluateZExtd(PN
->getIncomingValue(i
), Ty
, Tmp
) ||
725 // TODO: If important, we could handle the case when the BitsToClear
726 // are known zero in the disagreeing input.
732 // TODO: Can handle more cases here.
737 Instruction
*InstCombiner::visitZExt(ZExtInst
&CI
) {
738 // If this zero extend is only used by a truncate, let the truncate by
739 // eliminated before we try to optimize this zext.
740 if (CI
.hasOneUse() && isa
<TruncInst
>(CI
.use_back()))
743 // If one of the common conversion will work, do it.
744 if (Instruction
*Result
= commonCastTransforms(CI
))
747 // See if we can simplify any instructions used by the input whose sole
748 // purpose is to compute bits we don't care about.
749 if (SimplifyDemandedInstructionBits(CI
))
752 Value
*Src
= CI
.getOperand(0);
753 const Type
*SrcTy
= Src
->getType(), *DestTy
= CI
.getType();
755 // Attempt to extend the entire input expression tree to the destination
756 // type. Only do this if the dest type is a simple type, don't convert the
757 // expression tree to something weird like i93 unless the source is also
759 unsigned BitsToClear
;
760 if ((DestTy
->isVectorTy() || ShouldChangeType(SrcTy
, DestTy
)) &&
761 CanEvaluateZExtd(Src
, DestTy
, BitsToClear
)) {
762 assert(BitsToClear
< SrcTy
->getScalarSizeInBits() &&
763 "Unreasonable BitsToClear");
765 // Okay, we can transform this! Insert the new expression now.
766 DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
767 " to avoid zero extend: " << CI
);
768 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, false);
769 assert(Res
->getType() == DestTy
);
771 uint32_t SrcBitsKept
= SrcTy
->getScalarSizeInBits()-BitsToClear
;
772 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
774 // If the high bits are already filled with zeros, just replace this
775 // cast with the result.
776 if (MaskedValueIsZero(Res
, APInt::getHighBitsSet(DestBitSize
,
777 DestBitSize
-SrcBitsKept
)))
778 return ReplaceInstUsesWith(CI
, Res
);
780 // We need to emit an AND to clear the high bits.
781 Constant
*C
= ConstantInt::get(Res
->getType(),
782 APInt::getLowBitsSet(DestBitSize
, SrcBitsKept
));
783 return BinaryOperator::CreateAnd(Res
, C
);
786 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
787 // types and if the sizes are just right we can convert this into a logical
788 // 'and' which will be much cheaper than the pair of casts.
789 if (TruncInst
*CSrc
= dyn_cast
<TruncInst
>(Src
)) { // A->B->C cast
790 // TODO: Subsume this into EvaluateInDifferentType.
792 // Get the sizes of the types involved. We know that the intermediate type
793 // will be smaller than A or C, but don't know the relation between A and C.
794 Value
*A
= CSrc
->getOperand(0);
795 unsigned SrcSize
= A
->getType()->getScalarSizeInBits();
796 unsigned MidSize
= CSrc
->getType()->getScalarSizeInBits();
797 unsigned DstSize
= CI
.getType()->getScalarSizeInBits();
798 // If we're actually extending zero bits, then if
799 // SrcSize < DstSize: zext(a & mask)
800 // SrcSize == DstSize: a & mask
801 // SrcSize > DstSize: trunc(a) & mask
802 if (SrcSize
< DstSize
) {
803 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
804 Constant
*AndConst
= ConstantInt::get(A
->getType(), AndValue
);
805 Value
*And
= Builder
->CreateAnd(A
, AndConst
, CSrc
->getName()+".mask");
806 return new ZExtInst(And
, CI
.getType());
809 if (SrcSize
== DstSize
) {
810 APInt
AndValue(APInt::getLowBitsSet(SrcSize
, MidSize
));
811 return BinaryOperator::CreateAnd(A
, ConstantInt::get(A
->getType(),
814 if (SrcSize
> DstSize
) {
815 Value
*Trunc
= Builder
->CreateTrunc(A
, CI
.getType(), "tmp");
816 APInt
AndValue(APInt::getLowBitsSet(DstSize
, MidSize
));
817 return BinaryOperator::CreateAnd(Trunc
,
818 ConstantInt::get(Trunc
->getType(),
823 if (ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(Src
))
824 return transformZExtICmp(ICI
, CI
);
826 BinaryOperator
*SrcI
= dyn_cast
<BinaryOperator
>(Src
);
827 if (SrcI
&& SrcI
->getOpcode() == Instruction::Or
) {
828 // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
829 // of the (zext icmp) will be transformed.
830 ICmpInst
*LHS
= dyn_cast
<ICmpInst
>(SrcI
->getOperand(0));
831 ICmpInst
*RHS
= dyn_cast
<ICmpInst
>(SrcI
->getOperand(1));
832 if (LHS
&& RHS
&& LHS
->hasOneUse() && RHS
->hasOneUse() &&
833 (transformZExtICmp(LHS
, CI
, false) ||
834 transformZExtICmp(RHS
, CI
, false))) {
835 Value
*LCast
= Builder
->CreateZExt(LHS
, CI
.getType(), LHS
->getName());
836 Value
*RCast
= Builder
->CreateZExt(RHS
, CI
.getType(), RHS
->getName());
837 return BinaryOperator::Create(Instruction::Or
, LCast
, RCast
);
841 // zext(trunc(t) & C) -> (t & zext(C)).
842 if (SrcI
&& SrcI
->getOpcode() == Instruction::And
&& SrcI
->hasOneUse())
843 if (ConstantInt
*C
= dyn_cast
<ConstantInt
>(SrcI
->getOperand(1)))
844 if (TruncInst
*TI
= dyn_cast
<TruncInst
>(SrcI
->getOperand(0))) {
845 Value
*TI0
= TI
->getOperand(0);
846 if (TI0
->getType() == CI
.getType())
848 BinaryOperator::CreateAnd(TI0
,
849 ConstantExpr::getZExt(C
, CI
.getType()));
852 // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
853 if (SrcI
&& SrcI
->getOpcode() == Instruction::Xor
&& SrcI
->hasOneUse())
854 if (ConstantInt
*C
= dyn_cast
<ConstantInt
>(SrcI
->getOperand(1)))
855 if (BinaryOperator
*And
= dyn_cast
<BinaryOperator
>(SrcI
->getOperand(0)))
856 if (And
->getOpcode() == Instruction::And
&& And
->hasOneUse() &&
857 And
->getOperand(1) == C
)
858 if (TruncInst
*TI
= dyn_cast
<TruncInst
>(And
->getOperand(0))) {
859 Value
*TI0
= TI
->getOperand(0);
860 if (TI0
->getType() == CI
.getType()) {
861 Constant
*ZC
= ConstantExpr::getZExt(C
, CI
.getType());
862 Value
*NewAnd
= Builder
->CreateAnd(TI0
, ZC
, "tmp");
863 return BinaryOperator::CreateXor(NewAnd
, ZC
);
867 // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
869 if (SrcI
&& SrcI
->hasOneUse() && SrcI
->getType()->isIntegerTy(1) &&
870 match(SrcI
, m_Not(m_Value(X
))) &&
871 (!X
->hasOneUse() || !isa
<CmpInst
>(X
))) {
872 Value
*New
= Builder
->CreateZExt(X
, CI
.getType());
873 return BinaryOperator::CreateXor(New
, ConstantInt::get(CI
.getType(), 1));
879 /// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
880 /// in order to eliminate the icmp.
881 Instruction
*InstCombiner::transformSExtICmp(ICmpInst
*ICI
, Instruction
&CI
) {
882 Value
*Op0
= ICI
->getOperand(0), *Op1
= ICI
->getOperand(1);
883 ICmpInst::Predicate Pred
= ICI
->getPredicate();
885 if (ConstantInt
*Op1C
= dyn_cast
<ConstantInt
>(Op1
)) {
886 // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
887 // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
888 if ((Pred
== ICmpInst::ICMP_SLT
&& Op1C
->isZero()) ||
889 (Pred
== ICmpInst::ICMP_SGT
&& Op1C
->isAllOnesValue())) {
891 Value
*Sh
= ConstantInt::get(Op0
->getType(),
892 Op0
->getType()->getScalarSizeInBits()-1);
893 Value
*In
= Builder
->CreateAShr(Op0
, Sh
, Op0
->getName()+".lobit");
894 if (In
->getType() != CI
.getType())
895 In
= Builder
->CreateIntCast(In
, CI
.getType(), true/*SExt*/, "tmp");
897 if (Pred
== ICmpInst::ICMP_SGT
)
898 In
= Builder
->CreateNot(In
, In
->getName()+".not");
899 return ReplaceInstUsesWith(CI
, In
);
902 // If we know that only one bit of the LHS of the icmp can be set and we
903 // have an equality comparison with zero or a power of 2, we can transform
904 // the icmp and sext into bitwise/integer operations.
905 if (ICI
->hasOneUse() &&
906 ICI
->isEquality() && (Op1C
->isZero() || Op1C
->getValue().isPowerOf2())){
907 unsigned BitWidth
= Op1C
->getType()->getBitWidth();
908 APInt
KnownZero(BitWidth
, 0), KnownOne(BitWidth
, 0);
909 APInt
TypeMask(APInt::getAllOnesValue(BitWidth
));
910 ComputeMaskedBits(Op0
, TypeMask
, KnownZero
, KnownOne
);
912 APInt
KnownZeroMask(~KnownZero
);
913 if (KnownZeroMask
.isPowerOf2()) {
914 Value
*In
= ICI
->getOperand(0);
916 // If the icmp tests for a known zero bit we can constant fold it.
917 if (!Op1C
->isZero() && Op1C
->getValue() != KnownZeroMask
) {
918 Value
*V
= Pred
== ICmpInst::ICMP_NE
?
919 ConstantInt::getAllOnesValue(CI
.getType()) :
920 ConstantInt::getNullValue(CI
.getType());
921 return ReplaceInstUsesWith(CI
, V
);
924 if (!Op1C
->isZero() == (Pred
== ICmpInst::ICMP_NE
)) {
925 // sext ((x & 2^n) == 0) -> (x >> n) - 1
926 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
927 unsigned ShiftAmt
= KnownZeroMask
.countTrailingZeros();
928 // Perform a right shift to place the desired bit in the LSB.
930 In
= Builder
->CreateLShr(In
,
931 ConstantInt::get(In
->getType(), ShiftAmt
));
933 // At this point "In" is either 1 or 0. Subtract 1 to turn
934 // {1, 0} -> {0, -1}.
935 In
= Builder
->CreateAdd(In
,
936 ConstantInt::getAllOnesValue(In
->getType()),
939 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
940 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
941 unsigned ShiftAmt
= KnownZeroMask
.countLeadingZeros();
942 // Perform a left shift to place the desired bit in the MSB.
944 In
= Builder
->CreateShl(In
,
945 ConstantInt::get(In
->getType(), ShiftAmt
));
947 // Distribute the bit over the whole bit width.
948 In
= Builder
->CreateAShr(In
, ConstantInt::get(In
->getType(),
949 BitWidth
- 1), "sext");
952 if (CI
.getType() == In
->getType())
953 return ReplaceInstUsesWith(CI
, In
);
954 return CastInst::CreateIntegerCast(In
, CI
.getType(), true/*SExt*/);
959 // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
960 if (const VectorType
*VTy
= dyn_cast
<VectorType
>(CI
.getType())) {
961 if (Pred
== ICmpInst::ICMP_SLT
&& match(Op1
, m_Zero()) &&
962 Op0
->getType() == CI
.getType()) {
963 const Type
*EltTy
= VTy
->getElementType();
965 // splat the shift constant to a constant vector.
966 Constant
*VSh
= ConstantInt::get(VTy
, EltTy
->getScalarSizeInBits()-1);
967 Value
*In
= Builder
->CreateAShr(Op0
, VSh
, Op0
->getName()+".lobit");
968 return ReplaceInstUsesWith(CI
, In
);
975 /// CanEvaluateSExtd - Return true if we can take the specified value
976 /// and return it as type Ty without inserting any new casts and without
977 /// changing the value of the common low bits. This is used by code that tries
978 /// to promote integer operations to a wider types will allow us to eliminate
981 /// This function works on both vectors and scalars.
983 static bool CanEvaluateSExtd(Value
*V
, const Type
*Ty
) {
984 assert(V
->getType()->getScalarSizeInBits() < Ty
->getScalarSizeInBits() &&
985 "Can't sign extend type to a smaller type");
986 // If this is a constant, it can be trivially promoted.
987 if (isa
<Constant
>(V
))
990 Instruction
*I
= dyn_cast
<Instruction
>(V
);
991 if (!I
) return false;
993 // If this is a truncate from the dest type, we can trivially eliminate it,
994 // even if it has multiple uses.
995 // FIXME: This is currently disabled until codegen can handle this without
996 // pessimizing code, PR5997.
997 if (0 && isa
<TruncInst
>(I
) && I
->getOperand(0)->getType() == Ty
)
1000 // We can't extend or shrink something that has multiple uses: doing so would
1001 // require duplicating the instruction in general, which isn't profitable.
1002 if (!I
->hasOneUse()) return false;
1004 switch (I
->getOpcode()) {
1005 case Instruction::SExt
: // sext(sext(x)) -> sext(x)
1006 case Instruction::ZExt
: // sext(zext(x)) -> zext(x)
1007 case Instruction::Trunc
: // sext(trunc(x)) -> trunc(x) or sext(x)
1009 case Instruction::And
:
1010 case Instruction::Or
:
1011 case Instruction::Xor
:
1012 case Instruction::Add
:
1013 case Instruction::Sub
:
1014 case Instruction::Mul
:
1015 // These operators can all arbitrarily be extended if their inputs can.
1016 return CanEvaluateSExtd(I
->getOperand(0), Ty
) &&
1017 CanEvaluateSExtd(I
->getOperand(1), Ty
);
1019 //case Instruction::Shl: TODO
1020 //case Instruction::LShr: TODO
1022 case Instruction::Select
:
1023 return CanEvaluateSExtd(I
->getOperand(1), Ty
) &&
1024 CanEvaluateSExtd(I
->getOperand(2), Ty
);
1026 case Instruction::PHI
: {
1027 // We can change a phi if we can change all operands. Note that we never
1028 // get into trouble with cyclic PHIs here because we only consider
1029 // instructions with a single use.
1030 PHINode
*PN
= cast
<PHINode
>(I
);
1031 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
1032 if (!CanEvaluateSExtd(PN
->getIncomingValue(i
), Ty
)) return false;
1036 // TODO: Can handle more cases here.
1043 Instruction
*InstCombiner::visitSExt(SExtInst
&CI
) {
1044 // If this sign extend is only used by a truncate, let the truncate by
1045 // eliminated before we try to optimize this zext.
1046 if (CI
.hasOneUse() && isa
<TruncInst
>(CI
.use_back()))
1049 if (Instruction
*I
= commonCastTransforms(CI
))
1052 // See if we can simplify any instructions used by the input whose sole
1053 // purpose is to compute bits we don't care about.
1054 if (SimplifyDemandedInstructionBits(CI
))
1057 Value
*Src
= CI
.getOperand(0);
1058 const Type
*SrcTy
= Src
->getType(), *DestTy
= CI
.getType();
1060 // Attempt to extend the entire input expression tree to the destination
1061 // type. Only do this if the dest type is a simple type, don't convert the
1062 // expression tree to something weird like i93 unless the source is also
1064 if ((DestTy
->isVectorTy() || ShouldChangeType(SrcTy
, DestTy
)) &&
1065 CanEvaluateSExtd(Src
, DestTy
)) {
1066 // Okay, we can transform this! Insert the new expression now.
1067 DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1068 " to avoid sign extend: " << CI
);
1069 Value
*Res
= EvaluateInDifferentType(Src
, DestTy
, true);
1070 assert(Res
->getType() == DestTy
);
1072 uint32_t SrcBitSize
= SrcTy
->getScalarSizeInBits();
1073 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
1075 // If the high bits are already filled with sign bit, just replace this
1076 // cast with the result.
1077 if (ComputeNumSignBits(Res
) > DestBitSize
- SrcBitSize
)
1078 return ReplaceInstUsesWith(CI
, Res
);
1080 // We need to emit a shl + ashr to do the sign extend.
1081 Value
*ShAmt
= ConstantInt::get(DestTy
, DestBitSize
-SrcBitSize
);
1082 return BinaryOperator::CreateAShr(Builder
->CreateShl(Res
, ShAmt
, "sext"),
1086 // If this input is a trunc from our destination, then turn sext(trunc(x))
1088 if (TruncInst
*TI
= dyn_cast
<TruncInst
>(Src
))
1089 if (TI
->hasOneUse() && TI
->getOperand(0)->getType() == DestTy
) {
1090 uint32_t SrcBitSize
= SrcTy
->getScalarSizeInBits();
1091 uint32_t DestBitSize
= DestTy
->getScalarSizeInBits();
1093 // We need to emit a shl + ashr to do the sign extend.
1094 Value
*ShAmt
= ConstantInt::get(DestTy
, DestBitSize
-SrcBitSize
);
1095 Value
*Res
= Builder
->CreateShl(TI
->getOperand(0), ShAmt
, "sext");
1096 return BinaryOperator::CreateAShr(Res
, ShAmt
);
1099 if (ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(Src
))
1100 return transformSExtICmp(ICI
, CI
);
1102 // If the input is a shl/ashr pair of a same constant, then this is a sign
1103 // extension from a smaller value. If we could trust arbitrary bitwidth
1104 // integers, we could turn this into a truncate to the smaller bit and then
1105 // use a sext for the whole extension. Since we don't, look deeper and check
1106 // for a truncate. If the source and dest are the same type, eliminate the
1107 // trunc and extend and just do shifts. For example, turn:
1108 // %a = trunc i32 %i to i8
1109 // %b = shl i8 %a, 6
1110 // %c = ashr i8 %b, 6
1111 // %d = sext i8 %c to i32
1113 // %a = shl i32 %i, 30
1114 // %d = ashr i32 %a, 30
1116 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1117 ConstantInt
*BA
= 0, *CA
= 0;
1118 if (match(Src
, m_AShr(m_Shl(m_Trunc(m_Value(A
)), m_ConstantInt(BA
)),
1119 m_ConstantInt(CA
))) &&
1120 BA
== CA
&& A
->getType() == CI
.getType()) {
1121 unsigned MidSize
= Src
->getType()->getScalarSizeInBits();
1122 unsigned SrcDstSize
= CI
.getType()->getScalarSizeInBits();
1123 unsigned ShAmt
= CA
->getZExtValue()+SrcDstSize
-MidSize
;
1124 Constant
*ShAmtV
= ConstantInt::get(CI
.getType(), ShAmt
);
1125 A
= Builder
->CreateShl(A
, ShAmtV
, CI
.getName());
1126 return BinaryOperator::CreateAShr(A
, ShAmtV
);
1133 /// FitsInFPType - Return a Constant* for the specified FP constant if it fits
1134 /// in the specified FP type without changing its value.
1135 static Constant
*FitsInFPType(ConstantFP
*CFP
, const fltSemantics
&Sem
) {
1137 APFloat F
= CFP
->getValueAPF();
1138 (void)F
.convert(Sem
, APFloat::rmNearestTiesToEven
, &losesInfo
);
1140 return ConstantFP::get(CFP
->getContext(), F
);
1144 /// LookThroughFPExtensions - If this is an fp extension instruction, look
1145 /// through it until we get the source value.
1146 static Value
*LookThroughFPExtensions(Value
*V
) {
1147 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
1148 if (I
->getOpcode() == Instruction::FPExt
)
1149 return LookThroughFPExtensions(I
->getOperand(0));
1151 // If this value is a constant, return the constant in the smallest FP type
1152 // that can accurately represent it. This allows us to turn
1153 // (float)((double)X+2.0) into x+2.0f.
1154 if (ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(V
)) {
1155 if (CFP
->getType() == Type::getPPC_FP128Ty(V
->getContext()))
1156 return V
; // No constant folding of this.
1157 // See if the value can be truncated to float and then reextended.
1158 if (Value
*V
= FitsInFPType(CFP
, APFloat::IEEEsingle
))
1160 if (CFP
->getType()->isDoubleTy())
1161 return V
; // Won't shrink.
1162 if (Value
*V
= FitsInFPType(CFP
, APFloat::IEEEdouble
))
1164 // Don't try to shrink to various long double types.
1170 Instruction
*InstCombiner::visitFPTrunc(FPTruncInst
&CI
) {
1171 if (Instruction
*I
= commonCastTransforms(CI
))
1174 // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
1175 // smaller than the destination type, we can eliminate the truncate by doing
1176 // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
1177 // as many builtins (sqrt, etc).
1178 BinaryOperator
*OpI
= dyn_cast
<BinaryOperator
>(CI
.getOperand(0));
1179 if (OpI
&& OpI
->hasOneUse()) {
1180 switch (OpI
->getOpcode()) {
1182 case Instruction::FAdd
:
1183 case Instruction::FSub
:
1184 case Instruction::FMul
:
1185 case Instruction::FDiv
:
1186 case Instruction::FRem
:
1187 const Type
*SrcTy
= OpI
->getType();
1188 Value
*LHSTrunc
= LookThroughFPExtensions(OpI
->getOperand(0));
1189 Value
*RHSTrunc
= LookThroughFPExtensions(OpI
->getOperand(1));
1190 if (LHSTrunc
->getType() != SrcTy
&&
1191 RHSTrunc
->getType() != SrcTy
) {
1192 unsigned DstSize
= CI
.getType()->getScalarSizeInBits();
1193 // If the source types were both smaller than the destination type of
1194 // the cast, do this xform.
1195 if (LHSTrunc
->getType()->getScalarSizeInBits() <= DstSize
&&
1196 RHSTrunc
->getType()->getScalarSizeInBits() <= DstSize
) {
1197 LHSTrunc
= Builder
->CreateFPExt(LHSTrunc
, CI
.getType());
1198 RHSTrunc
= Builder
->CreateFPExt(RHSTrunc
, CI
.getType());
1199 return BinaryOperator::Create(OpI
->getOpcode(), LHSTrunc
, RHSTrunc
);
1206 // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
1207 // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
1208 CallInst
*Call
= dyn_cast
<CallInst
>(CI
.getOperand(0));
1209 if (Call
&& Call
->getCalledFunction() &&
1210 Call
->getCalledFunction()->getName() == "sqrt" &&
1211 Call
->getNumArgOperands() == 1) {
1212 CastInst
*Arg
= dyn_cast
<CastInst
>(Call
->getArgOperand(0));
1213 if (Arg
&& Arg
->getOpcode() == Instruction::FPExt
&&
1214 CI
.getType()->isFloatTy() &&
1215 Call
->getType()->isDoubleTy() &&
1216 Arg
->getType()->isDoubleTy() &&
1217 Arg
->getOperand(0)->getType()->isFloatTy()) {
1218 Function
*Callee
= Call
->getCalledFunction();
1219 Module
*M
= CI
.getParent()->getParent()->getParent();
1220 Constant
*SqrtfFunc
= M
->getOrInsertFunction("sqrtf",
1221 Callee
->getAttributes(),
1222 Builder
->getFloatTy(),
1223 Builder
->getFloatTy(),
1225 CallInst
*ret
= CallInst::Create(SqrtfFunc
, Arg
->getOperand(0),
1227 ret
->setAttributes(Callee
->getAttributes());
1230 // Remove the old Call. With -fmath-errno, it won't get marked readnone.
1231 Call
->replaceAllUsesWith(UndefValue::get(Call
->getType()));
1232 EraseInstFromFunction(*Call
);
1240 Instruction
*InstCombiner::visitFPExt(CastInst
&CI
) {
1241 return commonCastTransforms(CI
);
1244 Instruction
*InstCombiner::visitFPToUI(FPToUIInst
&FI
) {
1245 Instruction
*OpI
= dyn_cast
<Instruction
>(FI
.getOperand(0));
1247 return commonCastTransforms(FI
);
1249 // fptoui(uitofp(X)) --> X
1250 // fptoui(sitofp(X)) --> X
1251 // This is safe if the intermediate type has enough bits in its mantissa to
1252 // accurately represent all values of X. For example, do not do this with
1253 // i64->float->i64. This is also safe for sitofp case, because any negative
1254 // 'X' value would cause an undefined result for the fptoui.
1255 if ((isa
<UIToFPInst
>(OpI
) || isa
<SIToFPInst
>(OpI
)) &&
1256 OpI
->getOperand(0)->getType() == FI
.getType() &&
1257 (int)FI
.getType()->getScalarSizeInBits() < /*extra bit for sign */
1258 OpI
->getType()->getFPMantissaWidth())
1259 return ReplaceInstUsesWith(FI
, OpI
->getOperand(0));
1261 return commonCastTransforms(FI
);
1264 Instruction
*InstCombiner::visitFPToSI(FPToSIInst
&FI
) {
1265 Instruction
*OpI
= dyn_cast
<Instruction
>(FI
.getOperand(0));
1267 return commonCastTransforms(FI
);
1269 // fptosi(sitofp(X)) --> X
1270 // fptosi(uitofp(X)) --> X
1271 // This is safe if the intermediate type has enough bits in its mantissa to
1272 // accurately represent all values of X. For example, do not do this with
1273 // i64->float->i64. This is also safe for sitofp case, because any negative
1274 // 'X' value would cause an undefined result for the fptoui.
1275 if ((isa
<UIToFPInst
>(OpI
) || isa
<SIToFPInst
>(OpI
)) &&
1276 OpI
->getOperand(0)->getType() == FI
.getType() &&
1277 (int)FI
.getType()->getScalarSizeInBits() <=
1278 OpI
->getType()->getFPMantissaWidth())
1279 return ReplaceInstUsesWith(FI
, OpI
->getOperand(0));
1281 return commonCastTransforms(FI
);
1284 Instruction
*InstCombiner::visitUIToFP(CastInst
&CI
) {
1285 return commonCastTransforms(CI
);
1288 Instruction
*InstCombiner::visitSIToFP(CastInst
&CI
) {
1289 return commonCastTransforms(CI
);
1292 Instruction
*InstCombiner::visitIntToPtr(IntToPtrInst
&CI
) {
1293 // If the source integer type is not the intptr_t type for this target, do a
1294 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
1295 // cast to be exposed to other transforms.
1297 if (CI
.getOperand(0)->getType()->getScalarSizeInBits() >
1298 TD
->getPointerSizeInBits()) {
1299 Value
*P
= Builder
->CreateTrunc(CI
.getOperand(0),
1300 TD
->getIntPtrType(CI
.getContext()), "tmp");
1301 return new IntToPtrInst(P
, CI
.getType());
1303 if (CI
.getOperand(0)->getType()->getScalarSizeInBits() <
1304 TD
->getPointerSizeInBits()) {
1305 Value
*P
= Builder
->CreateZExt(CI
.getOperand(0),
1306 TD
->getIntPtrType(CI
.getContext()), "tmp");
1307 return new IntToPtrInst(P
, CI
.getType());
1311 if (Instruction
*I
= commonCastTransforms(CI
))
1317 /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
1318 Instruction
*InstCombiner::commonPointerCastTransforms(CastInst
&CI
) {
1319 Value
*Src
= CI
.getOperand(0);
1321 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(Src
)) {
1322 // If casting the result of a getelementptr instruction with no offset, turn
1323 // this into a cast of the original pointer!
1324 if (GEP
->hasAllZeroIndices()) {
1325 // Changing the cast operand is usually not a good idea but it is safe
1326 // here because the pointer operand is being replaced with another
1327 // pointer operand so the opcode doesn't need to change.
1329 CI
.setOperand(0, GEP
->getOperand(0));
1333 // If the GEP has a single use, and the base pointer is a bitcast, and the
1334 // GEP computes a constant offset, see if we can convert these three
1335 // instructions into fewer. This typically happens with unions and other
1336 // non-type-safe code.
1337 if (TD
&& GEP
->hasOneUse() && isa
<BitCastInst
>(GEP
->getOperand(0)) &&
1338 GEP
->hasAllConstantIndices()) {
1339 // We are guaranteed to get a constant from EmitGEPOffset.
1340 ConstantInt
*OffsetV
= cast
<ConstantInt
>(EmitGEPOffset(GEP
));
1341 int64_t Offset
= OffsetV
->getSExtValue();
1343 // Get the base pointer input of the bitcast, and the type it points to.
1344 Value
*OrigBase
= cast
<BitCastInst
>(GEP
->getOperand(0))->getOperand(0);
1345 const Type
*GEPIdxTy
=
1346 cast
<PointerType
>(OrigBase
->getType())->getElementType();
1347 SmallVector
<Value
*, 8> NewIndices
;
1348 if (FindElementAtOffset(GEPIdxTy
, Offset
, NewIndices
)) {
1349 // If we were able to index down into an element, create the GEP
1350 // and bitcast the result. This eliminates one bitcast, potentially
1352 Value
*NGEP
= cast
<GEPOperator
>(GEP
)->isInBounds() ?
1353 Builder
->CreateInBoundsGEP(OrigBase
,
1354 NewIndices
.begin(), NewIndices
.end()) :
1355 Builder
->CreateGEP(OrigBase
, NewIndices
.begin(), NewIndices
.end());
1356 NGEP
->takeName(GEP
);
1358 if (isa
<BitCastInst
>(CI
))
1359 return new BitCastInst(NGEP
, CI
.getType());
1360 assert(isa
<PtrToIntInst
>(CI
));
1361 return new PtrToIntInst(NGEP
, CI
.getType());
1366 return commonCastTransforms(CI
);
1369 Instruction
*InstCombiner::visitPtrToInt(PtrToIntInst
&CI
) {
1370 // If the destination integer type is not the intptr_t type for this target,
1371 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
1372 // to be exposed to other transforms.
1374 if (CI
.getType()->getScalarSizeInBits() < TD
->getPointerSizeInBits()) {
1375 Value
*P
= Builder
->CreatePtrToInt(CI
.getOperand(0),
1376 TD
->getIntPtrType(CI
.getContext()),
1378 return new TruncInst(P
, CI
.getType());
1380 if (CI
.getType()->getScalarSizeInBits() > TD
->getPointerSizeInBits()) {
1381 Value
*P
= Builder
->CreatePtrToInt(CI
.getOperand(0),
1382 TD
->getIntPtrType(CI
.getContext()),
1384 return new ZExtInst(P
, CI
.getType());
1388 return commonPointerCastTransforms(CI
);
1391 /// OptimizeVectorResize - This input value (which is known to have vector type)
1392 /// is being zero extended or truncated to the specified vector type. Try to
1393 /// replace it with a shuffle (and vector/vector bitcast) if possible.
1395 /// The source and destination vector types may have different element types.
1396 static Instruction
*OptimizeVectorResize(Value
*InVal
, const VectorType
*DestTy
,
1398 // We can only do this optimization if the output is a multiple of the input
1399 // element size, or the input is a multiple of the output element size.
1400 // Convert the input type to have the same element type as the output.
1401 const VectorType
*SrcTy
= cast
<VectorType
>(InVal
->getType());
1403 if (SrcTy
->getElementType() != DestTy
->getElementType()) {
1404 // The input types don't need to be identical, but for now they must be the
1405 // same size. There is no specific reason we couldn't handle things like
1406 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
1408 if (SrcTy
->getElementType()->getPrimitiveSizeInBits() !=
1409 DestTy
->getElementType()->getPrimitiveSizeInBits())
1412 SrcTy
= VectorType::get(DestTy
->getElementType(), SrcTy
->getNumElements());
1413 InVal
= IC
.Builder
->CreateBitCast(InVal
, SrcTy
);
1416 // Now that the element types match, get the shuffle mask and RHS of the
1417 // shuffle to use, which depends on whether we're increasing or decreasing the
1418 // size of the input.
1419 SmallVector
<Constant
*, 16> ShuffleMask
;
1421 const IntegerType
*Int32Ty
= Type::getInt32Ty(SrcTy
->getContext());
1423 if (SrcTy
->getNumElements() > DestTy
->getNumElements()) {
1424 // If we're shrinking the number of elements, just shuffle in the low
1425 // elements from the input and use undef as the second shuffle input.
1426 V2
= UndefValue::get(SrcTy
);
1427 for (unsigned i
= 0, e
= DestTy
->getNumElements(); i
!= e
; ++i
)
1428 ShuffleMask
.push_back(ConstantInt::get(Int32Ty
, i
));
1431 // If we're increasing the number of elements, shuffle in all of the
1432 // elements from InVal and fill the rest of the result elements with zeros
1433 // from a constant zero.
1434 V2
= Constant::getNullValue(SrcTy
);
1435 unsigned SrcElts
= SrcTy
->getNumElements();
1436 for (unsigned i
= 0, e
= SrcElts
; i
!= e
; ++i
)
1437 ShuffleMask
.push_back(ConstantInt::get(Int32Ty
, i
));
1439 // The excess elements reference the first element of the zero input.
1440 ShuffleMask
.append(DestTy
->getNumElements()-SrcElts
,
1441 ConstantInt::get(Int32Ty
, SrcElts
));
1444 return new ShuffleVectorInst(InVal
, V2
, ConstantVector::get(ShuffleMask
));
1447 static bool isMultipleOfTypeSize(unsigned Value
, const Type
*Ty
) {
1448 return Value
% Ty
->getPrimitiveSizeInBits() == 0;
1451 static unsigned getTypeSizeIndex(unsigned Value
, const Type
*Ty
) {
1452 return Value
/ Ty
->getPrimitiveSizeInBits();
1455 /// CollectInsertionElements - V is a value which is inserted into a vector of
1456 /// VecEltTy. Look through the value to see if we can decompose it into
1457 /// insertions into the vector. See the example in the comment for
1458 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
1459 /// The type of V is always a non-zero multiple of VecEltTy's size.
1461 /// This returns false if the pattern can't be matched or true if it can,
1462 /// filling in Elements with the elements found here.
1463 static bool CollectInsertionElements(Value
*V
, unsigned ElementIndex
,
1464 SmallVectorImpl
<Value
*> &Elements
,
1465 const Type
*VecEltTy
) {
1466 // Undef values never contribute useful bits to the result.
1467 if (isa
<UndefValue
>(V
)) return true;
1469 // If we got down to a value of the right type, we win, try inserting into the
1471 if (V
->getType() == VecEltTy
) {
1472 // Inserting null doesn't actually insert any elements.
1473 if (Constant
*C
= dyn_cast
<Constant
>(V
))
1474 if (C
->isNullValue())
1477 // Fail if multiple elements are inserted into this slot.
1478 if (ElementIndex
>= Elements
.size() || Elements
[ElementIndex
] != 0)
1481 Elements
[ElementIndex
] = V
;
1485 if (Constant
*C
= dyn_cast
<Constant
>(V
)) {
1486 // Figure out the # elements this provides, and bitcast it or slice it up
1488 unsigned NumElts
= getTypeSizeIndex(C
->getType()->getPrimitiveSizeInBits(),
1490 // If the constant is the size of a vector element, we just need to bitcast
1491 // it to the right type so it gets properly inserted.
1493 return CollectInsertionElements(ConstantExpr::getBitCast(C
, VecEltTy
),
1494 ElementIndex
, Elements
, VecEltTy
);
1496 // Okay, this is a constant that covers multiple elements. Slice it up into
1497 // pieces and insert each element-sized piece into the vector.
1498 if (!isa
<IntegerType
>(C
->getType()))
1499 C
= ConstantExpr::getBitCast(C
, IntegerType::get(V
->getContext(),
1500 C
->getType()->getPrimitiveSizeInBits()));
1501 unsigned ElementSize
= VecEltTy
->getPrimitiveSizeInBits();
1502 const Type
*ElementIntTy
= IntegerType::get(C
->getContext(), ElementSize
);
1504 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1505 Constant
*Piece
= ConstantExpr::getLShr(C
, ConstantInt::get(C
->getType(),
1507 Piece
= ConstantExpr::getTrunc(Piece
, ElementIntTy
);
1508 if (!CollectInsertionElements(Piece
, ElementIndex
+i
, Elements
, VecEltTy
))
1514 if (!V
->hasOneUse()) return false;
1516 Instruction
*I
= dyn_cast
<Instruction
>(V
);
1517 if (I
== 0) return false;
1518 switch (I
->getOpcode()) {
1519 default: return false; // Unhandled case.
1520 case Instruction::BitCast
:
1521 return CollectInsertionElements(I
->getOperand(0), ElementIndex
,
1522 Elements
, VecEltTy
);
1523 case Instruction::ZExt
:
1524 if (!isMultipleOfTypeSize(
1525 I
->getOperand(0)->getType()->getPrimitiveSizeInBits(),
1528 return CollectInsertionElements(I
->getOperand(0), ElementIndex
,
1529 Elements
, VecEltTy
);
1530 case Instruction::Or
:
1531 return CollectInsertionElements(I
->getOperand(0), ElementIndex
,
1532 Elements
, VecEltTy
) &&
1533 CollectInsertionElements(I
->getOperand(1), ElementIndex
,
1534 Elements
, VecEltTy
);
1535 case Instruction::Shl
: {
1536 // Must be shifting by a constant that is a multiple of the element size.
1537 ConstantInt
*CI
= dyn_cast
<ConstantInt
>(I
->getOperand(1));
1538 if (CI
== 0) return false;
1539 if (!isMultipleOfTypeSize(CI
->getZExtValue(), VecEltTy
)) return false;
1540 unsigned IndexShift
= getTypeSizeIndex(CI
->getZExtValue(), VecEltTy
);
1542 return CollectInsertionElements(I
->getOperand(0), ElementIndex
+IndexShift
,
1543 Elements
, VecEltTy
);
1550 /// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
1551 /// may be doing shifts and ors to assemble the elements of the vector manually.
1552 /// Try to rip the code out and replace it with insertelements. This is to
1553 /// optimize code like this:
1555 /// %tmp37 = bitcast float %inc to i32
1556 /// %tmp38 = zext i32 %tmp37 to i64
1557 /// %tmp31 = bitcast float %inc5 to i32
1558 /// %tmp32 = zext i32 %tmp31 to i64
1559 /// %tmp33 = shl i64 %tmp32, 32
1560 /// %ins35 = or i64 %tmp33, %tmp38
1561 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
1563 /// Into two insertelements that do "buildvector{%inc, %inc5}".
1564 static Value
*OptimizeIntegerToVectorInsertions(BitCastInst
&CI
,
1566 const VectorType
*DestVecTy
= cast
<VectorType
>(CI
.getType());
1567 Value
*IntInput
= CI
.getOperand(0);
1569 SmallVector
<Value
*, 8> Elements(DestVecTy
->getNumElements());
1570 if (!CollectInsertionElements(IntInput
, 0, Elements
,
1571 DestVecTy
->getElementType()))
1574 // If we succeeded, we know that all of the element are specified by Elements
1575 // or are zero if Elements has a null entry. Recast this as a set of
1577 Value
*Result
= Constant::getNullValue(CI
.getType());
1578 for (unsigned i
= 0, e
= Elements
.size(); i
!= e
; ++i
) {
1579 if (Elements
[i
] == 0) continue; // Unset element.
1581 Result
= IC
.Builder
->CreateInsertElement(Result
, Elements
[i
],
1582 IC
.Builder
->getInt32(i
));
1589 /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
1590 /// bitcast. The various long double bitcasts can't get in here.
1591 static Instruction
*OptimizeIntToFloatBitCast(BitCastInst
&CI
,InstCombiner
&IC
){
1592 Value
*Src
= CI
.getOperand(0);
1593 const Type
*DestTy
= CI
.getType();
1595 // If this is a bitcast from int to float, check to see if the int is an
1596 // extraction from a vector.
1597 Value
*VecInput
= 0;
1598 // bitcast(trunc(bitcast(somevector)))
1599 if (match(Src
, m_Trunc(m_BitCast(m_Value(VecInput
)))) &&
1600 isa
<VectorType
>(VecInput
->getType())) {
1601 const VectorType
*VecTy
= cast
<VectorType
>(VecInput
->getType());
1602 unsigned DestWidth
= DestTy
->getPrimitiveSizeInBits();
1604 if (VecTy
->getPrimitiveSizeInBits() % DestWidth
== 0) {
1605 // If the element type of the vector doesn't match the result type,
1606 // bitcast it to be a vector type we can extract from.
1607 if (VecTy
->getElementType() != DestTy
) {
1608 VecTy
= VectorType::get(DestTy
,
1609 VecTy
->getPrimitiveSizeInBits() / DestWidth
);
1610 VecInput
= IC
.Builder
->CreateBitCast(VecInput
, VecTy
);
1613 return ExtractElementInst::Create(VecInput
, IC
.Builder
->getInt32(0));
1617 // bitcast(trunc(lshr(bitcast(somevector), cst))
1618 ConstantInt
*ShAmt
= 0;
1619 if (match(Src
, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput
)),
1620 m_ConstantInt(ShAmt
)))) &&
1621 isa
<VectorType
>(VecInput
->getType())) {
1622 const VectorType
*VecTy
= cast
<VectorType
>(VecInput
->getType());
1623 unsigned DestWidth
= DestTy
->getPrimitiveSizeInBits();
1624 if (VecTy
->getPrimitiveSizeInBits() % DestWidth
== 0 &&
1625 ShAmt
->getZExtValue() % DestWidth
== 0) {
1626 // If the element type of the vector doesn't match the result type,
1627 // bitcast it to be a vector type we can extract from.
1628 if (VecTy
->getElementType() != DestTy
) {
1629 VecTy
= VectorType::get(DestTy
,
1630 VecTy
->getPrimitiveSizeInBits() / DestWidth
);
1631 VecInput
= IC
.Builder
->CreateBitCast(VecInput
, VecTy
);
1634 unsigned Elt
= ShAmt
->getZExtValue() / DestWidth
;
1635 return ExtractElementInst::Create(VecInput
, IC
.Builder
->getInt32(Elt
));
1641 Instruction
*InstCombiner::visitBitCast(BitCastInst
&CI
) {
1642 // If the operands are integer typed then apply the integer transforms,
1643 // otherwise just apply the common ones.
1644 Value
*Src
= CI
.getOperand(0);
1645 const Type
*SrcTy
= Src
->getType();
1646 const Type
*DestTy
= CI
.getType();
1648 // Get rid of casts from one type to the same type. These are useless and can
1649 // be replaced by the operand.
1650 if (DestTy
== Src
->getType())
1651 return ReplaceInstUsesWith(CI
, Src
);
1653 if (const PointerType
*DstPTy
= dyn_cast
<PointerType
>(DestTy
)) {
1654 const PointerType
*SrcPTy
= cast
<PointerType
>(SrcTy
);
1655 const Type
*DstElTy
= DstPTy
->getElementType();
1656 const Type
*SrcElTy
= SrcPTy
->getElementType();
1658 // If the address spaces don't match, don't eliminate the bitcast, which is
1659 // required for changing types.
1660 if (SrcPTy
->getAddressSpace() != DstPTy
->getAddressSpace())
1663 // If we are casting a alloca to a pointer to a type of the same
1664 // size, rewrite the allocation instruction to allocate the "right" type.
1665 // There is no need to modify malloc calls because it is their bitcast that
1666 // needs to be cleaned up.
1667 if (AllocaInst
*AI
= dyn_cast
<AllocaInst
>(Src
))
1668 if (Instruction
*V
= PromoteCastOfAllocation(CI
, *AI
))
1671 // If the source and destination are pointers, and this cast is equivalent
1672 // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
1673 // This can enhance SROA and other transforms that want type-safe pointers.
1674 Constant
*ZeroUInt
=
1675 Constant::getNullValue(Type::getInt32Ty(CI
.getContext()));
1676 unsigned NumZeros
= 0;
1677 while (SrcElTy
!= DstElTy
&&
1678 isa
<CompositeType
>(SrcElTy
) && !SrcElTy
->isPointerTy() &&
1679 SrcElTy
->getNumContainedTypes() /* not "{}" */) {
1680 SrcElTy
= cast
<CompositeType
>(SrcElTy
)->getTypeAtIndex(ZeroUInt
);
1684 // If we found a path from the src to dest, create the getelementptr now.
1685 if (SrcElTy
== DstElTy
) {
1686 SmallVector
<Value
*, 8> Idxs(NumZeros
+1, ZeroUInt
);
1687 return GetElementPtrInst::CreateInBounds(Src
, Idxs
.begin(), Idxs
.end(),"",
1688 ((Instruction
*)NULL
));
1692 // Try to optimize int -> float bitcasts.
1693 if ((DestTy
->isFloatTy() || DestTy
->isDoubleTy()) && isa
<IntegerType
>(SrcTy
))
1694 if (Instruction
*I
= OptimizeIntToFloatBitCast(CI
, *this))
1697 if (const VectorType
*DestVTy
= dyn_cast
<VectorType
>(DestTy
)) {
1698 if (DestVTy
->getNumElements() == 1 && !SrcTy
->isVectorTy()) {
1699 Value
*Elem
= Builder
->CreateBitCast(Src
, DestVTy
->getElementType());
1700 return InsertElementInst::Create(UndefValue::get(DestTy
), Elem
,
1701 Constant::getNullValue(Type::getInt32Ty(CI
.getContext())));
1702 // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
1705 if (isa
<IntegerType
>(SrcTy
)) {
1706 // If this is a cast from an integer to vector, check to see if the input
1707 // is a trunc or zext of a bitcast from vector. If so, we can replace all
1708 // the casts with a shuffle and (potentially) a bitcast.
1709 if (isa
<TruncInst
>(Src
) || isa
<ZExtInst
>(Src
)) {
1710 CastInst
*SrcCast
= cast
<CastInst
>(Src
);
1711 if (BitCastInst
*BCIn
= dyn_cast
<BitCastInst
>(SrcCast
->getOperand(0)))
1712 if (isa
<VectorType
>(BCIn
->getOperand(0)->getType()))
1713 if (Instruction
*I
= OptimizeVectorResize(BCIn
->getOperand(0),
1714 cast
<VectorType
>(DestTy
), *this))
1718 // If the input is an 'or' instruction, we may be doing shifts and ors to
1719 // assemble the elements of the vector manually. Try to rip the code out
1720 // and replace it with insertelements.
1721 if (Value
*V
= OptimizeIntegerToVectorInsertions(CI
, *this))
1722 return ReplaceInstUsesWith(CI
, V
);
1726 if (const VectorType
*SrcVTy
= dyn_cast
<VectorType
>(SrcTy
)) {
1727 if (SrcVTy
->getNumElements() == 1 && !DestTy
->isVectorTy()) {
1729 Builder
->CreateExtractElement(Src
,
1730 Constant::getNullValue(Type::getInt32Ty(CI
.getContext())));
1731 return CastInst::Create(Instruction::BitCast
, Elem
, DestTy
);
1735 if (ShuffleVectorInst
*SVI
= dyn_cast
<ShuffleVectorInst
>(Src
)) {
1736 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
1737 // a bitcast to a vector with the same # elts.
1738 if (SVI
->hasOneUse() && DestTy
->isVectorTy() &&
1739 cast
<VectorType
>(DestTy
)->getNumElements() ==
1740 SVI
->getType()->getNumElements() &&
1741 SVI
->getType()->getNumElements() ==
1742 cast
<VectorType
>(SVI
->getOperand(0)->getType())->getNumElements()) {
1744 // If either of the operands is a cast from CI.getType(), then
1745 // evaluating the shuffle in the casted destination's type will allow
1746 // us to eliminate at least one cast.
1747 if (((Tmp
= dyn_cast
<BitCastInst
>(SVI
->getOperand(0))) &&
1748 Tmp
->getOperand(0)->getType() == DestTy
) ||
1749 ((Tmp
= dyn_cast
<BitCastInst
>(SVI
->getOperand(1))) &&
1750 Tmp
->getOperand(0)->getType() == DestTy
)) {
1751 Value
*LHS
= Builder
->CreateBitCast(SVI
->getOperand(0), DestTy
);
1752 Value
*RHS
= Builder
->CreateBitCast(SVI
->getOperand(1), DestTy
);
1753 // Return a new shuffle vector. Use the same element ID's, as we
1754 // know the vector types match #elts.
1755 return new ShuffleVectorInst(LHS
, RHS
, SVI
->getOperand(2));
1760 if (SrcTy
->isPointerTy())
1761 return commonPointerCastTransforms(CI
);
1762 return commonCastTransforms(CI
);