//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static const Type *getPromotedType(const Type *Ty) {
  if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}
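// Illustrative note (types chosen for the example, not exhaustive): an i8 or
// i16 argument passed through "..." is widened to i32 by the rule above, while
// i32 and wider integers, pointers, and floating-point types pass unchanged.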
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }
  // If the MemCpyInst length is 1/2/4/8 bytes then replace the memcpy with a
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (MemOpLength == 0) return 0;

  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store. A single load+store correctly handles overlapping memory in
  // the memmove case.
  unsigned Size = MemOpLength->getZExtValue();
  if (Size == 0) return MI;  // Delete this mem transfer.

  if (Size > 8 || (Size&(Size-1)))
    return 0;  // If not 1/2/4/8 bytes, exit.
  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
  // Memcpy forces the use of i8* for the source and destination. That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*. We'd much rather use a double load+store than an i64
  // load+store here, because this improves the odds that the source or dest
  // address will be promotable. See if we can find a better type than the
  // integer datatype.
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  if (StrippedDest != MI->getArgOperand(0)) {
    const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                                    ->getElementType();
    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
      // down through these levels if so.
      while (!SrcETy->isSingleValueType()) {
        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
          if (STy->getNumElements() == 1)
            SrcETy = STy->getElementType(0);
          else
            break;
        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
          if (ATy->getNumElements() == 1)
            SrcETy = ATy->getElementType();
          else
            break;
        } else
          break;
      }

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
      }
    }
  }
  // If the memcpy/memmove provides better alignment info than we can infer,
  // use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);
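  // Illustrative example (IR names invented, not taken from a test): a 4-byte,
  // 4-aligned copy such as
  //   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 4, i32 4, i1 false)
  // becomes, roughly,
  //   %v = load i32* %s.cast, align 4
  //   store i32 %v, i32* %d.cast, align 4
  // where %s.cast and %d.cast are the bitcasts created above.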
  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }
  // Extract the length, alignment and fill value if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return 0;
  uint64_t Len = LenC->getZExtValue();
  Alignment = MI->getAlignment();

  // If the length is zero, this is a no-op.
  if (Len == 0) return MI;  // memset(d,c,0,a) -> noop
  // memset(s,c,n) -> store s, c  (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

    // Alignment 0 is identical to alignment 1 for memset, but not for store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);
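    // Illustrative example (IR names invented): a memset of 4 bytes with the
    // constant byte 0x2A becomes roughly
    //   store i32 707406378, i32* %dest.cast, align 4
    // since 0x2A replicated into every byte is 0x2A2A2A2A == 707406378.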
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return 0;
}
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (isFreeCall(&CI))
    return visitFree(CI);
  if (isMalloc(&CI))
    return visitMalloc(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // library function itself is not.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);
  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations. We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }
    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return 0;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                                 CI.getArgOperand(1)->getType(),
                                 CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }
    // If we can determine a pointer alignment that is bigger than the
    // currently set one, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }
  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    // We need target data for just about everything so depend on it.
    if (!TD) break;

    const Type *ReturnTy = CI.getType();
    uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
    // Get to the real allocated thing and offset as fast as possible.
    Value *Op1 = II->getArgOperand(0)->stripPointerCasts();

    uint64_t Offset = 0;
    uint64_t Size = -1ULL;

    // Try to look through constant GEPs.
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
      if (!GEP->hasAllConstantIndices()) break;

      // Get the current byte offset into the thing. Use the original
      // operand in case we're looking through a bitcast.
      SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
      Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
                                    Ops.data(), Ops.size());

      Op1 = GEP->getPointerOperand()->stripPointerCasts();
      // Make sure we're not a constant offset from an external
      // global.
      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
        if (!GV->hasDefinitiveInitializer()) break;
    }

    // If we've stripped down to a single global variable that we
    // can know the size of then just return that.
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
      if (GV->hasDefinitiveInitializer()) {
        Constant *C = GV->getInitializer();
        Size = TD->getTypeAllocSize(C->getType());
      } else {
        // Can't determine size of the GV.
        Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
        return ReplaceInstUsesWith(CI, RetVal);
      }
    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
      // Get alloca size.
      if (AI->getAllocatedType()->isSized()) {
        Size = TD->getTypeAllocSize(AI->getAllocatedType());
        if (AI->isArrayAllocation()) {
          const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
          if (!C) break;
          Size *= C->getZExtValue();
        }
      }
    } else if (CallInst *MI = extractMallocCall(Op1)) {
      // Get allocation size.
      const Type* MallocType = getMallocAllocatedType(MI);
      if (MallocType && MallocType->isSized())
        if (Value *NElems = getMallocArraySize(MI, TD, true))
          if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
            Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
    }

    // Do not return "I don't know" here. Later optimization passes could
    // make it possible to evaluate objectsize to a constant.
    if (Size == -1ULL)
      break;

    if (Size < Offset) {
      // Out of bound reference? Negative index normalized to a large
      // index? Just return "I don't know".
      return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
    }

    return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
  }
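  // Illustrative example (global and offsets invented): for a 60-byte global
  // @buf and a constant GEP 12 bytes into it, objectsize folds to 60 - 12 = 48.
  // If the constant offset ran past the end of the object, the DontKnow value
  // computed above is returned instead.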
  case Intrinsic::bswap:
    // bswap(bswap(x)) -> x
    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
      if (Operand->getIntrinsicID() == Intrinsic::bswap)
        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
                       TI->getType()->getPrimitiveSizeInBits();
          Value *CV = ConstantInt::get(Operand->getType(), C);
          Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
          return new TruncInst(V, TI->getType());
        }
    }
    break;
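    // Worked example (widths chosen for illustration): for %x of type i32
    // truncated to i16, the inner bswap reverses all four bytes, the trunc
    // keeps the low two, and the outer bswap swaps them back, which is just
    // the top two bytes of %x in their original order, i.e. trunc(lshr(%x, 16))
    // with C = 32 - 16 = 16 as computed above.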
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
    const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                      KnownZero, KnownOne);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));
    }
    break;
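  // Illustrative example (operand invented): if the operand is known to have
  // the form (X << 5) | 16, bits 0-3 are known zero and bit 4 is known one, so
  // the cttz call above folds to the constant 4.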
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
    const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                      KnownZero, KnownOne);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));
    }
    break;
  case Intrinsic::uadd_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt Mask = APInt::getSignBit(BitWidth);
    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];

    if (LHSKnownNegative || LHSKnownPositive) {
      APInt RHSKnownZero(BitWidth, 0);
      APInt RHSKnownOne(BitWidth, 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
      if (LHSKnownNegative && RHSKnownNegative) {
        // The sign bit is set in both cases: this MUST overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateAdd(LHS, RHS);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getTrue(II->getContext())
        };
        const StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }

      if (LHSKnownPositive && RHSKnownPositive) {
        // The sign bit is clear in both cases: this CANNOT overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        const StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }
  }
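  // Illustrative example (i8 chosen for brevity): if both operands have their
  // sign bit known set, each is at least 128, so the unsigned sum is at least
  // 256 and must wrap, and the overflow flag folds to true. If both sign bits
  // are known clear, each operand is at most 127, the sum fits, and the add
  // can be marked NUW with the overflow flag folded to false.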
  // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X + undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X + 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
    // undef - X -> undef
    // X - undef -> undef
    if (isa<UndefValue>(II->getArgOperand(0)) ||
        isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X - 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::umul_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
    APInt Mask = APInt::getAllOnesValue(BitWidth);

    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    APInt RHSKnownZero(BitWidth, 0);
    APInt RHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);

    // Get the largest possible values for each operand.
    APInt LHSMax = ~LHSKnownZero;
    APInt RHSMax = ~RHSKnownZero;

    // If multiplying the maximum values does not overflow then we can turn
    // this into a plain NUW mul.
    bool Overflow;
    LHSMax.umul_ov(RHSMax, Overflow);
    if (!Overflow) {
      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
      Constant *V[] = {
        UndefValue::get(LHS->getType()),
        ConstantInt::getFalse(II->getContext())
      };
      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
      return InsertValueInst::Create(Struct, Mul, 0);
    }
  } // FALL THROUGH
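  // Illustrative example (operands invented): if both operands are zero-
  // extended i16 values, LHSMax and RHSMax are at most 0xFFFF, their product
  // fits in 32 bits without overflowing, and the intrinsic becomes a plain
  // NUW mul with a false overflow flag.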
  case Intrinsic::smul_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X * undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X * 0 -> {0, false}
      if (RHSI->isZero())
        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));

      // X * 1 -> {X, false}
      if (RHSI->equalsInt(1)) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
      const Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      const Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }
  case Intrinsic::x86_sse41_pmovsxbw:
  case Intrinsic::x86_sse41_pmovsxwd:
  case Intrinsic::x86_sse41_pmovsxdq:
  case Intrinsic::x86_sse41_pmovzxbw:
  case Intrinsic::x86_sse41_pmovzxwd:
  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x ignores the upper half of its input vector.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    unsigned LowHalfElts = VWidth / 2;
    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
    APInt UndefElts(VWidth, 0);
    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                                 InputDemandedElts,
                                                 UndefElts)) {
      II->setArgOperand(0, TmpV);
      return II;
    }
    break;
  }
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
            !isa<UndefValue>(Mask->getOperand(i))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getOperand(i)))
            continue;
          unsigned Idx = cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.

          if (ExtractedElts[Idx] == 0) {
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
                  ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                   Idx&15, false), "tmp");
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                       ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                        i, false), "tmp");
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;
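  // Illustrative note: for a constant mask the loop above lowers vperm to at
  // most sixteen extractelement/insertelement pairs over the two byte vectors;
  // later instcombine and shuffle folds can tighten that result further.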
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }
  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore. This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }
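    // Illustrative example: after a variable-sized alloca is dead-code
    // eliminated, the IR can be left with
    //   %sp = call i8* @llvm.stacksave()
    //   call void @llvm.stackrestore(i8* %sp)
    // and the adjacent restore is deleted here.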
    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return/unwind block and if there are no
    // allocas or calls between the restore and the return, nuke the restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }
  }
  return visitCallSite(II);
}

// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const CastInst * const CI,
                                         const TargetData * const TD,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // The size of ByVal arguments is derived from the type, so we
  // can't change to a type with a different size. If the size were
  // passed explicitly we could avoid this check.
  if (!CS.paramHasAttr(ix, Attribute::ByVal))
    return true;

  const Type* SrcTy =
            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
    return false;
  return true;
}
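// Illustrative example (call invented): a frontend may pass a float* through
// printf's "..." area as "bitcast float* %p to i8*"; the bitcast is lossless
// and the argument is not byval, so the cast can be dropped and %p passed
// directly.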
namespace {
class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
  InstCombiner *IC;
protected:
  void replaceCall(Value *With) {
    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
  }
  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
      return true;
    if (ConstantInt *SizeCI =
            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
      if (SizeCI->isAllOnesValue())
        return true;
      if (isString) {
        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
        // If the length is 0 we don't know how long it is and so we can't
        // remove the check.
        if (Len == 0) return false;
        return SizeCI->getZExtValue() >= Len;
      }
      if (ConstantInt *Arg = dyn_cast<ConstantInt>(
                                 CI->getArgOperand(SizeArgOp)))
        return SizeCI->getZExtValue() >= Arg->getZExtValue();
    }
    return false;
  }
public:
  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
  Instruction *NewInstruction;
};
} // end anonymous namespace
// Try to fold some different types of calls here.
// Currently we're only working with the checking functions: memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
  if (CI->getCalledFunction() == 0) return 0;

  InstCombineFortifiedLibCalls Simplifier(this);
  Simplifier.fold(CI, TD);
  return Simplifier.NewInstruction;
}
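// Illustrative example (values invented): __memcpy_chk(dst, src, 16, 32) has a
// known destination size of 32 >= 16, so isFoldable succeeds and the fortified
// call is rewritten as a plain memcpy by the SimplifyFortifiedLibCalls logic.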
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  bool Changed = false;

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return 0;
  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body. A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                    UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because it would change the CFG; just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return 0;
    }
  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // This instruction is not reachable, just remove it. We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
      // Don't break the CFG, insert a dummy cond branch.
      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
                         ConstantInt::getTrue(Callee->getContext()), II);
    }
    return EraseInstFromFunction(*CS.getInstruction());
  }
  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
        return transformCallThroughTrampoline(CS);
  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }
  // Try to optimize the call if possible; we require TargetData for most of
  // this. None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI, TD);
    // If we changed something, return the result; otherwise fall through to
    // the check below.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : 0;
}
// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (Callee == 0)
    return false;
  Instruction *Caller = CS.getInstruction();
  const AttrListPtr &CallerPAL = CS.getAttributes();
  // Okay, this is a cast from a function to a different type. Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
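  // Illustrative example (function invented): a call through
  //   bitcast (i32 (i32)* @f to i32 (i8)*)
  // becomes a direct call to @f with the i8 argument cast to i32, provided the
  // checks below show that no argument or attribute would convert unsafely.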
  const FunctionType *FT = Callee->getFunctionType();
  const Type *OldRetTy = Caller->getType();
  const Type *NewRetTy = FT->getReturnType();

  if (NewRetTy->isStructTy())
    return false; // TODO: Handle multiple return values.
  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {
    if (Callee->isDeclaration() &&
        // Conversion is ok if changing from one pointer type to another or from
        // a pointer to an integer of the same size.
        !((OldRetTy->isPointerTy() || !TD ||
           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
          (NewRetTy->isPointerTy() || !TD ||
           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
      return false;   // Cannot transform this return value.

    if (!Caller->use_empty() &&
        // void -> non-void is handled specially
        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
      return false;   // Cannot transform this return value.
    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      Attributes RAttrs = CallerPAL.getRetAttributes();
      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge). Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
             UI != E; ++UI)
          if (PHINode *PN = dyn_cast<PHINode>(*UI))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }
  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    const Type *ParamTy = FT->getParamType(i);
    const Type *ActTy = (*AI)->getType();

    if (!CastInst::isCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
    if (Attrs & Attribute::typeIncompatible(ParamTy))
      return false;   // Attribute not compatible with transformed value.
    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
      const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
        return false;

      const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
      if (TD->getTypeAllocSize(CurElTy) !=
          TD->getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }
    // Converting from one pointer type to another or between a pointer and an
    // integer of the same size is safe even if we do not have a body.
    bool isConvertible = ActTy == ParamTy ||
      (TD && ((ParamTy->isPointerTy() ||
               ParamTy == TD->getIntPtrType(Caller->getContext())) &&
              (ActTy->isPointerTy() ||
               ActTy == TD->getIntPtrType(Caller->getContext()))));
    if (Callee->isDeclaration() && !isConvertible) return false;
  }
  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call. We don't want to introduce a varargs call where one doesn't
    // already exist.
    const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;
  }
  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them. Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
        break;
      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
      if (PAttrs & Attribute::VarArgsIncompatible)
        return false;
    }
  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeWithIndex, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  Attributes RAttrs = CallerPAL.getRetAttributes();

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes. Wipe out any problematic attributes.
  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);

  // Add the new return attributes.
  if (RAttrs)
    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    const Type *ParamTy = FT->getParamType(i);
    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
          false, ParamTy, false);
      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
    }

    // Add any parameter attributes.
    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
  }
  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    if (!FT->isVarArg()) {
      errs() << "WARNING: While resolving call to function '"
             << Callee->getName() << "' arguments were dropped!\n";
    } else {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        const Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
      }
    }
  }
  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
                                                     attrVec.end());

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args.begin(), Args.end());
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args.begin(), Args.end());
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }
  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      Instruction::CastOps opcode =
        CastInst::getCastOpcode(NC, false, OldRetTy, false);
      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call; just insert the cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);

  EraseInstFromFunction(*Caller);
  return true;
}
// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline intrinsic into a direct call to the underlying function.
//
Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
  Value *Callee = CS.getCalledValue();
  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttrListPtr &Attrs = CS.getAttributes();
  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return 0;

  IntrinsicInst *Tramp =
    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttrListPtr &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    const Type *NestTy = 0;
    Attributes NestAttr = Attribute::None;
    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);

      SmallVector<AttributeWithIndex, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);
      // Insert the nest argument into the call argument list, which may
      // mean appending it. Likewise for attributes.

      // Add any result attributes.
      if (Attributes Attr = Attrs.getRetAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          if (Attributes Attr = Attrs.getParamAttributes(Idx))
            NewAttrs.push_back
              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));

          ++Idx, ++I;
        } while (1);
      }
      // Add any function attributes.
      if (Attributes Attr = Attrs.getFnAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<const Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();
        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }
      // Replace the trampoline call with a direct call. Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
                                                   NewAttrs.end());
      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs.begin(), NewArgs.end());
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end());
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }
  // Replace the trampoline call with a direct call. Since there is no 'nest'
  // parameter, there is no need to adjust the argument list. Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}