//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static const Type *getPromotedType(const Type *Ty) {
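  // E.g. (illustrative): an i8 or i16 argument is widened to i32 here; wider
  // integer types and non-integer types are returned unchanged.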
  if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }
  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (MemOpLength == 0) return 0;

  // Source and destination pointer types are always "i8*" for intrinsic.  See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  unsigned Size = MemOpLength->getZExtValue();
  if (Size == 0) return MI;  // Delete this mem transfer.

  if (Size > 8 || (Size&(Size-1)))
    return 0;  // If not 1/2/4/8 bytes, exit.
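  // For example (illustrative): a memcpy of 8 bytes between plain i8*
  // pointers is rewritten below as a single i64 load followed by an i64
  // store, which later passes can promote far more easily than the call.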
  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*.  We'd much rather use a double load+store rather than
  // an i64 load+store, here because this improves the odds that the source or
  // dest address will be promotable.  See if we can find a better type than
  // the integer datatype.
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  if (StrippedDest != MI->getArgOperand(0)) {
    const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                    ->getElementType();
    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
      // down through these levels if so.
      while (!SrcETy->isSingleValueType()) {
        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
          if (STy->getNumElements() == 1)
            SrcETy = STy->getElementType(0);
          else
            break;
        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
          if (ATy->getNumElements() == 1)
            SrcETy = ATy->getElementType();
          else
            break;
        } else
          break;
      }

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
      }
    }
  }
  // If the memcpy/memmove provides better alignment info than we can
  // analyze, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
  InsertNewInstBefore(L, *MI);
  InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
                      *MI);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }
  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return 0;
  uint64_t Len = LenC->getZExtValue();
  Alignment = MI->getAlignment();
  // If the length is zero, this is a no-op.
  if (Len == 0) return MI; // memset(d,c,0,a) -> noop

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
    // Alignment 0 is identity for alignment 1 for memset, but not store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
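    // Illustrative example: for a 4-byte memset with fill value 0xAB, the
    // multiply by 0x0101010101010101 splats the byte and the truncated i32
    // constant stored below is 0xABABABAB.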
    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
                                      Dest, false, Alignment), *MI);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return 0;
}
/// visitCallInst - CallInst simplification.  This mostly only handles folding
/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (isFreeCall(&CI))
    return visitFree(CI);
  if (isMalloc(&CI))
    return visitMalloc(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);
  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations.  We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }
    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return 0;
    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                                 CI.getArgOperand(1)->getType(),
                                 CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
          Changed = true;
        }
    }
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }
    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }
  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    // We need target data for just about everything so depend on it.
    if (!TD) break;

    const Type *ReturnTy = CI.getType();
    uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
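    // Illustrative example: for a global such as "@g = global [10 x i8]", a
    // llvm.objectsize call on a pointer into @g folds below to the remaining
    // size (Size - Offset); if nothing can be proven the result is DontKnow.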
    // Get to the real allocated thing and offset as fast as possible.
    Value *Op1 = II->getArgOperand(0)->stripPointerCasts();

    uint64_t Offset = 0;
    uint64_t Size = -1ULL;
    // Try to look through constant GEPs.
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
      if (!GEP->hasAllConstantIndices()) break;

      // Get the current byte offset into the thing.  Use the original
      // operand in case we're looking through a bitcast.
      SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
      Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
                                    Ops.data(), Ops.size());

      Op1 = GEP->getPointerOperand()->stripPointerCasts();

      // Make sure we're not a constant offset from an external
      // global.
      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
        if (!GV->hasDefinitiveInitializer()) break;
    }
    // If we've stripped down to a single global variable that we
    // can know the size of then just return that.
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
      if (GV->hasDefinitiveInitializer()) {
        Constant *C = GV->getInitializer();
        Size = TD->getTypeAllocSize(C->getType());
      } else {
        // Can't determine size of the GV.
        Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
        return ReplaceInstUsesWith(CI, RetVal);
      }
    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
      // Get alloca size.
      if (AI->getAllocatedType()->isSized()) {
        Size = TD->getTypeAllocSize(AI->getAllocatedType());
        if (AI->isArrayAllocation()) {
          const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
          if (!C) break;
          Size *= C->getZExtValue();
        }
      }
    } else if (CallInst *MI = extractMallocCall(Op1)) {
      // Get allocation size.
      const Type* MallocType = getMallocAllocatedType(MI);
      if (MallocType && MallocType->isSized())
        if (Value *NElems = getMallocArraySize(MI, TD, true))
          if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
            Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
    }

    // Do not return "I don't know" here.  Later optimization passes could
    // make it possible to evaluate objectsize to a constant.
    if (Size == -1ULL)
      break;

    if (Size < Offset) {
      // Out of bound reference? Negative index normalized to large
      // index? Just return "I don't know".
      return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
    }
    return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
  }
  case Intrinsic::bswap:
    // bswap(bswap(x)) -> x
    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
      if (Operand->getIntrinsicID() == Intrinsic::bswap)
        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
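    // Illustrative example: with %x : i32 truncated to i16, the result is the
    // two high bytes of %x in their original order, i.e. trunc(lshr(%x, 16));
    // the shift amount c below is the difference of the two bit widths.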
    if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
                       TI->getType()->getPrimitiveSizeInBits();
          Value *CV = ConstantInt::get(Operand->getType(), C);
          Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
          return new TruncInst(V, TI->getType());
        }
    }
    break;
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
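    // Illustrative example: if known-bits analysis shows the operand looks
    // like "xxxx1000" (bit 3 known one, bits 0-2 known zero), then cttz is
    // the constant 3 no matter what the unknown high bits are.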
    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                      KnownZero, KnownOne);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));
    }
    break;
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                      KnownZero, KnownOne);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));
    }
    break;
  case Intrinsic::uadd_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt Mask = APInt::getSignBit(BitWidth);
    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
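    // Illustrative reasoning: if the sign bits of both operands are known set
    // (both values are at least 2^(BitWidth-1)), their sum cannot fit in
    // BitWidth bits, so the unsigned add below must overflow; if both sign
    // bits are known clear, the sum stays below 2^BitWidth and cannot
    // overflow.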
    if (LHSKnownNegative || LHSKnownPositive) {
      APInt RHSKnownZero(BitWidth, 0);
      APInt RHSKnownOne(BitWidth, 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
      if (LHSKnownNegative && RHSKnownNegative) {
        // The sign bit is set in both cases: this MUST overflow.
        // Create a simple add instruction, and insert it into the struct.
        Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()), ConstantInt::getTrue(II->getContext())
        };
        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
        return InsertValueInst::Create(Struct, Add, 0);
      }

      if (LHSKnownPositive && RHSKnownPositive) {
        // The sign bit is clear in both cases: this CANNOT overflow.
        // Create a simple add instruction, and insert it into the struct.
        Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }
  }
  // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }
    // X + undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X + 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
    // undef - X -> undef
    // X - undef -> undef
    if (isa<UndefValue>(II->getArgOperand(0)) ||
        isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X - 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::umul_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
    APInt Mask = APInt::getAllOnesValue(BitWidth);

    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    APInt RHSKnownZero(BitWidth, 0);
    APInt RHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
    // Get the largest possible values for each operand.
    APInt LHSMax = ~LHSKnownZero;
    APInt RHSMax = ~RHSKnownZero;

    // If multiplying the maximum values does not overflow then we can turn
    // this into a plain NUW mul.
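    // Illustrative example: with i8 operands whose high nibbles are known
    // zero, LHSMax = RHSMax = 15 and 15 * 15 = 225 still fits in 8 bits, so
    // the multiply below can never wrap.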
    bool Overflow;
    LHSMax.umul_ov(RHSMax, Overflow);
    if (!Overflow) {
      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
      Constant *V[] = {
        UndefValue::get(LHS->getType()),
        Builder->getFalse()
      };
      Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
      return InsertValueInst::Create(Struct, Mul, 0);
    }
  } // FALL THROUGH
  case Intrinsic::smul_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X * undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X * 0 -> {0, false}
      if (RHSI->isZero())
        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));

      // X * 1 -> {X, false}
      if (RHSI->equalsInt(1)) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
      const Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      const Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
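    // Illustrative note: altivec vperm selects each result byte out of the 32
    // bytes of V1 concatenated with V2, so a constant mask lets us rebuild the
    // result with plain extractelement/insertelement operations below.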
    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
            !isa<UndefValue>(Mask->getOperand(i))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getOperand(i)))
            continue;
          unsigned Idx = cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.

          if (ExtractedElts[Idx] == 0) {
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
                  ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                   Idx&15, false), "tmp");
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                         ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                          i, false), "tmp");
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }
  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore.  This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }
    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return/unwind block and if there are no
    // allocas or calls between the restore and the return, nuke the restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }
  }

  return visitCallSite(II);
}
// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const CastInst * const CI,
                                         const TargetData * const TD,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // The size of ByVal arguments is derived from the type, so we
  // can't change to a type with a different size.  If the size were
  // passed explicitly we could avoid this check.
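  // Illustrative example: a bitcast from %struct.A* to %struct.B* on a byval
  // argument is only safe to strip when both struct types have the same
  // TargetData allocation size, which the checks below verify.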
  if (!CS.paramHasAttr(ix, Attribute::ByVal))
    return true;

  const Type* SrcTy =
            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
    return false;
  return true;
}
namespace {
class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
  InstCombiner *IC;
protected:
  void replaceCall(Value *With) {
    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
  }
  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
      return true;
    if (ConstantInt *SizeCI =
                           dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
      if (SizeCI->isAllOnesValue())
        return true;
      if (isString) {
        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
        // If the length is 0 we don't know how long it is and so we can't
        // fold the check.
        if (Len == 0) return false;
        return SizeCI->getZExtValue() >= Len;
      }
      if (ConstantInt *Arg = dyn_cast<ConstantInt>(
                                                  CI->getArgOperand(SizeArgOp)))
        return SizeCI->getZExtValue() >= Arg->getZExtValue();
    }
    return false;
  }
public:
  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
  Instruction *NewInstruction;
};
} // end anonymous namespace
// Try to fold some different type of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
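// Illustrative example: a call such as __strcpy_chk(dst, src, 16) where the
// source string is known to need at most 16 bytes can be simplified to a
// plain strcpy by the SimplifyFortifiedLibCalls machinery driven below.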
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
  if (CI->getCalledFunction() == 0) return 0;

  InstCombineFortifiedLibCalls Simplifier(this);
  Simplifier.fold(CI, TD);
  return Simplifier.NewInstruction;
}
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  bool Changed = false;

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return 0;
  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because it would change the CFG, just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return 0;
    }
  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // This instruction is not reachable, just remove it.  We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      CS.getInstruction()->
        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));

    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
      // Don't break the CFG, insert a dummy cond branch.
      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
                         ConstantInt::getTrue(Callee->getContext()), II);
    }
    return EraseInstFromFunction(*CS.getInstruction());
  }
  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
        return transformCallThroughTrampoline(CS);
  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }
  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }
  // Try to optimize the call if possible, we require TargetData for most of
  // this.  None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI, TD);
    // If we changed something return the result, etc. Otherwise let
    // the fallthrough check run.
    if (I) return EraseInstFromFunction(*I);
  }
  return Changed ? CS.getInstruction() : 0;
}
// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (Callee == 0)
    return false;
  Instruction *Caller = CS.getInstruction();
  const AttrListPtr &CallerPAL = CS.getAttributes();
  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
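  // Illustrative example: a call such as
  //   call i32 bitcast (i32 (i8*)* @f to i32 (i32*)*)(i32* %p)
  // can become a direct call to @f with %p bitcast from i32* to i8*, provided
  // every argument and the return type are convertible as checked below.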
  const FunctionType *FT = Callee->getFunctionType();
  const Type *OldRetTy = Caller->getType();
  const Type *NewRetTy = FT->getReturnType();

  if (NewRetTy->isStructTy())
    return false; // TODO: Handle multiple return values.
  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {
    if (Callee->isDeclaration() &&
        // Conversion is ok if changing from one pointer type to another or from
        // a pointer to an integer of the same size.
        !((OldRetTy->isPointerTy() || !TD ||
           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
          (NewRetTy->isPointerTy() || !TD ||
           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
      return false;   // Cannot transform this return value.

    if (!Caller->use_empty() &&
        // void -> non-void is handled specially
        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
      return false;   // Cannot transform this return value.

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      Attributes RAttrs = CallerPAL.getRetAttributes();
      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
        return false;   // Attribute not compatible with transformed value.
    }
    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge).  Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
             UI != E; ++UI)
          if (PHINode *PN = dyn_cast<PHINode>(*UI))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }
  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    const Type *ParamTy = FT->getParamType(i);
    const Type *ActTy = (*AI)->getType();

    if (!CastInst::isCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
    if (Attrs & Attribute::typeIncompatible(ParamTy))
      return false;   // Attribute not compatible with transformed value.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
      const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
        return false;

      const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
      if (TD->getTypeAllocSize(CurElTy) !=
          TD->getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }

    // Converting from one pointer type to another or between a pointer and an
    // integer of the same size is safe even if we do not have a body.
    bool isConvertible = ActTy == ParamTy ||
      (TD && ((ParamTy->isPointerTy() ||
               ParamTy == TD->getIntPtrType(Caller->getContext())) &&
              (ActTy->isPointerTy() ||
               ActTy == TD->getIntPtrType(Caller->getContext()))));
    if (Callee->isDeclaration() && !isConvertible) return false;
  }
  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
        break;
      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
      if (PAttrs & Attribute::VarArgsIncompatible)
        return false;
    }
  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeWithIndex, 8> attrVec;
  attrVec.reserve(NumCommonArgs);
  // Get any return attributes.
  Attributes RAttrs = CallerPAL.getRetAttributes();

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);

  // Add the new return attributes.
  if (RAttrs)
    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    const Type *ParamTy = FT->getParamType(i);
    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
          false, ParamTy, false);
      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
    }

    // Add any parameter attributes.
    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
  }
  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    if (!FT->isVarArg()) {
      errs() << "WARNING: While resolving call to function '"
             << Callee->getName() << "' arguments were dropped!\n";
    } else {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        const Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
      }
    }
  }
  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
                                                     attrVec.end());

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
                            Args.begin(), Args.end(),
                            Caller->getName(), Caller);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
                          Caller->getName(), Caller);
    CallInst *CI = cast<CallInst>(Caller);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }
  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      Instruction::CastOps opcode =
        CastInst::getCastOpcode(NC, false, OldRetTy, false);
      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call, just insert cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }
  if (!Caller->use_empty())
    Caller->replaceAllUsesWith(NV);

  EraseInstFromFunction(*Caller);
  return true;
}
// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline intrinsic into a direct call to the underlying function.
//
Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
  Value *Callee = CS.getCalledValue();
  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttrListPtr &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return 0;
  IntrinsicInst *Tramp =
    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));

  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttrListPtr &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    const Type *NestTy = 0;
    Attributes NestAttr = Attribute::None;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);

      SmallVector<AttributeWithIndex, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);
      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attributes Attr = Attrs.getRetAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          if (Attributes Attr = Attrs.getParamAttributes(Idx))
            NewAttrs.push_back
              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));

          ++Idx, ++I;
        } while (1);
      }
      // Add any function attributes.
      if (Attributes Attr = Attrs.getFnAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<const Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);
      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }
      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
                                                   NewAttrs.end());
      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs.begin(), NewArgs.end(),
                                       Caller->getName(), Caller);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
                                     Caller->getName(), Caller);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }
      if (!Caller->getType()->isVoidTy())
        Caller->replaceAllUsesWith(NewCaller);
      Caller->eraseFromParent();
      Worklist.Remove(Caller);
      return 0;
    }
  }
  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}