//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");

static cl::opt<unsigned> GuardWideningWindow(
    "instcombine-guard-widening-window",
    cl::init(3),
    cl::desc("How wide an instruction window to bypass looking for "
             "another guard"));

/// enable preservation of attributes in assume like:
/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
extern cl::opt<bool> EnableKnowledgeRetention;

/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
  Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
  MaybeAlign CopyDstAlign = MI->getDestAlign();
  if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
    MI->setDestAlignment(DstAlign);
    return MI;
  }

  Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
  MaybeAlign CopySrcAlign = MI->getSourceAlign();
  if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
    MI->setSourceAlignment(SrcAlign);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (AA->pointsToConstantMemory(MI->getDest())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic. See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size & (Size - 1)))
    return nullptr; // If not 1/2/4/8 bytes, exit.
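
  // For example (illustrative): an 8-byte memcpy with adequate alignment
  // becomes a single i64 load from the source followed by an i64 store to the
  // destination.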

  // If it is an atomic and alignment is less than the size then we will
  // introduce an unaligned memory access which will be later transformed
  // into a libcall in CodeGen. This is not an evident performance gain, so
  // disable it now.
  if (isa<AtomicMemTransferInst>(MI))
    if (*CopyDstAlign < Size || *CopySrcAlign < Size)
      return nullptr;

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
      cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
      cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // If the memcpy has metadata describing the members, see if we can get the
  // TBAA tag describing our copy.
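  // A single-entry !tbaa.struct tag (offset 0, size equal to the whole copy)
  // describes the same access as a plain !tbaa tag, so its access tag can be
  // reused directly for the new load and store.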
  MDNode *CopyMD = nullptr;
  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
    CopyMD = M;
  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
    if (M->getNumOperands() == 3 && M->getOperand(0) &&
        mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
        mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
        M->getOperand(1) &&
        mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
        mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
            Size &&
        M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
      CopyMD = cast<MDNode>(M->getOperand(2));
  }

  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder.CreateLoad(IntType, Src);
  // Alignment from the mem intrinsic will be better, so use it.
  L->setAlignment(*CopySrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  MDNode *LoopMemParallelMD =
      MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
  if (AccessGroupMD)
    L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);

  StoreInst *S = Builder.CreateStore(L, Dest);
  // Alignment from the mem intrinsic will be better, so use it.
  S->setAlignment(*CopyDstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  if (AccessGroupMD)
    S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);

  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
    // non-atomics can be volatile
    L->setVolatile(MT->isVolatile());
    S->setVolatile(MT->isVolatile());
  }
  if (isa<AtomicMemTransferInst>(MI)) {
    // atomics have to be unordered
    L->setOrdering(AtomicOrdering::Unordered);
    S->setOrdering(AtomicOrdering::Unordered);
  }

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
  const Align KnownAlignment =
      getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
  MaybeAlign MemSetAlign = MI->getDestAlign();
  if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
    MI->setDestAlignment(KnownAlignment);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (AA->pointsToConstantMemory(MI->getDest())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  const uint64_t Len = LenC->getLimitedValue();
  assert(Len && "0-sized memory setting should be removed already.");
  const Align Alignment = assumeAligned(MI->getDestAlignment());

  // If it is an atomic and alignment is less than the size then we will
  // introduce an unaligned memory access which will be later transformed
  // into a libcall in CodeGen. This is not an evident performance gain, so
  // disable it now.
  if (isa<AtomicMemSetInst>(MI))
    if (Alignment < Len)
      return nullptr;

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len * 8); // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue() * 0x0101010101010101ULL;
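    // The multiply replicates the i8 fill byte into every byte lane of a
    // 64-bit pattern; ConstantInt::get then truncates it to Len*8 bits, so,
    // for example, memset(p, 0xAB, 4) stores the i32 value 0xABABABAB.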
    StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                       MI->isVolatile());
    S->setAlignment(Alignment);
    if (isa<AtomicMemSetInst>(MI))
      S->setOrdering(AtomicOrdering::Unordered);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}

// TODO, Obvious Missing Transforms:
// * Narrow width by halves excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
  Value *LoadPtr = II.getArgOperand(0);
  const Align Alignment =
      cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();

  // If the mask is all ones or undefs, this is a plain vector load of the 1st
  // argument.
  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
    LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                            "unmaskedload");
    L->copyMetadata(II);
    return L;
  }

  // If we can unconditionally load from this address, replace with a
  // load/select idiom. TODO: use DT for context sensitive query
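  // Illustrative IR sketch of the idiom:
  //   %l = load <4 x i32>, <4 x i32>* %p            ; unconditional load
  //   %v = select <4 x i1> %mask, <4 x i32> %l, <4 x i32> %passthru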
  if (isDereferenceablePointer(LoadPtr, II.getType(),
                               II.getModule()->getDataLayout(), &II, nullptr)) {
    LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                             "unmaskedload");
    LI->copyMetadata(II);
    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
  }

  return nullptr;
}

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, this instruction does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // If the mask is all ones, this is a plain vector store of the 1st argument.
  if (ConstMask->isAllOnesValue()) {
    Value *StorePtr = II.getArgOperand(1);
    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
    StoreInst *S =
        new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
    S->copyMetadata(II);
    return S;
  }

  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
    return replaceOperand(II, 0, V);

  return nullptr;
}

// TODO, Obvious Missing Transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halves excluding zero/undef lanes
// * Vector splat address w/known mask -> scalar load
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
  return nullptr;
}

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halves excluding zero/undef lanes
// * Vector splat address w/known mask -> scalar store
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, a scatter does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
    return replaceOperand(II, 0, V);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
    return replaceOperand(II, 1, V);

  return nullptr;
}

/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
                                                    InstCombinerImpl &IC) {
  auto *Arg = II.getArgOperand(0);
  auto *StrippedArg = Arg->stripPointerCasts();
  auto *StrippedInvariantGroupsArg = StrippedArg;
  while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
    if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
        Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
      break;
    StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
  }
  if (StrippedArg == StrippedInvariantGroupsArg)
    return nullptr; // No launders/strips to remove.

  Value *Result = nullptr;

  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
    Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
    Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
  else
    llvm_unreachable(
        "simplifyInvariantGroupIntrinsic only handles launder and strip");
  if (Result->getType()->getPointerAddressSpace() !=
      II.getType()->getPointerAddressSpace())
    Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
  if (Result->getType() != II.getType())
    Result = IC.Builder.CreateBitCast(Result, II.getType());

  return cast<Instruction>(Result);
}

static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
          II.getIntrinsicID() == Intrinsic::ctlz) &&
         "Expected cttz or ctlz intrinsic");
  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
  Value *Op0 = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *X;
  // ctlz(bitreverse(x)) -> cttz(x)
  // cttz(bitreverse(x)) -> ctlz(x)
  if (match(Op0, m_BitReverse(m_Value(X)))) {
    Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
    Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
    return CallInst::Create(F, {X, II.getArgOperand(1)});
  }

  if (II.getType()->isIntOrIntVectorTy(1)) {
    // ctlz/cttz i1 Op0 --> not Op0
    if (match(Op1, m_Zero()))
      return BinaryOperator::CreateNot(Op0);
    // If zero is undef, then the input can be assumed to be "true", so the
    // instruction simplifies to "false".
    assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
    return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
  }

  // If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
  if (auto *Sel = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
      return R;

  if (IsTZ) {
    // cttz(-x) -> cttz(x)
    if (match(Op0, m_Neg(m_Value(X))))
      return IC.replaceOperand(II, 0, X);

    // cttz(sext(x)) -> cttz(zext(x))
    if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
      auto *Zext = IC.Builder.CreateZExt(X, II.getType());
      auto *CttzZext =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
      return IC.replaceInstUsesWith(II, CttzZext);
    }

    // Zext doesn't change the number of trailing zeros, so narrow:
    // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsUndef' parameter is 'true'.
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
      auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
                                                    IC.Builder.getTrue());
      auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
      return IC.replaceInstUsesWith(II, ZextCttz);
    }

    // cttz(abs(x)) -> cttz(x)
    // cttz(nabs(x)) -> cttz(x)
    Value *Y;
    SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
    if (SPF == SPF_ABS || SPF == SPF_NABS)
      return IC.replaceOperand(II, 0, X);

    if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
      return IC.replaceOperand(II, 0, X);
  }

  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);

  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
                                : Known.countMaxLeadingZeros();
  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
                                : Known.countMinLeadingZeros();

  // If all bits above (ctlz) or below (cttz) the first known one are known
  // zero, this value is constant.
  // FIXME: This should be in InstSimplify because we're replacing an
  // instruction with a constant.
  if (PossibleZeros == DefiniteZeros) {
    auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
    return IC.replaceInstUsesWith(II, C);
  }

  // If the input to cttz/ctlz is known to be non-zero,
  // then change the 'ZeroIsUndef' parameter to 'true'
  // because we know the zero behavior can't affect the result.
  if (!Known.One.isNullValue() ||
      isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
                     &IC.getDominatorTree())) {
    if (!match(II.getArgOperand(1), m_One()))
      return IC.replaceOperand(II, 1, IC.Builder.getTrue());
  }

  // Add range metadata since known bits can't completely reflect what we know.
  // TODO: Handle splat vectors.
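  // The result is known to lie in [DefiniteZeros, PossibleZeros]; !range takes
  // a half-open interval, which is why the upper bound below is
  // PossibleZeros + 1.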
  auto *IT = dyn_cast<IntegerType>(Op0->getType());
  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}

static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
         "Expected ctpop intrinsic");
  Type *Ty = II.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  Value *Op0 = II.getArgOperand(0);
  Value *X, *Y;

  // ctpop(bitreverse(x)) -> ctpop(x)
  // ctpop(bswap(x)) -> ctpop(x)
  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
    return IC.replaceOperand(II, 0, X);

  // ctpop(rot(x)) -> ctpop(x)
  if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
       match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
      X == Y)
    return IC.replaceOperand(II, 0, X);

  // ctpop(x | -x) -> bitwidth - cttz(x, false)
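  // (x | -x) keeps the lowest set bit of x and sets every bit above it, so its
  // population count is BitWidth - cttz(x).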
  if (Op0->hasOneUse() &&
      match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
    auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
    return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
  }

  // ctpop(~x & (x - 1)) -> cttz(x, false)
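  // (~x & (x - 1)) has set bits exactly below the lowest set bit of x, so its
  // population count equals cttz(x).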
  if (match(Op0,
            m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    return CallInst::Create(F, {X, IC.Builder.getFalse()});
  }

  // Zext doesn't change the number of set bits, so narrow:
  // ctpop (zext X) --> zext (ctpop X)
  if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
    Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
    return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
  }

  // If the operand is a select with constant arm(s), try to hoist ctpop.
  if (auto *Sel = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
      return R;

  KnownBits Known(BitWidth);
  IC.computeKnownBits(Op0, Known, 0, &II);

  // If all bits are zero except for exactly one fixed bit, then the result
  // must be 0 or 1, and we can get that answer by shifting to LSB:
  // ctpop (X & 32) --> (X & 32) >> 5
  if ((~Known.Zero).isPowerOf2())
    return BinaryOperator::CreateLShr(
        Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));

  // FIXME: Try to simplify vectors of integers.
  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return nullptr;

  // Add range metadata since known bits can't completely reflect what we know.
  unsigned MinCount = Known.countMinPopulation();
  unsigned MaxCount = Known.countMaxPopulation();
  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}

/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
  if (!C)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();

  // Only perform this transformation for <8 x i8> vector types.
  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
    return nullptr;

  int Indexes[8];

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = C->getAggregateElement(I);

    if (!COp || !isa<ConstantInt>(COp))
      return nullptr;

    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();

    // Make sure the mask indices are in range.
    if ((unsigned)Indexes[I] >= NumElts)
      return nullptr;
  }

  auto *V1 = II.getArgOperand(0);
  auto *V2 = Constant::getNullValue(V1->getType());
  return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes));
}

// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
                             unsigned NumOperands) {
  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
  for (unsigned i = 0; i < NumOperands; i++)
    if (I.getArgOperand(i) != E.getArgOperand(i))
      return false;
  return true;
}

// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
//   call @llvm.foo.end(i1 0)   ; &I
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
                          std::function<bool(const IntrinsicInst &)> IsStart) {
  // We start from the end intrinsic and scan backwards, so that InstCombine
  // has already processed (and potentially removed) all the instructions
  // before the end intrinsic.
  BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
  for (; BI != BE; ++BI) {
    if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
      if (isa<DbgInfoIntrinsic>(I) ||
          I->getIntrinsicID() == EndI.getIntrinsicID())
        continue;
      if (IsStart(*I)) {
        if (haveSameOperands(EndI, *I, EndI.getNumArgOperands())) {
          IC.eraseInstFromFunction(*I);
          IC.eraseInstFromFunction(EndI);
          return true;
        }
        // Skip start intrinsics that don't pair with this end intrinsic.
        continue;
      }
    }
    break;
  }

  return false;
}

Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
  removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
    return I.getIntrinsicID() == Intrinsic::vastart ||
           I.getIntrinsicID() == Intrinsic::vacopy;
  });
  return nullptr;
}

static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
  assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap");
  Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
  if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
    Call.setArgOperand(0, Arg1);
    Call.setArgOperand(1, Arg0);
    return &Call;
  }
  return nullptr;
}

/// Creates a result tuple for an overflow intrinsic \p II with a given
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
                                        Constant *Overflow) {
  Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
  StructType *ST = cast<StructType>(II->getType());
  Constant *Struct = ConstantStruct::get(ST, V);
  return InsertValueInst::Create(Struct, Result, 0);
}

Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
  WithOverflowInst *WO = cast<WithOverflowInst>(II);
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
                            WO->getRHS(), *WO, OperationResult,
                            OverflowResult))
    return createOverflowTuple(WO, OperationResult, OverflowResult);
  return nullptr;
}
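
/// Return true if \p Op is known to be negative, false if it is known to be
/// non-negative, and None if the sign cannot be determined either way.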
static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
                                   const DataLayout &DL, AssumptionCache *AC,
                                   DominatorTree *DT) {
  KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
  if (Known.isNonNegative())
    return false;
  if (Known.isNegative())
    return true;

  return isImpliedByDomCondition(
      ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}

/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
                                        InstCombiner::BuilderTy &Builder) {
  Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
  Value *X;
  const APInt *C0, *C1;
  if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
    return nullptr;

  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  switch (II->getIntrinsicID()) {
  case Intrinsic::smax:
    if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_SGT;
    break;
  case Intrinsic::smin:
    if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_SLT;
    break;
  case Intrinsic::umax:
    if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_UGT;
    break;
  case Intrinsic::umin:
    if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_ULT;
    break;
  default:
    llvm_unreachable("Expected min/max intrinsic");
  }
  if (Pred == CmpInst::BAD_ICMP_PREDICATE)
    return nullptr;

  // max (min X, 42), 41 --> X > 41 ? 42 : 41
  // min (max X, 42), 43 --> X < 43 ? 42 : 43
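  // Only two results are possible: the inner constant C0 when the comparison
  // against the outer constant holds, and the outer constant C1 (== I1)
  // otherwise.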
  Value *Cmp = Builder.CreateICmp(Pred, X, I1);
  return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}

/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
  // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
      RHS->getIntrinsicID() != MinMaxID ||
      (!LHS->hasOneUse() && !RHS->hasOneUse()))
    return nullptr;

  Value *A = LHS->getArgOperand(0);
  Value *B = LHS->getArgOperand(1);
  Value *C = RHS->getArgOperand(0);
  Value *D = RHS->getArgOperand(1);

  // Look for a common operand.
  Value *MinMaxOp = nullptr;
  Value *ThirdOp = nullptr;
  if (LHS->hasOneUse()) {
    // If the LHS is only used in this chain and the RHS is used outside of it,
    // reuse the RHS min/max because that will eliminate the LHS.
    if (D == A || C == A) {
      // min(min(a, b), min(c, a)) --> min(min(c, a), b)
      // min(min(a, b), min(a, d)) --> min(min(a, d), b)
      MinMaxOp = RHS;
      ThirdOp = B;
    } else if (D == B || C == B) {
      // min(min(a, b), min(c, b)) --> min(min(c, b), a)
      // min(min(a, b), min(b, d)) --> min(min(b, d), a)
      MinMaxOp = RHS;
      ThirdOp = A;
    }
  } else {
    assert(RHS->hasOneUse() && "Expected one-use operand");
    // Reuse the LHS. This will eliminate the RHS.
    if (D == A || D == B) {
      // min(min(a, b), min(c, a)) --> min(min(a, b), c)
      // min(min(a, b), min(c, b)) --> min(min(a, b), c)
      MinMaxOp = LHS;
      ThirdOp = C;
    } else if (C == A || C == B) {
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
      // min(min(a, b), min(c, b)) --> min(min(a, b), d)
      MinMaxOp = LHS;
      ThirdOp = D;
    }
  }

  if (!MinMaxOp || !ThirdOp)
    return nullptr;

  Module *Mod = II->getModule();
  Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
  return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}

/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
  // Don't try to simplify calls without uses. It will not do anything useful,
  // but will result in the following folds being skipped.
  if (!CI.use_empty())
    if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
      return replaceInstUsesWith(CI, V);

  if (isFreeCall(&CI, &TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallBase(CI);

  // For atomic unordered mem intrinsics if len is not a positive or
  // not a multiple of element size then behavior is undefined.
  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II))
    if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength()))
      if (NumBytes->getSExtValue() < 0 ||
          (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) {
        CreateNonTerminatorUnreachable(AMI);
        assert(AMI->getType()->isVoidTy() &&
               "non void atomic unordered mem intrinsic");
        return eraseInstFromFunction(*AMI);
      }

  // Intrinsics cannot occur in an invoke or a callbr, so handle them here
  // instead of in visitCallBase.
  if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return eraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations. We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (auto *M = dyn_cast<MemIntrinsic>(MI))
      if (M->isVolatile())
        return nullptr;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getModule();
          Intrinsic::ID MemCpyID =
              isa<AtomicMemMoveInst>(MMI)
                  ? Intrinsic::memcpy_element_unordered_atomic
                  : Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return eraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
      if (Instruction *I = SimplifyAnyMemTransfer(MTI))
        return I;
    } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
      if (Instruction *I = SimplifyAnyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

  // For fixed width vector result intrinsics, use the generic demanded vector
  // support.
  if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
    auto VWidth = IIFVTy->getNumElements();
    APInt UndefElts(VWidth, 0);
    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
      if (V != II)
        return replaceInstUsesWith(*II, V);
      return II;
    }
  }

  if (II->isCommutative()) {
    if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
      return NewCall;
  }

  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::objectsize:
    if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
      return replaceInstUsesWith(CI, V);
    break;
  case Intrinsic::abs: {
    Value *IIOperand = II->getArgOperand(0);
    bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();

    // abs(-x) -> abs(x)
    // TODO: Copy nsw if it was present on the neg?
    Value *X;
    if (match(IIOperand, m_Neg(m_Value(X))))
      return replaceOperand(*II, 0, X);
    if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
      return replaceOperand(*II, 0, X);
    if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
      return replaceOperand(*II, 0, X);

    if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
      // abs(x) -> x if x >= 0
      if (!*Sign)
        return replaceInstUsesWith(*II, IIOperand);

      // abs(x) -> -x if x < 0
      if (IntMinIsPoison)
        return BinaryOperator::CreateNSWNeg(IIOperand);
      return BinaryOperator::CreateNeg(IIOperand);
    }

    // abs (sext X) --> zext (abs X*)
    // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
    if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
      Value *NarrowAbs =
          Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
      return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
    }

    // Match a complicated way to check if a number is odd/even:
    // abs (srem X, 2) --> and X, 1
    const APInt *C;
    if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
      return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));

    break;
  }
  case Intrinsic::umin: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    // umin(x, 1) == zext(x != 0)
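    // (For unsigned x, min(x, 1) is 0 exactly when x is 0 and 1 otherwise,
    // which is the zero-extended result of the x != 0 comparison.)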
    if (match(I1, m_One())) {
      Value *Zero = Constant::getNullValue(I0->getType());
      Value *Cmp = Builder.CreateICmpNE(I0, Zero);
      return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
    }
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::umax: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
        (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
      Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
      return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
    }
    Constant *C;
    if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
        I0->hasOneUse()) {
      Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
      if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
        Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
        return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
      }
    }
    // If both operands of unsigned min/max are sign-extended, it is still ok
    // to narrow the operation.
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::smax:
  case Intrinsic::smin: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
        (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
      Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
      return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
    }

    Constant *C;
    if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
        I0->hasOneUse()) {
      Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
      if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
        Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
        return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
      }
    }

    if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
      // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
      // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
      // TODO: Canonicalize neg after min/max if I1 is constant.
      if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
          (I0->hasOneUse() || I1->hasOneUse())) {
        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
        return BinaryOperator::CreateNSWNeg(InvMaxMin);
      }
    }

    if (match(I0, m_Not(m_Value(X)))) {
      // max (not X), (not Y) --> not (min X, Y)
      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
      if (match(I1, m_Not(m_Value(Y))) &&
          (I0->hasOneUse() || I1->hasOneUse())) {
        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
        return BinaryOperator::CreateNot(InvMaxMin);
      }
      // max (not X), C --> not(min X, ~C)
      if (match(I1, m_Constant(C)) && I0->hasOneUse()) {
        Constant *NotC = ConstantExpr::getNot(C);
        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotC);
        return BinaryOperator::CreateNot(InvMaxMin);
      }
    }

    // smax(X, -X) --> abs(X)
    // smin(X, -X) --> -abs(X)
    // umax(X, -X) --> -abs(X)
    // umin(X, -X) --> abs(X)
    if (isKnownNegation(I0, I1)) {
      // We can choose either operand as the input to abs(), but if we can
      // eliminate the only use of a value, that's better for subsequent
      // transforms/analysis.
      if (I0->hasOneUse() && !I1->hasOneUse())
        std::swap(I0, I1);

      // This is some variant of abs(). See if we can propagate 'nsw' to the abs
      // operation and potentially its negation.
      bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
      Value *Abs = Builder.CreateBinaryIntrinsic(
          Intrinsic::abs, I0,
          ConstantInt::getBool(II->getContext(), IntMinIsPoison));

      // We don't have a "nabs" intrinsic, so negate if needed based on the
      // max/min operation.
      if (IID == Intrinsic::smin || IID == Intrinsic::umax)
        Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
      return replaceInstUsesWith(CI, Abs);
    }

    if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
      return Sel;

    if (Instruction *SAdd = matchSAddSubSat(*II))
      return SAdd;

    if (match(I1, m_ImmConstant()))
      if (auto *Sel = dyn_cast<SelectInst>(I0))
        if (Instruction *R = FoldOpIntoSelect(*II, Sel))
          return R;

    if (Instruction *NewMinMax = factorizeMinMaxTree(II))
      return NewMinMax;

    break;
  }
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X;

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getScalarSizeInBits() -
                   IIOperand->getType()->getScalarSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder.CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }
  case Intrinsic::masked_load:
    if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
      return replaceInstUsesWith(CI, SimplifiedMaskedOp);
    break;
  case Intrinsic::masked_store:
    return simplifyMaskedStore(*II);
  case Intrinsic::masked_gather:
    return simplifyMaskedGather(*II);
  case Intrinsic::masked_scatter:
    return simplifyMaskedScatter(*II);
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
      return replaceInstUsesWith(*II, SkippedBarrier);
    break;
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // 0 and 1 are handled in instsimplify
      // powi(x, -1) -> 1/x
      if (Power->isMinusOne())
        return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
                                             II->getArgOperand(0), II);
      // powi(x, 2) -> x*x
      if (Power->equalsInt(2))
        return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
                                             II->getArgOperand(0), II);
    }
    break;

  case Intrinsic::cttz:
  case Intrinsic::ctlz:
    if (auto *I = foldCttzCtlz(*II, *this))
      return I;
    break;

  case Intrinsic::ctpop:
    if (auto *I = foldCtpop(*II, *this))
      return I;
    break;

  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
    Type *Ty = II->getType();
    unsigned BitWidth = Ty->getScalarSizeInBits();
    Constant *ShAmtC;
    if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) &&
        !ShAmtC->containsConstantExpression()) {
      // Canonicalize a shift amount constant operand to modulo the bit-width.
      Constant *WidthC = ConstantInt::get(Ty, BitWidth);
      Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
      if (ModuloC != ShAmtC)
        return replaceOperand(*II, 2, ModuloC);

      assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
                 ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
             "Shift amount expected to be modulo bitwidth");

      // Canonicalize funnel shift right by constant to funnel shift left. This
      // is not entirely arbitrary. For historical reasons, the backend may
      // recognize rotate left patterns but miss rotate right patterns.
      if (IID == Intrinsic::fshr) {
        // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
        Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
        Module *Mod = II->getModule();
        Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
        return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
      }
      assert(IID == Intrinsic::fshl &&
             "All funnel shifts by simple constants should go left");

      // fshl(X, 0, C) --> shl X, C
      // fshl(X, undef, C) --> shl X, C
      if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
        return BinaryOperator::CreateShl(Op0, ShAmtC);

      // fshl(0, X, C) --> lshr X, (BW-C)
      // fshl(undef, X, C) --> lshr X, (BW-C)
      if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
        return BinaryOperator::CreateLShr(Op1,
                                          ConstantExpr::getSub(WidthC, ShAmtC));

      // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
      if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
        Module *Mod = II->getModule();
        Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
        return CallInst::Create(Bswap, { Op0 });
      }
    }

    // Left or right might be masked.
    if (SimplifyDemandedInstructionBits(*II))
      return &CI;

    // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
    // so only the low bits of the shift amount are demanded if the bitwidth is
    // a power-of-2.
    if (!isPowerOf2_32(BitWidth))
      break;
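    // For example, for an i32 funnel shift only the low 5 bits of the shift
    // amount can affect the result.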
    APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
    KnownBits Op2Known(BitWidth);
    if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
      return &CI;
    break;
  }
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow: {
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;

    // Given 2 constant operands whose sum does not overflow:
    // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
    // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
    Value *X;
    const APInt *C0, *C1;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    bool IsSigned = IID == Intrinsic::sadd_with_overflow;
    bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
                             : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
    if (HasNWAdd && match(Arg1, m_APInt(C1))) {
      bool Overflow;
      APInt NewC =
          IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
      if (!Overflow)
        return replaceInstUsesWith(
            *II, Builder.CreateBinaryIntrinsic(
                     IID, X, ConstantInt::get(Arg1->getType(), NewC)));
    }
    break;
  }

  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::usub_with_overflow:
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;
    break;

  case Intrinsic::ssub_with_overflow: {
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;

    Constant *C;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    // Given a constant C that is not the minimum signed value
    // for an integer of a given bit width:
    //
    // ssubo X, C -> saddo X, -C
    if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      // Build a saddo call that is equivalent to the discovered
      // ssubo call.
      return replaceInstUsesWith(
          *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
                                             Arg0, NegVal));
    }

    break;
  }

  case Intrinsic::uadd_sat:
  case Intrinsic::sadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::ssub_sat: {
    SaturatingInst *SI = cast<SaturatingInst>(II);
    Type *Ty = SI->getType();
    Value *Arg0 = SI->getLHS();
    Value *Arg1 = SI->getRHS();

    // Make use of known overflow information.
    OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
                                        Arg0, Arg1, SI);
    switch (OR) {
    case OverflowResult::MayOverflow:
      break;
    case OverflowResult::NeverOverflows:
      if (SI->isSigned())
        return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
      else
        return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
    case OverflowResult::AlwaysOverflowsLow: {
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
      return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
    }
    case OverflowResult::AlwaysOverflowsHigh: {
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
      return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
    }
    }

    // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
    Constant *C;
    if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
        C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      return replaceInstUsesWith(
          *II, Builder.CreateBinaryIntrinsic(
                   Intrinsic::sadd_sat, Arg0, NegVal));
    }

    // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
    // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
    // if Val and Val2 have the same sign
    if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
      Value *X;
      const APInt *Val, *Val2;
      APInt NewVal;
      bool IsUnsigned =
          IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
      if (Other->getIntrinsicID() == IID &&
          match(Arg1, m_APInt(Val)) &&
          match(Other->getArgOperand(0), m_Value(X)) &&
          match(Other->getArgOperand(1), m_APInt(Val2))) {
        if (IsUnsigned)
          NewVal = Val->uadd_sat(*Val2);
        else if (Val->isNonNegative() == Val2->isNonNegative()) {
          bool Overflow;
          NewVal = Val->sadd_ov(*Val2, Overflow);
          if (Overflow) {
            // Both adds together may add more than SignedMaxValue
            // without saturating the final result.
            break;
          }
        } else {
          // Cannot fold saturated addition with different signs.
          break;
        }

        return replaceInstUsesWith(
            *II, Builder.CreateBinaryIntrinsic(
                     IID, X, ConstantInt::get(II->getType(), NewVal)));
      }
    }

    break;
  }
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
        (Arg0->hasOneUse() || Arg1->hasOneUse())) {
      // If both operands are negated, invert the call and negate the result:
      // min(-X, -Y) --> -(max(X, Y))
      // max(-X, -Y) --> -(min(X, Y))
      Intrinsic::ID NewIID;
      switch (IID) {
      case Intrinsic::maxnum:
        NewIID = Intrinsic::minnum;
        break;
      case Intrinsic::minnum:
        NewIID = Intrinsic::maxnum;
        break;
      case Intrinsic::maximum:
        NewIID = Intrinsic::minimum;
        break;
      case Intrinsic::minimum:
        NewIID = Intrinsic::maximum;
        break;
      default:
        llvm_unreachable("unexpected intrinsic ID");
      }
      Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
      Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
      FNeg->copyIRFlags(II);
      return FNeg;
    }

    // m(m(X, C2), C1) -> m(X, C)
    const APFloat *C1, *C2;
    if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
      if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
          ((match(M->getArgOperand(0), m_Value(X)) &&
            match(M->getArgOperand(1), m_APFloat(C2))) ||
           (match(M->getArgOperand(1), m_Value(X)) &&
            match(M->getArgOperand(0), m_APFloat(C2))))) {
        APFloat Res(0.0);
        switch (IID) {
        case Intrinsic::maxnum:
          Res = maxnum(*C1, *C2);
          break;
        case Intrinsic::minnum:
          Res = minnum(*C1, *C2);
          break;
        case Intrinsic::maximum:
          Res = maximum(*C1, *C2);
          break;
        case Intrinsic::minimum:
          Res = minimum(*C1, *C2);
          break;
        default:
          llvm_unreachable("unexpected intrinsic ID");
        }
        Instruction *NewCall = Builder.CreateBinaryIntrinsic(
            IID, X, ConstantFP::get(Arg0->getType(), Res), II);
        // TODO: Conservatively intersecting FMF. If Res == C2, the transform
        //       was a simplification (so Arg0 and its original flags could
        //       propagate).
        NewCall->andIRFlags(M);
        return replaceInstUsesWith(*II, NewCall);
      }
    }

    // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
    if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
        match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
        X->getType() == Y->getType()) {
      Value *NewCall =
          Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
      return new FPExtInst(NewCall, II->getType());
    }

    // max X, -X --> fabs X
    // min X, -X --> -(fabs X)
    // TODO: Remove one-use limitation? That is obviously better for max.
    //       It would be an extra instruction for min (fnabs), but that is
    //       still likely better for analysis and codegen.
    if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
        (match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
      Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
      if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
        R = Builder.CreateFNegFMF(R, II);
      return replaceInstUsesWith(*II, R);
    }

    break;
  }
  case Intrinsic::fmuladd: {
    // Canonicalize fast fmuladd to the separate fmul + fadd.
    if (II->isFast()) {
      BuilderTy::FastMathFlagGuard Guard(Builder);
      Builder.setFastMathFlags(II->getFastMathFlags());
      Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
                                      II->getArgOperand(1));
      Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
      Add->takeName(II);
      return replaceInstUsesWith(*II, Add);
    }

    // Try to simplify the underlying FMul.
    if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
                                    II->getFastMathFlags(),
                                    SQ.getWithInstruction(II))) {
      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
      FAdd->copyFastMathFlags(II);
      return FAdd;
    }

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::fma: {
    // fma fneg(x), fneg(y), z -> fma x, y, z
    Value *Src0 = II->getArgOperand(0);
    Value *Src1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
      replaceOperand(*II, 0, X);
      replaceOperand(*II, 1, Y);
      return II;
    }

    // fma fabs(x), fabs(x), z -> fma x, x, z
    if (match(Src0, m_FAbs(m_Value(X))) &&
        match(Src1, m_FAbs(m_Specific(X)))) {
      replaceOperand(*II, 0, X);
      replaceOperand(*II, 1, X);
      return II;
    }

    // Try to simplify the underlying FMul. We can only apply simplifications
    // that do not require rounding.
    if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
                                   II->getFastMathFlags(),
                                   SQ.getWithInstruction(II))) {
      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
      FAdd->copyFastMathFlags(II);
      return FAdd;
    }

    // fma x, y, 0 -> fmul x, y
    // This is always valid for -0.0, but requires nsz for +0.0 as
    // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
    if (match(II->getArgOperand(2), m_NegZeroFP()) ||
        (match(II->getArgOperand(2), m_PosZeroFP()) &&
         II->getFastMathFlags().noSignedZeros()))
      return BinaryOperator::CreateFMulFMF(Src0, Src1, II);

    break;
  }
  case Intrinsic::isnan: {
    Value *Arg = II->getArgOperand(0);
    if (const auto *FPMO = dyn_cast<FPMathOperator>(Arg)) {
      // If the argument of this intrinsic call is an instruction that has the
      // 'nnan' flag, we can assume that NaN cannot be produced; otherwise it
      // would be undefined behavior.
      if (FPMO->getFastMathFlags().noNaNs())
        return replaceInstUsesWith(
            *II, ConstantInt::get(II->getType(), APInt::getNullValue(1)));
    }
    break;
  }
  case Intrinsic::copysign: {
    Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
    if (SignBitMustBeZero(Sign, &TLI)) {
      // If we know that the sign argument is positive, reduce to FABS:
      // copysign Mag, +Sign --> fabs Mag
      Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
      return replaceInstUsesWith(*II, Fabs);
    }
    // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
    const APFloat *C;
    if (match(Sign, m_APFloat(C)) && C->isNegative()) {
      // If we know that the sign argument is negative, reduce to FNABS:
      // copysign Mag, -Sign --> fneg (fabs Mag)
      Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
      return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
    }

    // Propagate sign argument through nested calls:
    // copysign Mag, (copysign ?, X) --> copysign Mag, X
    Value *X;
    if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
      return replaceOperand(*II, 1, X);

    // Peek through changes of magnitude's sign-bit. This call rewrites those:
    // copysign (fabs X), Sign --> copysign X, Sign
    // copysign (fneg X), Sign --> copysign X, Sign
    if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
      return replaceOperand(*II, 0, X);

    break;
  }
  case Intrinsic::fabs: {
    Value *Cond, *TVal, *FVal;
    if (match(II->getArgOperand(0),
              m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
      // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
      if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
        CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
        CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
        return SelectInst::Create(Cond, AbsT, AbsF);
      }
      // fabs (select Cond, -FVal, FVal) --> fabs FVal
      if (match(TVal, m_FNeg(m_Specific(FVal))))
        return replaceOperand(*II, 0, FVal);
      // fabs (select Cond, TVal, -TVal) --> fabs TVal
      if (match(FVal, m_FNeg(m_Specific(TVal))))
        return replaceOperand(*II, 0, TVal);
    }

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::trunc: {
    Value *ExtSrc;
    if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
      // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
      Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
      return new FPExtInst(NarrowII, II->getType());
    }
    break;
  }
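  // Sketch of the narrowing above, assuming the fpext has a single use
  // (placeholder names):
  //   %e = fpext float %x to double
  //   %r = call double @llvm.trunc.f64(double %e)
  // becomes
  //   %t = call float @llvm.trunc.f32(float %x)
  //   %r = fpext float %t to double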
  case Intrinsic::cos:
  case Intrinsic::amdgcn_cos: {
    Value *X;
    Value *Src = II->getArgOperand(0);
    if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
      // cos(-x) -> cos(x)
      // cos(fabs(x)) -> cos(x)
      return replaceOperand(*II, 0, X);
    }
    break;
  }
  case Intrinsic::sin: {
    Value *X;
    if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
      // sin(-x) --> -sin(x)
      Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
      Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);
      FNeg->copyFastMathFlags(II);
      return FNeg;
    }
    break;
  }
  case Intrinsic::arm_neon_vtbl1:
  case Intrinsic::aarch64_neon_tbl1:
    if (Value *V = simplifyNeonTbl1(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
                 IID == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
        CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);

        return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }
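  // Sketch of the mul-by-one splat case above for the unsigned widening
  // multiply (operand names are placeholders):
  //   %r = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(
  //            <4 x i16> %a, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  // reduces to a plain widening cast:
  //   %r = zext <4 x i16> %a to <4 x i32>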
  case Intrinsic::arm_neon_aesd:
  case Intrinsic::arm_neon_aese:
  case Intrinsic::aarch64_crypto_aesd:
  case Intrinsic::aarch64_crypto_aese: {
    Value *DataArg = II->getArgOperand(0);
    Value *KeyArg  = II->getArgOperand(1);

    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
    Value *Data, *Key;
    if (match(KeyArg, m_ZeroInt()) &&
        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
      replaceOperand(*II, 0, Data);
      replaceOperand(*II, 1, Key);
      return II;
    }
    break;
  }
  case Intrinsic::hexagon_V6_vandvrt:
  case Intrinsic::hexagon_V6_vandvrt_128B: {
    // Simplify Q -> V -> Q conversion.
    if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      Intrinsic::ID ID0 = Op0->getIntrinsicID();
      if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
          ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
        break;
      Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
      uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue();
      uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue();
      // Check if every byte has common bits in Bytes and Mask.
      uint64_t C = Bytes1 & Mask1;
      if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
        return replaceInstUsesWith(*II, Op0->getArgOperand(0));
    }
    break;
  }
  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore.  This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        // Skip over debug info.
        if (SS->getNextNonDebugInstruction() == II) {
          return eraseInstFromFunction(CI);
        }
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI(II);
    Instruction *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (auto *II2 = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II2->getIntrinsicID() == Intrinsic::stackrestore)
            return eraseInstFromFunction(CI);

          // Bail if we cross over an intrinsic with side effects, such as
          // llvm.stacksave, or llvm.read_register.
          if (II2->mayHaveSideEffects()) {
            CannotRemove = true;
            break;
          }
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return, resume, or unwind block and if there
    // are no allocas or calls between the restore and the return, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return eraseInstFromFunction(CI);
    break;
  }
  case Intrinsic::lifetime_end:
    // Asan needs to poison memory to detect invalid access which is possible
    // even for empty lifetime range.
    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
      break;

    if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
          return I.getIntrinsicID() == Intrinsic::lifetime_start;
        }))
      return nullptr;
    break;
  case Intrinsic::assume: {
    Value *IIOperand = II->getArgOperand(0);
    SmallVector<OperandBundleDef, 4> OpBundles;
    II->getOperandBundlesAsDefs(OpBundles);

    /// This will remove the boolean Condition from the assume given as
    /// argument and remove the assume if it becomes useless.
    /// Always returns nullptr for use as a return value.
    auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
      assert(isa<AssumeInst>(Assume));
      if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
        return eraseInstFromFunction(CI);
      replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
      return nullptr;
    };
    // Remove an assume if it is followed by an identical assume.
    // TODO: Do we need this? Unless there are conflicting assumptions, the
    // computeKnownBits(IIOperand) below here eliminates redundant assumes.
    Instruction *Next = II->getNextNonDebugInstruction();
    if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
      return RemoveConditionFromAssume(Next);

    // Canonicalize assume(a && b) -> assume(a); assume(b);
    // Note: New assumption intrinsics created here are registered by
    // the InstCombineIRInserter object.
    FunctionType *AssumeIntrinsicTy = II->getFunctionType();
    Value *AssumeIntrinsic = II->getCalledOperand();
    Value *A, *B;
    if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
                         II->getName());
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
      return eraseInstFromFunction(*II);
    }
    // assume(!(a || b)) -> assume(!a); assume(!b);
    if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
                         Builder.CreateNot(A), OpBundles, II->getName());
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
                         Builder.CreateNot(B), II->getName());
      return eraseInstFromFunction(*II);
    }

    // assume( (load addr) != null ) -> add 'nonnull' metadata to load
    // (if assume is valid at the load)
    CmpInst::Predicate Pred;
    Instruction *LHS;
    if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
        Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
        LHS->getType()->isPointerTy() &&
        isValidAssumeForContext(II, LHS, &DT)) {
      MDNode *MD = MDNode::get(II->getContext(), None);
      LHS->setMetadata(LLVMContext::MD_nonnull, MD);
      return RemoveConditionFromAssume(II);

      // TODO: apply nonnull return attributes to calls and invokes
      // TODO: apply range metadata for range check patterns?
    }

    // Convert nonnull assume like:
    //   %A = icmp ne i32* %PTR, null
    //   call void @llvm.assume(i1 %A)
    // into
    //   call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
    if (EnableKnowledgeRetention &&
        match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
        Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
      if (auto *Replacement = buildAssumeFromKnowledge(
              {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {

        Replacement->insertBefore(Next);
        AC.registerAssumption(Replacement);
        return RemoveConditionFromAssume(II);
      }
    }

    // Convert alignment assume like:
    //   %B = ptrtoint i32* %A to i64
    //   %C = and i64 %B, Constant
    //   %D = icmp eq i64 %C, 0
    //   call void @llvm.assume(i1 %D)
    // into
    //   call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
    uint64_t AlignMask;
    if (EnableKnowledgeRetention &&
        match(IIOperand,
              m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
                    m_Zero())) &&
        Pred == CmpInst::ICMP_EQ) {
      if (isPowerOf2_64(AlignMask + 1)) {
        uint64_t Offset = 0;
        match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
        if (match(A, m_PtrToInt(m_Value(A)))) {
          /// Note: this doesn't preserve the offset information but merges
          /// offset and alignment.
          /// TODO: we can generate a GEP instead of merging the alignment with
          /// the offset.
          RetainedKnowledge RK{Attribute::Alignment,
                               (unsigned)MinAlign(Offset, AlignMask + 1), A};
          if (auto *Replacement =
                  buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {

            Replacement->insertAfter(II);
            AC.registerAssumption(Replacement);
          }
          return RemoveConditionFromAssume(II);
        }
      }
    }

    /// Canonicalize Knowledge in operand bundles.
    if (EnableKnowledgeRetention && II->hasOperandBundles()) {
      for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
        auto &BOI = II->bundle_op_info_begin()[Idx];
        RetainedKnowledge RK =
            llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
        if (BOI.End - BOI.Begin > 2)
          continue; // Prevent reducing knowledge in an align with offset since
                    // extracting a RetainedKnowledge from it loses the offset
                    // information.
        RetainedKnowledge CanonRK =
            llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
                                            &getAssumptionCache(),
                                            &getDominatorTree());
        if (CanonRK == RK)
          continue;
        if (!CanonRK) {
          if (BOI.End - BOI.Begin > 0) {
            Worklist.pushValue(II->op_begin()[BOI.Begin]);
            Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
          }
          continue;
        }
        assert(RK.AttrKind == CanonRK.AttrKind);
        if (BOI.End - BOI.Begin > 0)
          II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
        if (BOI.End - BOI.Begin > 1)
          II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
              Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
        if (RK.WasOn)
          Worklist.pushValue(RK.WasOn);
        return II;
      }
    }

    // If there is a dominating assume with the same condition as this one,
    // then this one is redundant, and should be removed.
    KnownBits Known(1);
    computeKnownBits(IIOperand, Known, 0, II);
    if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
      return eraseInstFromFunction(*II);

    // Update the cache of affected values for this assumption (we might be
    // here because we just simplified the condition).
    AC.updateAffectedValues(cast<AssumeInst>(II));
    break;
  }
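  // A short sketch of the logical-and split above (placeholder values):
  //   %c = and i1 %a, %b
  //   call void @llvm.assume(i1 %c)
  // becomes two assumptions that are easier to query individually:
  //   call void @llvm.assume(i1 %a)
  //   call void @llvm.assume(i1 %b)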
  case Intrinsic::experimental_guard: {
    // Is this guard followed by another guard?  We scan forward over a small
    // fixed window of instructions to handle common cases with conditions
    // computed between guards.
    Instruction *NextInst = II->getNextNonDebugInstruction();
    for (unsigned i = 0; i < GuardWideningWindow; i++) {
      // Note: Using context-free form to avoid compile time blow up
      if (!isSafeToSpeculativelyExecute(NextInst))
        break;
      NextInst = NextInst->getNextNonDebugInstruction();
    }
    Value *NextCond = nullptr;
    if (match(NextInst,
              m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
      Value *CurrCond = II->getArgOperand(0);

      // Remove a guard if it is immediately preceded by an identical guard.
      // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
      if (CurrCond != NextCond) {
        Instruction *MoveI = II->getNextNonDebugInstruction();
        while (MoveI != NextInst) {
          auto *Temp = MoveI;
          MoveI = MoveI->getNextNonDebugInstruction();
          Temp->moveBefore(II);
        }
        replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
      }
      eraseInstFromFunction(*NextInst);
      return II;
    }
    break;
  }
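  // Sketch of the guard-widening canonicalization above (placeholder names):
  //   call void (i1, ...) @llvm.experimental.guard(i1 %a) [ "deopt"() ]
  //   call void (i1, ...) @llvm.experimental.guard(i1 %b) [ "deopt"() ]
  // is canonicalized to a single guard on the conjunction:
  //   %c = and i1 %a, %b
  //   call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]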
  case Intrinsic::experimental_vector_insert: {
    Value *Vec = II->getArgOperand(0);
    Value *SubVec = II->getArgOperand(1);
    Value *Idx = II->getArgOperand(2);
    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
    auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());

    // Only canonicalize if the destination vector, Vec, and SubVec are all
    // fixed vectors.
    if (DstTy && VecTy && SubVecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned SubVecNumElts = SubVecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // An insert that entirely overwrites Vec with SubVec is a nop.
      if (VecNumElts == SubVecNumElts)
        return replaceInstUsesWith(CI, SubVec);

      // Widen SubVec into a vector of the same width as Vec, since
      // shufflevector requires the two input vectors to be the same width.
      // Elements beyond the bounds of SubVec within the widened vector are
      // undefined.
      SmallVector<int, 8> WidenMask;
      unsigned i;
      for (i = 0; i != SubVecNumElts; ++i)
        WidenMask.push_back(i);
      for (; i != VecNumElts; ++i)
        WidenMask.push_back(UndefMaskElem);

      Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);

      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != IdxN; ++i)
        Mask.push_back(i);
      for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
        Mask.push_back(i);
      for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
        Mask.push_back(i);

      Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
      return replaceInstUsesWith(CI, Shuffle);
    }
    break;
  }
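  // A hedged sketch of the canonicalization above, inserting a <2 x i32>
  // subvector at index 2 of a <4 x i32> (placeholder names):
  //   %widened = shufflevector <2 x i32> %sub, <2 x i32> undef,
  //                            <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  //   %result  = shufflevector <4 x i32> %vec, <4 x i32> %widened,
  //                            <4 x i32> <i32 0, i32 1, i32 4, i32 5>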
  case Intrinsic::experimental_vector_extract: {
    Value *Vec = II->getArgOperand(0);
    Value *Idx = II->getArgOperand(1);

    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());

    // Only canonicalize if the destination vector and Vec are fixed
    // vectors.
    if (DstTy && VecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // Extracting the entirety of Vec is a nop.
      if (VecNumElts == DstNumElts) {
        replaceInstUsesWith(CI, Vec);
        return eraseInstFromFunction(CI);
      }

      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != DstNumElts; ++i)
        Mask.push_back(IdxN + i);

      Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
      return replaceInstUsesWith(CI, Shuffle);
    }
    break;
  }
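  // Corresponding sketch for the extract case, pulling a <2 x i32> out of a
  // <4 x i32> starting at index 2 (placeholder names):
  //   %result = shufflevector <4 x i32> %vec, <4 x i32> undef,
  //                           <2 x i32> <i32 2, i32 3>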
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_and: {
    // Canonicalize logical or/and reductions:
    // Or reduction for i1 is represented as:
    // %val = bitcast <ReduxWidth x i1> to iReduxWidth
    // %res = cmp ne iReduxWidth %val, 0
    // And reduction for i1 is represented as:
    // %val = bitcast <ReduxWidth x i1> to iReduxWidth
    // %res = cmp eq iReduxWidth %val, 11111
    Value *Arg = II->getArgOperand(0);
    Value *Vect;
    if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
      if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
        if (FTy->getElementType() == Builder.getInt1Ty()) {
          Value *Res = Builder.CreateBitCast(
              Vect, Builder.getIntNTy(FTy->getNumElements()));
          if (IID == Intrinsic::vector_reduce_and) {
            Res = Builder.CreateICmpEQ(
                Res, ConstantInt::getAllOnesValue(Res->getType()));
          } else {
            assert(IID == Intrinsic::vector_reduce_or &&
                   "Expected or reduction.");
            Res = Builder.CreateIsNotNull(Res);
          }
          if (Arg != Vect)
            Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
                                     II->getType());
          return replaceInstUsesWith(CI, Res);
        }
    }
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::vector_reduce_add: {
    if (IID == Intrinsic::vector_reduce_add) {
      // Convert vector_reduce_add(ZExt(<n x i1>)) to
      // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
      // Convert vector_reduce_add(SExt(<n x i1>)) to
      // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
      // Convert vector_reduce_add(<n x i1>) to
      // Trunc(ctpop(bitcast <n x i1> to in)).
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *V = Builder.CreateBitCast(
                Vect, Builder.getIntNTy(FTy->getNumElements()));
            Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
            if (Res->getType() != II->getType())
              Res = Builder.CreateZExtOrTrunc(Res, II->getType());
            if (Arg != Vect &&
                cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
              Res = Builder.CreateNeg(Res);
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    LLVM_FALLTHROUGH;
  }
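  // Sketch of the i1 add-reduction rewrite above (placeholder names):
  //   %z = zext <8 x i1> %b to <8 x i32>
  //   %r = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %z)
  // becomes a population count of the mask:
  //   %m = bitcast <8 x i1> %b to i8
  //   %p = call i8 @llvm.ctpop.i8(i8 %m)
  //   %r = zext i8 %p to i32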
  case Intrinsic::vector_reduce_xor: {
    if (IID == Intrinsic::vector_reduce_xor) {
      // Exclusive disjunction reduction over the vector with
      // (potentially-extended) i1 element type is actually a
      // (potentially-extended) arithmetic `add` reduction over the original
      // non-extended value:
      //   vector_reduce_xor(?ext(<n x i1>))
      //     -->
      //   ?ext(vector_reduce_add(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = Builder.CreateAddReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
                                       II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::vector_reduce_mul: {
    if (IID == Intrinsic::vector_reduce_mul) {
      // Multiplicative reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially zero-extended)
      // logical `and` reduction over the original non-extended value:
      //   vector_reduce_mul(?ext(<n x i1>))
      //     -->
      //   zext(vector_reduce_and(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = Builder.CreateAndReduce(Vect);
            if (Res->getType() != II->getType())
              Res = Builder.CreateZExt(Res, II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax: {
    if (IID == Intrinsic::vector_reduce_umin ||
        IID == Intrinsic::vector_reduce_umax) {
      // UMin/UMax reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially-extended)
      // logical `and`/`or` reduction over the original non-extended value:
      //   vector_reduce_u{min,max}(?ext(<n x i1>))
      //     -->
      //   ?ext(vector_reduce_{and,or}(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = IID == Intrinsic::vector_reduce_umin
                             ? Builder.CreateAndReduce(Vect)
                             : Builder.CreateOrReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
                                       II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax: {
    if (IID == Intrinsic::vector_reduce_smin ||
        IID == Intrinsic::vector_reduce_smax) {
      // SMin/SMax reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially-extended)
      // logical `and`/`or` reduction over the original non-extended value:
      //   vector_reduce_s{min,max}(<n x i1>)
      //     -->
      //   vector_reduce_{or,and}(<n x i1>)
      // and
      //   vector_reduce_s{min,max}(sext(<n x i1>))
      //     -->
      //   sext(vector_reduce_{or,and}(<n x i1>))
      // and
      //   vector_reduce_s{min,max}(zext(<n x i1>))
      //     -->
      //   zext(vector_reduce_{and,or}(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
            if (Arg != Vect)
              ExtOpc = cast<CastInst>(Arg)->getOpcode();
            Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
                          (ExtOpc == Instruction::CastOps::ZExt))
                             ? Builder.CreateAndReduce(Vect)
                             : Builder.CreateOrReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(ExtOpc, Res, II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::vector_reduce_fmax:
  case Intrinsic::vector_reduce_fmin:
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul: {
    bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
                              IID != Intrinsic::vector_reduce_fmul) ||
                             II->hasAllowReassoc();
    const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
                             IID == Intrinsic::vector_reduce_fmul)
                                ? 1
                                : 0;
    Value *Arg = II->getArgOperand(ArgIdx);
    Value *V;
    ArrayRef<int> Mask;
    if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
        !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
        !cast<ShuffleVectorInst>(Arg)->isSingleSource())
      break;
    int Sz = Mask.size();
    SmallBitVector UsedIndices(Sz);
    for (int Idx : Mask) {
      if (Idx == UndefMaskElem || UsedIndices.test(Idx))
        break;
      UsedIndices.set(Idx);
    }
    // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
    // other changes.
    if (UsedIndices.all()) {
      replaceUse(II->getOperandUse(ArgIdx), V);
      return nullptr;
    }
    break;
  }
  default: {
    // Handle target specific intrinsics
    Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
    if (V.hasValue())
      return V.getValue();
    break;
  }
  }
  // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
  // context, so it is handled in visitCallBase and we should trigger it.
  return visitCallBase(*II);
}
// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
  // Remove identical consecutive fences.
  Instruction *Next = FI.getNextNonDebugInstruction();
  if (auto *NFI = dyn_cast<FenceInst>(Next))
    if (FI.isIdenticalTo(NFI))
      return eraseInstFromFunction(FI);
  return nullptr;
}
// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
  return visitCallBase(II);
}

// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
  return visitCallBase(CBI);
}
/// If this cast does not affect the value passed through the varargs area, we
/// can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallBase &Call,
                                         const DataLayout &DL,
                                         const CastInst *const CI,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // If this is a GC intrinsic, avoid munging types.  We need types for
  // statepoint reconstruction in SelectionDAG.
  // TODO: This is probably something which should be expanded to all
  // intrinsics since the entire point of intrinsics is that
  // they are understandable by the optimizer.
  if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
      isa<GCResultInst>(Call))
    return false;

  // Opaque pointers are compatible with any byval types.
  PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
  if (SrcTy->isOpaque())
    return true;

  // The size of ByVal or InAlloca arguments is derived from the type, so we
  // can't change to a type with a different size.  If the size were
  // passed explicitly we could avoid this check.
  if (!Call.isPassPointeeByValueArgument(ix))
    return true;

  // The transform currently only handles type replacement for byval, not other
  // type-carrying attributes.
  if (!Call.isByValArgument(ix))
    return false;

  Type *SrcElemTy = SrcTy->getElementType();
  Type *DstElemTy = Call.getParamByValType(ix);
  if (!SrcElemTy->isSized() || !DstElemTy->isSized())
    return false;
  if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
    return false;
  return true;
}
Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
  if (!CI->getCalledFunction()) return nullptr;

  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
    replaceInstUsesWith(*From, With);
  };
  auto InstCombineErase = [this](Instruction *I) {
    eraseInstFromFunction(*I);
  };
  LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
                               InstCombineErase);
  if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
  }

  return nullptr;
}
static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca.  This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value.  Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}
static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find a
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
                            E = AdjustTramp->getParent()->begin();
       I != E;) {
    Instruction *Inst = &*--I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}
// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
static IntrinsicInst *findInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}
void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
                                            const TargetLibraryInfo *TLI) {
  unsigned NumArgs = Call.getNumArgOperands();
  ConstantInt *Op0C = dyn_cast<ConstantInt>(Call.getOperand(0));
  ConstantInt *Op1C =
      (NumArgs == 1) ? nullptr : dyn_cast<ConstantInt>(Call.getOperand(1));
  // Bail out if the allocation size is zero (or an invalid alignment of zero
  // with aligned_alloc).
  if ((Op0C && Op0C->isNullValue()) || (Op1C && Op1C->isNullValue()))
    return;

  if (isMallocLikeFn(&Call, TLI) && Op0C) {
    if (isOpNewLikeFn(&Call, TLI))
      Call.addRetAttr(Attribute::getWithDereferenceableBytes(
          Call.getContext(), Op0C->getZExtValue()));
    else
      Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
          Call.getContext(), Op0C->getZExtValue()));
  } else if (isAlignedAllocLikeFn(&Call, TLI)) {
    if (Op1C)
      Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
          Call.getContext(), Op1C->getZExtValue()));
    // Add alignment attribute if alignment is a power of two constant.
    if (Op0C && Op0C->getValue().ult(llvm::Value::MaximumAlignment) &&
        isKnownNonZero(Call.getOperand(1), DL, 0, &AC, &Call, &DT)) {
      uint64_t AlignmentVal = Op0C->getZExtValue();
      if (llvm::isPowerOf2_64(AlignmentVal)) {
        Call.removeRetAttr(Attribute::Alignment);
        Call.addRetAttr(Attribute::getWithAlignment(Call.getContext(),
                                                    Align(AlignmentVal)));
      }
    }
  } else if (isReallocLikeFn(&Call, TLI) && Op1C) {
    Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
        Call.getContext(), Op1C->getZExtValue()));
  } else if (isCallocLikeFn(&Call, TLI) && Op0C && Op1C) {
    bool Overflow;
    const APInt &N = Op0C->getValue();
    APInt Size = N.umul_ov(Op1C->getValue(), Overflow);
    if (!Overflow)
      Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
          Call.getContext(), Size.getZExtValue()));
  } else if (isStrdupLikeFn(&Call, TLI)) {
    uint64_t Len = GetStringLength(Call.getOperand(0));
    if (Len) {
      // strdup
      if (NumArgs == 1)
        Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
            Call.getContext(), Len));
      // strndup
      else if (NumArgs == 2 && Op1C)
        Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
            Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1)));
    }
  }
}
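// A hedged example of the annotation above (callee and size are
// placeholders): a call such as
//   %p = call i8* @malloc(i64 64)
// gains a return attribute along the lines of
//   %p = call dereferenceable_or_null(64) i8* @malloc(i64 64)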
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
  if (isAllocationFn(&Call, &TLI))
    annotateAnyAllocSite(Call, &TLI);

  bool Changed = false;

  // Mark any parameters that are known to be non-null with the nonnull
  // attribute.  This is helpful for inlining calls to functions with null
  // checks on their arguments.
  SmallVector<unsigned, 4> ArgNos;
  unsigned ArgNo = 0;

  for (Value *V : Call.args()) {
    if (V->getType()->isPointerTy() &&
        !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
        isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
      ArgNos.push_back(ArgNo);
    ArgNo++;
  }

  assert(ArgNo == Call.arg_size() && "sanity check");

  if (!ArgNos.empty()) {
    AttributeList AS = Call.getAttributes();
    LLVMContext &Ctx = Call.getContext();
    AS = AS.addParamAttribute(Ctx, ArgNos,
                              Attribute::get(Ctx, Attribute::NonNull));
    Call.setAttributes(AS);
    Changed = true;
  }

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/callbr/invoke.
  Value *Callee = Call.getCalledOperand();
  if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
    return nullptr;

  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
    // Remove the convergent attr on calls when the callee is not convergent.
    if (Call.isConvergent() && !CalleeF->isConvergent() &&
        !CalleeF->isIntrinsic()) {
      LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
                        << "\n");
      Call.setNotConvergent();
      return &Call;
    }

    // If the call and callee calling conventions don't match, and neither one
    // of the calling conventions is compatible with C calling convention
    // this call must be unreachable, as the call is undefined.
    if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
         !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
         !(Call.getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = &Call;
      CreateNonTerminatorUnreachable(OldCall);
      // If OldCall does not return void then replaceInstUsesWith poison.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return eraseInstFromFunction(*OldCall);

      // We cannot remove an invoke or a callbr, because it would change the
      // CFG, just change the callee to a null pointer.
      cast<CallBase>(OldCall)->setCalledFunction(
          CalleeF->getFunctionType(),
          Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }
  }
  // Calling a null function pointer is undefined if a null address isn't
  // defined.
  if ((isa<ConstantPointerNull>(Callee) &&
       !NullPointerIsDefined(Call.getFunction())) ||
      isa<UndefValue>(Callee)) {
    // If Call does not return void then replaceInstUsesWith poison.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!Call.getType()->isVoidTy())
      replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));

    if (Call.isTerminator()) {
      // Can't remove an invoke or callbr because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable, just remove it.
    CreateNonTerminatorUnreachable(&Call);
    return eraseInstFromFunction(Call);
  }

  if (IntrinsicInst *II = findInitTrampoline(Callee))
    return transformCallThroughTrampoline(Call, *II);

  // TODO: Drop this transform once opaque pointer transition is done.
  FunctionType *FTy = Call.getFunctionType();
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
         I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
        replaceUse(*I, CI->getOperand(0));

        // Update the byval type to match the pointer type.
        // Not necessary for opaque pointers.
        PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
        if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
          Call.removeParamAttr(ix, Attribute::ByVal);
          Call.addParamAttr(
              ix, Attribute::getWithByValType(
                      Call.getContext(), NewTy->getElementType()));
        }
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
    InlineAsm *IA = cast<InlineAsm>(Callee);
    if (!IA->canThrow()) {
      // Normal inline asm calls cannot throw - mark them
      // 'nounwind'.
      Call.setDoesNotThrow();
      Changed = true;
    }
  }

  // Try to optimize the call if possible, we require DataLayout for most of
  // this.  None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
    Instruction *I = tryOptimizeCall(CI);
    // If we changed something, return the result; otherwise fall through to
    // the remaining checks.
    if (I) return eraseInstFromFunction(*I);
  }

  if (!Call.use_empty() && !Call.isMustTailCall())
    if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
      Type *CallTy = Call.getType();
      Type *RetArgTy = ReturnedArg->getType();
      if (RetArgTy->canLosslesslyBitCastTo(CallTy))
        return replaceInstUsesWith(
            Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
    }

  if (isAllocLikeFn(&Call, &TLI))
    return visitAllocSite(Call);
  // Handle intrinsics which can be used in both call and invoke context.
  switch (Call.getIntrinsicID()) {
  case Intrinsic::experimental_gc_statepoint: {
    GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
    SmallPtrSet<Value *, 32> LiveGcValues;
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);

      // Remove the relocation if unused.
      if (GCR.use_empty()) {
        eraseInstFromFunction(GCR);
        continue;
      }

      Value *DerivedPtr = GCR.getDerivedPtr();
      Value *BasePtr = GCR.getBasePtr();

      // Undef is undef, even after relocation.
      if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
        replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
        eraseInstFromFunction(GCR);
        continue;
      }

      if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
        // The relocation of null will be null for most any collector.
        // TODO: provide a hook for this in GCStrategy.  There might be some
        // weird collector this property does not hold for.
        if (isa<ConstantPointerNull>(DerivedPtr)) {
          // Use null-pointer of gc_relocate's type to replace it.
          replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
          eraseInstFromFunction(GCR);
          continue;
        }

        // isKnownNonNull -> nonnull attribute
        if (!GCR.hasRetAttr(Attribute::NonNull) &&
            isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
          GCR.addRetAttr(Attribute::NonNull);
          // We discovered a new fact, re-check users.
          Worklist.pushUsersToWorkList(GCR);
        }
      }

      // If we have two copies of the same pointer in the statepoint argument
      // list, canonicalize to one.  This may let us common gc.relocates.
      if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
          GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
        auto *OpIntTy = GCR.getOperand(2)->getType();
        GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
      }

      // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
      // Canonicalize on the type from the uses to the defs

      // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
      LiveGcValues.insert(BasePtr);
      LiveGcValues.insert(DerivedPtr);
    }
    Optional<OperandBundleUse> Bundle =
        GCSP.getOperandBundle(LLVMContext::OB_gc_live);
    unsigned NumOfGCLives = LiveGcValues.size();
    if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
      break;
    // We can reduce the size of gc live bundle.
    DenseMap<Value *, unsigned> Val2Idx;
    std::vector<Value *> NewLiveGc;
    for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) {
      Value *V = Bundle->Inputs[I];
      if (Val2Idx.count(V))
        continue;
      if (LiveGcValues.count(V)) {
        Val2Idx[V] = NewLiveGc.size();
        NewLiveGc.push_back(V);
      } else
        Val2Idx[V] = NumOfGCLives;
    }
    // Update all gc.relocates
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
      Value *BasePtr = GCR.getBasePtr();
      assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
             "Missed live gc for base pointer");
      auto *OpIntTy1 = GCR.getOperand(1)->getType();
      GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
      Value *DerivedPtr = GCR.getDerivedPtr();
      assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
             "Missed live gc for derived pointer");
      auto *OpIntTy2 = GCR.getOperand(2)->getType();
      GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
    }
    // Create new statepoint instruction.
    OperandBundleDef NewBundle("gc-live", NewLiveGc);
    return CallBase::Create(&Call, NewBundle);
  }
  default: { break; }
  }

  return Changed ? &Call : nullptr;
}
/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/callbr/invoke.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
  auto *Callee =
      dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
  if (!Callee)
    return false;

  // If this is a call to a thunk function, don't remove the cast. Thunks are
  // used to transparently forward all incoming parameters and outgoing return
  // values, so it's important to leave the cast in place.
  if (Callee->hasFnAttribute("thunk"))
    return false;

  // If this is a musttail call, the callee's prototype must match the caller's
  // prototype with the exception of pointee types. The code below doesn't
  // implement that, so we can't do this transform.
  // TODO: Do the transform if it only requires adding pointer casts.
  if (Call.isMustTailCall())
    return false;

  Instruction *Caller = &Call;
  const AttributeList &CallerPAL = Call.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
      if (Callee->isDeclaration())
        return false;   // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false;   // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
      if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callbase is an invoke/callbr instruction, and the return value is
    // used by a PHI node in a successor, we cannot change the return type of
    // the call because there is no place to put the cast instruction (without
    // breaking the critical edge).  Bail out in this case.
    if (!Caller->use_empty()) {
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (User *U : II->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
      // FIXME: Be conservative for callbr to avoid a quadratic search.
      if (isa<CallBrInst>(Caller))
        return false;
    }
  }

  unsigned NumActualArgs = Call.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
  // Prevent us turning:
  // declare void @takes_i32_inalloca(i32* inalloca)
  //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  //
  // into:
  //
  //  call void @takes_i32_inalloca(i32* null)
  //
  //  Similarly, avoid folding away bitcasts of byval calls.
  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
      Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
    return false;

  auto AI = Call.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
      return false;   // Cannot transform this parameter value.

    if (AttrBuilder(CallerPAL.getParamAttrs(i))
            .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
      return false;   // Attribute not compatible with transformed value.

    if (Call.isInAllocaArgument(i))
      return false;   // Cannot transform to and from inalloca.

    if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
      return false;

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (!ParamPTy || !ParamPTy->getElementType()->isSized())
        return false;

      Type *CurElTy = Call.getParamByValType(i);
      if (DL.getTypeAllocSize(CurElTy) !=
          DL.getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(Call.getCalledOperand()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters are the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
            cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty()) {
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    unsigned SRetIdx;
    if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
        SRetIdx > FT->getNumParams())
      return false;
  }
  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  SmallVector<Value *, 8> Args;
  SmallVector<AttributeSet, 8> ArgAttrs;
  Args.reserve(NumActualArgs);
  ArgAttrs.reserve(NumActualArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));

  LLVMContext &Ctx = Call.getContext();
  AI = Call.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    Value *NewArg = *AI;
    if ((*AI)->getType() != ParamTy)
      NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
    Args.push_back(NewArg);

    // Add any parameter attributes.
    if (CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
      AttrBuilder AB(CallerPAL.getParamAttrs(i));
      AB.addByValAttr(NewArg->getType()->getPointerElementType());
      ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
    } else
      ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
    ArgAttrs.push_back(AttributeSet());
  }

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        Value *NewArg = *AI;
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
              CastInst::getCastOpcode(*AI, false, PTy, false);
          NewArg = Builder.CreateCast(opcode, *AI, PTy);
        }
        Args.push_back(NewArg);

        // Add any parameter attributes.
        ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttrs();

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
         "missing argument attributes");
  AttributeList NewCallerPAL = AttributeList::get(
      Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);

  SmallVector<OperandBundleDef, 1> OpBundles;
  Call.getOperandBundlesAsDefs(OpBundles);

  CallBase *NewCall;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
                                   II->getUnwindDest(), Args, OpBundles);
  } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
    NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
                                   CBI->getIndirectDests(), Args, OpBundles);
  } else {
    NewCall = Builder.CreateCall(Callee, Args, OpBundles);
    cast<CallInst>(NewCall)->setTailCallKind(
        cast<CallInst>(Caller)->getTailCallKind());
  }
  NewCall->takeName(Caller);
  NewCall->setCallingConv(Call.getCallingConv());
  NewCall->setAttributes(NewCallerPAL);

  // Preserve prof metadata if any.
  NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});

  // Insert a cast of the return type as necessary.
  Instruction *NC = NewCall;
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke/callbr instruction, we should insert it after the
      // first non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
        BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call, just insert cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.pushUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    replaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle()) {
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Caller, NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(Caller);
  }

  eraseInstFromFunction(*Caller);
  return true;
}
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
                                                 IntrinsicInst &Tramp) {
  Value *Callee = Call.getCalledOperand();
  Type *CalleeTy = Callee->getType();
  FunctionType *FTy = Call.getFunctionType();
  AttributeList Attrs = Call.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
  FunctionType *NestFTy = NestF->getFunctionType();

  AttributeList NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestArgNo = 0;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
                                      E = NestFTy->param_end();
         I != E; ++NestArgNo, ++I) {
      AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
      if (AS.hasAttribute(Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = AS;
        break;
      }
    }

    if (NestTy) {
      std::vector<Value *> NewArgs;
      std::vector<AttributeSet> NewArgAttrs;
      NewArgs.reserve(Call.arg_size() + 1);
      NewArgAttrs.reserve(Call.arg_size());

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.
      {
        unsigned ArgNo = 0;
        auto I = Call.arg_begin(), E = Call.arg_end();
        do {
          if (ArgNo == NestArgNo) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp.getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewArgAttrs.push_back(NestAttr);
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));

          ++ArgNo;
          ++I;
        } while (true);
      }

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type *> NewTypes;
      NewTypes.reserve(FTy->getNumParams() + 1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned ArgNo = 0;
        FunctionType::param_iterator I = FTy->param_begin(),
                                     E = FTy->param_end();

        do {
          if (ArgNo == NestArgNo)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++ArgNo;
          ++I;
        } while (true);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
          NestF->getType() == PointerType::getUnqual(NewFTy)
              ? NestF
              : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      AttributeList NewPAL =
          AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
                             Attrs.getRetAttrs(), NewArgAttrs);

      SmallVector<OperandBundleDef, 1> OpBundles;
      Call.getOperandBundlesAsDefs(OpBundles);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
        NewCaller = InvokeInst::Create(NewFTy, NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs, OpBundles);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
        NewCaller =
            CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
                               CBI->getIndirectDests(), NewArgs, OpBundles);
        cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
        cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
        cast<CallInst>(NewCaller)->setTailCallKind(
            cast<CallInst>(Call).getTailCallKind());
        cast<CallInst>(NewCaller)->setCallingConv(
            cast<CallInst>(Call).getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }
      NewCaller->setDebugLoc(Call.getDebugLoc());

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
  Call.setCalledFunction(FTy, NewCallee);