//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumSimplified, "Number of library calls simplified");

static cl::opt<unsigned> GuardWideningWindow(
    "instcombine-guard-widening-window",
    cl::init(3),
    cl::desc("How wide an instruction window to bypass looking for "
             "another guard"));

/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}
/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
/// TODO: This should probably be integrated with visitAllocSites, but that
/// requires a deeper change to allow either unread or unwritten objects.
static bool hasUndefSource(AnyMemTransferInst *MI) {
  auto *Src = MI->getRawSource();
  while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
    if (!Src->hasOneUse())
      return false;
    Src = cast<Instruction>(Src)->getOperand(0);
  }
  return isa<AllocaInst>(Src) && Src->hasOneUse();
}
Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
  Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
  MaybeAlign CopyDstAlign = MI->getDestAlign();
  if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
    MI->setDestAlignment(DstAlign);
    return MI;
  }

  Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
  MaybeAlign CopySrcAlign = MI->getSourceAlign();
  if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
    MI->setSourceAlignment(SrcAlign);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If the source is provably undef, the memcpy/memmove doesn't do anything
  // (unless the transfer is volatile).
  if (hasUndefSource(MI) && !MI->isVolatile()) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic. See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (isa<AtomicMemTransferInst>(MI))
    if (*CopyDstAlign < Size || *CopySrcAlign < Size)
      return nullptr;

  // Use an integer load+store unless we can find something better.
  IntegerType *IntType = IntegerType::get(MI->getContext(), Size<<3);

  // If the memcpy has metadata describing the members, see if we can get the
  // TBAA tag describing our copy.
  AAMDNodes AACopyMD = MI->getAAMetadata();

  if (MDNode *M = AACopyMD.TBAAStruct) {
    AACopyMD.TBAAStruct = nullptr;
    if (M->getNumOperands() == 3 && M->getOperand(0) &&
        mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
        mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
        M->getOperand(1) &&
        mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
        mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
            Size &&
        M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
      AACopyMD.TBAA = cast<MDNode>(M->getOperand(2));
  }

  Value *Src = MI->getArgOperand(1);
  Value *Dest = MI->getArgOperand(0);
  LoadInst *L = Builder.CreateLoad(IntType, Src);
  // Alignment from the mem intrinsic will be better, so use it.
  L->setAlignment(*CopySrcAlign);
  L->setAAMetadata(AACopyMD);
  MDNode *LoopMemParallelMD =
      MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
  if (AccessGroupMD)
    L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);

  StoreInst *S = Builder.CreateStore(L, Dest);
  // Alignment from the mem intrinsic will be better, so use it.
  S->setAlignment(*CopyDstAlign);
  S->setAAMetadata(AACopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  if (AccessGroupMD)
    S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
  S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);

  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
    // non-atomics can be volatile
    L->setVolatile(MT->isVolatile());
    S->setVolatile(MT->isVolatile());
  }
  if (isa<AtomicMemTransferInst>(MI)) {
    // atomics have to be unordered
    L->setOrdering(AtomicOrdering::Unordered);
    S->setOrdering(AtomicOrdering::Unordered);
  }

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
  return MI;
}
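
// Illustrative example of SimplifyAnyMemTransfer's load/store rewrite
// (added for exposition; not taken from the upstream source): a constant
// 8-byte copy such as
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 8, i1 false)
// is rewritten to a single integer load/store pair, roughly
//   %v = load i64, ptr %s, align <source align>
//   store i64 %v, ptr %d, align <dest align>
// after which the now zero-length intrinsic is erased on a later iteration.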
Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
  const Align KnownAlignment =
      getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
  MaybeAlign MemSetAlign = MI->getDestAlign();
  if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
    MI->setDestAlignment(KnownAlignment);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Remove memset with an undef value.
  // FIXME: This is technically incorrect because it might overwrite a poison
  // value. Change to PoisonValue once #52930 is resolved.
  if (isa<UndefValue>(MI->getValue())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  const uint64_t Len = LenC->getLimitedValue();
  assert(Len && "0-sized memory setting should be removed already.");
  const Align Alignment = MI->getDestAlign().valueOrOne();

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (isa<AtomicMemSetInst>(MI))
    if (Alignment < Len)
      return nullptr;

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();

    // Extract the fill value and store.
    const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    Constant *FillVal = ConstantInt::get(ITy, Fill);
    StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
    S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
    auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) {
      if (llvm::is_contained(DbgAssign->location_ops(), FillC))
        DbgAssign->replaceVariableLocationOp(FillC, FillVal);
    };
    for_each(at::getAssignmentMarkers(S), replaceOpForAssignmentMarkers);
    for_each(at::getDPVAssignmentMarkers(S), replaceOpForAssignmentMarkers);

    S->setAlignment(Alignment);
    if (isa<AtomicMemSetInst>(MI))
      S->setOrdering(AtomicOrdering::Unordered);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}
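
// Illustrative example of SimplifyAnyMemSet's fill-splat rewrite (added for
// exposition; not taken from the upstream source): for a constant fill such as
//   call void @llvm.memset.p0.i64(ptr %p, i8 -86, i64 4, i1 false)
// the 0xAA byte is splatted through the 0x0101010101010101 multiply to
// 0xAAAAAAAA, and the call becomes
//   store i32 -1431655766, ptr %p, align 1
// with the intrinsic's length then set to zero so it is deleted later.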
// TODO, Obvious Missing Transforms:
// * Narrow width by halfs excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
  Value *LoadPtr = II.getArgOperand(0);
  const Align Alignment =
      cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();

  // If the mask is all ones or undefs, this is a plain vector load of the 1st
  // argument.
  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
    LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                            "unmaskedload");
    L->copyMetadata(II);
    return L;
  }

  // If we can unconditionally load from this address, replace with a
  // load/select idiom. TODO: use DT for context sensitive query
  if (isDereferenceablePointer(LoadPtr, II.getType(),
                               II.getModule()->getDataLayout(), &II, &AC)) {
    LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                             "unmaskedload");
    LI->copyMetadata(II);
    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
  }

  return nullptr;
}
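
// Illustrative example of simplifyMaskedLoad (added for exposition; not taken
// from the upstream source): with an all-ones mask,
//   %r = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//            <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %pt)
// becomes a plain "load <4 x i32>, ptr %p, align 4"; if %p is known
// dereferenceable, an arbitrary mask instead yields an unconditional load
// followed by a select against the passthrough operand %pt.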
// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, this instruction does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // If the mask is all ones, this is a plain vector store of the 1st argument.
  if (ConstMask->isAllOnesValue()) {
    Value *StorePtr = II.getArgOperand(1);
    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
    StoreInst *S =
        new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
    S->copyMetadata(II);
    return S;
  }

  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt PoisonElts(DemandedElts.getBitWidth(), 0);
  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
                                            PoisonElts))
    return replaceOperand(II, 0, V);

  return nullptr;
}
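
// Illustrative note on simplifyMaskedStore (added for exposition; not taken
// from the upstream source): a masked.store whose constant mask is all zeros
// is simply erased, while one whose mask is all ones degenerates to an
// ordinary "store <N x Ty> %val, ptr %p" using the alignment taken from the
// intrinsic's alignment operand.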
// TODO, Obvious Missing Transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halfs excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
  if (!ConstMask)
    return nullptr;

  // Vector splat address w/known mask -> scalar load
  // Fold the gather to load the source vector first lane
  // because it is reloading the same value each time
  if (ConstMask->isAllOnesValue())
    if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
      auto *VecTy = cast<VectorType>(II.getType());
      const Align Alignment =
          cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
      LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
                                              Alignment, "load.scalar");
      Value *Shuf =
          Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
      return replaceInstUsesWith(II, cast<Instruction>(Shuf));
    }

  return nullptr;
}
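
// Illustrative note on simplifyMaskedGather (added for exposition; not taken
// from the upstream source): when every lane reads from the same pointer
// under an all-ones mask (a gather of splat(%p)), the code above emits one
// scalar load of the element type and broadcasts that value with a vector
// splat instead of issuing the gather.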
// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halfs excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, a scatter does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // Vector splat address -> scalar store
  if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
    // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
    if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
      if (maskContainsAllOneOrUndef(ConstMask)) {
        Align Alignment =
            cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
        StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
                                     Alignment);
        S->copyMetadata(II);
        return replaceInstUsesWith(II, S);
      }
    }
    // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
    // lastlane), ptr
    if (ConstMask->isAllOnesValue()) {
      Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
      VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
      ElementCount VF = WideLoadTy->getElementCount();
      Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
      Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
      Value *Extract =
          Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
      StoreInst *S =
          new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
      S->copyMetadata(II);
      return replaceInstUsesWith(II, S);
    }
  }
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt PoisonElts(DemandedElts.getBitWidth(), 0);
  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
                                            PoisonElts))
    return replaceOperand(II, 0, V);
  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
                                            PoisonElts))
    return replaceOperand(II, 1, V);

  return nullptr;
}
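
// Illustrative note on simplifyMaskedScatter (added for exposition; not taken
// from the upstream source): when the pointer operand is splat(%p) and the
// mask is all ones, every lane writes to the same address, so only the last
// lane is observable; the code above therefore extracts lane VF-1 of the
// value vector and emits a single scalar store to %p.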
/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
                                                    InstCombinerImpl &IC) {
  auto *Arg = II.getArgOperand(0);
  auto *StrippedArg = Arg->stripPointerCasts();
  auto *StrippedInvariantGroupsArg = StrippedArg;
  while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
    if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
        Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
      break;
    StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
  }
  if (StrippedArg == StrippedInvariantGroupsArg)
    return nullptr; // No launders/strips to remove.

  Value *Result = nullptr;

  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
    Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
    Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
  else
    llvm_unreachable(
        "simplifyInvariantGroupIntrinsic only handles launder and strip");
  if (Result->getType()->getPointerAddressSpace() !=
      II.getType()->getPointerAddressSpace())
    Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());

  return cast<Instruction>(Result);
}
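
// Illustrative example of simplifyInvariantGroupIntrinsic (added for
// exposition; not taken from the upstream source): given
//   %a = call ptr @llvm.launder.invariant.group.p0(ptr %x)
//   %b = call ptr @llvm.launder.invariant.group.p0(ptr %a)
// the inner call is skipped and %b is rebuilt as a single launder of %x,
// with an addrspacecast inserted if the pointer address spaces differ.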
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
          II.getIntrinsicID() == Intrinsic::ctlz) &&
         "Expected cttz or ctlz intrinsic");
  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
  Value *Op0 = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *X;
  // ctlz(bitreverse(x)) -> cttz(x)
  // cttz(bitreverse(x)) -> ctlz(x)
  if (match(Op0, m_BitReverse(m_Value(X)))) {
    Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
    Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
    return CallInst::Create(F, {X, II.getArgOperand(1)});
  }

  if (II.getType()->isIntOrIntVectorTy(1)) {
    // ctlz/cttz i1 Op0 --> not Op0
    if (match(Op1, m_Zero()))
      return BinaryOperator::CreateNot(Op0);
    // If zero is poison, then the input can be assumed to be "true", so the
    // instruction simplifies to "false".
    assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
    return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
  }

  Constant *C;

  if (IsTZ) {
    // cttz(-x) -> cttz(x)
    if (match(Op0, m_Neg(m_Value(X))))
      return IC.replaceOperand(II, 0, X);

    // cttz(-x & x) -> cttz(x)
    if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
      return IC.replaceOperand(II, 0, X);

    // cttz(sext(x)) -> cttz(zext(x))
    if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
      auto *Zext = IC.Builder.CreateZExt(X, II.getType());
      auto *CttzZext =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
      return IC.replaceInstUsesWith(II, CttzZext);
    }

    // Zext doesn't change the number of trailing zeros, so narrow:
    // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
      auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
                                                    IC.Builder.getTrue());
      auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
      return IC.replaceInstUsesWith(II, ZextCttz);
    }

    // cttz(abs(x)) -> cttz(x)
    // cttz(nabs(x)) -> cttz(x)
    Value *Y;
    SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
    if (SPF == SPF_ABS || SPF == SPF_NABS)
      return IC.replaceOperand(II, 0, X);

    if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
      return IC.replaceOperand(II, 0, X);

    // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
    if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
        match(Op1, m_One())) {
      Value *ConstCttz =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
      return BinaryOperator::CreateAdd(ConstCttz, X);
    }

    // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
    if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
        match(Op1, m_One())) {
      Value *ConstCttz =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
      return BinaryOperator::CreateSub(ConstCttz, X);
    }
  } else {
    // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
    if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
        match(Op1, m_One())) {
      Value *ConstCtlz =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
      return BinaryOperator::CreateAdd(ConstCtlz, X);
    }

    // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
    if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
        match(Op1, m_One())) {
      Value *ConstCtlz =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
      return BinaryOperator::CreateSub(ConstCtlz, X);
    }
  }

  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);

  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
                                : Known.countMaxLeadingZeros();
  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
                                : Known.countMinLeadingZeros();

  // If all bits above (ctlz) or below (cttz) the first known one are known
  // zero, this value is constant.
  // FIXME: This should be in InstSimplify because we're replacing an
  // instruction with a constant.
  if (PossibleZeros == DefiniteZeros) {
    auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
    return IC.replaceInstUsesWith(II, C);
  }

  // If the input to cttz/ctlz is known to be non-zero,
  // then change the 'ZeroIsPoison' parameter to 'true'
  // because we know the zero behavior can't affect the result.
  if (!Known.One.isZero() ||
      isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
                     &IC.getDominatorTree())) {
    if (!match(II.getArgOperand(1), m_One()))
      return IC.replaceOperand(II, 1, IC.Builder.getTrue());
  }

  // Add range metadata since known bits can't completely reflect what we know.
  auto *IT = cast<IntegerType>(Op0->getType()->getScalarType());
  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}
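
// Illustrative example of foldCttzCtlz (added for exposition; not taken from
// the upstream source): with a constant first shift operand,
//   cttz(shl i32 16, %n), ZeroIsPoison = true
// becomes "add (cttz 16, true), %n", i.e. "add i32 4, %n"; when no fold
// applies but the known bits bound the result, a !range of
// [DefiniteZeros, PossibleZeros + 1) is attached to the intrinsic call.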
static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
         "Expected ctpop intrinsic");
  Type *Ty = II.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  Value *Op0 = II.getArgOperand(0);
  Value *X, *Y;

  // ctpop(bitreverse(x)) -> ctpop(x)
  // ctpop(bswap(x)) -> ctpop(x)
  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
    return IC.replaceOperand(II, 0, X);

  // ctpop(rot(x)) -> ctpop(x)
  if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
       match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
      X == Y)
    return IC.replaceOperand(II, 0, X);

  // ctpop(x | -x) -> bitwidth - cttz(x, false)
  if (Op0->hasOneUse() &&
      match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
    auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
    return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
  }

  // ctpop(~x & (x - 1)) -> cttz(x, false)
  if (match(Op0,
            m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    return CallInst::Create(F, {X, IC.Builder.getFalse()});
  }

  // Zext doesn't change the number of set bits, so narrow:
  // ctpop (zext X) --> zext (ctpop X)
  if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
    Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
    return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
  }

  KnownBits Known(BitWidth);
  IC.computeKnownBits(Op0, Known, 0, &II);

  // If all bits are zero except for exactly one fixed bit, then the result
  // must be 0 or 1, and we can get that answer by shifting to LSB:
  // ctpop (X & 32) --> (X & 32) >> 5
  // TODO: Investigate removing this as its likely unnecessary given the below
  // `isKnownToBeAPowerOfTwo` check.
  if ((~Known.Zero).isPowerOf2())
    return BinaryOperator::CreateLShr(
        Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));

  // More generally we can also handle non-constant power of 2 patterns such as
  // shl/shr(Pow2, X), (X & -X), etc... by transforming:
  // ctpop(Pow2OrZero) --> icmp ne X, 0
  if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
    return CastInst::Create(Instruction::ZExt,
                            IC.Builder.CreateICmp(ICmpInst::ICMP_NE, Op0,
                                                  Constant::getNullValue(Ty)),
                            Ty);

  // Add range metadata since known bits can't completely reflect what we know.
  auto *IT = cast<IntegerType>(Ty->getScalarType());
  unsigned MinCount = Known.countMinPopulation();
  unsigned MaxCount = Known.countMaxPopulation();
  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}
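
// Illustrative example of foldCtpop (added for exposition; not taken from the
// upstream source): if %x is known to be a power of two or zero (for instance
// an (%a & -%a) shape), its popcount can only be 0 or 1, so ctpop(%x) becomes
//   zext (icmp ne i32 %x, 0) to i32
// via the isKnownToBeAPowerOfTwo path above.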
/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
  if (!C)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();

  // Only perform this transformation for <8 x i8> vector types.
  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
    return nullptr;

  int Indexes[8];

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = C->getAggregateElement(I);

    if (!COp || !isa<ConstantInt>(COp))
      return nullptr;

    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();

    // Make sure the mask indices are in range.
    if ((unsigned)Indexes[I] >= NumElts)
      return nullptr;
  }

  auto *V1 = II.getArgOperand(0);
  auto *V2 = Constant::getNullValue(V1->getType());
  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
}
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
                             unsigned NumOperands) {
  assert(I.arg_size() >= NumOperands && "Not enough operands");
  assert(E.arg_size() >= NumOperands && "Not enough operands");
  for (unsigned i = 0; i < NumOperands; i++)
    if (I.getArgOperand(i) != E.getArgOperand(i))
      return false;
  return true;
}
// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
//   call @llvm.foo.end(i1 0) ; &I
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
                          std::function<bool(const IntrinsicInst &)> IsStart) {
  // We start from the end intrinsic and scan backwards, so that InstCombine
  // has already processed (and potentially removed) all the instructions
  // before the end intrinsic.
  BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
  for (; BI != BE; ++BI) {
    if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
      if (I->isDebugOrPseudoInst() ||
          I->getIntrinsicID() == EndI.getIntrinsicID())
        continue;
      if (IsStart(*I)) {
        if (haveSameOperands(EndI, *I, EndI.arg_size())) {
          IC.eraseInstFromFunction(*I);
          IC.eraseInstFromFunction(EndI);
          return true;
        }
        // Skip start intrinsics that don't pair with this end intrinsic.
        continue;
      }
      break;
    }
  }

  return false;
}
Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
  removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
    return I.getIntrinsicID() == Intrinsic::vastart ||
           I.getIntrinsicID() == Intrinsic::vacopy;
  });
  return nullptr;
}
static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
  assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
  Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
  if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
    Call.setArgOperand(0, Arg1);
    Call.setArgOperand(1, Arg0);
    return &Call;
  }
  return nullptr;
}
/// Creates a result tuple for an overflow intrinsic \p II with a given
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
                                        Constant *Overflow) {
  Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
  StructType *ST = cast<StructType>(II->getType());
  Constant *Struct = ConstantStruct::get(ST, V);
  return InsertValueInst::Create(Struct, Result, 0);
}

Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
  WithOverflowInst *WO = cast<WithOverflowInst>(II);
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
                            WO->getRHS(), *WO, OperationResult, OverflowResult))
    return createOverflowTuple(WO, OperationResult, OverflowResult);
  return nullptr;
}
static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
  Ty = Ty->getScalarType();
  return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
}

static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
  Ty = Ty->getScalarType();
  return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
}
/// \returns the compare predicate type if the test performed by
/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
/// floating-point environment assumed for \p F for type \p Ty
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
                                              const Function &F, Type *Ty) {
  switch (static_cast<unsigned>(Mask)) {
  case fcZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  case fcZero | fcSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  case fcPositive | fcNegZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  case fcPositive | fcNegZero | fcNegSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  case fcPosSubnormal | fcPosNormal | fcPosInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  case fcNegative | fcPosZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  case fcNegative | fcPosZero | fcPosSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  case fcNegSubnormal | fcNegNormal | fcNegInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  case fcPosNormal | fcPosInf:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  case fcNegNormal | fcNegInf:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  case ~fcZero & ~fcNan:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  case ~(fcZero | fcSubnormal) & ~fcNan:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  default:
    break;
  }

  return FCmpInst::BAD_FCMP_PREDICATE;
}
Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
  Value *Src0 = II.getArgOperand(0);
  Value *Src1 = II.getArgOperand(1);
  const ConstantInt *CMask = cast<ConstantInt>(Src1);
  FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
  const bool IsUnordered = (Mask & fcNan) == fcNan;
  const bool IsOrdered = (Mask & fcNan) == fcNone;
  const FPClassTest OrderedMask = Mask & ~fcNan;
  const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;

  const bool IsStrict = II.isStrictFP();

  Value *FNegSrc;
  if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
    // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
    II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
    return replaceOperand(II, 0, FNegSrc);
  }

  Value *FAbsSrc;
  if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
    II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
    return replaceOperand(II, 0, FAbsSrc);
  }

  if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
    // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
    // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
    // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
    Constant *Inf = ConstantFP::getInfinity(Src0->getType());
    FCmpInst::Predicate Pred =
        IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
    if (OrderedInvertedMask == fcInf)
      Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;

    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
    Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
    CmpInf->takeName(&II);
    return replaceInstUsesWith(II, CmpInf);
  }

  if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
    // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
    // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
    // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
    Constant *Inf =
        ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
    Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
                               : Builder.CreateFCmpOEQ(Src0, Inf);

    EqInf->takeName(&II);
    return replaceInstUsesWith(II, EqInf);
  }

  if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
    // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
    // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
    // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
    Constant *Inf = ConstantFP::getInfinity(Src0->getType(),
                                            OrderedInvertedMask == fcNegInf);
    Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
                               : Builder.CreateFCmpONE(Src0, Inf);
    NeInf->takeName(&II);
    return replaceInstUsesWith(II, NeInf);
  }

  if (Mask == fcNan && !IsStrict) {
    // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
    // exceptions.
    Value *IsNan =
        Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
    IsNan->takeName(&II);
    return replaceInstUsesWith(II, IsNan);
  }

  if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
    // Equivalent of !isnan. Replace with standard fcmp.
    Value *FCmp =
        Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
    FCmp->takeName(&II);
    return replaceInstUsesWith(II, FCmp);
  }

  FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;

  // Try to replace with an fcmp with 0
  //
  // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
  // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
  // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
  // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
  //
  // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
  // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
  //
  // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
  // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
  //
  if (!IsStrict && (IsOrdered || IsUnordered) &&
      (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
                                     Src0->getType())) !=
          FCmpInst::BAD_FCMP_PREDICATE) {
    Constant *Zero = ConstantFP::getZero(Src0->getType());
    // Equivalent of == 0.
    Value *FCmp = Builder.CreateFCmp(
        IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
        Src0, Zero);

    FCmp->takeName(&II);
    return replaceInstUsesWith(II, FCmp);
  }

  KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);

  // Clear test bits we know must be false from the source value.
  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
  // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
  if ((Mask & Known.KnownFPClasses) != Mask) {
    II.setArgOperand(
        1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
    return &II;
  }

  // If none of the tests which can return false are possible, fold to true.
  // fp_class (nnan x), ~(qnan|snan) -> true
  // fp_class (ninf x), ~(ninf|pinf) -> true
  if (Mask == Known.KnownFPClasses)
    return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));

  return nullptr;
}
static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
                                        const DataLayout &DL,
                                        AssumptionCache *AC,
                                        DominatorTree *DT) {
  KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
  if (Known.isNonNegative())
    return false;
  if (Known.isNegative())
    return true;

  Value *X, *Y;
  if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
    return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);

  return isImpliedByDomCondition(
      ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}

static std::optional<bool> getKnownSignOrZero(Value *Op, Instruction *CxtI,
                                              const DataLayout &DL,
                                              AssumptionCache *AC,
                                              DominatorTree *DT) {
  if (std::optional<bool> Sign = getKnownSign(Op, CxtI, DL, AC, DT))
    return Sign;

  Value *X, *Y;
  if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
    return isImpliedByDomCondition(ICmpInst::ICMP_SLE, X, Y, CxtI, DL);

  return std::nullopt;
}

/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, Instruction *CxtI,
                                 const DataLayout &DL, AssumptionCache *AC,
                                 DominatorTree *DT) {
  std::optional<bool> Known1 = getKnownSign(Op1, CxtI, DL, AC, DT);
  if (!Known1)
    return false;
  std::optional<bool> Known0 = getKnownSign(Op0, CxtI, DL, AC, DT);
  if (!Known0)
    return false;
  return *Known0 == *Known1;
}
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
          MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
         "Expected a min or max intrinsic");

  // TODO: Match vectors with undef elements, but undef may not propagate.
  Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
  Value *X;
  const APInt *C0, *C1;
  if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
      !match(Op1, m_APInt(C1)))
    return nullptr;

  // Check for necessary no-wrap and overflow constraints.
  bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
  auto *Add = cast<BinaryOperator>(Op0);
  if ((IsSigned && !Add->hasNoSignedWrap()) ||
      (!IsSigned && !Add->hasNoUnsignedWrap()))
    return nullptr;

  // If the constant difference overflows, then instsimplify should reduce the
  // min/max to the add or C1.
  bool Overflow;
  APInt CDiff =
      IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
  assert(!Overflow && "Expected simplify of min/max");

  // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
  // Note: the "mismatched" no-overflow setting does not propagate.
  Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
  Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
  return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
                  : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
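
// Illustrative example of moveAddAfterMinMax (added for exposition; not taken
// from the upstream source): with nsw arithmetic,
//   smax (add nsw %x, 16), 32
// is reassociated to
//   add nsw (smax %x, 16), 16
// since 32 - 16 becomes the new clamp constant, exposing further
// min/max-with-constant combines on the inner smax.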
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
  Type *Ty = MinMax1.getType();

  // We are looking for a tree of:
  // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
  // Where the min and max could be reversed
  Instruction *MinMax2;
  BinaryOperator *AddSub;
  const APInt *MinValue, *MaxValue;
  if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
    if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
      return nullptr;
  } else if (match(&MinMax1,
                   m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
    if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
      return nullptr;
  } else
    return nullptr;

  // Check that the constants clamp a saturate, and that the new type would be
  // sensible to convert to.
  if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
    return nullptr;
  // In what bitwidth can this be treated as saturating arithmetics?
  unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
  // FIXME: This isn't quite right for vectors, but using the scalar type is a
  // good first approximation for what should be done there.
  if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
    return nullptr;

  // Also make sure that the inner min/max and the add/sub have one use.
  if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
    return nullptr;

  // Create the new type (which can be a vector type)
  Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);

  Intrinsic::ID IntrinsicID;
  if (AddSub->getOpcode() == Instruction::Add)
    IntrinsicID = Intrinsic::sadd_sat;
  else if (AddSub->getOpcode() == Instruction::Sub)
    IntrinsicID = Intrinsic::ssub_sat;
  else
    return nullptr;

  // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
  // is usually achieved via a sext from a smaller type.
  if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
          NewBitWidth ||
      ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
    return nullptr;

  // Finally create and return the sat intrinsic, truncated to the new type
  Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
  Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
  Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
  Value *Sat = Builder.CreateCall(F, {AT, BT});
  return CastInst::Create(Instruction::SExt, Sat, Ty);
}
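
// Illustrative example of matchSAddSubSat (added for exposition; not taken
// from the upstream source): a 32-bit clamp of a widened 16-bit sum,
//   %s  = add nsw i32 (sext i16 %a), (sext i16 %b)
//   %lo = smax i32 %s, -32768
//   %r  = smin i32 %lo, 32767
// (with the min and max possibly swapped) is recognized here and rewritten as
//   sext (call i16 @llvm.sadd.sat.i16(i16 %a, i16 %b)) to i32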
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
                                        InstCombiner::BuilderTy &Builder) {
  Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
  Value *X;
  const APInt *C0, *C1;
  if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
    return nullptr;

  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  switch (II->getIntrinsicID()) {
  case Intrinsic::smax:
    if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_SGT;
    break;
  case Intrinsic::smin:
    if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_SLT;
    break;
  case Intrinsic::umax:
    if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_UGT;
    break;
  case Intrinsic::umin:
    if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_ULT;
    break;
  default:
    llvm_unreachable("Expected min/max intrinsic");
  }
  if (Pred == CmpInst::BAD_ICMP_PREDICATE)
    return nullptr;

  // max (min X, 42), 41 --> X > 41 ? 42 : 41
  // min (max X, 42), 43 --> X < 43 ? 42 : 43
  Value *Cmp = Builder.CreateICmp(Pred, X, I1);
  return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
/// If this min/max has a constant operand and an operand that is a matching
/// min/max with a constant operand, constant-fold the 2 constant operands.
static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
                                             IRBuilderBase &Builder) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  if (!LHS || LHS->getIntrinsicID() != MinMaxID)
    return nullptr;

  Constant *C0, *C1;
  if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
      !match(II->getArgOperand(1), m_ImmConstant(C1)))
    return nullptr;

  // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
  ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
  Value *CondC = Builder.CreateICmp(Pred, C0, C1);
  Value *NewC = Builder.CreateSelect(CondC, C0, C1);
  return Builder.CreateIntrinsic(MinMaxID, II->getType(),
                                 {LHS->getArgOperand(0), NewC});
}
/// If this min/max has a matching min/max operand with a constant, try to push
/// the constant operand into this instruction. This can enable more folds.
static Instruction *
reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  // Match and capture a min/max operand candidate.
  Value *X, *Y;
  Constant *C;
  Instruction *Inner;
  if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
                                  m_Instruction(Inner),
                                  m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
                              m_Value(Y))))
    return nullptr;

  // The inner op must match. Check for constants to avoid infinite loops.
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
  if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
      match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
    return nullptr;

  // max (max X, C), Y --> max (max X, Y), C
  Function *MinMax =
      Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
  Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
  NewInner->takeName(Inner);
  return CallInst::Create(MinMax, {NewInner, C});
}
/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
  // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
      RHS->getIntrinsicID() != MinMaxID ||
      (!LHS->hasOneUse() && !RHS->hasOneUse()))
    return nullptr;

  Value *A = LHS->getArgOperand(0);
  Value *B = LHS->getArgOperand(1);
  Value *C = RHS->getArgOperand(0);
  Value *D = RHS->getArgOperand(1);

  // Look for a common operand.
  Value *MinMaxOp = nullptr;
  Value *ThirdOp = nullptr;
  if (LHS->hasOneUse()) {
    // If the LHS is only used in this chain and the RHS is used outside of it,
    // reuse the RHS min/max because that will eliminate the LHS.
    if (D == A || C == A) {
      // min(min(a, b), min(c, a)) --> min(min(c, a), b)
      // min(min(a, b), min(a, d)) --> min(min(a, d), b)
      MinMaxOp = RHS;
      ThirdOp = B;
    } else if (D == B || C == B) {
      // min(min(a, b), min(c, b)) --> min(min(c, b), a)
      // min(min(a, b), min(b, d)) --> min(min(b, d), a)
      MinMaxOp = RHS;
      ThirdOp = A;
    }
  } else {
    assert(RHS->hasOneUse() && "Expected one-use operand");
    // Reuse the LHS. This will eliminate the RHS.
    if (D == A || D == B) {
      // min(min(a, b), min(c, a)) --> min(min(a, b), c)
      // min(min(a, b), min(c, b)) --> min(min(a, b), c)
      MinMaxOp = LHS;
      ThirdOp = C;
    } else if (C == A || C == B) {
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
      // min(min(a, b), min(c, b)) --> min(min(a, b), d)
      MinMaxOp = LHS;
      ThirdOp = D;
    }
  }

  if (!MinMaxOp || !ThirdOp)
    return nullptr;

  Module *Mod = II->getModule();
  Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
  return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
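
// Illustrative example of factorizeMinMaxTree (added for exposition; not
// taken from the upstream source): for
//   umin (umin %a, %b), (umin %c, %a)
// the common operand %a lets the tree be re-expressed as
//   umin (umin %c, %a), %b
// (reusing whichever inner umin still has other users), so one of the three
// min/max calls becomes redundant.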
/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
static Instruction *
foldShuffledIntrinsicOperands(IntrinsicInst *II,
                              InstCombiner::BuilderTy &Builder) {
  // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
  //       etc. Use llvm::isTriviallyVectorizable() and related to determine
  //       which intrinsics are safe to shuffle?
  switch (II->getIntrinsicID()) {
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::fma:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
    break;
  default:
    return nullptr;
  }

  Value *X;
  ArrayRef<int> Mask;
  if (!match(II->getArgOperand(0),
             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
    return nullptr;

  // At least 1 operand must have 1 use because we are creating 2 instructions.
  if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
    return nullptr;

  // See if all arguments are shuffled with the same mask.
  SmallVector<Value *, 4> NewArgs(II->arg_size());
  NewArgs[0] = X;
  Type *SrcTy = X->getType();
  for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
    if (!match(II->getArgOperand(i),
               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
        X->getType() != SrcTy)
      return nullptr;
    NewArgs[i] = X;
  }

  // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
  Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
  Value *NewIntrinsic =
      Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
  return new ShuffleVectorInst(NewIntrinsic, Mask);
}
/// Fold the following cases and accepts bswap and bitreverse intrinsics:
///   bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
///   bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
template <Intrinsic::ID IntrID>
static Instruction *foldBitOrderCrossLogicOp(Value *V,
                                             InstCombiner::BuilderTy &Builder) {
  static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
                "This helper only supports BSWAP and BITREVERSE intrinsics");

  Value *X, *Y;
  // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
  // don't match ConstantExpr that aren't meaningful for this transform.
  if (match(V, m_OneUse(m_BitwiseLogic(m_Value(X), m_Value(Y)))) &&
      isa<BinaryOperator>(V)) {
    Value *OldReorderX, *OldReorderY;
    BinaryOperator::BinaryOps Op = cast<BinaryOperator>(V)->getOpcode();

    // If both X and Y are bswap/bitreverse, the transform reduces the number
    // of instructions even if there's multiuse.
    // If only one operand is bswap/bitreverse, we need to ensure the operand
    // have only one use.
    if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
        match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
      return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
    }

    if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
      return BinaryOperator::Create(Op, OldReorderX, NewReorder);
    }

    if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
      return BinaryOperator::Create(Op, NewReorder, OldReorderY);
    }
  }
  return nullptr;
}
1433 /// CallInst simplification. This mostly only handles folding of intrinsic
1434 /// instructions. For normal calls, it allows visitCallBase to do the heavy
1436 Instruction
*InstCombinerImpl::visitCallInst(CallInst
&CI
) {
1437 // Don't try to simplify calls without uses. It will not do anything useful,
1438 // but will result in the following folds being skipped.
1439 if (!CI
.use_empty()) {
1440 SmallVector
<Value
*, 4> Args
;
1441 Args
.reserve(CI
.arg_size());
1442 for (Value
*Op
: CI
.args())
1444 if (Value
*V
= simplifyCall(&CI
, CI
.getCalledOperand(), Args
,
1445 SQ
.getWithInstruction(&CI
)))
1446 return replaceInstUsesWith(CI
, V
);
1449 if (Value
*FreedOp
= getFreedOperand(&CI
, &TLI
))
1450 return visitFree(CI
, FreedOp
);
1452 // If the caller function (i.e. us, the function that contains this CallInst)
1453 // is nounwind, mark the call as nounwind, even if the callee isn't.
1454 if (CI
.getFunction()->doesNotThrow() && !CI
.doesNotThrow()) {
1455 CI
.setDoesNotThrow();
1459 IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(&CI
);
1460 if (!II
) return visitCallBase(CI
);
1462 // For atomic unordered mem intrinsics if len is not a positive or
1463 // not a multiple of element size then behavior is undefined.
1464 if (auto *AMI
= dyn_cast
<AtomicMemIntrinsic
>(II
))
1465 if (ConstantInt
*NumBytes
= dyn_cast
<ConstantInt
>(AMI
->getLength()))
1466 if (NumBytes
->isNegative() ||
1467 (NumBytes
->getZExtValue() % AMI
->getElementSizeInBytes() != 0)) {
1468 CreateNonTerminatorUnreachable(AMI
);
1469 assert(AMI
->getType()->isVoidTy() &&
1470 "non void atomic unordered mem intrinsic");
1471 return eraseInstFromFunction(*AMI
);
1474 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1475 // instead of in visitCallBase.
1476 if (auto *MI
= dyn_cast
<AnyMemIntrinsic
>(II
)) {
1477 bool Changed
= false;
1479 // memmove/cpy/set of zero bytes is a noop.
1480 if (Constant
*NumBytes
= dyn_cast
<Constant
>(MI
->getLength())) {
1481 if (NumBytes
->isNullValue())
1482 return eraseInstFromFunction(CI
);
1485 // No other transformations apply to volatile transfers.
1486 if (auto *M
= dyn_cast
<MemIntrinsic
>(MI
))
1487 if (M
->isVolatile())
1490 // If we have a memmove and the source operation is a constant global,
1491 // then the source and dest pointers can't alias, so we can change this
1492 // into a call to memcpy.
1493 if (auto *MMI
= dyn_cast
<AnyMemMoveInst
>(MI
)) {
1494 if (GlobalVariable
*GVSrc
= dyn_cast
<GlobalVariable
>(MMI
->getSource()))
1495 if (GVSrc
->isConstant()) {
1496 Module
*M
= CI
.getModule();
1497 Intrinsic::ID MemCpyID
=
1498 isa
<AtomicMemMoveInst
>(MMI
)
1499 ? Intrinsic::memcpy_element_unordered_atomic
1500 : Intrinsic::memcpy
;
1501 Type
*Tys
[3] = { CI
.getArgOperand(0)->getType(),
1502 CI
.getArgOperand(1)->getType(),
1503 CI
.getArgOperand(2)->getType() };
1504 CI
.setCalledFunction(Intrinsic::getDeclaration(M
, MemCpyID
, Tys
));
1509 if (AnyMemTransferInst
*MTI
= dyn_cast
<AnyMemTransferInst
>(MI
)) {
1510 // memmove(x,x,size) -> noop.
1511 if (MTI
->getSource() == MTI
->getDest())
1512 return eraseInstFromFunction(CI
);
1515 // If we can determine a pointer alignment that is bigger than currently
1516 // set, update the alignment.
1517 if (auto *MTI
= dyn_cast
<AnyMemTransferInst
>(MI
)) {
1518 if (Instruction
*I
= SimplifyAnyMemTransfer(MTI
))
1520 } else if (auto *MSI
= dyn_cast
<AnyMemSetInst
>(MI
)) {
1521 if (Instruction
*I
= SimplifyAnyMemSet(MSI
))
1525 if (Changed
) return II
;
  // For fixed width vector result intrinsics, use the generic demanded vector
  // support.
  if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
    auto VWidth = IIFVTy->getNumElements();
    APInt PoisonElts(VWidth, 0);
    APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
      if (V != II)
        return replaceInstUsesWith(*II, V);
      return II;
    }
  }

  if (II->isCommutative()) {
    if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
      replaceOperand(*II, 0, Pair->first);
      replaceOperand(*II, 1, Pair->second);
      return II;
    }

    if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
      return NewCall;
  }
  // Unused constrained FP intrinsic calls may have a declared side effect,
  // which prevents them from being removed. In some cases, however, the side
  // effect is actually absent. To detect this case, call
  // SimplifyConstrainedFPCall. If it returns a replacement, the call may be
  // removed.
  if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
    if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
      return eraseInstFromFunction(CI);
  }

  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::objectsize: {
    SmallVector<Instruction *> InsertedInstructions;
    if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
                                       &InsertedInstructions)) {
      for (Instruction *Inserted : InsertedInstructions)
        Worklist.add(Inserted);
      return replaceInstUsesWith(CI, V);
    }
    return nullptr;
  }
  case Intrinsic::abs: {
    Value *IIOperand = II->getArgOperand(0);
    bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();

    // abs(-x) -> abs(x)
    // TODO: Copy nsw if it was present on the neg?
    Value *X;
    if (match(IIOperand, m_Neg(m_Value(X))))
      return replaceOperand(*II, 0, X);
    if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
      return replaceOperand(*II, 0, X);
    if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
      return replaceOperand(*II, 0, X);

    if (std::optional<bool> Known =
            getKnownSignOrZero(IIOperand, II, DL, &AC, &DT)) {
      // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
      // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
      if (!*Known)
        return replaceInstUsesWith(*II, IIOperand);

      // abs(x) -> -x if x < 0
      // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
      if (IntMinIsPoison)
        return BinaryOperator::CreateNSWNeg(IIOperand);
      return BinaryOperator::CreateNeg(IIOperand);
    }

    // abs (sext X) --> zext (abs X*)
    // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
    if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
      Value *NarrowAbs =
          Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
      return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
    }

    // Match a complicated way to check if a number is odd/even:
    // abs (srem X, 2) --> and X, 1
    const APInt *C;
    if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
      return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));

    break;
  }
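  // Illustrative example (added comment, not from the original source): the
  // "abs (srem X, 2) --> and X, 1" fold above turns
  //   %r = srem i32 %x, 2
  //   %a = call i32 @llvm.abs.i32(i32 %r, i1 false)
  // into
  //   %a = and i32 %x, 1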
  case Intrinsic::umin: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    // umin(x, 1) == zext(x != 0)
    if (match(I1, m_One())) {
      assert(II->getType()->getScalarSizeInBits() != 1 &&
             "Expected simplify of umin with max constant");
      Value *Zero = Constant::getNullValue(I0->getType());
      Value *Cmp = Builder.CreateICmpNE(I0, Zero);
      return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
    }
    [[fallthrough]];
  }
  case Intrinsic::umax: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
        (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
      Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
      return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
    }
    Constant *C;
    if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
        I0->hasOneUse()) {
      if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType())) {
        Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
        return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
      }
    }
    // If both operands of unsigned min/max are sign-extended, it is still ok
    // to narrow the operation.
    [[fallthrough]];
  }
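  // Illustrative example (added comment, not from the original source): the
  // zext narrowing above rewrites
  //   umax(zext i8 %x to i32, zext i8 %y to i32)
  // as
  //   zext (umax i8 %x, %y) to i32
  // when one of the extends has a single use.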
  case Intrinsic::smax:
  case Intrinsic::smin: {
    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
        (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
      Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
      return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
    }

    Constant *C;
    if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
        I0->hasOneUse()) {
      if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType())) {
        Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
        return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
      }
    }

    // umin(i1 X, i1 Y) -> and i1 X, Y
    // smax(i1 X, i1 Y) -> and i1 X, Y
    if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
        II->getType()->isIntOrIntVectorTy(1)) {
      return BinaryOperator::CreateAnd(I0, I1);
    }

    // umax(i1 X, i1 Y) -> or i1 X, Y
    // smin(i1 X, i1 Y) -> or i1 X, Y
    if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
        II->getType()->isIntOrIntVectorTy(1)) {
      return BinaryOperator::CreateOr(I0, I1);
    }

    if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
      // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
      // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
      // TODO: Canonicalize neg after min/max if I1 is constant.
      if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
          (I0->hasOneUse() || I1->hasOneUse())) {
        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
        return BinaryOperator::CreateNSWNeg(InvMaxMin);
      }
    }
    // (umax X, (xor X, Pow2))
    //      -> (or X, Pow2)
    // (umin X, (xor X, Pow2))
    //      -> (and X, ~Pow2)
    // (smax X, (xor X, Pos_Pow2))
    //      -> (or X, Pos_Pow2)
    // (smin X, (xor X, Pos_Pow2))
    //      -> (and X, ~Pos_Pow2)
    // (smax X, (xor X, Neg_Pow2))
    //      -> (and X, ~Neg_Pow2)
    // (smin X, (xor X, Neg_Pow2))
    //      -> (or X, Neg_Pow2)
    if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
         match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
        isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
      bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
      bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;

      if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
        auto KnownSign = getKnownSign(X, II, DL, &AC, &DT);
        if (KnownSign == std::nullopt) {
          UseOr = false;
          UseAndN = false;
        } else if (*KnownSign /* true is Signed. */) {
          UseOr ^= true;
          UseAndN ^= true;
          Type *Ty = I0->getType();
          // Negative power of 2 must be IntMin. It's possible to be able to
          // prove negative / power of 2 without actually having known bits, so
          // just get the value by hand.
          X = Constant::getIntegerValue(
              Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
        }
      }
      if (UseOr)
        return BinaryOperator::CreateOr(I0, X);
      else if (UseAndN)
        return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
    }
    // If we can eliminate ~A and Y is free to invert:
    // max ~A, Y --> ~(min A, ~Y)
    //
    // Examples include:
    // max ~A, ~Y --> ~(min A, Y)
    // max ~A, C --> ~(min A, ~C)
    // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
    auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
      Value *A;
      if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
          !isFreeToInvert(A, A->hasOneUse())) {
        if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
          Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
          Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
          return BinaryOperator::CreateNot(InvMaxMin);
        }
      }
      return nullptr;
    };

    if (Instruction *I = moveNotAfterMinMax(I0, I1))
      return I;
    if (Instruction *I = moveNotAfterMinMax(I1, I0))
      return I;

    if (Instruction *I = moveAddAfterMinMax(II, Builder))
      return I;
    // smax(X, -X) --> abs(X)
    // smin(X, -X) --> -abs(X)
    // umax(X, -X) --> -abs(X)
    // umin(X, -X) --> abs(X)
    if (isKnownNegation(I0, I1)) {
      // We can choose either operand as the input to abs(), but if we can
      // eliminate the only use of a value, that's better for subsequent
      // transforms/analysis.
      if (I0->hasOneUse() && !I1->hasOneUse())
        std::swap(I0, I1);

      // This is some variant of abs(). See if we can propagate 'nsw' to the abs
      // operation and potentially its negation.
      bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
      Value *Abs = Builder.CreateBinaryIntrinsic(
          Intrinsic::abs, I1,
          ConstantInt::getBool(II->getContext(), IntMinIsPoison));

      // We don't have a "nabs" intrinsic, so negate if needed based on the
      // max/min operation.
      if (IID == Intrinsic::smin || IID == Intrinsic::umax)
        Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
      return replaceInstUsesWith(CI, Abs);
    }
    if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
      return Sel;

    if (Instruction *SAdd = matchSAddSubSat(*II))
      return SAdd;

    if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder))
      return replaceInstUsesWith(*II, NewMinMax);

    if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
      return R;

    if (Instruction *NewMinMax = factorizeMinMaxTree(II))
      return NewMinMax;
    // Try to fold minmax with constant RHS based on range information
    const APInt *RHSC;
    if (match(I1, m_APIntAllowUndef(RHSC))) {
      ICmpInst::Predicate Pred =
          ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
      bool IsSigned = MinMaxIntrinsic::isSigned(IID);
      ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
          I0, IsSigned, SQ.getWithInstruction(II));
      if (!LHS_CR.isFullSet()) {
        if (LHS_CR.icmp(Pred, *RHSC))
          return replaceInstUsesWith(*II, I0);
        if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
          return replaceInstUsesWith(*II,
                                     ConstantInt::get(II->getType(), *RHSC));
      }
    }

    break;
  }
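  // Illustrative example (added comment, not from the original source): if
  // %n = sub i32 0, %x, the "smax(X, -X) --> abs(X)" fold above replaces
  //   call i32 @llvm.smax.i32(i32 %x, i32 %n)
  // with an equivalent @llvm.abs.i32 call (its int_min_is_poison flag is set
  // only when the negation is known nsw).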
  case Intrinsic::bitreverse: {
    Value *IIOperand = II->getArgOperand(0);
    // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
    Value *X;
    if (match(IIOperand, m_ZExt(m_Value(X))) &&
        X->getType()->isIntOrIntVectorTy(1)) {
      Type *Ty = II->getType();
      APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
      return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
                                ConstantInt::getNullValue(Ty));
    }

    if (Instruction *crossLogicOpFold =
            foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
      return crossLogicOpFold;

    break;
  }
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);

    // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
    // inverse-shift-of-bswap:
    // bswap (shl X, Y) --> lshr (bswap X), Y
    // bswap (lshr X, Y) --> shl (bswap X), Y
    Value *X, *Y;
    if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
      // The transform allows undef vector elements, so try a constant match
      // first. If knownbits can handle that case, that clause could be removed.
      unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
      const APInt *C;
      if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
          MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
        Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
        BinaryOperator::BinaryOps InverseShift =
            cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
                ? Instruction::LShr
                : Instruction::Shl;
        return BinaryOperator::Create(InverseShift, NewSwap, Y);
      }
    }

    KnownBits Known = computeKnownBits(IIOperand, 0, II);
    uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
    uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
    unsigned BW = Known.getBitWidth();

    // bswap(x) -> shift(x) if x has exactly one "active byte"
    if (BW - LZ - TZ == 8) {
      assert(LZ != TZ && "active byte cannot be in the middle");
      if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
        return BinaryOperator::CreateNUWShl(
            IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
      // -> lshr(x) if the "active byte" is in the high part of x
      return BinaryOperator::CreateExactLShr(
          IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
    }

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getScalarSizeInBits() - BW;
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder.CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }

    if (Instruction *crossLogicOpFold =
            foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
      return crossLogicOpFold;
    }

    // Try to fold into bitreverse if bswap is the root of the expression tree.
    if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
                                                    /*MatchBitReversals*/ true))
      return BitOp;
    break;
  }
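  // Illustrative example (added comment, not from the original source): for an
  // i32 %x where only bits 8..15 can be nonzero (LZ = 16, TZ = 8), the single
  // "active byte" fold above replaces bswap(%x) with shl nuw i32 %x, 8.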
  case Intrinsic::masked_load:
    if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
      return replaceInstUsesWith(CI, SimplifiedMaskedOp);
    break;
  case Intrinsic::masked_store:
    return simplifyMaskedStore(*II);
  case Intrinsic::masked_gather:
    return simplifyMaskedGather(*II);
  case Intrinsic::masked_scatter:
    return simplifyMaskedScatter(*II);
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
      return replaceInstUsesWith(*II, SkippedBarrier);
    break;
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // 0 and 1 are handled in instsimplify
      // powi(x, -1) -> 1/x
      if (Power->isMinusOne())
        return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
                                             II->getArgOperand(0), II);
      // powi(x, 2) -> x*x
      if (Power->equalsInt(2))
        return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
                                             II->getArgOperand(0), II);

      if (!Power->getValue()[0]) {
        Value *X;
        // If power is even:
        // powi(-x, p) -> powi(x, p)
        // powi(fabs(x), p) -> powi(x, p)
        // powi(copysign(x, y), p) -> powi(x, p)
        if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
            match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
            match(II->getArgOperand(0),
                  m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
          return replaceOperand(*II, 0, X);
      }
    }
    break;
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
    if (auto *I = foldCttzCtlz(*II, *this))
      return I;
    break;

  case Intrinsic::ctpop:
    if (auto *I = foldCtpop(*II, *this))
      return I;
    break;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
    Type *Ty = II->getType();
    unsigned BitWidth = Ty->getScalarSizeInBits();
    Constant *ShAmtC;
    if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
      // Canonicalize a shift amount constant operand to modulo the bit-width.
      Constant *WidthC = ConstantInt::get(Ty, BitWidth);
      Constant *ModuloC =
          ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
      if (!ModuloC)
        return nullptr;
      if (ModuloC != ShAmtC)
        return replaceOperand(*II, 2, ModuloC);

      assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
                 ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
             "Shift amount expected to be modulo bitwidth");

      // Canonicalize funnel shift right by constant to funnel shift left. This
      // is not entirely arbitrary. For historical reasons, the backend may
      // recognize rotate left patterns but miss rotate right patterns.
      if (IID == Intrinsic::fshr) {
        // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
        Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
        Module *Mod = II->getModule();
        Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
        return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
      }
      assert(IID == Intrinsic::fshl &&
             "All funnel shifts by simple constants should go left");

      // fshl(X, 0, C) --> shl X, C
      // fshl(X, undef, C) --> shl X, C
      if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
        return BinaryOperator::CreateShl(Op0, ShAmtC);

      // fshl(0, X, C) --> lshr X, (BW-C)
      // fshl(undef, X, C) --> lshr X, (BW-C)
      if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
        return BinaryOperator::CreateLShr(Op1,
                                          ConstantExpr::getSub(WidthC, ShAmtC));

      // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
      if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
        Module *Mod = II->getModule();
        Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
        return CallInst::Create(Bswap, { Op0 });
      }
      if (Instruction *BitOp =
              matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
                                     /*MatchBitReversals*/ true))
        return BitOp;
    }

    // Left or right might be masked.
    if (SimplifyDemandedInstructionBits(*II))
      return &CI;

    // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
    // so only the low bits of the shift amount are demanded if the bitwidth is
    // a power-of-2.
    if (!isPowerOf2_32(BitWidth))
      break;
    APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
    KnownBits Op2Known(BitWidth);
    if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
      return &CI;
    break;
  }
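  // Illustrative example (added comment, not from the original source): the
  // fshr-to-fshl canonicalization above rewrites
  //   call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 3)
  // as
  //   call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 29)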
  case Intrinsic::ptrmask: {
    unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
    KnownBits Known(BitWidth);
    if (SimplifyDemandedInstructionBits(*II, Known))
      return II;

    Value *InnerPtr, *InnerMask;
    bool Changed = false;
    // Combine:
    // (ptrmask (ptrmask p, A), B)
    //    -> (ptrmask p, (and A, B))
    if (match(II->getArgOperand(0),
              m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
                                                       m_Value(InnerMask))))) {
      assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
             "Mask types must match");
      // TODO: If InnerMask == Op1, we could copy attributes from inner
      // callsite -> outer callsite.
      Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
      replaceOperand(CI, 0, InnerPtr);
      replaceOperand(CI, 1, NewMask);
      Changed = true;
    }

    // See if we can deduce non-null.
    if (!CI.hasRetAttr(Attribute::NonNull) &&
        (Known.isNonZero() ||
         isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) {
      CI.addRetAttr(Attribute::NonNull);
      Changed = true;
    }

    unsigned NewAlignmentLog =
        std::min(Value::MaxAlignmentExponent,
                 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
    // Known bits will capture if we had alignment information associated with
    // the pointer argument.
    if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
      CI.addRetAttr(Attribute::getWithAlignment(
          CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
      Changed = true;
    }
    if (Changed)
      return &CI;
    break;
  }
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow: {
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;

    // Given 2 constant operands whose sum does not overflow:
    // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
    // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
    Value *X;
    const APInt *C0, *C1;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    bool IsSigned = IID == Intrinsic::sadd_with_overflow;
    bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
                             : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
    if (HasNWAdd && match(Arg1, m_APInt(C1))) {
      bool Overflow;
      APInt NewC =
          IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
      if (!Overflow)
        return replaceInstUsesWith(
            *II, Builder.CreateBinaryIntrinsic(
                     IID, X, ConstantInt::get(Arg1->getType(), NewC)));
    }
    break;
  }

  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::usub_with_overflow:
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;
    break;

  case Intrinsic::ssub_with_overflow: {
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;

    Constant *C;
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    // Given a constant C that is not the minimum signed value
    // for an integer of a given bit width:
    //
    // ssubo X, C -> saddo X, -C
    if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      // Build a saddo call that is equivalent to the discovered
      // ssubo call.
      return replaceInstUsesWith(
          *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
                                             Arg0, NegVal));
    }
    break;
  }
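  // Illustrative example (added comment, not from the original source): the
  // "ssubo X, C -> saddo X, -C" fold above rewrites
  //   call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 7)
  // as
  //   call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 -7)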
  case Intrinsic::uadd_sat:
  case Intrinsic::sadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::ssub_sat: {
    SaturatingInst *SI = cast<SaturatingInst>(II);
    Type *Ty = SI->getType();
    Value *Arg0 = SI->getLHS();
    Value *Arg1 = SI->getRHS();

    // Make use of known overflow information.
    OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
                                        Arg0, Arg1, SI);
    switch (OR) {
    case OverflowResult::MayOverflow:
      break;
    case OverflowResult::NeverOverflows:
      if (SI->isSigned())
        return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
      return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
    case OverflowResult::AlwaysOverflowsLow: {
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
      return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
    }
    case OverflowResult::AlwaysOverflowsHigh: {
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
      return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
    }
    }

    // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
    Constant *C;
    if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
        C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      return replaceInstUsesWith(
          *II, Builder.CreateBinaryIntrinsic(
                   Intrinsic::sadd_sat, Arg0, NegVal));
    }

    // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
    // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
    // if Val and Val2 have the same sign
    if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
      Value *X;
      const APInt *Val, *Val2;
      APInt NewVal;
      bool IsUnsigned =
          IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
      if (Other->getIntrinsicID() == IID &&
          match(Arg1, m_APInt(Val)) &&
          match(Other->getArgOperand(0), m_Value(X)) &&
          match(Other->getArgOperand(1), m_APInt(Val2))) {
        if (IsUnsigned)
          NewVal = Val->uadd_sat(*Val2);
        else if (Val->isNonNegative() == Val2->isNonNegative()) {
          bool Overflow;
          NewVal = Val->sadd_ov(*Val2, Overflow);
          if (Overflow) {
            // Both adds together may add more than SignedMaxValue
            // without saturating the final result.
            break;
          }
        } else {
          // Cannot fold saturated addition with different signs.
          break;
        }

        return replaceInstUsesWith(
            *II, Builder.CreateBinaryIntrinsic(
                     IID, X, ConstantInt::get(II->getType(), NewVal)));
      }
    }
    break;
  }
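  // Illustrative example (added comment, not from the original source): the
  // nested saturating-add fold above combines
  //   sadd.sat(sadd.sat(%x, 3), 5)  -->  sadd.sat(%x, 8)
  // because both constants have the same sign and their sum does not overflow.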
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
        (Arg0->hasOneUse() || Arg1->hasOneUse())) {
      // If both operands are negated, invert the call and negate the result:
      // min(-X, -Y) --> -(max(X, Y))
      // max(-X, -Y) --> -(min(X, Y))
      Intrinsic::ID NewIID;
      switch (IID) {
      case Intrinsic::maxnum:
        NewIID = Intrinsic::minnum;
        break;
      case Intrinsic::minnum:
        NewIID = Intrinsic::maxnum;
        break;
      case Intrinsic::maximum:
        NewIID = Intrinsic::minimum;
        break;
      case Intrinsic::minimum:
        NewIID = Intrinsic::maximum;
        break;
      default:
        llvm_unreachable("unexpected intrinsic ID");
      }
      Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
      Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
      FNeg->copyIRFlags(II);
      return FNeg;
    }

    // m(m(X, C2), C1) -> m(X, C)
    const APFloat *C1, *C2;
    if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
      if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
          ((match(M->getArgOperand(0), m_Value(X)) &&
            match(M->getArgOperand(1), m_APFloat(C2))) ||
           (match(M->getArgOperand(1), m_Value(X)) &&
            match(M->getArgOperand(0), m_APFloat(C2))))) {
        APFloat Res(0.0);
        switch (IID) {
        case Intrinsic::maxnum:
          Res = maxnum(*C1, *C2);
          break;
        case Intrinsic::minnum:
          Res = minnum(*C1, *C2);
          break;
        case Intrinsic::maximum:
          Res = maximum(*C1, *C2);
          break;
        case Intrinsic::minimum:
          Res = minimum(*C1, *C2);
          break;
        default:
          llvm_unreachable("unexpected intrinsic ID");
        }
        Instruction *NewCall = Builder.CreateBinaryIntrinsic(
            IID, X, ConstantFP::get(Arg0->getType(), Res), II);
        // TODO: Conservatively intersecting FMF. If Res == C2, the transform
        //       was a simplification (so Arg0 and its original flags could
        //       propagate?)
        NewCall->andIRFlags(M);
        return replaceInstUsesWith(*II, NewCall);
      }
    }

    // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
    if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
        match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
        X->getType() == Y->getType()) {
      Value *NewCall =
          Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
      return new FPExtInst(NewCall, II->getType());
    }

    // max X, -X --> fabs X
    // min X, -X --> -(fabs X)
    // TODO: Remove one-use limitation? That is obviously better for max.
    //       It would be an extra instruction for min (fnabs), but that is
    //       still likely better for analysis and codegen.
    if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
        (match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
      Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
      if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
        R = Builder.CreateFNegFMF(R, II);
      return replaceInstUsesWith(*II, R);
    }

    break;
  }
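  // Illustrative example (added comment, not from the original source): the
  // negated-operand fold above rewrites
  //   minnum(fneg %x, fneg %y)  -->  fneg (maxnum %x, %y)
  // carrying the original call's fast-math flags onto the new call and fneg.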
  case Intrinsic::matrix_multiply: {
    // Optimize negation in matrix multiplication.

    // -A * -B -> A * B
    Value *A, *B;
    if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
        match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
      replaceOperand(*II, 0, A);
      replaceOperand(*II, 1, B);
      return II;
    }

    Value *Op0 = II->getOperand(0);
    Value *Op1 = II->getOperand(1);
    Value *OpNotNeg, *NegatedOp;
    unsigned NegatedOpArg, OtherOpArg;
    if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
      NegatedOp = Op0;
      NegatedOpArg = 0;
      OtherOpArg = 1;
    } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
      NegatedOp = Op1;
      NegatedOpArg = 1;
      OtherOpArg = 0;
    } else
      // Multiplication doesn't have a negated operand.
      break;

    // Only optimize if the negated operand has only one use.
    if (!NegatedOp->hasOneUse())
      break;

    Value *OtherOp = II->getOperand(OtherOpArg);
    VectorType *RetTy = cast<VectorType>(II->getType());
    VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
    VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
    ElementCount NegatedCount = NegatedOpTy->getElementCount();
    ElementCount OtherCount = OtherOpTy->getElementCount();
    ElementCount RetCount = RetTy->getElementCount();
    // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
    if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
        ElementCount::isKnownLT(OtherCount, RetCount)) {
      Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
      replaceOperand(*II, NegatedOpArg, OpNotNeg);
      replaceOperand(*II, OtherOpArg, InverseOtherOp);
      return II;
    }
    // (-A) * B -> -(A * B), if it is cheaper to negate the result
    if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
      SmallVector<Value *, 5> NewArgs(II->args());
      NewArgs[NegatedOpArg] = OpNotNeg;
      Instruction *NewMul =
          Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
      return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
    }
    break;
  }
  case Intrinsic::fmuladd: {
    // Canonicalize fast fmuladd to the separate fmul + fadd.
    if (II->isFast()) {
      BuilderTy::FastMathFlagGuard Guard(Builder);
      Builder.setFastMathFlags(II->getFastMathFlags());
      Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
                                      II->getArgOperand(1));
      Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
      Add->takeName(II);
      return replaceInstUsesWith(*II, Add);
    }

    // Try to simplify the underlying FMul.
    if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
                                    II->getFastMathFlags(),
                                    SQ.getWithInstruction(II))) {
      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
      FAdd->copyFastMathFlags(II);
      return FAdd;
    }

    [[fallthrough]];
  }
  case Intrinsic::fma: {
    // fma fneg(x), fneg(y), z -> fma x, y, z
    Value *Src0 = II->getArgOperand(0);
    Value *Src1 = II->getArgOperand(1);
    Value *X, *Y;
    if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
      replaceOperand(*II, 0, X);
      replaceOperand(*II, 1, Y);
      return II;
    }

    // fma fabs(x), fabs(x), z -> fma x, x, z
    if (match(Src0, m_FAbs(m_Value(X))) &&
        match(Src1, m_FAbs(m_Specific(X)))) {
      replaceOperand(*II, 0, X);
      replaceOperand(*II, 1, X);
      return II;
    }

    // Try to simplify the underlying FMul. We can only apply simplifications
    // that do not require rounding.
    if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
                                   II->getFastMathFlags(),
                                   SQ.getWithInstruction(II))) {
      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
      FAdd->copyFastMathFlags(II);
      return FAdd;
    }

    // fma x, y, 0 -> fmul x, y
    // This is always valid for -0.0, but requires nsz for +0.0 as
    // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
    if (match(II->getArgOperand(2), m_NegZeroFP()) ||
        (match(II->getArgOperand(2), m_PosZeroFP()) &&
         II->getFastMathFlags().noSignedZeros()))
      return BinaryOperator::CreateFMulFMF(Src0, Src1, II);

    break;
  }
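  // Illustrative example (added comment, not from the original source): the
  // "fma x, y, 0 -> fmul x, y" fold above turns
  //   call float @llvm.fma.f32(float %x, float %y, float -0.0)
  // into fmul float %x, %y; with +0.0 it additionally requires the nsz flag.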
  case Intrinsic::copysign: {
    Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
    if (SignBitMustBeZero(Sign, DL, &TLI)) {
      // If we know that the sign argument is positive, reduce to FABS:
      // copysign Mag, +Sign --> fabs Mag
      Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
      return replaceInstUsesWith(*II, Fabs);
    }
    // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
    const APFloat *C;
    if (match(Sign, m_APFloat(C)) && C->isNegative()) {
      // If we know that the sign argument is negative, reduce to FNABS:
      // copysign Mag, -Sign --> fneg (fabs Mag)
      Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
      return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
    }

    // Propagate sign argument through nested calls:
    // copysign Mag, (copysign ?, X) --> copysign Mag, X
    Value *X;
    if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
      return replaceOperand(*II, 1, X);

    // Peek through changes of magnitude's sign-bit. This call rewrites those:
    // copysign (fabs X), Sign --> copysign X, Sign
    // copysign (fneg X), Sign --> copysign X, Sign
    if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
      return replaceOperand(*II, 0, X);

    break;
  }
  case Intrinsic::fabs: {
    Value *Cond, *TVal, *FVal;
    if (match(II->getArgOperand(0),
              m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
      // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
      if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
        CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
        CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
        return SelectInst::Create(Cond, AbsT, AbsF);
      }
      // fabs (select Cond, -FVal, FVal) --> fabs FVal
      if (match(TVal, m_FNeg(m_Specific(FVal))))
        return replaceOperand(*II, 0, FVal);
      // fabs (select Cond, TVal, -TVal) --> fabs TVal
      if (match(FVal, m_FNeg(m_Specific(TVal))))
        return replaceOperand(*II, 0, TVal);
    }

    Value *Magnitude, *Sign;
    if (match(II->getArgOperand(0),
              m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
      // fabs (copysign x, y) -> (fabs x)
      CallInst *AbsSign =
          Builder.CreateCall(II->getCalledFunction(), {Magnitude});
      AbsSign->copyFastMathFlags(II);
      return replaceInstUsesWith(*II, AbsSign);
    }

    [[fallthrough]];
  }
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::trunc: {
    Value *ExtSrc;
    if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
      // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
      Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
      return new FPExtInst(NarrowII, II->getType());
    }
    break;
  }
  case Intrinsic::cos:
  case Intrinsic::amdgcn_cos: {
    Value *X;
    Value *Src = II->getArgOperand(0);
    if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
      // cos(-x) -> cos(x)
      // cos(fabs(x)) -> cos(x)
      return replaceOperand(*II, 0, X);
    }
    break;
  }
  case Intrinsic::sin: {
    Value *X;
    if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
      // sin(-x) --> -sin(x)
      Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
      Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);
      FNeg->copyFastMathFlags(II);
      return FNeg;
    }
    break;
  }
  case Intrinsic::ldexp: {
    // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
    //
    // The danger is if the first ldexp would overflow to infinity or underflow
    // to zero, but the combined exponent avoids it. We ignore this with
    // reassoc.
    //
    // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
    // it would just double down on the overflow/underflow which would occur
    // anyway.
    //
    // TODO: Could do better if we had range tracking for the input value
    // exponent. Also could broaden sign check to cover == 0 case.
    Value *Src = II->getArgOperand(0);
    Value *Exp = II->getArgOperand(1);
    Value *InnerSrc;
    Value *InnerExp;
    if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
                       m_Value(InnerSrc), m_Value(InnerExp)))) &&
        Exp->getType() == InnerExp->getType()) {
      FastMathFlags FMF = II->getFastMathFlags();
      FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();

      if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
          signBitMustBeTheSame(Exp, InnerExp, II, DL, &AC, &DT)) {
        // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
        // width.
        Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
        II->setArgOperand(1, NewExp);
        II->setFastMathFlags(InnerFlags); // Or the inner flags.
        return replaceOperand(*II, 0, InnerSrc);
      }
    }

    break;
  }
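  // Illustrative example (added comment, not from the original source): under
  // reassoc (or matching exponent signs), the fold above combines
  //   ldexp(ldexp(%x, %a), %b)  -->  ldexp(%x, add %a, %b)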
  case Intrinsic::ptrauth_auth:
  case Intrinsic::ptrauth_resign: {
    // (sign|resign) + (auth|resign) can be folded by omitting the middle
    // sign+auth component if the key and discriminator match.
    bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
    Value *Key = II->getArgOperand(1);
    Value *Disc = II->getArgOperand(2);

    // AuthKey will be the key we need to end up authenticating against in
    // whatever we replace this sequence with.
    Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
    if (auto CI = dyn_cast<CallBase>(II->getArgOperand(0))) {
      BasePtr = CI->getArgOperand(0);
      if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
        if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
          break;
      } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
        if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
          break;
        AuthKey = CI->getArgOperand(1);
        AuthDisc = CI->getArgOperand(2);
      } else
        break;
    } else
      break;

    unsigned NewIntrin;
    if (AuthKey && NeedSign) {
      // resign(0,1) + resign(1,2) = resign(0, 2)
      NewIntrin = Intrinsic::ptrauth_resign;
    } else if (AuthKey) {
      // resign(0,1) + auth(1) = auth(0)
      NewIntrin = Intrinsic::ptrauth_auth;
    } else if (NeedSign) {
      // sign(0) + resign(0, 1) = sign(1)
      NewIntrin = Intrinsic::ptrauth_sign;
    } else {
      // sign(0) + auth(0) = nop
      replaceInstUsesWith(*II, BasePtr);
      eraseInstFromFunction(*II);
      return nullptr;
    }

    SmallVector<Value *, 4> CallArgs;
    CallArgs.push_back(BasePtr);
    if (AuthKey) {
      CallArgs.push_back(AuthKey);
      CallArgs.push_back(AuthDisc);
    }

    if (NeedSign) {
      CallArgs.push_back(II->getArgOperand(3));
      CallArgs.push_back(II->getArgOperand(4));
    }

    Function *NewFn = Intrinsic::getDeclaration(II->getModule(), NewIntrin);
    return CallInst::Create(NewFn, CallArgs);
  }
  case Intrinsic::arm_neon_vtbl1:
  case Intrinsic::aarch64_neon_tbl1:
    if (Value *V = simplifyNeonTbl1(*II, Builder))
      return replaceInstUsesWith(*II, V);
    break;

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
                 IID == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
        Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
        return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }
  case Intrinsic::arm_neon_aesd:
  case Intrinsic::arm_neon_aese:
  case Intrinsic::aarch64_crypto_aesd:
  case Intrinsic::aarch64_crypto_aese: {
    Value *DataArg = II->getArgOperand(0);
    Value *KeyArg  = II->getArgOperand(1);

    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
    Value *Data, *Key;
    if (match(KeyArg, m_ZeroInt()) &&
        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
      replaceOperand(*II, 0, Data);
      replaceOperand(*II, 1, Key);
      return II;
    }
    break;
  }
  case Intrinsic::hexagon_V6_vandvrt:
  case Intrinsic::hexagon_V6_vandvrt_128B: {
    // Simplify Q -> V -> Q conversion.
    if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      Intrinsic::ID ID0 = Op0->getIntrinsicID();
      if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
          ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
        break;
      Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
      uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue();
      uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue();
      // Check if every byte has common bits in Bytes and Mask.
      uint64_t C = Bytes1 & Mask1;
      if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
        return replaceInstUsesWith(*II, Op0->getArgOperand(0));
    }
    break;
  }
  case Intrinsic::stackrestore: {
    enum class ClassifyResult {
      None,
      Alloca,
      StackRestore,
      CallWithSideEffects,
    };
    auto Classify = [](const Instruction *I) {
      if (isa<AllocaInst>(I))
        return ClassifyResult::Alloca;

      if (auto *CI = dyn_cast<CallInst>(I)) {
        if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return ClassifyResult::StackRestore;

          if (II->mayHaveSideEffects())
            return ClassifyResult::CallWithSideEffects;
        } else {
          // Consider all non-intrinsic calls to be side effects
          return ClassifyResult::CallWithSideEffects;
        }
      }

      return ClassifyResult::None;
    };

    // If the stacksave and the stackrestore are in the same BB, and there is
    // no intervening call, alloca, or stackrestore of a different stacksave,
    // remove the restore. This can happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave &&
          SS->getParent() == II->getParent()) {
        BasicBlock::iterator BI(SS);
        bool CannotRemove = false;
        for (++BI; &*BI != II; ++BI) {
          switch (Classify(&*BI)) {
          case ClassifyResult::None:
            // So far so good, look at next instructions.
            break;

          case ClassifyResult::StackRestore:
            // If we found an intervening stackrestore for a different
            // stacksave, we can't remove the stackrestore. Otherwise, continue.
            if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
              CannotRemove = true;
            break;

          case ClassifyResult::Alloca:
          case ClassifyResult::CallWithSideEffects:
            // If we found an alloca, a non-intrinsic call, or an intrinsic
            // call with side effects, we can't remove the stackrestore.
            CannotRemove = true;
            break;
          }
          if (CannotRemove)
            break;
        }

        if (!CannotRemove)
          return eraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI(II);
    Instruction *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      switch (Classify(&*BI)) {
      case ClassifyResult::None:
        // So far so good, look at next instructions.
        break;

      case ClassifyResult::StackRestore:
        // If there is a stackrestore below this one, remove this one.
        return eraseInstFromFunction(CI);

      case ClassifyResult::Alloca:
      case ClassifyResult::CallWithSideEffects:
        // If we found an alloca, a non-intrinsic call, or an intrinsic call
        // with side effects (such as llvm.stacksave and llvm.read_register),
        // we can't remove the stack restore.
        CannotRemove = true;
        break;
      }
      if (CannotRemove)
        break;
    }

    // If the stack restore is in a return, resume, or unwind block and if there
    // are no allocas or calls between the restore and the return, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return eraseInstFromFunction(CI);
    break;
  }
  case Intrinsic::lifetime_end:
    // Asan needs to poison memory to detect invalid access which is possible
    // even for empty lifetime range.
    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
      break;

    if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
          return I.getIntrinsicID() == Intrinsic::lifetime_start;
        }))
      return nullptr;
    break;
  case Intrinsic::assume: {
    Value *IIOperand = II->getArgOperand(0);
    SmallVector<OperandBundleDef, 4> OpBundles;
    II->getOperandBundlesAsDefs(OpBundles);

    /// This will remove the boolean Condition from the assume given as
    /// argument and remove the assume if it becomes useless.
    /// always returns nullptr for use as a return values.
    auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
      assert(isa<AssumeInst>(Assume));
      if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
        return eraseInstFromFunction(CI);
      replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
      return nullptr;
    };
    // Remove an assume if it is followed by an identical assume.
    // TODO: Do we need this? Unless there are conflicting assumptions, the
    // computeKnownBits(IIOperand) below here eliminates redundant assumes.
    Instruction *Next = II->getNextNonDebugInstruction();
    if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
      return RemoveConditionFromAssume(Next);

    // Canonicalize assume(a && b) -> assume(a); assume(b);
    // Note: New assumption intrinsics created here are registered by
    // the InstCombineIRInserter object.
    FunctionType *AssumeIntrinsicTy = II->getFunctionType();
    Value *AssumeIntrinsic = II->getCalledOperand();
    Value *A, *B;
    if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
                         II->getName());
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
      return eraseInstFromFunction(*II);
    }
    // assume(!(a || b)) -> assume(!a); assume(!b);
    if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
                         Builder.CreateNot(A), OpBundles, II->getName());
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
                         Builder.CreateNot(B), II->getName());
      return eraseInstFromFunction(*II);
    }

    // assume( (load addr) != null ) -> add 'nonnull' metadata to load
    // (if assume is valid at the load)
    CmpInst::Predicate Pred;
    Instruction *LHS;
    if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
        Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
        LHS->getType()->isPointerTy() &&
        isValidAssumeForContext(II, LHS, &DT)) {
      MDNode *MD = MDNode::get(II->getContext(), std::nullopt);
      LHS->setMetadata(LLVMContext::MD_nonnull, MD);
      LHS->setMetadata(LLVMContext::MD_noundef, MD);
      return RemoveConditionFromAssume(II);

      // TODO: apply nonnull return attributes to calls and invokes
      // TODO: apply range metadata for range check patterns?
    }

    // Separate storage assumptions apply to the underlying allocations, not any
    // particular pointer within them. When evaluating the hints for AA purposes
    // we getUnderlyingObject them; by precomputing the answers here we can
    // avoid having to do so repeatedly there.
    for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
      OperandBundleUse OBU = II->getOperandBundleAt(Idx);
      if (OBU.getTagName() == "separate_storage") {
        assert(OBU.Inputs.size() == 2);
        auto MaybeSimplifyHint = [&](const Use &U) {
          Value *Hint = U.get();
          // Not having a limit is safe because InstCombine removes unreachable
          // code.
          Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
          if (Hint != UnderlyingObject)
            replaceUse(const_cast<Use &>(U), UnderlyingObject);
        };
        MaybeSimplifyHint(OBU.Inputs[0]);
        MaybeSimplifyHint(OBU.Inputs[1]);
      }
    }

    // Convert nonnull assume like:
    // %A = icmp ne i32* %PTR, null
    // call void @llvm.assume(i1 %A)
    // into
    // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
    if (EnableKnowledgeRetention &&
        match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
        Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
      if (auto *Replacement = buildAssumeFromKnowledge(
              {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {

        Replacement->insertBefore(Next);
        AC.registerAssumption(Replacement);
        return RemoveConditionFromAssume(II);
      }
    }

    // Convert alignment assume like:
    // %B = ptrtoint i32* %A to i64
    // %C = and i64 %B, Constant
    // %D = icmp eq i64 %C, 0
    // call void @llvm.assume(i1 %D)
    // into
    // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
    uint64_t AlignMask;
    if (EnableKnowledgeRetention &&
        match(IIOperand,
              m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
                    m_Zero())) &&
        Pred == CmpInst::ICMP_EQ) {
      if (isPowerOf2_64(AlignMask + 1)) {
        uint64_t Offset = 0;
        match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
        if (match(A, m_PtrToInt(m_Value(A)))) {
          /// Note: this doesn't preserve the offset information but merges
          /// offset and alignment.
          /// TODO: we can generate a GEP instead of merging the alignment with
          /// the offset.
          RetainedKnowledge RK{Attribute::Alignment,
                               (unsigned)MinAlign(Offset, AlignMask + 1), A};
          if (auto *Replacement =
                  buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {

            Replacement->insertAfter(II);
            AC.registerAssumption(Replacement);
          }
          return RemoveConditionFromAssume(II);
        }
      }
    }

    /// Canonicalize Knowledge in operand bundles.
    if (EnableKnowledgeRetention && II->hasOperandBundles()) {
      for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
        auto &BOI = II->bundle_op_info_begin()[Idx];
        RetainedKnowledge RK =
            llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
        if (BOI.End - BOI.Begin > 2)
          continue; // Prevent reducing knowledge in an align with offset since
                    // extracting a RetainedKnowledge from them looses offset
                    // information
        RetainedKnowledge CanonRK =
            llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
                                            &getAssumptionCache(),
                                            &getDominatorTree());
        if (CanonRK == RK)
          continue;
        if (!CanonRK) {
          if (BOI.End - BOI.Begin > 0) {
            Worklist.pushValue(II->op_begin()[BOI.Begin]);
            Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
          }
          continue;
        }
        assert(RK.AttrKind == CanonRK.AttrKind);
        if (BOI.End - BOI.Begin > 0)
          II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
        if (BOI.End - BOI.Begin > 1)
          II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
              Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
        if (RK.WasOn)
          Worklist.pushValue(RK.WasOn);
        return II;
      }
    }

    // If there is a dominating assume with the same condition as this one,
    // then this one is redundant, and should be removed.
    KnownBits Known(1);
    computeKnownBits(IIOperand, Known, 0, II);
    if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
      return eraseInstFromFunction(*II);

    // assume(false) is unreachable.
    if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
      CreateNonTerminatorUnreachable(II);
      return eraseInstFromFunction(*II);
    }

    // Update the cache of affected values for this assumption (we might be
    // here because we just simplified the condition).
    AC.updateAffectedValues(cast<AssumeInst>(II));
    break;
  }
  case Intrinsic::experimental_guard: {
    // Is this guard followed by another guard? We scan forward over a small
    // fixed window of instructions to handle common cases with conditions
    // computed between guards.
    Instruction *NextInst = II->getNextNonDebugInstruction();
    for (unsigned i = 0; i < GuardWideningWindow; i++) {
      // Note: Using context-free form to avoid compile time blow up
      if (!isSafeToSpeculativelyExecute(NextInst))
        break;
      NextInst = NextInst->getNextNonDebugInstruction();
    }
    Value *NextCond = nullptr;
    if (match(NextInst,
              m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
      Value *CurrCond = II->getArgOperand(0);

      // Remove a guard if it is immediately preceded by an identical guard.
      // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
      if (CurrCond != NextCond) {
        Instruction *MoveI = II->getNextNonDebugInstruction();
        while (MoveI != NextInst) {
          Instruction *Temp = MoveI;
          MoveI = MoveI->getNextNonDebugInstruction();
          Temp->moveBefore(II);
        }
        replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
      }
      eraseInstFromFunction(*NextInst);
      return II;
    }
    break;
  }
  case Intrinsic::vector_insert: {
    Value *Vec = II->getArgOperand(0);
    Value *SubVec = II->getArgOperand(1);
    Value *Idx = II->getArgOperand(2);
    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
    auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());

    // Only canonicalize if the destination vector, Vec, and SubVec are all
    // fixed vectors.
    if (DstTy && VecTy && SubVecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned SubVecNumElts = SubVecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // An insert that entirely overwrites Vec with SubVec is a nop.
      if (VecNumElts == SubVecNumElts)
        return replaceInstUsesWith(CI, SubVec);

      // Widen SubVec into a vector of the same width as Vec, since
      // shufflevector requires the two input vectors to be the same width.
      // Elements beyond the bounds of SubVec within the widened vector are
      // undefined.
      SmallVector<int, 8> WidenMask;
      unsigned i;
      for (i = 0; i != SubVecNumElts; ++i)
        WidenMask.push_back(i);
      for (; i != VecNumElts; ++i)
        WidenMask.push_back(PoisonMaskElem);

      Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);

      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != IdxN; ++i)
        Mask.push_back(i);
      for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
        Mask.push_back(i);
      for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
        Mask.push_back(i);

      Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
      return replaceInstUsesWith(CI, Shuffle);
    }
    break;
  }
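  // Illustrative example (added comment, not from the original source):
  // inserting a <2 x i32> %sub into <4 x i32> %vec at index 2 becomes
  //   shufflevector <4 x i32> %vec, <4 x i32> %widened_sub,
  //                 <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // where %widened_sub is %sub widened with poison elements.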
  case Intrinsic::vector_extract: {
    Value *Vec = II->getArgOperand(0);
    Value *Idx = II->getArgOperand(1);

    Type *ReturnType = II->getType();
    // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
    // ExtractIdx)
    unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
    Value *InsertTuple, *InsertIdx, *InsertValue;
    if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
                                                         m_Value(InsertValue),
                                                         m_Value(InsertIdx))) &&
        InsertValue->getType() == ReturnType) {
      unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
      // Case where we get the same index right after setting it.
      // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
      // InsertValue
      if (ExtractIdx == Index)
        return replaceInstUsesWith(CI, InsertValue);
      // If we are getting a different index than what was set in the
      // insert.vector intrinsic. We can just set the input tuple to the one up
      // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
      // InsertIndex), ExtractIndex)
      // --> extract.vector(InsertTuple, ExtractIndex)
      else
        return replaceOperand(CI, 0, InsertTuple);
    }

    auto *DstTy = dyn_cast<VectorType>(ReturnType);
    auto *VecTy = dyn_cast<VectorType>(Vec->getType());

    if (DstTy && VecTy) {
      auto DstEltCnt = DstTy->getElementCount();
      auto VecEltCnt = VecTy->getElementCount();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // Extracting the entirety of Vec is a nop.
      if (DstEltCnt == VecTy->getElementCount()) {
        replaceInstUsesWith(CI, Vec);
        return eraseInstFromFunction(CI);
      }

      // Only canonicalize to shufflevector if the destination vector and
      // Vec are fixed vectors.
      if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
        break;

      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
        Mask.push_back(IdxN + i);

      Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
      return replaceInstUsesWith(CI, Shuffle);
    }
    break;
  }
  case Intrinsic::experimental_vector_reverse: {
    Value *BO0, *BO1, *X, *Y;
    Value *Vec = II->getArgOperand(0);
    if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
      auto *OldBinOp = cast<BinaryOperator>(Vec);
      if (match(BO0, m_VecReverse(m_Value(X)))) {
        // rev(binop rev(X), rev(Y)) --> binop X, Y
        if (match(BO1, m_VecReverse(m_Value(Y))))
          return replaceInstUsesWith(CI,
                                     BinaryOperator::CreateWithCopiedFlags(
                                         OldBinOp->getOpcode(), X, Y, OldBinOp,
                                         OldBinOp->getName(), II));
        // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
        if (isSplatValue(BO1))
          return replaceInstUsesWith(CI,
                                     BinaryOperator::CreateWithCopiedFlags(
                                         OldBinOp->getOpcode(), X, BO1,
                                         OldBinOp, OldBinOp->getName(), II));
      }
      // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
      if (match(BO1, m_VecReverse(m_Value(Y))) && isSplatValue(BO0))
        return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
                                           OldBinOp->getOpcode(), BO0, Y,
                                           OldBinOp, OldBinOp->getName(), II));
    }
    // rev(unop rev(X)) --> unop X
    if (match(Vec, m_OneUse(m_UnOp(m_VecReverse(m_Value(X)))))) {
      auto *OldUnOp = cast<UnaryOperator>(Vec);
      auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
          OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
      return replaceInstUsesWith(CI, NewUnOp);
    }
    break;
  }
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_and: {
    // Canonicalize logical or/and reductions:
    // Or reduction for i1 is represented as:
    // %val = bitcast <ReduxWidth x i1> to iReduxWidth
    // %res = cmp ne iReduxWidth %val, 0
    // And reduction for i1 is represented as:
    // %val = bitcast <ReduxWidth x i1> to iReduxWidth
    // %res = cmp eq iReduxWidth %val, -1 (all ones)
    Value *Arg = II->getArgOperand(0);
    Value *Vect;
    if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
      if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
        if (FTy->getElementType() == Builder.getInt1Ty()) {
          Value *Res = Builder.CreateBitCast(
              Vect, Builder.getIntNTy(FTy->getNumElements()));
          if (IID == Intrinsic::vector_reduce_and) {
            Res = Builder.CreateICmpEQ(
                Res, ConstantInt::getAllOnesValue(Res->getType()));
          } else {
            assert(IID == Intrinsic::vector_reduce_or &&
                   "Expected or reduction.");
            Res = Builder.CreateIsNotNull(Res);
          }
          if (Arg != Vect)
            Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
                                     II->getType());
          return replaceInstUsesWith(CI, Res);
        }
    }
    [[fallthrough]];
  }
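  // Illustrative example for the i1 or-reduction canonicalization above (not
  // from the original source):
  //   %r = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v)
  // becomes
  //   %b = bitcast <4 x i1> %v to i4
  //   %r = icmp ne i4 %b, 0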
  case Intrinsic::vector_reduce_add: {
    if (IID == Intrinsic::vector_reduce_add) {
      // Convert vector_reduce_add(ZExt(<n x i1>)) to
      // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
      // Convert vector_reduce_add(SExt(<n x i1>)) to
      // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
      // Convert vector_reduce_add(<n x i1>) to
      // Trunc(ctpop(bitcast <n x i1> to in)).
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *V = Builder.CreateBitCast(
                Vect, Builder.getIntNTy(FTy->getNumElements()));
            Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
            if (Res->getType() != II->getType())
              Res = Builder.CreateZExtOrTrunc(Res, II->getType());
            if (Arg != Vect &&
                cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
              Res = Builder.CreateNeg(Res);
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
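  // Illustrative example for the add-reduction combine above (not from the
  // original source):
  //   %e = sext <8 x i1> %v to <8 x i32>
  //   %r = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %e)
  // becomes
  //   %b = bitcast <8 x i1> %v to i8
  //   %c = call i8 @llvm.ctpop.i8(i8 %b)
  //   %z = zext i8 %c to i32
  //   %r = sub i32 0, %z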
  case Intrinsic::vector_reduce_xor: {
    if (IID == Intrinsic::vector_reduce_xor) {
      // Exclusive disjunction reduction over the vector with
      // (potentially-extended) i1 element type is actually a
      // (potentially-extended) arithmetic `add` reduction over the original
      // non-extended value:
      //   vector_reduce_xor(?ext(<n x i1>))
      //     -->
      //   ?ext(vector_reduce_add(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = Builder.CreateAddReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
                                       II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
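  // Illustrative note for the xor-reduction combine above (not from the
  // original source): xor over i1 is addition modulo 2 (parity), so e.g.
  //   %r = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %v)
  // becomes
  //   %r = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %v)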
  case Intrinsic::vector_reduce_mul: {
    if (IID == Intrinsic::vector_reduce_mul) {
      // Multiplicative reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially zero-extended)
      // logical `and` reduction over the original non-extended value:
      //   vector_reduce_mul(?ext(<n x i1>))
      //     -->
      //   zext(vector_reduce_and(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = Builder.CreateAndReduce(Vect);
            if (Res->getType() != II->getType())
              Res = Builder.CreateZExt(Res, II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
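  // Illustrative note for the mul-reduction combine above (not from the
  // original source): over i1, multiplication is logical and, so
  //   %r = call i1 @llvm.vector.reduce.mul.v4i1(<4 x i1> %v)
  // becomes
  //   %r = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)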
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax: {
    if (IID == Intrinsic::vector_reduce_umin ||
        IID == Intrinsic::vector_reduce_umax) {
      // UMin/UMax reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially-extended)
      // logical `and`/`or` reduction over the original non-extended value:
      //   vector_reduce_u{min,max}(?ext(<n x i1>))
      //     -->
      //   ?ext(vector_reduce_{and,or}(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = IID == Intrinsic::vector_reduce_umin
                             ? Builder.CreateAndReduce(Vect)
                             : Builder.CreateOrReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
                                       II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
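  // Illustrative note for the umin/umax reduction combine above (not from the
  // original source): on i1, the unsigned minimum is 1 only if every lane is 1
  // (logical and) and the unsigned maximum is 1 if any lane is 1 (logical or),
  // e.g.
  //   %r = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %v)
  // becomes
  //   %r = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v)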
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax: {
    if (IID == Intrinsic::vector_reduce_smin ||
        IID == Intrinsic::vector_reduce_smax) {
      // SMin/SMax reduction over the vector with (potentially-extended)
      // i1 element type is actually a (potentially-extended)
      // logical `and`/`or` reduction over the original non-extended value:
      //   vector_reduce_s{min,max}(<n x i1>)
      //     -->
      //   vector_reduce_{or,and}(<n x i1>)
      // and
      //   vector_reduce_s{min,max}(sext(<n x i1>))
      //     -->
      //   sext(vector_reduce_{or,and}(<n x i1>))
      // and
      //   vector_reduce_s{min,max}(zext(<n x i1>))
      //     -->
      //   zext(vector_reduce_{and,or}(<n x i1>))
      Value *Arg = II->getArgOperand(0);
      Value *Vect;
      if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
            if (Arg != Vect)
              ExtOpc = cast<CastInst>(Arg)->getOpcode();
            Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
                          (ExtOpc == Instruction::CastOps::ZExt))
                             ? Builder.CreateAndReduce(Vect)
                             : Builder.CreateOrReduce(Vect);
            if (Arg != Vect)
              Res = Builder.CreateCast(ExtOpc, Res, II->getType());
            return replaceInstUsesWith(CI, Res);
          }
      }
    }
    [[fallthrough]];
  }
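  // Illustrative note for the smin/smax reduction combine above (not from the
  // original source): the and/or choice depends on the extension. After sext
  // (or with no extension at all), a true lane is the value -1, which is the
  // signed minimum, so smin becomes an or-reduction and smax an and-reduction.
  // After zext, a true lane is +1, so the roles flip: smin becomes an
  // and-reduction and smax an or-reduction.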
  case Intrinsic::vector_reduce_fmax:
  case Intrinsic::vector_reduce_fmin:
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul: {
    bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
                              IID != Intrinsic::vector_reduce_fmul) ||
                             II->hasAllowReassoc();
    const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
                             IID == Intrinsic::vector_reduce_fmul)
                                ? 1
                                : 0;
    Value *Arg = II->getArgOperand(ArgIdx);
    Value *V;
    ArrayRef<int> Mask;
    if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
        !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
        !cast<ShuffleVectorInst>(Arg)->isSingleSource())
      break;
    int Sz = Mask.size();
    SmallBitVector UsedIndices(Sz);
    for (int Idx : Mask) {
      if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
        break;
      UsedIndices.set(Idx);
    }
    // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
    // other changes.
    if (UsedIndices.all()) {
      replaceUse(II->getOperandUse(ArgIdx), V);
      return nullptr;
    }
    break;
  }
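  // Illustrative example for the shuffle elimination above (not from the
  // original source): a single-source permutation that uses every lane exactly
  // once cannot change a reassociative reduction, so in
  //   %p = shufflevector <4 x float> %v, <4 x float> poison,
  //                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  //   %r = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %p)
  // the reduction can use %v directly in place of %p.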
  case Intrinsic::is_fpclass: {
    if (Instruction *I = foldIntrinsicIsFPClass(*II))
      return I;
    break;
  }
  default: {
    // Handle target specific intrinsics
    std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
    if (V)
      return *V;
    break;
  }
  }

  // Try to fold intrinsic into select operands. This is legal if:
  //  * The intrinsic is speculatable.
  //  * The select condition is not a vector, or the intrinsic does not
  //    perform cross-lane operations.
  switch (IID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::ctpop:
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::usub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::sadd_sat:
    for (Value *Op : II->args())
      if (auto *Sel = dyn_cast<SelectInst>(Op))
        if (Instruction *R = FoldOpIntoSelect(*II, Sel))
          return R;
    [[fallthrough]];
  default:
    break;
  }
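  // Illustrative example for the select fold above (not from the original
  // source): with a speculatable, lane-local intrinsic and constant arms,
  //   %s = select i1 %c, i32 3, i32 8
  //   %r = call i32 @llvm.ctpop.i32(i32 %s)
  // can become
  //   %r = select i1 %c, i32 2, i32 1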
  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
    return Shuf;

  // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
  // context, so it is handled in visitCallBase and we should trigger it.
  return visitCallBase(*II);
}

// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
  auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
  // This check is solely here to handle arbitrary target-dependent syncscopes.
  // TODO: Can remove if does not matter in practice.
  if (NFI && FI.isIdenticalTo(NFI))
    return eraseInstFromFunction(FI);

  // Returns true if FI1 is an identical or stronger fence than FI2.
  auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
    auto FI1SyncScope = FI1->getSyncScopeID();
    // Consider same scope, where scope is global or single-thread.
    if (FI1SyncScope != FI2->getSyncScopeID() ||
        (FI1SyncScope != SyncScope::System &&
         FI1SyncScope != SyncScope::SingleThread))
      return false;

    return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
  };
  if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
    return eraseInstFromFunction(FI);

  if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
    if (isIdenticalOrStrongerFence(PFI, &FI))
      return eraseInstFromFunction(FI);
  return nullptr;
}
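// Illustrative example for the fence combine above (not from the original
// source): in
//   fence seq_cst
//   fence acquire
// the second fence is removed, because the preceding seq_cst fence in the same
// (system) scope already provides at least acquire ordering.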
// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
  return visitCallBase(II);
}

// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
  return visitCallBase(CBI);
}

Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
  if (!CI->getCalledFunction()) return nullptr;

  // Skip optimizing notail and musttail calls so
  // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
  // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
  if (CI->isMustTailCall() || CI->isNoTailCall())
    return nullptr;

  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
    replaceInstUsesWith(*From, With);
  };
  auto InstCombineErase = [this](Instruction *I) {
    eraseInstFromFunction(*I);
  };
  LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
                               InstCombineErase);
  if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
  }

  return nullptr;
}
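// Illustrative example of a LibCallSimplifier rewrite reached through
// tryOptimizeCall above (not from the original source): a well-known libcall
// fold turns
//   %r = call i32 @printf(ptr @.str)     ; "hello\n"
// into
//   %r = call i32 @puts(ptr @.str.1)     ; "hello"
// subject to TLI reporting that puts is available.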
static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca.  This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value.  Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}

static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find a
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
                            E = AdjustTramp->getParent()->begin();
       I != E;) {
    Instruction *Inst = &*--I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
static IntrinsicInst *findInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}
bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
                                            const TargetLibraryInfo *TLI) {
  // Note: We only handle cases which can't be driven from generic attributes
  // here. So, for example, nonnull and noalias (which are common properties
  // of some allocation functions) are expected to be handled via annotation
  // of the respective allocator declaration with generic attributes.
  bool Changed = false;

  if (!Call.getType()->isPointerTy())
    return Changed;

  std::optional<APInt> Size = getAllocSize(&Call, TLI);
  if (Size && *Size != 0) {
    // TODO: We really should just emit deref_or_null here and then
    // let the generic inference code combine that with nonnull.
    if (Call.hasRetAttr(Attribute::NonNull)) {
      Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
      Call.addRetAttr(Attribute::getWithDereferenceableBytes(
          Call.getContext(), Size->getLimitedValue()));
    } else {
      Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
      Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
          Call.getContext(), Size->getLimitedValue()));
    }
  }

  // Add alignment attribute if alignment is a power of two constant.
  Value *Alignment = getAllocAlignment(&Call, TLI);
  if (!Alignment)
    return Changed;

  ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
  if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
    uint64_t AlignmentVal = AlignOpC->getZExtValue();
    if (llvm::isPowerOf2_64(AlignmentVal)) {
      Align ExistingAlign = Call.getRetAlign().valueOrOne();
      Align NewAlign = Align(AlignmentVal);
      if (NewAlign > ExistingAlign) {
        Call.addRetAttr(
            Attribute::getWithAlignment(Call.getContext(), NewAlign));
        Changed = true;
      }
    }
  }
  return Changed;
}
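// Illustrative example for annotateAnyAllocSite above (not from the original
// source): for a recognized allocator such as
//   declare ptr @aligned_alloc(i64, i64)
// a call like
//   %p = call ptr @aligned_alloc(i64 32, i64 128)
// can be annotated as
//   %p = call align 32 dereferenceable_or_null(128) ptr @aligned_alloc(i64 32, i64 128)
// assuming TLI recognizes the allocator and no stronger attributes are
// already present.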
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
  bool Changed = annotateAnyAllocSite(Call, &TLI);

  // Mark any parameters that are known to be non-null with the nonnull
  // attribute.  This is helpful for inlining calls to functions with null
  // checks on their arguments.
  SmallVector<unsigned, 4> ArgNos;
  unsigned ArgNo = 0;

  for (Value *V : Call.args()) {
    if (V->getType()->isPointerTy() &&
        !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
        isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
      ArgNos.push_back(ArgNo);
    ArgNo++;
  }

  assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");

  if (!ArgNos.empty()) {
    AttributeList AS = Call.getAttributes();
    LLVMContext &Ctx = Call.getContext();
    AS = AS.addParamAttribute(Ctx, ArgNos,
                              Attribute::get(Ctx, Attribute::NonNull));
    Call.setAttributes(AS);
    Changed = true;
  }

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/callbr/invoke.
  Value *Callee = Call.getCalledOperand();
  Function *CalleeF = dyn_cast<Function>(Callee);
  if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
      transformConstExprCastCall(Call))
    return nullptr;

  if (CalleeF) {
    // Remove the convergent attr on calls when the callee is not convergent.
    if (Call.isConvergent() && !CalleeF->isConvergent() &&
        !CalleeF->isIntrinsic()) {
      LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
                        << "\n");
      Call.setNotConvergent();
      return &Call;
    }

    // If the call and callee calling conventions don't match, and neither one
    // of the calling conventions is compatible with C calling convention
    // this call must be unreachable, as the call is undefined.
    if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
         !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
         !(Call.getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = &Call;
      CreateNonTerminatorUnreachable(OldCall);
      // If OldCall does not return void then replaceInstUsesWith poison.
      // This allows ValueHandlers and custom metadata to adjust itself.
      if (!OldCall->getType()->isVoidTy())
        replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return eraseInstFromFunction(*OldCall);

      // We cannot remove an invoke or a callbr, because it would change the
      // CFG, just change the callee to a null pointer.
      cast<CallBase>(OldCall)->setCalledFunction(
          CalleeF->getFunctionType(),
          Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }
  }

  // Calling a null function pointer is undefined if a null address isn't
  // dereferenceable.
  if ((isa<ConstantPointerNull>(Callee) &&
       !NullPointerIsDefined(Call.getFunction())) ||
      isa<UndefValue>(Callee)) {
    // If Call does not return void then replaceInstUsesWith poison.
    // This allows ValueHandlers and custom metadata to adjust itself.
    if (!Call.getType()->isVoidTy())
      replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));

    if (Call.isTerminator()) {
      // Can't remove an invoke or callbr because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable, just remove it.
    CreateNonTerminatorUnreachable(&Call);
    return eraseInstFromFunction(Call);
  }

  if (IntrinsicInst *II = findInitTrampoline(Callee))
    return transformCallThroughTrampoline(Call, *II);

  if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
    InlineAsm *IA = cast<InlineAsm>(Callee);
    if (!IA->canThrow()) {
      // Normal inline asm calls cannot throw - mark them
      // 'nounwind'.
      Call.setDoesNotThrow();
      Changed = true;
    }
  }

  // Try to optimize the call if possible, we require DataLayout for most of
  // this.  None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
    Instruction *I = tryOptimizeCall(CI);
    // If we changed something, return the result; otherwise fall through to
    // the checks below.
    if (I) return eraseInstFromFunction(*I);
  }

  if (!Call.use_empty() && !Call.isMustTailCall())
    if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
      Type *CallTy = Call.getType();
      Type *RetArgTy = ReturnedArg->getType();
      if (RetArgTy->canLosslesslyBitCastTo(CallTy))
        return replaceInstUsesWith(
            Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
    }
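  // Illustrative example for the 'returned' argument fold above (not from the
  // original source): given
  //   declare ptr @copy_like(ptr returned, ptr, i64)
  //   %r = call ptr @copy_like(ptr %dst, ptr %src, i64 %n)
  // every use of %r can be replaced by %dst, since the attribute guarantees
  // the call returns its first argument.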
  // Drop unnecessary kcfi operand bundles from calls that were converted
  // into direct calls.
  auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
  if (Bundle && !Call.isIndirectCall()) {
    DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
      if (CalleeF) {
        ConstantInt *FunctionType = nullptr;
        ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);

        if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
          FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));

        if (FunctionType &&
            FunctionType->getZExtValue() != ExpectedType->getZExtValue())
          dbgs() << Call.getModule()->getName()
                 << ": warning: kcfi: " << Call.getCaller()->getName()
                 << ": call to " << CalleeF->getName()
                 << " using a mismatching function pointer type\n";
      }
    });

    return CallBase::removeOperandBundle(&Call, LLVMContext::OB_kcfi);
  }
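  // Illustrative example for the kcfi bundle cleanup above (not from the
  // original source): once an indirect call such as
  //   call void %fp(i32 %x) [ "kcfi"(i32 12345678) ]
  // has been turned into a direct call, the type-id check is no longer needed
  // and the bundle is dropped:
  //   call void @callee(i32 %x)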
  if (isRemovableAlloc(&Call, &TLI))
    return visitAllocSite(Call);

  // Handle intrinsics which can be used in both call and invoke context.
  switch (Call.getIntrinsicID()) {
  case Intrinsic::experimental_gc_statepoint: {
    GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
    SmallPtrSet<Value *, 32> LiveGcValues;
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);

      // Remove the relocation if unused.
      if (GCR.use_empty()) {
        eraseInstFromFunction(GCR);
        continue;
      }

      Value *DerivedPtr = GCR.getDerivedPtr();
      Value *BasePtr = GCR.getBasePtr();

      // Undef is undef, even after relocation.
      if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
        replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
        eraseInstFromFunction(GCR);
        continue;
      }

      if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
        // The relocation of null will be null for most any collector.
        // TODO: provide a hook for this in GCStrategy.  There might be some
        // weird collector this property does not hold for.
        if (isa<ConstantPointerNull>(DerivedPtr)) {
          // Use null-pointer of gc_relocate's type to replace it.
          replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
          eraseInstFromFunction(GCR);
          continue;
        }

        // isKnownNonNull -> nonnull attribute
        if (!GCR.hasRetAttr(Attribute::NonNull) &&
            isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
          GCR.addRetAttr(Attribute::NonNull);
          // We discovered new fact, re-check users.
          Worklist.pushUsersToWorkList(GCR);
        }
      }

      // If we have two copies of the same pointer in the statepoint argument
      // list, canonicalize to one.  This may let us common gc.relocates.
      if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
          GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
        auto *OpIntTy = GCR.getOperand(2)->getType();
        GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
      }

      // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
      // Canonicalize on the type from the uses to the defs

      // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
      LiveGcValues.insert(BasePtr);
      LiveGcValues.insert(DerivedPtr);
    }
    std::optional<OperandBundleUse> Bundle =
        GCSP.getOperandBundle(LLVMContext::OB_gc_live);
    unsigned NumOfGCLives = LiveGcValues.size();
    if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
      break;
    // We can reduce the size of gc live bundle.
    DenseMap<Value *, unsigned> Val2Idx;
    std::vector<Value *> NewLiveGc;
    for (Value *V : Bundle->Inputs) {
      if (Val2Idx.count(V))
        continue;
      if (LiveGcValues.count(V)) {
        Val2Idx[V] = NewLiveGc.size();
        NewLiveGc.push_back(V);
      } else
        Val2Idx[V] = NumOfGCLives;
    }
    // Update all gc.relocates
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
      Value *BasePtr = GCR.getBasePtr();
      assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
             "Missed live gc for base pointer");
      auto *OpIntTy1 = GCR.getOperand(1)->getType();
      GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
      Value *DerivedPtr = GCR.getDerivedPtr();
      assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
             "Missed live gc for derived pointer");
      auto *OpIntTy2 = GCR.getOperand(2)->getType();
      GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
    }
    // Create new statepoint instruction.
    OperandBundleDef NewBundle("gc-live", NewLiveGc);
    return CallBase::Create(&Call, NewBundle);
  }
  default: { break; }
  }

  return Changed ? &Call : nullptr;
}

/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/invoke.
/// CallBrInst is not supported.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
  auto *Callee =
      dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
  if (!Callee)
    return false;

  assert(!isa<CallBrInst>(Call) &&
         "CallBr's don't have a single point after a def to insert at");

  // If this is a call to a thunk function, don't remove the cast. Thunks are
  // used to transparently forward all incoming parameters and outgoing return
  // values, so it's important to leave the cast in place.
  if (Callee->hasFnAttribute("thunk"))
    return false;

  // If this is a call to a naked function, the assembly might be
  // using an argument, or otherwise rely on the frame layout,
  // and the function prototype will mismatch.
  if (Callee->hasFnAttribute(Attribute::Naked))
    return false;

  // If this is a musttail call, the callee's prototype must match the caller's
  // prototype with the exception of pointee types. The code below doesn't
  // implement that, so we can't do this transform.
  // TODO: Do the transform if it only requires adding pointer casts.
  if (Call.isMustTailCall())
    return false;

  Instruction *Caller = &Call;
  const AttributeList &CallerPAL = Call.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
      if (Callee->isDeclaration())
        return false; // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false; // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
      if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
        return false; // Attribute not compatible with transformed value.
    }

    // If the callbase is an invoke instruction, and the return value is
    // used by a PHI node in a successor, we cannot change the return type of
    // the call because there is no place to put the cast instruction (without
    // breaking the critical edge).  Bail out in this case.
    if (!Caller->use_empty()) {
      BasicBlock *PhisNotSupportedBlock = nullptr;
      if (auto *II = dyn_cast<InvokeInst>(Caller))
        PhisNotSupportedBlock = II->getNormalDest();
      if (PhisNotSupportedBlock)
        for (User *U : Caller->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == PhisNotSupportedBlock)
              return false;
    }
  }

  unsigned NumActualArgs = Call.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  // Prevent us turning:
  // declare void @takes_i32_inalloca(i32* inalloca)
  //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  //
  // into:
  //  call void @takes_i32_inalloca(i32* null)
  //
  // Similarly, avoid folding away bitcasts of byval calls.
  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    return false;

  auto AI = Call.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
      return false; // Cannot transform this parameter value.

    // Check if there are any incompatible attributes we cannot drop safely.
    if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
            .overlaps(AttributeFuncs::typeIncompatible(
                ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
      return false; // Attribute not compatible with transformed value.

    if (Call.isInAllocaArgument(i) ||
        CallerPAL.hasParamAttr(i, Attribute::Preallocated))
      return false; // Cannot transform to and from inalloca/preallocated.

    if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
      return false;

    if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
        Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
      return false; // Cannot transform to or from byval.
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters are the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
        FT->getNumParams() != Call.getFunctionType()->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty()) {
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    unsigned SRetIdx;
    if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
        SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
      return false;
  }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  SmallVector<Value *, 8> Args;
  SmallVector<AttributeSet, 8> ArgAttrs;
  Args.reserve(NumActualArgs);
  ArgAttrs.reserve(NumActualArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));

  LLVMContext &Ctx = Call.getContext();
  AI = Call.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    Value *NewArg = *AI;
    if ((*AI)->getType() != ParamTy)
      NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
    Args.push_back(NewArg);

    // Add any parameter attributes except the ones incompatible with the new
    // type. Note that we made sure all incompatible ones are safe to drop.
    AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
        ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
    ArgAttrs.push_back(
        CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
    ArgAttrs.push_back(AttributeSet());
  }

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        Value *NewArg = *AI;
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
              CastInst::getCastOpcode(*AI, false, PTy, false);
          NewArg = Builder.CreateCast(opcode, *AI, PTy);
        }
        Args.push_back(NewArg);

        // Add any parameter attributes.
        ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttrs();

  if (NewRetTy->isVoidTy())
    Caller->setName(""); // Void type should not have a name.

  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
         "missing argument attributes");
  AttributeList NewCallerPAL = AttributeList::get(
      Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);

  SmallVector<OperandBundleDef, 1> OpBundles;
  Call.getOperandBundlesAsDefs(OpBundles);

  CallBase *NewCall;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
                                   II->getUnwindDest(), Args, OpBundles);
  } else {
    NewCall = Builder.CreateCall(Callee, Args, OpBundles);
    cast<CallInst>(NewCall)->setTailCallKind(
        cast<CallInst>(Caller)->getTailCallKind());
  }
  NewCall->takeName(Caller);
  NewCall->setCallingConv(Call.getCallingConv());
  NewCall->setAttributes(NewCallerPAL);

  // Preserve prof metadata if any.
  NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});

  // Insert a cast of the return type as necessary.
  Instruction *NC = NewCall;
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      auto OptInsertPt = NewCall->getInsertionPointAfterDef();
      assert(OptInsertPt && "No place to insert cast");
      InsertNewInstBefore(NC, *OptInsertPt);
      Worklist.pushUsersToWorkList(*Caller);
    } else {
      NV = PoisonValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    replaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle()) {
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Caller, NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(Caller);
  }

  eraseInstFromFunction(*Caller);
  return true;
}
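// Illustrative example for transformConstExprCastCall above (not from the
// original source), using typed-pointer syntax for clarity:
//   %r = call i8* bitcast (i32* (i32*)* @f to i8* (i8*)*)(i8* %p)
// becomes a direct call with the casts moved to the argument and result:
//   %a = bitcast i8* %p to i32*
//   %c = call i32* @f(i32* %a)
//   %r = bitcast i32* %c to i8*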
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
                                                 IntrinsicInst &Tramp) {
  FunctionType *FTy = Call.getFunctionType();
  AttributeList Attrs = Call.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
  FunctionType *NestFTy = NestF->getFunctionType();

  AttributeList NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestArgNo = 0;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
                                      E = NestFTy->param_end();
         I != E; ++NestArgNo, ++I) {
      AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
      if (AS.hasAttribute(Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = AS;
        break;
      }
    }

    if (NestTy) {
      std::vector<Value *> NewArgs;
      std::vector<AttributeSet> NewArgAttrs;
      NewArgs.reserve(Call.arg_size() + 1);
      NewArgAttrs.reserve(Call.arg_size());

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.
      {
        unsigned ArgNo = 0;
        auto I = Call.arg_begin(), E = Call.arg_end();
        do {
          if (ArgNo == NestArgNo) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp.getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewArgAttrs.push_back(NestAttr);
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));

          ++ArgNo;
          ++I;
        } while (true);
      }

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type *> NewTypes;
      NewTypes.reserve(FTy->getNumParams() + 1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned ArgNo = 0;
        FunctionType::param_iterator I = FTy->param_begin(),
                                     E = FTy->param_end();

        do {
          if (ArgNo == NestArgNo)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++ArgNo;
          ++I;
        } while (true);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy =
          FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
      AttributeList NewPAL =
          AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
                             Attrs.getRetAttrs(), NewArgAttrs);

      SmallVector<OperandBundleDef, 1> OpBundles;
      Call.getOperandBundlesAsDefs(OpBundles);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
        NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
                                       II->getUnwindDest(), NewArgs, OpBundles);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
        NewCaller =
            CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
                               CBI->getIndirectDests(), NewArgs, OpBundles);
        cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
        cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
        cast<CallInst>(NewCaller)->setTailCallKind(
            cast<CallInst>(Call).getTailCallKind());
        cast<CallInst>(NewCaller)->setCallingConv(
            cast<CallInst>(Call).getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }
      NewCaller->setDebugLoc(Call.getDebugLoc());

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Call.setCalledFunction(FTy, NestF);
  return &Call;
}